Mercurial > hg > Members > kokubo > emacs
comparison .emacs.d/haskell-mode/haskell-str.el @ 0:2764b4f45f9f
1st commit
author | Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 21 Apr 2014 04:30:59 +0900 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2764b4f45f9f |
---|---|
1 ;;; haskell-str.el --- Haskell related string utilities | |
2 | |
3 ;; Copyright (C) 2013 Herbert Valerio Riedel | |
4 | |
5 ;; Author: Herbert Valerio Riedel <hvr@gnu.org> | |
6 | |
7 ;; This file is not part of GNU Emacs. | |
8 | |
9 ;; This file is free software; you can redistribute it and/or modify | |
10 ;; it under the terms of the GNU General Public License as published by | |
11 ;; the Free Software Foundation; either version 3 of the License, or | |
12 ;; (at your option) any later version. | |
13 | |
14 ;; This file is distributed in the hope that it will be useful, | |
15 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 ;; GNU General Public License for more details. | |
18 | |
19 ;; You should have received a copy of the GNU General Public License | |
20 ;; along with this program. If not, see <http://www.gnu.org/licenses/>. | |
21 | |
22 ;;; Commentary: | |
23 | |
24 ;;; Todo: | |
25 | |
26 ;; - write ERT tests | |
27 | |
28 ;;; Code: | |
29 | |
30 (defun haskell-str-trim (string) | |
31 "Remove whitespace around STRING. | |
32 | |
33 A Whitespace character is defined in the Haskell Report as follows | |
34 | |
35 whitechar -> newline | vertab | space | tab | uniWhite | |
36 newline -> return linefeed | return | linefeed | formfeed | |
37 uniWhite -> any Unicode character defined as whitespace | |
38 | |
39 Note: The implementation currently only supports ASCII | |
40 white-space characters, i.e. the implemention doesn't | |
41 consider uniWhite." | |
42 | |
43 (let ((s1 (if (string-match "[\t\n\v\f\r ]+\\'" string) (replace-match "" t t string) string))) | |
44 (if (string-match "\\`[\t\n\v\f\r ]+" s1) (replace-match "" t t s1) s1))) | |
45 | |
46 (defun haskell-str-only-spaces-p (string) | |
47 "Return t if STRING contains only whitespace (or is empty)." | |
48 (string= "" (haskell-str-trim string))) | |
49 | |
50 (defun haskell-str-take (string n) | |
51 "Return (up to) N character length prefix of STRING." | |
52 (substring string 0 (min (length string) n))) | |
53 | |
54 (defalias 'haskell-str-is-prefix-of-p 'string-prefix-p) | |
55 | |
56 (defun haskell-str-is-suffix-of-p (str1 str2 &optional ignore-case) | |
57 "Return non-nil if STR1 is a suffix of STR2. | |
58 If IGNORE-CASE is non-nil, the comparison is done without paying attention | |
59 to case differences. | |
60 | |
61 Dual to `haskell-str-is-prefix-of-p'" | |
62 (let ((pos (- (length str2) (length str1)))) | |
63 (if (>= pos 0) | |
64 (eq t (compare-strings str1 nil nil | |
65 str2 pos nil ignore-case))))) | |
66 | |
67 (defconst haskell-str-literal-encode-ascii-array | |
68 [ "\\NUL" "\\SOH" "\\STX" "\\ETX" "\\EOT" "\\ENQ" "\\ACK" "\\a" "\\b" "\\t" "\\n" "\\v" "\\f" "\\r" "\\SO" "\\SI" "\\DLE" "\\DC1" "\\DC2" "\\DC3" "\\DC4" "\\NAK" "\\SYN" "\\ETB" "\\CAN" "\\EM" "\\SUB" "\\ESC" "\\FS" "\\GS" "\\RS" "\\US" " " "!" "\\\"" "#" "$" "%" "&" "'" "(" ")" "*" "+" "," "-" "." "/" "0" "1" "2" "3" "4" "5" "6" "7" "8" "9" ":" ";" "<" "=" ">" "?" "@" "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" "S" "T" "U" "V" "W" "X" "Y" "Z" "[" "\\\\" "]" "^" "_" "`" "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s" "t" "u" "v" "w" "x" "y" "z" "{" "|" "}" "~" "\\DEL" ] | |
69 "Array of encodings for 7-bit ASCII character points indexed by ASCII value.") | |
70 | |
71 (defun haskell-str-literal-encode (str &optional no-quotes) | |
72 "Encode STR according Haskell escape rules using 7-bit ASCII representation. | |
73 | |
74 The serialization has been implement to closely match the | |
75 behaviour of GHC's Show instance for Strings. | |
76 | |
77 If NO-QUOTES is non-nil, omit wrapping result in quotes. | |
78 | |
79 This is the dual operation to `haskell-str-literal-decode'." | |
80 | |
81 (let ((lastc -1)) | |
82 (let ((encode (lambda (c) | |
83 (let ((lc lastc)) | |
84 (setq lastc c) | |
85 (if (>= c 128) ;; if non-ASCII code point | |
86 (format "\\%d" c) | |
87 ;; else, for ASCII code points | |
88 (if (or (and (= lc 14) (= c ?H)) ;; "\SO\&H" | |
89 (and (>= lc 128) (>= c ?0) (<= c ?9))) ;; "\123\&4" | |
90 (concat "\\&" (aref haskell-str-literal-encode-ascii-array c)) | |
91 (aref haskell-str-literal-encode-ascii-array c) | |
92 )))))) | |
93 | |
94 (if no-quotes | |
95 (mapconcat encode str "") | |
96 (concat "\"" (mapconcat encode str "") "\""))))) | |
97 | |
98 (defconst haskell-str-literal-escapes-regexp | |
99 (concat "[\\]\\(?:" | |
100 (regexp-opt (append | |
101 (mapcar (lambda (c) (format "%c" c)) | |
102 "abfnrtv\\\"'&") ;; "charesc" escape sequences | |
103 (mapcar (lambda (c) (format "^%c" c)) | |
104 "ABCDEFGHIJKLMNOPQRSTUVWXYZ@[\\]^_") ;; "cntrl" escape sequences | |
105 (mapcar (lambda (s) (format "%s" s)) | |
106 (split-string "NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR | |
107 SO SI DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC | |
108 FS GS RS US SP DEL")))) ;; "ascii" (w\o "cntrl") escape sequences | |
109 "\\|" "[\t\n\v\f\r ]+[\\]" ;; whitespace gaps | |
110 "\\|" "[0-9]+" ;; decimal escape sequence | |
111 "\\|" "o[0-7]+" ;; octal escape sequence | |
112 "\\|" "x[0-9a-f]+" ;; hex escape sequence | |
113 "\\)?") ;; everything else is an invalid escape sequence | |
114 "Regexp for matching escape codes in string literals. | |
115 See Haskell Report Sect 2.6, | |
116 URL `http://www.haskell.org/onlinereport/haskell2010/haskellch2.html#x7-200002.6', | |
117 for more details.") | |
118 | |
119 (defconst haskell-str-literal-decode1-table | |
120 (let ((h (make-hash-table :test 'equal))) | |
121 (mapc (lambda (c) (puthash (concat "\\" (car c)) (cdr c) h)) | |
122 '(;; ascii-escapes | |
123 ("NUL" . "\x00") ("SOH" . "\x01") ("STX" . "\x02") ("ETX" . "\x03") ("EOT" . "\x04") ("ENQ" . "\x05") | |
124 ("ACK" . "\x06") ("BEL" . "\x07") ("BS" . "\x08") ("HT" . "\x09") ("LF" . "\x0a") ("VT" . "\x0b") | |
125 ("FF" . "\x0c") ("CR" . "\x0d") ("SO" . "\x0e") ("SI" . "\x0f") ("DLE" . "\x10") ("DC1" . "\x11") | |
126 ("DC2" . "\x12") ("DC3" . "\x13") ("DC4" . "\x14") ("NAK" . "\x15") ("SYN" . "\x16") ("ETB" . "\x17") | |
127 ("CAN" . "\x18") ("EM" . "\x19") ("SUB" . "\x1a") ("ESC" . "\x1b") ("FS" . "\x1c") ("GS" . "\x1d") | |
128 ("RS" . "\x1e") ("US" . "\x1f") ("SP" . "\x20") ("DEL" . "\x7f" ) | |
129 ;; C-compatible single-char escape sequences | |
130 ("a" . "\x07") ("b" . "\x08") ("f" . "\x0c") ("n" . "\x0a") ("r" . "\x0d") ("t" . "\x09") ("v" . "\x0b") | |
131 ;; trivial escapes | |
132 ("\\" . "\\") ("\"" . "\"") ("'" . "'") | |
133 ;; "empty" escape | |
134 ("&" . ""))) | |
135 h) | |
136 "Hash table containing irregular escape sequences and their decoded strings. | |
137 Used by `haskell-str-literal-decode1'.") | |
138 | |
139 (defun haskell-str-literal-decode1 (l) | |
140 "Decode a single string literal escape sequence. | |
141 L must contain exactly one escape sequence. | |
142 This is an internal function used by `haskell-str-literal-decode'." | |
143 (let ((case-fold-search nil)) | |
144 (cond | |
145 ((gethash l haskell-str-literal-decode1-table)) | |
146 ((string-match "\\`[\\][0-9]+\\'" l) (char-to-string (string-to-number (substring l 1) 10))) | |
147 ((string-match "\\`[\\]x[[:xdigit:]]+\\'" l) (char-to-string (string-to-number (substring l 2) 16))) | |
148 ((string-match "\\`[\\]o[0-7]+\\'" l) (char-to-string (string-to-number (substring l 2) 8))) | |
149 ((string-match "\\`[\\]\\^[@-_]\\'" l) (char-to-string (- (aref l 2) ?@))) ;; "cntrl" escapes | |
150 ((string-match "\\`[\\][\t\n\v\f\r ]+[\\]\\'" l) "") ;; whitespace gap | |
151 (t (error "Invalid escape sequence"))))) | |
152 | |
153 (defun haskell-str-literal-decode (estr &optional no-quotes) | |
154 "Decode a Haskell string-literal. | |
155 If NO-QUOTES is nil, ESTR must be surrounded by quotes. | |
156 | |
157 This is the dual operation to `haskell-str-literal-encode'." | |
158 (if (and (not no-quotes) | |
159 (string-match-p "\\`\"[^\\\"[:cntrl:]]*\"\\'" estr)) | |
160 (substring estr 1 -1) ;; optimized fast-path for trivial strings | |
161 (let ((s (if no-quotes ;; else: do general decoding | |
162 estr | |
163 (if (string-match-p "\\`\".*\"\\'" estr) | |
164 (substring estr 1 -1) | |
165 (error "String literal must be delimited by quotes")))) | |
166 (case-fold-search nil)) | |
167 (replace-regexp-in-string haskell-str-literal-escapes-regexp #'haskell-str-literal-decode1 s t t)))) | |
168 | |
169 (defun haskell-str-ellipsize (string n) | |
170 "Return STRING truncated to (at most) N characters. | |
171 If truncation occured, last character in string is replaced by `…'. | |
172 See also `haskell-str-take'." | |
173 (cond | |
174 ((<= (length string) n) string) ;; no truncation needed | |
175 ((< n 1) "") | |
176 (t (concat (substring string 0 (1- n)) "…")))) | |
177 | |
178 (provide 'haskell-str) | |
179 | |
180 ;;; haskell-str.el ends here |