annotate libcpp/charset.c @ 120:f93fa5091070

fix conv1.c
author mir3636
date Thu, 08 Mar 2018 14:53:42 +0900
parents 04ced10e8804
children 84e7813d76e9
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1 /* CPP Library - charsets
111
kono
parents: 67
diff changeset
2 Copyright (C) 1998-2017 Free Software Foundation, Inc.
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
3
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
4 Broken out of c-lex.c Apr 2003, adding valid C99 UCN ranges.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
5
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
6 This program is free software; you can redistribute it and/or modify it
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
7 under the terms of the GNU General Public License as published by the
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
8 Free Software Foundation; either version 3, or (at your option) any
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
9 later version.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
10
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
11 This program is distributed in the hope that it will be useful,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
14 GNU General Public License for more details.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
15
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
16 You should have received a copy of the GNU General Public License
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
17 along with this program; see the file COPYING3. If not see
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
18 <http://www.gnu.org/licenses/>. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
19
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
20 #include "config.h"
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
21 #include "system.h"
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
22 #include "cpplib.h"
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
23 #include "internal.h"
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
24
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
25 /* Character set handling for C-family languages.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
26
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
27 Terminological note: In what follows, "charset" or "character set"
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
28 will be taken to mean both an abstract set of characters and an
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
29 encoding for that set.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
30
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
31 The C99 standard discusses two character sets: source and execution.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
32 The source character set is used for internal processing in translation
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
33 phases 1 through 4; the execution character set is used thereafter.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
34 Both are required by 5.2.1.2p1 to be multibyte encodings, not wide
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
35 character encodings (see 3.7.2, 3.7.3 for the standardese meanings
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
36 of these terms). Furthermore, the "basic character set" (listed in
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
37 5.2.1p3) is to be encoded in each with values one byte wide, and is
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
38 to appear in the initial shift state.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
39
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
40 It is not explicitly mentioned, but there is also a "wide execution
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
41 character set" used to encode wide character constants and wide
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
42 string literals; this is supposed to be the result of applying the
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
43 standard library function mbstowcs() to an equivalent narrow string
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
44 (6.4.5p5). However, the behavior of hexadecimal and octal
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
45 \-escapes is at odds with this; they are supposed to be translated
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
46 directly to wchar_t values (6.4.4.4p5,6).
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
47
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
48 The source character set is not necessarily the character set used
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
49 to encode physical source files on disk; translation phase 1 converts
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
50 from whatever that encoding is to the source character set.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
51
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
52 The presence of universal character names in C99 (6.4.3 et seq.)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
53 forces the source character set to be isomorphic to ISO 10646,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
54 that is, Unicode. There is no such constraint on the execution
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
55 character set; note also that the conversion from source to
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
56 execution character set does not occur for identifiers (5.1.1.2p1#5).
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
57
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
58 For convenience of implementation, the source character set's
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
59 encoding of the basic character set should be identical to the
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
60 execution character set OF THE HOST SYSTEM's encoding of the basic
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
61 character set, and it should not be a state-dependent encoding.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
62
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
63 cpplib uses UTF-8 or UTF-EBCDIC for the source character set,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
64 depending on whether the host is based on ASCII or EBCDIC (see
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
65 respectively Unicode section 2.3/ISO10646 Amendment 2, and Unicode
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
66 Technical Report #16). With limited exceptions, it relies on the
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
67 system library's iconv() primitive to do charset conversion
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
68 (specified in SUSv2). */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
69
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
70 #if !HAVE_ICONV
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
71 /* Make certain that the uses of iconv(), iconv_open(), iconv_close()
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
72 below, which are guarded only by if statements with compile-time
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
73 constant conditions, do not cause link errors. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
74 #define iconv_open(x, y) (errno = EINVAL, (iconv_t)-1)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
75 #define iconv(a,b,c,d,e) (errno = EINVAL, (size_t)-1)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
76 #define iconv_close(x) (void)0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
77 #define ICONV_CONST
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
78 #endif
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
79
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
80 #if HOST_CHARSET == HOST_CHARSET_ASCII
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
81 #define SOURCE_CHARSET "UTF-8"
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
82 #define LAST_POSSIBLY_BASIC_SOURCE_CHAR 0x7e
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
83 #elif HOST_CHARSET == HOST_CHARSET_EBCDIC
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
84 #define SOURCE_CHARSET "UTF-EBCDIC"
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
85 #define LAST_POSSIBLY_BASIC_SOURCE_CHAR 0xFF
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
86 #else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
87 #error "Unrecognized basic host character set"
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
88 #endif
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
89
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
90 #ifndef EILSEQ
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
91 #define EILSEQ EINVAL
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
92 #endif
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
93
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
94 /* This structure is used for a resizable string buffer throughout. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
95 /* Don't call it strbuf, as that conflicts with unistd.h on systems
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
96 such as DYNIX/ptx where unistd.h includes stropts.h. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
97 struct _cpp_strbuf
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
98 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
99 uchar *text;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
100 size_t asize;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
101 size_t len;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
102 };
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
103
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
104 /* This is enough to hold any string that fits on a single 80-column
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
105 line, even if iconv quadruples its size (e.g. conversion from
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
106 ASCII to UTF-32) rounded up to a power of two. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
107 #define OUTBUF_BLOCK_SIZE 256
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
108
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
109 /* Conversions between UTF-8 and UTF-16/32 are implemented by custom
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
110 logic. This is because a depressing number of systems lack iconv,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
111 or have have iconv libraries that do not do these conversions, so
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
112 we need a fallback implementation for them. To ensure the fallback
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
113 doesn't break due to neglect, it is used on all systems.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
114
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
115 UTF-32 encoding is nice and simple: a four-byte binary number,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
116 constrained to the range 00000000-7FFFFFFF to avoid questions of
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
117 signedness. We do have to cope with big- and little-endian
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
118 variants.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
119
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
120 UTF-16 encoding uses two-byte binary numbers, again in big- and
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
121 little-endian variants, for all values in the 00000000-0000FFFF
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
122 range. Values in the 00010000-0010FFFF range are encoded as pairs
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
123 of two-byte numbers, called "surrogate pairs": given a number S in
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
124 this range, it is mapped to a pair (H, L) as follows:
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
125
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
126 H = (S - 0x10000) / 0x400 + 0xD800
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
127 L = (S - 0x10000) % 0x400 + 0xDC00
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
128
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
129 Two-byte values in the D800...DFFF range are ill-formed except as a
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
130 component of a surrogate pair. Even if the encoding within a
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
131 two-byte value is little-endian, the H member of the surrogate pair
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
132 comes first.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
133
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
134 There is no way to encode values in the 00110000-7FFFFFFF range,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
135 which is not currently a problem as there are no assigned code
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
136 points in that range; however, the author expects that it will
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
137 eventually become necessary to abandon UTF-16 due to this
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
138 limitation. Note also that, because of these pairs, UTF-16 does
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
139 not meet the requirements of the C standard for a wide character
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
140 encoding (see 3.7.3 and 6.4.4.4p11).
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
141
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
142 UTF-8 encoding looks like this:
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
143
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
144 value range encoded as
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
145 00000000-0000007F 0xxxxxxx
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
146 00000080-000007FF 110xxxxx 10xxxxxx
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
147 00000800-0000FFFF 1110xxxx 10xxxxxx 10xxxxxx
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
148 00010000-001FFFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
149 00200000-03FFFFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
150 04000000-7FFFFFFF 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
151
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
152 Values in the 0000D800 ... 0000DFFF range (surrogates) are invalid,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
153 which means that three-byte sequences ED xx yy, with A0 <= xx <= BF,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
154 never occur. Note also that any value that can be encoded by a
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
155 given row of the table can also be encoded by all successive rows,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
156 but this is not done; only the shortest possible encoding for any
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
157 given value is valid. For instance, the character 07C0 could be
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
158 encoded as any of DF 80, E0 9F 80, F0 80 9F 80, F8 80 80 9F 80, or
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
159 FC 80 80 80 9F 80. Only the first is valid.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
160
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
161 An implementation note: the transformation from UTF-16 to UTF-8, or
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
162 vice versa, is easiest done by using UTF-32 as an intermediary. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
163
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
164 /* Internal primitives which go from an UTF-8 byte stream to native-endian
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
165 UTF-32 in a cppchar_t, or vice versa; this avoids an extra marshal/unmarshal
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
166 operation in several places below. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
167 static inline int
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
168 one_utf8_to_cppchar (const uchar **inbufp, size_t *inbytesleftp,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
169 cppchar_t *cp)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
170 {
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
171 static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01 };
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
172 static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
173
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
174 cppchar_t c;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
175 const uchar *inbuf = *inbufp;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
176 size_t nbytes, i;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
177
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
178 if (*inbytesleftp < 1)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
179 return EINVAL;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
180
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
181 c = *inbuf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
182 if (c < 0x80)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
183 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
184 *cp = c;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
185 *inbytesleftp -= 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
186 *inbufp += 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
187 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
188 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
189
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
190 /* The number of leading 1-bits in the first byte indicates how many
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
191 bytes follow. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
192 for (nbytes = 2; nbytes < 7; nbytes++)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
193 if ((c & ~masks[nbytes-1]) == patns[nbytes-1])
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
194 goto found;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
195 return EILSEQ;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
196 found:
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
197
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
198 if (*inbytesleftp < nbytes)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
199 return EINVAL;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
200
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
201 c = (c & masks[nbytes-1]);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
202 inbuf++;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
203 for (i = 1; i < nbytes; i++)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
204 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
205 cppchar_t n = *inbuf++;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
206 if ((n & 0xC0) != 0x80)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
207 return EILSEQ;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
208 c = ((c << 6) + (n & 0x3F));
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
209 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
210
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
211 /* Make sure the shortest possible encoding was used. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
212 if (c <= 0x7F && nbytes > 1) return EILSEQ;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
213 if (c <= 0x7FF && nbytes > 2) return EILSEQ;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
214 if (c <= 0xFFFF && nbytes > 3) return EILSEQ;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
215 if (c <= 0x1FFFFF && nbytes > 4) return EILSEQ;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
216 if (c <= 0x3FFFFFF && nbytes > 5) return EILSEQ;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
217
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
218 /* Make sure the character is valid. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
219 if (c > 0x7FFFFFFF || (c >= 0xD800 && c <= 0xDFFF)) return EILSEQ;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
220
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
221 *cp = c;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
222 *inbufp = inbuf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
223 *inbytesleftp -= nbytes;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
224 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
225 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
226
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
227 static inline int
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
228 one_cppchar_to_utf8 (cppchar_t c, uchar **outbufp, size_t *outbytesleftp)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
229 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
230 static const uchar masks[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
231 static const uchar limits[6] = { 0x80, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE };
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
232 size_t nbytes;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
233 uchar buf[6], *p = &buf[6];
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
234 uchar *outbuf = *outbufp;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
235
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
236 nbytes = 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
237 if (c < 0x80)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
238 *--p = c;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
239 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
240 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
241 do
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
242 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
243 *--p = ((c & 0x3F) | 0x80);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
244 c >>= 6;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
245 nbytes++;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
246 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
247 while (c >= 0x3F || (c & limits[nbytes-1]));
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
248 *--p = (c | masks[nbytes-1]);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
249 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
250
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
251 if (*outbytesleftp < nbytes)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
252 return E2BIG;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
253
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
254 while (p < &buf[6])
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
255 *outbuf++ = *p++;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
256 *outbytesleftp -= nbytes;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
257 *outbufp = outbuf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
258 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
259 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
260
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
261 /* The following four functions transform one character between the two
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
262 encodings named in the function name. All have the signature
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
263 int (*)(iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
264 uchar **outbufp, size_t *outbytesleftp)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
265
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
266 BIGEND must have the value 0 or 1, coerced to (iconv_t); it is
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
267 interpreted as a boolean indicating whether big-endian or
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
268 little-endian encoding is to be used for the member of the pair
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
269 that is not UTF-8.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
270
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
271 INBUFP, INBYTESLEFTP, OUTBUFP, OUTBYTESLEFTP work exactly as they
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
272 do for iconv.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
273
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
274 The return value is either 0 for success, or an errno value for
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
275 failure, which may be E2BIG (need more space), EILSEQ (ill-formed
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
276 input sequence), ir EINVAL (incomplete input sequence). */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
277
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
278 static inline int
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
279 one_utf8_to_utf32 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
280 uchar **outbufp, size_t *outbytesleftp)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
281 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
282 uchar *outbuf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
283 cppchar_t s = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
284 int rval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
285
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
286 /* Check for space first, since we know exactly how much we need. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
287 if (*outbytesleftp < 4)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
288 return E2BIG;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
289
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
290 rval = one_utf8_to_cppchar (inbufp, inbytesleftp, &s);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
291 if (rval)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
292 return rval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
293
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
294 outbuf = *outbufp;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
295 outbuf[bigend ? 3 : 0] = (s & 0x000000FF);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
296 outbuf[bigend ? 2 : 1] = (s & 0x0000FF00) >> 8;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
297 outbuf[bigend ? 1 : 2] = (s & 0x00FF0000) >> 16;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
298 outbuf[bigend ? 0 : 3] = (s & 0xFF000000) >> 24;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
299
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
300 *outbufp += 4;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
301 *outbytesleftp -= 4;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
302 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
303 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
304
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
305 static inline int
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
306 one_utf32_to_utf8 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
307 uchar **outbufp, size_t *outbytesleftp)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
308 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
309 cppchar_t s;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
310 int rval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
311 const uchar *inbuf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
312
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
313 if (*inbytesleftp < 4)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
314 return EINVAL;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
315
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
316 inbuf = *inbufp;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
317
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
318 s = inbuf[bigend ? 0 : 3] << 24;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
319 s += inbuf[bigend ? 1 : 2] << 16;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
320 s += inbuf[bigend ? 2 : 1] << 8;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
321 s += inbuf[bigend ? 3 : 0];
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
322
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
323 if (s >= 0x7FFFFFFF || (s >= 0xD800 && s <= 0xDFFF))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
324 return EILSEQ;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
325
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
326 rval = one_cppchar_to_utf8 (s, outbufp, outbytesleftp);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
327 if (rval)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
328 return rval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
329
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
330 *inbufp += 4;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
331 *inbytesleftp -= 4;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
332 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
333 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
334
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
335 static inline int
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
336 one_utf8_to_utf16 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
337 uchar **outbufp, size_t *outbytesleftp)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
338 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
339 int rval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
340 cppchar_t s = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
341 const uchar *save_inbuf = *inbufp;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
342 size_t save_inbytesleft = *inbytesleftp;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
343 uchar *outbuf = *outbufp;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
344
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
345 rval = one_utf8_to_cppchar (inbufp, inbytesleftp, &s);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
346 if (rval)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
347 return rval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
348
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
349 if (s > 0x0010FFFF)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
350 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
351 *inbufp = save_inbuf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
352 *inbytesleftp = save_inbytesleft;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
353 return EILSEQ;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
354 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
355
111
kono
parents: 67
diff changeset
356 if (s <= 0xFFFF)
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
357 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
358 if (*outbytesleftp < 2)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
359 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
360 *inbufp = save_inbuf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
361 *inbytesleftp = save_inbytesleft;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
362 return E2BIG;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
363 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
364 outbuf[bigend ? 1 : 0] = (s & 0x00FF);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
365 outbuf[bigend ? 0 : 1] = (s & 0xFF00) >> 8;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
366
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
367 *outbufp += 2;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
368 *outbytesleftp -= 2;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
369 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
370 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
371 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
372 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
373 cppchar_t hi, lo;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
374
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
375 if (*outbytesleftp < 4)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
376 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
377 *inbufp = save_inbuf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
378 *inbytesleftp = save_inbytesleft;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
379 return E2BIG;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
380 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
381
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
382 hi = (s - 0x10000) / 0x400 + 0xD800;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
383 lo = (s - 0x10000) % 0x400 + 0xDC00;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
384
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
385 /* Even if we are little-endian, put the high surrogate first.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
386 ??? Matches practice? */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
387 outbuf[bigend ? 1 : 0] = (hi & 0x00FF);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
388 outbuf[bigend ? 0 : 1] = (hi & 0xFF00) >> 8;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
389 outbuf[bigend ? 3 : 2] = (lo & 0x00FF);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
390 outbuf[bigend ? 2 : 3] = (lo & 0xFF00) >> 8;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
391
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
392 *outbufp += 4;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
393 *outbytesleftp -= 4;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
394 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
395 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
396 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
397
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
398 static inline int
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
399 one_utf16_to_utf8 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
400 uchar **outbufp, size_t *outbytesleftp)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
401 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
402 cppchar_t s;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
403 const uchar *inbuf = *inbufp;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
404 int rval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
405
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
406 if (*inbytesleftp < 2)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
407 return EINVAL;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
408 s = inbuf[bigend ? 0 : 1] << 8;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
409 s += inbuf[bigend ? 1 : 0];
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
410
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
411 /* Low surrogate without immediately preceding high surrogate is invalid. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
412 if (s >= 0xDC00 && s <= 0xDFFF)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
413 return EILSEQ;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
414 /* High surrogate must have a following low surrogate. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
415 else if (s >= 0xD800 && s <= 0xDBFF)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
416 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
417 cppchar_t hi = s, lo;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
418 if (*inbytesleftp < 4)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
419 return EINVAL;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
420
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
421 lo = inbuf[bigend ? 2 : 3] << 8;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
422 lo += inbuf[bigend ? 3 : 2];
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
423
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
424 if (lo < 0xDC00 || lo > 0xDFFF)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
425 return EILSEQ;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
426
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
427 s = (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
428 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
429
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
430 rval = one_cppchar_to_utf8 (s, outbufp, outbytesleftp);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
431 if (rval)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
432 return rval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
433
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
434 /* Success - update the input pointers (one_cppchar_to_utf8 has done
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
435 the output pointers for us). */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
436 if (s <= 0xFFFF)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
437 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
438 *inbufp += 2;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
439 *inbytesleftp -= 2;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
440 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
441 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
442 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
443 *inbufp += 4;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
444 *inbytesleftp -= 4;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
445 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
446 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
447 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
448
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
449 /* Helper routine for the next few functions. The 'const' on
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
450 one_conversion means that we promise not to modify what function is
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
451 pointed to, which lets the inliner see through it. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
452
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
453 static inline bool
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
454 conversion_loop (int (*const one_conversion)(iconv_t, const uchar **, size_t *,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
455 uchar **, size_t *),
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
456 iconv_t cd, const uchar *from, size_t flen, struct _cpp_strbuf *to)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
457 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
458 const uchar *inbuf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
459 uchar *outbuf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
460 size_t inbytesleft, outbytesleft;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
461 int rval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
462
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
463 inbuf = from;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
464 inbytesleft = flen;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
465 outbuf = to->text + to->len;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
466 outbytesleft = to->asize - to->len;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
467
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
468 for (;;)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
469 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
470 do
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
471 rval = one_conversion (cd, &inbuf, &inbytesleft,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
472 &outbuf, &outbytesleft);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
473 while (inbytesleft && !rval);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
474
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
475 if (__builtin_expect (inbytesleft == 0, 1))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
476 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
477 to->len = to->asize - outbytesleft;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
478 return true;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
479 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
480 if (rval != E2BIG)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
481 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
482 errno = rval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
483 return false;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
484 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
485
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
486 outbytesleft += OUTBUF_BLOCK_SIZE;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
487 to->asize += OUTBUF_BLOCK_SIZE;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
488 to->text = XRESIZEVEC (uchar, to->text, to->asize);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
489 outbuf = to->text + to->asize - outbytesleft;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
490 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
491 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
492
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
493
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
494 /* These functions convert entire strings between character sets.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
495 They all have the signature
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
496
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
497 bool (*)(iconv_t cd, const uchar *from, size_t flen, struct _cpp_strbuf *to);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
498
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
499 The input string FROM is converted as specified by the function
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
500 name plus the iconv descriptor CD (which may be fake), and the
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
501 result appended to TO. On any error, false is returned, otherwise true. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
502
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
503 /* These four use the custom conversion code above. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
504 static bool
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
505 convert_utf8_utf16 (iconv_t cd, const uchar *from, size_t flen,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
506 struct _cpp_strbuf *to)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
507 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
508 return conversion_loop (one_utf8_to_utf16, cd, from, flen, to);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
509 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
510
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
511 static bool
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
512 convert_utf8_utf32 (iconv_t cd, const uchar *from, size_t flen,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
513 struct _cpp_strbuf *to)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
514 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
515 return conversion_loop (one_utf8_to_utf32, cd, from, flen, to);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
516 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
517
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
518 static bool
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
519 convert_utf16_utf8 (iconv_t cd, const uchar *from, size_t flen,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
520 struct _cpp_strbuf *to)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
521 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
522 return conversion_loop (one_utf16_to_utf8, cd, from, flen, to);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
523 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
524
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
525 static bool
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
526 convert_utf32_utf8 (iconv_t cd, const uchar *from, size_t flen,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
527 struct _cpp_strbuf *to)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
528 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
529 return conversion_loop (one_utf32_to_utf8, cd, from, flen, to);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
530 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
531
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
532 /* Identity conversion, used when we have no alternative. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
533 static bool
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
534 convert_no_conversion (iconv_t cd ATTRIBUTE_UNUSED,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
535 const uchar *from, size_t flen, struct _cpp_strbuf *to)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
536 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
537 if (to->len + flen > to->asize)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
538 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
539 to->asize = to->len + flen;
111
kono
parents: 67
diff changeset
540 to->asize += to->asize / 4;
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
541 to->text = XRESIZEVEC (uchar, to->text, to->asize);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
542 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
543 memcpy (to->text + to->len, from, flen);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
544 to->len += flen;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
545 return true;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
546 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
547
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
548 /* And this one uses the system iconv primitive. It's a little
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
549 different, since iconv's interface is a little different. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
550 #if HAVE_ICONV
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
551
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
552 #define CONVERT_ICONV_GROW_BUFFER \
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
553 do { \
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
554 outbytesleft += OUTBUF_BLOCK_SIZE; \
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
555 to->asize += OUTBUF_BLOCK_SIZE; \
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
556 to->text = XRESIZEVEC (uchar, to->text, to->asize); \
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
557 outbuf = (char *)to->text + to->asize - outbytesleft; \
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
558 } while (0)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
559
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
560 static bool
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
561 convert_using_iconv (iconv_t cd, const uchar *from, size_t flen,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
562 struct _cpp_strbuf *to)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
563 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
564 ICONV_CONST char *inbuf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
565 char *outbuf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
566 size_t inbytesleft, outbytesleft;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
567
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
568 /* Reset conversion descriptor and check that it is valid. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
569 if (iconv (cd, 0, 0, 0, 0) == (size_t)-1)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
570 return false;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
571
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
572 inbuf = (ICONV_CONST char *)from;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
573 inbytesleft = flen;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
574 outbuf = (char *)to->text + to->len;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
575 outbytesleft = to->asize - to->len;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
576
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
577 for (;;)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
578 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
579 iconv (cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
580 if (__builtin_expect (inbytesleft == 0, 1))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
581 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
582 /* Close out any shift states, returning to the initial state. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
583 if (iconv (cd, 0, 0, &outbuf, &outbytesleft) == (size_t)-1)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
584 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
585 if (errno != E2BIG)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
586 return false;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
587
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
588 CONVERT_ICONV_GROW_BUFFER;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
589 if (iconv (cd, 0, 0, &outbuf, &outbytesleft) == (size_t)-1)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
590 return false;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
591 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
592
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
593 to->len = to->asize - outbytesleft;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
594 return true;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
595 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
596 if (errno != E2BIG)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
597 return false;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
598
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
599 CONVERT_ICONV_GROW_BUFFER;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
600 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
601 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
602 #else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
603 #define convert_using_iconv 0 /* prevent undefined symbol error below */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
604 #endif
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
605
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
606 /* Arrange for the above custom conversion logic to be used automatically
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
607 when conversion between a suitable pair of character sets is requested. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
608
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
609 #define APPLY_CONVERSION(CONVERTER, FROM, FLEN, TO) \
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
610 CONVERTER.func (CONVERTER.cd, FROM, FLEN, TO)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
611
111
kono
parents: 67
diff changeset
612 struct cpp_conversion
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
613 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
614 const char *pair;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
615 convert_f func;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
616 iconv_t fake_cd;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
617 };
111
kono
parents: 67
diff changeset
618 static const struct cpp_conversion conversion_tab[] = {
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
619 { "UTF-8/UTF-32LE", convert_utf8_utf32, (iconv_t)0 },
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
620 { "UTF-8/UTF-32BE", convert_utf8_utf32, (iconv_t)1 },
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
621 { "UTF-8/UTF-16LE", convert_utf8_utf16, (iconv_t)0 },
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
622 { "UTF-8/UTF-16BE", convert_utf8_utf16, (iconv_t)1 },
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
623 { "UTF-32LE/UTF-8", convert_utf32_utf8, (iconv_t)0 },
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
624 { "UTF-32BE/UTF-8", convert_utf32_utf8, (iconv_t)1 },
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
625 { "UTF-16LE/UTF-8", convert_utf16_utf8, (iconv_t)0 },
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
626 { "UTF-16BE/UTF-8", convert_utf16_utf8, (iconv_t)1 },
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
627 };
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
628
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
629 /* Subroutine of cpp_init_iconv: initialize and return a
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
630 cset_converter structure for conversion from FROM to TO. If
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
631 iconv_open() fails, issue an error and return an identity
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
632 converter. Silently return an identity converter if FROM and TO
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
633 are identical. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
634 static struct cset_converter
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
635 init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
636 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
637 struct cset_converter ret;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
638 char *pair;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
639 size_t i;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
640
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
641 if (!strcasecmp (to, from))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
642 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
643 ret.func = convert_no_conversion;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
644 ret.cd = (iconv_t) -1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
645 ret.width = -1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
646 return ret;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
647 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
648
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
649 pair = (char *) alloca(strlen(to) + strlen(from) + 2);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
650
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
651 strcpy(pair, from);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
652 strcat(pair, "/");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
653 strcat(pair, to);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
654 for (i = 0; i < ARRAY_SIZE (conversion_tab); i++)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
655 if (!strcasecmp (pair, conversion_tab[i].pair))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
656 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
657 ret.func = conversion_tab[i].func;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
658 ret.cd = conversion_tab[i].fake_cd;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
659 ret.width = -1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
660 return ret;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
661 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
662
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
663 /* No custom converter - try iconv. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
664 if (HAVE_ICONV)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
665 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
666 ret.func = convert_using_iconv;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
667 ret.cd = iconv_open (to, from);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
668 ret.width = -1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
669
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
670 if (ret.cd == (iconv_t) -1)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
671 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
672 if (errno == EINVAL)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
673 cpp_error (pfile, CPP_DL_ERROR, /* FIXME should be DL_SORRY */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
674 "conversion from %s to %s not supported by iconv",
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
675 from, to);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
676 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
677 cpp_errno (pfile, CPP_DL_ERROR, "iconv_open");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
678
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
679 ret.func = convert_no_conversion;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
680 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
681 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
682 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
683 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
684 cpp_error (pfile, CPP_DL_ERROR, /* FIXME: should be DL_SORRY */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
685 "no iconv implementation, cannot convert from %s to %s",
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
686 from, to);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
687 ret.func = convert_no_conversion;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
688 ret.cd = (iconv_t) -1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
689 ret.width = -1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
690 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
691 return ret;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
692 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
693
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
694 /* If charset conversion is requested, initialize iconv(3) descriptors
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
695 for conversion from the source character set to the execution
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
696 character sets. If iconv is not present in the C library, and
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
697 conversion is requested, issue an error. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
698
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
699 void
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
700 cpp_init_iconv (cpp_reader *pfile)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
701 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
702 const char *ncset = CPP_OPTION (pfile, narrow_charset);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
703 const char *wcset = CPP_OPTION (pfile, wide_charset);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
704 const char *default_wcset;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
705
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
706 bool be = CPP_OPTION (pfile, bytes_big_endian);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
707
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
708 if (CPP_OPTION (pfile, wchar_precision) >= 32)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
709 default_wcset = be ? "UTF-32BE" : "UTF-32LE";
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
710 else if (CPP_OPTION (pfile, wchar_precision) >= 16)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
711 default_wcset = be ? "UTF-16BE" : "UTF-16LE";
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
712 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
713 /* This effectively means that wide strings are not supported,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
714 so don't do any conversion at all. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
715 default_wcset = SOURCE_CHARSET;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
716
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
717 if (!ncset)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
718 ncset = SOURCE_CHARSET;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
719 if (!wcset)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
720 wcset = default_wcset;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
721
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
722 pfile->narrow_cset_desc = init_iconv_desc (pfile, ncset, SOURCE_CHARSET);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
723 pfile->narrow_cset_desc.width = CPP_OPTION (pfile, char_precision);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
724 pfile->utf8_cset_desc = init_iconv_desc (pfile, "UTF-8", SOURCE_CHARSET);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
725 pfile->utf8_cset_desc.width = CPP_OPTION (pfile, char_precision);
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
726 pfile->char16_cset_desc = init_iconv_desc (pfile,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
727 be ? "UTF-16BE" : "UTF-16LE",
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
728 SOURCE_CHARSET);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
729 pfile->char16_cset_desc.width = 16;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
730 pfile->char32_cset_desc = init_iconv_desc (pfile,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
731 be ? "UTF-32BE" : "UTF-32LE",
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
732 SOURCE_CHARSET);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
733 pfile->char32_cset_desc.width = 32;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
734 pfile->wide_cset_desc = init_iconv_desc (pfile, wcset, SOURCE_CHARSET);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
735 pfile->wide_cset_desc.width = CPP_OPTION (pfile, wchar_precision);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
736 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
737
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
738 /* Destroy iconv(3) descriptors set up by cpp_init_iconv, if necessary. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
739 void
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
740 _cpp_destroy_iconv (cpp_reader *pfile)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
741 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
742 if (HAVE_ICONV)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
743 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
744 if (pfile->narrow_cset_desc.func == convert_using_iconv)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
745 iconv_close (pfile->narrow_cset_desc.cd);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
746 if (pfile->utf8_cset_desc.func == convert_using_iconv)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
747 iconv_close (pfile->utf8_cset_desc.cd);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
748 if (pfile->char16_cset_desc.func == convert_using_iconv)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
749 iconv_close (pfile->char16_cset_desc.cd);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
750 if (pfile->char32_cset_desc.func == convert_using_iconv)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
751 iconv_close (pfile->char32_cset_desc.cd);
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
752 if (pfile->wide_cset_desc.func == convert_using_iconv)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
753 iconv_close (pfile->wide_cset_desc.cd);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
754 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
755 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
756
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
757 /* Utility routine for use by a full compiler. C is a character taken
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
758 from the *basic* source character set, encoded in the host's
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
759 execution encoding. Convert it to (the target's) execution
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
760 encoding, and return that value.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
761
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
762 Issues an internal error if C's representation in the narrow
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
763 execution character set fails to be a single-byte value (C99
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
764 5.2.1p3: "The representation of each member of the source and
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
765 execution character sets shall fit in a byte.") May also issue an
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
766 internal error if C fails to be a member of the basic source
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
767 character set (testing this exactly is too hard, especially when
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
768 the host character set is EBCDIC). */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
769 cppchar_t
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
770 cpp_host_to_exec_charset (cpp_reader *pfile, cppchar_t c)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
771 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
772 uchar sbuf[1];
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
773 struct _cpp_strbuf tbuf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
774
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
775 /* This test is merely an approximation, but it suffices to catch
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
776 the most important thing, which is that we don't get handed a
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
777 character outside the unibyte range of the host character set. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
778 if (c > LAST_POSSIBLY_BASIC_SOURCE_CHAR)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
779 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
780 cpp_error (pfile, CPP_DL_ICE,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
781 "character 0x%lx is not in the basic source character set\n",
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
782 (unsigned long)c);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
783 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
784 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
785
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
786 /* Being a character in the unibyte range of the host character set,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
787 we can safely splat it into a one-byte buffer and trust that that
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
788 is a well-formed string. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
789 sbuf[0] = c;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
790
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
791 /* This should never need to reallocate, but just in case... */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
792 tbuf.asize = 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
793 tbuf.text = XNEWVEC (uchar, tbuf.asize);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
794 tbuf.len = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
795
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
796 if (!APPLY_CONVERSION (pfile->narrow_cset_desc, sbuf, 1, &tbuf))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
797 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
798 cpp_errno (pfile, CPP_DL_ICE, "converting to execution character set");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
799 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
800 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
801 if (tbuf.len != 1)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
802 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
803 cpp_error (pfile, CPP_DL_ICE,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
804 "character 0x%lx is not unibyte in execution character set",
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
805 (unsigned long)c);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
806 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
807 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
808 c = tbuf.text[0];
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
809 free(tbuf.text);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
810 return c;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
811 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
812
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
813
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
814
111
kono
parents: 67
diff changeset
815 /* cpp_substring_ranges's constructor. */
kono
parents: 67
diff changeset
816
kono
parents: 67
diff changeset
817 cpp_substring_ranges::cpp_substring_ranges () :
kono
parents: 67
diff changeset
818 m_ranges (NULL),
kono
parents: 67
diff changeset
819 m_num_ranges (0),
kono
parents: 67
diff changeset
820 m_alloc_ranges (8)
kono
parents: 67
diff changeset
821 {
kono
parents: 67
diff changeset
822 m_ranges = XNEWVEC (source_range, m_alloc_ranges);
kono
parents: 67
diff changeset
823 }
kono
parents: 67
diff changeset
824
kono
parents: 67
diff changeset
825 /* cpp_substring_ranges's destructor. */
kono
parents: 67
diff changeset
826
kono
parents: 67
diff changeset
827 cpp_substring_ranges::~cpp_substring_ranges ()
kono
parents: 67
diff changeset
828 {
kono
parents: 67
diff changeset
829 free (m_ranges);
kono
parents: 67
diff changeset
830 }
kono
parents: 67
diff changeset
831
kono
parents: 67
diff changeset
832 /* Add RANGE to the vector of source_range information. */
kono
parents: 67
diff changeset
833
kono
parents: 67
diff changeset
834 void
kono
parents: 67
diff changeset
835 cpp_substring_ranges::add_range (source_range range)
kono
parents: 67
diff changeset
836 {
kono
parents: 67
diff changeset
837 if (m_num_ranges >= m_alloc_ranges)
kono
parents: 67
diff changeset
838 {
kono
parents: 67
diff changeset
839 m_alloc_ranges *= 2;
kono
parents: 67
diff changeset
840 m_ranges
kono
parents: 67
diff changeset
841 = (source_range *)xrealloc (m_ranges,
kono
parents: 67
diff changeset
842 sizeof (source_range) * m_alloc_ranges);
kono
parents: 67
diff changeset
843 }
kono
parents: 67
diff changeset
844 m_ranges[m_num_ranges++] = range;
kono
parents: 67
diff changeset
845 }
kono
parents: 67
diff changeset
846
kono
parents: 67
diff changeset
847 /* Read NUM ranges from LOC_READER, adding them to the vector of source_range
kono
parents: 67
diff changeset
848 information. */
kono
parents: 67
diff changeset
849
kono
parents: 67
diff changeset
850 void
kono
parents: 67
diff changeset
851 cpp_substring_ranges::add_n_ranges (int num,
kono
parents: 67
diff changeset
852 cpp_string_location_reader &loc_reader)
kono
parents: 67
diff changeset
853 {
kono
parents: 67
diff changeset
854 for (int i = 0; i < num; i++)
kono
parents: 67
diff changeset
855 add_range (loc_reader.get_next ());
kono
parents: 67
diff changeset
856 }
kono
parents: 67
diff changeset
857
kono
parents: 67
diff changeset
858
kono
parents: 67
diff changeset
859
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
860 /* Utility routine that computes a mask of the form 0000...111... with
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
861 WIDTH 1-bits. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
862 static inline size_t
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
863 width_to_mask (size_t width)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
864 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
865 width = MIN (width, BITS_PER_CPPCHAR_T);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
866 if (width >= CHAR_BIT * sizeof (size_t))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
867 return ~(size_t) 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
868 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
869 return ((size_t) 1 << width) - 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
870 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
871
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
872 /* A large table of unicode character information. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
873 enum {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
874 /* Valid in a C99 identifier? */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
875 C99 = 1,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
876 /* Valid in a C99 identifier, but not as the first character? */
111
kono
parents: 67
diff changeset
877 N99 = 2,
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
878 /* Valid in a C++ identifier? */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
879 CXX = 4,
111
kono
parents: 67
diff changeset
880 /* Valid in a C11/C++11 identifier? */
kono
parents: 67
diff changeset
881 C11 = 8,
kono
parents: 67
diff changeset
882 /* Valid in a C11/C++11 identifier, but not as the first character? */
kono
parents: 67
diff changeset
883 N11 = 16,
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
884 /* NFC representation is not valid in an identifier? */
111
kono
parents: 67
diff changeset
885 CID = 32,
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
886 /* Might be valid NFC form? */
111
kono
parents: 67
diff changeset
887 NFC = 64,
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
888 /* Might be valid NFKC form? */
111
kono
parents: 67
diff changeset
889 NKC = 128,
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
890 /* Certain preceding characters might make it not valid NFC/NKFC form? */
111
kono
parents: 67
diff changeset
891 CTX = 256
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
892 };
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
893
111
kono
parents: 67
diff changeset
894 struct ucnrange {
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
895 /* Bitmap of flags above. */
111
kono
parents: 67
diff changeset
896 unsigned short flags;
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
897 /* Combining class of the character. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
898 unsigned char combine;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
899 /* Last character in the range described by this entry. */
111
kono
parents: 67
diff changeset
900 unsigned int end;
kono
parents: 67
diff changeset
901 };
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
902 #include "ucnid.h"
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
903
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
904 /* Returns 1 if C is valid in an identifier, 2 if C is valid except at
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
905 the start of an identifier, and 0 if C is not valid in an
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
906 identifier. We assume C has already gone through the checks of
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
907 _cpp_valid_ucn. Also update NST for C if returning nonzero. The
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
908 algorithm is a simple binary search on the table defined in
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
909 ucnid.h. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
910
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
911 static int
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
912 ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
913 struct normalize_state *nst)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
914 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
915 int mn, mx, md;
111
kono
parents: 67
diff changeset
916 unsigned short valid_flags, invalid_start_flags;
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
917
111
kono
parents: 67
diff changeset
918 if (c > 0x10FFFF)
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
919 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
920
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
921 mn = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
922 mx = ARRAY_SIZE (ucnranges) - 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
923 while (mx != mn)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
924 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
925 md = (mn + mx) / 2;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
926 if (c <= ucnranges[md].end)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
927 mx = md;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
928 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
929 mn = md + 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
930 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
931
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
932 /* When -pedantic, we require the character to have been listed by
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
933 the standard for the current language. Otherwise, we accept the
111
kono
parents: 67
diff changeset
934 union of the acceptable sets for all supported language versions. */
kono
parents: 67
diff changeset
935 valid_flags = C99 | CXX | C11;
kono
parents: 67
diff changeset
936 if (CPP_PEDANTIC (pfile))
kono
parents: 67
diff changeset
937 {
kono
parents: 67
diff changeset
938 if (CPP_OPTION (pfile, c11_identifiers))
kono
parents: 67
diff changeset
939 valid_flags = C11;
kono
parents: 67
diff changeset
940 else if (CPP_OPTION (pfile, c99))
kono
parents: 67
diff changeset
941 valid_flags = C99;
kono
parents: 67
diff changeset
942 else if (CPP_OPTION (pfile, cplusplus))
kono
parents: 67
diff changeset
943 valid_flags = CXX;
kono
parents: 67
diff changeset
944 }
kono
parents: 67
diff changeset
945 if (! (ucnranges[mn].flags & valid_flags))
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
946 return 0;
111
kono
parents: 67
diff changeset
947 if (CPP_OPTION (pfile, c11_identifiers))
kono
parents: 67
diff changeset
948 invalid_start_flags = N11;
kono
parents: 67
diff changeset
949 else if (CPP_OPTION (pfile, c99))
kono
parents: 67
diff changeset
950 invalid_start_flags = N99;
kono
parents: 67
diff changeset
951 else
kono
parents: 67
diff changeset
952 invalid_start_flags = 0;
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
953
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
954 /* Update NST. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
955 if (ucnranges[mn].combine != 0 && ucnranges[mn].combine < nst->prev_class)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
956 nst->level = normalized_none;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
957 else if (ucnranges[mn].flags & CTX)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
958 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
959 bool safe;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
960 cppchar_t p = nst->previous;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
961
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
962 /* For Hangul, characters in the range AC00-D7A3 are NFC/NFKC,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
963 and are combined algorithmically from a sequence of the form
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
964 1100-1112 1161-1175 11A8-11C2
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
965 (if the third is not present, it is treated as 11A7, which is not
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
966 really a valid character).
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
967 Unfortunately, C99 allows (only) the NFC form, but C++ allows
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
968 only the combining characters. */
111
kono
parents: 67
diff changeset
969 if (c >= 0x1161 && c <= 0x1175)
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
970 safe = p < 0x1100 || p > 0x1112;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
971 else if (c >= 0x11A8 && c <= 0x11C2)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
972 safe = (p < 0xAC00 || p > 0xD7A3 || (p - 0xAC00) % 28 != 0);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
973 else
111
kono
parents: 67
diff changeset
974 safe = check_nfc (pfile, c, p);
kono
parents: 67
diff changeset
975 if (!safe)
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
976 {
111
kono
parents: 67
diff changeset
977 if ((c >= 0x1161 && c <= 0x1175) || (c >= 0x11A8 && c <= 0x11C2))
kono
parents: 67
diff changeset
978 nst->level = MAX (nst->level, normalized_identifier_C);
kono
parents: 67
diff changeset
979 else
kono
parents: 67
diff changeset
980 nst->level = normalized_none;
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
981 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
982 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
983 else if (ucnranges[mn].flags & NKC)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
984 ;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
985 else if (ucnranges[mn].flags & NFC)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
986 nst->level = MAX (nst->level, normalized_C);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
987 else if (ucnranges[mn].flags & CID)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
988 nst->level = MAX (nst->level, normalized_identifier_C);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
989 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
990 nst->level = normalized_none;
111
kono
parents: 67
diff changeset
991 if (ucnranges[mn].combine == 0)
kono
parents: 67
diff changeset
992 nst->previous = c;
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
993 nst->prev_class = ucnranges[mn].combine;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
994
111
kono
parents: 67
diff changeset
995 /* In C99, UCN digits may not begin identifiers. In C11 and C++11,
kono
parents: 67
diff changeset
996 UCN combining characters may not begin identifiers. */
kono
parents: 67
diff changeset
997 if (ucnranges[mn].flags & invalid_start_flags)
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
998 return 2;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
999
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1000 return 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1001 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1002
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1003 /* [lex.charset]: The character designated by the universal character
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1004 name \UNNNNNNNN is that character whose character short name in
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1005 ISO/IEC 10646 is NNNNNNNN; the character designated by the
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1006 universal character name \uNNNN is that character whose character
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1007 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1008 for a universal character name corresponds to a surrogate code point
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1009 (in the range 0xD800-0xDFFF, inclusive), the program is ill-formed.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1010 Additionally, if the hexadecimal value for a universal-character-name
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1011 outside a character or string literal corresponds to a control character
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1012 (in either of the ranges 0x00-0x1F or 0x7F-0x9F, both inclusive) or to a
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1013 character in the basic source character set, the program is ill-formed.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1014
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1015 C99 6.4.3: A universal character name shall not specify a character
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1016 whose short identifier is less than 00A0 other than 0024 ($), 0040 (@),
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1017 or 0060 (`), nor one in the range D800 through DFFF inclusive.
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1018
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1019 *PSTR must be preceded by "\u" or "\U"; it is assumed that the
111
kono
parents: 67
diff changeset
1020 buffer end is delimited by a non-hex digit. Returns false if the
kono
parents: 67
diff changeset
1021 UCN has not been consumed, true otherwise.
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1022
111
kono
parents: 67
diff changeset
1023 The value of the UCN, whether valid or invalid, is returned in *CP.
kono
parents: 67
diff changeset
1024 Diagnostics are emitted for invalid values. PSTR is updated to point
kono
parents: 67
diff changeset
1025 one beyond the UCN, or to the syntactically invalid character.
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1026
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1027 IDENTIFIER_POS is 0 when not in an identifier, 1 for the start of
111
kono
parents: 67
diff changeset
1028 an identifier, or 2 otherwise.
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1029
111
kono
parents: 67
diff changeset
1030 If LOC_READER is non-NULL, then position information is
kono
parents: 67
diff changeset
1031 read from *LOC_READER and CHAR_RANGE->m_finish is updated accordingly. */
kono
parents: 67
diff changeset
1032
kono
parents: 67
diff changeset
1033 bool
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1034 _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1035 const uchar *limit, int identifier_pos,
111
kono
parents: 67
diff changeset
1036 struct normalize_state *nst, cppchar_t *cp,
kono
parents: 67
diff changeset
1037 source_range *char_range,
kono
parents: 67
diff changeset
1038 cpp_string_location_reader *loc_reader)
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1039 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1040 cppchar_t result, c;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1041 unsigned int length;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1042 const uchar *str = *pstr;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1043 const uchar *base = str - 2;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1044
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1045 if (!CPP_OPTION (pfile, cplusplus) && !CPP_OPTION (pfile, c99))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1046 cpp_error (pfile, CPP_DL_WARNING,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1047 "universal character names are only valid in C++ and C99");
111
kono
parents: 67
diff changeset
1048 else if (CPP_OPTION (pfile, cpp_warn_c90_c99_compat) > 0
kono
parents: 67
diff changeset
1049 && !CPP_OPTION (pfile, cplusplus))
kono
parents: 67
diff changeset
1050 cpp_error (pfile, CPP_DL_WARNING,
kono
parents: 67
diff changeset
1051 "C99's universal character names are incompatible with C90");
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1052 else if (CPP_WTRADITIONAL (pfile) && identifier_pos == 0)
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
1053 cpp_warning (pfile, CPP_W_TRADITIONAL,
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
1054 "the meaning of '\\%c' is different in traditional C",
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
1055 (int) str[-1]);
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1056
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1057 if (str[-1] == 'u')
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1058 length = 4;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1059 else if (str[-1] == 'U')
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1060 length = 8;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1061 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1062 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1063 cpp_error (pfile, CPP_DL_ICE, "In _cpp_valid_ucn but not a UCN");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1064 length = 4;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1065 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1066
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1067 result = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1068 do
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1069 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1070 c = *str;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1071 if (!ISXDIGIT (c))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1072 break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1073 str++;
111
kono
parents: 67
diff changeset
1074 if (loc_reader)
kono
parents: 67
diff changeset
1075 {
kono
parents: 67
diff changeset
1076 gcc_assert (char_range);
kono
parents: 67
diff changeset
1077 char_range->m_finish = loc_reader->get_next ().m_finish;
kono
parents: 67
diff changeset
1078 }
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1079 result = (result << 4) + hex_value (c);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1080 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1081 while (--length && str < limit);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1082
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1083 /* Partial UCNs are not valid in strings, but decompose into
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1084 multiple tokens in identifiers, so we can't give a helpful
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1085 error message in that case. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1086 if (length && identifier_pos)
111
kono
parents: 67
diff changeset
1087 {
kono
parents: 67
diff changeset
1088 *cp = 0;
kono
parents: 67
diff changeset
1089 return false;
kono
parents: 67
diff changeset
1090 }
kono
parents: 67
diff changeset
1091
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1092 *pstr = str;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1093 if (length)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1094 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1095 cpp_error (pfile, CPP_DL_ERROR,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1096 "incomplete universal character name %.*s",
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1097 (int) (str - base), base);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1098 result = 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1099 }
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1100 /* The C99 standard permits $, @ and ` to be specified as UCNs. We use
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1101 hex escapes so that this also works with EBCDIC hosts.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1102 C++0x permits everything below 0xa0 within literals;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1103 ucn_valid_in_identifier will complain about identifiers. */
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1104 else if ((result < 0xa0
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1105 && !CPP_OPTION (pfile, cplusplus)
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1106 && (result != 0x24 && result != 0x40 && result != 0x60))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1107 || (result & 0x80000000)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1108 || (result >= 0xD800 && result <= 0xDFFF))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1109 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1110 cpp_error (pfile, CPP_DL_ERROR,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1111 "%.*s is not a valid universal character",
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1112 (int) (str - base), base);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1113 result = 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1114 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1115 else if (identifier_pos && result == 0x24
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1116 && CPP_OPTION (pfile, dollars_in_ident))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1117 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1118 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1119 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1120 CPP_OPTION (pfile, warn_dollars) = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1121 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1122 }
111
kono
parents: 67
diff changeset
1123 NORMALIZE_STATE_UPDATE_IDNUM (nst, result);
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1124 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1125 else if (identifier_pos)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1126 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1127 int validity = ucn_valid_in_identifier (pfile, result, nst);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1128
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1129 if (validity == 0)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1130 cpp_error (pfile, CPP_DL_ERROR,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1131 "universal character %.*s is not valid in an identifier",
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1132 (int) (str - base), base);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1133 else if (validity == 2 && identifier_pos == 1)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1134 cpp_error (pfile, CPP_DL_ERROR,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1135 "universal character %.*s is not valid at the start of an identifier",
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1136 (int) (str - base), base);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1137 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1138
111
kono
parents: 67
diff changeset
1139 *cp = result;
kono
parents: 67
diff changeset
1140 return true;
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1141 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1142
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1143 /* Convert an UCN, pointed to by FROM, to UTF-8 encoding, then translate
111
kono
parents: 67
diff changeset
1144 it to the execution character set and write the result into TBUF,
kono
parents: 67
diff changeset
1145 if TBUF is non-NULL.
kono
parents: 67
diff changeset
1146 An advanced pointer is returned. Issues all relevant diagnostics.
kono
parents: 67
diff changeset
1147 If LOC_READER is non-NULL, then RANGES must be non-NULL and CHAR_RANGE
kono
parents: 67
diff changeset
1148 contains the location of the character so far: location information
kono
parents: 67
diff changeset
1149 is read from *LOC_READER, and *RANGES is updated accordingly. */
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1150 static const uchar *
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1151 convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit,
111
kono
parents: 67
diff changeset
1152 struct _cpp_strbuf *tbuf, struct cset_converter cvt,
kono
parents: 67
diff changeset
1153 source_range char_range,
kono
parents: 67
diff changeset
1154 cpp_string_location_reader *loc_reader,
kono
parents: 67
diff changeset
1155 cpp_substring_ranges *ranges)
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1156 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1157 cppchar_t ucn;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1158 uchar buf[6];
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1159 uchar *bufp = buf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1160 size_t bytesleft = 6;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1161 int rval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1162 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1163
111
kono
parents: 67
diff changeset
1164 /* loc_reader and ranges must either be both NULL, or both be non-NULL. */
kono
parents: 67
diff changeset
1165 gcc_assert ((loc_reader != NULL) == (ranges != NULL));
kono
parents: 67
diff changeset
1166
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1167 from++; /* Skip u/U. */
111
kono
parents: 67
diff changeset
1168
kono
parents: 67
diff changeset
1169 if (loc_reader)
kono
parents: 67
diff changeset
1170 /* The u/U is part of the spelling of this character. */
kono
parents: 67
diff changeset
1171 char_range.m_finish = loc_reader->get_next ().m_finish;
kono
parents: 67
diff changeset
1172
kono
parents: 67
diff changeset
1173 _cpp_valid_ucn (pfile, &from, limit, 0, &nst,
kono
parents: 67
diff changeset
1174 &ucn, &char_range, loc_reader);
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1175
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1176 rval = one_cppchar_to_utf8 (ucn, &bufp, &bytesleft);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1177 if (rval)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1178 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1179 errno = rval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1180 cpp_errno (pfile, CPP_DL_ERROR,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1181 "converting UCN to source character set");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1182 }
111
kono
parents: 67
diff changeset
1183 else
kono
parents: 67
diff changeset
1184 {
kono
parents: 67
diff changeset
1185 if (tbuf)
kono
parents: 67
diff changeset
1186 if (!APPLY_CONVERSION (cvt, buf, 6 - bytesleft, tbuf))
kono
parents: 67
diff changeset
1187 cpp_errno (pfile, CPP_DL_ERROR,
kono
parents: 67
diff changeset
1188 "converting UCN to execution character set");
kono
parents: 67
diff changeset
1189
kono
parents: 67
diff changeset
1190 if (loc_reader)
kono
parents: 67
diff changeset
1191 {
kono
parents: 67
diff changeset
1192 int num_encoded_bytes = 6 - bytesleft;
kono
parents: 67
diff changeset
1193 for (int i = 0; i < num_encoded_bytes; i++)
kono
parents: 67
diff changeset
1194 ranges->add_range (char_range);
kono
parents: 67
diff changeset
1195 }
kono
parents: 67
diff changeset
1196 }
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1197
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1198 return from;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1199 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1200
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1201 /* Subroutine of convert_hex and convert_oct. N is the representation
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1202 in the execution character set of a numeric escape; write it into the
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1203 string buffer TBUF and update the end-of-string pointer therein. WIDE
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1204 is true if it's a wide string that's being assembled in TBUF. This
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1205 function issues no diagnostics and never fails. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1206 static void
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1207 emit_numeric_escape (cpp_reader *pfile, cppchar_t n,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1208 struct _cpp_strbuf *tbuf, struct cset_converter cvt)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1209 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1210 size_t width = cvt.width;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1211
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1212 if (width != CPP_OPTION (pfile, char_precision))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1213 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1214 /* We have to render this into the target byte order, which may not
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1215 be our byte order. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1216 bool bigend = CPP_OPTION (pfile, bytes_big_endian);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1217 size_t cwidth = CPP_OPTION (pfile, char_precision);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1218 size_t cmask = width_to_mask (cwidth);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1219 size_t nbwc = width / cwidth;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1220 size_t i;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1221 size_t off = tbuf->len;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1222 cppchar_t c;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1223
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1224 if (tbuf->len + nbwc > tbuf->asize)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1225 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1226 tbuf->asize += OUTBUF_BLOCK_SIZE;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1227 tbuf->text = XRESIZEVEC (uchar, tbuf->text, tbuf->asize);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1228 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1229
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1230 for (i = 0; i < nbwc; i++)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1231 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1232 c = n & cmask;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1233 n >>= cwidth;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1234 tbuf->text[off + (bigend ? nbwc - i - 1 : i)] = c;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1235 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1236 tbuf->len += nbwc;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1237 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1238 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1239 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1240 /* Note: this code does not handle the case where the target
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1241 and host have a different number of bits in a byte. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1242 if (tbuf->len + 1 > tbuf->asize)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1243 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1244 tbuf->asize += OUTBUF_BLOCK_SIZE;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1245 tbuf->text = XRESIZEVEC (uchar, tbuf->text, tbuf->asize);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1246 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1247 tbuf->text[tbuf->len++] = n;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1248 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1249 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1250
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1251 /* Convert a hexadecimal escape, pointed to by FROM, to the execution
111
kono
parents: 67
diff changeset
1252 character set and write it into the string buffer TBUF (if non-NULL).
kono
parents: 67
diff changeset
1253 Returns an advanced pointer, and issues diagnostics as necessary.
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1254 No character set translation occurs; this routine always produces the
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1255 execution-set character with numeric value equal to the given hex
111
kono
parents: 67
diff changeset
1256 number. You can, e.g. generate surrogate pairs this way.
kono
parents: 67
diff changeset
1257 If LOC_READER is non-NULL, then RANGES must be non-NULL and CHAR_RANGE
kono
parents: 67
diff changeset
1258 contains the location of the character so far: location information
kono
parents: 67
diff changeset
1259 is read from *LOC_READER, and *RANGES is updated accordingly. */
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1260 static const uchar *
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1261 convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit,
111
kono
parents: 67
diff changeset
1262 struct _cpp_strbuf *tbuf, struct cset_converter cvt,
kono
parents: 67
diff changeset
1263 source_range char_range,
kono
parents: 67
diff changeset
1264 cpp_string_location_reader *loc_reader,
kono
parents: 67
diff changeset
1265 cpp_substring_ranges *ranges)
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1266 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1267 cppchar_t c, n = 0, overflow = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1268 int digits_found = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1269 size_t width = cvt.width;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1270 size_t mask = width_to_mask (width);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1271
111
kono
parents: 67
diff changeset
1272 /* loc_reader and ranges must either be both NULL, or both be non-NULL. */
kono
parents: 67
diff changeset
1273 gcc_assert ((loc_reader != NULL) == (ranges != NULL));
kono
parents: 67
diff changeset
1274
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1275 if (CPP_WTRADITIONAL (pfile))
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
1276 cpp_warning (pfile, CPP_W_TRADITIONAL,
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
1277 "the meaning of '\\x' is different in traditional C");
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1278
111
kono
parents: 67
diff changeset
1279 /* Skip 'x'. */
kono
parents: 67
diff changeset
1280 from++;
kono
parents: 67
diff changeset
1281
kono
parents: 67
diff changeset
1282 /* The 'x' is part of the spelling of this character. */
kono
parents: 67
diff changeset
1283 if (loc_reader)
kono
parents: 67
diff changeset
1284 char_range.m_finish = loc_reader->get_next ().m_finish;
kono
parents: 67
diff changeset
1285
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1286 while (from < limit)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1287 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1288 c = *from;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1289 if (! hex_p (c))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1290 break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1291 from++;
111
kono
parents: 67
diff changeset
1292 if (loc_reader)
kono
parents: 67
diff changeset
1293 char_range.m_finish = loc_reader->get_next ().m_finish;
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1294 overflow |= n ^ (n << 4 >> 4);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1295 n = (n << 4) + hex_value (c);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1296 digits_found = 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1297 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1298
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1299 if (!digits_found)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1300 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1301 cpp_error (pfile, CPP_DL_ERROR,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1302 "\\x used with no following hex digits");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1303 return from;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1304 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1305
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1306 if (overflow | (n != (n & mask)))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1307 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1308 cpp_error (pfile, CPP_DL_PEDWARN,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1309 "hex escape sequence out of range");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1310 n &= mask;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1311 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1312
111
kono
parents: 67
diff changeset
1313 if (tbuf)
kono
parents: 67
diff changeset
1314 emit_numeric_escape (pfile, n, tbuf, cvt);
kono
parents: 67
diff changeset
1315 if (ranges)
kono
parents: 67
diff changeset
1316 ranges->add_range (char_range);
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1317
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1318 return from;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1319 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1320
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1321 /* Convert an octal escape, pointed to by FROM, to the execution
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1322 character set and write it into the string buffer TBUF. Returns an
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1323 advanced pointer, and issues diagnostics as necessary.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1324 No character set translation occurs; this routine always produces the
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1325 execution-set character with numeric value equal to the given octal
111
kono
parents: 67
diff changeset
1326 number.
kono
parents: 67
diff changeset
1327 If LOC_READER is non-NULL, then RANGES must be non-NULL and CHAR_RANGE
kono
parents: 67
diff changeset
1328 contains the location of the character so far: location information
kono
parents: 67
diff changeset
1329 is read from *LOC_READER, and *RANGES is updated accordingly. */
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1330 static const uchar *
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1331 convert_oct (cpp_reader *pfile, const uchar *from, const uchar *limit,
111
kono
parents: 67
diff changeset
1332 struct _cpp_strbuf *tbuf, struct cset_converter cvt,
kono
parents: 67
diff changeset
1333 source_range char_range,
kono
parents: 67
diff changeset
1334 cpp_string_location_reader *loc_reader,
kono
parents: 67
diff changeset
1335 cpp_substring_ranges *ranges)
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1336 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1337 size_t count = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1338 cppchar_t c, n = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1339 size_t width = cvt.width;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1340 size_t mask = width_to_mask (width);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1341 bool overflow = false;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1342
111
kono
parents: 67
diff changeset
1343 /* loc_reader and ranges must either be both NULL, or both be non-NULL. */
kono
parents: 67
diff changeset
1344 gcc_assert ((loc_reader != NULL) == (ranges != NULL));
kono
parents: 67
diff changeset
1345
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1346 while (from < limit && count++ < 3)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1347 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1348 c = *from;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1349 if (c < '0' || c > '7')
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1350 break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1351 from++;
111
kono
parents: 67
diff changeset
1352 if (loc_reader)
kono
parents: 67
diff changeset
1353 char_range.m_finish = loc_reader->get_next ().m_finish;
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1354 overflow |= n ^ (n << 3 >> 3);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1355 n = (n << 3) + c - '0';
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1356 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1357
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1358 if (n != (n & mask))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1359 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1360 cpp_error (pfile, CPP_DL_PEDWARN,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1361 "octal escape sequence out of range");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1362 n &= mask;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1363 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1364
111
kono
parents: 67
diff changeset
1365 if (tbuf)
kono
parents: 67
diff changeset
1366 emit_numeric_escape (pfile, n, tbuf, cvt);
kono
parents: 67
diff changeset
1367 if (ranges)
kono
parents: 67
diff changeset
1368 ranges->add_range (char_range);
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1369
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1370 return from;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1371 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1372
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1373 /* Convert an escape sequence (pointed to by FROM) to its value on
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1374 the target, and to the execution character set. Do not scan past
111
kono
parents: 67
diff changeset
1375 LIMIT. Write the converted value into TBUF, if TBUF is non-NULL.
kono
parents: 67
diff changeset
1376 Returns an advanced pointer. Handles all relevant diagnostics.
kono
parents: 67
diff changeset
1377 If LOC_READER is non-NULL, then RANGES must be non-NULL: location
kono
parents: 67
diff changeset
1378 information is read from *LOC_READER, and *RANGES is updated
kono
parents: 67
diff changeset
1379 accordingly. */
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1380 static const uchar *
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1381 convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit,
111
kono
parents: 67
diff changeset
1382 struct _cpp_strbuf *tbuf, struct cset_converter cvt,
kono
parents: 67
diff changeset
1383 cpp_string_location_reader *loc_reader,
kono
parents: 67
diff changeset
1384 cpp_substring_ranges *ranges)
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1385 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1386 /* Values of \a \b \e \f \n \r \t \v respectively. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1387 #if HOST_CHARSET == HOST_CHARSET_ASCII
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1388 static const uchar charconsts[] = { 7, 8, 27, 12, 10, 13, 9, 11 };
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1389 #elif HOST_CHARSET == HOST_CHARSET_EBCDIC
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1390 static const uchar charconsts[] = { 47, 22, 39, 12, 21, 13, 5, 11 };
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1391 #else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1392 #error "unknown host character set"
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1393 #endif
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1394
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1395 uchar c;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1396
111
kono
parents: 67
diff changeset
1397 /* Record the location of the backslash. */
kono
parents: 67
diff changeset
1398 source_range char_range;
kono
parents: 67
diff changeset
1399 if (loc_reader)
kono
parents: 67
diff changeset
1400 char_range = loc_reader->get_next ();
kono
parents: 67
diff changeset
1401
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1402 c = *from;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1403 switch (c)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1404 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1405 /* UCNs, hex escapes, and octal escapes are processed separately. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1406 case 'u': case 'U':
111
kono
parents: 67
diff changeset
1407 return convert_ucn (pfile, from, limit, tbuf, cvt,
kono
parents: 67
diff changeset
1408 char_range, loc_reader, ranges);
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1409
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1410 case 'x':
111
kono
parents: 67
diff changeset
1411 return convert_hex (pfile, from, limit, tbuf, cvt,
kono
parents: 67
diff changeset
1412 char_range, loc_reader, ranges);
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1413 break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1414
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1415 case '0': case '1': case '2': case '3':
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1416 case '4': case '5': case '6': case '7':
111
kono
parents: 67
diff changeset
1417 return convert_oct (pfile, from, limit, tbuf, cvt,
kono
parents: 67
diff changeset
1418 char_range, loc_reader, ranges);
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1419
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1420 /* Various letter escapes. Get the appropriate host-charset
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1421 value into C. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1422 case '\\': case '\'': case '"': case '?': break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1423
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1424 case '(': case '{': case '[': case '%':
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1425 /* '\(', etc, can be used at the beginning of a line in a long
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1426 string split onto multiple lines with \-newline, to prevent
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1427 Emacs or other text editors from getting confused. '\%' can
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1428 be used to prevent SCCS from mangling printf format strings. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1429 if (CPP_PEDANTIC (pfile))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1430 goto unknown;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1431 break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1432
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1433 case 'b': c = charconsts[1]; break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1434 case 'f': c = charconsts[3]; break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1435 case 'n': c = charconsts[4]; break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1436 case 'r': c = charconsts[5]; break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1437 case 't': c = charconsts[6]; break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1438 case 'v': c = charconsts[7]; break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1439
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1440 case 'a':
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1441 if (CPP_WTRADITIONAL (pfile))
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
1442 cpp_warning (pfile, CPP_W_TRADITIONAL,
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
1443 "the meaning of '\\a' is different in traditional C");
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1444 c = charconsts[0];
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1445 break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1446
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1447 case 'e': case 'E':
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1448 if (CPP_PEDANTIC (pfile))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1449 cpp_error (pfile, CPP_DL_PEDWARN,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1450 "non-ISO-standard escape sequence, '\\%c'", (int) c);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1451 c = charconsts[2];
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1452 break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1453
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1454 default:
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1455 unknown:
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1456 if (ISGRAPH (c))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1457 cpp_error (pfile, CPP_DL_PEDWARN,
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1458 "unknown escape sequence: '\\%c'", (int) c);
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1459 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1460 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1461 /* diagnostic.c does not support "%03o". When it does, this
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1462 code can use %03o directly in the diagnostic again. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1463 char buf[32];
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1464 sprintf(buf, "%03o", (int) c);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1465 cpp_error (pfile, CPP_DL_PEDWARN,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1466 "unknown escape sequence: '\\%s'", buf);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1467 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1468 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1469
111
kono
parents: 67
diff changeset
1470 if (tbuf)
kono
parents: 67
diff changeset
1471 /* Now convert what we have to the execution character set. */
kono
parents: 67
diff changeset
1472 if (!APPLY_CONVERSION (cvt, &c, 1, tbuf))
kono
parents: 67
diff changeset
1473 cpp_errno (pfile, CPP_DL_ERROR,
kono
parents: 67
diff changeset
1474 "converting escape sequence to execution character set");
kono
parents: 67
diff changeset
1475
kono
parents: 67
diff changeset
1476 if (loc_reader)
kono
parents: 67
diff changeset
1477 {
kono
parents: 67
diff changeset
1478 char_range.m_finish = loc_reader->get_next ().m_finish;
kono
parents: 67
diff changeset
1479 ranges->add_range (char_range);
kono
parents: 67
diff changeset
1480 }
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1481
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1482 return from + 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1483 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1484
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1485 /* TYPE is a token type. The return value is the conversion needed to
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1486 convert from source to execution character set for the given type. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1487 static struct cset_converter
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1488 converter_for_type (cpp_reader *pfile, enum cpp_ttype type)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1489 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1490 switch (type)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1491 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1492 default:
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1493 return pfile->narrow_cset_desc;
111
kono
parents: 67
diff changeset
1494 case CPP_UTF8CHAR:
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1495 case CPP_UTF8STRING:
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1496 return pfile->utf8_cset_desc;
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1497 case CPP_CHAR16:
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1498 case CPP_STRING16:
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1499 return pfile->char16_cset_desc;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1500 case CPP_CHAR32:
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1501 case CPP_STRING32:
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1502 return pfile->char32_cset_desc;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1503 case CPP_WCHAR:
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1504 case CPP_WSTRING:
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1505 return pfile->wide_cset_desc;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1506 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1507 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1508
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1509 /* FROM is an array of cpp_string structures of length COUNT. These
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1510 are to be converted from the source to the execution character set,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1511 escape sequences translated, and finally all are to be
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1512 concatenated. WIDE indicates whether or not to produce a wide
111
kono
parents: 67
diff changeset
1513 string. If TO is non-NULL, the result is written into TO.
kono
parents: 67
diff changeset
1514 If LOC_READERS and OUT are non-NULL, then location information
kono
parents: 67
diff changeset
1515 is read from LOC_READERS (which must be an array of length COUNT),
kono
parents: 67
diff changeset
1516 and location information is written to *RANGES.
kono
parents: 67
diff changeset
1517
kono
parents: 67
diff changeset
1518 Returns true for success, false for failure. */
kono
parents: 67
diff changeset
1519
kono
parents: 67
diff changeset
1520 static bool
kono
parents: 67
diff changeset
1521 cpp_interpret_string_1 (cpp_reader *pfile, const cpp_string *from, size_t count,
kono
parents: 67
diff changeset
1522 cpp_string *to, enum cpp_ttype type,
kono
parents: 67
diff changeset
1523 cpp_string_location_reader *loc_readers,
kono
parents: 67
diff changeset
1524 cpp_substring_ranges *out)
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1525 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1526 struct _cpp_strbuf tbuf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1527 const uchar *p, *base, *limit;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1528 size_t i;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1529 struct cset_converter cvt = converter_for_type (pfile, type);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1530
111
kono
parents: 67
diff changeset
1531 /* loc_readers and out must either be both NULL, or both be non-NULL. */
kono
parents: 67
diff changeset
1532 gcc_assert ((loc_readers != NULL) == (out != NULL));
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1533
111
kono
parents: 67
diff changeset
1534 if (to)
kono
parents: 67
diff changeset
1535 {
kono
parents: 67
diff changeset
1536 tbuf.asize = MAX (OUTBUF_BLOCK_SIZE, from->len);
kono
parents: 67
diff changeset
1537 tbuf.text = XNEWVEC (uchar, tbuf.asize);
kono
parents: 67
diff changeset
1538 tbuf.len = 0;
kono
parents: 67
diff changeset
1539 }
kono
parents: 67
diff changeset
1540
kono
parents: 67
diff changeset
1541 cpp_string_location_reader *loc_reader = NULL;
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1542 for (i = 0; i < count; i++)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1543 {
111
kono
parents: 67
diff changeset
1544 if (loc_readers)
kono
parents: 67
diff changeset
1545 loc_reader = &loc_readers[i];
kono
parents: 67
diff changeset
1546
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1547 p = from[i].text;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1548 if (*p == 'u')
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1549 {
111
kono
parents: 67
diff changeset
1550 p++;
kono
parents: 67
diff changeset
1551 if (loc_reader)
kono
parents: 67
diff changeset
1552 loc_reader->get_next ();
kono
parents: 67
diff changeset
1553 if (*p == '8')
kono
parents: 67
diff changeset
1554 {
kono
parents: 67
diff changeset
1555 p++;
kono
parents: 67
diff changeset
1556 if (loc_reader)
kono
parents: 67
diff changeset
1557 loc_reader->get_next ();
kono
parents: 67
diff changeset
1558 }
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1559 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1560 else if (*p == 'L' || *p == 'U') p++;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1561 if (*p == 'R')
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1562 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1563 const uchar *prefix;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1564
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1565 /* Skip over 'R"'. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1566 p += 2;
111
kono
parents: 67
diff changeset
1567 if (loc_reader)
kono
parents: 67
diff changeset
1568 {
kono
parents: 67
diff changeset
1569 loc_reader->get_next ();
kono
parents: 67
diff changeset
1570 loc_reader->get_next ();
kono
parents: 67
diff changeset
1571 }
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1572 prefix = p;
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
1573 while (*p != '(')
111
kono
parents: 67
diff changeset
1574 {
kono
parents: 67
diff changeset
1575 p++;
kono
parents: 67
diff changeset
1576 if (loc_reader)
kono
parents: 67
diff changeset
1577 loc_reader->get_next ();
kono
parents: 67
diff changeset
1578 }
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1579 p++;
111
kono
parents: 67
diff changeset
1580 if (loc_reader)
kono
parents: 67
diff changeset
1581 loc_reader->get_next ();
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1582 limit = from[i].text + from[i].len;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1583 if (limit >= p + (p - prefix) + 1)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1584 limit -= (p - prefix) + 1;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1585
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
1586 /* Raw strings are all normal characters; these can be fed
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
1587 directly to convert_cset. */
111
kono
parents: 67
diff changeset
1588 if (to)
kono
parents: 67
diff changeset
1589 if (!APPLY_CONVERSION (cvt, p, limit - p, &tbuf))
kono
parents: 67
diff changeset
1590 goto fail;
kono
parents: 67
diff changeset
1591
kono
parents: 67
diff changeset
1592 if (loc_reader)
kono
parents: 67
diff changeset
1593 {
kono
parents: 67
diff changeset
1594 /* If generating source ranges, assume we have a 1:1
kono
parents: 67
diff changeset
1595 correspondence between bytes in the source encoding and bytes
kono
parents: 67
diff changeset
1596 in the execution encoding (e.g. if we have a UTF-8 to UTF-8
kono
parents: 67
diff changeset
1597 conversion), so that this run of bytes in the source file
kono
parents: 67
diff changeset
1598 corresponds to a run of bytes in the execution string.
kono
parents: 67
diff changeset
1599 This requirement is guaranteed by an early-reject in
kono
parents: 67
diff changeset
1600 cpp_interpret_string_ranges. */
kono
parents: 67
diff changeset
1601 gcc_assert (cvt.func == convert_no_conversion);
kono
parents: 67
diff changeset
1602 out->add_n_ranges (limit - p, *loc_reader);
kono
parents: 67
diff changeset
1603 }
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1604
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1605 continue;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1606 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1607
111
kono
parents: 67
diff changeset
1608 /* If we don't now have a leading quote, something has gone wrong.
kono
parents: 67
diff changeset
1609 This can occur if cpp_interpret_string_ranges is handling a
kono
parents: 67
diff changeset
1610 stringified macro argument, but should not be possible otherwise. */
kono
parents: 67
diff changeset
1611 if (*p != '"' && *p != '\'')
kono
parents: 67
diff changeset
1612 {
kono
parents: 67
diff changeset
1613 gcc_assert (out != NULL);
kono
parents: 67
diff changeset
1614 cpp_error (pfile, CPP_DL_ERROR, "missing open quote");
kono
parents: 67
diff changeset
1615 if (to)
kono
parents: 67
diff changeset
1616 free (tbuf.text);
kono
parents: 67
diff changeset
1617 return false;
kono
parents: 67
diff changeset
1618 }
kono
parents: 67
diff changeset
1619
kono
parents: 67
diff changeset
1620 /* Skip leading quote. */
kono
parents: 67
diff changeset
1621 p++;
kono
parents: 67
diff changeset
1622 if (loc_reader)
kono
parents: 67
diff changeset
1623 loc_reader->get_next ();
kono
parents: 67
diff changeset
1624
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1625 limit = from[i].text + from[i].len - 1; /* Skip trailing quote. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1626
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1627 for (;;)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1628 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1629 base = p;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1630 while (p < limit && *p != '\\')
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1631 p++;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1632 if (p > base)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1633 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1634 /* We have a run of normal characters; these can be fed
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1635 directly to convert_cset. */
111
kono
parents: 67
diff changeset
1636 if (to)
kono
parents: 67
diff changeset
1637 if (!APPLY_CONVERSION (cvt, base, p - base, &tbuf))
kono
parents: 67
diff changeset
1638 goto fail;
kono
parents: 67
diff changeset
1639 /* Similar to above: assumes we have a 1:1 correspondence
kono
parents: 67
diff changeset
1640 between bytes in the source encoding and bytes in the
kono
parents: 67
diff changeset
1641 execution encoding. */
kono
parents: 67
diff changeset
1642 if (loc_reader)
kono
parents: 67
diff changeset
1643 {
kono
parents: 67
diff changeset
1644 gcc_assert (cvt.func == convert_no_conversion);
kono
parents: 67
diff changeset
1645 out->add_n_ranges (p - base, *loc_reader);
kono
parents: 67
diff changeset
1646 }
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1647 }
111
kono
parents: 67
diff changeset
1648 if (p >= limit)
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1649 break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1650
111
kono
parents: 67
diff changeset
1651 struct _cpp_strbuf *tbuf_ptr = to ? &tbuf : NULL;
kono
parents: 67
diff changeset
1652 p = convert_escape (pfile, p + 1, limit, tbuf_ptr, cvt,
kono
parents: 67
diff changeset
1653 loc_reader, out);
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1654 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1655 }
111
kono
parents: 67
diff changeset
1656
kono
parents: 67
diff changeset
1657 if (to)
kono
parents: 67
diff changeset
1658 {
kono
parents: 67
diff changeset
1659 /* NUL-terminate the 'to' buffer and translate it to a cpp_string
kono
parents: 67
diff changeset
1660 structure. */
kono
parents: 67
diff changeset
1661 emit_numeric_escape (pfile, 0, &tbuf, cvt);
kono
parents: 67
diff changeset
1662 tbuf.text = XRESIZEVEC (uchar, tbuf.text, tbuf.len);
kono
parents: 67
diff changeset
1663 to->text = tbuf.text;
kono
parents: 67
diff changeset
1664 to->len = tbuf.len;
kono
parents: 67
diff changeset
1665 }
kono
parents: 67
diff changeset
1666 /* Use the location of the trailing quote as the location of the
kono
parents: 67
diff changeset
1667 NUL-terminator. */
kono
parents: 67
diff changeset
1668 if (loc_reader)
kono
parents: 67
diff changeset
1669 {
kono
parents: 67
diff changeset
1670 source_range range = loc_reader->get_next ();
kono
parents: 67
diff changeset
1671 out->add_range (range);
kono
parents: 67
diff changeset
1672 }
kono
parents: 67
diff changeset
1673
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1674 return true;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1675
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1676 fail:
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1677 cpp_errno (pfile, CPP_DL_ERROR, "converting to execution character set");
111
kono
parents: 67
diff changeset
1678 if (to)
kono
parents: 67
diff changeset
1679 free (tbuf.text);
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1680 return false;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1681 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1682
111
kono
parents: 67
diff changeset
1683 /* FROM is an array of cpp_string structures of length COUNT. These
kono
parents: 67
diff changeset
1684 are to be converted from the source to the execution character set,
kono
parents: 67
diff changeset
1685 escape sequences translated, and finally all are to be
kono
parents: 67
diff changeset
1686 concatenated. WIDE indicates whether or not to produce a wide
kono
parents: 67
diff changeset
1687 string. The result is written into TO. Returns true for success,
kono
parents: 67
diff changeset
1688 false for failure. */
kono
parents: 67
diff changeset
1689 bool
kono
parents: 67
diff changeset
1690 cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
kono
parents: 67
diff changeset
1691 cpp_string *to, enum cpp_ttype type)
kono
parents: 67
diff changeset
1692 {
kono
parents: 67
diff changeset
1693 return cpp_interpret_string_1 (pfile, from, count, to, type, NULL, NULL);
kono
parents: 67
diff changeset
1694 }
kono
parents: 67
diff changeset
1695
kono
parents: 67
diff changeset
1696 /* A "do nothing" error-handling callback for use by
kono
parents: 67
diff changeset
1697 cpp_interpret_string_ranges, so that it can temporarily suppress
kono
parents: 67
diff changeset
1698 error-handling. */
kono
parents: 67
diff changeset
1699
kono
parents: 67
diff changeset
1700 static bool
kono
parents: 67
diff changeset
1701 noop_error_cb (cpp_reader *, int, int, rich_location *,
kono
parents: 67
diff changeset
1702 const char *, va_list *)
kono
parents: 67
diff changeset
1703 {
kono
parents: 67
diff changeset
1704 /* no-op. */
kono
parents: 67
diff changeset
1705 return true;
kono
parents: 67
diff changeset
1706 }
kono
parents: 67
diff changeset
1707
kono
parents: 67
diff changeset
1708 /* This function mimics the behavior of cpp_interpret_string, but
kono
parents: 67
diff changeset
1709 rather than generating a string in the execution character set,
kono
parents: 67
diff changeset
1710 *OUT is written to with the source code ranges of the characters
kono
parents: 67
diff changeset
1711 in such a string.
kono
parents: 67
diff changeset
1712 FROM and LOC_READERS should both be arrays of length COUNT.
kono
parents: 67
diff changeset
1713 Returns NULL for success, or an error message for failure. */
kono
parents: 67
diff changeset
1714
kono
parents: 67
diff changeset
1715 const char *
kono
parents: 67
diff changeset
1716 cpp_interpret_string_ranges (cpp_reader *pfile, const cpp_string *from,
kono
parents: 67
diff changeset
1717 cpp_string_location_reader *loc_readers,
kono
parents: 67
diff changeset
1718 size_t count,
kono
parents: 67
diff changeset
1719 cpp_substring_ranges *out,
kono
parents: 67
diff changeset
1720 enum cpp_ttype type)
kono
parents: 67
diff changeset
1721 {
kono
parents: 67
diff changeset
1722 /* There are a couple of cases in the range-handling in
kono
parents: 67
diff changeset
1723 cpp_interpret_string_1 that rely on there being a 1:1 correspondence
kono
parents: 67
diff changeset
1724 between bytes in the source encoding and bytes in the execution
kono
parents: 67
diff changeset
1725 encoding, so that each byte in the execution string can correspond
kono
parents: 67
diff changeset
1726 to the location of a byte in the source string.
kono
parents: 67
diff changeset
1727
kono
parents: 67
diff changeset
1728 This holds for the typical case of a UTF-8 to UTF-8 conversion.
kono
parents: 67
diff changeset
1729 Enforce this requirement by only attempting to track substring
kono
parents: 67
diff changeset
1730 locations if we have source encoding == execution encoding.
kono
parents: 67
diff changeset
1731
kono
parents: 67
diff changeset
1732 This is a stronger condition than we need, since we could e.g.
kono
parents: 67
diff changeset
1733 have ASCII to EBCDIC (with 1 byte per character before and after),
kono
parents: 67
diff changeset
1734 but it seems to be a reasonable restriction. */
kono
parents: 67
diff changeset
1735 struct cset_converter cvt = converter_for_type (pfile, type);
kono
parents: 67
diff changeset
1736 if (cvt.func != convert_no_conversion)
kono
parents: 67
diff changeset
1737 return "execution character set != source character set";
kono
parents: 67
diff changeset
1738
kono
parents: 67
diff changeset
1739 /* For on-demand strings we have already lexed the strings, so there
kono
parents: 67
diff changeset
1740 should be no errors. However, if we have bogus source location
kono
parents: 67
diff changeset
1741 data (or stringified macro arguments), the attempt to lex the
kono
parents: 67
diff changeset
1742 strings could fail with an error. Temporarily install an
kono
parents: 67
diff changeset
1743 error-handler to catch the error, so that it can lead to this call
kono
parents: 67
diff changeset
1744 failing, rather than being emitted as a user-visible diagnostic.
kono
parents: 67
diff changeset
1745 If an error does occur, we should see it via the return value of
kono
parents: 67
diff changeset
1746 cpp_interpret_string_1. */
kono
parents: 67
diff changeset
1747 bool (*saved_error_handler) (cpp_reader *, int, int, rich_location *,
kono
parents: 67
diff changeset
1748 const char *, va_list *)
kono
parents: 67
diff changeset
1749 ATTRIBUTE_FPTR_PRINTF(5,0);
kono
parents: 67
diff changeset
1750
kono
parents: 67
diff changeset
1751 saved_error_handler = pfile->cb.error;
kono
parents: 67
diff changeset
1752 pfile->cb.error = noop_error_cb;
kono
parents: 67
diff changeset
1753
kono
parents: 67
diff changeset
1754 bool result = cpp_interpret_string_1 (pfile, from, count, NULL, type,
kono
parents: 67
diff changeset
1755 loc_readers, out);
kono
parents: 67
diff changeset
1756
kono
parents: 67
diff changeset
1757 /* Restore the saved error-handler. */
kono
parents: 67
diff changeset
1758 pfile->cb.error = saved_error_handler;
kono
parents: 67
diff changeset
1759
kono
parents: 67
diff changeset
1760 if (!result)
kono
parents: 67
diff changeset
1761 return "cpp_interpret_string_1 failed";
kono
parents: 67
diff changeset
1762
kono
parents: 67
diff changeset
1763 /* Success. */
kono
parents: 67
diff changeset
1764 return NULL;
kono
parents: 67
diff changeset
1765 }
kono
parents: 67
diff changeset
1766
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1767 /* Subroutine of do_line and do_linemarker. Convert escape sequences
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1768 in a string, but do not perform character set conversion. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1769 bool
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1770 cpp_interpret_string_notranslate (cpp_reader *pfile, const cpp_string *from,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1771 size_t count, cpp_string *to,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1772 enum cpp_ttype type ATTRIBUTE_UNUSED)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1773 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1774 struct cset_converter save_narrow_cset_desc = pfile->narrow_cset_desc;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1775 bool retval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1776
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1777 pfile->narrow_cset_desc.func = convert_no_conversion;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1778 pfile->narrow_cset_desc.cd = (iconv_t) -1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1779 pfile->narrow_cset_desc.width = CPP_OPTION (pfile, char_precision);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1780
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1781 retval = cpp_interpret_string (pfile, from, count, to, CPP_STRING);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1782
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1783 pfile->narrow_cset_desc = save_narrow_cset_desc;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1784 return retval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1785 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1786
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1787
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1788 /* Subroutine of cpp_interpret_charconst which performs the conversion
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1789 to a number, for narrow strings. STR is the string structure returned
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1790 by cpp_interpret_string. PCHARS_SEEN and UNSIGNEDP are as for
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1791 cpp_interpret_charconst. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1792 static cppchar_t
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1793 narrow_str_to_charconst (cpp_reader *pfile, cpp_string str,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1794 unsigned int *pchars_seen, int *unsignedp)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1795 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1796 size_t width = CPP_OPTION (pfile, char_precision);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1797 size_t max_chars = CPP_OPTION (pfile, int_precision) / width;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1798 size_t mask = width_to_mask (width);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1799 size_t i;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1800 cppchar_t result, c;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1801 bool unsigned_p;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1802
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1803 /* The value of a multi-character character constant, or a
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1804 single-character character constant whose representation in the
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1805 execution character set is more than one byte long, is
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1806 implementation defined. This implementation defines it to be the
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1807 number formed by interpreting the byte sequence in memory as a
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1808 big-endian binary number. If overflow occurs, the high bytes are
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1809 lost, and a warning is issued.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1810
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1811 We don't want to process the NUL terminator handed back by
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1812 cpp_interpret_string. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1813 result = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1814 for (i = 0; i < str.len - 1; i++)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1815 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1816 c = str.text[i] & mask;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1817 if (width < BITS_PER_CPPCHAR_T)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1818 result = (result << width) | c;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1819 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1820 result = c;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1821 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1822
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1823 if (i > max_chars)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1824 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1825 i = max_chars;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1826 cpp_error (pfile, CPP_DL_WARNING,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1827 "character constant too long for its type");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1828 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1829 else if (i > 1 && CPP_OPTION (pfile, warn_multichar))
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
1830 cpp_warning (pfile, CPP_W_MULTICHAR, "multi-character character constant");
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1831
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1832 /* Multichar constants are of type int and therefore signed. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1833 if (i > 1)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1834 unsigned_p = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1835 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1836 unsigned_p = CPP_OPTION (pfile, unsigned_char);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1837
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1838 /* Truncate the constant to its natural width, and simultaneously
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1839 sign- or zero-extend to the full width of cppchar_t.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1840 For single-character constants, the value is WIDTH bits wide.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1841 For multi-character constants, the value is INT_PRECISION bits wide. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1842 if (i > 1)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1843 width = CPP_OPTION (pfile, int_precision);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1844 if (width < BITS_PER_CPPCHAR_T)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1845 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1846 mask = ((cppchar_t) 1 << width) - 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1847 if (unsigned_p || !(result & (1 << (width - 1))))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1848 result &= mask;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1849 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1850 result |= ~mask;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1851 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1852 *pchars_seen = i;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1853 *unsignedp = unsigned_p;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1854 return result;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1855 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1856
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1857 /* Subroutine of cpp_interpret_charconst which performs the conversion
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1858 to a number, for wide strings. STR is the string structure returned
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1859 by cpp_interpret_string. PCHARS_SEEN and UNSIGNEDP are as for
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1860 cpp_interpret_charconst. TYPE is the token type. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1861 static cppchar_t
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1862 wide_str_to_charconst (cpp_reader *pfile, cpp_string str,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1863 unsigned int *pchars_seen, int *unsignedp,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1864 enum cpp_ttype type)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1865 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1866 bool bigend = CPP_OPTION (pfile, bytes_big_endian);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1867 size_t width = converter_for_type (pfile, type).width;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1868 size_t cwidth = CPP_OPTION (pfile, char_precision);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1869 size_t mask = width_to_mask (width);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1870 size_t cmask = width_to_mask (cwidth);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1871 size_t nbwc = width / cwidth;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1872 size_t off, i;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1873 cppchar_t result = 0, c;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1874
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1875 /* This is finicky because the string is in the target's byte order,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1876 which may not be our byte order. Only the last character, ignoring
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1877 the NUL terminator, is relevant. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1878 off = str.len - (nbwc * 2);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1879 result = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1880 for (i = 0; i < nbwc; i++)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1881 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1882 c = bigend ? str.text[off + i] : str.text[off + nbwc - i - 1];
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1883 result = (result << cwidth) | (c & cmask);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1884 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1885
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1886 /* Wide character constants have type wchar_t, and a single
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1887 character exactly fills a wchar_t, so a multi-character wide
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1888 character constant is guaranteed to overflow. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1889 if (str.len > nbwc * 2)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1890 cpp_error (pfile, CPP_DL_WARNING,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1891 "character constant too long for its type");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1892
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1893 /* Truncate the constant to its natural width, and simultaneously
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1894 sign- or zero-extend to the full width of cppchar_t. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1895 if (width < BITS_PER_CPPCHAR_T)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1896 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1897 if (type == CPP_CHAR16 || type == CPP_CHAR32
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1898 || CPP_OPTION (pfile, unsigned_wchar)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1899 || !(result & (1 << (width - 1))))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1900 result &= mask;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1901 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1902 result |= ~mask;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1903 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1904
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1905 if (type == CPP_CHAR16 || type == CPP_CHAR32
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1906 || CPP_OPTION (pfile, unsigned_wchar))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1907 *unsignedp = 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1908 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1909 *unsignedp = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1910
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1911 *pchars_seen = 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1912 return result;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1913 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1914
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1915 /* Interpret a (possibly wide) character constant in TOKEN.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1916 PCHARS_SEEN points to a variable that is filled in with the number
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1917 of characters seen, and UNSIGNEDP to a variable that indicates
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1918 whether the result has signed type. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1919 cppchar_t
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1920 cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1921 unsigned int *pchars_seen, int *unsignedp)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1922 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1923 cpp_string str = { 0, 0 };
111
kono
parents: 67
diff changeset
1924 bool wide = (token->type != CPP_CHAR && token->type != CPP_UTF8CHAR);
kono
parents: 67
diff changeset
1925 int u8 = 2 * int(token->type == CPP_UTF8CHAR);
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1926 cppchar_t result;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1927
111
kono
parents: 67
diff changeset
1928 /* An empty constant will appear as L'', u'', U'', u8'', or '' */
kono
parents: 67
diff changeset
1929 if (token->val.str.len == (size_t) (2 + wide + u8))
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1930 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1931 cpp_error (pfile, CPP_DL_ERROR, "empty character constant");
111
kono
parents: 67
diff changeset
1932 *pchars_seen = 0;
kono
parents: 67
diff changeset
1933 *unsignedp = 0;
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1934 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1935 }
111
kono
parents: 67
diff changeset
1936 else if (!cpp_interpret_string (pfile, &token->val.str, 1, &str,
kono
parents: 67
diff changeset
1937 token->type))
kono
parents: 67
diff changeset
1938 {
kono
parents: 67
diff changeset
1939 *pchars_seen = 0;
kono
parents: 67
diff changeset
1940 *unsignedp = 0;
kono
parents: 67
diff changeset
1941 return 0;
kono
parents: 67
diff changeset
1942 }
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1943
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1944 if (wide)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1945 result = wide_str_to_charconst (pfile, str, pchars_seen, unsignedp,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1946 token->type);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1947 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1948 result = narrow_str_to_charconst (pfile, str, pchars_seen, unsignedp);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1949
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1950 if (str.text != token->val.str.text)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1951 free ((void *)str.text);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1952
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1953 return result;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1954 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1955
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1956 /* Convert an identifier denoted by ID and LEN, which might contain
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1957 UCN escapes, to the source character set, either UTF-8 or
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1958 UTF-EBCDIC. Assumes that the identifier is actually a valid identifier. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1959 cpp_hashnode *
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1960 _cpp_interpret_identifier (cpp_reader *pfile, const uchar *id, size_t len)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1961 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1962 /* It turns out that a UCN escape always turns into fewer characters
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1963 than the escape itself, so we can allocate a temporary in advance. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1964 uchar * buf = (uchar *) alloca (len + 1);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1965 uchar * bufp = buf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1966 size_t idp;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1967
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1968 for (idp = 0; idp < len; idp++)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1969 if (id[idp] != '\\')
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1970 *bufp++ = id[idp];
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1971 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1972 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1973 unsigned length = id[idp+1] == 'u' ? 4 : 8;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1974 cppchar_t value = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1975 size_t bufleft = len - (bufp - buf);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1976 int rval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1977
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1978 idp += 2;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1979 while (length && idp < len && ISXDIGIT (id[idp]))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1980 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1981 value = (value << 4) + hex_value (id[idp]);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1982 idp++;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1983 length--;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1984 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1985 idp--;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1986
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1987 /* Special case for EBCDIC: if the identifier contains
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1988 a '$' specified using a UCN, translate it to EBCDIC. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1989 if (value == 0x24)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1990 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1991 *bufp++ = '$';
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1992 continue;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1993 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1994
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1995 rval = one_cppchar_to_utf8 (value, &bufp, &bufleft);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1996 if (rval)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1997 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1998 errno = rval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1999 cpp_errno (pfile, CPP_DL_ERROR,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2000 "converting UCN to source character set");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2001 break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2002 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2003 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2004
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2005 return CPP_HASHNODE (ht_lookup (pfile->hash_table,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2006 buf, bufp - buf, HT_ALLOC));
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2007 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2008
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2009 /* Convert an input buffer (containing the complete contents of one
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2010 source file) from INPUT_CHARSET to the source character set. INPUT
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2011 points to the input buffer, SIZE is its allocated size, and LEN is
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2012 the length of the meaningful data within the buffer. The
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2013 translated buffer is returned, *ST_SIZE is set to the length of
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2014 the meaningful data within the translated buffer, and *BUFFER_START
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2015 is set to the start of the returned buffer. *BUFFER_START may
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2016 differ from the return value in the case of a BOM or other ignored
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2017 marker information.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2018
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2019 INPUT is expected to have been allocated with xmalloc. This
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2020 function will either set *BUFFER_START to INPUT, or free it and set
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2021 *BUFFER_START to a pointer to another xmalloc-allocated block of
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2022 memory. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2023 uchar *
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2024 _cpp_convert_input (cpp_reader *pfile, const char *input_charset,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2025 uchar *input, size_t size, size_t len,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2026 const unsigned char **buffer_start, off_t *st_size)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2027 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2028 struct cset_converter input_cset;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2029 struct _cpp_strbuf to;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2030 unsigned char *buffer;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2031
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2032 input_cset = init_iconv_desc (pfile, SOURCE_CHARSET, input_charset);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2033 if (input_cset.func == convert_no_conversion)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2034 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2035 to.text = input;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2036 to.asize = size;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2037 to.len = len;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2038 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2039 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2040 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2041 to.asize = MAX (65536, len);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2042 to.text = XNEWVEC (uchar, to.asize);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2043 to.len = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2044
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2045 if (!APPLY_CONVERSION (input_cset, input, len, &to))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2046 cpp_error (pfile, CPP_DL_ERROR,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2047 "failure to convert %s to %s",
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2048 CPP_OPTION (pfile, input_charset), SOURCE_CHARSET);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2049
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2050 free (input);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2051 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2052
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2053 /* Clean up the mess. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2054 if (input_cset.func == convert_using_iconv)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2055 iconv_close (input_cset.cd);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2056
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2057 /* Resize buffer if we allocated substantially too much, or if we
111
kono
parents: 67
diff changeset
2058 haven't enough space for the \n-terminator or following
kono
parents: 67
diff changeset
2059 15 bytes of padding (used to quiet warnings from valgrind or
kono
parents: 67
diff changeset
2060 Address Sanitizer, when the optimized lexer accesses aligned
kono
parents: 67
diff changeset
2061 16-byte memory chunks, including the bytes after the malloced,
kono
parents: 67
diff changeset
2062 area, and stops lexing on '\n'). */
kono
parents: 67
diff changeset
2063 if (to.len + 4096 < to.asize || to.len + 16 > to.asize)
kono
parents: 67
diff changeset
2064 to.text = XRESIZEVEC (uchar, to.text, to.len + 16);
kono
parents: 67
diff changeset
2065
kono
parents: 67
diff changeset
2066 memset (to.text + to.len, '\0', 16);
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2067
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2068 /* If the file is using old-school Mac line endings (\r only),
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2069 terminate with another \r, not an \n, so that we do not mistake
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2070 the \r\n sequence for a single DOS line ending and erroneously
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2071 issue the "No newline at end of file" diagnostic. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2072 if (to.len && to.text[to.len - 1] == '\r')
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2073 to.text[to.len] = '\r';
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2074 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2075 to.text[to.len] = '\n';
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2076
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2077 buffer = to.text;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2078 *st_size = to.len;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2079 #if HOST_CHARSET == HOST_CHARSET_ASCII
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2080 /* The HOST_CHARSET test just above ensures that the source charset
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2081 is UTF-8. So, ignore a UTF-8 BOM if we see one. Note that
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2082 glib'c UTF-8 iconv() provider (as of glibc 2.7) does not ignore a
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2083 BOM -- however, even if it did, we would still need this code due
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2084 to the 'convert_no_conversion' case. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2085 if (to.len >= 3 && to.text[0] == 0xef && to.text[1] == 0xbb
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2086 && to.text[2] == 0xbf)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2087 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2088 *st_size -= 3;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2089 buffer += 3;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2090 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2091 #endif
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2092
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2093 *buffer_start = to.text;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2094 return buffer;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2095 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2096
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2097 /* Decide on the default encoding to assume for input files. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2098 const char *
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2099 _cpp_default_encoding (void)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2100 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2101 const char *current_encoding = NULL;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2102
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2103 /* We disable this because the default codeset is 7-bit ASCII on
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2104 most platforms, and this causes conversion failures on every
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2105 file in GCC that happens to have one of the upper 128 characters
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2106 in it -- most likely, as part of the name of a contributor.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2107 We should definitely recognize in-band markers of file encoding,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2108 like:
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2109 - the appropriate Unicode byte-order mark (FE FF) to recognize
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2110 UTF16 and UCS4 (in both big-endian and little-endian flavors)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2111 and UTF8
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2112 - a "#i", "#d", "/ *", "//", " #p" or "#p" (for #pragma) to
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2113 distinguish ASCII and EBCDIC.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2114 - now we can parse something like "#pragma GCC encoding <xyz>
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2115 on the first line, or even Emacs/VIM's mode line tags (there's
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2116 a problem here in that VIM uses the last line, and Emacs has
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2117 its more elaborate "local variables" convention).
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2118 - investigate whether Java has another common convention, which
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2119 would be friendly to support.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2120 (Zack Weinberg and Paolo Bonzini, May 20th 2004) */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2121 #if defined (HAVE_LOCALE_H) && defined (HAVE_LANGINFO_CODESET) && 0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2122 setlocale (LC_CTYPE, "");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2123 current_encoding = nl_langinfo (CODESET);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2124 #endif
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2125 if (current_encoding == NULL || *current_encoding == '\0')
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2126 current_encoding = SOURCE_CHARSET;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2127
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2128 return current_encoding;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2129 }
111
kono
parents: 67
diff changeset
2130
kono
parents: 67
diff changeset
2131 /* Implementation of class cpp_string_location_reader. */
kono
parents: 67
diff changeset
2132
kono
parents: 67
diff changeset
2133 /* Constructor for cpp_string_location_reader. */
kono
parents: 67
diff changeset
2134
kono
parents: 67
diff changeset
2135 cpp_string_location_reader::
kono
parents: 67
diff changeset
2136 cpp_string_location_reader (source_location src_loc,
kono
parents: 67
diff changeset
2137 line_maps *line_table)
kono
parents: 67
diff changeset
2138 : m_line_table (line_table)
kono
parents: 67
diff changeset
2139 {
kono
parents: 67
diff changeset
2140 src_loc = get_range_from_loc (line_table, src_loc).m_start;
kono
parents: 67
diff changeset
2141
kono
parents: 67
diff changeset
2142 /* SRC_LOC might be a macro location. It only makes sense to do
kono
parents: 67
diff changeset
2143 column-by-column calculations on ordinary maps, so get the
kono
parents: 67
diff changeset
2144 corresponding location in an ordinary map. */
kono
parents: 67
diff changeset
2145 m_loc
kono
parents: 67
diff changeset
2146 = linemap_resolve_location (line_table, src_loc,
kono
parents: 67
diff changeset
2147 LRK_SPELLING_LOCATION, NULL);
kono
parents: 67
diff changeset
2148
kono
parents: 67
diff changeset
2149 const line_map_ordinary *map
kono
parents: 67
diff changeset
2150 = linemap_check_ordinary (linemap_lookup (line_table, m_loc));
kono
parents: 67
diff changeset
2151 m_offset_per_column = (1 << map->m_range_bits);
kono
parents: 67
diff changeset
2152 }
kono
parents: 67
diff changeset
2153
kono
parents: 67
diff changeset
2154 /* Get the range of the next source byte. */
kono
parents: 67
diff changeset
2155
kono
parents: 67
diff changeset
2156 source_range
kono
parents: 67
diff changeset
2157 cpp_string_location_reader::get_next ()
kono
parents: 67
diff changeset
2158 {
kono
parents: 67
diff changeset
2159 source_range result;
kono
parents: 67
diff changeset
2160 result.m_start = m_loc;
kono
parents: 67
diff changeset
2161 result.m_finish = m_loc;
kono
parents: 67
diff changeset
2162 if (m_loc <= LINE_MAP_MAX_LOCATION_WITH_COLS)
kono
parents: 67
diff changeset
2163 m_loc += m_offset_per_column;
kono
parents: 67
diff changeset
2164 return result;
kono
parents: 67
diff changeset
2165 }