annotate libcpp/charset.c @ 67:f6334be47118

update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
author nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
date Tue, 22 Mar 2011 17:18:12 +0900
parents b7f97abdc517
children 04ced10e8804
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1 /* CPP Library - charsets
67
f6334be47118 update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
2 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006, 2008, 2009,
f6334be47118 update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
3 2010 Free Software Foundation, Inc.
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
4
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
5 Broken out of c-lex.c Apr 2003, adding valid C99 UCN ranges.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
6
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
7 This program is free software; you can redistribute it and/or modify it
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
8 under the terms of the GNU General Public License as published by the
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
9 Free Software Foundation; either version 3, or (at your option) any
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
10 later version.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
11
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
12 This program is distributed in the hope that it will be useful,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
15 GNU General Public License for more details.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
16
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
17 You should have received a copy of the GNU General Public License
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
18 along with this program; see the file COPYING3. If not see
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
19 <http://www.gnu.org/licenses/>. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
20
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
21 #include "config.h"
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
22 #include "system.h"
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
23 #include "cpplib.h"
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
24 #include "internal.h"
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
25
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
26 /* Character set handling for C-family languages.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
27
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
28 Terminological note: In what follows, "charset" or "character set"
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
29 will be taken to mean both an abstract set of characters and an
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
30 encoding for that set.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
31
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
32 The C99 standard discusses two character sets: source and execution.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
33 The source character set is used for internal processing in translation
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
34 phases 1 through 4; the execution character set is used thereafter.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
35 Both are required by 5.2.1.2p1 to be multibyte encodings, not wide
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
36 character encodings (see 3.7.2, 3.7.3 for the standardese meanings
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
37 of these terms). Furthermore, the "basic character set" (listed in
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
38 5.2.1p3) is to be encoded in each with values one byte wide, and is
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
39 to appear in the initial shift state.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
40
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
41 It is not explicitly mentioned, but there is also a "wide execution
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
42 character set" used to encode wide character constants and wide
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
43 string literals; this is supposed to be the result of applying the
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
44 standard library function mbstowcs() to an equivalent narrow string
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
45 (6.4.5p5). However, the behavior of hexadecimal and octal
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
46 \-escapes is at odds with this; they are supposed to be translated
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
47 directly to wchar_t values (6.4.4.4p5,6).
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
48
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
49 The source character set is not necessarily the character set used
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
50 to encode physical source files on disk; translation phase 1 converts
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
51 from whatever that encoding is to the source character set.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
52
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
53 The presence of universal character names in C99 (6.4.3 et seq.)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
54 forces the source character set to be isomorphic to ISO 10646,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
55 that is, Unicode. There is no such constraint on the execution
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
56 character set; note also that the conversion from source to
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
57 execution character set does not occur for identifiers (5.1.1.2p1#5).
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
58
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
59 For convenience of implementation, the source character set's
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
60 encoding of the basic character set should be identical to the
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
61 execution character set OF THE HOST SYSTEM's encoding of the basic
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
62 character set, and it should not be a state-dependent encoding.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
63
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
64 cpplib uses UTF-8 or UTF-EBCDIC for the source character set,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
65 depending on whether the host is based on ASCII or EBCDIC (see
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
66 respectively Unicode section 2.3/ISO10646 Amendment 2, and Unicode
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
67 Technical Report #16). With limited exceptions, it relies on the
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
68 system library's iconv() primitive to do charset conversion
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
69 (specified in SUSv2). */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
70
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
71 #if !HAVE_ICONV
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
72 /* Make certain that the uses of iconv(), iconv_open(), iconv_close()
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
73 below, which are guarded only by if statements with compile-time
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
74 constant conditions, do not cause link errors. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
75 #define iconv_open(x, y) (errno = EINVAL, (iconv_t)-1)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
76 #define iconv(a,b,c,d,e) (errno = EINVAL, (size_t)-1)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
77 #define iconv_close(x) (void)0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
78 #define ICONV_CONST
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
79 #endif
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
80
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
81 #if HOST_CHARSET == HOST_CHARSET_ASCII
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
82 #define SOURCE_CHARSET "UTF-8"
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
83 #define LAST_POSSIBLY_BASIC_SOURCE_CHAR 0x7e
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
84 #elif HOST_CHARSET == HOST_CHARSET_EBCDIC
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
85 #define SOURCE_CHARSET "UTF-EBCDIC"
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
86 #define LAST_POSSIBLY_BASIC_SOURCE_CHAR 0xFF
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
87 #else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
88 #error "Unrecognized basic host character set"
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
89 #endif
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
90
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
91 #ifndef EILSEQ
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
92 #define EILSEQ EINVAL
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
93 #endif
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
94
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
95 /* This structure is used for a resizable string buffer throughout. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
96 /* Don't call it strbuf, as that conflicts with unistd.h on systems
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
97 such as DYNIX/ptx where unistd.h includes stropts.h. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
98 struct _cpp_strbuf
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
99 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
100 uchar *text;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
101 size_t asize;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
102 size_t len;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
103 };
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
104
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
105 /* This is enough to hold any string that fits on a single 80-column
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
106 line, even if iconv quadruples its size (e.g. conversion from
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
107 ASCII to UTF-32) rounded up to a power of two. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
108 #define OUTBUF_BLOCK_SIZE 256
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
109
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
110 /* Conversions between UTF-8 and UTF-16/32 are implemented by custom
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
111 logic. This is because a depressing number of systems lack iconv,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
112 or have have iconv libraries that do not do these conversions, so
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
113 we need a fallback implementation for them. To ensure the fallback
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
114 doesn't break due to neglect, it is used on all systems.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
115
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
116 UTF-32 encoding is nice and simple: a four-byte binary number,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
117 constrained to the range 00000000-7FFFFFFF to avoid questions of
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
118 signedness. We do have to cope with big- and little-endian
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
119 variants.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
120
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
121 UTF-16 encoding uses two-byte binary numbers, again in big- and
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
122 little-endian variants, for all values in the 00000000-0000FFFF
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
123 range. Values in the 00010000-0010FFFF range are encoded as pairs
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
124 of two-byte numbers, called "surrogate pairs": given a number S in
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
125 this range, it is mapped to a pair (H, L) as follows:
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
126
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
127 H = (S - 0x10000) / 0x400 + 0xD800
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
128 L = (S - 0x10000) % 0x400 + 0xDC00
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
129
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
130 Two-byte values in the D800...DFFF range are ill-formed except as a
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
131 component of a surrogate pair. Even if the encoding within a
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
132 two-byte value is little-endian, the H member of the surrogate pair
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
133 comes first.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
134
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
135 There is no way to encode values in the 00110000-7FFFFFFF range,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
136 which is not currently a problem as there are no assigned code
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
137 points in that range; however, the author expects that it will
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
138 eventually become necessary to abandon UTF-16 due to this
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
139 limitation. Note also that, because of these pairs, UTF-16 does
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
140 not meet the requirements of the C standard for a wide character
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
141 encoding (see 3.7.3 and 6.4.4.4p11).
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
142
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
143 UTF-8 encoding looks like this:
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
144
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
145 value range encoded as
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
146 00000000-0000007F 0xxxxxxx
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
147 00000080-000007FF 110xxxxx 10xxxxxx
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
148 00000800-0000FFFF 1110xxxx 10xxxxxx 10xxxxxx
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
149 00010000-001FFFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
150 00200000-03FFFFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
151 04000000-7FFFFFFF 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
152
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
153 Values in the 0000D800 ... 0000DFFF range (surrogates) are invalid,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
154 which means that three-byte sequences ED xx yy, with A0 <= xx <= BF,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
155 never occur. Note also that any value that can be encoded by a
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
156 given row of the table can also be encoded by all successive rows,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
157 but this is not done; only the shortest possible encoding for any
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
158 given value is valid. For instance, the character 07C0 could be
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
159 encoded as any of DF 80, E0 9F 80, F0 80 9F 80, F8 80 80 9F 80, or
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
160 FC 80 80 80 9F 80. Only the first is valid.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
161
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
162 An implementation note: the transformation from UTF-16 to UTF-8, or
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
163 vice versa, is easiest done by using UTF-32 as an intermediary. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
164
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
165 /* Internal primitives which go from an UTF-8 byte stream to native-endian
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
166 UTF-32 in a cppchar_t, or vice versa; this avoids an extra marshal/unmarshal
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
167 operation in several places below. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
168 static inline int
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
169 one_utf8_to_cppchar (const uchar **inbufp, size_t *inbytesleftp,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
170 cppchar_t *cp)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
171 {
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
172 static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01 };
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
173 static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
174
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
175 cppchar_t c;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
176 const uchar *inbuf = *inbufp;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
177 size_t nbytes, i;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
178
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
179 if (*inbytesleftp < 1)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
180 return EINVAL;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
181
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
182 c = *inbuf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
183 if (c < 0x80)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
184 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
185 *cp = c;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
186 *inbytesleftp -= 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
187 *inbufp += 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
188 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
189 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
190
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
191 /* The number of leading 1-bits in the first byte indicates how many
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
192 bytes follow. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
193 for (nbytes = 2; nbytes < 7; nbytes++)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
194 if ((c & ~masks[nbytes-1]) == patns[nbytes-1])
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
195 goto found;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
196 return EILSEQ;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
197 found:
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
198
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
199 if (*inbytesleftp < nbytes)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
200 return EINVAL;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
201
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
202 c = (c & masks[nbytes-1]);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
203 inbuf++;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
204 for (i = 1; i < nbytes; i++)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
205 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
206 cppchar_t n = *inbuf++;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
207 if ((n & 0xC0) != 0x80)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
208 return EILSEQ;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
209 c = ((c << 6) + (n & 0x3F));
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
210 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
211
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
212 /* Make sure the shortest possible encoding was used. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
213 if (c <= 0x7F && nbytes > 1) return EILSEQ;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
214 if (c <= 0x7FF && nbytes > 2) return EILSEQ;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
215 if (c <= 0xFFFF && nbytes > 3) return EILSEQ;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
216 if (c <= 0x1FFFFF && nbytes > 4) return EILSEQ;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
217 if (c <= 0x3FFFFFF && nbytes > 5) return EILSEQ;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
218
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
219 /* Make sure the character is valid. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
220 if (c > 0x7FFFFFFF || (c >= 0xD800 && c <= 0xDFFF)) return EILSEQ;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
221
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
222 *cp = c;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
223 *inbufp = inbuf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
224 *inbytesleftp -= nbytes;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
225 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
226 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
227
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
228 static inline int
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
229 one_cppchar_to_utf8 (cppchar_t c, uchar **outbufp, size_t *outbytesleftp)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
230 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
231 static const uchar masks[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
232 static const uchar limits[6] = { 0x80, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE };
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
233 size_t nbytes;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
234 uchar buf[6], *p = &buf[6];
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
235 uchar *outbuf = *outbufp;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
236
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
237 nbytes = 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
238 if (c < 0x80)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
239 *--p = c;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
240 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
241 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
242 do
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
243 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
244 *--p = ((c & 0x3F) | 0x80);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
245 c >>= 6;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
246 nbytes++;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
247 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
248 while (c >= 0x3F || (c & limits[nbytes-1]));
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
249 *--p = (c | masks[nbytes-1]);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
250 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
251
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
252 if (*outbytesleftp < nbytes)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
253 return E2BIG;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
254
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
255 while (p < &buf[6])
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
256 *outbuf++ = *p++;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
257 *outbytesleftp -= nbytes;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
258 *outbufp = outbuf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
259 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
260 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
261
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
262 /* The following four functions transform one character between the two
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
263 encodings named in the function name. All have the signature
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
264 int (*)(iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
265 uchar **outbufp, size_t *outbytesleftp)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
266
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
267 BIGEND must have the value 0 or 1, coerced to (iconv_t); it is
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
268 interpreted as a boolean indicating whether big-endian or
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
269 little-endian encoding is to be used for the member of the pair
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
270 that is not UTF-8.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
271
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
272 INBUFP, INBYTESLEFTP, OUTBUFP, OUTBYTESLEFTP work exactly as they
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
273 do for iconv.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
274
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
275 The return value is either 0 for success, or an errno value for
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
276 failure, which may be E2BIG (need more space), EILSEQ (ill-formed
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
277 input sequence), ir EINVAL (incomplete input sequence). */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
278
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
279 static inline int
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
280 one_utf8_to_utf32 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
281 uchar **outbufp, size_t *outbytesleftp)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
282 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
283 uchar *outbuf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
284 cppchar_t s = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
285 int rval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
286
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
287 /* Check for space first, since we know exactly how much we need. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
288 if (*outbytesleftp < 4)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
289 return E2BIG;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
290
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
291 rval = one_utf8_to_cppchar (inbufp, inbytesleftp, &s);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
292 if (rval)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
293 return rval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
294
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
295 outbuf = *outbufp;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
296 outbuf[bigend ? 3 : 0] = (s & 0x000000FF);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
297 outbuf[bigend ? 2 : 1] = (s & 0x0000FF00) >> 8;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
298 outbuf[bigend ? 1 : 2] = (s & 0x00FF0000) >> 16;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
299 outbuf[bigend ? 0 : 3] = (s & 0xFF000000) >> 24;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
300
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
301 *outbufp += 4;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
302 *outbytesleftp -= 4;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
303 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
304 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
305
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
306 static inline int
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
307 one_utf32_to_utf8 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
308 uchar **outbufp, size_t *outbytesleftp)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
309 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
310 cppchar_t s;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
311 int rval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
312 const uchar *inbuf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
313
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
314 if (*inbytesleftp < 4)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
315 return EINVAL;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
316
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
317 inbuf = *inbufp;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
318
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
319 s = inbuf[bigend ? 0 : 3] << 24;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
320 s += inbuf[bigend ? 1 : 2] << 16;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
321 s += inbuf[bigend ? 2 : 1] << 8;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
322 s += inbuf[bigend ? 3 : 0];
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
323
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
324 if (s >= 0x7FFFFFFF || (s >= 0xD800 && s <= 0xDFFF))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
325 return EILSEQ;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
326
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
327 rval = one_cppchar_to_utf8 (s, outbufp, outbytesleftp);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
328 if (rval)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
329 return rval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
330
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
331 *inbufp += 4;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
332 *inbytesleftp -= 4;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
333 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
334 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
335
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
336 static inline int
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
337 one_utf8_to_utf16 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
338 uchar **outbufp, size_t *outbytesleftp)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
339 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
340 int rval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
341 cppchar_t s = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
342 const uchar *save_inbuf = *inbufp;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
343 size_t save_inbytesleft = *inbytesleftp;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
344 uchar *outbuf = *outbufp;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
345
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
346 rval = one_utf8_to_cppchar (inbufp, inbytesleftp, &s);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
347 if (rval)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
348 return rval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
349
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
350 if (s > 0x0010FFFF)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
351 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
352 *inbufp = save_inbuf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
353 *inbytesleftp = save_inbytesleft;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
354 return EILSEQ;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
355 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
356
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
357 if (s < 0xFFFF)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
358 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
359 if (*outbytesleftp < 2)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
360 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
361 *inbufp = save_inbuf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
362 *inbytesleftp = save_inbytesleft;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
363 return E2BIG;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
364 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
365 outbuf[bigend ? 1 : 0] = (s & 0x00FF);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
366 outbuf[bigend ? 0 : 1] = (s & 0xFF00) >> 8;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
367
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
368 *outbufp += 2;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
369 *outbytesleftp -= 2;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
370 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
371 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
372 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
373 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
374 cppchar_t hi, lo;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
375
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
376 if (*outbytesleftp < 4)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
377 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
378 *inbufp = save_inbuf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
379 *inbytesleftp = save_inbytesleft;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
380 return E2BIG;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
381 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
382
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
383 hi = (s - 0x10000) / 0x400 + 0xD800;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
384 lo = (s - 0x10000) % 0x400 + 0xDC00;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
385
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
386 /* Even if we are little-endian, put the high surrogate first.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
387 ??? Matches practice? */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
388 outbuf[bigend ? 1 : 0] = (hi & 0x00FF);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
389 outbuf[bigend ? 0 : 1] = (hi & 0xFF00) >> 8;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
390 outbuf[bigend ? 3 : 2] = (lo & 0x00FF);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
391 outbuf[bigend ? 2 : 3] = (lo & 0xFF00) >> 8;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
392
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
393 *outbufp += 4;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
394 *outbytesleftp -= 4;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
395 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
396 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
397 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
398
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
399 static inline int
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
400 one_utf16_to_utf8 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
401 uchar **outbufp, size_t *outbytesleftp)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
402 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
403 cppchar_t s;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
404 const uchar *inbuf = *inbufp;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
405 int rval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
406
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
407 if (*inbytesleftp < 2)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
408 return EINVAL;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
409 s = inbuf[bigend ? 0 : 1] << 8;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
410 s += inbuf[bigend ? 1 : 0];
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
411
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
412 /* Low surrogate without immediately preceding high surrogate is invalid. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
413 if (s >= 0xDC00 && s <= 0xDFFF)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
414 return EILSEQ;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
415 /* High surrogate must have a following low surrogate. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
416 else if (s >= 0xD800 && s <= 0xDBFF)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
417 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
418 cppchar_t hi = s, lo;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
419 if (*inbytesleftp < 4)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
420 return EINVAL;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
421
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
422 lo = inbuf[bigend ? 2 : 3] << 8;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
423 lo += inbuf[bigend ? 3 : 2];
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
424
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
425 if (lo < 0xDC00 || lo > 0xDFFF)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
426 return EILSEQ;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
427
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
428 s = (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
429 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
430
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
431 rval = one_cppchar_to_utf8 (s, outbufp, outbytesleftp);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
432 if (rval)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
433 return rval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
434
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
435 /* Success - update the input pointers (one_cppchar_to_utf8 has done
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
436 the output pointers for us). */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
437 if (s <= 0xFFFF)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
438 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
439 *inbufp += 2;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
440 *inbytesleftp -= 2;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
441 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
442 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
443 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
444 *inbufp += 4;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
445 *inbytesleftp -= 4;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
446 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
447 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
448 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
449
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
450 /* Helper routine for the next few functions. The 'const' on
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
451 one_conversion means that we promise not to modify what function is
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
452 pointed to, which lets the inliner see through it. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
453
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
454 static inline bool
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
455 conversion_loop (int (*const one_conversion)(iconv_t, const uchar **, size_t *,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
456 uchar **, size_t *),
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
457 iconv_t cd, const uchar *from, size_t flen, struct _cpp_strbuf *to)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
458 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
459 const uchar *inbuf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
460 uchar *outbuf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
461 size_t inbytesleft, outbytesleft;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
462 int rval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
463
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
464 inbuf = from;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
465 inbytesleft = flen;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
466 outbuf = to->text + to->len;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
467 outbytesleft = to->asize - to->len;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
468
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
469 for (;;)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
470 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
471 do
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
472 rval = one_conversion (cd, &inbuf, &inbytesleft,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
473 &outbuf, &outbytesleft);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
474 while (inbytesleft && !rval);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
475
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
476 if (__builtin_expect (inbytesleft == 0, 1))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
477 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
478 to->len = to->asize - outbytesleft;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
479 return true;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
480 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
481 if (rval != E2BIG)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
482 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
483 errno = rval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
484 return false;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
485 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
486
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
487 outbytesleft += OUTBUF_BLOCK_SIZE;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
488 to->asize += OUTBUF_BLOCK_SIZE;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
489 to->text = XRESIZEVEC (uchar, to->text, to->asize);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
490 outbuf = to->text + to->asize - outbytesleft;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
491 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
492 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
493
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
494
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
495 /* These functions convert entire strings between character sets.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
496 They all have the signature
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
497
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
498 bool (*)(iconv_t cd, const uchar *from, size_t flen, struct _cpp_strbuf *to);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
499
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
500 The input string FROM is converted as specified by the function
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
501 name plus the iconv descriptor CD (which may be fake), and the
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
502 result appended to TO. On any error, false is returned, otherwise true. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
503
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
504 /* These four use the custom conversion code above. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
505 static bool
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
506 convert_utf8_utf16 (iconv_t cd, const uchar *from, size_t flen,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
507 struct _cpp_strbuf *to)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
508 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
509 return conversion_loop (one_utf8_to_utf16, cd, from, flen, to);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
510 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
511
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
512 static bool
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
513 convert_utf8_utf32 (iconv_t cd, const uchar *from, size_t flen,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
514 struct _cpp_strbuf *to)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
515 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
516 return conversion_loop (one_utf8_to_utf32, cd, from, flen, to);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
517 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
518
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
519 static bool
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
520 convert_utf16_utf8 (iconv_t cd, const uchar *from, size_t flen,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
521 struct _cpp_strbuf *to)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
522 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
523 return conversion_loop (one_utf16_to_utf8, cd, from, flen, to);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
524 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
525
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
526 static bool
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
527 convert_utf32_utf8 (iconv_t cd, const uchar *from, size_t flen,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
528 struct _cpp_strbuf *to)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
529 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
530 return conversion_loop (one_utf32_to_utf8, cd, from, flen, to);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
531 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
532
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
533 /* Identity conversion, used when we have no alternative. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
534 static bool
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
535 convert_no_conversion (iconv_t cd ATTRIBUTE_UNUSED,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
536 const uchar *from, size_t flen, struct _cpp_strbuf *to)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
537 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
538 if (to->len + flen > to->asize)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
539 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
540 to->asize = to->len + flen;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
541 to->text = XRESIZEVEC (uchar, to->text, to->asize);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
542 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
543 memcpy (to->text + to->len, from, flen);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
544 to->len += flen;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
545 return true;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
546 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
547
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
548 /* And this one uses the system iconv primitive. It's a little
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
549 different, since iconv's interface is a little different. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
550 #if HAVE_ICONV
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
551
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
552 #define CONVERT_ICONV_GROW_BUFFER \
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
553 do { \
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
554 outbytesleft += OUTBUF_BLOCK_SIZE; \
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
555 to->asize += OUTBUF_BLOCK_SIZE; \
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
556 to->text = XRESIZEVEC (uchar, to->text, to->asize); \
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
557 outbuf = (char *)to->text + to->asize - outbytesleft; \
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
558 } while (0)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
559
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
560 static bool
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
561 convert_using_iconv (iconv_t cd, const uchar *from, size_t flen,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
562 struct _cpp_strbuf *to)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
563 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
564 ICONV_CONST char *inbuf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
565 char *outbuf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
566 size_t inbytesleft, outbytesleft;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
567
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
568 /* Reset conversion descriptor and check that it is valid. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
569 if (iconv (cd, 0, 0, 0, 0) == (size_t)-1)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
570 return false;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
571
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
572 inbuf = (ICONV_CONST char *)from;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
573 inbytesleft = flen;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
574 outbuf = (char *)to->text + to->len;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
575 outbytesleft = to->asize - to->len;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
576
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
577 for (;;)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
578 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
579 iconv (cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
580 if (__builtin_expect (inbytesleft == 0, 1))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
581 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
582 /* Close out any shift states, returning to the initial state. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
583 if (iconv (cd, 0, 0, &outbuf, &outbytesleft) == (size_t)-1)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
584 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
585 if (errno != E2BIG)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
586 return false;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
587
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
588 CONVERT_ICONV_GROW_BUFFER;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
589 if (iconv (cd, 0, 0, &outbuf, &outbytesleft) == (size_t)-1)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
590 return false;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
591 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
592
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
593 to->len = to->asize - outbytesleft;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
594 return true;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
595 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
596 if (errno != E2BIG)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
597 return false;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
598
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
599 CONVERT_ICONV_GROW_BUFFER;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
600 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
601 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
602 #else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
603 #define convert_using_iconv 0 /* prevent undefined symbol error below */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
604 #endif
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
605
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
606 /* Arrange for the above custom conversion logic to be used automatically
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
607 when conversion between a suitable pair of character sets is requested. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
608
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
609 #define APPLY_CONVERSION(CONVERTER, FROM, FLEN, TO) \
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
610 CONVERTER.func (CONVERTER.cd, FROM, FLEN, TO)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
611
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
612 struct conversion
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
613 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
614 const char *pair;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
615 convert_f func;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
616 iconv_t fake_cd;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
617 };
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
618 static const struct conversion conversion_tab[] = {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
619 { "UTF-8/UTF-32LE", convert_utf8_utf32, (iconv_t)0 },
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
620 { "UTF-8/UTF-32BE", convert_utf8_utf32, (iconv_t)1 },
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
621 { "UTF-8/UTF-16LE", convert_utf8_utf16, (iconv_t)0 },
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
622 { "UTF-8/UTF-16BE", convert_utf8_utf16, (iconv_t)1 },
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
623 { "UTF-32LE/UTF-8", convert_utf32_utf8, (iconv_t)0 },
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
624 { "UTF-32BE/UTF-8", convert_utf32_utf8, (iconv_t)1 },
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
625 { "UTF-16LE/UTF-8", convert_utf16_utf8, (iconv_t)0 },
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
626 { "UTF-16BE/UTF-8", convert_utf16_utf8, (iconv_t)1 },
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
627 };
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
628
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
629 /* Subroutine of cpp_init_iconv: initialize and return a
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
630 cset_converter structure for conversion from FROM to TO. If
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
631 iconv_open() fails, issue an error and return an identity
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
632 converter. Silently return an identity converter if FROM and TO
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
633 are identical. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
634 static struct cset_converter
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
635 init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
636 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
637 struct cset_converter ret;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
638 char *pair;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
639 size_t i;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
640
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
641 if (!strcasecmp (to, from))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
642 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
643 ret.func = convert_no_conversion;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
644 ret.cd = (iconv_t) -1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
645 ret.width = -1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
646 return ret;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
647 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
648
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
649 pair = (char *) alloca(strlen(to) + strlen(from) + 2);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
650
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
651 strcpy(pair, from);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
652 strcat(pair, "/");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
653 strcat(pair, to);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
654 for (i = 0; i < ARRAY_SIZE (conversion_tab); i++)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
655 if (!strcasecmp (pair, conversion_tab[i].pair))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
656 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
657 ret.func = conversion_tab[i].func;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
658 ret.cd = conversion_tab[i].fake_cd;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
659 ret.width = -1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
660 return ret;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
661 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
662
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
663 /* No custom converter - try iconv. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
664 if (HAVE_ICONV)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
665 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
666 ret.func = convert_using_iconv;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
667 ret.cd = iconv_open (to, from);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
668 ret.width = -1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
669
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
670 if (ret.cd == (iconv_t) -1)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
671 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
672 if (errno == EINVAL)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
673 cpp_error (pfile, CPP_DL_ERROR, /* FIXME should be DL_SORRY */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
674 "conversion from %s to %s not supported by iconv",
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
675 from, to);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
676 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
677 cpp_errno (pfile, CPP_DL_ERROR, "iconv_open");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
678
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
679 ret.func = convert_no_conversion;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
680 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
681 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
682 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
683 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
684 cpp_error (pfile, CPP_DL_ERROR, /* FIXME: should be DL_SORRY */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
685 "no iconv implementation, cannot convert from %s to %s",
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
686 from, to);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
687 ret.func = convert_no_conversion;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
688 ret.cd = (iconv_t) -1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
689 ret.width = -1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
690 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
691 return ret;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
692 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
693
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
694 /* If charset conversion is requested, initialize iconv(3) descriptors
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
695 for conversion from the source character set to the execution
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
696 character sets. If iconv is not present in the C library, and
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
697 conversion is requested, issue an error. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
698
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
699 void
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
700 cpp_init_iconv (cpp_reader *pfile)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
701 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
702 const char *ncset = CPP_OPTION (pfile, narrow_charset);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
703 const char *wcset = CPP_OPTION (pfile, wide_charset);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
704 const char *default_wcset;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
705
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
706 bool be = CPP_OPTION (pfile, bytes_big_endian);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
707
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
708 if (CPP_OPTION (pfile, wchar_precision) >= 32)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
709 default_wcset = be ? "UTF-32BE" : "UTF-32LE";
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
710 else if (CPP_OPTION (pfile, wchar_precision) >= 16)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
711 default_wcset = be ? "UTF-16BE" : "UTF-16LE";
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
712 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
713 /* This effectively means that wide strings are not supported,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
714 so don't do any conversion at all. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
715 default_wcset = SOURCE_CHARSET;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
716
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
717 if (!ncset)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
718 ncset = SOURCE_CHARSET;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
719 if (!wcset)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
720 wcset = default_wcset;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
721
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
722 pfile->narrow_cset_desc = init_iconv_desc (pfile, ncset, SOURCE_CHARSET);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
723 pfile->narrow_cset_desc.width = CPP_OPTION (pfile, char_precision);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
724 pfile->utf8_cset_desc = init_iconv_desc (pfile, "UTF-8", SOURCE_CHARSET);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
725 pfile->utf8_cset_desc.width = CPP_OPTION (pfile, char_precision);
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
726 pfile->char16_cset_desc = init_iconv_desc (pfile,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
727 be ? "UTF-16BE" : "UTF-16LE",
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
728 SOURCE_CHARSET);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
729 pfile->char16_cset_desc.width = 16;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
730 pfile->char32_cset_desc = init_iconv_desc (pfile,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
731 be ? "UTF-32BE" : "UTF-32LE",
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
732 SOURCE_CHARSET);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
733 pfile->char32_cset_desc.width = 32;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
734 pfile->wide_cset_desc = init_iconv_desc (pfile, wcset, SOURCE_CHARSET);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
735 pfile->wide_cset_desc.width = CPP_OPTION (pfile, wchar_precision);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
736 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
737
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
738 /* Destroy iconv(3) descriptors set up by cpp_init_iconv, if necessary. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
739 void
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
740 _cpp_destroy_iconv (cpp_reader *pfile)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
741 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
742 if (HAVE_ICONV)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
743 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
744 if (pfile->narrow_cset_desc.func == convert_using_iconv)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
745 iconv_close (pfile->narrow_cset_desc.cd);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
746 if (pfile->utf8_cset_desc.func == convert_using_iconv)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
747 iconv_close (pfile->utf8_cset_desc.cd);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
748 if (pfile->char16_cset_desc.func == convert_using_iconv)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
749 iconv_close (pfile->char16_cset_desc.cd);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
750 if (pfile->char32_cset_desc.func == convert_using_iconv)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
751 iconv_close (pfile->char32_cset_desc.cd);
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
752 if (pfile->wide_cset_desc.func == convert_using_iconv)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
753 iconv_close (pfile->wide_cset_desc.cd);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
754 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
755 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
756
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
757 /* Utility routine for use by a full compiler. C is a character taken
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
758 from the *basic* source character set, encoded in the host's
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
759 execution encoding. Convert it to (the target's) execution
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
760 encoding, and return that value.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
761
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
762 Issues an internal error if C's representation in the narrow
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
763 execution character set fails to be a single-byte value (C99
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
764 5.2.1p3: "The representation of each member of the source and
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
765 execution character sets shall fit in a byte.") May also issue an
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
766 internal error if C fails to be a member of the basic source
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
767 character set (testing this exactly is too hard, especially when
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
768 the host character set is EBCDIC). */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
769 cppchar_t
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
770 cpp_host_to_exec_charset (cpp_reader *pfile, cppchar_t c)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
771 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
772 uchar sbuf[1];
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
773 struct _cpp_strbuf tbuf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
774
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
775 /* This test is merely an approximation, but it suffices to catch
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
776 the most important thing, which is that we don't get handed a
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
777 character outside the unibyte range of the host character set. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
778 if (c > LAST_POSSIBLY_BASIC_SOURCE_CHAR)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
779 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
780 cpp_error (pfile, CPP_DL_ICE,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
781 "character 0x%lx is not in the basic source character set\n",
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
782 (unsigned long)c);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
783 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
784 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
785
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
786 /* Being a character in the unibyte range of the host character set,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
787 we can safely splat it into a one-byte buffer and trust that that
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
788 is a well-formed string. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
789 sbuf[0] = c;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
790
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
791 /* This should never need to reallocate, but just in case... */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
792 tbuf.asize = 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
793 tbuf.text = XNEWVEC (uchar, tbuf.asize);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
794 tbuf.len = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
795
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
796 if (!APPLY_CONVERSION (pfile->narrow_cset_desc, sbuf, 1, &tbuf))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
797 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
798 cpp_errno (pfile, CPP_DL_ICE, "converting to execution character set");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
799 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
800 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
801 if (tbuf.len != 1)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
802 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
803 cpp_error (pfile, CPP_DL_ICE,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
804 "character 0x%lx is not unibyte in execution character set",
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
805 (unsigned long)c);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
806 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
807 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
808 c = tbuf.text[0];
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
809 free(tbuf.text);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
810 return c;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
811 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
812
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
813
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
814
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
815 /* Utility routine that computes a mask of the form 0000...111... with
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
816 WIDTH 1-bits. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
817 static inline size_t
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
818 width_to_mask (size_t width)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
819 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
820 width = MIN (width, BITS_PER_CPPCHAR_T);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
821 if (width >= CHAR_BIT * sizeof (size_t))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
822 return ~(size_t) 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
823 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
824 return ((size_t) 1 << width) - 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
825 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
826
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
827 /* A large table of unicode character information. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
828 enum {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
829 /* Valid in a C99 identifier? */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
830 C99 = 1,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
831 /* Valid in a C99 identifier, but not as the first character? */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
832 DIG = 2,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
833 /* Valid in a C++ identifier? */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
834 CXX = 4,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
835 /* NFC representation is not valid in an identifier? */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
836 CID = 8,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
837 /* Might be valid NFC form? */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
838 NFC = 16,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
839 /* Might be valid NFKC form? */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
840 NKC = 32,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
841 /* Certain preceding characters might make it not valid NFC/NKFC form? */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
842 CTX = 64
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
843 };
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
844
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
845 static const struct {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
846 /* Bitmap of flags above. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
847 unsigned char flags;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
848 /* Combining class of the character. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
849 unsigned char combine;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
850 /* Last character in the range described by this entry. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
851 unsigned short end;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
852 } ucnranges[] = {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
853 #include "ucnid.h"
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
854 };
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
855
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
856 /* Returns 1 if C is valid in an identifier, 2 if C is valid except at
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
857 the start of an identifier, and 0 if C is not valid in an
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
858 identifier. We assume C has already gone through the checks of
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
859 _cpp_valid_ucn. Also update NST for C if returning nonzero. The
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
860 algorithm is a simple binary search on the table defined in
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
861 ucnid.h. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
862
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
863 static int
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
864 ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
865 struct normalize_state *nst)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
866 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
867 int mn, mx, md;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
868
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
869 if (c > 0xFFFF)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
870 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
871
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
872 mn = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
873 mx = ARRAY_SIZE (ucnranges) - 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
874 while (mx != mn)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
875 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
876 md = (mn + mx) / 2;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
877 if (c <= ucnranges[md].end)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
878 mx = md;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
879 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
880 mn = md + 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
881 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
882
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
883 /* When -pedantic, we require the character to have been listed by
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
884 the standard for the current language. Otherwise, we accept the
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
885 union of the acceptable sets for C++98 and C99. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
886 if (! (ucnranges[mn].flags & (C99 | CXX)))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
887 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
888
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
889 if (CPP_PEDANTIC (pfile)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
890 && ((CPP_OPTION (pfile, c99) && !(ucnranges[mn].flags & C99))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
891 || (CPP_OPTION (pfile, cplusplus)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
892 && !(ucnranges[mn].flags & CXX))))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
893 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
894
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
895 /* Update NST. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
896 if (ucnranges[mn].combine != 0 && ucnranges[mn].combine < nst->prev_class)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
897 nst->level = normalized_none;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
898 else if (ucnranges[mn].flags & CTX)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
899 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
900 bool safe;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
901 cppchar_t p = nst->previous;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
902
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
903 /* Easy cases from Bengali, Oriya, Tamil, Jannada, and Malayalam. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
904 if (c == 0x09BE)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
905 safe = p != 0x09C7; /* Use 09CB instead of 09C7 09BE. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
906 else if (c == 0x0B3E)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
907 safe = p != 0x0B47; /* Use 0B4B instead of 0B47 0B3E. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
908 else if (c == 0x0BBE)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
909 safe = p != 0x0BC6 && p != 0x0BC7; /* Use 0BCA/0BCB instead. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
910 else if (c == 0x0CC2)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
911 safe = p != 0x0CC6; /* Use 0CCA instead of 0CC6 0CC2. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
912 else if (c == 0x0D3E)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
913 safe = p != 0x0D46 && p != 0x0D47; /* Use 0D4A/0D4B instead. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
914 /* For Hangul, characters in the range AC00-D7A3 are NFC/NFKC,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
915 and are combined algorithmically from a sequence of the form
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
916 1100-1112 1161-1175 11A8-11C2
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
917 (if the third is not present, it is treated as 11A7, which is not
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
918 really a valid character).
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
919 Unfortunately, C99 allows (only) the NFC form, but C++ allows
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
920 only the combining characters. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
921 else if (c >= 0x1161 && c <= 0x1175)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
922 safe = p < 0x1100 || p > 0x1112;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
923 else if (c >= 0x11A8 && c <= 0x11C2)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
924 safe = (p < 0xAC00 || p > 0xD7A3 || (p - 0xAC00) % 28 != 0);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
925 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
926 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
927 /* Uh-oh, someone updated ucnid.h without updating this code. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
928 cpp_error (pfile, CPP_DL_ICE, "Character %x might not be NFKC", c);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
929 safe = true;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
930 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
931 if (!safe && c < 0x1161)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
932 nst->level = normalized_none;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
933 else if (!safe)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
934 nst->level = MAX (nst->level, normalized_identifier_C);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
935 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
936 else if (ucnranges[mn].flags & NKC)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
937 ;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
938 else if (ucnranges[mn].flags & NFC)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
939 nst->level = MAX (nst->level, normalized_C);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
940 else if (ucnranges[mn].flags & CID)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
941 nst->level = MAX (nst->level, normalized_identifier_C);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
942 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
943 nst->level = normalized_none;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
944 nst->previous = c;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
945 nst->prev_class = ucnranges[mn].combine;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
946
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
947 /* In C99, UCN digits may not begin identifiers. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
948 if (CPP_OPTION (pfile, c99) && (ucnranges[mn].flags & DIG))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
949 return 2;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
950
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
951 return 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
952 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
953
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
954 /* [lex.charset]: The character designated by the universal character
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
955 name \UNNNNNNNN is that character whose character short name in
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
956 ISO/IEC 10646 is NNNNNNNN; the character designated by the
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
957 universal character name \uNNNN is that character whose character
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
958 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
959 for a universal character name corresponds to a surrogate code point
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
960 (in the range 0xD800-0xDFFF, inclusive), the program is ill-formed.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
961 Additionally, if the hexadecimal value for a universal-character-name
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
962 outside a character or string literal corresponds to a control character
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
963 (in either of the ranges 0x00-0x1F or 0x7F-0x9F, both inclusive) or to a
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
964 character in the basic source character set, the program is ill-formed.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
965
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
966 C99 6.4.3: A universal character name shall not specify a character
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
967 whose short identifier is less than 00A0 other than 0024 ($), 0040 (@),
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
968 or 0060 (`), nor one in the range D800 through DFFF inclusive.
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
969
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
970 *PSTR must be preceded by "\u" or "\U"; it is assumed that the
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
971 buffer end is delimited by a non-hex digit. Returns zero if the
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
972 UCN has not been consumed.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
973
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
974 Otherwise the nonzero value of the UCN, whether valid or invalid,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
975 is returned. Diagnostics are emitted for invalid values. PSTR
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
976 is updated to point one beyond the UCN, or to the syntactically
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
977 invalid character.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
978
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
979 IDENTIFIER_POS is 0 when not in an identifier, 1 for the start of
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
980 an identifier, or 2 otherwise. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
981
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
982 cppchar_t
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
983 _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
984 const uchar *limit, int identifier_pos,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
985 struct normalize_state *nst)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
986 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
987 cppchar_t result, c;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
988 unsigned int length;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
989 const uchar *str = *pstr;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
990 const uchar *base = str - 2;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
991
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
992 if (!CPP_OPTION (pfile, cplusplus) && !CPP_OPTION (pfile, c99))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
993 cpp_error (pfile, CPP_DL_WARNING,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
994 "universal character names are only valid in C++ and C99");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
995 else if (CPP_WTRADITIONAL (pfile) && identifier_pos == 0)
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
996 cpp_warning (pfile, CPP_W_TRADITIONAL,
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
997 "the meaning of '\\%c' is different in traditional C",
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
998 (int) str[-1]);
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
999
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1000 if (str[-1] == 'u')
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1001 length = 4;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1002 else if (str[-1] == 'U')
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1003 length = 8;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1004 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1005 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1006 cpp_error (pfile, CPP_DL_ICE, "In _cpp_valid_ucn but not a UCN");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1007 length = 4;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1008 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1009
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1010 result = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1011 do
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1012 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1013 c = *str;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1014 if (!ISXDIGIT (c))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1015 break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1016 str++;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1017 result = (result << 4) + hex_value (c);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1018 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1019 while (--length && str < limit);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1020
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1021 /* Partial UCNs are not valid in strings, but decompose into
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1022 multiple tokens in identifiers, so we can't give a helpful
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1023 error message in that case. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1024 if (length && identifier_pos)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1025 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1026
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1027 *pstr = str;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1028 if (length)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1029 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1030 cpp_error (pfile, CPP_DL_ERROR,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1031 "incomplete universal character name %.*s",
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1032 (int) (str - base), base);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1033 result = 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1034 }
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1035 /* The C99 standard permits $, @ and ` to be specified as UCNs. We use
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1036 hex escapes so that this also works with EBCDIC hosts.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1037 C++0x permits everything below 0xa0 within literals;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1038 ucn_valid_in_identifier will complain about identifiers. */
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1039 else if ((result < 0xa0
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1040 && !CPP_OPTION (pfile, cplusplus)
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1041 && (result != 0x24 && result != 0x40 && result != 0x60))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1042 || (result & 0x80000000)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1043 || (result >= 0xD800 && result <= 0xDFFF))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1044 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1045 cpp_error (pfile, CPP_DL_ERROR,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1046 "%.*s is not a valid universal character",
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1047 (int) (str - base), base);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1048 result = 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1049 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1050 else if (identifier_pos && result == 0x24
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1051 && CPP_OPTION (pfile, dollars_in_ident))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1052 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1053 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1054 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1055 CPP_OPTION (pfile, warn_dollars) = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1056 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1057 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1058 NORMALIZE_STATE_UPDATE_IDNUM (nst);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1059 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1060 else if (identifier_pos)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1061 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1062 int validity = ucn_valid_in_identifier (pfile, result, nst);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1063
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1064 if (validity == 0)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1065 cpp_error (pfile, CPP_DL_ERROR,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1066 "universal character %.*s is not valid in an identifier",
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1067 (int) (str - base), base);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1068 else if (validity == 2 && identifier_pos == 1)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1069 cpp_error (pfile, CPP_DL_ERROR,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1070 "universal character %.*s is not valid at the start of an identifier",
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1071 (int) (str - base), base);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1072 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1073
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1074 if (result == 0)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1075 result = 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1076
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1077 return result;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1078 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1079
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1080 /* Convert an UCN, pointed to by FROM, to UTF-8 encoding, then translate
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1081 it to the execution character set and write the result into TBUF.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1082 An advanced pointer is returned. Issues all relevant diagnostics. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1083 static const uchar *
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1084 convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1085 struct _cpp_strbuf *tbuf, struct cset_converter cvt)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1086 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1087 cppchar_t ucn;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1088 uchar buf[6];
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1089 uchar *bufp = buf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1090 size_t bytesleft = 6;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1091 int rval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1092 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1093
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1094 from++; /* Skip u/U. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1095 ucn = _cpp_valid_ucn (pfile, &from, limit, 0, &nst);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1096
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1097 rval = one_cppchar_to_utf8 (ucn, &bufp, &bytesleft);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1098 if (rval)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1099 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1100 errno = rval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1101 cpp_errno (pfile, CPP_DL_ERROR,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1102 "converting UCN to source character set");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1103 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1104 else if (!APPLY_CONVERSION (cvt, buf, 6 - bytesleft, tbuf))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1105 cpp_errno (pfile, CPP_DL_ERROR,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1106 "converting UCN to execution character set");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1107
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1108 return from;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1109 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1110
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1111 /* Subroutine of convert_hex and convert_oct. N is the representation
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1112 in the execution character set of a numeric escape; write it into the
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1113 string buffer TBUF and update the end-of-string pointer therein. WIDE
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1114 is true if it's a wide string that's being assembled in TBUF. This
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1115 function issues no diagnostics and never fails. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1116 static void
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1117 emit_numeric_escape (cpp_reader *pfile, cppchar_t n,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1118 struct _cpp_strbuf *tbuf, struct cset_converter cvt)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1119 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1120 size_t width = cvt.width;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1121
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1122 if (width != CPP_OPTION (pfile, char_precision))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1123 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1124 /* We have to render this into the target byte order, which may not
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1125 be our byte order. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1126 bool bigend = CPP_OPTION (pfile, bytes_big_endian);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1127 size_t cwidth = CPP_OPTION (pfile, char_precision);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1128 size_t cmask = width_to_mask (cwidth);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1129 size_t nbwc = width / cwidth;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1130 size_t i;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1131 size_t off = tbuf->len;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1132 cppchar_t c;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1133
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1134 if (tbuf->len + nbwc > tbuf->asize)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1135 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1136 tbuf->asize += OUTBUF_BLOCK_SIZE;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1137 tbuf->text = XRESIZEVEC (uchar, tbuf->text, tbuf->asize);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1138 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1139
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1140 for (i = 0; i < nbwc; i++)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1141 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1142 c = n & cmask;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1143 n >>= cwidth;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1144 tbuf->text[off + (bigend ? nbwc - i - 1 : i)] = c;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1145 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1146 tbuf->len += nbwc;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1147 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1148 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1149 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1150 /* Note: this code does not handle the case where the target
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1151 and host have a different number of bits in a byte. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1152 if (tbuf->len + 1 > tbuf->asize)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1153 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1154 tbuf->asize += OUTBUF_BLOCK_SIZE;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1155 tbuf->text = XRESIZEVEC (uchar, tbuf->text, tbuf->asize);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1156 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1157 tbuf->text[tbuf->len++] = n;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1158 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1159 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1160
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1161 /* Convert a hexadecimal escape, pointed to by FROM, to the execution
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1162 character set and write it into the string buffer TBUF. Returns an
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1163 advanced pointer, and issues diagnostics as necessary.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1164 No character set translation occurs; this routine always produces the
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1165 execution-set character with numeric value equal to the given hex
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1166 number. You can, e.g. generate surrogate pairs this way. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1167 static const uchar *
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1168 convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1169 struct _cpp_strbuf *tbuf, struct cset_converter cvt)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1170 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1171 cppchar_t c, n = 0, overflow = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1172 int digits_found = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1173 size_t width = cvt.width;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1174 size_t mask = width_to_mask (width);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1175
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1176 if (CPP_WTRADITIONAL (pfile))
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
1177 cpp_warning (pfile, CPP_W_TRADITIONAL,
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
1178 "the meaning of '\\x' is different in traditional C");
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1179
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1180 from++; /* Skip 'x'. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1181 while (from < limit)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1182 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1183 c = *from;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1184 if (! hex_p (c))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1185 break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1186 from++;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1187 overflow |= n ^ (n << 4 >> 4);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1188 n = (n << 4) + hex_value (c);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1189 digits_found = 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1190 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1191
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1192 if (!digits_found)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1193 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1194 cpp_error (pfile, CPP_DL_ERROR,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1195 "\\x used with no following hex digits");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1196 return from;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1197 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1198
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1199 if (overflow | (n != (n & mask)))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1200 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1201 cpp_error (pfile, CPP_DL_PEDWARN,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1202 "hex escape sequence out of range");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1203 n &= mask;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1204 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1205
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1206 emit_numeric_escape (pfile, n, tbuf, cvt);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1207
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1208 return from;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1209 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1210
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1211 /* Convert an octal escape, pointed to by FROM, to the execution
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1212 character set and write it into the string buffer TBUF. Returns an
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1213 advanced pointer, and issues diagnostics as necessary.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1214 No character set translation occurs; this routine always produces the
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1215 execution-set character with numeric value equal to the given octal
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1216 number. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1217 static const uchar *
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1218 convert_oct (cpp_reader *pfile, const uchar *from, const uchar *limit,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1219 struct _cpp_strbuf *tbuf, struct cset_converter cvt)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1220 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1221 size_t count = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1222 cppchar_t c, n = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1223 size_t width = cvt.width;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1224 size_t mask = width_to_mask (width);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1225 bool overflow = false;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1226
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1227 while (from < limit && count++ < 3)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1228 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1229 c = *from;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1230 if (c < '0' || c > '7')
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1231 break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1232 from++;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1233 overflow |= n ^ (n << 3 >> 3);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1234 n = (n << 3) + c - '0';
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1235 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1236
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1237 if (n != (n & mask))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1238 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1239 cpp_error (pfile, CPP_DL_PEDWARN,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1240 "octal escape sequence out of range");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1241 n &= mask;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1242 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1243
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1244 emit_numeric_escape (pfile, n, tbuf, cvt);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1245
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1246 return from;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1247 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1248
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1249 /* Convert an escape sequence (pointed to by FROM) to its value on
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1250 the target, and to the execution character set. Do not scan past
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1251 LIMIT. Write the converted value into TBUF. Returns an advanced
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1252 pointer. Handles all relevant diagnostics. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1253 static const uchar *
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1254 convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1255 struct _cpp_strbuf *tbuf, struct cset_converter cvt)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1256 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1257 /* Values of \a \b \e \f \n \r \t \v respectively. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1258 #if HOST_CHARSET == HOST_CHARSET_ASCII
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1259 static const uchar charconsts[] = { 7, 8, 27, 12, 10, 13, 9, 11 };
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1260 #elif HOST_CHARSET == HOST_CHARSET_EBCDIC
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1261 static const uchar charconsts[] = { 47, 22, 39, 12, 21, 13, 5, 11 };
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1262 #else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1263 #error "unknown host character set"
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1264 #endif
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1265
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1266 uchar c;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1267
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1268 c = *from;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1269 switch (c)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1270 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1271 /* UCNs, hex escapes, and octal escapes are processed separately. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1272 case 'u': case 'U':
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1273 return convert_ucn (pfile, from, limit, tbuf, cvt);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1274
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1275 case 'x':
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1276 return convert_hex (pfile, from, limit, tbuf, cvt);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1277 break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1278
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1279 case '0': case '1': case '2': case '3':
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1280 case '4': case '5': case '6': case '7':
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1281 return convert_oct (pfile, from, limit, tbuf, cvt);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1282
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1283 /* Various letter escapes. Get the appropriate host-charset
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1284 value into C. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1285 case '\\': case '\'': case '"': case '?': break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1286
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1287 case '(': case '{': case '[': case '%':
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1288 /* '\(', etc, can be used at the beginning of a line in a long
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1289 string split onto multiple lines with \-newline, to prevent
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1290 Emacs or other text editors from getting confused. '\%' can
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1291 be used to prevent SCCS from mangling printf format strings. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1292 if (CPP_PEDANTIC (pfile))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1293 goto unknown;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1294 break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1295
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1296 case 'b': c = charconsts[1]; break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1297 case 'f': c = charconsts[3]; break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1298 case 'n': c = charconsts[4]; break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1299 case 'r': c = charconsts[5]; break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1300 case 't': c = charconsts[6]; break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1301 case 'v': c = charconsts[7]; break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1302
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1303 case 'a':
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1304 if (CPP_WTRADITIONAL (pfile))
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
1305 cpp_warning (pfile, CPP_W_TRADITIONAL,
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
1306 "the meaning of '\\a' is different in traditional C");
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1307 c = charconsts[0];
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1308 break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1309
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1310 case 'e': case 'E':
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1311 if (CPP_PEDANTIC (pfile))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1312 cpp_error (pfile, CPP_DL_PEDWARN,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1313 "non-ISO-standard escape sequence, '\\%c'", (int) c);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1314 c = charconsts[2];
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1315 break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1316
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1317 default:
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1318 unknown:
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1319 if (ISGRAPH (c))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1320 cpp_error (pfile, CPP_DL_PEDWARN,
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1321 "unknown escape sequence: '\\%c'", (int) c);
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1322 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1323 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1324 /* diagnostic.c does not support "%03o". When it does, this
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1325 code can use %03o directly in the diagnostic again. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1326 char buf[32];
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1327 sprintf(buf, "%03o", (int) c);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1328 cpp_error (pfile, CPP_DL_PEDWARN,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1329 "unknown escape sequence: '\\%s'", buf);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1330 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1331 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1332
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1333 /* Now convert what we have to the execution character set. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1334 if (!APPLY_CONVERSION (cvt, &c, 1, tbuf))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1335 cpp_errno (pfile, CPP_DL_ERROR,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1336 "converting escape sequence to execution character set");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1337
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1338 return from + 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1339 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1340
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1341 /* TYPE is a token type. The return value is the conversion needed to
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1342 convert from source to execution character set for the given type. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1343 static struct cset_converter
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1344 converter_for_type (cpp_reader *pfile, enum cpp_ttype type)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1345 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1346 switch (type)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1347 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1348 default:
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1349 return pfile->narrow_cset_desc;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1350 case CPP_UTF8STRING:
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1351 return pfile->utf8_cset_desc;
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1352 case CPP_CHAR16:
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1353 case CPP_STRING16:
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1354 return pfile->char16_cset_desc;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1355 case CPP_CHAR32:
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1356 case CPP_STRING32:
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1357 return pfile->char32_cset_desc;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1358 case CPP_WCHAR:
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1359 case CPP_WSTRING:
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1360 return pfile->wide_cset_desc;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1361 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1362 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1363
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1364 /* FROM is an array of cpp_string structures of length COUNT. These
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1365 are to be converted from the source to the execution character set,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1366 escape sequences translated, and finally all are to be
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1367 concatenated. WIDE indicates whether or not to produce a wide
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1368 string. The result is written into TO. Returns true for success,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1369 false for failure. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1370 bool
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1371 cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1372 cpp_string *to, enum cpp_ttype type)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1373 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1374 struct _cpp_strbuf tbuf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1375 const uchar *p, *base, *limit;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1376 size_t i;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1377 struct cset_converter cvt = converter_for_type (pfile, type);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1378
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1379 tbuf.asize = MAX (OUTBUF_BLOCK_SIZE, from->len);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1380 tbuf.text = XNEWVEC (uchar, tbuf.asize);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1381 tbuf.len = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1382
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1383 for (i = 0; i < count; i++)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1384 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1385 p = from[i].text;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1386 if (*p == 'u')
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1387 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1388 if (*++p == '8')
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1389 p++;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1390 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1391 else if (*p == 'L' || *p == 'U') p++;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1392 if (*p == 'R')
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1393 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1394 const uchar *prefix;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1395
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1396 /* Skip over 'R"'. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1397 p += 2;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1398 prefix = p;
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
1399 while (*p != '(')
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1400 p++;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1401 p++;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1402 limit = from[i].text + from[i].len;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1403 if (limit >= p + (p - prefix) + 1)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1404 limit -= (p - prefix) + 1;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1405
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
1406 /* Raw strings are all normal characters; these can be fed
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
1407 directly to convert_cset. */
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
1408 if (!APPLY_CONVERSION (cvt, p, limit - p, &tbuf))
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
1409 goto fail;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1410
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1411 continue;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1412 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 0
diff changeset
1413
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1414 p++; /* Skip leading quote. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1415 limit = from[i].text + from[i].len - 1; /* Skip trailing quote. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1416
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1417 for (;;)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1418 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1419 base = p;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1420 while (p < limit && *p != '\\')
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1421 p++;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1422 if (p > base)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1423 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1424 /* We have a run of normal characters; these can be fed
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1425 directly to convert_cset. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1426 if (!APPLY_CONVERSION (cvt, base, p - base, &tbuf))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1427 goto fail;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1428 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1429 if (p == limit)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1430 break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1431
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1432 p = convert_escape (pfile, p + 1, limit, &tbuf, cvt);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1433 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1434 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1435 /* NUL-terminate the 'to' buffer and translate it to a cpp_string
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1436 structure. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1437 emit_numeric_escape (pfile, 0, &tbuf, cvt);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1438 tbuf.text = XRESIZEVEC (uchar, tbuf.text, tbuf.len);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1439 to->text = tbuf.text;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1440 to->len = tbuf.len;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1441 return true;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1442
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1443 fail:
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1444 cpp_errno (pfile, CPP_DL_ERROR, "converting to execution character set");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1445 free (tbuf.text);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1446 return false;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1447 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1448
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1449 /* Subroutine of do_line and do_linemarker. Convert escape sequences
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1450 in a string, but do not perform character set conversion. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1451 bool
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1452 cpp_interpret_string_notranslate (cpp_reader *pfile, const cpp_string *from,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1453 size_t count, cpp_string *to,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1454 enum cpp_ttype type ATTRIBUTE_UNUSED)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1455 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1456 struct cset_converter save_narrow_cset_desc = pfile->narrow_cset_desc;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1457 bool retval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1458
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1459 pfile->narrow_cset_desc.func = convert_no_conversion;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1460 pfile->narrow_cset_desc.cd = (iconv_t) -1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1461 pfile->narrow_cset_desc.width = CPP_OPTION (pfile, char_precision);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1462
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1463 retval = cpp_interpret_string (pfile, from, count, to, CPP_STRING);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1464
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1465 pfile->narrow_cset_desc = save_narrow_cset_desc;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1466 return retval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1467 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1468
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1469
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1470 /* Subroutine of cpp_interpret_charconst which performs the conversion
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1471 to a number, for narrow strings. STR is the string structure returned
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1472 by cpp_interpret_string. PCHARS_SEEN and UNSIGNEDP are as for
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1473 cpp_interpret_charconst. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1474 static cppchar_t
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1475 narrow_str_to_charconst (cpp_reader *pfile, cpp_string str,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1476 unsigned int *pchars_seen, int *unsignedp)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1477 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1478 size_t width = CPP_OPTION (pfile, char_precision);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1479 size_t max_chars = CPP_OPTION (pfile, int_precision) / width;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1480 size_t mask = width_to_mask (width);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1481 size_t i;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1482 cppchar_t result, c;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1483 bool unsigned_p;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1484
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1485 /* The value of a multi-character character constant, or a
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1486 single-character character constant whose representation in the
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1487 execution character set is more than one byte long, is
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1488 implementation defined. This implementation defines it to be the
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1489 number formed by interpreting the byte sequence in memory as a
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1490 big-endian binary number. If overflow occurs, the high bytes are
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1491 lost, and a warning is issued.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1492
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1493 We don't want to process the NUL terminator handed back by
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1494 cpp_interpret_string. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1495 result = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1496 for (i = 0; i < str.len - 1; i++)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1497 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1498 c = str.text[i] & mask;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1499 if (width < BITS_PER_CPPCHAR_T)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1500 result = (result << width) | c;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1501 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1502 result = c;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1503 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1504
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1505 if (i > max_chars)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1506 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1507 i = max_chars;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1508 cpp_error (pfile, CPP_DL_WARNING,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1509 "character constant too long for its type");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1510 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1511 else if (i > 1 && CPP_OPTION (pfile, warn_multichar))
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
1512 cpp_warning (pfile, CPP_W_MULTICHAR, "multi-character character constant");
0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1513
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1514 /* Multichar constants are of type int and therefore signed. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1515 if (i > 1)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1516 unsigned_p = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1517 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1518 unsigned_p = CPP_OPTION (pfile, unsigned_char);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1519
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1520 /* Truncate the constant to its natural width, and simultaneously
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1521 sign- or zero-extend to the full width of cppchar_t.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1522 For single-character constants, the value is WIDTH bits wide.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1523 For multi-character constants, the value is INT_PRECISION bits wide. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1524 if (i > 1)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1525 width = CPP_OPTION (pfile, int_precision);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1526 if (width < BITS_PER_CPPCHAR_T)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1527 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1528 mask = ((cppchar_t) 1 << width) - 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1529 if (unsigned_p || !(result & (1 << (width - 1))))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1530 result &= mask;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1531 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1532 result |= ~mask;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1533 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1534 *pchars_seen = i;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1535 *unsignedp = unsigned_p;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1536 return result;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1537 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1538
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1539 /* Subroutine of cpp_interpret_charconst which performs the conversion
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1540 to a number, for wide strings. STR is the string structure returned
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1541 by cpp_interpret_string. PCHARS_SEEN and UNSIGNEDP are as for
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1542 cpp_interpret_charconst. TYPE is the token type. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1543 static cppchar_t
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1544 wide_str_to_charconst (cpp_reader *pfile, cpp_string str,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1545 unsigned int *pchars_seen, int *unsignedp,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1546 enum cpp_ttype type)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1547 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1548 bool bigend = CPP_OPTION (pfile, bytes_big_endian);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1549 size_t width = converter_for_type (pfile, type).width;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1550 size_t cwidth = CPP_OPTION (pfile, char_precision);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1551 size_t mask = width_to_mask (width);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1552 size_t cmask = width_to_mask (cwidth);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1553 size_t nbwc = width / cwidth;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1554 size_t off, i;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1555 cppchar_t result = 0, c;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1556
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1557 /* This is finicky because the string is in the target's byte order,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1558 which may not be our byte order. Only the last character, ignoring
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1559 the NUL terminator, is relevant. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1560 off = str.len - (nbwc * 2);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1561 result = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1562 for (i = 0; i < nbwc; i++)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1563 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1564 c = bigend ? str.text[off + i] : str.text[off + nbwc - i - 1];
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1565 result = (result << cwidth) | (c & cmask);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1566 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1567
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1568 /* Wide character constants have type wchar_t, and a single
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1569 character exactly fills a wchar_t, so a multi-character wide
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1570 character constant is guaranteed to overflow. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1571 if (str.len > nbwc * 2)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1572 cpp_error (pfile, CPP_DL_WARNING,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1573 "character constant too long for its type");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1574
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1575 /* Truncate the constant to its natural width, and simultaneously
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1576 sign- or zero-extend to the full width of cppchar_t. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1577 if (width < BITS_PER_CPPCHAR_T)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1578 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1579 if (type == CPP_CHAR16 || type == CPP_CHAR32
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1580 || CPP_OPTION (pfile, unsigned_wchar)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1581 || !(result & (1 << (width - 1))))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1582 result &= mask;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1583 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1584 result |= ~mask;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1585 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1586
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1587 if (type == CPP_CHAR16 || type == CPP_CHAR32
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1588 || CPP_OPTION (pfile, unsigned_wchar))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1589 *unsignedp = 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1590 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1591 *unsignedp = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1592
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1593 *pchars_seen = 1;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1594 return result;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1595 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1596
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1597 /* Interpret a (possibly wide) character constant in TOKEN.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1598 PCHARS_SEEN points to a variable that is filled in with the number
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1599 of characters seen, and UNSIGNEDP to a variable that indicates
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1600 whether the result has signed type. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1601 cppchar_t
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1602 cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1603 unsigned int *pchars_seen, int *unsignedp)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1604 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1605 cpp_string str = { 0, 0 };
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1606 bool wide = (token->type != CPP_CHAR);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1607 cppchar_t result;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1608
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1609 /* an empty constant will appear as L'', u'', U'' or '' */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1610 if (token->val.str.len == (size_t) (2 + wide))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1611 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1612 cpp_error (pfile, CPP_DL_ERROR, "empty character constant");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1613 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1614 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1615 else if (!cpp_interpret_string (pfile, &token->val.str, 1, &str, token->type))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1616 return 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1617
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1618 if (wide)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1619 result = wide_str_to_charconst (pfile, str, pchars_seen, unsignedp,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1620 token->type);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1621 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1622 result = narrow_str_to_charconst (pfile, str, pchars_seen, unsignedp);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1623
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1624 if (str.text != token->val.str.text)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1625 free ((void *)str.text);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1626
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1627 return result;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1628 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1629
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1630 /* Convert an identifier denoted by ID and LEN, which might contain
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1631 UCN escapes, to the source character set, either UTF-8 or
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1632 UTF-EBCDIC. Assumes that the identifier is actually a valid identifier. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1633 cpp_hashnode *
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1634 _cpp_interpret_identifier (cpp_reader *pfile, const uchar *id, size_t len)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1635 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1636 /* It turns out that a UCN escape always turns into fewer characters
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1637 than the escape itself, so we can allocate a temporary in advance. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1638 uchar * buf = (uchar *) alloca (len + 1);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1639 uchar * bufp = buf;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1640 size_t idp;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1641
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1642 for (idp = 0; idp < len; idp++)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1643 if (id[idp] != '\\')
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1644 *bufp++ = id[idp];
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1645 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1646 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1647 unsigned length = id[idp+1] == 'u' ? 4 : 8;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1648 cppchar_t value = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1649 size_t bufleft = len - (bufp - buf);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1650 int rval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1651
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1652 idp += 2;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1653 while (length && idp < len && ISXDIGIT (id[idp]))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1654 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1655 value = (value << 4) + hex_value (id[idp]);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1656 idp++;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1657 length--;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1658 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1659 idp--;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1660
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1661 /* Special case for EBCDIC: if the identifier contains
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1662 a '$' specified using a UCN, translate it to EBCDIC. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1663 if (value == 0x24)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1664 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1665 *bufp++ = '$';
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1666 continue;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1667 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1668
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1669 rval = one_cppchar_to_utf8 (value, &bufp, &bufleft);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1670 if (rval)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1671 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1672 errno = rval;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1673 cpp_errno (pfile, CPP_DL_ERROR,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1674 "converting UCN to source character set");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1675 break;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1676 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1677 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1678
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1679 return CPP_HASHNODE (ht_lookup (pfile->hash_table,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1680 buf, bufp - buf, HT_ALLOC));
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1681 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1682
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1683 /* Convert an input buffer (containing the complete contents of one
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1684 source file) from INPUT_CHARSET to the source character set. INPUT
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1685 points to the input buffer, SIZE is its allocated size, and LEN is
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1686 the length of the meaningful data within the buffer. The
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1687 translated buffer is returned, *ST_SIZE is set to the length of
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1688 the meaningful data within the translated buffer, and *BUFFER_START
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1689 is set to the start of the returned buffer. *BUFFER_START may
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1690 differ from the return value in the case of a BOM or other ignored
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1691 marker information.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1692
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1693 INPUT is expected to have been allocated with xmalloc. This
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1694 function will either set *BUFFER_START to INPUT, or free it and set
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1695 *BUFFER_START to a pointer to another xmalloc-allocated block of
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1696 memory. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1697 uchar *
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1698 _cpp_convert_input (cpp_reader *pfile, const char *input_charset,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1699 uchar *input, size_t size, size_t len,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1700 const unsigned char **buffer_start, off_t *st_size)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1701 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1702 struct cset_converter input_cset;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1703 struct _cpp_strbuf to;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1704 unsigned char *buffer;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1705
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1706 input_cset = init_iconv_desc (pfile, SOURCE_CHARSET, input_charset);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1707 if (input_cset.func == convert_no_conversion)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1708 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1709 to.text = input;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1710 to.asize = size;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1711 to.len = len;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1712 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1713 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1714 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1715 to.asize = MAX (65536, len);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1716 to.text = XNEWVEC (uchar, to.asize);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1717 to.len = 0;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1718
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1719 if (!APPLY_CONVERSION (input_cset, input, len, &to))
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1720 cpp_error (pfile, CPP_DL_ERROR,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1721 "failure to convert %s to %s",
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1722 CPP_OPTION (pfile, input_charset), SOURCE_CHARSET);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1723
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1724 free (input);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1725 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1726
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1727 /* Clean up the mess. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1728 if (input_cset.func == convert_using_iconv)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1729 iconv_close (input_cset.cd);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1730
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1731 /* Resize buffer if we allocated substantially too much, or if we
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1732 haven't enough space for the \n-terminator. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1733 if (to.len + 4096 < to.asize || to.len >= to.asize)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1734 to.text = XRESIZEVEC (uchar, to.text, to.len + 1);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1735
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1736 /* If the file is using old-school Mac line endings (\r only),
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1737 terminate with another \r, not an \n, so that we do not mistake
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1738 the \r\n sequence for a single DOS line ending and erroneously
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1739 issue the "No newline at end of file" diagnostic. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1740 if (to.len && to.text[to.len - 1] == '\r')
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1741 to.text[to.len] = '\r';
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1742 else
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1743 to.text[to.len] = '\n';
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1744
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1745 buffer = to.text;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1746 *st_size = to.len;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1747 #if HOST_CHARSET == HOST_CHARSET_ASCII
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1748 /* The HOST_CHARSET test just above ensures that the source charset
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1749 is UTF-8. So, ignore a UTF-8 BOM if we see one. Note that
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1750 glib'c UTF-8 iconv() provider (as of glibc 2.7) does not ignore a
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1751 BOM -- however, even if it did, we would still need this code due
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1752 to the 'convert_no_conversion' case. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1753 if (to.len >= 3 && to.text[0] == 0xef && to.text[1] == 0xbb
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1754 && to.text[2] == 0xbf)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1755 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1756 *st_size -= 3;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1757 buffer += 3;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1758 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1759 #endif
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1760
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1761 *buffer_start = to.text;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1762 return buffer;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1763 }
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1764
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1765 /* Decide on the default encoding to assume for input files. */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1766 const char *
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1767 _cpp_default_encoding (void)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1768 {
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1769 const char *current_encoding = NULL;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1770
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1771 /* We disable this because the default codeset is 7-bit ASCII on
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1772 most platforms, and this causes conversion failures on every
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1773 file in GCC that happens to have one of the upper 128 characters
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1774 in it -- most likely, as part of the name of a contributor.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1775 We should definitely recognize in-band markers of file encoding,
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1776 like:
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1777 - the appropriate Unicode byte-order mark (FE FF) to recognize
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1778 UTF16 and UCS4 (in both big-endian and little-endian flavors)
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1779 and UTF8
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1780 - a "#i", "#d", "/ *", "//", " #p" or "#p" (for #pragma) to
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1781 distinguish ASCII and EBCDIC.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1782 - now we can parse something like "#pragma GCC encoding <xyz>
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1783 on the first line, or even Emacs/VIM's mode line tags (there's
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1784 a problem here in that VIM uses the last line, and Emacs has
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1785 its more elaborate "local variables" convention).
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1786 - investigate whether Java has another common convention, which
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1787 would be friendly to support.
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1788 (Zack Weinberg and Paolo Bonzini, May 20th 2004) */
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1789 #if defined (HAVE_LOCALE_H) && defined (HAVE_LANGINFO_CODESET) && 0
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1790 setlocale (LC_CTYPE, "");
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1791 current_encoding = nl_langinfo (CODESET);
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1792 #endif
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1793 if (current_encoding == NULL || *current_encoding == '\0')
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1794 current_encoding = SOURCE_CHARSET;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1795
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1796 return current_encoding;
a06113de4d67 first commit
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1797 }