annotate gcc/ada/libgnat/g-decstr.adb @ 131:84e7813d76e9

gcc-8.2
author mir3636
date Thu, 25 Oct 2018 07:37:49 +0900
parents 04ced10e8804
children 1830386684a0
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
111
kono
parents:
diff changeset
1 ------------------------------------------------------------------------------
kono
parents:
diff changeset
2 -- --
kono
parents:
diff changeset
3 -- GNAT RUN-TIME COMPONENTS --
kono
parents:
diff changeset
4 -- --
kono
parents:
diff changeset
5 -- G N A T . D E C O D E _ S T R I N G --
kono
parents:
diff changeset
6 -- --
kono
parents:
diff changeset
7 -- S p e c --
kono
parents:
diff changeset
8 -- --
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
9 -- Copyright (C) 2007-2018, AdaCore --
111
kono
parents:
diff changeset
10 -- --
kono
parents:
diff changeset
11 -- GNAT is free software; you can redistribute it and/or modify it under --
kono
parents:
diff changeset
12 -- terms of the GNU General Public License as published by the Free Soft- --
kono
parents:
diff changeset
13 -- ware Foundation; either version 3, or (at your option) any later ver- --
kono
parents:
diff changeset
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
kono
parents:
diff changeset
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
kono
parents:
diff changeset
16 -- or FITNESS FOR A PARTICULAR PURPOSE. --
kono
parents:
diff changeset
17 -- --
kono
parents:
diff changeset
18 -- As a special exception under Section 7 of GPL version 3, you are granted --
kono
parents:
diff changeset
19 -- additional permissions described in the GCC Runtime Library Exception, --
kono
parents:
diff changeset
20 -- version 3.1, as published by the Free Software Foundation. --
kono
parents:
diff changeset
21 -- --
kono
parents:
diff changeset
22 -- You should have received a copy of the GNU General Public License and --
kono
parents:
diff changeset
23 -- a copy of the GCC Runtime Library Exception along with this program; --
kono
parents:
diff changeset
24 -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
kono
parents:
diff changeset
25 -- <http://www.gnu.org/licenses/>. --
kono
parents:
diff changeset
26 -- --
kono
parents:
diff changeset
27 -- GNAT was originally developed by the GNAT team at New York University. --
kono
parents:
diff changeset
28 -- Extensive contributions were provided by Ada Core Technologies Inc. --
kono
parents:
diff changeset
29 -- --
kono
parents:
diff changeset
30 ------------------------------------------------------------------------------
kono
parents:
diff changeset
31
kono
parents:
diff changeset
32 -- This package provides a utility routine for converting from an encoded
kono
parents:
diff changeset
33 -- string to a corresponding Wide_String or Wide_Wide_String value.
kono
parents:
diff changeset
34
kono
parents:
diff changeset
35 with Interfaces; use Interfaces;
kono
parents:
diff changeset
36
kono
parents:
diff changeset
37 with System.WCh_Cnv; use System.WCh_Cnv;
kono
parents:
diff changeset
38 with System.WCh_Con; use System.WCh_Con;
kono
parents:
diff changeset
39
kono
parents:
diff changeset
40 package body GNAT.Decode_String is
kono
parents:
diff changeset
41
kono
parents:
diff changeset
42 -----------------------
kono
parents:
diff changeset
43 -- Local Subprograms --
kono
parents:
diff changeset
44 -----------------------
kono
parents:
diff changeset
45
kono
parents:
diff changeset
46 procedure Bad;
kono
parents:
diff changeset
47 pragma No_Return (Bad);
kono
parents:
diff changeset
48 -- Raise error for bad encoding
kono
parents:
diff changeset
49
kono
parents:
diff changeset
50 procedure Past_End;
kono
parents:
diff changeset
51 pragma No_Return (Past_End);
kono
parents:
diff changeset
52 -- Raise error for off end of string
kono
parents:
diff changeset
53
kono
parents:
diff changeset
54 ---------
kono
parents:
diff changeset
55 -- Bad --
kono
parents:
diff changeset
56 ---------
kono
parents:
diff changeset
57
kono
parents:
diff changeset
58 procedure Bad is
kono
parents:
diff changeset
59 begin
kono
parents:
diff changeset
60 raise Constraint_Error with
kono
parents:
diff changeset
61 "bad encoding or character out of range";
kono
parents:
diff changeset
62 end Bad;
kono
parents:
diff changeset
63
kono
parents:
diff changeset
64 ---------------------------
kono
parents:
diff changeset
65 -- Decode_Wide_Character --
kono
parents:
diff changeset
66 ---------------------------
kono
parents:
diff changeset
67
kono
parents:
diff changeset
68 procedure Decode_Wide_Character
kono
parents:
diff changeset
69 (Input : String;
kono
parents:
diff changeset
70 Ptr : in out Natural;
kono
parents:
diff changeset
71 Result : out Wide_Character)
kono
parents:
diff changeset
72 is
kono
parents:
diff changeset
73 Char : Wide_Wide_Character;
kono
parents:
diff changeset
74 begin
kono
parents:
diff changeset
75 Decode_Wide_Wide_Character (Input, Ptr, Char);
kono
parents:
diff changeset
76
kono
parents:
diff changeset
77 if Wide_Wide_Character'Pos (Char) > 16#FFFF# then
kono
parents:
diff changeset
78 Bad;
kono
parents:
diff changeset
79 else
kono
parents:
diff changeset
80 Result := Wide_Character'Val (Wide_Wide_Character'Pos (Char));
kono
parents:
diff changeset
81 end if;
kono
parents:
diff changeset
82 end Decode_Wide_Character;
kono
parents:
diff changeset
83
kono
parents:
diff changeset
84 ------------------------
kono
parents:
diff changeset
85 -- Decode_Wide_String --
kono
parents:
diff changeset
86 ------------------------
kono
parents:
diff changeset
87
kono
parents:
diff changeset
88 function Decode_Wide_String (S : String) return Wide_String is
kono
parents:
diff changeset
89 Result : Wide_String (1 .. S'Length);
kono
parents:
diff changeset
90 Length : Natural;
kono
parents:
diff changeset
91 begin
kono
parents:
diff changeset
92 Decode_Wide_String (S, Result, Length);
kono
parents:
diff changeset
93 return Result (1 .. Length);
kono
parents:
diff changeset
94 end Decode_Wide_String;
kono
parents:
diff changeset
95
kono
parents:
diff changeset
96 procedure Decode_Wide_String
kono
parents:
diff changeset
97 (S : String;
kono
parents:
diff changeset
98 Result : out Wide_String;
kono
parents:
diff changeset
99 Length : out Natural)
kono
parents:
diff changeset
100 is
kono
parents:
diff changeset
101 Ptr : Natural;
kono
parents:
diff changeset
102
kono
parents:
diff changeset
103 begin
kono
parents:
diff changeset
104 Ptr := S'First;
kono
parents:
diff changeset
105 Length := 0;
kono
parents:
diff changeset
106 while Ptr <= S'Last loop
kono
parents:
diff changeset
107 if Length >= Result'Last then
kono
parents:
diff changeset
108 Past_End;
kono
parents:
diff changeset
109 end if;
kono
parents:
diff changeset
110
kono
parents:
diff changeset
111 Length := Length + 1;
kono
parents:
diff changeset
112 Decode_Wide_Character (S, Ptr, Result (Length));
kono
parents:
diff changeset
113 end loop;
kono
parents:
diff changeset
114 end Decode_Wide_String;
kono
parents:
diff changeset
115
kono
parents:
diff changeset
116 --------------------------------
kono
parents:
diff changeset
117 -- Decode_Wide_Wide_Character --
kono
parents:
diff changeset
118 --------------------------------
kono
parents:
diff changeset
119
kono
parents:
diff changeset
120 procedure Decode_Wide_Wide_Character
kono
parents:
diff changeset
121 (Input : String;
kono
parents:
diff changeset
122 Ptr : in out Natural;
kono
parents:
diff changeset
123 Result : out Wide_Wide_Character)
kono
parents:
diff changeset
124 is
kono
parents:
diff changeset
125 C : Character;
kono
parents:
diff changeset
126
kono
parents:
diff changeset
127 function In_Char return Character;
kono
parents:
diff changeset
128 pragma Inline (In_Char);
kono
parents:
diff changeset
129 -- Function to get one input character
kono
parents:
diff changeset
130
kono
parents:
diff changeset
131 -------------
kono
parents:
diff changeset
132 -- In_Char --
kono
parents:
diff changeset
133 -------------
kono
parents:
diff changeset
134
kono
parents:
diff changeset
135 function In_Char return Character is
kono
parents:
diff changeset
136 begin
kono
parents:
diff changeset
137 if Ptr <= Input'Last then
kono
parents:
diff changeset
138 Ptr := Ptr + 1;
kono
parents:
diff changeset
139 return Input (Ptr - 1);
kono
parents:
diff changeset
140 else
kono
parents:
diff changeset
141 Past_End;
kono
parents:
diff changeset
142 end if;
kono
parents:
diff changeset
143 end In_Char;
kono
parents:
diff changeset
144
kono
parents:
diff changeset
145 -- Start of processing for Decode_Wide_Wide_Character
kono
parents:
diff changeset
146
kono
parents:
diff changeset
147 begin
kono
parents:
diff changeset
148 C := In_Char;
kono
parents:
diff changeset
149
kono
parents:
diff changeset
150 -- Special fast processing for UTF-8 case
kono
parents:
diff changeset
151
kono
parents:
diff changeset
152 if Encoding_Method = WCEM_UTF8 then
kono
parents:
diff changeset
153 UTF8 : declare
kono
parents:
diff changeset
154 U : Unsigned_32;
kono
parents:
diff changeset
155 W : Unsigned_32;
kono
parents:
diff changeset
156
kono
parents:
diff changeset
157 procedure Get_UTF_Byte;
kono
parents:
diff changeset
158 pragma Inline (Get_UTF_Byte);
kono
parents:
diff changeset
159 -- Used to interpret 2#10xxxxxx# continuation byte in UTF-8 mode.
kono
parents:
diff changeset
160 -- Reads a byte, and raises CE if the first two bits are not 10.
kono
parents:
diff changeset
161 -- Otherwise shifts W 6 bits left and or's in the 6 xxxxxx bits.
kono
parents:
diff changeset
162
kono
parents:
diff changeset
163 ------------------
kono
parents:
diff changeset
164 -- Get_UTF_Byte --
kono
parents:
diff changeset
165 ------------------
kono
parents:
diff changeset
166
kono
parents:
diff changeset
167 procedure Get_UTF_Byte is
kono
parents:
diff changeset
168 begin
kono
parents:
diff changeset
169 U := Unsigned_32 (Character'Pos (In_Char));
kono
parents:
diff changeset
170
kono
parents:
diff changeset
171 if (U and 2#11000000#) /= 2#10_000000# then
kono
parents:
diff changeset
172 Bad;
kono
parents:
diff changeset
173 end if;
kono
parents:
diff changeset
174
kono
parents:
diff changeset
175 W := Shift_Left (W, 6) or (U and 2#00111111#);
kono
parents:
diff changeset
176 end Get_UTF_Byte;
kono
parents:
diff changeset
177
kono
parents:
diff changeset
178 -- Start of processing for UTF8 case
kono
parents:
diff changeset
179
kono
parents:
diff changeset
180 begin
kono
parents:
diff changeset
181 -- Note: for details of UTF8 encoding see RFC 3629
kono
parents:
diff changeset
182
kono
parents:
diff changeset
183 U := Unsigned_32 (Character'Pos (C));
kono
parents:
diff changeset
184
kono
parents:
diff changeset
185 -- 16#00_0000#-16#00_007F#: 0xxxxxxx
kono
parents:
diff changeset
186
kono
parents:
diff changeset
187 if (U and 2#10000000#) = 2#00000000# then
kono
parents:
diff changeset
188 Result := Wide_Wide_Character'Val (Character'Pos (C));
kono
parents:
diff changeset
189
kono
parents:
diff changeset
190 -- 16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx
kono
parents:
diff changeset
191
kono
parents:
diff changeset
192 elsif (U and 2#11100000#) = 2#110_00000# then
kono
parents:
diff changeset
193 W := U and 2#00011111#;
kono
parents:
diff changeset
194 Get_UTF_Byte;
kono
parents:
diff changeset
195
kono
parents:
diff changeset
196 if W not in 16#00_0080# .. 16#00_07FF# then
kono
parents:
diff changeset
197 Bad;
kono
parents:
diff changeset
198 end if;
kono
parents:
diff changeset
199
kono
parents:
diff changeset
200 Result := Wide_Wide_Character'Val (W);
kono
parents:
diff changeset
201
kono
parents:
diff changeset
202 -- 16#00_0800#-16#00_ffff#: 1110xxxx 10xxxxxx 10xxxxxx
kono
parents:
diff changeset
203
kono
parents:
diff changeset
204 elsif (U and 2#11110000#) = 2#1110_0000# then
kono
parents:
diff changeset
205 W := U and 2#00001111#;
kono
parents:
diff changeset
206 Get_UTF_Byte;
kono
parents:
diff changeset
207 Get_UTF_Byte;
kono
parents:
diff changeset
208
kono
parents:
diff changeset
209 if W not in 16#00_0800# .. 16#00_FFFF# then
kono
parents:
diff changeset
210 Bad;
kono
parents:
diff changeset
211 end if;
kono
parents:
diff changeset
212
kono
parents:
diff changeset
213 Result := Wide_Wide_Character'Val (W);
kono
parents:
diff changeset
214
kono
parents:
diff changeset
215 -- 16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
kono
parents:
diff changeset
216
kono
parents:
diff changeset
217 elsif (U and 2#11111000#) = 2#11110_000# then
kono
parents:
diff changeset
218 W := U and 2#00000111#;
kono
parents:
diff changeset
219
kono
parents:
diff changeset
220 for K in 1 .. 3 loop
kono
parents:
diff changeset
221 Get_UTF_Byte;
kono
parents:
diff changeset
222 end loop;
kono
parents:
diff changeset
223
kono
parents:
diff changeset
224 if W not in 16#01_0000# .. 16#10_FFFF# then
kono
parents:
diff changeset
225 Bad;
kono
parents:
diff changeset
226 end if;
kono
parents:
diff changeset
227
kono
parents:
diff changeset
228 Result := Wide_Wide_Character'Val (W);
kono
parents:
diff changeset
229
kono
parents:
diff changeset
230 -- 16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx 10xxxxxx
kono
parents:
diff changeset
231 -- 10xxxxxx 10xxxxxx
kono
parents:
diff changeset
232
kono
parents:
diff changeset
233 elsif (U and 2#11111100#) = 2#111110_00# then
kono
parents:
diff changeset
234 W := U and 2#00000011#;
kono
parents:
diff changeset
235
kono
parents:
diff changeset
236 for K in 1 .. 4 loop
kono
parents:
diff changeset
237 Get_UTF_Byte;
kono
parents:
diff changeset
238 end loop;
kono
parents:
diff changeset
239
kono
parents:
diff changeset
240 if W not in 16#0020_0000# .. 16#03FF_FFFF# then
kono
parents:
diff changeset
241 Bad;
kono
parents:
diff changeset
242 end if;
kono
parents:
diff changeset
243
kono
parents:
diff changeset
244 Result := Wide_Wide_Character'Val (W);
kono
parents:
diff changeset
245
kono
parents:
diff changeset
246 -- All other cases are invalid, note that this includes:
kono
parents:
diff changeset
247
kono
parents:
diff changeset
248 -- 16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx 10xxxxxx
kono
parents:
diff changeset
249 -- 10xxxxxx 10xxxxxx 10xxxxxx
kono
parents:
diff changeset
250
kono
parents:
diff changeset
251 -- since Wide_Wide_Character does not include code values
kono
parents:
diff changeset
252 -- greater than 16#03FF_FFFF#.
kono
parents:
diff changeset
253
kono
parents:
diff changeset
254 else
kono
parents:
diff changeset
255 Bad;
kono
parents:
diff changeset
256 end if;
kono
parents:
diff changeset
257 end UTF8;
kono
parents:
diff changeset
258
kono
parents:
diff changeset
259 -- All encoding functions other than UTF-8
kono
parents:
diff changeset
260
kono
parents:
diff changeset
261 else
kono
parents:
diff changeset
262 Non_UTF8 : declare
kono
parents:
diff changeset
263 function Char_Sequence_To_UTF is
kono
parents:
diff changeset
264 new Char_Sequence_To_UTF_32 (In_Char);
kono
parents:
diff changeset
265
kono
parents:
diff changeset
266 begin
kono
parents:
diff changeset
267 -- For brackets, must test for specific case of [ not followed by
kono
parents:
diff changeset
268 -- quotation, where we must not call Char_Sequence_To_UTF, but
kono
parents:
diff changeset
269 -- instead just return the bracket unchanged.
kono
parents:
diff changeset
270
kono
parents:
diff changeset
271 if Encoding_Method = WCEM_Brackets
kono
parents:
diff changeset
272 and then C = '['
kono
parents:
diff changeset
273 and then (Ptr > Input'Last or else Input (Ptr) /= '"')
kono
parents:
diff changeset
274 then
kono
parents:
diff changeset
275 Result := '[';
kono
parents:
diff changeset
276
kono
parents:
diff changeset
277 -- All other cases including [" with Brackets
kono
parents:
diff changeset
278
kono
parents:
diff changeset
279 else
kono
parents:
diff changeset
280 Result :=
kono
parents:
diff changeset
281 Wide_Wide_Character'Val
kono
parents:
diff changeset
282 (Char_Sequence_To_UTF (C, Encoding_Method));
kono
parents:
diff changeset
283 end if;
kono
parents:
diff changeset
284 end Non_UTF8;
kono
parents:
diff changeset
285 end if;
kono
parents:
diff changeset
286 end Decode_Wide_Wide_Character;
kono
parents:
diff changeset
287
kono
parents:
diff changeset
288 -----------------------------
kono
parents:
diff changeset
289 -- Decode_Wide_Wide_String --
kono
parents:
diff changeset
290 -----------------------------
kono
parents:
diff changeset
291
kono
parents:
diff changeset
292 function Decode_Wide_Wide_String (S : String) return Wide_Wide_String is
kono
parents:
diff changeset
293 Result : Wide_Wide_String (1 .. S'Length);
kono
parents:
diff changeset
294 Length : Natural;
kono
parents:
diff changeset
295 begin
kono
parents:
diff changeset
296 Decode_Wide_Wide_String (S, Result, Length);
kono
parents:
diff changeset
297 return Result (1 .. Length);
kono
parents:
diff changeset
298 end Decode_Wide_Wide_String;
kono
parents:
diff changeset
299
kono
parents:
diff changeset
300 procedure Decode_Wide_Wide_String
kono
parents:
diff changeset
301 (S : String;
kono
parents:
diff changeset
302 Result : out Wide_Wide_String;
kono
parents:
diff changeset
303 Length : out Natural)
kono
parents:
diff changeset
304 is
kono
parents:
diff changeset
305 Ptr : Natural;
kono
parents:
diff changeset
306
kono
parents:
diff changeset
307 begin
kono
parents:
diff changeset
308 Ptr := S'First;
kono
parents:
diff changeset
309 Length := 0;
kono
parents:
diff changeset
310 while Ptr <= S'Last loop
kono
parents:
diff changeset
311 if Length >= Result'Last then
kono
parents:
diff changeset
312 Past_End;
kono
parents:
diff changeset
313 end if;
kono
parents:
diff changeset
314
kono
parents:
diff changeset
315 Length := Length + 1;
kono
parents:
diff changeset
316 Decode_Wide_Wide_Character (S, Ptr, Result (Length));
kono
parents:
diff changeset
317 end loop;
kono
parents:
diff changeset
318 end Decode_Wide_Wide_String;
kono
parents:
diff changeset
319
kono
parents:
diff changeset
320 -------------------------
kono
parents:
diff changeset
321 -- Next_Wide_Character --
kono
parents:
diff changeset
322 -------------------------
kono
parents:
diff changeset
323
kono
parents:
diff changeset
324 procedure Next_Wide_Character (Input : String; Ptr : in out Natural) is
kono
parents:
diff changeset
325 Discard : Wide_Character;
kono
parents:
diff changeset
326 begin
kono
parents:
diff changeset
327 Decode_Wide_Character (Input, Ptr, Discard);
kono
parents:
diff changeset
328 end Next_Wide_Character;
kono
parents:
diff changeset
329
kono
parents:
diff changeset
330 ------------------------------
kono
parents:
diff changeset
331 -- Next_Wide_Wide_Character --
kono
parents:
diff changeset
332 ------------------------------
kono
parents:
diff changeset
333
kono
parents:
diff changeset
334 procedure Next_Wide_Wide_Character (Input : String; Ptr : in out Natural) is
kono
parents:
diff changeset
335 Discard : Wide_Wide_Character;
kono
parents:
diff changeset
336 begin
kono
parents:
diff changeset
337 Decode_Wide_Wide_Character (Input, Ptr, Discard);
kono
parents:
diff changeset
338 end Next_Wide_Wide_Character;
kono
parents:
diff changeset
339
kono
parents:
diff changeset
340 --------------
kono
parents:
diff changeset
341 -- Past_End --
kono
parents:
diff changeset
342 --------------
kono
parents:
diff changeset
343
kono
parents:
diff changeset
344 procedure Past_End is
kono
parents:
diff changeset
345 begin
kono
parents:
diff changeset
346 raise Constraint_Error with "past end of string";
kono
parents:
diff changeset
347 end Past_End;
kono
parents:
diff changeset
348
kono
parents:
diff changeset
349 -------------------------
kono
parents:
diff changeset
350 -- Prev_Wide_Character --
kono
parents:
diff changeset
351 -------------------------
kono
parents:
diff changeset
352
kono
parents:
diff changeset
353 procedure Prev_Wide_Character (Input : String; Ptr : in out Natural) is
kono
parents:
diff changeset
354 begin
kono
parents:
diff changeset
355 if Ptr > Input'Last + 1 then
kono
parents:
diff changeset
356 Past_End;
kono
parents:
diff changeset
357 end if;
kono
parents:
diff changeset
358
kono
parents:
diff changeset
359 -- Special efficient encoding for UTF-8 case
kono
parents:
diff changeset
360
kono
parents:
diff changeset
361 if Encoding_Method = WCEM_UTF8 then
kono
parents:
diff changeset
362 UTF8 : declare
kono
parents:
diff changeset
363 U : Unsigned_32;
kono
parents:
diff changeset
364
kono
parents:
diff changeset
365 procedure Getc;
kono
parents:
diff changeset
366 pragma Inline (Getc);
kono
parents:
diff changeset
367 -- Gets the character at Input (Ptr - 1) and returns code in U as
kono
parents:
diff changeset
368 -- Unsigned_32 value. On return Ptr is decremented by one.
kono
parents:
diff changeset
369
kono
parents:
diff changeset
370 procedure Skip_UTF_Byte;
kono
parents:
diff changeset
371 pragma Inline (Skip_UTF_Byte);
kono
parents:
diff changeset
372 -- Checks that U is 2#10xxxxxx# and then calls Get
kono
parents:
diff changeset
373
kono
parents:
diff changeset
374 ----------
kono
parents:
diff changeset
375 -- Getc --
kono
parents:
diff changeset
376 ----------
kono
parents:
diff changeset
377
kono
parents:
diff changeset
378 procedure Getc is
kono
parents:
diff changeset
379 begin
kono
parents:
diff changeset
380 if Ptr <= Input'First then
kono
parents:
diff changeset
381 Past_End;
kono
parents:
diff changeset
382 else
kono
parents:
diff changeset
383 Ptr := Ptr - 1;
kono
parents:
diff changeset
384 U := Unsigned_32 (Character'Pos (Input (Ptr)));
kono
parents:
diff changeset
385 end if;
kono
parents:
diff changeset
386 end Getc;
kono
parents:
diff changeset
387
kono
parents:
diff changeset
388 -------------------
kono
parents:
diff changeset
389 -- Skip_UTF_Byte --
kono
parents:
diff changeset
390 -------------------
kono
parents:
diff changeset
391
kono
parents:
diff changeset
392 procedure Skip_UTF_Byte is
kono
parents:
diff changeset
393 begin
kono
parents:
diff changeset
394 if (U and 2#11000000#) = 2#10_000000# then
kono
parents:
diff changeset
395 Getc;
kono
parents:
diff changeset
396 else
kono
parents:
diff changeset
397 Bad;
kono
parents:
diff changeset
398 end if;
kono
parents:
diff changeset
399 end Skip_UTF_Byte;
kono
parents:
diff changeset
400
kono
parents:
diff changeset
401 -- Start of processing for UTF-8 case
kono
parents:
diff changeset
402
kono
parents:
diff changeset
403 begin
kono
parents:
diff changeset
404 -- 16#00_0000#-16#00_007F#: 0xxxxxxx
kono
parents:
diff changeset
405
kono
parents:
diff changeset
406 Getc;
kono
parents:
diff changeset
407
kono
parents:
diff changeset
408 if (U and 2#10000000#) = 2#00000000# then
kono
parents:
diff changeset
409 return;
kono
parents:
diff changeset
410
kono
parents:
diff changeset
411 -- 16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx
kono
parents:
diff changeset
412
kono
parents:
diff changeset
413 else
kono
parents:
diff changeset
414 Skip_UTF_Byte;
kono
parents:
diff changeset
415
kono
parents:
diff changeset
416 if (U and 2#11100000#) = 2#110_00000# then
kono
parents:
diff changeset
417 return;
kono
parents:
diff changeset
418
kono
parents:
diff changeset
419 -- 16#00_0800#-16#00_ffff#: 1110xxxx 10xxxxxx 10xxxxxx
kono
parents:
diff changeset
420
kono
parents:
diff changeset
421 else
kono
parents:
diff changeset
422 Skip_UTF_Byte;
kono
parents:
diff changeset
423
kono
parents:
diff changeset
424 if (U and 2#11110000#) = 2#1110_0000# then
kono
parents:
diff changeset
425 return;
kono
parents:
diff changeset
426
kono
parents:
diff changeset
427 -- Any other code is invalid, note that this includes:
kono
parents:
diff changeset
428
kono
parents:
diff changeset
429 -- 16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx
kono
parents:
diff changeset
430 -- 10xxxxxx
kono
parents:
diff changeset
431
kono
parents:
diff changeset
432 -- 16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx
kono
parents:
diff changeset
433 -- 10xxxxxx 10xxxxxx
kono
parents:
diff changeset
434 -- 10xxxxxx
kono
parents:
diff changeset
435
kono
parents:
diff changeset
436 -- 16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx
kono
parents:
diff changeset
437 -- 10xxxxxx 10xxxxxx
kono
parents:
diff changeset
438 -- 10xxxxxx 10xxxxxx
kono
parents:
diff changeset
439
kono
parents:
diff changeset
440 -- since Wide_Character does not allow codes > 16#FFFF#
kono
parents:
diff changeset
441
kono
parents:
diff changeset
442 else
kono
parents:
diff changeset
443 Bad;
kono
parents:
diff changeset
444 end if;
kono
parents:
diff changeset
445 end if;
kono
parents:
diff changeset
446 end if;
kono
parents:
diff changeset
447 end UTF8;
kono
parents:
diff changeset
448
kono
parents:
diff changeset
449 -- Special efficient encoding for brackets case
kono
parents:
diff changeset
450
kono
parents:
diff changeset
451 elsif Encoding_Method = WCEM_Brackets then
kono
parents:
diff changeset
452 Brackets : declare
kono
parents:
diff changeset
453 P : Natural;
kono
parents:
diff changeset
454 S : Natural;
kono
parents:
diff changeset
455
kono
parents:
diff changeset
456 begin
kono
parents:
diff changeset
457 -- See if we have "] at end positions
kono
parents:
diff changeset
458
kono
parents:
diff changeset
459 if Ptr > Input'First + 1
kono
parents:
diff changeset
460 and then Input (Ptr - 1) = ']'
kono
parents:
diff changeset
461 and then Input (Ptr - 2) = '"'
kono
parents:
diff changeset
462 then
kono
parents:
diff changeset
463 P := Ptr - 2;
kono
parents:
diff changeset
464
kono
parents:
diff changeset
465 -- Loop back looking for [" at start
kono
parents:
diff changeset
466
kono
parents:
diff changeset
467 while P >= Ptr - 10 loop
kono
parents:
diff changeset
468 if P <= Input'First + 1 then
kono
parents:
diff changeset
469 Bad;
kono
parents:
diff changeset
470
kono
parents:
diff changeset
471 elsif Input (P - 1) = '"'
kono
parents:
diff changeset
472 and then Input (P - 2) = '['
kono
parents:
diff changeset
473 then
kono
parents:
diff changeset
474 -- Found ["..."], scan forward to check it
kono
parents:
diff changeset
475
kono
parents:
diff changeset
476 S := P - 2;
kono
parents:
diff changeset
477 P := S;
kono
parents:
diff changeset
478 Next_Wide_Character (Input, P);
kono
parents:
diff changeset
479
kono
parents:
diff changeset
480 -- OK if at original pointer, else error
kono
parents:
diff changeset
481
kono
parents:
diff changeset
482 if P = Ptr then
kono
parents:
diff changeset
483 Ptr := S;
kono
parents:
diff changeset
484 return;
kono
parents:
diff changeset
485 else
kono
parents:
diff changeset
486 Bad;
kono
parents:
diff changeset
487 end if;
kono
parents:
diff changeset
488 end if;
kono
parents:
diff changeset
489
kono
parents:
diff changeset
490 P := P - 1;
kono
parents:
diff changeset
491 end loop;
kono
parents:
diff changeset
492
kono
parents:
diff changeset
493 -- Falling through loop means more than 8 chars between the
kono
parents:
diff changeset
494 -- enclosing brackets (or simply a missing left bracket)
kono
parents:
diff changeset
495
kono
parents:
diff changeset
496 Bad;
kono
parents:
diff changeset
497
kono
parents:
diff changeset
498 -- Here if no bracket sequence present
kono
parents:
diff changeset
499
kono
parents:
diff changeset
500 else
kono
parents:
diff changeset
501 if Ptr = Input'First then
kono
parents:
diff changeset
502 Past_End;
kono
parents:
diff changeset
503 else
kono
parents:
diff changeset
504 Ptr := Ptr - 1;
kono
parents:
diff changeset
505 end if;
kono
parents:
diff changeset
506 end if;
kono
parents:
diff changeset
507 end Brackets;
kono
parents:
diff changeset
508
kono
parents:
diff changeset
509 -- Non-UTF-8/Brackets. These are the inefficient cases where we have to
kono
parents:
diff changeset
510 -- go to the start of the string and skip forwards till Ptr matches.
kono
parents:
diff changeset
511
kono
parents:
diff changeset
512 else
kono
parents:
diff changeset
513 Non_UTF_Brackets : declare
kono
parents:
diff changeset
514 Discard : Wide_Character;
kono
parents:
diff changeset
515 PtrS : Natural;
kono
parents:
diff changeset
516 PtrP : Natural;
kono
parents:
diff changeset
517
kono
parents:
diff changeset
518 begin
kono
parents:
diff changeset
519 PtrS := Input'First;
kono
parents:
diff changeset
520
kono
parents:
diff changeset
521 if Ptr <= PtrS then
kono
parents:
diff changeset
522 Past_End;
kono
parents:
diff changeset
523 end if;
kono
parents:
diff changeset
524
kono
parents:
diff changeset
525 loop
kono
parents:
diff changeset
526 PtrP := PtrS;
kono
parents:
diff changeset
527 Decode_Wide_Character (Input, PtrS, Discard);
kono
parents:
diff changeset
528
kono
parents:
diff changeset
529 if PtrS = Ptr then
kono
parents:
diff changeset
530 Ptr := PtrP;
kono
parents:
diff changeset
531 return;
kono
parents:
diff changeset
532
kono
parents:
diff changeset
533 elsif PtrS > Ptr then
kono
parents:
diff changeset
534 Bad;
kono
parents:
diff changeset
535 end if;
kono
parents:
diff changeset
536 end loop;
kono
parents:
diff changeset
537
kono
parents:
diff changeset
538 exception
kono
parents:
diff changeset
539 when Constraint_Error =>
kono
parents:
diff changeset
540 Bad;
kono
parents:
diff changeset
541 end Non_UTF_Brackets;
kono
parents:
diff changeset
542 end if;
kono
parents:
diff changeset
543 end Prev_Wide_Character;
kono
parents:
diff changeset
544
kono
parents:
diff changeset
545 ------------------------------
kono
parents:
diff changeset
546 -- Prev_Wide_Wide_Character --
kono
parents:
diff changeset
547 ------------------------------
kono
parents:
diff changeset
548
kono
parents:
diff changeset
549 procedure Prev_Wide_Wide_Character (Input : String; Ptr : in out Natural) is
kono
parents:
diff changeset
550 begin
kono
parents:
diff changeset
551 if Ptr > Input'Last + 1 then
kono
parents:
diff changeset
552 Past_End;
kono
parents:
diff changeset
553 end if;
kono
parents:
diff changeset
554
kono
parents:
diff changeset
555 -- Special efficient encoding for UTF-8 case
kono
parents:
diff changeset
556
kono
parents:
diff changeset
557 if Encoding_Method = WCEM_UTF8 then
kono
parents:
diff changeset
558 UTF8 : declare
kono
parents:
diff changeset
559 U : Unsigned_32;
kono
parents:
diff changeset
560
kono
parents:
diff changeset
561 procedure Getc;
kono
parents:
diff changeset
562 pragma Inline (Getc);
kono
parents:
diff changeset
563 -- Gets the character at Input (Ptr - 1) and returns code in U as
kono
parents:
diff changeset
564 -- Unsigned_32 value. On return Ptr is decremented by one.
kono
parents:
diff changeset
565
kono
parents:
diff changeset
566 procedure Skip_UTF_Byte;
kono
parents:
diff changeset
567 pragma Inline (Skip_UTF_Byte);
kono
parents:
diff changeset
568 -- Checks that U is 2#10xxxxxx# and then calls Get
kono
parents:
diff changeset
569
kono
parents:
diff changeset
570 ----------
kono
parents:
diff changeset
571 -- Getc --
kono
parents:
diff changeset
572 ----------
kono
parents:
diff changeset
573
kono
parents:
diff changeset
574 procedure Getc is
kono
parents:
diff changeset
575 begin
kono
parents:
diff changeset
576 if Ptr <= Input'First then
kono
parents:
diff changeset
577 Past_End;
kono
parents:
diff changeset
578 else
kono
parents:
diff changeset
579 Ptr := Ptr - 1;
kono
parents:
diff changeset
580 U := Unsigned_32 (Character'Pos (Input (Ptr)));
kono
parents:
diff changeset
581 end if;
kono
parents:
diff changeset
582 end Getc;
kono
parents:
diff changeset
583
kono
parents:
diff changeset
584 -------------------
kono
parents:
diff changeset
585 -- Skip_UTF_Byte --
kono
parents:
diff changeset
586 -------------------
kono
parents:
diff changeset
587
kono
parents:
diff changeset
588 procedure Skip_UTF_Byte is
kono
parents:
diff changeset
589 begin
kono
parents:
diff changeset
590 if (U and 2#11000000#) = 2#10_000000# then
kono
parents:
diff changeset
591 Getc;
kono
parents:
diff changeset
592 else
kono
parents:
diff changeset
593 Bad;
kono
parents:
diff changeset
594 end if;
kono
parents:
diff changeset
595 end Skip_UTF_Byte;
kono
parents:
diff changeset
596
kono
parents:
diff changeset
597 -- Start of processing for UTF-8 case
kono
parents:
diff changeset
598
kono
parents:
diff changeset
599 begin
kono
parents:
diff changeset
600 -- 16#00_0000#-16#00_007F#: 0xxxxxxx
kono
parents:
diff changeset
601
kono
parents:
diff changeset
602 Getc;
kono
parents:
diff changeset
603
kono
parents:
diff changeset
604 if (U and 2#10000000#) = 2#00000000# then
kono
parents:
diff changeset
605 return;
kono
parents:
diff changeset
606
kono
parents:
diff changeset
607 -- 16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx
kono
parents:
diff changeset
608
kono
parents:
diff changeset
609 else
kono
parents:
diff changeset
610 Skip_UTF_Byte;
kono
parents:
diff changeset
611
kono
parents:
diff changeset
612 if (U and 2#11100000#) = 2#110_00000# then
kono
parents:
diff changeset
613 return;
kono
parents:
diff changeset
614
kono
parents:
diff changeset
615 -- 16#00_0800#-16#00_ffff#: 1110xxxx 10xxxxxx 10xxxxxx
kono
parents:
diff changeset
616
kono
parents:
diff changeset
617 else
kono
parents:
diff changeset
618 Skip_UTF_Byte;
kono
parents:
diff changeset
619
kono
parents:
diff changeset
620 if (U and 2#11110000#) = 2#1110_0000# then
kono
parents:
diff changeset
621 return;
kono
parents:
diff changeset
622
kono
parents:
diff changeset
623 -- 16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx
kono
parents:
diff changeset
624 -- 10xxxxxx
kono
parents:
diff changeset
625
kono
parents:
diff changeset
626 else
kono
parents:
diff changeset
627 Skip_UTF_Byte;
kono
parents:
diff changeset
628
kono
parents:
diff changeset
629 if (U and 2#11111000#) = 2#11110_000# then
kono
parents:
diff changeset
630 return;
kono
parents:
diff changeset
631
kono
parents:
diff changeset
632 -- 16#0020_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx
kono
parents:
diff changeset
633 -- 10xxxxxx 10xxxxxx
kono
parents:
diff changeset
634 -- 10xxxxxx
kono
parents:
diff changeset
635
kono
parents:
diff changeset
636 else
kono
parents:
diff changeset
637 Skip_UTF_Byte;
kono
parents:
diff changeset
638
kono
parents:
diff changeset
639 if (U and 2#11111100#) = 2#111110_00# then
kono
parents:
diff changeset
640 return;
kono
parents:
diff changeset
641
kono
parents:
diff changeset
642 -- Any other code is invalid, note that this includes:
kono
parents:
diff changeset
643
kono
parents:
diff changeset
644 -- 16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx
kono
parents:
diff changeset
645 -- 10xxxxxx 10xxxxxx
kono
parents:
diff changeset
646 -- 10xxxxxx 10xxxxxx
kono
parents:
diff changeset
647
kono
parents:
diff changeset
648 -- since Wide_Wide_Character does not allow codes
kono
parents:
diff changeset
649 -- greater than 16#03FF_FFFF#
kono
parents:
diff changeset
650
kono
parents:
diff changeset
651 else
kono
parents:
diff changeset
652 Bad;
kono
parents:
diff changeset
653 end if;
kono
parents:
diff changeset
654 end if;
kono
parents:
diff changeset
655 end if;
kono
parents:
diff changeset
656 end if;
kono
parents:
diff changeset
657 end if;
kono
parents:
diff changeset
658 end UTF8;
kono
parents:
diff changeset
659
kono
parents:
diff changeset
660 -- Special efficient encoding for brackets case
kono
parents:
diff changeset
661
kono
parents:
diff changeset
662 elsif Encoding_Method = WCEM_Brackets then
kono
parents:
diff changeset
663 Brackets : declare
kono
parents:
diff changeset
664 P : Natural;
kono
parents:
diff changeset
665 S : Natural;
kono
parents:
diff changeset
666
kono
parents:
diff changeset
667 begin
kono
parents:
diff changeset
668 -- See if we have "] at end positions
kono
parents:
diff changeset
669
kono
parents:
diff changeset
670 if Ptr > Input'First + 1
kono
parents:
diff changeset
671 and then Input (Ptr - 1) = ']'
kono
parents:
diff changeset
672 and then Input (Ptr - 2) = '"'
kono
parents:
diff changeset
673 then
kono
parents:
diff changeset
674 P := Ptr - 2;
kono
parents:
diff changeset
675
kono
parents:
diff changeset
676 -- Loop back looking for [" at start
kono
parents:
diff changeset
677
kono
parents:
diff changeset
678 while P >= Ptr - 10 loop
kono
parents:
diff changeset
679 if P <= Input'First + 1 then
kono
parents:
diff changeset
680 Bad;
kono
parents:
diff changeset
681
kono
parents:
diff changeset
682 elsif Input (P - 1) = '"'
kono
parents:
diff changeset
683 and then Input (P - 2) = '['
kono
parents:
diff changeset
684 then
kono
parents:
diff changeset
685 -- Found ["..."], scan forward to check it
kono
parents:
diff changeset
686
kono
parents:
diff changeset
687 S := P - 2;
kono
parents:
diff changeset
688 P := S;
kono
parents:
diff changeset
689 Next_Wide_Wide_Character (Input, P);
kono
parents:
diff changeset
690
kono
parents:
diff changeset
691 -- OK if at original pointer, else error
kono
parents:
diff changeset
692
kono
parents:
diff changeset
693 if P = Ptr then
kono
parents:
diff changeset
694 Ptr := S;
kono
parents:
diff changeset
695 return;
kono
parents:
diff changeset
696 else
kono
parents:
diff changeset
697 Bad;
kono
parents:
diff changeset
698 end if;
kono
parents:
diff changeset
699 end if;
kono
parents:
diff changeset
700
kono
parents:
diff changeset
701 P := P - 1;
kono
parents:
diff changeset
702 end loop;
kono
parents:
diff changeset
703
kono
parents:
diff changeset
704 -- Falling through loop means more than 8 chars between the
kono
parents:
diff changeset
705 -- enclosing brackets (or simply a missing left bracket)
kono
parents:
diff changeset
706
kono
parents:
diff changeset
707 Bad;
kono
parents:
diff changeset
708
kono
parents:
diff changeset
709 -- Here if no bracket sequence present
kono
parents:
diff changeset
710
kono
parents:
diff changeset
711 else
kono
parents:
diff changeset
712 if Ptr = Input'First then
kono
parents:
diff changeset
713 Past_End;
kono
parents:
diff changeset
714 else
kono
parents:
diff changeset
715 Ptr := Ptr - 1;
kono
parents:
diff changeset
716 end if;
kono
parents:
diff changeset
717 end if;
kono
parents:
diff changeset
718 end Brackets;
kono
parents:
diff changeset
719
kono
parents:
diff changeset
720 -- Non-UTF-8/Brackets. These are the inefficient cases where we have to
kono
parents:
diff changeset
721 -- go to the start of the string and skip forwards till Ptr matches.
kono
parents:
diff changeset
722
kono
parents:
diff changeset
723 else
kono
parents:
diff changeset
724 Non_UTF8_Brackets : declare
kono
parents:
diff changeset
725 Discard : Wide_Wide_Character;
kono
parents:
diff changeset
726 PtrS : Natural;
kono
parents:
diff changeset
727 PtrP : Natural;
kono
parents:
diff changeset
728
kono
parents:
diff changeset
729 begin
kono
parents:
diff changeset
730 PtrS := Input'First;
kono
parents:
diff changeset
731
kono
parents:
diff changeset
732 if Ptr <= PtrS then
kono
parents:
diff changeset
733 Past_End;
kono
parents:
diff changeset
734 end if;
kono
parents:
diff changeset
735
kono
parents:
diff changeset
736 loop
kono
parents:
diff changeset
737 PtrP := PtrS;
kono
parents:
diff changeset
738 Decode_Wide_Wide_Character (Input, PtrS, Discard);
kono
parents:
diff changeset
739
kono
parents:
diff changeset
740 if PtrS = Ptr then
kono
parents:
diff changeset
741 Ptr := PtrP;
kono
parents:
diff changeset
742 return;
kono
parents:
diff changeset
743
kono
parents:
diff changeset
744 elsif PtrS > Ptr then
kono
parents:
diff changeset
745 Bad;
kono
parents:
diff changeset
746 end if;
kono
parents:
diff changeset
747 end loop;
kono
parents:
diff changeset
748
kono
parents:
diff changeset
749 exception
kono
parents:
diff changeset
750 when Constraint_Error =>
kono
parents:
diff changeset
751 Bad;
kono
parents:
diff changeset
752 end Non_UTF8_Brackets;
kono
parents:
diff changeset
753 end if;
kono
parents:
diff changeset
754 end Prev_Wide_Wide_Character;
kono
parents:
diff changeset
755
kono
parents:
diff changeset
756 --------------------------
kono
parents:
diff changeset
757 -- Validate_Wide_String --
kono
parents:
diff changeset
758 --------------------------
kono
parents:
diff changeset
759
kono
parents:
diff changeset
760 function Validate_Wide_String (S : String) return Boolean is
kono
parents:
diff changeset
761 Ptr : Natural;
kono
parents:
diff changeset
762
kono
parents:
diff changeset
763 begin
kono
parents:
diff changeset
764 Ptr := S'First;
kono
parents:
diff changeset
765 while Ptr <= S'Last loop
kono
parents:
diff changeset
766 Next_Wide_Character (S, Ptr);
kono
parents:
diff changeset
767 end loop;
kono
parents:
diff changeset
768
kono
parents:
diff changeset
769 return True;
kono
parents:
diff changeset
770
kono
parents:
diff changeset
771 exception
kono
parents:
diff changeset
772 when Constraint_Error =>
kono
parents:
diff changeset
773 return False;
kono
parents:
diff changeset
774 end Validate_Wide_String;
kono
parents:
diff changeset
775
kono
parents:
diff changeset
776 -------------------------------
kono
parents:
diff changeset
777 -- Validate_Wide_Wide_String --
kono
parents:
diff changeset
778 -------------------------------
kono
parents:
diff changeset
779
kono
parents:
diff changeset
780 function Validate_Wide_Wide_String (S : String) return Boolean is
kono
parents:
diff changeset
781 Ptr : Natural;
kono
parents:
diff changeset
782
kono
parents:
diff changeset
783 begin
kono
parents:
diff changeset
784 Ptr := S'First;
kono
parents:
diff changeset
785 while Ptr <= S'Last loop
kono
parents:
diff changeset
786 Next_Wide_Wide_Character (S, Ptr);
kono
parents:
diff changeset
787 end loop;
kono
parents:
diff changeset
788
kono
parents:
diff changeset
789 return True;
kono
parents:
diff changeset
790
kono
parents:
diff changeset
791 exception
kono
parents:
diff changeset
792 when Constraint_Error =>
kono
parents:
diff changeset
793 return False;
kono
parents:
diff changeset
794 end Validate_Wide_Wide_String;
kono
parents:
diff changeset
795
kono
parents:
diff changeset
796 end GNAT.Decode_String;