annotate gcc/ada/libgnat/a-suewst.adb @ 131:84e7813d76e9

gcc-8.2
author mir3636
date Thu, 25 Oct 2018 07:37:49 +0900
parents 04ced10e8804
children 1830386684a0
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
111
kono
parents:
diff changeset
1 ------------------------------------------------------------------------------
kono
parents:
diff changeset
2 -- --
kono
parents:
diff changeset
3 -- GNAT RUN-TIME COMPONENTS --
kono
parents:
diff changeset
4 -- --
kono
parents:
diff changeset
5 -- ADA.STRINGS.UTF_ENCODING.WIDE_STRINGS --
kono
parents:
diff changeset
6 -- --
kono
parents:
diff changeset
7 -- B o d y --
kono
parents:
diff changeset
8 -- --
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
9 -- Copyright (C) 2010-2018, Free Software Foundation, Inc. --
111
kono
parents:
diff changeset
10 -- --
kono
parents:
diff changeset
11 -- GNAT is free software; you can redistribute it and/or modify it under --
kono
parents:
diff changeset
12 -- terms of the GNU General Public License as published by the Free Soft- --
kono
parents:
diff changeset
13 -- ware Foundation; either version 3, or (at your option) any later ver- --
kono
parents:
diff changeset
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
kono
parents:
diff changeset
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
kono
parents:
diff changeset
16 -- or FITNESS FOR A PARTICULAR PURPOSE. --
kono
parents:
diff changeset
17 -- --
kono
parents:
diff changeset
18 -- As a special exception under Section 7 of GPL version 3, you are granted --
kono
parents:
diff changeset
19 -- additional permissions described in the GCC Runtime Library Exception, --
kono
parents:
diff changeset
20 -- version 3.1, as published by the Free Software Foundation. --
kono
parents:
diff changeset
21 -- --
kono
parents:
diff changeset
22 -- You should have received a copy of the GNU General Public License and --
kono
parents:
diff changeset
23 -- a copy of the GCC Runtime Library Exception along with this program; --
kono
parents:
diff changeset
24 -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
kono
parents:
diff changeset
25 -- <http://www.gnu.org/licenses/>. --
kono
parents:
diff changeset
26 -- --
kono
parents:
diff changeset
27 -- GNAT was originally developed by the GNAT team at New York University. --
kono
parents:
diff changeset
28 -- Extensive contributions were provided by Ada Core Technologies Inc. --
kono
parents:
diff changeset
29 -- --
kono
parents:
diff changeset
30 ------------------------------------------------------------------------------
kono
parents:
diff changeset
31
kono
parents:
diff changeset
32 package body Ada.Strings.UTF_Encoding.Wide_Strings is
kono
parents:
diff changeset
33 use Interfaces;
kono
parents:
diff changeset
34
kono
parents:
diff changeset
35 ------------
kono
parents:
diff changeset
36 -- Decode --
kono
parents:
diff changeset
37 ------------
kono
parents:
diff changeset
38
kono
parents:
diff changeset
39 -- Decode UTF-8/UTF-16BE/UTF-16LE input to Wide_String
kono
parents:
diff changeset
40
kono
parents:
diff changeset
41 function Decode
kono
parents:
diff changeset
42 (Item : UTF_String;
kono
parents:
diff changeset
43 Input_Scheme : Encoding_Scheme) return Wide_String
kono
parents:
diff changeset
44 is
kono
parents:
diff changeset
45 begin
kono
parents:
diff changeset
46 if Input_Scheme = UTF_8 then
kono
parents:
diff changeset
47 return Decode (Item);
kono
parents:
diff changeset
48 else
kono
parents:
diff changeset
49 return Decode (To_UTF_16 (Item, Input_Scheme));
kono
parents:
diff changeset
50 end if;
kono
parents:
diff changeset
51 end Decode;
kono
parents:
diff changeset
52
kono
parents:
diff changeset
53 -- Decode UTF-8 input to Wide_String
kono
parents:
diff changeset
54
kono
parents:
diff changeset
55 function Decode (Item : UTF_8_String) return Wide_String is
kono
parents:
diff changeset
56 Result : Wide_String (1 .. Item'Length);
kono
parents:
diff changeset
57 -- Result string (worst case is same length as input)
kono
parents:
diff changeset
58
kono
parents:
diff changeset
59 Len : Natural := 0;
kono
parents:
diff changeset
60 -- Length of result stored so far
kono
parents:
diff changeset
61
kono
parents:
diff changeset
62 Iptr : Natural;
kono
parents:
diff changeset
63 -- Input Item pointer
kono
parents:
diff changeset
64
kono
parents:
diff changeset
65 C : Unsigned_8;
kono
parents:
diff changeset
66 R : Unsigned_16;
kono
parents:
diff changeset
67
kono
parents:
diff changeset
68 procedure Get_Continuation;
kono
parents:
diff changeset
69 -- Reads a continuation byte of the form 10xxxxxx, shifts R left by 6
kono
parents:
diff changeset
70 -- bits, and or's in the xxxxxx to the low order 6 bits. On return Ptr
kono
parents:
diff changeset
71 -- is incremented. Raises exception if continuation byte does not exist
kono
parents:
diff changeset
72 -- or is invalid.
kono
parents:
diff changeset
73
kono
parents:
diff changeset
74 ----------------------
kono
parents:
diff changeset
75 -- Get_Continuation --
kono
parents:
diff changeset
76 ----------------------
kono
parents:
diff changeset
77
kono
parents:
diff changeset
78 procedure Get_Continuation is
kono
parents:
diff changeset
79 begin
kono
parents:
diff changeset
80 if Iptr > Item'Last then
kono
parents:
diff changeset
81 Raise_Encoding_Error (Iptr - 1);
kono
parents:
diff changeset
82
kono
parents:
diff changeset
83 else
kono
parents:
diff changeset
84 C := To_Unsigned_8 (Item (Iptr));
kono
parents:
diff changeset
85 Iptr := Iptr + 1;
kono
parents:
diff changeset
86
kono
parents:
diff changeset
87 if C not in 2#10_000000# .. 2#10_111111# then
kono
parents:
diff changeset
88 Raise_Encoding_Error (Iptr - 1);
kono
parents:
diff changeset
89 else
kono
parents:
diff changeset
90 R := Shift_Left (R, 6) or Unsigned_16 (C and 2#00_111111#);
kono
parents:
diff changeset
91 end if;
kono
parents:
diff changeset
92 end if;
kono
parents:
diff changeset
93 end Get_Continuation;
kono
parents:
diff changeset
94
kono
parents:
diff changeset
95 -- Start of processing for Decode
kono
parents:
diff changeset
96
kono
parents:
diff changeset
97 begin
kono
parents:
diff changeset
98 Iptr := Item'First;
kono
parents:
diff changeset
99
kono
parents:
diff changeset
100 -- Skip BOM at start
kono
parents:
diff changeset
101
kono
parents:
diff changeset
102 if Item'Length >= 3
kono
parents:
diff changeset
103 and then Item (Iptr .. Iptr + 2) = BOM_8
kono
parents:
diff changeset
104 then
kono
parents:
diff changeset
105 Iptr := Iptr + 3;
kono
parents:
diff changeset
106
kono
parents:
diff changeset
107 -- Error if bad BOM
kono
parents:
diff changeset
108
kono
parents:
diff changeset
109 elsif Item'Length >= 2
kono
parents:
diff changeset
110 and then (Item (Iptr .. Iptr + 1) = BOM_16BE
kono
parents:
diff changeset
111 or else
kono
parents:
diff changeset
112 Item (Iptr .. Iptr + 1) = BOM_16LE)
kono
parents:
diff changeset
113 then
kono
parents:
diff changeset
114 Raise_Encoding_Error (Iptr);
kono
parents:
diff changeset
115 end if;
kono
parents:
diff changeset
116
kono
parents:
diff changeset
117 while Iptr <= Item'Last loop
kono
parents:
diff changeset
118 C := To_Unsigned_8 (Item (Iptr));
kono
parents:
diff changeset
119 Iptr := Iptr + 1;
kono
parents:
diff changeset
120
kono
parents:
diff changeset
121 -- Codes in the range 16#00# - 16#7F# are represented as
kono
parents:
diff changeset
122 -- 0xxxxxxx
kono
parents:
diff changeset
123
kono
parents:
diff changeset
124 if C <= 16#7F# then
kono
parents:
diff changeset
125 R := Unsigned_16 (C);
kono
parents:
diff changeset
126
kono
parents:
diff changeset
127 -- No initial code can be of the form 10xxxxxx. Such codes are used
kono
parents:
diff changeset
128 -- only for continuations.
kono
parents:
diff changeset
129
kono
parents:
diff changeset
130 elsif C <= 2#10_111111# then
kono
parents:
diff changeset
131 Raise_Encoding_Error (Iptr - 1);
kono
parents:
diff changeset
132
kono
parents:
diff changeset
133 -- Codes in the range 16#80# - 16#7FF# are represented as
kono
parents:
diff changeset
134 -- 110yyyxx 10xxxxxx
kono
parents:
diff changeset
135
kono
parents:
diff changeset
136 elsif C <= 2#110_11111# then
kono
parents:
diff changeset
137 R := Unsigned_16 (C and 2#000_11111#);
kono
parents:
diff changeset
138 Get_Continuation;
kono
parents:
diff changeset
139
kono
parents:
diff changeset
140 -- Codes in the range 16#800# - 16#FFFF# are represented as
kono
parents:
diff changeset
141 -- 1110yyyy 10yyyyxx 10xxxxxx
kono
parents:
diff changeset
142
kono
parents:
diff changeset
143 elsif C <= 2#1110_1111# then
kono
parents:
diff changeset
144 R := Unsigned_16 (C and 2#0000_1111#);
kono
parents:
diff changeset
145 Get_Continuation;
kono
parents:
diff changeset
146 Get_Continuation;
kono
parents:
diff changeset
147
kono
parents:
diff changeset
148 -- Codes in the range 16#10000# - 16#10FFFF# are represented as
kono
parents:
diff changeset
149 -- 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
kono
parents:
diff changeset
150
kono
parents:
diff changeset
151 -- Such codes are out of range for Wide_String output
kono
parents:
diff changeset
152
kono
parents:
diff changeset
153 else
kono
parents:
diff changeset
154 Raise_Encoding_Error (Iptr - 1);
kono
parents:
diff changeset
155 end if;
kono
parents:
diff changeset
156
kono
parents:
diff changeset
157 Len := Len + 1;
kono
parents:
diff changeset
158 Result (Len) := Wide_Character'Val (R);
kono
parents:
diff changeset
159 end loop;
kono
parents:
diff changeset
160
kono
parents:
diff changeset
161 return Result (1 .. Len);
kono
parents:
diff changeset
162 end Decode;
kono
parents:
diff changeset
163
kono
parents:
diff changeset
164 -- Decode UTF-16 input to Wide_String
kono
parents:
diff changeset
165
kono
parents:
diff changeset
166 function Decode (Item : UTF_16_Wide_String) return Wide_String is
kono
parents:
diff changeset
167 Result : Wide_String (1 .. Item'Length);
kono
parents:
diff changeset
168 -- Result is same length as input (possibly minus 1 if BOM present)
kono
parents:
diff changeset
169
kono
parents:
diff changeset
170 Len : Natural := 0;
kono
parents:
diff changeset
171 -- Length of result
kono
parents:
diff changeset
172
kono
parents:
diff changeset
173 Iptr : Natural;
kono
parents:
diff changeset
174 -- Index of next Item element
kono
parents:
diff changeset
175
kono
parents:
diff changeset
176 C : Unsigned_16;
kono
parents:
diff changeset
177
kono
parents:
diff changeset
178 begin
kono
parents:
diff changeset
179 -- Skip UTF-16 BOM at start
kono
parents:
diff changeset
180
kono
parents:
diff changeset
181 Iptr := Item'First;
kono
parents:
diff changeset
182
kono
parents:
diff changeset
183 if Item'Length > 0 and then Item (Iptr) = BOM_16 (1) then
kono
parents:
diff changeset
184 Iptr := Iptr + 1;
kono
parents:
diff changeset
185 end if;
kono
parents:
diff changeset
186
kono
parents:
diff changeset
187 -- Loop through input characters
kono
parents:
diff changeset
188
kono
parents:
diff changeset
189 while Iptr <= Item'Last loop
kono
parents:
diff changeset
190 C := To_Unsigned_16 (Item (Iptr));
kono
parents:
diff changeset
191 Iptr := Iptr + 1;
kono
parents:
diff changeset
192
kono
parents:
diff changeset
193 -- Codes in the range 16#0000#..16#D7FF# or 16#E000#..16#FFFD#
kono
parents:
diff changeset
194 -- represent their own value.
kono
parents:
diff changeset
195
kono
parents:
diff changeset
196 if C <= 16#D7FF# or else C in 16#E000# .. 16#FFFD# then
kono
parents:
diff changeset
197 Len := Len + 1;
kono
parents:
diff changeset
198 Result (Len) := Wide_Character'Val (C);
kono
parents:
diff changeset
199
kono
parents:
diff changeset
200 -- Codes in the range 16#D800#..16#DBFF# represent the first of the
kono
parents:
diff changeset
201 -- two surrogates used to encode the range 16#01_000#..16#10_FFFF".
kono
parents:
diff changeset
202 -- Such codes are out of range for 16-bit output.
kono
parents:
diff changeset
203
kono
parents:
diff changeset
204 -- The case of input in the range 16#DC00#..16#DFFF# must never
kono
parents:
diff changeset
205 -- occur, since it means we have a second surrogate character with
kono
parents:
diff changeset
206 -- no corresponding first surrogate.
kono
parents:
diff changeset
207
kono
parents:
diff changeset
208 -- Codes in the range 16#FFFE# .. 16#FFFF# are also invalid since
kono
parents:
diff changeset
209 -- they conflict with codes used for BOM values.
kono
parents:
diff changeset
210
kono
parents:
diff changeset
211 -- Thus all remaining codes are invalid
kono
parents:
diff changeset
212
kono
parents:
diff changeset
213 else
kono
parents:
diff changeset
214 Raise_Encoding_Error (Iptr - 1);
kono
parents:
diff changeset
215 end if;
kono
parents:
diff changeset
216 end loop;
kono
parents:
diff changeset
217
kono
parents:
diff changeset
218 return Result (1 .. Len);
kono
parents:
diff changeset
219 end Decode;
kono
parents:
diff changeset
220
kono
parents:
diff changeset
221 ------------
kono
parents:
diff changeset
222 -- Encode --
kono
parents:
diff changeset
223 ------------
kono
parents:
diff changeset
224
kono
parents:
diff changeset
225 -- Encode Wide_String in UTF-8, UTF-16BE or UTF-16LE
kono
parents:
diff changeset
226
kono
parents:
diff changeset
227 function Encode
kono
parents:
diff changeset
228 (Item : Wide_String;
kono
parents:
diff changeset
229 Output_Scheme : Encoding_Scheme;
kono
parents:
diff changeset
230 Output_BOM : Boolean := False) return UTF_String
kono
parents:
diff changeset
231 is
kono
parents:
diff changeset
232 begin
kono
parents:
diff changeset
233 -- Case of UTF_8
kono
parents:
diff changeset
234
kono
parents:
diff changeset
235 if Output_Scheme = UTF_8 then
kono
parents:
diff changeset
236 return Encode (Item, Output_BOM);
kono
parents:
diff changeset
237
kono
parents:
diff changeset
238 -- Case of UTF_16LE or UTF_16BE, use UTF-16 intermediary
kono
parents:
diff changeset
239
kono
parents:
diff changeset
240 else
kono
parents:
diff changeset
241 return From_UTF_16 (UTF_16_Wide_String'(Encode (Item)),
kono
parents:
diff changeset
242 Output_Scheme, Output_BOM);
kono
parents:
diff changeset
243 end if;
kono
parents:
diff changeset
244 end Encode;
kono
parents:
diff changeset
245
kono
parents:
diff changeset
246 -- Encode Wide_String in UTF-8
kono
parents:
diff changeset
247
kono
parents:
diff changeset
248 function Encode
kono
parents:
diff changeset
249 (Item : Wide_String;
kono
parents:
diff changeset
250 Output_BOM : Boolean := False) return UTF_8_String
kono
parents:
diff changeset
251 is
kono
parents:
diff changeset
252 Result : UTF_8_String (1 .. 3 * Item'Length + 3);
kono
parents:
diff changeset
253 -- Worst case is three bytes per input byte + space for BOM
kono
parents:
diff changeset
254
kono
parents:
diff changeset
255 Len : Natural;
kono
parents:
diff changeset
256 -- Number of output codes stored in Result
kono
parents:
diff changeset
257
kono
parents:
diff changeset
258 C : Unsigned_16;
kono
parents:
diff changeset
259 -- Single input character
kono
parents:
diff changeset
260
kono
parents:
diff changeset
261 procedure Store (C : Unsigned_16);
kono
parents:
diff changeset
262 pragma Inline (Store);
kono
parents:
diff changeset
263 -- Store one output code, C is in the range 0 .. 255
kono
parents:
diff changeset
264
kono
parents:
diff changeset
265 -----------
kono
parents:
diff changeset
266 -- Store --
kono
parents:
diff changeset
267 -----------
kono
parents:
diff changeset
268
kono
parents:
diff changeset
269 procedure Store (C : Unsigned_16) is
kono
parents:
diff changeset
270 begin
kono
parents:
diff changeset
271 Len := Len + 1;
kono
parents:
diff changeset
272 Result (Len) := Character'Val (C);
kono
parents:
diff changeset
273 end Store;
kono
parents:
diff changeset
274
kono
parents:
diff changeset
275 -- Start of processing for UTF8_Encode
kono
parents:
diff changeset
276
kono
parents:
diff changeset
277 begin
kono
parents:
diff changeset
278 -- Output BOM if required
kono
parents:
diff changeset
279
kono
parents:
diff changeset
280 if Output_BOM then
kono
parents:
diff changeset
281 Result (1 .. 3) := BOM_8;
kono
parents:
diff changeset
282 Len := 3;
kono
parents:
diff changeset
283 else
kono
parents:
diff changeset
284 Len := 0;
kono
parents:
diff changeset
285 end if;
kono
parents:
diff changeset
286
kono
parents:
diff changeset
287 -- Loop through characters of input
kono
parents:
diff changeset
288
kono
parents:
diff changeset
289 for J in Item'Range loop
kono
parents:
diff changeset
290 C := To_Unsigned_16 (Item (J));
kono
parents:
diff changeset
291
kono
parents:
diff changeset
292 -- Codes in the range 16#00# - 16#7F# are represented as
kono
parents:
diff changeset
293 -- 0xxxxxxx
kono
parents:
diff changeset
294
kono
parents:
diff changeset
295 if C <= 16#7F# then
kono
parents:
diff changeset
296 Store (C);
kono
parents:
diff changeset
297
kono
parents:
diff changeset
298 -- Codes in the range 16#80# - 16#7FF# are represented as
kono
parents:
diff changeset
299 -- 110yyyxx 10xxxxxx
kono
parents:
diff changeset
300
kono
parents:
diff changeset
301 elsif C <= 16#7FF# then
kono
parents:
diff changeset
302 Store (2#110_00000# or Shift_Right (C, 6));
kono
parents:
diff changeset
303 Store (2#10_000000# or (C and 2#00_111111#));
kono
parents:
diff changeset
304
kono
parents:
diff changeset
305 -- Codes in the range 16#800# - 16#FFFF# are represented as
kono
parents:
diff changeset
306 -- 1110yyyy 10yyyyxx 10xxxxxx
kono
parents:
diff changeset
307
kono
parents:
diff changeset
308 else
kono
parents:
diff changeset
309 Store (2#1110_0000# or Shift_Right (C, 12));
kono
parents:
diff changeset
310 Store (2#10_000000# or
kono
parents:
diff changeset
311 Shift_Right (C and 2#111111_000000#, 6));
kono
parents:
diff changeset
312 Store (2#10_000000# or (C and 2#00_111111#));
kono
parents:
diff changeset
313 end if;
kono
parents:
diff changeset
314 end loop;
kono
parents:
diff changeset
315
kono
parents:
diff changeset
316 return Result (1 .. Len);
kono
parents:
diff changeset
317 end Encode;
kono
parents:
diff changeset
318
kono
parents:
diff changeset
319 -- Encode Wide_String in UTF-16
kono
parents:
diff changeset
320
kono
parents:
diff changeset
321 function Encode
kono
parents:
diff changeset
322 (Item : Wide_String;
kono
parents:
diff changeset
323 Output_BOM : Boolean := False) return UTF_16_Wide_String
kono
parents:
diff changeset
324 is
kono
parents:
diff changeset
325 Result : UTF_16_Wide_String
kono
parents:
diff changeset
326 (1 .. Item'Length + Boolean'Pos (Output_BOM));
kono
parents:
diff changeset
327 -- Output is same length as input + possible BOM
kono
parents:
diff changeset
328
kono
parents:
diff changeset
329 Len : Integer;
kono
parents:
diff changeset
330 -- Length of output string
kono
parents:
diff changeset
331
kono
parents:
diff changeset
332 C : Unsigned_16;
kono
parents:
diff changeset
333
kono
parents:
diff changeset
334 begin
kono
parents:
diff changeset
335 -- Output BOM if required
kono
parents:
diff changeset
336
kono
parents:
diff changeset
337 if Output_BOM then
kono
parents:
diff changeset
338 Result (1) := BOM_16 (1);
kono
parents:
diff changeset
339 Len := 1;
kono
parents:
diff changeset
340 else
kono
parents:
diff changeset
341 Len := 0;
kono
parents:
diff changeset
342 end if;
kono
parents:
diff changeset
343
kono
parents:
diff changeset
344 -- Loop through input characters encoding them
kono
parents:
diff changeset
345
kono
parents:
diff changeset
346 for Iptr in Item'Range loop
kono
parents:
diff changeset
347 C := To_Unsigned_16 (Item (Iptr));
kono
parents:
diff changeset
348
kono
parents:
diff changeset
349 -- Codes in the range 16#0000#..16#D7FF# or 16#E000#..16#FFFD# are
kono
parents:
diff changeset
350 -- output unchanged.
kono
parents:
diff changeset
351
kono
parents:
diff changeset
352 if C <= 16#D7FF# or else C in 16#E000# .. 16#FFFD# then
kono
parents:
diff changeset
353 Len := Len + 1;
kono
parents:
diff changeset
354 Result (Len) := Wide_Character'Val (C);
kono
parents:
diff changeset
355
kono
parents:
diff changeset
356 -- Codes in the range 16#D800#..16#DFFF# should never appear in the
kono
parents:
diff changeset
357 -- input, since no valid Unicode characters are in this range (which
kono
parents:
diff changeset
358 -- would conflict with the UTF-16 surrogate encodings). Similarly
kono
parents:
diff changeset
359 -- codes in the range 16#FFFE#..16#FFFF conflict with BOM codes.
kono
parents:
diff changeset
360 -- Thus all remaining codes are illegal.
kono
parents:
diff changeset
361
kono
parents:
diff changeset
362 else
kono
parents:
diff changeset
363 Raise_Encoding_Error (Iptr);
kono
parents:
diff changeset
364 end if;
kono
parents:
diff changeset
365 end loop;
kono
parents:
diff changeset
366
kono
parents:
diff changeset
367 return Result;
kono
parents:
diff changeset
368 end Encode;
kono
parents:
diff changeset
369
kono
parents:
diff changeset
370 end Ada.Strings.UTF_Encoding.Wide_Strings;