annotate gcc/ada/libgnat/a-suenst.adb @ 131:84e7813d76e9

gcc-8.2
author mir3636
date Thu, 25 Oct 2018 07:37:49 +0900
parents 04ced10e8804
children 1830386684a0
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
111
kono
parents:
diff changeset
1 ------------------------------------------------------------------------------
kono
parents:
diff changeset
2 -- --
kono
parents:
diff changeset
3 -- GNAT RUN-TIME COMPONENTS --
kono
parents:
diff changeset
4 -- --
kono
parents:
diff changeset
5 -- ADA.STRINGS.UTF_ENCODING.STRINGS --
kono
parents:
diff changeset
6 -- --
kono
parents:
diff changeset
7 -- B o d y --
kono
parents:
diff changeset
8 -- --
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
9 -- Copyright (C) 2010-2018, Free Software Foundation, Inc. --
111
kono
parents:
diff changeset
10 -- --
kono
parents:
diff changeset
11 -- GNAT is free software; you can redistribute it and/or modify it under --
kono
parents:
diff changeset
12 -- terms of the GNU General Public License as published by the Free Soft- --
kono
parents:
diff changeset
13 -- ware Foundation; either version 3, or (at your option) any later ver- --
kono
parents:
diff changeset
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
kono
parents:
diff changeset
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
kono
parents:
diff changeset
16 -- or FITNESS FOR A PARTICULAR PURPOSE. --
kono
parents:
diff changeset
17 -- --
kono
parents:
diff changeset
18 -- As a special exception under Section 7 of GPL version 3, you are granted --
kono
parents:
diff changeset
19 -- additional permissions described in the GCC Runtime Library Exception, --
kono
parents:
diff changeset
20 -- version 3.1, as published by the Free Software Foundation. --
kono
parents:
diff changeset
21 -- --
kono
parents:
diff changeset
22 -- You should have received a copy of the GNU General Public License and --
kono
parents:
diff changeset
23 -- a copy of the GCC Runtime Library Exception along with this program; --
kono
parents:
diff changeset
24 -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
kono
parents:
diff changeset
25 -- <http://www.gnu.org/licenses/>. --
kono
parents:
diff changeset
26 -- --
kono
parents:
diff changeset
27 -- GNAT was originally developed by the GNAT team at New York University. --
kono
parents:
diff changeset
28 -- Extensive contributions were provided by Ada Core Technologies Inc. --
kono
parents:
diff changeset
29 -- --
kono
parents:
diff changeset
30 ------------------------------------------------------------------------------
kono
parents:
diff changeset
31
kono
parents:
diff changeset
32 package body Ada.Strings.UTF_Encoding.Strings is
kono
parents:
diff changeset
33 use Interfaces;
kono
parents:
diff changeset
34
kono
parents:
diff changeset
35 ------------
kono
parents:
diff changeset
36 -- Decode --
kono
parents:
diff changeset
37 ------------
kono
parents:
diff changeset
38
kono
parents:
diff changeset
39 -- Decode UTF-8/UTF-16BE/UTF-16LE input to String
kono
parents:
diff changeset
40
kono
parents:
diff changeset
41 function Decode
kono
parents:
diff changeset
42 (Item : UTF_String;
kono
parents:
diff changeset
43 Input_Scheme : Encoding_Scheme) return String
kono
parents:
diff changeset
44 is
kono
parents:
diff changeset
45 begin
kono
parents:
diff changeset
46 if Input_Scheme = UTF_8 then
kono
parents:
diff changeset
47 return Decode (Item);
kono
parents:
diff changeset
48 else
kono
parents:
diff changeset
49 return Decode (To_UTF_16 (Item, Input_Scheme));
kono
parents:
diff changeset
50 end if;
kono
parents:
diff changeset
51 end Decode;
kono
parents:
diff changeset
52
kono
parents:
diff changeset
53 -- Decode UTF-8 input to String
kono
parents:
diff changeset
54
kono
parents:
diff changeset
55 function Decode (Item : UTF_8_String) return String is
kono
parents:
diff changeset
56 Result : String (1 .. Item'Length);
kono
parents:
diff changeset
57 -- Result string (worst case is same length as input)
kono
parents:
diff changeset
58
kono
parents:
diff changeset
59 Len : Natural := 0;
kono
parents:
diff changeset
60 -- Length of result stored so far
kono
parents:
diff changeset
61
kono
parents:
diff changeset
62 Iptr : Natural;
kono
parents:
diff changeset
63 -- Input Item pointer
kono
parents:
diff changeset
64
kono
parents:
diff changeset
65 C : Unsigned_8;
kono
parents:
diff changeset
66 R : Unsigned_16;
kono
parents:
diff changeset
67
kono
parents:
diff changeset
68 procedure Get_Continuation;
kono
parents:
diff changeset
69 -- Reads a continuation byte of the form 10xxxxxx, shifts R left
kono
parents:
diff changeset
70 -- by 6 bits, and or's in the xxxxxx to the low order 6 bits. On
kono
parents:
diff changeset
71 -- return Ptr is incremented. Raises exception if continuation
kono
parents:
diff changeset
72 -- byte does not exist or is invalid.
kono
parents:
diff changeset
73
kono
parents:
diff changeset
74 ----------------------
kono
parents:
diff changeset
75 -- Get_Continuation --
kono
parents:
diff changeset
76 ----------------------
kono
parents:
diff changeset
77
kono
parents:
diff changeset
78 procedure Get_Continuation is
kono
parents:
diff changeset
79 begin
kono
parents:
diff changeset
80 if Iptr > Item'Last then
kono
parents:
diff changeset
81 Raise_Encoding_Error (Iptr - 1);
kono
parents:
diff changeset
82
kono
parents:
diff changeset
83 else
kono
parents:
diff changeset
84 C := To_Unsigned_8 (Item (Iptr));
kono
parents:
diff changeset
85 Iptr := Iptr + 1;
kono
parents:
diff changeset
86
kono
parents:
diff changeset
87 if C not in 2#10_000000# .. 2#10_111111# then
kono
parents:
diff changeset
88 Raise_Encoding_Error (Iptr - 1);
kono
parents:
diff changeset
89 else
kono
parents:
diff changeset
90 R := Shift_Left (R, 6) or Unsigned_16 (C and 2#00_111111#);
kono
parents:
diff changeset
91 end if;
kono
parents:
diff changeset
92 end if;
kono
parents:
diff changeset
93 end Get_Continuation;
kono
parents:
diff changeset
94
kono
parents:
diff changeset
95 -- Start of processing for Decode
kono
parents:
diff changeset
96
kono
parents:
diff changeset
97 begin
kono
parents:
diff changeset
98 Iptr := Item'First;
kono
parents:
diff changeset
99
kono
parents:
diff changeset
100 -- Skip BOM at start
kono
parents:
diff changeset
101
kono
parents:
diff changeset
102 if Item'Length >= 3
kono
parents:
diff changeset
103 and then Item (Iptr .. Iptr + 2) = BOM_8
kono
parents:
diff changeset
104 then
kono
parents:
diff changeset
105 Iptr := Iptr + 3;
kono
parents:
diff changeset
106
kono
parents:
diff changeset
107 -- Error if bad BOM
kono
parents:
diff changeset
108
kono
parents:
diff changeset
109 elsif Item'Length >= 2
kono
parents:
diff changeset
110 and then (Item (Iptr .. Iptr + 1) = BOM_16BE
kono
parents:
diff changeset
111 or else
kono
parents:
diff changeset
112 Item (Iptr .. Iptr + 1) = BOM_16LE)
kono
parents:
diff changeset
113 then
kono
parents:
diff changeset
114 Raise_Encoding_Error (Iptr);
kono
parents:
diff changeset
115 end if;
kono
parents:
diff changeset
116
kono
parents:
diff changeset
117 while Iptr <= Item'Last loop
kono
parents:
diff changeset
118 C := To_Unsigned_8 (Item (Iptr));
kono
parents:
diff changeset
119 Iptr := Iptr + 1;
kono
parents:
diff changeset
120
kono
parents:
diff changeset
121 -- Codes in the range 16#00# - 16#7F# are represented as
kono
parents:
diff changeset
122 -- 0xxxxxxx
kono
parents:
diff changeset
123
kono
parents:
diff changeset
124 if C <= 16#7F# then
kono
parents:
diff changeset
125 R := Unsigned_16 (C);
kono
parents:
diff changeset
126
kono
parents:
diff changeset
127 -- No initial code can be of the form 10xxxxxx. Such codes are used
kono
parents:
diff changeset
128 -- only for continuations.
kono
parents:
diff changeset
129
kono
parents:
diff changeset
130 elsif C <= 2#10_111111# then
kono
parents:
diff changeset
131 Raise_Encoding_Error (Iptr - 1);
kono
parents:
diff changeset
132
kono
parents:
diff changeset
133 -- Codes in the range 16#80# - 16#7FF# are represented as
kono
parents:
diff changeset
134 -- 110yyyxx 10xxxxxx
kono
parents:
diff changeset
135
kono
parents:
diff changeset
136 elsif C <= 2#110_11111# then
kono
parents:
diff changeset
137 R := Unsigned_16 (C and 2#000_11111#);
kono
parents:
diff changeset
138 Get_Continuation;
kono
parents:
diff changeset
139
kono
parents:
diff changeset
140 -- Codes in the range 16#800# - 16#FFFF# are represented as
kono
parents:
diff changeset
141 -- 1110yyyy 10yyyyxx 10xxxxxx
kono
parents:
diff changeset
142
kono
parents:
diff changeset
143 -- Such codes are out of range for type Character
kono
parents:
diff changeset
144
kono
parents:
diff changeset
145 -- Codes in the range 16#10000# - 16#10FFFF# are represented as
kono
parents:
diff changeset
146 -- 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
kono
parents:
diff changeset
147
kono
parents:
diff changeset
148 -- Such codes are out of range for Wide_String output
kono
parents:
diff changeset
149
kono
parents:
diff changeset
150 -- Thus all remaining cases raise Encoding_Error
kono
parents:
diff changeset
151
kono
parents:
diff changeset
152 else
kono
parents:
diff changeset
153 Raise_Encoding_Error (Iptr - 1);
kono
parents:
diff changeset
154 end if;
kono
parents:
diff changeset
155
kono
parents:
diff changeset
156 Len := Len + 1;
kono
parents:
diff changeset
157
kono
parents:
diff changeset
158 -- The value may still be out of range of Standard.Character. We make
kono
parents:
diff changeset
159 -- the check explicit because the library is typically compiled with
kono
parents:
diff changeset
160 -- range checks disabled.
kono
parents:
diff changeset
161
kono
parents:
diff changeset
162 if R > Character'Pos (Character'Last) then
kono
parents:
diff changeset
163 Raise_Encoding_Error (Iptr - 1);
kono
parents:
diff changeset
164 end if;
kono
parents:
diff changeset
165
kono
parents:
diff changeset
166 Result (Len) := Character'Val (R);
kono
parents:
diff changeset
167 end loop;
kono
parents:
diff changeset
168
kono
parents:
diff changeset
169 return Result (1 .. Len);
kono
parents:
diff changeset
170 end Decode;
kono
parents:
diff changeset
171
kono
parents:
diff changeset
172 -- Decode UTF-16 input to String
kono
parents:
diff changeset
173
kono
parents:
diff changeset
174 function Decode (Item : UTF_16_Wide_String) return String is
kono
parents:
diff changeset
175 Result : String (1 .. Item'Length);
kono
parents:
diff changeset
176 -- Result is same length as input (possibly minus 1 if BOM present)
kono
parents:
diff changeset
177
kono
parents:
diff changeset
178 Len : Natural := 0;
kono
parents:
diff changeset
179 -- Length of result
kono
parents:
diff changeset
180
kono
parents:
diff changeset
181 Iptr : Natural;
kono
parents:
diff changeset
182 -- Index of next Item element
kono
parents:
diff changeset
183
kono
parents:
diff changeset
184 C : Unsigned_16;
kono
parents:
diff changeset
185
kono
parents:
diff changeset
186 begin
kono
parents:
diff changeset
187 -- Skip UTF-16 BOM at start
kono
parents:
diff changeset
188
kono
parents:
diff changeset
189 Iptr := Item'First;
kono
parents:
diff changeset
190
kono
parents:
diff changeset
191 if Item'Length > 0 and then Item (Iptr) = BOM_16 (1) then
kono
parents:
diff changeset
192 Iptr := Iptr + 1;
kono
parents:
diff changeset
193 end if;
kono
parents:
diff changeset
194
kono
parents:
diff changeset
195 -- Loop through input characters
kono
parents:
diff changeset
196
kono
parents:
diff changeset
197 while Iptr <= Item'Last loop
kono
parents:
diff changeset
198 C := To_Unsigned_16 (Item (Iptr));
kono
parents:
diff changeset
199 Iptr := Iptr + 1;
kono
parents:
diff changeset
200
kono
parents:
diff changeset
201 -- Codes in the range 16#0000#..16#00FF# represent their own value
kono
parents:
diff changeset
202
kono
parents:
diff changeset
203 if C <= 16#00FF# then
kono
parents:
diff changeset
204 Len := Len + 1;
kono
parents:
diff changeset
205 Result (Len) := Character'Val (C);
kono
parents:
diff changeset
206
kono
parents:
diff changeset
207 -- All other codes are invalid, either they are invalid UTF-16
kono
parents:
diff changeset
208 -- encoding sequences, or they represent values that are out of
kono
parents:
diff changeset
209 -- range for type Character.
kono
parents:
diff changeset
210
kono
parents:
diff changeset
211 else
kono
parents:
diff changeset
212 Raise_Encoding_Error (Iptr - 1);
kono
parents:
diff changeset
213 end if;
kono
parents:
diff changeset
214 end loop;
kono
parents:
diff changeset
215
kono
parents:
diff changeset
216 return Result (1 .. Len);
kono
parents:
diff changeset
217 end Decode;
kono
parents:
diff changeset
218
kono
parents:
diff changeset
219 ------------
kono
parents:
diff changeset
220 -- Encode --
kono
parents:
diff changeset
221 ------------
kono
parents:
diff changeset
222
kono
parents:
diff changeset
223 -- Encode String in UTF-8, UTF-16BE or UTF-16LE
kono
parents:
diff changeset
224
kono
parents:
diff changeset
225 function Encode
kono
parents:
diff changeset
226 (Item : String;
kono
parents:
diff changeset
227 Output_Scheme : Encoding_Scheme;
kono
parents:
diff changeset
228 Output_BOM : Boolean := False) return UTF_String
kono
parents:
diff changeset
229 is
kono
parents:
diff changeset
230 begin
kono
parents:
diff changeset
231 -- Case of UTF_8
kono
parents:
diff changeset
232
kono
parents:
diff changeset
233 if Output_Scheme = UTF_8 then
kono
parents:
diff changeset
234 return Encode (Item, Output_BOM);
kono
parents:
diff changeset
235
kono
parents:
diff changeset
236 -- Case of UTF_16LE or UTF_16BE, use UTF-16 intermediary
kono
parents:
diff changeset
237
kono
parents:
diff changeset
238 else
kono
parents:
diff changeset
239 return From_UTF_16 (UTF_16_Wide_String'(Encode (Item)),
kono
parents:
diff changeset
240 Output_Scheme, Output_BOM);
kono
parents:
diff changeset
241 end if;
kono
parents:
diff changeset
242 end Encode;
kono
parents:
diff changeset
243
kono
parents:
diff changeset
244 -- Encode String in UTF-8
kono
parents:
diff changeset
245
kono
parents:
diff changeset
246 function Encode
kono
parents:
diff changeset
247 (Item : String;
kono
parents:
diff changeset
248 Output_BOM : Boolean := False) return UTF_8_String
kono
parents:
diff changeset
249 is
kono
parents:
diff changeset
250 Result : UTF_8_String (1 .. 3 * Item'Length + 3);
kono
parents:
diff changeset
251 -- Worst case is three bytes per input byte + space for BOM
kono
parents:
diff changeset
252
kono
parents:
diff changeset
253 Len : Natural;
kono
parents:
diff changeset
254 -- Number of output codes stored in Result
kono
parents:
diff changeset
255
kono
parents:
diff changeset
256 C : Unsigned_8;
kono
parents:
diff changeset
257 -- Single input character
kono
parents:
diff changeset
258
kono
parents:
diff changeset
259 procedure Store (C : Unsigned_8);
kono
parents:
diff changeset
260 pragma Inline (Store);
kono
parents:
diff changeset
261 -- Store one output code, C is in the range 0 .. 255
kono
parents:
diff changeset
262
kono
parents:
diff changeset
263 -----------
kono
parents:
diff changeset
264 -- Store --
kono
parents:
diff changeset
265 -----------
kono
parents:
diff changeset
266
kono
parents:
diff changeset
267 procedure Store (C : Unsigned_8) is
kono
parents:
diff changeset
268 begin
kono
parents:
diff changeset
269 Len := Len + 1;
kono
parents:
diff changeset
270 Result (Len) := Character'Val (C);
kono
parents:
diff changeset
271 end Store;
kono
parents:
diff changeset
272
kono
parents:
diff changeset
273 -- Start of processing for UTF8_Encode
kono
parents:
diff changeset
274
kono
parents:
diff changeset
275 begin
kono
parents:
diff changeset
276 -- Output BOM if required
kono
parents:
diff changeset
277
kono
parents:
diff changeset
278 if Output_BOM then
kono
parents:
diff changeset
279 Result (1 .. 3) := BOM_8;
kono
parents:
diff changeset
280 Len := 3;
kono
parents:
diff changeset
281 else
kono
parents:
diff changeset
282 Len := 0;
kono
parents:
diff changeset
283 end if;
kono
parents:
diff changeset
284
kono
parents:
diff changeset
285 -- Loop through characters of input
kono
parents:
diff changeset
286
kono
parents:
diff changeset
287 for J in Item'Range loop
kono
parents:
diff changeset
288 C := To_Unsigned_8 (Item (J));
kono
parents:
diff changeset
289
kono
parents:
diff changeset
290 -- Codes in the range 16#00# - 16#7F# are represented as
kono
parents:
diff changeset
291 -- 0xxxxxxx
kono
parents:
diff changeset
292
kono
parents:
diff changeset
293 if C <= 16#7F# then
kono
parents:
diff changeset
294 Store (C);
kono
parents:
diff changeset
295
kono
parents:
diff changeset
296 -- Codes in the range 16#80# - 16#7FF# are represented as
kono
parents:
diff changeset
297 -- 110yyyxx 10xxxxxx
kono
parents:
diff changeset
298
kono
parents:
diff changeset
299 -- For type character of course, the limit is 16#FF# in any case
kono
parents:
diff changeset
300
kono
parents:
diff changeset
301 else
kono
parents:
diff changeset
302 Store (2#110_00000# or Shift_Right (C, 6));
kono
parents:
diff changeset
303 Store (2#10_000000# or (C and 2#00_111111#));
kono
parents:
diff changeset
304 end if;
kono
parents:
diff changeset
305 end loop;
kono
parents:
diff changeset
306
kono
parents:
diff changeset
307 return Result (1 .. Len);
kono
parents:
diff changeset
308 end Encode;
kono
parents:
diff changeset
309
kono
parents:
diff changeset
310 -- Encode String in UTF-16
kono
parents:
diff changeset
311
kono
parents:
diff changeset
312 function Encode
kono
parents:
diff changeset
313 (Item : String;
kono
parents:
diff changeset
314 Output_BOM : Boolean := False) return UTF_16_Wide_String
kono
parents:
diff changeset
315 is
kono
parents:
diff changeset
316 Result : UTF_16_Wide_String
kono
parents:
diff changeset
317 (1 .. Item'Length + Boolean'Pos (Output_BOM));
kono
parents:
diff changeset
318 -- Output is same length as input + possible BOM
kono
parents:
diff changeset
319
kono
parents:
diff changeset
320 Len : Integer;
kono
parents:
diff changeset
321 -- Length of output string
kono
parents:
diff changeset
322
kono
parents:
diff changeset
323 C : Unsigned_8;
kono
parents:
diff changeset
324
kono
parents:
diff changeset
325 begin
kono
parents:
diff changeset
326 -- Output BOM if required
kono
parents:
diff changeset
327
kono
parents:
diff changeset
328 if Output_BOM then
kono
parents:
diff changeset
329 Result (1) := BOM_16 (1);
kono
parents:
diff changeset
330 Len := 1;
kono
parents:
diff changeset
331 else
kono
parents:
diff changeset
332 Len := 0;
kono
parents:
diff changeset
333 end if;
kono
parents:
diff changeset
334
kono
parents:
diff changeset
335 -- Loop through input characters encoding them
kono
parents:
diff changeset
336
kono
parents:
diff changeset
337 for Iptr in Item'Range loop
kono
parents:
diff changeset
338 C := To_Unsigned_8 (Item (Iptr));
kono
parents:
diff changeset
339
kono
parents:
diff changeset
340 -- Codes in the range 16#0000#..16#00FF# are output unchanged. This
kono
parents:
diff changeset
341 -- includes all possible cases of Character values.
kono
parents:
diff changeset
342
kono
parents:
diff changeset
343 Len := Len + 1;
kono
parents:
diff changeset
344 Result (Len) := Wide_Character'Val (C);
kono
parents:
diff changeset
345 end loop;
kono
parents:
diff changeset
346
kono
parents:
diff changeset
347 return Result;
kono
parents:
diff changeset
348 end Encode;
kono
parents:
diff changeset
349
kono
parents:
diff changeset
350 end Ada.Strings.UTF_Encoding.Strings;