111
|
1 ------------------------------------------------------------------------------
|
|
2 -- --
|
|
3 -- GNAT RUN-TIME COMPONENTS --
|
|
4 -- --
|
|
5 -- A D A . S T R I N G S . U T F _ E N C O D I N G --
|
|
6 -- --
|
|
7 -- B o d y --
|
|
8 -- --
|
131
|
9 -- Copyright (C) 2010-2018, Free Software Foundation, Inc. --
|
111
|
10 -- --
|
|
11 -- GNAT is free software; you can redistribute it and/or modify it under --
|
|
12 -- terms of the GNU General Public License as published by the Free Soft- --
|
|
13 -- ware Foundation; either version 3, or (at your option) any later ver- --
|
|
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
|
|
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
|
|
16 -- or FITNESS FOR A PARTICULAR PURPOSE. --
|
|
17 -- --
|
|
18 -- As a special exception under Section 7 of GPL version 3, you are granted --
|
|
19 -- additional permissions described in the GCC Runtime Library Exception, --
|
|
20 -- version 3.1, as published by the Free Software Foundation. --
|
|
21 -- --
|
|
22 -- You should have received a copy of the GNU General Public License and --
|
|
23 -- a copy of the GCC Runtime Library Exception along with this program; --
|
|
24 -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
|
|
25 -- <http://www.gnu.org/licenses/>. --
|
|
26 -- --
|
|
27 -- GNAT was originally developed by the GNAT team at New York University. --
|
|
28 -- Extensive contributions were provided by Ada Core Technologies Inc. --
|
|
29 -- --
|
|
30 ------------------------------------------------------------------------------
|
|
31
|
|
32 package body Ada.Strings.UTF_Encoding is
|
|
33 use Interfaces;
|
|
34
|
|
35 --------------
|
|
36 -- Encoding --
|
|
37 --------------
|
|
38
|
|
39 function Encoding
|
|
40 (Item : UTF_String;
|
|
41 Default : Encoding_Scheme := UTF_8) return Encoding_Scheme
|
|
42 is
|
|
43 begin
|
|
44 if Item'Length >= 2 then
|
|
45 if Item (Item'First .. Item'First + 1) = BOM_16BE then
|
|
46 return UTF_16BE;
|
|
47
|
|
48 elsif Item (Item'First .. Item'First + 1) = BOM_16LE then
|
|
49 return UTF_16LE;
|
|
50
|
|
51 elsif Item'Length >= 3
|
|
52 and then Item (Item'First .. Item'First + 2) = BOM_8
|
|
53 then
|
|
54 return UTF_8;
|
|
55 end if;
|
|
56 end if;
|
|
57
|
|
58 return Default;
|
|
59 end Encoding;
|
|
60
|
|
61 -----------------
|
|
62 -- From_UTF_16 --
|
|
63 -----------------
|
|
64
|
|
65 function From_UTF_16
|
|
66 (Item : UTF_16_Wide_String;
|
|
67 Output_Scheme : UTF_XE_Encoding;
|
|
68 Output_BOM : Boolean := False) return UTF_String
|
|
69 is
|
|
70 BSpace : constant Natural := 2 * Boolean'Pos (Output_BOM);
|
|
71 Result : UTF_String (1 .. 2 * Item'Length + BSpace);
|
|
72 Len : Natural;
|
|
73 C : Unsigned_16;
|
|
74 Iptr : Natural;
|
|
75
|
|
76 begin
|
|
77 if Output_BOM then
|
|
78 Result (1 .. 2) :=
|
|
79 (if Output_Scheme = UTF_16BE then BOM_16BE else BOM_16LE);
|
|
80 Len := 2;
|
|
81 else
|
|
82 Len := 0;
|
|
83 end if;
|
|
84
|
|
85 -- Skip input BOM
|
|
86
|
|
87 Iptr := Item'First;
|
|
88
|
|
89 if Iptr <= Item'Last and then Item (Iptr) = BOM_16 (1) then
|
|
90 Iptr := Iptr + 1;
|
|
91 end if;
|
|
92
|
|
93 -- UTF-16BE case
|
|
94
|
|
95 if Output_Scheme = UTF_16BE then
|
|
96 while Iptr <= Item'Last loop
|
|
97 C := To_Unsigned_16 (Item (Iptr));
|
|
98 Result (Len + 1) := Character'Val (Shift_Right (C, 8));
|
|
99 Result (Len + 2) := Character'Val (C and 16#00_FF#);
|
|
100 Len := Len + 2;
|
|
101 Iptr := Iptr + 1;
|
|
102 end loop;
|
|
103
|
|
104 -- UTF-16LE case
|
|
105
|
|
106 else
|
|
107 while Iptr <= Item'Last loop
|
|
108 C := To_Unsigned_16 (Item (Iptr));
|
|
109 Result (Len + 1) := Character'Val (C and 16#00_FF#);
|
|
110 Result (Len + 2) := Character'Val (Shift_Right (C, 8));
|
|
111 Len := Len + 2;
|
|
112 Iptr := Iptr + 1;
|
|
113 end loop;
|
|
114 end if;
|
|
115
|
|
116 return Result (1 .. Len);
|
|
117 end From_UTF_16;
|
|
118
|
|
119 --------------------------
|
|
120 -- Raise_Encoding_Error --
|
|
121 --------------------------
|
|
122
|
|
123 procedure Raise_Encoding_Error (Index : Natural) is
|
|
124 Val : constant String := Index'Img;
|
|
125 begin
|
|
126 raise Encoding_Error with
|
|
127 "bad input at Item (" & Val (Val'First + 1 .. Val'Last) & ')';
|
|
128 end Raise_Encoding_Error;
|
|
129
|
|
130 ---------------
|
|
131 -- To_UTF_16 --
|
|
132 ---------------
|
|
133
|
|
134 function To_UTF_16
|
|
135 (Item : UTF_String;
|
|
136 Input_Scheme : UTF_XE_Encoding;
|
|
137 Output_BOM : Boolean := False) return UTF_16_Wide_String
|
|
138 is
|
|
139 Result : UTF_16_Wide_String (1 .. Item'Length / 2 + 1);
|
|
140 Len : Natural;
|
|
141 Iptr : Natural;
|
|
142
|
|
143 begin
|
|
144 if Item'Length mod 2 /= 0 then
|
|
145 raise Encoding_Error with "UTF-16BE/LE string has odd length";
|
|
146 end if;
|
|
147
|
|
148 -- Deal with input BOM, skip if OK, error if bad BOM
|
|
149
|
|
150 Iptr := Item'First;
|
|
151
|
|
152 if Item'Length >= 2 then
|
|
153 if Item (Iptr .. Iptr + 1) = BOM_16BE then
|
|
154 if Input_Scheme = UTF_16BE then
|
|
155 Iptr := Iptr + 2;
|
|
156 else
|
|
157 Raise_Encoding_Error (Iptr);
|
|
158 end if;
|
|
159
|
|
160 elsif Item (Iptr .. Iptr + 1) = BOM_16LE then
|
|
161 if Input_Scheme = UTF_16LE then
|
|
162 Iptr := Iptr + 2;
|
|
163 else
|
|
164 Raise_Encoding_Error (Iptr);
|
|
165 end if;
|
|
166
|
|
167 elsif Item'Length >= 3 and then Item (Iptr .. Iptr + 2) = BOM_8 then
|
|
168 Raise_Encoding_Error (Iptr);
|
|
169 end if;
|
|
170 end if;
|
|
171
|
|
172 -- Output BOM if specified
|
|
173
|
|
174 if Output_BOM then
|
|
175 Result (1) := BOM_16 (1);
|
|
176 Len := 1;
|
|
177 else
|
|
178 Len := 0;
|
|
179 end if;
|
|
180
|
|
181 -- UTF-16BE case
|
|
182
|
|
183 if Input_Scheme = UTF_16BE then
|
|
184 while Iptr < Item'Last loop
|
|
185 Len := Len + 1;
|
|
186 Result (Len) :=
|
|
187 Wide_Character'Val
|
|
188 (Character'Pos (Item (Iptr)) * 256 +
|
|
189 Character'Pos (Item (Iptr + 1)));
|
|
190 Iptr := Iptr + 2;
|
|
191 end loop;
|
|
192
|
|
193 -- UTF-16LE case
|
|
194
|
|
195 else
|
|
196 while Iptr < Item'Last loop
|
|
197 Len := Len + 1;
|
|
198 Result (Len) :=
|
|
199 Wide_Character'Val
|
|
200 (Character'Pos (Item (Iptr)) +
|
|
201 Character'Pos (Item (Iptr + 1)) * 256);
|
|
202 Iptr := Iptr + 2;
|
|
203 end loop;
|
|
204 end if;
|
|
205
|
|
206 return Result (1 .. Len);
|
|
207 end To_UTF_16;
|
|
208
|
|
209 end Ada.Strings.UTF_Encoding;
|