111
|
1 ------------------------------------------------------------------------------
|
|
2 -- --
|
|
3 -- GNAT COMPILER COMPONENTS --
|
|
4 -- --
|
|
5 -- G N A T . B Y T E _ O R D E R _ M A R K --
|
|
6 -- --
|
|
7 -- B o d y --
|
|
8 -- --
|
|
9 -- Copyright (C) 2006-2017, AdaCore --
|
|
10 -- --
|
|
11 -- GNAT is free software; you can redistribute it and/or modify it under --
|
|
12 -- terms of the GNU General Public License as published by the Free Soft- --
|
|
13 -- ware Foundation; either version 3, or (at your option) any later ver- --
|
|
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
|
|
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
|
|
16 -- or FITNESS FOR A PARTICULAR PURPOSE. --
|
|
17 -- --
|
|
18 -- As a special exception under Section 7 of GPL version 3, you are granted --
|
|
19 -- additional permissions described in the GCC Runtime Library Exception, --
|
|
20 -- version 3.1, as published by the Free Software Foundation. --
|
|
21 -- --
|
|
22 -- You should have received a copy of the GNU General Public License and --
|
|
23 -- a copy of the GCC Runtime Library Exception along with this program; --
|
|
24 -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
|
|
25 -- <http://www.gnu.org/licenses/>. --
|
|
26 -- --
|
|
27 -- GNAT was originally developed by the GNAT team at New York University. --
|
|
28 -- Extensive contributions were provided by Ada Core Technologies Inc. --
|
|
29 -- --
|
|
30 ------------------------------------------------------------------------------
|
|
31
|
|
32 pragma Compiler_Unit_Warning;
|
|
33
|
|
34 package body GNAT.Byte_Order_Mark is
|
|
35
|
|
36 --------------
|
|
37 -- Read_BOM --
|
|
38 --------------
|
|
39
|
|
40 procedure Read_BOM
|
|
41 (Str : String;
|
|
42 Len : out Natural;
|
|
43 BOM : out BOM_Kind;
|
|
44 XML_Support : Boolean := False)
|
|
45 is
|
|
46 begin
|
|
47 -- Note: the order of these tests is important, because in some cases
|
|
48 -- one sequence is a prefix of a longer sequence, and we must test for
|
|
49 -- the longer sequence first
|
|
50
|
|
51 -- UTF-32 (big-endian)
|
|
52
|
|
53 if Str'Length >= 4
|
|
54 and then Str (Str'First) = Character'Val (16#00#)
|
|
55 and then Str (Str'First + 1) = Character'Val (16#00#)
|
|
56 and then Str (Str'First + 2) = Character'Val (16#FE#)
|
|
57 and then Str (Str'First + 3) = Character'Val (16#FF#)
|
|
58 then
|
|
59 Len := 4;
|
|
60 BOM := UTF32_BE;
|
|
61
|
|
62 -- UTF-32 (little-endian)
|
|
63
|
|
64 elsif Str'Length >= 4
|
|
65 and then Str (Str'First) = Character'Val (16#FF#)
|
|
66 and then Str (Str'First + 1) = Character'Val (16#FE#)
|
|
67 and then Str (Str'First + 2) = Character'Val (16#00#)
|
|
68 and then Str (Str'First + 3) = Character'Val (16#00#)
|
|
69 then
|
|
70 Len := 4;
|
|
71 BOM := UTF32_LE;
|
|
72
|
|
73 -- UTF-16 (big-endian)
|
|
74
|
|
75 elsif Str'Length >= 2
|
|
76 and then Str (Str'First) = Character'Val (16#FE#)
|
|
77 and then Str (Str'First + 1) = Character'Val (16#FF#)
|
|
78 then
|
|
79 Len := 2;
|
|
80 BOM := UTF16_BE;
|
|
81
|
|
82 -- UTF-16 (little-endian)
|
|
83
|
|
84 elsif Str'Length >= 2
|
|
85 and then Str (Str'First) = Character'Val (16#FF#)
|
|
86 and then Str (Str'First + 1) = Character'Val (16#FE#)
|
|
87 then
|
|
88 Len := 2;
|
|
89 BOM := UTF16_LE;
|
|
90
|
|
91 -- UTF-8 (endian-independent)
|
|
92
|
|
93 elsif Str'Length >= 3
|
|
94 and then Str (Str'First) = Character'Val (16#EF#)
|
|
95 and then Str (Str'First + 1) = Character'Val (16#BB#)
|
|
96 and then Str (Str'First + 2) = Character'Val (16#BF#)
|
|
97 then
|
|
98 Len := 3;
|
|
99 BOM := UTF8_All;
|
|
100
|
|
101 -- UCS-4 (big-endian) XML only
|
|
102
|
|
103 elsif XML_Support
|
|
104 and then Str'Length >= 4
|
|
105 and then Str (Str'First) = Character'Val (16#00#)
|
|
106 and then Str (Str'First + 1) = Character'Val (16#00#)
|
|
107 and then Str (Str'First + 2) = Character'Val (16#00#)
|
|
108 and then Str (Str'First + 3) = Character'Val (16#3C#)
|
|
109 then
|
|
110 Len := 0;
|
|
111 BOM := UCS4_BE;
|
|
112
|
|
113 -- UCS-4 (little-endian) XML case
|
|
114
|
|
115 elsif XML_Support
|
|
116 and then Str'Length >= 4
|
|
117 and then Str (Str'First) = Character'Val (16#3C#)
|
|
118 and then Str (Str'First + 1) = Character'Val (16#00#)
|
|
119 and then Str (Str'First + 2) = Character'Val (16#00#)
|
|
120 and then Str (Str'First + 3) = Character'Val (16#00#)
|
|
121 then
|
|
122 Len := 0;
|
|
123 BOM := UCS4_LE;
|
|
124
|
|
125 -- UCS-4 (unusual byte order 2143) XML case
|
|
126
|
|
127 elsif XML_Support
|
|
128 and then Str'Length >= 4
|
|
129 and then Str (Str'First) = Character'Val (16#00#)
|
|
130 and then Str (Str'First + 1) = Character'Val (16#00#)
|
|
131 and then Str (Str'First + 2) = Character'Val (16#3C#)
|
|
132 and then Str (Str'First + 3) = Character'Val (16#00#)
|
|
133 then
|
|
134 Len := 0;
|
|
135 BOM := UCS4_2143;
|
|
136
|
|
137 -- UCS-4 (unusual byte order 3412) XML case
|
|
138
|
|
139 elsif XML_Support
|
|
140 and then Str'Length >= 4
|
|
141 and then Str (Str'First) = Character'Val (16#00#)
|
|
142 and then Str (Str'First + 1) = Character'Val (16#3C#)
|
|
143 and then Str (Str'First + 2) = Character'Val (16#00#)
|
|
144 and then Str (Str'First + 3) = Character'Val (16#00#)
|
|
145 then
|
|
146 Len := 0;
|
|
147 BOM := UCS4_3412;
|
|
148
|
|
149 -- UTF-16 (big-endian) XML case
|
|
150
|
|
151 elsif XML_Support
|
|
152 and then Str'Length >= 4
|
|
153 and then Str (Str'First) = Character'Val (16#00#)
|
|
154 and then Str (Str'First + 1) = Character'Val (16#3C#)
|
|
155 and then Str (Str'First + 2) = Character'Val (16#00#)
|
|
156 and then Str (Str'First + 3) = Character'Val (16#3F#)
|
|
157 then
|
|
158 Len := 0;
|
|
159 BOM := UTF16_BE;
|
|
160
|
|
161 -- UTF-32 (little-endian) XML case
|
|
162
|
|
163 elsif XML_Support
|
|
164 and then Str'Length >= 4
|
|
165 and then Str (Str'First) = Character'Val (16#3C#)
|
|
166 and then Str (Str'First + 1) = Character'Val (16#00#)
|
|
167 and then Str (Str'First + 2) = Character'Val (16#3F#)
|
|
168 and then Str (Str'First + 3) = Character'Val (16#00#)
|
|
169 then
|
|
170 Len := 0;
|
|
171 BOM := UTF16_LE;
|
|
172
|
|
173 -- Unrecognized special encodings XML only
|
|
174
|
|
175 elsif XML_Support
|
|
176 and then Str'Length >= 4
|
|
177 and then Str (Str'First) = Character'Val (16#3C#)
|
|
178 and then Str (Str'First + 1) = Character'Val (16#3F#)
|
|
179 and then Str (Str'First + 2) = Character'Val (16#78#)
|
|
180 and then Str (Str'First + 3) = Character'Val (16#6D#)
|
|
181 then
|
|
182 -- UTF-8, ASCII, some part of ISO8859, Shift-JIS, EUC,...
|
|
183
|
|
184 Len := 0;
|
|
185 BOM := Unknown;
|
|
186
|
|
187 -- No BOM recognized
|
|
188
|
|
189 else
|
|
190 Len := 0;
|
|
191 BOM := Unknown;
|
|
192 end if;
|
|
193 end Read_BOM;
|
|
194
|
|
195 end GNAT.Byte_Order_Mark;
|