111
|
1 ------------------------------------------------------------------------------
|
|
2 -- --
|
|
3 -- GNAT RUN-TIME COMPONENTS --
|
|
4 -- --
|
|
5 -- S Y S T E M . W C H _ C N V --
|
|
6 -- --
|
|
7 -- S p e c --
|
|
8 -- --
|
145
|
9 -- Copyright (C) 1992-2019, Free Software Foundation, Inc. --
|
111
|
10 -- --
|
|
11 -- GNAT is free software; you can redistribute it and/or modify it under --
|
|
12 -- terms of the GNU General Public License as published by the Free Soft- --
|
|
13 -- ware Foundation; either version 3, or (at your option) any later ver- --
|
|
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
|
|
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
|
|
16 -- or FITNESS FOR A PARTICULAR PURPOSE. --
|
|
17 -- --
|
|
18 -- As a special exception under Section 7 of GPL version 3, you are granted --
|
|
19 -- additional permissions described in the GCC Runtime Library Exception, --
|
|
20 -- version 3.1, as published by the Free Software Foundation. --
|
|
21 -- --
|
|
22 -- You should have received a copy of the GNU General Public License and --
|
|
23 -- a copy of the GCC Runtime Library Exception along with this program; --
|
|
24 -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
|
|
25 -- <http://www.gnu.org/licenses/>. --
|
|
26 -- --
|
|
27 -- GNAT was originally developed by the GNAT team at New York University. --
|
|
28 -- Extensive contributions were provided by Ada Core Technologies Inc. --
|
|
29 -- --
|
|
30 ------------------------------------------------------------------------------
|
|
31
|
|
32 -- This package contains generic subprograms used for converting between
|
|
33 -- sequences of Character and Wide_Character. Wide_Wide_Character values
|
|
34 -- are also handled, but represented using integer range types defined in
|
|
35 -- this package, so that this package can be used from applications that
|
|
36 -- are restricted to Ada 95 compatibility (such as the compiler itself).
|
|
37
|
|
38 -- All the algorithms for encoding and decoding are isolated in this package
|
|
39 -- and in System.WCh_JIS and should not be duplicated elsewhere. The only
|
|
40 -- exception to this is that GNAT.Decode_String and GNAT.Encode_String have
|
|
41 -- their own circuits for UTF-8 conversions, for improved efficiency.
|
|
42
|
|
43 -- This unit may be used directly from an application program by providing
|
|
44 -- an appropriate WITH, and the interface can be expected to remain stable.
|
|
45
|
|
46 pragma Compiler_Unit_Warning;
|
|
47
|
|
48 with System.WCh_Con;
|
|
49
|
|
50 package System.WCh_Cnv is
-- Declares the UTF_32_Code and UTF_32_String types together with four
-- generic subprograms that convert between Character sequences and
-- Wide_Character or UTF_32_Code values for a given encoding method.
|
|
51 pragma Pure;
|
|
52
|
|
53 type UTF_32_Code is range 0 .. 16#7FFF_FFFF#;
|
|
54 for UTF_32_Code'Size use 32;
|
|
55 -- Range of allowed UTF-32 encoding values (0 .. 16#7FFF_FFFF#, with the
-- type's Size set to 32 bits by the representation clause above)
|
|
56
|
|
57 type UTF_32_String is array (Positive range <>) of UTF_32_Code;
-- Unconstrained array of UTF_32_Code values indexed by Positive
|
|
58
|
|
59 generic
|
|
60 with function In_Char return Character;
|
|
61 function Char_Sequence_To_Wide_Char
|
|
62 (C : Character;
|
|
63 EM : System.WCh_Con.WC_Encoding_Method) return Wide_Character;
|
|
64 -- C is the first character of a sequence of one or more characters which
|
|
65 -- represent a wide character sequence. Calling the function In_Char for
|
|
66 -- additional characters as required, Char_Sequence_To_Wide_Char returns
|
|
67 -- the corresponding wide character value. Constraint_Error is raised if
|
|
68 -- the sequence of characters encountered is not a valid wide character
|
|
69 -- sequence for the given encoding method.
|
|
70 --
|
|
71 -- Note on the use of brackets encoding (WCEM_Brackets). The brackets
|
|
72 -- encoding method is ambiguous in the context of this function, since
|
|
73 -- there is no way to tell if ["1234"] is eight unencoded characters or
|
|
74 -- one encoded character. In the context of Ada sources, any sequence
|
|
75 -- starting [" must be the start of an encoding (since that sequence is
|
|
76 -- not valid in Ada source otherwise). The routines in this package use
|
|
77 -- the same approach. If the input string contains the sequence [" then
|
|
78 -- this is assumed to be the start of a brackets encoding sequence, and
|
|
79 -- if it does not match the syntax, an error is raised.
|
|
80
|
|
81 generic
|
|
82 with function In_Char return Character;
|
|
83 function Char_Sequence_To_UTF_32
|
|
84 (C : Character;
|
|
85 EM : System.WCh_Con.WC_Encoding_Method) return UTF_32_Code;
|
|
86 -- This is similar to Char_Sequence_To_Wide_Char above, but the function
|
|
87 -- returns a code from the full UTF_32 code set, which covers the full range
|
|
88 -- of possible values in Wide_Wide_Character. The result can be converted to
|
|
89 -- Wide_Wide_Character form using Wide_Wide_Character'Val.
|
|
90
|
|
91 generic
|
|
92 with procedure Out_Char (C : Character);
|
|
93 procedure Wide_Char_To_Char_Sequence
|
|
94 (WC : Wide_Character;
|
|
95 EM : System.WCh_Con.WC_Encoding_Method);
|
|
96 -- Given a wide character, converts it into a sequence of one or
|
|
97 -- more characters, calling the given Out_Char procedure for each.
|
|
98 -- Constraint_Error is raised if the given wide character value is
|
|
99 -- not a valid value for the given encoding method.
|
|
100 --
|
|
101 -- Note on brackets encoding (WCEM_Brackets). For the input routines above,
|
|
102 -- upper half characters can be represented as ["hh"] but this procedure
|
|
103 -- will only use brackets encodings for codes higher than 16#FF#, so upper
|
|
104 -- half characters will be output as single Character values.
|
|
105
|
|
106 generic
|
|
107 with procedure Out_Char (C : Character);
|
|
108 procedure UTF_32_To_Char_Sequence
|
|
109 (Val : UTF_32_Code;
|
|
110 EM : System.WCh_Con.WC_Encoding_Method);
|
|
111 -- This is similar to Wide_Char_To_Char_Sequence above, but the input value
|
|
112 -- is a code from the full UTF_32 code set, which covers the full range of
|
|
113 -- possible values in Wide_Wide_Character. To convert a Wide_Wide_Character
|
|
114 -- value, the caller can use Wide_Wide_Character'Pos in the call.
|
|
115
|
|
116 end System.WCh_Cnv;
|