------------------------------------------------------------------------------ -- -- -- GNAT RUN-TIME COMPONENTS -- -- -- -- A D A . S T R I N G S . U T F _ E N C O D I N G -- -- -- -- B o d y -- -- -- -- Copyright (C) 2010-2018, Free Software Foundation, Inc. -- -- -- -- GNAT is free software; you can redistribute it and/or modify it under -- -- terms of the GNU General Public License as published by the Free Soft- -- -- ware Foundation; either version 3, or (at your option) any later ver- -- -- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- -- or FITNESS FOR A PARTICULAR PURPOSE. -- -- -- -- As a special exception under Section 7 of GPL version 3, you are granted -- -- additional permissions described in the GCC Runtime Library Exception, -- -- version 3.1, as published by the Free Software Foundation. -- -- -- -- You should have received a copy of the GNU General Public License and -- -- a copy of the GCC Runtime Library Exception along with this program; -- -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- -- . -- -- -- -- GNAT was originally developed by the GNAT team at New York University. -- -- Extensive contributions were provided by Ada Core Technologies Inc. -- -- -- ------------------------------------------------------------------------------ package body Ada.Strings.UTF_Encoding is use Interfaces; -------------- -- Encoding -- -------------- function Encoding (Item : UTF_String; Default : Encoding_Scheme := UTF_8) return Encoding_Scheme is begin if Item'Length >= 2 then if Item (Item'First .. Item'First + 1) = BOM_16BE then return UTF_16BE; elsif Item (Item'First .. Item'First + 1) = BOM_16LE then return UTF_16LE; elsif Item'Length >= 3 and then Item (Item'First .. Item'First + 2) = BOM_8 then return UTF_8; end if; end if; return Default; end Encoding; ----------------- -- From_UTF_16 -- ----------------- function From_UTF_16 (Item : UTF_16_Wide_String; Output_Scheme : UTF_XE_Encoding; Output_BOM : Boolean := False) return UTF_String is BSpace : constant Natural := 2 * Boolean'Pos (Output_BOM); Result : UTF_String (1 .. 2 * Item'Length + BSpace); Len : Natural; C : Unsigned_16; Iptr : Natural; begin if Output_BOM then Result (1 .. 2) := (if Output_Scheme = UTF_16BE then BOM_16BE else BOM_16LE); Len := 2; else Len := 0; end if; -- Skip input BOM Iptr := Item'First; if Iptr <= Item'Last and then Item (Iptr) = BOM_16 (1) then Iptr := Iptr + 1; end if; -- UTF-16BE case if Output_Scheme = UTF_16BE then while Iptr <= Item'Last loop C := To_Unsigned_16 (Item (Iptr)); Result (Len + 1) := Character'Val (Shift_Right (C, 8)); Result (Len + 2) := Character'Val (C and 16#00_FF#); Len := Len + 2; Iptr := Iptr + 1; end loop; -- UTF-16LE case else while Iptr <= Item'Last loop C := To_Unsigned_16 (Item (Iptr)); Result (Len + 1) := Character'Val (C and 16#00_FF#); Result (Len + 2) := Character'Val (Shift_Right (C, 8)); Len := Len + 2; Iptr := Iptr + 1; end loop; end if; return Result (1 .. Len); end From_UTF_16; -------------------------- -- Raise_Encoding_Error -- -------------------------- procedure Raise_Encoding_Error (Index : Natural) is Val : constant String := Index'Img; begin raise Encoding_Error with "bad input at Item (" & Val (Val'First + 1 .. Val'Last) & ')'; end Raise_Encoding_Error; --------------- -- To_UTF_16 -- --------------- function To_UTF_16 (Item : UTF_String; Input_Scheme : UTF_XE_Encoding; Output_BOM : Boolean := False) return UTF_16_Wide_String is Result : UTF_16_Wide_String (1 .. Item'Length / 2 + 1); Len : Natural; Iptr : Natural; begin if Item'Length mod 2 /= 0 then raise Encoding_Error with "UTF-16BE/LE string has odd length"; end if; -- Deal with input BOM, skip if OK, error if bad BOM Iptr := Item'First; if Item'Length >= 2 then if Item (Iptr .. Iptr + 1) = BOM_16BE then if Input_Scheme = UTF_16BE then Iptr := Iptr + 2; else Raise_Encoding_Error (Iptr); end if; elsif Item (Iptr .. Iptr + 1) = BOM_16LE then if Input_Scheme = UTF_16LE then Iptr := Iptr + 2; else Raise_Encoding_Error (Iptr); end if; elsif Item'Length >= 3 and then Item (Iptr .. Iptr + 2) = BOM_8 then Raise_Encoding_Error (Iptr); end if; end if; -- Output BOM if specified if Output_BOM then Result (1) := BOM_16 (1); Len := 1; else Len := 0; end if; -- UTF-16BE case if Input_Scheme = UTF_16BE then while Iptr < Item'Last loop Len := Len + 1; Result (Len) := Wide_Character'Val (Character'Pos (Item (Iptr)) * 256 + Character'Pos (Item (Iptr + 1))); Iptr := Iptr + 2; end loop; -- UTF-16LE case else while Iptr < Item'Last loop Len := Len + 1; Result (Len) := Wide_Character'Val (Character'Pos (Item (Iptr)) + Character'Pos (Item (Iptr + 1)) * 256); Iptr := Iptr + 2; end loop; end if; return Result (1 .. Len); end To_UTF_16; end Ada.Strings.UTF_Encoding;