view gcc/ada/libgnat/g-encstr.adb @ 131:84e7813d76e9

gcc-8.2
author mir3636
date Thu, 25 Oct 2018 07:37:49 +0900
parents 04ced10e8804
children 1830386684a0
line wrap: on
line source

------------------------------------------------------------------------------
--                                                                          --
--                         GNAT RUN-TIME COMPONENTS                         --
--                                                                          --
--                    G N A T . E N C O D E _ S T R I N G                   --
--                                                                          --
--                                 B o d y                                  --
--                                                                          --
--           Copyright (C) 2007-2018, Free Software Foundation, Inc.        --
--                                                                          --
-- GNAT is free software;  you can  redistribute it  and/or modify it under --
-- terms of the  GNU General Public License as published  by the Free Soft- --
-- ware  Foundation;  either version 3,  or (at your option) any later ver- --
-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
-- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
--                                                                          --
-- As a special exception under Section 7 of GPL version 3, you are granted --
-- additional permissions described in the GCC Runtime Library Exception,   --
-- version 3.1, as published by the Free Software Foundation.               --
--                                                                          --
-- You should have received a copy of the GNU General Public License and    --
-- a copy of the GCC Runtime Library Exception along with this program;     --
-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
-- <http://www.gnu.org/licenses/>.                                          --
--                                                                          --
-- GNAT was originally developed  by the GNAT team at  New York University. --
-- Extensive contributions were provided by Ada Core Technologies Inc.      --
--                                                                          --
------------------------------------------------------------------------------

with Interfaces; use Interfaces;

with System.WCh_Con; use System.WCh_Con;
with System.WCh_Cnv; use System.WCh_Cnv;

package body GNAT.Encode_String is

   -----------------------
   -- Local Subprograms --
   -----------------------

   procedure Bad;
   pragma No_Return (Bad);
   --  Raise Constraint_Error with a message saying that the character cannot
   --  be encoded with the given Encoding_Method. This procedure never returns
   --  to its caller.

   procedure Past_End;
   pragma No_Return (Past_End);
   --  Raise Constraint_Error with the message "past end of string". Called
   --  when an attempt is made to store a character beyond the last index of
   --  the result string. This procedure never returns to its caller.

   ---------
   -- Bad --
   ---------

   procedure Bad is
      Message : constant String :=
                  "character cannot be encoded with given Encoding_Method";
      --  Text attached to the exception occurrence raised below

   begin
      raise Constraint_Error with Message;
   end Bad;

   ------------------------
   -- Encode_Wide_String --
   ------------------------

   function Encode_Wide_String (S : Wide_String) return String is
      Max_Per_Char : constant Natural :=
                       WC_Longest_Sequences (Encoding_Method);
      --  Entry of WC_Longest_Sequences for the selected encoding method; used
      --  below as the per-character multiplier when sizing the work buffer.

      Buffer : String (1 .. S'Length * Max_Per_Char);
      --  Work area that receives the encoded text

      Used : Natural;
      --  Length reported back by the procedure form of Encode_Wide_String

   begin
      --  Delegate the actual encoding to the procedure form, then hand back
      --  only the leading part of the buffer that it reports as filled.

      Encode_Wide_String (S => S, Result => Buffer, Length => Used);
      return Buffer (1 .. Used);
   end Encode_Wide_String;

   procedure Encode_Wide_String
     (S      : Wide_String;
      Result : out String;
      Length : out Natural)
   is
      Ptr : Natural := Result'First;
      --  Index in Result at which the next encoded character is stored. It
      --  is based on the bounds of Result, not those of S: the two strings
      --  need not share a lower bound (S may for instance be a slice), and
      --  Ptr is only ever used to index Result.

   begin
      --  Encode the characters of S one after the other. Each call stores
      --  its output starting at Result (Ptr) and advances Ptr past it.
      --  Constraint_Error propagates from these calls if Result is too short
      --  or if a character cannot be encoded with Encoding_Method.

      for J in S'Range loop
         Encode_Wide_Character (S (J), Result, Ptr);
      end loop;

      --  Ptr has advanced by exactly the number of characters stored, so the
      --  encoded text occupies Result (Result'First .. Ptr - 1).

      Length := Ptr - Result'First;
   end Encode_Wide_String;

   -----------------------------
   -- Encode_Wide_Wide_String --
   -----------------------------

   function Encode_Wide_Wide_String (S : Wide_Wide_String) return String is
      Per_Char : constant Natural := WC_Longest_Sequences (Encoding_Method);
      --  Entry of WC_Longest_Sequences for the selected encoding method; it
      --  serves as the per-character multiplier for the buffer size below.

      Encoded : String (1 .. S'Length * Per_Char);
      --  Work area that receives the encoded text

      Last : Natural;
      --  Length reported by the procedure form of Encode_Wide_Wide_String

   begin
      --  The procedure form does the work; only the part of the buffer that
      --  it reports as filled is returned.

      Encode_Wide_Wide_String (S => S, Result => Encoded, Length => Last);
      return Encoded (1 .. Last);
   end Encode_Wide_Wide_String;

   procedure Encode_Wide_Wide_String
     (S      : Wide_Wide_String;
      Result : out String;
      Length : out Natural)
   is
      Ptr : Natural := Result'First;
      --  Index in Result at which the next encoded character is stored. It
      --  is based on the bounds of Result, not those of S: the two strings
      --  need not share a lower bound (S may for instance be a slice), and
      --  Ptr is only ever used to index Result.

   begin
      --  Encode the characters of S one after the other. Each call stores
      --  its output starting at Result (Ptr) and advances Ptr past it.
      --  Constraint_Error propagates from these calls if Result is too short
      --  or if a character cannot be encoded with Encoding_Method.

      for J in S'Range loop
         Encode_Wide_Wide_Character (S (J), Result, Ptr);
      end loop;

      --  Ptr has advanced by exactly the number of characters stored, so the
      --  encoded text occupies Result (Result'First .. Ptr - 1).

      Length := Ptr - Result'First;
   end Encode_Wide_Wide_String;

   ---------------------------
   -- Encode_Wide_Character --
   ---------------------------

   procedure Encode_Wide_Character
     (Char   : Wide_Character;
      Result : in out String;
      Ptr    : in out Natural)
   is
   begin
      --  A Wide_Character is encoded exactly like the Wide_Wide_Character
      --  that has the same position number, so simply widen and delegate.
      --  The encoded text is stored starting at Result (Ptr) and Ptr is
      --  advanced past it.

      --  Note: there is deliberately no exception handler here. The callee
      --  already raises Constraint_Error with the appropriate message, both
      --  for a character that cannot be encoded and for running past the end
      --  of Result. A blanket handler at this level would replace the "past
      --  end of string" message with the unrelated "cannot be encoded" one.

      Encode_Wide_Wide_Character
        (Wide_Wide_Character'Val (Wide_Character'Pos (Char)), Result, Ptr);
   end Encode_Wide_Character;

   --------------------------------
   -- Encode_Wide_Wide_Character --
   --------------------------------

   procedure Encode_Wide_Wide_Character
     (Char   : Wide_Wide_Character;
      Result : in out String;
      Ptr    : in out Natural)
   is
      U : Unsigned_32;
      --  Position number of Char, used for the bit manipulation in the UTF-8
      --  path.

      Overflow : Boolean := False;
      --  Set by Out_Char just before it raises for lack of room in Result, so
      --  that the handler in the non-UTF-8 path can tell this situation apart
      --  from a character that the encoding method cannot represent.

      procedure Out_Char (C : Character);
      pragma Inline (Out_Char);
      --  Store C at Result (Ptr) and advance Ptr by one. If Ptr is already
      --  beyond Result'Last, nothing is stored and Constraint_Error is raised
      --  through Past_End. Also used as the actual for instantiation below.

      --------------
      -- Out_Char --
      --------------

      procedure Out_Char (C : Character) is
      begin
         if Ptr > Result'Last then
            Overflow := True;
            Past_End;
         else
            Result (Ptr) := C;
            Ptr := Ptr + 1;
         end if;
      end Out_Char;

   --  Start of processing for Encode_Wide_Wide_Character;

   begin
      --  Efficient code for UTF-8 case

      if Encoding_Method = WCEM_UTF8 then

         --  Note: for details of UTF8 encoding see RFC 3629

         U := Unsigned_32 (Wide_Wide_Character'Pos (Char));

         --  16#00_0000#-16#00_007F#: 0xxxxxxx

         if U <= 16#00_007F# then
            Out_Char (Character'Val (U));

         --  16#00_0080#-16#00_07FF#: 110xxxxx 10xxxxxx

         elsif U <= 16#00_07FF# then
            Out_Char (Character'Val (2#11000000# or Shift_Right (U, 6)));
            Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));

         --  16#00_0800#-16#00_FFFF#: 1110xxxx 10xxxxxx 10xxxxxx

         elsif U <= 16#00_FFFF# then
            Out_Char (Character'Val (2#11100000# or Shift_Right (U, 12)));
            Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6)
                                                       and 2#00111111#)));
            Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));

         --  16#01_0000#-16#10_FFFF#: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx

         elsif U <= 16#10_FFFF# then
            Out_Char (Character'Val (2#11110000# or Shift_Right (U, 18)));
            Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 12)
                                                       and 2#00111111#)));
            Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6)
                                                       and 2#00111111#)));
            Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));

         --  16#0011_0000#-16#03FF_FFFF#: 111110xx 10xxxxxx 10xxxxxx
         --                               10xxxxxx 10xxxxxx

         --  Note that every code above 16#10_FFFF# is emitted in this five
         --  byte form, including 16#11_0000#-16#1F_FFFF#, which lie below
         --  16#0020_0000# where the form first becomes strictly necessary.

         elsif U <= 16#03FF_FFFF# then
            Out_Char (Character'Val (2#11111000# or Shift_Right (U, 24)));
            Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 18)
                                                       and 2#00111111#)));
            Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 12)
                                                       and 2#00111111#)));
            Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6)
                                                       and 2#00111111#)));
            Out_Char (Character'Val (2#10000000# or (U and 2#00111111#)));

         --  All other cases are rejected as invalid character codes. Note
         --  that this includes:

         --  16#0400_0000#-16#7FFF_FFFF#: 1111110x 10xxxxxx 10xxxxxx
         --                               10xxxxxx 10xxxxxx 10xxxxxx

         --  that is, codes above 16#03FF_FFFF#, for which this routine does
         --  not generate the six byte form.

         else
            Bad;
         end if;

      --  All encoding methods other than UTF-8

      else
         Non_UTF8 : declare
            procedure UTF_32_To_String is
              new UTF_32_To_Char_Sequence (Out_Char);
            --  Instantiate conversion procedure with above Out_Char routine

         begin
            UTF_32_To_String
              (UTF_32_Code (Wide_Wide_Character'Pos (Char)), Encoding_Method);

         exception
            when Constraint_Error =>

               --  If Out_Char ran out of room, let its "past end of string"
               --  error through unchanged. Any other Constraint_Error raised
               --  by the conversion is reported as a character that cannot
               --  be encoded.

               if Overflow then
                  raise;
               else
                  Bad;
               end if;
         end Non_UTF8;
      end if;
   end Encode_Wide_Wide_Character;

   --------------
   -- Past_End --
   --------------

   procedure Past_End is
      Message : constant String := "past end of string";
      --  Text attached to the exception occurrence raised below

   begin
      raise Constraint_Error with Message;
   end Past_End;

end GNAT.Encode_String;