view gcc/ada/libgnat/s-regexp.adb @ 131:84e7813d76e9

gcc-8.2
author mir3636
date Thu, 25 Oct 2018 07:37:49 +0900
parents 04ced10e8804
children 1830386684a0
line wrap: on
line source

------------------------------------------------------------------------------
--                                                                          --
--                         GNAT COMPILER COMPONENTS                         --
--                                                                          --
--                        S Y S T E M . R E G E X P                         --
--                                                                          --
--                                 B o d y                                  --
--                                                                          --
--                     Copyright (C) 1999-2018, AdaCore                     --
--                                                                          --
-- GNAT is free software;  you can  redistribute it  and/or modify it under --
-- terms of the  GNU General Public License as published  by the Free Soft- --
-- ware  Foundation;  either version 3,  or (at your option) any later ver- --
-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
-- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
--                                                                          --
-- As a special exception under Section 7 of GPL version 3, you are granted --
-- additional permissions described in the GCC Runtime Library Exception,   --
-- version 3.1, as published by the Free Software Foundation.               --
--                                                                          --
-- You should have received a copy of the GNU General Public License and    --
-- a copy of the GCC Runtime Library Exception along with this program;     --
-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
-- <http://www.gnu.org/licenses/>.                                          --
--                                                                          --
-- GNAT was originally developed  by the GNAT team at  New York University. --
-- Extensive contributions were provided by Ada Core Technologies Inc.      --
--                                                                          --
------------------------------------------------------------------------------

with Ada.Unchecked_Deallocation;
with System.Case_Util;

package body System.Regexp is

   Initial_Max_States_In_Primary_Table : constant := 100;
   --  Initial size for the number of states in the indefinite state
   --  machine. The number of states will be increased as needed.
   --
   --  This is also used as the maximal number of meta states (groups of
   --  states) in the secondary table.

   Open_Paren    : constant Character := '(';
   Close_Paren   : constant Character := ')';
   Open_Bracket  : constant Character := '[';
   Close_Bracket : constant Character := ']';

   type State_Index is new Natural;
   type Column_Index is new Natural;

   type Regexp_Array is array
     (State_Index range <>, Column_Index range <>) of State_Index;
   --  First index is for the state number. Second index is for the character
   --  type. Contents is the new State.

   type Regexp_Array_Access is access Regexp_Array;
   --  Use this type through the functions Set below, so that it can grow
   --  dynamically depending on the needs.

   type Mapping is array (Character'Range) of Column_Index;
   --  Mapping between characters and column in the Regexp_Array

   type Boolean_Array is array (State_Index range <>) of Boolean;

   type Regexp_Value
     (Alphabet_Size : Column_Index;
      Num_States    : State_Index) is
   record
      Map            : Mapping;
      Case_Sensitive : Boolean;
      States         : Regexp_Array (1 .. Num_States, 0 .. Alphabet_Size);
      Is_Final       : Boolean_Array (1 .. Num_States);
   end record;
   --  Deterministic finite-state machine

   -----------------------
   -- Local Subprograms --
   -----------------------

   procedure Set
     (Table  : in out Regexp_Array_Access;
      State  : State_Index;
      Column : Column_Index;
      Value  : State_Index);
   --  Sets a value in the table. If the table is too small, reallocate it
   --  dynamically so that (State, Column) is a valid index in it.

   function Get
     (Table  : Regexp_Array_Access;
      State  : State_Index;
      Column : Column_Index) return State_Index;
   --  Returns the value in the table at (State, Column). If this index does
   --  not exist in the table, returns zero.

   procedure Free is new Ada.Unchecked_Deallocation
     (Regexp_Array, Regexp_Array_Access);

   ------------
   -- Adjust --
   ------------

   procedure Adjust (R : in out Regexp) is
      Tmp : Regexp_Access;
   begin
      if R.R /= null then
         Tmp := new Regexp_Value (Alphabet_Size => R.R.Alphabet_Size,
                                  Num_States    => R.R.Num_States);
         Tmp.all := R.R.all;
         R.R := Tmp;
      end if;
   end Adjust;

   -------------
   -- Compile --
   -------------

   function Compile
     (Pattern        : String;
      Glob           : Boolean := False;
      Case_Sensitive : Boolean := True) return Regexp
   is
      S : String := Pattern;
      --  The pattern which is really compiled (when the pattern is case
      --  insensitive, we convert this string to lower-cases

      Map : Mapping := (others => 0);
      --  Mapping between characters and columns in the tables

      Alphabet_Size : Column_Index := 0;
      --  Number of significant characters in the regular expression.
      --  This total does not include special operators, such as *, (, ...

      procedure Check_Well_Formed_Pattern;
      --  Check that the pattern to compile is well-formed, so that subsequent
      --  code can rely on this without performing each time the checks to
      --  avoid accessing the pattern outside its bounds. However, not all
      --  well-formedness rules are checked. In particular, rules about special
      --  characters not being treated as regular characters are not checked.

      procedure Create_Mapping;
      --  Creates a mapping between characters in the regexp and columns
      --  in the tables representing the regexp. Test that the regexp is
      --  well-formed Modifies Alphabet_Size and Map

      procedure Create_Primary_Table
        (Table       : out Regexp_Array_Access;
         Num_States  : out State_Index;
         Start_State : out State_Index;
         End_State   : out State_Index);
      --  Creates the first version of the regexp (this is a non deterministic
      --  finite state machine, which is unadapted for a fast pattern
      --  matching algorithm). We use a recursive algorithm to process the
      --  parenthesis sub-expressions.
      --
      --  Table : at the end of the procedure : Column 0 is for any character
      --  ('.') and the last columns are for no character (closure). Num_States
      --  is set to the number of states in the table Start_State is the number
      --  of the starting state in the regexp End_State is the number of the
      --  final state when the regexp matches.

      procedure Create_Primary_Table_Glob
        (Table       : out Regexp_Array_Access;
         Num_States  : out State_Index;
         Start_State : out State_Index;
         End_State   : out State_Index);
      --  Same function as above, but it deals with the second possible
      --  grammar for 'globbing pattern', which is a kind of subset of the
      --  whole regular expression grammar.

      function Create_Secondary_Table
        (First_Table : Regexp_Array_Access;
         Start_State : State_Index;
         End_State   : State_Index) return Regexp;
      --  Creates the definitive table representing the regular expression
      --  This is actually a transformation of the primary table First_Table,
      --  where every state is grouped with the states in its 'no-character'
      --  columns. The transitions between the new states are then recalculated
      --  and if necessary some new states are created.
      --
      --  Note that the resulting finite-state machine is not optimized in
      --  terms of the number of states : it would be more time-consuming to
      --  add a third pass to reduce the number of states in the machine, with
      --  no speed improvement...

      procedure Raise_Exception (M : String; Index : Integer);
      pragma No_Return (Raise_Exception);
      --  Raise an exception, indicating an error at character Index in S

      -------------------------------
      -- Check_Well_Formed_Pattern --
      -------------------------------

      procedure Check_Well_Formed_Pattern is
         J : Integer;

         Past_Elmt : Boolean := False;
         --  Set to True everywhere an elmt has been parsed, if Glob=False,
         --  meaning there can be now an occurrence of '*', '+' and '?'.

         Past_Term : Boolean := False;
         --  Set to True everywhere a term has been parsed, if Glob=False,
         --  meaning there can be now an occurrence of '|'.

         Parenthesis_Level : Integer := 0;
         Curly_Level       : Integer := 0;

         Last_Open : Integer := S'First - 1;
         --  The last occurrence of an opening parenthesis, if Glob=False,
         --  or the last occurrence of an opening curly brace, if Glob=True.

         procedure Raise_Exception_If_No_More_Chars (K : Integer := 0);
         --  If no more characters are raised, call Raise_Exception

         --------------------------------------
         -- Raise_Exception_If_No_More_Chars --
         --------------------------------------

         procedure Raise_Exception_If_No_More_Chars (K : Integer := 0) is
         begin
            if J + K > S'Last then
               Raise_Exception ("Ill-formed pattern while parsing", J);
            end if;
         end Raise_Exception_If_No_More_Chars;

      --  Start of processing for Check_Well_Formed_Pattern

      begin
         J := S'First;
         while J <= S'Last loop
            case S (J) is
               when Open_Bracket =>
                  J := J + 1;
                  Raise_Exception_If_No_More_Chars;

                  if not Glob then
                     if S (J) = '^' then
                        J := J + 1;
                        Raise_Exception_If_No_More_Chars;
                     end if;
                  end if;

                  --  The first character never has a special meaning

                  if S (J) = ']' or else S (J) = '-' then
                     J := J + 1;
                     Raise_Exception_If_No_More_Chars;
                  end if;

                  --  The set of characters cannot be empty

                  if S (J) = ']' then
                     Raise_Exception
                       ("Set of characters cannot be empty in regular "
                          & "expression", J);
                  end if;

                  declare
                     Possible_Range_Start : Boolean := True;
                     --  Set True everywhere a range character '-' can occur

                  begin
                     loop
                        exit when S (J) = Close_Bracket;

                        --  The current character should be followed by a
                        --  closing bracket.

                        Raise_Exception_If_No_More_Chars (1);

                        if S (J) = '-'
                          and then S (J + 1) /= Close_Bracket
                        then
                           if not Possible_Range_Start then
                              Raise_Exception
                                ("No mix of ranges is allowed in "
                                   & "regular expression", J);
                           end if;

                           J := J + 1;
                           Raise_Exception_If_No_More_Chars;

                           --  Range cannot be followed by '-' character,
                           --  except as last character in the set.

                           Possible_Range_Start := False;

                        else
                           Possible_Range_Start := True;
                        end if;

                        if S (J) = '\' then
                           J := J + 1;
                           Raise_Exception_If_No_More_Chars;
                        end if;

                        J := J + 1;
                     end loop;
                  end;

                  --  A closing bracket can end an elmt or term

                  Past_Elmt := True;
                  Past_Term := True;

               when Close_Bracket =>

                  --  A close bracket must follow a open_bracket, and cannot be
                  --  found alone on the line.

                  Raise_Exception
                    ("Incorrect character ']' in regular expression", J);

               when '\' =>
                  if J < S'Last then
                     J := J + 1;

                     --  Any character can be an elmt or a term

                     Past_Elmt := True;
                     Past_Term := True;

                  else
                     --  \ not allowed at the end of the regexp

                     Raise_Exception
                       ("Incorrect character '\' in regular expression", J);
                  end if;

               when Open_Paren =>
                  if not Glob then
                     Parenthesis_Level := Parenthesis_Level + 1;
                     Last_Open := J;

                     --  An open parenthesis does not end an elmt or term

                     Past_Elmt := False;
                     Past_Term := False;
                  end if;

               when Close_Paren =>
                  if not Glob then
                     Parenthesis_Level := Parenthesis_Level - 1;

                     if Parenthesis_Level < 0 then
                        Raise_Exception
                          ("')' is not associated with '(' in regular "
                           & "expression", J);
                     end if;

                     if J = Last_Open + 1 then
                        Raise_Exception
                          ("Empty parentheses not allowed in regular "
                           & "expression", J);
                     end if;

                     if not Past_Term then
                        Raise_Exception
                          ("Closing parenthesis not allowed here in regular "
                             & "expression", J);
                     end if;

                     --  A closing parenthesis can end an elmt or term

                     Past_Elmt := True;
                     Past_Term := True;
                  end if;

               when '{' =>
                  if Glob then
                     Curly_Level := Curly_Level + 1;
                     Last_Open := J;

                  else
                     --  Any character can be an elmt or a term

                     Past_Elmt := True;
                     Past_Term := True;
                  end if;

                  --  No need to check for ',' as the code always accepts them

               when '}' =>
                  if Glob then
                     Curly_Level := Curly_Level - 1;

                     if Curly_Level < 0 then
                        Raise_Exception
                          ("'}' is not associated with '{' in regular "
                           & "expression", J);
                     end if;

                     if J = Last_Open + 1 then
                        Raise_Exception
                          ("Empty curly braces not allowed in regular "
                           & "expression", J);
                     end if;

                  else
                     --  Any character can be an elmt or a term

                     Past_Elmt := True;
                     Past_Term := True;
                  end if;

               when '*' | '?' | '+' =>
                  if not Glob then

                     --  These operators must apply to an elmt sub-expression,
                     --  and cannot be found if one has not just been parsed.

                     if not Past_Elmt then
                        Raise_Exception
                          ("'*', '+' and '?' operators must be "
                           & "applied to an element in regular expression", J);
                     end if;

                     Past_Elmt := False;
                     Past_Term := True;
                  end if;

               when '|' =>
                  if not Glob then

                     --  This operator must apply to a term sub-expression,
                     --  and cannot be found if one has not just been parsed.

                     if not Past_Term then
                        Raise_Exception
                          ("'|' operator must be "
                           & "applied to a term in regular expression", J);
                     end if;

                     Past_Elmt := False;
                     Past_Term := False;
                  end if;

               when others =>
                  if not Glob then

                     --  Any character can be an elmt or a term

                     Past_Elmt := True;
                     Past_Term := True;
                  end if;
            end case;

            J := J + 1;
         end loop;

         --  A closing parenthesis must follow an open parenthesis

         if Parenthesis_Level /= 0 then
            Raise_Exception
              ("'(' must always be associated with a ')'", J);
         end if;

         --  A closing curly brace must follow an open curly brace

         if Curly_Level /= 0 then
            Raise_Exception
              ("'{' must always be associated with a '}'", J);
         end if;
      end Check_Well_Formed_Pattern;

      --------------------
      -- Create_Mapping --
      --------------------

      procedure Create_Mapping is

         procedure Add_In_Map (C : Character);
         --  Add a character in the mapping, if it is not already defined

         ----------------
         -- Add_In_Map --
         ----------------

         procedure Add_In_Map (C : Character) is
         begin
            if Map (C) = 0 then
               Alphabet_Size := Alphabet_Size + 1;
               Map (C) := Alphabet_Size;
            end if;
         end Add_In_Map;

         J                 : Integer := S'First;
         Parenthesis_Level : Integer := 0;
         Curly_Level       : Integer := 0;
         Last_Open         : Integer := S'First - 1;

      --  Start of processing for Create_Mapping

      begin
         while J <= S'Last loop
            case S (J) is
               when Open_Bracket =>
                  J := J + 1;

                  if S (J) = '^' then
                     J := J + 1;
                  end if;

                  if S (J) = ']' or else S (J) = '-' then
                     J := J + 1;
                  end if;

                  --  The first character never has a special meaning

                  loop
                     if J > S'Last then
                        Raise_Exception
                          ("Ran out of characters while parsing ", J);
                     end if;

                     exit when S (J) = Close_Bracket;

                     if S (J) = '-'
                       and then S (J + 1) /= Close_Bracket
                     then
                        declare
                           Start : constant Integer := J - 1;

                        begin
                           J := J + 1;

                           if S (J) = '\' then
                              J := J + 1;
                           end if;

                           for Char in S (Start) .. S (J) loop
                              Add_In_Map (Char);
                           end loop;
                        end;
                     else
                        if S (J) = '\' then
                           J := J + 1;
                        end if;

                        Add_In_Map (S (J));
                     end if;

                     J := J + 1;
                  end loop;

                  --  A close bracket must follow a open_bracket and cannot be
                  --  found alone on the line

               when Close_Bracket =>
                  Raise_Exception
                    ("Incorrect character ']' in regular expression", J);

               when '\' =>
                  if J < S'Last then
                     J := J + 1;
                     Add_In_Map (S (J));

                  else
                     --  Back slash \ not allowed at the end of the regexp

                     Raise_Exception
                       ("Incorrect character '\' in regular expression", J);
                  end if;

               when Open_Paren =>
                  if not Glob then
                     Parenthesis_Level := Parenthesis_Level + 1;
                     Last_Open := J;
                  else
                     Add_In_Map (Open_Paren);
                  end if;

               when Close_Paren =>
                  if not Glob then
                     Parenthesis_Level := Parenthesis_Level - 1;

                     if Parenthesis_Level < 0 then
                        Raise_Exception
                          ("')' is not associated with '(' in regular "
                           & "expression", J);
                     end if;

                     if J = Last_Open + 1 then
                        Raise_Exception
                          ("Empty parenthesis not allowed in regular "
                           & "expression", J);
                     end if;

                  else
                     Add_In_Map (Close_Paren);
                  end if;

               when '.' =>
                  if Glob then
                     Add_In_Map ('.');
                  end if;

               when '{' =>
                  if not Glob then
                     Add_In_Map (S (J));
                  else
                     Curly_Level := Curly_Level + 1;
                  end if;

               when '}' =>
                  if not Glob then
                     Add_In_Map (S (J));
                  else
                     Curly_Level := Curly_Level - 1;
                  end if;

               when '*' | '?' =>
                  if not Glob then
                     if J = S'First then
                        Raise_Exception
                          ("'*', '+', '?' and '|' operators cannot be in "
                           & "first position in regular expression", J);
                     end if;
                  end if;

               when '|' | '+' =>
                  if not Glob then
                     if J = S'First then

                        --  These operators must apply to a sub-expression,
                        --  and cannot be found at the beginning of the line

                        Raise_Exception
                          ("'*', '+', '?' and '|' operators cannot be in "
                           & "first position in regular expression", J);
                     end if;

                  else
                     Add_In_Map (S (J));
                  end if;

               when others =>
                  Add_In_Map (S (J));
            end case;

            J := J + 1;
         end loop;

         --  A closing parenthesis must follow an open parenthesis

         if Parenthesis_Level /= 0 then
            Raise_Exception
              ("'(' must always be associated with a ')'", J);
         end if;

         if Curly_Level /= 0 then
            Raise_Exception
              ("'{' must always be associated with a '}'", J);
         end if;
      end Create_Mapping;

      --------------------------
      -- Create_Primary_Table --
      --------------------------

      procedure Create_Primary_Table
        (Table       : out Regexp_Array_Access;
         Num_States  : out State_Index;
         Start_State : out State_Index;
         End_State   : out State_Index)
      is
         Empty_Char : constant Column_Index := Alphabet_Size + 1;

         Current_State : State_Index := 0;
         --  Index of the last created state

         procedure Add_Empty_Char
           (State    : State_Index;
            To_State : State_Index);
         --  Add a empty-character transition from State to To_State

         procedure Create_Repetition
           (Repetition : Character;
            Start_Prev : State_Index;
            End_Prev   : State_Index;
            New_Start  : out State_Index;
            New_End    : in out State_Index);
         --  Create the table in case we have a '*', '+' or '?'.
         --  Start_Prev .. End_Prev should indicate respectively the start and
         --  end index of the previous expression, to which '*', '+' or '?' is
         --  applied.

         procedure Create_Simple
           (Start_Index : Integer;
            End_Index   : Integer;
            Start_State : out State_Index;
            End_State   : out State_Index);
         --  Fill the table for the regexp Simple. This is the recursive
         --  procedure called to handle () expressions If End_State = 0, then
         --  the call to Create_Simple creates an independent regexp, not a
         --  concatenation Start_Index .. End_Index is the starting index in
         --  the string S.
         --
         --  Warning: it may look like we are creating too many empty-string
         --  transitions, but they are needed to get the correct regexp.
         --  The table is filled as follow ( s means start-state, e means
         --  end-state) :
         --
         --  regexp   state_num | a b * empty_string
         --  -------  ------------------------------
         --    a          1 (s) | 2 - - -
         --               2 (e) | - - - -
         --
         --    ab         1 (s) | 2 - - -
         --               2     | - - - 3
         --               3     | - 4 - -
         --               4 (e) | - - - -
         --
         --    a|b        1     | 2 - - -
         --               2     | - - - 6
         --               3     | - 4 - -
         --               4     | - - - 6
         --               5 (s) | - - - 1,3
         --               6 (e) | - - - -
         --
         --    a*         1     | 2 - - -
         --               2     | - - - 4
         --               3 (s) | - - - 1,4
         --               4 (e) | - - - 3
         --
         --    (a)        1 (s) | 2 - - -
         --               2 (e) | - - - -
         --
         --    a+         1     | 2 - - -
         --               2     | - - - 4
         --               3 (s) | - - - 1
         --               4 (e) | - - - 3
         --
         --    a?         1     | 2 - - -
         --               2     | - - - 4
         --               3 (s) | - - - 1,4
         --               4 (e) | - - - -
         --
         --    .          1 (s) | 2 2 2 -
         --               2 (e) | - - - -

         function Next_Sub_Expression
           (Start_Index : Integer;
            End_Index   : Integer) return Integer;
         --  Returns the index of the last character of the next sub-expression
         --  in Simple. Index cannot be greater than End_Index.

         --------------------
         -- Add_Empty_Char --
         --------------------

         procedure Add_Empty_Char
           (State    : State_Index;
            To_State : State_Index)
         is
            J : Column_Index := Empty_Char;

         begin
            while Get (Table, State, J) /= 0 loop
               J := J + 1;
            end loop;

            Set (Table, State, J, To_State);
         end Add_Empty_Char;

         -----------------------
         -- Create_Repetition --
         -----------------------

         procedure Create_Repetition
           (Repetition : Character;
            Start_Prev : State_Index;
            End_Prev   : State_Index;
            New_Start  : out State_Index;
            New_End    : in out State_Index)
         is
         begin
            New_Start := Current_State + 1;

            if New_End /= 0 then
               Add_Empty_Char (New_End, New_Start);
            end if;

            Current_State := Current_State + 2;
            New_End   := Current_State;

            Add_Empty_Char (End_Prev, New_End);
            Add_Empty_Char (New_Start, Start_Prev);

            if Repetition /= '+' then
               Add_Empty_Char (New_Start, New_End);
            end if;

            if Repetition /= '?' then
               Add_Empty_Char (New_End, New_Start);
            end if;
         end Create_Repetition;

         -------------------
         -- Create_Simple --
         -------------------

         procedure Create_Simple
           (Start_Index : Integer;
            End_Index   : Integer;
            Start_State : out State_Index;
            End_State   : out State_Index)
         is
            J          : Integer := Start_Index;
            Last_Start : State_Index := 0;

         begin
            Start_State := 0;
            End_State   := 0;
            while J <= End_Index loop
               case S (J) is
                  when Open_Paren =>
                     declare
                        J_Start    : constant Integer := J + 1;
                        Next_Start : State_Index;
                        Next_End   : State_Index;

                     begin
                        J := Next_Sub_Expression (J, End_Index);
                        Create_Simple (J_Start, J - 1, Next_Start, Next_End);

                        if J < End_Index
                          and then (S (J + 1) = '*' or else
                                    S (J + 1) = '+' or else
                                    S (J + 1) = '?')
                        then
                           J := J + 1;
                           Create_Repetition
                             (S (J),
                              Next_Start,
                              Next_End,
                              Last_Start,
                              End_State);

                        else
                           Last_Start := Next_Start;

                           if End_State /= 0 then
                              Add_Empty_Char (End_State, Last_Start);
                           end if;

                           End_State := Next_End;
                        end if;
                     end;

                  when '|' =>
                     declare
                        Start_Prev : constant State_Index := Start_State;
                        End_Prev   : constant State_Index := End_State;
                        Start_J    : constant Integer     := J + 1;
                        Start_Next : State_Index := 0;
                        End_Next   : State_Index := 0;

                     begin
                        J := Next_Sub_Expression (J, End_Index);

                        --  Create a new state for the start of the alternative

                        Current_State := Current_State + 1;
                        Last_Start := Current_State;
                        Start_State := Last_Start;

                        --  Create the tree for the second part of alternative

                        Create_Simple (Start_J, J, Start_Next, End_Next);

                        --  Create the end state

                        Add_Empty_Char (Last_Start, Start_Next);
                        Add_Empty_Char (Last_Start, Start_Prev);
                        Current_State := Current_State + 1;
                        End_State := Current_State;
                        Add_Empty_Char (End_Prev, End_State);
                        Add_Empty_Char (End_Next, End_State);
                     end;

                  when Open_Bracket =>
                     Current_State := Current_State + 1;

                     declare
                        Next_State : State_Index := Current_State + 1;

                     begin
                        J := J + 1;

                        if S (J) = '^' then
                           J := J + 1;

                           Next_State := 0;

                           for Column in 0 .. Alphabet_Size loop
                              Set (Table, Current_State, Column,
                                   Value => Current_State + 1);
                           end loop;
                        end if;

                        --  Automatically add the first character

                        if S (J) = '-' or else S (J) = ']' then
                           Set (Table, Current_State, Map (S (J)),
                                Value => Next_State);
                           J := J + 1;
                        end if;

                        --  Loop till closing bracket found

                        loop
                           exit when S (J) = Close_Bracket;

                           if S (J) = '-'
                             and then S (J + 1) /= ']'
                           then
                              declare
                                 Start : constant Integer := J - 1;

                              begin
                                 J := J + 1;

                                 if S (J) = '\' then
                                    J := J + 1;
                                 end if;

                                 for Char in S (Start) .. S (J) loop
                                    Set (Table, Current_State, Map (Char),
                                         Value => Next_State);
                                 end loop;
                              end;

                           else
                              if S (J) = '\' then
                                 J := J + 1;
                              end if;

                              Set (Table, Current_State, Map (S (J)),
                                   Value => Next_State);
                           end if;
                           J := J + 1;
                        end loop;
                     end;

                     Current_State := Current_State + 1;

                     --  If the next symbol is a special symbol

                     if J < End_Index
                       and then (S (J + 1) = '*' or else
                                 S (J + 1) = '+' or else
                                 S (J + 1) = '?')
                     then
                        J := J + 1;
                        Create_Repetition
                          (S (J),
                           Current_State - 1,
                           Current_State,
                           Last_Start,
                           End_State);

                     else
                        Last_Start := Current_State - 1;

                        if End_State /= 0 then
                           Add_Empty_Char (End_State, Last_Start);
                        end if;

                        End_State := Current_State;
                     end if;

                  when Close_Bracket
                     | Close_Paren
                     | '*' | '+' | '?'
                  =>
                     Raise_Exception
                       ("Incorrect character in regular expression :", J);

                  when others =>
                     Current_State := Current_State + 1;

                     --  Create the state for the symbol S (J)

                     if S (J) = '.' then
                        for K in 0 .. Alphabet_Size loop
                           Set (Table, Current_State, K,
                                Value => Current_State + 1);
                        end loop;

                     else
                        if S (J) = '\' then
                           J := J + 1;
                        end if;

                        Set (Table, Current_State, Map (S (J)),
                             Value => Current_State + 1);
                     end if;

                     Current_State := Current_State + 1;

                     --  If the next symbol is a special symbol

                     if J < End_Index
                       and then (S (J + 1) = '*' or else
                                 S (J + 1) = '+' or else
                                 S (J + 1) = '?')
                     then
                        J := J + 1;
                        Create_Repetition
                          (S (J),
                           Current_State - 1,
                           Current_State,
                           Last_Start,
                           End_State);

                     else
                        Last_Start := Current_State - 1;

                        if End_State /= 0 then
                           Add_Empty_Char (End_State, Last_Start);
                        end if;

                        End_State := Current_State;
                     end if;
               end case;

               if Start_State = 0 then
                  Start_State := Last_Start;
               end if;

               J := J + 1;
            end loop;
         end Create_Simple;

         -------------------------
         -- Next_Sub_Expression --
         -------------------------

         function Next_Sub_Expression
           (Start_Index : Integer;
            End_Index   : Integer) return Integer
         is
            J              : Integer := Start_Index;
            Start_On_Alter : Boolean := False;

         begin
            if S (J) = '|' then
               Start_On_Alter := True;
            end if;

            loop
               exit when J = End_Index;
               J := J + 1;

               case S (J) is
                  when '\' =>
                     J := J + 1;

                  when Open_Bracket =>
                     loop
                        J := J + 1;
                        exit when S (J) = Close_Bracket;

                        if S (J) = '\' then
                           J := J + 1;
                        end if;
                     end loop;

                  when Open_Paren =>
                     J := Next_Sub_Expression (J, End_Index);

                  when Close_Paren =>
                     return J;

                  when '|' =>
                     if Start_On_Alter then
                        return J - 1;
                     end if;

                  when others =>
                     null;
               end case;
            end loop;

            return J;
         end Next_Sub_Expression;

      --  Start of processing for Create_Primary_Table

      begin
         Table.all := (others => (others => 0));
         Create_Simple (S'First, S'Last, Start_State, End_State);
         Num_States := Current_State;
      end Create_Primary_Table;

      -------------------------------
      -- Create_Primary_Table_Glob --
      -------------------------------

      procedure Create_Primary_Table_Glob
        (Table       : out Regexp_Array_Access;
         Num_States  : out State_Index;
         Start_State : out State_Index;
         End_State   : out State_Index)
      is
         Empty_Char : constant Column_Index := Alphabet_Size + 1;

         Current_State : State_Index := 0;
         --  Index of the last created state

         procedure Add_Empty_Char
           (State    : State_Index;
            To_State : State_Index);
         --  Add a empty-character transition from State to To_State

         procedure Create_Simple
           (Start_Index : Integer;
            End_Index   : Integer;
            Start_State : out State_Index;
            End_State   : out State_Index);
         --  Fill the table for the S (Start_Index .. End_Index).
         --  This is the recursive procedure called to handle () expressions

         --------------------
         -- Add_Empty_Char --
         --------------------

         procedure Add_Empty_Char
           (State    : State_Index;
            To_State : State_Index)
         is
            J : Column_Index;

         begin
            J := Empty_Char;
            while Get (Table, State, J) /= 0 loop
               J := J + 1;
            end loop;

            Set (Table, State, J, Value => To_State);
         end Add_Empty_Char;

         -------------------
         -- Create_Simple --
         -------------------

         procedure Create_Simple
           (Start_Index : Integer;
            End_Index   : Integer;
            Start_State : out State_Index;
            End_State   : out State_Index)
         is
            J          : Integer;
            Last_Start : State_Index := 0;

         begin
            Start_State := 0;
            End_State   := 0;

            J := Start_Index;
            while J <= End_Index loop
               case S (J) is
                  when Open_Bracket =>
                     Current_State := Current_State + 1;

                     declare
                        Next_State : State_Index := Current_State + 1;

                     begin
                        J := J + 1;

                        if S (J) = '^' then
                           J := J + 1;
                           Next_State := 0;

                           for Column in 0 .. Alphabet_Size loop
                              Set (Table, Current_State, Column,
                                   Value => Current_State + 1);
                           end loop;
                        end if;

                        --  Automatically add the first character

                        if S (J) = '-' or else S (J) = ']' then
                           Set (Table, Current_State, Map (S (J)),
                                Value => Current_State);
                           J := J + 1;
                        end if;

                        --  Loop till closing bracket found

                        loop
                           exit when S (J) = Close_Bracket;

                           if S (J) = '-'
                             and then S (J + 1) /= ']'
                           then
                              declare
                                 Start : constant Integer := J - 1;

                              begin
                                 J := J + 1;

                                 if S (J) = '\' then
                                    J := J + 1;
                                 end if;

                                 for Char in S (Start) .. S (J) loop
                                    Set (Table, Current_State, Map (Char),
                                         Value => Next_State);
                                 end loop;
                              end;

                           else
                              if S (J) = '\' then
                                 J := J + 1;
                              end if;

                              Set (Table, Current_State, Map (S (J)),
                                   Value => Next_State);
                           end if;
                           J := J + 1;
                        end loop;
                     end;

                     Last_Start := Current_State;
                     Current_State := Current_State + 1;

                     if End_State /= 0 then
                        Add_Empty_Char (End_State, Last_Start);
                     end if;

                     End_State := Current_State;

                  when '{' =>
                     declare
                        End_Sub          : Integer;
                        Start_Regexp_Sub : State_Index;
                        End_Regexp_Sub   : State_Index;
                        Create_Start     : State_Index := 0;

                        Create_End : State_Index := 0;
                        --  Initialized to avoid junk warning

                     begin
                        while S (J) /= '}' loop

                           --  First step : find sub pattern

                           End_Sub := J + 1;
                           while S (End_Sub) /= ','
                             and then S (End_Sub) /= '}'
                           loop
                              End_Sub := End_Sub + 1;
                           end loop;

                           --  Second step : create a sub pattern

                           Create_Simple
                             (J + 1,
                              End_Sub - 1,
                              Start_Regexp_Sub,
                              End_Regexp_Sub);

                           J := End_Sub;

                           --  Third step : create an alternative

                           if Create_Start = 0 then
                              Current_State := Current_State + 1;
                              Create_Start := Current_State;
                              Add_Empty_Char (Create_Start, Start_Regexp_Sub);
                              Current_State := Current_State + 1;
                              Create_End := Current_State;
                              Add_Empty_Char (End_Regexp_Sub, Create_End);

                           else
                              Current_State := Current_State + 1;
                              Add_Empty_Char (Current_State, Create_Start);
                              Create_Start := Current_State;
                              Add_Empty_Char (Create_Start, Start_Regexp_Sub);
                              Add_Empty_Char (End_Regexp_Sub, Create_End);
                           end if;
                        end loop;

                        if End_State /= 0 then
                           Add_Empty_Char (End_State, Create_Start);
                        end if;

                        End_State := Create_End;
                        Last_Start := Create_Start;
                     end;

                  when '*' =>
                     Current_State := Current_State + 1;

                     if End_State /= 0 then
                        Add_Empty_Char (End_State, Current_State);
                     end if;

                     Add_Empty_Char (Current_State, Current_State + 1);
                     Add_Empty_Char (Current_State, Current_State + 3);
                     Last_Start := Current_State;

                     Current_State := Current_State + 1;

                     for K in 0 .. Alphabet_Size loop
                        Set (Table, Current_State, K,
                             Value => Current_State + 1);
                     end loop;

                     Current_State := Current_State + 1;
                     Add_Empty_Char (Current_State, Current_State + 1);

                     Current_State := Current_State + 1;
                     Add_Empty_Char (Current_State,  Last_Start);
                     End_State := Current_State;

                  when others =>
                     Current_State := Current_State + 1;

                     if S (J) = '?' then
                        for K in 0 .. Alphabet_Size loop
                           Set (Table, Current_State, K,
                                Value => Current_State + 1);
                        end loop;

                     else
                        if S (J) = '\' then
                           J := J + 1;
                        end if;

                        --  Create the state for the symbol S (J)

                        Set (Table, Current_State, Map (S (J)),
                             Value => Current_State + 1);
                     end if;

                     Last_Start := Current_State;
                     Current_State := Current_State + 1;

                     if End_State /= 0 then
                        Add_Empty_Char (End_State, Last_Start);
                     end if;

                     End_State := Current_State;
               end case;

               if Start_State = 0 then
                  Start_State := Last_Start;
               end if;

               J := J + 1;
            end loop;
         end Create_Simple;

      --  Start of processing for Create_Primary_Table_Glob

      begin
         Table.all := (others => (others => 0));
         Create_Simple (S'First, S'Last, Start_State, End_State);
         Num_States := Current_State;
      end Create_Primary_Table_Glob;

      ----------------------------
      -- Create_Secondary_Table --
      ----------------------------

      function Create_Secondary_Table
        (First_Table : Regexp_Array_Access;
         Start_State : State_Index;
         End_State   : State_Index) return Regexp
      is
         Last_Index : constant State_Index := First_Table'Last (1);

         type Meta_State is array (0 .. Last_Index) of Boolean;
         pragma Pack (Meta_State);
         --  Whether a state from first_table belongs to a metastate.

         No_States : constant Meta_State := (others => False);

         type Meta_States_Array is array (State_Index range <>) of Meta_State;
         type Meta_States_List is access all Meta_States_Array;
         procedure Unchecked_Free is new Ada.Unchecked_Deallocation
            (Meta_States_Array, Meta_States_List);
         Meta_States : Meta_States_List;
         --  Components of meta-states. A given state might belong to
         --  several meta-states.
         --  This array grows dynamically.

         type Char_To_State is array (0 .. Alphabet_Size) of State_Index;
         type Meta_States_Transition_Arr is
            array (State_Index range <>) of Char_To_State;
         type Meta_States_Transition is access all Meta_States_Transition_Arr;
         procedure Unchecked_Free is new Ada.Unchecked_Deallocation
           (Meta_States_Transition_Arr, Meta_States_Transition);
         Table : Meta_States_Transition;
         --  Documents the transitions between each meta-state. The
         --  first index is the meta-state, the second column is the
         --  character seen in the input, the value is the new meta-state.

         Temp_State_Not_Null : Boolean;

         Current_State       : State_Index := 1;
         --  The current meta-state we are creating

         Nb_State            : State_Index := 1;
         --  The total number of meta-states created so far.

         procedure Closure
           (Meta_State : State_Index;
            State      : State_Index);
         --  Compute the closure of the state (that is every other state which
         --  has a empty-character transition) and add it to the state

         procedure Ensure_Meta_State (Meta : State_Index);
         --  grows the Meta_States array as needed to make sure that there
         --  is enough space to store the new meta state.

         -----------------------
         -- Ensure_Meta_State --
         -----------------------

         procedure Ensure_Meta_State (Meta : State_Index) is
            Tmp  : Meta_States_List       := Meta_States;
            Tmp2 : Meta_States_Transition := Table;

         begin
            if Meta_States = null then
               Meta_States := new Meta_States_Array
                  (1 .. State_Index'Max (Last_Index, Meta) + 1);
               Meta_States (Meta_States'Range) := (others => No_States);

               Table := new Meta_States_Transition_Arr
                  (1 .. State_Index'Max (Last_Index, Meta) + 1);
               Table.all := (others => (others => 0));

            elsif Meta > Meta_States'Last then
               Meta_States := new Meta_States_Array
                  (1 .. State_Index'Max (2 * Tmp'Last, Meta));
               Meta_States (Tmp'Range) := Tmp.all;
               Meta_States (Tmp'Last + 1 .. Meta_States'Last) :=
                  (others => No_States);
               Unchecked_Free (Tmp);

               Table := new Meta_States_Transition_Arr
                  (1 .. State_Index'Max (2 * Tmp2'Last, Meta) + 1);
               Table (Tmp2'Range) := Tmp2.all;
               Table (Tmp2'Last + 1 .. Table'Last) :=
                  (others => (others => 0));
               Unchecked_Free (Tmp2);
            end if;
         end Ensure_Meta_State;

         -------------
         -- Closure --
         -------------

         procedure Closure
           (Meta_State : State_Index;
            State      : State_Index)
         is
         begin
            if not Meta_States (Meta_State)(State) then
               Meta_States (Meta_State)(State) := True;

               --  For each transition on empty-character

               for Column in Alphabet_Size + 1 .. First_Table'Last (2) loop
                  exit when First_Table (State, Column) = 0;
                  Closure (Meta_State, First_Table (State, Column));
               end loop;
            end if;
         end Closure;

      --  Start of processing for Create_Secondary_Table

      begin
         --  Create a new state

         Ensure_Meta_State (Current_State);
         Closure (Current_State, Start_State);

         while Current_State <= Nb_State loop

            --  We will be trying, below, to create the next meta-state

            Ensure_Meta_State (Nb_State + 1);

            --  For every character in the regexp, calculate the possible
            --  transitions from Current_State.

            for Column in 0 .. Alphabet_Size loop
               Temp_State_Not_Null := False;

               for K in Meta_States (Current_State)'Range loop
                  if Meta_States (Current_State)(K)
                    and then First_Table (K, Column) /= 0
                  then
                     Closure (Nb_State + 1, First_Table (K, Column));
                     Temp_State_Not_Null := True;
                  end if;
               end loop;

               --  If at least one transition existed

               if Temp_State_Not_Null then

                  --  Check if this new state corresponds to an old one

                  for K in 1 .. Nb_State loop
                     if Meta_States (K) = Meta_States (Nb_State + 1) then
                        Table (Current_State)(Column) := K;

                        --  Reset data, for the next time we try that state

                        Meta_States (Nb_State + 1) := No_States;
                        exit;
                     end if;
                  end loop;

                  --  If not, create a new state

                  if Table (Current_State)(Column) = 0 then
                     Nb_State := Nb_State + 1;
                     Ensure_Meta_State (Nb_State + 1);
                     Table (Current_State)(Column) := Nb_State;
                  end if;
               end if;
            end loop;

            Current_State := Current_State + 1;
         end loop;

         --  Returns the regexp

         declare
            R : Regexp_Access;

         begin
            R := new Regexp_Value (Alphabet_Size => Alphabet_Size,
                                   Num_States    => Nb_State);
            R.Map            := Map;
            R.Case_Sensitive := Case_Sensitive;

            for S in 1 .. Nb_State loop
               R.Is_Final (S) := Meta_States (S)(End_State);
            end loop;

            for State in 1 .. Nb_State loop
               for K in 0 .. Alphabet_Size loop
                  R.States (State, K) := Table (State)(K);
               end loop;
            end loop;

            Unchecked_Free (Meta_States);
            Unchecked_Free (Table);

            return (Ada.Finalization.Controlled with R => R);
         end;
      end Create_Secondary_Table;

      ---------------------
      -- Raise_Exception --
      ---------------------

      procedure Raise_Exception (M : String; Index : Integer) is
      begin
         raise Error_In_Regexp with M & " at offset" & Index'Img;
      end Raise_Exception;

   --  Start of processing for Compile

   begin
      --  Special case for the empty string: it always matches, and the
      --  following processing would fail on it.

      if S = "" then
         return (Ada.Finalization.Controlled with
                 R => new Regexp_Value'
                      (Alphabet_Size => 0,
                       Num_States    => 1,
                       Map           => (others => 0),
                       States        => (others => (others => 1)),
                       Is_Final      => (others => True),
                       Case_Sensitive => True));
      end if;

      if not Case_Sensitive then
         System.Case_Util.To_Lower (S);
      end if;

      --  Check the pattern is well-formed before any treatment

      Check_Well_Formed_Pattern;

      Create_Mapping;

      --  Creates the primary table

      declare
         Table       : Regexp_Array_Access;
         Num_States  : State_Index;
         Start_State : State_Index;
         End_State   : State_Index;
         R           : Regexp;

      begin
         Table := new Regexp_Array (1 .. Initial_Max_States_In_Primary_Table,
                                    0 .. Alphabet_Size + 10);
         if not Glob then
            Create_Primary_Table (Table, Num_States, Start_State, End_State);
         else
            Create_Primary_Table_Glob
              (Table, Num_States, Start_State, End_State);
         end if;

         --  Creates the secondary table

         R := Create_Secondary_Table (Table, Start_State, End_State);
         Free (Table);
         return R;
      end;
   end Compile;

   --------------
   -- Finalize --
   --------------

   procedure Finalize (R : in out Regexp) is
      procedure Free is new
        Ada.Unchecked_Deallocation (Regexp_Value, Regexp_Access);
   begin
      Free (R.R);
   end Finalize;

   ---------
   -- Get --
   ---------

   function Get
     (Table  : Regexp_Array_Access;
      State  : State_Index;
      Column : Column_Index) return State_Index
   is
   begin
      if State <= Table'Last (1)
        and then Column <= Table'Last (2)
      then
         return Table (State, Column);
      else
         return 0;
      end if;
   end Get;

   -----------
   -- Match --
   -----------

   function Match (S : String; R : Regexp) return Boolean is
      Current_State : State_Index := 1;

   begin
      if R.R = null then
         raise Constraint_Error;
      end if;

      for Char in S'Range loop

         if R.R.Case_Sensitive then
            Current_State := R.R.States (Current_State, R.R.Map (S (Char)));
         else
            Current_State :=
              R.R.States (Current_State,
                          R.R.Map (System.Case_Util.To_Lower (S (Char))));
         end if;

         if Current_State = 0 then
            return False;
         end if;

      end loop;

      return R.R.Is_Final (Current_State);
   end Match;

   ---------
   -- Set --
   ---------

   procedure Set
     (Table  : in out Regexp_Array_Access;
      State  : State_Index;
      Column : Column_Index;
      Value  : State_Index)
   is
      New_Lines   : State_Index;
      New_Columns : Column_Index;
      New_Table   : Regexp_Array_Access;

   begin
      if State <= Table'Last (1)
        and then Column <= Table'Last (2)
      then
         Table (State, Column) := Value;
      else
         --  Doubles the size of the table until it is big enough that
         --  (State, Column) is a valid index.

         New_Lines := Table'Last (1) * (State / Table'Last (1) + 1);
         New_Columns := Table'Last (2) * (Column / Table'Last (2) + 1);
         New_Table := new Regexp_Array (Table'First (1) .. New_Lines,
                                        Table'First (2) .. New_Columns);
         New_Table.all := (others => (others => 0));

         for J in Table'Range (1) loop
            for K in Table'Range (2) loop
               New_Table (J, K) := Table (J, K);
            end loop;
         end loop;

         Free (Table);
         Table := New_Table;
         Table (State, Column) := Value;
      end if;
   end Set;

end System.Regexp;