LCOV - code coverage report
Current view: top level - 11/bits - regex_scanner.h (source / functions) Hit Total Coverage
Test: jami-coverage-filtered.info Lines: 22 35 62.9 %
Date: 2025-08-24 09:11:10 Functions: 6 7 85.7 %

          Line data    Source code
       1             : // class template regex -*- C++ -*-
       2             : 
       3             : // Copyright (C) 2013-2021 Free Software Foundation, Inc.
       4             : //
       5             : // This file is part of the GNU ISO C++ Library.  This library is free
       6             : // software; you can redistribute it and/or modify it under the
       7             : // terms of the GNU General Public License as published by the
       8             : // Free Software Foundation; either version 3, or (at your option)
       9             : // any later version.
      10             : 
      11             : // This library is distributed in the hope that it will be useful,
      12             : // but WITHOUT ANY WARRANTY; without even the implied warranty of
      13             : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14             : // GNU General Public License for more details.
      15             : 
      16             : // Under Section 7 of GPL version 3, you are granted additional
      17             : // permissions described in the GCC Runtime Library Exception, version
      18             : // 3.1, as published by the Free Software Foundation.
      19             : 
      20             : // You should have received a copy of the GNU General Public License and
      21             : // a copy of the GCC Runtime Library Exception along with this program;
      22             : // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      23             : // <http://www.gnu.org/licenses/>.
      24             : 
      25             : /**
      26             :  *  @file bits/regex_scanner.h
      27             :  *  This is an internal header file, included by other library headers.
      28             :  *  Do not attempt to use it directly. @headername{regex}
      29             :  */
      30             : 
      31             : namespace std _GLIBCXX_VISIBILITY(default)
      32             : {
      33             : _GLIBCXX_BEGIN_NAMESPACE_VERSION
      34             : 
      35             : namespace __detail
      36             : {
      37             :   /**
      38             :    * @addtogroup regex-detail
      39             :    * @{
      40             :    */
      41             : 
      42             :   struct _ScannerBase // Non-template base of _Scanner: grammar flags, scan state and lookup tables.
      43             :   {
      44             :   public:
      45             :     /// Token types returned from the scanner.
      46             :     enum _TokenT : unsigned // unsigned underlying type, so _S_token_unknown can be spelled -1u
      47             :     {
      48             :       _S_token_anychar,
      49             :       _S_token_ord_char,
      50             :       _S_token_oct_num,
      51             :       _S_token_hex_num,
      52             :       _S_token_backref,
      53             :       _S_token_subexpr_begin,
      54             :       _S_token_subexpr_no_group_begin,
      55             :       _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
      56             :       _S_token_subexpr_end,
      57             :       _S_token_bracket_begin,
      58             :       _S_token_bracket_neg_begin,
      59             :       _S_token_bracket_end,
      60             :       _S_token_interval_begin,
      61             :       _S_token_interval_end,
      62             :       _S_token_quoted_class,
      63             :       _S_token_char_class_name,
      64             :       _S_token_collsymbol,
      65             :       _S_token_equiv_class_name,
      66             :       _S_token_opt,
      67             :       _S_token_or,
      68             :       _S_token_closure0,
      69             :       _S_token_closure1,
      70             :       _S_token_line_begin,
      71             :       _S_token_line_end,
      72             :       _S_token_word_bound, // neg if _M_value[0] == 'n'
      73             :       _S_token_comma,
      74             :       _S_token_dup_count,
      75             :       _S_token_eof,
      76             :       _S_token_bracket_dash,
      77             :       _S_token_unknown = -1u // all-ones value, outside the sequential numbering above
      78             :     };
      79             : 
      80             :   protected:
      81             :     typedef regex_constants::syntax_option_type _FlagT; // grammar/option flags tested by the _M_is_* helpers
      82             : 
      83             :     enum _StateT // scan context; NOTE(review): presumably selects among _Scanner's _M_scan_* routines - confirm in regex_scanner.tcc
      84             :     {
      85             :       _S_state_normal,
      86             :       _S_state_in_brace,
      87             :       _S_state_in_bracket,
      88             :     };
      89             : 
      90             :   protected:
      91         368 :     _ScannerBase(_FlagT __flags) // Stores __flags and picks the escape table and special-character set for that grammar.
      92         368 :     : _M_state(_S_state_normal),
      93         368 :     _M_flags(__flags),
      94         736 :     _M_escape_tbl(_M_is_ecma() // ECMAScript gets its own escape table; every other grammar gets the awk table
      95         368 :                   ? _M_ecma_escape_tbl
      96             :                   : _M_awk_escape_tbl),
      97         368 :     _M_spec_char(_M_is_ecma() // first matching flag wins: ECMAScript, basic, extended, grep, egrep, awk
      98         368 :                  ? _M_ecma_spec_char
      99           0 :                  : _M_flags & regex_constants::basic
     100           0 :                  ? _M_basic_spec_char
     101           0 :                  : _M_flags & regex_constants::extended
     102           0 :                  ? _M_extended_spec_char
     103           0 :                  : _M_flags & regex_constants::grep
     104           0 :                  ?  ".[\\*^$\n" // same characters as _M_basic_spec_char plus newline
     105           0 :                  : _M_flags & regex_constants::egrep
     106           0 :                  ? ".[\\()*+?{|^$\n" // same characters as _M_extended_spec_char plus newline
     107           0 :                  : _M_flags & regex_constants::awk
     108           0 :                  ? _M_extended_spec_char
     109             :                  : nullptr), // none of the grammar flags tested above is set
     110         736 :     _M_at_bracket_start(false)
     111         368 :     { __glibcxx_assert(_M_spec_char); } // a null set means no grammar flag matched; _M_token is left uninitialised here
     112             : 
     113             :   protected:
     114             :     const char*
     115         921 :     _M_find_escape(char __c) // Returns a pointer to the character mapped from __c in the active escape table, or nullptr.
     116             :     {
     117         921 :       auto __it = _M_escape_tbl;
     118        7368 :       for (; __it->first != '\0'; ++__it) // linear scan; stops at the entry whose key is '\0'
     119        6447 :         if (__it->first == __c)
     120           0 :           return &__it->second; // points into the table owned by this object
     121         921 :       return nullptr; // also the result for __c == '\0', since the scan stops before comparing the '\0' key
     122             :     }
     123             : 
     124             :     bool
     125        2671 :     _M_is_ecma() const // true if the ECMAScript flag is set
     126        2671 :     { return _M_flags & regex_constants::ECMAScript; }
     127             : 
     128             :     bool
     129         686 :     _M_is_basic() const // true if either basic or grep is set
     130         686 :     { return _M_flags & (regex_constants::basic | regex_constants::grep); }
     131             : 
     132             :     bool
     133             :     _M_is_extended() const // true if any of extended, egrep or awk is set
     134             :     {
     135             :       return _M_flags & (regex_constants::extended
     136             :                          | regex_constants::egrep
     137             :                          | regex_constants::awk);
     138             :     }
     139             : 
     140             :     bool
     141             :     _M_is_grep() const // true if either grep or egrep is set
     142             :     { return _M_flags & (regex_constants::grep | regex_constants::egrep); }
     143             : 
     144             :     bool
     145           0 :     _M_is_awk() const // true if the awk flag is set
     146           0 :     { return _M_flags & regex_constants::awk; }
     147             : 
     148             :   protected:
     149             :     // TODO: Make them static in the next abi change.
     150             :     const std::pair<char, _TokenT> _M_token_tbl[9] = // {pattern character, token}; NOTE(review): the '\0' entry looks like a terminator - its use is not in this header
     151             :       {
     152             :         {'^', _S_token_line_begin},
     153             :         {'$', _S_token_line_end},
     154             :         {'.', _S_token_anychar},
     155             :         {'*', _S_token_closure0},
     156             :         {'+', _S_token_closure1},
     157             :         {'?', _S_token_opt},
     158             :         {'|', _S_token_or},
     159             :         {'\n', _S_token_or}, // grep and egrep
     160             :         {'\0', _S_token_or},
     161             :       };
     162             :     const std::pair<char, char> _M_ecma_escape_tbl[8] = // {escape letter, resulting character}; last entry has key '\0', where _M_find_escape stops
     163             :       {
     164             :         {'0', '\0'},
     165             :         {'b', '\b'},
     166             :         {'f', '\f'},
     167             :         {'n', '\n'},
     168             :         {'r', '\r'},
     169             :         {'t', '\t'},
     170             :         {'v', '\v'},
     171             :         {'\0', '\0'},
     172             :       };
     173             :     const std::pair<char, char> _M_awk_escape_tbl[11] = // same layout; chosen by the constructor for every non-ECMAScript grammar
     174             :       {
     175             :         {'"', '"'},
     176             :         {'/', '/'},
     177             :         {'\\', '\\'},
     178             :         {'a', '\a'},
     179             :         {'b', '\b'},
     180             :         {'f', '\f'},
     181             :         {'n', '\n'},
     182             :         {'r', '\r'},
     183             :         {'t', '\t'},
     184             :         {'v', '\v'},
     185             :         {'\0', '\0'},
     186             :       };
     187             :     const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|"; // special-character set used when ECMAScript is set
     188             :     const char* _M_basic_spec_char = ".[\\*^$"; // special-character set used when basic is set
     189             :     const char* _M_extended_spec_char = ".[\\()*+?{|^$"; // special-character set used for extended and for awk
     190             : 
     191             :     _StateT                       _M_state; // starts as _S_state_normal
     192             :     _FlagT                        _M_flags; // flags given to the constructor
     193             :     _TokenT                       _M_token; // not set by this constructor
     194             :     const std::pair<char, char>*  _M_escape_tbl; // points at _M_ecma_escape_tbl or _M_awk_escape_tbl
     195             :     const char*                   _M_spec_char; // points at one of the special-character sets chosen in the constructor
     196             :     bool                          _M_at_bracket_start; // starts as false
     197             :   };
     198             : 
     199             :   /**
     200             :    * @brief Scans an input range for regex tokens.
     201             :    *
     202             :    * The %_Scanner class interprets the regular expression pattern in
     203             :    * the input range passed to its constructor as a sequence of parse
     204             :    * tokens passed to the regular expression compiler.  The sequence
     205             :    * of tokens provided depends on the flag settings passed to the
     206             :    * constructor: different regular expression grammars will interpret
     207             :    * the same input pattern in syntactically different ways.
     208             :    */
     209             :   template<typename _CharT> // _CharT: character type of the pattern being scanned
     210             :     class _Scanner // Turns a [begin, end) range of _CharT into tokens, read back via _M_get_token and _M_get_value.
     211             :     : public _ScannerBase
     212             :     {
     213             :     public:
     214             :       typedef std::basic_string<_CharT>                           _StringT; // type of the token text held in _M_value
     215             :       typedef regex_constants::syntax_option_type                 _FlagT; // same flag type as _ScannerBase::_FlagT
     216             :       typedef const std::ctype<_CharT>                            _CtypeT; // const-qualified ctype facet type
     217             : 
     218             :       _Scanner(const _CharT* __begin, const _CharT* __end, // declared only; NOTE(review): definition presumably in the regex_scanner.tcc included after this class
     219             :                _FlagT __flags, std::locale __loc);
     220             : 
     221             :       void
     222             :       _M_advance(); // declared only; NOTE(review): presumably scans the next token into _M_token/_M_value - confirm in the .tcc
     223             : 
     224             :       _TokenT
     225      117294 :       _M_get_token() const noexcept // returns the current value of _M_token
     226      117294 :       { return _M_token; }
     227             : 
     228             :       const _StringT&
     229       11613 :       _M_get_value() const noexcept // returns a reference to _M_value, not a copy
     230       11613 :       { return _M_value; }
     231             : 
     232             : #ifdef _GLIBCXX_DEBUG // _M_print is declared only when _GLIBCXX_DEBUG is defined
     233             :       std::ostream&
     234             :       _M_print(std::ostream&);
     235             : #endif
     236             : 
     237             :     private:
     238             :       void
     239             :       _M_scan_normal(); // declared only; the names of the three _M_scan_* members mirror the _StateT enumerators
     240             : 
     241             :       void
     242             :       _M_scan_in_bracket();
     243             : 
     244             :       void
     245             :       _M_scan_in_brace();
     246             : 
     247             :       void
     248             :       _M_eat_escape_ecma(); // the three _M_eat_escape_* members share the signature that _M_eat_escape can point to
     249             : 
     250             :       void
     251             :       _M_eat_escape_posix();
     252             : 
     253             :       void
     254             :       _M_eat_escape_awk();
     255             : 
     256             :       void
     257             :       _M_eat_class(char); // declared only; meaning of the char argument is not visible here
     258             : 
     259             :       const _CharT*                 _M_current; // NOTE(review): presumably the read position within the input range - confirm in the .tcc
     260             :       const _CharT*                 _M_end; // NOTE(review): presumably the end of the input range - confirm in the .tcc
     261             :       _CtypeT&                      _M_ctype; // reference to a const std::ctype facet; NOTE(review): presumably obtained from __loc
     262             :       _StringT                      _M_value; // text returned by _M_get_value
     263             :       void (_Scanner::* _M_eat_escape)(); // pointer to a void() member; NOTE(review): presumably set to one of the _M_eat_escape_* handlers
     264             :     };
     265             : 
     266             :  ///@} regex-detail
     267             : } // namespace __detail
     268             : _GLIBCXX_END_NAMESPACE_VERSION
     269             : } // namespace std
     270             : 
     271             : #include <bits/regex_scanner.tcc>

Generated by: LCOV version 1.14