LCOV - code coverage report
Current view: top level - 11/bits - regex_executor.h (source / functions) Hit Total Coverage
Test: jami-coverage-filtered.info Lines: 39 68 57.4 %
Date: 2025-08-24 09:11:10 Functions: 18 48 37.5 %

          Line data    Source code
       1             : // class template regex -*- C++ -*-
       2             : 
       3             : // Copyright (C) 2013-2021 Free Software Foundation, Inc.
       4             : //
       5             : // This file is part of the GNU ISO C++ Library.  This library is free
       6             : // software; you can redistribute it and/or modify it under the
       7             : // terms of the GNU General Public License as published by the
       8             : // Free Software Foundation; either version 3, or (at your option)
       9             : // any later version.
      10             : 
      11             : // This library is distributed in the hope that it will be useful,
      12             : // but WITHOUT ANY WARRANTY; without even the implied warranty of
      13             : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14             : // GNU General Public License for more details.
      15             : 
      16             : // Under Section 7 of GPL version 3, you are granted additional
      17             : // permissions described in the GCC Runtime Library Exception, version
      18             : // 3.1, as published by the Free Software Foundation.
      19             : 
      20             : // You should have received a copy of the GNU General Public License and
      21             : // a copy of the GCC Runtime Library Exception along with this program;
      22             : // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      23             : // <http://www.gnu.org/licenses/>.
      24             : 
      25             : /**
      26             :  *  @file bits/regex_executor.h
      27             :  *  This is an internal header file, included by other library headers.
      28             :  *  Do not attempt to use it directly. @headername{regex}
      29             :  */
      30             : 
      31             : // FIXME convert comments to doxygen format.
      32             : 
      33             : namespace std _GLIBCXX_VISIBILITY(default)
      34             : {
      35             : _GLIBCXX_BEGIN_NAMESPACE_VERSION
      36             : 
      37             : namespace __detail
      38             : {
      39             :   /**
      40             :    * @addtogroup regex-detail
      41             :    * @{
      42             :    */
      43             : 
      44             :   /**
      45             :    * @brief Takes a regex and an input string and does the matching.
      46             :    *
      47             :    * The %_Executor class has two modes: DFS mode and BFS mode, controlled
      48             :    * by the template parameter %__dfs_mode.
      49             :    */
      50             :   template<typename _BiIter, typename _Alloc, typename _TraitsT,
      51             :            bool __dfs_mode>
      52             :     class _Executor
      53             :     {
      54             :       using __search_mode = integral_constant<bool, __dfs_mode>;
      55             :       using __dfs = true_type;
      56             :       using __bfs = false_type;
      57             : 
      58             :       enum class _Match_mode : unsigned char { _Exact, _Prefix };
      59             : 
      60             :     public:
      61             :       typedef typename iterator_traits<_BiIter>::value_type _CharT;
      62             :       typedef basic_regex<_CharT, _TraitsT>                 _RegexT;
      63             :       typedef std::vector<sub_match<_BiIter>, _Alloc>       _ResultsVec;
      64             :       typedef regex_constants::match_flag_type              _FlagT;
      65             :       typedef typename _TraitsT::char_class_type            _ClassT;
      66             :       typedef _NFA<_TraitsT>                                _NFAT;
      67             : 
      68             :     public:
      69       23136 :       _Executor(_BiIter         __begin,
      70             :                 _BiIter         __end,
      71             :                 _ResultsVec&    __results,
      72             :                 const _RegexT&  __re,
      73             :                 _FlagT          __flags)
      74       23136 :       : _M_begin(__begin),
      75       23136 :       _M_end(__end),
      76       23136 :       _M_re(__re),
      77       23136 :       _M_nfa(*__re._M_automaton),
      78       23136 :       _M_results(__results),
      79       23136 :       _M_rep_count(_M_nfa.size()),
      80       23136 :       _M_states(_M_nfa._M_start(), _M_nfa.size()),
      81       23136 :       _M_flags(__flags)
      82             :       {
      83             :         using namespace regex_constants;
      84       23136 :         if (__flags & match_prev_avail) // ignore not_bol and not_bow
      85        6501 :           _M_flags &= ~(match_not_bol | match_not_bow);
      86       23136 :       }
      87             : 
      88             :       // Set matched when string exactly matches the pattern.
      89             :       bool
      90          31 :       _M_match()
      91             :       {
      92          31 :         _M_current = _M_begin;
      93          31 :         return _M_main(_Match_mode::_Exact);
      94             :       }
      95             : 
      96             :       // Set matched when some prefix of the string matches the pattern.
      97             :       bool
      98      790539 :       _M_search_from_first()
      99             :       {
     100      790539 :         _M_current = _M_begin;
     101      790539 :         return _M_main(_Match_mode::_Prefix);
     102             :       }
     103             : 
     104             :       bool
     105             :       _M_search();
     106             : 
     107             :     private:
     108             :       void
     109             :       _M_rep_once_more(_Match_mode __match_mode, _StateIdT);
     110             : 
     111             :       void
     112             :       _M_handle_repeat(_Match_mode, _StateIdT);
     113             : 
     114             :       void
     115             :       _M_handle_subexpr_begin(_Match_mode, _StateIdT);
     116             : 
     117             :       void
     118             :       _M_handle_subexpr_end(_Match_mode, _StateIdT);
     119             : 
     120             :       void
     121             :       _M_handle_line_begin_assertion(_Match_mode, _StateIdT);
     122             : 
     123             :       void
     124             :       _M_handle_line_end_assertion(_Match_mode, _StateIdT);
     125             : 
     126             :       void
     127             :       _M_handle_word_boundary(_Match_mode, _StateIdT);
     128             : 
     129             :       void
     130             :       _M_handle_subexpr_lookahead(_Match_mode, _StateIdT);
     131             : 
     132             :       void
     133             :       _M_handle_match(_Match_mode, _StateIdT);
     134             : 
     135             :       void
     136             :       _M_handle_backref(_Match_mode, _StateIdT);
     137             : 
     138             :       void
     139             :       _M_handle_accept(_Match_mode, _StateIdT);
     140             : 
     141             :       void
     142             :       _M_handle_alternative(_Match_mode, _StateIdT);
     143             : 
     144             :       void
     145             :       _M_dfs(_Match_mode __match_mode, _StateIdT __start);
     146             : 
     147             :       bool
     148      790570 :       _M_main(_Match_mode __match_mode)
     149      790570 :       { return _M_main_dispatch(__match_mode, __search_mode{}); }
     150             : 
     151             :       bool
     152             :       _M_main_dispatch(_Match_mode __match_mode, __dfs);
     153             : 
     154             :       bool
     155             :       _M_main_dispatch(_Match_mode __match_mode, __bfs);
     156             : 
     157             :       bool
     158           0 :       _M_is_word(_CharT __ch) const
     159             :       {
     160             :         static const _CharT __s[2] = { 'w' };
     161           0 :         return _M_re._M_automaton->_M_traits.isctype
     162           0 :           (__ch, _M_re._M_automaton->_M_traits.lookup_classname(__s, __s+1));
     163             :       }
     164             : 
     165             :       bool
     166        1181 :       _M_at_begin() const
     167             :       {
     168        1181 :         if (_M_current == _M_begin)
     169             :           {
     170             :             // match_not_bol means ^ does not match [_M_begin,_M_begin)
     171        1181 :             if (_M_flags & regex_constants::match_not_bol)
     172           0 :               return false;
     173             :             // match_prev_avail means _M_begin is not the start of the input.
     174        1181 :             if (_M_flags & regex_constants::match_prev_avail)
     175             :               {
     176             :                 // For ECMAScript multiline matches, check if the previous
     177             :                 // character is a line terminator.
     178         378 :                 if (_M_match_multiline())
     179           0 :                   return _M_is_line_terminator(*std::prev(_M_current));
     180             :                 else
     181         378 :                   return false;
     182             :               }
     183             :             else // ^ matches at _M_begin
     184         803 :               return true;
     185             :           }
     186           0 :         else if (_M_match_multiline())
     187           0 :           return _M_is_line_terminator(*std::prev(_M_current));
     188             :         else
     189           0 :           return false;
     190             :       }
     191             : 
     192             :       bool
     193         355 :       _M_at_end() const
     194             :       {
     195         355 :         if (_M_current == _M_end)
     196          16 :           return !(_M_flags & regex_constants::match_not_eol);
     197         339 :         else if (_M_match_multiline())
     198           0 :           return _M_is_line_terminator(*_M_current);
     199             :         else
     200         339 :           return false;
     201             :       }
     202             : 
     203             :       bool
     204             :       _M_word_boundary() const;
     205             : 
     206             :       bool
     207             :       _M_lookahead(_StateIdT __next);
     208             : 
     209             :       bool
     210           0 :       _M_is_line_terminator(_CharT __c) const
     211             :       {
     212           0 :         const auto& __traits = _M_re._M_automaton->_M_traits;
     213           0 :         const auto& __ct = use_facet<ctype<_CharT>>(__traits.getloc());
     214           0 :         const char __n{ __ct.narrow(__c, ' ') };
     215           0 :         if (__n == '\n')
     216           0 :           return true;
     217           0 :         if (_M_re._M_automaton->_M_options() & regex_constants::ECMAScript)
     218             :           {
     219           0 :             if (__n == '\r')
     220           0 :               return true;
     221             :             // FIXME: U+2028 (line separator) and U+2029 (paragraph separator)
     222             :           }
     223           0 :         return false;
     224             :       }
     225             : 
     226             :       bool
     227         717 :       _M_match_multiline() const noexcept
     228             :       {
     229         717 :         constexpr auto __m
     230             :           = regex_constants::ECMAScript | regex_constants::__multiline;
     231         717 :         return (_M_re._M_automaton->_M_options() & __m) == __m;
     232             :       }
     233             : 
     234             :        // Holds additional information used in BFS-mode.
     235             :       template<typename _SearchMode, typename _ResultsVec>
     236             :         struct _State_info;
     237             : 
     238             :       template<typename _ResultsVec>
     239             :         struct _State_info<__bfs, _ResultsVec>
     240             :         {
     241             :           explicit
     242           0 :           _State_info(_StateIdT __start, size_t __n)
     243           0 :           : _M_visited_states(new bool[__n]()), _M_start(__start)
     244           0 :           { }
     245             : 
     246           0 :           bool _M_visited(_StateIdT __i)
     247             :           {
     248           0 :             if (_M_visited_states[__i])
     249           0 :               return true;
     250           0 :             _M_visited_states[__i] = true;
     251           0 :             return false;
     252             :           }
     253             : 
     254           0 :           void _M_queue(_StateIdT __i, const _ResultsVec& __res)
     255           0 :           { _M_match_queue.emplace_back(__i, __res); }
     256             : 
     257             :           // Dummy implementations for BFS mode.
     258             :           _BiIter* _M_get_sol_pos() { return nullptr; }
     259             : 
     260             :           // Saves states that need to be considered for the next character.
     261             :           vector<pair<_StateIdT, _ResultsVec>>      _M_match_queue;
     262             :           // Indicates which states are already visited.
     263             :           unique_ptr<bool[]>                      _M_visited_states;
     264             :           // To record current solution.
     265             :           _StateIdT _M_start;
     266             :         };
     267             : 
     268             :       template<typename _ResultsVec>
     269             :         struct _State_info<__dfs, _ResultsVec>
     270             :         {
     271             :           explicit
     272       23136 :           _State_info(_StateIdT __start, size_t) : _M_start(__start)
     273       23136 :           { }
     274             : 
     275             :           // Dummy implementations for DFS mode.
     276     2710506 :           bool _M_visited(_StateIdT) const { return false; }
     277             :           void _M_queue(_StateIdT, const _ResultsVec&) { }
     278             : 
     279      790570 :           _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
     280             : 
     281             :           // To record current solution.
     282             :           _StateIdT _M_start;
     283             :           _BiIter   _M_sol_pos;
     284             :         };
     285             : 
     286             :     public:
     287             :       _ResultsVec                                           _M_cur_results;
     288             :       _BiIter                                               _M_current;
     289             :       _BiIter                                               _M_begin;
     290             :       const _BiIter                                         _M_end;
     291             :       const _RegexT&                                        _M_re;
     292             :       const _NFAT&                                          _M_nfa;
     293             :       _ResultsVec&                                          _M_results;
     294             :       vector<pair<_BiIter, int>>                            _M_rep_count;
     295             :       _State_info<__search_mode, _ResultsVec>                 _M_states;
     296             :       _FlagT                                                _M_flags;
     297             :       // Do we have a solution so far?
     298             :       bool                                                  _M_has_sol;
     299             :     };
     300             : 
     301             :  ///@} regex-detail
     302             : } // namespace __detail
     303             : _GLIBCXX_END_NAMESPACE_VERSION
     304             : } // namespace std
     305             : 
     306             : #include <bits/regex_executor.tcc>

Generated by: LCOV version 1.14