Line data Source code
1 : // class template regex -*- C++ -*-
2 :
3 : // Copyright (C) 2013-2021 Free Software Foundation, Inc.
4 : //
5 : // This file is part of the GNU ISO C++ Library. This library is free
6 : // software; you can redistribute it and/or modify it under the
7 : // terms of the GNU General Public License as published by the
8 : // Free Software Foundation; either version 3, or (at your option)
9 : // any later version.
10 :
11 : // This library is distributed in the hope that it will be useful,
12 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : // GNU General Public License for more details.
15 :
16 : // Under Section 7 of GPL version 3, you are granted additional
17 : // permissions described in the GCC Runtime Library Exception, version
18 : // 3.1, as published by the Free Software Foundation.
19 :
20 : // You should have received a copy of the GNU General Public License and
21 : // a copy of the GCC Runtime Library Exception along with this program;
22 : // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 : // <http://www.gnu.org/licenses/>.
24 :
25 : /**
26 : * @file bits/regex_executor.h
27 : * This is an internal header file, included by other library headers.
28 : * Do not attempt to use it directly. @headername{regex}
29 : */
30 :
31 : // FIXME convert comments to doxygen format.
32 :
33 : namespace std _GLIBCXX_VISIBILITY(default)
34 : {
35 : _GLIBCXX_BEGIN_NAMESPACE_VERSION
36 :
37 : namespace __detail
38 : {
39 : /**
40 : * @addtogroup regex-detail
41 : * @{
42 : */
43 :
44 : /**
45 : * @brief Takes a regex and an input string and does the matching.
46 : *
47 : * The %_Executor class has two modes: DFS mode and BFS mode, controlled
48 : * by the template parameter %__dfs_mode (true selects DFS, false BFS).
49 : */
50 : template<typename _BiIter, typename _Alloc, typename _TraitsT,
51 : bool __dfs_mode>
52 : class _Executor
53 : {
// Tag types used to select an overload of _M_main_dispatch at compile time.
// __search_mode is true_type when __dfs_mode is true, so it is the same type
// as __dfs in DFS mode and the same type as __bfs in BFS mode.
54 : using __search_mode = integral_constant<bool, __dfs_mode>;
55 : using __dfs = true_type;
56 : using __bfs = false_type;
57 :
// Threaded through _M_main and every _M_handle_* member. _M_match passes
// _Exact; _M_search_from_first passes _Prefix.
58 : enum class _Match_mode : unsigned char { _Exact, _Prefix };
59 :
60 : public:
// Shorthand for the types the rest of the class is written in terms of:
// the iterator's character type, the regex and its automaton, the vector of
// sub-matches, the match flags and the traits' character-class type.
61 : typedef typename iterator_traits<_BiIter>::value_type _CharT;
62 : typedef basic_regex<_CharT, _TraitsT> _RegexT;
63 : typedef std::vector<sub_match<_BiIter>, _Alloc> _ResultsVec;
64 : typedef regex_constants::match_flag_type _FlagT;
65 : typedef typename _TraitsT::char_class_type _ClassT;
66 : typedef _NFA<_TraitsT> _NFAT;
67 :
68 : public:
// Binds the executor to the input range [__begin, __end), to the regex __re
// and to the caller-owned result vector __results (held by reference).
//
// _M_nfa refers to *__re._M_automaton. _M_rep_count is sized with
// _M_nfa.size() and _M_states is built from _M_nfa._M_start() and
// _M_nfa.size(); both read _M_nfa, which is declared (and therefore
// initialized) before them.
//
// _M_current is not set here: _M_match and _M_search_from_first assign it
// before calling _M_main.
// NOTE(review): _M_has_sol is not initialized by this constructor either;
// presumably _M_main_dispatch (regex_executor.tcc) sets it before it is
// read -- confirm.
69 23136 : _Executor(_BiIter __begin,
70 : _BiIter __end,
71 : _ResultsVec& __results,
72 : const _RegexT& __re,
73 : _FlagT __flags)
74 23136 : : _M_begin(__begin),
75 23136 : _M_end(__end),
76 23136 : _M_re(__re),
77 23136 : _M_nfa(*__re._M_automaton),
78 23136 : _M_results(__results),
79 23136 : _M_rep_count(_M_nfa.size()),
80 23136 : _M_states(_M_nfa._M_start(), _M_nfa.size()),
81 23136 : _M_flags(__flags)
82 : {
83 : using namespace regex_constants;
84 23136 : if (__flags & match_prev_avail) // prev_avail overrides: drop match_not_bol and match_not_bow
85 6501 : _M_flags &= ~(match_not_bol | match_not_bow);
86 23136 : }
87 :
88 : // Set matched when string exactly matches the pattern.
// Resets _M_current to _M_begin and runs _M_main in _Match_mode::_Exact.
// Returns whatever _M_main returns.
89 : bool
90 31 : _M_match()
91 : {
92 31 : _M_current = _M_begin;
93 31 : return _M_main(_Match_mode::_Exact);
94 : }
95 :
96 : // Set matched when some prefix of the string matches the pattern.
// Resets _M_current to _M_begin and runs _M_main in _Match_mode::_Prefix.
// Returns whatever _M_main returns.
97 : bool
98 790539 : _M_search_from_first()
99 : {
100 790539 : _M_current = _M_begin;
101 790539 : return _M_main(_Match_mode::_Prefix);
102 : }
103 :
// Declared here only; the definition is not in this header.
// NOTE(review): presumably defined in bits/regex_executor.tcc, which this
// file includes at its end -- confirm there how it relates to
// _M_search_from_first.
104 : bool
105 : _M_search();
106 :
107 : private:
// Private helpers that are only declared in this header. Each takes the
// current _Match_mode and an NFA state id.
// NOTE(review): judging by the names, there is one handler per kind of NFA
// state, with _M_dfs as the entry point that walks the automaton from
// __start -- confirm against the definitions in regex_executor.tcc.
108 : void
109 : _M_rep_once_more(_Match_mode __match_mode, _StateIdT);
110 :
111 : void
112 : _M_handle_repeat(_Match_mode, _StateIdT);
113 :
114 : void
115 : _M_handle_subexpr_begin(_Match_mode, _StateIdT);
116 :
117 : void
118 : _M_handle_subexpr_end(_Match_mode, _StateIdT);
119 :
120 : void
121 : _M_handle_line_begin_assertion(_Match_mode, _StateIdT);
122 :
123 : void
124 : _M_handle_line_end_assertion(_Match_mode, _StateIdT);
125 :
126 : void
127 : _M_handle_word_boundary(_Match_mode, _StateIdT);
128 :
129 : void
130 : _M_handle_subexpr_lookahead(_Match_mode, _StateIdT);
131 :
132 : void
133 : _M_handle_match(_Match_mode, _StateIdT);
134 :
135 : void
136 : _M_handle_backref(_Match_mode, _StateIdT);
137 :
138 : void
139 : _M_handle_accept(_Match_mode, _StateIdT);
140 :
141 : void
142 : _M_handle_alternative(_Match_mode, _StateIdT);
143 :
144 : void
145 : _M_dfs(_Match_mode __match_mode, _StateIdT __start);
146 :
// Common entry point used by _M_match and _M_search_from_first. Forwards to
// the _M_main_dispatch overload selected by a __search_mode{} tag object, so
// the DFS/BFS choice is resolved by overload resolution at compile time.
147 : bool
148 790570 : _M_main(_Match_mode __match_mode)
149 790570 : { return _M_main_dispatch(__match_mode, __search_mode{}); }
150 :
// DFS overload (tag type __dfs == true_type); declared only in this header.
151 : bool
152 : _M_main_dispatch(_Match_mode __match_mode, __dfs);
153 :
// BFS overload (tag type __bfs == false_type); declared only in this header.
154 : bool
155 : _M_main_dispatch(_Match_mode __match_mode, __bfs);
156 :
// Returns whether __ch belongs to the character class that the regex traits
// look up under the one-character class name "w".
157 : bool
158 0 : _M_is_word(_CharT __ch) const
159 : {
// Only the first element is given; the second is zero-initialized. The
// range passed to lookup_classname below is [__s, __s+1), i.e. just 'w'.
160 : static const _CharT __s[2] = { 'w' };
161 0 : return _M_re._M_automaton->_M_traits.isctype
162 0 : (__ch, _M_re._M_automaton->_M_traits.lookup_classname(__s, __s+1));
163 : }
164 :
// Decides whether ^ can match at _M_current.
//
// At _M_begin:
//   - false if match_not_bol is set;
//   - if match_prev_avail is set, true only in multiline mode and only when
//     the character before _M_begin is a line terminator;
//   - otherwise true.
// Anywhere else: true only in multiline mode and only when the previous
// character is a line terminator.
165 : bool
166 1181 : _M_at_begin() const
167 : {
168 1181 : if (_M_current == _M_begin)
169 : {
170 : // match_not_bol means ^ does not match [_M_begin,_M_begin)
171 1181 : if (_M_flags & regex_constants::match_not_bol)
172 0 : return false;
173 : // match_prev_avail means _M_begin is not the start of the input.
174 1181 : if (_M_flags & regex_constants::match_prev_avail)
175 : {
176 : // For ECMAScript multiline matches, check if the previous
177 : // character is a line terminator.
178 378 : if (_M_match_multiline())
179 0 : return _M_is_line_terminator(*std::prev(_M_current));
180 : else
181 378 : return false;
182 : }
183 : else // ^ matches at _M_begin
184 803 : return true;
185 : }
// Not at _M_begin, so a previous character exists within the range.
186 0 : else if (_M_match_multiline())
187 0 : return _M_is_line_terminator(*std::prev(_M_current));
188 : else
189 0 : return false;
190 : }
191 :
// End-of-line counterpart of _M_at_begin.
//
// At _M_end: true unless match_not_eol is set.
// Anywhere else: true only in multiline mode and only when the character at
// _M_current is a line terminator.
192 : bool
193 355 : _M_at_end() const
194 : {
195 355 : if (_M_current == _M_end)
196 16 : return !(_M_flags & regex_constants::match_not_eol);
197 339 : else if (_M_match_multiline())
198 0 : return _M_is_line_terminator(*_M_current);
199 : else
200 339 : return false;
201 : }
202 :
// Declared here only; the definition is not in this header.
// NOTE(review): presumably reports whether _M_current sits on a word
// boundary, using _M_is_word -- confirm in regex_executor.tcc.
203 : bool
204 : _M_word_boundary() const;
205 :
// Declared here only; takes the NFA state id at which the lookahead starts.
// NOTE(review): presumably used by _M_handle_subexpr_lookahead -- confirm in
// regex_executor.tcc.
206 : bool
207 : _M_lookahead(_StateIdT __next);
208 :
// Returns whether __c counts as a line terminator for this regex.
//
// __c is first narrowed to char through the ctype facet of the traits'
// locale, with ' ' as the fallback for characters that cannot be narrowed.
// '\n' is always a terminator; '\r' is one only when the automaton's options
// include regex_constants::ECMAScript.
209 : bool
210 0 : _M_is_line_terminator(_CharT __c) const
211 : {
212 0 : const auto& __traits = _M_re._M_automaton->_M_traits;
213 0 : const auto& __ct = use_facet<ctype<_CharT>>(__traits.getloc());
// The ' ' fallback is not a terminator, so un-narrowable characters
// fall through to "return false".
214 0 : const char __n{ __ct.narrow(__c, ' ') };
215 0 : if (__n == '\n')
216 0 : return true;
217 0 : if (_M_re._M_automaton->_M_options() & regex_constants::ECMAScript)
218 : {
219 0 : if (__n == '\r')
220 0 : return true;
221 : // FIXME: U+2028 (line separator) and U+2029 (paragraph separator)
222 : }
223 0 : return false;
224 : }
225 :
// Returns true only when the automaton's options have both the ECMAScript
// and the __multiline bits set; either one alone is not enough.
226 : bool
227 717 : _M_match_multiline() const noexcept
228 : {
229 717 : constexpr auto __m
230 : = regex_constants::ECMAScript | regex_constants::__multiline;
// Compare against the full mask so that both bits are required.
231 717 : return (_M_re._M_automaton->_M_options() & __m) == __m;
232 : }
233 :
234 : // Holds additional information used in BFS-mode.
// Primary template, declared but never defined: only the two partial
// specializations below (for the __bfs and __dfs tags) are usable. Both
// expose _M_visited, _M_queue, _M_get_sol_pos and _M_start.
235 : template<typename _SearchMode, typename _ResultsVec>
236 : struct _State_info;
237 :
// BFS-mode bookkeeping: a per-state "visited" flag array plus a queue of
// (state, results) pairs.
238 : template<typename _ResultsVec>
239 : struct _State_info<__bfs, _ResultsVec>
240 : {
// Allocates __n visited flags, value-initialized (all false), and
// remembers the start state.
241 : explicit
242 0 : _State_info(_StateIdT __start, size_t __n)
243 0 : : _M_visited_states(new bool[__n]()), _M_start(__start)
244 0 : { }
245 :
// Test-and-set: returns whether state __i was already marked, and marks
// it if it was not. No bounds check is done on __i.
246 0 : bool _M_visited(_StateIdT __i)
247 : {
248 0 : if (_M_visited_states[__i])
249 0 : return true;
250 0 : _M_visited_states[__i] = true;
251 0 : return false;
252 : }
253 :
// Appends state __i together with a copy of __res to _M_match_queue.
254 0 : void _M_queue(_StateIdT __i, const _ResultsVec& __res)
255 0 : { _M_match_queue.emplace_back(__i, __res); }
256 :
257 : // Dummy implementation for BFS mode: there is no solution position to expose.
258 : _BiIter* _M_get_sol_pos() { return nullptr; }
259 :
260 : // Saves states that need to be considered for the next character.
261 : vector<pair<_StateIdT, _ResultsVec>> _M_match_queue;
262 : // Indicates which states are already visited.
263 : unique_ptr<bool[]> _M_visited_states;
264 : // The start state passed to the constructor.
265 : _StateIdT _M_start;
266 : };
267 :
// DFS-mode bookkeeping: no visited set and no queue, only the start state
// and one iterator for the solution position.
268 : template<typename _ResultsVec>
269 : struct _State_info<__dfs, _ResultsVec>
270 : {
// The size argument is accepted for signature parity with the BFS
// specialization and ignored. _M_sol_pos is default-initialized.
271 : explicit
272 23136 : _State_info(_StateIdT __start, size_t) : _M_start(__start)
273 23136 : { }
274 :
275 : // Dummy implementations for DFS mode.
// _M_visited always answers "not visited"; _M_queue does nothing.
276 2710506 : bool _M_visited(_StateIdT) const { return false; }
277 : void _M_queue(_StateIdT, const _ResultsVec&) { }
278 :
// Hands out the address of _M_sol_pos so callers can read or write it.
279 790570 : _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
280 :
281 : // To record current solution.
// _M_start is the start state passed to the constructor; _M_sol_pos is
// the iterator exposed through _M_get_sol_pos().
282 : _StateIdT _M_start;
283 : _BiIter _M_sol_pos;
284 : };
285 :
286 : public:
// Data members. The constructor initializes _M_begin through _M_flags in
// this declaration order; _M_cur_results is default-constructed, and
// _M_current and _M_has_sol are left for later code to assign.

// NOTE(review): presumably the sub-matches of the path currently being
// explored, as opposed to the caller-visible _M_results -- confirm in
// regex_executor.tcc.
287 : _ResultsVec _M_cur_results;
// Current position in the input; reset to _M_begin by _M_match and
// _M_search_from_first.
288 : _BiIter _M_current;
// Input range [_M_begin, _M_end). Only _M_end is const.
289 : _BiIter _M_begin;
290 : const _BiIter _M_end;
// The regex being executed, and its automaton (*_M_re._M_automaton).
291 : const _RegexT& _M_re;
292 : const _NFAT& _M_nfa;
// Caller-owned vector of sub-matches, held by reference.
293 : _ResultsVec& _M_results;
// Constructed with _M_nfa.size() elements.
// NOTE(review): presumably a per-state (position, count) record used by
// the repeat handling -- confirm in regex_executor.tcc.
294 : vector<pair<_BiIter, int>> _M_rep_count;
// Mode-specific bookkeeping; the specialization is chosen by __search_mode.
295 : _State_info<__search_mode, _ResultsVec> _M_states;
// Match flags as adjusted by the constructor (match_not_bol and
// match_not_bow are cleared when match_prev_avail is set).
296 : _FlagT _M_flags;
297 : // Do we have a solution so far?
298 : bool _M_has_sol;
299 : };
300 :
301 : ///@} regex-detail
302 : } // namespace __detail
303 : _GLIBCXX_END_NAMESPACE_VERSION
304 : } // namespace std
305 :
306 : #include <bits/regex_executor.tcc>
|