Line data Source code
1 : // class template regex -*- C++ -*-
2 :
3 : // Copyright (C) 2013-2021 Free Software Foundation, Inc.
4 : //
5 : // This file is part of the GNU ISO C++ Library. This library is free
6 : // software; you can redistribute it and/or modify it under the
7 : // terms of the GNU General Public License as published by the
8 : // Free Software Foundation; either version 3, or (at your option)
9 : // any later version.
10 :
11 : // This library is distributed in the hope that it will be useful,
12 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : // GNU General Public License for more details.
15 :
16 : // Under Section 7 of GPL version 3, you are granted additional
17 : // permissions described in the GCC Runtime Library Exception, version
18 : // 3.1, as published by the Free Software Foundation.
19 :
20 : // You should have received a copy of the GNU General Public License and
21 : // a copy of the GCC Runtime Library Exception along with this program;
22 : // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 : // <http://www.gnu.org/licenses/>.
24 :
25 : /**
26 : * @file bits/regex.tcc
27 : * This is an internal header file, included by other library headers.
28 : * Do not attempt to use it directly. @headername{regex}
29 : */
30 :
31 : namespace std _GLIBCXX_VISIBILITY(default)
32 : {
33 : _GLIBCXX_BEGIN_NAMESPACE_VERSION
34 :
35 : namespace __detail
36 : {
37 : /// @cond undocumented
38 :
39 : // Result of merging regex_match and regex_search.
40 : //
41 : // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
42 : // the other one if possible, for test purpose).
43 : //
44 : // That __match_mode is true means regex_match, else regex_search.
45 : template<typename _BiIter, typename _Alloc,
46 : typename _CharT, typename _TraitsT,
47 : _RegexExecutorPolicy __policy,
48 : bool __match_mode>
49 : bool
50 23136 : __regex_algo_impl(_BiIter __s,
51 : _BiIter __e,
52 : match_results<_BiIter, _Alloc>& __m,
53 : const basic_regex<_CharT, _TraitsT>& __re,
54 : regex_constants::match_flag_type __flags)
55 : {
56 23136 : if (__re._M_automaton == nullptr)
57 0 : return false;
58 :
59 23136 : typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
60 23136 : __m._M_begin = __s;
61 23136 : __m._M_resize(__re._M_automaton->_M_sub_count());
62 :
63 : bool __ret;
64 23136 : if ((__re.flags() & regex_constants::__polynomial)
65 23136 : || (__policy == _RegexExecutorPolicy::_S_alternate
66 : && !__re._M_automaton->_M_has_backref))
67 : {
68 : _Executor<_BiIter, _Alloc, _TraitsT, false>
69 0 : __executor(__s, __e, __m, __re, __flags);
70 : if (__match_mode)
71 0 : __ret = __executor._M_match();
72 : else
73 0 : __ret = __executor._M_search();
74 0 : }
75 : else
76 : {
77 : _Executor<_BiIter, _Alloc, _TraitsT, true>
78 23136 : __executor(__s, __e, __m, __re, __flags);
79 : if (__match_mode)
80 31 : __ret = __executor._M_match();
81 : else
82 23105 : __ret = __executor._M_search();
83 23136 : }
84 23136 : if (__ret)
85 : {
86 78250 : for (auto& __it : __res)
87 64090 : if (!__it.matched)
88 46388 : __it.first = __it.second = __e;
89 14160 : auto& __pre = __m._M_prefix();
90 14160 : auto& __suf = __m._M_suffix();
91 : if (__match_mode)
92 : {
93 18 : __pre.matched = false;
94 18 : __pre.first = __s;
95 18 : __pre.second = __s;
96 18 : __suf.matched = false;
97 18 : __suf.first = __e;
98 18 : __suf.second = __e;
99 : }
100 : else
101 : {
102 14142 : __pre.first = __s;
103 14142 : __pre.second = __res[0].first;
104 14142 : __pre.matched = (__pre.first != __pre.second);
105 14142 : __suf.first = __res[0].second;
106 14142 : __suf.second = __e;
107 14142 : __suf.matched = (__suf.first != __suf.second);
108 : }
109 : }
110 : else
111 : {
112 8976 : __m._M_establish_failed_match(__e);
113 : }
114 23136 : return __ret;
115 : }
116 : /// @endcond
117 : } // namespace __detail
118 :
119 : /// @cond
120 :
121 : template<typename _Ch_type>
122 : template<typename _Fwd_iter>
123 : typename regex_traits<_Ch_type>::string_type
124 0 : regex_traits<_Ch_type>::
125 : lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
126 : {
127 : typedef std::ctype<char_type> __ctype_type;
128 0 : const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
129 :
130 : static const char* __collatenames[] =
131 : {
132 : "NUL",
133 : "SOH",
134 : "STX",
135 : "ETX",
136 : "EOT",
137 : "ENQ",
138 : "ACK",
139 : "alert",
140 : "backspace",
141 : "tab",
142 : "newline",
143 : "vertical-tab",
144 : "form-feed",
145 : "carriage-return",
146 : "SO",
147 : "SI",
148 : "DLE",
149 : "DC1",
150 : "DC2",
151 : "DC3",
152 : "DC4",
153 : "NAK",
154 : "SYN",
155 : "ETB",
156 : "CAN",
157 : "EM",
158 : "SUB",
159 : "ESC",
160 : "IS4",
161 : "IS3",
162 : "IS2",
163 : "IS1",
164 : "space",
165 : "exclamation-mark",
166 : "quotation-mark",
167 : "number-sign",
168 : "dollar-sign",
169 : "percent-sign",
170 : "ampersand",
171 : "apostrophe",
172 : "left-parenthesis",
173 : "right-parenthesis",
174 : "asterisk",
175 : "plus-sign",
176 : "comma",
177 : "hyphen",
178 : "period",
179 : "slash",
180 : "zero",
181 : "one",
182 : "two",
183 : "three",
184 : "four",
185 : "five",
186 : "six",
187 : "seven",
188 : "eight",
189 : "nine",
190 : "colon",
191 : "semicolon",
192 : "less-than-sign",
193 : "equals-sign",
194 : "greater-than-sign",
195 : "question-mark",
196 : "commercial-at",
197 : "A",
198 : "B",
199 : "C",
200 : "D",
201 : "E",
202 : "F",
203 : "G",
204 : "H",
205 : "I",
206 : "J",
207 : "K",
208 : "L",
209 : "M",
210 : "N",
211 : "O",
212 : "P",
213 : "Q",
214 : "R",
215 : "S",
216 : "T",
217 : "U",
218 : "V",
219 : "W",
220 : "X",
221 : "Y",
222 : "Z",
223 : "left-square-bracket",
224 : "backslash",
225 : "right-square-bracket",
226 : "circumflex",
227 : "underscore",
228 : "grave-accent",
229 : "a",
230 : "b",
231 : "c",
232 : "d",
233 : "e",
234 : "f",
235 : "g",
236 : "h",
237 : "i",
238 : "j",
239 : "k",
240 : "l",
241 : "m",
242 : "n",
243 : "o",
244 : "p",
245 : "q",
246 : "r",
247 : "s",
248 : "t",
249 : "u",
250 : "v",
251 : "w",
252 : "x",
253 : "y",
254 : "z",
255 : "left-curly-bracket",
256 : "vertical-line",
257 : "right-curly-bracket",
258 : "tilde",
259 : "DEL",
260 : };
261 :
262 0 : string __s;
263 0 : for (; __first != __last; ++__first)
264 0 : __s += __fctyp.narrow(*__first, 0);
265 :
266 0 : for (const auto& __it : __collatenames)
267 0 : if (__s == __it)
268 0 : return string_type(1, __fctyp.widen(
269 0 : static_cast<char>(&__it - __collatenames)));
270 :
271 : // TODO Add digraph support:
272 : // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
273 :
274 0 : return string_type();
275 0 : }
276 :
277 : template<typename _Ch_type>
278 : template<typename _Fwd_iter>
279 : typename regex_traits<_Ch_type>::char_class_type
280 255 : regex_traits<_Ch_type>::
281 : lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
282 : {
283 : typedef std::ctype<char_type> __ctype_type;
284 255 : const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
285 :
286 : // Mappings from class name to class mask.
287 : static const pair<const char*, char_class_type> __classnames[] =
288 : {
289 : {"d", ctype_base::digit},
290 : {"w", {ctype_base::alnum, _RegexMask::_S_under}},
291 : {"s", ctype_base::space},
292 : {"alnum", ctype_base::alnum},
293 : {"alpha", ctype_base::alpha},
294 : {"blank", ctype_base::blank},
295 : {"cntrl", ctype_base::cntrl},
296 : {"digit", ctype_base::digit},
297 : {"graph", ctype_base::graph},
298 : {"lower", ctype_base::lower},
299 : {"print", ctype_base::print},
300 : {"punct", ctype_base::punct},
301 : {"space", ctype_base::space},
302 : {"upper", ctype_base::upper},
303 : {"xdigit", ctype_base::xdigit},
304 : };
305 :
306 255 : string __s;
307 510 : for (; __first != __last; ++__first)
308 255 : __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
309 :
310 568 : for (const auto& __it : __classnames)
311 568 : if (__s == __it.first)
312 : {
313 255 : if (__icase
314 255 : && ((__it.second
315 255 : & (ctype_base::lower | ctype_base::upper)) != 0))
316 0 : return ctype_base::alpha;
317 255 : return __it.second;
318 : }
319 0 : return 0;
320 255 : }
321 :
322 : template<typename _Ch_type>
323 : bool
324 116770 : regex_traits<_Ch_type>::
325 : isctype(_Ch_type __c, char_class_type __f) const
326 : {
327 : typedef std::ctype<char_type> __ctype_type;
328 116770 : const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
329 :
330 116770 : return __fctyp.is(__f._M_base, __c)
331 : // [[:w:]]
332 138295 : || ((__f._M_extended & _RegexMask::_S_under)
333 138295 : && __c == __fctyp.widen('_'));
334 : }
335 :
336 : template<typename _Ch_type>
337 : int
338 290 : regex_traits<_Ch_type>::
339 : value(_Ch_type __ch, int __radix) const
340 : {
341 580 : std::basic_istringstream<char_type> __is(string_type(1, __ch));
342 : long __v;
343 290 : if (__radix == 8)
344 0 : __is >> std::oct;
345 290 : else if (__radix == 16)
346 0 : __is >> std::hex;
347 290 : __is >> __v;
348 580 : return __is.fail() ? -1 : __v;
349 290 : }
350 :
351 : template<typename _Bi_iter, typename _Alloc>
352 : template<typename _Out_iter>
353 : _Out_iter
354 4962 : match_results<_Bi_iter, _Alloc>::
355 : format(_Out_iter __out,
356 : const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
357 : const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
358 : match_flag_type __flags) const
359 : {
360 4962 : __glibcxx_assert( ready() );
361 4962 : regex_traits<char_type> __traits;
362 : typedef std::ctype<char_type> __ctype_type;
363 : const __ctype_type&
364 4962 : __fctyp(use_facet<__ctype_type>(__traits.getloc()));
365 :
366 4962 : auto __output = [&](size_t __idx)
367 : {
368 0 : auto& __sub = (*this)[__idx];
369 0 : if (__sub.matched)
370 0 : __out = std::copy(__sub.first, __sub.second, __out);
371 : };
372 :
373 4962 : if (__flags & regex_constants::format_sed)
374 : {
375 0 : bool __escaping = false;
376 0 : for (; __fmt_first != __fmt_last; __fmt_first++)
377 : {
378 0 : if (__escaping)
379 : {
380 0 : __escaping = false;
381 0 : if (__fctyp.is(__ctype_type::digit, *__fmt_first))
382 0 : __output(__traits.value(*__fmt_first, 10));
383 : else
384 0 : *__out++ = *__fmt_first;
385 0 : continue;
386 : }
387 0 : if (*__fmt_first == '\\')
388 : {
389 0 : __escaping = true;
390 0 : continue;
391 : }
392 0 : if (*__fmt_first == '&')
393 : {
394 0 : __output(0);
395 0 : continue;
396 : }
397 0 : *__out++ = *__fmt_first;
398 : }
399 0 : if (__escaping)
400 0 : *__out++ = '\\';
401 : }
402 : else
403 : {
404 0 : while (1)
405 : {
406 4962 : auto __next = std::find(__fmt_first, __fmt_last, '$');
407 4962 : if (__next == __fmt_last)
408 4962 : break;
409 :
410 0 : __out = std::copy(__fmt_first, __next, __out);
411 :
412 0 : auto __eat = [&](char __ch) -> bool
413 : {
414 0 : if (*__next == __ch)
415 : {
416 0 : ++__next;
417 0 : return true;
418 : }
419 0 : return false;
420 : };
421 :
422 0 : if (++__next == __fmt_last)
423 0 : *__out++ = '$';
424 0 : else if (__eat('$'))
425 0 : *__out++ = '$';
426 0 : else if (__eat('&'))
427 0 : __output(0);
428 0 : else if (__eat('`'))
429 : {
430 0 : auto& __sub = _M_prefix();
431 0 : if (__sub.matched)
432 0 : __out = std::copy(__sub.first, __sub.second, __out);
433 : }
434 0 : else if (__eat('\''))
435 : {
436 0 : auto& __sub = _M_suffix();
437 0 : if (__sub.matched)
438 0 : __out = std::copy(__sub.first, __sub.second, __out);
439 : }
440 0 : else if (__fctyp.is(__ctype_type::digit, *__next))
441 : {
442 0 : long __num = __traits.value(*__next, 10);
443 0 : if (++__next != __fmt_last
444 0 : && __fctyp.is(__ctype_type::digit, *__next))
445 : {
446 0 : __num *= 10;
447 0 : __num += __traits.value(*__next++, 10);
448 : }
449 0 : if (0 <= __num && __num < this->size())
450 0 : __output(__num);
451 : }
452 : else
453 0 : *__out++ = '$';
454 0 : __fmt_first = __next;
455 : }
456 4962 : __out = std::copy(__fmt_first, __fmt_last, __out);
457 : }
458 4962 : return __out;
459 4962 : }
460 :
461 : template<typename _Out_iter, typename _Bi_iter,
462 : typename _Rx_traits, typename _Ch_type>
463 : _Out_iter
464 5485 : __regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
465 : const basic_regex<_Ch_type, _Rx_traits>& __e,
466 : const _Ch_type* __fmt, size_t __len,
467 : regex_constants::match_flag_type __flags)
468 : {
469 : typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
470 5485 : _IterT __i(__first, __last, __e, __flags);
471 5485 : _IterT __end;
472 5485 : if (__i == __end)
473 : {
474 523 : if (!(__flags & regex_constants::format_no_copy))
475 523 : __out = std::copy(__first, __last, __out);
476 : }
477 : else
478 : {
479 4962 : sub_match<_Bi_iter> __last;
480 9904 : for (; __i != __end; ++__i)
481 : {
482 4962 : if (!(__flags & regex_constants::format_no_copy))
483 4962 : __out = std::copy(__i->prefix().first, __i->prefix().second,
484 : __out);
485 4962 : __out = __i->format(__out, __fmt, __fmt + __len, __flags);
486 4962 : __last = __i->suffix();
487 4962 : if (__flags & regex_constants::format_first_only)
488 20 : break;
489 : }
490 4962 : if (!(__flags & regex_constants::format_no_copy))
491 4962 : __out = std::copy(__last.first, __last.second, __out);
492 : }
493 5485 : return __out;
494 5485 : }
495 :
496 : template<typename _Bi_iter,
497 : typename _Ch_type,
498 : typename _Rx_traits>
499 : bool
500 18513 : regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
501 : operator==(const regex_iterator& __rhs) const noexcept
502 : {
503 18513 : if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
504 7027 : return true;
505 11486 : return _M_pregex == __rhs._M_pregex
506 0 : && _M_begin == __rhs._M_begin
507 0 : && _M_end == __rhs._M_end
508 0 : && _M_flags == __rhs._M_flags
509 11486 : && _M_match[0] == __rhs._M_match[0];
510 : }
511 :
512 : template<typename _Bi_iter,
513 : typename _Ch_type,
514 : typename _Rx_traits>
515 : regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
516 6501 : regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
517 : operator++()
518 : {
519 : // In all cases in which the call to regex_search returns true,
520 : // match.prefix().first shall be equal to the previous value of
521 : // match[0].second, and for each index i in the half-open range
522 : // [0, match.size()) for which match[i].matched is true,
523 : // match[i].position() shall return distance(begin, match[i].first).
524 : // [28.12.1.4.5]
525 6501 : if (_M_match[0].matched)
526 : {
527 6501 : auto __start = _M_match[0].second;
528 6501 : auto __prefix_first = _M_match[0].second;
529 6501 : if (_M_match[0].first == _M_match[0].second)
530 : {
531 0 : if (__start == _M_end)
532 : {
533 0 : _M_pregex = nullptr;
534 0 : return *this;
535 : }
536 : else
537 : {
538 0 : if (regex_search(__start, _M_end, _M_match, *_M_pregex,
539 : _M_flags
540 : | regex_constants::match_not_null
541 : | regex_constants::match_continuous))
542 : {
543 0 : __glibcxx_assert(_M_match[0].matched);
544 0 : auto& __prefix = _M_match._M_prefix();
545 0 : __prefix.first = __prefix_first;
546 0 : __prefix.matched = __prefix.first != __prefix.second;
547 : // [28.12.1.4.5]
548 0 : _M_match._M_begin = _M_begin;
549 0 : return *this;
550 : }
551 : else
552 0 : ++__start;
553 : }
554 : }
555 6501 : _M_flags |= regex_constants::match_prev_avail;
556 6501 : if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
557 : {
558 778 : __glibcxx_assert(_M_match[0].matched);
559 778 : auto& __prefix = _M_match._M_prefix();
560 778 : __prefix.first = __prefix_first;
561 778 : __prefix.matched = __prefix.first != __prefix.second;
562 : // [28.12.1.4.5]
563 778 : _M_match._M_begin = _M_begin;
564 : }
565 : else
566 5723 : _M_pregex = nullptr;
567 : }
568 6501 : return *this;
569 : }
570 :
571 : template<typename _Bi_iter,
572 : typename _Ch_type,
573 : typename _Rx_traits>
574 : regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
575 778 : regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
576 : operator=(const regex_token_iterator& __rhs)
577 : {
578 778 : _M_position = __rhs._M_position;
579 778 : _M_subs = __rhs._M_subs;
580 778 : _M_n = __rhs._M_n;
581 778 : _M_suffix = __rhs._M_suffix;
582 778 : _M_has_m1 = __rhs._M_has_m1;
583 778 : _M_normalize_result();
584 778 : return *this;
585 : }
586 :
587 : template<typename _Bi_iter,
588 : typename _Ch_type,
589 : typename _Rx_traits>
590 : bool
591 3112 : regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
592 : operator==(const regex_token_iterator& __rhs) const
593 : {
594 3112 : if (_M_end_of_seq() && __rhs._M_end_of_seq())
595 778 : return true;
596 778 : if (_M_suffix.matched && __rhs._M_suffix.matched
597 3112 : && _M_suffix == __rhs._M_suffix)
598 0 : return true;
599 4668 : if (_M_end_of_seq() || _M_suffix.matched
600 4668 : || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
601 2334 : return false;
602 0 : return _M_position == __rhs._M_position
603 0 : && _M_n == __rhs._M_n
604 0 : && _M_subs == __rhs._M_subs;
605 : }
606 :
607 : template<typename _Bi_iter,
608 : typename _Ch_type,
609 : typename _Rx_traits>
610 : regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
611 2334 : regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
612 : operator++()
613 : {
614 2334 : _Position __prev = _M_position;
615 2334 : if (_M_suffix.matched)
616 778 : *this = regex_token_iterator();
617 1556 : else if (_M_n + 1 < _M_subs.size())
618 : {
619 0 : _M_n++;
620 0 : _M_result = &_M_current_match();
621 : }
622 : else
623 : {
624 1556 : _M_n = 0;
625 1556 : ++_M_position;
626 1556 : if (_M_position != _Position())
627 778 : _M_result = &_M_current_match();
628 778 : else if (_M_has_m1 && __prev->suffix().length() != 0)
629 : {
630 778 : _M_suffix.matched = true;
631 778 : _M_suffix.first = __prev->suffix().first;
632 778 : _M_suffix.second = __prev->suffix().second;
633 778 : _M_result = &_M_suffix;
634 : }
635 : else
636 0 : *this = regex_token_iterator();
637 : }
638 2334 : return *this;
639 2334 : }
640 :
641 : template<typename _Bi_iter,
642 : typename _Ch_type,
643 : typename _Rx_traits>
644 : void
645 778 : regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
646 : _M_init(_Bi_iter __a, _Bi_iter __b)
647 : {
648 778 : _M_has_m1 = false;
649 778 : for (auto __it : _M_subs)
650 778 : if (__it == -1)
651 : {
652 778 : _M_has_m1 = true;
653 778 : break;
654 : }
655 778 : if (_M_position != _Position())
656 778 : _M_result = &_M_current_match();
657 0 : else if (_M_has_m1)
658 : {
659 0 : _M_suffix.matched = true;
660 0 : _M_suffix.first = __a;
661 0 : _M_suffix.second = __b;
662 0 : _M_result = &_M_suffix;
663 : }
664 : else
665 0 : _M_result = nullptr;
666 778 : }
667 :
668 : /// @endcond
669 :
670 : _GLIBCXX_END_NAMESPACE_VERSION
671 : } // namespace
|