libstdc++
regex.tcc
Go to the documentation of this file.
1// class template regex -*- C++ -*-
2
3// Copyright (C) 2013-2016 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25/**
26 * @file bits/regex.tcc
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
29 */
30
31namespace std _GLIBCXX_VISIBILITY(default)
32{
33namespace __detail
34{
35_GLIBCXX_BEGIN_NAMESPACE_VERSION
36
37 // Result of merging regex_match and regex_search.
38 //
39 // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
40 // the other one if possible, for test purpose).
41 //
42 // That __match_mode is true means regex_match, else regex_search.
43 template<typename _BiIter, typename _Alloc,
44 typename _CharT, typename _TraitsT,
45 _RegexExecutorPolicy __policy,
46 bool __match_mode>
47 bool
48 __regex_algo_impl(_BiIter __s,
49 _BiIter __e,
50 match_results<_BiIter, _Alloc>& __m,
51 const basic_regex<_CharT, _TraitsT>& __re,
53 {
54 if (__re._M_automaton == nullptr)
55 return false;
56
57 typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
58 __m._M_begin = __s;
59 __m._M_resize(__re._M_automaton->_M_sub_count());
60 for (auto& __it : __res)
61 __it.matched = false;
62
63 bool __ret;
64 if ((__re.flags() & regex_constants::__polynomial)
65 || (__policy == _RegexExecutorPolicy::_S_alternate
66 && !__re._M_automaton->_M_has_backref))
67 {
68 _Executor<_BiIter, _Alloc, _TraitsT, false>
69 __executor(__s, __e, __m, __re, __flags);
70 if (__match_mode)
71 __ret = __executor._M_match();
72 else
73 __ret = __executor._M_search();
74 }
75 else
76 {
77 _Executor<_BiIter, _Alloc, _TraitsT, true>
78 __executor(__s, __e, __m, __re, __flags);
79 if (__match_mode)
80 __ret = __executor._M_match();
81 else
82 __ret = __executor._M_search();
83 }
84 if (__ret)
85 {
86 for (auto& __it : __res)
87 if (!__it.matched)
88 __it.first = __it.second = __e;
89 auto& __pre = __m._M_prefix();
90 auto& __suf = __m._M_suffix();
91 if (__match_mode)
92 {
93 __pre.matched = false;
94 __pre.first = __s;
95 __pre.second = __s;
96 __suf.matched = false;
97 __suf.first = __e;
98 __suf.second = __e;
99 }
100 else
101 {
102 __pre.first = __s;
103 __pre.second = __res[0].first;
104 __pre.matched = (__pre.first != __pre.second);
105 __suf.first = __res[0].second;
106 __suf.second = __e;
107 __suf.matched = (__suf.first != __suf.second);
108 }
109 }
110 else
111 {
112 __m._M_resize(0);
113 for (auto& __it : __res)
114 {
115 __it.matched = false;
116 __it.first = __it.second = __e;
117 }
118 }
119 return __ret;
120 }
121
122_GLIBCXX_END_NAMESPACE_VERSION
123}
124
125_GLIBCXX_BEGIN_NAMESPACE_VERSION
126
127 template<typename _Ch_type>
128 template<typename _Fwd_iter>
129 typename regex_traits<_Ch_type>::string_type
131 lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
132 {
133 typedef std::ctype<char_type> __ctype_type;
134 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
135
136 static const char* __collatenames[] =
137 {
138 "NUL",
139 "SOH",
140 "STX",
141 "ETX",
142 "EOT",
143 "ENQ",
144 "ACK",
145 "alert",
146 "backspace",
147 "tab",
148 "newline",
149 "vertical-tab",
150 "form-feed",
151 "carriage-return",
152 "SO",
153 "SI",
154 "DLE",
155 "DC1",
156 "DC2",
157 "DC3",
158 "DC4",
159 "NAK",
160 "SYN",
161 "ETB",
162 "CAN",
163 "EM",
164 "SUB",
165 "ESC",
166 "IS4",
167 "IS3",
168 "IS2",
169 "IS1",
170 "space",
171 "exclamation-mark",
172 "quotation-mark",
173 "number-sign",
174 "dollar-sign",
175 "percent-sign",
176 "ampersand",
177 "apostrophe",
178 "left-parenthesis",
179 "right-parenthesis",
180 "asterisk",
181 "plus-sign",
182 "comma",
183 "hyphen",
184 "period",
185 "slash",
186 "zero",
187 "one",
188 "two",
189 "three",
190 "four",
191 "five",
192 "six",
193 "seven",
194 "eight",
195 "nine",
196 "colon",
197 "semicolon",
198 "less-than-sign",
199 "equals-sign",
200 "greater-than-sign",
201 "question-mark",
202 "commercial-at",
203 "A",
204 "B",
205 "C",
206 "D",
207 "E",
208 "F",
209 "G",
210 "H",
211 "I",
212 "J",
213 "K",
214 "L",
215 "M",
216 "N",
217 "O",
218 "P",
219 "Q",
220 "R",
221 "S",
222 "T",
223 "U",
224 "V",
225 "W",
226 "X",
227 "Y",
228 "Z",
229 "left-square-bracket",
230 "backslash",
231 "right-square-bracket",
232 "circumflex",
233 "underscore",
234 "grave-accent",
235 "a",
236 "b",
237 "c",
238 "d",
239 "e",
240 "f",
241 "g",
242 "h",
243 "i",
244 "j",
245 "k",
246 "l",
247 "m",
248 "n",
249 "o",
250 "p",
251 "q",
252 "r",
253 "s",
254 "t",
255 "u",
256 "v",
257 "w",
258 "x",
259 "y",
260 "z",
261 "left-curly-bracket",
262 "vertical-line",
263 "right-curly-bracket",
264 "tilde",
265 "DEL",
266 };
267
268 string __s;
269 for (; __first != __last; ++__first)
270 __s += __fctyp.narrow(*__first, 0);
271
272 for (const auto& __it : __collatenames)
273 if (__s == __it)
274 return string_type(1, __fctyp.widen(
275 static_cast<char>(&__it - __collatenames)));
276
277 // TODO Add digraph support:
278 // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
279
280 return string_type();
281 }
282
283 template<typename _Ch_type>
284 template<typename _Fwd_iter>
285 typename regex_traits<_Ch_type>::char_class_type
287 lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
288 {
289 typedef std::ctype<char_type> __ctype_type;
290 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
291
292 // Mappings from class name to class mask.
294 {
295 {"d", ctype_base::digit},
296 {"w", {ctype_base::alnum, _RegexMask::_S_under}},
297 {"s", ctype_base::space},
298 {"alnum", ctype_base::alnum},
299 {"alpha", ctype_base::alpha},
300 {"blank", ctype_base::blank},
301 {"cntrl", ctype_base::cntrl},
302 {"digit", ctype_base::digit},
303 {"graph", ctype_base::graph},
304 {"lower", ctype_base::lower},
305 {"print", ctype_base::print},
306 {"punct", ctype_base::punct},
307 {"space", ctype_base::space},
308 {"upper", ctype_base::upper},
309 {"xdigit", ctype_base::xdigit},
310 };
311
312 string __s;
313 for (; __first != __last; ++__first)
314 __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
315
316 for (const auto& __it : __classnames)
317 if (__s == __it.first)
318 {
319 if (__icase
320 && ((__it.second
321 & (ctype_base::lower | ctype_base::upper)) != 0))
322 return ctype_base::alpha;
323 return __it.second;
324 }
325 return 0;
326 }
327
328 template<typename _Ch_type>
329 bool
331 isctype(_Ch_type __c, char_class_type __f) const
332 {
333 typedef std::ctype<char_type> __ctype_type;
334 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
335
336 return __fctyp.is(__f._M_base, __c)
337 // [[:w:]]
338 || ((__f._M_extended & _RegexMask::_S_under)
339 && __c == __fctyp.widen('_'));
340 }
341
342 template<typename _Ch_type>
343 int
345 value(_Ch_type __ch, int __radix) const
346 {
348 long __v;
349 if (__radix == 8)
350 __is >> std::oct;
351 else if (__radix == 16)
352 __is >> std::hex;
353 __is >> __v;
354 return __is.fail() ? -1 : __v;
355 }
356
357 template<typename _Bi_iter, typename _Alloc>
358 template<typename _Out_iter>
361 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
362 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
363 match_flag_type __flags) const
364 {
365 __glibcxx_assert( ready() );
367 typedef std::ctype<char_type> __ctype_type;
368 const __ctype_type&
369 __fctyp(use_facet<__ctype_type>(__traits.getloc()));
370
371 auto __output = [&](size_t __idx)
372 {
373 auto& __sub = (*this)[__idx];
374 if (__sub.matched)
375 __out = std::copy(__sub.first, __sub.second, __out);
376 };
377
378 if (__flags & regex_constants::format_sed)
379 {
380 for (; __fmt_first != __fmt_last;)
381 if (*__fmt_first == '&')
382 {
383 __output(0);
384 ++__fmt_first;
385 }
386 else if (*__fmt_first == '\\')
387 {
388 if (++__fmt_first != __fmt_last
389 && __fctyp.is(__ctype_type::digit, *__fmt_first))
390 __output(__traits.value(*__fmt_first++, 10));
391 else
392 *__out++ = '\\';
393 }
394 else
395 *__out++ = *__fmt_first++;
396 }
397 else
398 {
399 while (1)
400 {
401 auto __next = std::find(__fmt_first, __fmt_last, '$');
402 if (__next == __fmt_last)
403 break;
404
405 __out = std::copy(__fmt_first, __next, __out);
406
407 auto __eat = [&](char __ch) -> bool
408 {
409 if (*__next == __ch)
410 {
411 ++__next;
412 return true;
413 }
414 return false;
415 };
416
417 if (++__next == __fmt_last)
418 *__out++ = '$';
419 else if (__eat('$'))
420 *__out++ = '$';
421 else if (__eat('&'))
422 __output(0);
423 else if (__eat('`'))
424 {
425 auto& __sub = _M_prefix();
426 if (__sub.matched)
427 __out = std::copy(__sub.first, __sub.second, __out);
428 }
429 else if (__eat('\''))
430 {
431 auto& __sub = _M_suffix();
432 if (__sub.matched)
433 __out = std::copy(__sub.first, __sub.second, __out);
434 }
435 else if (__fctyp.is(__ctype_type::digit, *__next))
436 {
437 long __num = __traits.value(*__next, 10);
438 if (++__next != __fmt_last
439 && __fctyp.is(__ctype_type::digit, *__next))
440 {
441 __num *= 10;
442 __num += __traits.value(*__next++, 10);
443 }
444 if (0 <= __num && __num < this->size())
445 __output(__num);
446 }
447 else
448 *__out++ = '$';
449 __fmt_first = __next;
450 }
451 __out = std::copy(__fmt_first, __fmt_last, __out);
452 }
453 return __out;
454 }
455
456 template<typename _Out_iter, typename _Bi_iter,
457 typename _Rx_traits, typename _Ch_type>
458 _Out_iter
459 regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
461 const _Ch_type* __fmt,
463 {
465 _IterT __i(__first, __last, __e, __flags);
466 _IterT __end;
467 if (__i == __end)
468 {
469 if (!(__flags & regex_constants::format_no_copy))
470 __out = std::copy(__first, __last, __out);
471 }
472 else
473 {
474 sub_match<_Bi_iter> __last;
476 for (; __i != __end; ++__i)
477 {
478 if (!(__flags & regex_constants::format_no_copy))
479 __out = std::copy(__i->prefix().first, __i->prefix().second,
480 __out);
481 __out = __i->format(__out, __fmt, __fmt + __len, __flags);
482 __last = __i->suffix();
484 break;
485 }
486 if (!(__flags & regex_constants::format_no_copy))
487 __out = std::copy(__last.first, __last.second, __out);
488 }
489 return __out;
490 }
491
492 template<typename _Bi_iter,
493 typename _Ch_type,
494 typename _Rx_traits>
495 bool
497 operator==(const regex_iterator& __rhs) const
498 {
499 if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
500 return true;
501 return _M_pregex == __rhs._M_pregex
502 && _M_begin == __rhs._M_begin
503 && _M_end == __rhs._M_end
504 && _M_flags == __rhs._M_flags
505 && _M_match[0] == __rhs._M_match[0];
506 }
507
508 template<typename _Bi_iter,
509 typename _Ch_type,
510 typename _Rx_traits>
514 {
515 // In all cases in which the call to regex_search returns true,
516 // match.prefix().first shall be equal to the previous value of
517 // match[0].second, and for each index i in the half-open range
518 // [0, match.size()) for which match[i].matched is true,
519 // match[i].position() shall return distance(begin, match[i].first).
520 // [28.12.1.4.5]
521 if (_M_match[0].matched)
522 {
523 auto __start = _M_match[0].second;
524 auto __prefix_first = _M_match[0].second;
525 if (_M_match[0].first == _M_match[0].second)
526 {
527 if (__start == _M_end)
528 {
529 _M_pregex = nullptr;
530 return *this;
531 }
532 else
533 {
534 if (regex_search(__start, _M_end, _M_match, *_M_pregex,
535 _M_flags
538 {
539 __glibcxx_assert(_M_match[0].matched);
540 auto& __prefix = _M_match._M_prefix();
541 __prefix.first = __prefix_first;
542 __prefix.matched = __prefix.first != __prefix.second;
543 // [28.12.1.4.5]
544 _M_match._M_begin = _M_begin;
545 return *this;
546 }
547 else
548 ++__start;
549 }
550 }
552 if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
553 {
554 __glibcxx_assert(_M_match[0].matched);
555 auto& __prefix = _M_match._M_prefix();
556 __prefix.first = __prefix_first;
557 __prefix.matched = __prefix.first != __prefix.second;
558 // [28.12.1.4.5]
559 _M_match._M_begin = _M_begin;
560 }
561 else
562 _M_pregex = nullptr;
563 }
564 return *this;
565 }
566
567 template<typename _Bi_iter,
568 typename _Ch_type,
569 typename _Rx_traits>
573 {
574 _M_position = __rhs._M_position;
575 _M_subs = __rhs._M_subs;
576 _M_n = __rhs._M_n;
577 _M_suffix = __rhs._M_suffix;
578 _M_has_m1 = __rhs._M_has_m1;
579 _M_normalize_result();
580 return *this;
581 }
582
583 template<typename _Bi_iter,
584 typename _Ch_type,
585 typename _Rx_traits>
586 bool
589 {
590 if (_M_end_of_seq() && __rhs._M_end_of_seq())
591 return true;
592 if (_M_suffix.matched && __rhs._M_suffix.matched
593 && _M_suffix == __rhs._M_suffix)
594 return true;
595 if (_M_end_of_seq() || _M_suffix.matched
596 || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
597 return false;
598 return _M_position == __rhs._M_position
599 && _M_n == __rhs._M_n
600 && _M_subs == __rhs._M_subs;
601 }
602
603 template<typename _Bi_iter,
604 typename _Ch_type,
605 typename _Rx_traits>
609 {
610 _Position __prev = _M_position;
611 if (_M_suffix.matched)
612 *this = regex_token_iterator();
613 else if (_M_n + 1 < _M_subs.size())
614 {
615 _M_n++;
616 _M_result = &_M_current_match();
617 }
618 else
619 {
620 _M_n = 0;
621 ++_M_position;
622 if (_M_position != _Position())
623 _M_result = &_M_current_match();
624 else if (_M_has_m1 && __prev->suffix().length() != 0)
625 {
626 _M_suffix.matched = true;
627 _M_suffix.first = __prev->suffix().first;
628 _M_suffix.second = __prev->suffix().second;
629 _M_result = &_M_suffix;
630 }
631 else
632 *this = regex_token_iterator();
633 }
634 return *this;
635 }
636
637 template<typename _Bi_iter,
638 typename _Ch_type,
639 typename _Rx_traits>
640 void
642 _M_init(_Bi_iter __a, _Bi_iter __b)
643 {
644 _M_has_m1 = false;
645 for (auto __it : _M_subs)
646 if (__it == -1)
647 {
648 _M_has_m1 = true;
649 break;
650 }
651 if (_M_position != _Position())
652 _M_result = &_M_current_match();
653 else if (_M_has_m1)
654 {
655 _M_suffix.matched = true;
656 _M_suffix.first = __a;
657 _M_suffix.second = __b;
658 _M_result = &_M_suffix;
659 }
660 else
661 _M_result = nullptr;
662 }
663
664_GLIBCXX_END_NAMESPACE_VERSION
665} // namespace
666
bool operator==(const regex_token_iterator &__rhs) const
Compares a regex_token_iterator to another for equality.
Definition regex.tcc:588
constexpr syntax_option_type __polynomial
bool operator==(const regex_iterator &__rhs) const
Tests the equivalence of two regex iterators.
Definition regex.tcc:497
_Out_iter format(_Out_iter __out, const char_type *__fmt_first, const char_type *__fmt_last, match_flag_type __flags=regex_constants::format_default) const
int value(_Ch_type __ch, int __radix) const
Converts a digit to an int.
Definition regex.tcc:345
regex_token_iterator & operator=(const regex_token_iterator &__rhs)
Assigns a regex_token_iterator to another.
Definition regex.tcc:572
regex_token_iterator & operator++()
Increments a regex_token_iterator.
Definition regex.tcc:608
constexpr match_flag_type format_first_only
constexpr match_flag_type match_continuous
_Out_iter regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last, const basic_regex< _Ch_type, _Rx_traits > &__e, const basic_string< _Ch_type, _St, _Sa > &__fmt, regex_constants::match_flag_type __flags=regex_constants::match_default)
Search for a regular expression within a range for multiple times, and replace the matched parts thro...
Definition regex.h:2334
match_flag_type
This is a bitmask type indicating regex matching rules.
bool regex_search(_Bi_iter __s, _Bi_iter __e, match_results< _Bi_iter, _Alloc > &__m, const basic_regex< _Ch_type, _Rx_traits > &__re, regex_constants::match_flag_type __flags=regex_constants::match_default)
Definition regex.h:2186
string_type lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
Gets a collation element by name.
Definition regex.tcc:131
constexpr match_flag_type match_prev_avail
bool isctype(_Ch_type __c, char_class_type __f) const
Determines if c is a member of an identified class.
Definition regex.tcc:331
constexpr match_flag_type format_sed
char_class_type lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase=false) const
Maps one or more characters to a named character classification.
Definition regex.tcc:287
regex_iterator & operator++()
Increments a regex_iterator.
Definition regex.tcc:513
constexpr match_flag_type match_not_null
constexpr match_flag_type format_no_copy
ISO C++ entities toplevel namespace is std.
ios_base & hex(ios_base &__base)
Calls base.setf(ios_base::hex, ios_base::basefield).
Definition ios_base.h:1024
ios_base & oct(ios_base &__base)
Calls base.setf(ios_base::oct, ios_base::basefield).
Definition ios_base.h:1032
Basis for explicit traits specializations.