| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | // class template regex -*- C++ -*- | 
| 2 |  |  |  |  |  |  |  | 
| 3 |  |  |  |  |  |  | // Copyright (C) 2013-2015 Free Software Foundation, Inc. | 
| 4 |  |  |  |  |  |  | // | 
| 5 |  |  |  |  |  |  | // This file is part of the GNU ISO C++ Library.  This library is free | 
| 6 |  |  |  |  |  |  | // software; you can redistribute it and/or modify it under the | 
| 7 |  |  |  |  |  |  | // terms of the GNU General Public License as published by the | 
| 8 |  |  |  |  |  |  | // Free Software Foundation; either version 3, or (at your option) | 
| 9 |  |  |  |  |  |  | // any later version. | 
| 10 |  |  |  |  |  |  |  | 
| 11 |  |  |  |  |  |  | // This library is distributed in the hope that it will be useful, | 
| 12 |  |  |  |  |  |  | // but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 13 |  |  |  |  |  |  | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
| 14 |  |  |  |  |  |  | // GNU General Public License for more details. | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | // Under Section 7 of GPL version 3, you are granted additional | 
| 17 |  |  |  |  |  |  | // permissions described in the GCC Runtime Library Exception, version | 
| 18 |  |  |  |  |  |  | // 3.1, as published by the Free Software Foundation. | 
| 19 |  |  |  |  |  |  |  | 
| 20 |  |  |  |  |  |  | // You should have received a copy of the GNU General Public License and | 
| 21 |  |  |  |  |  |  | // a copy of the GCC Runtime Library Exception along with this program; | 
| 22 |  |  |  |  |  |  | // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see | 
| 23 |  |  |  |  |  |  | // <http://www.gnu.org/licenses/>. | 
| 24 |  |  |  |  |  |  |  | 
| 25 |  |  |  |  |  |  | /** | 
| 26 |  |  |  |  |  |  | *  @file bits/regex_scanner.tcc | 
| 27 |  |  |  |  |  |  | *  This is an internal header file, included by other library headers. | 
| 28 |  |  |  |  |  |  | *  Do not attempt to use it directly. @headername{regex} | 
| 29 |  |  |  |  |  |  | */ | 
| 30 |  |  |  |  |  |  |  | 
| 31 |  |  |  |  |  |  | // FIXME make comments doxygen format. | 
| 32 |  |  |  |  |  |  |  | 
| 33 |  |  |  |  |  |  | // N3376 specified 6 regex styles: ECMAScript, basic, extended, grep, egrep | 
| 34 |  |  |  |  |  |  | // and awk | 
| 35 |  |  |  |  |  |  | // 1) grep is basic except '\n' is treated as '|' | 
| 36 |  |  |  |  |  |  | // 2) egrep is extended except '\n' is treated as '|' | 
| 37 |  |  |  |  |  |  | // 3) awk is extended except for its special escaping rules, and it has no | 
| 38 |  |  |  |  |  |  | //    back-references. | 
| 39 |  |  |  |  |  |  | // | 
| 40 |  |  |  |  |  |  | // References: | 
| 41 |  |  |  |  |  |  | // | 
| 42 |  |  |  |  |  |  | // ECMAScript: ECMA-262 15.10 | 
| 43 |  |  |  |  |  |  | // | 
| 44 |  |  |  |  |  |  | // basic, extended: | 
| 45 |  |  |  |  |  |  | // http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap09.html | 
| 46 |  |  |  |  |  |  | // | 
| 47 |  |  |  |  |  |  | // awk: http://pubs.opengroup.org/onlinepubs/000095399/utilities/awk.html | 
| 48 |  |  |  |  |  |  |  | 
| 49 |  |  |  |  |  |  | namespace std _GLIBCXX_VISIBILITY(default) | 
| 50 |  |  |  |  |  |  | { | 
| 51 |  |  |  |  |  |  | namespace __detail | 
| 52 |  |  |  |  |  |  | { | 
| 53 |  |  |  |  |  |  | _GLIBCXX_BEGIN_NAMESPACE_VERSION | 
| 54 |  |  |  |  |  |  |  | 
| 55 |  |  |  |  |  |  | template<typename _CharT> | 
| 56 | 0 |  |  |  |  |  | _Scanner<_CharT>:: | 
| 57 |  |  |  |  |  |  | _Scanner(typename _Scanner::_IterT __begin, | 
| 58 |  |  |  |  |  |  | typename _Scanner::_IterT __end, | 
| 59 |  |  |  |  |  |  | _FlagT __flags, std::locale __loc) | 
| 60 |  |  |  |  |  |  | : _ScannerBase(__flags), | 
| 61 |  |  |  |  |  |  | _M_current(__begin), _M_end(__end), | 
| 62 | 0 |  |  |  |  |  | _M_ctype(std::use_facet<_CtypeT>(__loc)), | 
| 63 |  |  |  |  |  |  | _M_eat_escape(_M_is_ecma() | 
| 64 | 0 |  |  |  |  |  | ? &_Scanner::_M_eat_escape_ecma | 
| 65 | 0 | 0 |  |  |  |  | : &_Scanner::_M_eat_escape_posix) | 
| 66 | 0 | 0 |  |  |  |  | { _M_advance(); } | 
| 67 |  |  |  |  |  |  |  | 
| 68 |  |  |  |  |  |  | template<typename _CharT> | 
| 69 |  |  |  |  |  |  | void | 
| 70 | 0 |  |  |  |  |  | _Scanner<_CharT>:: | 
| 71 |  |  |  |  |  |  | _M_advance() | 
| 72 |  |  |  |  |  |  | { | 
| 73 | 0 | 0 |  |  |  |  | if (_M_current == _M_end) | 
| 74 |  |  |  |  |  |  | { | 
| 75 | 0 |  |  |  |  |  | _M_token = _S_token_eof; | 
| 76 | 0 |  |  |  |  |  | return; | 
| 77 |  |  |  |  |  |  | } | 
| 78 |  |  |  |  |  |  |  | 
| 79 | 0 | 0 |  |  |  |  | if (_M_state == _S_state_normal) | 
| 80 | 0 |  |  |  |  |  | _M_scan_normal(); | 
| 81 | 0 | 0 |  |  |  |  | else if (_M_state == _S_state_in_bracket) | 
| 82 | 0 |  |  |  |  |  | _M_scan_in_bracket(); | 
| 83 | 0 | 0 |  |  |  |  | else if (_M_state == _S_state_in_brace) | 
| 84 | 0 |  |  |  |  |  | _M_scan_in_brace(); | 
| 85 |  |  |  |  |  |  | else | 
| 86 |  |  |  |  |  |  | { | 
| 87 |  |  |  |  |  |  | _GLIBCXX_DEBUG_ASSERT(false); | 
| 88 |  |  |  |  |  |  | } | 
| 89 |  |  |  |  |  |  | } | 
| 90 |  |  |  |  |  |  |  | 
| 91 |  |  |  |  |  |  | // Differences between styles: | 
| 92 |  |  |  |  |  |  | // 1) "\(", "\)", "\{" in basic are grouping/interval delimiters, not escapes. | 
| 93 |  |  |  |  |  |  | // 2) "(?:", "(?=", "(?!" in ECMAScript. | 
| 94 |  |  |  |  |  |  | template<typename _CharT> | 
| 95 |  |  |  |  |  |  | void | 
| 96 | 0 |  |  |  |  |  | _Scanner<_CharT>:: | 
| 97 |  |  |  |  |  |  | _M_scan_normal() | 
| 98 |  |  |  |  |  |  | { | 
| 99 | 0 |  |  |  |  |  | auto __c = *_M_current++; | 
| 100 |  |  |  |  |  |  |  | 
| 101 | 0 | 0 |  |  |  |  | if (std::strchr(_M_spec_char, _M_ctype.narrow(__c, ' ')) == nullptr) | 
| 102 |  |  |  |  |  |  | { | 
| 103 | 0 |  |  |  |  |  | _M_token = _S_token_ord_char; | 
| 104 | 0 |  |  |  |  |  | _M_value.assign(1, __c); | 
| 105 | 0 |  |  |  |  |  | return; | 
| 106 |  |  |  |  |  |  | } | 
| 107 | 0 | 0 |  |  |  |  | if (__c == '\\') | 
| 108 |  |  |  |  |  |  | { | 
| 109 | 0 | 0 |  |  |  |  | if (_M_current == _M_end) | 
| 110 | 0 |  |  |  |  |  | __throw_regex_error(regex_constants::error_escape); | 
| 111 |  |  |  |  |  |  |  | 
| 112 | 0 | 0 |  |  |  |  | if (!_M_is_basic() | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 113 | 0 |  |  |  |  |  | || (*_M_current != '(' | 
| 114 | 0 |  |  |  |  |  | && *_M_current != ')' | 
| 115 | 0 |  |  |  |  |  | && *_M_current != '{')) | 
| 116 |  |  |  |  |  |  | { | 
| 117 | 0 | 0 |  |  |  |  | (this->*_M_eat_escape)(); | 
| 118 | 0 |  |  |  |  |  | return; | 
| 119 |  |  |  |  |  |  | } | 
| 120 | 0 |  |  |  |  |  | __c = *_M_current++; | 
| 121 |  |  |  |  |  |  | } | 
| 122 | 0 | 0 |  |  |  |  | if (__c == '(') | 
| 123 |  |  |  |  |  |  | { | 
| 124 | 0 | 0 |  |  |  |  | if (_M_is_ecma() && *_M_current == '?') | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 125 |  |  |  |  |  |  | { | 
| 126 | 0 | 0 |  |  |  |  | if (++_M_current == _M_end) | 
| 127 | 0 |  |  |  |  |  | __throw_regex_error(regex_constants::error_paren); | 
| 128 |  |  |  |  |  |  |  | 
| 129 | 0 | 0 |  |  |  |  | if (*_M_current == ':') | 
| 130 |  |  |  |  |  |  | { | 
| 131 | 0 |  |  |  |  |  | ++_M_current; | 
| 132 | 0 |  |  |  |  |  | _M_token = _S_token_subexpr_no_group_begin; | 
| 133 |  |  |  |  |  |  | } | 
| 134 | 0 | 0 |  |  |  |  | else if (*_M_current == '=') | 
| 135 |  |  |  |  |  |  | { | 
| 136 | 0 |  |  |  |  |  | ++_M_current; | 
| 137 | 0 |  |  |  |  |  | _M_token = _S_token_subexpr_lookahead_begin; | 
| 138 | 0 |  |  |  |  |  | _M_value.assign(1, 'p'); | 
| 139 |  |  |  |  |  |  | } | 
| 140 | 0 | 0 |  |  |  |  | else if (*_M_current == '!') | 
| 141 |  |  |  |  |  |  | { | 
| 142 | 0 |  |  |  |  |  | ++_M_current; | 
| 143 | 0 |  |  |  |  |  | _M_token = _S_token_subexpr_lookahead_begin; | 
| 144 | 0 |  |  |  |  |  | _M_value.assign(1, 'n'); | 
| 145 |  |  |  |  |  |  | } | 
| 146 |  |  |  |  |  |  | else | 
| 147 | 0 |  |  |  |  |  | __throw_regex_error(regex_constants::error_paren); | 
| 148 |  |  |  |  |  |  | } | 
| 149 | 0 | 0 |  |  |  |  | else if (_M_flags & regex_constants::nosubs) | 
| 150 | 0 |  |  |  |  |  | _M_token = _S_token_subexpr_no_group_begin; | 
| 151 |  |  |  |  |  |  | else | 
| 152 | 0 |  |  |  |  |  | _M_token = _S_token_subexpr_begin; | 
| 153 |  |  |  |  |  |  | } | 
| 154 | 0 | 0 |  |  |  |  | else if (__c == ')') | 
| 155 | 0 |  |  |  |  |  | _M_token = _S_token_subexpr_end; | 
| 156 | 0 | 0 |  |  |  |  | else if (__c == '[') | 
| 157 |  |  |  |  |  |  | { | 
| 158 | 0 |  |  |  |  |  | _M_state = _S_state_in_bracket; | 
| 159 | 0 |  |  |  |  |  | _M_at_bracket_start = true; | 
| 160 | 0 | 0 |  |  |  |  | if (_M_current != _M_end && *_M_current == '^') | 
|  |  | 0 |  |  |  |  |  | 
| 161 |  |  |  |  |  |  | { | 
| 162 | 0 |  |  |  |  |  | _M_token = _S_token_bracket_neg_begin; | 
| 163 | 0 |  |  |  |  |  | ++_M_current; | 
| 164 |  |  |  |  |  |  | } | 
| 165 |  |  |  |  |  |  | else | 
| 166 | 0 |  |  |  |  |  | _M_token = _S_token_bracket_begin; | 
| 167 |  |  |  |  |  |  | } | 
| 168 | 0 | 0 |  |  |  |  | else if (__c == '{') | 
| 169 |  |  |  |  |  |  | { | 
| 170 | 0 |  |  |  |  |  | _M_state = _S_state_in_brace; | 
| 171 | 0 |  |  |  |  |  | _M_token = _S_token_interval_begin; | 
| 172 |  |  |  |  |  |  | } | 
| 173 | 0 | 0 |  |  |  |  | else if (__c != ']' && __c != '}') | 
|  |  | 0 |  |  |  |  |  | 
| 174 |  |  |  |  |  |  | { | 
| 175 | 0 |  |  |  |  |  | auto __it = _M_token_tbl; | 
| 176 | 0 |  |  |  |  |  | auto __narrowc = _M_ctype.narrow(__c, '\0'); | 
| 177 | 0 | 0 |  |  |  |  | for (; __it->first != '\0'; ++__it) | 
| 178 | 0 | 0 |  |  |  |  | if (__it->first == __narrowc) | 
| 179 |  |  |  |  |  |  | { | 
| 180 | 0 |  |  |  |  |  | _M_token = __it->second; | 
| 181 | 0 |  |  |  |  |  | return; | 
| 182 |  |  |  |  |  |  | } | 
| 183 | 0 |  |  |  |  |  | _GLIBCXX_DEBUG_ASSERT(false); | 
| 184 |  |  |  |  |  |  | } | 
| 185 |  |  |  |  |  |  | else | 
| 186 |  |  |  |  |  |  | { | 
| 187 | 0 |  |  |  |  |  | _M_token = _S_token_ord_char; | 
| 188 | 0 |  |  |  |  |  | _M_value.assign(1, __c); | 
| 189 |  |  |  |  |  |  | } | 
| 190 |  |  |  |  |  |  | } | 
| 191 |  |  |  |  |  |  |  | 
| 192 |  |  |  |  |  |  | // Differences between styles: | 
| 193 |  |  |  |  |  |  | // 1) different semantics of "[]" and "[^]". | 
| 194 |  |  |  |  |  |  | // 2) Escaping in bracket expr. | 
| 195 |  |  |  |  |  |  | template<typename _CharT> | 
| 196 |  |  |  |  |  |  | void | 
| 197 | 0 |  |  |  |  |  | _Scanner<_CharT>:: | 
| 198 |  |  |  |  |  |  | _M_scan_in_bracket() | 
| 199 |  |  |  |  |  |  | { | 
| 200 | 0 | 0 |  |  |  |  | if (_M_current == _M_end) | 
| 201 | 0 |  |  |  |  |  | __throw_regex_error(regex_constants::error_brack); | 
| 202 |  |  |  |  |  |  |  | 
| 203 | 0 |  |  |  |  |  | auto __c = *_M_current++; | 
| 204 |  |  |  |  |  |  |  | 
| 205 | 0 | 0 |  |  |  |  | if (__c == '[') | 
| 206 |  |  |  |  |  |  | { | 
| 207 | 0 | 0 |  |  |  |  | if (_M_current == _M_end) | 
| 208 | 0 |  |  |  |  |  | __throw_regex_error(regex_constants::error_brack); | 
| 209 |  |  |  |  |  |  |  | 
| 210 | 0 | 0 |  |  |  |  | if (*_M_current == '.') | 
| 211 |  |  |  |  |  |  | { | 
| 212 | 0 |  |  |  |  |  | _M_token = _S_token_collsymbol; | 
| 213 | 0 |  |  |  |  |  | _M_eat_class(*_M_current++); | 
| 214 |  |  |  |  |  |  | } | 
| 215 | 0 | 0 |  |  |  |  | else if (*_M_current == ':') | 
| 216 |  |  |  |  |  |  | { | 
| 217 | 0 |  |  |  |  |  | _M_token = _S_token_char_class_name; | 
| 218 | 0 |  |  |  |  |  | _M_eat_class(*_M_current++); | 
| 219 |  |  |  |  |  |  | } | 
| 220 | 0 | 0 |  |  |  |  | else if (*_M_current == '=') | 
| 221 |  |  |  |  |  |  | { | 
| 222 | 0 |  |  |  |  |  | _M_token = _S_token_equiv_class_name; | 
| 223 | 0 |  |  |  |  |  | _M_eat_class(*_M_current++); | 
| 224 |  |  |  |  |  |  | } | 
| 225 |  |  |  |  |  |  | else | 
| 226 |  |  |  |  |  |  | { | 
| 227 | 0 |  |  |  |  |  | _M_token = _S_token_ord_char; | 
| 228 | 0 |  |  |  |  |  | _M_value.assign(1, __c); | 
| 229 |  |  |  |  |  |  | } | 
| 230 |  |  |  |  |  |  | } | 
| 231 |  |  |  |  |  |  | // In POSIX, when encountering "[]" or "[^]", the ']' is interpreted | 
| 232 |  |  |  |  |  |  | // literally. So "[]]" and "[^]]" are valid regexes. See the testcases | 
| 233 |  |  |  |  |  |  | // `*/empty_range.cc`. | 
| 234 | 0 | 0 |  |  |  |  | else if (__c == ']' && (_M_is_ecma() || !_M_at_bracket_start)) | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 235 |  |  |  |  |  |  | { | 
| 236 | 0 |  |  |  |  |  | _M_token = _S_token_bracket_end; | 
| 237 | 0 |  |  |  |  |  | _M_state = _S_state_normal; | 
| 238 |  |  |  |  |  |  | } | 
| 239 |  |  |  |  |  |  | // ECMAScript and awk permit escaping inside a bracket expression. | 
| 240 | 0 | 0 |  |  |  |  | else if (__c == '\\' && (_M_is_ecma() || _M_is_awk())) | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 241 | 0 | 0 |  |  |  |  | (this->*_M_eat_escape)(); | 
| 242 |  |  |  |  |  |  | else | 
| 243 |  |  |  |  |  |  | { | 
| 244 | 0 |  |  |  |  |  | _M_token = _S_token_ord_char; | 
| 245 | 0 |  |  |  |  |  | _M_value.assign(1, __c); | 
| 246 |  |  |  |  |  |  | } | 
| 247 | 0 |  |  |  |  |  | _M_at_bracket_start = false; | 
| 248 | 0 |  |  |  |  |  | } | 
| 249 |  |  |  |  |  |  |  | 
| 250 |  |  |  |  |  |  | // Differences between styles: | 
| 251 |  |  |  |  |  |  | // 1) "\}" in basic style. | 
| 252 |  |  |  |  |  |  | template<typename _CharT> | 
| 253 |  |  |  |  |  |  | void | 
| 254 | 0 |  |  |  |  |  | _Scanner<_CharT>:: | 
| 255 |  |  |  |  |  |  | _M_scan_in_brace() | 
| 256 |  |  |  |  |  |  | { | 
| 257 | 0 | 0 |  |  |  |  | if (_M_current == _M_end) | 
| 258 | 0 |  |  |  |  |  | __throw_regex_error(regex_constants::error_brace); | 
| 259 |  |  |  |  |  |  |  | 
| 260 | 0 |  |  |  |  |  | auto __c = *_M_current++; | 
| 261 |  |  |  |  |  |  |  | 
| 262 | 0 | 0 |  |  |  |  | if (_M_ctype.is(_CtypeT::digit, __c)) | 
| 263 |  |  |  |  |  |  | { | 
| 264 | 0 |  |  |  |  |  | _M_token = _S_token_dup_count; | 
| 265 | 0 |  |  |  |  |  | _M_value.assign(1, __c); | 
| 266 | 0 | 0 |  |  |  |  | while (_M_current != _M_end | 
| 267 | 0 |  |  |  |  |  | && _M_ctype.is(_CtypeT::digit, *_M_current)) | 
| 268 | 0 |  |  |  |  |  | _M_value += *_M_current++; | 
| 269 |  |  |  |  |  |  | } | 
| 270 | 0 | 0 |  |  |  |  | else if (__c == ',') | 
| 271 | 0 |  |  |  |  |  | _M_token = _S_token_comma; | 
| 272 |  |  |  |  |  |  | // basic style closes an interval with "\}" instead of "}". | 
| 273 | 0 | 0 |  |  |  |  | else if (_M_is_basic()) | 
| 274 |  |  |  |  |  |  | { | 
| 275 | 0 | 0 |  |  |  |  | if (__c == '\\' && _M_current != _M_end && *_M_current == '}') | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 276 |  |  |  |  |  |  | { | 
| 277 | 0 |  |  |  |  |  | _M_state = _S_state_normal; | 
| 278 | 0 |  |  |  |  |  | _M_token = _S_token_interval_end; | 
| 279 | 0 |  |  |  |  |  | ++_M_current; | 
| 280 |  |  |  |  |  |  | } | 
| 281 |  |  |  |  |  |  | else | 
| 282 | 0 |  |  |  |  |  | __throw_regex_error(regex_constants::error_badbrace); | 
| 283 |  |  |  |  |  |  | } | 
| 284 | 0 | 0 |  |  |  |  | else if (__c == '}') | 
| 285 |  |  |  |  |  |  | { | 
| 286 | 0 |  |  |  |  |  | _M_state = _S_state_normal; | 
| 287 | 0 |  |  |  |  |  | _M_token = _S_token_interval_end; | 
| 288 |  |  |  |  |  |  | } | 
| 289 |  |  |  |  |  |  | else | 
| 290 | 0 |  |  |  |  |  | __throw_regex_error(regex_constants::error_badbrace); | 
| 291 | 0 |  |  |  |  |  | } | 
| 292 |  |  |  |  |  |  |  | 
| 293 |  |  |  |  |  |  | template<typename _CharT> | 
| 294 |  |  |  |  |  |  | void | 
| 295 | 0 |  |  |  |  |  | _Scanner<_CharT>:: | 
| 296 |  |  |  |  |  |  | _M_eat_escape_ecma() | 
| 297 |  |  |  |  |  |  | { | 
| 298 | 0 | 0 |  |  |  |  | if (_M_current == _M_end) | 
| 299 | 0 |  |  |  |  |  | __throw_regex_error(regex_constants::error_escape); | 
| 300 |  |  |  |  |  |  |  | 
| 301 | 0 |  |  |  |  |  | auto __c = *_M_current++; | 
| 302 | 0 |  |  |  |  |  | auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0')); | 
| 303 |  |  |  |  |  |  |  | 
| 304 | 0 | 0 |  |  |  |  | if (__pos != nullptr && (__c != 'b' || _M_state == _S_state_in_bracket)) | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 305 |  |  |  |  |  |  | { | 
| 306 | 0 |  |  |  |  |  | _M_token = _S_token_ord_char; | 
| 307 | 0 |  |  |  |  |  | _M_value.assign(1, *__pos); | 
| 308 |  |  |  |  |  |  | } | 
| 309 | 0 | 0 |  |  |  |  | else if (__c == 'b') | 
| 310 |  |  |  |  |  |  | { | 
| 311 | 0 |  |  |  |  |  | _M_token = _S_token_word_bound; | 
| 312 | 0 |  |  |  |  |  | _M_value.assign(1, 'p'); | 
| 313 |  |  |  |  |  |  | } | 
| 314 | 0 | 0 |  |  |  |  | else if (__c == 'B') | 
| 315 |  |  |  |  |  |  | { | 
| 316 | 0 |  |  |  |  |  | _M_token = _S_token_word_bound; | 
| 317 | 0 |  |  |  |  |  | _M_value.assign(1, 'n'); | 
| 318 |  |  |  |  |  |  | } | 
| 319 |  |  |  |  |  |  | // N3376 28.13 | 
| 320 | 0 | 0 |  |  |  |  | else if (__c == 'd' | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 321 |  |  |  |  |  |  | || __c == 'D' | 
| 322 |  |  |  |  |  |  | || __c == 's' | 
| 323 |  |  |  |  |  |  | || __c == 'S' | 
| 324 |  |  |  |  |  |  | || __c == 'w' | 
| 325 |  |  |  |  |  |  | || __c == 'W') | 
| 326 |  |  |  |  |  |  | { | 
| 327 | 0 |  |  |  |  |  | _M_token = _S_token_quoted_class; | 
| 328 | 0 |  |  |  |  |  | _M_value.assign(1, __c); | 
| 329 |  |  |  |  |  |  | } | 
| 330 | 0 | 0 |  |  |  |  | else if (__c == 'c') | 
| 331 |  |  |  |  |  |  | { | 
| 332 | 0 | 0 |  |  |  |  | if (_M_current == _M_end) | 
| 333 | 0 |  |  |  |  |  | __throw_regex_error(regex_constants::error_escape); | 
| 334 | 0 |  |  |  |  |  | _M_token = _S_token_ord_char; | 
| 335 | 0 |  |  |  |  |  | _M_value.assign(1, *_M_current++); | 
| 336 |  |  |  |  |  |  | } | 
| 337 | 0 | 0 |  |  |  |  | else if (__c == 'x' || __c == 'u') | 
|  |  | 0 |  |  |  |  |  | 
| 338 |  |  |  |  |  |  | { | 
| 339 | 0 |  |  |  |  |  | _M_value.erase(); | 
| 340 | 0 | 0 |  |  |  |  | for (int __i = 0; __i < (__c == 'x' ? 2 : 4); __i++) | 
|  |  | 0 |  |  |  |  |  | 
| 341 |  |  |  |  |  |  | { | 
| 342 | 0 | 0 |  |  |  |  | if (_M_current == _M_end | 
| 343 | 0 |  |  |  |  |  | || !_M_ctype.is(_CtypeT::xdigit, *_M_current)) | 
| 344 | 0 |  |  |  |  |  | __throw_regex_error(regex_constants::error_escape); | 
| 345 | 0 |  |  |  |  |  | _M_value += *_M_current++; | 
| 346 |  |  |  |  |  |  | } | 
| 347 | 0 |  |  |  |  |  | _M_token = _S_token_hex_num; | 
| 348 |  |  |  |  |  |  | } | 
| 349 |  |  |  |  |  |  | // ECMAScript recognizes multi-digit back-references. | 
| 350 | 0 | 0 |  |  |  |  | else if (_M_ctype.is(_CtypeT::digit, __c)) | 
| 351 |  |  |  |  |  |  | { | 
| 352 | 0 |  |  |  |  |  | _M_value.assign(1, __c); | 
| 353 | 0 | 0 |  |  |  |  | while (_M_current != _M_end | 
| 354 | 0 |  |  |  |  |  | && _M_ctype.is(_CtypeT::digit, *_M_current)) | 
| 355 | 0 |  |  |  |  |  | _M_value += *_M_current++; | 
| 356 | 0 |  |  |  |  |  | _M_token = _S_token_backref; | 
| 357 |  |  |  |  |  |  | } | 
| 358 |  |  |  |  |  |  | else | 
| 359 |  |  |  |  |  |  | { | 
| 360 | 0 |  |  |  |  |  | _M_token = _S_token_ord_char; | 
| 361 | 0 |  |  |  |  |  | _M_value.assign(1, __c); | 
| 362 |  |  |  |  |  |  | } | 
| 363 | 0 |  |  |  |  |  | } | 
| 364 |  |  |  |  |  |  |  | 
| 365 |  |  |  |  |  |  | // Differences between styles: | 
| 366 |  |  |  |  |  |  | // 1) Extended doesn't support backref, but basic does. | 
| 367 |  |  |  |  |  |  | template<typename _CharT> | 
| 368 |  |  |  |  |  |  | void | 
| 369 | 0 |  |  |  |  |  | _Scanner<_CharT>:: | 
| 370 |  |  |  |  |  |  | _M_eat_escape_posix() | 
| 371 |  |  |  |  |  |  | { | 
| 372 | 0 | 0 |  |  |  |  | if (_M_current == _M_end) | 
| 373 | 0 |  |  |  |  |  | __throw_regex_error(regex_constants::error_escape); | 
| 374 |  |  |  |  |  |  |  | 
| 375 | 0 |  |  |  |  |  | auto __c = *_M_current; | 
| 376 | 0 |  |  |  |  |  | auto __pos = std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0')); | 
| 377 |  |  |  |  |  |  |  | 
| 378 | 0 | 0 |  |  |  |  | if (__pos != nullptr && *__pos != '\0') | 
|  |  | 0 |  |  |  |  |  | 
| 379 |  |  |  |  |  |  | { | 
| 380 | 0 |  |  |  |  |  | _M_token = _S_token_ord_char; | 
| 381 | 0 |  |  |  |  |  | _M_value.assign(1, __c); | 
| 382 |  |  |  |  |  |  | } | 
| 383 |  |  |  |  |  |  | // We MUST check for awk before handling backrefs: awk has no backrefs. | 
| 384 | 0 | 0 |  |  |  |  | else if (_M_is_awk()) | 
| 385 |  |  |  |  |  |  | { | 
| 386 | 0 |  |  |  |  |  | _M_eat_escape_awk(); | 
| 387 | 0 |  |  |  |  |  | return; | 
| 388 |  |  |  |  |  |  | } | 
| 389 | 0 | 0 |  |  |  |  | else if (_M_is_basic() && _M_ctype.is(_CtypeT::digit, __c) && __c != '0') | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 390 |  |  |  |  |  |  | { | 
| 391 | 0 |  |  |  |  |  | _M_token = _S_token_backref; | 
| 392 | 0 |  |  |  |  |  | _M_value.assign(1, __c); | 
| 393 |  |  |  |  |  |  | } | 
| 394 |  |  |  |  |  |  | else | 
| 395 |  |  |  |  |  |  | { | 
| 396 |  |  |  |  |  |  | #ifdef __STRICT_ANSI__ | 
| 397 |  |  |  |  |  |  | // POSIX says it is undefined to escape ordinary characters | 
| 398 | 0 |  |  |  |  |  | __throw_regex_error(regex_constants::error_escape); | 
| 399 |  |  |  |  |  |  | #else | 
| 400 |  |  |  |  |  |  | _M_token = _S_token_ord_char; | 
| 401 |  |  |  |  |  |  | _M_value.assign(1, __c); | 
| 402 |  |  |  |  |  |  | #endif | 
| 403 |  |  |  |  |  |  | } | 
| 404 | 0 |  |  |  |  |  | ++_M_current; | 
| 405 |  |  |  |  |  |  | } | 
| 406 |  |  |  |  |  |  |  | 
| 407 |  |  |  |  |  |  | template<typename _CharT> | 
| 408 |  |  |  |  |  |  | void | 
| 409 | 0 |  |  |  |  |  | _Scanner<_CharT>:: | 
| 410 |  |  |  |  |  |  | _M_eat_escape_awk() | 
| 411 |  |  |  |  |  |  | { | 
| 412 | 0 |  |  |  |  |  | auto __c = *_M_current++; | 
| 413 | 0 |  |  |  |  |  | auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0')); | 
| 414 |  |  |  |  |  |  |  | 
| 415 | 0 | 0 |  |  |  |  | if (__pos != nullptr) | 
| 416 |  |  |  |  |  |  | { | 
| 417 | 0 |  |  |  |  |  | _M_token = _S_token_ord_char; | 
| 418 | 0 |  |  |  |  |  | _M_value.assign(1, *__pos); | 
| 419 |  |  |  |  |  |  | } | 
| 420 |  |  |  |  |  |  | // \ddd for oct representation | 
| 421 | 0 | 0 |  |  |  |  | else if (_M_ctype.is(_CtypeT::digit, __c) | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 422 | 0 |  |  |  |  |  | && __c != '8' | 
| 423 |  |  |  |  |  |  | && __c != '9') | 
| 424 |  |  |  |  |  |  | { | 
| 425 | 0 |  |  |  |  |  | _M_value.assign(1,  __c); | 
| 426 | 0 | 0 |  |  |  |  | for (int __i = 0; | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 427 |  |  |  |  |  |  | __i < 2 | 
| 428 | 0 |  |  |  |  |  | && _M_current != _M_end | 
| 429 | 0 |  |  |  |  |  | && _M_ctype.is(_CtypeT::digit, *_M_current) | 
| 430 | 0 |  |  |  |  |  | && *_M_current != '8' | 
| 431 | 0 |  |  |  |  |  | && *_M_current != '9'; | 
| 432 |  |  |  |  |  |  | __i++) | 
| 433 | 0 |  |  |  |  |  | _M_value += *_M_current++; | 
| 434 | 0 |  |  |  |  |  | _M_token = _S_token_oct_num; | 
| 435 | 0 |  |  |  |  |  | return; | 
| 436 |  |  |  |  |  |  | } | 
| 437 |  |  |  |  |  |  | else | 
| 438 | 0 |  |  |  |  |  | __throw_regex_error(regex_constants::error_escape); | 
| 439 |  |  |  |  |  |  | } | 
| 440 |  |  |  |  |  |  |  | 
| 441 |  |  |  |  |  |  | // Eats a character class or throws an exception. | 
| 442 |  |  |  |  |  |  | // __ch could be ':', '.' or '='. On successful return, _M_current points | 
| 443 |  |  |  |  |  |  | // just past the closing ']'. | 
| 444 |  |  |  |  |  |  | template<typename _CharT> | 
| 445 |  |  |  |  |  |  | void | 
| 446 | 0 |  |  |  |  |  | _Scanner<_CharT>:: | 
| 447 |  |  |  |  |  |  | _M_eat_class(char __ch) | 
| 448 |  |  |  |  |  |  | { | 
| 449 | 0 | 0 |  |  |  |  | for (_M_value.clear(); _M_current != _M_end && *_M_current != __ch;) | 
|  |  | 0 |  |  |  |  |  | 
| 450 | 0 |  |  |  |  |  | _M_value += *_M_current++; | 
| 451 | 0 | 0 |  |  |  |  | if (_M_current == _M_end | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 452 | 0 |  |  |  |  |  | || *_M_current++ != __ch | 
| 453 | 0 |  |  |  |  |  | || _M_current == _M_end // skip __ch | 
| 454 | 0 |  |  |  |  |  | || *_M_current++ != ']') // skip ']' | 
| 455 |  |  |  |  |  |  | { | 
| 456 | 0 | 0 |  |  |  |  | if (__ch == ':') | 
| 457 | 0 |  |  |  |  |  | __throw_regex_error(regex_constants::error_ctype); | 
| 458 |  |  |  |  |  |  | else | 
| 459 | 0 |  |  |  |  |  | __throw_regex_error(regex_constants::error_collate); | 
| 460 |  |  |  |  |  |  | } | 
| 461 | 0 |  |  |  |  |  | } | 
| 462 |  |  |  |  |  |  |  | 
| 463 |  |  |  |  |  |  | #ifdef _GLIBCXX_DEBUG | 
| 464 |  |  |  |  |  |  | template<typename _CharT> | 
| 465 |  |  |  |  |  |  | std::ostream& | 
| 466 |  |  |  |  |  |  | _Scanner<_CharT>:: | 
| 467 |  |  |  |  |  |  | _M_print(std::ostream& ostr) | 
| 468 |  |  |  |  |  |  | { | 
| 469 |  |  |  |  |  |  | switch (_M_token) | 
| 470 |  |  |  |  |  |  | { | 
| 471 |  |  |  |  |  |  | case _S_token_anychar: | 
| 472 |  |  |  |  |  |  | ostr << "any-character\n"; | 
| 473 |  |  |  |  |  |  | break; | 
| 474 |  |  |  |  |  |  | case _S_token_backref: | 
| 475 |  |  |  |  |  |  | ostr << "backref\n"; | 
| 476 |  |  |  |  |  |  | break; | 
| 477 |  |  |  |  |  |  | case _S_token_bracket_begin: | 
| 478 |  |  |  |  |  |  | ostr << "bracket-begin\n"; | 
| 479 |  |  |  |  |  |  | break; | 
| 480 |  |  |  |  |  |  | case _S_token_bracket_neg_begin: | 
| 481 |  |  |  |  |  |  | ostr << "bracket-neg-begin\n"; | 
| 482 |  |  |  |  |  |  | break; | 
| 483 |  |  |  |  |  |  | case _S_token_bracket_end: | 
| 484 |  |  |  |  |  |  | ostr << "bracket-end\n"; | 
| 485 |  |  |  |  |  |  | break; | 
| 486 |  |  |  |  |  |  | case _S_token_char_class_name: | 
| 487 |  |  |  |  |  |  | ostr << "char-class-name \"" << _M_value << "\"\n"; | 
| 488 |  |  |  |  |  |  | break; | 
| 489 |  |  |  |  |  |  | case _S_token_closure0: | 
| 490 |  |  |  |  |  |  | ostr << "closure0\n"; | 
| 491 |  |  |  |  |  |  | break; | 
| 492 |  |  |  |  |  |  | case _S_token_closure1: | 
| 493 |  |  |  |  |  |  | ostr << "closure1\n"; | 
| 494 |  |  |  |  |  |  | break; | 
| 495 |  |  |  |  |  |  | case _S_token_collsymbol: | 
| 496 |  |  |  |  |  |  | ostr << "collsymbol \"" << _M_value << "\"\n"; | 
| 497 |  |  |  |  |  |  | break; | 
| 498 |  |  |  |  |  |  | case _S_token_comma: | 
| 499 |  |  |  |  |  |  | ostr << "comma\n"; | 
| 500 |  |  |  |  |  |  | break; | 
| 501 |  |  |  |  |  |  | case _S_token_dup_count: | 
| 502 |  |  |  |  |  |  | ostr << "dup count: " << _M_value << "\n"; | 
| 503 |  |  |  |  |  |  | break; | 
| 504 |  |  |  |  |  |  | case _S_token_eof: | 
| 505 |  |  |  |  |  |  | ostr << "EOF\n"; | 
| 506 |  |  |  |  |  |  | break; | 
| 507 |  |  |  |  |  |  | case _S_token_equiv_class_name: | 
| 508 |  |  |  |  |  |  | ostr << "equiv-class-name \"" << _M_value << "\"\n"; | 
| 509 |  |  |  |  |  |  | break; | 
| 510 |  |  |  |  |  |  | case _S_token_interval_begin: | 
| 511 |  |  |  |  |  |  | ostr << "interval begin\n"; | 
| 512 |  |  |  |  |  |  | break; | 
| 513 |  |  |  |  |  |  | case _S_token_interval_end: | 
| 514 |  |  |  |  |  |  | ostr << "interval end\n"; | 
| 515 |  |  |  |  |  |  | break; | 
| 516 |  |  |  |  |  |  | case _S_token_line_begin: | 
| 517 |  |  |  |  |  |  | ostr << "line begin\n"; | 
| 518 |  |  |  |  |  |  | break; | 
| 519 |  |  |  |  |  |  | case _S_token_line_end: | 
| 520 |  |  |  |  |  |  | ostr << "line end\n"; | 
| 521 |  |  |  |  |  |  | break; | 
| 522 |  |  |  |  |  |  | case _S_token_opt: | 
| 523 |  |  |  |  |  |  | ostr << "opt\n"; | 
| 524 |  |  |  |  |  |  | break; | 
| 525 |  |  |  |  |  |  | case _S_token_or: | 
| 526 |  |  |  |  |  |  | ostr << "or\n"; | 
| 527 |  |  |  |  |  |  | break; | 
| 528 |  |  |  |  |  |  | case _S_token_ord_char: | 
| 529 |  |  |  |  |  |  | ostr << "ordinary character: \"" << _M_value << "\"\n"; | 
| 530 |  |  |  |  |  |  | break; | 
| 531 |  |  |  |  |  |  | case _S_token_subexpr_begin: | 
| 532 |  |  |  |  |  |  | ostr << "subexpr begin\n"; | 
| 533 |  |  |  |  |  |  | break; | 
| 534 |  |  |  |  |  |  | case _S_token_subexpr_no_group_begin: | 
| 535 |  |  |  |  |  |  | ostr << "no grouping subexpr begin\n"; | 
| 536 |  |  |  |  |  |  | break; | 
| 537 |  |  |  |  |  |  | case _S_token_subexpr_lookahead_begin: | 
| 538 |  |  |  |  |  |  | ostr << "lookahead subexpr begin\n"; | 
| 539 |  |  |  |  |  |  | break; | 
| 540 |  |  |  |  |  |  | case _S_token_subexpr_end: | 
| 541 |  |  |  |  |  |  | ostr << "subexpr end\n"; | 
| 542 |  |  |  |  |  |  | break; | 
| 543 |  |  |  |  |  |  | case _S_token_unknown: | 
| 544 |  |  |  |  |  |  | ostr << "-- unknown token --\n"; | 
| 545 |  |  |  |  |  |  | break; | 
| 546 |  |  |  |  |  |  | case _S_token_oct_num: | 
| 547 |  |  |  |  |  |  | ostr << "oct number " << _M_value << "\n"; | 
| 548 |  |  |  |  |  |  | break; | 
| 549 |  |  |  |  |  |  | case _S_token_hex_num: | 
| 550 |  |  |  |  |  |  | ostr << "hex number " << _M_value << "\n"; | 
| 551 |  |  |  |  |  |  | break; | 
| 552 |  |  |  |  |  |  | case _S_token_quoted_class: | 
| 553 |  |  |  |  |  |  | ostr << "quoted class " << "\\" << _M_value << "\n"; | 
| 554 |  |  |  |  |  |  | break; | 
| 555 |  |  |  |  |  |  | default: | 
| 556 |  |  |  |  |  |  | _GLIBCXX_DEBUG_ASSERT(false); | 
| 557 |  |  |  |  |  |  | } | 
| 558 |  |  |  |  |  |  | return ostr; | 
| 559 |  |  |  |  |  |  | } | 
| 560 |  |  |  |  |  |  | #endif | 
| 561 |  |  |  |  |  |  |  | 
| 562 |  |  |  |  |  |  | _GLIBCXX_END_NAMESPACE_VERSION | 
| 563 |  |  |  |  |  |  | } // namespace __detail | 
| 564 |  |  |  |  |  |  | } // namespace |