line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
// class template regex -*- C++ -*- |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
// Copyright (C) 2013-2015 Free Software Foundation, Inc. |
4
|
|
|
|
|
|
|
// |
5
|
|
|
|
|
|
|
// This file is part of the GNU ISO C++ Library. This library is free |
6
|
|
|
|
|
|
|
// software; you can redistribute it and/or modify it under the |
7
|
|
|
|
|
|
|
// terms of the GNU General Public License as published by the |
8
|
|
|
|
|
|
|
// Free Software Foundation; either version 3, or (at your option) |
9
|
|
|
|
|
|
|
// any later version. |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
// This library is distributed in the hope that it will be useful, |
12
|
|
|
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
13
|
|
|
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14
|
|
|
|
|
|
|
// GNU General Public License for more details. |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
// Under Section 7 of GPL version 3, you are granted additional |
17
|
|
|
|
|
|
|
// permissions described in the GCC Runtime Library Exception, version |
18
|
|
|
|
|
|
|
// 3.1, as published by the Free Software Foundation. |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
// You should have received a copy of the GNU General Public License and |
21
|
|
|
|
|
|
|
// a copy of the GCC Runtime Library Exception along with this program; |
22
|
|
|
|
|
|
|
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
23
|
|
|
|
|
|
|
// . |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
/** |
26
|
|
|
|
|
|
|
* @file bits/regex_scanner.tcc |
27
|
|
|
|
|
|
|
* This is an internal header file, included by other library headers. |
28
|
|
|
|
|
|
|
* Do not attempt to use it directly. @headername{regex} |
29
|
|
|
|
|
|
|
*/ |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
// FIXME make comments doxygen format. |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
// N3376 specified 6 regex styles: ECMAScript, basic, extended, grep, egrep |
34
|
|
|
|
|
|
|
// and awk |
35
|
|
|
|
|
|
|
// 1) grep is basic except '\n' is treated as '|' |
36
|
|
|
|
|
|
|
// 2) egrep is extended except '\n' is treated as '|' |
37
|
|
|
|
|
|
|
// 3) awk is extended except special escaping rules, and there's no |
38
|
|
|
|
|
|
|
// back-reference. |
39
|
|
|
|
|
|
|
// |
40
|
|
|
|
|
|
|
// References: |
41
|
|
|
|
|
|
|
// |
42
|
|
|
|
|
|
|
// ECMAScript: ECMA-262 15.10 |
43
|
|
|
|
|
|
|
// |
44
|
|
|
|
|
|
|
// basic, extended: |
45
|
|
|
|
|
|
|
// http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap09.html |
46
|
|
|
|
|
|
|
// |
47
|
|
|
|
|
|
|
// awk: http://pubs.opengroup.org/onlinepubs/000095399/utilities/awk.html |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
namespace std _GLIBCXX_VISIBILITY(default) |
50
|
|
|
|
|
|
|
{ |
51
|
|
|
|
|
|
|
namespace __detail |
52
|
|
|
|
|
|
|
{ |
53
|
|
|
|
|
|
|
_GLIBCXX_BEGIN_NAMESPACE_VERSION |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
template |
56
|
0
|
|
|
|
|
|
_Scanner<_CharT>:: |
57
|
|
|
|
|
|
|
_Scanner(typename _Scanner::_IterT __begin, |
58
|
|
|
|
|
|
|
typename _Scanner::_IterT __end, |
59
|
|
|
|
|
|
|
_FlagT __flags, std::locale __loc) |
60
|
|
|
|
|
|
|
: _ScannerBase(__flags), |
61
|
|
|
|
|
|
|
_M_current(__begin), _M_end(__end), |
62
|
0
|
|
|
|
|
|
_M_ctype(std::use_facet<_CtypeT>(__loc)), |
63
|
|
|
|
|
|
|
_M_eat_escape(_M_is_ecma() |
64
|
0
|
|
|
|
|
|
? &_Scanner::_M_eat_escape_ecma |
65
|
0
|
0
|
|
|
|
|
: &_Scanner::_M_eat_escape_posix) |
66
|
0
|
0
|
|
|
|
|
{ _M_advance(); } |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
template |
69
|
|
|
|
|
|
|
void |
70
|
0
|
|
|
|
|
|
_Scanner<_CharT>:: |
71
|
|
|
|
|
|
|
_M_advance() |
72
|
|
|
|
|
|
|
{ |
73
|
0
|
0
|
|
|
|
|
if (_M_current == _M_end) |
74
|
|
|
|
|
|
|
{ |
75
|
0
|
|
|
|
|
|
_M_token = _S_token_eof; |
76
|
0
|
|
|
|
|
|
return; |
77
|
|
|
|
|
|
|
} |
78
|
|
|
|
|
|
|
|
79
|
0
|
0
|
|
|
|
|
if (_M_state == _S_state_normal) |
80
|
0
|
|
|
|
|
|
_M_scan_normal(); |
81
|
0
|
0
|
|
|
|
|
else if (_M_state == _S_state_in_bracket) |
82
|
0
|
|
|
|
|
|
_M_scan_in_bracket(); |
83
|
0
|
0
|
|
|
|
|
else if (_M_state == _S_state_in_brace) |
84
|
0
|
|
|
|
|
|
_M_scan_in_brace(); |
85
|
|
|
|
|
|
|
else |
86
|
|
|
|
|
|
|
{ |
87
|
|
|
|
|
|
|
_GLIBCXX_DEBUG_ASSERT(false); |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
} |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
// Differences between styles: |
92
|
|
|
|
|
|
|
// 1) "\(", "\)", "\{" in basic. It's not escaping. |
93
|
|
|
|
|
|
|
// 2) "(?:", "(?=", "(?!" in ECMAScript. |
94
|
|
|
|
|
|
|
template |
95
|
|
|
|
|
|
|
void |
96
|
0
|
|
|
|
|
|
_Scanner<_CharT>:: |
97
|
|
|
|
|
|
|
_M_scan_normal() |
98
|
|
|
|
|
|
|
{ |
99
|
0
|
|
|
|
|
|
auto __c = *_M_current++; |
100
|
|
|
|
|
|
|
|
101
|
0
|
0
|
|
|
|
|
if (std::strchr(_M_spec_char, _M_ctype.narrow(__c, ' ')) == nullptr) |
102
|
|
|
|
|
|
|
{ |
103
|
0
|
|
|
|
|
|
_M_token = _S_token_ord_char; |
104
|
0
|
|
|
|
|
|
_M_value.assign(1, __c); |
105
|
0
|
|
|
|
|
|
return; |
106
|
|
|
|
|
|
|
} |
107
|
0
|
0
|
|
|
|
|
if (__c == '\\') |
108
|
|
|
|
|
|
|
{ |
109
|
0
|
0
|
|
|
|
|
if (_M_current == _M_end) |
110
|
0
|
|
|
|
|
|
__throw_regex_error(regex_constants::error_escape); |
111
|
|
|
|
|
|
|
|
112
|
0
|
0
|
|
|
|
|
if (!_M_is_basic() |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
113
|
0
|
|
|
|
|
|
|| (*_M_current != '(' |
114
|
0
|
|
|
|
|
|
&& *_M_current != ')' |
115
|
0
|
|
|
|
|
|
&& *_M_current != '{')) |
116
|
|
|
|
|
|
|
{ |
117
|
0
|
0
|
|
|
|
|
(this->*_M_eat_escape)(); |
118
|
0
|
|
|
|
|
|
return; |
119
|
|
|
|
|
|
|
} |
120
|
0
|
|
|
|
|
|
__c = *_M_current++; |
121
|
|
|
|
|
|
|
} |
122
|
0
|
0
|
|
|
|
|
if (__c == '(') |
123
|
|
|
|
|
|
|
{ |
124
|
0
|
0
|
|
|
|
|
if (_M_is_ecma() && *_M_current == '?') |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
125
|
|
|
|
|
|
|
{ |
126
|
0
|
0
|
|
|
|
|
if (++_M_current == _M_end) |
127
|
0
|
|
|
|
|
|
__throw_regex_error(regex_constants::error_paren); |
128
|
|
|
|
|
|
|
|
129
|
0
|
0
|
|
|
|
|
if (*_M_current == ':') |
130
|
|
|
|
|
|
|
{ |
131
|
0
|
|
|
|
|
|
++_M_current; |
132
|
0
|
|
|
|
|
|
_M_token = _S_token_subexpr_no_group_begin; |
133
|
|
|
|
|
|
|
} |
134
|
0
|
0
|
|
|
|
|
else if (*_M_current == '=') |
135
|
|
|
|
|
|
|
{ |
136
|
0
|
|
|
|
|
|
++_M_current; |
137
|
0
|
|
|
|
|
|
_M_token = _S_token_subexpr_lookahead_begin; |
138
|
0
|
|
|
|
|
|
_M_value.assign(1, 'p'); |
139
|
|
|
|
|
|
|
} |
140
|
0
|
0
|
|
|
|
|
else if (*_M_current == '!') |
141
|
|
|
|
|
|
|
{ |
142
|
0
|
|
|
|
|
|
++_M_current; |
143
|
0
|
|
|
|
|
|
_M_token = _S_token_subexpr_lookahead_begin; |
144
|
0
|
|
|
|
|
|
_M_value.assign(1, 'n'); |
145
|
|
|
|
|
|
|
} |
146
|
|
|
|
|
|
|
else |
147
|
0
|
|
|
|
|
|
__throw_regex_error(regex_constants::error_paren); |
148
|
|
|
|
|
|
|
} |
149
|
0
|
0
|
|
|
|
|
else if (_M_flags & regex_constants::nosubs) |
150
|
0
|
|
|
|
|
|
_M_token = _S_token_subexpr_no_group_begin; |
151
|
|
|
|
|
|
|
else |
152
|
0
|
|
|
|
|
|
_M_token = _S_token_subexpr_begin; |
153
|
|
|
|
|
|
|
} |
154
|
0
|
0
|
|
|
|
|
else if (__c == ')') |
155
|
0
|
|
|
|
|
|
_M_token = _S_token_subexpr_end; |
156
|
0
|
0
|
|
|
|
|
else if (__c == '[') |
157
|
|
|
|
|
|
|
{ |
158
|
0
|
|
|
|
|
|
_M_state = _S_state_in_bracket; |
159
|
0
|
|
|
|
|
|
_M_at_bracket_start = true; |
160
|
0
|
0
|
|
|
|
|
if (_M_current != _M_end && *_M_current == '^') |
|
|
0
|
|
|
|
|
|
161
|
|
|
|
|
|
|
{ |
162
|
0
|
|
|
|
|
|
_M_token = _S_token_bracket_neg_begin; |
163
|
0
|
|
|
|
|
|
++_M_current; |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
else |
166
|
0
|
|
|
|
|
|
_M_token = _S_token_bracket_begin; |
167
|
|
|
|
|
|
|
} |
168
|
0
|
0
|
|
|
|
|
else if (__c == '{') |
169
|
|
|
|
|
|
|
{ |
170
|
0
|
|
|
|
|
|
_M_state = _S_state_in_brace; |
171
|
0
|
|
|
|
|
|
_M_token = _S_token_interval_begin; |
172
|
|
|
|
|
|
|
} |
173
|
0
|
0
|
|
|
|
|
else if (__c != ']' && __c != '}') |
|
|
0
|
|
|
|
|
|
174
|
|
|
|
|
|
|
{ |
175
|
0
|
|
|
|
|
|
auto __it = _M_token_tbl; |
176
|
0
|
|
|
|
|
|
auto __narrowc = _M_ctype.narrow(__c, '\0'); |
177
|
0
|
0
|
|
|
|
|
for (; __it->first != '\0'; ++__it) |
178
|
0
|
0
|
|
|
|
|
if (__it->first == __narrowc) |
179
|
|
|
|
|
|
|
{ |
180
|
0
|
|
|
|
|
|
_M_token = __it->second; |
181
|
0
|
|
|
|
|
|
return; |
182
|
|
|
|
|
|
|
} |
183
|
0
|
|
|
|
|
|
_GLIBCXX_DEBUG_ASSERT(false); |
184
|
|
|
|
|
|
|
} |
185
|
|
|
|
|
|
|
else |
186
|
|
|
|
|
|
|
{ |
187
|
0
|
|
|
|
|
|
_M_token = _S_token_ord_char; |
188
|
0
|
|
|
|
|
|
_M_value.assign(1, __c); |
189
|
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
|
} |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
// Differences between styles: |
193
|
|
|
|
|
|
|
// 1) different semantics of "[]" and "[^]". |
194
|
|
|
|
|
|
|
// 2) Escaping in bracket expr. |
195
|
|
|
|
|
|
|
template |
196
|
|
|
|
|
|
|
void |
197
|
0
|
|
|
|
|
|
_Scanner<_CharT>:: |
198
|
|
|
|
|
|
|
_M_scan_in_bracket() |
199
|
|
|
|
|
|
|
{ |
200
|
0
|
0
|
|
|
|
|
if (_M_current == _M_end) |
201
|
0
|
|
|
|
|
|
__throw_regex_error(regex_constants::error_brack); |
202
|
|
|
|
|
|
|
|
203
|
0
|
|
|
|
|
|
auto __c = *_M_current++; |
204
|
|
|
|
|
|
|
|
205
|
0
|
0
|
|
|
|
|
if (__c == '[') |
206
|
|
|
|
|
|
|
{ |
207
|
0
|
0
|
|
|
|
|
if (_M_current == _M_end) |
208
|
0
|
|
|
|
|
|
__throw_regex_error(regex_constants::error_brack); |
209
|
|
|
|
|
|
|
|
210
|
0
|
0
|
|
|
|
|
if (*_M_current == '.') |
211
|
|
|
|
|
|
|
{ |
212
|
0
|
|
|
|
|
|
_M_token = _S_token_collsymbol; |
213
|
0
|
|
|
|
|
|
_M_eat_class(*_M_current++); |
214
|
|
|
|
|
|
|
} |
215
|
0
|
0
|
|
|
|
|
else if (*_M_current == ':') |
216
|
|
|
|
|
|
|
{ |
217
|
0
|
|
|
|
|
|
_M_token = _S_token_char_class_name; |
218
|
0
|
|
|
|
|
|
_M_eat_class(*_M_current++); |
219
|
|
|
|
|
|
|
} |
220
|
0
|
0
|
|
|
|
|
else if (*_M_current == '=') |
221
|
|
|
|
|
|
|
{ |
222
|
0
|
|
|
|
|
|
_M_token = _S_token_equiv_class_name; |
223
|
0
|
|
|
|
|
|
_M_eat_class(*_M_current++); |
224
|
|
|
|
|
|
|
} |
225
|
|
|
|
|
|
|
else |
226
|
|
|
|
|
|
|
{ |
227
|
0
|
|
|
|
|
|
_M_token = _S_token_ord_char; |
228
|
0
|
|
|
|
|
|
_M_value.assign(1, __c); |
229
|
|
|
|
|
|
|
} |
230
|
|
|
|
|
|
|
} |
231
|
|
|
|
|
|
|
// In POSIX, when encountering "[]" or "[^]", the ']' is interpreted |
232
|
|
|
|
|
|
|
// literally. So "[]]" and "[^]]" are valid regexes. See the testcases |
233
|
|
|
|
|
|
|
// `*/empty_range.cc`. |
234
|
0
|
0
|
|
|
|
|
else if (__c == ']' && (_M_is_ecma() || !_M_at_bracket_start)) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
235
|
|
|
|
|
|
|
{ |
236
|
0
|
|
|
|
|
|
_M_token = _S_token_bracket_end; |
237
|
0
|
|
|
|
|
|
_M_state = _S_state_normal; |
238
|
|
|
|
|
|
|
} |
239
|
|
|
|
|
|
|
// ECMAScript and awk permits escaping in bracket. |
240
|
0
|
0
|
|
|
|
|
else if (__c == '\\' && (_M_is_ecma() || _M_is_awk())) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
241
|
0
|
0
|
|
|
|
|
(this->*_M_eat_escape)(); |
242
|
|
|
|
|
|
|
else |
243
|
|
|
|
|
|
|
{ |
244
|
0
|
|
|
|
|
|
_M_token = _S_token_ord_char; |
245
|
0
|
|
|
|
|
|
_M_value.assign(1, __c); |
246
|
|
|
|
|
|
|
} |
247
|
0
|
|
|
|
|
|
_M_at_bracket_start = false; |
248
|
0
|
|
|
|
|
|
} |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
// Differences between styles: |
251
|
|
|
|
|
|
|
// 1) "\}" in basic style. |
252
|
|
|
|
|
|
|
template |
253
|
|
|
|
|
|
|
void |
254
|
0
|
|
|
|
|
|
_Scanner<_CharT>:: |
255
|
|
|
|
|
|
|
_M_scan_in_brace() |
256
|
|
|
|
|
|
|
{ |
257
|
0
|
0
|
|
|
|
|
if (_M_current == _M_end) |
258
|
0
|
|
|
|
|
|
__throw_regex_error(regex_constants::error_brace); |
259
|
|
|
|
|
|
|
|
260
|
0
|
|
|
|
|
|
auto __c = *_M_current++; |
261
|
|
|
|
|
|
|
|
262
|
0
|
0
|
|
|
|
|
if (_M_ctype.is(_CtypeT::digit, __c)) |
263
|
|
|
|
|
|
|
{ |
264
|
0
|
|
|
|
|
|
_M_token = _S_token_dup_count; |
265
|
0
|
|
|
|
|
|
_M_value.assign(1, __c); |
266
|
0
|
0
|
|
|
|
|
while (_M_current != _M_end |
267
|
0
|
|
|
|
|
|
&& _M_ctype.is(_CtypeT::digit, *_M_current)) |
268
|
0
|
|
|
|
|
|
_M_value += *_M_current++; |
269
|
|
|
|
|
|
|
} |
270
|
0
|
0
|
|
|
|
|
else if (__c == ',') |
271
|
0
|
|
|
|
|
|
_M_token = _S_token_comma; |
272
|
|
|
|
|
|
|
// basic use \}. |
273
|
0
|
0
|
|
|
|
|
else if (_M_is_basic()) |
274
|
|
|
|
|
|
|
{ |
275
|
0
|
0
|
|
|
|
|
if (__c == '\\' && _M_current != _M_end && *_M_current == '}') |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
276
|
|
|
|
|
|
|
{ |
277
|
0
|
|
|
|
|
|
_M_state = _S_state_normal; |
278
|
0
|
|
|
|
|
|
_M_token = _S_token_interval_end; |
279
|
0
|
|
|
|
|
|
++_M_current; |
280
|
|
|
|
|
|
|
} |
281
|
|
|
|
|
|
|
else |
282
|
0
|
|
|
|
|
|
__throw_regex_error(regex_constants::error_badbrace); |
283
|
|
|
|
|
|
|
} |
284
|
0
|
0
|
|
|
|
|
else if (__c == '}') |
285
|
|
|
|
|
|
|
{ |
286
|
0
|
|
|
|
|
|
_M_state = _S_state_normal; |
287
|
0
|
|
|
|
|
|
_M_token = _S_token_interval_end; |
288
|
|
|
|
|
|
|
} |
289
|
|
|
|
|
|
|
else |
290
|
0
|
|
|
|
|
|
__throw_regex_error(regex_constants::error_badbrace); |
291
|
0
|
|
|
|
|
|
} |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
template |
294
|
|
|
|
|
|
|
void |
295
|
0
|
|
|
|
|
|
_Scanner<_CharT>:: |
296
|
|
|
|
|
|
|
_M_eat_escape_ecma() |
297
|
|
|
|
|
|
|
{ |
298
|
0
|
0
|
|
|
|
|
if (_M_current == _M_end) |
299
|
0
|
|
|
|
|
|
__throw_regex_error(regex_constants::error_escape); |
300
|
|
|
|
|
|
|
|
301
|
0
|
|
|
|
|
|
auto __c = *_M_current++; |
302
|
0
|
|
|
|
|
|
auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0')); |
303
|
|
|
|
|
|
|
|
304
|
0
|
0
|
|
|
|
|
if (__pos != nullptr && (__c != 'b' || _M_state == _S_state_in_bracket)) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
305
|
|
|
|
|
|
|
{ |
306
|
0
|
|
|
|
|
|
_M_token = _S_token_ord_char; |
307
|
0
|
|
|
|
|
|
_M_value.assign(1, *__pos); |
308
|
|
|
|
|
|
|
} |
309
|
0
|
0
|
|
|
|
|
else if (__c == 'b') |
310
|
|
|
|
|
|
|
{ |
311
|
0
|
|
|
|
|
|
_M_token = _S_token_word_bound; |
312
|
0
|
|
|
|
|
|
_M_value.assign(1, 'p'); |
313
|
|
|
|
|
|
|
} |
314
|
0
|
0
|
|
|
|
|
else if (__c == 'B') |
315
|
|
|
|
|
|
|
{ |
316
|
0
|
|
|
|
|
|
_M_token = _S_token_word_bound; |
317
|
0
|
|
|
|
|
|
_M_value.assign(1, 'n'); |
318
|
|
|
|
|
|
|
} |
319
|
|
|
|
|
|
|
// N3376 28.13 |
320
|
0
|
0
|
|
|
|
|
else if (__c == 'd' |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
321
|
|
|
|
|
|
|
|| __c == 'D' |
322
|
|
|
|
|
|
|
|| __c == 's' |
323
|
|
|
|
|
|
|
|| __c == 'S' |
324
|
|
|
|
|
|
|
|| __c == 'w' |
325
|
|
|
|
|
|
|
|| __c == 'W') |
326
|
|
|
|
|
|
|
{ |
327
|
0
|
|
|
|
|
|
_M_token = _S_token_quoted_class; |
328
|
0
|
|
|
|
|
|
_M_value.assign(1, __c); |
329
|
|
|
|
|
|
|
} |
330
|
0
|
0
|
|
|
|
|
else if (__c == 'c') |
331
|
|
|
|
|
|
|
{ |
332
|
0
|
0
|
|
|
|
|
if (_M_current == _M_end) |
333
|
0
|
|
|
|
|
|
__throw_regex_error(regex_constants::error_escape); |
334
|
0
|
|
|
|
|
|
_M_token = _S_token_ord_char; |
335
|
0
|
|
|
|
|
|
_M_value.assign(1, *_M_current++); |
336
|
|
|
|
|
|
|
} |
337
|
0
|
0
|
|
|
|
|
else if (__c == 'x' || __c == 'u') |
|
|
0
|
|
|
|
|
|
338
|
|
|
|
|
|
|
{ |
339
|
0
|
|
|
|
|
|
_M_value.erase(); |
340
|
0
|
0
|
|
|
|
|
for (int __i = 0; __i < (__c == 'x' ? 2 : 4); __i++) |
|
|
0
|
|
|
|
|
|
341
|
|
|
|
|
|
|
{ |
342
|
0
|
0
|
|
|
|
|
if (_M_current == _M_end |
343
|
0
|
|
|
|
|
|
|| !_M_ctype.is(_CtypeT::xdigit, *_M_current)) |
344
|
0
|
|
|
|
|
|
__throw_regex_error(regex_constants::error_escape); |
345
|
0
|
|
|
|
|
|
_M_value += *_M_current++; |
346
|
|
|
|
|
|
|
} |
347
|
0
|
|
|
|
|
|
_M_token = _S_token_hex_num; |
348
|
|
|
|
|
|
|
} |
349
|
|
|
|
|
|
|
// ECMAScript recognizes multi-digit back-references. |
350
|
0
|
0
|
|
|
|
|
else if (_M_ctype.is(_CtypeT::digit, __c)) |
351
|
|
|
|
|
|
|
{ |
352
|
0
|
|
|
|
|
|
_M_value.assign(1, __c); |
353
|
0
|
0
|
|
|
|
|
while (_M_current != _M_end |
354
|
0
|
|
|
|
|
|
&& _M_ctype.is(_CtypeT::digit, *_M_current)) |
355
|
0
|
|
|
|
|
|
_M_value += *_M_current++; |
356
|
0
|
|
|
|
|
|
_M_token = _S_token_backref; |
357
|
|
|
|
|
|
|
} |
358
|
|
|
|
|
|
|
else |
359
|
|
|
|
|
|
|
{ |
360
|
0
|
|
|
|
|
|
_M_token = _S_token_ord_char; |
361
|
0
|
|
|
|
|
|
_M_value.assign(1, __c); |
362
|
|
|
|
|
|
|
} |
363
|
0
|
|
|
|
|
|
} |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
// Differences between styles: |
366
|
|
|
|
|
|
|
// 1) Extended doesn't support backref, but basic does. |
367
|
|
|
|
|
|
|
template |
368
|
|
|
|
|
|
|
void |
369
|
0
|
|
|
|
|
|
_Scanner<_CharT>:: |
370
|
|
|
|
|
|
|
_M_eat_escape_posix() |
371
|
|
|
|
|
|
|
{ |
372
|
0
|
0
|
|
|
|
|
if (_M_current == _M_end) |
373
|
0
|
|
|
|
|
|
__throw_regex_error(regex_constants::error_escape); |
374
|
|
|
|
|
|
|
|
375
|
0
|
|
|
|
|
|
auto __c = *_M_current; |
376
|
0
|
|
|
|
|
|
auto __pos = std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0')); |
377
|
|
|
|
|
|
|
|
378
|
0
|
0
|
|
|
|
|
if (__pos != nullptr && *__pos != '\0') |
|
|
0
|
|
|
|
|
|
379
|
|
|
|
|
|
|
{ |
380
|
0
|
|
|
|
|
|
_M_token = _S_token_ord_char; |
381
|
0
|
|
|
|
|
|
_M_value.assign(1, __c); |
382
|
|
|
|
|
|
|
} |
383
|
|
|
|
|
|
|
// We MUST judge awk before handling backrefs. There's no backref in awk. |
384
|
0
|
0
|
|
|
|
|
else if (_M_is_awk()) |
385
|
|
|
|
|
|
|
{ |
386
|
0
|
|
|
|
|
|
_M_eat_escape_awk(); |
387
|
0
|
|
|
|
|
|
return; |
388
|
|
|
|
|
|
|
} |
389
|
0
|
0
|
|
|
|
|
else if (_M_is_basic() && _M_ctype.is(_CtypeT::digit, __c) && __c != '0') |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
390
|
|
|
|
|
|
|
{ |
391
|
0
|
|
|
|
|
|
_M_token = _S_token_backref; |
392
|
0
|
|
|
|
|
|
_M_value.assign(1, __c); |
393
|
|
|
|
|
|
|
} |
394
|
|
|
|
|
|
|
else |
395
|
|
|
|
|
|
|
{ |
396
|
|
|
|
|
|
|
#ifdef __STRICT_ANSI__ |
397
|
|
|
|
|
|
|
// POSIX says it is undefined to escape ordinary characters |
398
|
0
|
|
|
|
|
|
__throw_regex_error(regex_constants::error_escape); |
399
|
|
|
|
|
|
|
#else |
400
|
|
|
|
|
|
|
_M_token = _S_token_ord_char; |
401
|
|
|
|
|
|
|
_M_value.assign(1, __c); |
402
|
|
|
|
|
|
|
#endif |
403
|
|
|
|
|
|
|
} |
404
|
0
|
|
|
|
|
|
++_M_current; |
405
|
|
|
|
|
|
|
} |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
template |
408
|
|
|
|
|
|
|
void |
409
|
0
|
|
|
|
|
|
_Scanner<_CharT>:: |
410
|
|
|
|
|
|
|
_M_eat_escape_awk() |
411
|
|
|
|
|
|
|
{ |
412
|
0
|
|
|
|
|
|
auto __c = *_M_current++; |
413
|
0
|
|
|
|
|
|
auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0')); |
414
|
|
|
|
|
|
|
|
415
|
0
|
0
|
|
|
|
|
if (__pos != nullptr) |
416
|
|
|
|
|
|
|
{ |
417
|
0
|
|
|
|
|
|
_M_token = _S_token_ord_char; |
418
|
0
|
|
|
|
|
|
_M_value.assign(1, *__pos); |
419
|
|
|
|
|
|
|
} |
420
|
|
|
|
|
|
|
// \ddd for oct representation |
421
|
0
|
0
|
|
|
|
|
else if (_M_ctype.is(_CtypeT::digit, __c) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
422
|
0
|
|
|
|
|
|
&& __c != '8' |
423
|
|
|
|
|
|
|
&& __c != '9') |
424
|
|
|
|
|
|
|
{ |
425
|
0
|
|
|
|
|
|
_M_value.assign(1, __c); |
426
|
0
|
0
|
|
|
|
|
for (int __i = 0; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
427
|
|
|
|
|
|
|
__i < 2 |
428
|
0
|
|
|
|
|
|
&& _M_current != _M_end |
429
|
0
|
|
|
|
|
|
&& _M_ctype.is(_CtypeT::digit, *_M_current) |
430
|
0
|
|
|
|
|
|
&& *_M_current != '8' |
431
|
0
|
|
|
|
|
|
&& *_M_current != '9'; |
432
|
|
|
|
|
|
|
__i++) |
433
|
0
|
|
|
|
|
|
_M_value += *_M_current++; |
434
|
0
|
|
|
|
|
|
_M_token = _S_token_oct_num; |
435
|
0
|
|
|
|
|
|
return; |
436
|
|
|
|
|
|
|
} |
437
|
|
|
|
|
|
|
else |
438
|
0
|
|
|
|
|
|
__throw_regex_error(regex_constants::error_escape); |
439
|
|
|
|
|
|
|
} |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
// Eats a character class or throws an exception. |
442
|
|
|
|
|
|
|
// __ch could be ':', '.' or '=', _M_current is the char after ']' when |
443
|
|
|
|
|
|
|
// returning. |
444
|
|
|
|
|
|
|
template |
445
|
|
|
|
|
|
|
void |
446
|
0
|
|
|
|
|
|
_Scanner<_CharT>:: |
447
|
|
|
|
|
|
|
_M_eat_class(char __ch) |
448
|
|
|
|
|
|
|
{ |
449
|
0
|
0
|
|
|
|
|
for (_M_value.clear(); _M_current != _M_end && *_M_current != __ch;) |
|
|
0
|
|
|
|
|
|
450
|
0
|
|
|
|
|
|
_M_value += *_M_current++; |
451
|
0
|
0
|
|
|
|
|
if (_M_current == _M_end |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
452
|
0
|
|
|
|
|
|
|| *_M_current++ != __ch |
453
|
0
|
|
|
|
|
|
|| _M_current == _M_end // skip __ch |
454
|
0
|
|
|
|
|
|
|| *_M_current++ != ']') // skip ']' |
455
|
|
|
|
|
|
|
{ |
456
|
0
|
0
|
|
|
|
|
if (__ch == ':') |
457
|
0
|
|
|
|
|
|
__throw_regex_error(regex_constants::error_ctype); |
458
|
|
|
|
|
|
|
else |
459
|
0
|
|
|
|
|
|
__throw_regex_error(regex_constants::error_collate); |
460
|
|
|
|
|
|
|
} |
461
|
0
|
|
|
|
|
|
} |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
#ifdef _GLIBCXX_DEBUG |
464
|
|
|
|
|
|
|
template |
465
|
|
|
|
|
|
|
std::ostream& |
466
|
|
|
|
|
|
|
_Scanner<_CharT>:: |
467
|
|
|
|
|
|
|
_M_print(std::ostream& ostr) |
468
|
|
|
|
|
|
|
{ |
469
|
|
|
|
|
|
|
switch (_M_token) |
470
|
|
|
|
|
|
|
{ |
471
|
|
|
|
|
|
|
case _S_token_anychar: |
472
|
|
|
|
|
|
|
ostr << "any-character\n"; |
473
|
|
|
|
|
|
|
break; |
474
|
|
|
|
|
|
|
case _S_token_backref: |
475
|
|
|
|
|
|
|
ostr << "backref\n"; |
476
|
|
|
|
|
|
|
break; |
477
|
|
|
|
|
|
|
case _S_token_bracket_begin: |
478
|
|
|
|
|
|
|
ostr << "bracket-begin\n"; |
479
|
|
|
|
|
|
|
break; |
480
|
|
|
|
|
|
|
case _S_token_bracket_neg_begin: |
481
|
|
|
|
|
|
|
ostr << "bracket-neg-begin\n"; |
482
|
|
|
|
|
|
|
break; |
483
|
|
|
|
|
|
|
case _S_token_bracket_end: |
484
|
|
|
|
|
|
|
ostr << "bracket-end\n"; |
485
|
|
|
|
|
|
|
break; |
486
|
|
|
|
|
|
|
case _S_token_char_class_name: |
487
|
|
|
|
|
|
|
ostr << "char-class-name \"" << _M_value << "\"\n"; |
488
|
|
|
|
|
|
|
break; |
489
|
|
|
|
|
|
|
case _S_token_closure0: |
490
|
|
|
|
|
|
|
ostr << "closure0\n"; |
491
|
|
|
|
|
|
|
break; |
492
|
|
|
|
|
|
|
case _S_token_closure1: |
493
|
|
|
|
|
|
|
ostr << "closure1\n"; |
494
|
|
|
|
|
|
|
break; |
495
|
|
|
|
|
|
|
case _S_token_collsymbol: |
496
|
|
|
|
|
|
|
ostr << "collsymbol \"" << _M_value << "\"\n"; |
497
|
|
|
|
|
|
|
break; |
498
|
|
|
|
|
|
|
case _S_token_comma: |
499
|
|
|
|
|
|
|
ostr << "comma\n"; |
500
|
|
|
|
|
|
|
break; |
501
|
|
|
|
|
|
|
case _S_token_dup_count: |
502
|
|
|
|
|
|
|
ostr << "dup count: " << _M_value << "\n"; |
503
|
|
|
|
|
|
|
break; |
504
|
|
|
|
|
|
|
case _S_token_eof: |
505
|
|
|
|
|
|
|
ostr << "EOF\n"; |
506
|
|
|
|
|
|
|
break; |
507
|
|
|
|
|
|
|
case _S_token_equiv_class_name: |
508
|
|
|
|
|
|
|
ostr << "equiv-class-name \"" << _M_value << "\"\n"; |
509
|
|
|
|
|
|
|
break; |
510
|
|
|
|
|
|
|
case _S_token_interval_begin: |
511
|
|
|
|
|
|
|
ostr << "interval begin\n"; |
512
|
|
|
|
|
|
|
break; |
513
|
|
|
|
|
|
|
case _S_token_interval_end: |
514
|
|
|
|
|
|
|
ostr << "interval end\n"; |
515
|
|
|
|
|
|
|
break; |
516
|
|
|
|
|
|
|
case _S_token_line_begin: |
517
|
|
|
|
|
|
|
ostr << "line begin\n"; |
518
|
|
|
|
|
|
|
break; |
519
|
|
|
|
|
|
|
case _S_token_line_end: |
520
|
|
|
|
|
|
|
ostr << "line end\n"; |
521
|
|
|
|
|
|
|
break; |
522
|
|
|
|
|
|
|
case _S_token_opt: |
523
|
|
|
|
|
|
|
ostr << "opt\n"; |
524
|
|
|
|
|
|
|
break; |
525
|
|
|
|
|
|
|
case _S_token_or: |
526
|
|
|
|
|
|
|
ostr << "or\n"; |
527
|
|
|
|
|
|
|
break; |
528
|
|
|
|
|
|
|
case _S_token_ord_char: |
529
|
|
|
|
|
|
|
ostr << "ordinary character: \"" << _M_value << "\"\n"; |
530
|
|
|
|
|
|
|
break; |
531
|
|
|
|
|
|
|
case _S_token_subexpr_begin: |
532
|
|
|
|
|
|
|
ostr << "subexpr begin\n"; |
533
|
|
|
|
|
|
|
break; |
534
|
|
|
|
|
|
|
case _S_token_subexpr_no_group_begin: |
535
|
|
|
|
|
|
|
ostr << "no grouping subexpr begin\n"; |
536
|
|
|
|
|
|
|
break; |
537
|
|
|
|
|
|
|
case _S_token_subexpr_lookahead_begin: |
538
|
|
|
|
|
|
|
ostr << "lookahead subexpr begin\n"; |
539
|
|
|
|
|
|
|
break; |
540
|
|
|
|
|
|
|
case _S_token_subexpr_end: |
541
|
|
|
|
|
|
|
ostr << "subexpr end\n"; |
542
|
|
|
|
|
|
|
break; |
543
|
|
|
|
|
|
|
case _S_token_unknown: |
544
|
|
|
|
|
|
|
ostr << "-- unknown token --\n"; |
545
|
|
|
|
|
|
|
break; |
546
|
|
|
|
|
|
|
case _S_token_oct_num: |
547
|
|
|
|
|
|
|
ostr << "oct number " << _M_value << "\n"; |
548
|
|
|
|
|
|
|
break; |
549
|
|
|
|
|
|
|
case _S_token_hex_num: |
550
|
|
|
|
|
|
|
ostr << "hex number " << _M_value << "\n"; |
551
|
|
|
|
|
|
|
break; |
552
|
|
|
|
|
|
|
case _S_token_quoted_class: |
553
|
|
|
|
|
|
|
ostr << "quoted class " << "\\" << _M_value << "\n"; |
554
|
|
|
|
|
|
|
break; |
555
|
|
|
|
|
|
|
default: |
556
|
|
|
|
|
|
|
_GLIBCXX_DEBUG_ASSERT(false); |
557
|
|
|
|
|
|
|
} |
558
|
|
|
|
|
|
|
return ostr; |
559
|
|
|
|
|
|
|
} |
560
|
|
|
|
|
|
|
#endif |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
_GLIBCXX_END_NAMESPACE_VERSION |
563
|
|
|
|
|
|
|
} // namespace __detail |
564
|
|
|
|
|
|
|
} // namespace |