| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package DMS::Parser; |
|
2
|
|
|
|
|
|
|
# DMS parser — Perl port of the Rust reference. |
|
3
|
|
|
|
|
|
|
# Public (SPEC v0.14): DMS::Parser::decode($src) -> hashref/arrayref/scalar/blessed-DT. |
|
4
|
|
|
|
|
|
|
# The legacy spelling DMS::Parser::parse($src) is retained as a deprecated |
|
5
|
|
|
|
|
|
|
# alias and emits a one-time Carp::carp warning per process. SPEC §Decode/Encode. |
|
6
|
|
|
|
|
|
|
|
|
7
|
3
|
|
|
3
|
|
367385
|
use strict; |
|
|
3
|
|
|
|
|
7
|
|
|
|
3
|
|
|
|
|
111
|
|
|
8
|
3
|
|
|
3
|
|
32
|
use warnings; |
|
|
3
|
|
|
|
|
5
|
|
|
|
3
|
|
|
|
|
161
|
|
|
9
|
3
|
|
|
3
|
|
1557
|
use utf8; |
|
|
3
|
|
|
|
|
956
|
|
|
|
3
|
|
|
|
|
18
|
|
|
10
|
3
|
|
|
3
|
|
97
|
use Carp (); |
|
|
3
|
|
|
|
|
8
|
|
|
|
3
|
|
|
|
|
91869
|
|
|
11
|
|
|
|
|
|
|
# Tie::IxHash is loaded lazily (only when non-lite mode actually |
|
12
|
|
|
|
|
|
|
# constructs a tied table) — saves ~30ms of startup and avoids the |
|
13
|
|
|
|
|
|
|
# import entirely on the lite/encoder hot path. |
|
14
|
|
|
|
|
|
|
# Several historical imports (POSIX qw(floor), Math::BigInt, Encode) |
|
15
|
|
|
|
|
|
|
# have been removed: floor is unused; integer parsing now hand-rolls |
|
16
|
|
|
|
|
|
|
# i64 on native IVs (no BigInt); UTF-8 work goes through utf8::is_utf8 |
|
17
|
|
|
|
|
|
|
# / utf8::decode core builtins (no Encode). Importing those modules |
|
18
|
|
|
|
|
|
|
# only at need keeps DMS::Parser load time low — material on small |
|
19
|
|
|
|
|
|
|
# CLI invocations like the conformance encoder. |
|
20
|
|
|
|
|
|
|
# Unicode::Normalize is loaded lazily — pure-ASCII source short-circuits |
|
21
|
|
|
|
|
|
|
# the NFC pass, so most CLI invocations never need the module at all. |
|
22
|
0
|
|
|
0
|
|
0
|
sub _NFC { require Unicode::Normalize; *_NFC = \&Unicode::Normalize::NFC; goto &_NFC } |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
our $VERSION = '0.5.3'; |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# Capability flag — this port ships lite-mode decode + lite-mode encode_lite. |
|
27
|
|
|
|
|
|
|
# See SPEC §Parsing modes — full and lite. |
|
28
|
|
|
|
|
|
|
our $SUPPORTS_LITE_MODE = 1; |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# Capability flag — this port ships unordered-table parse mode. |
|
31
|
|
|
|
|
|
|
# See SPEC §Unordered tables. |
|
32
|
|
|
|
|
|
|
our $SUPPORTS_IGNORE_ORDER = 1; |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
# Datetime sentinel classes ----------------------------------------------- |
|
35
|
|
|
|
|
|
|
# All typed-scalar sentinels are blessed *scalar refs* rather than blessed |
|
36
|
|
|
|
|
|
|
# hashrefs. That's one allocation instead of three (HV + hv_entry + RV) |
|
37
|
|
|
|
|
|
|
# per value — noticeable on wide-flat documents where every leaf is a |
|
38
|
|
|
|
|
|
|
# typed scalar. Public accessor API (->value, ->bstr, ->is_neg) unchanged. |
|
39
|
0
|
|
|
0
|
|
0
|
package DMS::Parser::LocalDate; sub new { my $v = "$_[1]"; bless \$v, $_[0] } sub value { ${ $_[0] } } |
|
|
0
|
|
|
0
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
40
|
0
|
|
|
0
|
|
0
|
package DMS::Parser::LocalTime; sub new { my $v = "$_[1]"; bless \$v, $_[0] } sub value { ${ $_[0] } } |
|
|
0
|
|
|
0
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
41
|
0
|
|
|
0
|
|
0
|
package DMS::Parser::LocalDateTime; sub new { my $v = "$_[1]"; bless \$v, $_[0] } sub value { ${ $_[0] } } |
|
|
0
|
|
|
0
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
42
|
0
|
|
|
0
|
|
0
|
package DMS::Parser::OffsetDateTime; sub new { my $v = "$_[1]"; bless \$v, $_[0] } sub value { ${ $_[0] } } |
|
|
0
|
|
|
0
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
43
|
0
|
|
|
0
|
|
0
|
package DMS::Parser::Float; sub new { my $v = 0 + $_[1]; bless \$v, $_[0] } sub value { ${ $_[0] } } |
|
|
0
|
|
|
0
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
44
|
|
|
|
|
|
|
# DMS::Parser::Integer holds the canonical decimal representation. On 64-bit Perl |
|
45
|
|
|
|
|
|
|
# it's stored as a native IV so the parser doesn't pay a string-format |
|
46
|
|
|
|
|
|
|
# cost just to build it. `value()` returns the object itself so callers |
|
47
|
|
|
|
|
|
|
# can chain `->bstr` / `->is_neg` — the public API matches what was |
|
48
|
|
|
|
|
|
|
# previously a Math::BigInt instance (`$int->value->bstr` still works). |
|
49
|
|
|
|
|
|
|
package DMS::Parser::Integer; |
|
50
|
27
|
|
|
27
|
|
124
|
sub new { my $v = 0 + $_[1]; bless \$v, $_[0] } |
|
|
27
|
|
|
|
|
108
|
|
|
51
|
0
|
|
|
0
|
|
0
|
sub value { $_[0] } |
|
52
|
30
|
|
|
30
|
|
50
|
sub bstr { "${ $_[0] }" } # force stringification of the IV |
|
|
30
|
|
|
|
|
116
|
|
|
53
|
0
|
|
|
0
|
|
0
|
sub is_neg { ${ $_[0] } < 0 } |
|
|
0
|
|
|
|
|
0
|
|
|
54
|
0
|
0
|
|
0
|
|
0
|
package DMS::Parser::Bool; sub new { my $v = $_[1]?1:0; bless \$v, $_[0] } sub value { ${ $_[0] } } |
|
|
0
|
|
|
0
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
# Path-segment marker class used inside attached-comment paths to |
|
57
|
|
|
|
|
|
|
# distinguish list-index segments (DMS::Parser::Index) from string-keyed table |
|
58
|
|
|
|
|
|
|
# segments (plain Perl scalars). Mirrors Rust's BreadcrumbSegment::Index. |
|
59
|
|
|
|
|
|
|
package DMS::Parser::Index; |
|
60
|
22
|
|
|
22
|
|
42
|
sub new { my $v = 0 + $_[1]; bless \$v, $_[0] } |
|
|
22
|
|
|
|
|
70
|
|
|
61
|
1
|
|
|
1
|
|
7016
|
sub value { ${ $_[0] } } |
|
|
1
|
|
|
|
|
9
|
|
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# SPEC §"Unordered tables". Marker class for body tables produced by the |
|
64
|
|
|
|
|
|
|
# *_unordered parser entry points. Underlying storage is a plain Perl |
|
65
|
|
|
|
|
|
|
# hashref (no Tie::IxHash, no `\0_keys` sidecar) — iteration order is |
|
66
|
|
|
|
|
|
|
# arbitrary per Perl's hash randomization. Mirrors Rust's |
|
67
|
|
|
|
|
|
|
# Value::UnorderedTable. `to_dms` (full mode) refuses to round-trip a |
|
68
|
|
|
|
|
|
|
# Document containing this variant; `to_dms_lite` accepts it. |
|
69
|
|
|
|
|
|
|
package DMS::Parser::UnorderedTable; |
|
70
|
|
|
|
|
|
|
# Construct from a plain hashref. Caller hands ownership of the hash. |
|
71
|
|
|
|
|
|
|
sub new { |
|
72
|
0
|
|
|
0
|
|
0
|
my ($class, $h) = @_; |
|
73
|
0
|
0
|
|
|
|
0
|
$h = {} unless defined $h; |
|
74
|
0
|
|
|
|
|
0
|
return bless $h, $class; |
|
75
|
|
|
|
|
|
|
} |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
package DMS::Parser; |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
# UAX #31 §2 default identifier syntax frozen at Unicode 15.1. |
|
80
|
|
|
|
|
|
|
# Sorted, non-overlapping ranges: XID_Continue \ Default_Ignorable_Code_Point |
|
81
|
|
|
|
|
|
|
# (per SPEC: every parser must use this same frozen snapshot — relying on |
|
82
|
|
|
|
|
|
|
# Perl's `\p{XID_Continue}` would track the host's Unicode data and accept |
|
83
|
|
|
|
|
|
|
# newly-assigned codepoints DMS does not know about). 772 ranges total. |
|
84
|
|
|
|
|
|
|
our @XID_CONTINUE_RANGES = ( |
|
85
|
|
|
|
|
|
|
[0x00AA, 0x00AA], |
|
86
|
|
|
|
|
|
|
[0x00B5, 0x00B5], |
|
87
|
|
|
|
|
|
|
[0x00B7, 0x00B7], |
|
88
|
|
|
|
|
|
|
[0x00BA, 0x00BA], |
|
89
|
|
|
|
|
|
|
[0x00C0, 0x00D6], |
|
90
|
|
|
|
|
|
|
[0x00D8, 0x00F6], |
|
91
|
|
|
|
|
|
|
[0x00F8, 0x02C1], |
|
92
|
|
|
|
|
|
|
[0x02C6, 0x02D1], |
|
93
|
|
|
|
|
|
|
[0x02E0, 0x02E4], |
|
94
|
|
|
|
|
|
|
[0x02EC, 0x02EC], |
|
95
|
|
|
|
|
|
|
[0x02EE, 0x02EE], |
|
96
|
|
|
|
|
|
|
[0x0300, 0x034E], |
|
97
|
|
|
|
|
|
|
[0x0350, 0x0374], |
|
98
|
|
|
|
|
|
|
[0x0376, 0x0377], |
|
99
|
|
|
|
|
|
|
[0x037B, 0x037D], |
|
100
|
|
|
|
|
|
|
[0x037F, 0x037F], |
|
101
|
|
|
|
|
|
|
[0x0386, 0x038A], |
|
102
|
|
|
|
|
|
|
[0x038C, 0x038C], |
|
103
|
|
|
|
|
|
|
[0x038E, 0x03A1], |
|
104
|
|
|
|
|
|
|
[0x03A3, 0x03F5], |
|
105
|
|
|
|
|
|
|
[0x03F7, 0x0481], |
|
106
|
|
|
|
|
|
|
[0x0483, 0x0487], |
|
107
|
|
|
|
|
|
|
[0x048A, 0x052F], |
|
108
|
|
|
|
|
|
|
[0x0531, 0x0556], |
|
109
|
|
|
|
|
|
|
[0x0559, 0x0559], |
|
110
|
|
|
|
|
|
|
[0x0560, 0x0588], |
|
111
|
|
|
|
|
|
|
[0x0591, 0x05BD], |
|
112
|
|
|
|
|
|
|
[0x05BF, 0x05BF], |
|
113
|
|
|
|
|
|
|
[0x05C1, 0x05C2], |
|
114
|
|
|
|
|
|
|
[0x05C4, 0x05C5], |
|
115
|
|
|
|
|
|
|
[0x05C7, 0x05C7], |
|
116
|
|
|
|
|
|
|
[0x05D0, 0x05EA], |
|
117
|
|
|
|
|
|
|
[0x05EF, 0x05F2], |
|
118
|
|
|
|
|
|
|
[0x0610, 0x061A], |
|
119
|
|
|
|
|
|
|
[0x0620, 0x0669], |
|
120
|
|
|
|
|
|
|
[0x066E, 0x06D3], |
|
121
|
|
|
|
|
|
|
[0x06D5, 0x06DC], |
|
122
|
|
|
|
|
|
|
[0x06DF, 0x06E8], |
|
123
|
|
|
|
|
|
|
[0x06EA, 0x06FC], |
|
124
|
|
|
|
|
|
|
[0x06FF, 0x06FF], |
|
125
|
|
|
|
|
|
|
[0x0710, 0x074A], |
|
126
|
|
|
|
|
|
|
[0x074D, 0x07B1], |
|
127
|
|
|
|
|
|
|
[0x07C0, 0x07F5], |
|
128
|
|
|
|
|
|
|
[0x07FA, 0x07FA], |
|
129
|
|
|
|
|
|
|
[0x07FD, 0x07FD], |
|
130
|
|
|
|
|
|
|
[0x0800, 0x082D], |
|
131
|
|
|
|
|
|
|
[0x0840, 0x085B], |
|
132
|
|
|
|
|
|
|
[0x0860, 0x086A], |
|
133
|
|
|
|
|
|
|
[0x0870, 0x0887], |
|
134
|
|
|
|
|
|
|
[0x0889, 0x088E], |
|
135
|
|
|
|
|
|
|
[0x0898, 0x08E1], |
|
136
|
|
|
|
|
|
|
[0x08E3, 0x0963], |
|
137
|
|
|
|
|
|
|
[0x0966, 0x096F], |
|
138
|
|
|
|
|
|
|
[0x0971, 0x0983], |
|
139
|
|
|
|
|
|
|
[0x0985, 0x098C], |
|
140
|
|
|
|
|
|
|
[0x098F, 0x0990], |
|
141
|
|
|
|
|
|
|
[0x0993, 0x09A8], |
|
142
|
|
|
|
|
|
|
[0x09AA, 0x09B0], |
|
143
|
|
|
|
|
|
|
[0x09B2, 0x09B2], |
|
144
|
|
|
|
|
|
|
[0x09B6, 0x09B9], |
|
145
|
|
|
|
|
|
|
[0x09BC, 0x09C4], |
|
146
|
|
|
|
|
|
|
[0x09C7, 0x09C8], |
|
147
|
|
|
|
|
|
|
[0x09CB, 0x09CE], |
|
148
|
|
|
|
|
|
|
[0x09D7, 0x09D7], |
|
149
|
|
|
|
|
|
|
[0x09DC, 0x09DD], |
|
150
|
|
|
|
|
|
|
[0x09DF, 0x09E3], |
|
151
|
|
|
|
|
|
|
[0x09E6, 0x09F1], |
|
152
|
|
|
|
|
|
|
[0x09FC, 0x09FC], |
|
153
|
|
|
|
|
|
|
[0x09FE, 0x09FE], |
|
154
|
|
|
|
|
|
|
[0x0A01, 0x0A03], |
|
155
|
|
|
|
|
|
|
[0x0A05, 0x0A0A], |
|
156
|
|
|
|
|
|
|
[0x0A0F, 0x0A10], |
|
157
|
|
|
|
|
|
|
[0x0A13, 0x0A28], |
|
158
|
|
|
|
|
|
|
[0x0A2A, 0x0A30], |
|
159
|
|
|
|
|
|
|
[0x0A32, 0x0A33], |
|
160
|
|
|
|
|
|
|
[0x0A35, 0x0A36], |
|
161
|
|
|
|
|
|
|
[0x0A38, 0x0A39], |
|
162
|
|
|
|
|
|
|
[0x0A3C, 0x0A3C], |
|
163
|
|
|
|
|
|
|
[0x0A3E, 0x0A42], |
|
164
|
|
|
|
|
|
|
[0x0A47, 0x0A48], |
|
165
|
|
|
|
|
|
|
[0x0A4B, 0x0A4D], |
|
166
|
|
|
|
|
|
|
[0x0A51, 0x0A51], |
|
167
|
|
|
|
|
|
|
[0x0A59, 0x0A5C], |
|
168
|
|
|
|
|
|
|
[0x0A5E, 0x0A5E], |
|
169
|
|
|
|
|
|
|
[0x0A66, 0x0A75], |
|
170
|
|
|
|
|
|
|
[0x0A81, 0x0A83], |
|
171
|
|
|
|
|
|
|
[0x0A85, 0x0A8D], |
|
172
|
|
|
|
|
|
|
[0x0A8F, 0x0A91], |
|
173
|
|
|
|
|
|
|
[0x0A93, 0x0AA8], |
|
174
|
|
|
|
|
|
|
[0x0AAA, 0x0AB0], |
|
175
|
|
|
|
|
|
|
[0x0AB2, 0x0AB3], |
|
176
|
|
|
|
|
|
|
[0x0AB5, 0x0AB9], |
|
177
|
|
|
|
|
|
|
[0x0ABC, 0x0AC5], |
|
178
|
|
|
|
|
|
|
[0x0AC7, 0x0AC9], |
|
179
|
|
|
|
|
|
|
[0x0ACB, 0x0ACD], |
|
180
|
|
|
|
|
|
|
[0x0AD0, 0x0AD0], |
|
181
|
|
|
|
|
|
|
[0x0AE0, 0x0AE3], |
|
182
|
|
|
|
|
|
|
[0x0AE6, 0x0AEF], |
|
183
|
|
|
|
|
|
|
[0x0AF9, 0x0AFF], |
|
184
|
|
|
|
|
|
|
[0x0B01, 0x0B03], |
|
185
|
|
|
|
|
|
|
[0x0B05, 0x0B0C], |
|
186
|
|
|
|
|
|
|
[0x0B0F, 0x0B10], |
|
187
|
|
|
|
|
|
|
[0x0B13, 0x0B28], |
|
188
|
|
|
|
|
|
|
[0x0B2A, 0x0B30], |
|
189
|
|
|
|
|
|
|
[0x0B32, 0x0B33], |
|
190
|
|
|
|
|
|
|
[0x0B35, 0x0B39], |
|
191
|
|
|
|
|
|
|
[0x0B3C, 0x0B44], |
|
192
|
|
|
|
|
|
|
[0x0B47, 0x0B48], |
|
193
|
|
|
|
|
|
|
[0x0B4B, 0x0B4D], |
|
194
|
|
|
|
|
|
|
[0x0B55, 0x0B57], |
|
195
|
|
|
|
|
|
|
[0x0B5C, 0x0B5D], |
|
196
|
|
|
|
|
|
|
[0x0B5F, 0x0B63], |
|
197
|
|
|
|
|
|
|
[0x0B66, 0x0B6F], |
|
198
|
|
|
|
|
|
|
[0x0B71, 0x0B71], |
|
199
|
|
|
|
|
|
|
[0x0B82, 0x0B83], |
|
200
|
|
|
|
|
|
|
[0x0B85, 0x0B8A], |
|
201
|
|
|
|
|
|
|
[0x0B8E, 0x0B90], |
|
202
|
|
|
|
|
|
|
[0x0B92, 0x0B95], |
|
203
|
|
|
|
|
|
|
[0x0B99, 0x0B9A], |
|
204
|
|
|
|
|
|
|
[0x0B9C, 0x0B9C], |
|
205
|
|
|
|
|
|
|
[0x0B9E, 0x0B9F], |
|
206
|
|
|
|
|
|
|
[0x0BA3, 0x0BA4], |
|
207
|
|
|
|
|
|
|
[0x0BA8, 0x0BAA], |
|
208
|
|
|
|
|
|
|
[0x0BAE, 0x0BB9], |
|
209
|
|
|
|
|
|
|
[0x0BBE, 0x0BC2], |
|
210
|
|
|
|
|
|
|
[0x0BC6, 0x0BC8], |
|
211
|
|
|
|
|
|
|
[0x0BCA, 0x0BCD], |
|
212
|
|
|
|
|
|
|
[0x0BD0, 0x0BD0], |
|
213
|
|
|
|
|
|
|
[0x0BD7, 0x0BD7], |
|
214
|
|
|
|
|
|
|
[0x0BE6, 0x0BEF], |
|
215
|
|
|
|
|
|
|
[0x0C00, 0x0C0C], |
|
216
|
|
|
|
|
|
|
[0x0C0E, 0x0C10], |
|
217
|
|
|
|
|
|
|
[0x0C12, 0x0C28], |
|
218
|
|
|
|
|
|
|
[0x0C2A, 0x0C39], |
|
219
|
|
|
|
|
|
|
[0x0C3C, 0x0C44], |
|
220
|
|
|
|
|
|
|
[0x0C46, 0x0C48], |
|
221
|
|
|
|
|
|
|
[0x0C4A, 0x0C4D], |
|
222
|
|
|
|
|
|
|
[0x0C55, 0x0C56], |
|
223
|
|
|
|
|
|
|
[0x0C58, 0x0C5A], |
|
224
|
|
|
|
|
|
|
[0x0C5D, 0x0C5D], |
|
225
|
|
|
|
|
|
|
[0x0C60, 0x0C63], |
|
226
|
|
|
|
|
|
|
[0x0C66, 0x0C6F], |
|
227
|
|
|
|
|
|
|
[0x0C80, 0x0C83], |
|
228
|
|
|
|
|
|
|
[0x0C85, 0x0C8C], |
|
229
|
|
|
|
|
|
|
[0x0C8E, 0x0C90], |
|
230
|
|
|
|
|
|
|
[0x0C92, 0x0CA8], |
|
231
|
|
|
|
|
|
|
[0x0CAA, 0x0CB3], |
|
232
|
|
|
|
|
|
|
[0x0CB5, 0x0CB9], |
|
233
|
|
|
|
|
|
|
[0x0CBC, 0x0CC4], |
|
234
|
|
|
|
|
|
|
[0x0CC6, 0x0CC8], |
|
235
|
|
|
|
|
|
|
[0x0CCA, 0x0CCD], |
|
236
|
|
|
|
|
|
|
[0x0CD5, 0x0CD6], |
|
237
|
|
|
|
|
|
|
[0x0CDD, 0x0CDE], |
|
238
|
|
|
|
|
|
|
[0x0CE0, 0x0CE3], |
|
239
|
|
|
|
|
|
|
[0x0CE6, 0x0CEF], |
|
240
|
|
|
|
|
|
|
[0x0CF1, 0x0CF3], |
|
241
|
|
|
|
|
|
|
[0x0D00, 0x0D0C], |
|
242
|
|
|
|
|
|
|
[0x0D0E, 0x0D10], |
|
243
|
|
|
|
|
|
|
[0x0D12, 0x0D44], |
|
244
|
|
|
|
|
|
|
[0x0D46, 0x0D48], |
|
245
|
|
|
|
|
|
|
[0x0D4A, 0x0D4E], |
|
246
|
|
|
|
|
|
|
[0x0D54, 0x0D57], |
|
247
|
|
|
|
|
|
|
[0x0D5F, 0x0D63], |
|
248
|
|
|
|
|
|
|
[0x0D66, 0x0D6F], |
|
249
|
|
|
|
|
|
|
[0x0D7A, 0x0D7F], |
|
250
|
|
|
|
|
|
|
[0x0D81, 0x0D83], |
|
251
|
|
|
|
|
|
|
[0x0D85, 0x0D96], |
|
252
|
|
|
|
|
|
|
[0x0D9A, 0x0DB1], |
|
253
|
|
|
|
|
|
|
[0x0DB3, 0x0DBB], |
|
254
|
|
|
|
|
|
|
[0x0DBD, 0x0DBD], |
|
255
|
|
|
|
|
|
|
[0x0DC0, 0x0DC6], |
|
256
|
|
|
|
|
|
|
[0x0DCA, 0x0DCA], |
|
257
|
|
|
|
|
|
|
[0x0DCF, 0x0DD4], |
|
258
|
|
|
|
|
|
|
[0x0DD6, 0x0DD6], |
|
259
|
|
|
|
|
|
|
[0x0DD8, 0x0DDF], |
|
260
|
|
|
|
|
|
|
[0x0DE6, 0x0DEF], |
|
261
|
|
|
|
|
|
|
[0x0DF2, 0x0DF3], |
|
262
|
|
|
|
|
|
|
[0x0E01, 0x0E3A], |
|
263
|
|
|
|
|
|
|
[0x0E40, 0x0E4E], |
|
264
|
|
|
|
|
|
|
[0x0E50, 0x0E59], |
|
265
|
|
|
|
|
|
|
[0x0E81, 0x0E82], |
|
266
|
|
|
|
|
|
|
[0x0E84, 0x0E84], |
|
267
|
|
|
|
|
|
|
[0x0E86, 0x0E8A], |
|
268
|
|
|
|
|
|
|
[0x0E8C, 0x0EA3], |
|
269
|
|
|
|
|
|
|
[0x0EA5, 0x0EA5], |
|
270
|
|
|
|
|
|
|
[0x0EA7, 0x0EBD], |
|
271
|
|
|
|
|
|
|
[0x0EC0, 0x0EC4], |
|
272
|
|
|
|
|
|
|
[0x0EC6, 0x0EC6], |
|
273
|
|
|
|
|
|
|
[0x0EC8, 0x0ECE], |
|
274
|
|
|
|
|
|
|
[0x0ED0, 0x0ED9], |
|
275
|
|
|
|
|
|
|
[0x0EDC, 0x0EDF], |
|
276
|
|
|
|
|
|
|
[0x0F00, 0x0F00], |
|
277
|
|
|
|
|
|
|
[0x0F18, 0x0F19], |
|
278
|
|
|
|
|
|
|
[0x0F20, 0x0F29], |
|
279
|
|
|
|
|
|
|
[0x0F35, 0x0F35], |
|
280
|
|
|
|
|
|
|
[0x0F37, 0x0F37], |
|
281
|
|
|
|
|
|
|
[0x0F39, 0x0F39], |
|
282
|
|
|
|
|
|
|
[0x0F3E, 0x0F47], |
|
283
|
|
|
|
|
|
|
[0x0F49, 0x0F6C], |
|
284
|
|
|
|
|
|
|
[0x0F71, 0x0F84], |
|
285
|
|
|
|
|
|
|
[0x0F86, 0x0F97], |
|
286
|
|
|
|
|
|
|
[0x0F99, 0x0FBC], |
|
287
|
|
|
|
|
|
|
[0x0FC6, 0x0FC6], |
|
288
|
|
|
|
|
|
|
[0x1000, 0x1049], |
|
289
|
|
|
|
|
|
|
[0x1050, 0x109D], |
|
290
|
|
|
|
|
|
|
[0x10A0, 0x10C5], |
|
291
|
|
|
|
|
|
|
[0x10C7, 0x10C7], |
|
292
|
|
|
|
|
|
|
[0x10CD, 0x10CD], |
|
293
|
|
|
|
|
|
|
[0x10D0, 0x10FA], |
|
294
|
|
|
|
|
|
|
[0x10FC, 0x115E], |
|
295
|
|
|
|
|
|
|
[0x1161, 0x1248], |
|
296
|
|
|
|
|
|
|
[0x124A, 0x124D], |
|
297
|
|
|
|
|
|
|
[0x1250, 0x1256], |
|
298
|
|
|
|
|
|
|
[0x1258, 0x1258], |
|
299
|
|
|
|
|
|
|
[0x125A, 0x125D], |
|
300
|
|
|
|
|
|
|
[0x1260, 0x1288], |
|
301
|
|
|
|
|
|
|
[0x128A, 0x128D], |
|
302
|
|
|
|
|
|
|
[0x1290, 0x12B0], |
|
303
|
|
|
|
|
|
|
[0x12B2, 0x12B5], |
|
304
|
|
|
|
|
|
|
[0x12B8, 0x12BE], |
|
305
|
|
|
|
|
|
|
[0x12C0, 0x12C0], |
|
306
|
|
|
|
|
|
|
[0x12C2, 0x12C5], |
|
307
|
|
|
|
|
|
|
[0x12C8, 0x12D6], |
|
308
|
|
|
|
|
|
|
[0x12D8, 0x1310], |
|
309
|
|
|
|
|
|
|
[0x1312, 0x1315], |
|
310
|
|
|
|
|
|
|
[0x1318, 0x135A], |
|
311
|
|
|
|
|
|
|
[0x135D, 0x135F], |
|
312
|
|
|
|
|
|
|
[0x1369, 0x1371], |
|
313
|
|
|
|
|
|
|
[0x1380, 0x138F], |
|
314
|
|
|
|
|
|
|
[0x13A0, 0x13F5], |
|
315
|
|
|
|
|
|
|
[0x13F8, 0x13FD], |
|
316
|
|
|
|
|
|
|
[0x1401, 0x166C], |
|
317
|
|
|
|
|
|
|
[0x166F, 0x167F], |
|
318
|
|
|
|
|
|
|
[0x1681, 0x169A], |
|
319
|
|
|
|
|
|
|
[0x16A0, 0x16EA], |
|
320
|
|
|
|
|
|
|
[0x16EE, 0x16F8], |
|
321
|
|
|
|
|
|
|
[0x1700, 0x1715], |
|
322
|
|
|
|
|
|
|
[0x171F, 0x1734], |
|
323
|
|
|
|
|
|
|
[0x1740, 0x1753], |
|
324
|
|
|
|
|
|
|
[0x1760, 0x176C], |
|
325
|
|
|
|
|
|
|
[0x176E, 0x1770], |
|
326
|
|
|
|
|
|
|
[0x1772, 0x1773], |
|
327
|
|
|
|
|
|
|
[0x1780, 0x17B3], |
|
328
|
|
|
|
|
|
|
[0x17B6, 0x17D3], |
|
329
|
|
|
|
|
|
|
[0x17D7, 0x17D7], |
|
330
|
|
|
|
|
|
|
[0x17DC, 0x17DD], |
|
331
|
|
|
|
|
|
|
[0x17E0, 0x17E9], |
|
332
|
|
|
|
|
|
|
[0x1810, 0x1819], |
|
333
|
|
|
|
|
|
|
[0x1820, 0x1878], |
|
334
|
|
|
|
|
|
|
[0x1880, 0x18AA], |
|
335
|
|
|
|
|
|
|
[0x18B0, 0x18F5], |
|
336
|
|
|
|
|
|
|
[0x1900, 0x191E], |
|
337
|
|
|
|
|
|
|
[0x1920, 0x192B], |
|
338
|
|
|
|
|
|
|
[0x1930, 0x193B], |
|
339
|
|
|
|
|
|
|
[0x1946, 0x196D], |
|
340
|
|
|
|
|
|
|
[0x1970, 0x1974], |
|
341
|
|
|
|
|
|
|
[0x1980, 0x19AB], |
|
342
|
|
|
|
|
|
|
[0x19B0, 0x19C9], |
|
343
|
|
|
|
|
|
|
[0x19D0, 0x19DA], |
|
344
|
|
|
|
|
|
|
[0x1A00, 0x1A1B], |
|
345
|
|
|
|
|
|
|
[0x1A20, 0x1A5E], |
|
346
|
|
|
|
|
|
|
[0x1A60, 0x1A7C], |
|
347
|
|
|
|
|
|
|
[0x1A7F, 0x1A89], |
|
348
|
|
|
|
|
|
|
[0x1A90, 0x1A99], |
|
349
|
|
|
|
|
|
|
[0x1AA7, 0x1AA7], |
|
350
|
|
|
|
|
|
|
[0x1AB0, 0x1ABD], |
|
351
|
|
|
|
|
|
|
[0x1ABF, 0x1ACE], |
|
352
|
|
|
|
|
|
|
[0x1B00, 0x1B4C], |
|
353
|
|
|
|
|
|
|
[0x1B50, 0x1B59], |
|
354
|
|
|
|
|
|
|
[0x1B6B, 0x1B73], |
|
355
|
|
|
|
|
|
|
[0x1B80, 0x1BF3], |
|
356
|
|
|
|
|
|
|
[0x1C00, 0x1C37], |
|
357
|
|
|
|
|
|
|
[0x1C40, 0x1C49], |
|
358
|
|
|
|
|
|
|
[0x1C4D, 0x1C7D], |
|
359
|
|
|
|
|
|
|
[0x1C80, 0x1C88], |
|
360
|
|
|
|
|
|
|
[0x1C90, 0x1CBA], |
|
361
|
|
|
|
|
|
|
[0x1CBD, 0x1CBF], |
|
362
|
|
|
|
|
|
|
[0x1CD0, 0x1CD2], |
|
363
|
|
|
|
|
|
|
[0x1CD4, 0x1CFA], |
|
364
|
|
|
|
|
|
|
[0x1D00, 0x1F15], |
|
365
|
|
|
|
|
|
|
[0x1F18, 0x1F1D], |
|
366
|
|
|
|
|
|
|
[0x1F20, 0x1F45], |
|
367
|
|
|
|
|
|
|
[0x1F48, 0x1F4D], |
|
368
|
|
|
|
|
|
|
[0x1F50, 0x1F57], |
|
369
|
|
|
|
|
|
|
[0x1F59, 0x1F59], |
|
370
|
|
|
|
|
|
|
[0x1F5B, 0x1F5B], |
|
371
|
|
|
|
|
|
|
[0x1F5D, 0x1F5D], |
|
372
|
|
|
|
|
|
|
[0x1F5F, 0x1F7D], |
|
373
|
|
|
|
|
|
|
[0x1F80, 0x1FB4], |
|
374
|
|
|
|
|
|
|
[0x1FB6, 0x1FBC], |
|
375
|
|
|
|
|
|
|
[0x1FBE, 0x1FBE], |
|
376
|
|
|
|
|
|
|
[0x1FC2, 0x1FC4], |
|
377
|
|
|
|
|
|
|
[0x1FC6, 0x1FCC], |
|
378
|
|
|
|
|
|
|
[0x1FD0, 0x1FD3], |
|
379
|
|
|
|
|
|
|
[0x1FD6, 0x1FDB], |
|
380
|
|
|
|
|
|
|
[0x1FE0, 0x1FEC], |
|
381
|
|
|
|
|
|
|
[0x1FF2, 0x1FF4], |
|
382
|
|
|
|
|
|
|
[0x1FF6, 0x1FFC], |
|
383
|
|
|
|
|
|
|
[0x203F, 0x2040], |
|
384
|
|
|
|
|
|
|
[0x2054, 0x2054], |
|
385
|
|
|
|
|
|
|
[0x2071, 0x2071], |
|
386
|
|
|
|
|
|
|
[0x207F, 0x207F], |
|
387
|
|
|
|
|
|
|
[0x2090, 0x209C], |
|
388
|
|
|
|
|
|
|
[0x20D0, 0x20DC], |
|
389
|
|
|
|
|
|
|
[0x20E1, 0x20E1], |
|
390
|
|
|
|
|
|
|
[0x20E5, 0x20F0], |
|
391
|
|
|
|
|
|
|
[0x2102, 0x2102], |
|
392
|
|
|
|
|
|
|
[0x2107, 0x2107], |
|
393
|
|
|
|
|
|
|
[0x210A, 0x2113], |
|
394
|
|
|
|
|
|
|
[0x2115, 0x2115], |
|
395
|
|
|
|
|
|
|
[0x2118, 0x211D], |
|
396
|
|
|
|
|
|
|
[0x2124, 0x2124], |
|
397
|
|
|
|
|
|
|
[0x2126, 0x2126], |
|
398
|
|
|
|
|
|
|
[0x2128, 0x2128], |
|
399
|
|
|
|
|
|
|
[0x212A, 0x2139], |
|
400
|
|
|
|
|
|
|
[0x213C, 0x213F], |
|
401
|
|
|
|
|
|
|
[0x2145, 0x2149], |
|
402
|
|
|
|
|
|
|
[0x214E, 0x214E], |
|
403
|
|
|
|
|
|
|
[0x2160, 0x2188], |
|
404
|
|
|
|
|
|
|
[0x2C00, 0x2CE4], |
|
405
|
|
|
|
|
|
|
[0x2CEB, 0x2CF3], |
|
406
|
|
|
|
|
|
|
[0x2D00, 0x2D25], |
|
407
|
|
|
|
|
|
|
[0x2D27, 0x2D27], |
|
408
|
|
|
|
|
|
|
[0x2D2D, 0x2D2D], |
|
409
|
|
|
|
|
|
|
[0x2D30, 0x2D67], |
|
410
|
|
|
|
|
|
|
[0x2D6F, 0x2D6F], |
|
411
|
|
|
|
|
|
|
[0x2D7F, 0x2D96], |
|
412
|
|
|
|
|
|
|
[0x2DA0, 0x2DA6], |
|
413
|
|
|
|
|
|
|
[0x2DA8, 0x2DAE], |
|
414
|
|
|
|
|
|
|
[0x2DB0, 0x2DB6], |
|
415
|
|
|
|
|
|
|
[0x2DB8, 0x2DBE], |
|
416
|
|
|
|
|
|
|
[0x2DC0, 0x2DC6], |
|
417
|
|
|
|
|
|
|
[0x2DC8, 0x2DCE], |
|
418
|
|
|
|
|
|
|
[0x2DD0, 0x2DD6], |
|
419
|
|
|
|
|
|
|
[0x2DD8, 0x2DDE], |
|
420
|
|
|
|
|
|
|
[0x2DE0, 0x2DFF], |
|
421
|
|
|
|
|
|
|
[0x3005, 0x3007], |
|
422
|
|
|
|
|
|
|
[0x3021, 0x302F], |
|
423
|
|
|
|
|
|
|
[0x3031, 0x3035], |
|
424
|
|
|
|
|
|
|
[0x3038, 0x303C], |
|
425
|
|
|
|
|
|
|
[0x3041, 0x3096], |
|
426
|
|
|
|
|
|
|
[0x3099, 0x309A], |
|
427
|
|
|
|
|
|
|
[0x309D, 0x309F], |
|
428
|
|
|
|
|
|
|
[0x30A1, 0x30FF], |
|
429
|
|
|
|
|
|
|
[0x3105, 0x312F], |
|
430
|
|
|
|
|
|
|
[0x3131, 0x3163], |
|
431
|
|
|
|
|
|
|
[0x3165, 0x318E], |
|
432
|
|
|
|
|
|
|
[0x31A0, 0x31BF], |
|
433
|
|
|
|
|
|
|
[0x31F0, 0x31FF], |
|
434
|
|
|
|
|
|
|
[0x3400, 0x4DBF], |
|
435
|
|
|
|
|
|
|
[0x4E00, 0xA48C], |
|
436
|
|
|
|
|
|
|
[0xA4D0, 0xA4FD], |
|
437
|
|
|
|
|
|
|
[0xA500, 0xA60C], |
|
438
|
|
|
|
|
|
|
[0xA610, 0xA62B], |
|
439
|
|
|
|
|
|
|
[0xA640, 0xA66F], |
|
440
|
|
|
|
|
|
|
[0xA674, 0xA67D], |
|
441
|
|
|
|
|
|
|
[0xA67F, 0xA6F1], |
|
442
|
|
|
|
|
|
|
[0xA717, 0xA71F], |
|
443
|
|
|
|
|
|
|
[0xA722, 0xA788], |
|
444
|
|
|
|
|
|
|
[0xA78B, 0xA7CA], |
|
445
|
|
|
|
|
|
|
[0xA7D0, 0xA7D1], |
|
446
|
|
|
|
|
|
|
[0xA7D3, 0xA7D3], |
|
447
|
|
|
|
|
|
|
[0xA7D5, 0xA7D9], |
|
448
|
|
|
|
|
|
|
[0xA7F2, 0xA827], |
|
449
|
|
|
|
|
|
|
[0xA82C, 0xA82C], |
|
450
|
|
|
|
|
|
|
[0xA840, 0xA873], |
|
451
|
|
|
|
|
|
|
[0xA880, 0xA8C5], |
|
452
|
|
|
|
|
|
|
[0xA8D0, 0xA8D9], |
|
453
|
|
|
|
|
|
|
[0xA8E0, 0xA8F7], |
|
454
|
|
|
|
|
|
|
[0xA8FB, 0xA8FB], |
|
455
|
|
|
|
|
|
|
[0xA8FD, 0xA92D], |
|
456
|
|
|
|
|
|
|
[0xA930, 0xA953], |
|
457
|
|
|
|
|
|
|
[0xA960, 0xA97C], |
|
458
|
|
|
|
|
|
|
[0xA980, 0xA9C0], |
|
459
|
|
|
|
|
|
|
[0xA9CF, 0xA9D9], |
|
460
|
|
|
|
|
|
|
[0xA9E0, 0xA9FE], |
|
461
|
|
|
|
|
|
|
[0xAA00, 0xAA36], |
|
462
|
|
|
|
|
|
|
[0xAA40, 0xAA4D], |
|
463
|
|
|
|
|
|
|
[0xAA50, 0xAA59], |
|
464
|
|
|
|
|
|
|
[0xAA60, 0xAA76], |
|
465
|
|
|
|
|
|
|
[0xAA7A, 0xAAC2], |
|
466
|
|
|
|
|
|
|
[0xAADB, 0xAADD], |
|
467
|
|
|
|
|
|
|
[0xAAE0, 0xAAEF], |
|
468
|
|
|
|
|
|
|
[0xAAF2, 0xAAF6], |
|
469
|
|
|
|
|
|
|
[0xAB01, 0xAB06], |
|
470
|
|
|
|
|
|
|
[0xAB09, 0xAB0E], |
|
471
|
|
|
|
|
|
|
[0xAB11, 0xAB16], |
|
472
|
|
|
|
|
|
|
[0xAB20, 0xAB26], |
|
473
|
|
|
|
|
|
|
[0xAB28, 0xAB2E], |
|
474
|
|
|
|
|
|
|
[0xAB30, 0xAB5A], |
|
475
|
|
|
|
|
|
|
[0xAB5C, 0xAB69], |
|
476
|
|
|
|
|
|
|
[0xAB70, 0xABEA], |
|
477
|
|
|
|
|
|
|
[0xABEC, 0xABED], |
|
478
|
|
|
|
|
|
|
[0xABF0, 0xABF9], |
|
479
|
|
|
|
|
|
|
[0xAC00, 0xD7A3], |
|
480
|
|
|
|
|
|
|
[0xD7B0, 0xD7C6], |
|
481
|
|
|
|
|
|
|
[0xD7CB, 0xD7FB], |
|
482
|
|
|
|
|
|
|
[0xF900, 0xFA6D], |
|
483
|
|
|
|
|
|
|
[0xFA70, 0xFAD9], |
|
484
|
|
|
|
|
|
|
[0xFB00, 0xFB06], |
|
485
|
|
|
|
|
|
|
[0xFB13, 0xFB17], |
|
486
|
|
|
|
|
|
|
[0xFB1D, 0xFB28], |
|
487
|
|
|
|
|
|
|
[0xFB2A, 0xFB36], |
|
488
|
|
|
|
|
|
|
[0xFB38, 0xFB3C], |
|
489
|
|
|
|
|
|
|
[0xFB3E, 0xFB3E], |
|
490
|
|
|
|
|
|
|
[0xFB40, 0xFB41], |
|
491
|
|
|
|
|
|
|
[0xFB43, 0xFB44], |
|
492
|
|
|
|
|
|
|
[0xFB46, 0xFBB1], |
|
493
|
|
|
|
|
|
|
[0xFBD3, 0xFC5D], |
|
494
|
|
|
|
|
|
|
[0xFC64, 0xFD3D], |
|
495
|
|
|
|
|
|
|
[0xFD50, 0xFD8F], |
|
496
|
|
|
|
|
|
|
[0xFD92, 0xFDC7], |
|
497
|
|
|
|
|
|
|
[0xFDF0, 0xFDF9], |
|
498
|
|
|
|
|
|
|
[0xFE20, 0xFE2F], |
|
499
|
|
|
|
|
|
|
[0xFE33, 0xFE34], |
|
500
|
|
|
|
|
|
|
[0xFE4D, 0xFE4F], |
|
501
|
|
|
|
|
|
|
[0xFE71, 0xFE71], |
|
502
|
|
|
|
|
|
|
[0xFE73, 0xFE73], |
|
503
|
|
|
|
|
|
|
[0xFE77, 0xFE77], |
|
504
|
|
|
|
|
|
|
[0xFE79, 0xFE79], |
|
505
|
|
|
|
|
|
|
[0xFE7B, 0xFE7B], |
|
506
|
|
|
|
|
|
|
[0xFE7D, 0xFE7D], |
|
507
|
|
|
|
|
|
|
[0xFE7F, 0xFEFC], |
|
508
|
|
|
|
|
|
|
[0xFF10, 0xFF19], |
|
509
|
|
|
|
|
|
|
[0xFF21, 0xFF3A], |
|
510
|
|
|
|
|
|
|
[0xFF3F, 0xFF3F], |
|
511
|
|
|
|
|
|
|
[0xFF41, 0xFF5A], |
|
512
|
|
|
|
|
|
|
[0xFF65, 0xFF9F], |
|
513
|
|
|
|
|
|
|
[0xFFA1, 0xFFBE], |
|
514
|
|
|
|
|
|
|
[0xFFC2, 0xFFC7], |
|
515
|
|
|
|
|
|
|
[0xFFCA, 0xFFCF], |
|
516
|
|
|
|
|
|
|
[0xFFD2, 0xFFD7], |
|
517
|
|
|
|
|
|
|
[0xFFDA, 0xFFDC], |
|
518
|
|
|
|
|
|
|
[0x10000, 0x1000B], |
|
519
|
|
|
|
|
|
|
[0x1000D, 0x10026], |
|
520
|
|
|
|
|
|
|
[0x10028, 0x1003A], |
|
521
|
|
|
|
|
|
|
[0x1003C, 0x1003D], |
|
522
|
|
|
|
|
|
|
[0x1003F, 0x1004D], |
|
523
|
|
|
|
|
|
|
[0x10050, 0x1005D], |
|
524
|
|
|
|
|
|
|
[0x10080, 0x100FA], |
|
525
|
|
|
|
|
|
|
[0x10140, 0x10174], |
|
526
|
|
|
|
|
|
|
[0x101FD, 0x101FD], |
|
527
|
|
|
|
|
|
|
[0x10280, 0x1029C], |
|
528
|
|
|
|
|
|
|
[0x102A0, 0x102D0], |
|
529
|
|
|
|
|
|
|
[0x102E0, 0x102E0], |
|
530
|
|
|
|
|
|
|
[0x10300, 0x1031F], |
|
531
|
|
|
|
|
|
|
[0x1032D, 0x1034A], |
|
532
|
|
|
|
|
|
|
[0x10350, 0x1037A], |
|
533
|
|
|
|
|
|
|
[0x10380, 0x1039D], |
|
534
|
|
|
|
|
|
|
[0x103A0, 0x103C3], |
|
535
|
|
|
|
|
|
|
[0x103C8, 0x103CF], |
|
536
|
|
|
|
|
|
|
[0x103D1, 0x103D5], |
|
537
|
|
|
|
|
|
|
[0x10400, 0x1049D], |
|
538
|
|
|
|
|
|
|
[0x104A0, 0x104A9], |
|
539
|
|
|
|
|
|
|
[0x104B0, 0x104D3], |
|
540
|
|
|
|
|
|
|
[0x104D8, 0x104FB], |
|
541
|
|
|
|
|
|
|
[0x10500, 0x10527], |
|
542
|
|
|
|
|
|
|
[0x10530, 0x10563], |
|
543
|
|
|
|
|
|
|
[0x10570, 0x1057A], |
|
544
|
|
|
|
|
|
|
[0x1057C, 0x1058A], |
|
545
|
|
|
|
|
|
|
[0x1058C, 0x10592], |
|
546
|
|
|
|
|
|
|
[0x10594, 0x10595], |
|
547
|
|
|
|
|
|
|
[0x10597, 0x105A1], |
|
548
|
|
|
|
|
|
|
[0x105A3, 0x105B1], |
|
549
|
|
|
|
|
|
|
[0x105B3, 0x105B9], |
|
550
|
|
|
|
|
|
|
[0x105BB, 0x105BC], |
|
551
|
|
|
|
|
|
|
[0x10600, 0x10736], |
|
552
|
|
|
|
|
|
|
[0x10740, 0x10755], |
|
553
|
|
|
|
|
|
|
[0x10760, 0x10767], |
|
554
|
|
|
|
|
|
|
[0x10780, 0x10785], |
|
555
|
|
|
|
|
|
|
[0x10787, 0x107B0], |
|
556
|
|
|
|
|
|
|
[0x107B2, 0x107BA], |
|
557
|
|
|
|
|
|
|
[0x10800, 0x10805], |
|
558
|
|
|
|
|
|
|
[0x10808, 0x10808], |
|
559
|
|
|
|
|
|
|
[0x1080A, 0x10835], |
|
560
|
|
|
|
|
|
|
[0x10837, 0x10838], |
|
561
|
|
|
|
|
|
|
[0x1083C, 0x1083C], |
|
562
|
|
|
|
|
|
|
[0x1083F, 0x10855], |
|
563
|
|
|
|
|
|
|
[0x10860, 0x10876], |
|
564
|
|
|
|
|
|
|
[0x10880, 0x1089E], |
|
565
|
|
|
|
|
|
|
[0x108E0, 0x108F2], |
|
566
|
|
|
|
|
|
|
[0x108F4, 0x108F5], |
|
567
|
|
|
|
|
|
|
[0x10900, 0x10915], |
|
568
|
|
|
|
|
|
|
[0x10920, 0x10939], |
|
569
|
|
|
|
|
|
|
[0x10980, 0x109B7], |
|
570
|
|
|
|
|
|
|
[0x109BE, 0x109BF], |
|
571
|
|
|
|
|
|
|
[0x10A00, 0x10A03], |
|
572
|
|
|
|
|
|
|
[0x10A05, 0x10A06], |
|
573
|
|
|
|
|
|
|
[0x10A0C, 0x10A13], |
|
574
|
|
|
|
|
|
|
[0x10A15, 0x10A17], |
|
575
|
|
|
|
|
|
|
[0x10A19, 0x10A35], |
|
576
|
|
|
|
|
|
|
[0x10A38, 0x10A3A], |
|
577
|
|
|
|
|
|
|
[0x10A3F, 0x10A3F], |
|
578
|
|
|
|
|
|
|
[0x10A60, 0x10A7C], |
|
579
|
|
|
|
|
|
|
[0x10A80, 0x10A9C], |
|
580
|
|
|
|
|
|
|
[0x10AC0, 0x10AC7], |
|
581
|
|
|
|
|
|
|
[0x10AC9, 0x10AE6], |
|
582
|
|
|
|
|
|
|
[0x10B00, 0x10B35], |
|
583
|
|
|
|
|
|
|
[0x10B40, 0x10B55], |
|
584
|
|
|
|
|
|
|
[0x10B60, 0x10B72], |
|
585
|
|
|
|
|
|
|
[0x10B80, 0x10B91], |
|
586
|
|
|
|
|
|
|
[0x10C00, 0x10C48], |
|
587
|
|
|
|
|
|
|
[0x10C80, 0x10CB2], |
|
588
|
|
|
|
|
|
|
[0x10CC0, 0x10CF2], |
|
589
|
|
|
|
|
|
|
[0x10D00, 0x10D27], |
|
590
|
|
|
|
|
|
|
[0x10D30, 0x10D39], |
|
591
|
|
|
|
|
|
|
[0x10E80, 0x10EA9], |
|
592
|
|
|
|
|
|
|
[0x10EAB, 0x10EAC], |
|
593
|
|
|
|
|
|
|
[0x10EB0, 0x10EB1], |
|
594
|
|
|
|
|
|
|
[0x10EFD, 0x10F1C], |
|
595
|
|
|
|
|
|
|
[0x10F27, 0x10F27], |
|
596
|
|
|
|
|
|
|
[0x10F30, 0x10F50], |
|
597
|
|
|
|
|
|
|
[0x10F70, 0x10F85], |
|
598
|
|
|
|
|
|
|
[0x10FB0, 0x10FC4], |
|
599
|
|
|
|
|
|
|
[0x10FE0, 0x10FF6], |
|
600
|
|
|
|
|
|
|
[0x11000, 0x11046], |
|
601
|
|
|
|
|
|
|
[0x11066, 0x11075], |
|
602
|
|
|
|
|
|
|
[0x1107F, 0x110BA], |
|
603
|
|
|
|
|
|
|
[0x110C2, 0x110C2], |
|
604
|
|
|
|
|
|
|
[0x110D0, 0x110E8], |
|
605
|
|
|
|
|
|
|
[0x110F0, 0x110F9], |
|
606
|
|
|
|
|
|
|
[0x11100, 0x11134], |
|
607
|
|
|
|
|
|
|
[0x11136, 0x1113F], |
|
608
|
|
|
|
|
|
|
[0x11144, 0x11147], |
|
609
|
|
|
|
|
|
|
[0x11150, 0x11173], |
|
610
|
|
|
|
|
|
|
[0x11176, 0x11176], |
|
611
|
|
|
|
|
|
|
[0x11180, 0x111C4], |
|
612
|
|
|
|
|
|
|
[0x111C9, 0x111CC], |
|
613
|
|
|
|
|
|
|
[0x111CE, 0x111DA], |
|
614
|
|
|
|
|
|
|
[0x111DC, 0x111DC], |
|
615
|
|
|
|
|
|
|
[0x11200, 0x11211], |
|
616
|
|
|
|
|
|
|
[0x11213, 0x11237], |
|
617
|
|
|
|
|
|
|
[0x1123E, 0x11241], |
|
618
|
|
|
|
|
|
|
[0x11280, 0x11286], |
|
619
|
|
|
|
|
|
|
[0x11288, 0x11288], |
|
620
|
|
|
|
|
|
|
[0x1128A, 0x1128D], |
|
621
|
|
|
|
|
|
|
[0x1128F, 0x1129D], |
|
622
|
|
|
|
|
|
|
[0x1129F, 0x112A8], |
|
623
|
|
|
|
|
|
|
[0x112B0, 0x112EA], |
|
624
|
|
|
|
|
|
|
[0x112F0, 0x112F9], |
|
625
|
|
|
|
|
|
|
[0x11300, 0x11303], |
|
626
|
|
|
|
|
|
|
[0x11305, 0x1130C], |
|
627
|
|
|
|
|
|
|
[0x1130F, 0x11310], |
|
628
|
|
|
|
|
|
|
[0x11313, 0x11328], |
|
629
|
|
|
|
|
|
|
[0x1132A, 0x11330], |
|
630
|
|
|
|
|
|
|
[0x11332, 0x11333], |
|
631
|
|
|
|
|
|
|
[0x11335, 0x11339], |
|
632
|
|
|
|
|
|
|
[0x1133B, 0x11344], |
|
633
|
|
|
|
|
|
|
[0x11347, 0x11348], |
|
634
|
|
|
|
|
|
|
[0x1134B, 0x1134D], |
|
635
|
|
|
|
|
|
|
[0x11350, 0x11350], |
|
636
|
|
|
|
|
|
|
[0x11357, 0x11357], |
|
637
|
|
|
|
|
|
|
[0x1135D, 0x11363], |
|
638
|
|
|
|
|
|
|
[0x11366, 0x1136C], |
|
639
|
|
|
|
|
|
|
[0x11370, 0x11374], |
|
640
|
|
|
|
|
|
|
[0x11400, 0x1144A], |
|
641
|
|
|
|
|
|
|
[0x11450, 0x11459], |
|
642
|
|
|
|
|
|
|
[0x1145E, 0x11461], |
|
643
|
|
|
|
|
|
|
[0x11480, 0x114C5], |
|
644
|
|
|
|
|
|
|
[0x114C7, 0x114C7], |
|
645
|
|
|
|
|
|
|
[0x114D0, 0x114D9], |
|
646
|
|
|
|
|
|
|
[0x11580, 0x115B5], |
|
647
|
|
|
|
|
|
|
[0x115B8, 0x115C0], |
|
648
|
|
|
|
|
|
|
[0x115D8, 0x115DD], |
|
649
|
|
|
|
|
|
|
[0x11600, 0x11640], |
|
650
|
|
|
|
|
|
|
[0x11644, 0x11644], |
|
651
|
|
|
|
|
|
|
[0x11650, 0x11659], |
|
652
|
|
|
|
|
|
|
[0x11680, 0x116B8], |
|
653
|
|
|
|
|
|
|
[0x116C0, 0x116C9], |
|
654
|
|
|
|
|
|
|
[0x11700, 0x1171A], |
|
655
|
|
|
|
|
|
|
[0x1171D, 0x1172B], |
|
656
|
|
|
|
|
|
|
[0x11730, 0x11739], |
|
657
|
|
|
|
|
|
|
[0x11740, 0x11746], |
|
658
|
|
|
|
|
|
|
[0x11800, 0x1183A], |
|
659
|
|
|
|
|
|
|
[0x118A0, 0x118E9], |
|
660
|
|
|
|
|
|
|
[0x118FF, 0x11906], |
|
661
|
|
|
|
|
|
|
[0x11909, 0x11909], |
|
662
|
|
|
|
|
|
|
[0x1190C, 0x11913], |
|
663
|
|
|
|
|
|
|
[0x11915, 0x11916], |
|
664
|
|
|
|
|
|
|
[0x11918, 0x11935], |
|
665
|
|
|
|
|
|
|
[0x11937, 0x11938], |
|
666
|
|
|
|
|
|
|
[0x1193B, 0x11943], |
|
667
|
|
|
|
|
|
|
[0x11950, 0x11959], |
|
668
|
|
|
|
|
|
|
[0x119A0, 0x119A7], |
|
669
|
|
|
|
|
|
|
[0x119AA, 0x119D7], |
|
670
|
|
|
|
|
|
|
[0x119DA, 0x119E1], |
|
671
|
|
|
|
|
|
|
[0x119E3, 0x119E4], |
|
672
|
|
|
|
|
|
|
[0x11A00, 0x11A3E], |
|
673
|
|
|
|
|
|
|
[0x11A47, 0x11A47], |
|
674
|
|
|
|
|
|
|
[0x11A50, 0x11A99], |
|
675
|
|
|
|
|
|
|
[0x11A9D, 0x11A9D], |
|
676
|
|
|
|
|
|
|
[0x11AB0, 0x11AF8], |
|
677
|
|
|
|
|
|
|
[0x11C00, 0x11C08], |
|
678
|
|
|
|
|
|
|
[0x11C0A, 0x11C36], |
|
679
|
|
|
|
|
|
|
[0x11C38, 0x11C40], |
|
680
|
|
|
|
|
|
|
[0x11C50, 0x11C59], |
|
681
|
|
|
|
|
|
|
[0x11C72, 0x11C8F], |
|
682
|
|
|
|
|
|
|
[0x11C92, 0x11CA7], |
|
683
|
|
|
|
|
|
|
[0x11CA9, 0x11CB6], |
|
684
|
|
|
|
|
|
|
[0x11D00, 0x11D06], |
|
685
|
|
|
|
|
|
|
[0x11D08, 0x11D09], |
|
686
|
|
|
|
|
|
|
[0x11D0B, 0x11D36], |
|
687
|
|
|
|
|
|
|
[0x11D3A, 0x11D3A], |
|
688
|
|
|
|
|
|
|
[0x11D3C, 0x11D3D], |
|
689
|
|
|
|
|
|
|
[0x11D3F, 0x11D47], |
|
690
|
|
|
|
|
|
|
[0x11D50, 0x11D59], |
|
691
|
|
|
|
|
|
|
[0x11D60, 0x11D65], |
|
692
|
|
|
|
|
|
|
[0x11D67, 0x11D68], |
|
693
|
|
|
|
|
|
|
[0x11D6A, 0x11D8E], |
|
694
|
|
|
|
|
|
|
[0x11D90, 0x11D91], |
|
695
|
|
|
|
|
|
|
[0x11D93, 0x11D98], |
|
696
|
|
|
|
|
|
|
[0x11DA0, 0x11DA9], |
|
697
|
|
|
|
|
|
|
[0x11EE0, 0x11EF6], |
|
698
|
|
|
|
|
|
|
[0x11F00, 0x11F10], |
|
699
|
|
|
|
|
|
|
[0x11F12, 0x11F3A], |
|
700
|
|
|
|
|
|
|
[0x11F3E, 0x11F42], |
|
701
|
|
|
|
|
|
|
[0x11F50, 0x11F59], |
|
702
|
|
|
|
|
|
|
[0x11FB0, 0x11FB0], |
|
703
|
|
|
|
|
|
|
[0x12000, 0x12399], |
|
704
|
|
|
|
|
|
|
[0x12400, 0x1246E], |
|
705
|
|
|
|
|
|
|
[0x12480, 0x12543], |
|
706
|
|
|
|
|
|
|
[0x12F90, 0x12FF0], |
|
707
|
|
|
|
|
|
|
[0x13000, 0x1342F], |
|
708
|
|
|
|
|
|
|
[0x13440, 0x13455], |
|
709
|
|
|
|
|
|
|
[0x14400, 0x14646], |
|
710
|
|
|
|
|
|
|
[0x16800, 0x16A38], |
|
711
|
|
|
|
|
|
|
[0x16A40, 0x16A5E], |
|
712
|
|
|
|
|
|
|
[0x16A60, 0x16A69], |
|
713
|
|
|
|
|
|
|
[0x16A70, 0x16ABE], |
|
714
|
|
|
|
|
|
|
[0x16AC0, 0x16AC9], |
|
715
|
|
|
|
|
|
|
[0x16AD0, 0x16AED], |
|
716
|
|
|
|
|
|
|
[0x16AF0, 0x16AF4], |
|
717
|
|
|
|
|
|
|
[0x16B00, 0x16B36], |
|
718
|
|
|
|
|
|
|
[0x16B40, 0x16B43], |
|
719
|
|
|
|
|
|
|
[0x16B50, 0x16B59], |
|
720
|
|
|
|
|
|
|
[0x16B63, 0x16B77], |
|
721
|
|
|
|
|
|
|
[0x16B7D, 0x16B8F], |
|
722
|
|
|
|
|
|
|
[0x16E40, 0x16E7F], |
|
723
|
|
|
|
|
|
|
[0x16F00, 0x16F4A], |
|
724
|
|
|
|
|
|
|
[0x16F4F, 0x16F87], |
|
725
|
|
|
|
|
|
|
[0x16F8F, 0x16F9F], |
|
726
|
|
|
|
|
|
|
[0x16FE0, 0x16FE1], |
|
727
|
|
|
|
|
|
|
[0x16FE3, 0x16FE4], |
|
728
|
|
|
|
|
|
|
[0x16FF0, 0x16FF1], |
|
729
|
|
|
|
|
|
|
[0x17000, 0x187F7], |
|
730
|
|
|
|
|
|
|
[0x18800, 0x18CD5], |
|
731
|
|
|
|
|
|
|
[0x18D00, 0x18D08], |
|
732
|
|
|
|
|
|
|
[0x1AFF0, 0x1AFF3], |
|
733
|
|
|
|
|
|
|
[0x1AFF5, 0x1AFFB], |
|
734
|
|
|
|
|
|
|
[0x1AFFD, 0x1AFFE], |
|
735
|
|
|
|
|
|
|
[0x1B000, 0x1B122], |
|
736
|
|
|
|
|
|
|
[0x1B132, 0x1B132], |
|
737
|
|
|
|
|
|
|
[0x1B150, 0x1B152], |
|
738
|
|
|
|
|
|
|
[0x1B155, 0x1B155], |
|
739
|
|
|
|
|
|
|
[0x1B164, 0x1B167], |
|
740
|
|
|
|
|
|
|
[0x1B170, 0x1B2FB], |
|
741
|
|
|
|
|
|
|
[0x1BC00, 0x1BC6A], |
|
742
|
|
|
|
|
|
|
[0x1BC70, 0x1BC7C], |
|
743
|
|
|
|
|
|
|
[0x1BC80, 0x1BC88], |
|
744
|
|
|
|
|
|
|
[0x1BC90, 0x1BC99], |
|
745
|
|
|
|
|
|
|
[0x1BC9D, 0x1BC9E], |
|
746
|
|
|
|
|
|
|
[0x1CF00, 0x1CF2D], |
|
747
|
|
|
|
|
|
|
[0x1CF30, 0x1CF46], |
|
748
|
|
|
|
|
|
|
[0x1D165, 0x1D169], |
|
749
|
|
|
|
|
|
|
[0x1D16D, 0x1D172], |
|
750
|
|
|
|
|
|
|
[0x1D17B, 0x1D182], |
|
751
|
|
|
|
|
|
|
[0x1D185, 0x1D18B], |
|
752
|
|
|
|
|
|
|
[0x1D1AA, 0x1D1AD], |
|
753
|
|
|
|
|
|
|
[0x1D242, 0x1D244], |
|
754
|
|
|
|
|
|
|
[0x1D400, 0x1D454], |
|
755
|
|
|
|
|
|
|
[0x1D456, 0x1D49C], |
|
756
|
|
|
|
|
|
|
[0x1D49E, 0x1D49F], |
|
757
|
|
|
|
|
|
|
[0x1D4A2, 0x1D4A2], |
|
758
|
|
|
|
|
|
|
[0x1D4A5, 0x1D4A6], |
|
759
|
|
|
|
|
|
|
[0x1D4A9, 0x1D4AC], |
|
760
|
|
|
|
|
|
|
[0x1D4AE, 0x1D4B9], |
|
761
|
|
|
|
|
|
|
[0x1D4BB, 0x1D4BB], |
|
762
|
|
|
|
|
|
|
[0x1D4BD, 0x1D4C3], |
|
763
|
|
|
|
|
|
|
[0x1D4C5, 0x1D505], |
|
764
|
|
|
|
|
|
|
[0x1D507, 0x1D50A], |
|
765
|
|
|
|
|
|
|
[0x1D50D, 0x1D514], |
|
766
|
|
|
|
|
|
|
[0x1D516, 0x1D51C], |
|
767
|
|
|
|
|
|
|
[0x1D51E, 0x1D539], |
|
768
|
|
|
|
|
|
|
[0x1D53B, 0x1D53E], |
|
769
|
|
|
|
|
|
|
[0x1D540, 0x1D544], |
|
770
|
|
|
|
|
|
|
[0x1D546, 0x1D546], |
|
771
|
|
|
|
|
|
|
[0x1D54A, 0x1D550], |
|
772
|
|
|
|
|
|
|
[0x1D552, 0x1D6A5], |
|
773
|
|
|
|
|
|
|
[0x1D6A8, 0x1D6C0], |
|
774
|
|
|
|
|
|
|
[0x1D6C2, 0x1D6DA], |
|
775
|
|
|
|
|
|
|
[0x1D6DC, 0x1D6FA], |
|
776
|
|
|
|
|
|
|
[0x1D6FC, 0x1D714], |
|
777
|
|
|
|
|
|
|
[0x1D716, 0x1D734], |
|
778
|
|
|
|
|
|
|
[0x1D736, 0x1D74E], |
|
779
|
|
|
|
|
|
|
[0x1D750, 0x1D76E], |
|
780
|
|
|
|
|
|
|
[0x1D770, 0x1D788], |
|
781
|
|
|
|
|
|
|
[0x1D78A, 0x1D7A8], |
|
782
|
|
|
|
|
|
|
[0x1D7AA, 0x1D7C2], |
|
783
|
|
|
|
|
|
|
[0x1D7C4, 0x1D7CB], |
|
784
|
|
|
|
|
|
|
[0x1D7CE, 0x1D7FF], |
|
785
|
|
|
|
|
|
|
[0x1DA00, 0x1DA36], |
|
786
|
|
|
|
|
|
|
[0x1DA3B, 0x1DA6C], |
|
787
|
|
|
|
|
|
|
[0x1DA75, 0x1DA75], |
|
788
|
|
|
|
|
|
|
[0x1DA84, 0x1DA84], |
|
789
|
|
|
|
|
|
|
[0x1DA9B, 0x1DA9F], |
|
790
|
|
|
|
|
|
|
[0x1DAA1, 0x1DAAF], |
|
791
|
|
|
|
|
|
|
[0x1DF00, 0x1DF1E], |
|
792
|
|
|
|
|
|
|
[0x1DF25, 0x1DF2A], |
|
793
|
|
|
|
|
|
|
[0x1E000, 0x1E006], |
|
794
|
|
|
|
|
|
|
[0x1E008, 0x1E018], |
|
795
|
|
|
|
|
|
|
[0x1E01B, 0x1E021], |
|
796
|
|
|
|
|
|
|
[0x1E023, 0x1E024], |
|
797
|
|
|
|
|
|
|
[0x1E026, 0x1E02A], |
|
798
|
|
|
|
|
|
|
[0x1E030, 0x1E06D], |
|
799
|
|
|
|
|
|
|
[0x1E08F, 0x1E08F], |
|
800
|
|
|
|
|
|
|
[0x1E100, 0x1E12C], |
|
801
|
|
|
|
|
|
|
[0x1E130, 0x1E13D], |
|
802
|
|
|
|
|
|
|
[0x1E140, 0x1E149], |
|
803
|
|
|
|
|
|
|
[0x1E14E, 0x1E14E], |
|
804
|
|
|
|
|
|
|
[0x1E290, 0x1E2AE], |
|
805
|
|
|
|
|
|
|
[0x1E2C0, 0x1E2F9], |
|
806
|
|
|
|
|
|
|
[0x1E4D0, 0x1E4F9], |
|
807
|
|
|
|
|
|
|
[0x1E7E0, 0x1E7E6], |
|
808
|
|
|
|
|
|
|
[0x1E7E8, 0x1E7EB], |
|
809
|
|
|
|
|
|
|
[0x1E7ED, 0x1E7EE], |
|
810
|
|
|
|
|
|
|
[0x1E7F0, 0x1E7FE], |
|
811
|
|
|
|
|
|
|
[0x1E800, 0x1E8C4], |
|
812
|
|
|
|
|
|
|
[0x1E8D0, 0x1E8D6], |
|
813
|
|
|
|
|
|
|
[0x1E900, 0x1E94B], |
|
814
|
|
|
|
|
|
|
[0x1E950, 0x1E959], |
|
815
|
|
|
|
|
|
|
[0x1EE00, 0x1EE03], |
|
816
|
|
|
|
|
|
|
[0x1EE05, 0x1EE1F], |
|
817
|
|
|
|
|
|
|
[0x1EE21, 0x1EE22], |
|
818
|
|
|
|
|
|
|
[0x1EE24, 0x1EE24], |
|
819
|
|
|
|
|
|
|
[0x1EE27, 0x1EE27], |
|
820
|
|
|
|
|
|
|
[0x1EE29, 0x1EE32], |
|
821
|
|
|
|
|
|
|
[0x1EE34, 0x1EE37], |
|
822
|
|
|
|
|
|
|
[0x1EE39, 0x1EE39], |
|
823
|
|
|
|
|
|
|
[0x1EE3B, 0x1EE3B], |
|
824
|
|
|
|
|
|
|
[0x1EE42, 0x1EE42], |
|
825
|
|
|
|
|
|
|
[0x1EE47, 0x1EE47], |
|
826
|
|
|
|
|
|
|
[0x1EE49, 0x1EE49], |
|
827
|
|
|
|
|
|
|
[0x1EE4B, 0x1EE4B], |
|
828
|
|
|
|
|
|
|
[0x1EE4D, 0x1EE4F], |
|
829
|
|
|
|
|
|
|
[0x1EE51, 0x1EE52], |
|
830
|
|
|
|
|
|
|
[0x1EE54, 0x1EE54], |
|
831
|
|
|
|
|
|
|
[0x1EE57, 0x1EE57], |
|
832
|
|
|
|
|
|
|
[0x1EE59, 0x1EE59], |
|
833
|
|
|
|
|
|
|
[0x1EE5B, 0x1EE5B], |
|
834
|
|
|
|
|
|
|
[0x1EE5D, 0x1EE5D], |
|
835
|
|
|
|
|
|
|
[0x1EE5F, 0x1EE5F], |
|
836
|
|
|
|
|
|
|
[0x1EE61, 0x1EE62], |
|
837
|
|
|
|
|
|
|
[0x1EE64, 0x1EE64], |
|
838
|
|
|
|
|
|
|
[0x1EE67, 0x1EE6A], |
|
839
|
|
|
|
|
|
|
[0x1EE6C, 0x1EE72], |
|
840
|
|
|
|
|
|
|
[0x1EE74, 0x1EE77], |
|
841
|
|
|
|
|
|
|
[0x1EE79, 0x1EE7C], |
|
842
|
|
|
|
|
|
|
[0x1EE7E, 0x1EE7E], |
|
843
|
|
|
|
|
|
|
[0x1EE80, 0x1EE89], |
|
844
|
|
|
|
|
|
|
[0x1EE8B, 0x1EE9B], |
|
845
|
|
|
|
|
|
|
[0x1EEA1, 0x1EEA3], |
|
846
|
|
|
|
|
|
|
[0x1EEA5, 0x1EEA9], |
|
847
|
|
|
|
|
|
|
[0x1EEAB, 0x1EEBB], |
|
848
|
|
|
|
|
|
|
[0x1FBF0, 0x1FBF9], |
|
849
|
|
|
|
|
|
|
[0x20000, 0x2A6DF], |
|
850
|
|
|
|
|
|
|
[0x2A700, 0x2B739], |
|
851
|
|
|
|
|
|
|
[0x2B740, 0x2B81D], |
|
852
|
|
|
|
|
|
|
[0x2B820, 0x2CEA1], |
|
853
|
|
|
|
|
|
|
[0x2CEB0, 0x2EBE0], |
|
854
|
|
|
|
|
|
|
[0x2EBF0, 0x2EE5D], |
|
855
|
|
|
|
|
|
|
[0x2F800, 0x2FA1D], |
|
856
|
|
|
|
|
|
|
[0x30000, 0x3134A], |
|
857
|
|
|
|
|
|
|
[0x31350, 0x323AF],); |
|
858
|
|
|
|
|
|
|
|
|
859
|
|
|
|
|
|
|
# Binary-search the frozen XID_Continue range table. |
|
860
|
|
|
|
|
|
|
# Caller is responsible for the ASCII fast path; this is the slow path |
|
861
|
|
|
|
|
|
|
# for non-ASCII codepoints in identifier-character checks. |
|
862
|
|
|
|
|
|
|
sub _is_xid_continue { |
|
863
|
0
|
|
|
0
|
|
0
|
my ($cp) = @_; |
|
864
|
0
|
|
|
|
|
0
|
my $lo = 0; |
|
865
|
0
|
|
|
|
|
0
|
my $hi = $#XID_CONTINUE_RANGES; |
|
866
|
0
|
|
|
|
|
0
|
while ($lo <= $hi) { |
|
867
|
0
|
|
|
|
|
0
|
my $mid = ($lo + $hi) >> 1; |
|
868
|
0
|
|
|
|
|
0
|
my $r = $XID_CONTINUE_RANGES[$mid]; |
|
869
|
0
|
0
|
|
|
|
0
|
if ($cp < $r->[0]) { $hi = $mid - 1 } |
|
|
0
|
0
|
|
|
|
0
|
|
|
870
|
0
|
|
|
|
|
0
|
elsif ($cp > $r->[1]) { $lo = $mid + 1 } |
|
871
|
0
|
|
|
|
|
0
|
else { return 1 } |
|
872
|
|
|
|
|
|
|
} |
|
873
|
0
|
|
|
|
|
0
|
return 0; |
|
874
|
|
|
|
|
|
|
} |
|
875
|
|
|
|
|
|
|
|
|
876
|
|
|
|
|
|
|
sub new_table { |
|
877
|
52
|
|
|
52
|
0
|
1900
|
require Tie::IxHash; |
|
878
|
52
|
|
|
|
|
11827
|
tie my %h, 'Tie::IxHash'; |
|
879
|
52
|
|
|
|
|
1011
|
return \%h; |
|
880
|
|
|
|
|
|
|
} |
|
881
|
|
|
|
|
|
|
|
|
882
|
|
|
|
|
|
|
# SPEC §"Unordered tables". Plain hashref blessed into DMS::Parser::UnorderedTable. |
|
883
|
|
|
|
|
|
|
# Used by parse_table_block / parse_flow_table / parse_list_item_value when |
|
884
|
|
|
|
|
|
|
# the parser was constructed with ignore_order=true. Iteration order over |
|
885
|
|
|
|
|
|
|
# the resulting table is arbitrary (Perl hash randomization). |
|
886
|
|
|
|
|
|
|
sub new_unordered_table { |
|
887
|
0
|
|
|
0
|
0
|
0
|
my %h; |
|
888
|
0
|
|
|
|
|
0
|
return bless \%h, 'DMS::Parser::UnorderedTable'; |
|
889
|
|
|
|
|
|
|
} |
|
890
|
|
|
|
|
|
|
|
|
891
|
|
|
|
|
|
|
# Like new_table, but returns a plain (non-tied) hashref plus an |
|
892
|
|
|
|
|
|
|
# external "insertion-order keys" arrayref. Used in the lite/encoder |
|
893
|
|
|
|
|
|
|
# fast path where we don't need the round-trip Document tree — just |
|
894
|
|
|
|
|
|
|
# enough structure for JSON emission. The encoder special-cases this |
|
895
|
|
|
|
|
|
|
# shape (HASH refs accompanied by an "$order" arrayref). |
|
896
|
|
|
|
|
|
|
# |
|
897
|
|
|
|
|
|
|
# We tag the plain hashref with a hidden key holding the keys arrayref |
|
898
|
|
|
|
|
|
|
# so it round-trips through `$t->{$k} = $v` access patterns: the |
|
899
|
|
|
|
|
|
|
# encoder extracts the order list from `$t->{"\0_keys"}` if present and |
|
900
|
|
|
|
|
|
|
# falls back to `keys %$t` otherwise. |
|
901
|
|
|
|
|
|
|
our $ORDER_KEY = "\0_keys"; |
|
902
|
|
|
|
|
|
|
sub new_ordered_table { |
|
903
|
0
|
|
|
0
|
0
|
0
|
return { $ORDER_KEY => [] }; |
|
904
|
|
|
|
|
|
|
} |
|
905
|
|
|
|
|
|
|
|
|
906
|
|
|
|
|
|
|
# Per-indent compiled bulk regex cache for parse_table_block's fast |
|
907
|
|
|
|
|
|
|
# path. Common indent levels (0, 2, 4, 6, ...) compile once globally; |
|
908
|
|
|
|
|
|
|
# subsequent encounters reuse the qr// from this cache instead of |
|
909
|
|
|
|
|
|
|
# recompiling per parse_table_block call. |
|
910
|
|
|
|
|
|
|
our %BULK_RE_CACHE; |
|
911
|
|
|
|
|
|
|
|
|
912
|
|
|
|
|
|
|
sub _err { |
|
913
|
0
|
|
|
0
|
|
0
|
my ($self, $msg) = @_; |
|
914
|
0
|
|
|
|
|
0
|
return "$self->{line}:" . ($self->{pos} - $self->{line_start} + 1) . ": $msg\n"; |
|
915
|
|
|
|
|
|
|
} |
|
916
|
|
|
|
|
|
|
|
|
917
|
|
|
|
|
|
|
sub _err_at { |
|
918
|
4
|
|
|
4
|
|
40
|
my ($self, $line, $line_start, $pos, $msg) = @_; |
|
919
|
4
|
|
|
|
|
20
|
my $col = $pos - $line_start + 1; |
|
920
|
4
|
|
|
|
|
104
|
return "$line:$col: $msg\n"; |
|
921
|
|
|
|
|
|
|
} |
|
922
|
|
|
|
|
|
|
|
|
923
|
|
|
|
|
|
|
sub _die_at { |
|
924
|
0
|
|
|
0
|
|
0
|
my ($self, $line, $line_start, $pos, $msg) = @_; |
|
925
|
0
|
|
|
|
|
0
|
die "$line:" . ($pos - $line_start + 1) . ": $msg\n"; |
|
926
|
|
|
|
|
|
|
} |
|
927
|
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
sub _die { |
|
929
|
0
|
|
|
0
|
|
0
|
my ($self, $msg) = @_; |
|
930
|
0
|
|
|
|
|
0
|
die _err($self, $msg); |
|
931
|
|
|
|
|
|
|
} |
|
932
|
|
|
|
|
|
|
|
|
933
|
|
|
|
|
|
|
# SPEC §Decode/Encode (v0.14): canonical entry point. Returns the body |
|
934
|
|
|
|
|
|
|
# only — meta and comments are dropped. Use decode_document() to keep |
|
935
|
|
|
|
|
|
|
# them. |
|
936
|
|
|
|
|
|
|
sub decode { |
|
937
|
2
|
|
|
2
|
1
|
6919
|
my ($src) = @_; |
|
938
|
2
|
|
|
|
|
7
|
my $doc = decode_document($src); |
|
939
|
2
|
|
|
|
|
11
|
return $doc->{body}; |
|
940
|
|
|
|
|
|
|
} |
|
941
|
|
|
|
|
|
|
|
|
942
|
|
|
|
|
|
|
# Deprecated alias for decode(). Removed in the next release. |
|
943
|
|
|
|
|
|
|
# SPEC §Decode/Encode — Migration from the parse/to_dms era. |
|
944
|
|
|
|
|
|
|
{ my $warned; |
|
945
|
|
|
|
|
|
|
sub parse { |
|
946
|
1
|
50
|
|
1
|
0
|
11
|
unless ($warned++) { |
|
947
|
1
|
|
|
|
|
207
|
Carp::carp( |
|
948
|
|
|
|
|
|
|
'DMS::Parser::parse() is deprecated; use decode() instead. ' |
|
949
|
|
|
|
|
|
|
. 'SPEC v0.14 renamed parse() to decode().'); |
|
950
|
|
|
|
|
|
|
} |
|
951
|
1
|
|
|
|
|
10
|
goto &decode; |
|
952
|
|
|
|
|
|
|
} |
|
953
|
|
|
|
|
|
|
} |
|
954
|
|
|
|
|
|
|
|
|
955
|
|
|
|
|
|
|
# SPEC §Parsing modes — full and lite. Lite-mode equivalents return |
|
956
|
|
|
|
|
|
|
# a Document with empty comments + original_forms. |
|
957
|
|
|
|
|
|
|
sub decode_lite { |
|
958
|
0
|
|
|
0
|
1
|
0
|
my ($src) = @_; |
|
959
|
0
|
|
|
|
|
0
|
return decode_lite_document($src)->{body}; |
|
960
|
|
|
|
|
|
|
} |
|
961
|
|
|
|
|
|
|
|
|
962
|
|
|
|
|
|
|
# Deprecated alias for decode_lite(). Removed in the next release. |
|
963
|
|
|
|
|
|
|
{ my $warned; |
|
964
|
|
|
|
|
|
|
sub parse_lite { |
|
965
|
0
|
0
|
|
0
|
0
|
0
|
unless ($warned++) { |
|
966
|
0
|
|
|
|
|
0
|
Carp::carp( |
|
967
|
|
|
|
|
|
|
'DMS::Parser::parse_lite() is deprecated; use decode_lite() instead. ' |
|
968
|
|
|
|
|
|
|
. 'SPEC v0.14 renamed parse_lite() to decode_lite().'); |
|
969
|
|
|
|
|
|
|
} |
|
970
|
0
|
|
|
|
|
0
|
goto &decode_lite; |
|
971
|
|
|
|
|
|
|
} |
|
972
|
|
|
|
|
|
|
} |
|
973
|
|
|
|
|
|
|
|
|
974
|
|
|
|
|
|
|
sub decode_lite_document { |
|
975
|
0
|
|
|
0
|
1
|
0
|
my ($src) = @_; |
|
976
|
|
|
|
|
|
|
# Fast path: try the iterative mega-regex tokenizer for the simple |
|
977
|
|
|
|
|
|
|
# subset (bareword keys, simple scalar values, nested-table headers, |
|
978
|
|
|
|
|
|
|
# blank lines, single-line `#` / `//` comments, ASCII-only). Returns |
|
979
|
|
|
|
|
|
|
# undef on anything it can't handle; we fall back to the full |
|
980
|
|
|
|
|
|
|
# recursive-descent parser below. The fast path avoids the |
|
981
|
|
|
|
|
|
|
# parse_table_block recursion and the parse_kvpair / _skip_trivia |
|
982
|
|
|
|
|
|
|
# method-dispatch frames — pure-Perl method calls are the main |
|
983
|
|
|
|
|
|
|
# cost on flat configs (kube values.yaml-shaped). |
|
984
|
0
|
|
|
|
|
0
|
my $fast = _parse_lite_document_fast(\$src); |
|
985
|
0
|
0
|
|
|
|
0
|
return $fast if defined $fast; |
|
986
|
0
|
|
|
|
|
0
|
return _parse_document_with_mode($src, 1); |
|
987
|
|
|
|
|
|
|
} |
|
988
|
|
|
|
|
|
|
|
|
989
|
|
|
|
|
|
|
# Iterative mega-regex tokenizer + manual stack assembly for the lite |
|
990
|
|
|
|
|
|
|
# subset. Returns a Document hashref { meta, body, comments, |
|
991
|
|
|
|
|
|
|
# original_forms } on success, or undef on any pattern the fast path |
|
992
|
|
|
|
|
|
|
# can't handle (front matter, heredocs, list items, escaped strings, |
|
993
|
|
|
|
|
|
|
# block comments, complex-key flow forms, non-ASCII strings). |
|
994
|
|
|
|
|
|
|
# Caller's $src isn't modified; we do a pos() scan via a scalar ref. |
|
995
|
|
|
|
|
|
|
sub _parse_lite_document_fast { |
|
996
|
0
|
|
|
0
|
|
0
|
my ($src_ref) = @_; |
|
997
|
0
|
|
|
|
|
0
|
my $len = length($$src_ref); |
|
998
|
|
|
|
|
|
|
|
|
999
|
|
|
|
|
|
|
# Pre-flight rejection: a single regex over the whole src checks |
|
1000
|
|
|
|
|
|
|
# for any opener of a construct the fast path doesn't handle. If |
|
1001
|
|
|
|
|
|
|
# any are present, return undef immediately so the slow parser |
|
1002
|
|
|
|
|
|
|
# gets the (correct) full-spec implementation. This costs one |
|
1003
|
|
|
|
|
|
|
# linear scan but it's bounded by the C regex engine and faster |
|
1004
|
|
|
|
|
|
|
# than discovering mid-parse that we can't handle something. The |
|
1005
|
|
|
|
|
|
|
# negative lookaheads for `(?!##)` exclude `### LABEL` block |
|
1006
|
|
|
|
|
|
|
# comment openers. We deliberately allow `\u`/`\U` Unicode escapes |
|
1007
|
|
|
|
|
|
|
# but reject other backslash escapes (would need decoding). |
|
1008
|
0
|
0
|
|
|
|
0
|
return undef if $$src_ref =~ /"""|'''/; # heredocs |
|
1009
|
0
|
0
|
|
|
|
0
|
return undef if $$src_ref =~ /^[ \t]*\+/m; # list items / front matter |
|
1010
|
0
|
0
|
|
|
|
0
|
return undef if $$src_ref =~ /^[ \t]*###/m; # block comment opener |
|
1011
|
|
|
|
|
|
|
# Note: we don't pre-reject `/*` because real configs sometimes |
|
1012
|
|
|
|
|
|
|
# have it inside string literals (e.g., `path: "/etc/foo/*.tmpl"`). |
|
1013
|
|
|
|
|
|
|
# The mega-regex handles strings as a unit so `/*` is consumed |
|
1014
|
|
|
|
|
|
|
# inside the string capture; the inner loop will only fail on a |
|
1015
|
|
|
|
|
|
|
# `/*` that appears at line-start as an actual block comment, in |
|
1016
|
|
|
|
|
|
|
# which case it correctly bails to the slow parser. |
|
1017
|
|
|
|
|
|
|
# Backslash escapes ARE handled in the string regex below — the |
|
1018
|
|
|
|
|
|
|
# decode pass after match handles \", \\, \n, \r, \t, \b, \f, \uXXXX, |
|
1019
|
|
|
|
|
|
|
# \UXXXXXXXX. Anything else with `\` is still rejected. |
|
1020
|
|
|
|
|
|
|
# Quoted keys, inline tables/lists with content, dates, hex/oct/bin |
|
1021
|
|
|
|
|
|
|
# numbers — all opt out by being matched as "didn't match the |
|
1022
|
|
|
|
|
|
|
# mega-regex" inside the loop; the loop returns undef on the |
|
1023
|
|
|
|
|
|
|
# first non-match. |
|
1024
|
|
|
|
|
|
|
|
|
1025
|
|
|
|
|
|
|
# Manual stack of containers, with the indent at which their |
|
1026
|
|
|
|
|
|
|
# children live. The root has child_indent 0. When a header |
|
1027
|
|
|
|
|
|
|
# (`key:\n`) arrives, we push the new child table with |
|
1028
|
|
|
|
|
|
|
# child_indent=undef ("TBD — set by first child"). The pop rule |
|
1029
|
|
|
|
|
|
|
# is "while top's child_indent > current indent, pop"; this |
|
1030
|
|
|
|
|
|
|
# closes nested blocks as the indent decreases. |
|
1031
|
0
|
|
|
|
|
0
|
my $root = { $ORDER_KEY => [] }; |
|
1032
|
0
|
|
|
|
|
0
|
my @stack = ({ c => $root, ci => 0 }); |
|
1033
|
|
|
|
|
|
|
|
|
1034
|
0
|
|
|
|
|
0
|
pos($$src_ref) = 0; |
|
1035
|
0
|
|
|
|
|
0
|
LINE: while (pos($$src_ref) < $len) { |
|
1036
|
|
|
|
|
|
|
# Blank line — most common after kvpairs. |
|
1037
|
0
|
0
|
|
|
|
0
|
if ($$src_ref =~ /\G[ \t]*\r?\n/gc) { next LINE; } |
|
|
0
|
|
|
|
|
0
|
|
|
1038
|
|
|
|
|
|
|
# Single-line `#` or `//` comment (block forms `###` / `/*` |
|
1039
|
|
|
|
|
|
|
# already pre-rejected). |
|
1040
|
0
|
0
|
|
|
|
0
|
if ($$src_ref =~ /\G[ \t]*(?:#|\/\/)[^\n\r]*\r?\n/gc) { next LINE; } |
|
|
0
|
|
|
|
|
0
|
|
|
1041
|
|
|
|
|
|
|
|
|
1042
|
|
|
|
|
|
|
# Mega-match: kvpair with simple value OR nested-block header. |
|
1043
|
|
|
|
|
|
|
# Group layout (capturing only — non-capturing alternations |
|
1044
|
|
|
|
|
|
|
# don't shift indices): |
|
1045
|
|
|
|
|
|
|
# $1 = indent (spaces) |
|
1046
|
|
|
|
|
|
|
# $2 = bareword key |
|
1047
|
|
|
|
|
|
|
# $3 = positive integer |
|
1048
|
|
|
|
|
|
|
# $4 = negative integer |
|
1049
|
|
|
|
|
|
|
# $5 = bool ('true'/'false') |
|
1050
|
|
|
|
|
|
|
# $6 = basic-string content (escapes allowed; decoded after) |
|
1051
|
|
|
|
|
|
|
# $7 = empty list / empty table literal ('[]' or '{}') |
|
1052
|
|
|
|
|
|
|
# $8 = decimal float (no exponent) |
|
1053
|
|
|
|
|
|
|
# $9 = flow-list-of-simple-values content (between [ and ], |
|
1054
|
|
|
|
|
|
|
# no nested brackets, no embedded newlines) |
|
1055
|
|
|
|
|
|
|
# $10 = flow-table-of-simple-kvpairs content |
|
1056
|
|
|
|
|
|
|
# $11 = trailing '\r?\n' for the no-value (header) form |
|
1057
|
0
|
0
|
|
|
|
0
|
if ($$src_ref =~ /\G([ ]*)([A-Za-z_][A-Za-z0-9_-]*):(?:[ ](?:(0|[1-9][0-9]{0,17})|(-[1-9][0-9]{0,17})|(true|false)|"((?:[\x20\x21\x23-\x5b\x5d-\x7e]|\\(?:["\\nrtbf]|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8}))*)"|(\[\]|\{\})|(-?(?:0|[1-9][0-9]*)\.[0-9]+)|\[([^\[\]\n\r]+)\]|\{([^\{\}\n\r]+)\})\r?\n|(\r?\n))/gc) { |
|
1058
|
0
|
|
|
|
|
0
|
my $ind = length($1); |
|
1059
|
0
|
|
|
|
|
0
|
my $key = $2; |
|
1060
|
|
|
|
|
|
|
# Pop closed levels (top's children no longer reachable). |
|
1061
|
0
|
|
0
|
|
|
0
|
while (@stack > 1 && defined($stack[-1]{ci}) && $stack[-1]{ci} > $ind) { |
|
|
|
|
0
|
|
|
|
|
|
1062
|
0
|
|
|
|
|
0
|
pop @stack; |
|
1063
|
|
|
|
|
|
|
} |
|
1064
|
|
|
|
|
|
|
# First child of a header? Its indent fixes the parent's |
|
1065
|
|
|
|
|
|
|
# child_indent. |
|
1066
|
0
|
0
|
|
|
|
0
|
if (!defined $stack[-1]{ci}) { |
|
1067
|
|
|
|
|
|
|
# The new indent must be strictly greater than the |
|
1068
|
|
|
|
|
|
|
# header's parent's child_indent (i.e., we descended). |
|
1069
|
0
|
0
|
0
|
|
|
0
|
if (@stack >= 2 && $stack[-2]{ci} >= $ind) { return undef; } |
|
|
0
|
|
|
|
|
0
|
|
|
1070
|
0
|
|
|
|
|
0
|
$stack[-1]{ci} = $ind; |
|
1071
|
|
|
|
|
|
|
} |
|
1072
|
|
|
|
|
|
|
# Indent must match the current container's expected level. |
|
1073
|
0
|
0
|
|
|
|
0
|
return undef if $stack[-1]{ci} != $ind; |
|
1074
|
0
|
|
|
|
|
0
|
my $container = $stack[-1]{c}; |
|
1075
|
0
|
0
|
|
|
|
0
|
return undef if exists $container->{$key}; # duplicate key |
|
1076
|
|
|
|
|
|
|
|
|
1077
|
0
|
0
|
|
|
|
0
|
if (defined $11) { |
|
1078
|
|
|
|
|
|
|
# Nested-block header: child is a table (the fast path |
|
1079
|
|
|
|
|
|
|
# doesn't handle child-as-list; pre-flight rejects any |
|
1080
|
|
|
|
|
|
|
# `+` so this is safe). |
|
1081
|
0
|
|
|
|
|
0
|
my $child = { $ORDER_KEY => [] }; |
|
1082
|
0
|
|
|
|
|
0
|
push @{$container->{$ORDER_KEY}}, $key; |
|
|
0
|
|
|
|
|
0
|
|
|
1083
|
0
|
|
|
|
|
0
|
$container->{$key} = $child; |
|
1084
|
0
|
|
|
|
|
0
|
push @stack, { c => $child, ci => undef }; |
|
1085
|
|
|
|
|
|
|
} else { |
|
1086
|
0
|
|
|
|
|
0
|
my $val; |
|
1087
|
0
|
0
|
|
|
|
0
|
if (defined $3) { my $iv = 0+$3; $val = bless \$iv, 'DMS::Parser::Integer'; } |
|
|
0
|
0
|
|
|
|
0
|
|
|
|
0
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
1088
|
0
|
|
|
|
|
0
|
elsif (defined $4) { my $iv = 0+$4; $val = bless \$iv, 'DMS::Parser::Integer'; } |
|
|
0
|
|
|
|
|
0
|
|
|
1089
|
0
|
0
|
|
|
|
0
|
elsif (defined $5) { my $bv = $5 eq 'true' ? 1 : 0; $val = bless \$bv, 'DMS::Parser::Bool'; } |
|
|
0
|
|
|
|
|
0
|
|
|
1090
|
|
|
|
|
|
|
elsif (defined $6) { |
|
1091
|
0
|
|
|
|
|
0
|
$val = $6; |
|
1092
|
|
|
|
|
|
|
# Decode escapes only if any backslash present (the |
|
1093
|
|
|
|
|
|
|
# common case is clean ASCII strings — skip the |
|
1094
|
|
|
|
|
|
|
# substitution then). |
|
1095
|
0
|
0
|
|
|
|
0
|
if (index($val, '\\') >= 0) { |
|
1096
|
0
|
|
|
|
|
0
|
$val =~ s{\\(["\\nrtbf])}{ |
|
1097
|
0
|
0
|
|
|
|
0
|
$1 eq 'n' ? "\n" |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
1098
|
|
|
|
|
|
|
: $1 eq 't' ? "\t" |
|
1099
|
|
|
|
|
|
|
: $1 eq 'r' ? "\r" |
|
1100
|
|
|
|
|
|
|
: $1 eq 'b' ? "\b" |
|
1101
|
|
|
|
|
|
|
: $1 eq 'f' ? "\f" |
|
1102
|
|
|
|
|
|
|
: $1 |
|
1103
|
|
|
|
|
|
|
}ge; |
|
1104
|
0
|
|
|
|
|
0
|
$val =~ s{\\u([0-9A-Fa-f]{4})}{chr(hex($1))}ge; |
|
|
0
|
|
|
|
|
0
|
|
|
1105
|
0
|
|
|
|
|
0
|
$val =~ s{\\U([0-9A-Fa-f]{8})}{chr(hex($1))}ge; |
|
|
0
|
|
|
|
|
0
|
|
|
1106
|
|
|
|
|
|
|
} |
|
1107
|
|
|
|
|
|
|
} |
|
1108
|
0
|
0
|
|
|
|
0
|
elsif (defined $7) { $val = $7 eq '[]' ? [] : { $ORDER_KEY => [] }; } |
|
1109
|
0
|
|
|
|
|
0
|
elsif (defined $8) { my $fv = 0+$8; $val = bless \$fv, 'DMS::Parser::Float'; } |
|
|
0
|
|
|
|
|
0
|
|
|
1110
|
|
|
|
|
|
|
elsif (defined $9) { |
|
1111
|
|
|
|
|
|
|
# Flow-list with content (no nested brackets / newlines). |
|
1112
|
|
|
|
|
|
|
# Try to parse inner content as a comma-separated |
|
1113
|
|
|
|
|
|
|
# list of simple values. Bail to slow path if any |
|
1114
|
|
|
|
|
|
|
# value isn't a clean leaf. |
|
1115
|
0
|
|
|
|
|
0
|
my $inner = $9; |
|
1116
|
0
|
|
|
|
|
0
|
my @items; |
|
1117
|
|
|
|
|
|
|
# Split by ',' but allow whitespace. |
|
1118
|
0
|
|
|
|
|
0
|
for my $raw (split /\s*,\s*/, $inner) { |
|
1119
|
0
|
0
|
|
|
|
0
|
next if $raw eq ''; # trailing comma → empty trailing field |
|
1120
|
0
|
0
|
|
|
|
0
|
if ($raw =~ /^(0|[1-9][0-9]{0,17})$/) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
1121
|
0
|
|
|
|
|
0
|
my $iv = 0+$raw; push @items, bless \$iv, 'DMS::Parser::Integer'; |
|
|
0
|
|
|
|
|
0
|
|
|
1122
|
|
|
|
|
|
|
} elsif ($raw =~ /^-[1-9][0-9]{0,17}$/) { |
|
1123
|
0
|
|
|
|
|
0
|
my $iv = 0+$raw; push @items, bless \$iv, 'DMS::Parser::Integer'; |
|
|
0
|
|
|
|
|
0
|
|
|
1124
|
|
|
|
|
|
|
} elsif ($raw eq 'true') { |
|
1125
|
0
|
|
|
|
|
0
|
my $bv = 1; push @items, bless \$bv, 'DMS::Parser::Bool'; |
|
|
0
|
|
|
|
|
0
|
|
|
1126
|
|
|
|
|
|
|
} elsif ($raw eq 'false') { |
|
1127
|
0
|
|
|
|
|
0
|
my $bv = 0; push @items, bless \$bv, 'DMS::Parser::Bool'; |
|
|
0
|
|
|
|
|
0
|
|
|
1128
|
|
|
|
|
|
|
} elsif ($raw =~ /^"([\x20\x21\x23-\x5b\x5d-\x7e]*)"$/) { |
|
1129
|
0
|
|
|
|
|
0
|
push @items, $1; # ASCII-clean string, no escapes |
|
1130
|
|
|
|
|
|
|
} else { |
|
1131
|
0
|
|
|
|
|
0
|
return undef; # complex flow content — fall back |
|
1132
|
|
|
|
|
|
|
} |
|
1133
|
|
|
|
|
|
|
} |
|
1134
|
0
|
|
|
|
|
0
|
$val = \@items; |
|
1135
|
|
|
|
|
|
|
} |
|
1136
|
|
|
|
|
|
|
else { |
|
1137
|
|
|
|
|
|
|
# $10: flow-table — fall back for now (rare; would |
|
1138
|
|
|
|
|
|
|
# need split on `,` and inner `key: value` parsing). |
|
1139
|
0
|
|
|
|
|
0
|
return undef; |
|
1140
|
|
|
|
|
|
|
} |
|
1141
|
0
|
|
|
|
|
0
|
push @{$container->{$ORDER_KEY}}, $key; |
|
|
0
|
|
|
|
|
0
|
|
|
1142
|
0
|
|
|
|
|
0
|
$container->{$key} = $val; |
|
1143
|
|
|
|
|
|
|
} |
|
1144
|
0
|
|
|
|
|
0
|
next LINE; |
|
1145
|
|
|
|
|
|
|
} |
|
1146
|
|
|
|
|
|
|
|
|
1147
|
|
|
|
|
|
|
# Anything else: bail out, slow parser owns it. |
|
1148
|
0
|
|
|
|
|
0
|
return undef; |
|
1149
|
|
|
|
|
|
|
} |
|
1150
|
|
|
|
|
|
|
|
|
1151
|
|
|
|
|
|
|
# Successful walk — return Document shape. comments/original_forms |
|
1152
|
|
|
|
|
|
|
# are empty by definition (we rejected anything that could carry |
|
1153
|
|
|
|
|
|
|
# them). |
|
1154
|
|
|
|
|
|
|
return { |
|
1155
|
0
|
|
|
|
|
0
|
meta => undef, |
|
1156
|
|
|
|
|
|
|
body => $root, |
|
1157
|
|
|
|
|
|
|
comments => [], |
|
1158
|
|
|
|
|
|
|
original_forms => [], |
|
1159
|
|
|
|
|
|
|
}; |
|
1160
|
|
|
|
|
|
|
} |
|
1161
|
|
|
|
|
|
|
|
|
1162
|
|
|
|
|
|
|
# SPEC §"Unordered tables" — opt-in. Body tables are produced as |
|
1163
|
|
|
|
|
|
|
# DMS::Parser::UnorderedTable (plain Perl hashes, no insertion-order tracking). |
|
1164
|
|
|
|
|
|
|
# Front matter remains ordered (per spec — meta is small and used by |
|
1165
|
|
|
|
|
|
|
# tooling that benefits from stable order). Documents from these entry |
|
1166
|
|
|
|
|
|
|
# points cannot round-trip via `encode` (full mode); use `encode_lite` |
|
1167
|
|
|
|
|
|
|
# instead. `decode_document_unordered` is full-mode (comments AST + |
|
1168
|
|
|
|
|
|
|
# original_forms still recorded); `decode_lite_document_unordered` is |
|
1169
|
|
|
|
|
|
|
# the (unordered, lite) combo — fastest read-only path. |
|
1170
|
|
|
|
|
|
|
sub decode_document_unordered { |
|
1171
|
0
|
|
|
0
|
0
|
0
|
my ($src) = @_; |
|
1172
|
0
|
|
|
|
|
0
|
return _parse_document_with_mode($src, 0, 1); |
|
1173
|
|
|
|
|
|
|
} |
|
1174
|
|
|
|
|
|
|
sub decode_lite_document_unordered { |
|
1175
|
0
|
|
|
0
|
0
|
0
|
my ($src) = @_; |
|
1176
|
0
|
|
|
|
|
0
|
return _parse_document_with_mode($src, 1, 1); |
|
1177
|
|
|
|
|
|
|
} |
|
1178
|
|
|
|
|
|
|
|
|
1179
|
|
|
|
|
|
|
# Deprecated aliases. Removed in the next release. |
|
1180
|
|
|
|
|
|
|
{ my $warned; |
|
1181
|
|
|
|
|
|
|
sub parse_document_unordered { |
|
1182
|
0
|
0
|
|
0
|
0
|
0
|
unless ($warned++) { |
|
1183
|
0
|
|
|
|
|
0
|
Carp::carp( |
|
1184
|
|
|
|
|
|
|
'DMS::Parser::parse_document_unordered() is deprecated; ' |
|
1185
|
|
|
|
|
|
|
. 'use decode_document_unordered() instead. ' |
|
1186
|
|
|
|
|
|
|
. 'SPEC v0.14 renamed parse_*() to decode_*().'); |
|
1187
|
|
|
|
|
|
|
} |
|
1188
|
0
|
|
|
|
|
0
|
goto &decode_document_unordered; |
|
1189
|
|
|
|
|
|
|
} |
|
1190
|
|
|
|
|
|
|
} |
|
1191
|
|
|
|
|
|
|
{ my $warned; |
|
1192
|
|
|
|
|
|
|
sub parse_lite_document_unordered { |
|
1193
|
0
|
0
|
|
0
|
0
|
0
|
unless ($warned++) { |
|
1194
|
0
|
|
|
|
|
0
|
Carp::carp( |
|
1195
|
|
|
|
|
|
|
'DMS::Parser::parse_lite_document_unordered() is deprecated; ' |
|
1196
|
|
|
|
|
|
|
. 'use decode_lite_document_unordered() instead. ' |
|
1197
|
|
|
|
|
|
|
. 'SPEC v0.14 renamed parse_*() to decode_*().'); |
|
1198
|
|
|
|
|
|
|
} |
|
1199
|
0
|
|
|
|
|
0
|
goto &decode_lite_document_unordered; |
|
1200
|
|
|
|
|
|
|
} |
|
1201
|
|
|
|
|
|
|
} |
|
1202
|
|
|
|
|
|
|
|
|
1203
|
|
|
|
|
|
|
# Re-emit a parsed Document as DMS source. See SPEC §encode. |
|
1204
|
|
|
|
|
|
|
sub encode { |
|
1205
|
21
|
|
|
21
|
0
|
1580
|
my ($doc) = @_; |
|
1206
|
21
|
|
|
|
|
1111
|
require DMS::Parser::Emitter; |
|
1207
|
21
|
|
|
|
|
88
|
return DMS::Parser::Emitter::encode($doc); |
|
1208
|
|
|
|
|
|
|
} |
|
1209
|
|
|
|
|
|
|
|
|
1210
|
|
|
|
|
|
|
# Lite-mode emit: canonical DMS source — no comments, decimal integers, |
|
1211
|
|
|
|
|
|
|
# basic-quoted strings — ignoring any comments / original_forms in $doc. |
|
1212
|
|
|
|
|
|
|
# `decode(encode_lite($doc))` is data-equivalent to $doc; round-trip of |
|
1213
|
|
|
|
|
|
|
# comment + literal-form is *not* preserved. SPEC §encode. |
|
1214
|
|
|
|
|
|
|
sub encode_lite { |
|
1215
|
2
|
|
|
2
|
0
|
981
|
my ($doc) = @_; |
|
1216
|
2
|
|
|
|
|
14
|
require DMS::Parser::Emitter; |
|
1217
|
2
|
|
|
|
|
9
|
return DMS::Parser::Emitter::encode_lite($doc); |
|
1218
|
|
|
|
|
|
|
} |
|
1219
|
|
|
|
|
|
|
|
|
1220
|
|
|
|
|
|
|
# Deprecated aliases for encode/encode_lite. Removed in the next release. |
|
1221
|
|
|
|
|
|
|
{ my $warned; |
|
1222
|
|
|
|
|
|
|
sub to_dms { |
|
1223
|
1
|
50
|
|
1
|
0
|
9
|
unless ($warned++) { |
|
1224
|
1
|
|
|
|
|
126
|
Carp::carp( |
|
1225
|
|
|
|
|
|
|
'DMS::Parser::to_dms() is deprecated; use encode() instead. ' |
|
1226
|
|
|
|
|
|
|
. 'SPEC v0.14 renamed to_dms() to encode().'); |
|
1227
|
|
|
|
|
|
|
} |
|
1228
|
1
|
|
|
|
|
9
|
goto &encode; |
|
1229
|
|
|
|
|
|
|
} |
|
1230
|
|
|
|
|
|
|
} |
|
1231
|
|
|
|
|
|
|
{ my $warned; |
|
1232
|
|
|
|
|
|
|
sub to_dms_lite { |
|
1233
|
1
|
50
|
|
1
|
0
|
7
|
unless ($warned++) { |
|
1234
|
1
|
|
|
|
|
120
|
Carp::carp( |
|
1235
|
|
|
|
|
|
|
'DMS::Parser::to_dms_lite() is deprecated; use encode_lite() instead. ' |
|
1236
|
|
|
|
|
|
|
. 'SPEC v0.14 renamed to_dms_lite() to encode_lite().'); |
|
1237
|
|
|
|
|
|
|
} |
|
1238
|
1
|
|
|
|
|
10
|
goto &encode_lite; |
|
1239
|
|
|
|
|
|
|
} |
|
1240
|
|
|
|
|
|
|
} |
|
1241
|
|
|
|
|
|
|
|
|
1242
|
|
|
|
|
|
|
sub decode_document { |
|
1243
|
42
|
|
|
42
|
1
|
537792
|
my ($src) = @_; |
|
1244
|
42
|
|
|
|
|
136
|
return _parse_document_with_mode($src, 0, 0); |
|
1245
|
|
|
|
|
|
|
} |
|
1246
|
|
|
|
|
|
|
|
|
1247
|
|
|
|
|
|
|
# Deprecated alias for decode_document(). Removed in the next release. |
|
1248
|
|
|
|
|
|
|
{ my $warned; |
|
1249
|
|
|
|
|
|
|
sub parse_document { |
|
1250
|
1
|
50
|
|
1
|
0
|
8
|
unless ($warned++) { |
|
1251
|
1
|
|
|
|
|
139
|
Carp::carp( |
|
1252
|
|
|
|
|
|
|
'DMS::Parser::parse_document() is deprecated; use decode_document() instead. ' |
|
1253
|
|
|
|
|
|
|
. 'SPEC v0.14 renamed parse_document() to decode_document().'); |
|
1254
|
|
|
|
|
|
|
} |
|
1255
|
1
|
|
|
|
|
10
|
goto &decode_document; |
|
1256
|
|
|
|
|
|
|
} |
|
1257
|
|
|
|
|
|
|
} |
|
1258
|
|
|
|
|
|
|
|
|
1259
|
|
|
|
|
|
|
# SPEC §"UTF-8 only": reject any non-strict UTF-8 byte sequence — |
|
1260
|
|
|
|
|
|
|
# overlongs, lone continuation bytes, 5/6-byte forms, codepoints above |
|
1261
|
|
|
|
|
|
|
# U+10FFFF, and surrogates encoded as 3-byte UTF-8. Perl's built-in |
|
1262
|
|
|
|
|
|
|
# `utf8::decode` is too lax (it accepts the legacy "extended UTF-8" |
|
1263
|
|
|
|
|
|
|
# encoding for code points up to 2^31), so we walk the bytes ourselves |
|
1264
|
|
|
|
|
|
|
# before any decoding. |
|
1265
|
|
|
|
|
|
|
sub _validate_strict_utf8 { |
|
1266
|
53
|
|
|
53
|
|
114
|
my ($s) = @_; |
|
1267
|
|
|
|
|
|
|
# Fast path: pure-ASCII source has no UTF-8 work to do. A single |
|
1268
|
|
|
|
|
|
|
# regex hit returns immediately on flat-ASCII data — saves ~150ms. |
|
1269
|
53
|
50
|
|
|
|
241
|
return if $s !~ /[\x80-\xFF]/; |
|
1270
|
0
|
|
|
|
|
0
|
my $n = length($s); |
|
1271
|
0
|
|
|
|
|
0
|
my $i = 0; |
|
1272
|
0
|
|
|
|
|
0
|
while ($i < $n) { |
|
1273
|
0
|
|
|
|
|
0
|
my $b0 = ord(substr($s, $i, 1)); |
|
1274
|
0
|
0
|
|
|
|
0
|
if ($b0 < 0x80) { $i++; next; } |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
1275
|
0
|
|
|
|
|
0
|
my ($expect, $cp_lo, $cp_hi); |
|
1276
|
0
|
0
|
|
|
|
0
|
if (($b0 & 0xE0) == 0xC0) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
1277
|
0
|
0
|
|
|
|
0
|
return _utf8_die($s, $i) if $b0 < 0xC2; # overlong |
|
1278
|
0
|
|
|
|
|
0
|
$expect = 2; $cp_lo = 0x80; $cp_hi = 0x7FF; |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
1279
|
|
|
|
|
|
|
} elsif (($b0 & 0xF0) == 0xE0) { |
|
1280
|
0
|
|
|
|
|
0
|
$expect = 3; $cp_lo = 0x800; $cp_hi = 0xFFFF; |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
1281
|
|
|
|
|
|
|
} elsif (($b0 & 0xF8) == 0xF0) { |
|
1282
|
0
|
0
|
|
|
|
0
|
return _utf8_die($s, $i) if $b0 > 0xF4; # > U+10FFFF |
|
1283
|
0
|
|
|
|
|
0
|
$expect = 4; $cp_lo = 0x10000; $cp_hi = 0x10FFFF; |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
1284
|
|
|
|
|
|
|
} else { |
|
1285
|
0
|
|
|
|
|
0
|
return _utf8_die($s, $i); # bare cont / 5-6 byte |
|
1286
|
|
|
|
|
|
|
} |
|
1287
|
0
|
0
|
|
|
|
0
|
return _utf8_die($s, $i) if $i + $expect > $n; |
|
1288
|
0
|
|
|
|
|
0
|
my $cp = ($b0 & ((1 << (7 - $expect)) - 1)); |
|
1289
|
0
|
|
|
|
|
0
|
for (my $k = 1; $k < $expect; $k++) { |
|
1290
|
0
|
|
|
|
|
0
|
my $bk = ord(substr($s, $i + $k, 1)); |
|
1291
|
0
|
0
|
|
|
|
0
|
return _utf8_die($s, $i) if ($bk & 0xC0) != 0x80; |
|
1292
|
0
|
|
|
|
|
0
|
$cp = ($cp << 6) | ($bk & 0x3F); |
|
1293
|
|
|
|
|
|
|
} |
|
1294
|
0
|
0
|
0
|
|
|
0
|
return _utf8_die($s, $i) if $cp < $cp_lo || $cp > $cp_hi; |
|
1295
|
0
|
0
|
0
|
|
|
0
|
return _utf8_die($s, $i) if $cp >= 0xD800 && $cp <= 0xDFFF; |
|
1296
|
0
|
|
|
|
|
0
|
$i += $expect; |
|
1297
|
|
|
|
|
|
|
} |
|
1298
|
|
|
|
|
|
|
} |
|
1299
|
|
|
|
|
|
|
|
|
1300
|
|
|
|
|
|
|
sub _utf8_die { |
|
1301
|
0
|
|
|
0
|
|
0
|
my ($s, $i) = @_; |
|
1302
|
0
|
|
|
|
|
0
|
my $prefix = substr($s, 0, $i); |
|
1303
|
0
|
|
|
|
|
0
|
my $line = 1 + ($prefix =~ tr/\n//); |
|
1304
|
0
|
|
|
|
|
0
|
my $last_nl = rindex($prefix, "\n"); |
|
1305
|
0
|
|
|
|
|
0
|
my $col = $i - ($last_nl + 1) + 1; |
|
1306
|
0
|
|
|
|
|
0
|
die "$line:$col: input is not valid UTF-8\n"; |
|
1307
|
|
|
|
|
|
|
} |
|
1308
|
|
|
|
|
|
|
|
|
1309
|
|
|
|
|
|
|
# Shared input-normalization for every public decode entry point. |
|
1310
|
|
|
|
|
|
|
# SPEC §"UTF-8 only, NFC-normalized": reject BOM at offset 0, reject |
|
1311
|
|
|
|
|
|
|
# malformed UTF-8 bytes (5/6-byte forms, > U+10FFFF, lone continuation |
|
1312
|
|
|
|
|
|
|
# bytes, overlongs, surrogates), reject U+0000 anywhere, then NFC the |
|
1313
|
|
|
|
|
|
|
# source. Returns the (possibly NFC'd) Perl-internal-encoded string. |
|
1314
|
|
|
|
|
|
|
sub _normalize_source { |
|
1315
|
53
|
|
|
53
|
|
112
|
my ($src) = @_; |
|
1316
|
|
|
|
|
|
|
# SPEC §"UTF-8 only, NFC-normalized": DMS source is plain UTF-8 with |
|
1317
|
|
|
|
|
|
|
# no byte-order mark. A leading U+FEFF is not silently consumed — |
|
1318
|
|
|
|
|
|
|
# reject it explicitly so encoding mistakes surface loudly. (BOMs |
|
1319
|
|
|
|
|
|
|
# *inside* string/heredoc bodies are fine; this only fires at offset 0.) |
|
1320
|
|
|
|
|
|
|
# We check for the raw UTF-8 BOM bytes (EF BB BF) before any decoding, |
|
1321
|
|
|
|
|
|
|
# so the rejection is independent of how the caller passed `$src`. |
|
1322
|
53
|
50
|
66
|
|
|
606
|
if (!utf8::is_utf8($src) && length($src) >= 3 |
|
|
|
|
66
|
|
|
|
|
|
1323
|
|
|
|
|
|
|
&& substr($src, 0, 3) eq "\xEF\xBB\xBF") { |
|
1324
|
0
|
|
|
|
|
0
|
die "1:1: BOM (U+FEFF) at file start is not allowed; DMS source is plain UTF-8\n"; |
|
1325
|
|
|
|
|
|
|
} |
|
1326
|
|
|
|
|
|
|
# SPEC §"UTF-8 only": reject malformed UTF-8 bytes (codepoints |
|
1327
|
|
|
|
|
|
|
# > U+10FFFF, lone continuation bytes, overlongs, surrogates encoded |
|
1328
|
|
|
|
|
|
|
# as bytes). Perl's `utf8::decode` is permissive about 5/6-byte |
|
1329
|
|
|
|
|
|
|
# forms and codepoints above U+10FFFF, so we additionally walk the |
|
1330
|
|
|
|
|
|
|
# raw bytes to confirm strict UTF-8 conformance. |
|
1331
|
53
|
50
|
|
|
|
189
|
if (!utf8::is_utf8($src)) { |
|
1332
|
53
|
|
|
|
|
147
|
_validate_strict_utf8($src); |
|
1333
|
53
|
|
|
|
|
117
|
my $copy = $src; |
|
1334
|
53
|
50
|
|
|
|
199
|
if (!utf8::decode($copy)) { |
|
1335
|
0
|
|
|
|
|
0
|
die "1:1: input is not valid UTF-8\n"; |
|
1336
|
|
|
|
|
|
|
} |
|
1337
|
53
|
|
|
|
|
110
|
$src = $copy; |
|
1338
|
|
|
|
|
|
|
} |
|
1339
|
53
|
50
|
66
|
|
|
251
|
if (length($src) >= 1 && substr($src, 0, 1) eq "\x{FEFF}") { |
|
1340
|
0
|
|
|
|
|
0
|
die "1:1: BOM (U+FEFF) at file start is not allowed; DMS source is plain UTF-8\n"; |
|
1341
|
|
|
|
|
|
|
} |
|
1342
|
|
|
|
|
|
|
# U+0000 is not allowed anywhere in DMS source (see SPEC §Strings). |
|
1343
|
53
|
50
|
|
|
|
169
|
if ((my $nul = index($src, "\0")) >= 0) { |
|
1344
|
0
|
|
|
|
|
0
|
my $prefix = substr($src, 0, $nul); |
|
1345
|
0
|
|
|
|
|
0
|
my $line = 1 + ($prefix =~ tr/\n//); |
|
1346
|
0
|
|
|
|
|
0
|
my $last_nl = rindex($prefix, "\n"); |
|
1347
|
0
|
|
|
|
|
0
|
my $col = $nul - ($last_nl + 1) + 1; |
|
1348
|
0
|
|
|
|
|
0
|
die "$line:$col: U+0000 (NUL) is not allowed in DMS source\n"; |
|
1349
|
|
|
|
|
|
|
} |
|
1350
|
|
|
|
|
|
|
# SPEC §Unicode normalization: NFC the source before tokenization. |
|
1351
|
|
|
|
|
|
|
# Fast path: pure-ASCII source is already in NFC; skip the (linear, |
|
1352
|
|
|
|
|
|
|
# codepoint-walking) NFC pass entirely. The check itself is a single |
|
1353
|
|
|
|
|
|
|
# regex that finds the first non-ASCII byte if any. |
|
1354
|
53
|
50
|
|
|
|
169
|
$src = _NFC($src) if $src =~ /[^\x00-\x7F]/; |
|
1355
|
53
|
|
|
|
|
113
|
return $src; |
|
1356
|
|
|
|
|
|
|
} |
|
1357
|
|
|
|
|
|
|
|
|
1358
|
|
|
|
|
|
|
# SPEC §Front-matter-only decode. Decodes the leading `+++ ... +++` |
|
1359
|
|
|
|
|
|
|
# block (if any) and returns it as a hashref, then stops without |
|
1360
|
|
|
|
|
|
|
# tokenizing the body. Returns undef when the document has no front |
|
1361
|
|
|
|
|
|
|
# matter at all (no opening `+++` after trivia). An empty front matter |
|
1362
|
|
|
|
|
|
|
# (`+++\n+++\n`) returns a defined-but-empty hashref, distinguishable |
|
1363
|
|
|
|
|
|
|
# from undef. |
|
1364
|
|
|
|
|
|
|
# |
|
1365
|
|
|
|
|
|
|
# Operates in lite mode (no comment AST, no original_forms recorded |
|
1366
|
|
|
|
|
|
|
# inside the FM). Diagnostics inside the `+++ ... +++` block are |
|
1367
|
|
|
|
|
|
|
# byte-identical to a full decode. |
|
1368
|
|
|
|
|
|
|
sub decode_front_matter { |
|
1369
|
11
|
|
|
11
|
1
|
214463
|
my ($src) = @_; |
|
1370
|
11
|
|
|
|
|
29
|
$src = _normalize_source($src); |
|
1371
|
11
|
|
|
|
|
100
|
my $self = bless { |
|
1372
|
|
|
|
|
|
|
src => $src, |
|
1373
|
|
|
|
|
|
|
len => length($src), |
|
1374
|
|
|
|
|
|
|
pos => 0, |
|
1375
|
|
|
|
|
|
|
line => 1, |
|
1376
|
|
|
|
|
|
|
line_start => 0, |
|
1377
|
|
|
|
|
|
|
pending_leading => [], |
|
1378
|
|
|
|
|
|
|
path => [], |
|
1379
|
|
|
|
|
|
|
comments => [], |
|
1380
|
|
|
|
|
|
|
original_forms => [], |
|
1381
|
|
|
|
|
|
|
record_forms => 1, |
|
1382
|
|
|
|
|
|
|
# SPEC §Front-matter-only decode: "Mode: front-matter-only |
|
1383
|
|
|
|
|
|
|
# decode runs in lite mode." |
|
1384
|
|
|
|
|
|
|
lite => 1, |
|
1385
|
|
|
|
|
|
|
# FM is always ordered (SPEC §"Unordered tables"); ignore_order |
|
1386
|
|
|
|
|
|
|
# only affects body tables, which we never reach. |
|
1387
|
|
|
|
|
|
|
ignore_order => 0, |
|
1388
|
|
|
|
|
|
|
}, __PACKAGE__; |
|
1389
|
11
|
|
|
|
|
41
|
return $self->parse_front_matter; |
|
1390
|
|
|
|
|
|
|
} |
|
1391
|
|
|
|
|
|
|
|
|
1392
|
|
|
|
|
|
|
sub _parse_document_with_mode { |
|
1393
|
42
|
|
|
42
|
|
98
|
my ($src, $lite, $ignore_order) = @_; |
|
1394
|
42
|
50
|
|
|
|
132
|
$ignore_order = 0 unless $ignore_order; |
|
1395
|
42
|
|
|
|
|
108
|
$src = _normalize_source($src); |
|
1396
|
42
|
|
|
|
|
443
|
my $self = bless { |
|
1397
|
|
|
|
|
|
|
src => $src, |
|
1398
|
|
|
|
|
|
|
len => length($src), |
|
1399
|
|
|
|
|
|
|
pos => 0, |
|
1400
|
|
|
|
|
|
|
line => 1, |
|
1401
|
|
|
|
|
|
|
line_start => 0, |
|
1402
|
|
|
|
|
|
|
# Comment-AST state. `pending_leading` accumulates full-line |
|
1403
|
|
|
|
|
|
|
# comments seen by `_skip_trivia`; on the next sibling-entry |
|
1404
|
|
|
|
|
|
|
# they're flushed as Leading on its breadcrumb, on a blank line |
|
1405
|
|
|
|
|
|
|
# gap or end-of-block they're flushed as Floating on the current |
|
1406
|
|
|
|
|
|
|
# path. `path` is the breadcrumb stack for the value currently |
|
1407
|
|
|
|
|
|
|
# being parsed (strings for table keys; DMS::Parser::Index for list |
|
1408
|
|
|
|
|
|
|
# indices). `comments` is the accumulator returned to the caller. |
|
1409
|
|
|
|
|
|
|
pending_leading => [], |
|
1410
|
|
|
|
|
|
|
path => [], |
|
1411
|
|
|
|
|
|
|
comments => [], |
|
1412
|
|
|
|
|
|
|
# Original-literal records for to_dms round-trip. Sparse: only |
|
1413
|
|
|
|
|
|
|
# nodes whose surface form differs from the emitter's default |
|
1414
|
|
|
|
|
|
|
# (decimal-no-underscores for ints, basic-quoted for strings) |
|
1415
|
|
|
|
|
|
|
# get an entry. See SPEC §to_dms. |
|
1416
|
|
|
|
|
|
|
original_forms => [], |
|
1417
|
|
|
|
|
|
|
# When false, integer/string lexeme recording is suppressed. |
|
1418
|
|
|
|
|
|
|
# Used inside key parses and inside heredoc modifier args. |
|
1419
|
|
|
|
|
|
|
record_forms => 1, |
|
1420
|
|
|
|
|
|
|
# Lite mode: skip comment-AST + original_forms bookkeeping. |
|
1421
|
|
|
|
|
|
|
# Same grammar, same errors. |
|
1422
|
|
|
|
|
|
|
lite => $lite, |
|
1423
|
|
|
|
|
|
|
# Unordered mode: when true, body tables are produced as |
|
1424
|
|
|
|
|
|
|
# DMS::Parser::UnorderedTable (plain Perl hashes) instead of Tie::IxHash |
|
1425
|
|
|
|
|
|
|
# tied tables. Front-matter parsing ignores this flag — meta |
|
1426
|
|
|
|
|
|
|
# stays ordered. See SPEC §"Unordered tables". |
|
1427
|
|
|
|
|
|
|
ignore_order => $ignore_order, |
|
1428
|
|
|
|
|
|
|
}, __PACKAGE__; |
|
1429
|
42
|
|
|
|
|
161
|
my $meta = $self->parse_front_matter; |
|
1430
|
42
|
|
|
|
|
145
|
my $body = $self->parse_body; |
|
1431
|
|
|
|
|
|
|
return { |
|
1432
|
|
|
|
|
|
|
meta => $meta, |
|
1433
|
|
|
|
|
|
|
body => $body, |
|
1434
|
|
|
|
|
|
|
comments => $self->{comments}, |
|
1435
|
|
|
|
|
|
|
original_forms => $self->{original_forms}, |
|
1436
|
42
|
|
|
|
|
449
|
}; |
|
1437
|
|
|
|
|
|
|
} |
|
1438
|
|
|
|
|
|
|
|
|
1439
|
|
|
|
|
|
|
# Append an OriginalLiteral record at the current path. Skipped when |
|
1440
|
|
|
|
|
|
|
# record_forms is false or when in lite mode. |
|
1441
|
|
|
|
|
|
|
sub _record_form { |
|
1442
|
41
|
|
|
41
|
|
98
|
my ($self, $lit) = @_; |
|
1443
|
41
|
50
|
33
|
|
|
176
|
return if $self->{lite} || !$self->{record_forms}; |
|
1444
|
41
|
|
|
|
|
88
|
push @{$self->{original_forms}}, [ [@{$self->{path}}], $lit ]; |
|
|
41
|
|
|
|
|
92
|
|
|
|
41
|
|
|
|
|
235
|
|
|
1445
|
|
|
|
|
|
|
} |
|
1446
|
|
|
|
|
|
|
|
|
1447
|
|
|
|
|
|
|
sub _peek { |
|
1448
|
899
|
|
|
899
|
|
1251
|
my $self = shift; |
|
1449
|
899
|
50
|
|
|
|
1862
|
return undef if $self->{pos} >= $self->{len}; |
|
1450
|
899
|
|
|
|
|
2126
|
return substr($self->{src}, $self->{pos}, 1); |
|
1451
|
|
|
|
|
|
|
} |
|
1452
|
|
|
|
|
|
|
|
|
1453
|
|
|
|
|
|
|
sub _peek_at { |
|
1454
|
4
|
|
|
4
|
|
11
|
my ($self, $off) = @_; |
|
1455
|
4
|
|
|
|
|
9
|
my $p = $self->{pos} + $off; |
|
1456
|
4
|
50
|
|
|
|
13
|
return undef if $p >= $self->{len}; |
|
1457
|
4
|
|
|
|
|
15
|
return substr($self->{src}, $p, 1); |
|
1458
|
|
|
|
|
|
|
} |
|
1459
|
|
|
|
|
|
|
|
|
1460
|
|
|
|
|
|
|
sub _starts_with { |
|
1461
|
92
|
|
|
92
|
|
225
|
my ($self, $s) = @_; |
|
1462
|
92
|
|
|
|
|
363
|
return substr($self->{src}, $self->{pos}, length($s)) eq $s; |
|
1463
|
|
|
|
|
|
|
} |
|
1464
|
|
|
|
|
|
|
|
|
1465
|
|
|
|
|
|
|
sub _bump { |
|
1466
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
|
1467
|
0
|
0
|
|
|
|
0
|
return undef if $self->{pos} >= $self->{len}; |
|
1468
|
0
|
|
|
|
|
0
|
my $c = substr($self->{src}, $self->{pos}, 1); |
|
1469
|
0
|
|
|
|
|
0
|
$self->{pos}++; |
|
1470
|
0
|
|
|
|
|
0
|
return $c; |
|
1471
|
|
|
|
|
|
|
} |
|
1472
|
|
|
|
|
|
|
|
|
1473
|
253
|
|
|
253
|
|
802
|
sub _eof { return $_[0]{pos} >= $_[0]{len} } |
|
1474
|
|
|
|
|
|
|
|
|
1475
|
|
|
|
|
|
|
sub _advance_line { |
|
1476
|
140
|
|
|
140
|
|
200
|
my $self = shift; |
|
1477
|
140
|
|
|
|
|
257
|
$self->{line}++; |
|
1478
|
140
|
|
|
|
|
265
|
$self->{line_start} = $self->{pos}; |
|
1479
|
|
|
|
|
|
|
} |
|
1480
|
|
|
|
|
|
|
|
|
1481
|
|
|
|
|
|
|
sub _is_bare_key_char { |
|
1482
|
296
|
|
|
296
|
|
551
|
my ($c) = @_; |
|
1483
|
296
|
50
|
|
|
|
608
|
return 1 if $c eq '-'; |
|
1484
|
296
|
|
|
|
|
428
|
my $o = ord($c); |
|
1485
|
296
|
50
|
|
|
|
601
|
if ($o < 128) { |
|
1486
|
296
|
|
|
|
|
996
|
return $c =~ /[A-Za-z0-9_]/; |
|
1487
|
|
|
|
|
|
|
} |
|
1488
|
|
|
|
|
|
|
# Frozen Unicode 15.1 XID_Continue snapshot - see @XID_CONTINUE_RANGES. |
|
1489
|
0
|
|
|
|
|
0
|
return _is_xid_continue($o); |
|
1490
|
|
|
|
|
|
|
} |
|
1491
|
|
|
|
|
|
|
|
|
1492
|
|
|
|
|
|
|
sub _is_label_start { |
|
1493
|
37
|
|
|
37
|
|
74
|
my ($c) = @_; |
|
1494
|
37
|
|
33
|
|
|
239
|
return defined($c) && ($c eq '_' || $c =~ /[A-Za-z]/); |
|
1495
|
|
|
|
|
|
|
} |
|
1496
|
|
|
|
|
|
|
|
|
1497
|
|
|
|
|
|
|
sub _is_label_cont { |
|
1498
|
62
|
|
|
62
|
|
114
|
my ($c) = @_; |
|
1499
|
62
|
|
33
|
|
|
350
|
return defined($c) && ($c eq '_' || $c =~ /[A-Za-z0-9]/); |
|
1500
|
|
|
|
|
|
|
} |
|
1501
|
|
|
|
|
|
|
|
|
1502
|
|
|
|
|
|
|
sub _looks_like_date_prefix { |
|
1503
|
0
|
|
|
0
|
|
0
|
my ($s) = @_; |
|
1504
|
0
|
0
|
|
|
|
0
|
return 0 if length($s) < 10; |
|
1505
|
0
|
|
|
|
|
0
|
return $s =~ /^\d\d\d\d-\d\d-\d\d/; |
|
1506
|
|
|
|
|
|
|
} |
|
1507
|
|
|
|
|
|
|
|
|
1508
|
|
|
|
|
|
|
sub _looks_like_time_prefix { |
|
1509
|
0
|
|
|
0
|
|
0
|
my ($s) = @_; |
|
1510
|
0
|
0
|
|
|
|
0
|
return 0 if length($s) < 8; |
|
1511
|
0
|
|
|
|
|
0
|
return $s =~ /^\d\d:\d\d:\d\d/; |
|
1512
|
|
|
|
|
|
|
} |
|
1513
|
|
|
|
|
|
|
|
|
1514
|
|
|
|
|
|
|
my %TERMINATORS = map { $_ => 1 } (' ', "\t", "\n", "\r", '#', '/', ',', ']', '}'); |
|
1515
|
|
|
|
|
|
|
sub _is_value_terminator { |
|
1516
|
0
|
|
|
0
|
|
0
|
my ($c) = @_; |
|
1517
|
0
|
0
|
|
|
|
0
|
return 1 if !defined($c); |
|
1518
|
0
|
|
|
|
|
0
|
return exists $TERMINATORS{$c}; |
|
1519
|
|
|
|
|
|
|
} |
|
1520
|
|
|
|
|
|
|
|
|
1521
|
|
|
|
|
|
|
# SPEC §Lexical "Reserved decorator sigils": the seventeen characters |
|
1522
|
|
|
|
|
|
|
# below are reserved as decorator sigils at line-start position. A body |
|
1523
|
|
|
|
|
|
|
# line whose first non-whitespace character is one of these is a tier-0 |
|
1524
|
|
|
|
|
|
|
# parse error. Underscore is *not* in this set — it has its own role |
|
1525
|
|
|
|
|
|
|
# for core / built-in decorators (e.g. heredoc modifiers `_trim`). |
|
1526
|
|
|
|
|
|
|
# The reservation only applies to structural body positions; sigils |
|
1527
|
|
|
|
|
|
|
# appearing inside strings, comments, or heredoc bodies are ordinary |
|
1528
|
|
|
|
|
|
|
# content and remain valid. |
|
1529
|
|
|
|
|
|
|
my %RESERVED_DECORATOR_SIGIL = map { $_ => 1 } |
|
1530
|
|
|
|
|
|
|
('!', '@', '$', '%', '^', '&', '*', '|', '~', '`', |
|
1531
|
|
|
|
|
|
|
'.', ',', '>', '<', '?', ';', '='); |
|
1532
|
|
|
|
|
|
|
|
|
1533
|
|
|
|
|
|
|
sub _check_reserved_sigil { |
|
1534
|
109
|
|
|
109
|
|
174
|
my $self = shift; |
|
1535
|
109
|
|
|
|
|
184
|
my $p = $self->{pos}; |
|
1536
|
109
|
50
|
|
|
|
324
|
return if $p >= $self->{len}; |
|
1537
|
109
|
|
|
|
|
250
|
my $c = substr($self->{src}, $p, 1); |
|
1538
|
109
|
50
|
|
|
|
336
|
return unless exists $RESERVED_DECORATOR_SIGIL{$c}; |
|
1539
|
0
|
|
|
|
|
0
|
$self->_die( |
|
1540
|
|
|
|
|
|
|
"'$c' is a reserved decorator sigil at line-start (tier 0)" |
|
1541
|
|
|
|
|
|
|
); |
|
1542
|
|
|
|
|
|
|
} |
|
1543
|
|
|
|
|
|
|
|
|
1544
|
|
|
|
|
|
|
sub _skip_inline_ws { |
|
1545
|
88
|
|
|
88
|
|
141
|
my $self = shift; |
|
1546
|
88
|
|
|
|
|
272
|
pos($self->{src}) = $self->{pos}; |
|
1547
|
88
|
100
|
|
|
|
374
|
if ($self->{src} =~ /\G[ \t]+/gc) { |
|
1548
|
6
|
|
|
|
|
14
|
$self->{pos} = pos($self->{src}); |
|
1549
|
|
|
|
|
|
|
} |
|
1550
|
|
|
|
|
|
|
} |
|
1551
|
|
|
|
|
|
|
|
|
1552
|
|
|
|
|
|
|
sub _consume_eol { |
|
1553
|
123
|
|
|
123
|
|
200
|
my $self = shift; |
|
1554
|
123
|
|
|
|
|
268
|
my $c = $self->_peek; |
|
1555
|
123
|
50
|
33
|
|
|
417
|
if (defined($c) && $c eq "\n") { |
|
1556
|
123
|
|
|
|
|
211
|
$self->{pos}++; |
|
1557
|
123
|
|
|
|
|
310
|
$self->_advance_line; |
|
1558
|
123
|
|
|
|
|
274
|
return 1; |
|
1559
|
|
|
|
|
|
|
} |
|
1560
|
0
|
0
|
|
|
|
0
|
if ($self->_starts_with("\r\n")) { |
|
1561
|
0
|
|
|
|
|
0
|
$self->{pos} += 2; |
|
1562
|
0
|
|
|
|
|
0
|
$self->_advance_line; |
|
1563
|
0
|
|
|
|
|
0
|
return 1; |
|
1564
|
|
|
|
|
|
|
} |
|
1565
|
0
|
|
|
|
|
0
|
return 0; |
|
1566
|
|
|
|
|
|
|
} |
|
1567
|
|
|
|
|
|
|
|
|
1568
|
|
|
|
|
|
|
sub _skip_trivia { |
|
1569
|
330
|
|
|
330
|
|
542
|
my $self = shift; |
|
1570
|
|
|
|
|
|
|
# Hot path: the next byte is something other than ws/EOL/comment. |
|
1571
|
|
|
|
|
|
|
# Skip the whole loop. parse_table_block hits this between every |
|
1572
|
|
|
|
|
|
|
# 50k keys. |
|
1573
|
330
|
|
|
|
|
586
|
my $p = $self->{pos}; |
|
1574
|
330
|
100
|
|
|
|
778
|
if ($p < $self->{len}) { |
|
1575
|
223
|
|
|
|
|
521
|
my $c0 = substr($self->{src}, $p, 1); |
|
1576
|
223
|
100
|
66
|
|
|
2091
|
return if $c0 ne ' ' && $c0 ne "\t" && $c0 ne "\n" && $c0 ne "\r" |
|
|
|
|
100
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
1577
|
|
|
|
|
|
|
&& $c0 ne '#' && $c0 ne '/'; |
|
1578
|
|
|
|
|
|
|
} else { |
|
1579
|
107
|
|
|
|
|
179
|
return; |
|
1580
|
|
|
|
|
|
|
} |
|
1581
|
53
|
|
|
|
|
122
|
while (1) { |
|
1582
|
93
|
|
|
|
|
174
|
my $start = $self->{pos}; |
|
1583
|
|
|
|
|
|
|
# Inline _skip_inline_ws + _peek in one regex hit. |
|
1584
|
93
|
|
|
|
|
265
|
pos($self->{src}) = $start; |
|
1585
|
93
|
100
|
|
|
|
423
|
if ($self->{src} =~ /\G[ \t]+/gc) { $self->{pos} = pos($self->{src}); } |
|
|
15
|
|
|
|
|
31
|
|
|
1586
|
93
|
|
|
|
|
163
|
$p = $self->{pos}; |
|
1587
|
93
|
50
|
|
|
|
268
|
my $c = $p >= $self->{len} ? undef : substr($self->{src}, $p, 1); |
|
1588
|
93
|
100
|
66
|
|
|
991
|
if (defined($c) && ($c eq "\n" || $c eq "\r")) { |
|
|
|
100
|
66
|
|
|
|
|
|
|
|
50
|
66
|
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
1589
|
4
|
50
|
33
|
|
|
19
|
if ($c eq "\r" && !$self->_starts_with("\r\n")) { |
|
1590
|
0
|
|
|
|
|
0
|
$self->_die("bare CR is not a valid line terminator"); |
|
1591
|
|
|
|
|
|
|
} |
|
1592
|
|
|
|
|
|
|
# Blank line: any pending leading comments are now separated |
|
1593
|
|
|
|
|
|
|
# from a future sibling, so flush them as Floating on the |
|
1594
|
|
|
|
|
|
|
# current path. |
|
1595
|
4
|
|
|
|
|
17
|
$self->_flush_pending_as_floating; |
|
1596
|
4
|
|
|
|
|
14
|
$self->_consume_eol; |
|
1597
|
|
|
|
|
|
|
} elsif (defined($c) && $c eq '#') { |
|
1598
|
40
|
100
|
|
|
|
128
|
if ($self->_starts_with("###")) { |
|
1599
|
2
|
|
|
|
|
7
|
my $raw = $self->_read_hash_block_comment; |
|
1600
|
2
|
|
|
|
|
14
|
push @{$self->{pending_leading}}, |
|
1601
|
2
|
50
|
|
|
|
6
|
{ content => $raw, kind => 'block' } unless $self->{lite}; |
|
1602
|
|
|
|
|
|
|
} else { |
|
1603
|
38
|
|
|
|
|
97
|
my $raw = $self->_read_line_comment_to_eol; |
|
1604
|
38
|
|
|
|
|
122
|
$self->_consume_eol; |
|
1605
|
37
|
|
|
|
|
232
|
push @{$self->{pending_leading}}, |
|
1606
|
38
|
100
|
|
|
|
93
|
{ content => $raw, kind => 'line' } unless $self->{lite}; |
|
1607
|
|
|
|
|
|
|
} |
|
1608
|
|
|
|
|
|
|
} elsif (defined($c) && $c eq '/' && $self->_starts_with("//")) { |
|
1609
|
0
|
|
|
|
|
0
|
my $raw = $self->_read_line_comment_to_eol; |
|
1610
|
0
|
|
|
|
|
0
|
$self->_consume_eol; |
|
1611
|
0
|
|
|
|
|
0
|
push @{$self->{pending_leading}}, |
|
1612
|
0
|
0
|
|
|
|
0
|
{ content => $raw, kind => 'line' } unless $self->{lite}; |
|
1613
|
|
|
|
|
|
|
} elsif (defined($c) && $c eq '/' && $self->_starts_with("/*")) { |
|
1614
|
0
|
|
|
|
|
0
|
my $raw = $self->_read_c_block_comment; |
|
1615
|
0
|
|
|
|
|
0
|
push @{$self->{pending_leading}}, |
|
1616
|
0
|
0
|
|
|
|
0
|
{ content => $raw, kind => 'block' } unless $self->{lite}; |
|
1617
|
|
|
|
|
|
|
} else { |
|
1618
|
49
|
|
|
|
|
94
|
$self->{pos} = $start; |
|
1619
|
49
|
|
|
|
|
105
|
return; |
|
1620
|
|
|
|
|
|
|
} |
|
1621
|
44
|
100
|
|
|
|
123
|
return if $self->_eof; |
|
1622
|
|
|
|
|
|
|
} |
|
1623
|
|
|
|
|
|
|
} |
|
1624
|
|
|
|
|
|
|
|
|
1625
|
|
|
|
|
|
|
# Drain pending_leading and attach each as a Floating comment on the |
|
1626
|
|
|
|
|
|
|
# current path. Called on blank-line gaps and at end-of-block. |
|
1627
|
|
|
|
|
|
|
sub _flush_pending_as_floating { |
|
1628
|
99
|
|
|
99
|
|
156
|
my $self = shift; |
|
1629
|
99
|
100
|
|
|
|
168
|
return if !@{$self->{pending_leading}}; |
|
|
99
|
|
|
|
|
258
|
|
|
1630
|
5
|
|
|
|
|
11
|
my @drained = @{$self->{pending_leading}}; |
|
|
5
|
|
|
|
|
14
|
|
|
1631
|
5
|
|
|
|
|
14
|
$self->{pending_leading} = []; |
|
1632
|
5
|
|
|
|
|
13
|
for my $c (@drained) { |
|
1633
|
5
|
|
|
|
|
14
|
push @{$self->{comments}}, { |
|
1634
|
|
|
|
|
|
|
comment => $c, |
|
1635
|
|
|
|
|
|
|
position => 'floating', |
|
1636
|
5
|
|
|
|
|
8
|
path => [@{$self->{path}}], |
|
|
5
|
|
|
|
|
27
|
|
|
1637
|
|
|
|
|
|
|
}; |
|
1638
|
|
|
|
|
|
|
} |
|
1639
|
|
|
|
|
|
|
} |
|
1640
|
|
|
|
|
|
|
|
|
1641
|
|
|
|
|
|
|
# Drain pending_leading and attach each as a Leading comment on the |
|
1642
|
|
|
|
|
|
|
# current path. Called by sibling-entry sites (parse_kvpair, |
|
1643
|
|
|
|
|
|
|
# parse_list_block) right after pushing the new sibling's breadcrumb. |
|
1644
|
|
|
|
|
|
|
sub _flush_pending_as_leading_on_current { |
|
1645
|
109
|
|
|
109
|
|
220
|
my $self = shift; |
|
1646
|
109
|
100
|
|
|
|
171
|
return if !@{$self->{pending_leading}}; |
|
|
109
|
|
|
|
|
345
|
|
|
1647
|
20
|
|
|
|
|
36
|
my @drained = @{$self->{pending_leading}}; |
|
|
20
|
|
|
|
|
55
|
|
|
1648
|
20
|
|
|
|
|
51
|
$self->{pending_leading} = []; |
|
1649
|
20
|
|
|
|
|
63
|
for my $c (@drained) { |
|
1650
|
20
|
|
|
|
|
51
|
push @{$self->{comments}}, { |
|
1651
|
|
|
|
|
|
|
comment => $c, |
|
1652
|
|
|
|
|
|
|
position => 'leading', |
|
1653
|
20
|
|
|
|
|
30
|
path => [@{$self->{path}}], |
|
|
20
|
|
|
|
|
141
|
|
|
1654
|
|
|
|
|
|
|
}; |
|
1655
|
|
|
|
|
|
|
} |
|
1656
|
|
|
|
|
|
|
} |
|
1657
|
|
|
|
|
|
|
|
|
1658
|
|
|
|
|
|
|
# Read a `# ...` or `// ...` line comment (without consuming the EOL) |
|
1659
|
|
|
|
|
|
|
# and return the raw delimiter+body text. |
|
1660
|
|
|
|
|
|
|
sub _read_line_comment_to_eol { |
|
1661
|
55
|
|
|
55
|
|
97
|
my $self = shift; |
|
1662
|
|
|
|
|
|
|
# Bulk scan to next \n or \r in one regex op rather than char-by-char |
|
1663
|
|
|
|
|
|
|
# _peek loop. The bench fixture is 56% comments so this fires often. |
|
1664
|
55
|
|
|
|
|
158
|
pos($self->{src}) = $self->{pos}; |
|
1665
|
55
|
|
|
|
|
187
|
$self->{src} =~ /\G[^\n\r]*/gc; |
|
1666
|
55
|
|
|
|
|
112
|
my $end = pos($self->{src}); |
|
1667
|
55
|
|
|
|
|
90
|
my $start = $self->{pos}; |
|
1668
|
55
|
|
|
|
|
109
|
$self->{pos} = $end; |
|
1669
|
|
|
|
|
|
|
# Lite mode discards comments — skip building the substr entirely. |
|
1670
|
55
|
100
|
|
|
|
185
|
return '' if $self->{lite}; |
|
1671
|
54
|
|
|
|
|
169
|
return substr($self->{src}, $start, $end - $start); |
|
1672
|
|
|
|
|
|
|
} |
|
1673
|
|
|
|
|
|
|
|
|
1674
|
|
|
|
|
|
|
# Read `/* ... */` (nested), returning the raw text including delimiters. |
|
1675
|
|
|
|
|
|
|
sub _read_c_block_comment { |
|
1676
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
|
1677
|
0
|
|
|
|
|
0
|
my ($sl, $sls, $sp) = ($self->{line}, $self->{line_start}, $self->{pos}); |
|
1678
|
0
|
|
|
|
|
0
|
my $start = $self->{pos}; |
|
1679
|
0
|
|
|
|
|
0
|
$self->{pos} += 2; |
|
1680
|
0
|
|
|
|
|
0
|
my $depth = 1; |
|
1681
|
0
|
|
|
|
|
0
|
while ($depth > 0) { |
|
1682
|
0
|
0
|
|
|
|
0
|
if ($self->_eof) { |
|
1683
|
0
|
|
|
|
|
0
|
die $self->_err_at($sl, $sls, $sp, "unterminated /* block comment"); |
|
1684
|
|
|
|
|
|
|
} |
|
1685
|
0
|
|
|
|
|
0
|
my $c = $self->_peek; |
|
1686
|
0
|
0
|
0
|
|
|
0
|
if ($c eq '/' && $self->_starts_with("/*")) { $self->{pos} += 2; $depth++; } |
|
|
0
|
0
|
0
|
|
|
0
|
|
|
|
0
|
0
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
|
|
|
|
1687
|
0
|
|
|
|
|
0
|
elsif ($c eq '*' && $self->_starts_with("*/")) { $self->{pos} += 2; $depth--; } |
|
|
0
|
|
|
|
|
0
|
|
|
1688
|
0
|
|
|
|
|
0
|
elsif ($c eq "\n") { $self->{pos}++; $self->_advance_line; } |
|
|
0
|
|
|
|
|
0
|
|
|
1689
|
0
|
|
|
|
|
0
|
elsif ($c eq "\r" && $self->_starts_with("\r\n")) { $self->{pos} += 2; $self->_advance_line; } |
|
|
0
|
|
|
|
|
0
|
|
|
1690
|
0
|
|
|
|
|
0
|
else { $self->{pos}++; } |
|
1691
|
|
|
|
|
|
|
} |
|
1692
|
0
|
|
|
|
|
0
|
return substr($self->{src}, $start, $self->{pos} - $start); |
|
1693
|
|
|
|
|
|
|
} |
|
1694
|
|
|
|
|
|
|
|
|
1695
|
|
|
|
|
|
|
# Read `### ... ###` or `###LABEL ... LABEL`. The terminator's EOL is |
|
1696
|
|
|
|
|
|
|
# consumed but is NOT part of the returned text. |
|
1697
|
|
|
|
|
|
|
sub _read_hash_block_comment { |
|
1698
|
2
|
|
|
2
|
|
4
|
my $self = shift; |
|
1699
|
2
|
|
|
|
|
45
|
my ($sl, $sls, $sp) = ($self->{line}, $self->{line_start}, $self->{pos}); |
|
1700
|
2
|
|
|
|
|
6
|
my $start = $self->{pos}; |
|
1701
|
2
|
|
|
|
|
5
|
$self->{pos} += 3; |
|
1702
|
2
|
|
|
|
|
5
|
my $ls = $self->{pos}; |
|
1703
|
2
|
|
|
|
|
4
|
while (1) { |
|
1704
|
2
|
|
|
|
|
6
|
my $c = $self->_peek; |
|
1705
|
2
|
50
|
33
|
|
|
18
|
last if !defined($c) || !($c eq '_' || $c =~ /[A-Za-z0-9]/); |
|
|
|
|
33
|
|
|
|
|
|
1706
|
0
|
|
|
|
|
0
|
$self->{pos}++; |
|
1707
|
|
|
|
|
|
|
} |
|
1708
|
2
|
|
|
|
|
8
|
my $label = substr($self->{src}, $ls, $self->{pos} - $ls); |
|
1709
|
2
|
50
|
|
|
|
7
|
if (length($label) > 0) { |
|
1710
|
0
|
|
|
|
|
0
|
my $c0 = substr($label, 0, 1); |
|
1711
|
0
|
0
|
0
|
|
|
0
|
if ($c0 ne '_' && $c0 !~ /[A-Za-z]/) { |
|
1712
|
0
|
|
|
|
|
0
|
die $self->_err_at($sl, $sls, $sp, "block comment label must start with a letter or underscore"); |
|
1713
|
|
|
|
|
|
|
} |
|
1714
|
|
|
|
|
|
|
} |
|
1715
|
2
|
50
|
|
|
|
6
|
my $terminator = length($label) ? $label : "###"; |
|
1716
|
2
|
|
|
|
|
7
|
$self->_skip_inline_ws; |
|
1717
|
2
|
50
|
33
|
|
|
6
|
if (!($self->_consume_eol || $self->_eof)) { |
|
1718
|
0
|
|
|
|
|
0
|
$self->_die("block comment opener must be on its own line"); |
|
1719
|
|
|
|
|
|
|
} |
|
1720
|
2
|
|
|
|
|
5
|
while (1) { |
|
1721
|
4
|
50
|
|
|
|
10
|
if ($self->_eof) { |
|
1722
|
0
|
|
|
|
|
0
|
die $self->_err_at($sl, $sls, $sp, "unterminated ### block comment"); |
|
1723
|
|
|
|
|
|
|
} |
|
1724
|
4
|
|
|
|
|
9
|
my $lb = $self->{pos}; |
|
1725
|
4
|
|
|
|
|
5
|
while (1) { |
|
1726
|
18
|
|
|
|
|
34
|
my $c = $self->_peek; |
|
1727
|
18
|
100
|
66
|
|
|
80
|
last if !defined($c) || $c eq "\n" || $c eq "\r"; |
|
|
|
|
66
|
|
|
|
|
|
1728
|
14
|
|
|
|
|
21
|
$self->{pos}++; |
|
1729
|
|
|
|
|
|
|
} |
|
1730
|
4
|
|
|
|
|
14
|
my $line_text = substr($self->{src}, $lb, $self->{pos} - $lb); |
|
1731
|
4
|
|
|
|
|
8
|
my $line_end = $self->{pos}; |
|
1732
|
4
|
|
|
|
|
13
|
$self->_consume_eol; |
|
1733
|
4
|
|
|
|
|
24
|
my $trimmed = $line_text; |
|
1734
|
4
|
|
|
|
|
20
|
$trimmed =~ s/^\s+|\s+$//g; |
|
1735
|
4
|
100
|
|
|
|
13
|
if ($trimmed eq $terminator) { |
|
1736
|
2
|
|
|
|
|
11
|
return substr($self->{src}, $start, $line_end - $start); |
|
1737
|
|
|
|
|
|
|
} |
|
1738
|
|
|
|
|
|
|
} |
|
1739
|
|
|
|
|
|
|
} |
|
1740
|
|
|
|
|
|
|
|
|
1741
|
|
|
|
|
|
|
sub parse_front_matter { |
|
1742
|
53
|
|
|
53
|
0
|
87
|
my $self = shift; |
|
1743
|
53
|
|
|
|
|
114
|
my $save_pos = $self->{pos}; |
|
1744
|
53
|
|
|
|
|
92
|
my $save_line = $self->{line}; |
|
1745
|
53
|
|
|
|
|
86
|
my $save_ls = $self->{line_start}; |
|
1746
|
53
|
|
|
|
|
76
|
my $save_pending = scalar @{$self->{pending_leading}}; |
|
|
53
|
|
|
|
|
107
|
|
|
1747
|
53
|
|
|
|
|
113
|
my $save_comments = scalar @{$self->{comments}}; |
|
|
53
|
|
|
|
|
124
|
|
|
1748
|
53
|
|
|
|
|
186
|
$self->_skip_trivia; |
|
1749
|
53
|
|
|
|
|
146
|
my $rest = substr($self->{src}, $self->{pos}); |
|
1750
|
53
|
100
|
|
|
|
178
|
if (substr($rest, 0, 3) ne '+++') { |
|
1751
|
42
|
|
|
|
|
81
|
$self->{pos} = $save_pos; $self->{line} = $save_line; $self->{line_start} = $save_ls; |
|
|
42
|
|
|
|
|
73
|
|
|
|
42
|
|
|
|
|
102
|
|
|
1752
|
|
|
|
|
|
|
# Speculative skip_trivia may have captured comments — undo so |
|
1753
|
|
|
|
|
|
|
# the body parser re-captures them with the right path. |
|
1754
|
42
|
|
|
|
|
68
|
splice @{$self->{pending_leading}}, $save_pending; |
|
|
42
|
|
|
|
|
109
|
|
|
1755
|
42
|
|
|
|
|
107
|
splice @{$self->{comments}}, $save_comments; |
|
|
42
|
|
|
|
|
84
|
|
|
1756
|
42
|
|
|
|
|
126
|
return undef; |
|
1757
|
|
|
|
|
|
|
} |
|
1758
|
|
|
|
|
|
|
# Any trailing content on the opener line is a parse error |
|
1759
|
|
|
|
|
|
|
# (SPEC §Front matter: "each `+++` must appear on its own line, |
|
1760
|
|
|
|
|
|
|
# with no trailing content"). Advance past `+++` and let the |
|
1761
|
|
|
|
|
|
|
# strict EOL check below diagnose. |
|
1762
|
11
|
|
|
|
|
28
|
my ($ol, $ols, $op) = ($self->{line}, $self->{line_start}, $self->{pos}); |
|
1763
|
11
|
|
|
|
|
18
|
$self->{pos} += 3; |
|
1764
|
11
|
|
|
|
|
28
|
$self->_skip_inline_ws; |
|
1765
|
11
|
50
|
33
|
|
|
21
|
if (!($self->_consume_eol || $self->_eof)) { |
|
1766
|
0
|
|
|
|
|
0
|
$self->_die("front matter opener must be on its own line"); |
|
1767
|
|
|
|
|
|
|
} |
|
1768
|
11
|
|
|
|
|
14
|
my @inner; |
|
1769
|
11
|
|
|
|
|
15
|
while (1) { |
|
1770
|
24
|
100
|
|
|
|
41
|
if ($self->_eof) { |
|
1771
|
1
|
|
|
|
|
3
|
die $self->_err_at($ol, $ols, $op, "unterminated front matter: missing closing '+++'"); |
|
1772
|
|
|
|
|
|
|
} |
|
1773
|
23
|
|
|
|
|
31
|
my $lb = $self->{pos}; |
|
1774
|
23
|
|
|
|
|
27
|
while (1) { |
|
1775
|
240
|
|
|
|
|
313
|
my $c = $self->_peek; |
|
1776
|
240
|
100
|
66
|
|
|
693
|
last if !defined($c) || $c eq "\n" || $c eq "\r"; |
|
|
|
|
66
|
|
|
|
|
|
1777
|
217
|
|
|
|
|
277
|
$self->{pos}++; |
|
1778
|
|
|
|
|
|
|
} |
|
1779
|
23
|
|
|
|
|
48
|
my $lt = substr($self->{src}, $lb, $self->{pos} - $lb); |
|
1780
|
23
|
|
|
|
|
32
|
my $trimmed = $lt; $trimmed =~ s/^\s+|\s+$//g; |
|
|
23
|
|
|
|
|
90
|
|
|
1781
|
23
|
100
|
|
|
|
47
|
if ($trimmed eq '+++') { $self->_consume_eol; last; } |
|
|
10
|
|
|
|
|
21
|
|
|
|
10
|
|
|
|
|
14
|
|
|
1782
|
13
|
|
|
|
|
24
|
push @inner, $lt; |
|
1783
|
13
|
50
|
|
|
|
25
|
push @inner, "\n" if $self->_consume_eol; |
|
1784
|
|
|
|
|
|
|
} |
|
1785
|
10
|
|
|
|
|
27
|
my $inner_src = join('', @inner); |
|
1786
|
|
|
|
|
|
|
my $sub = bless { |
|
1787
|
|
|
|
|
|
|
src => $inner_src, |
|
1788
|
|
|
|
|
|
|
len => length($inner_src), |
|
1789
|
|
|
|
|
|
|
pos => 0, |
|
1790
|
|
|
|
|
|
|
line => 1, |
|
1791
|
|
|
|
|
|
|
line_start => 0, |
|
1792
|
|
|
|
|
|
|
pending_leading => [], |
|
1793
|
|
|
|
|
|
|
path => [], |
|
1794
|
|
|
|
|
|
|
comments => [], |
|
1795
|
|
|
|
|
|
|
original_forms => [], |
|
1796
|
|
|
|
|
|
|
record_forms => 1, |
|
1797
|
|
|
|
|
|
|
lite => $self->{lite}, |
|
1798
|
|
|
|
|
|
|
# Front-matter is always ordered, regardless of body's ignore_order. |
|
1799
|
|
|
|
|
|
|
# See SPEC §"Unordered tables". |
|
1800
|
10
|
|
|
|
|
86
|
ignore_order => 0, |
|
1801
|
|
|
|
|
|
|
}, __PACKAGE__; |
|
1802
|
10
|
|
|
|
|
45
|
my $table = $sub->parse_body_as_table; |
|
1803
|
10
|
|
|
|
|
37
|
my $lite = $self->{lite}; |
|
1804
|
10
|
|
|
|
|
15
|
my ($meta, $meta_order); |
|
1805
|
10
|
100
|
|
|
|
25
|
if ($lite) { |
|
1806
|
7
|
|
|
|
|
8
|
$meta_order = []; |
|
1807
|
7
|
|
|
|
|
10
|
$meta = { $ORDER_KEY => $meta_order }; |
|
1808
|
|
|
|
|
|
|
} else { |
|
1809
|
3
|
|
|
|
|
9
|
$meta = new_table(); |
|
1810
|
|
|
|
|
|
|
} |
|
1811
|
10
|
|
|
3
|
|
76
|
my $fm_err = sub { die $self->_err_at($ol, $ols, $op, $_[0]); }; |
|
|
3
|
|
|
|
|
7
|
|
|
1812
|
|
|
|
|
|
|
# Iterate $table in insertion order: lite-mode tables carry their |
|
1813
|
|
|
|
|
|
|
# order list at $ORDER_KEY, tied tables yield insertion order via |
|
1814
|
|
|
|
|
|
|
# `keys`. |
|
1815
|
10
|
|
|
|
|
15
|
my @table_keys; |
|
1816
|
10
|
100
|
66
|
|
|
34
|
if ($lite && exists $table->{$ORDER_KEY}) { |
|
1817
|
7
|
|
|
|
|
7
|
@table_keys = @{ $table->{$ORDER_KEY} }; |
|
|
7
|
|
|
|
|
13
|
|
|
1818
|
|
|
|
|
|
|
} else { |
|
1819
|
3
|
|
|
|
|
17
|
@table_keys = grep { $_ ne $ORDER_KEY } keys %$table; |
|
|
3
|
|
|
|
|
69
|
|
|
1820
|
|
|
|
|
|
|
} |
|
1821
|
10
|
|
|
|
|
22
|
for my $k (@table_keys) { |
|
1822
|
11
|
|
|
|
|
25
|
my $v = $table->{$k}; |
|
1823
|
11
|
100
|
|
|
|
50
|
if ($k =~ /^_/) { |
|
1824
|
4
|
100
|
|
|
|
6
|
if ($k eq '_dms_tier') { |
|
1825
|
3
|
100
|
|
|
|
7
|
unless (ref($v) eq 'DMS::Parser::Integer') { |
|
1826
|
1
|
|
|
|
|
3
|
$fm_err->("_dms_tier must be a non-negative integer"); |
|
1827
|
|
|
|
|
|
|
} |
|
1828
|
2
|
|
|
|
|
7
|
my $n = int($$v); |
|
1829
|
2
|
50
|
|
|
|
4
|
if ($n < 0) { |
|
1830
|
0
|
|
|
|
|
0
|
$fm_err->("_dms_tier must be non-negative"); |
|
1831
|
|
|
|
|
|
|
} |
|
1832
|
2
|
100
|
|
|
|
4
|
if ($n >= 1) { |
|
1833
|
1
|
|
|
|
|
3
|
$fm_err->("_dms_tier: $n is not supported (no tier >= 1 is defined in this version of DMS)"); |
|
1834
|
|
|
|
|
|
|
} |
|
1835
|
|
|
|
|
|
|
} else { |
|
1836
|
1
|
|
|
|
|
3
|
$fm_err->("unknown reserved key: $k"); |
|
1837
|
|
|
|
|
|
|
} |
|
1838
|
|
|
|
|
|
|
} else { |
|
1839
|
7
|
100
|
|
|
|
14
|
push @$meta_order, $k if $lite; |
|
1840
|
7
|
|
|
|
|
22
|
$meta->{$k} = $v; |
|
1841
|
|
|
|
|
|
|
} |
|
1842
|
|
|
|
|
|
|
} |
|
1843
|
|
|
|
|
|
|
# Hoist sub-parser comments into ours, prefixing each path with the |
|
1844
|
|
|
|
|
|
|
# sentinel string "__fm__" so callers can distinguish front-matter |
|
1845
|
|
|
|
|
|
|
# comments from body comments. Comments attached to reserved |
|
1846
|
|
|
|
|
|
|
# (consumed) `_dms_*` keys are dropped. |
|
1847
|
7
|
|
|
|
|
58
|
for my $ac (@{$sub->{comments}}) { |
|
|
7
|
|
|
|
|
13
|
|
|
1848
|
1
|
|
|
|
|
3
|
my $first = $ac->{path}[0]; |
|
1849
|
1
|
|
33
|
|
|
11
|
my $attached_to_reserved = (defined($first) && !ref($first) && substr($first, 0, 1) eq '_'); |
|
1850
|
1
|
50
|
|
|
|
4
|
if ($attached_to_reserved) { |
|
1851
|
|
|
|
|
|
|
# Reserved key was consumed — re-attach as floating on FM. |
|
1852
|
0
|
|
|
|
|
0
|
push @{$self->{comments}}, { |
|
1853
|
|
|
|
|
|
|
comment => $ac->{comment}, |
|
1854
|
0
|
|
|
|
|
0
|
position => 'floating', |
|
1855
|
|
|
|
|
|
|
path => ['__fm__'], |
|
1856
|
|
|
|
|
|
|
}; |
|
1857
|
0
|
|
|
|
|
0
|
next; |
|
1858
|
|
|
|
|
|
|
} |
|
1859
|
1
|
|
|
|
|
2
|
my @new_path = ('__fm__', @{$ac->{path}}); |
|
|
1
|
|
|
|
|
4
|
|
|
1860
|
1
|
|
|
|
|
21
|
push @{$self->{comments}}, { |
|
1861
|
|
|
|
|
|
|
comment => $ac->{comment}, |
|
1862
|
|
|
|
|
|
|
position => $ac->{position}, |
|
1863
|
1
|
|
|
|
|
2
|
path => \@new_path, |
|
1864
|
|
|
|
|
|
|
}; |
|
1865
|
|
|
|
|
|
|
} |
|
1866
|
|
|
|
|
|
|
# Same hoist for original_forms: `__fm__` prefix, drop entries for |
|
1867
|
|
|
|
|
|
|
# consumed `_dms_*` keys. |
|
1868
|
7
|
|
|
|
|
12
|
for my $pair (@{$sub->{original_forms}}) { |
|
|
7
|
|
|
|
|
14
|
|
|
1869
|
0
|
|
|
|
|
0
|
my ($spath, $lit) = @$pair; |
|
1870
|
0
|
|
|
|
|
0
|
my $first = $spath->[0]; |
|
1871
|
0
|
0
|
0
|
|
|
0
|
if (defined($first) && !ref($first) && substr($first, 0, 1) eq '_') { |
|
|
|
|
0
|
|
|
|
|
|
1872
|
0
|
|
|
|
|
0
|
next; |
|
1873
|
|
|
|
|
|
|
} |
|
1874
|
0
|
|
|
|
|
0
|
my @new_path = ('__fm__', @$spath); |
|
1875
|
0
|
|
|
|
|
0
|
push @{$self->{original_forms}}, [ \@new_path, $lit ]; |
|
|
0
|
|
|
|
|
0
|
|
|
1876
|
|
|
|
|
|
|
} |
|
1877
|
7
|
|
|
|
|
80
|
return $meta; |
|
1878
|
|
|
|
|
|
|
} |
|
1879
|
|
|
|
|
|
|
|
|
1880
|
|
|
|
|
|
|
sub parse_body_as_table { |
|
1881
|
10
|
|
|
10
|
0
|
13
|
my $self = shift; |
|
1882
|
10
|
|
|
|
|
20
|
$self->_skip_trivia; |
|
1883
|
10
|
100
|
|
|
|
19
|
if ($self->_eof) { |
|
1884
|
|
|
|
|
|
|
# FM body is empty or comments-only — flush pending as floating. |
|
1885
|
1
|
|
|
|
|
3
|
$self->_flush_pending_as_floating; |
|
1886
|
1
|
50
|
|
|
|
6
|
return $self->{lite} ? { $ORDER_KEY => [] } : new_table(); |
|
1887
|
|
|
|
|
|
|
} |
|
1888
|
9
|
|
|
|
|
20
|
my $c = $self->_peek; |
|
1889
|
9
|
50
|
33
|
|
|
33
|
if ($c eq ' ' || $c eq "\t") { $self->_die("unexpected indentation inside front matter"); } |
|
|
0
|
|
|
|
|
0
|
|
|
1890
|
|
|
|
|
|
|
# SPEC §Lexical "Reserved decorator sigils": rejected before the |
|
1891
|
|
|
|
|
|
|
# generic "front matter block must be a table" diagnostic so the |
|
1892
|
|
|
|
|
|
|
# error message identifies the actual cause. |
|
1893
|
9
|
|
|
|
|
24
|
$self->_check_reserved_sigil; |
|
1894
|
9
|
50
|
33
|
|
|
22
|
if ($c eq '+' && $self->_peek_after_plus_is_space_or_eol) { $self->_die("front matter block cannot have a list root"); } |
|
|
0
|
|
|
|
|
0
|
|
|
1895
|
9
|
50
|
|
|
|
21
|
if (!$self->_line_starts_kvpair) { $self->_die("front matter block must be a table"); } |
|
|
0
|
|
|
|
|
0
|
|
|
1896
|
9
|
|
|
|
|
26
|
my $t = $self->parse_table_block(0); |
|
1897
|
9
|
|
|
|
|
22
|
$self->_skip_trivia; |
|
1898
|
9
|
50
|
|
|
|
23
|
$self->_die("trailing content inside front matter") unless $self->_eof; |
|
1899
|
9
|
|
|
|
|
15
|
return $t; |
|
1900
|
|
|
|
|
|
|
} |
|
1901
|
|
|
|
|
|
|
|
|
1902
|
|
|
|
|
|
|
sub parse_body { |
|
1903
|
42
|
|
|
42
|
0
|
66
|
my $self = shift; |
|
1904
|
42
|
|
|
|
|
117
|
$self->_skip_trivia; |
|
1905
|
42
|
50
|
|
|
|
110
|
if ($self->_eof) { |
|
1906
|
|
|
|
|
|
|
# Empty / comment-only body: pending comments float on root. |
|
1907
|
0
|
|
|
|
|
0
|
$self->_flush_pending_as_floating; |
|
1908
|
0
|
0
|
|
|
|
0
|
if ($self->{ignore_order}) { |
|
1909
|
0
|
|
|
|
|
0
|
return new_unordered_table(); |
|
1910
|
|
|
|
|
|
|
} |
|
1911
|
0
|
0
|
|
|
|
0
|
return $self->{lite} ? { $ORDER_KEY => [] } : new_table(); |
|
1912
|
|
|
|
|
|
|
} |
|
1913
|
42
|
|
|
|
|
109
|
my $c = $self->_peek; |
|
1914
|
42
|
50
|
33
|
|
|
176
|
if ($c eq ' ' || $c eq "\t") { |
|
1915
|
0
|
|
|
|
|
0
|
$self->_die("unexpected indentation at document root"); |
|
1916
|
|
|
|
|
|
|
} |
|
1917
|
42
|
|
|
|
|
72
|
my $result; |
|
1918
|
42
|
100
|
66
|
|
|
162
|
if ($c eq '+' && $self->_peek_after_plus_is_space_or_eol) { |
|
|
|
50
|
|
|
|
|
|
|
1919
|
1
|
|
|
|
|
7
|
$result = $self->parse_list_block(0); |
|
1920
|
1
|
|
|
|
|
4
|
$self->_skip_trivia; |
|
1921
|
1
|
50
|
|
|
|
4
|
$self->_die("trailing content after list root") unless $self->_eof; |
|
1922
|
|
|
|
|
|
|
} elsif ($self->_line_starts_kvpair) { |
|
1923
|
41
|
|
|
|
|
138
|
$result = $self->parse_table_block(0); |
|
1924
|
41
|
|
|
|
|
113
|
$self->_skip_trivia; |
|
1925
|
41
|
50
|
|
|
|
106
|
$self->_die("trailing content after table root") unless $self->_eof; |
|
1926
|
|
|
|
|
|
|
} else { |
|
1927
|
0
|
|
|
|
|
0
|
$result = $self->parse_inline_value_or_heredoc; |
|
1928
|
0
|
|
|
|
|
0
|
$self->_consume_after_value(1); |
|
1929
|
0
|
|
|
|
|
0
|
$self->_skip_trivia; |
|
1930
|
0
|
0
|
|
|
|
0
|
$self->_die("scalar root cannot be followed by more content") unless $self->_eof; |
|
1931
|
|
|
|
|
|
|
} |
|
1932
|
|
|
|
|
|
|
# Any trivia comments seen after the body float on root. |
|
1933
|
42
|
|
|
|
|
119
|
$self->_flush_pending_as_floating; |
|
1934
|
42
|
|
|
|
|
104
|
return $result; |
|
1935
|
|
|
|
|
|
|
} |
|
1936
|
|
|
|
|
|
|
|
|
1937
|
|
|
|
|
|
|
sub _peek_after_plus_is_space_or_eol { |
|
1938
|
4
|
|
|
4
|
|
10
|
my $self = shift; |
|
1939
|
4
|
|
|
|
|
44
|
my $nxt = $self->_peek_at(1); |
|
1940
|
4
|
50
|
|
|
|
13
|
return 1 if !defined($nxt); |
|
1941
|
4
|
|
0
|
|
|
25
|
return $nxt eq ' ' || $nxt eq "\t" || $nxt eq "\n" || $nxt eq "\r"; |
|
1942
|
|
|
|
|
|
|
} |
|
1943
|
|
|
|
|
|
|
|
|
1944
|
|
|
|
|
|
|
sub _line_starts_kvpair { |
|
1945
|
59
|
|
|
59
|
|
111
|
my $self = shift; |
|
1946
|
59
|
|
|
|
|
101
|
my $p = $self->{pos}; |
|
1947
|
59
|
|
|
|
|
116
|
my $s = $self->{src}; |
|
1948
|
59
|
|
|
|
|
125
|
my $n = $self->{len}; |
|
1949
|
59
|
50
|
33
|
|
|
381
|
if ($p < $n && substr($s, $p, 1) eq '"') { |
|
|
|
50
|
33
|
|
|
|
|
|
1950
|
0
|
|
|
|
|
0
|
$p++; |
|
1951
|
0
|
|
|
|
|
0
|
while ($p < $n) { |
|
1952
|
0
|
|
|
|
|
0
|
my $ch = substr($s, $p, 1); |
|
1953
|
0
|
0
|
0
|
|
|
0
|
if ($ch eq '\\') { $p += 2; } |
|
|
0
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
|
|
|
|
1954
|
0
|
|
|
|
|
0
|
elsif ($ch eq '"') { $p++; last; } |
|
|
0
|
|
|
|
|
0
|
|
|
1955
|
0
|
|
|
|
|
0
|
elsif ($ch eq "\n" || $ch eq "\r") { return 0; } |
|
1956
|
0
|
|
|
|
|
0
|
else { $p++; } |
|
1957
|
|
|
|
|
|
|
} |
|
1958
|
|
|
|
|
|
|
} elsif ($p < $n && substr($s, $p, 1) eq "'") { |
|
1959
|
0
|
|
|
|
|
0
|
$p++; |
|
1960
|
0
|
|
|
|
|
0
|
while ($p < $n) { |
|
1961
|
0
|
|
|
|
|
0
|
my $ch = substr($s, $p, 1); |
|
1962
|
0
|
0
|
0
|
|
|
0
|
if ($ch eq "'") { $p++; last; } |
|
|
0
|
0
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
1963
|
0
|
|
|
|
|
0
|
elsif ($ch eq "\n" || $ch eq "\r") { return 0; } |
|
1964
|
0
|
|
|
|
|
0
|
else { $p++; } |
|
1965
|
|
|
|
|
|
|
} |
|
1966
|
|
|
|
|
|
|
} else { |
|
1967
|
59
|
|
|
|
|
96
|
my $any = 0; |
|
1968
|
59
|
|
|
|
|
161
|
while ($p < $n) { |
|
1969
|
296
|
|
|
|
|
509
|
my $ch = substr($s, $p, 1); |
|
1970
|
296
|
100
|
|
|
|
563
|
last if !_is_bare_key_char($ch); |
|
1971
|
237
|
|
|
|
|
379
|
$p++; $any = 1; |
|
|
237
|
|
|
|
|
497
|
|
|
1972
|
|
|
|
|
|
|
} |
|
1973
|
59
|
50
|
|
|
|
158
|
return 0 if !$any; |
|
1974
|
|
|
|
|
|
|
} |
|
1975
|
59
|
100
|
66
|
|
|
329
|
return 0 if $p >= $n || substr($s, $p, 1) ne ':'; |
|
1976
|
50
|
50
|
|
|
|
127
|
return 1 if $p + 1 >= $n; |
|
1977
|
50
|
|
|
|
|
99
|
my $nxt = substr($s, $p+1, 1); |
|
1978
|
50
|
|
33
|
|
|
227
|
return $nxt eq ' ' || $nxt eq "\t" || $nxt eq "\n" || $nxt eq "\r"; |
|
1979
|
|
|
|
|
|
|
} |
|
1980
|
|
|
|
|
|
|
|
|
1981
|
|
|
|
|
|
|
sub _measure_line_indent { |
|
1982
|
17
|
|
|
17
|
|
29
|
my $self = shift; |
|
1983
|
|
|
|
|
|
|
# Single regex hit instead of a per-char loop. Operate directly on |
|
1984
|
|
|
|
|
|
|
# $self->{src} (no copy of the 700KB+ source) — pos() set/read is |
|
1985
|
|
|
|
|
|
|
# safe on a hash element. |
|
1986
|
17
|
|
|
|
|
51
|
pos($self->{src}) = $self->{line_start}; |
|
1987
|
17
|
100
|
|
|
|
106
|
return $self->{src} =~ /\G( +)/g ? length($1) : 0; |
|
1988
|
|
|
|
|
|
|
} |
|
1989
|
|
|
|
|
|
|
|
|
1990
|
|
|
|
|
|
|
sub parse_table_block { |
|
1991
|
54
|
|
|
54
|
0
|
117
|
my ($self, $indent) = @_; |
|
1992
|
54
|
|
|
|
|
99
|
my $lite = $self->{lite}; |
|
1993
|
54
|
|
|
|
|
97
|
my $ignore_order = $self->{ignore_order}; |
|
1994
|
|
|
|
|
|
|
# Lite path uses a plain (non-tied) hash with a sidecar order list |
|
1995
|
|
|
|
|
|
|
# under "\0_keys" — Tie::IxHash STORE/FETCH/EXISTS dispatch overhead |
|
1996
|
|
|
|
|
|
|
# is the single biggest fixed cost on flat-table parses. Non-lite |
|
1997
|
|
|
|
|
|
|
# path keeps the tied hash for full Document-tree compatibility. |
|
1998
|
|
|
|
|
|
|
# ignore_order path: blessed DMS::Parser::UnorderedTable plain hashref, no |
|
1999
|
|
|
|
|
|
|
# order list, no tying. SPEC §"Unordered tables". |
|
2000
|
54
|
|
|
|
|
96
|
my $t; |
|
2001
|
|
|
|
|
|
|
my $order; |
|
2002
|
54
|
50
|
|
|
|
159
|
if ($ignore_order) { |
|
|
|
100
|
|
|
|
|
|
|
2003
|
0
|
|
|
|
|
0
|
$t = new_unordered_table(); |
|
2004
|
|
|
|
|
|
|
} elsif ($lite) { |
|
2005
|
6
|
|
|
|
|
5
|
$order = []; |
|
2006
|
6
|
|
|
|
|
12
|
$t = { $ORDER_KEY => $order }; |
|
2007
|
|
|
|
|
|
|
} else { |
|
2008
|
48
|
|
|
|
|
115
|
$t = new_table(); |
|
2009
|
|
|
|
|
|
|
} |
|
2010
|
|
|
|
|
|
|
# Hot bulk loop (lite mode, indent == 0 only). On flat-table |
|
2011
|
|
|
|
|
|
|
# benchmarks, every line matches `bareword: int\n` — we can batch |
|
2012
|
|
|
|
|
|
|
# them with one tight regex and avoid 50 k method-call frames per |
|
2013
|
|
|
|
|
|
|
# parse. The regex anchors at the current `pos`, captures up to N |
|
2014
|
|
|
|
|
|
|
# consecutive simple-line kvpairs, and stops on the first non-match |
|
2015
|
|
|
|
|
|
|
# so the slow path can handle anything else (strings, floats, dates, |
|
2016
|
|
|
|
|
|
|
# nested values, comments, indented blocks). Re-entered after each |
|
2017
|
|
|
|
|
|
|
# slow-path iteration so a one-off slow line doesn't disable the |
|
2018
|
|
|
|
|
|
|
# batch path for the rest of the file. |
|
2019
|
|
|
|
|
|
|
# Bulk path: lite + (ordered with sidecar) OR lite + ignore_order |
|
2020
|
|
|
|
|
|
|
# (no sidecar). Both paths use a plain hash; the only difference is |
|
2021
|
|
|
|
|
|
|
# whether `$order` is updated. |
|
2022
|
54
|
100
|
|
|
|
142
|
my $bulk = $lite ? 1 : 0; |
|
2023
|
|
|
|
|
|
|
# File-scoped cache below (%BULK_RE_CACHE) maps indent -> compiled |
|
2024
|
|
|
|
|
|
|
# regex. Persists across calls so common levels (0, 2, 4, 6) compile |
|
2025
|
|
|
|
|
|
|
# once globally even across recursive parse_table_block invocations. |
|
2026
|
54
|
|
66
|
|
|
227
|
my $bulk_re = $BULK_RE_CACHE{$indent} //= do { |
|
2027
|
5
|
100
|
|
|
|
21
|
my $sp = $indent == 0 ? '' : "[ ]{$indent}"; |
|
2028
|
|
|
|
|
|
|
# Groups: |
|
2029
|
|
|
|
|
|
|
# $2: positive integer |
|
2030
|
|
|
|
|
|
|
# $3: negative integer |
|
2031
|
|
|
|
|
|
|
# $4: bool |
|
2032
|
|
|
|
|
|
|
# $5: ASCII-only basic-string content (no escapes) |
|
2033
|
|
|
|
|
|
|
# $6: empty list / empty table literal |
|
2034
|
|
|
|
|
|
|
# $7: decimal float (matches `-?[0-9]+\.[0-9]+`, no exponent) |
|
2035
|
5
|
|
|
|
|
919
|
qr/\G$sp([A-Za-z_][A-Za-z0-9_-]*):[ ](?:(0|[1-9][0-9]{0,17})|(-[1-9][0-9]{0,17})|(true|false)|"([\x20-\x21\x23-\x5b\x5d-\x7e]*)"|(\[\]|\{\})|(-?(?:0|[1-9][0-9]*)\.[0-9]+))\r?\n/; |
|
2036
|
|
|
|
|
|
|
}; |
|
2037
|
54
|
|
|
|
|
108
|
while (1) { |
|
2038
|
154
|
100
|
66
|
|
|
2207
|
if ($bulk && $self->{pos} == $self->{line_start}) { |
|
2039
|
6
|
|
|
|
|
8
|
my $src_ref = \$self->{src}; |
|
2040
|
6
|
|
|
|
|
14
|
pos($$src_ref) = $self->{pos}; |
|
2041
|
6
|
|
|
|
|
9
|
my $line = $self->{line}; |
|
2042
|
|
|
|
|
|
|
# One regex matches multiple value forms: |
|
2043
|
|
|
|
|
|
|
# group $2: positive integer (0 or [1-9][0-9]{0,17}, 18 digits max → safe i64) |
|
2044
|
|
|
|
|
|
|
# group $3: negative integer (-[1-9][0-9]{0,17}) |
|
2045
|
|
|
|
|
|
|
# group $4: 'true' or 'false' |
|
2046
|
|
|
|
|
|
|
# group $5: simple ASCII basic-string content (no \, ", \n, \r, |
|
2047
|
|
|
|
|
|
|
# and ASCII-only so we skip the NFC pass) — empty |
|
2048
|
|
|
|
|
|
|
# string is common in real configs (kube values.yaml |
|
2049
|
|
|
|
|
|
|
# has hundreds of `key: ""` lines). |
|
2050
|
|
|
|
|
|
|
# group $6: '[]' or '{}' (empty list / empty table) |
|
2051
|
|
|
|
|
|
|
# Excludes leading-zero forms (which DMS rejects) and overflow. |
|
2052
|
|
|
|
|
|
|
# Accepts both LF and CRLF line endings. Anything else (escaped |
|
2053
|
|
|
|
|
|
|
# strings, floats, dates, multi-line blocks, comments) falls to |
|
2054
|
|
|
|
|
|
|
# the slow path on the next iteration. |
|
2055
|
|
|
|
|
|
|
# Inner bulk loop: consumes any pattern we can match purely |
|
2056
|
|
|
|
|
|
|
# by regex (kvpair, blank line, single-line `#` or `//` |
|
2057
|
|
|
|
|
|
|
# comment) without method dispatch. Falls back to the |
|
2058
|
|
|
|
|
|
|
# outer slow path the moment something complex appears |
|
2059
|
|
|
|
|
|
|
# (heredoc, escaped string, list item, block comment). |
|
2060
|
6
|
|
|
|
|
5
|
INNER: while (1) { |
|
2061
|
14
|
100
|
|
|
|
76
|
if ($$src_ref =~ /$bulk_re/gc) { |
|
2062
|
8
|
|
|
|
|
16
|
my $k = $1; |
|
2063
|
8
|
50
|
|
|
|
24
|
if (exists $t->{$k}) { |
|
2064
|
|
|
|
|
|
|
# Roll back to start of the current line for accurate error pos. |
|
2065
|
0
|
|
|
|
|
0
|
my $cur = pos($$src_ref); |
|
2066
|
0
|
|
0
|
|
|
0
|
while ($cur > 0 && substr($$src_ref, $cur - 1, 1) ne "\n") { $cur--; } |
|
|
0
|
|
|
|
|
0
|
|
|
2067
|
0
|
|
|
|
|
0
|
$self->{pos} = $cur; |
|
2068
|
0
|
|
|
|
|
0
|
$self->{line} = $line; |
|
2069
|
0
|
|
|
|
|
0
|
$self->{line_start} = $cur; |
|
2070
|
0
|
|
|
|
|
0
|
$self->_die("duplicate key: $k"); |
|
2071
|
|
|
|
|
|
|
} |
|
2072
|
8
|
|
|
|
|
7
|
my $val; |
|
2073
|
8
|
100
|
|
|
|
35
|
if (defined $2) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2074
|
2
|
|
|
|
|
4
|
my $iv = 0 + $2; |
|
2075
|
2
|
|
|
|
|
8
|
$val = bless \$iv, 'DMS::Parser::Integer'; |
|
2076
|
|
|
|
|
|
|
} elsif (defined $3) { |
|
2077
|
0
|
|
|
|
|
0
|
my $iv = 0 + $3; |
|
2078
|
0
|
|
|
|
|
0
|
$val = bless \$iv, 'DMS::Parser::Integer'; |
|
2079
|
|
|
|
|
|
|
} elsif (defined $4) { |
|
2080
|
0
|
0
|
|
|
|
0
|
my $bv = $4 eq 'true' ? 1 : 0; |
|
2081
|
0
|
|
|
|
|
0
|
$val = bless \$bv, 'DMS::Parser::Bool'; |
|
2082
|
|
|
|
|
|
|
} elsif (defined $5) { |
|
2083
|
|
|
|
|
|
|
# ASCII-only basic string, no escapes / no NFC. |
|
2084
|
6
|
|
|
|
|
8
|
$val = $5; |
|
2085
|
|
|
|
|
|
|
} elsif (defined $6) { |
|
2086
|
|
|
|
|
|
|
# '[]' or '{}'. |
|
2087
|
0
|
0
|
|
|
|
0
|
$val = $6 eq '[]' ? [] : { $ORDER_KEY => [] }; |
|
2088
|
|
|
|
|
|
|
} else { |
|
2089
|
|
|
|
|
|
|
# $7: decimal float. |
|
2090
|
0
|
|
|
|
|
0
|
my $fv = 0 + $7; |
|
2091
|
0
|
|
|
|
|
0
|
$val = bless \$fv, 'DMS::Parser::Float'; |
|
2092
|
|
|
|
|
|
|
} |
|
2093
|
8
|
50
|
|
|
|
18
|
push @$order, $k if $order; |
|
2094
|
8
|
|
|
|
|
12
|
$t->{$k} = $val; |
|
2095
|
8
|
|
|
|
|
8
|
$line++; |
|
2096
|
8
|
|
|
|
|
12
|
next INNER; |
|
2097
|
|
|
|
|
|
|
} |
|
2098
|
|
|
|
|
|
|
# Blank line at any leading whitespace. |
|
2099
|
6
|
50
|
|
|
|
8
|
if ($$src_ref =~ /\G[ \t]*\r?\n/gc) { |
|
2100
|
0
|
|
|
|
|
0
|
$line++; |
|
2101
|
0
|
|
|
|
|
0
|
next INNER; |
|
2102
|
|
|
|
|
|
|
} |
|
2103
|
|
|
|
|
|
|
# Single-line `#` comment (NOT `###` labeled block) or `//`. |
|
2104
|
|
|
|
|
|
|
# Excludes `/*` (C-style block) which spans multiple lines. |
|
2105
|
|
|
|
|
|
|
# bench_realistic is 56% comments — taking these in the |
|
2106
|
|
|
|
|
|
|
# bulk loop avoids hundreds of _skip_trivia method calls. |
|
2107
|
6
|
50
|
|
|
|
8
|
if ($$src_ref =~ /\G[ \t]*(?:#(?!##)|\/\/(?!\*))[^\n\r]*\r?\n/gc) { |
|
2108
|
0
|
|
|
|
|
0
|
$line++; |
|
2109
|
0
|
|
|
|
|
0
|
next INNER; |
|
2110
|
|
|
|
|
|
|
} |
|
2111
|
6
|
|
|
|
|
7
|
last INNER; |
|
2112
|
|
|
|
|
|
|
} |
|
2113
|
6
|
|
33
|
|
|
12
|
$self->{pos} = pos($$src_ref) // $self->{pos}; |
|
2114
|
6
|
|
|
|
|
8
|
$self->{line} = $line; |
|
2115
|
6
|
|
|
|
|
8
|
$self->{line_start} = $self->{pos}; |
|
2116
|
|
|
|
|
|
|
} |
|
2117
|
154
|
|
|
|
|
491
|
$self->_skip_trivia; |
|
2118
|
154
|
100
|
|
|
|
460
|
last if $self->{pos} >= $self->{len}; |
|
2119
|
|
|
|
|
|
|
# Inline _measure_line_indent: hot enough that the call cost |
|
2120
|
|
|
|
|
|
|
# matters across 50k iterations. For indent==0 (most flat |
|
2121
|
|
|
|
|
|
|
# tables) we can skip the regex when pos is already at |
|
2122
|
|
|
|
|
|
|
# line_start with no leading space — by far the common case. |
|
2123
|
100
|
|
|
|
|
217
|
my $li; |
|
2124
|
100
|
100
|
66
|
|
|
654
|
if ($indent == 0 && $self->{pos} == $self->{line_start} |
|
|
|
|
66
|
|
|
|
|
|
2125
|
|
|
|
|
|
|
&& substr($self->{src}, $self->{pos}, 1) ne ' ') { |
|
2126
|
95
|
|
|
|
|
157
|
$li = 0; |
|
2127
|
|
|
|
|
|
|
} else { |
|
2128
|
5
|
|
|
|
|
18
|
pos($self->{src}) = $self->{line_start}; |
|
2129
|
5
|
50
|
|
|
|
36
|
$li = $self->{src} =~ /\G( +)/g ? length($1) : 0; |
|
2130
|
|
|
|
|
|
|
} |
|
2131
|
100
|
50
|
|
|
|
271
|
last if $li < $indent; |
|
2132
|
100
|
50
|
|
|
|
279
|
if ($li != $indent) { |
|
2133
|
0
|
|
|
|
|
0
|
die $self->_err_at($self->{line}, $self->{line_start}, $self->{line_start}+$indent, |
|
2134
|
|
|
|
|
|
|
"inconsistent indent: expected $indent spaces, got $li"); |
|
2135
|
|
|
|
|
|
|
} |
|
2136
|
100
|
|
|
|
|
223
|
$self->{pos} = $self->{line_start} + $indent; |
|
2137
|
|
|
|
|
|
|
# SPEC §Lexical "Reserved decorator sigils": reject ! @ $ % ^ & * |
|
2138
|
|
|
|
|
|
|
# | ~ ` . , > < ? ; = as the first non-whitespace character of a |
|
2139
|
|
|
|
|
|
|
# body line. We sit at exactly that position now (line_start + |
|
2140
|
|
|
|
|
|
|
# structural indent), so a single-char check is sufficient. |
|
2141
|
100
|
|
|
|
|
297
|
$self->_check_reserved_sigil; |
|
2142
|
100
|
|
|
|
|
162
|
my ($k, $v); |
|
2143
|
100
|
50
|
|
|
|
204
|
if ($lite) { |
|
2144
|
|
|
|
|
|
|
# Inlined fast-path of parse_kvpair: skip the eval frame and |
|
2145
|
|
|
|
|
|
|
# the path push/pop that parse_kvpair adds for non-lite modes. |
|
2146
|
0
|
|
|
|
|
0
|
$k = $self->parse_key; |
|
2147
|
|
|
|
|
|
|
$self->_die("expected ':' after key") |
|
2148
|
0
|
0
|
|
|
|
0
|
if substr($self->{src}, $self->{pos}, 1) ne ':'; |
|
2149
|
0
|
|
|
|
|
0
|
$v = $self->_parse_kvpair_after_key($indent); |
|
2150
|
0
|
0
|
|
|
|
0
|
$self->_die("duplicate key: $k") if exists $t->{$k}; |
|
2151
|
0
|
0
|
|
|
|
0
|
push @$order, $k if $order; |
|
2152
|
0
|
|
|
|
|
0
|
$t->{$k} = $v; |
|
2153
|
|
|
|
|
|
|
} else { |
|
2154
|
100
|
|
|
|
|
295
|
($k, $v) = $self->parse_kvpair($indent); |
|
2155
|
100
|
50
|
|
|
|
603
|
$self->_die("duplicate key: $k") if exists $t->{$k}; |
|
2156
|
100
|
|
|
|
|
951
|
$t->{$k} = $v; |
|
2157
|
|
|
|
|
|
|
} |
|
2158
|
|
|
|
|
|
|
} |
|
2159
|
|
|
|
|
|
|
# Block close: leftover pending comments float on the enclosing |
|
2160
|
|
|
|
|
|
|
# container (this table itself). |
|
2161
|
54
|
100
|
|
|
|
186
|
$self->_flush_pending_as_floating unless $lite; |
|
2162
|
54
|
|
|
|
|
127
|
return $t; |
|
2163
|
|
|
|
|
|
|
} |
|
2164
|
|
|
|
|
|
|
|
|
2165
|
|
|
|
|
|
|
sub parse_list_block { |
|
2166
|
4
|
|
|
4
|
0
|
10
|
my ($self, $indent) = @_; |
|
2167
|
4
|
|
|
|
|
8
|
my @items; |
|
2168
|
4
|
|
|
|
|
9
|
while (1) { |
|
2169
|
13
|
|
|
|
|
39
|
$self->_skip_trivia; |
|
2170
|
13
|
100
|
|
|
|
32
|
last if $self->_eof; |
|
2171
|
10
|
|
|
|
|
27
|
my $li = $self->_measure_line_indent; |
|
2172
|
10
|
100
|
|
|
|
24
|
last if $li < $indent; |
|
2173
|
9
|
50
|
|
|
|
27
|
if ($li != $indent) { |
|
2174
|
0
|
|
|
|
|
0
|
die $self->_err_at($self->{line}, $self->{line_start}, $self->{line_start}+$indent, |
|
2175
|
|
|
|
|
|
|
"inconsistent indent: expected $indent spaces, got $li"); |
|
2176
|
|
|
|
|
|
|
} |
|
2177
|
9
|
|
|
|
|
20
|
$self->{pos} = $self->{line_start} + $indent; |
|
2178
|
9
|
50
|
|
|
|
31
|
if ($self->_peek ne '+') { last; } |
|
|
0
|
|
|
|
|
0
|
|
|
2179
|
|
|
|
|
|
|
# Commit to a new list item: push its index, attach pending |
|
2180
|
|
|
|
|
|
|
# leading comments to it, then parse the value. |
|
2181
|
9
|
|
|
|
|
20
|
my $idx = scalar @items; |
|
2182
|
9
|
|
|
|
|
17
|
push @{$self->{path}}, DMS::Parser::Index->new($idx); |
|
|
9
|
|
|
|
|
41
|
|
|
2183
|
9
|
|
|
|
|
40
|
$self->_flush_pending_as_leading_on_current; |
|
2184
|
9
|
|
|
|
|
14
|
$self->{pos}++; |
|
2185
|
9
|
|
|
|
|
39
|
my $c = $self->_peek; |
|
2186
|
9
|
|
|
|
|
18
|
my $item; |
|
2187
|
9
|
|
|
|
|
17
|
my $ok = eval { |
|
2188
|
9
|
50
|
33
|
|
|
47
|
if (defined($c) && ($c eq ' ' || $c eq "\t")) { |
|
|
|
0
|
33
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2189
|
9
|
|
|
|
|
18
|
$self->{pos}++; |
|
2190
|
9
|
|
|
|
|
35
|
$self->_skip_inline_ws; |
|
2191
|
9
|
|
|
|
|
30
|
$self->_capture_inner_block_comments; |
|
2192
|
9
|
|
|
|
|
20
|
my $c2 = $self->_peek; |
|
2193
|
9
|
50
|
33
|
|
|
101
|
if (!defined($c2) || $c2 eq "\n" || $c2 eq "\r") { |
|
|
|
|
33
|
|
|
|
|
|
2194
|
|
|
|
|
|
|
# "+ INNER[EOL]" — empty item with inner comments. |
|
2195
|
0
|
|
|
|
|
0
|
$self->_consume_eol; |
|
2196
|
0
|
|
|
|
|
0
|
$self->_skip_trivia; |
|
2197
|
0
|
0
|
|
|
|
0
|
$self->_die("expected indented block after empty '+' marker") if $self->_eof; |
|
2198
|
0
|
|
|
|
|
0
|
my $inner = $self->_measure_line_indent; |
|
2199
|
0
|
0
|
|
|
|
0
|
$self->_die("expected indented block after empty '+' marker") if $inner <= $indent; |
|
2200
|
0
|
|
|
|
|
0
|
$item = $self->parse_block_value($inner); |
|
2201
|
|
|
|
|
|
|
} else { |
|
2202
|
9
|
|
|
|
|
30
|
$item = $self->parse_list_item_value($indent); |
|
2203
|
|
|
|
|
|
|
} |
|
2204
|
|
|
|
|
|
|
} elsif (!defined($c) || $c eq "\n" || $c eq "\r") { |
|
2205
|
0
|
|
|
|
|
0
|
$self->_consume_eol; |
|
2206
|
0
|
|
|
|
|
0
|
$self->_skip_trivia; |
|
2207
|
0
|
0
|
|
|
|
0
|
$self->_die("expected indented block after empty '+' marker") if $self->_eof; |
|
2208
|
0
|
|
|
|
|
0
|
my $inner = $self->_measure_line_indent; |
|
2209
|
0
|
0
|
|
|
|
0
|
$self->_die("expected indented block after empty '+' marker") if $inner <= $indent; |
|
2210
|
0
|
|
|
|
|
0
|
$item = $self->parse_block_value($inner); |
|
2211
|
|
|
|
|
|
|
} else { |
|
2212
|
0
|
|
|
|
|
0
|
$self->_die("expected space after '+'"); |
|
2213
|
|
|
|
|
|
|
} |
|
2214
|
9
|
|
|
|
|
16
|
1; |
|
2215
|
|
|
|
|
|
|
}; |
|
2216
|
9
|
|
|
|
|
20
|
my $err = $@; |
|
2217
|
9
|
|
|
|
|
15
|
pop @{$self->{path}}; |
|
|
9
|
|
|
|
|
36
|
|
|
2218
|
9
|
50
|
|
|
|
39
|
if (!$ok) { die $err; } |
|
|
0
|
|
|
|
|
0
|
|
|
2219
|
9
|
|
|
|
|
20
|
push @items, $item; |
|
2220
|
|
|
|
|
|
|
} |
|
2221
|
|
|
|
|
|
|
# Block close: leftover pending comments float on the list itself. |
|
2222
|
4
|
|
|
|
|
16
|
$self->_flush_pending_as_floating; |
|
2223
|
4
|
|
|
|
|
15
|
return \@items; |
|
2224
|
|
|
|
|
|
|
} |
|
2225
|
|
|
|
|
|
|
|
|
2226
|
|
|
|
|
|
|
sub parse_block_value { |
|
2227
|
7
|
|
|
7
|
0
|
17
|
my ($self, $indent) = @_; |
|
2228
|
7
|
|
|
|
|
17
|
$self->{pos} = $self->{line_start} + $indent; |
|
2229
|
7
|
100
|
66
|
|
|
54
|
if ($self->_peek eq '+' && $self->_peek_after_plus_is_space_or_eol) { |
|
2230
|
3
|
|
|
|
|
9
|
return $self->parse_list_block($indent); |
|
2231
|
|
|
|
|
|
|
} |
|
2232
|
4
|
|
|
|
|
50
|
return $self->parse_table_block($indent); |
|
2233
|
|
|
|
|
|
|
} |
|
2234
|
|
|
|
|
|
|
|
|
2235
|
|
|
|
|
|
|
sub parse_list_item_value { |
|
2236
|
9
|
|
|
9
|
0
|
21
|
my ($self, $list_indent) = @_; |
|
2237
|
9
|
50
|
|
|
|
23
|
if ($self->_line_starts_kvpair) { |
|
2238
|
0
|
|
|
|
|
0
|
my $key_col = $self->{pos} - $self->{line_start}; |
|
2239
|
0
|
|
|
|
|
0
|
my $lite = $self->{lite}; |
|
2240
|
0
|
|
|
|
|
0
|
my $ignore_order = $self->{ignore_order}; |
|
2241
|
0
|
|
|
|
|
0
|
my ($k, $v) = $self->parse_kvpair($key_col); |
|
2242
|
0
|
|
|
|
|
0
|
my ($t, $order); |
|
2243
|
0
|
0
|
|
|
|
0
|
if ($ignore_order) { |
|
|
|
0
|
|
|
|
|
|
|
2244
|
0
|
|
|
|
|
0
|
$t = new_unordered_table(); |
|
2245
|
0
|
|
|
|
|
0
|
$t->{$k} = $v; |
|
2246
|
|
|
|
|
|
|
} elsif ($lite) { |
|
2247
|
0
|
|
|
|
|
0
|
$order = [$k]; |
|
2248
|
0
|
|
|
|
|
0
|
$t = { $ORDER_KEY => $order, $k => $v }; |
|
2249
|
|
|
|
|
|
|
} else { |
|
2250
|
0
|
|
|
|
|
0
|
$t = new_table(); |
|
2251
|
0
|
|
|
|
|
0
|
$t->{$k} = $v; |
|
2252
|
|
|
|
|
|
|
} |
|
2253
|
0
|
|
|
|
|
0
|
while (1) { |
|
2254
|
0
|
|
|
|
|
0
|
$self->_skip_trivia; |
|
2255
|
0
|
0
|
|
|
|
0
|
last if $self->_eof; |
|
2256
|
0
|
|
|
|
|
0
|
my $li = $self->_measure_line_indent; |
|
2257
|
0
|
0
|
|
|
|
0
|
last if $li < $key_col; |
|
2258
|
0
|
0
|
|
|
|
0
|
if ($li != $key_col) { |
|
2259
|
0
|
|
|
|
|
0
|
die $self->_err_at($self->{line}, $self->{line_start}, $self->{line_start}+$key_col, |
|
2260
|
|
|
|
|
|
|
"list-item table sibling key must align with first key"); |
|
2261
|
|
|
|
|
|
|
} |
|
2262
|
0
|
|
|
|
|
0
|
$self->{pos} = $self->{line_start} + $key_col; |
|
2263
|
0
|
0
|
|
|
|
0
|
$self->_die("'+' marker at sibling-key column is ambiguous") if $self->_peek eq '+'; |
|
2264
|
0
|
0
|
|
|
|
0
|
last if !$self->_line_starts_kvpair; |
|
2265
|
0
|
|
|
|
|
0
|
my ($k2, $v2) = $self->parse_kvpair($key_col); |
|
2266
|
0
|
0
|
|
|
|
0
|
$self->_die("duplicate key: $k2") if exists $t->{$k2}; |
|
2267
|
0
|
0
|
|
|
|
0
|
push @$order, $k2 if $order; |
|
2268
|
0
|
|
|
|
|
0
|
$t->{$k2} = $v2; |
|
2269
|
|
|
|
|
|
|
} |
|
2270
|
|
|
|
|
|
|
# End of inline-table-in-list-item: any pending leading comments |
|
2271
|
|
|
|
|
|
|
# belong to the enclosing list item itself (Floating). |
|
2272
|
0
|
|
|
|
|
0
|
$self->_flush_pending_as_floating; |
|
2273
|
0
|
|
|
|
|
0
|
return $t; |
|
2274
|
|
|
|
|
|
|
} |
|
2275
|
9
|
|
|
|
|
25
|
my $v = $self->parse_inline_value_or_heredoc; |
|
2276
|
9
|
|
|
|
|
36
|
$self->_consume_after_value(0); |
|
2277
|
9
|
|
|
|
|
22
|
return $v; |
|
2278
|
|
|
|
|
|
|
} |
|
2279
|
|
|
|
|
|
|
|
|
2280
|
|
|
|
|
|
|
sub parse_kvpair { |
|
2281
|
100
|
|
|
100
|
0
|
251
|
my ($self, $parent_indent) = @_; |
|
2282
|
100
|
|
|
|
|
257
|
my $key = $self->parse_key; |
|
2283
|
100
|
50
|
|
|
|
347
|
$self->_die("expected ':' after key") if substr($self->{src}, $self->{pos}, 1) ne ':'; |
|
2284
|
|
|
|
|
|
|
# Lite mode: no comment-AST, no original_form recording, no path |
|
2285
|
|
|
|
|
|
|
# bookkeeping needed. Skip the breadcrumb push/pop + eval frame. |
|
2286
|
100
|
50
|
|
|
|
329
|
if ($self->{lite}) { |
|
2287
|
0
|
|
|
|
|
0
|
my $v = $self->_parse_kvpair_after_key($parent_indent); |
|
2288
|
0
|
|
|
|
|
0
|
return ($key, $v); |
|
2289
|
|
|
|
|
|
|
} |
|
2290
|
|
|
|
|
|
|
# We've now committed: this is a kvpair. Push the breadcrumb so |
|
2291
|
|
|
|
|
|
|
# pending leading comments attach here and so trailing comments |
|
2292
|
|
|
|
|
|
|
# captured by _consume_after_value get the right path. |
|
2293
|
100
|
|
|
|
|
153
|
push @{$self->{path}}, $key; |
|
|
100
|
|
|
|
|
271
|
|
|
2294
|
100
|
|
|
|
|
293
|
$self->_flush_pending_as_leading_on_current; |
|
2295
|
100
|
|
|
|
|
175
|
my $v; |
|
2296
|
100
|
|
|
|
|
178
|
my $ok = eval { $v = $self->_parse_kvpair_after_key($parent_indent); 1 }; |
|
|
100
|
|
|
|
|
277
|
|
|
|
100
|
|
|
|
|
230
|
|
|
2297
|
100
|
|
|
|
|
192
|
my $err = $@; |
|
2298
|
100
|
|
|
|
|
144
|
pop @{$self->{path}}; |
|
|
100
|
|
|
|
|
210
|
|
|
2299
|
100
|
50
|
|
|
|
277
|
if (!$ok) { die $err; } |
|
|
0
|
|
|
|
|
0
|
|
|
2300
|
100
|
|
|
|
|
338
|
return ($key, $v); |
|
2301
|
|
|
|
|
|
|
} |
|
2302
|
|
|
|
|
|
|
|
|
2303
|
|
|
|
|
|
|
sub _parse_kvpair_after_key { |
|
2304
|
100
|
|
|
100
|
|
208
|
my ($self, $parent_indent) = @_; |
|
2305
|
100
|
|
|
|
|
176
|
$self->{pos}++; # consume ':' |
|
2306
|
|
|
|
|
|
|
# Inline _peek: per-key dispatch overhead is significant on flat-50k. |
|
2307
|
100
|
|
|
|
|
189
|
my $p = $self->{pos}; |
|
2308
|
100
|
50
|
|
|
|
278
|
my $c = $p >= $self->{len} ? undef : substr($self->{src}, $p, 1); |
|
2309
|
100
|
100
|
66
|
|
|
449
|
if (defined($c) && ($c eq ' ' || $c eq "\t")) { |
|
|
|
|
33
|
|
|
|
|
|
2310
|
93
|
|
|
|
|
161
|
$self->{pos}++; |
|
2311
|
93
|
|
|
|
|
153
|
$p = $self->{pos}; |
|
2312
|
|
|
|
|
|
|
# Inline _skip_inline_ws fast path — only run the regex if the |
|
2313
|
|
|
|
|
|
|
# next byte is itself ws (single-space `key: v` is the common case). |
|
2314
|
93
|
50
|
|
|
|
298
|
my $c2 = $p >= $self->{len} ? undef : substr($self->{src}, $p, 1); |
|
2315
|
93
|
50
|
33
|
|
|
491
|
if (defined($c2) && ($c2 eq ' ' || $c2 eq "\t")) { |
|
|
|
|
33
|
|
|
|
|
|
2316
|
0
|
|
|
|
|
0
|
pos($self->{src}) = $p; |
|
2317
|
0
|
|
|
|
|
0
|
$self->{src} =~ /\G[ \t]+/gc; |
|
2318
|
0
|
|
|
|
|
0
|
$self->{pos} = pos($self->{src}); |
|
2319
|
0
|
|
|
|
|
0
|
$p = $self->{pos}; |
|
2320
|
0
|
0
|
|
|
|
0
|
$c2 = $p >= $self->{len} ? undef : substr($self->{src}, $p, 1); |
|
2321
|
|
|
|
|
|
|
} |
|
2322
|
|
|
|
|
|
|
# Inline _capture_inner_block_comments fast path: only call out |
|
2323
|
|
|
|
|
|
|
# if the next byte is '/'. |
|
2324
|
93
|
50
|
33
|
|
|
389
|
if (defined($c2) && $c2 eq '/') { |
|
2325
|
0
|
|
|
|
|
0
|
$self->_capture_inner_block_comments; |
|
2326
|
0
|
|
|
|
|
0
|
$p = $self->{pos}; |
|
2327
|
0
|
0
|
|
|
|
0
|
$c2 = $p >= $self->{len} ? undef : substr($self->{src}, $p, 1); |
|
2328
|
|
|
|
|
|
|
} |
|
2329
|
93
|
50
|
33
|
|
|
470
|
if (!defined($c2) || $c2 eq "\n" || $c2 eq "\r") { |
|
|
|
|
33
|
|
|
|
|
|
2330
|
|
|
|
|
|
|
# "key: INNER[EOL]" — child block with inner comments. |
|
2331
|
0
|
|
|
|
|
0
|
$self->_consume_eol; |
|
2332
|
0
|
|
|
|
|
0
|
$self->_skip_trivia; |
|
2333
|
0
|
0
|
|
|
|
0
|
$self->_die("expected indented child block") if $self->_eof; |
|
2334
|
0
|
|
|
|
|
0
|
my $child = $self->_measure_line_indent; |
|
2335
|
0
|
0
|
|
|
|
0
|
$self->_die("expected indented child block") if $child <= $parent_indent; |
|
2336
|
0
|
|
|
|
|
0
|
return $self->parse_block_value($child); |
|
2337
|
|
|
|
|
|
|
} |
|
2338
|
93
|
|
|
|
|
265
|
my $v = $self->parse_inline_value_or_heredoc; |
|
2339
|
93
|
|
|
|
|
348
|
$self->_consume_after_value(0); |
|
2340
|
93
|
|
|
|
|
242
|
return $v; |
|
2341
|
|
|
|
|
|
|
} |
|
2342
|
7
|
50
|
33
|
|
|
37
|
if (!defined($c) || $c eq "\n" || $c eq "\r") { |
|
|
|
|
33
|
|
|
|
|
|
2343
|
7
|
|
|
|
|
25
|
$self->_consume_eol; |
|
2344
|
7
|
|
|
|
|
21
|
$self->_skip_trivia; |
|
2345
|
7
|
50
|
|
|
|
20
|
$self->_die("expected indented child block") if $self->_eof; |
|
2346
|
7
|
|
|
|
|
23
|
my $child = $self->_measure_line_indent; |
|
2347
|
7
|
50
|
|
|
|
20
|
$self->_die("expected indented child block") if $child <= $parent_indent; |
|
2348
|
7
|
|
|
|
|
29
|
return $self->parse_block_value($child); |
|
2349
|
|
|
|
|
|
|
} |
|
2350
|
0
|
|
|
|
|
0
|
$self->_die("expected whitespace after ':'"); |
|
2351
|
|
|
|
|
|
|
} |
|
2352
|
|
|
|
|
|
|
|
|
2353
|
|
|
|
|
|
|
sub parse_key { |
|
2354
|
102
|
|
|
102
|
0
|
169
|
my $self = shift; |
|
2355
|
102
|
|
|
|
|
189
|
my $p = $self->{pos}; |
|
2356
|
102
|
50
|
|
|
|
287
|
my $c = $p >= $self->{len} ? undef : substr($self->{src}, $p, 1); |
|
2357
|
102
|
50
|
33
|
|
|
409
|
if (defined($c) && $c eq '"') { |
|
2358
|
0
|
0
|
|
|
|
0
|
$self->_die("triple-quoted strings are not allowed as keys") if $self->_starts_with('"""'); |
|
2359
|
|
|
|
|
|
|
# Suppress original-form recording: keys are not values and must |
|
2360
|
|
|
|
|
|
|
# not generate OriginalLiteral entries on the parent path. |
|
2361
|
0
|
|
|
|
|
0
|
my $saved = $self->{record_forms}; |
|
2362
|
0
|
|
|
|
|
0
|
$self->{record_forms} = 0; |
|
2363
|
0
|
|
|
|
|
0
|
my $r = eval { $self->parse_basic_string_value }; |
|
|
0
|
|
|
|
|
0
|
|
|
2364
|
0
|
|
|
|
|
0
|
my $e = $@; |
|
2365
|
0
|
|
|
|
|
0
|
$self->{record_forms} = $saved; |
|
2366
|
0
|
0
|
|
|
|
0
|
die $e if $e; |
|
2367
|
0
|
|
|
|
|
0
|
return $r; |
|
2368
|
|
|
|
|
|
|
} |
|
2369
|
102
|
50
|
33
|
|
|
395
|
if (defined($c) && $c eq "'") { |
|
2370
|
0
|
0
|
|
|
|
0
|
$self->_die("triple-quoted strings are not allowed as keys") if $self->_starts_with("'''"); |
|
2371
|
0
|
|
|
|
|
0
|
my $saved = $self->{record_forms}; |
|
2372
|
0
|
|
|
|
|
0
|
$self->{record_forms} = 0; |
|
2373
|
0
|
|
|
|
|
0
|
my $r = eval { $self->parse_literal_string_value }; |
|
|
0
|
|
|
|
|
0
|
|
|
2374
|
0
|
|
|
|
|
0
|
my $e = $@; |
|
2375
|
0
|
|
|
|
|
0
|
$self->{record_forms} = $saved; |
|
2376
|
0
|
0
|
|
|
|
0
|
die $e if $e; |
|
2377
|
0
|
|
|
|
|
0
|
return $r; |
|
2378
|
|
|
|
|
|
|
} |
|
2379
|
102
|
50
|
|
|
|
222
|
$self->_die("expected key") if !defined($c); |
|
2380
|
102
|
|
|
|
|
267
|
return $self->parse_bare_key; |
|
2381
|
|
|
|
|
|
|
} |
|
2382
|
|
|
|
|
|
|
|
|
2383
|
|
|
|
|
|
|
sub parse_bare_key { |
|
2384
|
102
|
|
|
102
|
0
|
168
|
my $self = shift; |
|
2385
|
102
|
|
|
|
|
213
|
my $start = $self->{pos}; |
|
2386
|
|
|
|
|
|
|
# Fast path: ASCII-only bare key. One regex bite handles the entire |
|
2387
|
|
|
|
|
|
|
# token — no per-byte loop, no Unicode codepoint test. |
|
2388
|
102
|
|
|
|
|
358
|
pos($self->{src}) = $start; |
|
2389
|
102
|
50
|
|
|
|
500
|
if ($self->{src} =~ /\G[A-Za-z0-9_-]+/gc) { |
|
2390
|
102
|
|
|
|
|
242
|
$self->{pos} = pos($self->{src}); |
|
2391
|
|
|
|
|
|
|
} |
|
2392
|
|
|
|
|
|
|
# Slow path: extend across non-ASCII XID_Continue codepoints if any. |
|
2393
|
102
|
|
|
|
|
277
|
while ($self->{pos} < $self->{len}) { |
|
2394
|
102
|
|
|
|
|
277
|
my $c = substr($self->{src}, $self->{pos}, 1); |
|
2395
|
102
|
50
|
|
|
|
275
|
last if ord($c) < 128; |
|
2396
|
0
|
0
|
|
|
|
0
|
last unless _is_bare_key_char($c); |
|
2397
|
0
|
|
|
|
|
0
|
$self->{pos}++; |
|
2398
|
0
|
|
|
|
|
0
|
pos($self->{src}) = $self->{pos}; |
|
2399
|
0
|
0
|
|
|
|
0
|
if ($self->{src} =~ /\G[A-Za-z0-9_-]+/gc) { |
|
2400
|
0
|
|
|
|
|
0
|
$self->{pos} = pos($self->{src}); |
|
2401
|
|
|
|
|
|
|
} |
|
2402
|
|
|
|
|
|
|
} |
|
2403
|
102
|
50
|
|
|
|
242
|
$self->_die("expected key") if $self->{pos} == $start; |
|
2404
|
102
|
|
|
|
|
356
|
return substr($self->{src}, $start, $self->{pos} - $start); |
|
2405
|
|
|
|
|
|
|
} |
|
2406
|
|
|
|
|
|
|
|
|
2407
|
|
|
|
|
|
|
sub _capture_inner_block_comments { |
|
2408
|
9
|
|
|
9
|
|
13
|
my $self = shift; |
|
2409
|
|
|
|
|
|
|
# Hot-path early-out: if the next byte isn't '/', there's nothing to do. |
|
2410
|
|
|
|
|
|
|
# Per-key cost is one substr instead of two method calls. |
|
2411
|
9
|
|
|
|
|
17
|
my $p = $self->{pos}; |
|
2412
|
|
|
|
|
|
|
return if $p >= $self->{len} || substr($self->{src}, $p, 1) ne '/' |
|
2413
|
9
|
50
|
33
|
|
|
94
|
|| substr($self->{src}, $p, 2) ne '/*'; |
|
|
|
|
33
|
|
|
|
|
|
2414
|
0
|
|
|
|
|
0
|
while (1) { |
|
2415
|
0
|
|
|
|
|
0
|
$p = $self->{pos}; |
|
2416
|
0
|
0
|
0
|
|
|
0
|
last if $p >= $self->{len} || substr($self->{src}, $p, 2) ne '/*'; |
|
2417
|
0
|
|
|
|
|
0
|
my $raw = $self->_read_c_block_comment; |
|
2418
|
0
|
|
|
|
|
0
|
push @{$self->{comments}}, { |
|
2419
|
|
|
|
|
|
|
comment => { content => $raw, kind => 'block' }, |
|
2420
|
|
|
|
|
|
|
position => 'inner', |
|
2421
|
0
|
|
|
|
|
0
|
path => [@{$self->{path}}], |
|
2422
|
0
|
0
|
|
|
|
0
|
} unless $self->{lite}; |
|
2423
|
0
|
|
|
|
|
0
|
$self->_skip_inline_ws; |
|
2424
|
|
|
|
|
|
|
} |
|
2425
|
|
|
|
|
|
|
} |
|
2426
|
|
|
|
|
|
|
|
|
2427
|
|
|
|
|
|
|
sub parse_inline_value_or_heredoc { |
|
2428
|
113
|
|
|
113
|
0
|
194
|
my $self = shift; |
|
2429
|
|
|
|
|
|
|
# Inner /* ... */ comments are captured by the caller via |
|
2430
|
|
|
|
|
|
|
# _capture_inner_block_comments before this function runs. |
|
2431
|
113
|
|
|
|
|
203
|
my $p = $self->{pos}; |
|
2432
|
113
|
50
|
|
|
|
292
|
my $c = $p >= $self->{len} ? undef : substr($self->{src}, $p, 1); |
|
2433
|
|
|
|
|
|
|
# SPEC §Lexical "Reserved decorator sigils": a value position that |
|
2434
|
|
|
|
|
|
|
# leads with one of `! @ $ % ^ & * | ~ \` . , > < ? ; =` is rejected |
|
2435
|
|
|
|
|
|
|
# at tier 0. Covers `key: !tag`, `+ !tag`, scalar-root `!tag`, and |
|
2436
|
|
|
|
|
|
|
# the same forms inside flow containers (which dispatch per item). |
|
2437
|
113
|
50
|
33
|
|
|
471
|
if (defined($c) && exists $RESERVED_DECORATOR_SIGIL{$c}) { |
|
2438
|
0
|
|
|
|
|
0
|
$self->_die( |
|
2439
|
|
|
|
|
|
|
"'$c' is a reserved decorator sigil at line-start (tier 0)" |
|
2440
|
|
|
|
|
|
|
); |
|
2441
|
|
|
|
|
|
|
} |
|
2442
|
113
|
50
|
|
|
|
232
|
if (defined($c)) { |
|
2443
|
|
|
|
|
|
|
# Hot path: ASCII digit (the most common leaf). Dispatch |
|
2444
|
|
|
|
|
|
|
# straight to parse_number_or_datetime without hitting the |
|
2445
|
|
|
|
|
|
|
# other typed-leaf branches. |
|
2446
|
113
|
100
|
100
|
|
|
429
|
if ($c ge '0' && $c le '9') { |
|
2447
|
70
|
|
|
|
|
185
|
return $self->parse_number_or_datetime; |
|
2448
|
|
|
|
|
|
|
} |
|
2449
|
43
|
100
|
|
|
|
147
|
if ($c eq '"') { |
|
2450
|
26
|
100
|
|
|
|
81
|
return $self->parse_heredoc_basic if $self->_starts_with('"""'); |
|
2451
|
|
|
|
|
|
|
# Basic is the emitter's default for strings — no record. |
|
2452
|
15
|
|
|
|
|
57
|
return $self->parse_basic_string_value; |
|
2453
|
|
|
|
|
|
|
} |
|
2454
|
17
|
100
|
|
|
|
40
|
if ($c eq "'") { |
|
2455
|
9
|
100
|
|
|
|
22
|
return $self->parse_heredoc_literal if $self->_starts_with("'''"); |
|
2456
|
3
|
|
|
|
|
12
|
my $r = $self->parse_literal_string_value; |
|
2457
|
3
|
|
|
|
|
42
|
$self->_record_form({ string_form => { kind => 'literal' } }); |
|
2458
|
3
|
|
|
|
|
10
|
return $r; |
|
2459
|
|
|
|
|
|
|
} |
|
2460
|
8
|
100
|
|
|
|
23
|
return $self->parse_flow_array if $c eq '['; |
|
2461
|
7
|
100
|
|
|
|
33
|
return $self->parse_flow_table if $c eq '{'; |
|
2462
|
6
|
50
|
33
|
|
|
37
|
return $self->parse_bool_value if $c eq 't' || $c eq 'f'; |
|
2463
|
6
|
50
|
|
|
|
15
|
return $self->parse_inf_value if $c eq 'i'; |
|
2464
|
6
|
50
|
|
|
|
15
|
return $self->parse_nan_value if $c eq 'n'; |
|
2465
|
6
|
50
|
66
|
|
|
27
|
return $self->parse_number_or_datetime if $c eq '+' || $c eq '-'; |
|
2466
|
|
|
|
|
|
|
} |
|
2467
|
0
|
0
|
|
|
|
0
|
$self->_die("expected value") if !defined($c); |
|
2468
|
0
|
|
|
|
|
0
|
$self->_die("unexpected character '$c' in value"); |
|
2469
|
|
|
|
|
|
|
} |
|
2470
|
|
|
|
|
|
|
|
|
2471
|
|
|
|
|
|
|
sub parse_bool_value { |
|
2472
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
|
2473
|
0
|
0
|
0
|
|
|
0
|
if ($self->_starts_with("true") && _is_value_terminator($self->_peek_at(4))) { |
|
2474
|
0
|
|
|
|
|
0
|
$self->{pos} += 4; |
|
2475
|
0
|
|
|
|
|
0
|
return DMS::Parser::Bool->new(1); |
|
2476
|
|
|
|
|
|
|
} |
|
2477
|
0
|
0
|
0
|
|
|
0
|
if ($self->_starts_with("false") && _is_value_terminator($self->_peek_at(5))) { |
|
2478
|
0
|
|
|
|
|
0
|
$self->{pos} += 5; |
|
2479
|
0
|
|
|
|
|
0
|
return DMS::Parser::Bool->new(0); |
|
2480
|
|
|
|
|
|
|
} |
|
2481
|
0
|
|
|
|
|
0
|
$self->_die("expected value"); |
|
2482
|
|
|
|
|
|
|
} |
|
2483
|
|
|
|
|
|
|
|
|
2484
|
|
|
|
|
|
|
sub parse_inf_value { |
|
2485
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
|
2486
|
0
|
0
|
0
|
|
|
0
|
if ($self->_starts_with("inf") && _is_value_terminator($self->_peek_at(3))) { |
|
2487
|
0
|
|
|
|
|
0
|
$self->{pos} += 3; |
|
2488
|
0
|
|
|
|
|
0
|
return DMS::Parser::Float->new(9**9**9); |
|
2489
|
|
|
|
|
|
|
} |
|
2490
|
0
|
|
|
|
|
0
|
$self->_die("expected 'inf'"); |
|
2491
|
|
|
|
|
|
|
} |
|
2492
|
|
|
|
|
|
|
|
|
2493
|
|
|
|
|
|
|
sub parse_nan_value { |
|
2494
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
|
2495
|
0
|
0
|
0
|
|
|
0
|
if ($self->_starts_with("nan") && _is_value_terminator($self->_peek_at(3))) { |
|
2496
|
0
|
|
|
|
|
0
|
$self->{pos} += 3; |
|
2497
|
0
|
|
|
|
|
0
|
return DMS::Parser::Float->new(-(9**9**9) + (9**9**9)); |
|
2498
|
|
|
|
|
|
|
} |
|
2499
|
0
|
|
|
|
|
0
|
$self->_die("expected 'nan'"); |
|
2500
|
|
|
|
|
|
|
} |
|
2501
|
|
|
|
|
|
|
|
|
2502
|
|
|
|
|
|
|
sub parse_number_or_datetime { |
|
2503
|
76
|
|
|
76
|
0
|
116
|
my $self = shift; |
|
2504
|
76
|
|
|
|
|
155
|
my $base = $self->{pos}; |
|
2505
|
76
|
|
|
|
|
134
|
my $len_src = $self->{len}; |
|
2506
|
|
|
|
|
|
|
# Combined hot-path: a plain decimal integer that doesn't look like |
|
2507
|
|
|
|
|
|
|
# a date or time. One regex bite scans the token; a structural |
|
2508
|
|
|
|
|
|
|
# check rejects anything _parse_integer would die on. This single |
|
2509
|
|
|
|
|
|
|
# branch is taken by every leaf in flat-data benchmarks. |
|
2510
|
76
|
|
|
|
|
281
|
pos($self->{src}) = $base; |
|
2511
|
76
|
100
|
|
|
|
432
|
if ($self->{src} =~ /\G(0|[1-9][0-9]{0,17})(?![\d_.eExob:-])/g) { |
|
2512
|
52
|
|
|
|
|
155
|
my $tok = $1; |
|
2513
|
52
|
|
|
|
|
108
|
$self->{pos} = $base + length($tok); |
|
2514
|
52
|
|
|
|
|
138
|
my $iv = 0 + $tok; |
|
2515
|
52
|
|
|
|
|
210
|
return bless \$iv, 'DMS::Parser::Integer'; |
|
2516
|
|
|
|
|
|
|
} |
|
2517
|
|
|
|
|
|
|
# Inline first-char peek: avoid the function-call + substr in _peek. |
|
2518
|
24
|
|
|
|
|
56
|
my $first = substr($self->{src}, $base, 1); |
|
2519
|
24
|
|
100
|
|
|
81
|
my $starts_sign = ($first eq '+' || $first eq '-'); |
|
2520
|
24
|
100
|
|
|
|
61
|
if (!$starts_sign) { |
|
2521
|
|
|
|
|
|
|
# Combined date/time-prefix sniff, regex-only. Skips _looks_like_* |
|
2522
|
|
|
|
|
|
|
# which themselves do another substr+regex. |
|
2523
|
18
|
|
|
|
|
40
|
pos($self->{src}) = $base; |
|
2524
|
18
|
50
|
66
|
|
|
86
|
if ($len_src - $base >= 10 && $self->{src} =~ /\G\d\d\d\d-\d\d-\d\d/) { |
|
2525
|
0
|
|
|
|
|
0
|
return $self->parse_datetime_value; |
|
2526
|
|
|
|
|
|
|
} |
|
2527
|
18
|
50
|
66
|
|
|
74
|
if ($len_src - $base >= 8 && $self->{src} =~ /\G\d\d:\d\d:\d\d/) { |
|
2528
|
0
|
|
|
|
|
0
|
return $self->parse_local_time_value; |
|
2529
|
|
|
|
|
|
|
} |
|
2530
|
|
|
|
|
|
|
} |
|
2531
|
24
|
50
|
66
|
|
|
77
|
if ($starts_sign && substr($self->{src}, $base + 1, 3) eq 'inf') { |
|
2532
|
0
|
|
|
|
|
0
|
my $p4 = $base + 4; |
|
2533
|
0
|
0
|
|
|
|
0
|
my $next = $p4 >= $len_src ? undef : substr($self->{src}, $p4, 1); |
|
2534
|
0
|
0
|
|
|
|
0
|
if (_is_value_terminator($next)) { |
|
2535
|
0
|
|
|
|
|
0
|
my $neg = $first eq '-'; |
|
2536
|
0
|
|
|
|
|
0
|
$self->{pos} += 4; |
|
2537
|
0
|
0
|
|
|
|
0
|
return DMS::Parser::Float->new($neg ? -(9**9**9) : 9**9**9); |
|
2538
|
|
|
|
|
|
|
} |
|
2539
|
|
|
|
|
|
|
} |
|
2540
|
24
|
|
|
|
|
61
|
my ($len, $is_float) = $self->_scan_number_token; |
|
2541
|
24
|
|
|
|
|
55
|
my $s = substr($self->{src}, $base, $len); |
|
2542
|
24
|
50
|
|
|
|
88
|
if ($is_float) { |
|
2543
|
0
|
|
|
|
|
0
|
my $f; |
|
2544
|
0
|
|
|
|
|
0
|
eval { $f = _parse_float_val($s); }; |
|
|
0
|
|
|
|
|
0
|
|
|
2545
|
0
|
0
|
|
|
|
0
|
$self->_die("invalid float: $s ($@)") if $@; |
|
2546
|
0
|
|
|
|
|
0
|
$self->{pos} += $len; |
|
2547
|
0
|
|
|
|
|
0
|
return DMS::Parser::Float->new($f); |
|
2548
|
|
|
|
|
|
|
} |
|
2549
|
|
|
|
|
|
|
# Fast path: plain decimal (no sign, no leading-0, fits in 18 digits) |
|
2550
|
|
|
|
|
|
|
# never dies — skip the eval frame entirely. The validating regex |
|
2551
|
|
|
|
|
|
|
# rejects anything _parse_integer would error on. Bless inline to |
|
2552
|
|
|
|
|
|
|
# avoid a method call per leaf. |
|
2553
|
24
|
50
|
|
|
|
91
|
if ($s =~ /\A(?:0|[1-9][0-9]{0,17})\z/) { |
|
2554
|
0
|
|
|
|
|
0
|
$self->{pos} += $len; |
|
2555
|
0
|
|
|
|
|
0
|
my $iv = 0 + $s; |
|
2556
|
0
|
|
|
|
|
0
|
return bless \$iv, 'DMS::Parser::Integer'; |
|
2557
|
|
|
|
|
|
|
} |
|
2558
|
24
|
|
|
|
|
36
|
my $n; |
|
2559
|
24
|
|
|
|
|
46
|
eval { $n = _parse_integer($s); }; |
|
|
24
|
|
|
|
|
77
|
|
|
2560
|
24
|
50
|
|
|
|
79
|
if ($@) { my $msg = $@; $msg =~ s/\n.*//s; $self->_die($msg); } |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
2561
|
24
|
|
|
|
|
51
|
$self->{pos} += $len; |
|
2562
|
|
|
|
|
|
|
# Record original lexeme when it differs from the canonical |
|
2563
|
|
|
|
|
|
|
# "decimal, no underscores, no '+' sign" form the default emitter |
|
2564
|
|
|
|
|
|
|
# would produce. Hex/oct/bin, underscores, explicit '+' → recorded. |
|
2565
|
|
|
|
|
|
|
# Skipped entirely in lite mode (record_forms gate inside _record_form). |
|
2566
|
24
|
100
|
66
|
|
|
96
|
if (!$self->{lite} && $s ne $n) { |
|
2567
|
21
|
|
|
|
|
110
|
$self->_record_form({ integer_lit => $s }); |
|
2568
|
|
|
|
|
|
|
} |
|
2569
|
24
|
|
|
|
|
87
|
return DMS::Parser::Integer->new($n); |
|
2570
|
|
|
|
|
|
|
} |
|
2571
|
|
|
|
|
|
|
|
|
2572
|
|
|
|
|
|
|
sub _scan_number_token { |
|
2573
|
24
|
|
|
24
|
|
38
|
my $self = shift; |
|
2574
|
24
|
|
|
|
|
45
|
my $base = $self->{pos}; |
|
2575
|
24
|
|
|
|
|
59
|
pos($self->{src}) = $base; |
|
2576
|
|
|
|
|
|
|
# Fast path: plain decimal integer (no sign, no '.' / 'e' / 'x' / |
|
2577
|
|
|
|
|
|
|
# 'o' / 'b' prefix continuation). Most leaves in flat data are like |
|
2578
|
|
|
|
|
|
|
# this — skip the heavy alternation and avoid the post-scan token |
|
2579
|
|
|
|
|
|
|
# re-classification. The negative lookahead must reject anything |
|
2580
|
|
|
|
|
|
|
# that would extend the token under the slow grammar (digits, '_', |
|
2581
|
|
|
|
|
|
|
# '.', 'e'/'E', and the 0x/0o/0b prefix indicators). |
|
2582
|
24
|
50
|
|
|
|
91
|
if ($self->{src} =~ /\G(\d+)(?![\d_.eExob])/g) { |
|
2583
|
0
|
|
|
|
|
0
|
return (length($1), 0); |
|
2584
|
|
|
|
|
|
|
} |
|
2585
|
|
|
|
|
|
|
# One regex covers both prefixed (0x/0o/0b) and decimal forms. Behavior |
|
2586
|
|
|
|
|
|
|
# mirrors the per-char loop: the prefix branch accepts hex digits + |
|
2587
|
|
|
|
|
|
|
# underscores with at most one '.' and one 'p[+-]?' exponent; the |
|
2588
|
|
|
|
|
|
|
# decimal branch accepts digits + underscores with at most one '.' and |
|
2589
|
|
|
|
|
|
|
# one 'e[+-]?' exponent. |
|
2590
|
24
|
|
|
|
|
53
|
pos($self->{src}) = $base; |
|
2591
|
24
|
|
|
|
|
104
|
$self->{src} =~ m{ |
|
2592
|
|
|
|
|
|
|
\G [+-]? |
|
2593
|
|
|
|
|
|
|
(?: |
|
2594
|
|
|
|
|
|
|
0[xob] [0-9a-fA-F_]* (?: \. [0-9a-fA-F_]* )? (?: p [+-]? \d* )? |
|
2595
|
|
|
|
|
|
|
| [\d_]* (?: \. [\d_]* )? (?: [eE] [+-]? \d* )? |
|
2596
|
|
|
|
|
|
|
) |
|
2597
|
|
|
|
|
|
|
}gxc; |
|
2598
|
24
|
|
|
|
|
54
|
my $end = pos($self->{src}); |
|
2599
|
24
|
50
|
|
|
|
58
|
$end = $base unless defined $end; |
|
2600
|
24
|
|
|
|
|
47
|
my $len = $end - $base; |
|
2601
|
24
|
50
|
|
|
|
62
|
return (0, 0) if $len == 0; |
|
2602
|
24
|
|
|
|
|
56
|
my $tok = substr($self->{src}, $base, $len); |
|
2603
|
|
|
|
|
|
|
# In prefixed (0x/0o/0b) tokens 'e' is a hex digit, not an exponent — |
|
2604
|
|
|
|
|
|
|
# only '.' or 'p' make the token a float there. |
|
2605
|
24
|
100
|
|
|
|
95
|
if ($tok =~ /^[+-]?0[xob]/) { |
|
2606
|
13
|
50
|
|
|
|
58
|
return ($len, $tok =~ /[.p]/ ? 1 : 0); |
|
2607
|
|
|
|
|
|
|
} |
|
2608
|
11
|
50
|
|
|
|
49
|
return ($len, $tok =~ /[.eE]/ ? 1 : 0); |
|
2609
|
|
|
|
|
|
|
} |
|
2610
|
|
|
|
|
|
|
|
|
2611
|
|
|
|
|
|
|
sub _valid_underscores { |
|
2612
|
0
|
|
|
0
|
|
0
|
my ($s) = @_; |
|
2613
|
0
|
0
|
|
|
|
0
|
return 1 if length($s) == 0; |
|
2614
|
0
|
0
|
0
|
|
|
0
|
return 0 if substr($s,0,1) eq '_' || substr($s,-1,1) eq '_'; |
|
2615
|
0
|
|
|
|
|
0
|
my $prev = 0; |
|
2616
|
0
|
|
|
|
|
0
|
for my $c (split //, $s) { |
|
2617
|
0
|
0
|
|
|
|
0
|
if ($c eq '_') { return 0 if $prev; $prev = 1; } |
|
|
0
|
0
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
2618
|
0
|
|
|
|
|
0
|
else { $prev = 0; } |
|
2619
|
|
|
|
|
|
|
} |
|
2620
|
0
|
|
|
|
|
0
|
return 1; |
|
2621
|
|
|
|
|
|
|
} |
|
2622
|
|
|
|
|
|
|
|
|
2623
|
|
|
|
|
|
|
# Returns the canonical decimal string for an DMS integer literal. The hot |
|
2624
|
|
|
|
|
|
|
# path (decimal, no underscores, fits in i64) avoids Math::BigInt entirely |
|
2625
|
|
|
|
|
|
|
# because BigInt construction/arithmetic dominates the parser otherwise. |
|
2626
|
|
|
|
|
|
|
# Errors mirror the old Math::BigInt-based implementation byte-for-byte. |
|
2627
|
|
|
|
|
|
|
sub _parse_integer { |
|
2628
|
24
|
|
|
24
|
|
46
|
my ($s) = @_; |
|
2629
|
|
|
|
|
|
|
# Hot path: a plain decimal token (no sign, no leading zero, no |
|
2630
|
|
|
|
|
|
|
# underscore, fits in 18 chars i.e. always within i64). Covers the |
|
2631
|
|
|
|
|
|
|
# vast majority of real-world keys/leaves and skips ~10 substr + |
|
2632
|
|
|
|
|
|
|
# length checks below. |
|
2633
|
24
|
50
|
|
|
|
74
|
if ($s =~ /\A(?:0|[1-9][0-9]{0,17})\z/) { |
|
2634
|
0
|
|
|
|
|
0
|
return $s; |
|
2635
|
|
|
|
|
|
|
} |
|
2636
|
24
|
|
|
|
|
46
|
my $sign_str = ''; |
|
2637
|
24
|
|
|
|
|
37
|
my $is_neg = 0; |
|
2638
|
24
|
|
|
|
|
49
|
my $rest = $s; |
|
2639
|
24
|
|
|
|
|
51
|
my $first = substr($rest, 0, 1); |
|
2640
|
24
|
100
|
|
|
|
71
|
if ($first eq '-') { $sign_str = '-'; $is_neg = 1; $rest = substr($rest, 1); } |
|
|
3
|
100
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
7
|
|
|
2641
|
3
|
|
|
|
|
6
|
elsif ($first eq '+') { $rest = substr($rest, 1); } |
|
2642
|
24
|
50
|
|
|
|
61
|
die "hex prefix must be lowercase '0x'\n" if substr($rest, 0, 2) eq '0X'; |
|
2643
|
24
|
|
|
|
|
37
|
my $radix = 10; |
|
2644
|
24
|
|
|
|
|
38
|
my $body = $rest; |
|
2645
|
24
|
100
|
|
|
|
86
|
if (length($rest) >= 2) { |
|
2646
|
21
|
|
|
|
|
41
|
my $p2 = substr($rest, 0, 2); |
|
2647
|
21
|
100
|
|
|
|
64
|
if ($p2 eq '0x') { $radix = 16; $body = substr($rest, 2); } |
|
|
7
|
100
|
|
|
|
38
|
|
|
|
7
|
100
|
|
|
|
18
|
|
|
2648
|
3
|
|
|
|
|
6
|
elsif ($p2 eq '0o') { $radix = 8; $body = substr($rest, 2); } |
|
|
3
|
|
|
|
|
7
|
|
|
2649
|
3
|
|
|
|
|
7
|
elsif ($p2 eq '0b') { $radix = 2; $body = substr($rest, 2); } |
|
|
3
|
|
|
|
|
7
|
|
|
2650
|
|
|
|
|
|
|
} |
|
2651
|
24
|
50
|
|
|
|
68
|
die "empty number\n" if length($body) == 0; |
|
2652
|
24
|
50
|
33
|
|
|
111
|
die "underscore must be between digits\n" |
|
2653
|
|
|
|
|
|
|
if substr($body, 0, 1) eq '_' || substr($body, -1, 1) eq '_'; |
|
2654
|
24
|
50
|
100
|
|
|
108
|
if ($radix == 10 && length($rest) > 1 && substr($rest, 0, 1) eq '0') { |
|
|
|
|
66
|
|
|
|
|
|
2655
|
0
|
|
|
|
|
0
|
die "leading zeros are not allowed on decimal integers\n"; |
|
2656
|
|
|
|
|
|
|
} |
|
2657
|
24
|
50
|
|
|
|
61
|
die "underscore must be between digits\n" if index($body, '__') >= 0; |
|
2658
|
24
|
|
|
|
|
43
|
my $clean = $body; |
|
2659
|
24
|
100
|
|
|
|
67
|
$clean =~ tr/_//d if index($clean, '_') >= 0; |
|
2660
|
24
|
100
|
|
|
|
82
|
if ($radix == 10) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
2661
|
11
|
50
|
|
|
|
30
|
die "invalid digit for base 10\n" if $clean =~ /\D/; |
|
2662
|
|
|
|
|
|
|
} elsif ($radix == 16) { |
|
2663
|
7
|
50
|
|
|
|
29
|
die "invalid digit for base 16\n" if $clean =~ /[^0-9a-fA-F]/; |
|
2664
|
|
|
|
|
|
|
} elsif ($radix == 8) { |
|
2665
|
3
|
50
|
|
|
|
12
|
die "invalid digit for base 8\n" if $clean =~ /[^0-7]/; |
|
2666
|
|
|
|
|
|
|
} else { |
|
2667
|
3
|
50
|
|
|
|
11
|
die "invalid digit for base 2\n" if $clean =~ /[^01]/; |
|
2668
|
|
|
|
|
|
|
} |
|
2669
|
|
|
|
|
|
|
|
|
2670
|
24
|
100
|
|
|
|
57
|
if ($radix == 10) { |
|
2671
|
|
|
|
|
|
|
# Strip leading zeros (keep at least one digit). |
|
2672
|
11
|
|
|
|
|
25
|
$clean =~ s/^0+(?=\d)//; |
|
2673
|
11
|
100
|
|
|
|
26
|
my $bound = $is_neg ? '9223372036854775808' : '9223372036854775807'; |
|
2674
|
11
|
50
|
33
|
|
|
49
|
die "integer out of i64 range\n" |
|
|
|
|
33
|
|
|
|
|
|
2675
|
|
|
|
|
|
|
if length($clean) > length($bound) |
|
2676
|
|
|
|
|
|
|
|| (length($clean) == length($bound) && $clean gt $bound); |
|
2677
|
11
|
50
|
|
|
|
26
|
return '0' if $clean eq '0'; |
|
2678
|
11
|
|
|
|
|
42
|
return "${sign_str}${clean}"; |
|
2679
|
|
|
|
|
|
|
} |
|
2680
|
|
|
|
|
|
|
|
|
2681
|
|
|
|
|
|
|
# Non-decimal: cap by max digit count for i64 magnitude. |
|
2682
|
13
|
50
|
66
|
|
|
92
|
if ($radix == 16 && length($clean) > 16) { die "integer out of i64 range\n"; } |
|
|
0
|
50
|
66
|
|
|
0
|
|
|
|
|
50
|
66
|
|
|
|
|
|
2683
|
0
|
|
|
|
|
0
|
elsif ($radix == 8 && length($clean) > 22) { die "integer out of i64 range\n"; } |
|
2684
|
0
|
|
|
|
|
0
|
elsif ($radix == 2 && length($clean) > 64) { die "integer out of i64 range\n"; } |
|
2685
|
|
|
|
|
|
|
|
|
2686
|
13
|
|
|
|
|
27
|
my $val_lc = lc $clean; |
|
2687
|
|
|
|
|
|
|
# Detect magnitudes in the high half (>= 2^63) that don't fit a signed |
|
2688
|
|
|
|
|
|
|
# 64-bit positive value. Allowed only as the exact i64 minimum: -2^63. |
|
2689
|
13
|
|
|
|
|
23
|
my $high_half = 0; |
|
2690
|
13
|
100
|
33
|
|
|
35
|
if ($radix == 16) { $high_half = length($val_lc) == 16 && substr($val_lc, 0, 1) ge '8'; } |
|
|
7
|
100
|
|
|
|
22
|
|
|
2691
|
3
|
|
33
|
|
|
9
|
elsif ($radix == 8) { $high_half = length($val_lc) == 22 && substr($val_lc, 0, 1) gt '0'; } |
|
2692
|
3
|
|
33
|
|
|
49
|
else { $high_half = length($val_lc) == 64 && substr($val_lc, 0, 1) eq '1'; } |
|
2693
|
13
|
50
|
|
|
|
31
|
if ($high_half) { |
|
2694
|
0
|
0
|
|
|
|
0
|
if ($is_neg) { |
|
2695
|
0
|
0
|
0
|
|
|
0
|
return '-9223372036854775808' |
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2696
|
|
|
|
|
|
|
if ($radix == 16 && $val_lc eq '8000000000000000') |
|
2697
|
|
|
|
|
|
|
|| ($radix == 8 && $val_lc eq '1000000000000000000000') |
|
2698
|
|
|
|
|
|
|
|| ($radix == 2 && $val_lc eq '1' . ('0' x 63)); |
|
2699
|
|
|
|
|
|
|
} |
|
2700
|
0
|
|
|
|
|
0
|
die "integer out of i64 range\n"; |
|
2701
|
|
|
|
|
|
|
} |
|
2702
|
|
|
|
|
|
|
|
|
2703
|
13
|
|
|
|
|
22
|
my $native; |
|
2704
|
13
|
100
|
|
|
|
40
|
if ($radix == 16) { $native = hex($val_lc); } |
|
|
7
|
100
|
|
|
|
15
|
|
|
2705
|
3
|
|
|
|
|
10
|
elsif ($radix == 8) { $native = oct("0$val_lc"); } |
|
2706
|
3
|
|
|
|
|
7
|
else { $native = oct("0b$val_lc"); } |
|
2707
|
13
|
50
|
|
|
|
32
|
return '0' if $native == 0; |
|
2708
|
13
|
|
|
|
|
59
|
return "${sign_str}$native"; |
|
2709
|
|
|
|
|
|
|
} |
|
2710
|
|
|
|
|
|
|
|
|
2711
|
|
|
|
|
|
|
sub _parse_float_val { |
|
2712
|
0
|
|
|
0
|
|
0
|
my ($s) = @_; |
|
2713
|
0
|
|
|
|
|
0
|
my $sign = 1.0; |
|
2714
|
0
|
|
|
|
|
0
|
my $rest = $s; |
|
2715
|
0
|
0
|
|
|
|
0
|
if (substr($rest,0,1) eq '-') { $sign = -1.0; $rest = substr($rest, 1); } |
|
|
0
|
0
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
2716
|
0
|
|
|
|
|
0
|
elsif (substr($rest,0,1) eq '+') { $rest = substr($rest, 1); } |
|
2717
|
0
|
0
|
0
|
|
|
0
|
if (substr($rest,0,2) eq '0x' || substr($rest,0,2) eq '0o' || substr($rest,0,2) eq '0b') { |
|
|
|
|
0
|
|
|
|
|
|
2718
|
0
|
|
|
|
|
0
|
return $sign * _parse_nondec_float($rest); |
|
2719
|
|
|
|
|
|
|
} |
|
2720
|
0
|
|
|
|
|
0
|
return $sign * _parse_dec_float($rest); |
|
2721
|
|
|
|
|
|
|
} |
|
2722
|
|
|
|
|
|
|
|
|
2723
|
|
|
|
|
|
|
sub _parse_dec_float { |
|
2724
|
0
|
|
|
0
|
|
0
|
my ($s) = @_; |
|
2725
|
0
|
|
|
|
|
0
|
my $e_idx = -1; |
|
2726
|
0
|
|
|
|
|
0
|
for (my $i = 0; $i < length($s); $i++) { |
|
2727
|
0
|
|
|
|
|
0
|
my $c = substr($s, $i, 1); |
|
2728
|
0
|
0
|
0
|
|
|
0
|
if ($c eq 'e' || $c eq 'E') { $e_idx = $i; last; } |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
2729
|
|
|
|
|
|
|
} |
|
2730
|
0
|
0
|
|
|
|
0
|
my $m = $e_idx == -1 ? $s : substr($s, 0, $e_idx); |
|
2731
|
0
|
0
|
|
|
|
0
|
my $e = $e_idx == -1 ? undef : substr($s, $e_idx+1); |
|
2732
|
0
|
0
|
|
|
|
0
|
die "decimal float requires '.'\n" if index($m, '.') < 0; |
|
2733
|
0
|
|
|
|
|
0
|
my @parts = split /\./, $m, 2; |
|
2734
|
0
|
0
|
0
|
|
|
0
|
die "decimal float requires digit on both sides of '.'\n" |
|
|
|
|
0
|
|
|
|
|
|
2735
|
|
|
|
|
|
|
if @parts != 2 || $parts[0] eq '' || $parts[1] eq ''; |
|
2736
|
0
|
0
|
|
|
|
0
|
die "invalid character in mantissa\n" if $parts[0] =~ /[^\d_]/; |
|
2737
|
0
|
0
|
|
|
|
0
|
die "invalid character in mantissa\n" if $parts[1] =~ /[^\d_]/; |
|
2738
|
0
|
0
|
0
|
|
|
0
|
die "bad underscore in mantissa\n" |
|
2739
|
|
|
|
|
|
|
unless _valid_underscores($parts[0]) && _valid_underscores($parts[1]); |
|
2740
|
0
|
|
|
|
|
0
|
my $full = $parts[0]; $full =~ s/_//g; $full .= '.'; |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
2741
|
0
|
|
|
|
|
0
|
my $frac = $parts[1]; $frac =~ s/_//g; $full .= $frac; |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
2742
|
0
|
0
|
|
|
|
0
|
if (defined $e) { |
|
2743
|
0
|
|
|
|
|
0
|
my $e_clean = $e; $e_clean =~ s/^[+-]//; |
|
|
0
|
|
|
|
|
0
|
|
|
2744
|
0
|
0
|
|
|
|
0
|
die "underscore not allowed in exponent\n" if $e_clean =~ /_/; |
|
2745
|
0
|
0
|
|
|
|
0
|
die "invalid character in exponent\n" if $e =~ /[^\d+-]/; |
|
2746
|
0
|
0
|
|
|
|
0
|
die "empty exponent\n" if $e_clean eq ''; |
|
2747
|
0
|
|
|
|
|
0
|
$full .= 'e' . $e; |
|
2748
|
|
|
|
|
|
|
} |
|
2749
|
0
|
|
|
|
|
0
|
return 0 + $full; |
|
2750
|
|
|
|
|
|
|
} |
|
2751
|
|
|
|
|
|
|
|
|
2752
|
|
|
|
|
|
|
sub _parse_nondec_float { |
|
2753
|
0
|
|
|
0
|
|
0
|
my ($s) = @_; |
|
2754
|
0
|
|
|
|
|
0
|
my ($radix, $rest); |
|
2755
|
0
|
0
|
|
|
|
0
|
if (substr($s,0,2) eq '0x') { $radix = 16; $rest = substr($s,2); } |
|
|
0
|
0
|
|
|
|
0
|
|
|
|
0
|
0
|
|
|
|
0
|
|
|
2756
|
0
|
|
|
|
|
0
|
elsif (substr($s,0,2) eq '0o') { $radix = 8; $rest = substr($s,2); } |
|
|
0
|
|
|
|
|
0
|
|
|
2757
|
0
|
|
|
|
|
0
|
elsif (substr($s,0,2) eq '0b') { $radix = 2; $rest = substr($s,2); } |
|
|
0
|
|
|
|
|
0
|
|
|
2758
|
0
|
|
|
|
|
0
|
else { die "non-decimal float prefix required\n"; } |
|
2759
|
0
|
|
|
|
|
0
|
my $p_idx = index($rest, 'p'); |
|
2760
|
0
|
0
|
|
|
|
0
|
die "non-decimal float requires 'p' exponent\n" if $p_idx < 0; |
|
2761
|
0
|
|
|
|
|
0
|
my $mant = substr($rest, 0, $p_idx); |
|
2762
|
0
|
|
|
|
|
0
|
my $exp_str = substr($rest, $p_idx+1); |
|
2763
|
0
|
0
|
|
|
|
0
|
die "empty exponent\n" if $exp_str eq ''; |
|
2764
|
0
|
0
|
|
|
|
0
|
die "underscore not allowed in exponent\n" if $exp_str =~ /_/; |
|
2765
|
0
|
0
|
|
|
|
0
|
die "invalid exponent character\n" if $exp_str =~ /[^\d+-]/; |
|
2766
|
0
|
|
|
|
|
0
|
my $exp = int($exp_str); |
|
2767
|
0
|
|
|
|
|
0
|
my ($ip, $fp); |
|
2768
|
0
|
0
|
|
|
|
0
|
if (index($mant, '.') >= 0) { |
|
2769
|
0
|
|
|
|
|
0
|
($ip, $fp) = split /\./, $mant, 2; |
|
2770
|
0
|
0
|
0
|
|
|
0
|
die "digit required on both sides of '.'\n" if $ip eq '' || $fp eq ''; |
|
2771
|
|
|
|
|
|
|
} else { |
|
2772
|
0
|
|
|
|
|
0
|
$ip = $mant; $fp = ''; |
|
|
0
|
|
|
|
|
0
|
|
|
2773
|
|
|
|
|
|
|
} |
|
2774
|
0
|
0
|
0
|
|
|
0
|
die "bad underscore\n" unless _valid_underscores($ip) && _valid_underscores($fp); |
|
2775
|
0
|
|
|
|
|
0
|
$ip =~ s/_//g; $fp =~ s/_//g; |
|
|
0
|
|
|
|
|
0
|
|
|
2776
|
0
|
|
|
|
|
0
|
my $digit_chars = substr("0123456789abcdef", 0, $radix); |
|
2777
|
0
|
|
|
|
|
0
|
for my $c (split //, $ip) { |
|
2778
|
0
|
0
|
|
|
|
0
|
die "invalid digit for base $radix\n" if index($digit_chars, lc $c) < 0; |
|
2779
|
|
|
|
|
|
|
} |
|
2780
|
0
|
|
|
|
|
0
|
for my $c (split //, $fp) { |
|
2781
|
0
|
0
|
|
|
|
0
|
die "invalid digit for base $radix\n" if index($digit_chars, lc $c) < 0; |
|
2782
|
|
|
|
|
|
|
} |
|
2783
|
0
|
0
|
|
|
|
0
|
my $int_val = $ip eq '' ? 0 : hex_to_int($ip, $radix); |
|
2784
|
0
|
|
|
|
|
0
|
my $frac_val = 0.0; |
|
2785
|
0
|
|
|
|
|
0
|
my $div = $radix * 1.0; |
|
2786
|
0
|
|
|
|
|
0
|
for my $c (split //, $fp) { |
|
2787
|
0
|
|
|
|
|
0
|
my $d = index("0123456789abcdef", lc $c); |
|
2788
|
0
|
|
|
|
|
0
|
$frac_val += $d / $div; |
|
2789
|
0
|
|
|
|
|
0
|
$div *= $radix; |
|
2790
|
|
|
|
|
|
|
} |
|
2791
|
0
|
|
|
|
|
0
|
return ($int_val + $frac_val) * (2 ** $exp); |
|
2792
|
|
|
|
|
|
|
} |
|
2793
|
|
|
|
|
|
|
|
|
2794
|
|
|
|
|
|
|
sub hex_to_int { |
|
2795
|
0
|
|
|
0
|
0
|
0
|
my ($s, $radix) = @_; |
|
2796
|
0
|
|
|
|
|
0
|
my $v = 0; |
|
2797
|
0
|
|
|
|
|
0
|
for my $c (split //, $s) { |
|
2798
|
0
|
|
|
|
|
0
|
$v = $v * $radix + index("0123456789abcdef", lc $c); |
|
2799
|
|
|
|
|
|
|
} |
|
2800
|
0
|
|
|
|
|
0
|
return $v; |
|
2801
|
|
|
|
|
|
|
} |
|
2802
|
|
|
|
|
|
|
|
|
2803
|
|
|
|
|
|
|
sub _days_in_month { |
|
2804
|
0
|
|
|
0
|
|
0
|
my ($y, $m) = @_; |
|
2805
|
0
|
0
|
0
|
|
|
0
|
return 31 if $m == 1 || $m == 3 || $m == 5 || $m == 7 || $m == 8 || $m == 10 || $m == 12; |
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2806
|
0
|
0
|
0
|
|
|
0
|
return 30 if $m == 4 || $m == 6 || $m == 9 || $m == 11; |
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2807
|
0
|
0
|
|
|
|
0
|
if ($m == 2) { |
|
2808
|
0
|
0
|
0
|
|
|
0
|
return ((($y % 4 == 0) && ($y % 100 != 0)) || ($y % 400 == 0)) ? 29 : 28; |
|
2809
|
|
|
|
|
|
|
} |
|
2810
|
0
|
|
|
|
|
0
|
return 0; |
|
2811
|
|
|
|
|
|
|
} |
|
2812
|
|
|
|
|
|
|
|
|
2813
|
|
|
|
|
|
|
sub _validate_date { |
|
2814
|
0
|
|
|
0
|
|
0
|
my ($s) = @_; |
|
2815
|
0
|
0
|
0
|
|
|
0
|
die "invalid date format\n" |
|
|
|
|
0
|
|
|
|
|
|
2816
|
|
|
|
|
|
|
if length($s) != 10 || substr($s,4,1) ne '-' || substr($s,7,1) ne '-'; |
|
2817
|
0
|
|
|
|
|
0
|
for my $i (0,1,2,3,5,6,8,9) { |
|
2818
|
0
|
0
|
|
|
|
0
|
die "date must be all digits\n" if substr($s,$i,1) !~ /\d/; |
|
2819
|
|
|
|
|
|
|
} |
|
2820
|
0
|
|
|
|
|
0
|
my $y = int(substr($s,0,4)); |
|
2821
|
0
|
|
|
|
|
0
|
my $mo = int(substr($s,5,2)); |
|
2822
|
0
|
|
|
|
|
0
|
my $d = int(substr($s,8,2)); |
|
2823
|
0
|
0
|
0
|
|
|
0
|
die "month out of range\n" if $mo < 1 || $mo > 12; |
|
2824
|
0
|
0
|
0
|
|
|
0
|
die "day out of range\n" if $d < 1 || $d > _days_in_month($y, $mo); |
|
2825
|
|
|
|
|
|
|
} |
|
2826
|
|
|
|
|
|
|
|
|
2827
|
|
|
|
|
|
|
sub _validate_time { |
|
2828
|
0
|
|
|
0
|
|
0
|
my ($s) = @_; |
|
2829
|
0
|
0
|
0
|
|
|
0
|
die "invalid time format\n" |
|
|
|
|
0
|
|
|
|
|
|
2830
|
|
|
|
|
|
|
if length($s) != 8 || substr($s,2,1) ne ':' || substr($s,5,1) ne ':'; |
|
2831
|
0
|
|
|
|
|
0
|
for my $i (0,1,3,4,6,7) { |
|
2832
|
0
|
0
|
|
|
|
0
|
die "time must be all digits\n" if substr($s,$i,1) !~ /\d/; |
|
2833
|
|
|
|
|
|
|
} |
|
2834
|
0
|
|
|
|
|
0
|
my $h = int(substr($s,0,2)); |
|
2835
|
0
|
|
|
|
|
0
|
my $m = int(substr($s,3,2)); |
|
2836
|
0
|
|
|
|
|
0
|
my $sec = int(substr($s,6,2)); |
|
2837
|
0
|
0
|
|
|
|
0
|
die "hour out of range\n" if $h > 23; |
|
2838
|
0
|
0
|
|
|
|
0
|
die "minute out of range\n" if $m > 59; |
|
2839
|
0
|
0
|
|
|
|
0
|
die "second out of range (leap seconds not supported)\n" if $sec > 59; |
|
2840
|
|
|
|
|
|
|
} |
|
2841
|
|
|
|
|
|
|
|
|
2842
|
|
|
|
|
|
|
sub parse_datetime_value { |
|
2843
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
|
2844
|
0
|
|
|
|
|
0
|
my $rest = substr($self->{src}, $self->{pos}); |
|
2845
|
0
|
|
|
|
|
0
|
my $date = substr($rest, 0, 10); |
|
2846
|
0
|
|
|
|
|
0
|
eval { _validate_date($date); }; |
|
|
0
|
|
|
|
|
0
|
|
|
2847
|
0
|
0
|
|
|
|
0
|
if ($@) { my $msg = $@; chomp $msg; $self->_die($msg); } |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
2848
|
0
|
|
|
|
|
0
|
my $rest2 = substr($rest, 10); |
|
2849
|
0
|
0
|
0
|
|
|
0
|
if (substr($rest2,0,1) ne 'T' && substr($rest2,0,1) ne ' ') { |
|
2850
|
0
|
0
|
|
|
|
0
|
if (substr($rest2,0,1) eq 't') { |
|
2851
|
0
|
|
|
|
|
0
|
$self->_die("date and time separator must be uppercase 'T' (lowercase 't' not permitted)"); |
|
2852
|
|
|
|
|
|
|
} |
|
2853
|
0
|
|
|
|
|
0
|
my $after = substr($rest2,0,1); |
|
2854
|
0
|
0
|
|
|
|
0
|
$after = undef if $after eq ''; |
|
2855
|
0
|
0
|
|
|
|
0
|
$self->_die("invalid character after date") unless _is_value_terminator($after); |
|
2856
|
0
|
|
|
|
|
0
|
$self->{pos} += 10; |
|
2857
|
0
|
|
|
|
|
0
|
return DMS::Parser::LocalDate->new($date); |
|
2858
|
|
|
|
|
|
|
} |
|
2859
|
0
|
0
|
|
|
|
0
|
if (substr($rest2,0,1) eq ' ') { |
|
2860
|
0
|
|
|
|
|
0
|
(my $after_ws = $rest2) =~ s/\A[ \t]+//; |
|
2861
|
0
|
0
|
0
|
|
|
0
|
if (length($after_ws) > 0 && substr($after_ws, 0, 1) =~ /\d/) { |
|
2862
|
0
|
|
|
|
|
0
|
$self->_die("date and time must be separated by 'T' (space not permitted)"); |
|
2863
|
|
|
|
|
|
|
} |
|
2864
|
0
|
|
|
|
|
0
|
$self->{pos} += 10; |
|
2865
|
0
|
|
|
|
|
0
|
return DMS::Parser::LocalDate->new($date); |
|
2866
|
|
|
|
|
|
|
} |
|
2867
|
0
|
|
|
|
|
0
|
my $after_t = substr($rest2, 1); |
|
2868
|
0
|
0
|
|
|
|
0
|
$self->_die("expected HH:MM:SS after 'T'") unless _looks_like_time_prefix($after_t); |
|
2869
|
0
|
|
|
|
|
0
|
my $time_str = substr($after_t, 0, 8); |
|
2870
|
0
|
|
|
|
|
0
|
eval { _validate_time($time_str); }; |
|
|
0
|
|
|
|
|
0
|
|
|
2871
|
0
|
0
|
|
|
|
0
|
if ($@) { my $msg = $@; chomp $msg; $self->_die($msg); } |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
2872
|
0
|
|
|
|
|
0
|
my $consumed = 10 + 1 + 8; |
|
2873
|
0
|
|
|
|
|
0
|
my $after_time = substr($rest, $consumed); |
|
2874
|
0
|
|
|
|
|
0
|
my $frac_len = 0; |
|
2875
|
0
|
0
|
|
|
|
0
|
if (substr($after_time,0,1) eq '.') { |
|
2876
|
0
|
|
|
|
|
0
|
my $k = 1; |
|
2877
|
0
|
|
0
|
|
|
0
|
while ($k < length($after_time) && substr($after_time,$k,1) =~ /\d/) { $k++; } |
|
|
0
|
|
|
|
|
0
|
|
|
2878
|
0
|
|
|
|
|
0
|
my $digits = $k - 1; |
|
2879
|
0
|
0
|
|
|
|
0
|
$self->_die("expected fractional digits after '.'") if $digits == 0; |
|
2880
|
0
|
0
|
|
|
|
0
|
$self->_die("fractional seconds limited to 9 digits (nanosecond precision)") if $digits > 9; |
|
2881
|
0
|
|
|
|
|
0
|
$frac_len = $k; |
|
2882
|
|
|
|
|
|
|
} |
|
2883
|
0
|
|
|
|
|
0
|
$consumed += $frac_len; |
|
2884
|
0
|
|
|
|
|
0
|
my $after_frac = substr($rest, $consumed); |
|
2885
|
0
|
0
|
0
|
|
|
0
|
if (substr($after_frac,0,1) eq 'Z' || substr($after_frac,0,1) eq 'z') { |
|
2886
|
0
|
|
|
|
|
0
|
$consumed += 1; |
|
2887
|
0
|
|
|
|
|
0
|
my $s = substr($rest, 0, $consumed); |
|
2888
|
0
|
|
|
|
|
0
|
$self->{pos} += $consumed; |
|
2889
|
0
|
|
|
|
|
0
|
return DMS::Parser::OffsetDateTime->new($s); |
|
2890
|
|
|
|
|
|
|
} |
|
2891
|
0
|
0
|
0
|
|
|
0
|
if (substr($after_frac,0,1) eq '+' || substr($after_frac,0,1) eq '-') { |
|
2892
|
0
|
0
|
0
|
|
|
0
|
if (length($after_frac) < 6 |
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2893
|
|
|
|
|
|
|
|| substr($after_frac,1,1) !~ /\d/ || substr($after_frac,2,1) !~ /\d/ |
|
2894
|
|
|
|
|
|
|
|| substr($after_frac,3,1) ne ':' |
|
2895
|
|
|
|
|
|
|
|| substr($after_frac,4,1) !~ /\d/ || substr($after_frac,5,1) !~ /\d/) { |
|
2896
|
0
|
|
|
|
|
0
|
$self->_die("invalid offset; expected ±HH:MM"); |
|
2897
|
|
|
|
|
|
|
} |
|
2898
|
0
|
|
|
|
|
0
|
my $oh = int(substr($after_frac,1,2)); |
|
2899
|
0
|
|
|
|
|
0
|
my $om = int(substr($after_frac,4,2)); |
|
2900
|
0
|
0
|
0
|
|
|
0
|
$self->_die("offset out of range") if $oh > 23 || $om > 59; |
|
2901
|
0
|
|
|
|
|
0
|
$consumed += 6; |
|
2902
|
0
|
|
|
|
|
0
|
my $s = substr($rest, 0, $consumed); |
|
2903
|
0
|
|
|
|
|
0
|
$self->{pos} += $consumed; |
|
2904
|
0
|
|
|
|
|
0
|
return DMS::Parser::OffsetDateTime->new($s); |
|
2905
|
|
|
|
|
|
|
} |
|
2906
|
0
|
|
|
|
|
0
|
my $after = substr($after_frac,0,1); |
|
2907
|
0
|
0
|
|
|
|
0
|
$after = undef if $after eq ''; |
|
2908
|
0
|
0
|
|
|
|
0
|
$self->_die("invalid character after datetime") unless _is_value_terminator($after); |
|
2909
|
0
|
|
|
|
|
0
|
my $s = substr($rest, 0, $consumed); |
|
2910
|
0
|
|
|
|
|
0
|
$self->{pos} += $consumed; |
|
2911
|
0
|
|
|
|
|
0
|
return DMS::Parser::LocalDateTime->new($s); |
|
2912
|
|
|
|
|
|
|
} |
|
2913
|
|
|
|
|
|
|
|
|
2914
|
|
|
|
|
|
|
sub parse_local_time_value { |
|
2915
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
|
2916
|
0
|
|
|
|
|
0
|
my $rest = substr($self->{src}, $self->{pos}); |
|
2917
|
0
|
|
|
|
|
0
|
my $time_str = substr($rest, 0, 8); |
|
2918
|
0
|
|
|
|
|
0
|
eval { _validate_time($time_str); }; |
|
|
0
|
|
|
|
|
0
|
|
|
2919
|
0
|
0
|
|
|
|
0
|
if ($@) { my $msg = $@; chomp $msg; $self->_die($msg); } |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
2920
|
0
|
|
|
|
|
0
|
my $consumed = 8; |
|
2921
|
0
|
|
|
|
|
0
|
my $after = substr($rest, $consumed); |
|
2922
|
0
|
0
|
|
|
|
0
|
if (substr($after,0,1) eq '.') { |
|
2923
|
0
|
|
|
|
|
0
|
my $k = 1; |
|
2924
|
0
|
|
0
|
|
|
0
|
while ($k < length($after) && substr($after,$k,1) =~ /\d/) { $k++; } |
|
|
0
|
|
|
|
|
0
|
|
|
2925
|
0
|
|
|
|
|
0
|
my $digits = $k - 1; |
|
2926
|
0
|
0
|
|
|
|
0
|
$self->_die("expected fractional digits after '.'") if $digits == 0; |
|
2927
|
0
|
0
|
|
|
|
0
|
$self->_die("fractional seconds limited to 9 digits") if $digits > 9; |
|
2928
|
0
|
|
|
|
|
0
|
$consumed += $k; |
|
2929
|
|
|
|
|
|
|
} |
|
2930
|
0
|
|
|
|
|
0
|
my $after2 = substr($rest, $consumed); |
|
2931
|
0
|
|
|
|
|
0
|
my $nxt = substr($after2,0,1); |
|
2932
|
0
|
0
|
|
|
|
0
|
$nxt = undef if $nxt eq ''; |
|
2933
|
0
|
0
|
|
|
|
0
|
$self->_die("invalid character after time") unless _is_value_terminator($nxt); |
|
2934
|
0
|
|
|
|
|
0
|
my $s = substr($rest, 0, $consumed); |
|
2935
|
0
|
|
|
|
|
0
|
$self->{pos} += $consumed; |
|
2936
|
0
|
|
|
|
|
0
|
return DMS::Parser::LocalTime->new($s); |
|
2937
|
|
|
|
|
|
|
} |
|
2938
|
|
|
|
|
|
|
|
|
2939
|
|
|
|
|
|
|
sub parse_basic_string_value { |
|
2940
|
15
|
|
|
15
|
0
|
29
|
my $self = shift; |
|
2941
|
15
|
|
|
|
|
51
|
my ($sl, $sls, $sp) = ($self->{line}, $self->{line_start}, $self->{pos}); |
|
2942
|
15
|
|
|
|
|
29
|
$self->{pos}++; |
|
2943
|
15
|
|
|
|
|
28
|
my $out = ''; |
|
2944
|
15
|
|
|
|
|
32
|
my $src = $self->{src}; |
|
2945
|
15
|
|
|
|
|
29
|
my $len = $self->{len}; |
|
2946
|
15
|
|
|
|
|
51
|
pos($src) = $self->{pos}; |
|
2947
|
15
|
|
|
|
|
59
|
while (1) { |
|
2948
|
|
|
|
|
|
|
# Bulk-scan a run of safe characters (anything but ", \, or line-end) |
|
2949
|
|
|
|
|
|
|
# in one regex op. The previous per-char _peek loop did O(N) Perl-VM |
|
2950
|
|
|
|
|
|
|
# round-trips; this reduces typical string parsing to a single regex |
|
2951
|
|
|
|
|
|
|
# plus per-escape handling. parse_basic_string_value was the |
|
2952
|
|
|
|
|
|
|
# heaviest leaf at 19% of decode time on bench_realistic. |
|
2953
|
18
|
50
|
|
|
|
78
|
if ($src =~ /\G([^"\\\n\r]*)/gc) { |
|
2954
|
18
|
100
|
|
|
|
92
|
$out .= $1 if length($1); |
|
2955
|
|
|
|
|
|
|
} |
|
2956
|
18
|
|
|
|
|
31
|
my $p = pos($src); |
|
2957
|
18
|
50
|
|
|
|
56
|
if ($p >= $len) { |
|
2958
|
0
|
|
|
|
|
0
|
$self->{pos} = $p; |
|
2959
|
0
|
|
|
|
|
0
|
die $self->_err_at($sl, $sls, $sp, "unterminated string"); |
|
2960
|
|
|
|
|
|
|
} |
|
2961
|
18
|
|
|
|
|
43
|
my $c = substr($src, $p, 1); |
|
2962
|
18
|
100
|
|
|
|
48
|
if ($c eq '"') { |
|
2963
|
15
|
|
|
|
|
31
|
$self->{pos} = $p + 1; |
|
2964
|
|
|
|
|
|
|
# SPEC §Unicode normalization: re-NFC after escape decoding. |
|
2965
|
15
|
50
|
|
|
|
88
|
return $out !~ /[^\x00-\x7F]/ ? $out : _NFC($out); |
|
2966
|
|
|
|
|
|
|
} |
|
2967
|
3
|
50
|
33
|
|
|
16
|
if ($c eq "\n" || $c eq "\r") { |
|
2968
|
0
|
|
|
|
|
0
|
$self->{pos} = $p; |
|
2969
|
0
|
|
|
|
|
0
|
$self->_die("strings cannot span lines"); |
|
2970
|
|
|
|
|
|
|
} |
|
2971
|
|
|
|
|
|
|
# $c is '\\' — handle the escape, then resume the bulk scan. |
|
2972
|
3
|
|
|
|
|
6
|
$p++; # past the backslash |
|
2973
|
3
|
50
|
|
|
|
8
|
if ($p >= $len) { |
|
2974
|
0
|
|
|
|
|
0
|
$self->{pos} = $p; |
|
2975
|
0
|
|
|
|
|
0
|
$self->_die("unterminated escape"); |
|
2976
|
|
|
|
|
|
|
} |
|
2977
|
3
|
|
|
|
|
7
|
my $esc = substr($src, $p, 1); |
|
2978
|
3
|
|
|
|
|
4
|
$p++; |
|
2979
|
3
|
50
|
0
|
|
|
13
|
if ($esc eq '"') { $out .= '"'; } |
|
|
0
|
50
|
|
|
|
0
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2980
|
0
|
|
|
|
|
0
|
elsif ($esc eq '\\') { $out .= '\\'; } |
|
2981
|
3
|
|
|
|
|
7
|
elsif ($esc eq 'n') { $out .= "\n"; } |
|
2982
|
0
|
|
|
|
|
0
|
elsif ($esc eq 't') { $out .= "\t"; } |
|
2983
|
0
|
|
|
|
|
0
|
elsif ($esc eq 'r') { $out .= "\r"; } |
|
2984
|
0
|
|
|
|
|
0
|
elsif ($esc eq 'b') { $out .= "\b"; } |
|
2985
|
0
|
|
|
|
|
0
|
elsif ($esc eq 'f') { $out .= "\f"; } |
|
2986
|
|
|
|
|
|
|
elsif ($esc eq 'u' || $esc eq 'U') { |
|
2987
|
|
|
|
|
|
|
# _read_hex_codepoint reads from $self->{pos}; sync it first. |
|
2988
|
0
|
|
|
|
|
0
|
$self->{pos} = $p; |
|
2989
|
0
|
0
|
|
|
|
0
|
$out .= $self->_read_hex_codepoint($esc eq 'u' ? 4 : 8); |
|
2990
|
0
|
|
|
|
|
0
|
$p = $self->{pos}; |
|
2991
|
|
|
|
|
|
|
} |
|
2992
|
|
|
|
|
|
|
else { |
|
2993
|
0
|
|
|
|
|
0
|
$self->{pos} = $p; |
|
2994
|
0
|
|
|
|
|
0
|
$self->_die("invalid escape '\\$esc'"); |
|
2995
|
|
|
|
|
|
|
} |
|
2996
|
3
|
|
|
|
|
10
|
pos($src) = $p; |
|
2997
|
|
|
|
|
|
|
} |
|
2998
|
|
|
|
|
|
|
} |
|
2999
|
|
|
|
|
|
|
|
|
3000
|
|
|
|
|
|
|
sub parse_literal_string_value { |
|
3001
|
3
|
|
|
3
|
0
|
5
|
my $self = shift; |
|
3002
|
3
|
|
|
|
|
10
|
my ($sl, $sls, $sp) = ($self->{line}, $self->{line_start}, $self->{pos}); |
|
3003
|
3
|
|
|
|
|
5
|
$self->{pos}++; |
|
3004
|
3
|
|
|
|
|
22
|
my $out = ''; |
|
3005
|
3
|
|
|
|
|
4
|
while (1) { |
|
3006
|
24
|
50
|
|
|
|
51
|
if ($self->_eof) { |
|
3007
|
0
|
|
|
|
|
0
|
die $self->_err_at($sl, $sls, $sp, "unterminated string"); |
|
3008
|
|
|
|
|
|
|
} |
|
3009
|
24
|
|
|
|
|
53
|
my $c = $self->_peek; |
|
3010
|
24
|
50
|
33
|
|
|
147
|
$self->_die("strings cannot span lines") if $c eq "\n" || $c eq "\r"; |
|
3011
|
24
|
100
|
|
|
|
52
|
if ($c eq "'") { $self->{pos}++; return $out; } |
|
|
3
|
|
|
|
|
5
|
|
|
|
3
|
|
|
|
|
10
|
|
|
3012
|
21
|
|
|
|
|
39
|
$self->{pos}++; |
|
3013
|
21
|
|
|
|
|
36
|
$out .= $c; |
|
3014
|
|
|
|
|
|
|
} |
|
3015
|
|
|
|
|
|
|
} |
|
3016
|
|
|
|
|
|
|
|
|
3017
|
|
|
|
|
|
|
sub _read_hex_codepoint { |
|
3018
|
0
|
|
|
0
|
|
0
|
my ($self, $n) = @_; |
|
3019
|
0
|
|
|
|
|
0
|
my $rest = substr($self->{src}, $self->{pos}); |
|
3020
|
0
|
0
|
|
|
|
0
|
$self->_die("expected $n hex digits in unicode escape") if length($rest) < $n; |
|
3021
|
0
|
|
|
|
|
0
|
my $hex = substr($rest, 0, $n); |
|
3022
|
0
|
0
|
|
|
|
0
|
$self->_die("invalid hex in unicode escape: $hex") if $hex !~ /^[0-9a-fA-F]+$/; |
|
3023
|
0
|
|
|
|
|
0
|
my $v = hex($hex); |
|
3024
|
|
|
|
|
|
|
# SPEC: U+0000 is forbidden anywhere in DMS source, including via |
|
3025
|
|
|
|
|
|
|
# escape decoding. ` ` / `\U00000000` must not slip through. |
|
3026
|
0
|
0
|
|
|
|
0
|
if ($v == 0) { |
|
3027
|
0
|
|
|
|
|
0
|
$self->_die("\\u0000 escape forbidden"); |
|
3028
|
|
|
|
|
|
|
} |
|
3029
|
0
|
0
|
0
|
|
|
0
|
if ($v >= 0xD800 && $v <= 0xDFFF) { |
|
3030
|
0
|
|
|
|
|
0
|
$self->_die(sprintf("surrogate codepoint U+%04X in escape", $v)); |
|
3031
|
|
|
|
|
|
|
} |
|
3032
|
0
|
0
|
|
|
|
0
|
if ($v > 0x10FFFF) { |
|
3033
|
0
|
|
|
|
|
0
|
$self->_die("unicode escape is not a scalar value"); |
|
3034
|
|
|
|
|
|
|
} |
|
3035
|
0
|
|
|
|
|
0
|
$self->{pos} += $n; |
|
3036
|
0
|
|
|
|
|
0
|
return chr($v); |
|
3037
|
|
|
|
|
|
|
} |
|
3038
|
|
|
|
|
|
|
|
|
3039
|
|
|
|
|
|
|
# Heredocs |
|
3040
|
|
|
|
|
|
|
|
|
3041
|
|
|
|
|
|
|
sub parse_heredoc_basic { |
|
3042
|
11
|
|
|
11
|
0
|
19
|
my $self = shift; |
|
3043
|
11
|
|
|
|
|
22
|
$self->{pos} += 3; |
|
3044
|
11
|
|
|
|
|
32
|
my $label = $self->_parse_heredoc_label; |
|
3045
|
11
|
|
|
|
|
33
|
my $mods = $self->_parse_heredoc_modifiers; |
|
3046
|
11
|
|
|
|
|
33
|
$self->_skip_inline_ws; |
|
3047
|
11
|
50
|
33
|
|
|
39
|
if (!($self->_consume_eol || $self->_eof)) { |
|
3048
|
0
|
|
|
|
|
0
|
$self->_die("heredoc opener must be followed by end of line"); |
|
3049
|
|
|
|
|
|
|
} |
|
3050
|
11
|
100
|
|
|
|
32
|
my $terminator = length($label) ? $label : '"""'; |
|
3051
|
11
|
|
|
|
|
34
|
my $body = $self->_collect_heredoc_body($terminator); |
|
3052
|
|
|
|
|
|
|
# SPEC §basic-string escapes: surrogate codepoints (U+D800..U+DFFF) |
|
3053
|
|
|
|
|
|
|
# and U+0000 are not valid in `\uXXXX` / `\UXXXXXXXX` escapes. |
|
3054
|
|
|
|
|
|
|
# Basic-heredoc bodies are kept raw, so we validate the rules by |
|
3055
|
|
|
|
|
|
|
# scanning the body for offending escape sequences. |
|
3056
|
11
|
|
|
|
|
32
|
_validate_heredoc_basic_escapes($body); |
|
3057
|
11
|
|
|
|
|
25
|
my $stripped = _strip_indent_and_continuations($body, 1); |
|
3058
|
11
|
|
|
|
|
18
|
my $out; |
|
3059
|
11
|
|
|
|
|
23
|
eval { $out = _apply_modifiers($stripped, $mods); }; |
|
|
11
|
|
|
|
|
28
|
|
|
3060
|
11
|
50
|
|
|
|
31
|
if ($@) { my $msg = $@; chomp $msg; $self->_die($msg); } |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
3061
|
|
|
|
|
|
|
$self->_record_form({ |
|
3062
|
|
|
|
|
|
|
string_form => { |
|
3063
|
|
|
|
|
|
|
kind => 'heredoc', |
|
3064
|
|
|
|
|
|
|
flavor => 'basic_triple', |
|
3065
|
|
|
|
|
|
|
label => (length($label) ? $label : undef), |
|
3066
|
11
|
100
|
|
|
|
121
|
modifiers => [ map { { name => $_->{name}, args => $_->{args} } } @$mods ], |
|
|
3
|
|
|
|
|
37
|
|
|
3067
|
|
|
|
|
|
|
}, |
|
3068
|
|
|
|
|
|
|
}); |
|
3069
|
|
|
|
|
|
|
# SPEC §Unicode normalization: re-NFC after escape decoding. |
|
3070
|
11
|
50
|
|
|
|
89
|
return $out !~ /[^\x00-\x7F]/ ? $out : _NFC($out); |
|
3071
|
|
|
|
|
|
|
} |
|
3072
|
|
|
|
|
|
|
|
|
3073
|
|
|
|
|
|
|
# SPEC §basic-string escapes: a `\uXXXX` / `\UXXXXXXXX` escape whose |
|
3074
|
|
|
|
|
|
|
# decoded value falls in the surrogate range U+D800..U+DFFF is not a |
|
3075
|
|
|
|
|
|
|
# Unicode scalar and is a parse error. Likewise U+0000 is forbidden. |
|
3076
|
|
|
|
|
|
|
# Basic-heredoc body lines are collected raw, so we validate the same |
|
3077
|
|
|
|
|
|
|
# rules by scanning the body for offending escape sequences. |
|
3078
|
|
|
|
|
|
|
sub _validate_heredoc_basic_escapes { |
|
3079
|
11
|
|
|
11
|
|
23
|
my ($body) = @_; |
|
3080
|
11
|
|
|
|
|
16
|
for my $ln (@{$body->{lines}}) { |
|
|
11
|
|
|
|
|
31
|
|
|
3081
|
11
|
|
|
|
|
21
|
my $text = $ln->{text}; |
|
3082
|
11
|
|
|
|
|
21
|
my $len = length($text); |
|
3083
|
11
|
|
|
|
|
18
|
my $i = 0; |
|
3084
|
11
|
|
|
|
|
28
|
while ($i < $len) { |
|
3085
|
83
|
50
|
|
|
|
184
|
if (substr($text, $i, 1) eq '\\') { |
|
3086
|
|
|
|
|
|
|
# find run of consecutive backslashes |
|
3087
|
0
|
|
|
|
|
0
|
my $j = $i; |
|
3088
|
0
|
|
0
|
|
|
0
|
while ($j < $len && substr($text, $j, 1) eq '\\') { $j++; } |
|
|
0
|
|
|
|
|
0
|
|
|
3089
|
0
|
|
|
|
|
0
|
my $run = $j - $i; |
|
3090
|
0
|
0
|
0
|
|
|
0
|
if ($run % 2 == 1 && $j < $len) { |
|
3091
|
0
|
|
|
|
|
0
|
my $intro = substr($text, $j, 1); |
|
3092
|
0
|
0
|
|
|
|
0
|
my $n = ($intro eq 'u') ? 4 : ($intro eq 'U') ? 8 : 0; |
|
|
|
0
|
|
|
|
|
|
|
3093
|
0
|
0
|
0
|
|
|
0
|
if ($n > 0 && $j + 1 + $n <= $len) { |
|
3094
|
0
|
|
|
|
|
0
|
my $hex = substr($text, $j + 1, $n); |
|
3095
|
0
|
0
|
|
|
|
0
|
if ($hex =~ /^[0-9a-fA-F]+$/) { |
|
3096
|
0
|
|
|
|
|
0
|
my $cp = hex($hex); |
|
3097
|
0
|
|
|
|
|
0
|
my $esc_off = $j - 1; |
|
3098
|
0
|
0
|
|
|
|
0
|
if ($cp == 0) { |
|
3099
|
|
|
|
|
|
|
die sprintf("%d:%d: \\u0000 escape forbidden\n", |
|
3100
|
0
|
|
|
|
|
0
|
$ln->{line}, $esc_off + 1); |
|
3101
|
|
|
|
|
|
|
} |
|
3102
|
0
|
0
|
0
|
|
|
0
|
if ($cp >= 0xD800 && $cp <= 0xDFFF) { |
|
3103
|
|
|
|
|
|
|
die sprintf("%d:%d: surrogate codepoint U+%04X in escape\n", |
|
3104
|
0
|
|
|
|
|
0
|
$ln->{line}, $esc_off + 1, $cp); |
|
3105
|
|
|
|
|
|
|
} |
|
3106
|
|
|
|
|
|
|
} |
|
3107
|
|
|
|
|
|
|
} |
|
3108
|
|
|
|
|
|
|
} |
|
3109
|
0
|
|
|
|
|
0
|
$i = $j; |
|
3110
|
|
|
|
|
|
|
} else { |
|
3111
|
83
|
|
|
|
|
207
|
$i++; |
|
3112
|
|
|
|
|
|
|
} |
|
3113
|
|
|
|
|
|
|
} |
|
3114
|
|
|
|
|
|
|
} |
|
3115
|
|
|
|
|
|
|
} |
|
3116
|
|
|
|
|
|
|
|
|
3117
|
|
|
|
|
|
|
sub parse_heredoc_literal { |
|
3118
|
6
|
|
|
6
|
0
|
11
|
my $self = shift; |
|
3119
|
6
|
|
|
|
|
12
|
$self->{pos} += 3; |
|
3120
|
6
|
|
|
|
|
53
|
my $label = $self->_parse_heredoc_label; |
|
3121
|
6
|
|
|
|
|
17
|
my $mods = $self->_parse_heredoc_modifiers; |
|
3122
|
6
|
|
|
|
|
20
|
$self->_skip_inline_ws; |
|
3123
|
6
|
50
|
33
|
|
|
15
|
if (!($self->_consume_eol || $self->_eof)) { |
|
3124
|
0
|
|
|
|
|
0
|
$self->_die("heredoc opener must be followed by end of line"); |
|
3125
|
|
|
|
|
|
|
} |
|
3126
|
6
|
100
|
|
|
|
16
|
my $terminator = length($label) ? $label : "'''"; |
|
3127
|
6
|
|
|
|
|
15
|
my $body = $self->_collect_heredoc_body($terminator); |
|
3128
|
6
|
|
|
|
|
16
|
my $stripped = _strip_indent_and_continuations($body, 0); |
|
3129
|
6
|
|
|
|
|
11
|
my $out; |
|
3130
|
6
|
|
|
|
|
13
|
eval { $out = _apply_modifiers($stripped, $mods); }; |
|
|
6
|
|
|
|
|
14
|
|
|
3131
|
6
|
50
|
|
|
|
17
|
if ($@) { my $msg = $@; chomp $msg; $self->_die($msg); } |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
3132
|
|
|
|
|
|
|
$self->_record_form({ |
|
3133
|
|
|
|
|
|
|
string_form => { |
|
3134
|
|
|
|
|
|
|
kind => 'heredoc', |
|
3135
|
|
|
|
|
|
|
flavor => 'literal_triple', |
|
3136
|
|
|
|
|
|
|
label => (length($label) ? $label : undef), |
|
3137
|
6
|
100
|
|
|
|
87
|
modifiers => [ map { { name => $_->{name}, args => $_->{args} } } @$mods ], |
|
|
0
|
|
|
|
|
0
|
|
|
3138
|
|
|
|
|
|
|
}, |
|
3139
|
|
|
|
|
|
|
}); |
|
3140
|
6
|
|
|
|
|
32
|
return $out; |
|
3141
|
|
|
|
|
|
|
} |
|
3142
|
|
|
|
|
|
|
|
|
3143
|
|
|
|
|
|
|
sub _parse_heredoc_label { |
|
3144
|
17
|
|
|
17
|
|
57
|
my $self = shift; |
|
3145
|
17
|
|
|
|
|
42
|
my $c = $self->_peek; |
|
3146
|
17
|
100
|
|
|
|
41
|
return '' if !_is_label_start($c); |
|
3147
|
11
|
|
|
|
|
26
|
my $start = $self->{pos}; |
|
3148
|
11
|
|
|
|
|
17
|
while (1) { |
|
3149
|
44
|
|
|
|
|
110
|
my $c2 = $self->_peek; |
|
3150
|
44
|
100
|
|
|
|
82
|
last if !_is_label_cont($c2); |
|
3151
|
33
|
|
|
|
|
60
|
$self->{pos}++; |
|
3152
|
|
|
|
|
|
|
} |
|
3153
|
11
|
|
|
|
|
34
|
return substr($self->{src}, $start, $self->{pos} - $start); |
|
3154
|
|
|
|
|
|
|
} |
|
3155
|
|
|
|
|
|
|
|
|
3156
|
|
|
|
|
|
|
sub _parse_heredoc_modifiers { |
|
3157
|
17
|
|
|
17
|
|
29
|
my $self = shift; |
|
3158
|
17
|
|
|
|
|
31
|
my @mods; |
|
3159
|
17
|
|
|
|
|
35
|
while (1) { |
|
3160
|
20
|
|
|
|
|
38
|
my $ws_start = $self->{pos}; |
|
3161
|
20
|
|
|
|
|
55
|
$self->_skip_inline_ws; |
|
3162
|
20
|
|
|
|
|
40
|
my $had_ws = $self->{pos} > $ws_start; |
|
3163
|
20
|
|
|
|
|
41
|
my $c = $self->_peek; |
|
3164
|
20
|
100
|
66
|
|
|
62
|
if (defined($c) && _is_label_start($c)) { |
|
3165
|
3
|
50
|
|
|
|
10
|
$self->_die("modifier must be preceded by whitespace") unless $had_ws; |
|
3166
|
3
|
|
|
|
|
10
|
push @mods, $self->_parse_one_modifier; |
|
3167
|
|
|
|
|
|
|
} else { |
|
3168
|
17
|
|
|
|
|
31
|
$self->{pos} = $ws_start; |
|
3169
|
17
|
|
|
|
|
46
|
return \@mods; |
|
3170
|
|
|
|
|
|
|
} |
|
3171
|
|
|
|
|
|
|
} |
|
3172
|
|
|
|
|
|
|
} |
|
3173
|
|
|
|
|
|
|
|
|
3174
|
|
|
|
|
|
|
sub _parse_one_modifier { |
|
3175
|
3
|
|
|
3
|
|
5
|
my $self = shift; |
|
3176
|
3
|
|
|
|
|
7
|
my $ns = $self->{pos}; |
|
3177
|
3
|
|
|
|
|
5
|
while (1) { |
|
3178
|
18
|
|
|
|
|
35
|
my $c = $self->_peek; |
|
3179
|
18
|
100
|
|
|
|
33
|
last if !_is_label_cont($c); |
|
3180
|
15
|
|
|
|
|
27
|
$self->{pos}++; |
|
3181
|
|
|
|
|
|
|
} |
|
3182
|
3
|
|
|
|
|
9
|
my $name = substr($self->{src}, $ns, $self->{pos} - $ns); |
|
3183
|
3
|
50
|
|
|
|
8
|
$self->_die("modifiers require parentheses") if $self->_peek ne '('; |
|
3184
|
3
|
|
|
|
|
7
|
$self->{pos}++; |
|
3185
|
|
|
|
|
|
|
# Suppress original-form recording for modifier args: they're |
|
3186
|
|
|
|
|
|
|
# parse-time values used as call arguments and must not pollute the |
|
3187
|
|
|
|
|
|
|
# host heredoc node's original_forms slot. |
|
3188
|
3
|
|
|
|
|
7
|
my $saved = $self->{record_forms}; |
|
3189
|
3
|
|
|
|
|
5
|
$self->{record_forms} = 0; |
|
3190
|
3
|
|
|
|
|
5
|
my @args; |
|
3191
|
3
|
|
|
|
|
6
|
my $ok = eval { |
|
3192
|
3
|
|
|
|
|
5
|
while (1) { |
|
3193
|
6
|
|
|
|
|
16
|
$self->_skip_inline_ws; |
|
3194
|
6
|
50
|
|
|
|
13
|
if ($self->_peek eq ')') { $self->{pos}++; last; } |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
3195
|
6
|
|
|
|
|
35
|
push @args, $self->parse_inline_value_or_heredoc; |
|
3196
|
6
|
|
|
|
|
18
|
$self->_skip_inline_ws; |
|
3197
|
6
|
|
|
|
|
14
|
my $c = $self->_peek; |
|
3198
|
6
|
100
|
|
|
|
18
|
if ($c eq ',') { $self->{pos}++; } |
|
|
3
|
50
|
|
|
|
7
|
|
|
3199
|
3
|
|
|
|
|
5
|
elsif ($c eq ')') { $self->{pos}++; last; } |
|
|
3
|
|
|
|
|
7
|
|
|
3200
|
0
|
|
|
|
|
0
|
else { $self->_die("expected ',' or ')' in modifier args"); } |
|
3201
|
|
|
|
|
|
|
} |
|
3202
|
3
|
|
|
|
|
7
|
1; |
|
3203
|
|
|
|
|
|
|
}; |
|
3204
|
3
|
|
|
|
|
5
|
my $err = $@; |
|
3205
|
3
|
|
|
|
|
7
|
$self->{record_forms} = $saved; |
|
3206
|
3
|
50
|
|
|
|
8
|
die $err if !$ok; |
|
3207
|
3
|
|
|
|
|
45
|
return { name => $name, args => \@args }; |
|
3208
|
|
|
|
|
|
|
} |
|
3209
|
|
|
|
|
|
|
|
|
3210
|
|
|
|
|
|
|
sub _collect_heredoc_body { |
|
3211
|
17
|
|
|
17
|
|
37
|
my ($self, $terminator) = @_; |
|
3212
|
17
|
|
|
|
|
30
|
my @lines; |
|
3213
|
17
|
|
|
|
|
49
|
my ($sl, $sls, $sp) = ($self->{line}, $self->{line_start}, $self->{pos}); |
|
3214
|
17
|
|
|
|
|
28
|
while (1) { |
|
3215
|
34
|
50
|
|
|
|
73
|
if ($self->_eof) { |
|
3216
|
0
|
|
|
|
|
0
|
die $self->_err_at($sl, $sls, $sp, "unterminated heredoc"); |
|
3217
|
|
|
|
|
|
|
} |
|
3218
|
34
|
|
|
|
|
65
|
my $lb = $self->{pos}; |
|
3219
|
34
|
|
|
|
|
51
|
while (1) { |
|
3220
|
235
|
|
|
|
|
439
|
my $c = $self->_peek; |
|
3221
|
235
|
100
|
66
|
|
|
1083
|
last if !defined($c) || $c eq "\n" || $c eq "\r"; |
|
|
|
|
66
|
|
|
|
|
|
3222
|
201
|
|
|
|
|
327
|
$self->{pos}++; |
|
3223
|
|
|
|
|
|
|
} |
|
3224
|
34
|
|
|
|
|
77
|
my $raw = substr($self->{src}, $lb, $self->{pos} - $lb); |
|
3225
|
34
|
|
|
|
|
80
|
my ($this_line, $this_lstart) = ($self->{line}, $self->{line_start}); |
|
3226
|
34
|
|
|
|
|
68
|
my $trimmed = $raw; $trimmed =~ s/^\s+|\s+$//g; |
|
|
34
|
|
|
|
|
239
|
|
|
3227
|
34
|
100
|
|
|
|
111
|
if ($trimmed eq $terminator) { |
|
3228
|
17
|
|
|
|
|
58
|
my $strip_depth = 0; |
|
3229
|
17
|
|
|
|
|
61
|
for my $c (split //, $raw) { |
|
3230
|
51
|
100
|
|
|
|
110
|
if ($c eq ' ') { $strip_depth++; } else { last; } |
|
|
34
|
|
|
|
|
77
|
|
|
|
17
|
|
|
|
|
33
|
|
|
3231
|
|
|
|
|
|
|
} |
|
3232
|
17
|
|
|
|
|
84
|
return { lines => \@lines, strip_depth => $strip_depth }; |
|
3233
|
|
|
|
|
|
|
} |
|
3234
|
17
|
|
|
|
|
46
|
$self->_consume_eol; |
|
3235
|
17
|
|
|
|
|
100
|
push @lines, { text => $raw, line => $this_line, line_start => $this_lstart }; |
|
3236
|
|
|
|
|
|
|
} |
|
3237
|
|
|
|
|
|
|
} |
|
3238
|
|
|
|
|
|
|
|
|
3239
|
|
|
|
|
|
|
sub _strip_indent_and_continuations { |
|
3240
|
17
|
|
|
17
|
|
37
|
my ($body, $allow_cont) = @_; |
|
3241
|
17
|
|
|
|
|
26
|
my @out; |
|
3242
|
17
|
|
|
|
|
30
|
my $first = 1; |
|
3243
|
17
|
|
|
|
|
24
|
my $pending = 0; |
|
3244
|
17
|
|
|
|
|
36
|
my @last = (1, 0); |
|
3245
|
17
|
|
|
|
|
30
|
for my $ln (@{$body->{lines}}) { |
|
|
17
|
|
|
|
|
39
|
|
|
3246
|
17
|
|
|
|
|
61
|
@last = ($ln->{line}, $ln->{line_start}); |
|
3247
|
17
|
|
|
|
|
75
|
my $is_blank = ($ln->{text} =~ /\A[ \t]*\z/); |
|
3248
|
17
|
|
|
|
|
32
|
my $stripped; |
|
3249
|
17
|
50
|
|
|
|
35
|
if ($is_blank) { |
|
3250
|
0
|
|
|
|
|
0
|
$stripped = ''; |
|
3251
|
|
|
|
|
|
|
} else { |
|
3252
|
17
|
|
|
|
|
26
|
my $leading = 0; |
|
3253
|
17
|
|
|
|
|
60
|
for my $c (split //, $ln->{text}) { |
|
3254
|
51
|
100
|
|
|
|
97
|
if ($c eq ' ') { $leading++; } else { last; } |
|
|
34
|
|
|
|
|
64
|
|
|
|
17
|
|
|
|
|
26
|
|
|
3255
|
|
|
|
|
|
|
} |
|
3256
|
17
|
50
|
|
|
|
55
|
if ($leading < $body->{strip_depth}) { |
|
3257
|
|
|
|
|
|
|
die sprintf("%d:%d: heredoc body line indented %d spaces, less than strip depth %d\n", |
|
3258
|
0
|
|
|
|
|
0
|
$ln->{line}, $leading + 1, $leading, $body->{strip_depth}); |
|
3259
|
|
|
|
|
|
|
} |
|
3260
|
17
|
|
|
|
|
46
|
$stripped = substr($ln->{text}, $body->{strip_depth}); |
|
3261
|
|
|
|
|
|
|
} |
|
3262
|
17
|
|
|
|
|
27
|
my $piece = $stripped; |
|
3263
|
17
|
|
|
|
|
28
|
my $splice = 0; |
|
3264
|
17
|
100
|
|
|
|
41
|
if ($allow_cont) { |
|
3265
|
11
|
|
|
|
|
19
|
my $trimmed_end = $piece; $trimmed_end =~ s/[ \t]+$//; |
|
|
11
|
|
|
|
|
35
|
|
|
3266
|
11
|
|
|
|
|
22
|
my $idx = rindex($trimmed_end, '\\'); |
|
3267
|
11
|
50
|
33
|
|
|
31
|
if ($idx != -1 && $idx == length($trimmed_end) - 1) { |
|
3268
|
0
|
|
|
|
|
0
|
my $preceding = 0; |
|
3269
|
0
|
|
0
|
|
|
0
|
for (my $k = $idx - 1; $k >= 0 && substr($trimmed_end,$k,1) eq '\\'; $k--) { |
|
3270
|
0
|
|
|
|
|
0
|
$preceding++; |
|
3271
|
|
|
|
|
|
|
} |
|
3272
|
0
|
0
|
|
|
|
0
|
if ($preceding % 2 == 0) { |
|
3273
|
0
|
|
|
|
|
0
|
$piece = substr($trimmed_end, 0, $idx); |
|
3274
|
0
|
|
|
|
|
0
|
$splice = 1; |
|
3275
|
|
|
|
|
|
|
} |
|
3276
|
|
|
|
|
|
|
} |
|
3277
|
|
|
|
|
|
|
} |
|
3278
|
17
|
50
|
|
|
|
59
|
if ($first) { |
|
|
|
0
|
|
|
|
|
|
|
3279
|
17
|
|
|
|
|
37
|
push @out, $piece; |
|
3280
|
17
|
|
|
|
|
29
|
$first = 0; |
|
3281
|
|
|
|
|
|
|
} elsif ($pending) { |
|
3282
|
0
|
|
|
|
|
0
|
my $trimmed_start = $piece; $trimmed_start =~ s/^[ \t]+//; |
|
|
0
|
|
|
|
|
0
|
|
|
3283
|
0
|
0
|
|
|
|
0
|
push @out, $trimmed_start unless $is_blank; |
|
3284
|
|
|
|
|
|
|
} else { |
|
3285
|
0
|
|
|
|
|
0
|
push @out, "\n", $piece; |
|
3286
|
|
|
|
|
|
|
} |
|
3287
|
17
|
|
|
|
|
38
|
$pending = $splice; |
|
3288
|
|
|
|
|
|
|
} |
|
3289
|
17
|
50
|
|
|
|
42
|
if ($pending) { |
|
3290
|
0
|
|
|
|
|
0
|
die sprintf("%d:1: trailing line continuation has nothing to splice to\n", $last[0]); |
|
3291
|
|
|
|
|
|
|
} |
|
3292
|
17
|
|
|
|
|
58
|
return join('', @out); |
|
3293
|
|
|
|
|
|
|
} |
|
3294
|
|
|
|
|
|
|
|
|
3295
|
|
|
|
|
|
|
sub _fold_paragraphs { |
|
3296
|
0
|
|
|
0
|
|
0
|
my ($s) = @_; |
|
3297
|
0
|
|
|
|
|
0
|
my @paras = split /\n\n/, $s, -1; |
|
3298
|
|
|
|
|
|
|
return join("\n", map { |
|
3299
|
0
|
|
|
|
|
0
|
join(' ', grep { length($_) } split /\n/, $_, -1); |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
3300
|
|
|
|
|
|
|
} @paras); |
|
3301
|
|
|
|
|
|
|
} |
|
3302
|
|
|
|
|
|
|
|
|
3303
|
|
|
|
|
|
|
sub _replace_all_runs { |
|
3304
|
0
|
|
|
0
|
|
0
|
my ($s, $charset, $replacement) = @_; |
|
3305
|
0
|
|
|
|
|
0
|
my $out = ''; |
|
3306
|
0
|
|
|
|
|
0
|
my @chars = split //, $s; |
|
3307
|
0
|
|
|
|
|
0
|
my $i = 0; |
|
3308
|
0
|
|
|
|
|
0
|
while ($i < @chars) { |
|
3309
|
0
|
0
|
|
|
|
0
|
if ($charset->{$chars[$i]}) { |
|
3310
|
0
|
|
0
|
|
|
0
|
while ($i < @chars && $charset->{$chars[$i]}) { $i++; } |
|
|
0
|
|
|
|
|
0
|
|
|
3311
|
0
|
|
|
|
|
0
|
$out .= $replacement; |
|
3312
|
|
|
|
|
|
|
} else { |
|
3313
|
0
|
|
|
|
|
0
|
$out .= $chars[$i]; |
|
3314
|
0
|
|
|
|
|
0
|
$i++; |
|
3315
|
|
|
|
|
|
|
} |
|
3316
|
|
|
|
|
|
|
} |
|
3317
|
0
|
|
|
|
|
0
|
return $out; |
|
3318
|
|
|
|
|
|
|
} |
|
3319
|
|
|
|
|
|
|
|
|
3320
|
|
|
|
|
|
|
sub _replace_leading_run { |
|
3321
|
0
|
|
|
0
|
|
0
|
my ($s, $charset, $replacement) = @_; |
|
3322
|
0
|
|
|
|
|
0
|
my @chars = split //, $s; |
|
3323
|
0
|
|
|
|
|
0
|
my $end = 0; |
|
3324
|
0
|
|
0
|
|
|
0
|
while ($end < @chars && $charset->{$chars[$end]}) { $end++; } |
|
|
0
|
|
|
|
|
0
|
|
|
3325
|
0
|
0
|
|
|
|
0
|
return $s if $end == 0; |
|
3326
|
0
|
|
|
|
|
0
|
return $replacement . join('', @chars[$end..$#chars]); |
|
3327
|
|
|
|
|
|
|
} |
|
3328
|
|
|
|
|
|
|
|
|
3329
|
|
|
|
|
|
|
sub _replace_trailing_run { |
|
3330
|
3
|
|
|
3
|
|
8
|
my ($s, $charset, $replacement) = @_; |
|
3331
|
3
|
|
|
|
|
9
|
my @chars = split //, $s; |
|
3332
|
3
|
|
|
|
|
7
|
my $start = scalar @chars; |
|
3333
|
3
|
|
33
|
|
|
17
|
while ($start > 0 && $charset->{$chars[$start-1]}) { $start--; } |
|
|
0
|
|
|
|
|
0
|
|
|
3334
|
3
|
50
|
|
|
|
15
|
return $s if $start == @chars; |
|
3335
|
0
|
|
|
|
|
0
|
return join('', @chars[0..$start-1]) . $replacement; |
|
3336
|
|
|
|
|
|
|
} |
|
3337
|
|
|
|
|
|
|
|
|
3338
|
|
|
|
|
|
|
sub _per_line_edges { |
|
3339
|
0
|
|
|
0
|
|
0
|
my ($s, $charset, $replacement) = @_; |
|
3340
|
0
|
|
|
|
|
0
|
my @lines = split /\n/, $s, -1; |
|
3341
|
0
|
|
|
|
|
0
|
for my $line (@lines) { |
|
3342
|
0
|
|
|
|
|
0
|
$line = _replace_leading_run($line, $charset, $replacement); |
|
3343
|
0
|
|
|
|
|
0
|
$line = _replace_trailing_run($line, $charset, $replacement); |
|
3344
|
|
|
|
|
|
|
} |
|
3345
|
0
|
|
|
|
|
0
|
return join("\n", @lines); |
|
3346
|
|
|
|
|
|
|
} |
|
3347
|
|
|
|
|
|
|
|
|
3348
|
|
|
|
|
|
|
sub _apply_trim { |
|
3349
|
3
|
|
|
3
|
|
10
|
my ($s, $chars, $where, $replacement) = @_; |
|
3350
|
3
|
50
|
|
|
|
8
|
return $s if length($chars) == 0; |
|
3351
|
3
|
|
|
|
|
12
|
my %charset = map { $_ => 1 } split //, $chars; |
|
|
3
|
|
|
|
|
14
|
|
|
3352
|
3
|
|
|
|
|
8
|
my $has_star = (index($where, '*') >= 0); |
|
3353
|
3
|
|
|
|
|
7
|
my $has_pipe = (index($where, '|') >= 0); |
|
3354
|
3
|
|
|
|
|
7
|
my $has_lt = (index($where, '<') >= 0); |
|
3355
|
3
|
|
|
|
|
5
|
my $has_gt = (index($where, '>') >= 0); |
|
3356
|
3
|
50
|
33
|
|
|
37
|
return $s if !($has_star || $has_pipe || $has_lt || $has_gt); |
|
|
|
|
33
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
3357
|
3
|
50
|
|
|
|
7
|
return _replace_all_runs($s, \%charset, $replacement) if $has_star; |
|
3358
|
3
|
|
|
|
|
7
|
my $cur = $s; |
|
3359
|
3
|
50
|
|
|
|
7
|
$cur = _per_line_edges($cur, \%charset, $replacement) if $has_pipe; |
|
3360
|
3
|
50
|
|
|
|
8
|
$cur = _replace_leading_run($cur, \%charset, $replacement) if $has_lt; |
|
3361
|
3
|
50
|
|
|
|
13
|
$cur = _replace_trailing_run($cur, \%charset, $replacement) if $has_gt; |
|
3362
|
3
|
|
|
|
|
12
|
return $cur; |
|
3363
|
|
|
|
|
|
|
} |
|
3364
|
|
|
|
|
|
|
|
|
3365
|
|
|
|
|
|
|
sub _apply_modifiers { |
|
3366
|
17
|
|
|
17
|
|
39
|
my ($s, $mods) = @_; |
|
3367
|
17
|
|
|
|
|
57
|
my $cur = $s; |
|
3368
|
17
|
|
|
|
|
39
|
for my $m (@$mods) { |
|
3369
|
3
|
|
|
|
|
8
|
my $name = $m->{name}; |
|
3370
|
3
|
|
|
|
|
6
|
my $args = $m->{args}; |
|
3371
|
3
|
50
|
|
|
|
11
|
if ($name eq '_fold_paragraphs') { |
|
|
|
50
|
|
|
|
|
|
|
3372
|
0
|
0
|
|
|
|
0
|
die "fold_paragraphs() takes no arguments\n" if @$args; |
|
3373
|
0
|
|
|
|
|
0
|
$cur = _fold_paragraphs($cur); |
|
3374
|
|
|
|
|
|
|
} elsif ($name eq '_trim') { |
|
3375
|
3
|
50
|
33
|
|
|
29
|
die qq{trim(chars, where, replacement = "") expects 2 or 3 arguments\n} |
|
3376
|
|
|
|
|
|
|
if @$args < 2 || @$args > 3; |
|
3377
|
3
|
|
|
|
|
6
|
my $chars = $args->[0]; |
|
3378
|
3
|
50
|
|
|
|
10
|
die "trim: first argument (chars) must be a string\n" if ref($chars) ne ''; |
|
3379
|
3
|
|
|
|
|
5
|
my $where = $args->[1]; |
|
3380
|
3
|
50
|
|
|
|
7
|
die "trim: second argument (where) must be a string\n" if ref($where) ne ''; |
|
3381
|
3
|
|
|
|
|
7
|
my $replacement = ''; |
|
3382
|
3
|
50
|
|
|
|
10
|
if (@$args == 3) { |
|
3383
|
0
|
0
|
|
|
|
0
|
die "trim: third argument (replacement) must be a string\n" if ref($args->[2]) ne ''; |
|
3384
|
0
|
|
|
|
|
0
|
$replacement = $args->[2]; |
|
3385
|
|
|
|
|
|
|
} |
|
3386
|
3
|
|
|
|
|
9
|
$cur = _apply_trim($cur, $chars, $where, $replacement); |
|
3387
|
|
|
|
|
|
|
} else { |
|
3388
|
0
|
|
|
|
|
0
|
die "unknown modifier: $name\n"; |
|
3389
|
|
|
|
|
|
|
} |
|
3390
|
|
|
|
|
|
|
} |
|
3391
|
17
|
|
|
|
|
42
|
return $cur; |
|
3392
|
|
|
|
|
|
|
} |
|
3393
|
|
|
|
|
|
|
|
|
3394
|
|
|
|
|
|
|
# Flow forms |
|
3395
|
|
|
|
|
|
|
|
|
3396
|
|
|
|
|
|
|
sub parse_flow_array { |
|
3397
|
1
|
|
|
1
|
0
|
3
|
my $self = shift; |
|
3398
|
1
|
|
|
|
|
3
|
$self->{pos}++; |
|
3399
|
1
|
|
|
|
|
3
|
my @items; |
|
3400
|
1
|
|
|
|
|
2
|
while (1) { |
|
3401
|
3
|
|
|
|
|
11
|
$self->_skip_flow_ws; |
|
3402
|
3
|
50
|
|
|
|
7
|
if ($self->_peek eq ']') { $self->{pos}++; return \@items; } |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
3403
|
|
|
|
|
|
|
# Push the current index onto the path so any OriginalLiteral |
|
3404
|
|
|
|
|
|
|
# records inside the flow value get the right breadcrumb. |
|
3405
|
3
|
|
|
|
|
6
|
my $idx = scalar @items; |
|
3406
|
3
|
|
|
|
|
6
|
push @{$self->{path}}, DMS::Parser::Index->new($idx); |
|
|
3
|
|
|
|
|
27
|
|
|
3407
|
3
|
|
|
|
|
5
|
my $v; |
|
3408
|
3
|
|
|
|
|
6
|
my $ok = eval { $v = $self->_parse_inline_value_in_flow; 1 }; |
|
|
3
|
|
|
|
|
9
|
|
|
|
3
|
|
|
|
|
7
|
|
|
3409
|
3
|
|
|
|
|
7
|
my $err = $@; |
|
3410
|
3
|
|
|
|
|
5
|
pop @{$self->{path}}; |
|
|
3
|
|
|
|
|
6
|
|
|
3411
|
3
|
50
|
|
|
|
10
|
if (!$ok) { die $err; } |
|
|
0
|
|
|
|
|
0
|
|
|
3412
|
3
|
|
|
|
|
6
|
push @items, $v; |
|
3413
|
3
|
|
|
|
|
9
|
$self->_skip_flow_ws; |
|
3414
|
3
|
|
|
|
|
7
|
my $c = $self->_peek; |
|
3415
|
3
|
100
|
|
|
|
9
|
if ($c eq ',') { $self->{pos}++; } |
|
|
2
|
50
|
|
|
|
4
|
|
|
|
|
0
|
|
|
|
|
|
|
3416
|
1
|
|
|
|
|
14
|
elsif ($c eq ']') { $self->{pos}++; return \@items; } |
|
|
1
|
|
|
|
|
4
|
|
|
3417
|
0
|
|
|
|
|
0
|
elsif (!defined($c)) { $self->_die("unterminated flow array"); } |
|
3418
|
0
|
|
|
|
|
0
|
else { $self->_die("unexpected '$c' in flow array; expected ',' or ']'"); } |
|
3419
|
|
|
|
|
|
|
} |
|
3420
|
|
|
|
|
|
|
} |
|
3421
|
|
|
|
|
|
|
|
|
3422
|
|
|
|
|
|
|
sub parse_flow_table { |
|
3423
|
1
|
|
|
1
|
0
|
3
|
my $self = shift; |
|
3424
|
1
|
|
|
|
|
2
|
$self->{pos}++; |
|
3425
|
1
|
|
|
|
|
3
|
my $lite = $self->{lite}; |
|
3426
|
1
|
|
|
|
|
2
|
my $ignore_order = $self->{ignore_order}; |
|
3427
|
1
|
|
|
|
|
3
|
my ($t, $order); |
|
3428
|
1
|
50
|
|
|
|
22
|
if ($ignore_order) { |
|
|
|
50
|
|
|
|
|
|
|
3429
|
0
|
|
|
|
|
0
|
$t = new_unordered_table(); |
|
3430
|
|
|
|
|
|
|
} elsif ($lite) { |
|
3431
|
0
|
|
|
|
|
0
|
$order = []; |
|
3432
|
0
|
|
|
|
|
0
|
$t = { $ORDER_KEY => $order }; |
|
3433
|
|
|
|
|
|
|
} else { |
|
3434
|
1
|
|
|
|
|
4
|
$t = new_table(); |
|
3435
|
|
|
|
|
|
|
} |
|
3436
|
1
|
|
|
|
|
2
|
while (1) { |
|
3437
|
2
|
|
|
|
|
8
|
$self->_skip_flow_ws; |
|
3438
|
2
|
50
|
|
|
|
6
|
if ($self->_peek eq '}') { $self->{pos}++; return $t; } |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
3439
|
2
|
|
|
|
|
6
|
my $key = $self->parse_key; |
|
3440
|
2
|
50
|
|
|
|
5
|
$self->_die("expected ':' after flow-table key") if $self->_peek ne ':'; |
|
3441
|
2
|
|
|
|
|
3
|
$self->{pos}++; |
|
3442
|
2
|
|
|
|
|
6
|
my $c = $self->_peek; |
|
3443
|
2
|
0
|
33
|
|
|
12
|
if (!defined($c) || ($c ne ' ' && $c ne "\t" && $c ne "\n" && $c ne "\r")) { |
|
|
|
|
33
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
3444
|
0
|
|
|
|
|
0
|
$self->_die("expected whitespace after ':'"); |
|
3445
|
|
|
|
|
|
|
} |
|
3446
|
2
|
|
|
|
|
6
|
$self->_skip_flow_ws; |
|
3447
|
|
|
|
|
|
|
# Path push so OriginalLiteral records inside the value get the |
|
3448
|
|
|
|
|
|
|
# key path as their breadcrumb. |
|
3449
|
2
|
50
|
|
|
|
6
|
push @{$self->{path}}, $key unless $lite; |
|
|
2
|
|
|
|
|
6
|
|
|
3450
|
2
|
|
|
|
|
4
|
my $v; |
|
3451
|
2
|
50
|
|
|
|
5
|
if ($lite) { |
|
3452
|
0
|
|
|
|
|
0
|
$v = $self->_parse_inline_value_in_flow; |
|
3453
|
|
|
|
|
|
|
} else { |
|
3454
|
2
|
|
|
|
|
15
|
my $ok = eval { $v = $self->_parse_inline_value_in_flow; 1 }; |
|
|
2
|
|
|
|
|
6
|
|
|
|
2
|
|
|
|
|
5
|
|
|
3455
|
2
|
|
|
|
|
4
|
my $err = $@; |
|
3456
|
2
|
|
|
|
|
4
|
pop @{$self->{path}}; |
|
|
2
|
|
|
|
|
5
|
|
|
3457
|
2
|
50
|
|
|
|
8
|
if (!$ok) { die $err; } |
|
|
0
|
|
|
|
|
0
|
|
|
3458
|
|
|
|
|
|
|
} |
|
3459
|
2
|
50
|
|
|
|
9
|
$self->_die("duplicate key: $key") if exists $t->{$key}; |
|
3460
|
2
|
50
|
|
|
|
14
|
push @$order, $key if $order; |
|
3461
|
2
|
|
|
|
|
9
|
$t->{$key} = $v; |
|
3462
|
2
|
|
|
|
|
47
|
$self->_skip_flow_ws; |
|
3463
|
2
|
|
|
|
|
6
|
my $c2 = $self->_peek; |
|
3464
|
2
|
100
|
|
|
|
14
|
if ($c2 eq ',') { $self->{pos}++; } |
|
|
1
|
50
|
|
|
|
3
|
|
|
|
|
0
|
|
|
|
|
|
|
3465
|
1
|
|
|
|
|
2
|
elsif ($c2 eq '}') { $self->{pos}++; return $t; } |
|
|
1
|
|
|
|
|
28
|
|
|
3466
|
0
|
|
|
|
|
0
|
elsif (!defined($c2)) { $self->_die("unterminated flow table"); } |
|
3467
|
0
|
|
|
|
|
0
|
else { $self->_die("unexpected '$c2' in flow table; expected ',' or '}'"); } |
|
3468
|
|
|
|
|
|
|
} |
|
3469
|
|
|
|
|
|
|
} |
|
3470
|
|
|
|
|
|
|
|
|
3471
|
|
|
|
|
|
|
sub _skip_flow_ws { |
|
3472
|
12
|
|
|
12
|
|
21
|
my $self = shift; |
|
3473
|
12
|
|
|
|
|
23
|
while (1) { |
|
3474
|
12
|
|
|
|
|
29
|
pos($self->{src}) = $self->{pos}; |
|
3475
|
12
|
100
|
|
|
|
47
|
if ($self->{src} =~ /\G[ \t]+/gc) { |
|
3476
|
5
|
|
|
|
|
10
|
$self->{pos} = pos($self->{src}); |
|
3477
|
|
|
|
|
|
|
} |
|
3478
|
12
|
50
|
|
|
|
33
|
return if $self->{pos} >= $self->{len}; |
|
3479
|
12
|
|
|
|
|
27
|
my $c = substr($self->{src}, $self->{pos}, 1); |
|
3480
|
12
|
50
|
|
|
|
30
|
if ($c eq "\n") { $self->{pos}++; $self->_advance_line; next; } |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
3481
|
12
|
50
|
|
|
|
27
|
if ($c eq "\r") { |
|
3482
|
0
|
0
|
|
|
|
0
|
if (substr($self->{src}, $self->{pos}, 2) eq "\r\n") { |
|
3483
|
0
|
|
|
|
|
0
|
$self->{pos} += 2; $self->_advance_line; next; |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
3484
|
|
|
|
|
|
|
} |
|
3485
|
0
|
|
|
|
|
0
|
return; |
|
3486
|
|
|
|
|
|
|
} |
|
3487
|
12
|
50
|
|
|
|
27
|
if ($c eq '#') { $self->_die("comments not allowed inside flow forms"); } |
|
|
0
|
|
|
|
|
0
|
|
|
3488
|
12
|
50
|
|
|
|
25
|
if ($c eq '/') { |
|
3489
|
0
|
|
|
|
|
0
|
my $n = substr($self->{src}, $self->{pos} + 1, 1); |
|
3490
|
0
|
0
|
0
|
|
|
0
|
if ($n eq '/' || $n eq '*') { |
|
3491
|
0
|
|
|
|
|
0
|
$self->_die("comments not allowed inside flow forms"); |
|
3492
|
|
|
|
|
|
|
} |
|
3493
|
|
|
|
|
|
|
} |
|
3494
|
12
|
|
|
|
|
23
|
return; |
|
3495
|
|
|
|
|
|
|
} |
|
3496
|
|
|
|
|
|
|
} |
|
3497
|
|
|
|
|
|
|
|
|
3498
|
|
|
|
|
|
|
sub _parse_inline_value_in_flow { |
|
3499
|
5
|
|
|
5
|
|
8
|
my $self = shift; |
|
3500
|
5
|
50
|
33
|
|
|
13
|
if ($self->_peek eq '"' && $self->_starts_with('"""')) { |
|
3501
|
0
|
|
|
|
|
0
|
$self->_die("heredocs are not allowed inside flow forms"); |
|
3502
|
|
|
|
|
|
|
} |
|
3503
|
5
|
50
|
33
|
|
|
11
|
if ($self->_peek eq "'" && $self->_starts_with("'''")) { |
|
3504
|
0
|
|
|
|
|
0
|
$self->_die("heredocs are not allowed inside flow forms"); |
|
3505
|
|
|
|
|
|
|
} |
|
3506
|
5
|
|
|
|
|
15
|
return $self->parse_inline_value_or_heredoc; |
|
3507
|
|
|
|
|
|
|
} |
|
3508
|
|
|
|
|
|
|
|
|
3509
|
|
|
|
|
|
|
sub _consume_after_value { |
|
3510
|
102
|
|
|
102
|
|
219
|
my ($self, $allow_eof) = @_; |
|
3511
|
|
|
|
|
|
|
# Hot-path early-out for the no-comment case (every flat-table leaf): |
|
3512
|
|
|
|
|
|
|
# check the byte at pos. Common case: directly at \n with no |
|
3513
|
|
|
|
|
|
|
# trailing WS — return after one substr + branch. _advance_line is |
|
3514
|
|
|
|
|
|
|
# inlined too so the hot path has zero method calls. |
|
3515
|
102
|
|
|
|
|
154
|
my $hot_had_ws = 0; |
|
3516
|
102
|
|
|
|
|
205
|
my $p = $self->{pos}; |
|
3517
|
|
|
|
|
|
|
{ |
|
3518
|
102
|
50
|
|
|
|
157
|
if ($p >= $self->{len}) { return; } |
|
|
102
|
|
|
|
|
287
|
|
|
|
0
|
|
|
|
|
0
|
|
|
3519
|
102
|
|
|
|
|
258
|
my $c = substr($self->{src}, $p, 1); |
|
3520
|
102
|
100
|
|
|
|
237
|
if ($c eq "\n") { |
|
3521
|
85
|
|
|
|
|
147
|
my $np = $p + 1; |
|
3522
|
85
|
|
|
|
|
151
|
$self->{pos} = $np; |
|
3523
|
85
|
|
|
|
|
183
|
$self->{line}++; |
|
3524
|
85
|
|
|
|
|
208
|
$self->{line_start} = $np; |
|
3525
|
85
|
|
|
|
|
190
|
return; |
|
3526
|
|
|
|
|
|
|
} |
|
3527
|
17
|
50
|
33
|
|
|
69
|
if ($c eq ' ' || $c eq "\t") { |
|
3528
|
|
|
|
|
|
|
# Have trailing WS: skip it, then re-check. |
|
3529
|
17
|
|
|
|
|
42
|
pos($self->{src}) = $p; |
|
3530
|
17
|
|
|
|
|
58
|
$self->{src} =~ /\G[ \t]+/gc; |
|
3531
|
17
|
|
|
|
|
38
|
$self->{pos} = pos($self->{src}); |
|
3532
|
17
|
|
|
|
|
53
|
$hot_had_ws = 1; |
|
3533
|
17
|
|
|
|
|
34
|
$p = $self->{pos}; |
|
3534
|
17
|
50
|
|
|
|
52
|
if ($p >= $self->{len}) { return; } |
|
|
0
|
|
|
|
|
0
|
|
|
3535
|
17
|
|
|
|
|
38
|
$c = substr($self->{src}, $p, 1); |
|
3536
|
17
|
50
|
|
|
|
47
|
if ($c eq "\n") { |
|
3537
|
0
|
|
|
|
|
0
|
my $np = $p + 1; |
|
3538
|
0
|
|
|
|
|
0
|
$self->{pos} = $np; |
|
3539
|
0
|
|
|
|
|
0
|
$self->{line}++; |
|
3540
|
0
|
|
|
|
|
0
|
$self->{line_start} = $np; |
|
3541
|
0
|
|
|
|
|
0
|
return; |
|
3542
|
|
|
|
|
|
|
} |
|
3543
|
|
|
|
|
|
|
} |
|
3544
|
17
|
0
|
33
|
|
|
64
|
if ($c ne '#' && $c ne '/' && $c ne "\r") { |
|
|
|
|
33
|
|
|
|
|
|
3545
|
0
|
|
|
|
|
0
|
$self->_die("unexpected character '$c' after value"); |
|
3546
|
|
|
|
|
|
|
} |
|
3547
|
|
|
|
|
|
|
# else fall through to the comment-handling slow path. Pass |
|
3548
|
|
|
|
|
|
|
# $hot_had_ws so the first iteration knows ws was already consumed. |
|
3549
|
|
|
|
|
|
|
} |
|
3550
|
|
|
|
|
|
|
# Same-line comment(s) after a value attach as `trailing`. Multiple |
|
3551
|
|
|
|
|
|
|
# block comments may stack; a `#`/`//` line comment, if present, |
|
3552
|
|
|
|
|
|
|
# consumes to EOL and must come last. |
|
3553
|
17
|
|
|
|
|
31
|
my $first_iter = 1; |
|
3554
|
17
|
|
|
|
|
24
|
while (1) { |
|
3555
|
17
|
|
|
|
|
31
|
my $ws_start = $self->{pos}; |
|
3556
|
17
|
|
|
|
|
49
|
$self->_skip_inline_ws; |
|
3557
|
17
|
|
|
|
|
83
|
my $had_ws = $self->{pos} > $ws_start; |
|
3558
|
17
|
50
|
33
|
|
|
94
|
$had_ws = 1 if $first_iter && $hot_had_ws; |
|
3559
|
17
|
|
|
|
|
25
|
$first_iter = 0; |
|
3560
|
17
|
|
|
|
|
51
|
my $c = $self->_peek; |
|
3561
|
17
|
50
|
33
|
|
|
83
|
if (defined($c) && $c eq '#' && !$self->_starts_with("###")) { |
|
|
|
0
|
33
|
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
3562
|
17
|
50
|
|
|
|
45
|
$self->_die("expected whitespace before '#' comment") unless $had_ws; |
|
3563
|
17
|
|
|
|
|
50
|
my $raw = $self->_read_line_comment_to_eol; |
|
3564
|
17
|
|
|
|
|
72
|
push @{$self->{comments}}, { |
|
3565
|
|
|
|
|
|
|
comment => { content => $raw, kind => 'line' }, |
|
3566
|
|
|
|
|
|
|
position => 'trailing', |
|
3567
|
17
|
|
|
|
|
73
|
path => [@{$self->{path}}], |
|
3568
|
17
|
50
|
|
|
|
49
|
} unless $self->{lite}; |
|
3569
|
17
|
|
|
|
|
42
|
last; |
|
3570
|
|
|
|
|
|
|
} elsif (defined($c) && $c eq '/' && $self->_starts_with("//")) { |
|
3571
|
0
|
0
|
|
|
|
0
|
$self->_die("expected whitespace before '//' comment") unless $had_ws; |
|
3572
|
0
|
|
|
|
|
0
|
my $raw = $self->_read_line_comment_to_eol; |
|
3573
|
0
|
|
|
|
|
0
|
push @{$self->{comments}}, { |
|
3574
|
|
|
|
|
|
|
comment => { content => $raw, kind => 'line' }, |
|
3575
|
|
|
|
|
|
|
position => 'trailing', |
|
3576
|
0
|
|
|
|
|
0
|
path => [@{$self->{path}}], |
|
3577
|
0
|
0
|
|
|
|
0
|
} unless $self->{lite}; |
|
3578
|
0
|
|
|
|
|
0
|
last; |
|
3579
|
|
|
|
|
|
|
} elsif (defined($c) && $c eq '/' && $self->_starts_with("/*")) { |
|
3580
|
0
|
|
|
|
|
0
|
my $raw = $self->_read_c_block_comment; |
|
3581
|
0
|
|
|
|
|
0
|
push @{$self->{comments}}, { |
|
3582
|
|
|
|
|
|
|
comment => { content => $raw, kind => 'block' }, |
|
3583
|
|
|
|
|
|
|
position => 'trailing', |
|
3584
|
0
|
|
|
|
|
0
|
path => [@{$self->{path}}], |
|
3585
|
0
|
0
|
|
|
|
0
|
} unless $self->{lite}; |
|
3586
|
0
|
|
|
|
|
0
|
next; |
|
3587
|
|
|
|
|
|
|
} else { |
|
3588
|
0
|
|
|
|
|
0
|
last; |
|
3589
|
|
|
|
|
|
|
} |
|
3590
|
|
|
|
|
|
|
} |
|
3591
|
17
|
|
|
|
|
43
|
my $c = $self->_peek; |
|
3592
|
17
|
50
|
|
|
|
42
|
return if !defined($c); |
|
3593
|
17
|
50
|
|
|
|
56
|
if ($c eq "\n") { $self->{pos}++; $self->_advance_line; return; } |
|
|
17
|
|
|
|
|
33
|
|
|
|
17
|
|
|
|
|
76
|
|
|
|
17
|
|
|
|
|
35
|
|
|
3594
|
0
|
0
|
0
|
|
|
|
if ($c eq "\r" && $self->_starts_with("\r\n")) { $self->{pos} += 2; $self->_advance_line; return; } |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
3595
|
0
|
|
|
|
|
|
$self->_die("unexpected character '$c' after value"); |
|
3596
|
|
|
|
|
|
|
} |
|
3597
|
|
|
|
|
|
|
|
|
3598
|
|
|
|
|
|
|
1; |
|
3599
|
|
|
|
|
|
|
|
|
3600
|
|
|
|
|
|
|
__END__ |