line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
|
2
|
|
|
|
|
|
|
=head1 NAME |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
JSON::Streaming::Reader - Read JSON strings in a streaming manner |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
=cut |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
package JSON::Streaming::Reader; |
9
|
|
|
|
|
|
|
|
10
|
7
|
|
|
7
|
|
863
|
use strict; |
|
7
|
|
|
|
|
15
|
|
|
7
|
|
|
|
|
281
|
|
11
|
7
|
|
|
7
|
|
41
|
use warnings; |
|
7
|
|
|
|
|
11
|
|
|
7
|
|
|
|
|
208
|
|
12
|
7
|
|
|
7
|
|
38
|
use Carp; |
|
7
|
|
|
|
|
20
|
|
|
7
|
|
|
|
|
734
|
|
13
|
7
|
|
|
7
|
|
8332
|
use IO::Scalar; |
|
7
|
|
|
|
|
130315
|
|
|
7
|
|
|
|
|
577
|
|
14
|
7
|
|
|
7
|
|
5283
|
use JSON::Streaming::Reader::EventWrapper; |
|
7
|
|
|
|
|
22
|
|
|
7
|
|
|
|
|
429
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
our $VERSION = '0.06'; |
17
|
|
|
|
|
|
|
|
18
|
7
|
|
|
7
|
|
44
|
use constant ROOT_STATE => {}; |
|
7
|
|
|
|
|
63
|
|
|
7
|
|
|
|
|
608
|
|
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# Make some constants for the token types |
21
|
|
|
|
|
|
|
BEGIN { |
22
|
7
|
|
|
7
|
|
17
|
foreach my $token_type (qw(start_object end_object start_array end_array start_property end_property add_string add_number add_boolean add_null error)) { |
23
|
7
|
|
|
7
|
|
34
|
no strict 'refs'; |
|
7
|
|
|
|
|
13
|
|
|
7
|
|
|
|
|
477
|
|
24
|
|
|
|
|
|
|
|
25
|
77
|
|
|
|
|
140
|
my $full_name = __PACKAGE__."::".uc($token_type); |
26
|
|
|
|
|
|
|
|
27
|
77
|
|
|
955
|
|
230
|
*{$full_name} = sub { $token_type }; |
|
77
|
|
|
|
|
34674
|
|
|
955
|
|
|
|
|
3566
|
|
28
|
|
|
|
|
|
|
} |
29
|
|
|
|
|
|
|
}; |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
sub for_stream { |
32
|
194
|
|
|
194
|
1
|
288
|
my ($class, $stream) = @_; |
33
|
|
|
|
|
|
|
|
34
|
194
|
|
|
|
|
457
|
my $self = bless {}, $class; |
35
|
194
|
|
|
|
|
408
|
$self->{stream} = $stream; |
36
|
194
|
|
|
|
|
287
|
$self->{state} = ROOT_STATE; |
37
|
194
|
|
|
|
|
363
|
$self->{state_stack} = []; |
38
|
194
|
|
|
|
|
355
|
$self->{used} = 0; |
39
|
194
|
|
|
|
|
511
|
return $self; |
40
|
|
|
|
|
|
|
} |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
sub for_string { |
43
|
129
|
|
|
129
|
1
|
368
|
my ($class, $value) = @_; |
44
|
|
|
|
|
|
|
|
45
|
129
|
100
|
|
|
|
952
|
my $stream = IO::Scalar->new(ref $value ? $value : \$value); |
46
|
129
|
|
|
|
|
5366
|
return $class->for_stream($stream); |
47
|
|
|
|
|
|
|
} |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
sub event_based { |
50
|
65
|
|
|
65
|
1
|
368
|
my ($class, %callbacks) = @_; |
51
|
|
|
|
|
|
|
|
52
|
65
|
|
|
|
|
454
|
my $fake_stream = JSON::Streaming::Reader::EventWrapper->new(); |
53
|
65
|
|
|
|
|
166
|
my $self = $class->for_stream($fake_stream); |
54
|
65
|
|
|
|
|
125
|
$self->{event_callbacks} = \%callbacks; |
55
|
|
|
|
|
|
|
|
56
|
65
|
|
|
|
|
190
|
return $self; |
57
|
|
|
|
|
|
|
} |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
sub process_tokens { |
60
|
67
|
|
|
67
|
1
|
353
|
my ($self, %callbacks) = @_; |
61
|
|
|
|
|
|
|
|
62
|
67
|
|
|
|
|
181
|
while (my $token = $self->get_token()) { |
63
|
266
|
|
|
|
|
374
|
my $token_type = shift @$token; |
64
|
266
|
50
|
|
|
|
649
|
my $callback = $callbacks{$token_type} or Carp::croak("No callback provided for $token_type tokens"); |
65
|
266
|
|
|
|
|
737
|
$callback->(@$token); |
66
|
|
|
|
|
|
|
} |
67
|
|
|
|
|
|
|
} |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
sub get_token { |
70
|
963
|
|
|
963
|
1
|
7431
|
my ($self) = @_; |
71
|
|
|
|
|
|
|
|
72
|
963
|
100
|
|
|
|
2443
|
return undef if $self->{errored}; |
73
|
|
|
|
|
|
|
|
74
|
867
|
|
|
|
|
1028
|
my $tok = eval { |
75
|
867
|
|
|
|
|
1587
|
my $need_comma = $self->made_value; |
76
|
867
|
|
|
|
|
1017
|
while (1) { # Until we find a character that's interesting |
77
|
956
|
|
|
|
|
1690
|
$self->_eat_whitespace(); |
78
|
894
|
|
|
|
|
1699
|
my $char = $self->_peek_char(); |
79
|
|
|
|
|
|
|
|
80
|
894
|
100
|
|
|
|
1802
|
unless (defined($char)) { |
81
|
|
|
|
|
|
|
# EOF |
82
|
124
|
100
|
|
|
|
235
|
die("Unexpected end of input\n") unless $self->_state == ROOT_STATE; |
83
|
116
|
|
|
|
|
267
|
return undef; |
84
|
|
|
|
|
|
|
} |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
# If we've found more stuff while we're in the root state and we've |
87
|
|
|
|
|
|
|
# already seen stuff then there's junk at the end of the string. |
88
|
770
|
100
|
100
|
|
|
1394
|
die("Unexpected junk at the end of input\n") if $self->_state == ROOT_STATE && $self->{used}; |
89
|
|
|
|
|
|
|
|
90
|
750
|
100
|
100
|
|
|
2123
|
if ($char eq ',' && ! $self->done_comma) { |
91
|
135
|
100
|
100
|
|
|
376
|
if ($self->in_array || $self->in_object) { |
|
|
50
|
|
|
|
|
|
92
|
89
|
50
|
|
|
|
183
|
if ($self->made_value) { |
93
|
89
|
|
|
|
|
170
|
$self->_require_char(','); |
94
|
|
|
|
|
|
|
|
95
|
89
|
100
|
|
|
|
177
|
if($self->is_event_based) { |
96
|
23
|
|
|
|
|
40
|
my $stream = $self->{stream}; |
97
|
23
|
|
|
|
|
68
|
$stream->complete_reading; |
98
|
23
|
|
|
|
|
102
|
$stream->begin_reading; |
99
|
|
|
|
|
|
|
} |
100
|
|
|
|
|
|
|
|
101
|
89
|
|
|
|
|
184
|
$self->_set_done_comma(); |
102
|
89
|
|
|
|
|
173
|
next; |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
elsif ($self->in_property) { |
106
|
|
|
|
|
|
|
# If we're in a property then a comma indicates |
107
|
|
|
|
|
|
|
# the end of the property. We exit the property state |
108
|
|
|
|
|
|
|
# but leave the comma so that the next get_token |
109
|
|
|
|
|
|
|
# can still see it. |
110
|
46
|
100
|
|
|
|
88
|
die("Property has no value\n") unless $self->made_value; |
111
|
45
|
|
|
|
|
98
|
$self->_pop_state(); |
112
|
45
|
|
|
|
|
84
|
$self->_set_made_value; |
113
|
45
|
|
|
|
|
77
|
return [ END_PROPERTY ]; |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
} |
116
|
|
|
|
|
|
|
|
117
|
615
|
100
|
100
|
|
|
1926
|
if ($char ne '}' && $self->in_object) { |
118
|
|
|
|
|
|
|
# If we're in an object then we must start a property here. |
119
|
94
|
|
|
|
|
191
|
my $name_token = $self->_get_string_token(); |
120
|
88
|
50
|
|
|
|
196
|
die "Expected string\n" unless $name_token->[0] eq ADD_STRING; |
121
|
88
|
|
|
|
|
187
|
$self->_eat_whitespace(); |
122
|
85
|
|
|
|
|
182
|
$self->_require_char(":"); |
123
|
83
|
|
|
|
|
126
|
my $property_name = $name_token->[1]; |
124
|
83
|
|
|
|
|
162
|
my $state = $self->_push_state(); |
125
|
83
|
|
|
|
|
121
|
$state->{in_property} = 1; |
126
|
83
|
|
|
|
|
166
|
return [ START_PROPERTY, $property_name ]; |
127
|
|
|
|
|
|
|
} |
128
|
|
|
|
|
|
|
|
129
|
521
|
100
|
|
|
|
2588
|
if ($char eq '{') { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
130
|
68
|
50
|
|
|
|
168
|
die("Unexpected start of object\n") unless $self->can_start_value; |
131
|
68
|
|
|
|
|
169
|
$self->_require_char('{'); |
132
|
68
|
|
|
|
|
158
|
my $state = $self->_push_state(); |
133
|
68
|
|
|
|
|
137
|
$state->{in_object} = 1; |
134
|
68
|
|
|
|
|
158
|
return [ START_OBJECT ]; |
135
|
|
|
|
|
|
|
} |
136
|
|
|
|
|
|
|
elsif ($char eq '}') { |
137
|
94
|
50
|
|
|
|
241
|
die("Expected another property\n") if $self->done_comma; |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
# If we're in a property then this also indicates |
140
|
|
|
|
|
|
|
# the end of the property. |
141
|
|
|
|
|
|
|
# We don't actually consume the } here, so the next |
142
|
|
|
|
|
|
|
# call to get_token will see it again but it will be |
143
|
|
|
|
|
|
|
# in the is_object state rather than is_property. |
144
|
94
|
100
|
|
|
|
196
|
if ($self->in_property) { |
145
|
36
|
100
|
|
|
|
81
|
die("Property has no value\n") unless $self->made_value; |
146
|
35
|
|
|
|
|
79
|
$self->_pop_state(); |
147
|
35
|
|
|
|
|
64
|
$self->_set_made_value; |
148
|
35
|
|
|
|
|
76
|
return [ END_PROPERTY ]; |
149
|
|
|
|
|
|
|
} |
150
|
|
|
|
|
|
|
|
151
|
58
|
100
|
|
|
|
127
|
die("End of object without matching start\n") unless $self->in_object; |
152
|
55
|
|
|
|
|
131
|
$self->_require_char('}'); |
153
|
55
|
|
|
|
|
126
|
$self->_pop_state(); |
154
|
55
|
|
|
|
|
148
|
$self->_set_made_value(); |
155
|
55
|
|
|
|
|
123
|
return [ END_OBJECT ]; |
156
|
|
|
|
|
|
|
} |
157
|
|
|
|
|
|
|
elsif ($char eq '[') { |
158
|
62
|
50
|
|
|
|
142
|
die("Unexpected start of array\n") unless $self->can_start_value; |
159
|
62
|
|
|
|
|
172
|
$self->_require_char('['); |
160
|
62
|
|
|
|
|
139
|
my $state = $self->_push_state(); |
161
|
62
|
|
|
|
|
100
|
$state->{in_array} = 1; |
162
|
62
|
|
|
|
|
135
|
return [ START_ARRAY ]; |
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
elsif ($char eq ']') { |
165
|
52
|
100
|
|
|
|
111
|
die("End of array without matching start\n") unless $self->in_array; |
166
|
50
|
100
|
|
|
|
120
|
die("Expected another value\n") if $self->done_comma; |
167
|
48
|
|
|
|
|
101
|
$self->_require_char(']'); |
168
|
48
|
|
|
|
|
113
|
$self->_pop_state(); |
169
|
48
|
|
|
|
|
140
|
$self->_set_made_value(); |
170
|
48
|
|
|
|
|
99
|
return [ END_ARRAY ]; |
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
elsif ($char eq '"') { |
173
|
70
|
100
|
|
|
|
164
|
die("Unexpected string value\n") unless $self->can_start_value; |
174
|
69
|
50
|
66
|
|
|
200
|
die("Expected ,\n") if $need_comma && ! $self->done_comma; |
175
|
69
|
|
|
|
|
161
|
return $self->_get_string_token(); |
176
|
|
|
|
|
|
|
} |
177
|
|
|
|
|
|
|
elsif ($char eq 't') { |
178
|
13
|
50
|
|
|
|
35
|
die("Unexpected boolean value\n") unless $self->can_start_value; |
179
|
13
|
50
|
66
|
|
|
51
|
die("Expected ,\n") if $need_comma && ! $self->done_comma; |
180
|
13
|
|
|
|
|
36
|
foreach my $c (qw(t r u e)) { |
181
|
52
|
|
|
|
|
105
|
$self->_require_char($c); |
182
|
|
|
|
|
|
|
} |
183
|
12
|
|
|
|
|
36
|
$self->_set_made_value(); |
184
|
12
|
|
|
|
|
32
|
return [ ADD_BOOLEAN, 1 ]; |
185
|
|
|
|
|
|
|
} |
186
|
|
|
|
|
|
|
elsif ($char eq 'f') { |
187
|
14
|
50
|
|
|
|
35
|
die("Unexpected boolean value\n") unless $self->can_start_value; |
188
|
14
|
100
|
100
|
|
|
56
|
die("Expected ,\n") if $need_comma && ! $self->done_comma; |
189
|
13
|
|
|
|
|
26
|
foreach my $c (qw(f a l s e)) { |
190
|
55
|
|
|
|
|
104
|
$self->_require_char($c); |
191
|
|
|
|
|
|
|
} |
192
|
4
|
|
|
|
|
13
|
$self->_set_made_value(); |
193
|
4
|
|
|
|
|
11
|
return [ ADD_BOOLEAN, 0 ]; |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
elsif ($char eq 'n') { |
196
|
16
|
50
|
|
|
|
38
|
die("Unexpected null\n") unless $self->can_start_value; |
197
|
16
|
50
|
66
|
|
|
57
|
die("Expected ,\n") if $need_comma && ! $self->done_comma; |
198
|
16
|
|
|
|
|
33
|
foreach my $c (qw(n u l l)) { |
199
|
64
|
|
|
|
|
119
|
$self->_require_char($c); |
200
|
|
|
|
|
|
|
} |
201
|
15
|
|
|
|
|
36
|
$self->_set_made_value(); |
202
|
15
|
|
|
|
|
33
|
return [ ADD_NULL ]; |
203
|
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
|
elsif ($char =~ /^[\d\-]/) { |
205
|
124
|
50
|
|
|
|
272
|
die("Unexpected number value\n") unless $self->can_start_value; |
206
|
124
|
100
|
100
|
|
|
354
|
die("Expected ,\n") if $need_comma && ! $self->done_comma; |
207
|
122
|
|
|
|
|
280
|
return $self->_get_number_token(); |
208
|
|
|
|
|
|
|
} |
209
|
|
|
|
|
|
|
|
210
|
8
|
|
|
|
|
42
|
die "Unexpected character $char\n"; |
211
|
0
|
|
|
|
|
0
|
last; |
212
|
|
|
|
|
|
|
} |
213
|
|
|
|
|
|
|
}; |
214
|
867
|
100
|
|
|
|
2279
|
if ($@) { |
215
|
166
|
100
|
66
|
|
|
675
|
unless (ref($@) && $@ == JSON::Streaming::Reader::EventWrapper::UNDERRUN()) { |
216
|
78
|
|
|
|
|
138
|
$self->{errored} = 1; |
217
|
78
|
|
|
|
|
113
|
my $error = $@; |
218
|
78
|
|
|
|
|
127
|
chomp $error; |
219
|
78
|
|
|
|
|
148
|
return [ ERROR, $error ]; |
220
|
|
|
|
|
|
|
} |
221
|
|
|
|
|
|
|
else { |
222
|
|
|
|
|
|
|
# If it's an underrun signal from our weird event-based IO wrapper, |
223
|
|
|
|
|
|
|
# we pass it through to the caller. |
224
|
|
|
|
|
|
|
|
225
|
88
|
|
|
|
|
261
|
die $@; |
226
|
|
|
|
|
|
|
} |
227
|
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
|
229
|
701
|
|
|
|
|
2180
|
return $tok; |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
} |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
sub skip { |
234
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
235
|
|
|
|
|
|
|
|
236
|
0
|
0
|
|
|
|
0
|
Carp::croak("Can't skip() on an event-based reader") if $self->is_event_based; |
237
|
|
|
|
|
|
|
|
238
|
0
|
|
|
|
|
0
|
my @end_chars; |
239
|
|
|
|
|
|
|
|
240
|
0
|
0
|
|
|
|
0
|
@end_chars = (',', '}') if $self->in_property; |
241
|
0
|
0
|
|
|
|
0
|
@end_chars = qw(}) if $self->in_object; |
242
|
0
|
0
|
|
|
|
0
|
@end_chars = qw(]) if $self->in_array; |
243
|
|
|
|
|
|
|
|
244
|
0
|
|
|
|
|
0
|
my $start_chars = 0; |
245
|
|
|
|
|
|
|
|
246
|
0
|
|
|
|
|
0
|
while (1) { |
247
|
0
|
|
|
|
|
0
|
my $peek = $self->_peek_char(); |
248
|
|
|
|
|
|
|
|
249
|
0
|
0
|
|
|
|
0
|
die "Unexpected end of input\n" unless defined($peek); |
250
|
|
|
|
|
|
|
|
251
|
0
|
0
|
|
|
|
0
|
if ($peek eq '"') { |
252
|
|
|
|
|
|
|
# Use the normal string parser to skip over the |
253
|
|
|
|
|
|
|
# string so that strings containing our end_chars don't |
254
|
|
|
|
|
|
|
# cause us problems. |
255
|
0
|
|
|
|
|
0
|
$self->_parse_string(); |
256
|
0
|
|
|
|
|
0
|
next; |
257
|
|
|
|
|
|
|
} |
258
|
|
|
|
|
|
|
|
259
|
0
|
0
|
0
|
|
|
0
|
if ($start_chars < 1 && grep { $_ eq $peek } @end_chars) { |
|
0
|
|
|
|
|
0
|
|
260
|
0
|
0
|
0
|
|
|
0
|
unless ($self->in_property && $peek eq '}') { |
261
|
0
|
|
|
|
|
0
|
$self->_get_char(); |
262
|
|
|
|
|
|
|
} |
263
|
0
|
|
0
|
|
|
0
|
my $skipped_value = $self->in_object || $self->in_array; |
264
|
0
|
|
|
|
|
0
|
$self->_pop_state(); |
265
|
0
|
0
|
|
|
|
0
|
$self->_set_made_value() if $skipped_value; |
266
|
0
|
|
|
|
|
0
|
return; |
267
|
|
|
|
|
|
|
} |
268
|
|
|
|
|
|
|
else { |
269
|
0
|
0
|
0
|
|
|
0
|
$start_chars++ if $peek eq '[' || $peek eq '{'; |
270
|
0
|
0
|
0
|
|
|
0
|
$start_chars-- if $peek eq ']' || $peek eq '}'; |
271
|
0
|
|
|
|
|
0
|
$self->_get_char(); |
272
|
|
|
|
|
|
|
} |
273
|
|
|
|
|
|
|
} |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
} |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
sub slurp { |
278
|
6
|
|
|
6
|
1
|
27
|
my ($self) = @_; |
279
|
|
|
|
|
|
|
|
280
|
6
|
50
|
|
|
|
16
|
Carp::croak("Can't slurp() on an event-based reader") if $self->is_event_based; |
281
|
|
|
|
|
|
|
|
282
|
6
|
|
|
|
|
13
|
my $start_state = $self->_state; |
283
|
6
|
|
|
|
|
10
|
my @items = (); |
284
|
6
|
|
|
|
|
7
|
my $current_item = undef; |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
my $push_item = sub { |
287
|
7
|
|
|
7
|
|
19
|
my $item = shift; |
288
|
7
|
|
|
|
|
9
|
push @items, $current_item; |
289
|
7
|
|
|
|
|
32
|
$current_item = $item; |
290
|
6
|
|
|
|
|
26
|
}; |
291
|
|
|
|
|
|
|
my $pop_item = sub { |
292
|
13
|
|
|
13
|
|
14
|
$current_item = pop @items; |
293
|
13
|
|
|
|
|
13
|
return $current_item; |
294
|
6
|
|
|
|
|
17
|
}; |
295
|
|
|
|
|
|
|
my $handle_value = sub { |
296
|
16
|
|
|
16
|
|
20
|
my ($token, $target) = @_; |
297
|
16
|
|
|
|
|
19
|
my $type = $token->[0]; |
298
|
|
|
|
|
|
|
|
299
|
16
|
100
|
100
|
|
|
22
|
if ($type eq ADD_STRING || $type eq ADD_NUMBER) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
300
|
8
|
|
|
|
|
32
|
$$target = $token->[1]; |
301
|
|
|
|
|
|
|
} |
302
|
|
|
|
|
|
|
elsif ($type eq ADD_BOOLEAN) { |
303
|
2
|
100
|
|
|
|
12
|
$$target = $token->[1] ? \1 : \0; |
304
|
|
|
|
|
|
|
} |
305
|
|
|
|
|
|
|
elsif ($type eq ADD_NULL) { |
306
|
1
|
|
|
|
|
4
|
$$target = undef; |
307
|
|
|
|
|
|
|
} |
308
|
|
|
|
|
|
|
elsif ($type eq START_OBJECT) { |
309
|
2
|
|
|
|
|
3
|
my $new_item = {}; |
310
|
2
|
|
|
|
|
4
|
$$target = $new_item; |
311
|
2
|
|
|
|
|
3
|
$push_item->($new_item); |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
elsif ($type eq START_ARRAY) { |
314
|
3
|
|
|
|
|
4
|
my $new_item = []; |
315
|
3
|
|
|
|
|
5
|
$$target = $new_item; |
316
|
3
|
|
|
|
|
7
|
$push_item->($new_item); |
317
|
|
|
|
|
|
|
} |
318
|
|
|
|
|
|
|
else { |
319
|
|
|
|
|
|
|
# This should actually never happen, since it should be caught |
320
|
|
|
|
|
|
|
# by the underlying raw API. |
321
|
0
|
|
|
|
|
0
|
die "Expecting a value but got a $type token\n"; |
322
|
|
|
|
|
|
|
} |
323
|
6
|
|
|
|
|
24
|
}; |
324
|
|
|
|
|
|
|
|
325
|
6
|
|
|
|
|
8
|
my $need_deref = 0; |
326
|
6
|
50
|
|
|
|
12
|
if ($self->in_array) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
327
|
0
|
|
|
|
|
0
|
$current_item = []; |
328
|
|
|
|
|
|
|
} |
329
|
|
|
|
|
|
|
elsif ($self->in_object) { |
330
|
0
|
|
|
|
|
0
|
$current_item = {}; |
331
|
|
|
|
|
|
|
} |
332
|
|
|
|
|
|
|
elsif ($self->in_property) { |
333
|
6
|
|
|
|
|
7
|
my $value = undef; |
334
|
6
|
|
|
|
|
8
|
$current_item = \$value; |
335
|
6
|
|
|
|
|
8
|
$need_deref = 1; |
336
|
|
|
|
|
|
|
} |
337
|
|
|
|
|
|
|
else { |
338
|
0
|
|
|
|
|
0
|
die "Can only slurp arrays, object or properties\n"; |
339
|
|
|
|
|
|
|
} |
340
|
6
|
|
|
|
|
12
|
my $ret_item = $current_item; |
341
|
|
|
|
|
|
|
|
342
|
6
|
|
|
|
|
12
|
while (my $token = $self->get_token()) { |
343
|
31
|
|
|
|
|
35
|
my $type = $token->[0]; |
344
|
|
|
|
|
|
|
|
345
|
31
|
50
|
|
|
|
49
|
if ($type eq ERROR) { |
346
|
0
|
|
|
|
|
0
|
die $token->[1]; |
347
|
|
|
|
|
|
|
} |
348
|
|
|
|
|
|
|
|
349
|
31
|
|
|
|
|
49
|
my $item_type = ref($current_item); |
350
|
|
|
|
|
|
|
|
351
|
31
|
100
|
100
|
|
|
112
|
if ($item_type eq 'SCALAR' || $item_type eq 'REF') { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
352
|
|
|
|
|
|
|
# We're expecting a value |
353
|
|
|
|
|
|
|
|
354
|
16
|
100
|
|
|
|
24
|
if ($type eq END_PROPERTY) { |
355
|
8
|
|
|
|
|
14
|
$pop_item->(); |
356
|
8
|
100
|
|
|
|
22
|
last unless defined($current_item); |
357
|
|
|
|
|
|
|
} |
358
|
|
|
|
|
|
|
else { |
359
|
8
|
|
|
|
|
14
|
$handle_value->($token, $current_item); |
360
|
|
|
|
|
|
|
} |
361
|
|
|
|
|
|
|
} |
362
|
|
|
|
|
|
|
elsif ($item_type eq 'ARRAY') { |
363
|
11
|
100
|
|
|
|
19
|
if ($type eq END_ARRAY) { |
364
|
3
|
|
|
|
|
6
|
$pop_item->(); |
365
|
3
|
50
|
|
|
|
12
|
last unless defined($current_item); |
366
|
|
|
|
|
|
|
} |
367
|
|
|
|
|
|
|
else { |
368
|
|
|
|
|
|
|
# We're expecting a value here too, but |
369
|
|
|
|
|
|
|
# we're going to add it to the end of the |
370
|
|
|
|
|
|
|
# array instead. |
371
|
8
|
|
|
|
|
10
|
my $target = \$current_item->[scalar(@$current_item)]; |
372
|
8
|
|
|
|
|
16
|
$handle_value->($token, $target); |
373
|
|
|
|
|
|
|
} |
374
|
|
|
|
|
|
|
} |
375
|
|
|
|
|
|
|
elsif ($item_type eq 'HASH') { |
376
|
|
|
|
|
|
|
# We're expecting a property here. |
377
|
|
|
|
|
|
|
|
378
|
4
|
100
|
|
|
|
6
|
if ($type eq START_PROPERTY) { |
|
|
50
|
|
|
|
|
|
379
|
2
|
|
|
|
|
5
|
my $name = $token->[1]; |
380
|
2
|
|
|
|
|
6
|
my $target = \$current_item->{$name}; |
381
|
2
|
|
|
|
|
4
|
$push_item->($target); |
382
|
|
|
|
|
|
|
} |
383
|
|
|
|
|
|
|
elsif ($type eq END_OBJECT) { |
384
|
2
|
|
|
|
|
4
|
$pop_item->(); |
385
|
2
|
50
|
|
|
|
7
|
last unless defined($current_item); |
386
|
|
|
|
|
|
|
} |
387
|
|
|
|
|
|
|
else { |
388
|
0
|
|
|
|
|
0
|
die "Not expecting $type in object state\n"; |
389
|
|
|
|
|
|
|
} |
390
|
|
|
|
|
|
|
} |
391
|
|
|
|
|
|
|
else { |
392
|
0
|
|
|
|
|
0
|
die "Don't know what to do with a $item_type value\n"; |
393
|
|
|
|
|
|
|
} |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
} |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
# There should be nothing in $current_item by this point. |
398
|
6
|
50
|
|
|
|
14
|
die "Unexpected end of input" if defined($current_item); |
399
|
|
|
|
|
|
|
|
400
|
6
|
50
|
|
|
|
75
|
return $need_deref ? $$ret_item : $ret_item; |
401
|
|
|
|
|
|
|
} |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
sub signal_eof { |
404
|
65
|
|
|
65
|
1
|
126
|
my ($self) = @_; |
405
|
|
|
|
|
|
|
|
406
|
65
|
50
|
|
|
|
129
|
Carp::croak("Can't signal_eof on a non-event-based JSON reader") unless $self->is_event_based; |
407
|
|
|
|
|
|
|
|
408
|
65
|
|
|
|
|
221
|
$self->{stream}->signal_eof(); |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
# Now feed the buffer with nothing to get it to process |
411
|
|
|
|
|
|
|
# whatever we have left in the buffer. |
412
|
65
|
|
|
|
|
87
|
my $empty = ''; |
413
|
65
|
|
|
|
|
142
|
$self->feed_buffer(\$empty); |
414
|
|
|
|
|
|
|
} |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
sub feed_buffer { |
417
|
171
|
|
|
171
|
1
|
253
|
my ($self, $new_data) = @_; |
418
|
|
|
|
|
|
|
|
419
|
171
|
50
|
|
|
|
325
|
Carp::croak("Can't feed_buffer on a non-event-based JSON reader") unless $self->is_event_based; |
420
|
|
|
|
|
|
|
|
421
|
171
|
|
|
|
|
285
|
my $stream = $self->{stream}; |
422
|
|
|
|
|
|
|
|
423
|
171
|
|
|
|
|
549
|
$stream->feed_buffer($new_data); |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
# Retain the peek value so we can restore it if we roll back |
426
|
171
|
|
|
|
|
249
|
my $old_peek = $self->{peeked}; |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
# Start a read transaction so we can roll back if there's a buffer underrun |
429
|
171
|
|
|
|
|
427
|
$stream->begin_reading(); |
430
|
|
|
|
|
|
|
|
431
|
171
|
|
|
|
|
229
|
my $callbacks = $self->{event_callbacks}; |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
# Now get our normal, blocking parsing code to try to read tokens until we underrun the buffer. |
434
|
171
|
|
|
|
|
213
|
eval { |
435
|
171
|
|
|
|
|
325
|
while (my $token = $self->get_token()) { |
436
|
208
|
|
|
|
|
640
|
$stream->complete_reading(); |
437
|
|
|
|
|
|
|
|
438
|
208
|
|
|
|
|
399
|
my $token_type = shift @$token; |
439
|
208
|
50
|
|
|
|
611
|
my $callback = $callbacks->{$token_type} or Carp::croak("No callback provided for $token_type tokens"); |
440
|
208
|
|
|
|
|
575
|
$callback->(@$token); |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
# Start a new transaction at the end of the last token. |
443
|
208
|
|
|
|
|
354
|
my $old_peek = $self->{peeked}; |
444
|
208
|
|
|
|
|
538
|
$stream->begin_reading(); |
445
|
|
|
|
|
|
|
} |
446
|
|
|
|
|
|
|
}; |
447
|
171
|
100
|
|
|
|
404
|
if ($@) { |
448
|
88
|
|
|
|
|
115
|
my $err = $@; |
449
|
88
|
50
|
33
|
|
|
401
|
if (ref($err) && $err == JSON::Streaming::Reader::EventWrapper::UNDERRUN()) { |
450
|
|
|
|
|
|
|
# Roll back and try again when we get more data. |
451
|
88
|
|
|
|
|
244
|
$stream->roll_back_reading(); |
452
|
88
|
|
|
|
|
131
|
$self->{peeked} = $old_peek; |
453
|
88
|
|
|
|
|
330
|
return; |
454
|
|
|
|
|
|
|
} |
455
|
|
|
|
|
|
|
else { |
456
|
|
|
|
|
|
|
# Some other kind of error. Re-throw. |
457
|
0
|
|
|
|
|
0
|
die $err; |
458
|
|
|
|
|
|
|
} |
459
|
|
|
|
|
|
|
} |
460
|
|
|
|
|
|
|
else { |
461
|
|
|
|
|
|
|
# We hit EOF without an underrun, so we just need to clean up now. |
462
|
83
|
|
|
|
|
229
|
$stream->complete_reading(); |
463
|
83
|
50
|
|
|
|
215
|
my $callback = $callbacks->{eof} or Carp::croak("No callback provided for eof"); |
464
|
83
|
|
|
|
|
205
|
$callback->(); |
465
|
|
|
|
|
|
|
} |
466
|
|
|
|
|
|
|
} |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
sub _get_char { |
469
|
2313
|
|
|
2313
|
|
2939
|
my ($self) = @_; |
470
|
|
|
|
|
|
|
|
471
|
2313
|
|
|
|
|
3794
|
my $ret = $self->_peek_char(); |
472
|
2306
|
|
|
|
|
3410
|
$self->{peeked} = undef; |
473
|
2306
|
|
|
|
|
4194
|
return $ret; |
474
|
|
|
|
|
|
|
} |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
sub _require_char { |
477
|
741
|
|
|
741
|
|
1045
|
my ($self, $required) = @_; |
478
|
|
|
|
|
|
|
|
479
|
741
|
|
|
|
|
1376
|
my $char = $self->_get_char(); |
480
|
738
|
100
|
100
|
|
|
4400
|
unless (defined($char) && $char eq $required) { |
481
|
14
|
100
|
|
|
|
111
|
die "Expected $required but encountered ".(defined($char) ? $char : 'EOF')."\n"; |
482
|
|
|
|
|
|
|
} |
483
|
724
|
|
|
|
|
1171
|
return $char; |
484
|
|
|
|
|
|
|
} |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
sub _peek_char { |
487
|
5823
|
|
|
5823
|
|
6658
|
my ($self) = @_; |
488
|
|
|
|
|
|
|
|
489
|
5823
|
100
|
|
|
|
18521
|
return $self->{peeked} if defined($self->{peeked}); |
490
|
|
|
|
|
|
|
|
491
|
2743
|
|
|
|
|
3132
|
my $buf = ""; |
492
|
2743
|
|
|
|
|
8255
|
my $success = $self->{stream}->read($buf, 1); |
493
|
|
|
|
|
|
|
|
494
|
2655
|
100
|
|
|
|
24457
|
unless ($success) { |
495
|
|
|
|
|
|
|
# Assume EOF |
496
|
314
|
|
|
|
|
759
|
return undef; |
497
|
|
|
|
|
|
|
} |
498
|
|
|
|
|
|
|
|
499
|
2341
|
|
|
|
|
6488
|
return $self->{peeked} = $buf; |
500
|
|
|
|
|
|
|
} |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
sub _eat_whitespace { |
503
|
1044
|
|
|
1044
|
|
1180
|
my ($self) = @_; |
504
|
|
|
|
|
|
|
|
505
|
1044
|
|
|
|
|
1071
|
while (1) { |
506
|
1406
|
|
|
|
|
2264
|
my $char = $self->_peek_char(); |
507
|
|
|
|
|
|
|
|
508
|
1341
|
100
|
|
|
|
2897
|
return if ! defined($char); |
509
|
1217
|
100
|
|
|
|
8691
|
return if $char !~ /^\s/; |
510
|
362
|
|
|
|
|
590
|
$self->_get_char(); |
511
|
|
|
|
|
|
|
} |
512
|
|
|
|
|
|
|
} |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
sub _get_digits { |
515
|
150
|
|
|
150
|
|
260
|
my ($self) = @_; |
516
|
|
|
|
|
|
|
|
517
|
150
|
|
|
|
|
206
|
my $accum = ""; |
518
|
|
|
|
|
|
|
|
519
|
150
|
|
100
|
|
|
265
|
while (defined($self->_peek_char()) && $self->_peek_char() =~ /\d/) { |
520
|
212
|
|
|
|
|
476
|
$accum .= $self->_get_char(); |
521
|
|
|
|
|
|
|
} |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
# We should have got at least one digit |
524
|
134
|
100
|
66
|
|
|
666
|
die "Expected digits but got ".(defined $self->_peek_char() ? $self->_peek_char() : 'EOF')."\n" unless defined($accum) && $accum ne ''; |
|
|
100
|
|
|
|
|
|
525
|
|
|
|
|
|
|
|
526
|
124
|
|
|
|
|
283
|
return $accum; |
527
|
|
|
|
|
|
|
} |
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
sub _get_number_token { |
530
|
122
|
|
|
122
|
|
153
|
my ($self) = @_; |
531
|
|
|
|
|
|
|
|
532
|
122
|
|
|
|
|
180
|
my @accum = (); |
533
|
|
|
|
|
|
|
|
534
|
122
|
100
|
|
|
|
240
|
if ($self->_peek_char() eq '-') { |
535
|
9
|
|
|
|
|
25
|
push @accum, $self->_get_char(); |
536
|
|
|
|
|
|
|
} |
537
|
|
|
|
|
|
|
|
538
|
122
|
|
|
|
|
278
|
push @accum, $self->_get_digits; |
539
|
|
|
|
|
|
|
|
540
|
105
|
100
|
100
|
|
|
213
|
if (defined($self->_peek_char()) && $self->_peek_char() eq '.') { |
541
|
18
|
|
|
|
|
44
|
push @accum, $self->_get_char(); |
542
|
|
|
|
|
|
|
|
543
|
18
|
|
|
|
|
43
|
push @accum, $self->_get_digits; |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
} |
546
|
|
|
|
|
|
|
|
547
|
98
|
100
|
100
|
|
|
285
|
if (defined($self->_peek_char()) && $self->_peek_char() =~ /e/i) { |
548
|
10
|
|
|
|
|
28
|
push @accum, $self->_get_char(); |
549
|
|
|
|
|
|
|
|
550
|
10
|
|
|
|
|
57
|
my $peek = $self->_peek_char(); |
551
|
|
|
|
|
|
|
|
552
|
10
|
100
|
100
|
|
|
82
|
if (defined($peek) && ($peek eq '+' || $peek eq '-')) { |
|
|
|
66
|
|
|
|
|
553
|
4
|
|
|
|
|
12
|
push @accum, $self->_get_char(); |
554
|
|
|
|
|
|
|
} |
555
|
|
|
|
|
|
|
|
556
|
10
|
|
|
|
|
26
|
push @accum, $self->_get_digits; |
557
|
|
|
|
|
|
|
} |
558
|
|
|
|
|
|
|
|
559
|
96
|
|
|
|
|
236
|
$self->_set_made_value(); |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
# Join and convert to number |
562
|
|
|
|
|
|
|
# Perl's numberification conveniently does what we need here. |
563
|
96
|
|
|
|
|
192
|
return [ ADD_NUMBER, join('', @accum)+0 ]; |
564
|
|
|
|
|
|
|
} |
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
my %escape_chars = ( |
567
|
|
|
|
|
|
|
b => "\b", |
568
|
|
|
|
|
|
|
f => "\f", |
569
|
|
|
|
|
|
|
n => "\n", |
570
|
|
|
|
|
|
|
r => "\r", |
571
|
|
|
|
|
|
|
t => "\t", |
572
|
|
|
|
|
|
|
"\\" => "\\", |
573
|
|
|
|
|
|
|
"/" => "/", |
574
|
|
|
|
|
|
|
'"' => '"', |
575
|
|
|
|
|
|
|
); |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
sub _get_string_token { |
578
|
163
|
|
|
163
|
|
205
|
my ($self) = @_; |
579
|
|
|
|
|
|
|
|
580
|
163
|
|
|
|
|
536
|
$self->_require_char('"'); |
581
|
|
|
|
|
|
|
|
582
|
159
|
|
|
|
|
201
|
my $accum = ""; |
583
|
|
|
|
|
|
|
|
584
|
159
|
|
|
|
|
189
|
while (1) { |
585
|
952
|
|
|
|
|
1723
|
my $char = $self->_get_char(); |
586
|
|
|
|
|
|
|
|
587
|
948
|
100
|
|
|
|
2012
|
die "Unterminated string\n" unless defined($char); |
588
|
944
|
100
|
|
|
|
1982
|
if ($char eq '"') { |
589
|
150
|
|
|
|
|
207
|
last; |
590
|
|
|
|
|
|
|
} |
591
|
|
|
|
|
|
|
|
592
|
794
|
100
|
|
|
|
1295
|
if ($char eq "\\") { |
593
|
5
|
|
|
|
|
13
|
my $escape_char = $self->_get_char(); |
594
|
|
|
|
|
|
|
|
595
|
5
|
50
|
|
|
|
13
|
die "Unfinished escape sequence\n" unless defined($escape_char); |
596
|
|
|
|
|
|
|
|
597
|
5
|
100
|
|
|
|
20
|
if (my $replacement = $escape_chars{$escape_char}) { |
|
|
50
|
|
|
|
|
|
598
|
4
|
|
|
|
|
7
|
$accum .= $replacement; |
599
|
|
|
|
|
|
|
} |
600
|
|
|
|
|
|
|
elsif ($escape_char eq 'u') { |
601
|
|
|
|
|
|
|
# TODO: Support this |
602
|
0
|
|
|
|
|
0
|
die "\\u sequence not yet supported\n"; |
603
|
|
|
|
|
|
|
} |
604
|
|
|
|
|
|
|
else { |
605
|
1
|
|
|
|
|
5
|
die "Invalid escape sequence \\$escape_char\n"; |
606
|
|
|
|
|
|
|
} |
607
|
|
|
|
|
|
|
} |
608
|
|
|
|
|
|
|
else { |
609
|
789
|
|
|
|
|
1084
|
$accum .= $char; |
610
|
|
|
|
|
|
|
} |
611
|
|
|
|
|
|
|
} |
612
|
|
|
|
|
|
|
|
613
|
150
|
|
|
|
|
293
|
$self->_set_made_value(); |
614
|
|
|
|
|
|
|
|
615
|
150
|
|
|
|
|
274
|
return [ ADD_STRING, $accum ]; |
616
|
|
|
|
|
|
|
} |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
sub _parse_string { |
619
|
0
|
|
|
0
|
|
0
|
my ($self) = @_; |
620
|
|
|
|
|
|
|
|
621
|
0
|
|
|
|
|
0
|
$self->_require_char('"'); |
622
|
|
|
|
|
|
|
|
623
|
0
|
|
|
|
|
0
|
my $accum = ""; |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
# Don't bother building the result buffer if we're called in void context |
626
|
0
|
|
|
|
|
0
|
my $want_result = defined(wantarray()); |
627
|
|
|
|
|
|
|
|
628
|
0
|
|
|
|
|
0
|
while (1) { |
629
|
0
|
|
|
|
|
0
|
my $char = $self->_get_char(); |
630
|
|
|
|
|
|
|
|
631
|
0
|
0
|
|
|
|
0
|
die "Unterminated string\n" unless defined($char); |
632
|
0
|
0
|
|
|
|
0
|
if ($char eq '"') { |
633
|
0
|
|
|
|
|
0
|
last; |
634
|
|
|
|
|
|
|
} |
635
|
|
|
|
|
|
|
|
636
|
0
|
0
|
|
|
|
0
|
if ($char eq "\\") { |
637
|
0
|
|
|
|
|
0
|
my $escape_char = $self->_get_char(); |
638
|
|
|
|
|
|
|
|
639
|
0
|
0
|
|
|
|
0
|
die "Unfinished escape sequence\n" unless defined($escape_char); |
640
|
|
|
|
|
|
|
|
641
|
0
|
0
|
|
|
|
0
|
if (my $replacement = $escape_chars{$escape_char}) { |
|
|
0
|
|
|
|
|
|
642
|
0
|
0
|
|
|
|
0
|
$accum .= $replacement if $want_result; |
643
|
|
|
|
|
|
|
} |
644
|
|
|
|
|
|
|
elsif ($escape_char eq 'u') { |
645
|
|
|
|
|
|
|
# TODO: Support this |
646
|
0
|
|
|
|
|
0
|
die "\\u sequence not yet supported\n"; |
647
|
|
|
|
|
|
|
} |
648
|
|
|
|
|
|
|
else { |
649
|
0
|
|
|
|
|
0
|
die "Invalid escape sequence \\$escape_char"; |
650
|
|
|
|
|
|
|
} |
651
|
|
|
|
|
|
|
} |
652
|
|
|
|
|
|
|
else { |
653
|
0
|
0
|
|
|
|
0
|
$accum .= $char if $want_result; |
654
|
|
|
|
|
|
|
} |
655
|
|
|
|
|
|
|
} |
656
|
|
|
|
|
|
|
|
657
|
0
|
|
|
|
|
0
|
return $accum; |
658
|
|
|
|
|
|
|
} |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
sub _push_state { |
661
|
213
|
|
|
213
|
|
268
|
my ($self) = @_; |
662
|
|
|
|
|
|
|
|
663
|
213
|
50
|
66
|
|
|
372
|
Carp::croak("Can't add anything else: JSON output is complete") if $self->_state == ROOT_STATE && $self->{used}; |
664
|
|
|
|
|
|
|
|
665
|
213
|
|
|
|
|
340
|
$self->{used} = 1; |
666
|
|
|
|
|
|
|
|
667
|
213
|
|
|
|
|
226
|
push @{$self->{state_stack}}, $self->{state}; |
|
213
|
|
|
|
|
455
|
|
668
|
|
|
|
|
|
|
|
669
|
213
|
|
|
|
|
859
|
$self->{state} = { |
670
|
|
|
|
|
|
|
in_object => 0, |
671
|
|
|
|
|
|
|
in_array => 0, |
672
|
|
|
|
|
|
|
in_property => 0, |
673
|
|
|
|
|
|
|
made_value => 0, |
674
|
|
|
|
|
|
|
}; |
675
|
|
|
|
|
|
|
|
676
|
213
|
|
|
|
|
410
|
return $self->{state}; |
677
|
|
|
|
|
|
|
} |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
sub _pop_state { |
680
|
183
|
|
|
183
|
|
226
|
my ($self) = @_; |
681
|
|
|
|
|
|
|
|
682
|
183
|
|
|
|
|
192
|
my $state = pop @{$self->{state_stack}}; |
|
183
|
|
|
|
|
340
|
|
683
|
183
|
|
|
|
|
361
|
return $self->{state} = $state; |
684
|
|
|
|
|
|
|
} |
685
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
sub _state { |
687
|
6078
|
|
|
6078
|
|
6857
|
my ($self) = @_; |
688
|
|
|
|
|
|
|
|
689
|
6078
|
|
|
|
|
22361
|
return $self->{state}; |
690
|
|
|
|
|
|
|
} |
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
sub in_object { |
693
|
1042
|
100
|
|
1042
|
0
|
1881
|
return $_[0]->_state->{in_object} ? 1 : 0; |
694
|
|
|
|
|
|
|
} |
695
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
sub in_array { |
697
|
193
|
100
|
|
193
|
0
|
382
|
return $_[0]->_state->{in_array} ? 1 : 0; |
698
|
|
|
|
|
|
|
} |
699
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
sub in_property { |
701
|
513
|
100
|
|
513
|
0
|
967
|
return $_[0]->_state->{in_property} ? 1 : 0; |
702
|
|
|
|
|
|
|
} |
703
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
sub made_value { |
705
|
1121
|
100
|
|
1121
|
0
|
2182
|
return $_[0]->_state->{made_value} ? 1 : 0; |
706
|
|
|
|
|
|
|
} |
707
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
sub done_comma { |
709
|
324
|
100
|
|
324
|
0
|
622
|
return $_[0]->_state->{done_comma} ? 1 : 0; |
710
|
|
|
|
|
|
|
} |
711
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
sub _set_made_value { |
713
|
460
|
100
|
|
460
|
|
907
|
$_[0]->_state->{made_value} = 1 unless $_[0]->_state == ROOT_STATE; |
714
|
460
|
100
|
|
|
|
969
|
$_[0]->_state->{done_comma} = 0 unless $_[0]->_state == ROOT_STATE; |
715
|
460
|
|
|
|
|
880
|
$_[0]->{used} = 1; |
716
|
|
|
|
|
|
|
} |
717
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
sub _set_done_comma { |
719
|
89
|
50
|
|
89
|
|
180
|
$_[0]->_state->{done_comma} = 1 unless $_[0]->_state == ROOT_STATE; |
720
|
|
|
|
|
|
|
} |
721
|
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
sub can_start_value { |
723
|
|
|
|
|
|
|
|
724
|
367
|
100
|
100
|
367
|
0
|
725
|
return 0 if $_[0]->in_property && $_[0]->made_value; |
725
|
|
|
|
|
|
|
|
726
|
366
|
50
|
|
|
|
758
|
return $_[0]->in_object ? 0 : 1; |
727
|
|
|
|
|
|
|
} |
728
|
|
|
|
|
|
|
|
729
|
|
|
|
|
|
|
sub _expecting_property { |
730
|
0
|
0
|
|
0
|
|
0
|
return $_[0]->in_object ? 1 : 0; |
731
|
|
|
|
|
|
|
} |
732
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
sub is_event_based { |
734
|
331
|
|
|
331
|
0
|
1114
|
return defined($_[0]->{event_callbacks}); |
735
|
|
|
|
|
|
|
} |
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
1; |
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
=head1 DESCRIPTION |
740
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
This module is effectively a tokenizer for JSON strings. With it you can process |
742
|
|
|
|
|
|
|
JSON strings in customizable ways without first creating a Perl data structure |
743
|
|
|
|
|
|
|
from the data. For some applications, such as those where the expected data |
744
|
|
|
|
|
|
|
structure is known ahead of time, this may be a more efficient way to process |
745
|
|
|
|
|
|
|
incoming data. |
746
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
=head1 SYNOPSIS |
748
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
my $jsonr = JSON::Streaming::Reader->for_stream($fh); |
750
|
|
|
|
|
|
|
$jsonr->process_tokens( |
751
|
|
|
|
|
|
|
start_object => sub { |
752
|
|
|
|
|
|
|
... |
753
|
|
|
|
|
|
|
}, |
754
|
|
|
|
|
|
|
end_object => sub { |
755
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
}, |
757
|
|
|
|
|
|
|
start_property => sub { |
758
|
|
|
|
|
|
|
my ($name) = @_; |
759
|
|
|
|
|
|
|
}, |
760
|
|
|
|
|
|
|
# ... |
761
|
|
|
|
|
|
|
); |
762
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
=head1 CREATING A NEW INSTANCE |
764
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
This module can operate on either an L instance or a string. |
766
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
=head2 JSON::Streaming::Reader->for_stream($fh) |
768
|
|
|
|
|
|
|
|
769
|
|
|
|
|
|
|
Create a new instance that will read from the provided L instance. |
770
|
|
|
|
|
|
|
If you want to operate on a raw Perl filehandle, you currently must wrap it up in |
771
|
|
|
|
|
|
|
an IO::Handle instance yourself. |
772
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
=head2 JSON::Streaming::Reader->for_string(\$string) |
774
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
Create a new instance that will read from the provided string. Uses L |
776
|
|
|
|
|
|
|
to make a stream-like wrapper around the string, and passes it into C. |
777
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
=head1 CALLBACK API |
779
|
|
|
|
|
|
|
|
780
|
|
|
|
|
|
|
The recommended way to use this library is via the callback-based API. In this |
781
|
|
|
|
|
|
|
API you make a single method call on the reader object and pass it a CODE ref |
782
|
|
|
|
|
|
|
for each token type. The reader object will then consume the entire stream |
783
|
|
|
|
|
|
|
and call the callback responding to the type of each token it encounters. |
784
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
An error token will be raised if an error is encountered during parsing. |
786
|
|
|
|
|
|
|
|
787
|
|
|
|
|
|
|
For tokens that themselves have data, the data items will be passed in as arguments |
788
|
|
|
|
|
|
|
to the callback. |
789
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
The handlers for the C, C and C tokens |
791
|
|
|
|
|
|
|
may use the C method from the pull API, as described below, to avoid processing |
792
|
|
|
|
|
|
|
the remainder of the corresponding container. |
793
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
=head2 $jsonr->process_tokens(%callbacks) |
795
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
Read the whole stream and call a callback corresponding to each token encountered. |
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
=head1 PULL API |
799
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
A lower-level API is provided that allows the caller to pull single tokens |
801
|
|
|
|
|
|
|
from the stream as necessary. The callback API is implemented in terms of the |
802
|
|
|
|
|
|
|
pull API. |
803
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
=head2 $jsonr->get_token() |
805
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
Get the next token from the stream and advance. If the end of the stream is reached, this |
807
|
|
|
|
|
|
|
will return C. Otherwise it returns an ARRAY ref whose first member is the |
808
|
|
|
|
|
|
|
token type and its subsequent members are the token type's data items, if any. |
809
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
=head2 $jsonr->skip() |
811
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
Quickly skip to the end of the current container. This can be used after a C, |
813
|
|
|
|
|
|
|
C or C token is retrieved to signal that the remainder of the |
814
|
|
|
|
|
|
|
container is not actually required. The next call to get_token will return the token |
815
|
|
|
|
|
|
|
that comes after the corresponding C token for the current container. The corresponding |
816
|
|
|
|
|
|
|
C token is never returned. |
817
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
This is most useful for skipping over unrecognised properties when populating a known |
819
|
|
|
|
|
|
|
data structure. |
820
|
|
|
|
|
|
|
|
821
|
|
|
|
|
|
|
It is better to use this method than to implement skipping in the caller because skipping |
822
|
|
|
|
|
|
|
is done using a lightweight mechanism that does not need to allocate additional memory |
823
|
|
|
|
|
|
|
for tokens encountered during skipping. However, since this method uses a simpler |
824
|
|
|
|
|
|
|
state model it may cause less-intuitive error messages to be raised if there is a |
825
|
|
|
|
|
|
|
JSON syntax error within the content that is skipped. |
826
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
Note that errors encountered during skip are actually raised via C rather than |
828
|
|
|
|
|
|
|
via the return value as with C. |
829
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
=head2 $jsonr->slurp() |
831
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
Skip to the end of the current container, capturing its value. |
833
|
|
|
|
|
|
|
This allows you to handle a C, |
834
|
|
|
|
|
|
|
C or C token as if it were an C-type token, |
835
|
|
|
|
|
|
|
dealing with its entire contents in one go. |
836
|
|
|
|
|
|
|
|
837
|
|
|
|
|
|
|
The next call to get_token will return the token |
838
|
|
|
|
|
|
|
that comes after the corresponding C token for the current container. The corresponding |
839
|
|
|
|
|
|
|
C token is never returned. |
840
|
|
|
|
|
|
|
|
841
|
|
|
|
|
|
|
The return value of this method call will be a Perl data structure |
842
|
|
|
|
|
|
|
representing the data that was skipped. This uses the same mappings as other |
843
|
|
|
|
|
|
|
popular Perl JSON libraries: objects become hashrefs, arrays become arrayrefs, |
844
|
|
|
|
|
|
|
strings and integers become scalars, boolean values become references to either |
845
|
|
|
|
|
|
|
1 or 0, and null becomes undef. |
846
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
This is useful if there is a part of the tree that you would rather handle |
848
|
|
|
|
|
|
|
via an in-memory data structure like you'd get from a non-streaming JSON parser. |
849
|
|
|
|
|
|
|
It allows you to mix-and-match streaming parsing and one-shot parsing |
850
|
|
|
|
|
|
|
within a single data stream. |
851
|
|
|
|
|
|
|
|
852
|
|
|
|
|
|
|
Note that errors encountered during skip are actually raised via C rather than |
853
|
|
|
|
|
|
|
via the return value as with C. |
854
|
|
|
|
|
|
|
|
855
|
|
|
|
|
|
|
If you call this when in property state it will return the value of the property |
856
|
|
|
|
|
|
|
and parsing will continue after the corresponding C. In object or |
857
|
|
|
|
|
|
|
array state it will return the object or array and continue after the corresponding |
858
|
|
|
|
|
|
|
C or C. |
859
|
|
|
|
|
|
|
|
860
|
|
|
|
|
|
|
=head1 EVENT-BASED API |
861
|
|
|
|
|
|
|
|
862
|
|
|
|
|
|
|
This module has an experimental event-based API which can be used to |
863
|
|
|
|
|
|
|
do streaming JSON processing in event-driven applications or those |
864
|
|
|
|
|
|
|
which do non-blocking I/O. |
865
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
In event-based mode it is the caller's responsibility to obtain data and |
867
|
|
|
|
|
|
|
when data is available provide it to the reader for processing. When |
868
|
|
|
|
|
|
|
enough data is available to unambigously represent a complete, atomic token |
869
|
|
|
|
|
|
|
a callback function is called in a similar fashion to the callback-based API |
870
|
|
|
|
|
|
|
described above. |
871
|
|
|
|
|
|
|
|
872
|
|
|
|
|
|
|
The event-based API implementation is currently somewhat hacky and |
873
|
|
|
|
|
|
|
inefficient. Caution is advised when making use of it in production |
874
|
|
|
|
|
|
|
applications, since it is currently merely a shim over the existing |
875
|
|
|
|
|
|
|
blocking API which may introduce strange packet-boundary bugs |
876
|
|
|
|
|
|
|
and other misbehavior. |
877
|
|
|
|
|
|
|
|
878
|
|
|
|
|
|
|
=head2 JSON::Streaming::Reader->event_based(%callbacks) |
879
|
|
|
|
|
|
|
|
880
|
|
|
|
|
|
|
Creates and returns an event-based reader. Callbacks are provided in the same way |
881
|
|
|
|
|
|
|
as to the C method in the callback-based API, though |
882
|
|
|
|
|
|
|
here there is an additional pseudo-token type called 'eof' which |
883
|
|
|
|
|
|
|
signals that the end of the stream has been reached. |
884
|
|
|
|
|
|
|
|
885
|
|
|
|
|
|
|
Note that at present it is not possible to use the C or C methods |
886
|
|
|
|
|
|
|
on an event-based reader, since their implementations expect |
887
|
|
|
|
|
|
|
to be able to block. This ought to be fixed in a future version. |
888
|
|
|
|
|
|
|
|
889
|
|
|
|
|
|
|
=head2 $jsonr->feed_buffer(\$data) |
890
|
|
|
|
|
|
|
|
891
|
|
|
|
|
|
|
The caller must call this method whenever new data becomes available |
892
|
|
|
|
|
|
|
for processing. A call to this method causes the reader to append |
893
|
|
|
|
|
|
|
the supplied data to any existing buffer and then try to consume as |
894
|
|
|
|
|
|
|
many tokens as possible from the buffer before returning. A callback |
895
|
|
|
|
|
|
|
will be run for each complete token encountered in the buffer. |
896
|
|
|
|
|
|
|
|
897
|
|
|
|
|
|
|
If the additional data does not allow a complete token to be recognised, |
898
|
|
|
|
|
|
|
the reader will retain the leftover buffer and attempt parsing again |
899
|
|
|
|
|
|
|
at the next call to C. |
900
|
|
|
|
|
|
|
|
901
|
|
|
|
|
|
|
In most cases this method will be called in response to some event, |
902
|
|
|
|
|
|
|
such as a notification that more data is available to read on a socket. |
903
|
|
|
|
|
|
|
|
904
|
|
|
|
|
|
|
=head2 $jsonr->signal_eof() |
905
|
|
|
|
|
|
|
|
906
|
|
|
|
|
|
|
The caller must call this method to signal the end of the data stream. |
907
|
|
|
|
|
|
|
This will cause the parser to process any remaining bytes in the buffer, |
908
|
|
|
|
|
|
|
possibly running token callbacks in the process, and then call the |
909
|
|
|
|
|
|
|
special eof callback. |
910
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
In most cases this method will be called in response to some event, |
912
|
|
|
|
|
|
|
such as a notification that a socket stream has been closed. |
913
|
|
|
|
|
|
|
|
914
|
|
|
|
|
|
|
=head1 TOKEN TYPES |
915
|
|
|
|
|
|
|
|
916
|
|
|
|
|
|
|
There are two major classes of token types. Bracketing tokens enclose other tokens |
917
|
|
|
|
|
|
|
and come in pairs, named with C and C prefixes. Leaf tokens stand alone |
918
|
|
|
|
|
|
|
and have C prefixes. |
919
|
|
|
|
|
|
|
|
920
|
|
|
|
|
|
|
For convenience the token type names match the method names used in the "raw" API |
921
|
|
|
|
|
|
|
of L, so it is straightforward to implement a streaming JSON |
922
|
|
|
|
|
|
|
normalizer by feeding the output from this module into the corresponding methods on that module. |
923
|
|
|
|
|
|
|
However, this module does have an additional special token type 'error' which is used |
924
|
|
|
|
|
|
|
to indicate tokenizing errors and does not have a corresponding method on the writer. |
925
|
|
|
|
|
|
|
|
926
|
|
|
|
|
|
|
=head2 start_object, end_object |
927
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
These token types delimit a JSON object. In a valid JSON stream an object will contain |
929
|
|
|
|
|
|
|
only properties as direct children, which will result in start_property and end_property tokens. |
930
|
|
|
|
|
|
|
|
931
|
|
|
|
|
|
|
=head2 start_array, end_array |
932
|
|
|
|
|
|
|
|
933
|
|
|
|
|
|
|
These token types delimit a JSON array. In a valid JSON stream an object will contain |
934
|
|
|
|
|
|
|
only values as direct children, which will result in one of the value token types described |
935
|
|
|
|
|
|
|
below. |
936
|
|
|
|
|
|
|
|
937
|
|
|
|
|
|
|
=head2 start_property($name), end_property |
938
|
|
|
|
|
|
|
|
939
|
|
|
|
|
|
|
These token types delimit a JSON property. The name of the property is given as an argument. |
940
|
|
|
|
|
|
|
In a valid JSON stream a start_property token will always be followed by one of the value |
941
|
|
|
|
|
|
|
token types which will itself be immediately followed by an end_property token. |
942
|
|
|
|
|
|
|
|
943
|
|
|
|
|
|
|
=head2 add_string($value) |
944
|
|
|
|
|
|
|
|
945
|
|
|
|
|
|
|
Represents a JSON string. The value of the string is passed as an argument. |
946
|
|
|
|
|
|
|
|
947
|
|
|
|
|
|
|
=head2 add_number($value) |
948
|
|
|
|
|
|
|
|
949
|
|
|
|
|
|
|
Represents a JSON number. The value of the number is passed as an argument. |
950
|
|
|
|
|
|
|
|
951
|
|
|
|
|
|
|
=head2 add_boolean($value) |
952
|
|
|
|
|
|
|
|
953
|
|
|
|
|
|
|
Represents a JSON boolean. If it's C then 1 is passed as an argument, or if C 0 is passed. |
954
|
|
|
|
|
|
|
|
955
|
|
|
|
|
|
|
=head2 add_null |
956
|
|
|
|
|
|
|
|
957
|
|
|
|
|
|
|
Represents a JSON null. |
958
|
|
|
|
|
|
|
|
959
|
|
|
|
|
|
|
=head2 error($string) |
960
|
|
|
|
|
|
|
|
961
|
|
|
|
|
|
|
Indicates a tokenization error. A human-readable description of the error is included in $string. |
962
|
|
|
|
|
|
|
|
963
|
|
|
|
|
|
|
=head1 STREAM BUFFERING |
964
|
|
|
|
|
|
|
|
965
|
|
|
|
|
|
|
Except in event-based mode, this module doesn't do any buffering. |
966
|
|
|
|
|
|
|
It expects the underlying stream to do appropriate read buffering |
967
|
|
|
|
|
|
|
if necessary. |
968
|
|
|
|
|
|
|
|
969
|
|
|
|
|
|
|
In event-based mode an internal buffer is used which retains |
970
|
|
|
|
|
|
|
bytes that are not yet enough to unambiguously represent |
971
|
|
|
|
|
|
|
a complete token so that it can retry when more data is available. |
972
|
|
|
|
|
|
|
In this situation it is up to the caller to read from its |
973
|
|
|
|
|
|
|
data source in an appropriate manner, but it is best to provide |
974
|
|
|
|
|
|
|
as much data as possible in a single data notification. |
975
|
|
|
|
|
|
|
|
976
|
|
|
|
|
|
|
=head1 LICENSE |
977
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
Copyright 2009 Martin Atkins . |
979
|
|
|
|
|
|
|
|
980
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it under |
981
|
|
|
|
|
|
|
the same terms as Perl itself. |