line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package mb::JSON;
######################################################################
#
# mb::JSON - a simple JSON parser for multibyte string
#
# http://search.cpan.org/dist/mb-JSON/
#
# Copyright (c) 2021, 2022 INABA Hitoshi in a CPAN
######################################################################

use 5.00503;    # Universal Consensus 1998 for primetools
# use 5.008001; # Lancaster Consensus 2013 for toolchains

# Written fully qualified so the assignments do not rely on being placed
# ahead of "use strict". The self-assignment is a second mention of the
# variable, which keeps the "used only once" warning quiet under -w.
$mb::JSON::VERSION = '0.03';
$mb::JSON::VERSION = $mb::JSON::VERSION;

use strict;

#---------------------------------------------------------------------
# UTF-8
#
# Regex source (always interpolated under /x) that matches exactly one
# unit of input. The alternatives are tried in order:
#   - a single octet that is ASCII or cannot start a well-formed
#     sequence (continuation octets, \xC0-\xC1, \xF5-\xFF)
#   - a well-formed 2, 3 or 4 octet UTF-8 sequence
#   - any single octet, so that the scanner still advances when a
#     multi-octet sequence is truncated or malformed
my $utf8 = join '', qw{
    [\x00-\x7F\x80-\xBF\xC0-\xC1\xF5-\xFF]       |
    [\xC2-\xDF][\x80-\xBF]                       |
    [\xE0-\xE0][\xA0-\xBF][\x80-\xBF]            |
    [\xE1-\xEC][\x80-\xBF][\x80-\xBF]            |
    [\xED-\xED][\x80-\x9F][\x80-\xBF]            |
    [\xEE-\xEF][\x80-\xBF][\x80-\xBF]            |
    [\xF0-\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF] |
    [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] |
    [\xF4-\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF] |
    [\x00-\xFF]
};

#---------------------------------------------------------------------
# confess() for this module
#
# Prints the given message followed by a call-stack listing to STDERR,
# then dies. The listing is built innermost-first and printed reversed,
# so the outermost caller comes first. die is called without a message:
# the diagnostic text goes to STDERR only.
#
#   @_ - message fragments, printed as they are
sub confess {
    my @frames = ();
    my $depth  = 0;

    # caller() returns an empty list once $depth runs past the last frame
    while (my($package, $filename, $line, $subroutine) = caller($depth)) {
        push @frames, "[$depth] $filename($line) $subroutine\n";
        $depth++;
    }

    print STDERR "\n", @_, "\n";
    print STDERR CORE::reverse @frames;
    die;
}

#---------------------------------------------------------------------
# parse JSON data
#
# mb::JSON::parse($json)
#
# Rewrites JSON text into Perl source text and returns what eval()
# yields for it:
#   "string"      --> Perl double-quoted string literal
#   "name" :      --> "name" ,   (an object becomes an anonymous hash)
#   null          --> undef
#   true / false  --> !!1 / !!0
#   anything else --> copied as it is ({ } [ ] , numbers, whitespace)
#
#   $json   - JSON text; when no argument is given, $_ is parsed
#   returns - the evaluated Perl data (UTF-8 text stays as octets, without
#             the UTF-8 flag). If the generated source does not compile,
#             eval's result is returned unchanged: undef (an empty list in
#             list context), with the reason left in $@.
#   dies    - through confess() when no rule matches, for example when
#             the input ends inside a "string"
#
# SECURITY: this is a translator, not a validator. Text outside of
# "string"s, and backslashes inside them that do not start a JSON escape,
# reach string eval unchanged. Input that is not well-formed JSON can
# therefore run arbitrary Perl code. Never pass untrusted data.
sub mb::JSON::parse {
    local $_ = @_ ? $_[0] : $_;

    # pack() template prefix, needed on Perl 5.6 only
    my $U0 = ($] =~ /^5\.006/) ? 'U0' : '';

    # Perl source text built so far
    my $parsed = '';

    # every match uses \G with /gc, so pos($_) only moves forward and a
    # failed alternative leaves it where it was
    while (not /\G \z/xmsgc) {

        # beginning of JSON's string --> beginning of Perl's string
        if (/\G (") /xmsgc) {
            $parsed .= $1;

            while (1) {

                #-------------------------------------------------------------------------------
                # end of JSON's string then ":" --> Perl's hash key
                #-------------------------------------------------------------------------------
                # An object structure is represented as a pair of curly brackets
                # surrounding zero or more name/value pairs (or members). A name is a
                # string. A single colon comes after each name, separating the name
                # from the value. A single comma separates a value from a following
                # name. The names within an object SHOULD be unique.
                #-------------------------------------------------------------------------------

                if (/\G ( " \s* ) : /xmsgc) {
                    $parsed .= "$1,";
                    last;
                }

                # end of JSON's string --> end of Perl's string
                elsif (/\G (") /xmsgc) {
                    $parsed .= $1;
                    last;
                }

                #-------------------------------------------------------------------------------
                # UTF-16 surrogate pair
                #-------------------------------------------------------------------------------
                # To escape an extended character that is not in the Basic Multilingual
                # Plane, the character is represented as a 12-character sequence,
                # encoding the UTF-16 surrogate pair. So, for example, a string
                # containing only the G clef character (U+1D11E) may be represented as
                # "\uD834\uDD1E".
                #
                # TIPS: in Perl, \u in a "string" means ucfirst(), so must be \\u
                # TIPS: Don't use /i modifier, because \U is not \u
                #-------------------------------------------------------------------------------

                elsif (/\G \\u ([Dd][89ABab][0-9A-Fa-f][0-9A-Fa-f]) \\u ([Dd][CDEFcdef][0-9A-Fa-f][0-9A-Fa-f]) /xmsgc) {
                    my($high_hex, $low_hex) = ($1, $2);
                    my $unicode = 0x10000 + (hex($high_hex) - 0xD800) * 0x400 + (hex($low_hex) - 0xDC00);

                    # 4 octets of UTF-8
                    if ($unicode < 0x110000) {
                        $parsed .= pack($U0.'C*', $unicode>>18|0xF0, $unicode>>12&0x3F|0x80, $unicode>>6&0x3F|0x80, $unicode&0x3F|0x80);
                    }

                    # defensive only: the pattern above cannot produce more than 0x10FFFF
                    else {
                        confess qq{@{[__FILE__]}: \\u{$high_hex}\\u{$low_hex} is out of Unicode (0x10000 to 0x10FFFF)};
                    }
                }

                #-------------------------------------------------------------------------------
                # any BMP UTF-16 codepoint
                #-------------------------------------------------------------------------------
                # If the character is in the Basic Multilingual Plane (U+0000 through U+FFFF),
                # then it may be represented as a six-character sequence: a reverse solidus,
                # followed by the lowercase letter u, followed by four hexadecimal digits that
                # encode the character's code point. The hexadecimal letters A through F can
                # be uppercase or lowercase.
                #
                # NOTE: an unpaired surrogate (\uD800 to \uDFFF) also arrives here and is
                # packed as 3 octets like any other BMP value.
                #-------------------------------------------------------------------------------

                elsif (/\G \\u ([0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f]) /xmsgc) {
                    my $hex     = $1;
                    my $unicode = hex $hex;

                    # 1 octet
                    if ($unicode < 0x80) {

                        # " $ @ \ must not reach the Perl literal bare: they would end
                        # the string, interpolate a variable or escape the next character
                        if ($unicode == 0x22 or $unicode == 0x24 or $unicode == 0x40 or $unicode == 0x5C) {
                            $parsed .= '\\';
                        }
                        $parsed .= pack($U0.'C*', $unicode);
                    }

                    # 2 octets
                    elsif ($unicode < 0x800) {
                        $parsed .= pack($U0.'C*', $unicode>>6|0xC0, $unicode&0x3F|0x80);
                    }

                    # 3 octets
                    elsif ($unicode < 0x10000) {
                        $parsed .= pack($U0.'C*', $unicode>>12|0xE0, $unicode>>6&0x3F|0x80, $unicode&0x3F|0x80);
                    }

                    # defensive only: four hexadecimal digits cannot exceed 0xFFFF
                    else {
                        confess qq{@{[__FILE__]}: \\u{$hex} is out of Unicode (0x0000 to 0xFFFF)};
                    }
                }

                #-------------------------------------------------------------------------------
                # two-character sequence escape representations
                #-------------------------------------------------------------------------------
                # Alternatively, there are two-character sequence escape representations
                # of some popular characters. So, for example, a string containing only
                # a single reverse solidus character may be represented more compactly
                # as "\\".
                #
                # \" quotation mark  U+0022
                # \\ reverse solidus U+005C
                # \/ solidus         U+002F
                # \b backspace       U+0008
                # \f form feed       U+000C
                # \n line feed       U+000A
                # \r carriage return U+000D
                # \t tab             U+0009
                #
                # These are copied as they are: each of them has the same meaning
                # in a Perl double-quoted string.
                #-------------------------------------------------------------------------------

                elsif (m{\G (\\["\\/bfnrt]) }xmsgc) {
                    $parsed .= $1;
                }

                # escape $ and @ to avoid interpolation on eval() of Perl
                elsif (/\G ([\$\@]) /xmsgc) {
                    $parsed .= "\\$1";
                }

                # all other UTF-8 codepoints
                elsif (/\G ($utf8) /xmsgc) {
                    $parsed .= $1;
                }

                # panic inside "string" (the input ended before the closing quote)
                else {
                    confess sprintf(<<END, CORE::substr($_, pos($_) || 0));
@{[__FILE__]}: JSON data makes panic; (maybe @{[__FILE__]} has bug(s))
%s
END
                }
            }
        }

        # JSON's "null" --> Perl's "undef"
        elsif (/\G null \b/xmsgc) {
            $parsed .= 'undef';
        }

        # JSON's boolean "true" --> Perl's true value
        elsif (/\G true \b/xmsgc) {
            $parsed .= '!!1';
        }

        # JSON's boolean "false" --> Perl's false value
        elsif (/\G false \b/xmsgc) {
            $parsed .= '!!0';
        }

        # all other UTF-8 codepoints
        elsif (/\G ($utf8) /xmsgc) {
            $parsed .= $1;
        }

        # panic outside "string"
        else {
            confess sprintf(<<END, CORE::substr($_, pos($_) || 0));
@{[__FILE__]}: JSON data makes panic; (maybe @{[__FILE__]} has bug(s))
%s
END
        }
    }

    # return as Perl data without UTF-8 flag
    # (see the SECURITY note above: $parsed is executed as Perl source)
    return eval $parsed;
}

1;
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
__END__ |