File Coverage

lib/mb/JSON.pm

Criterion	Covered	Total	%
statement	33	55	60.0
branch	28	36	77.7
condition			n/a
subroutine	3	4	75.0
pod	0	2	0.0
total	64	97	65.9

line	stmt	bran	sub	pod	time	code
1						package mb::JSON;
2						######################################################################
3						#
4						# mb::JSON - a simple JSON parser for multibyte string
5						#
6						# http://search.cpan.org/dist/mb-JSON/
7						#
8						# Copyright (c) 2021 INABA Hitoshi in a CPAN
9						######################################################################
10
11	1		1		3602	use 5.00503; # Universal Consensus 1998 for primetools
	1				4
12						# use 5.008001; # Lancaster Consensus 2013 for toolchains
13
14						$VERSION = '0.01';
15						$VERSION = $VERSION;
16
17	1		1		6	use strict;
	1				3
	1				955
18
19						#---------------------------------------------------------------------
20						# UTF-8
21						my $utf8 = join '', qw{
22						[\x00-\x7F\x80-\xBF\xC0-\xC1\xF5-\xFF] \|
23						[\xC2-\xDF][\x80-\xBF] \|
24						[\xE0-\xE0][\xA0-\xBF][\x80-\xBF] \|
25						[\xE1-\xEC][\x80-\xBF][\x80-\xBF] \|
26						[\xED-\xED][\x80-\x9F][\x80-\xBF] \|
27						[\xEE-\xEF][\x80-\xBF][\x80-\xBF] \|
28						[\xF0-\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF] \|
29						[\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] \|
30						[\xF4-\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF] \|
31						[\x00-\xFF]
32						};
33
34						#---------------------------------------------------------------------
35						# confess() for this module
36						sub confess {
37	0		0	0	0	my $i = 0;
38	0				0	my @confess = ();
39	0				0	while (my($package,$filename,$line,$subroutine) = caller($i)) {
40	0				0	push @confess, "[$i] $filename($line) $subroutine\n";
41	0				0	$i++;
42						}
43	0				0	print STDERR "\n", @_, "\n";
44	0				0	print STDERR CORE::reverse @confess;
45	0				0	die;
46						}
47
48						#---------------------------------------------------------------------
49						# parse JSON data
50						sub parse {
51	42	50	42	0	1354	local $_ = @_ ? $_[0] : $_;
52	42	50			97	my $U0 = ($] =~ /^5\.006/) ? 'U0' : '';
53	42				59	my $parsed = '';
54	42				132	while (not /\G \z/xmsgc) {
55
56						# beginning of JSON's string --> beginning of Perl's string
57	172	100			842	if (/\G (") /xmsgc) {
		100
		100
		100
		50
58	41				80	$parsed .= $1;
59
60	41				47	while (1) {
61
62						#-------------------------------------------------------------------------------
63						# end of JSON's string then ":" --> Perl's hash key
64						#-------------------------------------------------------------------------------
65						# An object structure is represented as a pair of curly brackets
66						# surrounding zero or more name/value pairs (or members). A name is a
67						# string. A single colon comes after each name, separating the name
68						# from the value. A single comma separates a value from a following
69						# name. The names within an object SHOULD be unique.
70						#-------------------------------------------------------------------------------
71
72	82	100			399	if (/\G ( " \s* ) : /xmsgc) {
		100
		100
		100
		100
		100
		50
73	21				39	$parsed .= "$1,";
74	21				92	last;
75						}
76
77						# end of JSON's string --> end of Perl's string
78						elsif (/\G (") /xmsgc) {
79	20				28	$parsed .= $1;
80	20				51	last;
81						}
82
83						#-------------------------------------------------------------------------------
84						# UTF-16 surrogate pair
85						#-------------------------------------------------------------------------------
86						# To escape an extended character that is not in the Basic Multilingual
87						# Plane, the character is represented as a 12-character sequence,
88						# encoding the UTF-16 surrogate pair. So, for example, a string
89						# containing only the G clef character (U+1D11E) may be represented as
90						# "\uD834\uDD1E".
91						#
92						# TIPS: in Perl, \u in a "string" means ucfirst(), so must be \\u
93						# TIPS: Don't use /i modifier, because \U is not \u
94						#-------------------------------------------------------------------------------
95
96						elsif (/\G \\u ([Dd][89ABab][0-9A-Fa-f][0-9A-Fa-f]) \\u ([Dd][CDEFcdef][0-9A-Fa-f][0-9A-Fa-f]) /xmsgc) {
97	2				6	my $high_surrogate = hex $1;
98	2				3	my $low_surrogate = hex $2;
99	2				6	my $unicode = 0x10000 + ($high_surrogate - 0xD800) * 0x400 + ($low_surrogate - 0xDC00);
100	2	50			4	if (0) { }
101	0				0	elsif ($unicode < 0x110000) { $parsed .= pack($U0.'C*', $unicode>>18\|0xF0, $unicode>>12&0x3F\|0x80, $unicode>>6&0x3F\|0x80, $unicode&0x3F\|0x80) }
	2				10
102	0				0	else { confess qq{@{[__FILE__]}: \\u{$1} is out of Unicode (0x0000 to 0xFFFF)}; }
	0				0
103						}
104
105						#-------------------------------------------------------------------------------
106						# any BMP UTF-16 codepoint
107						#-------------------------------------------------------------------------------
108						# If the character is in the Basic Multilingual Plane (U+0000 through U+FFFF),
109						# then it may be represented as a six-character sequence: a reverse solidus,
110						# followed by the lowercase letter u, followed by four hexadecimal digits that
111						# encode the character's code point. The hexadecimal letters A through F can
112						# be uppercase or lowercase.
113						#-------------------------------------------------------------------------------
114
115						elsif (/\G \\u ([0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f]) /xmsgc) {
116	2				6	my $unicode = hex $1;
117	2	50			11	if (0) { }
		50
		50
118	0				0	elsif ($unicode < 0x80) { $parsed .= pack($U0.'C*', $unicode ) }
	0				0
119	0				0	elsif ($unicode < 0x800) { $parsed .= pack($U0.'C*', $unicode>>6 \|0xC0, $unicode&0x3F\|0x80) }
120	2				16	elsif ($unicode < 0x10000) { $parsed .= pack($U0.'C*', $unicode>>12 \|0xE0, $unicode>>6&0x3F\|0x80, $unicode&0x3F\|0x80) }
121	0				0	else { confess qq{@{[__FILE__]}: \\u{$1} is out of Unicode (0x0000 to 0xFFFF)}; }
	0				0
122						}
123
124						#-------------------------------------------------------------------------------
125						# two-character sequence escape representations
126						#-------------------------------------------------------------------------------
127						# Alternatively, there are two-character sequence escape representations
128						# of some popular characters. So, for example, a string containing only
129						# a single reverse solidus character may be represented more compactly
130						# as "\\".
131						#
132						# \" quotation mark U+0022
133						# \\ reverse solidus U+005C
134						# \/ solidus U+002F
135						# \b backspace U+0008
136						# \f form feed U+000C
137						# \n line feed U+000A
138						# \r carriage return U+000D
139						# \t tab U+0009
140						#-------------------------------------------------------------------------------
141
142						elsif (m{\G (\\["\\/bfnrt]) }xmsgc) {
143	8				14	$parsed .= $1;
144						}
145
146						# escape $ and @ to avoid interpolation on eval() of Perl
147						elsif (/\G ([\$\@]) /xmsgc) {
148	2				6	$parsed .= "\\$1";
149						}
150
151						# other all UTF-8 codepoints
152						elsif (/\G ($utf8) /xmsgc) {
153	27				61	$parsed .= $1;
154						}
155
156						# panic inside "string"
157						else {
158	0				0	confess sprintf(<
159	0				0	@{[__FILE__]}: JSON data makes panic; (maybe @{[__FILE__]} has bug(s))
	0				0
160						%s
161						END
162						}
163						}
164						}
165
166						# JSON's "null" --> Perl's "undef"
167						elsif (/\G null \b/xmsgc) {
168	1				5	$parsed .= 'undef';
169						}
170
171						# JSON's boolean "true" --> Perl's "1"
172						elsif (/\G true \b/xmsgc) {
173	1				5	$parsed .= '1';
174						}
175
176						# JSON's boolean "false" --> Perl's "0"
177						elsif (/\G false \b/xmsgc) {
178	1				4	$parsed .= '0';
179						}
180
181						# other all UTF-8 codepoints
182						elsif (/\G ($utf8) /xmsgc) {
183	128				387	$parsed .= $1;
184						}
185
186						# panic outside "string"
187						else {
188	0				0	confess sprintf(<
189	0				0	@{[__FILE__]}: JSON data makes panic; (maybe @{[__FILE__]} has bug(s))
	0				0
190						%s
191						END
192						}
193						}
194
195						# return as Perl data without UTF-8 flag
196	42				1750	return eval $parsed;
197						}
198
199						1;
200
201						__END__