File Coverage

blib/lib/TOML/Tiny/Tokenizer.pm

Criterion	Covered	Total	%
statement	124	130	95.3
branch	22	24	91.6
condition	24	29	82.7
subroutine	16	18	88.8
pod	0	12	0.0
total	186	213	87.3

line	stmt	bran	cond	sub	pod	time	code
1							# ABSTRACT: tokenizer used by TOML::Tiny
2							$TOML::Tiny::Tokenizer::VERSION = '0.15';
3							use strict;
4	286			286		1746	use warnings;
	286					544
	286					7315
5	286			286		1228	no warnings qw(experimental);
	286					542
	286					7659
6	286			286		1262	use charnames qw(:full);
	286					525
	286					8881
7	286			286		116464	use v5.18;
	286					2280000
	286					1584
8	286			286		52663
	286					858
9							use TOML::Tiny::Grammar;
10	286			286		1374
	286					509
	286					512121
11							my ($class, %param) = @_;
12
13	424			424	0	1384	my $self = bless{
14							source => $param{source},
15							last_position => length $param{source},
16							position => 0,
17							line => 1,
18	424					2797	last_token => undef,
19							}, $class;
20
21							return $self;
22							}
23	424					1357
24							my $self = shift;
25							return $self->{last_token};
26							}
27	0			0	0	0
28	0					0	my $self = shift;
29
30							return unless defined $self->{source}
31							&& $self->{position} < $self->{last_position};
32	6358			6358	0	7494
33							if (!$self->{last_token}) {
34							return $self->{last_token} = {type => 'table', pos => 0, line => 1, value => []};
35	6358	100	66			18123	}
36
37	6114	100				10047	# Update the regex engine's position marker in case some other regex
38	422					2707	# attempted to match against the source string and reset it.
39							pos($self->{source}) = $self->{position};
40
41							my $token;
42							my $type;
43	5692					11749	my $value;
44
45	5692					11051	state $key_set = qr/\G ($Key) $WS* (?= =)/x;
46							state $table = qr/\G \[ $WS* ($Key) $WS* \] $WS* (?:$EOL \| $)/x;
47	5692					0	state $array_table = qr/\G \[\[ $WS* ($Key) $WS* \]\] $WS* (?:$EOL \| $)/x;
48
49	5692					31564	state $simple = {
50	5692					138996	'[' => 'inline_array',
51	5692					118037	']' => 'inline_array_close',
52							'{' => 'inline_table',
53	5692					99159	'}' => 'inline_table_close',
54							',' => 'comma',
55							'=' => 'assign',
56							'true' => 'bool',
57							'false' => 'bool',
58							};
59
60							# More complex matches with regexps
61							while ($self->{position} < $self->{last_position} && !defined($type)) {
62							my $prev = $self->{last_token} ? $self->{last_token}{type} : 'EOL';
63							my $newline = !!($prev eq 'EOL' \|\| $prev eq 'table' \|\| $prev eq 'array_table');
64
65	5692		100			15986	for ($self->{source}) {
66	5888	50				10438	/\G$WS+/gc; # ignore whitespace
67	5888		100			16882	/\G$Comment$/mgc && next; # ignore comments
68
69	5888					8196	last when /\G$/gc;
70	5888					23879
71	5888	100				20648	when (/\G$EOL/gc) {
72							++$self->{line};
73	5788					9896	$type = 'EOL';
74							}
75	5689					24348
76	1323					1994	if ($newline) {
77	1323					2844	when (/$table/gc) {
78							$type = 'table';
79							$value = $self->tokenize_key($1);
80	4366	100				7920	}
81	1557					7580
82	233					362	when (/$array_table/gc) {
83	233					491	$type = 'array_table';
84							$value = $self->tokenize_key($1);
85							}
86	1324					4872	}
87	74					136
88	74					159	when (/$key_set/gc) {
89							$type = 'key';
90							$value = $1;
91							}
92	4059					19146
93	1059					1811	when (/\G ( [\[\]{}=,] \| true \| false )/xgc) {
94	1059					3452	$value = $1;
95							$type = $simple->{$value};
96							}
97	3000					6529
98	1852					3360	when (/\G($String)/gc) {
99	1852					5168	$type = 'string';
100							$value = $1;
101							}
102	1148					40150
103	461					840	when (/\G($DateTime)/gc) {
104	461					1645	$type = 'datetime';
105							$value = $1;
106							}
107	687					92280
108	55					127	when (/\G($Float)/gc) {
109	55					259	$type = 'float';
110							$value = $1;
111							}
112	632					15964
113	101					169	when (/\G($Integer)/gc) {
114	101					403	$type = 'integer';
115							$value = $1;
116							}
117	531					9342
118	422					853	default{
119	422					1744	my $substr = substr($self->{source}, $self->{position}, 30) // 'undef';
120							die "toml syntax error on line $self->{line}\n\t-->\|$substr\|\n";
121							}
122	109					336	}
123	109		50			629
124	109					1326	if ($type) {
125							state $tokenizers = {};
126							my $tokenize = $tokenizers->{$type} //= $self->can("tokenize_$type") \|\| 0;
127
128	5779	100				9334	$token = {
129	5580					6179	line => $self->{line},
130	5580		100			19313	pos => $self->{pos},
			100
131							type => $type,
132							value => $tokenize ? $tokenize->($self, $value) : $value,
133							prev => $self->{last_token},
134							};
135
136							# Unset the previous token's 'prev' key to prevent keeping the entire
137							# chain of previously parsed tokens alive for the whole process.
138	5580	100				16936	undef $self->{last_token}{prev};
139
140							$self->{last_token} = $token;
141							}
142	5575					9877
143							$self->update_position;
144	5575					7177	}
145
146							return $token;
147	5774					9064	}
148
149							my $self = shift;
150	5578					14239	my $rest = substr $self->{source}, $self->{position};
151							my $stop = index $rest, "\n";
152							substr $rest, 0, $stop;
153							}
154	0			0	0	0
155	0					0	my $self = shift;
156	0					0	$self->{position} = pos($self->{source}) // 0;
157	0					0	}
158
159							my $self = shift;
160							my $token = shift;
161	5774			5774	0	6558	my $msg = shift // 'unknown';
162	5774		50			23151	my $line = $token ? $token->{line} : $self->{line};
163							die "toml: parse error at line $line: $msg\n";
164							}
165
166	5			5	0	12	my $self = shift;
167	5					16	my $toml = shift;
168	5		50			20	my @segs = $toml =~ /($SimpleKey)\.?/g;
169	5	50				17	my @keys;
170	5					56
171							for my $seg (@segs) {
172							$seg =~ s/^["']//;
173							$seg =~ s/["']$//;
174	1366			1366	0	1884	$seg = $self->unescape_str($seg);
175	1366					2009	push @keys, $seg;
176	1366					18141	}
177	1366					29152
178							return \@keys;
179	1366					2329	}
180	1630					3132
181	1630					3365	$_[1] =~ tr/_//d;
182	1630					3452	$_[1];
183	1630					3506	}
184
185							$_[1] =~ tr/_+//d;
186	1366					5598	$_[1];
187							}
188
189							my $self = shift;
190	101			101	0	269	my $toml = shift;
191	101					322	my $ml = index($toml, q{'''}) == 0
192							\|\| index($toml, q{"""}) == 0;
193							my $lit = index($toml, q{'}) == 0;
194							my $str = '';
195	422			422	0	1040
196	422					1422	if ($ml) {
197							$str = substr $toml, 3, length($toml) - 6;
198							my @newlines = $str =~ /($CRLF)/g;
199							$self->{line} += scalar @newlines;
200	461			461	0	609	$str =~ s/^$WS* $CRLF//x; # trim leading whitespace
201	461					596	$str =~ s/\\$EOL\s*//xgs; # trim newlines from lines ending in backslash
202	461		100			2034	} else {
203							$str = substr($toml, 1, length($toml) - 2);
204	461					838	}
205	461					583
206							if (!$lit) {
207	461	100				705	$str = $self->unescape_str($str);
208	35					99	}
209	35					321
210	35					74	return $str;
211	35					289	}
212	35					310
213							state $esc = {
214	426					1139	'\b' => "\x08",
215							'\t' => "\x09",
216							'\n' => "\x0A",
217	461	100				799	'\f' => "\x0C",
218	428					741	'\r' => "\x0D",
219							'\"' => "\x22",
220							'\/' => "\x2F",
221	456					1956	'\\\\' => "\x5C",
222							};
223
224							if (exists $esc->{$_[0]}) {
225	119			119	0	371	return $esc->{$_[0]};
226							}
227
228							my $hex = hex substr($_[0], 2);
229
230							if ($hex < 0x10FFFF && charnames::viacode($hex)) {
231							return chr $hex;
232							}
233
234							return;
235							}
236	119	100				307
237	100					330	state $re = qr/($Escape)/;
238							$_[1] =~ s\|$re\|unescape_chars($1) // $_[0]->error(undef, "invalid unicode escape: $1")\|xge;
239							$_[1];
240	19					59	}
241
242	19	100	100			70	1;
243	14					36713
244
245							=pod
246	5					9023
247							=encoding UTF-8
248
249							=head1 NAME
250	2058			2058	0	8931
251	2058		66			6100	TOML::Tiny::Tokenizer - tokenizer used by TOML::Tiny
	119					247
252	2053					3833
253							=head1 VERSION
254
255							version 0.15
256
257							=head1 AUTHOR
258
259							Jeff Ober <sysread@fastmail.fm>
260
261							=head1 COPYRIGHT AND LICENSE
262
263							This software is copyright (c) 2021 by Jeff Ober.
264
265							This is free software; you can redistribute it and/or modify it under
266							the same terms as the Perl 5 programming language system itself.
267
268							=cut