File Coverage

blib/lib/SQL/SplitStatement/Tokenizer.pm

Criterion	Covered	Total	%
statement	13	14	92.8
branch	2	4	50.0
condition			n/a
subroutine	4	4	100.0
pod	1	1	100.0
total	20	23	86.9

line	stmt	bran	sub	pod	time	code
1	33		33		293	use strict;
	33				66
	33				887
2	33		33		146	use warnings;
	33				55
	33				1043
3						package SQL::SplitStatement::Tokenizer;
4
5
6	33		33		140	use Exporter;
	33				63
	33				16084
7
8						our @ISA = qw(Exporter);
9
10						our @EXPORT_OK= qw(tokenize_sql);
11
12						our $VERSION = '1.00023';
13
14						my $re= qr{
15						(
16						(?:--\|\#)[\ \t\S]* # single line comments
17						\|
18						(?:<>\|<=>\|>=\|<=\|==\|=\|!=\|!\|<<\|>>\|<\|>\|\\|\\|\|\\|\|&&\|&\|-\|\+\|\*(?!/)\|/(?!\*)\|\%\|~\|\^\|\?)
19						# operators and tests
20						\|
21						[\[\]\{\},;.] # punctuation (parenthesis, comma)
22						\|
23						\'\'(?!\') # empty single quoted string
24						\|
25						\"\"(?!\"") # empty double quoted string
26						\|
27						"(?>(?:(?>[^"\\]+)\|""\|\\.)*)+"
28						# anything inside double quotes, ungreedy
29						\|
30						`(?>(?:(?>[^`\\]+)\|``\|\\.)*)+`
31						# anything inside backticks quotes, ungreedy
32						\|
33						'(?>(?:(?>[^'\\]+)\|''\|\\.)*)+'
34						# anything inside single quotes, ungreedy.
35						\|
36						/\*[\ \t\r\n\S]*?\*/ # C style comments
37						\|
38						(?:[\w:@]+(?:\.(?:\w+\|\*)?)*)
39						# words, standard named placeholders, db.table.*, db.*
40						\|
41						(?: \$_\$ \| \$\d+ \| \${1,2} )
42						# dollar expressions - eg $_$ $3 $$
43						\|
44						\n # newline
45						\|
46						[\t\ ]+ # any kind of white spaces
47						)
48						}smx;
49
50						sub tokenize_sql {
51	79		79	1	440	my ( $query, $remove_white_tokens )= @_;
52
53	79				47132	my @query= $query =~ m{$re}smxg;
54
55	79	50			530	if ($remove_white_tokens) {
56	0				0	@query= grep( !/^[\s\n\r]*$/, @query );
57						}
58
59	79	50			21619	return wantarray ? @query : \@query;
60						}
61
62						1;
63
64						=pod
65
66						=head1 NAME
67
68						SQL::SplitStatement::Tokenizer - A simple SQL tokenizer.
69
70						=head1 SYNOPSIS
71
72						use SQL::SplitStatement::Tokenizer qw(tokenize_sql);
73
74						my $query= q{SELECT 1 + 1};
75						my @tokens= tokenize_sql($query);
76
77						# @tokens now contains ('SELECT', ' ', '1', ' ', '+', ' ', '1')
78
79						=head1 DESCRIPTION
80
81						SQL::SplitStatement::Tokenizer is a simple tokenizer for SQL queries. It does
82						not claim to be a parser or query verifier. It just creates sane tokens from a
83						valid SQL query.
84
85						It supports SQL with comments like:
86
87						-- This query is used to insert a message into
88						-- logs table
89						INSERT INTO log (application, message) VALUES (?, ?)
90
91						Also supports C<''>, C<""> and C<\'> escaping methods, so tokenizing queries
92						like the one below should not be a problem:
93
94						INSERT INTO log (application, message)
95						VALUES ('myapp', 'Hey, this is a ''single quoted string''!')
96
97						=head1 API
98
99						=over 4
100
101						=item tokenize_sql
102
103						use SQL::SplitStatement::Tokenizer qw(tokenize_sql);
104
105						my @tokens = tokenize_sql($query);
106						my $tokens = tokenize_sql($query);
107
108						$tokens = tokenize_sql( $query, $remove_white_tokens );
109
110						C<tokenize_sql> can be imported to current namespace on request. It receives a
111						SQL query, and returns an array of tokens if called in list context, or an
112						arrayref if called in scalar context.
113
114
115						If C<$remove_white_tokens> is true, whitespace-only tokens will be removed from
116						the result.
117
118						=back
119
120						=head1 ACKNOWLEDGEMENTS
121
122						=over 4
123
124						=item
125
126						Igor Sutton Lopes for writing SQL::Tokenizer, which this was forked from.
127
128						=item
129
130						Evan Harris, for implementing Shell comment style and SQL operators.
131
132						=item
133
134						Charlie Hills, for spotting a lot of important issues I hadn't thought of.
135
136						=item
137
138						Jonas Kramer, for fixing MySQL quoted strings and treating dot as punctuation character correctly.
139
140						=item
141
142						Emanuele Zeppieri, for asking to fix SQL::Tokenizer to support dollars as well.
143
144						=item
145
146						Nigel Metheringham, for extending the dollar sign support.
147
148						=item
149
150						Devin Withers, for making it not choke on CR+LF in comments.
151
152						=item
153
154						Luc Lanthier, for simplifying the regex and making it not choke on backslashes.
155
156						=back
157
158						=head1 AUTHOR
159
160						Copyright (c) 2007, 2008, 2009, 2010, 2011 Igor Sutton Lopes "". All rights
161						reserved.
162
163						Copyright (c) 2021 Veesh Goldman ""
164
165						This module is free software; you can redistribute it and/or modify it under
166						the same terms as Perl itself.
167
168						=cut
169