| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package PPI; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# See POD at end for documentation |
|
4
|
|
|
|
|
|
|
|
|
5
|
66
|
|
|
66
|
|
4950430
|
use 5.006; |
|
|
66
|
|
|
|
|
191
|
|
|
6
|
66
|
|
|
66
|
|
283
|
use strict; |
|
|
66
|
|
|
|
|
140
|
|
|
|
66
|
|
|
|
|
4368
|
|
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
# Set the version for CPAN |
|
9
|
|
|
|
|
|
|
our $VERSION = '1.284'; |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
our ( $XS_COMPATIBLE, @XS_EXCLUDE ) = ( '0.845' ); |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# Load everything |
|
14
|
66
|
|
|
66
|
|
18573
|
use PPI::Util (); |
|
|
66
|
|
|
|
|
151
|
|
|
|
66
|
|
|
|
|
1358
|
|
|
15
|
66
|
|
|
66
|
|
24108
|
use PPI::Exception (); |
|
|
66
|
|
|
|
|
159
|
|
|
|
66
|
|
|
|
|
1260
|
|
|
16
|
66
|
|
|
66
|
|
27798
|
use PPI::Element (); |
|
|
66
|
|
|
|
|
176
|
|
|
|
66
|
|
|
|
|
1484
|
|
|
17
|
66
|
|
|
66
|
|
25497
|
use PPI::Token (); |
|
|
66
|
|
|
|
|
276
|
|
|
|
66
|
|
|
|
|
2187
|
|
|
18
|
66
|
|
|
66
|
|
27584
|
use PPI::Statement (); |
|
|
66
|
|
|
|
|
267
|
|
|
|
66
|
|
|
|
|
1826
|
|
|
19
|
66
|
|
|
66
|
|
27189
|
use PPI::Structure (); |
|
|
66
|
|
|
|
|
233
|
|
|
|
66
|
|
|
|
|
1628
|
|
|
20
|
66
|
|
|
66
|
|
22776
|
use PPI::Document (); |
|
|
66
|
|
|
|
|
223
|
|
|
|
66
|
|
|
|
|
1506
|
|
|
21
|
66
|
|
|
66
|
|
24367
|
use PPI::Document::File (); |
|
|
66
|
|
|
|
|
173
|
|
|
|
66
|
|
|
|
|
1206
|
|
|
22
|
66
|
|
|
66
|
|
4987
|
use PPI::Document::Fragment (); |
|
|
66
|
|
|
|
|
108
|
|
|
|
66
|
|
|
|
|
793
|
|
|
23
|
66
|
|
|
66
|
|
25233
|
use PPI::Document::Normalized (); |
|
|
66
|
|
|
|
|
153
|
|
|
|
66
|
|
|
|
|
1299
|
|
|
24
|
66
|
|
|
66
|
|
26478
|
use PPI::Normal (); |
|
|
66
|
|
|
|
|
193
|
|
|
|
66
|
|
|
|
|
1480
|
|
|
25
|
66
|
|
|
66
|
|
32026
|
use PPI::Tokenizer (); |
|
|
66
|
|
|
|
|
223
|
|
|
|
66
|
|
|
|
|
7061
|
|
|
26
|
66
|
|
|
66
|
|
35975
|
use PPI::Lexer (); |
|
|
66
|
|
|
|
|
190
|
|
|
|
66
|
|
|
|
|
6076
|
|
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# If it is installed, load in PPI::XS |
|
29
|
|
|
|
|
|
|
die |
|
30
|
|
|
|
|
|
|
if !$PPI::XS_DISABLE |
|
31
|
|
|
|
|
|
|
and !eval { require PPI::XS; 1 } |
|
32
|
|
|
|
|
|
|
and $@ !~ /^Can't locate .*? at /; # ignore failure to load if not installed |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
1; |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
__END__ |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=pod |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head1 NAME |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
PPI - Parse, Analyze and Manipulate Perl (without perl) |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
use PPI; |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# Create a new empty document |
|
49
|
|
|
|
|
|
|
my $Document = PPI::Document->new; |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# Create a document from source |
|
52
|
|
|
|
|
|
|
$Document = PPI::Document->new(\'print "Hello World!\n"'); |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# Load a Document from a file |
|
55
|
|
|
|
|
|
|
$Document = PPI::Document->new('Module.pm'); |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
# Does it contain any POD? |
|
58
|
|
|
|
|
|
|
if ( $Document->find_any('PPI::Token::Pod') ) { |
|
59
|
|
|
|
|
|
|
print "Module contains POD\n"; |
|
60
|
|
|
|
|
|
|
} |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
# Get the name of the main package |
|
63
|
|
|
|
|
|
|
$pkg = $Document->find_first('PPI::Statement::Package')->namespace; |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# Remove all that nasty documentation |
|
66
|
|
|
|
|
|
|
$Document->prune('PPI::Token::Pod'); |
|
67
|
|
|
|
|
|
|
$Document->prune('PPI::Token::Comment'); |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
# Save the file |
|
70
|
|
|
|
|
|
|
$Document->save('Module.pm.stripped'); |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
73
|
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=head2 About this Document |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
This is the PPI manual. It describes its reason for existing, its general |
|
77
|
|
|
|
|
|
|
structure, its use, an overview of the API, and provides a few |
|
78
|
|
|
|
|
|
|
implementation samples. |
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=head2 Background |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
The ability to read and manipulate Perl (the language) programmatically |
|
83
|
|
|
|
|
|
|
other than with perl (the application) was one that caused difficulty |
|
84
|
|
|
|
|
|
|
for a long time. |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
The cause of this problem was Perl's complex and dynamic grammar. |
|
87
|
|
|
|
|
|
|
Although there is typically not a huge diversity in the grammar of most |
|
88
|
|
|
|
|
|
|
Perl code, certain issues cause large problems when it comes to parsing. |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
Indeed, quite early in Perl's history Tom Christiansen introduced the Perl |
|
91
|
|
|
|
|
|
|
community to the quote I<"Nothing but perl can parse Perl">, or as it is |
|
92
|
|
|
|
|
|
|
more often stated now as a truism: |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
B<"Only perl can parse Perl"> |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
One example of the sorts of things that prevent Perl from being easily parsed |
|
97
|
|
|
|
|
|
|
is function signatures, as demonstrated by the following. |
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
@result = (dothis $foo, $bar); |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
# Which of the following is it equivalent to? |
|
102
|
|
|
|
|
|
|
@result = (dothis($foo), $bar); |
|
103
|
|
|
|
|
|
|
@result = dothis($foo, $bar); |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
The first line above can be interpreted in two different ways, depending |
|
106
|
|
|
|
|
|
|
on whether the C<&dothis> function is expecting one argument, or two, |
|
107
|
|
|
|
|
|
|
or several. |
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
A "code parser" (something that parses for the purpose of execution) such |
|
110
|
|
|
|
|
|
|
as perl needs information that is not found in the immediate vicinity of |
|
111
|
|
|
|
|
|
|
the statement being parsed. |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
The information might not just be elsewhere in the file, it might not even be |
|
114
|
|
|
|
|
|
|
in the same file at all. It might also not be able to determine this |
|
115
|
|
|
|
|
|
|
information without the prior execution of a C<BEGIN {}> block, or the |
|
116
|
|
|
|
|
|
|
loading and execution of one or more external modules. Or worse, the C<&dothis> |
|
117
|
|
|
|
|
|
|
function may not even have been written yet. |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
B<When parsing Perl as code, you must also execute it.> |
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
Even perl itself never really fully understands the structure of the source |
|
122
|
|
|
|
|
|
|
code after and indeed B<as> it processes it, and in that sense doesn't |
|
123
|
|
|
|
|
|
|
"parse" Perl source into anything remotely like a structured document. |
|
124
|
|
|
|
|
|
|
This makes it of no real use for any task that needs to treat the source |
|
125
|
|
|
|
|
|
|
code as a document, and do so reliably and robustly. |
|
126
|
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
For more information on why it is impossible to parse Perl, see Randal |
|
128
|
|
|
|
|
|
|
Schwartz's seminal response to the question of "Why can't you parse Perl". |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
L<http://www.perlmonks.org/index.pl?node_id=44722> |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
The purpose of PPI is B<not> to parse Perl I<Code>, but to parse Perl |
|
133
|
|
|
|
|
|
|
I<Documents>. By treating the problem this way, we are able to parse a |
|
134
|
|
|
|
|
|
|
single file containing Perl source code "isolated" from any other |
|
135
|
|
|
|
|
|
|
resources, such as libraries upon which the code may depend, and |
|
136
|
|
|
|
|
|
|
without needing to run an instance of perl alongside or inside the parser. |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
Historically, using an embedded perl parser was widely considered to be |
|
139
|
|
|
|
|
|
|
the most likely avenue for finding a solution to parsing Perl. It has been |
|
140
|
|
|
|
|
|
|
investigated from time to time, but attempts have generally failed or |
|
141
|
|
|
|
|
|
|
suffered from sufficiently bad corner cases that they were abandoned. |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=head2 What Does PPI Stand For? |
|
144
|
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
C<PPI> is an acronym for the longer original module name |
|
146
|
|
|
|
|
|
|
C<Parse::Perl::Isolated>. And in the spirit of the silly acronym games |
|
147
|
|
|
|
|
|
|
played by certain unnamed Open Source projects you may have I<hurd> of, |
|
148
|
|
|
|
|
|
|
it is also a reverse backronym of "I Parse Perl". |
|
149
|
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
Of course, I could just be lying and have just made that second bit up |
|
151
|
|
|
|
|
|
|
10 minutes before the release of PPI 1.000. Besides, B<all> the cool |
|
152
|
|
|
|
|
|
|
Perl packages have TLAs (Three Letter Acronyms). It's a rule or something. |
|
153
|
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
Why don't you just think of it as the B<Perl Parsing Interface> for simplicity. |
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
The original name was shortened to prevent the author (and you the users) |
|
157
|
|
|
|
|
|
|
from contracting RSI by having to type crazy things like |
|
158
|
|
|
|
|
|
|
C<Parse::Perl::Isolated::Token::QuoteLike::Backtick> 100 times a day. |
|
159
|
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
In acknowledgment that someone may some day come up with a valid solution |
|
161
|
|
|
|
|
|
|
for the grammar problem it was decided at the commencement of the project |
|
162
|
|
|
|
|
|
|
to leave the C<Parse::Perl> namespace free for any such effort. |
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
Since that time I've been able to prove to my own satisfaction that it |
|
165
|
|
|
|
|
|
|
B<is> truly impossible to accurately parse Perl as both code and document |
|
166
|
|
|
|
|
|
|
at once. For the academics, parsing Perl suffers from the "Halting Problem". |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=head2 Why Parse Perl? |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
Once you can accept that we will never be able to parse Perl well enough |
|
171
|
|
|
|
|
|
|
to meet the standards of things that treat Perl as code, it is worth |
|
172
|
|
|
|
|
|
|
re-examining I<why> we want to "parse" Perl at all. |
|
173
|
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
What are the things that people might want a "Perl parser" for? |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=over 4 |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=item Documentation |
|
179
|
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
Analyzing the contents of a Perl document to automatically generate |
|
181
|
|
|
|
|
|
|
documentation, in parallel to, or as a replacement for, POD documentation. |
|
182
|
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
Allow an indexer to locate and process all the comments and |
|
184
|
|
|
|
|
|
|
documentation from code for "full text search" applications. |
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
=item Structural and Quality Analysis |
|
187
|
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
Determine quality or other metrics across a body of code, and identify |
|
189
|
|
|
|
|
|
|
situations relating to particular phrases, techniques or locations. |
|
190
|
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
Index functions, variables and packages within Perl code, and do search |
|
192
|
|
|
|
|
|
|
and graph (in the node/edge sense) analysis of large code bases. |
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
L<Perl::Critic>, based on PPI, is a large, thriving tool for bug detection |
|
195
|
|
|
|
|
|
|
and style analysis of Perl code. |
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
=item Refactoring |
|
198
|
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
Make structural, syntax, or other changes to code in an automated manner, |
|
200
|
|
|
|
|
|
|
either independently or in assistance to an editor. This sort of task list |
|
201
|
|
|
|
|
|
|
includes backporting, forward porting, partial evaluation, "improving" code, |
|
202
|
|
|
|
|
|
|
or whatever. All the sort of things you'd want from a L<Perl::Editor>. |
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
=item Layout |
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
Change the layout of code without changing its meaning. This includes |
|
207
|
|
|
|
|
|
|
techniques such as tidying (like L<perltidy>), obfuscation, compressing and |
|
208
|
|
|
|
|
|
|
"squishing", or to implement formatting preferences or policies. |
|
209
|
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
=item Presentation |
|
211
|
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
This includes methods of improving the presentation of code, without changing |
|
213
|
|
|
|
|
|
|
the content of the code. Modify, improve, syntax colour, etc. the presentation |
|
214
|
|
|
|
|
|
|
of a Perl document. Generating "IntelliText"-like functions. |
|
215
|
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
=back |
|
217
|
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
If we treat this as a baseline for the sort of things we are going to have |
|
219
|
|
|
|
|
|
|
to build on top of Perl, then it becomes possible to identify a standard |
|
220
|
|
|
|
|
|
|
for how good a Perl parser needs to be. |
|
221
|
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
=head2 How good is Good Enough(TM) |
|
223
|
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
PPI seeks to be good enough to achieve all of the above tasks, or to provide |
|
225
|
|
|
|
|
|
|
a sufficiently good API on which to allow others to implement modules in |
|
226
|
|
|
|
|
|
|
these and related areas. |
|
227
|
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
However, there are going to be limits to this process. Because PPI cannot |
|
229
|
|
|
|
|
|
|
adapt to changing grammars, any code written using source filters should not |
|
230
|
|
|
|
|
|
|
be assumed to be parsable. |
|
231
|
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
At one extreme, this includes anything munged by L<Acme::Bleach>, as well |
|
233
|
|
|
|
|
|
|
as (arguably) more common cases like L<Switch>. We do not pretend to be |
|
234
|
|
|
|
|
|
|
able to always parse code using these modules, although as long as it still |
|
235
|
|
|
|
|
|
|
follows a format that looks like Perl syntax, it may be possible to extend |
|
236
|
|
|
|
|
|
|
the lexer to handle them. |
|
237
|
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
The ability to extend PPI to handle lexical additions to the language is on |
|
239
|
|
|
|
|
|
|
the drawing board to be done some time post-1.0. |
|
240
|
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
The goal for success was originally to be able to successfully parse 99% of |
|
242
|
|
|
|
|
|
|
all Perl documents contained in CPAN. This means the entire file in each |
|
243
|
|
|
|
|
|
|
case. |
|
244
|
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
PPI has succeeded in this goal far beyond the expectations of even the |
|
246
|
|
|
|
|
|
|
author. At time of writing there are only 28 non-Acme Perl modules in CPAN |
|
247
|
|
|
|
|
|
|
that PPI is incapable of parsing. Most of these are so badly broken they |
|
248
|
|
|
|
|
|
|
do not compile as Perl code anyway. |
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
So unless you are actively going out of your way to break PPI, you should |
|
251
|
|
|
|
|
|
|
expect that it will handle your code just fine. |
|
252
|
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
=head2 Internationalisation |
|
254
|
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
PPI provides partial support for internationalisation and localisation. |
|
256
|
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
Specifically, it allows the use of characters from the Latin-1 character |
|
258
|
|
|
|
|
|
|
set to be used in quotes, comments, and POD. Primarily, this covers |
|
259
|
|
|
|
|
|
|
languages from Europe and South America. |
|
260
|
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
PPI does B<not> currently provide support for Unicode. |
|
262
|
|
|
|
|
|
|
If you need Unicode support and would like to help, |
|
263
|
|
|
|
|
|
|
contact the author. (contact details below) |
|
264
|
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
=head2 Round Trip Safe |
|
266
|
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
When PPI parses a file it builds B<everything> into the model, including |
|
268
|
|
|
|
|
|
|
whitespace. This is needed in order to make the Document fully "Round Trip" |
|
269
|
|
|
|
|
|
|
safe. |
|
270
|
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
The general concept behind a "Round Trip" parser is that it knows what it |
|
272
|
|
|
|
|
|
|
is parsing is somewhat uncertain, and so B<expects> to get things wrong |
|
273
|
|
|
|
|
|
|
from time to time. In the cases where it parses code wrongly the tree |
|
274
|
|
|
|
|
|
|
will serialize back out to the same string of code that was read in, |
|
275
|
|
|
|
|
|
|
repairing the parser's mistake as it heads back out to the file. |
|
276
|
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
The end result is that if you parse in a file and serialize it back out |
|
278
|
|
|
|
|
|
|
without changing the tree, you are guaranteed to get the same file you |
|
279
|
|
|
|
|
|
|
started with. PPI does this correctly and reliably for 100% of all known |
|
280
|
|
|
|
|
|
|
cases. |
|
281
|
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
B<What goes in, will come out. Every time.> |
|
283
|
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
The one minor exception at this time is that if the newlines for your file |
|
285
|
|
|
|
|
|
|
are wrong (meaning not matching the platform newline format), PPI will |
|
286
|
|
|
|
|
|
|
localise them for you. (This isn't done for convenience; supporting |
|
287
|
|
|
|
|
|
|
arbitrary newlines would make some of the code more complicated.) |
|
288
|
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
Better control of the newline type is on the wish list though, and |
|
290
|
|
|
|
|
|
|
anyone wanting to help out is encouraged to contact the author. |
|
291
|
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
=head1 IMPLEMENTATION |
|
293
|
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
=head2 General Layout |
|
295
|
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
PPI is built upon two primary "parsing" components, L<PPI::Tokenizer> |
|
297
|
|
|
|
|
|
|
and L<PPI::Lexer>, and a large tree of about 70 classes which implement |
|
298
|
|
|
|
|
|
|
the I<Perl Document Object Model> (PDOM). |
|
299
|
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
The PDOM is conceptually similar in style and intent to the regular DOM or |
|
301
|
|
|
|
|
|
|
other code Abstract Syntax Trees (ASTs), but contains some differences |
|
302
|
|
|
|
|
|
|
to handle perl-specific cases, and to assist in treating the code as a |
|
303
|
|
|
|
|
|
|
document. Please note that it is B<not> an implementation of the official |
|
304
|
|
|
|
|
|
|
Document Object Model specification, only somewhat similar to it. |
|
305
|
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
On top of the Tokenizer, Lexer and the classes of the PDOM, sit a number |
|
307
|
|
|
|
|
|
|
of classes intended to make life a little easier when dealing with PDOM |
|
308
|
|
|
|
|
|
|
trees. |
|
309
|
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
Both the major parsing components were hand-coded from scratch with only |
|
311
|
|
|
|
|
|
|
plain Perl code and a few small utility modules. There are no grammar or |
|
312
|
|
|
|
|
|
|
pattern mini-languages, no YACC or LEX style tools and only a small number |
|
313
|
|
|
|
|
|
|
of regular expressions. |
|
314
|
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
This is primarily because of the sheer volume of accumulated cruft that |
|
316
|
|
|
|
|
|
|
exists in Perl. Not even perl itself is capable of parsing Perl documents |
|
317
|
|
|
|
|
|
|
(remember, it just parses and executes it as code). |
|
318
|
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
As a result, PPI needed to be cruftier than perl itself. Feel free to |
|
320
|
|
|
|
|
|
|
shudder at this point, and hope you never have to understand the Tokenizer |
|
321
|
|
|
|
|
|
|
codebase. Speaking of which... |
|
322
|
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
=head2 The Tokenizer |
|
324
|
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
The Tokenizer takes source code and converts it into a series of tokens. It |
|
326
|
|
|
|
|
|
|
does this using a slow but thorough character by character manual process, |
|
327
|
|
|
|
|
|
|
rather than using a pattern system or complex regexes. |
|
328
|
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
Or at least it does so conceptually. If you were to actually trace the code |
|
330
|
|
|
|
|
|
|
you would find it's not truly character by character due to a number of |
|
331
|
|
|
|
|
|
|
regexps and optimisations throughout the code. This lets the Tokenizer |
|
332
|
|
|
|
|
|
|
"skip ahead" when it can find shortcuts, so it tends to jump around a line |
|
333
|
|
|
|
|
|
|
a bit wildly at times. |
|
334
|
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
In practice, the number of times the Tokenizer will B<actually> move the |
|
336
|
|
|
|
|
|
|
character cursor itself is only about 5% - 10% higher than the number of |
|
337
|
|
|
|
|
|
|
tokens contained in the file. This makes it about as optimal as it can be |
|
338
|
|
|
|
|
|
|
made without implementing it in something other than Perl. |
|
339
|
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
In 2001 when PPI was started, this structure made PPI quite slow, and not |
|
341
|
|
|
|
|
|
|
really suitable for interactive tasks. This situation has improved greatly |
|
342
|
|
|
|
|
|
|
with multi-gigahertz processors, but can still be painful when working with |
|
343
|
|
|
|
|
|
|
very large files. |
|
344
|
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
The target parsing rate for PPI is about 5000 lines per gigacycle. It is |
|
346
|
|
|
|
|
|
|
currently believed to be at about 1500, and the main avenue for making it to |
|
347
|
|
|
|
|
|
|
the target speed has now become L<PPI::XS>, a drop-in XS accelerator for |
|
348
|
|
|
|
|
|
|
PPI. |
|
349
|
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
Since L<PPI::XS> has only just gotten off the ground and is currently only |
|
351
|
|
|
|
|
|
|
at proof-of-concept stage, this may take a little while. Anyone interested |
|
352
|
|
|
|
|
|
|
in helping out with L<PPI::XS> is B<highly> encouraged to contact the |
|
353
|
|
|
|
|
|
|
author. In fact, the design of L<PPI::XS> means it's possible to port |
|
354
|
|
|
|
|
|
|
one function at a time safely and reliably. So every little bit will help. |
|
355
|
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
=head2 The Lexer |
|
357
|
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
The Lexer takes a token stream, and converts it to a lexical tree. Because |
|
359
|
|
|
|
|
|
|
we are parsing Perl B<documents> this includes whitespace, comments, and |
|
360
|
|
|
|
|
|
|
all manner of weird things that have no relevance when code is actually |
|
361
|
|
|
|
|
|
|
executed. |
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
An instantiated L<PPI::Lexer> consumes L<PPI::Tokenizer> objects and |
|
364
|
|
|
|
|
|
|
produces L<PPI::Document> objects. However you should probably never be |
|
365
|
|
|
|
|
|
|
working with the Lexer directly. You should just be able to create |
|
366
|
|
|
|
|
|
|
L<PPI::Document> objects and work with them directly. |
|
367
|
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
=head2 The Perl Document Object Model |
|
369
|
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
The PDOM is a structured collection of data classes that together provide |
|
371
|
|
|
|
|
|
|
a correct and scalable model for documents that follow the standard Perl |
|
372
|
|
|
|
|
|
|
syntax. |
|
373
|
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
=head2 The PDOM Class Tree |
|
375
|
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
The following lists all of the 72 current PDOM classes, listing with indentation |
|
377
|
|
|
|
|
|
|
based on inheritance. |
|
378
|
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
PPI::Element |
|
380
|
|
|
|
|
|
|
PPI::Node |
|
381
|
|
|
|
|
|
|
PPI::Document |
|
382
|
|
|
|
|
|
|
PPI::Document::Fragment |
|
383
|
|
|
|
|
|
|
PPI::Statement |
|
384
|
|
|
|
|
|
|
PPI::Statement::Package |
|
385
|
|
|
|
|
|
|
PPI::Statement::Include |
|
386
|
|
|
|
|
|
|
PPI::Statement::Sub |
|
387
|
|
|
|
|
|
|
PPI::Statement::Scheduled |
|
388
|
|
|
|
|
|
|
PPI::Statement::Compound |
|
389
|
|
|
|
|
|
|
PPI::Statement::Break |
|
390
|
|
|
|
|
|
|
PPI::Statement::Given |
|
391
|
|
|
|
|
|
|
PPI::Statement::When |
|
392
|
|
|
|
|
|
|
PPI::Statement::Data |
|
393
|
|
|
|
|
|
|
PPI::Statement::End |
|
394
|
|
|
|
|
|
|
PPI::Statement::Expression |
|
395
|
|
|
|
|
|
|
PPI::Statement::Variable |
|
396
|
|
|
|
|
|
|
PPI::Statement::Null |
|
397
|
|
|
|
|
|
|
PPI::Statement::UnmatchedBrace |
|
398
|
|
|
|
|
|
|
PPI::Statement::Unknown |
|
399
|
|
|
|
|
|
|
PPI::Structure |
|
400
|
|
|
|
|
|
|
PPI::Structure::Block |
|
401
|
|
|
|
|
|
|
PPI::Structure::Subscript |
|
402
|
|
|
|
|
|
|
PPI::Structure::Constructor |
|
403
|
|
|
|
|
|
|
PPI::Structure::Condition |
|
404
|
|
|
|
|
|
|
PPI::Structure::List |
|
405
|
|
|
|
|
|
|
PPI::Structure::For |
|
406
|
|
|
|
|
|
|
PPI::Structure::Given |
|
407
|
|
|
|
|
|
|
PPI::Structure::When |
|
408
|
|
|
|
|
|
|
PPI::Structure::Unknown |
|
409
|
|
|
|
|
|
|
PPI::Token |
|
410
|
|
|
|
|
|
|
PPI::Token::Whitespace |
|
411
|
|
|
|
|
|
|
PPI::Token::Comment |
|
412
|
|
|
|
|
|
|
PPI::Token::Pod |
|
413
|
|
|
|
|
|
|
PPI::Token::Number |
|
414
|
|
|
|
|
|
|
PPI::Token::Number::Binary |
|
415
|
|
|
|
|
|
|
PPI::Token::Number::Octal |
|
416
|
|
|
|
|
|
|
PPI::Token::Number::Hex |
|
417
|
|
|
|
|
|
|
PPI::Token::Number::Float |
|
418
|
|
|
|
|
|
|
PPI::Token::Number::Exp |
|
419
|
|
|
|
|
|
|
PPI::Token::Number::Version |
|
420
|
|
|
|
|
|
|
PPI::Token::Word |
|
421
|
|
|
|
|
|
|
PPI::Token::DashedWord |
|
422
|
|
|
|
|
|
|
PPI::Token::Symbol |
|
423
|
|
|
|
|
|
|
PPI::Token::Magic |
|
424
|
|
|
|
|
|
|
PPI::Token::ArrayIndex |
|
425
|
|
|
|
|
|
|
PPI::Token::Operator |
|
426
|
|
|
|
|
|
|
PPI::Token::Quote |
|
427
|
|
|
|
|
|
|
PPI::Token::Quote::Single |
|
428
|
|
|
|
|
|
|
PPI::Token::Quote::Double |
|
429
|
|
|
|
|
|
|
PPI::Token::Quote::Literal |
|
430
|
|
|
|
|
|
|
PPI::Token::Quote::Interpolate |
|
431
|
|
|
|
|
|
|
PPI::Token::QuoteLike |
|
432
|
|
|
|
|
|
|
PPI::Token::QuoteLike::Backtick |
|
433
|
|
|
|
|
|
|
PPI::Token::QuoteLike::Command |
|
434
|
|
|
|
|
|
|
PPI::Token::QuoteLike::Regexp |
|
435
|
|
|
|
|
|
|
PPI::Token::QuoteLike::Words |
|
436
|
|
|
|
|
|
|
PPI::Token::QuoteLike::Readline |
|
437
|
|
|
|
|
|
|
PPI::Token::Regexp |
|
438
|
|
|
|
|
|
|
PPI::Token::Regexp::Match |
|
439
|
|
|
|
|
|
|
PPI::Token::Regexp::Substitute |
|
440
|
|
|
|
|
|
|
PPI::Token::Regexp::Transliterate |
|
441
|
|
|
|
|
|
|
PPI::Token::HereDoc |
|
442
|
|
|
|
|
|
|
PPI::Token::Cast |
|
443
|
|
|
|
|
|
|
PPI::Token::Structure |
|
444
|
|
|
|
|
|
|
PPI::Token::Label |
|
445
|
|
|
|
|
|
|
PPI::Token::Separator |
|
446
|
|
|
|
|
|
|
PPI::Token::Data |
|
447
|
|
|
|
|
|
|
PPI::Token::End |
|
448
|
|
|
|
|
|
|
PPI::Token::Prototype |
|
449
|
|
|
|
|
|
|
PPI::Token::Attribute |
|
450
|
|
|
|
|
|
|
PPI::Token::Unknown |
|
451
|
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
To summarize the above layout, all PDOM objects inherit from the |
|
453
|
|
|
|
|
|
|
L<PPI::Element> class. |
|
454
|
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
Under this are L<PPI::Token>, strings of content with a known type, |
|
456
|
|
|
|
|
|
|
and L<PPI::Node>, syntactically significant containers that hold other |
|
457
|
|
|
|
|
|
|
Elements. |
|
458
|
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
The three most important of these are the L<PPI::Document>, the |
|
460
|
|
|
|
|
|
|
L<PPI::Statement> and the L<PPI::Structure> classes. |
|
461
|
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
=head2 The Document, Statement and Structure |
|
463
|
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
At the top of all complete PDOM trees is a L<PPI::Document> object. It |
|
465
|
|
|
|
|
|
|
represents a complete file of Perl source code as you might find it on |
|
466
|
|
|
|
|
|
|
disk. |
|
467
|
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
There are some specialised types of document, such as L<PPI::Document::File> |
|
469
|
|
|
|
|
|
|
and L<PPI::Document::Normalized> but for the purposes of the PDOM they are |
|
470
|
|
|
|
|
|
|
all just considered to be the same thing. |
|
471
|
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
Each Document will contain a number of B<Statements>, B<Structures> and |
|
473
|
|
|
|
|
|
|
B<Tokens>. |
|
474
|
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
A L<PPI::Statement> is any series of Tokens and Structures that are treated |
|
476
|
|
|
|
|
|
|
as a single contiguous statement by perl itself. You should note that a |
|
477
|
|
|
|
|
|
|
Statement is as close as PPI can get to "parsing" the code in the sense that |
|
478
|
|
|
|
|
|
|
perl-itself parses Perl code when it is building the op-tree. |
|
479
|
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
Because of the isolation and Perl's syntax, it is provably impossible for |
|
481
|
|
|
|
|
|
|
PPI to accurately determine precedence of operators or which tokens are |
|
482
|
|
|
|
|
|
|
implicit arguments to a sub call. |
|
483
|
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
So rather than lead you on with a bad guess that has a strong chance of |
|
485
|
|
|
|
|
|
|
being wrong, PPI does not attempt to determine precedence or sub parameters |
|
486
|
|
|
|
|
|
|
at all. |
|
487
|
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
At a fundamental level, it only knows that this series of elements |
|
489
|
|
|
|
|
|
|
represents a single Statement as perl sees it, but it can do so with |
|
490
|
|
|
|
|
|
|
enough certainty that it can be trusted. |
|
491
|
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
However, for specific Statement types the PDOM is able to derive additional |
|
493
|
|
|
|
|
|
|
useful information about their meaning. For the best, most useful, and most |
|
494
|
|
|
|
|
|
|
heavily used example, see L<PPI::Statement::Include>. |
|
495
|
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
A L<PPI::Structure> is any series of tokens contained within matching braces. |
|
497
|
|
|
|
|
|
|
This includes code blocks, conditions, function argument braces, anonymous |
|
498
|
|
|
|
|
|
|
array and hash constructors, lists, scoping braces and all other syntactic |
|
499
|
|
|
|
|
|
|
structures represented by a matching pair of braces, including (although it |
|
500
|
|
|
|
|
|
|
may not seem obvious at first) C<E<lt>READLINEE<gt>> braces. |
|
501
|
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
Each Structure contains none, one, or many Tokens and Structures (the rules |
|
503
|
|
|
|
|
|
|
for which vary for the different Structure subclasses). |
|
504
|
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
Under the PDOM structure rules, a Statement can B<never> directly contain |
|
506
|
|
|
|
|
|
|
another child Statement, a Structure can B<never> directly contain another |
|
507
|
|
|
|
|
|
|
child Structure, and a Document can B<never> contain another Document |
|
508
|
|
|
|
|
|
|
anywhere in the tree. |
|
509
|
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
Aside from these three rules, the PDOM tree is extremely flexible. |
|
511
|
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
=head2 The PDOM at Work |
|
513
|
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
To demonstrate the PDOM in use, let's start with an example showing how the |
|
515
|
|
|
|
|
|
|
tree might look for the following chunk of simple Perl code. |
|
516
|
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
#!/usr/bin/perl |
|
518
|
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
print( "Hello World!" ); |
|
520
|
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
exit(); |
|
522
|
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
Translated into a PDOM tree it would have the following structure (as shown |
|
524
|
|
|
|
|
|
|
via the included L<PPI::Dumper>). |
|
525
|
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
PPI::Document |
|
527
|
|
|
|
|
|
|
PPI::Token::Comment '#!/usr/bin/perl\n' |
|
528
|
|
|
|
|
|
|
PPI::Token::Whitespace '\n' |
|
529
|
|
|
|
|
|
|
PPI::Statement |
|
530
|
|
|
|
|
|
|
PPI::Token::Word 'print' |
|
531
|
|
|
|
|
|
|
PPI::Structure::List ( ... ) |
|
532
|
|
|
|
|
|
|
PPI::Token::Whitespace ' ' |
|
533
|
|
|
|
|
|
|
PPI::Statement::Expression |
|
534
|
|
|
|
|
|
|
PPI::Token::Quote::Double '"Hello World!"' |
|
535
|
|
|
|
|
|
|
PPI::Token::Whitespace ' ' |
|
536
|
|
|
|
|
|
|
PPI::Token::Structure ';' |
|
537
|
|
|
|
|
|
|
PPI::Token::Whitespace '\n' |
|
538
|
|
|
|
|
|
|
PPI::Token::Whitespace '\n' |
|
539
|
|
|
|
|
|
|
PPI::Statement |
|
540
|
|
|
|
|
|
|
PPI::Token::Word 'exit' |
|
541
|
|
|
|
|
|
|
PPI::Structure::List ( ... ) |
|
542
|
|
|
|
|
|
|
PPI::Token::Structure ';' |
|
543
|
|
|
|
|
|
|
PPI::Token::Whitespace '\n' |
|
544
|
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
Please note that in this example, strings are only listed for the |
|
546
|
|
|
|
|
|
|
B<actual> L<PPI::Token> that contains that string. Structures are listed |
|
547
|
|
|
|
|
|
|
with the type of brace characters they represent noted. |
|
548
|
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
The L<PPI::Dumper> module can be used to generate similar trees yourself. |
|
550
|
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
We can make that PDOM dump a little easier to read if we strip out all the |
|
552
|
|
|
|
|
|
|
whitespace. Here it is again, sans the distracting whitespace tokens. |
|
553
|
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
PPI::Document |
|
555
|
|
|
|
|
|
|
PPI::Token::Comment '#!/usr/bin/perl\n' |
|
556
|
|
|
|
|
|
|
PPI::Statement |
|
557
|
|
|
|
|
|
|
PPI::Token::Word 'print' |
|
558
|
|
|
|
|
|
|
PPI::Structure::List ( ... ) |
|
559
|
|
|
|
|
|
|
PPI::Statement::Expression |
|
560
|
|
|
|
|
|
|
PPI::Token::Quote::Double '"Hello World!"' |
|
561
|
|
|
|
|
|
|
PPI::Token::Structure ';' |
|
562
|
|
|
|
|
|
|
PPI::Statement |
|
563
|
|
|
|
|
|
|
PPI::Token::Word 'exit' |
|
564
|
|
|
|
|
|
|
PPI::Structure::List ( ... ) |
|
565
|
|
|
|
|
|
|
PPI::Token::Structure ';' |
|
566
|
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
As you can see, the tree can get fairly deep at times, especially when every |
|
568
|
|
|
|
|
|
|
isolated token in a bracket becomes its own statement. This is needed to |
|
569
|
|
|
|
|
|
|
allow anything inside the tree the ability to grow. It also makes the |
|
570
|
|
|
|
|
|
|
search and analysis algorithms much more flexible. |
|
571
|
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
Because of the depth and complexity of PDOM trees, a vast number of very easy |
|
573
|
|
|
|
|
|
|
to use methods have been added wherever possible to help people working with |
|
574
|
|
|
|
|
|
|
PDOM trees do normal tasks relatively quickly and efficiently. |
|
575
|
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
=head2 Overview of the Primary Classes |
|
577
|
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
The main PPI classes, and links to their own documentation, are listed |
|
579
|
|
|
|
|
|
|
here in alphabetical order. |
|
580
|
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
=over 4 |
|
582
|
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
=item L<PPI::Document> |
|
584
|
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
The Document object, the root of the PDOM. |
|
586
|
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
=item L<PPI::Document::Fragment> |
|
588
|
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
A cohesive fragment of a larger Document. Although not of any real current |
|
590
|
|
|
|
|
|
|
use, it is needed for use in certain internal tree manipulation |
|
591
|
|
|
|
|
|
|
algorithms. |
|
592
|
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
For example, doing things like cut/copy/paste etc. Very similar to a |
|
594
|
|
|
|
|
|
|
L<PPI::Document>, but has some additional methods and does not represent |
|
595
|
|
|
|
|
|
|
a lexical scope boundary. |
|
596
|
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
A document fragment is also non-serializable, and so cannot be written out |
|
598
|
|
|
|
|
|
|
to a file. |
|
599
|
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
=item L<PPI::Dumper> |
|
601
|
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
A simple class for dumping readable debugging versions of PDOM structures, |
|
603
|
|
|
|
|
|
|
such as in the demonstration above. |
|
604
|
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
=item L<PPI::Element> |
|
606
|
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
The Element class is the abstract base class for all objects within the PDOM. |
|
608
|
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
=item L<PPI::Find> |
|
610
|
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
Implements an instantiable object form of a PDOM tree search. |
|
612
|
|
|
|
|
|
|
|
|
613
|
|
|
|
|
|
|
=item L<PPI::Lexer> |
|
614
|
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
The PPI Lexer. Converts Token streams into PDOM trees. |
|
616
|
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
=item L<PPI::Node> |
|
618
|
|
|
|
|
|
|
|
|
619
|
|
|
|
|
|
|
The Node object, the abstract base class for all PDOM objects that can |
|
620
|
|
|
|
|
|
|
contain other Elements, such as the Document, Statement and Structure |
|
621
|
|
|
|
|
|
|
objects. |
|
622
|
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
=item L<PPI::Statement> |
|
624
|
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
The base class for all Perl statements. Generic "evaluate for side-effects" |
|
626
|
|
|
|
|
|
|
statements are of this actual type. Other more interesting statement types |
|
627
|
|
|
|
|
|
|
belong to one of its children. |
|
628
|
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
See its own documentation for a longer description and list of all of the |
|
630
|
|
|
|
|
|
|
different statement types and sub-classes. |
|
631
|
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
=item L<PPI::Structure> |
|
633
|
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
The abstract base class for all structures. A Structure is a language |
|
635
|
|
|
|
|
|
|
construct consisting of matching braces containing a set of other elements. |
|
636
|
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
See the L<PPI::Structure> documentation for a description and |
|
638
|
|
|
|
|
|
|
list of all of the different structure types and sub-classes. |
|
639
|
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
=item L<PPI::Token> |
|
641
|
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
A token is the basic unit of content. At its most basic, a Token is just |
|
643
|
|
|
|
|
|
|
a string tagged with metadata (its class, and some additional flags in |
|
644
|
|
|
|
|
|
|
some cases). |
|
645
|
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
=item L<PPI::Token::_QuoteEngine> |
|
647
|
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
The L<PPI::Token::Quote> and L<PPI::Token::QuoteLike> classes provide |
|
649
|
|
|
|
|
|
|
abstract base classes for the many and varied types of quote and |
|
650
|
|
|
|
|
|
|
quote-like things in Perl. However, much of the actual quote logic is |
|
651
|
|
|
|
|
|
|
implemented in a separate quote engine, based at |
|
652
|
|
|
|
|
|
|
L<PPI::Token::_QuoteEngine>. |
|
653
|
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
Classes that inherit from L<PPI::Token::Quote>, L<PPI::Token::QuoteLike> |
|
655
|
|
|
|
|
|
|
and L<PPI::Token::Regexp> are generally parsed only by the Quote Engine. |
|
656
|
|
|
|
|
|
|
|
|
657
|
|
|
|
|
|
|
=item L<PPI::Tokenizer> |
|
658
|
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
The PPI Tokenizer. One Tokenizer consumes a chunk of text and provides |
|
660
|
|
|
|
|
|
|
access to a stream of L<PPI::Token> objects. |
|
661
|
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
The Tokenizer is very very complicated, to the point where even the author |
|
663
|
|
|
|
|
|
|
treads carefully when working with it. |
|
664
|
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
Most of the complication is the result of optimizations which have tripled |
|
666
|
|
|
|
|
|
|
the tokenization speed, at the expense of maintainability. We cope with the |
|
667
|
|
|
|
|
|
|
spaghetti by heavily commenting everything. |
|
668
|
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
=item L<PPI::Transform> |
|
670
|
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
The Perl Document Transformation API. Provides a standard interface and |
|
672
|
|
|
|
|
|
|
abstract base class for objects and classes that manipulate Documents. |
|
673
|
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
=back |
|
675
|
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
=head1 INSTALLING |
|
677
|
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
The core PPI distribution is pure Perl and has been kept as tight as |
|
679
|
|
|
|
|
|
|
possible and with as few dependencies as possible. |
|
680
|
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
It should download and install normally on any platform from within |
|
682
|
|
|
|
|
|
|
the CPAN and CPANPLUS applications, or directly using the distribution |
|
683
|
|
|
|
|
|
|
tarball. If installing by hand, you may need to install a few small |
|
684
|
|
|
|
|
|
|
utility modules first. The exact ones will depend on your version of |
|
685
|
|
|
|
|
|
|
perl. |
|
686
|
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
There are no special install instructions for PPI, and the normal |
|
688
|
|
|
|
|
|
|
C<perl Makefile.PL>, C<make>, C<make test>, C<make install> instructions |
|
689
|
|
|
|
|
|
|
apply. |
|
690
|
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
=head1 EXTENDING |
|
692
|
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
The PPI namespace itself is reserved for use by PPI itself. |
|
694
|
|
|
|
|
|
|
You are encouraged to use the PPIx:: namespace for PPI-specific |
|
695
|
|
|
|
|
|
|
modifications or prototypes thereof, or Perl:: for modules which provide |
|
696
|
|
|
|
|
|
|
general Perl language-related functions. |
|
697
|
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
If what you wish to implement looks like it fits into the PPIx:: namespace, |
|
699
|
|
|
|
|
|
|
you should consider contacting the PPI maintainers on GitHub first, as what |
|
700
|
|
|
|
|
|
|
you want may already be in progress, or you may wish to consider contributing |
|
701
|
|
|
|
|
|
|
to PPI itself. |
|
702
|
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
=head1 TO DO |
|
704
|
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
- Many more analysis and utility methods for PDOM classes |
|
706
|
|
|
|
|
|
|
|
|
707
|
|
|
|
|
|
|
- Creation of a PPI::Tutorial document |
|
708
|
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
- Add many more key functions to PPI::XS |
|
710
|
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
- We can B<always> write more and better unit tests |
|
712
|
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
- Complete the full implementation of -E<gt>literal (1.200) |
|
714
|
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
- Full understanding of scoping (due 1.300) |
|
716
|
|
|
|
|
|
|
|
|
717
|
|
|
|
|
|
|
=head1 SUPPORT |
|
718
|
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
The most recent version of PPI is available at the following address. |
|
720
|
|
|
|
|
|
|
|
|
721
|
|
|
|
|
|
|
L<https://metacpan.org/pod/PPI> |
|
722
|
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
PPI source is maintained in a GitHub repository at the following address. |
|
724
|
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
L<https://github.com/Perl-Critic/PPI> |
|
726
|
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
Contributions via GitHub pull request are welcome. |
|
728
|
|
|
|
|
|
|
|
|
729
|
|
|
|
|
|
|
Bug fixes in the form of pull requests or bug reports with |
|
730
|
|
|
|
|
|
|
new (failing) unit tests have the best chance of being addressed |
|
731
|
|
|
|
|
|
|
by busy maintainers, and are B<strongly> encouraged. |
|
732
|
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
If you cannot provide a test or fix, or don't have time to do so, |
|
734
|
|
|
|
|
|
|
then regular bug reports are still accepted and appreciated via the |
|
735
|
|
|
|
|
|
|
GitHub bug tracker. |
|
736
|
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
L<https://github.com/Perl-Critic/PPI/issues> |
|
738
|
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
The C<ppidump> utility that is part of the L<Perl::Critic> distribution |
|
740
|
|
|
|
|
|
|
is a useful tool for demonstrating how PPI is parsing (or misparsing) |
|
741
|
|
|
|
|
|
|
small code snippets, and for providing information for bug reports. |
|
742
|
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
For other issues, questions, or commercial or media-related enquiries, |
|
744
|
|
|
|
|
|
|
contact the author. |
|
745
|
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
=head1 AUTHOR |
|
747
|
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
Adam Kennedy E<lt>adamk@cpan.orgE<gt> |
|
749
|
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
=head1 ACKNOWLEDGMENTS |
|
751
|
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
A huge thank you to Phase N Australia |
|
753
|
|
|
|
|
|
|
(L<https://web.archive.org/web/20240304122957/http://www.phase-n.com/>) for |
|
754
|
|
|
|
|
|
|
permitting the original open sourcing and release of this distribution from |
|
755
|
|
|
|
|
|
|
what was originally several thousand hours of commercial work. |
|
756
|
|
|
|
|
|
|
|
|
757
|
|
|
|
|
|
|
Another big thank you to The Perl Foundation |
|
758
|
|
|
|
|
|
|
(L<http://www.perlfoundation.org/>) for funding for the final big |
|
759
|
|
|
|
|
|
|
refactoring and completion run. |
|
760
|
|
|
|
|
|
|
|
|
761
|
|
|
|
|
|
|
Also, to the various co-maintainers that have contributed both large and |
|
762
|
|
|
|
|
|
|
small with tests and patches and especially to those rare few who have |
|
763
|
|
|
|
|
|
|
deep-dived into the guts to (gasp) add a feature. |
|
764
|
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
- Dan Brook : PPIx::XPath, Acme::PerlML |
|
766
|
|
|
|
|
|
|
- Audrey Tang : "Line Noise" Testing |
|
767
|
|
|
|
|
|
|
- Arjen Laarhoven : Three-element ->location support |
|
768
|
|
|
|
|
|
|
- Elliot Shank : Perl 5.10 support, five-element ->location |
|
769
|
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
And finally, thanks to those brave ( and foolish :) ) souls willing to dive |
|
771
|
|
|
|
|
|
|
in and use, test drive and provide feedback on PPI before version 1.000, |
|
772
|
|
|
|
|
|
|
in some cases before it made it to beta quality, and still did extremely |
|
773
|
|
|
|
|
|
|
distasteful things (like eating 50 meg of RAM a second). |
|
774
|
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
I owe you all a beer. Corner me somewhere and collect at your convenience. |
|
776
|
|
|
|
|
|
|
If I missed someone who wasn't in my email history, thank you too :) |
|
777
|
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
# In approximate order of appearance |
|
779
|
|
|
|
|
|
|
- Claes Jacobsson |
|
780
|
|
|
|
|
|
|
- Michael Schwern |
|
781
|
|
|
|
|
|
|
- Jeff T. Parsons |
|
782
|
|
|
|
|
|
|
- CPAN Author "CHOCOLATEBOY" |
|
783
|
|
|
|
|
|
|
- Robert Rothenberg |
|
784
|
|
|
|
|
|
|
- CPAN Author "PODMASTER" |
|
785
|
|
|
|
|
|
|
- Richard Soderberg |
|
786
|
|
|
|
|
|
|
- Nadim ibn Hamouda el Khemir |
|
787
|
|
|
|
|
|
|
- Graciliano M. P. |
|
788
|
|
|
|
|
|
|
- Leon Brocard |
|
789
|
|
|
|
|
|
|
- Jody Belka |
|
790
|
|
|
|
|
|
|
- Curtis Ovid |
|
791
|
|
|
|
|
|
|
- Yuval Kogman |
|
792
|
|
|
|
|
|
|
- Michael Schilli |
|
793
|
|
|
|
|
|
|
- Slaven Rezic |
|
794
|
|
|
|
|
|
|
- Lars Thegler |
|
795
|
|
|
|
|
|
|
- Tony Stubblebine |
|
796
|
|
|
|
|
|
|
- Tatsuhiko Miyagawa |
|
797
|
|
|
|
|
|
|
- CPAN Author "CHROMATIC" |
|
798
|
|
|
|
|
|
|
- Matisse Enzer |
|
799
|
|
|
|
|
|
|
- Roy Fulbright |
|
800
|
|
|
|
|
|
|
- Dan Brook |
|
801
|
|
|
|
|
|
|
- Johnny Lee |
|
802
|
|
|
|
|
|
|
- Johan Lindstrom |
|
803
|
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
And to single one person out, thanks go to Randal Schwartz who |
|
805
|
|
|
|
|
|
|
spent a great number of hours in IRC over a critical 6 month period |
|
806
|
|
|
|
|
|
|
explaining why Perl is impossibly unparsable and constantly shoving evil |
|
807
|
|
|
|
|
|
|
and ugly corner cases in my face. He remained a tireless devil's advocate, |
|
808
|
|
|
|
|
|
|
and without his support this project genuinely could never have been |
|
809
|
|
|
|
|
|
|
completed. |
|
810
|
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
So for my schooling in the Deep Magiks, you have my deepest gratitude, Randal. |
|
812
|
|
|
|
|
|
|
|
|
813
|
|
|
|
|
|
|
=head1 COPYRIGHT |
|
814
|
|
|
|
|
|
|
|
|
815
|
|
|
|
|
|
|
Copyright 2001 - 2011 Adam Kennedy. |
|
816
|
|
|
|
|
|
|
|
|
817
|
|
|
|
|
|
|
This program is free software; you can redistribute |
|
818
|
|
|
|
|
|
|
it and/or modify it under the same terms as Perl itself. |
|
819
|
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
The full text of the license can be found in the |
|
821
|
|
|
|
|
|
|
LICENSE file included with this module. |
|
822
|
|
|
|
|
|
|
|
|
823
|
|
|
|
|
|
|
=cut |