File Coverage

blib/lib/Perl/Strip.pm

Criterion	Covered	Total	%
statement	9	90	10.0
branch	0	48	0.0
condition	0	48	0.0
subroutine	3	6	50.0
pod	2	2	100.0
total	14	194	7.2

line	stmt	bran	cond	sub	pod	time	code
1							=head1 NAME
2
3							Perl::Strip - reduce file size by stripping whitespace, comments, pod etc.
4
5							=head1 SYNOPSIS
6
7							use Perl::Strip;
8
9							=head1 DESCRIPTION
10
11							This module transforms perl sources into a more compact format. It does
12							this by removing most whitespace, comments, pod, and by some other means.
13
14							The resulting code looks obfuscated, but perl (and the deparser) don't
15							have any problems with that. Depending on the source file you can expect
16							about 30-60% "compression".
17
18							The main target for this module is low-diskspace environments, such as
19							L, boot floppy/CDs/flash environments and so on.
20
21							See also the commandline utility L.
22
23							=head1 METHODS
24
25							The C class is a subclsass of L, and as such
26							inherits all of it's methods, even the ones not documented here.
27
28							=over 4
29
30							=cut
31
32							package Perl::Strip;
33
34							our $VERSION = '1.1';
35							our $CACHE_VERSION = 2;
36
37	1			1		2557	use common::sense;
	1					11
	1					6
38
39	1			1		1534	use PPI;
	1					400023
	1					49
40
41	1			1		12	use base PPI::Transform::;
	1					8
	1					969
42
43							=item my $transform = new Perl::Strip key => value...
44
45							Creates a new Perl::Strip transform object. It supports the following
46							parameters:
47
48							=over 4
49
50							=item optimise_size => $bool
51
52							By default, this module optimises I, not raw size. This
53							switch changes that (and makes it slower).
54
55							=item keep_nl => $bool
56
57							By default, whitespace will either be stripped or replaced by a space. If
58							this option is enabled, then newlines will not be removed. This has the
59							advantage of keeping line number information intact (e.g. for backtraces),
60							but of course doesn't compress as well.
61
62							=item cache => $path
63
64							Since this module can take a very long time (minutes for the larger files
65							in the perl distribution), it can utilise a cache directory. The directory
66							will be created if it doesn't exist, and can be deleted at any time.
67
68							=back
69
70							=cut
71
72							# PPI::Transform compatible
73							sub document {
74	0			0	1		my ($self, $doc) = @_;
75
76	0						$self->{optimise_size} = 1; # more research is needed
77
78							# special stripping for unicore/ files
79	0	0					if (eval { $doc->child (1)->content =~ /^# .* (build by mktables\|machine-generated .*mktables) / }) {
	0
80
81	0						for my $heredoc (@{ $doc->find (PPI::Token::HereDoc::) }) {
	0
82	0						my $src = join "", $heredoc->heredoc;
83
84							# special stripping for unicore swashes and properties
85							# much more could be done by going binary
86	0						for ($src) {
87	0	0					s/^(?:0([0-9a-fA-F]+))?\t(?:0([0-9a-fA-F]+))?\t(?:0*([0-9a-fA-F]+))?/$1\t$2\t$3/gm
88							if $self->{optimise_size};
89
90							# s{
91							# ^([0-9a-fA-F]+)\t([0-9a-fA-F]*)\t
92							# }{
93							# # ww - smaller filesize, UU - compress better
94							# pack "C0UU",
95							# hex $1,
96							# length $2 ? (hex $2) - (hex $1) : 0
97							# }gemx;
98
99	0						s/#.*\n/\n/mg;
100	0						s/\s+\n/\n/mg;
101							}
102
103							# PPI seems to be mostly undocumented
104	0						$heredoc->{_heredoc} = [split /$/, $src];
105							}
106							}
107
108	0						$doc->prune (PPI::Token::Comment::);
109	0						$doc->prune (PPI::Token::Pod::);
110
111							# prune END stuff
112	0						for (my $last = $doc->last_element; $last; ) {
113	0						my $prev = $last->previous_token;
114
115	0	0					if ($last->isa (PPI::Token::Whitespace::)) {
		0
		0
116	0						$last->delete;
117							} elsif ($last->isa (PPI::Statement::End::)) {
118	0						$last->delete;
119	0						last;
120							} elsif ($last->isa (PPI::Token::Pod::)) {
121	0						$last->delete;
122							} else {
123	0						last;
124							}
125
126	0						$last = $prev;
127							}
128
129							# prune some but not all insignificant whitespace
130	0						for my $ws (@{ $doc->find (PPI::Token::Whitespace::) }) {
	0
131	0						my $prev = $ws->previous_token;
132	0						my $next = $ws->next_token;
133
134	0	0	0				if (!$prev \|\| !$next) {
135	0						$ws->delete;
136							} else {
137	0	0	0				if (
		0	0
			0
			0
			0
			0
			0
			0
138							$next->isa (PPI::Token::Operator::) && $next->{content} =~ /^(?:,\|=\|!\|!=\|==\|=>)$/ # no ., because of digits. == float
139							or $prev->isa (PPI::Token::Operator::) && $prev->{content} =~ /^(?:,\|=\|\.\|!\|!=\|==\|=>)$/
140							or $prev->isa (PPI::Token::Structure::)
141							or ($self->{optimise_size} &&
142							($prev->isa (PPI::Token::Word::)
143							&& (PPI::Token::Symbol:: eq ref $next
144							\|\| $next->isa (PPI::Structure::Block::)
145							\|\| $next->isa (PPI::Structure::List::)
146							\|\| $next->isa (PPI::Structure::Condition::)))
147							)
148							) {
149	0						$ws->delete;
150							} elsif ($prev->isa (PPI::Token::Whitespace::)) {
151	0						$ws->{content} = ' ';
152	0						$prev->delete;
153							} else {
154	0						$ws->{content} = ' ';
155							}
156							}
157							}
158
159							# prune whitespace around blocks, also ";" at end of blocks
160	0	0					if ($self->{optimise_size}) {
161							# these usually decrease size, but decrease compressability more
162	0						for my $struct (PPI::Structure::Block::, PPI::Structure::Condition::, PPI::Structure::List::) {
163	0						for my $node (@{ $doc->find ($struct) }) {
	0
164	0						my $n1 = $node->first_token;
165							# my $n2 = $n1->previous_token;
166	0						my $n3 = $n1->next_token;
167	0	0					$n1->delete if $n1->isa (PPI::Token::Whitespace::);
168							# $n2->delete if $n2 && $n2->isa (PPI::Token::Whitespace::); # unsafe! AE::timer $MAX_SIGNAL_LATENCY -($NOW - int$NOW)
169	0	0	0				$n3->delete if $n3 && $n3->isa (PPI::Token::Whitespace::);
170	0						my $n1 = $node->last_token;
171	0						my $n2 = $n1->next_token;
172	0						my $n3 = $n1->previous_token;
173	0	0					$n1->delete if $n1->isa (PPI::Token::Whitespace::);
174	0	0	0				$n2->delete if $n2 && $n2->isa (PPI::Token::Whitespace::);
175	0	0	0				$n3->{content} = "" # delete seems to trigger a bug inside PPI
			0
176							if $n3 && ($n3->isa (PPI::Token::Whitespace::)
177							\|\| ($n3->isa (PPI::Token::Structure::) && $n3->content eq ";"));
178							}
179							}
180							}
181
182							# foreach => for
183	0						for my $node (@{ $doc->find (PPI::Statement::Compound::) }) {
	0
184	0	0					if (my $n = $node->first_token) {
185	0	0	0				$n->{content} = "for" if $n->{content} eq "foreach" && $n->isa (PPI::Token::Word::);
186							}
187							}
188
189							# reformat qw() lists which often have lots of whitespace
190	0						for my $node (@{ $doc->find (PPI::Token::QuoteLike::Words::) }) {
	0
191	0	0					if ($node->{content} =~ /^qw(.)(.*)(.)$/s) {
192	0						my ($a, $qw, $b) = ($1, $2, $3);
193	0						$qw =~ s/^\s+//;
194	0						$qw =~ s/\s+$//;
195	0						$qw =~ s/\s+/ /g;
196	0						$node->{content} = "qw$a$qw$b";
197							}
198							}
199
200							# prune return at end of sub-blocks
201							#TODO:
202							# PPI::Document
203							# PPI::Statement::Sub
204							# PPI::Token::Word 'sub'
205							# PPI::Token::Whitespace ' '
206							# PPI::Token::Word 'f'
207							# PPI::Structure::Block { ... }
208							# PPI::Statement
209							# PPI::Token::Word 'sub'
210							# PPI::Structure::Block { ... }
211							# PPI::Statement::Break
212							# PPI::Token::Word 'return'
213							# PPI::Token::Whitespace ' '
214							# PPI::Token::Number '5'
215							# PPI::Token::Structure ';'
216							# PPI::Statement::Compound
217							# PPI::Structure::Block { ... }
218							# PPI::Statement::Break
219							# PPI::Token::Word 'return'
220							# PPI::Token::Whitespace ' '
221							# PPI::Token::Number '8'
222							# PPI::Statement::Break
223							# PPI::Token::Word 'return'
224							# PPI::Token::Whitespace ' '
225							# PPI::Token::Number '7'
226
227							1
228	0						}
229
230							=item $perl = $transform->strip ($perl)
231
232							Strips the perl source in C<$perl> and returns the stripped source.
233
234							=cut
235
236							sub strip {
237	0			0	1		my ($self, $src) = @_;
238
239							my $filter = sub {
240	0	0		0			my $ppi = new PPI::Document \$src
241							or return;
242
243	0	0					$self->document ($ppi)
244							or return;
245
246	0						$src = $ppi->serialize;
247	0						};
248
249	0	0	0				if (exists $self->{cache} && (2048 <= length $src)) {
250	0						my $file = "$self->{cache}/" . Digest::MD5::md5_hex "$CACHE_VERSION \n" . (!!$self->{optimise_size}) . "\n\x00$src";
251
252	0	0					if (open my $fh, "<:perlio", $file) {
253							# zero size means unchanged
254	0	0					if (-s $fh) {
255	0						local $/;
256	0						$src = <$fh>
257							}
258							} else {
259	0						my $oldsrc = $src;
260
261	0						$filter->();
262
263	0						mkdir $self->{cache};
264
265	0	0					if (open my $fh, ">:perlio", "$file~") {
266							# write a zero-byte file if source is unchanged
267	0	0	0				if ($oldsrc eq $src or (syswrite $fh, $src) == length $src) {
268	0						close $fh;
269	0						rename "$file~", $file;
270							}
271							}
272							}
273							} else {
274	0						$filter->();
275							}
276
277	0						$src
278							}
279
280							=back
281
282							=head1 SEE ALSO
283
284							L, L.
285
286							=head1 AUTHOR
287
288							Marc Lehmann
289							http://home.schmorp.de/
290
291							=cut
292
293							1;
294