File Coverage

blib/lib/JavaScript/Squish.pm

Criterion	Covered	Total	%
statement	254	304	83.5
branch	89	144	61.8
condition	31	54	57.4
subroutine	20	23	86.9
pod	19	19	100.0
total	413	544	75.9

line	stmt	bran	cond	sub	pod	time	code
1
2							package JavaScript::Squish;
3
4							=head1 NAME
5
6							JavaScript::Squish - Reduce/Compact JavaScript code to as few characters as possible.
7
8							=head1 SYNOPSIS
9
10							use JavaScript::Squish;
11							my $compacted = JavaScript::Squish->squish(
12							$javascript,
13							remove_comments_exceptions => qr/copyright/i )
14							or die $JavaScript::Squish::err_msg;
15
16							# OR, to just do a few steps #
17
18							my $c = JavaScript::Squish->new();
19							$c->data( $javascript );
20							$c->extract_strings_and_comments();
21							$c->replace_white_space();
22							my $new = $c->data();
23
24							=head1 DESCRIPTION
25
26							This module provides methods to compact javascript source down to just what is needed. It can remove all comments, put everything on one line (semi-)safely, and remove extra whitespace.
27
28							Any one of the various compacting techniques can be applied individually, or with in any group.
29
30							It also provides a means by which to extract all text literals or comments in separate arrays in the order they appear.
31
32							Since JavaScript eats up bandwidth, this can be very helpful, and you can then be free to properly comment your JavaScript without fear of burning up too much bandwidth.
33
34							=head2 EXPORT
35
36							None by default.
37
38							"squish" may be exported via "use JavaScript::Squish qw(squish);"
39
40							=head1 METHODS
41
42							=head2 Bsquish($js [, %options] )>
43
44							Class method. This is a wrapper around all methods in here, to allow you to do all compacting operations in one call.
45
46							my $squished = JavaScript::Squish->squish( $javascript );
47
48							Current supported options:
49
50							=over
51
52							=item remove_comments_exceptions : array ref of regexp's
53
54							Bsquish($js, remove_comments_exceptions =E [ qr/copyright/i ] )>
55
56							Any comment strings matching any of the supplied regexp's will not be removed. This is the recommended way to retain copyright notices, while still compacting out all other comments.
57
58							=back
59
60							=head2 Bnew()>
61
62							Constructor. Currently takes no options. Returns JavaScript::Squish object.
63
64							NOTE: if you want to specify a "remove_comments_exceptions" option via one of these object, you must do so directly against the C method (SEE BELOW).
65
66							=head2 B<$djc-Edata($js)>
67
68							If the option C<$js> is passed in, this sets the javascript that will be worked on.
69
70							If not passed in, this returns the javascript in whatever state it happens to be in (so you can step through, and pull the data out at any time).
71
72							=head2 B<$djc-Estrings()>
73
74							Returns all strings extracted by either C or C (NOTE: be sure to call one of the aforementioned extract methods prior to C, or you won't get anything back).
75
76							=head2 B<$djc-Ecomments()>
77
78							Returns all comments extracted by either C or C (NOTE: be sure to call one of the aforementioned extract methods prior to C, or you won't get anything back).
79
80							=head2 B<$djc-Edetermine_line_ending()>
81
82							Method to automatically determine the line ending character in the source data.
83
84							=head2 B<$djc-Eeol_char("\n")>
85
86							Method to set/override the line ending character which will be used to parse/join lines. Set to "\r\n" if you are working on a DOS / Windows formatted file.
87
88							=head2 B<$djc-Eextract_strings_and_comments()>
89
90							Finds all string literals (eg. things in quotes) and comments (// or /.../) and replaces them with tokens of the form "\0\0N\0\0" and "\0\0_N_\0\0" respectively, where N is the occurrance number in the file, and \0 is the null byte. The strings are stored inside the object so they may be resotred later.
91
92							After calling this, you may retrieve a list of all extracted strings or comments using the C or C methods.
93
94							=head2 B<$djc-Eextract_literal_strings()>
95
96							This is a wrapper around C, which will restore all comments afterwards (if they had not been stripped prior to its call).
97
98							NOTE: sets C<$djc-Estrings()>
99
100							=head2 B<$djc-Eextract_comments()>
101
102							This is a wrapper around C, which will restore all literal strings afterwards (if they had not been stripped prior to its call).
103
104							NOTE: sets C<$djc-Ecomments()>
105
106							=head2 B<$djc-Ereplace_white_space()>
107
108							Per each line:
109
110							=over
111
112							=item * Removes all begining of line whitespace.
113
114							=item * Removes all end of line whitespace.
115
116							=item * Combined all series of whitespace into one space character (eg. s/\s+/ /g)
117
118							=back
119
120							Comments and string literals (if still embeded) are untouched.
121
122							=head2 B<$djc-Eremove_blank_lines()>
123
124							...does what it says.
125
126							Comments and string literals (if still embeded) are untouched.
127
128							=head2 B<$djc-Ecombine_concats()>
129
130							Removes any string literal concatenations. Eg.
131
132							"bob and " + "sam " + someVar;
133
134							Becomes:
135
136							"bob and sam " + someVar
137
138							Comments (if still embeded) are untouched.
139
140							=head2 B<$djc-Ejoin_all()>
141
142							Puts everything on one line.
143
144							Coments begining with "//", if still embeded, are the exception, as they require a new line character at the end of the comment.
145
146							=head2 B<$djc-Ereplace_extra_whitespace()>
147
148							This removes any excess whitespace. Eg.
149
150							if (someVar = "foo") {
151
152							Becomes:
153
154							if(someVar="foo"){
155
156							Comments and string literals (if still embeded) are untouched.
157
158							=head2 B<$djc-Eremove_comments(%options)>
159
160							Current supported options:
161
162							=over
163
164							=item exceptions : array ref of regexp's
165
166							B<$djc-Eremove_comments( exceptions =E [ qr/copyright/i ] )>
167
168							Any comment strings matching any of the supplied regexp's will not be removed. This is the recommended way to retain copyright notices, while still compacting out all other comments.
169
170							=back
171
172							NOTE: this is destructive (ie. you cannot restore comments after this has been called).
173
174							=head2 B<$djc-Erestore_comments()>
175
176							All comments that were extracted with C<$djc-Eextract_strings_and_comments()> or C<$djc-Eextract_comments()> are restored. Comments retain all spacing and extra lines and such.
177
178							=head2 B<$djc-Erestore_literal_strings()>
179
180							All string literals that were extracted with C<$djc-Eextract_strings_and_comments()> or C<$djc-Eextract_comments()> are restored. String literals retain all spacing and extra lines and such.
181
182							=head2 B<$djc-Ereplace_final_eol()>
183
184							Prior to this being called, the end of line may not terminated with a new line character (especially after some of the steps above). This assures the data ends in at least one of whatever is set in C<$djc-Eeol_char()>.
185
186							=head1 NOTES
187
188							The following should only cause an issue in rare and odd situations... If the input file is in dos format (line termination with "\r\n" (ie. CR LF / Carriage return Line feed)), we'll attempt to make the output the same. If you have a mixture of embeded "\r\n" and "\n" characters (not escaped, those are still safe) then this script may get confused and make them all conform to whatever is first seen in the file.
189
190							The line-feed stripping isn't as thorough as it could be. It matches the behavior of JSMIN, and goes one step better with replace_extra_whitespace(), but I'm certain there are edge cases that could be optimised further. This shouldn't cause a noticable increase in size though.
191
192							=head1 TODO
193
194							Function and variable renaming, and other more dangerous compating techniques.
195
196							Currently, JavaScript::Squish::err_msg never gets set, as we die on any real errors. We should look into returning proper error codes and setting this if needed.
197
198							Fix Bugs :-)
199
200							=head1 BUGS
201
202							There are a few bugs, which may rear their head in some minor situations.
203
204							=over
205
206							=item Statements not terminated by semi-colon.
207
208							These should be ok now - leaving a note here because this hasn't been thoroughly tested (I don't have any javascript to test with that meets this criteria).
209
210							This would affect statements like the following:
211
212							i = 5.4
213							j = 42
214
215							This used to become "i=5.4 j=42", and would generate an error along the lines of "expected ';' before statement".
216
217							The linebreak should be retained now. Please let me know if you see otherwise.
218
219							=item Ambiguous operator precidence
220
221							Operator precidence may get screwed up in ambiguous statements. Eg. "x = y + ++b;" will be compacted into "x=y+++b;", which means something different.
222
223							=back
224
225							Still looking for them. If you find some, let us know.
226
227							=head1 SEE ALSO
228
229							=over
230
231							=item Latest releases, bugzilla, cvs repository, etc:
232
233							https://developer.berlios.de/projects/jscompactor/
234
235							=item Simlar projects:
236
237							http://crockford.com/javascript/jsmin
238							http://search.cpan.org/%7Epmichaux/JavaScript-Minifier/lib/JavaScript/Minifier.pm
239							http://dojotoolkit.org/docs/shrinksafe
240							http://dean.edwards.name/packer/
241
242							=back
243
244							=head1 AUTHOR
245
246							Joshua I. Miller
247
248							=head1 COPYRIGHT AND LICENSE
249
250							Copyright (c) 2005 by CallTech Communications, Inc.
251
252							This library is free software; you can redistribute it and/or modify
253							it under the same terms as Perl itself, either Perl version 5.8.3 or,
254							at your option, any later version of Perl 5 you may have available.
255
256							=cut
257
258	3			3		27560	use 5.00503;
	3					13
	3					167
259	3			3		17	use strict;
	3					4
	3					142
260	3			3		18	use Carp qw(croak carp);
	3					10
	3					228
261
262							require Exporter;
263	3			3		22	use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
	3					4
	3					10647
264							@ISA = qw(Exporter);
265
266							%EXPORT_TAGS = ( 'all' => [ qw( squish ) ] );
267
268							@EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
269
270							@EXPORT = qw( );
271
272							$VERSION = '0.07';
273
274							sub squish
275							{
276	1			1	1	1801	my $this = shift;
277
278							# squish() can be used as a class method or instance method
279	1	50				6	unless (ref $this)
280							{
281	1					6	$this = $this->new();
282							}
283
284							{
285	1	50				3	my $data = (ref($_[0]) eq 'SCALAR') ? ${(shift)} : shift;
	1					5
	0					0
286	1					6	$this->data($data);
287							}
288	1	50				6	my %opts = (ref($_[0]) eq 'HASH') ? %{$_[0]} : @_;
	0					0
289
290							# determine line ending
291	1	50				7	print STDERR "Determining line ending format (LF \|\| CRLF)...\n" if $opts{DEBUG};
292	1					5	$this->determine_line_ending();
293
294							# extract literal strings and comments
295	1	50				4	print STDERR "Replacing literal strings and comments...\n" if $opts{DEBUG};
296	1					5	$this->extract_strings_and_comments();
297
298							# remove comments
299	1	50				5	print STDERR "Removing comments...\n" if $opts{DEBUG};
300	1					4	my %rc_opts = ();
301	1	50				4	$rc_opts{exceptions} = $opts{remove_comments_exceptions} if $opts{remove_comments_exceptions};
302	1					8	$this->remove_comments(%rc_opts);
303
304							# replace white space
305	1	50				4	print STDERR "Replacing white space...\n" if $opts{DEBUG};
306	1					11	$this->replace_white_space();
307
308							# remove blank lines
309	1	50				7	print STDERR "Removing blank lines...\n" if $opts{DEBUG};
310	1					4	$this->remove_blank_lines();
311
312							# combine literal string concatenators
313	1	50				4	print STDERR "Combining literal string concatenators...\n" if $opts{DEBUG};
314	1					5	$this->combine_concats();
315
316							# join all lines
317	1	50				3	print STDERR "Joining all lines...\n" if $opts{DEBUG};
318	1					4	$this->join_all();
319
320							# replace extra extra whitespace
321	1	50				19	print STDERR "Replacing extra extra whitespace...\n" if $opts{DEBUG};
322	1					4	$this->replace_extra_whitespace();
323
324							# restore literals
325	1	50				3	print STDERR "Restoring all literal strings...\n" if $opts{DEBUG};
326	1					5	$this->restore_literal_strings();
327
328							# replace final EOL
329	1	50				4	print STDERR "Replace final EOL...\n" if $opts{DEBUG};
330	1					4	$this->replace_final_eol();
331
332	1					3	return $this->data;
333							}
334
335							sub new
336							{
337	2			2	1	2354	my $proto = shift;
338	2		33			18	my $class = ref($proto) \|\| $proto;
339
340	2					17	my $this = {
341							data => '',
342							strings => [ ],
343							comments => [ ],
344							eol => "\n",
345							_strings_extracted => 0, # status var
346							_comments_extracted => 0, # status var
347							};
348	2					8	bless $this, $class;
349
350	2					6	return $this;
351							}
352
353							sub data
354							{
355	61			61	1	693	my $this = shift;
356	61	100				99	if ($_[0]) {
357	23	50				58	my $data = (ref($_[0]) eq 'SCALAR') ? ${$_[0]} : $_[0];
	0					0
358	23					84	$this->{data} = $_[0];
359							} else {
360	38					478	return $this->{data};
361							}
362							}
363
364							sub strings
365							{
366	12			12	1	17	my $this = shift;
367	12	100				24	if ($_[0]) {
368	6					13	$this->{strings} = $_[0];
369							} else {
370	6					15	return $this->{strings};
371							}
372							}
373
374							sub comments
375							{
376	12			12	1	15	my $this = shift;
377	12	100				24	if ($_[0]) {
378	6					15	$this->{comments} = $_[0];
379							} else {
380	6					15	return $this->{comments};
381							}
382							}
383
384							sub eol_char
385							{
386	163			163	1	712	my $this = shift;
387	163	100				243	if ($_[0]) {
388	4					22	$this->{eol} = $_[0];
389							} else {
390	159					632	return $this->{eol};
391							}
392							}
393
394							sub determine_line_ending
395							{
396	2			2	1	191	my $this = shift;
397
398							# Where is the first LF character?
399	2					8	my $lf_position = index($this->data, "\n");
400	2	50				10	if ($lf_position == -1)
401							{ # not found, set to default, cause it won't (shouldn't) matter
402	0					0	$this->eol_char("\n");
403							} else {
404	2	50				11	if ($lf_position == 0)
405							{ # found at first char, so there is no prior character to observe
406	0					0	$this->eol_char("\n");
407							} else {
408							# Is the character immediately before it a CR?
409	2					7	my $test_cr = substr($this->data, ($lf_position -1),1);
410	2	50				9	if ($test_cr eq "\r")
411							{
412	0					0	$this->eol_char("\r\n");
413							} else {
414	2					8	$this->eol_char("\n");
415							}
416							}
417							}
418							}
419
420							# extract_literal_strings() - wrapper around extract_strings_and_comments
421							sub extract_literal_strings
422							{
423	0			0	1	0	my $this = shift;
424
425							# don't do it twice...
426	0	0				0	return if $this->{_strings_extracted};
427
428							# save state of comments
429	0					0	my $comment_state = $this->{_comments_extracted};
430
431	0					0	$this->extract_strings_and_comments();
432							# only restore comments if they weren't extraced when we started
433	0	0				0	$this->restore_comments() unless $comment_state;
434							}
435
436							# extract_comments() - wrapper around extract_strings_and_comments
437							sub extract_comments
438							{
439	0			0	1	0	my $this = shift;
440
441							# don't do it twice...
442	0	0				0	return if $this->{_comments_extracted};
443
444							# save state of strings
445	0					0	my $string_state = $this->{_strings_extracted};
446
447	0					0	$this->extract_strings_and_comments();
448							# only restore strings if they weren't extraced when we started
449	0	0				0	$this->restore_literal_strings() unless $string_state;
450							}
451
452							sub extract_strings_and_comments
453							{
454	3			3	1	180	my $this = shift;
455
456							# SAFETY CHECKS
457							# Can't extract strings twice, as the keep the
458							# quotes in the original when we extract them
459	3	100				13	if ($this->{_strings_extracted}) {
460	1					4	$this->restore_literal_strings();
461							}
462							# Restore comments, so that we still get them
463							# in the cache (this could be optimized out)
464							# NOTE: if they had called remove_comments(), then we'll
465							# officially lose all our history of comments here.
466	3	50				14	if ($this->{_comments_extracted}) {
467	0					0	$this->restore_comments();
468							}
469
470							# reset the instance variables caching strings and comments:
471	3					10	$this->strings([]);
472	3					32	$this->comments([]);
473							# where we'll store the literals
474	3					10	my $strings = $this->strings();
475							# where we'll store the comments
476	3					8	my $comments = $this->comments();
477
478	3					4	my ($escaped, $quoteChar, $inQuote);
479
480	3					5	my $lastnws = ''; # last non-whitespace character
481	3					27	my $literal = ""; # literal strings we're building
482	3					5	my $t = ""; # replacement text
483
484	3					8	my @lines = split(/\r?\n/, $this->data); # dos or unix... output is unix
485							# step through each line
486	3					19	LINE: for (my $i=0; $i<@lines; $i++)
487							{
488							# step through each character
489	141					387	LINE_CHAR: for (my $j=0; $j
490							{
491	1967					2521	my $c = substr($lines[$i],$j,1);
492	1967					2170	my $c2 = substr($lines[$i],$j,2);
493							# look for start of string (if not in one)
494	1967	100				2614	if (! $inQuote)
495							{
496							# double-slash comments
497	1427	100	100			8254	if ($c2 eq "//") {
		100	100
		100	66
498	16					32	my $comment = substr($lines[$i],$j);
499	16					17	my $key_num = scalar(@{$comments});
	16					22
500	16					32	$t .= "\0\0".'_'.$key_num.'_'."\0\0";
501	16					38	$t .= $this->eol_char();
502	16					18	push(@{$comments}, $comment);
	16					41
503	16					54	next LINE;
504
505							# slash-star comments
506							} elsif ($c2 eq "/*") {
507	13					16	my $comment = "/*";
508	13					17	my $comstart = $j+2;
509	13					14	my $found_end = 0;
510	13					31	COMM_SEARCH1: for (my $k=($j+2); $k
511							{
512	359					383	my $end = substr($lines[$i],$k,2);
513	359	100				926	if ($end eq "*/") {
514	8					30	$comment .= substr($lines[$i],$comstart,($k+2 - $comstart));
515	8					9	$j = $k+1;
516	8					8	$found_end = 1;
517							#next LINE_CHAR;
518	8					12	last COMM_SEARCH1;
519							}
520							}
521
522	13	100				31	if (! $found_end)
523							{
524	5					14	$comment .= substr($lines[$i],$comstart).$this->eol_char();
525	5					22	COMM_SEARCH2: for (my $l=($i+1); $l<@lines; $l++)
526							{
527	11					30	for (my $k=0; $k
528							{
529	321					375	my $end = substr($lines[$l],$k,2);
530	321	100				781	if ($end eq "*/") {
531	5					10	$comment .= substr($lines[$l],0,$k+2);
532	5					6	$i = $l;
533	5					6	$j = $k+1;
534	5					7	$found_end = 1;
535							#next LINE_CHAR;
536	5					13	last COMM_SEARCH2;
537							}
538							}
539	6					14	$comment .= $lines[$l].$this->eol_char();
540							}
541							}
542	13	50				23	if (! $found_end)
543							{
544	0					0	die "Unterminated /* */ style comment found around line[$i]\n";
545							} else {
546	13					13	my $key_num = scalar(@{$comments});
	13					22
547	13					28	$t .= "\0\0".'_'.$key_num.'_'."\0\0";
548							#$t .= $this->eol_char();
549	13					15	push(@{$comments}, $comment);
	13					25
550	13					43	next LINE_CHAR;
551							}
552
553							# standard quoted strings, and bare regex's
554							# "/" is considered division if it's preceeded by: )._$\ or alphanum
555							} elsif ( $c eq '"' \|\| $c eq "'" \|\|
556							($c eq '/' && $lastnws !~ /[\)\.a-zA-Z0-9_\$\\]/) ) {
557	72					87	$inQuote = 1;
558	72					92	$escaped = 0;
559	72					106	$quoteChar = $c;
560	72					70	$t .= $c;
561	72					73	$literal = '';
562	72	50				253	$lastnws = $c unless $c =~ /\s/;
563
564							# standard code
565							} else {
566	1326					1267	$t .= $c;
567	1326	100				4763	$lastnws = $c unless $c =~ /\s/;
568							}
569
570							# else we're in a quote
571							} else {
572	540	100	66			1783	if ($c eq $quoteChar && !$escaped)
		50	33
573							{
574	72					73	$inQuote = 0;
575	72					67	my $key_num = scalar(@{$strings});
	72					88
576	72					119	$t .= "\0\0".$key_num."\0\0";
577	72					70	$t .= $c;
578	72					63	push(@{$strings}, $literal);
	72					149
579	72	50				276	$lastnws = $c unless $c =~ /\s/;
580
581							} elsif ($c eq "\\" && !$escaped) {
582	0					0	$escaped = 1;
583	0					0	$literal .= $c;
584	0	0				0	$lastnws = $c unless $c =~ /\s/;
585							} else {
586	468					431	$escaped = 0;
587	468					463	$literal .= $c;
588	468	100				1587	$lastnws = $c unless $c =~ /\s/;
589							}
590							}
591							}
592	125	100				181	if ($inQuote) {
593	6					17	$literal .= $this->eol_char();
594							} else {
595	119					208	$t .= $this->eol_char();
596							}
597							}
598
599	3					11	$this->{_comments_extracted} = 1;
600	3					7	$this->{_strings_extracted} = 1;
601	3					12	$this->comments($comments);
602	3					10	$this->strings($strings);
603	3					12	$this->data($t);
604							}
605
606							sub replace_white_space
607							{
608	2			2	1	455	my $this = shift;
609
610							# can't do this if literal strings are still in the thing.
611	2					6	my $string_state = $this->{_strings_extracted};
612	2					4	my $comment_state = $this->{_comments_extracted};
613	2	50	33			557	unless ($this->{_strings_extracted} && $this->{_comments_extracted}) {
614	0					0	$this->extract_strings_and_comments();
615							}
616
617	2					7	my @lines = split(/\r?\n/, $this->data);
618
619							# condense white space
620	2					10	foreach (@lines)
621							{
622	90					280	s/\s+/\ /g;
623	90					159	s/^\s//;
624	90					162	s/\s$//;
625							}
626
627	2					9	$this->data( join($this->eol_char(), @lines) );
628
629							# restore strings/comments if needed
630	2	50				6	unless ($string_state) {
631	0					0	$this->restore_literal_strings();
632							}
633	2	50				14	unless ($comment_state) {
634	0					0	$this->restore_comments();
635							}
636							}
637
638							sub remove_blank_lines
639							{
640	2			2	1	166	my $this = shift;
641
642							# can't do this if literal strings are still in the thing.
643	2					6	my $string_state = $this->{_strings_extracted};
644	2					3	my $comment_state = $this->{_comments_extracted};
645	2	50	33			15	unless ($this->{_strings_extracted} && $this->{_comments_extracted}) {
646	0					0	$this->extract_strings_and_comments();
647							}
648
649	2					7	my @lines = split(/\r?\n/, $this->data);
650	2					29	my @new_lines = ();
651	2					5	foreach (@lines)
652							{
653	90	100				204	next if /^\s*$/;
654	50					106	push(@new_lines,$_);
655
656							}
657
658	2					8	$this->data( join($this->eol_char(), @new_lines) );
659
660							# restore strings/comments if needed
661	2	50				7	unless ($string_state) {
662	0					0	$this->restore_literal_strings();
663							}
664	2	50				16	unless ($comment_state) {
665	0					0	$this->restore_comments();
666							}
667							}
668
669							sub combine_concats
670							{
671	2			2	1	155	my $this = shift;
672
673							# can't do this if literal strings are still in the thing.
674	2					4	my $string_state = $this->{_strings_extracted};
675	2					4	my $comment_state = $this->{_comments_extracted};
676	2	50	33			17	unless ($this->{_strings_extracted} && $this->{_comments_extracted}) {
677	0					0	$this->extract_strings_and_comments();
678							}
679
680	2					5	my $data = $this->data;
681							# TODO: currently, we only concat two literals if
682							# they both use the same quote style. Eg.
683							# this: "foo " + "bar" == "foo bar"
684							# not : "foo " + 'bar' == "foo "+'bar'
685							# this just makes things easier to do w/ a regexp, but we should be
686							# able to do the second form as well (can't w/out lookahead and
687							# lookbehind searches).
688	2					29	$data =~ s/(['"])\s?\+\s?\1//g;
689	2					6	$this->data($data);
690
691							# restore strings/comments if needed
692	2	50				6	unless ($string_state) {
693	0					0	$this->restore_literal_strings();
694							}
695	2	50				8	unless ($comment_state) {
696	0					0	$this->restore_comments();
697							}
698							}
699
700							sub join_all
701							{
702	2			2	1	185	my $this = shift;
703
704							# we can't join lines that contain "//" comments
705							# and we can't process unless strings are not there
706
707	2					4	my $string_state = $this->{_strings_extracted};
708	2					4	my $comment_state = $this->{_comments_extracted};
709	2	50	33			22	unless ($this->{_strings_extracted} && $this->{_comments_extracted}) {
710	0					0	$this->extract_strings_and_comments();
711							}
712
713	2					3	my $last_eol;
714							my $newdata;
715	2					8	foreach my $line (split(/\r?\n/, $this->data))
716							{
717							# if we have a linebreak between these charsets (not counting spaces/other-newlines)
718							# we retain it so we don't break any code.
719	48					112	my ($first_char) = ($line =~ /^\s*(\S)/);
720	48	100	66			344	if (defined($last_eol) &&
		100	66
			66
			66
721							($last_eol =~ /[a-zA-Z0-9\\\$_}\])+\-"']/ \|\| ord($last_eol) > 126) &&
722							($first_char =~ /[a-zA-Z0-9\\\$_{[(+\-]/ \|\| ord($first_char) > 126) )
723							{
724	6					8	$newdata .= "\n";
725							} elsif (defined $last_eol) {
726	40					43	$newdata .= " ";
727							}
728
729	48					72	$newdata .= $line;
730
731	48	50				274	if ($line =~ /(\S)\s*$/) {
732	48					89	$last_eol = $1;
733							}
734							}
735	2					9	$newdata =~ s/\ $//;
736	2					6	$this->data($newdata);
737
738							# restore comments if they're supposed to be in here
739	2	50				6	unless ($comment_state) {
740	0					0	$this->restore_comments();
741							}
742
743							# restore strings/comments if needed
744	2	50				8	unless ($string_state) {
745	0					0	$this->restore_literal_strings();
746							}
747							}
748
749							sub replace_extra_whitespace
750							{
751	2			2	1	161	my $this = shift;
752
753							# can't do this if literal strings are still in the thing.
754	2					5	my $string_state = $this->{_strings_extracted};
755	2					4	my $comment_state = $this->{_comments_extracted};
756	2	50	33			14	unless ($this->{_strings_extracted} && $this->{_comments_extracted}) {
757	0					0	$this->extract_strings_and_comments();
758							}
759
760	2					5	my $data = $this->data;
761							# remove unneccessary white space around operators, braces, parenthesis
762	2					71	$data =~ s/\s([\x21\x25\x26\x28\x29\x2a\x2b\x2c\x2d\x2f\x3a\x3b\x3c\x3d\x3e\x3f\x5b\x5d\x5c\x7b\x7c\x7d\x7e])/$1/g;
763	2					71	$data =~ s/([\x21\x25\x26\x28\x29\x2a\x2b\x2c\x2d\x2f\x3a\x3b\x3c\x3d\x3e\x3f\x5b\x5d\x5c\x7b\x7c\x7d\x7e])\s/$1/g;
764	2					12	$this->data($data);
765
766							# restore strings/comments if needed
767	2	50				7	unless ($string_state) {
768	0					0	$this->restore_literal_strings();
769							}
770	2	50				8	unless ($comment_state) {
771	0					0	$this->restore_comments();
772							}
773							}
774
775							sub remove_comments
776							{
777	3			3	1	513	my $this = shift;
778	3					12	my %opts = @_;
779	3					9	my @exceptions;
780	3	50	66			44	if (ref($opts{exceptions}) eq 'ARRAY') {
		100	66
781	0					0	@exceptions = @{$opts{exceptions}};
	0					0
782							} elsif ( ((ref($opts{exceptions}) eq 'Regexp') \|\| (! ref($opts{exceptions})))
783							&& $opts{exceptions} ) {
784	1					5	@exceptions = ( $opts{exceptions} );
785							}
786
787							# can't do this if literal strings are still in the thing.
788	3					8	my $string_state = $this->{_strings_extracted};
789	3					7	my $comment_state = $this->{_comments_extracted};
790	3	100	66			34	unless ($this->{_strings_extracted} && $this->{_comments_extracted}) {
791	1					4	$this->extract_strings_and_comments();
792							}
793
794	3					12	my $comments = $this->comments();
795
796	3					11	my $data = $this->data;
797	3					9	my $exception_caught = 0;
798							# replace each of the comments
799	3					10	for (my $i=0; $i<@{$comments}; $i++)
	32					123
800							{
801	29					37	my $comment = $comments->[$i];
802	29	100				50	if (grep { $comment =~ /$_/ } @exceptions)
	14					70
803							{
804	1					2	$exception_caught++;
805	1					44	$data =~ s/\0\0\_($i)\_\0\0/$comment/g;
806							} else {
807	28					487	$data =~ s/\0\0\_($i)\_\0\0//g;
808							}
809							}
810	3	100				12	$this->{_comments_extracted} = 0 if $exception_caught;
811	3					9	$this->data($data);
812
813							# restore strings if needed
814	3	50				19	unless ($string_state) {
815	0					0	$this->restore_literal_strings();
816							}
817							}
818
819							sub restore_comments
820							{
821	0			0	1	0	my $this = shift;
822
823	0	0				0	return unless $this->{_comments_extracted};
824
825	0					0	my $comments = $this->comments();
826
827	0					0	my $data = $this->data;
828							# replace each of the comments
829	0					0	for (my $i=0; $i<@{$comments}; $i++)
	0					0
830							{
831	0					0	my $comment = $comments->[$i];
832	0					0	$data =~ s/\0\0\_($i)\_\0\0/$comment/g;
833							}
834	0					0	$this->{_comments_extracted} = 0;
835	0					0	$this->data($data);
836							}
837
838							sub restore_literal_strings
839							{
840	3			3	1	149	my $this = shift;
841
842	3	50				585	return unless $this->{_strings_extracted};
843
844	3					9	my $strings = $this->strings();
845
846	3					7	my $data = $this->data;
847							# replace each of the strings
848	3					7	for (my $i=0; $i<@{$strings}; $i++)
	75					160
849							{
850	72					91	my $string = $strings->[$i];
851	72					1049	$data =~ s/\0\0($i)\0\0/$string/g;
852							}
853	3					7	$this->{_strings_extracted} = 0;
854	3					8	$this->data($data);
855							}
856
857							sub replace_final_eol
858							{
859	2			2	1	151	my $this = shift;
860
861	2					6	my $eol = $this->eol_char();
862	2					17	my $data = $this->data;
863	2	50				9	if ($data =~ /\r?\n$/) {
864	0					0	$data =~ s/\r?\n$/$eol/;
865							} else {
866	2					4	$data .= $eol;
867							}
868	2					20	$this->data($data);
869							}
870
871
872
873							1;