line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Text::Lossy; |
2
|
|
|
|
|
|
|
|
3
|
13
|
|
|
13
|
|
396351
|
use 5.008; |
|
13
|
|
|
|
|
53
|
|
|
13
|
|
|
|
|
570
|
|
4
|
13
|
|
|
13
|
|
74
|
use strict; |
|
13
|
|
|
|
|
23
|
|
|
13
|
|
|
|
|
445
|
|
5
|
13
|
|
|
13
|
|
65
|
use warnings; |
|
13
|
|
|
|
|
27
|
|
|
13
|
|
|
|
|
395
|
|
6
|
13
|
|
|
13
|
|
3241
|
use utf8; |
|
13
|
|
|
|
|
49
|
|
|
13
|
|
|
|
|
90
|
|
7
|
|
|
|
|
|
|
|
8
|
13
|
|
|
13
|
|
466
|
use Carp; |
|
13
|
|
|
|
|
26
|
|
|
13
|
|
|
|
|
1369
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
=head1 NAME |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
Text::Lossy - Lossy text compression |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 VERSION |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Version 0.40.2 |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=cut |
19
|
|
|
|
|
|
|
|
20
|
13
|
|
|
13
|
|
15467
|
use version 0.77; our $VERSION = version->declare('v0.40.2'); |
|
13
|
|
|
|
|
37181
|
|
|
13
|
|
|
|
|
132
|
|
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
=head1 SYNOPSIS |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
use Text::Lossy; |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
my $lossy = Text::Lossy->new; |
28
|
|
|
|
|
|
|
$lossy->add('whitespace'); |
29
|
|
|
|
|
|
|
my $short = $lossy->process($long); |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
my $lossy = Text::Lossy->new->add('lower', 'punctuation'); # Chaining usage |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
$lossy->process($long); # In place |
34
|
|
|
|
|
|
|
$lossy->process(); # Filters $_ in place |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
=head1 DESCRIPTION |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
C is a collection of text filters for lossy compression. |
39
|
|
|
|
|
|
|
"Lossy compression" changes the data in a way which is irreversible, |
40
|
|
|
|
|
|
|
but results in a smaller file size after compression. One of the best |
41
|
|
|
|
|
|
|
known lossy compression uses is the JPEG image format. |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
Note that this module does not perform the actual compression itself, |
44
|
|
|
|
|
|
|
it merely changes the text so that it may be compressed better. |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=head2 Alpha software |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
This code is currently B. Anything can and will change, |
49
|
|
|
|
|
|
|
most likely in a backwards-incompatible manner. You have been warned. |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=head2 Usage |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
C uses an object oriented interface. You create a new |
54
|
|
|
|
|
|
|
C object, set the filters you wish to use (described below), |
55
|
|
|
|
|
|
|
and call the L method on the object. You can call this |
56
|
|
|
|
|
|
|
method as often as you like. In addition, there is a method which produces |
57
|
|
|
|
|
|
|
a closure, an anonymous subroutine, that acts like the process method on |
58
|
|
|
|
|
|
|
the given object. |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=head2 Adding new filters |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
New filters can be added with the L class method. |
63
|
|
|
|
|
|
|
Each filter is a subroutine which takes a single string and returns this |
64
|
|
|
|
|
|
|
string filtered. |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=cut |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
our %filtermap; |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=head1 CONSTRUCTORS |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
=head2 new |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
my $lossy = Text::Lossy->new(); |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
The constructor for a new lossy text compressor. The constructor is quite |
77
|
|
|
|
|
|
|
light-weight; the only purpose of a compressor object is to accept and remember |
78
|
|
|
|
|
|
|
a sequence of filters to apply to text. |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
The constructor takes no arguments. |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=cut |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
sub new { |
85
|
17
|
|
|
17
|
1
|
692
|
my $class = shift; |
86
|
17
|
|
|
|
|
71
|
my $self = { |
87
|
|
|
|
|
|
|
filters => [], |
88
|
|
|
|
|
|
|
}; |
89
|
17
|
|
|
|
|
113
|
return bless $self, $class; |
90
|
|
|
|
|
|
|
} |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
=head1 METHODS |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=head2 process |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
my $new_text = $lossy->process( $old_text ); |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
This method takes a single text string, applies all the selected filters |
99
|
|
|
|
|
|
|
to it, and returns the filtered string. Filters are selected via |
100
|
|
|
|
|
|
|
L; see L. |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
The text is upgraded to character semantics via a call to |
103
|
|
|
|
|
|
|
C, see L. This will not change the text you passed |
104
|
|
|
|
|
|
|
in, nor should it have too surprising an effect on the output. |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
If no text is passed in, nothing is returned (the empty list or C, |
107
|
|
|
|
|
|
|
depending on context). |
108
|
|
|
|
|
|
|
If an explicit C is passed in, an explicit C is returned, even in |
109
|
|
|
|
|
|
|
list context. |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=cut |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
sub process { |
114
|
46
|
|
|
46
|
1
|
2458
|
my ($self, $text) = @_; |
115
|
46
|
100
|
|
|
|
143
|
return unless @_ > 1; |
116
|
44
|
100
|
|
|
|
138
|
return undef unless defined $text; |
117
|
42
|
|
|
|
|
111
|
utf8::upgrade($text); |
118
|
42
|
|
|
|
|
50
|
foreach my $f (@{$self->{'filters'}}) { |
|
42
|
|
|
|
|
245
|
|
119
|
47
|
|
|
|
|
127
|
$text = $f->{'code'}->($text); |
120
|
|
|
|
|
|
|
} |
121
|
42
|
|
|
|
|
5954
|
return $text; |
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=head2 add |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
$lossy->add( 'lower', 'whitespace' ); |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
This method takes a list of filter names and adds them to the filter list |
129
|
|
|
|
|
|
|
of the filter object, in the order given. This allows a programmatic |
130
|
|
|
|
|
|
|
selection of filters, for example via command line. Returns the object |
131
|
|
|
|
|
|
|
for method chaining. |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
If the filter is unknown, an exception is thrown. This may happen when you |
134
|
|
|
|
|
|
|
misspell the name, or forgot to use a module which registers the filter, |
135
|
|
|
|
|
|
|
or forgot to register it yourself. |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
=cut |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
sub add { |
140
|
23
|
|
|
23
|
1
|
145
|
my ($self, @filters) = @_; |
141
|
23
|
|
|
|
|
54
|
foreach my $name (@filters) { |
142
|
31
|
|
|
|
|
70
|
my $code = $filtermap{$name}; |
143
|
31
|
100
|
|
|
|
94
|
if (not $code) { |
144
|
2
|
|
|
|
|
57
|
croak "Unknown filter $name (did you forget to use the right module?)"; |
145
|
|
|
|
|
|
|
} |
146
|
29
|
|
|
|
|
41
|
push @{$self->{'filters'}}, { code => $code, name => $name }; |
|
29
|
|
|
|
|
181
|
|
147
|
|
|
|
|
|
|
} |
148
|
21
|
|
|
|
|
68
|
return $self; |
149
|
|
|
|
|
|
|
} |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=head2 clear |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
$lossy->clear(); |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
Remove the filters from the filter object. The object will behave as |
156
|
|
|
|
|
|
|
if newly constructed. Returns the object for method chaining. |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
=cut |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
sub clear { |
161
|
3
|
|
|
3
|
1
|
9
|
my ($self) = @_; |
162
|
3
|
|
|
|
|
7
|
@{$self->{'filters'}} = (); |
|
3
|
|
|
|
|
15
|
|
163
|
3
|
|
|
|
|
9
|
return $self; |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=head2 list |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
my @names = $lossy->list(); |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
List the filters added to this object, in order. The names (not the |
171
|
|
|
|
|
|
|
code) are returned in a list. |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=cut |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
sub list { |
176
|
5
|
|
|
5
|
1
|
12
|
my ($self) = @_; |
177
|
5
|
|
|
|
|
7
|
return map $_->{'name'}, @{$self->{'filters'}}; |
|
5
|
|
|
|
|
51
|
|
178
|
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=head2 as_coderef |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
my $code = $lossy->as_coderef(); |
183
|
|
|
|
|
|
|
$new_text = $code->( $old_text ); |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
Returns a code reference that closes over the object. This code reference |
186
|
|
|
|
|
|
|
acts like a bound L method on the constructed object. It |
187
|
|
|
|
|
|
|
can be used in places like L that expect a code reference that |
188
|
|
|
|
|
|
|
filters text. |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
The code reference is bound to the object, not a particular object state. |
191
|
|
|
|
|
|
|
Adding filters to the object after calling C will also change |
192
|
|
|
|
|
|
|
the behaviour of the code reference. |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
=cut |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
sub as_coderef { |
197
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
198
|
|
|
|
|
|
|
return sub { |
199
|
0
|
|
|
0
|
|
0
|
return $self->process(@_); |
200
|
|
|
|
|
|
|
} |
201
|
0
|
|
|
|
|
0
|
} |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=head1 FILTERS |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
The following filters are defined by this module. Other modules may define |
206
|
|
|
|
|
|
|
more filters. |
207
|
|
|
|
|
|
|
Each of these filters can be added to the set via the L method. |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=head2 lower |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
Corresponds exactly to the L builtin in Perl, up |
212
|
|
|
|
|
|
|
to and including its Unicode handling. |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
=cut |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
sub lower { |
217
|
14
|
|
|
14
|
1
|
18
|
my ($text) = @_; |
218
|
14
|
|
|
|
|
157
|
return lc($text); |
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
=head2 whitespace |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
Collapses any whitespace (C<\s> in regular expressions) to a single space, C. |
224
|
|
|
|
|
|
|
Whitespace at the beginning of the text is stripped completely. Whitespace at the end |
225
|
|
|
|
|
|
|
is also collapsed to a single space, to help separate lines. Text consisting only |
226
|
|
|
|
|
|
|
of whitespace results in an empty string. |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
=cut |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
sub whitespace { |
231
|
8
|
|
|
8
|
1
|
12
|
my ($text) = @_; |
232
|
8
|
|
|
|
|
48
|
$text =~ s{ \s+ }{ }xmsg; |
233
|
|
|
|
|
|
|
# the above line also works for the end of the text |
234
|
8
|
|
|
|
|
26
|
$text =~ s{ \A \s+ }{}xms; |
235
|
8
|
|
|
|
|
20
|
return $text; |
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
=head2 whitespace_nl |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
A variant of the L filter that leaves newlines on the end of the text |
241
|
|
|
|
|
|
|
alone. Other whitespace at the end will get collapsed into a single newline. |
242
|
|
|
|
|
|
|
If the text ends in whitespace that does not contain a new line, it is replaced |
243
|
|
|
|
|
|
|
by a space, as before. |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
This filter is most useful if you are creating a Unix-style text filter, and do not |
246
|
|
|
|
|
|
|
want to buffer the entire input before writing the (only) line to C. The |
247
|
|
|
|
|
|
|
newline at the end will allow downstream processes to work on new lines, too. |
248
|
|
|
|
|
|
|
Otherwise, this filter is not quite as efficient as the L filter. |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
Any newlines in the middle of text are collapsed to a space, too. This is especially |
251
|
|
|
|
|
|
|
useful if you are reading in "paragraph mode", e.g. C<$/ = ''>, as you will get |
252
|
|
|
|
|
|
|
one long line per former paragraph. |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
=cut |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
sub whitespace_nl { |
257
|
7
|
|
|
7
|
1
|
14
|
my ($text) = @_; |
258
|
|
|
|
|
|
|
# Remember whether a newline was present |
259
|
7
|
100
|
|
|
|
35
|
my $has_nl = ($text =~ m{ \n \s* \z }xms) ? 1 : 0; |
260
|
7
|
|
|
|
|
55
|
$text =~ s{ \s+ }{ }xmsg; |
261
|
7
|
|
|
|
|
24
|
$text =~ s{ \A \s+ }{}xms; |
262
|
|
|
|
|
|
|
# whitespace-at-end is now a space |
263
|
7
|
100
|
|
|
|
17
|
if ($has_nl) { |
264
|
|
|
|
|
|
|
# replace this space with a newline |
265
|
2
|
|
|
|
|
11
|
$text =~ s{ \s+ \z }{\n}xms; |
266
|
|
|
|
|
|
|
} |
267
|
7
|
|
|
|
|
26
|
return $text; |
268
|
|
|
|
|
|
|
} |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
=head2 punctuation |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
Strips punctuation, that is anything matching C<\p{Punctuation}>. It is replaced by |
273
|
|
|
|
|
|
|
nothing, removing it completely. |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
=cut |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
sub punctuation { |
278
|
6
|
|
|
6
|
1
|
12
|
my ($text) = @_; |
279
|
|
|
|
|
|
|
# Turns out '\p{Punctuation}' fails on Perl 5.6, use the abbreviation '\pP' instead |
280
|
13
|
|
|
13
|
|
11223
|
$text =~ s{ \pP }{}xmsg; |
|
13
|
|
|
|
|
35
|
|
|
13
|
|
|
|
|
211
|
|
|
6
|
|
|
|
|
77
|
|
281
|
6
|
|
|
|
|
18
|
return $text; |
282
|
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
=head2 punctuation_sp |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
A variant of L that replaces punctuation with a space character, C, |
287
|
|
|
|
|
|
|
instead of removing it completely. This is usually less efficient for compression, but |
288
|
|
|
|
|
|
|
retains more readability, for example in the presence of URLs or email addresses. |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=cut |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
sub punctuation_sp { |
293
|
3
|
|
|
3
|
1
|
5
|
my ($text) = @_; |
294
|
|
|
|
|
|
|
# Turns out '\p{Punctuation}' fails on Perl 5.6, use the abbreviation '\pP' instead |
295
|
3
|
|
|
|
|
25
|
$text =~ s{ \pP }{ }xmsg; |
296
|
3
|
|
|
|
|
10
|
return $text; |
297
|
|
|
|
|
|
|
} |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
=head2 alphabetize |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
Leaves the first and last letters of a word alone, but replaces the interior letters with |
302
|
|
|
|
|
|
|
the same set, sorted by the L function. This is done on the observation |
303
|
|
|
|
|
|
|
(source uncertain at the time) that words can still be made out if the letters are present, but |
304
|
|
|
|
|
|
|
in a different order, as long as the outer ones remain the same. |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
This filter may not work as proposed with every language or writing system. Specifically, it |
307
|
|
|
|
|
|
|
uses end-of-word matches C<\b> to determine which letters to leave alone. |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
=cut |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
sub alphabetize { |
312
|
6
|
|
|
6
|
1
|
7
|
my ($text) = @_; |
313
|
6
|
|
|
|
|
81
|
$text =~ s{ \b (\p{Alpha}) (\p{Alpha}+) (\p{Alpha}) \b }{ $1 . join('', sort split(//,$2)) . $3 }xmseg; |
|
6
|
|
|
|
|
52
|
|
314
|
6
|
|
|
|
|
23
|
return $text; |
315
|
|
|
|
|
|
|
} |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
# TODO: |
318
|
|
|
|
|
|
|
# - unidecode (separate module) |
319
|
|
|
|
|
|
|
# - normalize (separate module) |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
=head1 CLASS METHODS |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
These methods are not called on a filter object, but on the class C |
324
|
|
|
|
|
|
|
itself. They are typically concerned with the filters that can be added to filter |
325
|
|
|
|
|
|
|
objects. |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
=head2 register_filters |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
Text::Lossy->register_filters( |
330
|
|
|
|
|
|
|
change_stuff => \&Other::Module::change_text, |
331
|
|
|
|
|
|
|
remove_ps => sub { my ($text) = @_; $text =~ s{[Pp]}{}; return $text; }, |
332
|
|
|
|
|
|
|
); |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
Adds one or more named filters to the set of available filters. Filters are |
335
|
|
|
|
|
|
|
passed in an anonymous hash. |
336
|
|
|
|
|
|
|
Previously defined mappings may be overwritten by this function. |
337
|
|
|
|
|
|
|
Specifically, passing C as the code reference removes the filter. |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
=cut |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
%filtermap = ( |
342
|
|
|
|
|
|
|
'lower' => \&lower, |
343
|
|
|
|
|
|
|
'whitespace' => \&whitespace, |
344
|
|
|
|
|
|
|
'whitespace_nl' => \&whitespace_nl, |
345
|
|
|
|
|
|
|
'punctuation' => \&punctuation, |
346
|
|
|
|
|
|
|
'punctuation_sp' => \&punctuation_sp, |
347
|
|
|
|
|
|
|
'alphabetize' => \&alphabetize, |
348
|
|
|
|
|
|
|
); |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
sub register_filters { |
351
|
7
|
|
|
7
|
1
|
1507
|
my ($class, %mapping) = @_; |
352
|
7
|
|
|
|
|
101
|
foreach my $name (keys %mapping) { |
353
|
11
|
100
|
|
|
|
121
|
if (defined $mapping{$name}) { |
354
|
3
|
|
|
|
|
11
|
$filtermap{$name} = $mapping{$name}; |
355
|
|
|
|
|
|
|
} else { |
356
|
8
|
|
|
|
|
30
|
delete $filtermap{$name}; |
357
|
|
|
|
|
|
|
} |
358
|
|
|
|
|
|
|
} |
359
|
7
|
|
|
|
|
29
|
return; |
360
|
|
|
|
|
|
|
} |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
=head2 available_filters |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
my @filters = Text::Lossy->available_filters(); |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
Lists the available filters at this point in time, specifically their names |
367
|
|
|
|
|
|
|
as used by L and L. The list is sorted alphabetically. |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
=cut |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
sub available_filters { |
372
|
4
|
|
|
4
|
1
|
1410
|
my ($class) = @_; |
373
|
4
|
|
|
|
|
62
|
return sort keys %filtermap; |
374
|
|
|
|
|
|
|
} |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
=head1 CREATING FILTERS |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
A filter is a subroutine which takes a single parameter (the text to be converted) and |
379
|
|
|
|
|
|
|
returns the filtered text. The text may also be changed in-place, as long as it is |
380
|
|
|
|
|
|
|
returned again. |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
These filters are then made available to the rest of the system via the |
383
|
|
|
|
|
|
|
L function. |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
=head1 USAGE WITH Text::Filter |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
The L module provides an infrastructure for filtering text, but no actual filters. |
388
|
|
|
|
|
|
|
It can be used with C by passing the result of L as the C |
389
|
|
|
|
|
|
|
parameter. |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
It is recommended to set L to leave line endings alone when using the L |
392
|
|
|
|
|
|
|
filter, i.e. the L and |
393
|
|
|
|
|
|
|
L should be C<0>. This is the default |
394
|
|
|
|
|
|
|
for L. It will allow L to perform its assigned task on line endings. |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
One thing to note is that the C filters do not follow the L's convention |
397
|
|
|
|
|
|
|
that lines "to be skipped" should result in an C. |
398
|
|
|
|
|
|
|
This means you need to expect completely empty lines (C, not even a newline character) in |
399
|
|
|
|
|
|
|
your output. |
400
|
|
|
|
|
|
|
This should be no problem if you print to a file handle or append to a string, but may be surprising |
401
|
|
|
|
|
|
|
if you are filtering an array of lines. |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
=head1 EXPORT |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
Nothing exported or exportable; use the OO interface instead. |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
=head1 UNICODE |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
This code strives to be completely Unicode compatible. All filters aim to "do the right thing" on non-ASCII strings. |
410
|
|
|
|
|
|
|
Any failure to handle Unicode should be considered a bug; please report it. |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
=head1 AUTHOR |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
Ben Deutsch, C<< >> |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
=head1 BUGS |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
None known so far. |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
Please report any bugs or feature requests to C, or through |
421
|
|
|
|
|
|
|
the web interface at L. I will be notified, and then you'll |
422
|
|
|
|
|
|
|
automatically be notified of progress on your bug as I make changes. |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
=head1 SUPPORT |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
perldoc Text::Lossy |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
You can also look for information at: |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
=over 4 |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker (report bugs here) |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
L |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
L |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
=item * CPAN Ratings |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
L |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
=item * Search CPAN |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
L |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
=back |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
Copyright 2012 Ben Deutsch. |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
462
|
|
|
|
|
|
|
under the terms of either: the GNU General Public License as published |
463
|
|
|
|
|
|
|
by the Free Software Foundation; or the Artistic License. |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
See http://dev.perl.org/licenses/ for more information. |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
=cut |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
1; # End of Text::Lossy |