File Coverage

blib/lib/Pod/Spell.pm

Criterion	Covered	Total	%
statement	79	79	100.0
branch	39	42	92.8
condition	15	21	71.4
subroutine	15	15	100.0
pod	4	4	100.0
total	152	161	94.4

line	stmt	bran	cond	sub	pod	time	code
1							package Pod::Spell;
2	6			6		599597	use 5.008;
	6					19
3	6			6		23	use strict;
	6					21
	6					133
4	6			6		21	use warnings;
	6					6
	6					1876
5
6							our $VERSION = '1.27';
7
8							sub new {
9	16			16	1	654344	my ( $class, %args ) = @_;
10
11	16					38	my $no_wide_chars = delete $args{no_wide_chars};
12	16	100				79	my $debug = exists $args{debug} ? delete $args{debug} : $ENV{PERL_POD_SPELL_DEBUG};
13
14	16		33			67	my $stopwords = $args{stopwords} \|\| do {
15							require Pod::Wordlist;
16							Pod::Wordlist->new(
17							_is_debug => $debug,
18							no_wide_chars => $no_wide_chars
19							)
20							};
21
22	16					883	my $parser = Pod::Spell::_Processor->new;
23	16					44	$parser->stopwords($stopwords);
24	16					88	$parser->_is_debug($debug);
25
26	16					106	my %self = (
27							processor => $parser,
28							stopwords => $stopwords,
29							debug => $debug,
30							);
31
32	16					56	bless \%self, $class
33							}
34
35	2	100		2		307	sub _is_debug { (shift)->{debug} ? 1 : 0; }
36
37	1			1	1	67	sub stopwords { (shift)->{stopwords} }
38
39							sub parse_from_file {
40	14			14	1	18	my $self = shift;
41	14					60	$self->{processor}->parse_from_file(@_);
42							}
43
44							sub parse_from_filehandle {
45	14			14	1	1202	my $self = shift;
46	14					39	$self->parse_from_file(@_);
47							}
48
49							package # Hide from indexing
50							Pod::Spell::_Processor;
51	6			6		892	use parent 'Pod::Simple';
	6					502
	6					38
52
53	6			6		204260	use Text::Wrap ();
	6					15117
	6					4558
54
55							__PACKAGE__->_accessorize(qw(
56							stopwords
57							_is_debug
58							));
59
60							sub new {
61	16			16		29	my $class = shift;
62	16					91	my $self = $class->SUPER::new(@_);
63	16					615	$self->accept_targets('stopwords');
64	16					244	return $self;
65							}
66
67							my %track_elements = (
68							for => 1,
69							Verbatim => 1,
70							L => 1,
71							C => 1,
72							F => 1,
73							);
74
75							sub output_fh {
76	49			49		207	my $self = shift;
77	49	100				99	if (@_) {
78	14					38	my ($fh) = @_;
79	14					118	my $encoded_fh = grep $_ eq 'utf8', PerlIO::get_layers(*$fh);
80	14					35	$self->{_encoded_fh} = $encoded_fh;
81							}
82	49					135	return $self->SUPER::output_fh(@_);
83							}
84
85							sub _handle_element_start {
86	88			88		27042	my ($self, $element_name, $attr) = @_;
87							$self->{buffer} = ''
88	88	100				234	if !defined $self->{buffer};
89
90	88	100				233	if ($track_elements{$element_name}) {
91	23					26	push @{ $self->{in_element} }, [ $element_name, $attr ];
	23					72
92							}
93							}
94
95							sub _handle_text {
96	66			66		481	my ($self, $text) = @_;
97
98	66					108	my $in = $self->{in_element};
99	66	100	100			244	if ($in && @$in) {
100	24					27	my ($element_name, $attr) = @{$in->[-1]};
	24					47
101	24	100	66			102	if ($element_name eq 'for' && $attr->{target_matching} eq 'stopwords') {
		100	33
		100
		50
102							# this will match both for/begin and stopwords/:stopwords
103
104	16	100				39	print "Stopword para: <$text>\n"
105							if $self->_is_debug;
106	16					179	$self->stopwords->learn_stopwords($text);
107	16					76	return;
108							}
109							# totally ignore verbatim sections
110							elsif ($element_name eq 'Verbatim') {
111	1					3	return;
112							}
113							elsif ($element_name eq 'L') {
114							return
115	1	50				4	if $attr->{'content-implicit'};
116							}
117							elsif ($element_name eq 'C' \|\| $element_name eq 'F') {
118							# maintain word boundaries
119	6	100				17	my $pre = $text =~ s{\A\s+}{} ? ' ' : '';
120	6	100				14	my $post = $text =~ s{\s+\z}{} ? ' ' : '';
121							# if _ is joined with text before or after, it will be treated as
122							# a Perl token and the entire word ignored
123	6	50				14	$text = $pre . (length $text ? '_' : '') . $post;
124							}
125							}
126
127	48					109	$self->{buffer} .= $text;
128							}
129
130							sub _handle_element_end {
131	88			88		1942	my ($self, $element_name) = @_;
132
133	88					109	my $in = $self->{in_element};
134	88	100	100			316	if ($in && @$in && $in->[-1][0] eq $element_name) {
			100
135	23					28	pop @$in;
136							}
137
138							return
139	88	100				276	if $element_name !~ m{\A(?:Para\|head\d\|item-.*\|over-block)\z};
140
141	48					80	my $buffer = delete $self->{buffer};
142	48	100	66			129	if (!defined $buffer \|\| !length $buffer) {
143	13					18	return;
144							}
145
146	35					82	my $fh = $self->output_fh;
147
148	35					208	my $out = $self->stopwords->strip_stopwords($buffer);
149
150							# maintain exact output of older Pod::Parser based implementation
151	35	100				77	print { $fh } "\n"
	14					347
152							if $element_name ne 'Para';
153
154							return
155	35	100				69	if !length $out;
156
157	32					47	local $Text::Wrap::huge = 'overflow';
158	32					96	my $wrapped = Text::Wrap::wrap( '', '', $out ) . "\n\n";
159
160	32	100				5970	if ($self->{_encoded_fh}) {
161	16					21	print { $fh } $wrapped;
	16					310
162							}
163							else {
164	16					18	print { $fh } Encode::encode('UTF-8', $wrapped);
	16					101
165							}
166							}
167
168							1;
169
170							__END__
171
172							=pod
173
174							=encoding UTF-8
175
176							=for :stopwords Sean M. Burke Caleb Cushing Olivier Mengué PODs virtE<ugrave> qux
177
178							=head1 NAME
179
180							Pod::Spell - a formatter for spellchecking Pod
181
182							=head1 VERSION
183
184							version 1.27
185
186							=head1 SYNOPSIS
187
188							use Pod::Spell;
189							Pod::Spell->new->parse_from_file( 'File.pm' );
190
191							Pod::Spell->new->parse_from_filehandle( $infile, $outfile );
192
193							Also look at L<podspell>
194
195							% perl -MPod::Spell -e "Pod::Spell->new->parse_from_file(shift)" Thing.pm \|spell \|fmt
196
197							...or instead of piping to spell or C<ispell>, use C<< >temp.txt >>, and open
198							F<temp.txt> in your word processor for spell-checking.
199
200							=head1 DESCRIPTION
201
202							Pod::Spell is a Pod formatter whose output is good for
203							spellchecking. Pod::Spell is rather like L<Pod::Text>, except that
204							it doesn't put much effort into actual formatting, and it suppresses things
205							that look like Perl symbols or Perl jargon (so that your spellchecking
206							program won't complain about mystery words like "C<$thing>"
207							or "C<Foo::Bar>" or "hashref").
208
209							This class works by filtering out words that look like Perl or any
210							form of computerese (like "C<$thing>" or "C<< N>7 >>" or
211							"C<@{$foo}{'bar','baz'}>", anything in CE<lt>...E<gt> or FE<lt>...E<gt>
212							codes, anything in verbatim paragraphs (code blocks), and anything
213							in the stopword list. The default stopword list for a document starts
214							out from the stopword list defined by L<Pod::Wordlist>,
215							and can be supplemented (on a per-document basis) by having
216							C<"=for stopwords"> / C<"=for :stopwords"> region(s) in a document.
217
218							=head1 METHODS
219
220							=head2 new
221
222							Pod::Spell->new(%options)
223
224							Creates a new Pod::Spell instance. Accepts several options:
225
226							=over 4
227
228							=item debug
229
230							When set to a true value, will output debugging messages about how the Pod
231							is being processed.
232
233							Defaults to false.
234
235							=item stopwords
236
237							Can be specified to use an alternate wordlist instance.
238
239							Defaults to a new Pod::Wordlist instance.
240
241							=item no_wide_chars
242
243							Will be passed to Pod::Wordlist when creating a new instance. Causes all words
244							with characters outside the Latin-1 range to be stripped from the output.
245
246							=back
247
248							=head2 stopwords
249
250							$self->stopwords->isa('Pod::Wordlist'); # true
251
252							=head2 parse_from_filehandle($in_fh,$out_fh)
253
254							This method takes an input filehandle (which is assumed to already be
255							opened for reading) and reads the entire input stream looking for blocks
256							(paragraphs) of POD documentation to be processed. If no first argument
257							is given the default input filehandle C<STDIN> is used.
258
259							The C<$in_fh> parameter may be any object that provides a B<getline()>
260							method to retrieve a single line of input text (hence, an appropriate
261							wrapper object could be used to parse PODs from a single string or an
262							array of strings).
263
264							=head2 parse_from_file($filename,$outfile)
265
266							This method takes a filename and does the following:
267
268							=over 2
269
270							=item *
271
272							opens the input file for reading and the output file for writing
273							(creating the appropriate filehandles)
274
275							=item *
276
277							invokes the B<parse_from_filehandle()> method passing it the
278							corresponding input and output filehandles.
279
280							=item *
281
282							closes the input and output files.
283
284							=back
285
286							If the special input filename "", "-" or "<&STDIN" is given then the STDIN
287							filehandle is used for input (and no open or close is performed). If no
288							input filename is specified then "-" is implied. Filehandle references,
289							or objects that support the regular IO operations (like C<E<lt>$fhE<gt>>
290							or C<$fh-E<gt>getline>) are also accepted; the handles must already be
291							opened.
292
293							If a second argument is given then it should be the name of the desired
294							output file. If the special output filename "-" or ">&STDOUT" is given
295							then the STDOUT filehandle is used for output (and no open or close is
296							performed). If the special output filename ">&STDERR" is given then the
297							STDERR filehandle is used for output (and no open or close is
298							performed). If no output filehandle is currently in use and no output
299							filename is specified, then "-" is implied.
300							Alternatively, filehandle references or objects that support the regular
301							IO operations (like C<print>, e.g. L<IO::String>) are also accepted;
302							the object must already be opened.
303
304							=head1 ENCODINGS
305
306							If your Pod is encoded in something other than Latin-1, it should declare
307							an encoding using the L<< perlpod/C<=encoding I<encodingname>> >> directive.
308
309							=head1 ADDING STOPWORDS
310
311							You can add stopwords on a per-document basis with
312							C<"=for stopwords"> / C<"=for :stopwords"> regions, like so:
313
314							=for stopwords plok Pringe zorch snik !qux
315							foo bar baz quux quuux
316
317							This adds every word in that paragraph after "stopwords" to the
318							stopword list, effective for the rest of the document. In such a
319							list, words are whitespace-separated. (The amount of whitespace
320							doesn't matter, as long as there are no blank lines in the middle
321							of the paragraph.) Plural forms are added automatically using
322							L<Lingua::EN::Inflect>. Words beginning with "!" are
323							I<deleted> from the stopword list -- so "!qux" deletes "qux" from the
324							stopword list, if it was in there in the first place. Note that if
325							a stopword is all-lowercase, then it means that it's okay in I<any>
326							case; but if the word has any capital letters, then it means that
327							it's okay I<only> with I<that> case. So a Wordlist entry of "perl"
328							would permit "perl", "Perl", and (less interestingly) "PERL", "pERL",
329							"PerL", et cetera. However, a Wordlist entry of "Perl" catches
330							only "Perl", not "perl". So if you wanted to make sure you said
331							only "Perl", never "perl", you could add this to the top of your
332							document:
333
334							=for stopwords !perl Perl
335
336							Then all instances of the word "Perl" would be weeded out of the
337							Pod::Spell-formatted version of your document, but any instances of
338							the word "perl" would be left in (unless they were in a CE<lt>...> or
339							FE<lt>...> style).
340
341							You can have several "=for stopwords" regions in your document. You
342							can even express them like so:
343
344							=begin stopwords
345
346							plok Pringe zorch
347
348							snik !qux
349
350							foo bar
351							baz quux quuux
352
353							=end stopwords
354
355							If you want to use EE<lt>...> sequences in a "stopwords" region, you
356							have to use ":stopwords", as here:
357
358							=for :stopwords
359							virtE<ugrave>
360
361							...meaning that you're adding a stopword of "virtE<ugrave>". If
362							you left the ":" out, that would mean you were adding a stopword of
363							"virtEE<lt>ugrave>" (with a literal E, a literal <, etc), which
364							will have no effect, since any occurrences of virtEE<lt>ugrave>
365							don't look like a normal human-language word anyway, and so would
366							be screened out before the stopword list is consulted anyway.
367
368							=head1 CAVEATS
369
370							=head2 finding stopwords defined with C<=for>
371
372							Pod::Spell makes a single pass over the POD. Stopwords
373							must be added B<before> they show up in the POD.
374
375							=head1 HINT
376
377							If you feed output of Pod::Spell into your word processor and run a
378							spell-check, make sure you're I<not> also running a grammar-check -- because
379							Pod::Spell drops words that it thinks are Perl symbols, jargon, or
380							stopwords, this means you'll have ungrammatical sentences, what with
381							words being missing and all. And you don't need a grammar checker
382							to tell you that.
383
384							=head1 SEE ALSO
385
386							=over 4
387
388							=item * L<Pod::Wordlist>
389
390							=item * L<Pod::Simple>
391
392							=item * L<podchecker> also known as L<Pod::Checker>
393
394							=item * L<perlpod>, L<perlpodspec>
395
396							=back
397
398							=head1 BUGS
399
400							Please report any bugs or feature requests on the bugtracker website
401							L<https://rt.cpan.org/Public/Dist/Display.html?Name=Pod-Spell> or by email
402							to L<bug-Pod-Spell@rt.cpan.org\|mailto:bug-Pod-Spell@rt.cpan.org>.
403
404							When submitting a bug or request, please include a test-file or a
405							patch to an existing test-file that illustrates the bug or desired
406							feature.
407
408							=head1 AUTHORS
409
410							=over 4
411
412							=item *
413
414							Sean M. Burke <sburke@cpan.org>
415
416							=item *
417
418							Caleb Cushing <xenoterracide@gmail.com>
419
420							=back
421
422							=head1 CONTRIBUTORS
423
424							=for stopwords David Golden Graham Knop Kent Fredric Mohammad S Anwar Olivier Mengué Paulo Custodio
425
426							=over 4
427
428							=item *
429
430							David Golden <dagolden@cpan.org>
431
432							=item *
433
434							Graham Knop <haarg@haarg.org>
435
436							=item *
437
438							Kent Fredric <kentfredric@gmail.com>
439
440							=item *
441
442							Mohammad S Anwar <mohammad.anwar@yahoo.com>
443
444							=item *
445
446							Olivier Mengué <dolmen@cpan.org>
447
448							=item *
449
450							Paulo Custodio <pauloscustodio@gmail.com>
451
452							=back
453
454							=head1 COPYRIGHT AND LICENSE
455
456							This software is Copyright (c) 2024 by Olivier Mengué.
457
458							This is free software, licensed under:
459
460							The Artistic License 2.0 (GPL Compatible)
461
462							=cut