File Coverage

blib/lib/Pod/Spell.pm
Criterion Covered Total %
statement 79 79 100.0
branch 39 42 92.8
condition 15 21 71.4
subroutine 15 15 100.0
pod 4 4 100.0
total 152 161 94.4


line stmt bran cond sub pod time code
1             package Pod::Spell;
2 6     6   599597 use 5.008;
  6         19  
3 6     6   23 use strict;
  6         21  
  6         133  
4 6     6   21 use warnings;
  6         6  
  6         1876  
5              
6             our $VERSION = '1.27';
7              
8             sub new {
9 16     16 1 654344 my ( $class, %args ) = @_;
10              
11 16         38 my $no_wide_chars = delete $args{no_wide_chars};
12 16 100       79 my $debug = exists $args{debug} ? delete $args{debug} : $ENV{PERL_POD_SPELL_DEBUG};
13              
14 16   33     67 my $stopwords = $args{stopwords} || do {
15             require Pod::Wordlist;
16             Pod::Wordlist->new(
17             _is_debug => $debug,
18             no_wide_chars => $no_wide_chars
19             )
20             };
21              
22 16         883 my $parser = Pod::Spell::_Processor->new;
23 16         44 $parser->stopwords($stopwords);
24 16         88 $parser->_is_debug($debug);
25              
26 16         106 my %self = (
27             processor => $parser,
28             stopwords => $stopwords,
29             debug => $debug,
30             );
31              
32 16         56 bless \%self, $class
33             }
34              
35 2 100   2   307 sub _is_debug { (shift)->{debug} ? 1 : 0; }
36              
37 1     1 1 67 sub stopwords { (shift)->{stopwords} }
38              
39             sub parse_from_file {
40 14     14 1 18 my $self = shift;
41 14         60 $self->{processor}->parse_from_file(@_);
42             }
43              
44             sub parse_from_filehandle {
45 14     14 1 1202 my $self = shift;
46 14         39 $self->parse_from_file(@_);
47             }
48              
49             package # Hide from indexing
50             Pod::Spell::_Processor;
51 6     6   892 use parent 'Pod::Simple';
  6         502  
  6         38  
52              
53 6     6   204260 use Text::Wrap ();
  6         15117  
  6         4558  
54              
55             __PACKAGE__->_accessorize(qw(
56             stopwords
57             _is_debug
58             ));
59              
60             sub new {
61 16     16   29 my $class = shift;
62 16         91 my $self = $class->SUPER::new(@_);
63 16         615 $self->accept_targets('stopwords');
64 16         244 return $self;
65             }
66              
67             my %track_elements = (
68             for => 1,
69             Verbatim => 1,
70             L => 1,
71             C => 1,
72             F => 1,
73             );
74              
75             sub output_fh {
76 49     49   207 my $self = shift;
77 49 100       99 if (@_) {
78 14         38 my ($fh) = @_;
79 14         118 my $encoded_fh = grep $_ eq 'utf8', PerlIO::get_layers(*$fh);
80 14         35 $self->{_encoded_fh} = $encoded_fh;
81             }
82 49         135 return $self->SUPER::output_fh(@_);
83             }
84              
85             sub _handle_element_start {
86 88     88   27042 my ($self, $element_name, $attr) = @_;
87             $self->{buffer} = ''
88 88 100       234 if !defined $self->{buffer};
89              
90 88 100       233 if ($track_elements{$element_name}) {
91 23         26 push @{ $self->{in_element} }, [ $element_name, $attr ];
  23         72  
92             }
93             }
94              
95             sub _handle_text {
96 66     66   481 my ($self, $text) = @_;
97              
98 66         108 my $in = $self->{in_element};
99 66 100 100     244 if ($in && @$in) {
100 24         27 my ($element_name, $attr) = @{$in->[-1]};
  24         47  
101 24 100 66     102 if ($element_name eq 'for' && $attr->{target_matching} eq 'stopwords') {
    100 33        
    100          
    50          
102             # this will match both for/begin and stopwords/:stopwords
103              
104 16 100       39 print "Stopword para: <$text>\n"
105             if $self->_is_debug;
106 16         179 $self->stopwords->learn_stopwords($text);
107 16         76 return;
108             }
109             # totally ignore verbatim sections
110             elsif ($element_name eq 'Verbatim') {
111 1         3 return;
112             }
113             elsif ($element_name eq 'L') {
114             return
115 1 50       4 if $attr->{'content-implicit'};
116             }
117             elsif ($element_name eq 'C' || $element_name eq 'F') {
118             # maintain word boundaries
119 6 100       17 my $pre = $text =~ s{\A\s+}{} ? ' ' : '';
120 6 100       14 my $post = $text =~ s{\s+\z}{} ? ' ' : '';
121             # if _ is joined with text before or after, it will be treated as
122             # a Perl token and the entire word ignored
123 6 50       14 $text = $pre . (length $text ? '_' : '') . $post;
124             }
125             }
126              
127 48         109 $self->{buffer} .= $text;
128             }
129              
130             sub _handle_element_end {
131 88     88   1942 my ($self, $element_name) = @_;
132              
133 88         109 my $in = $self->{in_element};
134 88 100 100     316 if ($in && @$in && $in->[-1][0] eq $element_name) {
      100        
135 23         28 pop @$in;
136             }
137              
138             return
139 88 100       276 if $element_name !~ m{\A(?:Para|head\d|item-.*|over-block)\z};
140              
141 48         80 my $buffer = delete $self->{buffer};
142 48 100 66     129 if (!defined $buffer || !length $buffer) {
143 13         18 return;
144             }
145              
146 35         82 my $fh = $self->output_fh;
147              
148 35         208 my $out = $self->stopwords->strip_stopwords($buffer);
149              
150             # maintain exact output of older Pod::Parser based implementation
151 35 100       77 print { $fh } "\n"
  14         347  
152             if $element_name ne 'Para';
153              
154             return
155 35 100       69 if !length $out;
156              
157 32         47 local $Text::Wrap::huge = 'overflow';
158 32         96 my $wrapped = Text::Wrap::wrap( '', '', $out ) . "\n\n";
159              
160 32 100       5970 if ($self->{_encoded_fh}) {
161 16         21 print { $fh } $wrapped;
  16         310  
162             }
163             else {
164 16         18 print { $fh } Encode::encode('UTF-8', $wrapped);
  16         101  
165             }
166             }
167              
168             1;
169              
170             __END__
171              
172             =pod
173              
174             =encoding UTF-8
175              
176             =for :stopwords Sean M. Burke Caleb Cushing Olivier Mengué PODs virtE<ugrave> qux
177              
178             =head1 NAME
179              
180             Pod::Spell - a formatter for spellchecking Pod
181              
182             =head1 VERSION
183              
184             version 1.27
185              
186             =head1 SYNOPSIS
187              
188             use Pod::Spell;
189             Pod::Spell->new->parse_from_file( 'File.pm' );
190              
191             Pod::Spell->new->parse_from_filehandle( $infile, $outfile );
192              
193             Also look at L<podspell>
194              
195             % perl -MPod::Spell -e "Pod::Spell->new->parse_from_file(shift)" Thing.pm |spell |fmt
196              
197             ...or instead of piping to spell or C<ispell>, use C<< >temp.txt >>, and open
198             F<temp.txt> in your word processor for spell-checking.
199              
200             =head1 DESCRIPTION
201              
202             Pod::Spell is a Pod formatter whose output is good for
203             spellchecking. Pod::Spell is rather like L<Pod::Text>, except that
204             it doesn't put much effort into actual formatting, and it suppresses things
205             that look like Perl symbols or Perl jargon (so that your spellchecking
206             program won't complain about mystery words like "C<$thing>"
207             or "C<Foo::Bar>" or "hashref").
208              
209             This class works by filtering out words that look like Perl or any
210             form of computerese (like "C<$thing>" or "C<< N>7 >>" or
211             "C<@{$foo}{'bar','baz'}>", anything in CE<lt>...E<gt> or FE<lt>...E<gt>
212             codes, anything in verbatim paragraphs (code blocks), and anything
213             in the stopword list. The default stopword list for a document starts
214             out from the stopword list defined by L<Pod::Wordlist>,
215             and can be supplemented (on a per-document basis) by having
216             C<"=for stopwords"> / C<"=for :stopwords"> region(s) in a document.
217              
218             =head1 METHODS
219              
220             =head2 new
221              
222             Pod::Spell->new(%options)
223              
224             Creates a new Pod::Spell instance. Accepts several options:
225              
226             =over 4
227              
228             =item debug
229              
230             When set to a true value, will output debugging messages about how the Pod
231             is being processed.
232              
233             Defaults to false.
234              
235             =item stopwords
236              
237             Can be specified to use an alternate wordlist instance.
238              
239             Defaults to a new Pod::Wordlist instance.
240              
241             =item no_wide_chars
242              
243             Will be passed to Pod::Wordlist when creating a new instance. Causes all words
244             with characters outside the Latin-1 range to be stripped from the output.
245              
246             =back
247              
248             =head2 stopwords
249              
250             $self->stopwords->isa('Pod::Wordlist'); # true
251              
252             =head2 parse_from_filehandle($in_fh,$out_fh)
253              
254             This method takes an input filehandle (which is assumed to already be
255             opened for reading) and reads the entire input stream looking for blocks
256             (paragraphs) of POD documentation to be processed. If no first argument
257             is given the default input filehandle C<STDIN> is used.
258              
259             The C<$in_fh> parameter may be any object that provides a B<getline()>
260             method to retrieve a single line of input text (hence, an appropriate
261             wrapper object could be used to parse PODs from a single string or an
262             array of strings).
263              
264             =head2 parse_from_file($filename,$outfile)
265              
266             This method takes a filename and does the following:
267              
268             =over 2
269              
270             =item *
271              
272             opens the input file for reading and the output file for writing
273             (creating the appropriate filehandles)
274              
275             =item *
276              
277             invokes the B<parse_from_filehandle()> method passing it the
278             corresponding input and output filehandles.
279              
280             =item *
281              
282             closes the input and output files.
283              
284             =back
285              
286             If the special input filename "", "-" or "<&STDIN" is given then the STDIN
287             filehandle is used for input (and no open or close is performed). If no
288             input filename is specified then "-" is implied. Filehandle references,
289             or objects that support the regular IO operations (like C<E<lt>$fhE<gt>>
290             or C<$fh-E<gt>getline>) are also accepted; the handles must already be
291             opened.
292              
293             If a second argument is given then it should be the name of the desired
294             output file. If the special output filename "-" or ">&STDOUT" is given
295             then the STDOUT filehandle is used for output (and no open or close is
296             performed). If the special output filename ">&STDERR" is given then the
297             STDERR filehandle is used for output (and no open or close is
298             performed). If no output filehandle is currently in use and no output
299             filename is specified, then "-" is implied.
300             Alternatively, filehandle references or objects that support the regular
301             IO operations (like C<print>, e.g. L<IO::String>) are also accepted;
302             the object must already be opened.
303              
304             =head1 ENCODINGS
305              
306             If your Pod is encoded in something other than Latin-1, it should declare
307             an encoding using the L<< perlpod/C<=encoding I<encodingname>> >> directive.
308              
309             =head1 ADDING STOPWORDS
310              
311             You can add stopwords on a per-document basis with
312             C<"=for stopwords"> / C<"=for :stopwords"> regions, like so:
313              
314             =for stopwords plok Pringe zorch snik !qux
315             foo bar baz quux quuux
316              
317             This adds every word in that paragraph after "stopwords" to the
318             stopword list, effective for the rest of the document. In such a
319             list, words are whitespace-separated. (The amount of whitespace
320             doesn't matter, as long as there are no blank lines in the middle
321             of the paragraph.) Plural forms are added automatically using
322             L<Lingua::EN::Inflect>. Words beginning with "!" are
323             I<deleted> from the stopword list -- so "!qux" deletes "qux" from the
324             stopword list, if it was in there in the first place. Note that if
325             a stopword is all-lowercase, then it means that it's okay in I<any>
326             case; but if the word has any capital letters, then it means that
327             it's okay I<only> with I<that> case. So a Wordlist entry of "perl"
328             would permit "perl", "Perl", and (less interestingly) "PERL", "pERL",
329             "PerL", et cetera. However, a Wordlist entry of "Perl" catches
330             only "Perl", not "perl". So if you wanted to make sure you said
331             only "Perl", never "perl", you could add this to the top of your
332             document:
333              
334             =for stopwords !perl Perl
335              
336             Then all instances of the word "Perl" would be weeded out of the
337             Pod::Spell-formatted version of your document, but any instances of
338             the word "perl" would be left in (unless they were in a CE<lt>...> or
339             FE<lt>...> style).
340              
341             You can have several "=for stopwords" regions in your document. You
342             can even express them like so:
343              
344             =begin stopwords
345              
346             plok Pringe zorch
347              
348             snik !qux
349              
350             foo bar
351             baz quux quuux
352              
353             =end stopwords
354              
355             If you want to use EE<lt>...> sequences in a "stopwords" region, you
356             have to use ":stopwords", as here:
357              
358             =for :stopwords
359             virtE<ugrave>
360              
361             ...meaning that you're adding a stopword of "virtE<ugrave>". If
362             you left the ":" out, that would mean you were adding a stopword of
363             "virtEE<lt>ugrave>" (with a literal E, a literal <, etc), which
364             will have no effect, since any occurrences of virtEE<lt>ugrave>
365             don't look like a normal human-language word anyway, and so would
366             be screened out before the stopword list is even consulted.
367              
368             =head1 CAVEATS
369              
370             =head2 finding stopwords defined with C<=for>
371              
372             Pod::Spell makes a single pass over the POD. Stopwords
373             must be added B<before> they show up in the POD.
374              
375             =head1 HINT
376              
377             If you feed output of Pod::Spell into your word processor and run a
378             spell-check, make sure you're I<not> also running a grammar-check -- because
379             Pod::Spell drops words that it thinks are Perl symbols, jargon, or
380             stopwords, this means you'll have ungrammatical sentences, what with
381             words being missing and all. And you don't need a grammar checker
382             to tell you that.
383              
384             =head1 SEE ALSO
385              
386             =over 4
387              
388             =item * L<Pod::Wordlist>
389              
390             =item * L<Pod::Simple>
391              
392             =item * L<podchecker> also known as L<Pod::Checker>
393              
394             =item * L<perlpod>, L<perlpodspec>
395              
396             =back
397              
398             =head1 BUGS
399              
400             Please report any bugs or feature requests on the bugtracker website
401             L<https://rt.cpan.org/Public/Dist/Display.html?Name=Pod-Spell> or by email
402             to L<bug-Pod-Spell@rt.cpan.org|mailto:bug-Pod-Spell@rt.cpan.org>.
403              
404             When submitting a bug or request, please include a test-file or a
405             patch to an existing test-file that illustrates the bug or desired
406             feature.
407              
408             =head1 AUTHORS
409              
410             =over 4
411              
412             =item *
413              
414             Sean M. Burke <sburke@cpan.org>
415              
416             =item *
417              
418             Caleb Cushing <xenoterracide@gmail.com>
419              
420             =back
421              
422             =head1 CONTRIBUTORS
423              
424             =for stopwords David Golden Graham Knop Kent Fredric Mohammad S Anwar Olivier Mengué Paulo Custodio
425              
426             =over 4
427              
428             =item *
429              
430             David Golden <dagolden@cpan.org>
431              
432             =item *
433              
434             Graham Knop <haarg@haarg.org>
435              
436             =item *
437              
438             Kent Fredric <kentfredric@gmail.com>
439              
440             =item *
441              
442             Mohammad S Anwar <mohammad.anwar@yahoo.com>
443              
444             =item *
445              
446             Olivier Mengué <dolmen@cpan.org>
447              
448             =item *
449              
450             Paulo Custodio <pauloscustodio@gmail.com>
451              
452             =back
453              
454             =head1 COPYRIGHT AND LICENSE
455              
456             This software is Copyright (c) 2024 by Olivier Mengué.
457              
458             This is free software, licensed under:
459              
460             The Artistic License 2.0 (GPL Compatible)
461              
462             =cut