File Coverage

blib/lib/Test/Spelling/Stopwords.pm
Criterion Covered Total %
statement 32 137 23.3
branch 4 56 7.1
condition 1 27 3.7
subroutine 10 22 45.4
pod 6 6 100.0
total 53 248 21.3


line stmt bran cond sub pod time code
1             package Test::Spelling::Stopwords;
2              
3             $Test::Spelling::Stopwords::VERSION = '0.03';
4             $Test::Spelling::Stopwords::AUTHORITY = 'cpan:MANWAR';
5              
6 2     2   262144 use strict;
  2         4  
  2         80  
7 2     2   10 use warnings;
  2         9  
  2         139  
8              
9 2     2   22 use Cwd qw(abs_path);
  2         4  
  2         115  
10 2     2   12 use Exporter qw(import);
  2         3  
  2         82  
11 2     2   13 use File::Find qw(find);
  2         3  
  2         160  
12 2     2   12 use File::Spec qw();
  2         3  
  2         43  
13 2     2   17 use Test::Builder qw();
  2         4  
  2         2128  
14              
15             our @EXPORT = qw(
16             all_pod_files_spelling_ok
17             pod_file_spelling_ok
18             set_stopwords_file
19             set_spell_lang
20             set_spell_dirs
21             get_stopwords_file
22             );
23              
24             =head1 NAME
25              
26             Test::Spelling::Stopwords - POD spell-checking with project-specific stopwords
27              
28             =head1 VERSION
29              
30             Version 0.03
31              
32             =head1 SYNOPSIS
33              
34             Minimal - just drop this into your F<xt/> directory:
35              
36             # xt/spell-pod.t
37             use Test::More;
38             use Test::Spelling::Stopwords;
39              
40             unless ($ENV{AUTHOR_TESTING} || $ENV{RELEASE_TESTING} || $ENV{CI}) {
41             plan skip_all => 'Spelling tests only run under AUTHOR_TESTING';
42             }
43              
44             all_pod_files_spelling_ok();
45              
46             Or with explicit configuration:
47              
48             use Test::Spelling::Stopwords;
49              
50             set_spell_lang('en_US');
51             set_stopwords_file('xt/.stopwords');
52             set_spell_dirs('lib', 'bin');
53              
54             all_pod_files_spelling_ok();
55              
56             Or with per-call overrides:
57              
58             all_pod_files_spelling_ok(
59             lang => 'en_US',
60             stopwords_file => 'xt/.stopwords',
61             dirs => ['lib', 'bin'],
62             );
63              
64             Check a single file:
65              
66             use Test::Spelling::Stopwords;
67             use Test::More tests => 1;
68              
69             pod_file_spelling_ok('lib/My/Module.pm');
70              
71             =head1 DESCRIPTION
72              
73             C<Test::Spelling::Stopwords> is a drop-in POD spell-checker that integrates
74             project-specific stopword files with B<aspell>. It is designed to work
75             alongside the companion L<gen-stopwords> script, which auto-generates a
76             F<.stopwords> file containing only the vocabulary unique to your project -
77             after filtering out the common Perl ecosystem terms already covered by
78             L<Pod::Wordlist>.
79              
80             =head2 How it differs from L<Test::Spelling>
81              
82             L<Test::Spelling> is the established CPAN module for POD spell-checking.
83             C<Test::Spelling::Stopwords> does not replace it - it complements it by
84             addressing two specific gaps:
85              
86             =over 4
87              
88             =item Automatic stopwords file discovery
89              
90             L<Test::Spelling> requires you to call C<add_stopwords> explicitly or
91             maintain a C<__DATA__> section in your test.
92             C<Test::Spelling::Stopwords> automatically discovers and loads a
93             F<.stopwords> file from your project root (or any path you configure),
94             so your test file contains no project-specific content and can be reused
95             across projects unchanged.
96              
97             =item Line-number reporting
98              
99             When L<Test::Spelling> finds a misspelled word it tells you the word but
100             not where it is. C<Test::Spelling::Stopwords> reports the exact line
101             number(s) in the source file where each misspelling appears, making
102             failures fast to locate and fix.
103              
104             =back
105              
106             =head2 Two-layer stopword architecture
107              
108             The module merges two sources of known words before checking any file:
109              
110             =over 4
111              
112             =item Layer 1 - L<Pod::Wordlist>
113              
114             The CPAN-maintained vocabulary of common Perl and technical terms (C,
115             C, C, C, C, C, etc.). This mirrors
116             what C<gen-stopwords> filters out when building F<.stopwords>, so the
117             module and the generator always agree on what counts as a known word.
118              
119             Without this layer the test is stricter than the generator and flags words
120             that L<Pod::Wordlist> covers - causing false failures even on a freshly
121             generated F<.stopwords>.
122              
123             =item Layer 2 - F<.stopwords>
124              
125             Project-specific vocabulary generated by C<gen-stopwords>. Contains only
126             terms not already covered by L<Pod::Wordlist>.
127              
128             =back
129              
130             =head2 Stopwords file format
131              
132             The F<.stopwords> file is a plain text file with one word per line.
133             Lines beginning with C<#> and blank lines are ignored.
134              
135             # Auto-generated stopwords for en_GB
136             dbic
137             mojolicious
138             resultset
139             myauthor
140              
141             Generate it with the companion C<gen-stopwords> script:
142              
143             gen-stopwords --dir lib --dir bin
144              
145             =head2 Freshness check
146              
147             On every run, C<all_pod_files_spelling_ok> compares the modification time
148             of your F<.stopwords> file against your source files. If any source file
149             is newer, it emits a C<diag> warning:
150              
151             # ------------------------------------------------------------
152             # WARNING: .stopwords is out of date!
153             # Run gen-stopwords to regenerate it.
154             # ------------------------------------------------------------
155              
156             This is advisory only - the test continues to run.
157              
158             =head2 POD cleaning
159              
160             Before passing each line to aspell, all POD formatting codes are stripped
161             B<entirely>:
162              
163             E<gt> removed (not 'gt', preventing the 'Egt' artefact)
164             L<...> removed
165             C<...> removed
166             B<...> removed
167              
168             This is more aggressive than simple content extraction and prevents a
169             class of false positives caused by POD entity fragments appearing as
170             bare words.
171              
172             =head2 Environment variables
173              
174             All defaults can be overridden without editing the test file:
175              
176             =over 4
177              
178             =item C<SPELL_LANG>
179              
180             Aspell language code. Default: C<en_GB>.
181              
182             =item C<STOPWORD_FILE>
183              
184             Path to the stopwords file. Default: C<.stopwords>.
185              
186             =item C<SPELL_DIRS>
187              
188             Colon- or comma-separated list of directories to scan.
189             Default: C<lib, bin, script>.
190              
191             =item C<ASPELL_CMD>
192              
193             Complete aspell command string, including all flags.
194             Default: C<aspell list -l LANG --run-together>, where LANG is the configured language code.
195              
196             =back
197              
198             =cut
199              
200             my $TB = Test::Builder->new;
201              
202             my %_config = (
203             lang => $ENV{SPELL_LANG} || 'en_GB',
204             stopwords_file => $ENV{STOPWORD_FILE} || '.stopwords',
205             dirs => do {
206             $ENV{SPELL_DIRS}
207             ? [ split /[:,]/, $ENV{SPELL_DIRS} ]
208             : [qw(lib bin script)]
209             },
210             );
211              
212             my %_prune = map { $_ => 1 } qw(
213             .git .svn .hg .build
214             blib _build local extlib cover_db
215             node_modules vendor
216             );
217              
218             my $SOURCE_RE = qr/\.(pm|pod|pl|t)$/;
219              
220             =head1 CONFIGURATION API
221              
222             =head2 set_spell_lang
223              
224             set_spell_lang('en_US');
225              
226             Sets the aspell language code. May also be set via the C<SPELL_LANG>
227             environment variable.
228              
229             =cut
230              
231 0     0 1 0 sub set_spell_lang { $_config{lang} = $_[0] }
232              
233             =head2 set_stopwords_file
234              
235             set_stopwords_file('xt/.stopwords');
236              
237             Sets the path to the stopwords file. May also be set via the
238             C<STOPWORD_FILE> environment variable.
239              
240             =cut
241              
242 0     0 1 0 sub set_stopwords_file { $_config{stopwords_file} = $_[0] }
243              
244             =head2 set_spell_dirs
245              
246             set_spell_dirs('lib', 'bin', 'script');
247             set_spell_dirs( ['lib', 'bin'] );
248              
249             Sets the list of directories to search for POD files. Accepts either a
250             list or an arrayref. May also be set via the C<SPELL_DIRS> environment
251             variable.
252              
253             =cut
254              
255 0 0   0 1 0 sub set_spell_dirs { $_config{dirs} = ref $_[0] ? $_[0] : [@_] }
256              
257             =head2 get_stopwords_file
258              
259             my $path = get_stopwords_file();
260              
261             Returns the currently configured stopwords file path.
262              
263             =cut
264              
265 0     0 1 0 sub get_stopwords_file { $_config{stopwords_file} }
266              
267             =head1 EXPORTED FUNCTIONS
268              
269             =head2 all_pod_files_spelling_ok
270              
271             all_pod_files_spelling_ok();
272              
273             all_pod_files_spelling_ok(
274             lang => 'en_US',
275             stopwords_file => 'xt/.stopwords',
276             dirs => ['lib', 'bin'],
277             );
278              
279             Finds all Perl source files (F<.pm>, F<.pl>, F<.pod>, F<.t>) under the
280             configured source directories, and runs a spell-check on the POD in each
281             one. Emits one TAP pass/fail per file.
282              
283             Misspelled words are reported via C<diag> with their line numbers:
284              
285             not ok 1 - POD spelling: lib/My/Module.pm
286             # 'serialiisable' line(s): 42
287             # 'Egtconnect' line(s): 17, 83
288              
289             Accepts an optional hash of per-call overrides (C<lang>, C<stopwords_file>,
290             C<dirs>) that take precedence over the module-level configuration for the
291             duration of the call.
292              
293             Skips gracefully (via C<skip_all>) if:
294              
295             =over 4
296              
297             =item * aspell is not installed or not on C<$PATH>
298              
299             =item * The stopwords file does not exist
300              
301             =item * No POD files are found in the configured directories
302              
303             =back
304              
305             =cut
306              
307             sub all_pod_files_spelling_ok {
308 1     1 1 250168 my %args = @_;
309              
310 1 50       8 local $_config{lang} = $args{lang} if exists $args{lang};
311 1 50       4 local $_config{stopwords_file} = $args{stopwords_file} if exists $args{stopwords_file};
312 1 50       4 local $_config{dirs} = $args{dirs} if exists $args{dirs};
313              
314 1 50       5 unless (_check_aspell()) {
315 1         37 $TB->plan(skip_all => 'aspell is not installed or not on $PATH');
316 0         0 return;
317             }
318              
319 0 0       0 unless (-e $_config{stopwords_file}) {
320 0         0 $TB->plan(
321             skip_all =>
322             "No $_config{stopwords_file} found. Run gen-stopwords to create one."
323             );
324 0         0 return;
325             }
326              
327 0         0 _freshness_check();
328              
329 0         0 my $stopwords = _load_stopwords();
330 0         0 my @files = _pod_files();
331              
332 0 0       0 unless (@files) {
333 0         0 $TB->plan(skip_all => 'No POD files found to check');
334 0         0 return;
335             }
336              
337 0         0 $TB->plan(tests => scalar @files);
338              
339 0         0 for my $file (@files) {
340 0         0 pod_file_spelling_ok($file, $stopwords);
341             }
342             }
343              
344             =head2 pod_file_spelling_ok
345              
346             pod_file_spelling_ok($file);
347             pod_file_spelling_ok($file, \%stopwords);
348             pod_file_spelling_ok($file, \%stopwords, $test_name);
349              
350             Spell-checks the POD in a single file. Emits one pass or fail.
351              
352             If C<\%stopwords> is omitted the configured stopwords file is loaded
353             automatically. C<$test_name> defaults to C<"POD spelling: $file">.
354              
355             Returns true if the file passes, false otherwise.
356              
357             =cut
358              
359             sub pod_file_spelling_ok {
360 0     0 1 0 my ($file, $stopwords, $test_name) = @_;
361              
362 0   0     0 $stopwords //= _load_stopwords();
363 0   0     0 $test_name //= "POD spelling: $file";
364              
365 0         0 my ($passed, $errors) = _check_file($file, $stopwords);
366              
367 0 0       0 if ($passed) {
368 0         0 $TB->ok(1, $test_name);
369             }
370             else {
371 0         0 $TB->ok(0, $test_name);
372 0         0 for my $word (sort keys %$errors) {
373             $TB->diag(sprintf " '%s' line(s): %s",
374 0         0 $word, join ', ', @{ $errors->{$word} });
  0         0  
375             }
376             }
377              
378 0         0 return $passed;
379             }
380              
381             #
382             #
383             # Internal Helpers
384              
385             sub _aspell_cmd {
386             return $ENV{ASPELL_CMD}
387 0   0 0   0 || "aspell list -l $_config{lang} --run-together";
388             }
389              
390             sub _check_aspell {
391 1     1   5174 my $out = `aspell --version 2>&1`;
392 1   33     99 return $? == 0 && $out =~ /aspell/i;
393             }
394              
395             # Build the combined stopword lookup — two layers:
396             #
397             # Layer 1: Pod::Wordlist - the shared Perl community vocabulary (~1000 words)
398             # Layer 2: .stopwords - project-specific terms only
399             #
400             # This mirrors gen-stopwords exactly: gen-stopwords filters Pod::Wordlist words
401             # OUT of .stopwords, so we must add them back here at runtime. Without Layer 1
402             # the test is stricter than the generator and flags words like 'ok', 'undef',
403             # 'dbi', 'CPAN' that Pod::Wordlist covers — causing false failures on a freshly
404             # generated .stopwords file.
405             sub _load_stopwords {
406 0     0     my %words;
407              
408             # Layer 1 - Pod::Wordlist
409 0 0         if (eval 'use Pod::Wordlist; 1') {
410 2     2   29 my $wl = do { no strict 'refs'; \%{'Pod::Wordlist::Wordlist'} };
  2         5  
  2         2972  
  0            
  0            
  0            
411 0           $words{ lc $_ } = 1 for keys %$wl;
412             }
413             else {
414 0           $TB->diag(
415             'Pod::Wordlist not found - install it for best results '
416             . '(cpanm Pod::Wordlist).'
417             );
418             }
419              
420             # Layer 2 - project .stopwords file
421 0           my $file = $_config{stopwords_file};
422 0 0         if (-e $file) {
423             open my $fh, '<', $file
424 0 0         or do { $TB->diag("Cannot open $file: $!"); return \%words };
  0            
  0            
425              
426 0           while (<$fh>) {
427 0           chomp;
428 0 0 0       next if /^#/ || /^\s*$/;
429 0           $words{ lc $_ } = 1;
430             }
431 0           close $fh;
432             }
433              
434 0           return \%words;
435             }
436              
437             # Warn via diag if any source file is newer than the stopwords file.
438             sub _freshness_check {
439 0     0     my $file = $_config{stopwords_file};
440 0 0         return unless -e $file;
441              
442 0           my $stop_mtime = (stat $file)[9];
443 0           my $latest_src_mtime = 0;
444 0           my @search_dirs = grep { -d } ('.', @{ $_config{dirs} });
  0            
  0            
445              
446             find({
447             wanted => sub {
448 0 0 0 0     if (-d $_ && $_prune{$_}) { $File::Find::prune = 1; return }
  0            
  0            
449 0 0 0       return unless -f $_ && /$SOURCE_RE/;
450 0 0         return if $_ eq $file;
451 0           my $m = (stat _)[9];
452 0 0         $latest_src_mtime = $m if $m > $latest_src_mtime;
453             },
454 0           no_chdir => 0,
455             }, @search_dirs);
456              
457 0 0         if ($latest_src_mtime > $stop_mtime) {
458 0           $TB->diag('-' x 60);
459 0           $TB->diag("WARNING: $file is out of date!");
460 0           $TB->diag('Run gen-stopwords to regenerate it.');
461 0           $TB->diag('-' x 60);
462             }
463             }
464              
465             # Collect all POD files from the configured source directories.
466             sub _pod_files {
467 0     0     my @dirs = grep { -d } @{ $_config{dirs} };
  0            
  0            
468 0           my @files;
469              
470 0 0         return () unless @dirs;
471              
472             find({
473             wanted => sub {
474 0 0 0 0     if (-d $_ && $_prune{$_}) { $File::Find::prune = 1; return }
  0            
  0            
475 0 0 0       return unless -f $_ && /$SOURCE_RE/;
476 0           push @files, $File::Find::name;
477             },
478 0           no_chdir => 0,
479             }, @dirs);
480              
481 0           return sort @files;
482             }
483              
484             # Spell-check a single file.
485             # Returns ( $passed, \%errors ) where %errors is word => [ line numbers ].
486             sub _check_file {
487 0     0     my ($file, $stopwords) = @_;
488              
489 0           my %errors;
490 0           my $in_pod = 0;
491 0           my $line_no = 0;
492 0           my $cmd = _aspell_cmd();
493              
494 0 0         open my $fh, '<', $file
495             or return (0, { _open_error => ["Cannot open: $!"] });
496              
497 0           while (my $line = <$fh>) {
498 0           $line_no++;
499              
500 0 0         $in_pod = 1 if $line =~ /^=(?:head\d|item|over|back|pod|begin|for|method|attr)\b/;
501 0 0         $in_pod = 0 if $line =~ /^=cut\b/;
502 0 0         next unless $in_pod;
503              
504             # Strip POD formatting codes entirely - prevents 'Egt' artefacts
505 0           $line =~ s/[A-Z]<[^>]+>//g;
506 0           $line =~ s/[<>]//g;
507              
508 0           (my $escaped = $line) =~ s/'/'\\''/g;
509 0           my $misspelled = `echo '$escaped' | $cmd 2>/dev/null`;
510 0 0         next unless $misspelled;
511              
512 0           for my $word (split /\n/, $misspelled) {
513 0           $word =~ s/^\s+|\s+$//g;
514 0 0         next unless length $word;
515              
516 0           my $clean = lc $word;
517 0           $clean =~ s/'s$//;
518              
519 0 0         next if $stopwords->{$clean};
520              
521 0           push @{ $errors{$word} }, $line_no;
  0            
522             }
523             }
524              
525 0           close $fh;
526              
527 0           return (!%errors, \%errors);
528             }
529              
530             =head1 RECOMMENDED WORKFLOW
531              
532             =over 4
533              
534             =item 1.
535              
536             Install dependencies:
537              
538             cpanm -vS Test::Spelling::Stopwords
539              
540             This also installs the companion C<gen-stopwords> script.
541              
542             =item 2.
543              
544             Generate your project's stopwords file:
545              
546             gen-stopwords --dir lib --dir bin
547              
548             This scans your source files, runs aspell, filters out terms already
549             covered by L<Pod::Wordlist>, and writes a lean F<.stopwords> containing
550             only project-specific vocabulary.
551              
552             =item 3.
553              
554             Create F<xt/spell-pod.t>:
555              
556             use Test::More;
557             use Test::Spelling::Stopwords;
558              
559             unless ($ENV{AUTHOR_TESTING} || $ENV{RELEASE_TESTING} || $ENV{CI}) {
560             plan skip_all => 'Spelling tests only run under AUTHOR_TESTING';
561             }
562              
563             all_pod_files_spelling_ok();
564              
565             =item 4.
566              
567             Run:
568              
569             AUTHOR_TESTING=1 prove -lv xt/spell-pod.t
570              
571             =item 5.
572              
573             After adding or editing source files, regenerate:
574              
575             gen-stopwords
576              
577             The test will warn you if you forget.
578              
579             =back
580              
581             =head1 DEPENDENCIES
582              
583             =over 4
584              
585             =item * L (core via L)
586              
587             =item * L (core)
588              
589             =item * L (core)
590              
591             =item * L (core)
592              
593             =item * L<Pod::Wordlist> (strongly recommended - C<cpanm Pod::Wordlist>)
594              
595             =item * B - must be installed on the system and available on C<$PATH>
596              
597             =back
598              
599             =head1 BUGS AND LIMITATIONS
600              
601             =over 4
602              
603             =item * aspell must be installed externally. The module skips gracefully
604             if it is absent but cannot install it for you.
605              
606             =item * The shell pipe to aspell (via backticks) means Windows is not
607             currently supported. Patches welcome.
608              
609             =item * The freshness check uses file modification times, which are reset
610             by C and similar operations. It is advisory only.
611              
612             =back
613              
614             =head1 SEE ALSO
615              
616             =over 4
617              
618             =item * L<Test::Spelling> - the established base module this complements
619              
620             =item * L<Pod::Wordlist> - the community Perl vocabulary list
621              
622             =item * L<Pod::Spell> - POD-aware text extraction for spell-checking
623              
624             =item * L<gen-stopwords> - companion script for generating F<.stopwords>
625              
626             =back
627              
628             =head1 AUTHOR
629              
630             Mohammad Sajid Anwar C<< >>
631              
632             =head1 REPOSITORY
633              
634             L
635              
636             =head1 BUGS
637              
638             Please report any bugs or feature requests through the web interface at L.
639             I will be notified and then you'll automatically be notified of progress on your
640             bug as I make changes.
641              
642             =head1 SUPPORT
643              
644             You can find documentation for this module with the perldoc command.
645              
646             perldoc Test::Spelling::Stopwords
647              
648             You can also look for information at:
649              
650             =over 4
651              
652             =item * Bug Report
653              
654             L
655              
656             =item * CPAN Ratings
657              
658             L
659              
660             =item * Search MetaCPAN
661              
662             L
663              
664             =back
665              
666             =head1 LICENSE AND COPYRIGHT
667              
668             Copyright (C) 2026 Mohammad Sajid Anwar.
669              
670             This program is free software; you can redistribute it and / or modify it under
671             the terms of the Artistic License (2.0). You may obtain a copy of the full
672             license at:
673             L
674             Any use, modification, and distribution of the Standard or Modified Versions is
675             governed by this Artistic License. By using, modifying or distributing the Package,
676             you accept this license. Do not use, modify, or distribute the Package, if you do
677             not accept this license.
678             If your Modified Version has been derived from a Modified Version made by someone
679             other than you, you are nevertheless required to ensure that your Modified Version
680             complies with the requirements of this license.
681             This license does not grant you the right to use any trademark, service mark,
682             tradename, or logo of the Copyright Holder.
683             This license includes the non-exclusive, worldwide, free-of-charge patent license
684             to make, have made, use, offer to sell, sell, import and otherwise transfer the
685             Package with respect to any patent claims licensable by the Copyright Holder that
686             are necessarily infringed by the Package. If you institute patent litigation
687             (including a cross-claim or counterclaim) against any party alleging that the
688             Package constitutes direct or contributory patent infringement, then this Artistic
689             License to you shall terminate on the date that such litigation is filed.
690             Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER AND
691             CONTRIBUTORS "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. THE IMPLIED
692             WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR
693             NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY YOUR LOCAL LAW. UNLESS
694             REQUIRED BY LAW, NO COPYRIGHT HOLDER OR CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT,
695             INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE
696             OF THE PACKAGE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
697              
698             =cut
699              
700             1; # End of Test::Spelling::Stopwords