|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Test::Text;  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
166492
 | 
 use warnings;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
78
 | 
    | 
| 
4
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
15
 | 
 use strict;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
45
 | 
    | 
| 
5
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
960
 | 
 use utf8; # Files and dictionaries might use utf8  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
    | 
| 
6
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
1423
 | 
 use Encode;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25040
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
167
 | 
    | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
8
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
18
 | 
 use Carp;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
157
 | 
    | 
| 
9
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
1938
 | 
 use Path::Tiny;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
35382
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
127
 | 
    | 
| 
10
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
1075
 | 
 use Text::Hunspell;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6362
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
114
 | 
    | 
| 
11
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
1105
 | 
 use Test::Text::Sentence qw(split_sentences);  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
146
 | 
    | 
| 
12
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
29
 | 
 use v5.22;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
    | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
14
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
1081
 | 
 use version; our $VERSION = qv('0.6.6'); # Really works with RMarkdown  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4511
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
    | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
16
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
1259
 | 
 use parent 'Test::Builder::Module'; # Included in Test::Simple  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
701
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
    | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my $CLASS = __PACKAGE__;  | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our @EXPORT= 'just_check';  | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 BEGIN {  | 
| 
22
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
74817
 | 
   binmode *STDOUT, ":encoding(utf8)";  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
17
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
    | 
| 
23
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2347
 | 
   binmode *STDERR, ":encoding(utf8)";  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Module implementation here  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new {  | 
| 
28
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
  
1
  
 | 
2293
 | 
   my $class = shift;  | 
| 
29
 | 
6
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
17
 | 
   my $dir = shift || croak "Need a single directory with text" ;  | 
| 
30
 | 
6
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
15
 | 
   my $data_dir = shift || croak "No default spelling data directory\n";  | 
| 
31
 | 
6
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
22
 | 
   my $language = shift || "en_US"; # Defaults to English  | 
| 
32
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
   my @files = @_ ; # Use all appropriate files in dir by default  | 
| 
33
 | 
6
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
16
 | 
   if (!@files ) {  | 
| 
34
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6137
 | 
     @files = glob("$dir/*.md $dir/*.tex $dir/*.txt $dir/*.markdown $dir/*.Rmd $dir/*.Rmarkdown)");  | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } else {  | 
| 
36
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
     @files = map( "$dir/$_", @files );  | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
38
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
47
 | 
   my $self = {  | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      _dir => $dir,  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      _data_dir => $data_dir,  | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      _files => \@files  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   };  | 
| 
43
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
   bless  $self, $class;  | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # Speller declaration  | 
| 
46
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
148339
 | 
   my $speller = Text::Hunspell->new(  | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				  "$data_dir/$language.aff",    # Hunspell or other affix file  | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				  "$data_dir/$language.dic"     # Hunspell or other dictionary file  | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				   );  | 
| 
50
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
80
 | 
   croak "Couldn't create speller: $1" if !$speller;  | 
| 
51
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38
 | 
   $self->{'_speller'} = $speller;  | 
| 
52
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1105
 | 
   $speller->add_dic("$dir/words.dic"); # word.dic should be in the text directory  | 
| 
53
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13078
 | 
   return $self;  | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub dir {  | 
| 
57
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
8
 | 
     return shift->{'_dir'};  | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub files {  | 
| 
61
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
  
1
  
 | 
29
 | 
   return shift->{'_files'};  | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub check {  | 
| 
65
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
6413
 | 
   my $self = shift;  | 
| 
66
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
   my $tb= $CLASS->builder;  | 
| 
67
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
35
 | 
   my $speller = $self->{'_speller'};  | 
| 
68
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
   my %vocabulary;  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my @sentences;  | 
| 
70
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
   for my $f ( @{$self->files}) {  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
    | 
| 
71
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3335
 | 
     my $file_content= path($f)->slurp_utf8;  | 
| 
72
 | 
13
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4538
 | 
     if ( $f =~ /(\.md|\.markdown|\.Rmd|\.Rmarkdown)/ ) {  | 
| 
73
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36
 | 
       $file_content = _strip_urls( $file_content);  | 
| 
74
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
       $file_content = _strip_code( $file_content);  | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
76
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
47
 | 
     push @sentences, split_sentences( $file_content );  | 
| 
77
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
73
 | 
     $tb->cmp_ok( scalar @sentences, ">=", 1, "We have " . ($#sentences + 1) . " sentences");  | 
| 
78
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
1304
 | 
     my @words = ($file_content =~ m{\b(\p{L}+)\b}g);  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
45
 | 
    | 
| 
 
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5988
 | 
    | 
| 
79
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
     for my $w (@words) {  | 
| 
80
 | 
703
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
193824
 | 
       next if !$w;  | 
| 
81
 | 
703
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1911
 | 
       $vocabulary{lc($w)}++;  | 
| 
82
 | 
703
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5083
 | 
       $tb->ok( $speller->check( $w),  "$f >> '". encode_utf8($w) . "'");  | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
84
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3684
 | 
     my $different_words = scalar keys %vocabulary;  | 
| 
85
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
52
 | 
     $tb->cmp_ok(  $different_words, ">", 1, "We have $different_words different words");  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _strip_urls {  | 
| 
91
 | 
13
 | 
 
 | 
  
 33
  
 | 
  
13
  
 | 
 
 | 
36
 | 
   my $text = shift || carp "No text";  | 
| 
92
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
138
 | 
   $text =~ s/\[(.+?)\]\(\S+\)/$1/sg;  | 
| 
93
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
   return $text;  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _strip_code {  | 
| 
97
 | 
13
 | 
 
 | 
  
 33
  
 | 
  
13
  
 | 
 
 | 
31
 | 
   my $text = shift || carp "No text in _strip_code";  | 
| 
98
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
92
 | 
   $text =~ s/---[\w\W]*?---//g;  | 
| 
99
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
156
 | 
   $text =~ s/~~~[\w\W]*?~~~//g;  | 
| 
100
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
56
 | 
   $text =~ s/```.+?```//g;  | 
| 
101
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
73
 | 
   $text =~ s/`[^`]+?`//g;  | 
| 
102
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
   return $text;  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub just_check {  | 
| 
107
 | 
2
 | 
 
 | 
  
 33
  
 | 
  
2
  
 | 
  
1
  
 | 
1202
 | 
     my $dir = shift || croak "Need a directory with text" ;  | 
| 
108
 | 
2
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
5
 | 
     my $data_dir = shift || croak "No default spelling data directory\n";  | 
| 
109
 | 
2
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
7
 | 
     my $language = shift || "en_US"; # Defaults to English  | 
| 
110
 | 
2
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
5
 | 
     my $call_done_testing = shift // 1; # Defaults to 1  | 
| 
111
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
     my $tesxt = Test::Text->new($dir, $data_dir, $language, @_);  | 
| 
112
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
     my $tb= $CLASS->builder;  | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $tb->subtest( "Testing $dir" => sub {  | 
| 
114
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
2179
 | 
       $tesxt->check();  | 
| 
115
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
47
 | 
     });  | 
| 
116
 | 
2
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
32657
 | 
     $tb->done_testing() if $call_done_testing;  | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub done_testing {  | 
| 
120
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
   my $tb= $CLASS->builder;  | 
| 
121
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $tb->done_testing;  | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 "All over, all out, all over and out"; # Magic circus phrase said at the end of the show  | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __END__  |