| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Test::Text; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 3 |  |  | 3 |  | 167518 | use warnings; | 
|  | 3 |  |  |  |  | 24 |  | 
|  | 3 |  |  |  |  | 77 |  | 
| 4 | 3 |  |  | 3 |  | 13 | use strict; | 
|  | 3 |  |  |  |  | 5 |  | 
|  | 3 |  |  |  |  | 45 |  | 
| 5 | 3 |  |  | 3 |  | 962 | use utf8; # Files and dictionaries might use utf8 | 
|  | 3 |  |  |  |  | 26 |  | 
|  | 3 |  |  |  |  | 25 |  | 
| 6 | 3 |  |  | 3 |  | 1399 | use Encode; | 
|  | 3 |  |  |  |  | 25055 |  | 
|  | 3 |  |  |  |  | 168 |  | 
| 7 |  |  |  |  |  |  |  | 
| 8 | 3 |  |  | 3 |  | 16 | use Carp; | 
|  | 3 |  |  |  |  | 6 |  | 
|  | 3 |  |  |  |  | 139 |  | 
| 9 | 3 |  |  | 3 |  | 1986 | use Path::Tiny; | 
|  | 3 |  |  |  |  | 35169 |  | 
|  | 3 |  |  |  |  | 133 |  | 
| 10 | 3 |  |  | 3 |  | 1099 | use Text::Hunspell; | 
|  | 3 |  |  |  |  | 6114 |  | 
|  | 3 |  |  |  |  | 116 |  | 
| 11 | 3 |  |  | 3 |  | 1120 | use Test::Text::Sentence qw(split_sentences); | 
|  | 3 |  |  |  |  | 7 |  | 
|  | 3 |  |  |  |  | 137 |  | 
| 12 | 3 |  |  | 3 |  | 28 | use v5.22; | 
|  | 3 |  |  |  |  | 8 |  | 
| 13 |  |  |  |  |  |  |  | 
| 14 | 3 |  |  | 3 |  | 1037 | use version; our $VERSION = qv('0.6.5'); # Works with UTF8 and includes Text::Sentence | 
|  | 3 |  |  |  |  | 4491 |  | 
|  | 3 |  |  |  |  | 16 |  | 
| 15 |  |  |  |  |  |  |  | 
| 16 | 3 |  |  | 3 |  | 1275 | use parent 'Test::Builder::Module'; # Included in Test::Simple | 
|  | 3 |  |  |  |  | 665 |  | 
|  | 3 |  |  |  |  | 14 |  | 
| 17 |  |  |  |  |  |  |  | 
| 18 |  |  |  |  |  |  | my $CLASS = __PACKAGE__; | 
| 19 |  |  |  |  |  |  | our @EXPORT= 'just_check'; | 
| 20 |  |  |  |  |  |  |  | 
| 21 |  |  |  |  |  |  | BEGIN { | 
| 22 | 3 |  |  | 3 |  | 74772 | binmode *STDOUT, ":encoding(utf8)"; | 
|  | 3 |  |  | 3 |  | 20 |  | 
|  | 3 |  |  |  |  | 4 |  | 
|  | 3 |  |  |  |  | 20 |  | 
| 23 | 3 |  |  |  |  | 2493 | binmode *STDERR, ":encoding(utf8)"; | 
| 24 |  |  |  |  |  |  | } | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  | # Module implementation here | 
| 27 |  |  |  |  |  |  | sub new { | 
| 28 | 6 |  |  | 6 | 1 | 2482 | my $class = shift; | 
| 29 | 6 |  | 33 |  |  | 20 | my $dir = shift || croak "Need a single directory with text" ; | 
| 30 | 6 |  | 33 |  |  | 19 | my $data_dir = shift || croak "No default spelling data directory\n"; | 
| 31 | 6 |  | 100 |  |  | 56 | my $language = shift || "en_US"; # Defaults to English | 
| 32 | 6 |  |  |  |  | 16 | my @files = @_ ; # Use all appropriate files in dir by default | 
| 33 | 6 | 100 |  |  |  | 72 | if (!@files ) { | 
| 34 | 5 |  |  |  |  | 6984 | @files = glob("$dir/*.md $dir/*.tex $dir/*.txt $dir/*.markdown $dir/*.Rmd $dir/*.Rmarkdown)"); | 
| 35 |  |  |  |  |  |  | } else { | 
| 36 | 1 |  |  |  |  | 5 | @files = map( "$dir/$_", @files ); | 
| 37 |  |  |  |  |  |  | } | 
| 38 | 6 |  |  |  |  | 62 | my $self = { | 
| 39 |  |  |  |  |  |  | _dir => $dir, | 
| 40 |  |  |  |  |  |  | _data_dir => $data_dir, | 
| 41 |  |  |  |  |  |  | _files => \@files | 
| 42 |  |  |  |  |  |  | }; | 
| 43 | 6 |  |  |  |  | 16 | bless  $self, $class; | 
| 44 |  |  |  |  |  |  |  | 
| 45 |  |  |  |  |  |  | # Speller declaration | 
| 46 | 6 |  |  |  |  | 148924 | my $speller = Text::Hunspell->new( | 
| 47 |  |  |  |  |  |  | "$data_dir/$language.aff",    # Hunspell or other affix file | 
| 48 |  |  |  |  |  |  | "$data_dir/$language.dic"     # Hunspell or other dictionary file | 
| 49 |  |  |  |  |  |  | ); | 
| 50 | 6 | 50 |  |  |  | 108 | croak "Couldn't create speller: $1" if !$speller; | 
| 51 | 6 |  |  |  |  | 41 | $self->{'_speller'} = $speller; | 
| 52 | 6 |  |  |  |  | 1243 | $speller->add_dic("$dir/words.dic"); # word.dic should be in the text directory | 
| 53 | 6 |  |  |  |  | 13619 | return $self; | 
| 54 |  |  |  |  |  |  | } | 
| 55 |  |  |  |  |  |  |  | 
| 56 |  |  |  |  |  |  | sub dir { | 
| 57 | 1 |  |  | 1 | 1 | 9 | return shift->{'_dir'}; | 
| 58 |  |  |  |  |  |  | } | 
| 59 |  |  |  |  |  |  |  | 
| 60 |  |  |  |  |  |  | sub files { | 
| 61 | 6 |  |  | 6 | 1 | 29 | return shift->{'_files'}; | 
| 62 |  |  |  |  |  |  | } | 
| 63 |  |  |  |  |  |  |  | 
| 64 |  |  |  |  |  |  | sub check { | 
| 65 | 4 |  |  | 4 | 1 | 1170 | my $self = shift; | 
| 66 | 4 |  |  |  |  | 18 | my $tb= $CLASS->builder; | 
| 67 | 4 |  |  |  |  | 32 | my $speller = $self->{'_speller'}; | 
| 68 | 4 |  |  |  |  | 11 | my %vocabulary; | 
| 69 |  |  |  |  |  |  | my @sentences; | 
| 70 | 4 |  |  |  |  | 10 | for my $f ( @{$self->files}) { | 
|  | 4 |  |  |  |  | 17 |  | 
| 71 | 13 |  |  |  |  | 3535 | my $file_content= path($f)->slurp_utf8; | 
| 72 | 13 | 100 |  |  |  | 5294 | if ( $f =~ /(\.md|\.markdown)/ ) { | 
| 73 | 10 |  |  |  |  | 37 | $file_content = _strip_urls( $file_content); | 
| 74 | 10 |  |  |  |  | 28 | $file_content = _strip_code( $file_content); | 
| 75 |  |  |  |  |  |  | } | 
| 76 | 13 |  |  |  |  | 58 | push @sentences, split_sentences( $file_content ); | 
| 77 | 13 |  |  |  |  | 93 | $tb->cmp_ok( scalar @sentences, ">=", 1, "We have " . ($#sentences + 1) . " sentences"); | 
| 78 | 3 |  |  | 3 |  | 1309 | my @words = ($file_content =~ m{\b(\p{L}+)\b}g); | 
|  | 3 |  |  |  |  | 5 |  | 
|  | 3 |  |  |  |  | 42 |  | 
|  | 13 |  |  |  |  | 14880 |  | 
| 79 | 13 |  |  |  |  | 41 | for my $w (@words) { | 
| 80 | 766 | 50 |  |  |  | 216749 | next if !$w; | 
| 81 | 766 |  |  |  |  | 2316 | $vocabulary{lc($w)}++; | 
| 82 | 766 |  |  |  |  | 5943 | $tb->ok( $speller->check( $w),  "$f >> '". encode_utf8($w) . "'"); | 
| 83 |  |  |  |  |  |  | } | 
| 84 | 13 |  |  |  |  | 3696 | my $different_words = scalar keys %vocabulary; | 
| 85 | 13 |  |  |  |  | 63 | $tb->cmp_ok(  $different_words, ">", 1, "We have $different_words different words"); | 
| 86 |  |  |  |  |  |  | } | 
| 87 |  |  |  |  |  |  |  | 
| 88 |  |  |  |  |  |  | } | 
| 89 |  |  |  |  |  |  |  | 
| 90 |  |  |  |  |  |  | sub _strip_urls { | 
| 91 | 10 |  | 33 | 10 |  | 40 | my $text = shift || carp "No text"; | 
| 92 | 10 |  |  |  |  | 126 | $text =~ s/\[(.+?)\]\(\S+\)/$1/sg; | 
| 93 | 10 |  |  |  |  | 31 | return $text; | 
| 94 |  |  |  |  |  |  | } | 
| 95 |  |  |  |  |  |  |  | 
| 96 |  |  |  |  |  |  | sub _strip_code { | 
| 97 | 10 |  | 33 | 10 |  | 32 | my $text = shift || carp "No text in _strip_code"; | 
| 98 | 10 |  |  |  |  | 156 | $text =~ s/~~~[\w\W]*?~~~//g; | 
| 99 | 10 |  |  |  |  | 51 | $text =~ s/```[\w\W]+?```//g; | 
| 100 | 10 |  |  |  |  | 52 | $text =~ s/`[^`]+?`//g; | 
| 101 | 10 |  |  |  |  | 20 | return $text; | 
| 102 |  |  |  |  |  |  | } | 
| 103 |  |  |  |  |  |  |  | 
| 104 |  |  |  |  |  |  |  | 
| 105 |  |  |  |  |  |  | sub just_check { | 
| 106 | 2 |  | 33 | 2 | 1 | 1310 | my $dir = shift || croak "Need a directory with text" ; | 
| 107 | 2 |  | 33 |  |  | 7 | my $data_dir = shift || croak "No default spelling data directory\n"; | 
| 108 | 2 |  | 50 |  |  | 7 | my $language = shift || "en_US"; # Defaults to English | 
| 109 | 2 |  | 50 |  |  | 8 | my $call_done_testing = shift // 1; # Defaults to 1 | 
| 110 | 2 |  |  |  |  | 17 | my $tesxt = Test::Text->new($dir, $data_dir, $language, @_); | 
| 111 | 2 |  |  |  |  | 26 | my $tb= $CLASS->builder; | 
| 112 |  |  |  |  |  |  | $tb->subtest( "Testing $dir" => sub { | 
| 113 | 2 |  |  | 2 |  | 2913 | $tesxt->check(); | 
| 114 | 2 |  |  |  |  | 89 | }); | 
| 115 | 2 | 100 |  |  |  | 42929 | $tb->done_testing() if $call_done_testing; | 
| 116 |  |  |  |  |  |  | } | 
| 117 |  |  |  |  |  |  |  | 
| 118 |  |  |  |  |  |  | sub done_testing { | 
| 119 | 0 |  |  | 0 | 1 |  | my $tb= $CLASS->builder; | 
| 120 | 0 |  |  |  |  |  | $tb->done_testing; | 
| 121 |  |  |  |  |  |  | } | 
| 122 |  |  |  |  |  |  |  | 
| 123 |  |  |  |  |  |  | "All over, all out, all over and out"; # Magic circus phrase said at the end of the show | 
| 124 |  |  |  |  |  |  |  | 
| 125 |  |  |  |  |  |  | __END__ |