File Coverage

blib/lib/Text/Pangram.pm
Criterion Covered Total %
statement 55 57 96.4
branch 6 8 75.0
condition n/a
subroutine 12 13 92.3
pod 6 6 100.0
total 79 84 94.0


line stmt bran cond sub pod time code
1             package Text::Pangram;
2              
3 2     2   24709 use strict;
  2         5  
  2         76  
4 2     2   9 use warnings;
  2         4  
  2         89  
5              
6             =head1 NAME
7              
8             Text::Pangram - utilities to find English pangrams
9              
10             =head1 VERSION
11              
12             Version 0.01
13              
14             =cut
15              
16             our $VERSION = "0.01";
17              
18 2     2   1963 use List::MoreUtils qw{all};
  2         6362  
  2         226  
19 2     2   20 use List::Util qw{min};
  2         3  
  2         1516  
20              
21             sub new {
22 3     3 1 17 my ($class, $text) = @_;
23 3         8 my $self = {};
24 3         9 $self->{text} = $text;
25 3         11 return bless ($self, $class);
26             }
27              
28             sub is_pangram {
29 5     5 1 14 my $self = shift;
30 5     116   74 return all {$self->{text} =~ /$_/i} 'a' .. 'z';
  116         791  
31             }
32              
33             sub find_pangram_window {
34 3     3 1 10 my $self = shift;
35 3 100       9 return undef if (! $self->is_pangram);
36 2         12 my $orig_text = $self->{text};
37              
38 2         24 (my $text = $orig_text) =~ s/[^A-Za-z]//g; # remove all but letters
39              
40             # @sighting is location of most-recent ex of each letter, keyed a=0 etc.
41 2         3 my @sighting;
42 2         12 @sighting[0..25] = undef; # so our check below will work
43              
44             # hash slice: use letters to index array
45 2         3 my %letter_index;
46 2         629 @letter_index{"a".."z"} = (0..25);
47              
48 2         6 my $smallest_window = length($text);
49 2         4 my $final_index;
50 2         3 my $seen_all = undef;
51              
52 2         8 for my $index (0..(length($text) - 1)) {
53 66         111 my $letter = substr $text, $index, 1;
54 66         126 @sighting[ $letter_index{lc($letter)} ] = $index;
55              
56             # Don't start looking for pangrams until we've seen all letters.
57             # The "all" call is expensive, and irrelevant after early use:
58             # skip it with faster boolean check
59 66 50       137 if ( !$seen_all ) {
60 66 100   158   225 unless (all { defined($_) } @sighting) {
  158         230  
61 64         160 next;
62             } else {
63 2         5 $seen_all++;
64             }
65             }
66            
67 2         38 my $min = min @sighting;
68            
69 2         4 my $range = $index - $min;
70 2 50       19 if ($range < $smallest_window) {
71 2         3 $smallest_window = $range;
72 2         8 $final_index = $index;
73             }
74             }
75              
76 2         6 $self->{window_length} = $smallest_window + 1;
77 2         11 $self->{stripped_window} = substr $text, $final_index - $smallest_window, $smallest_window + 1;
78 2         10 $self->{window} = $self->_find_original_window;
79 2         43 return 1;
80             }
81              
82             ### accessors
83              
84             sub window_length {
85 0     0 1 0 my $self = shift;
86 0         0 return $self->{window_length};
87             }
88              
89             sub stripped_window {
90 3     3 1 8 my $self = shift;
91 3         36 return $self->{stripped_window};
92             }
93              
94             sub window {
95 1     1 1 3 my $self = shift;
96 1         9 return $self->{window};
97             }
98              
99             ### utility method
100              
101             sub _find_original_window {
102 2     2   4 my $self = shift;
103 2         8 my $regex = join('(?:[^A-Za-z]*)', split('',$self->stripped_window));
104 2         154 my ($window) = $self->{text} =~ m/($regex)/s;
105 2         11 return $window;
106             }
107              
108             =head1 SYNOPSIS
109              
110             use Text::Pangram;
111              
112             my $text = "The quick brown fox jumps over the lazy dog.";
113             my $pangram = Text::Pangram->new( $text );
114              
115             print "Pangram!\n" if $pangram->is_pangram;
116              
117             if ($pangram->find_pangram_window) {
118             print "Smallest window is " . $pangram->window_length . " characters:\n";
119             print $pangram->window . "\n";
120             } else {
121             print "Not a pangram!\n";
122             }
123              
124             =head1 DESCRIPTION
125              
126             A pangram is a text that contains every letter of the alphabet. This
127             module provides utilities for identifying pangrams.
128              
129             =over
130              
131             =item $pangram->new
132              
133             Constructor. Receives the text that will be analyzed.
134              
135             =back
136              
137             =over
138              
139             =item $pangram->is_pangram
140              
141             Returns true if the supplied text is a pangram.
142              
143             =back
144              
145             =over
146              
147             =item $pangram->find_pangram_window
148              
149             Finds the shortest "pangrammatic window" in a text: the shortest
150             span of text that contains a pangram. It is designed to be fast when
151             scanning large texts.
152              
153             The method will return false if the text does not contain a pangram at
154             all. If the text is pangrammatic, C<$pangram> will allow you to access
155             three pieces of data:
156              
157             =back
158              
159             =over
160              
161             =item $pangram->window_length
162              
163             The length of the shortest pangrammatic window.
164              
165             =back
166              
167             =over
168              
169             =item $pangram->window
170              
171             The pangrammatic window from the original text.
172              
173             =back
174              
175             =over
176              
177             =item $pangram->stripped_window
178              
179             The stripped text of the pangrammatic window. (That is, with all
180             nonalphabetic characters stripped out.)
181              
182             =back
183              
184             =head1 AUTHOR
185              
186             Jesse Sheidlower C<< >>
187              
188             =head1 BUGS
189              
190             Please report any bugs or feature requests to C<bug-text-pangram at
191             rt.cpan.org>, or through the web interface at
192             L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Text-Pangram>. I will
193             be notified, and then you'll automatically be notified of progress on
194             your bug as I make changes.
195              
196             =head1 TODO
197              
198             Extend to handle non-English text, other alphabets, etc.
199              
200             =head1 ACKNOWLEDGEMENTS
201              
202             Thanks to Adam Turoff, Ben Rosengart, and Perrin Harkins for help and
203             suggestions.
204              
205             =head1 COPYRIGHT AND LICENSE
206              
207             Copyright (c) 2014 Jesse Sheidlower.
208              
209             This library is free software; you can redistribute it and/or modify
210             it under the same terms as Perl itself.
211              
212             =cut
213              
214             1; # End of Text::Pangram