File Coverage

blib/lib/Text/Perfide/WordBags.pm
Criterion Covered Total %
statement 15 41 36.5
branch 0 8 0.0
condition 0 5 0.0
subroutine 5 10 50.0
pod 5 5 100.0
total 25 69 36.2


line stmt bran cond sub pod time code
1             package Text::Perfide::WordBags;
2              
3 1     1   30757 use warnings;
  1         3  
  1         38  
4 1     1   6 use strict;
  1         2  
  1         36  
5 1     1   879 use utf8::all;
  1         68720  
  1         7  
6              
7 1     1   3503 use base 'Exporter';
  1         2  
  1         660  
8             our @EXPORT = (qw/pairability file2bag bagint baguni bagcard/);
9              
10              
11             =head1 NAME
12              
13             Text::Perfide::WordBags - Create word bags from text, and operate over them.
14              
15             =head1 VERSION
16              
17             Version 0.01_02
18              
19             =cut
20              
21             our $VERSION = '0.01_02';
22              
23             =head1 SYNOPSIS
24              
25             Quick summary of what the module does.
26              
27             Perhaps a little code snippet.
28              
29             use Text::Perfide::WordBags;
30              
31             my $foo = Text::Perfide::WordBags->new();
32             ...
33              
34             =head1 EXPORT
35              
36             A list of functions that can be exported. You can delete this section
37             if you don't export anything, such as for a purely object-oriented module.
38              
39             =head1 FUNCTIONS
40              
41             =head2 pairability
42            
43             Calculates pairability of two wordbags. Pairibitily value is given by:
44              
45             $int/($uni || 1)
46              
47             where $int and $uni are the values given by, respectively, the intersection
48             and the union of the two bags.
49              
50             =cut
51              
52             sub pairability{
53 0     0 1   my ($bag1,$bag2) = @_;
54 0           my $int = bagcard(bagint($bag1,$bag2));
55 0           my $uni = bagcard(baguni($bag1,$bag2));
56 0   0       return $int/($uni || 1);
57             }
58              
59             =head2 file2bag
60              
61             Create a word bag from a file.
62              
63             Receives as argument a function ref and a file path.
64              
65             Reads a file in slurp mode, passes the text to the function passed as argument,
66             and returns the result.
67              
68             =cut
69              
70             sub file2bag{
71 0     0 1   my ($txt2bag_fn,$file) = @_;
72 0 0         open my $fp,'<:utf8',$file or die "Could not open file '$file'";
73 0           my $text = join '',<$fp>;
74 0           return $txt2bag_fn->($text);
75             }
76              
77             # sub txt2bag_pn{ # proper nouns
78             # my $text = shift;
79             #
80             # my $upper = {};
81             # my $uppat = qr{\b[A-Z]\w{3,}(?:['-]\w+)*\b};
82             # $upper->{$1}++ while($text =~ /($uppat)/g);
83             #
84             # my $lower = {};
85             # my $lwpat = qr{\b[a-z]+(?:['-][a-z]+)*\b};
86             # $lower->{$1}++ while($text =~ /($lwpat)/g);
87             #
88             # foreach my $k (keys %$upper){
89             # if($lower->{lc $k}){
90             # my $ratio = $upper->{$k}/$lower->{lc $k};
91             # delete $upper->{$k} if $ratio < 10;
92             # }
93             # }
94             # return $upper;
95             # }
96             #
97             # sub txt2bag_num{
98             # my $text = shift;
99             # my $bag = {};
100             # my $pecul = qr{\d+};
101             # $bag->{$1}++ while($text =~ /($pecul)/g);
102             # if(haspn($bag)){
103             # foreach(1..300){
104             # $bag->{$_}-- if $bag->{$_};
105             # delete $bag->{$_} unless $bag->{$_};
106             # }
107             # }
108             # return $bag;
109             # }
110              
111             =head2 bagint
112            
113             Calculates the intersection between two wordbags.
114            
115             =cut
116              
117             sub bagint {
118 0     0 1   my ($bag1,$bag2) = @_;
119 0           my $inters = {};
120 0           foreach(keys %$bag1){
121 0 0         next unless $bag2->{$_};
122 0 0         $inters->{$_} = $bag1->{$_} < $bag2->{$_} ? $bag1->{$_} : $bag2->{$_};
123             }
124 0           return $inters;
125             }
126              
127             =head2 baguni
128              
129             Calculates the union of two wordbags.
130              
131             =cut
132              
133             sub baguni {
134 0     0 1   my ($bag1,$bag2) = @_;
135 0           my $union = {};
136 0           foreach(keys %$bag1){
137 1     1   8 no warnings;
  1         2  
  1         353  
138 0 0         $union->{$_} = $bag1->{$_} > $bag2->{$_} ? $bag1->{$_} : $bag2->{$_};
139             }
140 0           foreach(keys %$bag2){
141 0   0       $union->{$_}//=$bag2->{$_};
142             }
143 0           return $union;
144             }
145              
146             =head2 bagcard
147              
148             Calculates the cardinality of two wordbags.
149              
150             =cut
151              
152             sub bagcard {
153 0     0 1   my $bag = shift;
154 0           my $soma = 0;
155 0           map { $soma+= $bag->{$_} } keys %$bag;
  0            
156 0           return $soma;
157             }
158              
159              
160              
161             =head1 AUTHOR
162              
163             Andre Santos, C<< >>
164              
165             =head1 BUGS
166              
167             Please report any bugs or feature requests to C, or through
168             the web interface at L. I will be notified, and then you'll
169             automatically be notified of progress on your bug as I make changes.
170              
171              
172             =head1 ACKNOWLEDGEMENTS
173              
174              
175             =head1 LICENSE AND COPYRIGHT
176              
177             Copyright 2011 Project Natura.
178              
179             This program is free software; you can redistribute it and/or modify it
180             under the terms of either: the GNU General Public License as published
181             by the Free Software Foundation; or the Artistic License.
182              
183             See http://dev.perl.org/licenses/ for more information.
184              
185              
186             =cut
187              
188             1; # End of Text::Perfide::WordBags