File Coverage

blib/lib/Lingua/Verbnet.pm
Criterion Covered Total %
statement 7 9 77.7
branch n/a
condition n/a
subroutine 3 3 100.0
pod n/a
total 10 12 83.3


line stmt bran cond sub pod time code
1             package Lingua::Verbnet;
2              
3 1     1   27698 use strict;
  1         3  
  1         34  
4 1     1   6 use warnings;
  1         2  
  1         65  
5             our $VERSION = sprintf "%d.%03d", q$Revision: 1.2 $ =~ /(\d+)/g;
6              
7 1     1   1803 use XML::Parser;
  0            
  0            
8             use Lingua::Verbnet::Ambiguity;
9             use Carp;
10              
11             sub new {
12             my %stats = (); # currently just the ambiguity stats
13             my $class = shift;
14             my $parser = new XML::Parser(
15             Style => 'Subs',
16             Pkg => 'Lingua::Verbnet::StatHandlers');
17             {
18             package Lingua::Verbnet::StatHandlers; # handlers to fill up %stats
19             no warnings 'once';
20             no warnings 'redefine'; # we purposefully redefine the subs here,
21             # in order to package the correct %stats/$frames/@members in the closure
22             *{VNCLASS} = sub { # in Lingua::Verbnet::StatHandlers::
23             my $frames = 0;
24             my @members = ();
25              
26             *{MEMBER} = sub {
27             my ($parser, $el, %attrs) = @_;
28             push @members, $attrs{'name'};
29             };
30              
31             *{FRAME} = sub { $frames++; };
32              
33             *{VNCLASS_} = sub {
34             for my $e (@members) {
35             # let += operate on unused slots silently
36             no warnings 'uninitialized';
37             $stats{$e} += $frames;
38             }
39             };
40             };
41             }
42             for my $file (@_) {
43             $parser->parsefile($file);
44             }
45             my %ambiguity = %stats;
46             my $closure = sub {
47             if ('ambiguity' eq $_[0]) {
48             bless \%ambiguity, 'Lingua::Verbnet::Ambiguity';
49             }
50             else {
51             croak "$class can't $_[0]";
52             }
53             };
54             bless $closure, $class;
55             }
56              
57             sub DESTROY {}
58             sub AUTOLOAD {
59             my $self = shift;
60              
61             # Remove the package name. See perlbot(1).
62             our $AUTOLOAD =~ s/^.*:://;
63              
64             $self->($AUTOLOAD,@_);
65             }
66              
67             1;
68             =head1 NAME
69              
70             Lingua::Verbnet -- extract stats from verbnet xml files
71              
72             =head1 SYNOPSIS
73              
74             use Lingua::Verbnet;
75             my @verbnet_xml_files = ... ;
76             my $verbnet = Lingua::Verbnet->new(@verbnet_xml_files);
77             $verbnet->ambiguity->score('cut'); # get the ambiguity score of the verb 'cut'
78             my %stats = $verbnet->ambiguity->hash; # get the full ambiguity scores hash (verb => score)
79              
80             =head1 DESCRIPTION
81              
82             Potentially, collect and query various aspects of data
83             from the verbnet XML files. Currently, supports just
84             the ambiguity stats extraction.
85              
86             =head1 METHODS
87              
88             =over
89              
90             =item new
91              
92             Constructor, arguments include the list of the source files
93             to contain the verbnet XML data. If no arguments given,
94             assumes reading from the STDIN.
95              
96             =item ambiguity
97              
98             Return an Lingua::Verbnet::Ambiguity object for querying
99             the verb ambiguity stats.
100              
101             =back
102              
103             =head1 THANKS
104              
105             Published mainly for the purpose of demonstrating the concise way of using
106             the Subs style of XML::Parser together with closures, which is inspired
107             by DSSSL (thanks, James Clark!) and SGMLSpm script "sgmlspl.pl" by David Megginson
108             (thanks, and blue skies!). Thanks also to Yuval Kogman for his persistent
109             insistence that the above is a good enough reason to publish this on CPAN.
110             Thanks to Dr. Michael Elhadad who asked me to do the cross-evaluation
111             of two probabilistic parsers at http://www.cs.bgu.ac.il/~nlpproj/parse-eval/ ,
112             where this code originated.
113              
114             =head1 SEE ALSO
115              
116             L, L, L,
117             I at L
118              
119             =head1 AUTHOR
120              
121             Vassilii Khachaturov >
122              
123             =head1 LICENSE
124              
125             This program is free software; you can redistribute it and/or
126             modify it under the same terms as Perl itself.
127              
128             See F