File Coverage

blib/lib/CWB/CQP/More.pm
Criterion Covered Total %
statement 3 3 100.0
branch n/a
condition n/a
subroutine 1 1 100.0
pod n/a
total 4 4 100.0


line stmt bran cond sub pod time code
1             package CWB::CQP::More;
2             $CWB::CQP::More::VERSION = '0.06';
3 1     1   63828 use parent CWB::CQP;
  1         380  
  1         6  
4             use CWB;
5              
6             use Carp;
7             use Try::Tiny;
8             use Encode;
9             use warnings;
10             use strict;
11             use POSIX::Open3;
12             use CWB::CQP::More::Iterator;
13              
14             =head1 NAME
15              
16             CWB::CQP::More - A higher level interface for CWB::CQP
17              
18             =head1 SYNOPSIS
19              
20             use CWB::CQP::More;
21              
22             my $cqp = CWB::CQP::More->new( { utf8 => 1 } );
23              
24             $cqp->change_corpus('HANSARDS');
25              
26             # This needs to get fixed... not nice to say "''"
27             $cqp->set(Context => [20, 'words'],
28             LD => "''",
29             RD => "''");
30              
31             # using Try::Tiny...
32             try {
33             $cqp->exec('A = "dog";');
34             my $result_size = $cqp->size('A');
35             my @lines = $cqp->cat('A');
36             } catch {
37             print "Error: $_\n";
38             }
39              
40             $cqp->annotation_show("pos");
41              
42             $details = $cqp->corpora_details('hansards');
43              
44             $available_corpora = $cqp->show_corpora;
45              
46              
47             =head1 METHODS
48              
49             This class superclasses CWB::CQP and adds some higher-order
50             functionalities.
51              
52             =head2 new
53              
54             The C constructor has the same behavior has the C
55             C method, unless the first argument is a hash reference. In that
56             case, it is shifted and used as configuration for
57             C. The remaining arguments are sent unaltered to
58             C constructor.
59              
60             =cut
61              
62             sub _super_hacked_new {
63             my @options = @_;
64             my $self = {};
65              
66             # split options with values, e.g. "-r /my/registry" => "-r", "/my/registry"
67             # (doesn't work for multiple options in one string)
68             @options = map { (/^(--?[A-Za-z0-9]+)\s+(.+)$/) ? ($1, $2) : $_ } @options;
69              
70             ## run CQP server in the background
71             my $in = $self->{'in'} = new FileHandle; # stdin of CQP
72             my $out = $self->{'out'} = new FileHandle; # stdout of CQP
73             my $err = $self->{'err'} = new FileHandle; # stderr of CQP
74              
75             my $pid = open3($in, $out, $err, $CWB::CQP, @CWB::CQP::CQP_options, @options);
76              
77             $self->{'pid'} = $pid; # child process ID (so process can be killed if necessary)
78             $in->autoflush(1); # make sure that commands sent to CQP are always flushed immediately
79              
80             my ($need_major, $need_minor, $need_beta) = split /\./, $CWB::CQP::CQP_version;
81             $need_beta = 0 unless $need_beta;
82              
83             my $version_string = $out->getline; # child mode (-c) should print version on startup
84             chomp $version_string;
85             croak "ERROR: CQP backend startup failed ('$CWB::CQP @CWB::CQP::CQP_options @options')\n"
86             unless $version_string =~
87             m/^CQP\s+(?:\w+\s+)*([0-9]+)\.([0-9]+)(?:\.b?([0-9]+))?(?:\s+(.*))?$/;
88             $self->{'major_version'} = $1;
89             $self->{'minor_version'} = $2;
90             $self->{'beta_version'} = $3 || 0;
91             $self->{'compile_date'} = $4 || "unknown";
92             croak "ERROR: CQP version too old, need at least v$CWB::CQP::CQP_version ($version_string)\n"
93             unless ($1 > $need_major or
94             $1 == $need_major
95             and ($2 > $need_minor or
96             ($2 == $need_minor and $3 >= $need_beta)));
97              
98             ## command execution
99             $self->{'command'} = undef; # CQP command string that is currently being processed (undef = last command has been completed)
100             $self->{'lines'} = []; # array of output lines read from CQP process
101             $self->{'buffer'} = ""; # read buffer for standard output from CQP process
102             $self->{'block_size'} = 256; # block size for reading from CQP's output and error streams
103             $self->{'query_lock'} = undef;# holds random key while query lock mode is active
104             ## error handling (messages on stderr)
105             $self->{'error_handler'} = undef; # set to subref for user-defined error handler
106             $self->{'status'} = 'ok'; # status of last executed command ('ok' or 'error')
107             $self->{'error_message'} = []; # arrayref to array containing message produced by last command (if any)
108             ## handling of CQP progress messages
109             $self->{'progress'} = 0; # whether progress messages are activated
110             $self->{'progress_handler'} = undef; # optional callback for progress messages
111             $self->{'progress_info'} = []; # contains last available progress information: [$total_percent, $pass, $n_passes, $message, $percent]
112             ## debugging (prints more or less everything on stdout)
113             $self->{'debug'} = 0;
114             ## select vectors for CQP output (stdout, stderr, stdout|stderr)
115             $self->{'select_err'} = new IO::Select($err);
116             $self->{'select_out'} = new IO::Select($out);
117             $self->{'select_any'} = new IO::Select($err, $out);
118             ## CQP object setup complete
119             return $self;
120             }
121              
122             sub new {
123             my ($class, @args) = @_;
124             my $conf = shift @args if ref($args[0]);
125              
126             my $self = _super_hacked_new(@args);
127             if (exists($conf->{parallel}) && $conf->{parallel}) {
128             bless $self, __PACKAGE__."::Parallel";
129             } else {
130             bless $self, __PACKAGE__;
131             }
132              
133             $self->exec("set PrettyPrint off");
134              
135             for my $k (keys %$conf) {
136             $self->{"__$k"} = $conf->{$k};
137             }
138              
139             $self->set_error_handler( sub { } );
140              
141             return $self;
142             }
143              
144             =head2 utf8
145              
146             Set utf8 mode on or off. Pass it a 1 or a 0 as argument. Returns that
147             same value. If used without arguments, returns current value.
148              
149             =cut
150              
151             sub utf8 {
152             my ($self, $v) = @_;
153             $self->{__utf8} = $v if $v;
154             return $self->{__utf8} || 0;
155             }
156              
157             =head2 size
158              
159             Uses the C CQP command to fetch the size of a query result
160             set. Pass it its name, returns an integer. C<-1> if the result set
161             does not exist or an error occurred.
162              
163             =cut
164              
165             sub size {
166             my ($self, $name) = @_;
167             my $n;
168             try {
169             ($n) = $self->exec("size $name");
170             } catch {
171             return -1;
172             };
173             return $n;
174             }
175              
176             =head2 cat
177              
178             This method uses the C method to return a result set. The first
179             mandatory argument is the name of the result set. Second and Third
180             arguments are optional, and correspond to the interval of matches to
181             return.
182              
183             Returns empty list on any error.
184              
185             =cut
186              
187             sub cat {
188             my ($self, $id, $from, $to) = @_;
189             my $extra = "";
190             $extra = "$from $to" if defined($from) && defined($to);
191             my @ans;
192             try {
193             @ans = $self->exec("cat $id $extra;");
194             } catch {
195             @ans = ();
196             };
197             return @ans;
198             }
199              
200             =head2 annotation_show
201              
202             Use this method to specify what annotations to make CQP to show. Pass
203             it a list of the annotation names.
204              
205             =cut
206              
207             sub annotation_show($@) {
208             my ($self, @annotations) = @_;
209             my $annots = join(" ", map { "+$_" } @annotations);
210             $self->exec("show $annots;");
211             }
212              
213             =head2 annotation_hide
214              
215             Use this method to specify what annotations to make CQP to not show
216             (hide). Pass it a list of the annotation names.
217              
218             =cut
219              
220             sub annotation_hide($@) {
221             my ($self, @annotations) = @_;
222             my $annots = join(" ", map { "-$_" } @annotations);
223             $self->exec("show $annots;");
224             }
225              
226             =head2 change_corpus
227              
228             Change current active corpus. Pass the corpus name as the argument.
229              
230             =cut
231              
232             sub change_corpus($$) {
233             my ($self, $cname) = @_;
234             $cname = uc $cname;
235             $self->exec("$cname;");
236             }
237              
238             =head2 set
239              
240             Set CQP properties. Pass a hash (not a reference) of key/values to be
241             set. Note that at the moment string values should be double quoted
242             (see example in the synopsis).
243              
244             =cut
245              
246             sub set($%) {
247             my ($self, %vars) = @_;
248             for my $key (keys %vars) {
249             my $values;
250             if (ref($vars{$key}) eq "ARRAY") {
251             $values = join(" ", @{$vars{$key}});
252             } else {
253             $values = $vars{$key};
254             }
255              
256             try {
257             $self->exec("set $key $values;");
258             };
259             }
260             }
261              
262             =head2 exec
263              
264             Similar to CWB::CQP->exec, but dying in case of error with the error
265             message. Useful for use with C. Check the synopsis above
266             for an example.
267              
268             =cut
269              
270             sub exec {
271             my ($self, @args) = @_;
272             @args = map { Encode::_utf8_off($_); $_ } @args if $self->{__utf8};
273             my @answer = $self->SUPER::exec(@args);
274             die $self->error_message unless $self->ok;
275             @answer = map { Encode::_utf8_on($_); $_ } @answer if $self->{__utf8};
276             return @answer;
277             }
278              
279             =head2 corpora_details
280              
281             Returns a reference to a hash with details about a specific corpus,
282             like name, id, home directory, properties and attributes;
283              
284             =cut
285              
286             sub corpora_details {
287             my ($self, $cname) = @_;
288             return undef unless $cname;
289              
290             $cname = lc $cname unless $cname =~ m{[/\\]};
291              
292             my $details = {};
293             my $reg = new CWB::RegistryFile $cname;
294             return undef unless $reg;
295              
296             $details->{filename} = $reg->filename;
297             $details->{name} = $reg->name;
298             $details->{corpus_id} = $reg->id;
299             $details->{home_dir} = $reg->home;
300             $details->{info_file} = $reg->info;
301              
302             my @properties = $reg->list_properties;
303             for my $property (@properties) {
304             $details->{property}{$property} = $reg->property($property);
305             }
306              
307             my @attributes = $reg->list_attributes;
308             for my $attr (@attributes) {
309             $details->{attribute}{$reg->attribute($attr)}{$attr} = $reg->attribute_path($attr);
310             }
311              
312             return $details;
313             }
314              
315             =head2 show_corpora
316              
317             Returns a reference to a list of the available corpora;
318              
319             =cut
320              
321             sub show_corpora {
322             my $self = shift;
323             my $ans;
324             try {
325             $ans = [ $self->exec("show corpora;") ];
326             } catch {
327             $ans = [];
328             };
329             return $ans;
330             }
331              
332             =head2 iterator
333              
334             Returns a new iterator, to iterate over a result set. See
335             L for documentation on how to use it.
336              
337             =cut
338              
339             sub iterator {
340             return CWB::CQP::More::Iterator->new(@_);
341             }
342              
343             =head1 AUTHOR
344              
345             Alberto Simoes, C<< >>
346              
347             =head1 BUGS
348              
349             Please report any bugs or feature requests to C, or through
350             the web interface at L. I will be notified, and then you'll
351             automatically be notified of progress on your bug as I make changes.
352              
353             =head1 SUPPORT
354              
355             You can find documentation for this module with the perldoc command.
356              
357             perldoc CWB::CQP::More
358              
359              
360             You can also look for information at:
361              
362             =over 4
363              
364             =item * RT: CPAN's request tracker
365              
366             L
367              
368             =item * AnnoCPAN: Annotated CPAN documentation
369              
370             L
371              
372             =item * CPAN Ratings
373              
374             L
375              
376             =item * Search CPAN
377              
378             L
379              
380             =back
381              
382              
383             =head1 ACKNOWLEDGEMENTS
384              
385             Thanks for Stefan Evert for all help.
386              
387             =head1 LICENSE AND COPYRIGHT
388              
389             Copyright 2010-2011 Alberto Simoes.
390              
391             This program is free software; you can redistribute it and/or modify it
392             under the terms of either: the GNU General Public License as published
393             by the Free Software Foundation; or the Artistic License.
394              
395             See http://dev.perl.org/licenses/ for more information.
396              
397              
398             =cut
399              
400             1; # End of CWB::CQP::More