File Coverage

blib/lib/CWB/CQP/More.pm
Criterion Covered Total %
statement 3 3 100.0
branch n/a
condition n/a
subroutine 1 1 100.0
pod n/a
total 4 4 100.0


line stmt bran cond sub pod time code
1             package CWB::CQP::More;
2             $CWB::CQP::More::VERSION = '0.08';
3 1     1   19435 use parent CWB::CQP;
  1         266  
  1         5  
4             use CWB;
5              
6             use Carp;
7             use Try::Tiny;
8             use Encode;
9             use warnings;
10             use strict;
11             use POSIX::Open3;
12             use CWB::CQP::More::Iterator;
13              
14             our $DEBUG = 0;
15              
16             sub import {
17             my @ops = @_;
18             $DEBUG = grep { $_ eq "DEBUG" } @ops;
19             }
20              
21             =head1 NAME
22              
23             CWB::CQP::More - A higher level interface for CWB::CQP
24              
25             =head1 SYNOPSIS
26              
27             use CWB::CQP::More;
28              
29             my $cqp = CWB::CQP::More->new( { utf8 => 1 } );
30              
31             $cqp->change_corpus('HANSARDS');
32              
33             # This needs to get fixed... not nice to say "''"
34             $cqp->set(Context => [20, 'words'],
35             LD => "''",
36             RD => "''");
37              
38             # using Try::Tiny...
39             try {
40             $cqp->exec('A = "dog";');
41             my $result_size = $cqp->size('A');
42             my @lines = $cqp->cat('A');
43             } catch {
44             print "Error: $_\n";
45             }
46              
47             $cqp->annotation_show("pos");
48              
49             $details = $cqp->corpora_details('hansards');
50              
51             $available_corpora = $cqp->show_corpora;
52              
53             # for debug
54             use CWB::CQP::More 'DEBUG';
55              
56             =head1 METHODS
57              
58             This class superclasses CWB::CQP and adds some higher-order
59             functionalities.
60              
61             =head2 new
62              
63             The C constructor has the same behavior has the C
64             C method, unless the first argument is a hash reference. In that
65             case, it is shifted and used as configuration for
66             C. The remaining arguments are sent unaltered to
67             C constructor.
68              
69             =cut
70              
71             sub _super_hacked_new {
72             my @options = @_;
73             my $self = {};
74              
75             # split options with values, e.g. "-r /my/registry" => "-r", "/my/registry"
76             # (doesn't work for multiple options in one string)
77             @options = map { (/^(--?[A-Za-z0-9]+)\s+(.+)$/) ? ($1, $2) : $_ } @options;
78              
79             ## run CQP server in the background
80             my $in = $self->{'in'} = new FileHandle; # stdin of CQP
81             my $out = $self->{'out'} = new FileHandle; # stdout of CQP
82             my $err = $self->{'err'} = new FileHandle; # stderr of CQP
83              
84             my $pid = open3($in, $out, $err, $CWB::CQP, @CWB::CQP::CQP_options, @options);
85              
86             $self->{'pid'} = $pid; # child process ID (so process can be killed if necessary)
87             $in->autoflush(1); # make sure that commands sent to CQP are always flushed immediately
88              
89             my ($need_major, $need_minor, $need_beta) = split /\./, $CWB::CQP::CQP_version;
90             $need_beta = 0 unless $need_beta;
91              
92             my $version_string = $out->getline; # child mode (-c) should print version on startup
93             chomp $version_string;
94             croak "ERROR: CQP backend startup failed ('$CWB::CQP @CWB::CQP::CQP_options @options')\n"
95             unless $version_string =~
96             m/^CQP\s+(?:\w+\s+)*([0-9]+)\.([0-9]+)(?:\.b?([0-9]+))?(?:\s+(.*))?$/;
97             $self->{'major_version'} = $1;
98             $self->{'minor_version'} = $2;
99             $self->{'beta_version'} = $3 || 0;
100             $self->{'compile_date'} = $4 || "unknown";
101             croak "ERROR: CQP version too old, need at least v$CWB::CQP::CQP_version ($version_string)\n"
102             unless ($1 > $need_major or
103             $1 == $need_major
104             and ($2 > $need_minor or
105             ($2 == $need_minor and $3 >= $need_beta)));
106              
107             ## command execution
108             $self->{'command'} = undef; # CQP command string that is currently being processed (undef = last command has been completed)
109             $self->{'lines'} = []; # array of output lines read from CQP process
110             $self->{'buffer'} = ""; # read buffer for standard output from CQP process
111             $self->{'block_size'} = 256; # block size for reading from CQP's output and error streams
112             $self->{'query_lock'} = undef;# holds random key while query lock mode is active
113             ## error handling (messages on stderr)
114             $self->{'error_handler'} = undef; # set to subref for user-defined error handler
115             $self->{'status'} = 'ok'; # status of last executed command ('ok' or 'error')
116             $self->{'error_message'} = []; # arrayref to array containing message produced by last command (if any)
117             ## handling of CQP progress messages
118             $self->{'progress'} = 0; # whether progress messages are activated
119             $self->{'progress_handler'} = undef; # optional callback for progress messages
120             $self->{'progress_info'} = []; # contains last available progress information: [$total_percent, $pass, $n_passes, $message, $percent]
121             ## debugging (prints more or less everything on stdout)
122             $self->{'debug'} = 0;
123             ## select vectors for CQP output (stdout, stderr, stdout|stderr)
124             $self->{'select_err'} = new IO::Select($err);
125             $self->{'select_out'} = new IO::Select($out);
126             $self->{'select_any'} = new IO::Select($err, $out);
127             ## CQP object setup complete
128             return $self;
129             }
130              
131             sub new {
132             my ($class, @args) = @_;
133             my $conf = shift @args if ref($args[0]);
134              
135             my $self = _super_hacked_new(@args);
136             if (exists($conf->{parallel}) && $conf->{parallel}) {
137             bless $self, __PACKAGE__."::Parallel";
138             } else {
139             bless $self, __PACKAGE__;
140             }
141              
142             $self->exec("set PrettyPrint off");
143              
144             for my $k (keys %$conf) {
145             $self->{"__$k"} = $conf->{$k};
146             }
147              
148             $self->set_error_handler( sub { } );
149              
150             return $self;
151             }
152              
153             =head2 utf8
154              
155             Set utf8 mode on or off. Pass it a 1 or a 0 as argument. Returns that
156             same value. If used without arguments, returns current value.
157              
158             =cut
159              
160             sub utf8 {
161             my ($self, $v) = @_;
162             $self->{__utf8} = $v if $v;
163             return $self->{__utf8} || 0;
164             }
165              
166             =head2 size
167              
168             Uses the C CQP command to fetch the size of a query result
169             set. Pass it its name, returns an integer. C<-1> if the result set
170             does not exist or an error occurred.
171              
172             =cut
173              
174             sub size {
175             my ($self, $name) = @_;
176             my $n;
177             try {
178             ($n) = $self->exec("size $name");
179             } catch {
180             return -1;
181             };
182             return $n;
183             }
184              
185             =head2 cat
186              
187             This method uses the C method to return a result set. The first
188             mandatory argument is the name of the result set. Second and Third
189             arguments are optional, and correspond to the interval of matches to
190             return.
191              
192             Returns empty list on any error.
193              
194             =cut
195              
196             sub cat {
197             my ($self, $id, $from, $to) = @_;
198             my $extra = "";
199             $extra = "$from $to" if defined($from) && defined($to);
200             my @ans;
201             try {
202             @ans = $self->exec("cat $id $extra;");
203             } catch {
204             @ans = ();
205             };
206             return @ans;
207             }
208              
209             =head2 annotation_show
210              
211             Use this method to specify what annotations to make CQP to show. Pass
212             it a list of the annotation names.
213              
214             =cut
215              
216             sub annotation_show($@) {
217             my ($self, @annotations) = @_;
218             my $annots = join(" ", map { "+$_" } @annotations);
219             $self->exec("show $annots;");
220             }
221              
222             =head2 annotation_hide
223              
224             Use this method to specify what annotations to make CQP to not show
225             (hide). Pass it a list of the annotation names.
226              
227             =cut
228              
229             sub annotation_hide($@) {
230             my ($self, @annotations) = @_;
231             my $annots = join(" ", map { "-$_" } @annotations);
232             $self->exec("show $annots;");
233             }
234              
235             =head2 change_corpus
236              
237             Change current active corpus. Pass the corpus name as the argument.
238              
239             =cut
240              
241             sub change_corpus($$) {
242             my ($self, $cname) = @_;
243             $cname = uc $cname;
244             $self->exec("$cname;");
245             }
246              
247             =head2 set
248              
249             Set CQP properties. Pass a hash (not a reference) of key/values to be
250             set. Note that at the moment string values should be double quoted
251             (see example in the synopsis).
252              
253             =cut
254              
255             sub set($%) {
256             my ($self, %vars) = @_;
257             for my $key (keys %vars) {
258             my $values;
259             if (ref($vars{$key}) eq "ARRAY") {
260             $values = join(" ", @{$vars{$key}});
261             } else {
262             $values = $vars{$key};
263             }
264              
265             try {
266             $self->exec("set $key $values;");
267             };
268             }
269             }
270              
271             =head2 exec
272              
273             Similar to CWB::CQP->exec, but dying in case of error with the error
274             message. Useful for use with C. Check the synopsis above
275             for an example.
276              
277             =cut
278              
279             sub exec {
280             my ($self, @args) = @_;
281             @args = map { Encode::_utf8_off($_); $_ } @args if $self->{__utf8};
282             print STDERR join(' || ', @args), "\n" if $DEBUG;
283             my @answer = $self->SUPER::exec(@args);
284             die $self->error_message unless $self->ok;
285             @answer = map { Encode::_utf8_on($_); $_ } @answer if $self->{__utf8};
286             return @answer;
287             }
288              
289             =head2 corpora_details
290              
291             Returns a reference to a hash with details about a specific corpus,
292             like name, id, home directory, properties and attributes;
293              
294             =cut
295              
296             sub corpora_details {
297             my ($self, $cname) = @_;
298             return undef unless $cname;
299              
300             $cname = lc $cname unless $cname =~ m{[/\\]};
301              
302             my $details = {};
303             my $reg = new CWB::RegistryFile $cname;
304             return undef unless $reg;
305              
306             $details->{filename} = $reg->filename;
307             $details->{name} = $reg->name;
308             $details->{corpus_id} = $reg->id;
309             $details->{home_dir} = $reg->home;
310             $details->{info_file} = $reg->info;
311              
312             my @properties = $reg->list_properties;
313             for my $property (@properties) {
314             $details->{property}{$property} = $reg->property($property);
315             }
316              
317             my @attributes = $reg->list_attributes;
318             for my $attr (@attributes) {
319             $details->{attribute}{$reg->attribute($attr)}{$attr} = $reg->attribute_path($attr);
320             }
321              
322             return $details;
323             }
324              
325             =head2 show_corpora
326              
327             Returns a reference to a list of the available corpora;
328              
329             =cut
330              
331             sub show_corpora {
332             my $self = shift;
333             my $ans;
334             try {
335             $ans = [ $self->exec("show corpora;") ];
336             } catch {
337             $ans = [];
338             };
339             return $ans;
340             }
341              
342             =head2 iterator
343              
344             Returns a new iterator, to iterate over a result set. See
345             L for documentation on how to use it.
346              
347             =cut
348              
349             sub iterator {
350             return CWB::CQP::More::Iterator->new(@_);
351             }
352              
353             =head1 AUTHOR
354              
355             Alberto Simoes, C<< >>
356              
357             =head1 BUGS
358              
359             Please report any bugs or feature requests to C, or through
360             the web interface at L. I will be notified, and then you'll
361             automatically be notified of progress on your bug as I make changes.
362              
363             =head1 SUPPORT
364              
365             You can find documentation for this module with the perldoc command.
366              
367             perldoc CWB::CQP::More
368              
369              
370             You can also look for information at:
371              
372             =over 4
373              
374             =item * RT: CPAN's request tracker
375              
376             L
377              
378             =item * AnnoCPAN: Annotated CPAN documentation
379              
380             L
381              
382             =item * CPAN Ratings
383              
384             L
385              
386             =item * Search CPAN
387              
388             L
389              
390             =back
391              
392              
393             =head1 ACKNOWLEDGEMENTS
394              
395             Thanks for Stefan Evert for all help.
396              
397             =head1 LICENSE AND COPYRIGHT
398              
399             Copyright 2010-2011 Alberto Simoes.
400              
401             This program is free software; you can redistribute it and/or modify it
402             under the terms of either: the GNU General Public License as published
403             by the Free Software Foundation; or the Artistic License.
404              
405             See http://dev.perl.org/licenses/ for more information.
406              
407              
408             =cut
409              
410             1; # End of CWB::CQP::More