File Coverage

blib/lib/Bio/ConnectDots/Parser.pm
Criterion Covered Total %
statement 189 264 71.5
branch 95 190 50.0
condition n/a
subroutine 18 22 81.8
pod 0 18 0.0
total 302 494 61.1


line stmt bran cond sub pod time code
1             package Bio::ConnectDots::Parser;
2 2     2   47019 use vars qw(@ISA @AUTO_ATTRIBUTES @OTHER_ATTRIBUTES %SYNONYMS %DEFAULTS);
  2         5  
  2         179  
3 2     2   13 use strict;
  2         3  
  2         84  
4 2     2   10961 use Text::Balanced qw(extract_delimited extract_bracketed extract_quotelike);
  2         74830  
  2         270  
5 2     2   1263 use Class::AutoClass;
  2         26504  
  2         14256  
6             @ISA = qw(Class::AutoClass);
7              
8             @AUTO_ATTRIBUTES=qw();
9             %SYNONYMS=();
10             @OTHER_ATTRIBUTES=qw();
11             %DEFAULTS=();
12             Class::AutoClass::declare(__PACKAGE__);
13              
14             # constraints =
15             # constraint
16             # constraint separator constraint ...
17             # separator = any non-word character or 'and'
18             sub parse_constraints {
19 1     1 0 631 my($self,$text,$want_tree)=@_;
20 1         2 my($constraint,$rest);
21 1         3 my $constraints=[];
22 1         5 while ($text) {
23 2         7 ($constraint,$rest)=$self->parse_constraint($text,$want_tree);
24 2 50       7 last unless $constraint;
25 2         4 push(@$constraints,$constraint);
26 2         11 $rest=~s/^[\s,;]*(AND)*[\s,;]*//is; # consume separator
27 2         6 $text=$rest;
28             }
29 1         1 my $result;
30 1 50       5 if (@$constraints) {
31 1 50       3 $want_tree? $result={match=>$constraints}: $result=$constraints;
32             } else {
33 0         0 $rest=$text;
34             }
35 1 50       6 wantarray? ($result,$rest): $result;
36             }
37              
38             # constraint =
39             # constant
40             # op constant
41             # term constant
42             # term op constant
43             # try longest first.
44             sub parse_constraint {
45 4     4 0 2144 my($self,$text,$want_tree)=@_;
46 4         7 my($term,$op,$constant,$rest_term,$rest,$rest);
47 4         13 ($term,$rest_term)=$self->parse_term($text,$want_tree);
48 4 50       12 goto FAIL unless $term;
49             # try parsing 'op constant'
50 4         15 ($op,$rest)=$self->parse_op($rest_term,$want_tree);
51 4 50       37 goto SUCCESS if $op=~/exists/i; # no constant needed
52 4 50       10 if ($op) {
53 4         12 ($constant,$rest)=$self->parse_constant($rest,$want_tree);
54 4 50       12 if ($constant) {
55 4         17 goto SUCCESS;
56             }
57             }
58             # try parsing just 'constant'
59 0         0 ($constant,$rest)=$self->parse_constant($rest_term,$want_tree);
60 0 0       0 goto FAIL unless $constant;
61            
62 4         25 SUCCESS:
63             my $result={term=>$term,op=>$op,constant=>$constant};
64 4 50       12 $result={match=>$result} if $want_tree;
65 4 100       15 return wantarray? ($result,$rest): $result;
66 0 0       0 FAIL:
67             return wantarray? (undef,$text): undef;
68             }
69              
70             # joins =
71             # join
72             # join separator join ...
73             # separator = any non-word character or 'and'
74             sub parse_joins {
75 1     1 0 490 my($self,$text,$want_tree)=@_;
76 1         2 my($join,$rest);
77 1         2 my $joins=[];
78 1         3 while ($text) {
79 2         6 ($join,$rest)=$self->parse_join($text,$want_tree);
80 2 50       5 last unless $join;
81 2         3 push(@$joins,$join);
82 2         8 $rest=~s/^[\s,;]*(AND)*[\s,;]*//is; # consume separator
83 2         4 $text=$rest;
84             }
85 1         1 my $result;
86 1 50       3 if (@$joins) {
87 1 50       3 $want_tree? $result={match=>$joins}: $result=$joins;
88             } else {
89 0         0 $rest=$text;
90             }
91 1 50       4 wantarray? ($result,$rest): $result;
92             }
93              
94             # join =
95             # term = term
96             sub parse_join {
97 4     4 0 1089 my($self,$text,$want_tree)=@_;
98 4         12 my($term0,$rest)=$self->parse_term($text,$want_tree);
99 4 50       11 goto FAIL unless $term0;
100 4         14 $rest=~s/^\s*=+\s*//is; # consume separator
101 4         9 my($term1,$rest)=$self->parse_term($rest,$want_tree);
102 4 50       39 goto FAIL unless $term1;
103            
104 4         13 SUCCESS:
105             my $result={term0=>$term0,term1=>$term1};
106 4 50       14 $result={match=>$result} if $want_tree;
107 4 100       10 return wantarray? ($result,$rest): $result;
108 0 0       0 FAIL:
109             return wantarray? (undef,$text): undef;
110             }
111              
112             # aliases =
113             # alias
114             # alias separator alias ...
115             # separator = any non-word character
116             sub parse_aliases {
117 1     1 0 432 my($self,$text,$want_tree)=@_;
118 1         2 my($alias,$rest);
119 1         2 my $aliases=[];
120 1         17 while ($text) {
121 2         5 ($alias,$rest)=$self->parse_alias($text,$want_tree);
122 2 50       6 last unless $alias;
123 2         3 push(@$aliases,$alias);
124 2         7 $rest=~s/^[\s,;]*//is; # consume separator
125 2         5 $text=$rest;
126             }
127 1         2 my $result;
128 1 50       6 if (@$aliases) {
129 1 50       4 $want_tree? $result={match=>$aliases}: $result=$aliases;
130             } else {
131 0         0 $rest=$text;
132             }
133 1 50       12 wantarray? ($result,$rest): $result;
134             }
135              
136             # alias =
137             # name separator alias
138             # separator = any non-word character or AS
139             sub parse_alias {
140 4     4 0 825 my($self,$text,$want_tree)=@_;
141 4         11 my($target_name,$rest)=$self->parse_qword($text,$want_tree);
142 4 50       9 goto FAIL unless $target_name;
143 4         12 $rest=~s/^\s*AS\s*|[\s,;]*//is; # consume separator
144 4         9 my($alias_name,$rest)=$self->parse_qword($rest,$want_tree);
145 4 50       9 goto FAIL unless $alias_name;
146            
147 4         10 SUCCESS:
148             my $result={target_name=>$target_name,alias_name=>$alias_name};
149 4 50       19 $result={match=>$result} if $want_tree;
150 4 100       12 return wantarray? ($result,$rest): $result;
151 0 0       0 FAIL:
152             return wantarray? (undef,$text): undef;
153             }
154              
155             # term = term1 | term1.term1 | term1.term1.term1
156             # approximate this by list of any number of term1's
157             sub parse_term {
158 15     15 0 1251 my($self,$text,$want_tree)=@_;
159 15         14 my($term1,$rest);
160 15         23 my $term=[];
161 15         34 while ($text) {
162 28         53 ($term1,$rest)=$self->parse_term1($text,$want_tree);
163 28 50       56 last unless $term1;
164 28         40 push(@$term,$term1);
165 28 100       98 last unless $rest=~s/^\s*\.\s*//s; # done unless separator is '.'
166 13         34 $text=$rest;
167             }
168 15         14 my $result;
169 15 50       24 if (@$term) {
170 15 50       59 $result=$want_tree? {match=>$term}: $term;
171             } else {
172 0         0 $rest=$text;
173             }
174 15 100       43 wantarray? ($result,$rest): $result;
175             }
176              
177             # term1='*' | word | quoted_phrase | list
178             sub parse_term1 {
179 37     37 0 1804 my($self,$text,$want_tree)=@_;
180 37         45 $text.=' '; # append space because extract_quotelike doesn't
181             # handel q() if ) is last character of string
182 37         38 my($rule,$match,$rest,$prefix,$body,$skip);
183 37         103 $text=~s/^\s*//s; # strip leading spaces
184 37 100       147 if (($match,$rest)=$text=~/^(\*)(.*)/s) {
    100          
    100          
    100          
185 1         3 $rule='*';
186             } elsif (($match,$rest,$prefix,$skip,$skip,$body)=extract_quotelike($text),$match) {
187 1         70 $rule='quoted_phrase';
188             } elsif (($match,$rest,$prefix)=extract_bracketed($text,'[(q'),$match) {
189 1         372 $rule='list';
190 1         4 ($match)=$match=~/^[\[\(](.*)[\)\]]$/s;
191 1         5 $match=$self->parse_term_list($match,$want_tree);
192             } elsif (($match,$rest)=$text=~/^(\w+)(.*)/s) {
193 33         2850 $rule='word';
194             }
195 37         128 my $result;
196 37 100       56 if ($match) {
197 36 100       59 $match=$body if defined $body;
198 36 100       68 $result=$want_tree? {match=>$match,rule=>$rule}: $match;
199             } else {
200 1         2 $rest=$text;
201             }
202 37 100       125 wantarray? ($result,$rest): $result;
203             }
204             sub parse_term_list {
205 1     1 0 3 my($self,$text,$want_tree)=@_;
206 1         1 my($term1,$rest);
207 1         2 my $term=[];
208 1         3 while ($text) {
209 5         12 ($term1,$rest)=$self->parse_term1($text,$want_tree);
210 5 100       10 last unless $term1;
211 4         5 push(@$term,$term1);
212 4         11 $rest=~s/^[\s,;]//s; # consume separator
213 4         7 $text=$rest;
214             }
215 1         2 my $result;
216 1 50       3 if (@$term) {
217 1 50       13 $result=$want_tree? {match=>$term}: $term;
218             } else {
219 0         0 $rest=$text;
220             }
221 1 50       4 wantarray? ($result,$rest): $result;
222             }
223             # term = term1 | term1.term1 | term1.term1.term1
224             # approximate this by list of any number of term1's
225             sub parse_term_value {
226 0     0 0 0 my($self,$text,$want_tree)=@_;
227 0         0 my($term1,$rest);
228 0         0 my $term=[];
229 0         0 while ($text) {
230 0         0 ($term1,$rest)=$self->parse_term1_value($text,$want_tree);
231 0 0       0 last unless $term1;
232 0         0 push(@$term,$term1);
233 0         0 $text=$rest;
234             }
235 0         0 my $result;
236 0 0       0 if (@$term) {
237 0 0       0 $result=$want_tree? {match=>$term}: $term;
238             } else {
239 0         0 $rest=$text;
240             }
241 0 0       0 wantarray? ($result,$rest): $result;
242             }
243              
244             # term1='*' | everything to . | list
245             sub parse_term1_value {
246 0     0 0 0 my($self,$text,$want_tree)=@_;
247 0         0 my($rule,$match,$rest,$prefix);
248 0         0 $text=~s/^\s*//s; # strip leading spaces
249 0 0       0 if (($match,$rest)=$text=~/^(\*)(.*)/s) {
    0          
    0          
250 0         0 $rule='*';
251             } elsif (($match,$rest,$prefix)=extract_bracketed($text,'[(q'),$match) {
252 0         0 $rule='list';
253 0         0 ($match)=$match=~/^[\[\(](.*)[\)\]]$/s;
254 0         0 $match=$self->parse_term_list($match,$want_tree);
255             } elsif (($match,$rest)=$text=~/^(.*?)\.(.*)/s) {
256 0         0 $match=~s/\s*$//s; # strip trailing spaces
257 0         0 $rule='word';
258             } else {
259 0         0 $match=$text;
260 0         0 $rest='';
261 0         0 $rule='value';
262             }
263 0         0 my $result;
264 0 0       0 if ($match) {
265 0 0       0 $result=$want_tree? {match=>$match,rule=>$rule}: $match;
266             } else {
267 0         0 $rest=$text;
268             }
269 0 0       0 wantarray? ($result,$rest): $result;
270             }
271             # op = usual comparison ops | 'in'
272             sub parse_op {
273 6     6 0 765 my($self,$text,$want_tree)=@_;
274 6         22 $text=~s/^\s*//s; # strip leading spaces
275 6         48 my($match,$rest)=$text=~/^(exists|not\s*in|in|<=|==|!=|>=|<|=|>)(.*)/is; # longest patterns must be first
276 6         18 $match=uc($match);
277 6 50       13 $match='NOT IN' if $match=~/NOT\s*IN/;
278 6 50       23 $match='=' if $match eq '=='; # special case == for benefit of Perl programmers
279 6         7 my $result;
280 6 50       16 if ($match) {
281 6 50       16 $result=$want_tree? {match=>$match,rule=>'op'}: $match;
282             } else {
283 0         0 $rest=$text;
284             }
285 6 100       22 wantarray? ($result,$rest): $result;
286             }
287              
288             # constant = word | quoted_phrase | list
289             sub parse_constant {
290 8     8 0 832 my($self,$text,$want_tree)=@_;
291 8         12 $text.=' '; # append space because extract_quotelike doesn't
292             # handel q() if ) is last character of string
293 8         9 my($rule,$match,$rest,$prefix,$body,$skip);
294 8         25 $text=~s/^\s*//s; # strip leading spaces
295            
296 8 50       24 if (($match,$rest,$prefix,$skip,$skip,$body)=extract_quotelike($text),$match) {
    100          
    50          
297 0         0 $rule='quoted_phrase';
298             } elsif (($match,$rest,$prefix)=extract_bracketed($text,'[(q'),$match) {
299 1         276 $rule='list';
300 1         6 ($match)=$match=~/^[\[\(](.*)[\)\]]$/s;
301 1         5 $match=$self->parse_constant_list($match,$want_tree);
302             } elsif (($match,$rest)=$text=~/^([\w\.]+)\W*(.*)/s) {
303 7         677 $rule='word';
304             }
305 8         18 my $result;
306 8 50       14 if ($match) {
307 8 50       19 $match=$body if defined $body;
308 8 50       15 $result=$want_tree? {match=>$match,rule=>$rule}: $match;
309             } else {
310 0         0 $rest=$text;
311             }
312 8 100       36 wantarray? ($result,$rest): $result;
313             }
314             sub parse_constant_list {
315 1     1 0 2 my($self,$text,$want_tree)=@_;
316 1         2 my($constant1,$rest);
317 1         2 my $constants=[];
318 1         7 while ($text) {
319 2         9 ($constant1,$rest)=$self->parse_constant($text,$want_tree);
320 2 50       5 last unless $constant1;
321 2         4 push(@$constants,$constant1);
322 2         4 $text=$rest;
323             }
324 1         1 my $result;
325 1 50       3 if (@$constants) {
326 1 50       3 $result=$want_tree? {match=>$constants}: $constants;
327             } else {
328 0         0 $rest=$text;
329             }
330 1 50       3 wantarray? ($result,$rest): $result;
331             }
332             # constant_value = entire string | list
333             sub parse_constant_value {
334 0     0 0 0 my($self,$text,$want_tree)=@_;
335 0         0 my($rule,$match,$rest,$prefix,$body,$skip);
336 0 0       0 if (($match,$rest,$prefix)=extract_bracketed($text,'[(q'),$match) {
337 0         0 $rule='list';
338 0         0 ($match)=$match=~/^[\[\(](.*)[\)\]]$/s;
339 0         0 $match=$self->parse_constant_list($match,$want_tree);
340             } else{
341 0         0 $match=$text;
342 0         0 $rest='';
343 0         0 $rule='value';
344             }
345 0         0 my $result;
346 0 0       0 if ($match) {
347 0 0       0 $result=$want_tree? {match=>$match,rule=>$rule}: $match;
348             } else {
349 0         0 $rest=$text;
350             }
351 0 0       0 wantarray? ($result,$rest): $result;
352             }
353              
354             # outputs =
355             # output
356             # output separator output ...
357             # separator = any non-word character
358             sub parse_outputs {
359 0     0 0 0 my($self,$text,$want_tree)=@_;
360 0         0 my($output,$rest);
361 0         0 my $outputs=[];
362 0         0 while ($text) {
363 0         0 ($output,$rest)=$self->parse_output($text,$want_tree);
364 0 0       0 last unless $output;
365 0         0 push(@$outputs,$output);
366 0         0 $rest=~s/^[\s,;]*//is; # consume separator
367 0         0 $text=$rest;
368             }
369 0         0 my $result;
370 0 0       0 if (@$outputs) {
371 0 0       0 $want_tree? $result={match=>$outputs}: $result=$outputs;
372             } else {
373 0         0 $rest=$text;
374             }
375 0 0       0 wantarray? ($result,$rest): $result;
376             }
377              
378             # output =
379             # word | word.word, optionally followed by 'AS' name
380             sub parse_output {
381 2     2 0 881 my($self,$text,$want_tree)=@_;
382 2         3 my($output1,$rest,$output_name);
383 2         4 my $output=[];
384 2         5 while ($text) {
385 4         10 ($output1,$rest)=$self->parse_qword($text,$want_tree);
386 4 50       8 last unless $output1;
387 4         69 push(@$output,$output1);
388 4 100       16 last unless $rest=~s/^\s*\.\s*//s; # done unless separator is '.'
389 2         5 $text=$rest;
390             }
391 2 50       5 goto FAIL unless @$output;
392 2 100       8 if ($rest=~s/^\W*AS\W*//is) { # consume separator and
393             # parse output_name if separator is 'as'
394 1         4 ($output_name,$rest)=$self->parse_qword($rest,$want_tree);
395 1 50       5 goto FAIL unless $output_name;
396             }
397             SUCCESS:
398 2         12 my $result={termlist=>$output,output_name=>$output_name};
399 2 50       6 $result={match=>$result} if $want_tree;
400 2 50       9 return wantarray? ($result,$rest): $result;
401 0 0       0 FAIL:
402             return wantarray? (undef,$text): undef;
403             }
404              
405             # qword = word | quoted_phrase
406             sub parse_qword {
407 15     15 0 913 my($self,$text,$want_tree)=@_;
408 15         19 $text.=' '; # append space because extract_quotelike doesn't
409             # handel q() if ) is last character of string
410 15         18 my($rule,$match,$rest,$prefix,$body,$skip);
411 15         40 $text=~s/^\s*//s; # strip leading spaces
412            
413 15 100       45 if (($match,$rest,$prefix,$skip,$skip,$body)=extract_quotelike($text),$match) {
    50          
414 2         231 $rule='quoted_phrase';
415             } elsif (($match,$rest)=$text=~/^(\w+)(.*)/s) {
416 13         489 $rule='word';
417             }
418 15         24 my $result;
419 15 50       26 if ($match) {
420 15 100       24 $match=$body if defined $body;
421 15 100       29 $result=$want_tree? {match=>$match,rule=>$rule}: $match;
422             } else {
423 0         0 $rest=$text;
424             }
425 15 100       49 wantarray? ($result,$rest): $result;
426             }
427              
428              
429             1;
430