line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
##-*- Mode: CPerl -*- |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
## File: DDC::Format::Kwic.pm |
4
|
|
|
|
|
|
|
## Author: Bryan Jurish |
5
|
|
|
|
|
|
|
## Description: |
6
|
|
|
|
|
|
|
## + DDC Query utilities: output formatting: keywords-in-context |
7
|
|
|
|
|
|
|
##====================================================================== |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
package DDC::Format::Kwic; |
10
|
26
|
|
|
26
|
|
198
|
use File::Basename; |
|
26
|
|
|
|
|
60
|
|
|
26
|
|
|
|
|
3264
|
|
11
|
26
|
|
|
26
|
|
233
|
use Carp; |
|
26
|
|
|
|
|
60
|
|
|
26
|
|
|
|
|
1613
|
|
12
|
26
|
|
|
26
|
|
174
|
use strict; |
|
26
|
|
|
|
|
81
|
|
|
26
|
|
|
|
|
1439
|
|
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
##====================================================================== |
15
|
|
|
|
|
|
|
## Globals |
16
|
|
|
|
|
|
|
our @ISA = qw(DDC::Format); |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
BEGIN { |
19
|
26
|
|
|
26
|
|
27883
|
*isa = \&UNIVERSAL::isa; |
20
|
|
|
|
|
|
|
} |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
##====================================================================== |
23
|
|
|
|
|
|
|
## Constructors, etc. |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
## $fmt = $CLASS_OR_OBJ->new(%args) |
26
|
|
|
|
|
|
|
## + %args: |
27
|
|
|
|
|
|
|
## ( |
28
|
|
|
|
|
|
|
## start=>$previous_hit_num, ##-- pre-initial hit number (default=0) |
29
|
|
|
|
|
|
|
## highlight=>[$pre,$post], ##-- highlighting substrings |
30
|
|
|
|
|
|
|
## width=>$nchars, ##-- context width; default=32 |
31
|
|
|
|
|
|
|
## useMatchIds=>$bool, ##-- whether to use match-ids if available; undef (default) if non-trivial match-ids are specified |
32
|
|
|
|
|
|
|
## ) |
33
|
|
|
|
|
|
|
sub new { |
34
|
0
|
|
|
0
|
1
|
|
my $that = shift; |
35
|
0
|
|
0
|
|
|
|
return bless { |
36
|
|
|
|
|
|
|
highlight=>['__','__'], |
37
|
|
|
|
|
|
|
width=>32, |
38
|
|
|
|
|
|
|
useMatchIds=>undef, |
39
|
|
|
|
|
|
|
@_ |
40
|
|
|
|
|
|
|
}, ref($that)||$that; |
41
|
|
|
|
|
|
|
} |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
## $fmt = $fmt->reset() |
44
|
|
|
|
|
|
|
## + reset counters, etc. |
45
|
|
|
|
|
|
|
sub reset { |
46
|
0
|
|
|
0
|
1
|
|
$_[0]{start}=0; |
47
|
0
|
|
|
|
|
|
return $_[0]->SUPER::reset(); |
48
|
|
|
|
|
|
|
} |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
##====================================================================== |
51
|
|
|
|
|
|
|
## Helper functions |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
## $len = maxlen(@strings) |
54
|
|
|
|
|
|
|
sub maxlen { |
55
|
0
|
|
|
0
|
0
|
|
my $l = 0; |
56
|
0
|
0
|
|
|
|
|
do { $l=length($_) if (length($_) > $l) } foreach (@_); |
|
0
|
|
|
|
|
|
|
57
|
0
|
|
|
|
|
|
return $l; |
58
|
|
|
|
|
|
|
} |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
##====================================================================== |
61
|
|
|
|
|
|
|
## API |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
## $str = $fmt->toString($hitList) |
64
|
|
|
|
|
|
|
sub toString { |
65
|
0
|
|
|
0
|
1
|
|
my ($fmt,$hits) = @_; |
66
|
|
|
|
|
|
|
|
67
|
0
|
0
|
0
|
|
|
|
if ($hits->{counts_} && @{$hits->{counts_}}) { |
|
0
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
##-- count-query: format as text |
69
|
0
|
|
|
|
|
|
my ($i); |
70
|
0
|
|
|
|
|
|
my @lens = map {$i=$_; maxlen(map {$_->[$i]} @{$hits->{counts_}})} (0..$#{$hits->{counts_}[0]}); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
71
|
0
|
|
|
|
|
|
my $fmt = join("\t", map {"%-${_}s"} @lens)."\n"; |
|
0
|
|
|
|
|
|
|
72
|
0
|
|
|
|
|
|
return join('', map {sprintf($fmt,@$_)} @{$hits->{counts_}}); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
|
75
|
0
|
|
0
|
|
|
|
my $xlen = $fmt->{width} || 2**31; |
76
|
0
|
|
|
|
|
|
my $hnum = $hits->{start}; |
77
|
0
|
0
|
|
|
|
|
my $useMatchIds = defined($fmt->{useMatchIds}) ? $fmt->{useMatchIds} : (grep {$_>0 && $_!=1} map {$_->{hl_}} map {@{$_->{ctx_}[1]}} @{$hits->{hits_}}); |
|
0
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
|
79
|
0
|
|
|
|
|
|
my (@hits); |
80
|
0
|
|
|
|
|
|
foreach my $hit (@{$hits->{hits_}}) { |
|
0
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
##-- hit key: number + file basename + page |
82
|
0
|
|
|
|
|
|
my $f = basename($hit->{meta_}{file_}); |
83
|
0
|
|
|
|
|
|
$f =~ s/\..*$//; |
84
|
0
|
0
|
|
|
|
|
my $p = defined($hit->{meta_}{page_}) ? $hit->{meta_}{page_} : 0; |
85
|
0
|
|
|
|
|
|
my $pagei = (grep {$hit->{meta_}{indices_}[$_] eq 'page'} (0..$#{$hit->{meta_}{indices_}}))[0]; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
86
|
0
|
0
|
|
|
|
|
my $targetMatchId = $useMatchIds ? (sort {$a<=>$b} grep {$_} map {$_->{hl_}} @{$hit->{ctx_}[1]})[0] : undef; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
##-- hit context |
89
|
0
|
|
0
|
|
|
|
my $fkey = $hits->{defaultField} || $hit->{meta_}{indices_}[0] || 'w'; |
90
|
0
|
|
|
|
|
|
my (@l,@c,@r); |
91
|
0
|
|
|
|
|
|
my $ary = \@l; |
92
|
0
|
|
|
|
|
|
my $hl = '__'; |
93
|
0
|
|
|
|
|
|
foreach (map {@$_} @{$hit->{ctx_}}) { |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
94
|
0
|
0
|
0
|
|
|
|
if ($ary eq \@l && ref($_) && ($useMatchIds ? $_->{hl_}==$targetMatchId : $_->{hl_})) { |
|
|
0
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
95
|
0
|
|
|
|
|
|
$ary=\@c; |
96
|
0
|
0
|
0
|
|
|
|
$p=$_->{page} if (!$p && defined($_->{page})); |
97
|
|
|
|
|
|
|
} |
98
|
|
|
|
|
|
|
elsif ($ary eq \@c) { |
99
|
0
|
|
|
|
|
|
$ary = \@r; |
100
|
|
|
|
|
|
|
} |
101
|
0
|
0
|
|
|
|
|
$hl = ($ary eq \@c ? '__' : '_'); |
102
|
|
|
|
|
|
|
push(@$ary, (ref($_) |
103
|
|
|
|
|
|
|
? ($_->{hl_} |
104
|
|
|
|
|
|
|
? ($hl.$_->{$fkey}.$hl.($useMatchIds ? "/$_->{hl_}" : '')) |
105
|
0
|
0
|
|
|
|
|
: $_->{$fkey}) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
106
|
|
|
|
|
|
|
: $_)); |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
|
109
|
0
|
|
|
|
|
|
my $ls = join(' ', @l); |
110
|
0
|
|
|
|
|
|
my $rs = join(' ', @r); |
111
|
0
|
0
|
|
|
|
|
substr($ls, 0, length($ls)-$xlen+3, '...') if (length($ls) > $xlen); |
112
|
0
|
0
|
|
|
|
|
substr($rs, $xlen-3, length($rs)-$xlen+3, '...') if (length($rs) > $xlen); |
113
|
|
|
|
|
|
|
|
114
|
0
|
|
|
|
|
|
push(@hits,[$hnum++, "[$f:$p]", $ls, join(' ',@c), $rs]); |
115
|
|
|
|
|
|
|
} |
116
|
|
|
|
|
|
|
|
117
|
0
|
|
|
|
|
|
my $ln = maxlen(map {$_->[0]} @hits); |
|
0
|
|
|
|
|
|
|
118
|
0
|
|
|
|
|
|
my $lf = maxlen(map {$_->[1]} @hits); |
|
0
|
|
|
|
|
|
|
119
|
0
|
|
|
|
|
|
my $ll = maxlen(map {$_->[2]} @hits); |
|
0
|
|
|
|
|
|
|
120
|
0
|
|
|
|
|
|
my $lc = maxlen(map {$_->[3]} @hits); |
|
0
|
|
|
|
|
|
|
121
|
0
|
|
|
|
|
|
my $lr = maxlen(map {$_->[4]} @hits); |
|
0
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
return ( |
123
|
|
|
|
|
|
|
"# Hit(s) $hits[0][0]-$hits[$#hits][0] of $hits->{nhits_}" |
124
|
|
|
|
|
|
|
.($hits->{hint_} ? " {hint_}>" : '') |
125
|
|
|
|
|
|
|
."\n" |
126
|
0
|
0
|
|
|
|
|
.join('', map {sprintf("%${ln}d: %-${lf}s %${ll}s %-${lc}s %-${lr}s\n", @$_)} @hits) |
|
0
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
); |
128
|
|
|
|
|
|
|
} |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
1; ##-- be happy |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
__END__ |