line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
##-*- Mode: CPerl -*- |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
## File: DDC::Format::Kwic.pm |
4
|
|
|
|
|
|
|
## Author: Bryan Jurish |
5
|
|
|
|
|
|
|
## Description: |
6
|
|
|
|
|
|
|
## + DDC Query utilities: output formatting: keywords-in-context |
7
|
|
|
|
|
|
|
##====================================================================== |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
package DDC::Format::Kwic; |
10
|
26
|
|
|
26
|
|
181
|
use File::Basename; |
|
26
|
|
|
|
|
57
|
|
|
26
|
|
|
|
|
2844
|
|
11
|
26
|
|
|
26
|
|
176
|
use Carp; |
|
26
|
|
|
|
|
52
|
|
|
26
|
|
|
|
|
1233
|
|
12
|
26
|
|
|
26
|
|
181
|
use strict; |
|
26
|
|
|
|
|
65
|
|
|
26
|
|
|
|
|
1453
|
|
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
##====================================================================== |
15
|
|
|
|
|
|
|
## Globals |
16
|
|
|
|
|
|
|
our @ISA = qw(DDC::Format); |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
BEGIN { |
19
|
26
|
|
|
26
|
|
27506
|
*isa = \&UNIVERSAL::isa; |
20
|
|
|
|
|
|
|
} |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
##====================================================================== |
23
|
|
|
|
|
|
|
## Constructors, etc. |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
## $fmt = $CLASS_OR_OBJ->new(%args) |
26
|
|
|
|
|
|
|
## + %args: |
27
|
|
|
|
|
|
|
## ( |
28
|
|
|
|
|
|
|
## start=>$previous_hit_num, ##-- pre-initial hit number (default=0) |
29
|
|
|
|
|
|
|
## highlight=>[$pre,$post], ##-- highlighting substrings |
30
|
|
|
|
|
|
|
## width=>$nchars, ##-- context width; default=32 |
31
|
|
|
|
|
|
|
## useMatchIds=>$bool, ##-- whether to use match-ids if available; undef (default) if non-trivial match-ids are specified |
32
|
|
|
|
|
|
|
## wsAttr=>$wskey, ##-- token attribute to use for preceding whitespace (default='ws') |
33
|
|
|
|
|
|
|
## ) |
34
|
|
|
|
|
|
|
sub new { |
35
|
0
|
|
|
0
|
1
|
|
my $that = shift; |
36
|
0
|
|
0
|
|
|
|
return bless { |
37
|
|
|
|
|
|
|
highlight=>['__','__'], |
38
|
|
|
|
|
|
|
width=>32, |
39
|
|
|
|
|
|
|
useMatchIds=>undef, |
40
|
|
|
|
|
|
|
wsAttr=>'ws', |
41
|
|
|
|
|
|
|
@_ |
42
|
|
|
|
|
|
|
}, ref($that)||$that; |
43
|
|
|
|
|
|
|
} |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
## $fmt = $fmt->reset() |
46
|
|
|
|
|
|
|
## + reset counters, etc. |
47
|
|
|
|
|
|
|
sub reset { |
48
|
0
|
|
|
0
|
1
|
|
$_[0]{start}=0; |
49
|
0
|
|
|
|
|
|
return $_[0]->SUPER::reset(); |
50
|
|
|
|
|
|
|
} |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
##====================================================================== |
53
|
|
|
|
|
|
|
## Helper functions |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
## $len = maxlen(@strings) |
56
|
|
|
|
|
|
|
sub maxlen { |
57
|
0
|
|
|
0
|
0
|
|
my $l = 0; |
58
|
0
|
0
|
|
|
|
|
do { $l=length($_) if (length($_) > $l) } foreach (@_); |
|
0
|
|
|
|
|
|
|
59
|
0
|
|
|
|
|
|
return $l; |
60
|
|
|
|
|
|
|
} |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
## $wsStr = $fmt->wsStr($token) |
63
|
|
|
|
|
|
|
sub wsStr { |
64
|
0
|
|
|
0
|
0
|
|
my ($fmt,$w) = @_; |
65
|
0
|
0
|
0
|
|
|
|
return (!$fmt->{wsAttr} || !ref($w) || !defined($w->{$fmt->{wsAttr}}) || $w->{$fmt->{wsAttr}} ? ' ' : ''); |
66
|
|
|
|
|
|
|
} |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
##====================================================================== |
69
|
|
|
|
|
|
|
## API |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
## $str = $fmt->toString($hitList) |
72
|
|
|
|
|
|
|
sub toString { |
73
|
0
|
|
|
0
|
1
|
|
my ($fmt,$hits) = @_; |
74
|
|
|
|
|
|
|
|
75
|
0
|
0
|
0
|
|
|
|
if ($hits->{counts_} && @{$hits->{counts_}}) { |
|
0
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
##-- count-query: format as text |
77
|
0
|
|
|
|
|
|
my ($i); |
78
|
0
|
|
|
|
|
|
my @lens = map {$i=$_; maxlen(map {$_->[$i]} @{$hits->{counts_}})} (0..$#{$hits->{counts_}[0]}); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
79
|
0
|
|
|
|
|
|
my $fmt = join("\t", map {"%-${_}s"} @lens)."\n"; |
|
0
|
|
|
|
|
|
|
80
|
0
|
|
|
|
|
|
return join('', map {sprintf($fmt,@$_)} @{$hits->{counts_}}); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
} |
82
|
|
|
|
|
|
|
|
83
|
0
|
|
0
|
|
|
|
my $xlen = $fmt->{width} || 2**31; |
84
|
0
|
|
|
|
|
|
my $hnum = $hits->{start}; |
85
|
0
|
0
|
|
|
|
|
my $useMatchIds = defined($fmt->{useMatchIds}) ? $fmt->{useMatchIds} : (grep {$_>0 && $_!=1} map {$_->{hl_}} map {@{$_->{ctx_}[1]}} @{$hits->{hits_}}); |
|
0
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
|
87
|
0
|
|
|
|
|
|
my (@hits); |
88
|
0
|
|
|
|
|
|
foreach my $hit (@{$hits->{hits_}}) { |
|
0
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
##-- hit key: number + file basename + page |
90
|
0
|
|
|
|
|
|
my $f = basename($hit->{meta_}{file_}); |
91
|
0
|
|
|
|
|
|
$f =~ s/\..*$//; |
92
|
0
|
0
|
|
|
|
|
my $p = defined($hit->{meta_}{page_}) ? $hit->{meta_}{page_} : 0; |
93
|
0
|
|
|
|
|
|
my $pagei = (grep {$hit->{meta_}{indices_}[$_] eq 'page'} (0..$#{$hit->{meta_}{indices_}}))[0]; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
94
|
0
|
0
|
|
|
|
|
my $targetMatchId = $useMatchIds ? (sort {$a<=>$b} grep {$_} map {$_->{hl_}} @{$hit->{ctx_}[1]})[0] : undef; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
##-- hit context |
97
|
0
|
|
0
|
|
|
|
my $fkey = $hits->{defaultField} || $hit->{meta_}{indices_}[0] || 'w'; |
98
|
0
|
|
|
|
|
|
my (@l,@c,@r); |
99
|
0
|
|
|
|
|
|
my $ary = \@l; |
100
|
0
|
|
|
|
|
|
my $hl = '__'; |
101
|
0
|
|
|
|
|
|
foreach (map {@$_} @{$hit->{ctx_}}) { |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
102
|
0
|
0
|
0
|
|
|
|
if ($ary eq \@l && ref($_) && ($useMatchIds ? $_->{hl_}==$targetMatchId : $_->{hl_})) { |
|
|
0
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
103
|
0
|
|
|
|
|
|
$ary=\@c; |
104
|
0
|
0
|
0
|
|
|
|
$p=$_->{page} if (!$p && defined($_->{page})); |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
elsif ($ary eq \@c) { |
107
|
0
|
|
|
|
|
|
$ary = \@r; |
108
|
|
|
|
|
|
|
} |
109
|
0
|
0
|
|
|
|
|
$hl = ($ary eq \@c ? '__' : '_'); |
110
|
|
|
|
|
|
|
push(@$ary, ($fmt->wsStr($_) |
111
|
|
|
|
|
|
|
.(ref($_) |
112
|
|
|
|
|
|
|
? ($_->{hl_} |
113
|
|
|
|
|
|
|
? ($hl.$_->{$fkey}.$hl.($useMatchIds ? "/$_->{hl_}" : '')) |
114
|
0
|
0
|
|
|
|
|
: $_->{$fkey}) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
115
|
|
|
|
|
|
|
: $_))); |
116
|
|
|
|
|
|
|
} |
117
|
|
|
|
|
|
|
|
118
|
0
|
|
|
|
|
|
my $ls = join('', @l); |
119
|
0
|
|
|
|
|
|
my $rs = join('', @r); |
120
|
0
|
0
|
|
|
|
|
substr($ls, 0, length($ls)-$xlen+3, '...') if (length($ls) > $xlen); |
121
|
0
|
0
|
|
|
|
|
substr($rs, $xlen-3, length($rs)-$xlen+3, '...') if (length($rs) > $xlen); |
122
|
|
|
|
|
|
|
|
123
|
0
|
|
|
|
|
|
push(@hits,[$hnum++, "[$f:$p]", $ls, join('',@c), $rs]); |
124
|
|
|
|
|
|
|
} |
125
|
|
|
|
|
|
|
|
126
|
0
|
|
|
|
|
|
my $ln = maxlen(map {$_->[0]} @hits); |
|
0
|
|
|
|
|
|
|
127
|
0
|
|
|
|
|
|
my $lf = maxlen(map {$_->[1]} @hits); |
|
0
|
|
|
|
|
|
|
128
|
0
|
|
|
|
|
|
my $ll = maxlen(map {$_->[2]} @hits); |
|
0
|
|
|
|
|
|
|
129
|
0
|
|
|
|
|
|
my $lc = maxlen(map {$_->[3]} @hits); |
|
0
|
|
|
|
|
|
|
130
|
0
|
|
|
|
|
|
my $lr = maxlen(map {$_->[4]} @hits); |
|
0
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
return ( |
132
|
|
|
|
|
|
|
"# Hit(s) $hits[0][0]-$hits[$#hits][0] of $hits->{nhits_}" |
133
|
|
|
|
|
|
|
.($hits->{hint_} ? " {hint_}>" : '') |
134
|
|
|
|
|
|
|
."\n" |
135
|
0
|
0
|
|
|
|
|
.join('', map {sprintf("%${ln}d: %-${lf}s %${ll}s %-${lc}s %-${lr}s\n", @$_)} @hits) |
|
0
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
); |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
1; ##-- be happy |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
__END__ |