line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Text::Context::Para; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
Text::Context::Para - A paragraph in context |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 DESCRIPTION |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
This is a paragraph being used by Text::Context. |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=cut |
12
|
|
|
|
|
|
|
|
13
|
4
|
|
|
4
|
|
26
|
use strict; |
|
4
|
|
|
|
|
9
|
|
|
4
|
|
|
|
|
160
|
|
14
|
4
|
|
|
4
|
|
22
|
use warnings; |
|
4
|
|
|
|
|
9
|
|
|
4
|
|
|
|
|
126
|
|
15
|
|
|
|
|
|
|
|
16
|
4
|
|
|
4
|
|
5454
|
use HTML::Entities; |
|
4
|
|
|
|
|
54326
|
|
|
4
|
|
|
|
|
513
|
|
17
|
4
|
|
|
4
|
|
22281
|
use Text::Context::EitherSide qw(get_context); |
|
4
|
|
|
|
|
39831
|
|
|
4
|
|
|
|
|
338
|
|
18
|
|
|
|
|
|
|
|
19
|
4
|
|
|
4
|
|
35
|
use constant DEFAULT_START_TAG => ''; |
|
4
|
|
|
|
|
10
|
|
|
4
|
|
|
|
|
222
|
|
20
|
4
|
|
|
4
|
|
21
|
use constant DEFAULT_END_TAG => ""; |
|
4
|
|
|
|
|
10
|
|
|
4
|
|
|
|
|
2448
|
|
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 CONSTRUCTOR |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head2 new |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
my $para = Text::Context::Para->new($content, $order); |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=cut |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
sub new { |
31
|
83
|
|
|
83
|
1
|
311
|
my ($class, $content, $order) = @_; |
32
|
83
|
|
|
|
|
1144
|
return bless { |
33
|
|
|
|
|
|
|
content => $content, |
34
|
|
|
|
|
|
|
scoretable => [], |
35
|
|
|
|
|
|
|
marked_words => [], |
36
|
|
|
|
|
|
|
final_score => 0, |
37
|
|
|
|
|
|
|
order => $order |
38
|
|
|
|
|
|
|
}, $class; |
39
|
|
|
|
|
|
|
} |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
=head1 METHODS |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=head2 best_keywords / slim |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=head2 as_text / marked_up |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
You can override DEFAULT_START_TAG and DEFAULT_END_TAG. These default to |
48
|
|
|
|
|
|
|
and |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=cut |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
sub best_keywords { |
54
|
20
|
|
|
20
|
1
|
621
|
my $self = shift; |
55
|
20
|
100
|
|
|
|
25
|
return @{ $self->{scoretable}->[-1] || [] }; |
|
20
|
|
|
|
|
147
|
|
56
|
|
|
|
|
|
|
} |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
sub slim { |
59
|
8
|
|
|
8
|
1
|
13
|
my ($self, $max_weight) = @_; |
60
|
8
|
|
|
|
|
45
|
$self->{content} =~ s/^\s+//; |
61
|
8
|
|
|
|
|
188
|
$self->{content} =~ s/\s+$//; |
62
|
8
|
100
|
|
|
|
49
|
return $self if length $self->{content} <= $max_weight; |
63
|
4
|
|
|
|
|
228
|
my @words = split /\s+/, $self->{content}; |
64
|
4
|
|
|
|
|
29
|
for (reverse(0 .. @words / 2)) { |
65
|
70
|
|
|
|
|
1109
|
my $trial = get_context($_, $self->{content}, @{ $self->{marked_words} }); |
|
70
|
|
|
|
|
272
|
|
66
|
70
|
100
|
|
|
|
93340
|
if (length $trial < $max_weight) { |
67
|
4
|
|
|
|
|
14
|
$self->{content} = $trial; |
68
|
4
|
|
|
|
|
75
|
return $self; |
69
|
|
|
|
|
|
|
} |
70
|
|
|
|
|
|
|
} |
71
|
0
|
|
|
|
|
0
|
$self->{content} = join " ... ", @{ $self->{marked_words} }; |
|
0
|
|
|
|
|
0
|
|
72
|
0
|
|
|
|
|
0
|
return $self; # Should not happen. |
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
|
75
|
10
|
|
|
10
|
1
|
1504
|
sub as_text { return $_[0]->{content} } |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
sub marked_up { |
78
|
4
|
|
|
4
|
1
|
10
|
my $self = shift; |
79
|
4
|
|
100
|
|
|
286
|
my $start_tag = shift || DEFAULT_START_TAG; |
80
|
4
|
|
100
|
|
|
27
|
my $end_tag = shift || DEFAULT_END_TAG; |
81
|
4
|
|
|
|
|
15
|
my $content = $self->as_text; |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
# Need to escape entities in here. |
84
|
4
|
|
|
|
|
19
|
my $re = join "|", map { qr/\Q$_\E/i } @{ $self->{marked_words} }; |
|
7
|
|
|
|
|
109
|
|
|
4
|
|
|
|
|
12
|
|
85
|
4
|
|
|
|
|
193
|
my $re2 = qr/\b($re)\b/i; |
86
|
4
|
|
|
|
|
57
|
my @fragments = split /$re2/i, $content; |
87
|
4
|
|
|
|
|
11
|
my $output; |
88
|
4
|
|
|
|
|
13
|
for my $orig_frag (@fragments) { |
89
|
17
|
|
|
|
|
57
|
my $frag = encode_entities($orig_frag); |
90
|
17
|
100
|
|
|
|
1731
|
if ($orig_frag =~ /$re2/i) { |
91
|
7
|
|
|
|
|
18
|
$frag = $start_tag . $frag . $end_tag; |
92
|
|
|
|
|
|
|
} |
93
|
17
|
|
|
|
|
45
|
$output .= $frag; |
94
|
|
|
|
|
|
|
} |
95
|
4
|
|
|
|
|
55
|
return $output; |
96
|
|
|
|
|
|
|
} |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
1; |