line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# Copyright 2014-2016 - Giovanni Simoni |
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# This file is part of PFT. |
4
|
|
|
|
|
|
|
# |
5
|
|
|
|
|
|
|
# PFT is free software: you can redistribute it and/or modify it under the |
6
|
|
|
|
|
|
|
# terms of the GNU General Public License as published by the Free |
7
|
|
|
|
|
|
|
# Software Foundation, either version 3 of the License, or (at your |
8
|
|
|
|
|
|
|
# option) any later version. |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# PFT is distributed in the hope that it will be useful, but WITHOUT ANY |
11
|
|
|
|
|
|
|
# WARRANTY; without even the implied warranty of MERCHANTABILITY or |
12
|
|
|
|
|
|
|
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
13
|
|
|
|
|
|
|
# for more details. |
14
|
|
|
|
|
|
|
# |
15
|
|
|
|
|
|
|
# You should have received a copy of the GNU General Public License along |
16
|
|
|
|
|
|
|
# with PFT. If not, see <http://www.gnu.org/licenses/>. |
17
|
|
|
|
|
|
|
# |
18
|
|
|
|
|
|
|
package PFT::Text::Symbol v1.3.0; |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=pod |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=encoding utf8 |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 NAME |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
PFT::Text::Symbol - Symbol from text scan |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=head1 SYNOPSIS |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
my $array = PFT::Text::Symbol->scan_html($your_html_text); |
31
|
|
|
|
|
|
|
foreach (PFT::Text::Symbol->scan_html($your_html_text)) { |
32
|
|
|
|
|
|
|
die unless $_->isa('PFT::Text::Symbol') |
33
|
|
|
|
|
|
|
}; |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
=head1 DESCRIPTION |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
Each instance of C<PFT::Text::Symbol> represents a symbol obtained by
38
|
|
|
|
|
|
|
parsing the text of an entry: they are detected as
39
|
|
|
|
|
|
|
C<E<lt>aE<gt>> and C<E<lt>imgE<gt>> tags in HTML. Symbols are collected
40
|
|
|
|
|
|
|
into a C<PFT::Text> object.
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
An example will make this easy to understand. Let's consider the following |
43
|
|
|
|
|
|
|
tag:

    <img src=":key1:a1/b1/c1">
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
It will generate a symbol C<$s1> such that: |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=over 1 |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=item C<$s1-E<gt>keyword> is C<key1>;
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=item C<$s1-E<gt>args> is the list C<(a1, b1, c1)>;
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=item C<$s1-E<gt>start> points to the first C<:> character;
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
=item C<$s1-E<gt>len> is the length of the symbol text, so that C<start + len - 1> points to the last C<1> character;
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=back |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
Since a block of HTML can possibly yield multiple symbols, there's no |
62
|
|
|
|
|
|
|
public construction. Use the C<scan_html> multi-constructor instead.
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
=head2 Construction |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
Construction usually goes through C<PFT::Text::Symbol-E<gt>scan_html>,
67
|
|
|
|
|
|
|
which expects an HTML string as parameter and returns a list of blessed |
68
|
|
|
|
|
|
|
symbols. |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
For other needs (e.g. testing): |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
PFT::Text::Symbol->new($keyword, [$arg1, …, $argn], $start, $length) |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=cut |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# Scan HTML text for symbolic references and return them as blessed symbols.
#
# A symbolic reference is a double-quoted ":keyword:arg1/arg2/..." value
# found in the src attribute of an <img> tag or the href attribute of an
# <a> tag.
#
# Called as a class method; every remaining argument is concatenated into
# the text to scan. Each symbol is an array reference blessed into the
# invocant class and laid out as [keyword, [args...], start, len], where
# start is the offset of the first ':' in the scanned text and len is the
# length of the ":keyword:args" string.
#
# Returns the symbols ordered by start offset: a list in list context, a
# reference to an array of them in scalar context.
sub scan_html {
    my $cls = shift;

    my $pair = qr/":(\w+):([^"]*)"/;
    my $img  = qr/<img\b[^>]*src=\s*$pair([^>]*)>/;
    my $ahr  = qr/<a\b[^>]*href=\s*$pair([^>]*)>/;

    my $text = join '', @_;
    my @out;
    for my $reg ($img, $ahr) {
        # A negative look-behind instead of a leading \W: the tag must not
        # be glued to a preceding word character, but it may sit at the
        # very beginning of the text.
        while ($text =~ /(?<!\w)$reg/smg) {
            # Copy the captures right away so that no later regex
            # operation can clobber them.
            my ($keyword, $args, $rest) = ($1, $2, $3);

            my $len   = length($keyword) + length($args) + 2;    # +2 for ::
            my $start = pos($text) - $len - length($rest) - 2;   # -2 for ">

            push @out, bless([
                $keyword,               # keyword
                [split /\//, $args],    # args list
                $start,
                $len,
            ], $cls);
        }
    }

    # Both patterns are scanned independently, so the hits must be merged
    # back into document order. Slot 2 holds the start offset.
    my @sorted = sort { $a->[2] <=> $b->[2] } @out;

    # sort() has undefined behaviour in scalar context; hand back an array
    # reference there instead.
    return wantarray ? @sorted : \@sorted;
}
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
# Build a single symbol directly, without scanning any HTML (e.g. for tests).
#
# Called as a class method with the fields in storage order:
#     $keyword, [$arg1, ..., $argn], $start, $length
# The arguments are copied into a fresh array, which is blessed into the
# invocant class and returned. No validation is performed on them.
sub new {
    my ($cls, @fields) = @_;
    return bless [@fields], $cls;
}
106
|
|
|
|
|
|
|
|
107
|
4
|
|
|
4
|
|
109662
|
use utf8; |
|
4
|
|
|
|
|
17
|
|
|
4
|
|
|
|
|
24
|
|
108
|
4
|
|
|
4
|
|
135
|
use v5.16; |
|
4
|
|
|
|
|
13
|
|
109
|
4
|
|
|
4
|
|
19
|
use strict; |
|
4
|
|
|
|
|
6
|
|
|
4
|
|
|
|
|
71
|
|
110
|
4
|
|
|
4
|
|
16
|
use warnings; |
|
4
|
|
|
|
|
8
|
|
|
4
|
|
|
|
|
751
|
|
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=head2 Properties |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=over |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=item keyword |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=cut |
119
|
|
|
|
|
|
|
|
120
|
53
|
|
|
53
|
1
|
5582
|
# Accessor: the symbol's keyword, kept in slot 0 of the underlying array.
sub keyword {
    my ($self) = @_;
    return $self->[0];
}
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=item args |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=cut |
125
|
|
|
|
|
|
|
|
126
|
40
|
|
|
40
|
1
|
65
|
# Accessor: the symbol's arguments, flattened out of the array reference
# kept in slot 1.
sub args {
    my ($self) = @_;
    my $list = $self->[1];
    return @{$list};
}
|
40
|
|
|
|
|
226
|
|
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=item start |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=cut |
131
|
|
|
|
|
|
|
|
132
|
116
|
|
|
116
|
1
|
383
|
# Accessor: the symbol's start offset, kept in slot 2 of the underlying array.
sub start {
    my ($self) = @_;
    return $self->[2];
}
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=item len |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=cut |
137
|
|
|
|
|
|
|
|
138
|
63
|
|
|
63
|
1
|
236
|
# Accessor: the symbol's length, kept in slot 3 of the underlying array.
sub len {
    my ($self) = @_;
    return $self->[3];
}
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=back |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
=cut |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
# Stringification: a symbol renders as
#   PFT::Text::Symbol[key:"KEYWORD", args:["A1", "A2"], start:N, len:N]
# The fields are read straight from the underlying array
# (keyword, args list, start, len).
use overload
    '""' => sub {
        my $self = shift;
        return sprintf
            'PFT::Text::Symbol[key:"%s", args:["%s"], start:%d, len:%d]',
            $self->[0],
            join('", "', @{$self->[1]}),
            @{$self}[2, 3],
        ;
    },
;
|
4
|
|
|
|
|
6
|
|
|
4
|
|
|
|
|
48
|
|
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
1; |