| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# You may distribute under the terms of either the GNU General Public License |
|
2
|
|
|
|
|
|
|
# or the Artistic License (the same terms as Perl itself) |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
# (C) Paul Evans, 2008-2023 -- leonerd@leonerd.org.uk |
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
package String::Tagged 0.22; |
|
7
|
|
|
|
|
|
|
|
|
8
|
20
|
|
|
20
|
|
4688169
|
use v5.14; |
|
|
20
|
|
|
|
|
188
|
|
|
9
|
20
|
|
|
20
|
|
118
|
use warnings; |
|
|
20
|
|
|
|
|
38
|
|
|
|
20
|
|
|
|
|
653
|
|
|
10
|
|
|
|
|
|
|
|
|
11
|
20
|
|
|
20
|
|
114
|
use Scalar::Util qw( blessed ); |
|
|
20
|
|
|
|
|
36
|
|
|
|
20
|
|
|
|
|
1336
|
|
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
require String::Tagged::Extent; |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
use constant { |
|
16
|
20
|
|
|
|
|
1947
|
FLAG_ANCHOR_BEFORE => 0x01, |
|
17
|
|
|
|
|
|
|
FLAG_ANCHOR_AFTER => 0x02, |
|
18
|
|
|
|
|
|
|
FLAG_ITERATING => 0x04, |
|
19
|
|
|
|
|
|
|
FLAG_DELETED => 0x08, |
|
20
|
20
|
|
|
20
|
|
134
|
}; |
|
|
20
|
|
|
|
|
37
|
|
|
21
|
|
|
|
|
|
|
|
|
22
|
20
|
|
|
20
|
|
144
|
use constant DEBUG => 0; |
|
|
20
|
|
|
|
|
49
|
|
|
|
20
|
|
|
|
|
1080
|
|
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# Since we're providing overloading, we should set fallback by default |
|
25
|
20
|
|
|
20
|
|
126
|
use overload fallback => 1; |
|
|
20
|
|
|
|
|
38
|
|
|
|
20
|
|
|
|
|
171
|
|
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
=head1 NAME |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
C<String::Tagged> - string buffers with value tags on extents |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
use String::Tagged; |
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
my $st = String::Tagged->new( "An important message" ); |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
$st->apply_tag( 3, 9, bold => 1 ); |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
$st->iter_substr_nooverlap( |
|
40
|
|
|
|
|
|
|
sub { |
|
41
|
|
|
|
|
|
|
my ( $substring, %tags ) = @_; |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
print $tags{bold} ? "$substring" |
|
44
|
|
|
|
|
|
|
: $substring; |
|
45
|
|
|
|
|
|
|
} |
|
46
|
|
|
|
|
|
|
); |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
This module implements an object class, instances of which store a (mutable) |
|
51
|
|
|
|
|
|
|
string buffer that supports tags. A tag is a name/value pair that applies to |
|
52
|
|
|
|
|
|
|
some extent of the underlying string. |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
The types of tag names ought to be strings, or at least values that are |
|
55
|
|
|
|
|
|
|
well-behaved as strings, as the names will often be used as the keys in hashes |
|
56
|
|
|
|
|
|
|
or applied to the C<eq> operator. |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
The types of tag values are not restricted - any scalar will do. This could be |
|
59
|
|
|
|
|
|
|
a simple integer or string, ARRAY or HASH reference, or even a CODE reference |
|
60
|
|
|
|
|
|
|
containing an event handler of some kind. |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
Tags may be arbitrarily overlapped. Any given offset within the string has in |
|
63
|
|
|
|
|
|
|
effect, a set of uniquely named tags. Tags of different names are independent. |
|
64
|
|
|
|
|
|
|
For tags of the same name, only the latest, shortest tag takes effect. |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
For example, consider a string with three tags represented here: |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
Here is my string with tags |
|
69
|
|
|
|
|
|
|
[-------------------------] foo => 1 |
|
70
|
|
|
|
|
|
|
[-------] foo => 2 |
|
71
|
|
|
|
|
|
|
[---] bar => 3 |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
Every character in this string has a tag named C<foo>. The value of this tag |
|
74
|
|
|
|
|
|
|
is 2 for the words C and C and the space inbetween, and 1 |
|
75
|
|
|
|
|
|
|
elsewhere. Additionally, the words C and C and the space between them |
|
76
|
|
|
|
|
|
|
also have the tag C<bar> with a value 3. |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
Since C<String::Tagged> does not understand the significance of the tag values |
|
79
|
|
|
|
|
|
|
it therefore cannot detect if two neighbouring tags really contain the same |
|
80
|
|
|
|
|
|
|
semantic idea. Consider the following string: |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
A string with words |
|
83
|
|
|
|
|
|
|
[-------] type => "message" |
|
84
|
|
|
|
|
|
|
[--------] type => "message" |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
This string contains two tags. C<String::Tagged> will treat this as two |
|
87
|
|
|
|
|
|
|
different tag values as far as C is concerned, even |
|
88
|
|
|
|
|
|
|
though C yields the same value for the C tag at any position |
|
89
|
|
|
|
|
|
|
in the string. The C method may be used to merge tag extents of |
|
90
|
|
|
|
|
|
|
tags that should be considered as equal. |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
=head1 NAMING |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
I spent a lot of time considering the name for this module. It seems that a |
|
95
|
|
|
|
|
|
|
number of people across a number of languages all created similar |
|
96
|
|
|
|
|
|
|
functionality, though named very differently. For the benefit of |
|
97
|
|
|
|
|
|
|
keyword-based search tools and similar, here's a list of some other names this |
|
98
|
|
|
|
|
|
|
sort of object might be known by: |
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=over 4 |
|
101
|
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
=item * |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
Extents |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=item * |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
Overlays |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=item * |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
Attribute or attributed strings |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=item * |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
Markup |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=item * |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
Out-of-band data |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=back |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=cut |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
# Install is_string_tagged( $value ): true only when $value is an instance of
# this package (or of a subclass), false for plain strings, undef and
# unblessed references.
{
   my $predicate;

   # It would be nice if we could #ifdef HAVE_PERL_VERSION(...)
   if( $] >= 5.034 ) {
      # The 'isa' operator version is compiled from a string, and only on
      # perls new enough to take this branch
      $predicate = eval 'use experimental "isa"; sub { $_[0] isa __PACKAGE__ }' // die $@;
   }
   else {
      $predicate = sub { blessed $_[0] and $_[0]->isa( __PACKAGE__ ) };
   }

   *is_string_tagged = $predicate;
}
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=head1 CONSTRUCTOR |
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=cut |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=head2 new |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
$st = String::Tagged->new( $str ) |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
Returns a new instance of a C<String::Tagged> object. It will contain no tags. |
|
141
|
|
|
|
|
|
|
If the optional C<$str> argument is supplied, the string buffer will be |
|
142
|
|
|
|
|
|
|
initialised from this value. |
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
If C<$str> is a C<String::Tagged> object then it will be cloned, as if calling |
|
145
|
|
|
|
|
|
|
the C<clone> method on it. |
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=cut |
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
# Constructor. $str is optional: an undefined value gives an empty buffer, a
# String::Tagged instance is cloned, and anything else is stringified to form
# the initial buffer. The new object starts with no tags.
sub new
{
   my ( $class, $str ) = @_;

   # An existing tagged string is copied by ->clone
   if( is_string_tagged( $str ) ) {
      return $class->clone( $str );
   }

   # Interpolate so that the buffer always holds a plain string
   my $buffer = defined $str ? "$str" : "";

   return bless { str => $buffer, tags => [] }, $class;
}
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=head2 new_tagged |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
$st = String::Tagged->new_tagged( $str, %tags ) |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
Shortcut for creating a new C<String::Tagged> object with the given tags |
|
169
|
|
|
|
|
|
|
applied to the entire length. The tags will not be anchored at either end. |
|
170
|
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
=cut |
|
172
|
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
# Builds a new object from $str and applies every name/value pair in %tags
# across the whole of its length. Returns the new object.
sub new_tagged
{
   my ( $class, $str, %tags ) = @_;

   my $self  = $class->new( $str );
   my $whole = $self->length;

   foreach my $name ( keys %tags ) {
      # Explicit 0 .. length extent, rather than the -1 anchoring markers
      $self->apply_tag( 0, $whole, $name => $tags{$name} );
   }

   return $self;
}
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
=head2 clone (class) |
|
187
|
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
$new = String::Tagged->clone( $orig, %opts ) |
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
Returns a new instance of C<String::Tagged> made by cloning the original, |
|
191
|
|
|
|
|
|
|
subject to the options provided. The returned instance will be in the |
|
192
|
|
|
|
|
|
|
requested class, which need not match the class of the original. |
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
The following options are recognised: |
|
195
|
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
=over 4 |
|
197
|
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
=item only_tags => ARRAY |
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
If present, gives an ARRAY reference containing tag names. Only those tags |
|
201
|
|
|
|
|
|
|
named here will be copied; others will be ignored. |
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=item except_tags => ARRAY |
|
204
|
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
If present, gives an ARRAY reference containing tag names. All tags will be |
|
206
|
|
|
|
|
|
|
copied except those named here. |
|
207
|
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
=item convert_tags => HASH |
|
209
|
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
If present, gives a HASH reference containing tag conversion functions. For |
|
211
|
|
|
|
|
|
|
any tags in the original to be copied whose names appear in the hash, the |
|
212
|
|
|
|
|
|
|
name and value are passed into the corresponding function, which should return |
|
213
|
|
|
|
|
|
|
an even-sized key/value list giving a tag, or a list of tags, to apply to the |
|
214
|
|
|
|
|
|
|
new clone. |
|
215
|
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
my @new_tags = $convert_tags->{$orig_name}->( $orig_name, $orig_value ) |
|
217
|
|
|
|
|
|
|
# Where @new_tags is ( $new_name, $new_value, $new_name_2, $new_value_2, ... ) |
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
As a further convenience, if the value for a given tag name is a plain string |
|
220
|
|
|
|
|
|
|
instead of a code reference, it gives the new name for the tag, and will be |
|
221
|
|
|
|
|
|
|
applied with its existing value. |
|
222
|
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
If C<only_tags> is being used too, then the source names of any tags to be |
|
224
|
|
|
|
|
|
|
converted must also be listed there, or they will not be copied. |
|
225
|
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
=item start => INT |
|
227
|
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
I |
|
229
|
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
Start at the given position; defaults to 0. |
|
231
|
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
=item end => INT |
|
233
|
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
I |
|
235
|
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
End after the given position; defaults to end of string. This option overrides |
|
237
|
|
|
|
|
|
|
C<len>. |
|
238
|
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
=item len => INT |
|
240
|
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
End after the given length beyond the start position; defaults to end of |
|
242
|
|
|
|
|
|
|
string. This option only applies if C<end> is not given. |
|
243
|
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
=back |
|
245
|
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
=head2 clone (instance) |
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
$new = $orig->clone( %args ) |
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
Called as an instance (rather than a class) method, the newly-cloned instance |
|
251
|
|
|
|
|
|
|
is returned in the same class as the original. |
|
252
|
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
=cut |
|
254
|
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
# Copies (a section of) a tagged string. Usable both as a class method,
#   Class->clone( $orig, %opts )
# and as an instance method,
#   $orig->clone( %opts )
# in which case the copy is made in the class of $orig. Recognised options:
# only_tags, except_tags, convert_tags, start, end, len. Returns the copy.
sub clone
{
   my ( $class, $orig );
   if( blessed $_[0] ) {
      # Instance call: the invocant is the original
      $orig  = shift;
      $class = ref $orig;
   }
   else {
      # Class call: the original is the first argument
      $class = shift;
      $orig  = shift;
   }
   my %opts = @_;

   # Turn the name lists into lookup sets; left undef when not requested
   my ( $only, $except );
   $only   = { map { $_ => 1 } @{ $opts{only_tags} } }   if exists $opts{only_tags};
   $except = { map { $_ => 1 } @{ $opts{except_tags} } } if exists $opts{except_tags};

   my $convert = $opts{convert_tags};

   my $origstr = $orig->str;

   # 'end' takes priority over 'len'; with neither, run to the end of string
   my $start = $opts{start} // 0;
   my $end;
   if( defined $opts{end} ) {
      $end = $opts{end};
   }
   elsif( defined $opts{len} ) {
      $end = $start + $opts{len};
   }
   else {
      $end = length $origstr;
   }

   my $len = $end - $start;

   my $new = $class->new( substr $origstr, $start, $len );

   foreach my $t ( @{ $orig->{tags} } ) {
      my ( $ts, $te, $tn, $tv, $tf ) = @$t;

      # Skip tags lying wholly before the section. The tag list is kept in
      # start order by _insert_tag, so the first tag starting at or beyond
      # the end finishes the scan.
      next if $te < $start;
      last if $ts >= $end;

      next if $only and not $only->{$tn};
      next if $except and $except->{$tn};

      # Work out the name/value pair(s) to apply in the copy
      my $c = $convert ? $convert->{$tn} : undef;

      my @tags;
      if( !$c ) {
         @tags = ( $tn, $tv );
      }
      elsif( ref $c eq "CODE" ) {
         # Conversion function: returns a list of new name/value pairs
         @tags = $c->( $tn, $tv );
      }
      else {
         # Plain string: a new name for the same value
         @tags = ( $c, $tv );
      }

      # Re-express the extent relative to the start of the copy
      $ts -= $start;
      $te -= $start;

      my $tl = $te - ( $ts < 0 ? 0 : $ts );

      next if $te <= 0;

      # -1 asks apply_tag to anchor the tag before/after the string
      $ts = -1 if $ts < 0 or $tf & FLAG_ANCHOR_BEFORE;
      $tl = -1 if $te > $len or $tf & FLAG_ANCHOR_AFTER;

      while( @tags ) {
         my ( $name, $value ) = splice @tags, 0, 2;
         $new->apply_tag( $ts, $tl, $name, $value );
      }
   }

   return $new;
}
|
323
|
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
# Wraps ( $start, $end ) on this string as a String::Tagged::Extent object.
# Of $flags, only the two anchoring bits are carried over.
sub _mkextent
{
   my ( $self, $start, $end, $flags ) = @_;

   my $anchor_bits = FLAG_ANCHOR_BEFORE | FLAG_ANCHOR_AFTER;

   my @fields = ( $self, $start, $end, $flags & $anchor_bits );
   return bless \@fields, 'String::Tagged::Extent';
}
|
333
|
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
=head2 from_sprintf |
|
335
|
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
$str = String::Tagged->from_sprintf( $format, @args ) |
|
337
|
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
I |
|
339
|
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
Returns a new instance of a C<String::Tagged> object, initialised by |
|
341
|
|
|
|
|
|
|
formatting the supplied arguments using the supplied format. |
|
342
|
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
The C<$format> string is similar to that supported by the core C<sprintf> |
|
344
|
|
|
|
|
|
|
operator, though a few features such as out-of-order argument indexing and |
|
345
|
|
|
|
|
|
|
vector formatting are missing. This format string may be a plain perl string, |
|
346
|
|
|
|
|
|
|
or an instance of C<String::Tagged>. In the latter case, any tags within it |
|
347
|
|
|
|
|
|
|
are preserved in the result. |
|
348
|
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
In the case of a C<%s> conversion, the value of the argument consumed may |
|
350
|
|
|
|
|
|
|
itself be a C<String::Tagged> instance. In this case it will be appended to |
|
351
|
|
|
|
|
|
|
the returned object, preserving any tags within it. |
|
352
|
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
All other conversions are handled individually by the core C<sprintf> |
|
354
|
|
|
|
|
|
|
operator and appended to the result. |
|
355
|
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
=cut |
|
357
|
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
# Builds a new instance by expanding a sprintf-style format.
#   $format - a plain string, or a String::Tagged instance whose tags are to
#             be kept in the result
#   @args   - values consumed, left to right, by the conversions in $format
#             (a '*' width or precision consumes one as well)
# Returns the new object. Warns (category 'uninitialized') when a %s
# conversion consumes an undefined argument, and warns on a conversion letter
# that is not recognised.
sub from_sprintf
{
   my $class = shift;
   my ( $format, @args ) = @_;

   # Clone the format string into the candidate return value, and then
   # repeatedly replace %... expansions with their required value using
   # ->set_substr, so that embedded tags in the format will behave sensibly.

   my $ret = ( is_string_tagged( $format ) ) ?
      $class->clone( $format ) :
      $class->new( $format );

   my $pos = 0;

   while( $pos < length $ret ) {
      # Re-stringify on every pass, because $ret changes as conversions are
      # replaced; matching resumes from $pos
      my $str = "$ret";
      pos( $str ) = $pos;

      my $replacement;

      # NOTE: the code after this if/elsif chain reads $-[0] and $+[0] of
      # whichever condition matched. The further pattern matches performed
      # inside the branch bodies do not disturb those values, because match
      # variables are restored when the enclosing block is left; keep any
      # such matches inside the branch blocks.
      if( $str =~ m/\G[^%]+/gc ) {
         # A literal span
         $pos = $+[0];
         next;
      }
      elsif( $str =~ m/\G%%/gc ) {
         # A literal %% conversion
         $replacement = "%";
      }
      elsif( $str =~ m/\G%([-]?)(\d+|\*)?(?:\.(\d+|\*))?s/gc ) {
         # A string
         my ( $flags, $width, $precision ) = ( $1, $2, $3 );
         $width     = shift @args if defined $width     and $width eq "*";
         $precision = shift @args if defined $precision and $precision eq "*";
         my $arg = shift @args;

         defined $arg or do {
            warnings::warnif( uninitialized => "Use of uninitialized value in String::Tagged->from_sprintf" );
            $arg = "";
         };

         if( defined $precision ) {
            # Truncate; a tagged argument is cut with its own ->substr so
            # that it remains a tagged string
            if( is_string_tagged( $arg ) ) {
               $arg = $arg->substr( 0, $precision );
            }
            else {
               $arg = substr $arg, 0, $precision;
            }
         }

         my $leftalign = $flags =~ m/-/;

         my $padding = defined $width ? $width - length $arg : 0;
         $padding = 0 if $padding < 0;

         # Assemble by successive concatenation so that a tagged $arg is
         # joined onto the padding by the class's own operators
         $replacement = "";

         $replacement .= " " x $padding if !$leftalign;

         $replacement .= $arg;

         $replacement .= " " x $padding if $leftalign;
      }
      elsif( $str =~ m/\G%(.*?)([cduoxefgXEGbBpaAiDUOF])/gc ) {
         # Another conversion format
         my ( $template, $flags ) = ( $2, $1 );

         # One argument for the value itself, plus one per '*'
         my $argc = 1;
         $argc += ( () = $flags =~ m/\*/g );

         $replacement = sprintf "%$flags$template", @args[0..$argc-1];
         splice @args, 0, $argc;
      }
      elsif( $str =~ m/\G%(.*?)([a-zA-Z])/gc ) {
         warn "Unrecognised sprintf conversion %$2";
         # Use an explicit empty string so that neither ->set_substr nor the
         # $pos arithmetic below is handed an undefined value
         $replacement = "";
      }
      else {
         # must be at EOF now
         last;
      }

      my $templatelen = $+[0] - $-[0];
      $ret->set_substr( $-[0], $templatelen, $replacement );

      $pos += length( $replacement );
   }

   return $ret;
}
|
447
|
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
=head2 join |
|
449
|
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
$str = String::Tagged->join( $sep, @parts ) |
|
451
|
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
I |
|
453
|
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
Returns a new instance of a C<String::Tagged> object, formed by concatenating |
|
455
|
|
|
|
|
|
|
each of the component pieces together, joined with the separator string. |
|
456
|
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
The result will be much like the core C<join> function, except that it will |
|
458
|
|
|
|
|
|
|
preserve tags in the resulting string. |
|
459
|
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
=cut |
|
461
|
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
# Concatenates @parts, separated by $sep, into a new object.
#   $sep   - separator; a plain string or a String::Tagged instance
#   @parts - pieces to join; plain strings or String::Tagged instances
# Always returns a fresh instance: the first part is copied rather than used
# directly, so the caller's object is neither returned nor appended to, and a
# single plain-string part or an empty list still gives an object.
sub join
{
   my ( $class, $sep, @parts ) = @_;

   # Tolerate being invoked on an instance as well as on a class name
   $class = ref $class || $class;

   is_string_tagged( $sep ) or
      $sep = $class->new( $sep );

   # ->new clones a tagged first part, and wraps a plain or missing one
   my $ret = $class->new( shift @parts );
   $ret .= $sep . $_ for @parts;

   return $ret;
}
|
475
|
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
=head1 METHODS |
|
477
|
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
=cut |
|
479
|
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
=head2 str |
|
481
|
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
$str = $st->str |
|
483
|
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
$str = "$st" |
|
485
|
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
Returns the plain string contained within the object. |
|
487
|
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
This method is also called for stringification; so the C<String::Tagged> |
|
489
|
|
|
|
|
|
|
object can be used in a plain string interpolation such as |
|
490
|
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
my $message = String::Tagged->new( "Hello world" ); |
|
492
|
|
|
|
|
|
|
print "My message is $message\n"; |
|
493
|
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
=cut |
|
495
|
|
|
|
|
|
|
|
|
496
|
20
|
|
|
20
|
|
32309
|
use overload '""' => 'str'; |
|
|
20
|
|
|
|
|
61
|
|
|
|
20
|
|
|
|
|
160
|
|
|
497
|
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
# Returns the plain string buffer; also installed as the stringification
# overload.
sub str
{
   my ( $self ) = @_;

   my $plain = $self->{str};
   return $plain;
}
|
503
|
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
=head2 length |
|
505
|
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
$len = $st->length |
|
507
|
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
$len = length( $st ) |
|
509
|
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
Returns the length of the plain string. Because stringification works on this |
|
511
|
|
|
|
|
|
|
object class, the normal core C<length> function works correctly on it. |
|
512
|
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
=cut |
|
514
|
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
# Returns the number of characters in the plain string buffer.
sub length
{
   my ( $self ) = @_;

   # CORE:: makes clear that the builtin is wanted, not this method
   return CORE::length( $self->{str} );
}
|
520
|
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
=head2 substr |
|
522
|
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
$str = $st->substr( $start, $len ) |
|
524
|
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
Returns a C<String::Tagged> instance representing a section from within the |
|
526
|
|
|
|
|
|
|
given string, containing all the same tags at the same conceptual positions. |
|
527
|
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
=cut |
|
529
|
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
# Returns a tagged copy of the section starting at $start and running for
# $len characters; the work is done by ->clone.
sub substr
{
   my ( $self, $start, $len ) = @_;

   my @range_opts = ( start => $start, len => $len );
   return $self->clone( @range_opts );
}
|
537
|
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
=head2 plain_substr |
|
539
|
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
$str = $st->plain_substr( $start, $len ) |
|
541
|
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
Returns as a plain perl string, the substring at the given position. This will |
|
543
|
|
|
|
|
|
|
be the same string data as returned by C<substr>, only as a plain string |
|
544
|
|
|
|
|
|
|
without the tags. |
|
545
|
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
=cut |
|
547
|
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
# Returns a section of the underlying buffer as a plain perl string, without
# any tags.
#   $start - offset of the first character; an undefined value is taken as 0
#   $len   - number of characters; when undefined the section runs to the end
#            of the string, which is how ->substr (through ->clone) treats a
#            missing length
sub plain_substr
{
   my $self = shift;
   my ( $start, $len ) = @_;

   $start //= 0;

   # An undefined length handed to CORE::substr would be taken as zero (with
   # a warning) and give an empty string, so use the two-argument form
   return CORE::substr( $self->{str}, $start ) unless defined $len;

   return CORE::substr( $self->{str}, $start, $len );
}
|
555
|
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
# Ordering function for tag records, comparing the package variables $a and
# $b in the manner of a sort comparator: by start position first, and by end
# position (so shortest first) when the starts are equal.
sub _cmp_tags
{
   my $by_start = $a->[0] <=> $b->[0];
   return $by_start if $by_start;

   return $a->[1] <=> $b->[1];
}
|
565
|
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
# Debugging aid: checks that the tag list is in _cmp_tags order. On the
# first adjacent pair found out of order, reports both extents on STDERR and
# dies.
sub _assert_sorted
{
   my ( $self ) = @_;

   my $tags = $self->{tags};
   # If fewer than 2 tags, must be sorted
   return if @$tags < 2;

   foreach my $i ( 1 .. $#$tags ) {
      # _cmp_tags() reads its operands from $a and $b
      local ( $a, $b ) = ( $tags->[$i-1], $tags->[$i] );
      next if _cmp_tags() <= 0;

      print STDERR "Tag order violation at i=$i\n";
      print STDERR "[@{[ $i - 1 ]}] = [ $tags->[$i-1]->[0], $tags->[$i-1]->[1] ]\n";
      print STDERR "[@{[ $i ]}] = [ $tags->[$i]->[0], $tags->[$i]->[1] ]\n";
      die "Assert failure";
   }
}
|
590
|
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
# Adds the tag record [ $start, $end, $name, $value, $flags ] to the tag
# list at a position that keeps the list in _cmp_tags order.
sub _insert_tag
{
   my ( $self, $start, $end, $name, $value, $flags ) = @_;

   my $tags   = $self->{tags};
   my $newtag = [ $start, $end, $name, $value, $flags ];

   # Specialcase - nothing to compare against yet
   unless( @$tags ) {
      push @$tags, $newtag;
      return;
   }

   # _cmp_tags() compares $a (always the new tag here) against $b
   local ( $a, $b ) = ( $newtag, $tags->[0] );

   # Two more special cases, checked before searching: the new tag sorts at
   # or before the current first tag ...
   if( _cmp_tags() <= 0 ) {
      unshift @$tags, $newtag;
      return;
   }

   # ... or at or after the current last one
   $b = $tags->[-1];
   if( _cmp_tags() >= 0 ) {
      push @$tags, $newtag;
      return;
   }

   # Otherwise binary-search for the insertion index
   my ( $lo, $hi ) = ( 0, $#$tags );
   my $inspos;

   while( $hi > $lo ) {
      my $mid = int( ( $lo + $hi ) / 2 );

      $b = $tags->[$mid];
      my $cmp = _cmp_tags();

      if( $cmp == 0 ) {
         # Identical extent: insert in front of it
         $inspos = $mid;
         last;
      }

      if( $cmp > 0 ) {
         $lo = $mid + 1;
      }
      else {
         $hi = $mid; # open interval
      }

      if( $lo == $hi ) {
         $inspos = $lo;
         last;
      }
   }

   $inspos //= $hi;

   # Keep the index within the bounds of the list
   $inspos = 0 if $inspos < 0;
   $inspos = @$tags if $inspos > @$tags;

   splice @$tags, $inspos, 0, $newtag;

   $self->_assert_sorted if DEBUG;
}
|
661
|
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
=head2 apply_tag |
|
663
|
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
$st->apply_tag( $start, $len, $name, $value ) |
|
665
|
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
Apply the named tag value to the given extent. The tag will start on the |
|
667
|
|
|
|
|
|
|
character at the C<$start> index, and continue for the next C<$len> |
|
668
|
|
|
|
|
|
|
characters. |
|
669
|
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
If C<$start> is given as -1, the tag will be considered to start "before" the |
|
671
|
|
|
|
|
|
|
actual string. If C<$len> is given as -1, the tag will be considered to |
|
672
|
|
|
|
|
|
|
end "after" end of the actual string. These special limits are used by |
|
673
|
|
|
|
|
|
|
C<set_substr> when deciding whether to move a tag boundary. The start of any |
|
674
|
|
|
|
|
|
|
tag that starts "before" the string is never moved, even if more text is |
|
675
|
|
|
|
|
|
|
inserted at the beginning. Similarly, a tag which ends "after" the end of the |
|
676
|
|
|
|
|
|
|
string, will continue to the end even if more text is appended. |
|
677
|
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
This method returns the C<$st> object. |
|
679
|
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
$st->apply_tag( $e, $name, $value ) |
|
681
|
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
Alternatively, an existing L<String::Tagged::Extent> object can be passed as |
|
683
|
|
|
|
|
|
|
the first argument instead of two integers. The new tag will apply at the |
|
684
|
|
|
|
|
|
|
given extent. |
|
685
|
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
=cut |
|
687
|
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
# Applies the tag $name => $value to an extent of the string. Two calling
# forms are accepted:
#   $st->apply_tag( $start, $len, $name, $value )
#   $st->apply_tag( $extent, $name, $value )
# Returns $st itself.
sub apply_tag
{
   my $self = shift;

   my $flags = 0;
   my ( $start, $end );

   if( blessed $_[0] ) {
      # Extent form: take the positions and anchoring from the object
      my $extent = shift;

      $start = $extent->start;
      $end   = $extent->end;

      $flags |= FLAG_ANCHOR_BEFORE if $extent->anchor_before;
      $flags |= FLAG_ANCHOR_AFTER  if $extent->anchor_after;
   }
   else {
      ( $start, my $len ) = splice @_, 0, 2;

      my $strlen = $self->length;

      # A negative start anchors the tag before the string
      if( $start < 0 ) {
         $start = 0;
         $flags |= FLAG_ANCHOR_BEFORE;
      }

      if( $len != -1 ) {
         # Ordinary length; never let the tag run past the end of string
         my $limit = $start + $len;
         $end = $limit > $strlen ? $strlen : $limit;
      }
      else {
         # A length of -1 anchors the tag after the string
         $end = $strlen;
         $flags |= FLAG_ANCHOR_AFTER;
      }
   }

   my ( $name, $value ) = @_;

   $self->_insert_tag( $start, $end, $name, $value, $flags );

   return $self;
}
|
729
|
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
sub _remove_tag |
|
731
|
|
|
|
|
|
|
{ |
|
732
|
9
|
|
|
9
|
|
14
|
my $self = shift; |
|
733
|
9
|
|
|
|
|
13
|
my $keepends = shift; |
|
734
|
9
|
|
|
|
|
12
|
my ( $start, $end ); |
|
735
|
|
|
|
|
|
|
|
|
736
|
9
|
100
|
|
|
|
26
|
if( blessed $_[0] ) { |
|
737
|
5
|
|
|
|
|
10
|
my $e = shift; |
|
738
|
5
|
|
|
|
|
9
|
$start = $e->start; |
|
739
|
5
|
|
|
|
|
11
|
$end = $e->end; |
|
740
|
|
|
|
|
|
|
} |
|
741
|
|
|
|
|
|
|
else { |
|
742
|
4
|
|
|
|
|
6
|
$start = shift; |
|
743
|
4
|
|
|
|
|
8
|
$end = $start + shift; |
|
744
|
|
|
|
|
|
|
} |
|
745
|
|
|
|
|
|
|
|
|
746
|
9
|
|
|
|
|
19
|
my ( $name ) = @_; |
|
747
|
|
|
|
|
|
|
|
|
748
|
9
|
100
|
|
|
|
20
|
if( my $t = $self->{iterating} ) { |
|
749
|
5
|
|
|
|
|
22
|
my ( $ts, $te, $tn ) = @$t; |
|
750
|
5
|
50
|
33
|
|
|
27
|
if( $start == $ts and $end == $te and $name eq $tn ) { |
|
|
|
|
33
|
|
|
|
|
|
751
|
5
|
|
|
|
|
7
|
$t->[4] |= FLAG_DELETED; |
|
752
|
5
|
|
|
|
|
12
|
return; |
|
753
|
|
|
|
|
|
|
} |
|
754
|
|
|
|
|
|
|
} |
|
755
|
|
|
|
|
|
|
|
|
756
|
4
|
|
|
|
|
8
|
my $tags = $self->{tags}; |
|
757
|
|
|
|
|
|
|
|
|
758
|
4
|
|
|
|
|
6
|
my $have_added = 0; |
|
759
|
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
# Can't foreach() because we modify $i |
|
761
|
4
|
|
|
|
|
14
|
for( my $i = 0; $i < @$tags; $i++ ) { |
|
762
|
8
|
|
|
|
|
11
|
my ( $ts, $te, $tn, $tv, $tf ) = @{ $tags->[$i] }; |
|
|
8
|
|
|
|
|
18
|
|
|
763
|
|
|
|
|
|
|
|
|
764
|
8
|
100
|
|
|
|
21
|
next if $te <= $start; |
|
765
|
7
|
100
|
|
|
|
14
|
last if $ts >= $end; |
|
766
|
|
|
|
|
|
|
|
|
767
|
4
|
50
|
|
|
|
9
|
next if $tn ne $name; |
|
768
|
|
|
|
|
|
|
|
|
769
|
4
|
100
|
100
|
|
|
20
|
if( $keepends and $end < $te ) { |
|
770
|
2
|
|
|
|
|
8
|
$self->_insert_tag( $end, $te, $tn, $tv, $tf & ~(FLAG_ANCHOR_BEFORE|FLAG_ITERATING) ); |
|
771
|
2
|
|
|
|
|
4
|
$have_added = 1; |
|
772
|
|
|
|
|
|
|
} |
|
773
|
|
|
|
|
|
|
|
|
774
|
4
|
50
|
|
|
|
11
|
if( $tf & FLAG_ITERATING ) { |
|
775
|
0
|
|
|
|
|
0
|
die "ARGH encountered FLAG_ITERATING while walking the list of tags during ->_remove_tag"; |
|
776
|
|
|
|
|
|
|
} |
|
777
|
|
|
|
|
|
|
|
|
778
|
4
|
|
|
|
|
8
|
splice @$tags, $i, 1; |
|
779
|
|
|
|
|
|
|
|
|
780
|
4
|
100
|
100
|
|
|
18
|
if( $keepends and $ts < $start ) { |
|
781
|
1
|
|
|
|
|
5
|
$self->_insert_tag( $ts, $start, $tn, $tv, $tf & ~(FLAG_ANCHOR_AFTER|FLAG_ITERATING) ); |
|
782
|
1
|
|
|
|
|
3
|
$have_added = 1; |
|
783
|
|
|
|
|
|
|
} |
|
784
|
|
|
|
|
|
|
else { |
|
785
|
3
|
|
|
|
|
19
|
$i--; |
|
786
|
|
|
|
|
|
|
} |
|
787
|
|
|
|
|
|
|
} |
|
788
|
|
|
|
|
|
|
|
|
789
|
4
|
|
|
|
|
6
|
if( DEBUG && $have_added ) { |
|
790
|
|
|
|
|
|
|
$self->_assert_sorted; |
|
791
|
|
|
|
|
|
|
} |
|
792
|
|
|
|
|
|
|
|
|
793
|
4
|
|
|
|
|
27
|
return $self; |
|
794
|
|
|
|
|
|
|
} |
|
795
|
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
=head2 unapply_tag |
|
797
|
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
$st->unapply_tag( $start, $len, $name ) |
|
799
|
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
Unapply the named tag value from the given extent. If the tag extends beyond |
|
801
|
|
|
|
|
|
|
this extent, then any partial fragment of the tag will be left in the string. |
|
802
|
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
This method returns the C<$st> object. |
|
804
|
|
|
|
|
|
|
|
|
805
|
|
|
|
|
|
|
$st->unapply_tag( $e, $name ) |
|
806
|
|
|
|
|
|
|
|
|
807
|
|
|
|
|
|
|
Alternatively, an existing L<String::Tagged::Extent> object can be passed as |
|
808
|
|
|
|
|
|
|
the first argument instead of two integers. |
|
809
|
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
=cut |
|
811
|
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
sub unapply_tag |
|
813
|
|
|
|
|
|
|
{ |
|
814
|
3
|
|
|
3
|
1
|
7
|
my $self = shift; |
|
815
|
3
|
|
|
|
|
9
|
return $self->_remove_tag( 1, @_ ); |
|
816
|
|
|
|
|
|
|
} |
|
817
|
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
=head2 delete_tag |
|
819
|
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
$st->delete_tag( $start, $len, $name ) |
|
821
|
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
Delete the named tag within the given extent. Entire tags are removed, even if |
|
823
|
|
|
|
|
|
|
they extend beyond this extent. |
|
824
|
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
This method returns the C<$st> object. |
|
826
|
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
$st->delete_tag( $e, $name ) |
|
828
|
|
|
|
|
|
|
|
|
829
|
|
|
|
|
|
|
Alternatively, an existing L<String::Tagged::Extent> object can be passed as |
|
830
|
|
|
|
|
|
|
the first argument instead of two integers. |
|
831
|
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
=cut |
|
833
|
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
sub delete_tag |
|
835
|
|
|
|
|
|
|
{ |
|
836
|
6
|
|
|
6
|
1
|
8670
|
my $self = shift; |
|
837
|
6
|
|
|
|
|
14
|
return $self->_remove_tag( 0, @_ ); |
|
838
|
|
|
|
|
|
|
} |
|
839
|
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
=head2 delete_all_tag |
|
841
|
|
|
|
|
|
|
|
|
842
|
|
|
|
|
|
|
$st->delete_all_tag( $name ) |
|
843
|
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
I |
|
845
|
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
Deletes every tag with the given name. This is more efficient than calling |
|
847
|
|
|
|
|
|
|
C<iter_extents> to list the tags then C<delete_tag> on each one individually |
|
848
|
|
|
|
|
|
|
in the case of a simple name match. |
|
849
|
|
|
|
|
|
|
|
|
850
|
|
|
|
|
|
|
This method returns the C<$st> object. |
|
851
|
|
|
|
|
|
|
|
|
852
|
|
|
|
|
|
|
=cut |
|
853
|
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
sub delete_all_tag |
|
855
|
|
|
|
|
|
|
{ |
|
856
|
1
|
|
|
1
|
1
|
8
|
my $self = shift; |
|
857
|
1
|
|
|
|
|
4
|
my ( $name ) = @_; |
|
858
|
|
|
|
|
|
|
|
|
859
|
1
|
|
|
|
|
2
|
my $tags = $self->{tags}; |
|
860
|
|
|
|
|
|
|
|
|
861
|
1
|
|
|
|
|
8
|
for( my $i = 0; $i < @$tags; $i++ ) { |
|
862
|
3
|
|
|
|
|
4
|
my ( $ts, $te, $tn, $tv, $tf ) = @{ $tags->[$i] }; |
|
|
3
|
|
|
|
|
8
|
|
|
863
|
|
|
|
|
|
|
|
|
864
|
3
|
100
|
|
|
|
9
|
next if $tn ne $name; |
|
865
|
|
|
|
|
|
|
|
|
866
|
2
|
|
|
|
|
5
|
splice @$tags, $i, 1, (); |
|
867
|
2
|
|
|
|
|
5
|
$i--; |
|
868
|
|
|
|
|
|
|
} |
|
869
|
|
|
|
|
|
|
|
|
870
|
1
|
|
|
|
|
6
|
return $self; |
|
871
|
|
|
|
|
|
|
} |
|
872
|
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
=head2 merge_tags |
|
874
|
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
$st->merge_tags( $eqsub ) |
|
876
|
|
|
|
|
|
|
|
|
877
|
|
|
|
|
|
|
Merge neighbouring or overlapping tags of the same name and equal values. |
|
878
|
|
|
|
|
|
|
|
|
879
|
|
|
|
|
|
|
For each pair of tags of the same name that apply on neighbouring or |
|
880
|
|
|
|
|
|
|
overlapping extents, the C<$eqsub> callback is called, as |
|
881
|
|
|
|
|
|
|
|
|
882
|
|
|
|
|
|
|
$equal = $eqsub->( $name, $value_a, $value_b ) |
|
883
|
|
|
|
|
|
|
|
|
884
|
|
|
|
|
|
|
If this function returns true then the tags are merged. |
|
885
|
|
|
|
|
|
|
|
|
886
|
|
|
|
|
|
|
The equality test function is free to perform any comparison of the values |
|
887
|
|
|
|
|
|
|
that may be relevant to the application; for example it may deeply compare |
|
888
|
|
|
|
|
|
|
referred structures and check for equivalence in some application-defined |
|
889
|
|
|
|
|
|
|
manner. In this case, the first tag of a pair is retained, the second is |
|
890
|
|
|
|
|
|
|
deleted. This may be relevant if the tag value is a reference to some object. |
|
891
|
|
|
|
|
|
|
|
|
892
|
|
|
|
|
|
|
=cut |
|
893
|
|
|
|
|
|
|
|
|
894
|
|
|
|
|
|
|
sub merge_tags |
|
895
|
|
|
|
|
|
|
{ |
|
896
|
8
|
|
|
8
|
1
|
8766
|
my $self = shift; |
|
897
|
8
|
|
|
|
|
13
|
my ( $eqsub ) = @_; |
|
898
|
|
|
|
|
|
|
|
|
899
|
8
|
|
|
|
|
16
|
my $tags = $self->{tags}; |
|
900
|
|
|
|
|
|
|
|
|
901
|
|
|
|
|
|
|
# Can't foreach() because we modify @$tags |
|
902
|
8
|
|
|
|
|
23
|
OUTER: for( my $i = 0; $i < @$tags; $i++ ) { |
|
903
|
13
|
|
|
|
|
24
|
my ( $ts, $te, $tn, $tv, $tf ) = @{ $tags->[$i] }; |
|
|
13
|
|
|
|
|
31
|
|
|
904
|
|
|
|
|
|
|
|
|
905
|
13
|
|
|
|
|
40
|
for( my $j = $i+1; $j < @$tags; $j++ ) { |
|
906
|
11
|
|
|
|
|
17
|
my ( $t2s, $t2e, $t2n, $t2v, $t2f ) = @{ $tags->[$j] }; |
|
|
11
|
|
|
|
|
21
|
|
|
907
|
|
|
|
|
|
|
|
|
908
|
11
|
100
|
|
|
|
26
|
last if $t2s > $te; |
|
909
|
10
|
50
|
|
|
|
24
|
next unless $t2s <= $te; |
|
910
|
10
|
100
|
|
|
|
40
|
next unless $t2n eq $tn; |
|
911
|
|
|
|
|
|
|
|
|
912
|
7
|
100
|
|
|
|
18
|
last unless $eqsub->( $tn, $tv, $t2v ); |
|
913
|
|
|
|
|
|
|
|
|
914
|
|
|
|
|
|
|
# Need to delete the tag at $j, extend the end of the tag at $i, and |
|
915
|
|
|
|
|
|
|
# possibly move $i later |
|
916
|
6
|
|
|
|
|
34
|
splice @$tags, $j, 1, (); |
|
917
|
6
|
|
|
|
|
11
|
$j--; |
|
918
|
|
|
|
|
|
|
|
|
919
|
6
|
|
|
|
|
11
|
$te = $tags->[$i][1] = $t2e; |
|
920
|
|
|
|
|
|
|
|
|
921
|
6
|
100
|
|
|
|
30
|
$tags->[$i][4] |= FLAG_ANCHOR_AFTER if $t2f & FLAG_ANCHOR_AFTER; |
|
922
|
|
|
|
|
|
|
|
|
923
|
6
|
|
|
|
|
13
|
local $a = $tags->[$i]; |
|
924
|
|
|
|
|
|
|
|
|
925
|
6
|
100
|
100
|
|
|
39
|
if( local $b = $tags->[$i+1] and _cmp_tags() > 0 ) { |
|
926
|
1
|
|
|
|
|
3
|
my $newpos = $i+1; |
|
927
|
1
|
|
33
|
|
|
6
|
while( local $b = $tags->[$newpos ] and _cmp_tags() <= 0 ) { |
|
928
|
0
|
|
|
|
|
0
|
$newpos++; |
|
929
|
|
|
|
|
|
|
} |
|
930
|
|
|
|
|
|
|
|
|
931
|
1
|
|
|
|
|
4
|
splice @$tags, $newpos, 0, splice @$tags, $i, 1, (); |
|
932
|
|
|
|
|
|
|
|
|
933
|
1
|
|
|
|
|
3
|
redo OUTER; |
|
934
|
|
|
|
|
|
|
} |
|
935
|
|
|
|
|
|
|
} |
|
936
|
|
|
|
|
|
|
} |
|
937
|
|
|
|
|
|
|
} |
|
938
|
|
|
|
|
|
|
|
|
939
|
|
|
|
|
|
|
=head2 iter_extents |
|
940
|
|
|
|
|
|
|
|
|
941
|
|
|
|
|
|
|
$st->iter_extents( $callback, %opts ) |
|
942
|
|
|
|
|
|
|
|
|
943
|
|
|
|
|
|
|
Iterate the tags stored in the string. For each tag, the CODE reference in |
|
944
|
|
|
|
|
|
|
C<$callback> is invoked once, being passed a L<String::Tagged::Extent> object |
|
945
|
|
|
|
|
|
|
that represents the extent of the tag. |
|
946
|
|
|
|
|
|
|
|
|
947
|
|
|
|
|
|
|
$callback->( $extent, $tagname, $tagvalue ) |
|
948
|
|
|
|
|
|
|
|
|
949
|
|
|
|
|
|
|
Options passed in C<%opts> may include: |
|
950
|
|
|
|
|
|
|
|
|
951
|
|
|
|
|
|
|
=over 4 |
|
952
|
|
|
|
|
|
|
|
|
953
|
|
|
|
|
|
|
=item start => INT |
|
954
|
|
|
|
|
|
|
|
|
955
|
|
|
|
|
|
|
Start at the given position; defaults to 0. |
|
956
|
|
|
|
|
|
|
|
|
957
|
|
|
|
|
|
|
=item end => INT |
|
958
|
|
|
|
|
|
|
|
|
959
|
|
|
|
|
|
|
End after the given position; defaults to end of string. This option overrides |
|
960
|
|
|
|
|
|
|
C<len>. |
|
961
|
|
|
|
|
|
|
|
|
962
|
|
|
|
|
|
|
=item len => INT |
|
963
|
|
|
|
|
|
|
|
|
964
|
|
|
|
|
|
|
End after the given length beyond the start position; defaults to end of |
|
965
|
|
|
|
|
|
|
string. This option only applies if C<end> is not given. |
|
966
|
|
|
|
|
|
|
|
|
967
|
|
|
|
|
|
|
=item only => ARRAY |
|
968
|
|
|
|
|
|
|
|
|
969
|
|
|
|
|
|
|
Select only the tags named in the given ARRAY reference. |
|
970
|
|
|
|
|
|
|
|
|
971
|
|
|
|
|
|
|
=item except => ARRAY |
|
972
|
|
|
|
|
|
|
|
|
973
|
|
|
|
|
|
|
Select all the tags except those named in the given ARRAY reference. |
|
974
|
|
|
|
|
|
|
|
|
975
|
|
|
|
|
|
|
=back |
|
976
|
|
|
|
|
|
|
|
|
977
|
|
|
|
|
|
|
It is safe to call C<delete_tag> from within the |
|
978
|
|
|
|
|
|
|
callback function to remove the tag currently being iterated on. |
|
979
|
|
|
|
|
|
|
|
|
980
|
|
|
|
|
|
|
$str->iter_extents( sub { |
|
981
|
|
|
|
|
|
|
my ( $e, $n, $v ) = @_; |
|
982
|
|
|
|
|
|
|
$str->delete_tag( $e, $n ) if $n =~ m/^tmp_/; |
|
983
|
|
|
|
|
|
|
} ); |
|
984
|
|
|
|
|
|
|
|
|
985
|
|
|
|
|
|
|
Apart from this scenario, the tags in the string should not otherwise be added |
|
986
|
|
|
|
|
|
|
or removed while the iteration is occurring. |
|
987
|
|
|
|
|
|
|
|
|
988
|
|
|
|
|
|
|
=cut |
|
989
|
|
|
|
|
|
|
|
|
990
|
|
|
|
|
|
|
sub iter_extents |
|
991
|
|
|
|
|
|
|
{ |
|
992
|
51
|
|
|
51
|
1
|
131
|
my $self = shift; |
|
993
|
51
|
|
|
|
|
101
|
my ( $callback, %opts ) = @_; |
|
994
|
|
|
|
|
|
|
|
|
995
|
|
|
|
|
|
|
my $start = exists $opts{start} ? $opts{start} : |
|
996
|
51
|
100
|
|
|
|
163
|
0; |
|
997
|
|
|
|
|
|
|
|
|
998
|
|
|
|
|
|
|
my $end = exists $opts{end} ? $opts{end} : |
|
999
|
|
|
|
|
|
|
exists $opts{len} ? $start + $opts{len} : |
|
1000
|
51
|
50
|
|
|
|
223
|
$self->length + 1; # so as to include zerolen at end |
|
|
|
100
|
|
|
|
|
|
|
1001
|
|
|
|
|
|
|
|
|
1002
|
51
|
100
|
|
|
|
139
|
my $only = exists $opts{only} ? { map { $_ => 1 } @{ $opts{only} } } : |
|
|
1
|
|
|
|
|
4
|
|
|
|
1
|
|
|
|
|
3
|
|
|
1003
|
|
|
|
|
|
|
undef; |
|
1004
|
|
|
|
|
|
|
|
|
1005
|
51
|
100
|
|
|
|
119
|
my $except = exists $opts{except} ? { map { $_ => 1 } @{ $opts{except} } } : |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
5
|
|
|
1006
|
|
|
|
|
|
|
undef; |
|
1007
|
|
|
|
|
|
|
|
|
1008
|
51
|
|
|
|
|
113
|
my $tags = $self->{tags}; |
|
1009
|
|
|
|
|
|
|
|
|
1010
|
51
|
|
|
|
|
190
|
for ( my $i = 0; $i < @$tags; $i++ ) { |
|
1011
|
76
|
|
|
|
|
146
|
my $t = $tags->[$i]; |
|
1012
|
76
|
|
|
|
|
184
|
my ( $ts, $te, $tn, $tv, $tf ) = @$t; |
|
1013
|
|
|
|
|
|
|
|
|
1014
|
76
|
100
|
|
|
|
187
|
next if $te < $start; |
|
1015
|
75
|
100
|
|
|
|
170
|
last if $ts >= $end; |
|
1016
|
|
|
|
|
|
|
|
|
1017
|
74
|
100
|
100
|
|
|
225
|
next if $only and !$only->{$tn}; |
|
1018
|
72
|
100
|
100
|
|
|
160
|
next if $except and $except->{$tn}; |
|
1019
|
|
|
|
|
|
|
|
|
1020
|
71
|
|
|
|
|
116
|
$t->[4] |= FLAG_ITERATING; |
|
1021
|
71
|
|
|
|
|
155
|
local $self->{iterating} = $t; |
|
1022
|
|
|
|
|
|
|
|
|
1023
|
71
|
|
|
|
|
168
|
$callback->( $self->_mkextent( $ts, $te, $tf ), $tn, $tv ); |
|
1024
|
|
|
|
|
|
|
|
|
1025
|
71
|
|
|
|
|
4519
|
$t->[4] &= ~FLAG_ITERATING; |
|
1026
|
|
|
|
|
|
|
|
|
1027
|
71
|
100
|
|
|
|
355
|
if( $t->[4] & FLAG_DELETED ) { |
|
1028
|
5
|
|
|
|
|
8
|
splice @$tags, $i, 1, (); |
|
1029
|
5
|
|
|
|
|
15
|
$i--; |
|
1030
|
|
|
|
|
|
|
} |
|
1031
|
|
|
|
|
|
|
} |
|
1032
|
|
|
|
|
|
|
} |
|
1033
|
|
|
|
|
|
|
|
|
1034
|
|
|
|
|
|
|
=head2 iter_tags |
|
1035
|
|
|
|
|
|
|
|
|
1036
|
|
|
|
|
|
|
$st->iter_tags( $callback, %opts ) |
|
1037
|
|
|
|
|
|
|
|
|
1038
|
|
|
|
|
|
|
Iterate the tags stored in the string. For each tag, the CODE reference in |
|
1039
|
|
|
|
|
|
|
C<$callback> is invoked once, being passed the start point and length of the |
|
1040
|
|
|
|
|
|
|
tag. |
|
1041
|
|
|
|
|
|
|
|
|
1042
|
|
|
|
|
|
|
$callback->( $start, $length, $tagname, $tagvalue ) |
|
1043
|
|
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
Options passed in C<%opts> are the same as for C<iter_extents>. |
|
1045
|
|
|
|
|
|
|
|
|
1046
|
|
|
|
|
|
|
=cut |
|
1047
|
|
|
|
|
|
|
|
|
1048
|
|
|
|
|
|
|
sub iter_tags |
|
1049
|
|
|
|
|
|
|
{ |
|
1050
|
25
|
|
|
25
|
1
|
12087
|
my $self = shift; |
|
1051
|
25
|
|
|
|
|
61
|
my ( $callback, %opts ) = @_; |
|
1052
|
|
|
|
|
|
|
|
|
1053
|
|
|
|
|
|
|
$self->iter_extents( |
|
1054
|
|
|
|
|
|
|
sub { |
|
1055
|
36
|
|
|
36
|
|
76
|
my ( $e, $tn, $tv ) = @_; |
|
1056
|
36
|
|
|
|
|
98
|
$callback->( $e->start, $e->length, $tn, $tv ); |
|
1057
|
|
|
|
|
|
|
}, |
|
1058
|
25
|
|
|
|
|
133
|
%opts |
|
1059
|
|
|
|
|
|
|
); |
|
1060
|
|
|
|
|
|
|
} |
|
1061
|
|
|
|
|
|
|
|
|
1062
|
|
|
|
|
|
|
=head2 iter_extents_nooverlap |
|
1063
|
|
|
|
|
|
|
|
|
1064
|
|
|
|
|
|
|
$st->iter_extents_nooverlap( $callback, %opts ) |
|
1065
|
|
|
|
|
|
|
|
|
1066
|
|
|
|
|
|
|
Iterate non-overlapping extents of tags stored in the string. The CODE |
|
1067
|
|
|
|
|
|
|
reference in C<$callback> is invoked for each extent in the string where no |
|
1068
|
|
|
|
|
|
|
tags change. The entire set of tags active in that extent is given to the |
|
1069
|
|
|
|
|
|
|
callback. Because the extent covers possibly-multiple tags, it will not define |
|
1070
|
|
|
|
|
|
|
the C<anchor_before> and C<anchor_after> flags. |
|
1071
|
|
|
|
|
|
|
|
|
1072
|
|
|
|
|
|
|
$callback->( $extent, %tags ) |
|
1073
|
|
|
|
|
|
|
|
|
1074
|
|
|
|
|
|
|
The callback will be invoked over the entire length of the string, including |
|
1075
|
|
|
|
|
|
|
any extents with no tags applied. |
|
1076
|
|
|
|
|
|
|
|
|
1077
|
|
|
|
|
|
|
Options may be passed in C<%opts> to control the range of the string iterated |
|
1078
|
|
|
|
|
|
|
over, in the same way as the C<iter_extents> method. |
|
1079
|
|
|
|
|
|
|
|
|
1080
|
|
|
|
|
|
|
If the C<only> or C<except> filters are applied, then only the tags that |
|
1081
|
|
|
|
|
|
|
survive filtering will be present in the C<%tags> hash. Tags that are excluded |
|
1082
|
|
|
|
|
|
|
by the filtering will not be present, nor will their bounds be used to split |
|
1083
|
|
|
|
|
|
|
the string into extents. |
|
1084
|
|
|
|
|
|
|
|
|
1085
|
|
|
|
|
|
|
=cut |
|
1086
|
|
|
|
|
|
|
|
|
1087
|
|
|
|
|
|
|
sub iter_extents_nooverlap |
|
1088
|
|
|
|
|
|
|
{ |
|
1089
|
34
|
|
|
34
|
1
|
68
|
my $self = shift; |
|
1090
|
34
|
|
|
|
|
60
|
my ( $callback, %opts ) = @_; |
|
1091
|
|
|
|
|
|
|
|
|
1092
|
|
|
|
|
|
|
my $start = exists $opts{start} ? $opts{start} : |
|
1093
|
34
|
100
|
|
|
|
89
|
0; |
|
1094
|
|
|
|
|
|
|
|
|
1095
|
|
|
|
|
|
|
my $end = exists $opts{end} ? $opts{end} : |
|
1096
|
|
|
|
|
|
|
exists $opts{len} ? $start + $opts{len} : |
|
1097
|
34
|
50
|
|
|
|
144
|
$self->length; |
|
|
|
100
|
|
|
|
|
|
|
1098
|
|
|
|
|
|
|
|
|
1099
|
34
|
100
|
|
|
|
85
|
my $only = exists $opts{only} ? { map { $_ => 1 } @{ $opts{only} } } : |
|
|
1
|
|
|
|
|
4
|
|
|
|
1
|
|
|
|
|
4
|
|
|
1100
|
|
|
|
|
|
|
undef; |
|
1101
|
|
|
|
|
|
|
|
|
1102
|
34
|
100
|
|
|
|
74
|
my $except = exists $opts{except} ? { map { $_ => 1 } @{ $opts{except} } } : |
|
|
1
|
|
|
|
|
4
|
|
|
|
1
|
|
|
|
|
3
|
|
|
1103
|
|
|
|
|
|
|
undef; |
|
1104
|
|
|
|
|
|
|
|
|
1105
|
34
|
|
|
|
|
63
|
my $tags = $self->{tags}; |
|
1106
|
|
|
|
|
|
|
|
|
1107
|
34
|
|
|
|
|
50
|
my @active; # ARRAY of [ $ts, $te, $tn, $tv ] |
|
1108
|
34
|
|
|
|
|
46
|
my $pos = $start; |
|
1109
|
|
|
|
|
|
|
|
|
1110
|
34
|
|
|
|
|
80
|
foreach my $t ( @$tags ) { |
|
1111
|
76
|
|
|
|
|
152
|
my ( $ts, $te, $tn, $tv ) = @$t; |
|
1112
|
|
|
|
|
|
|
|
|
1113
|
76
|
100
|
|
|
|
164
|
next if $te < $start; |
|
1114
|
74
|
100
|
|
|
|
140
|
last if $ts > $end; |
|
1115
|
|
|
|
|
|
|
|
|
1116
|
72
|
100
|
100
|
|
|
167
|
next if $only and !$only->{$tn}; |
|
1117
|
70
|
100
|
100
|
|
|
137
|
next if $except and $except->{$tn}; |
|
1118
|
|
|
|
|
|
|
|
|
1119
|
69
|
|
|
|
|
163
|
while( $pos < $ts ) { |
|
1120
|
43
|
|
|
|
|
74
|
my %activetags; |
|
1121
|
|
|
|
|
|
|
my %tagends; |
|
1122
|
43
|
|
|
|
|
72
|
my $rangeend = $ts; |
|
1123
|
|
|
|
|
|
|
|
|
1124
|
43
|
|
|
|
|
93
|
foreach ( @active ) { |
|
1125
|
42
|
|
|
|
|
86
|
my ( undef, $e, $n, $v ) = @$_; |
|
1126
|
|
|
|
|
|
|
|
|
1127
|
42
|
100
|
|
|
|
84
|
$e < $rangeend and $rangeend = $e; |
|
1128
|
42
|
100
|
66
|
|
|
117
|
next if $tagends{$n} and $tagends{$n} < $e; |
|
1129
|
|
|
|
|
|
|
|
|
1130
|
41
|
|
|
|
|
80
|
$activetags{$n} = $v; |
|
1131
|
41
|
|
|
|
|
95
|
$tagends{$n} = $e; |
|
1132
|
|
|
|
|
|
|
} |
|
1133
|
|
|
|
|
|
|
|
|
1134
|
43
|
|
|
|
|
103
|
$callback->( $self->_mkextent( $pos, $rangeend, 0 ), %activetags ); |
|
1135
|
|
|
|
|
|
|
|
|
1136
|
43
|
|
|
|
|
440
|
$pos = $rangeend; |
|
1137
|
43
|
|
|
|
|
87
|
@active = grep { $_->[1] > $pos } @active; |
|
|
42
|
|
|
|
|
161
|
|
|
1138
|
|
|
|
|
|
|
} |
|
1139
|
|
|
|
|
|
|
|
|
1140
|
69
|
|
|
|
|
199
|
push @active, [ $ts, $te, $tn, $tv ]; |
|
1141
|
|
|
|
|
|
|
} |
|
1142
|
|
|
|
|
|
|
|
|
1143
|
34
|
|
|
|
|
79
|
while( $pos < $end ) { |
|
1144
|
53
|
|
|
|
|
82
|
my %activetags; |
|
1145
|
|
|
|
|
|
|
my %tagends; |
|
1146
|
53
|
|
|
|
|
81
|
my $rangeend = $end; |
|
1147
|
|
|
|
|
|
|
|
|
1148
|
53
|
|
|
|
|
89
|
foreach ( @active ) { |
|
1149
|
72
|
|
|
|
|
128
|
my ( undef, $e, $n, $v ) = @$_; |
|
1150
|
|
|
|
|
|
|
|
|
1151
|
72
|
100
|
|
|
|
146
|
$e < $rangeend and $rangeend = $e; |
|
1152
|
72
|
100
|
100
|
|
|
181
|
next if $tagends{$n} and $tagends{$n} < $e; |
|
1153
|
|
|
|
|
|
|
|
|
1154
|
71
|
|
|
|
|
125
|
$activetags{$n} = $v; |
|
1155
|
71
|
|
|
|
|
128
|
$tagends{$n} = $e; |
|
1156
|
|
|
|
|
|
|
} |
|
1157
|
|
|
|
|
|
|
|
|
1158
|
53
|
|
|
|
|
122
|
$callback->( $self->_mkextent( $pos, $rangeend, 0 ), %activetags ); |
|
1159
|
|
|
|
|
|
|
|
|
1160
|
53
|
|
|
|
|
1558
|
$pos = $rangeend; |
|
1161
|
53
|
|
|
|
|
128
|
@active = grep { $_->[1] > $pos } @active; |
|
|
72
|
|
|
|
|
273
|
|
|
1162
|
|
|
|
|
|
|
} |
|
1163
|
|
|
|
|
|
|
|
|
1164
|
|
|
|
|
|
|
# We might have zero-length tags active at the very end of the range |
|
1165
|
34
|
100
|
|
|
|
222
|
if( my @zerolen = grep { $_->[0] == $pos and $_->[1] == $pos } @active ) { |
|
|
3
|
100
|
|
|
|
23
|
|
|
1166
|
1
|
|
|
|
|
4
|
my %activetags; |
|
1167
|
1
|
|
|
|
|
5
|
foreach ( @active ) { |
|
1168
|
1
|
|
|
|
|
2
|
my ( undef, undef, $n, $v ) = @$_; |
|
1169
|
|
|
|
|
|
|
|
|
1170
|
1
|
|
|
|
|
3
|
$activetags{$n} = $v; |
|
1171
|
|
|
|
|
|
|
} |
|
1172
|
|
|
|
|
|
|
|
|
1173
|
1
|
|
|
|
|
4
|
$callback->( $self->_mkextent( $pos, $pos, 0 ), %activetags ); |
|
1174
|
|
|
|
|
|
|
} |
|
1175
|
|
|
|
|
|
|
} |
|
1176
|
|
|
|
|
|
|
|
|
1177
|
|
|
|
|
|
|
=head2 iter_tags_nooverlap |
|
1178
|
|
|
|
|
|
|
|
|
1179
|
|
|
|
|
|
|
$st->iter_tags_nooverlap( $callback, %opts ) |
|
1180
|
|
|
|
|
|
|
|
|
1181
|
|
|
|
|
|
|
Iterate extents of the string using C<iter_extents_nooverlap>, but passing |
|
1182
|
|
|
|
|
|
|
the start and length of each extent to the callback instead of the extent |
|
1183
|
|
|
|
|
|
|
object. |
|
1184
|
|
|
|
|
|
|
|
|
1185
|
|
|
|
|
|
|
$callback->( $start, $length, %tags ) |
|
1186
|
|
|
|
|
|
|
|
|
1187
|
|
|
|
|
|
|
Options may be passed in C<%opts> to control the range of the string iterated |
|
1188
|
|
|
|
|
|
|
over, in the same way as the C<iter_extents> method. |
|
1189
|
|
|
|
|
|
|
|
|
1190
|
|
|
|
|
|
|
=cut |
|
1191
|
|
|
|
|
|
|
|
|
1192
|
|
|
|
|
|
|
sub iter_tags_nooverlap |
|
1193
|
|
|
|
|
|
|
{ |
|
1194
|
28
|
|
|
28
|
1
|
8789
|
my $self = shift; |
|
1195
|
28
|
|
|
|
|
64
|
my ( $callback, %opts ) = @_; |
|
1196
|
|
|
|
|
|
|
|
|
1197
|
|
|
|
|
|
|
$self->iter_extents_nooverlap( |
|
1198
|
|
|
|
|
|
|
sub { |
|
1199
|
80
|
|
|
80
|
|
168
|
my ( $e, %tags ) = @_; |
|
1200
|
80
|
|
|
|
|
201
|
$callback->( $e->start, $e->length, %tags ); |
|
1201
|
|
|
|
|
|
|
}, |
|
1202
|
28
|
|
|
|
|
180
|
%opts |
|
1203
|
|
|
|
|
|
|
); |
|
1204
|
|
|
|
|
|
|
} |
|
1205
|
|
|
|
|
|
|
|
|
1206
|
|
|
|
|
|
|
=head2 iter_substr_nooverlap |
|
1207
|
|
|
|
|
|
|
|
|
1208
|
|
|
|
|
|
|
$st->iter_substr_nooverlap( $callback, %opts ) |
|
1209
|
|
|
|
|
|
|
|
|
1210
|
|
|
|
|
|
|
Iterate extents of the string using C<iter_extents_nooverlap>, but passing the |
|
1211
|
|
|
|
|
|
|
substring of data instead of the extent object. |
|
1212
|
|
|
|
|
|
|
|
|
1213
|
|
|
|
|
|
|
$callback->( $substr, %tags ) |
|
1214
|
|
|
|
|
|
|
|
|
1215
|
|
|
|
|
|
|
Options may be passed in C<%opts> to control the range of the string iterated |
|
1216
|
|
|
|
|
|
|
over, in the same way as the C<iter_extents> method. |
|
1217
|
|
|
|
|
|
|
|
|
1218
|
|
|
|
|
|
|
=cut |
|
1219
|
|
|
|
|
|
|
|
|
1220
|
|
|
|
|
|
|
sub iter_substr_nooverlap |
|
1221
|
|
|
|
|
|
|
{ |
|
1222
|
6
|
|
|
6
|
1
|
6374
|
my $self = shift; |
|
1223
|
6
|
|
|
|
|
18
|
my ( $callback, %opts ) = @_; |
|
1224
|
|
|
|
|
|
|
|
|
1225
|
|
|
|
|
|
|
$self->iter_extents_nooverlap( |
|
1226
|
|
|
|
|
|
|
sub { |
|
1227
|
17
|
|
|
17
|
|
46
|
my ( $e, %tags ) = @_; |
|
1228
|
17
|
|
|
|
|
52
|
$callback->( $e->plain_substr, %tags ); |
|
1229
|
|
|
|
|
|
|
}, |
|
1230
|
6
|
|
|
|
|
42
|
%opts, |
|
1231
|
|
|
|
|
|
|
); |
|
1232
|
|
|
|
|
|
|
} |
|
1233
|
|
|
|
|
|
|
|
|
1234
|
|
|
|
|
|
|
=head2 tagnames |
|
1235
|
|
|
|
|
|
|
|
|
1236
|
|
|
|
|
|
|
@names = $st->tagnames |
|
1237
|
|
|
|
|
|
|
|
|
1238
|
|
|
|
|
|
|
Returns the set of tag names used in the string, in no particular order. |
|
1239
|
|
|
|
|
|
|
|
|
1240
|
|
|
|
|
|
|
=cut |
|
1241
|
|
|
|
|
|
|
|
|
1242
|
|
|
|
|
|
|
sub tagnames |
|
1243
|
|
|
|
|
|
|
{ |
|
1244
|
14
|
|
|
14
|
1
|
75
|
my $self = shift; |
|
1245
|
|
|
|
|
|
|
|
|
1246
|
14
|
|
|
|
|
87
|
my $tags = $self->{tags}; |
|
1247
|
|
|
|
|
|
|
|
|
1248
|
14
|
|
|
|
|
26
|
my %tags; |
|
1249
|
14
|
|
|
|
|
37
|
foreach my $t ( @$tags ) { |
|
1250
|
15
|
|
|
|
|
38
|
$tags{$t->[2]}++; |
|
1251
|
|
|
|
|
|
|
} |
|
1252
|
|
|
|
|
|
|
|
|
1253
|
14
|
|
|
|
|
110
|
keys %tags; |
|
1254
|
|
|
|
|
|
|
} |
|
1255
|
|
|
|
|
|
|
|
|
1256
|
|
|
|
|
|
|
=head2 get_tags_at |
|
1257
|
|
|
|
|
|
|
|
|
1258
|
|
|
|
|
|
|
$tags = $st->get_tags_at( $pos ) |
|
1259
|
|
|
|
|
|
|
|
|
1260
|
|
|
|
|
|
|
Returns a HASH reference of all the tag values active at the given position. |
|
1261
|
|
|
|
|
|
|
|
|
1262
|
|
|
|
|
|
|
=cut |
|
1263
|
|
|
|
|
|
|
|
|
1264
|
|
|
|
|
|
|
sub get_tags_at |
|
1265
|
|
|
|
|
|
|
{ |
|
1266
|
11
|
|
|
11
|
1
|
1934
|
my $self = shift; |
|
1267
|
11
|
|
|
|
|
25
|
my ( $pos ) = @_; |
|
1268
|
|
|
|
|
|
|
|
|
1269
|
11
|
|
|
|
|
21
|
my $tags = $self->{tags}; |
|
1270
|
|
|
|
|
|
|
|
|
1271
|
11
|
|
|
|
|
19
|
my %tags; |
|
1272
|
|
|
|
|
|
|
|
|
1273
|
|
|
|
|
|
|
# TODO: turn this into a binary search |
|
1274
|
11
|
|
|
|
|
25
|
foreach my $t ( @$tags ) { |
|
1275
|
14
|
|
|
|
|
39
|
my ( $ts, $te, $tn, $tv ) = @$t; |
|
1276
|
|
|
|
|
|
|
|
|
1277
|
14
|
100
|
|
|
|
32
|
last if $ts > $pos; |
|
1278
|
11
|
100
|
|
|
|
48
|
next if $te <= $pos; |
|
1279
|
|
|
|
|
|
|
|
|
1280
|
10
|
|
|
|
|
31
|
$tags{$tn} = $tv; |
|
1281
|
|
|
|
|
|
|
} |
|
1282
|
|
|
|
|
|
|
|
|
1283
|
11
|
|
|
|
|
59
|
return \%tags; |
|
1284
|
|
|
|
|
|
|
} |
|
1285
|
|
|
|
|
|
|
|
|
1286
|
|
|
|
|
|
|
=head2 get_tag_at |
|
1287
|
|
|
|
|
|
|
|
|
1288
|
|
|
|
|
|
|
$value = $st->get_tag_at( $pos, $name ) |
|
1289
|
|
|
|
|
|
|
|
|
1290
|
|
|
|
|
|
|
Returns the value of the named tag at the given position, or C<undef> if the |
|
1291
|
|
|
|
|
|
|
tag is not applied there. |
|
1292
|
|
|
|
|
|
|
|
|
1293
|
|
|
|
|
|
|
=cut |
|
1294
|
|
|
|
|
|
|
|
|
1295
|
|
|
|
|
|
|
sub get_tag_at |
|
1296
|
|
|
|
|
|
|
{ |
|
1297
|
6
|
|
|
6
|
1
|
19
|
my $self = shift; |
|
1298
|
6
|
|
|
|
|
19
|
my ( $pos, $name ) = @_; |
|
1299
|
|
|
|
|
|
|
|
|
1300
|
6
|
|
|
|
|
14
|
my $tags = $self->{tags}; |
|
1301
|
|
|
|
|
|
|
|
|
1302
|
6
|
|
|
|
|
10
|
my $value; |
|
1303
|
|
|
|
|
|
|
|
|
1304
|
6
|
|
|
|
|
18
|
foreach my $t ( @$tags ) { |
|
1305
|
15
|
|
|
|
|
35
|
my ( $ts, $te, $tn, $tv ) = @$t; |
|
1306
|
|
|
|
|
|
|
|
|
1307
|
15
|
100
|
|
|
|
37
|
last if $ts > $pos; |
|
1308
|
11
|
100
|
|
|
|
43
|
next if $te <= $pos; |
|
1309
|
|
|
|
|
|
|
|
|
1310
|
8
|
100
|
|
|
|
35
|
$value = $tv if $tn eq $name; |
|
1311
|
|
|
|
|
|
|
} |
|
1312
|
|
|
|
|
|
|
|
|
1313
|
6
|
|
|
|
|
22
|
return $value; |
|
1314
|
|
|
|
|
|
|
} |
|
1315
|
|
|
|
|
|
|
|
|
1316
|
|
|
|
|
|
|
=head2 get_tag_extent |
|
1317
|
|
|
|
|
|
|
|
|
1318
|
|
|
|
|
|
|
$extent = $st->get_tag_extent( $pos, $name ) |
|
1319
|
|
|
|
|
|
|
|
|
1320
|
|
|
|
|
|
|
If the named tag applies to the given position, returns a |
|
1321
|
|
|
|
|
|
|
L<String::Tagged::Extent> object to represent the extent of the tag at that |
|
1322
|
|
|
|
|
|
|
position. If it does not, C<undef> is returned. If an extent is returned it |
|
1323
|
|
|
|
|
|
|
will define the C<anchor_before> and C<anchor_after> flags if appropriate. |
|
1324
|
|
|
|
|
|
|
|
|
1325
|
|
|
|
|
|
|
=cut |
|
1326
|
|
|
|
|
|
|
|
|
1327
|
|
|
|
|
|
|
sub get_tag_extent |
|
1328
|
|
|
|
|
|
|
{ |
|
1329
|
13
|
|
|
13
|
1
|
79
|
my $self = shift; |
|
1330
|
13
|
|
|
|
|
65
|
my ( $pos, $name ) = @_; |
|
1331
|
|
|
|
|
|
|
|
|
1332
|
13
|
|
|
|
|
34
|
my $tags = $self->{tags}; |
|
1333
|
|
|
|
|
|
|
|
|
1334
|
13
|
|
|
|
|
26
|
my ( $start, $end, $flags ); |
|
1335
|
|
|
|
|
|
|
|
|
1336
|
13
|
|
|
|
|
30
|
foreach my $t ( @$tags ) { |
|
1337
|
19
|
|
|
|
|
47
|
my ( $ts, $te, $tn, undef, $tf ) = @$t; |
|
1338
|
|
|
|
|
|
|
|
|
1339
|
19
|
100
|
|
|
|
45
|
last if $ts > $pos; |
|
1340
|
16
|
100
|
|
|
|
51
|
next if $te <= $pos; |
|
1341
|
|
|
|
|
|
|
|
|
1342
|
15
|
100
|
|
|
|
45
|
next unless $tn eq $name; |
|
1343
|
|
|
|
|
|
|
|
|
1344
|
12
|
|
|
|
|
21
|
$start = $ts; |
|
1345
|
12
|
|
|
|
|
20
|
$end = $te; |
|
1346
|
12
|
|
|
|
|
25
|
$flags = $tf; |
|
1347
|
|
|
|
|
|
|
} |
|
1348
|
|
|
|
|
|
|
|
|
1349
|
13
|
100
|
|
|
|
31
|
if( defined $start ) { |
|
1350
|
12
|
|
|
|
|
62
|
return $self->_mkextent( $start, $end, $flags ); |
|
1351
|
|
|
|
|
|
|
} |
|
1352
|
|
|
|
|
|
|
else { |
|
1353
|
1
|
|
|
|
|
4
|
return undef; |
|
1354
|
|
|
|
|
|
|
} |
|
1355
|
|
|
|
|
|
|
} |
|
1356
|
|
|
|
|
|
|
|
|
1357
|
|
|
|
|
|
|
=head2 get_tag_missing_extent |
|
1358
|
|
|
|
|
|
|
|
|
1359
|
|
|
|
|
|
|
$extent = $st->get_tag_missing_extent( $pos, $name ) |
|
1360
|
|
|
|
|
|
|
|
|
1361
|
|
|
|
|
|
|
If the named tag does not apply at the given position, returns the extent of |
|
1362
|
|
|
|
|
|
|
the string around that position that does not have the tag. If it does exist, |
|
1363
|
|
|
|
|
|
|
C<undef> is returned. If an extent is returned it will not define the |
|
1364
|
|
|
|
|
|
|
C<anchor_before> and C<anchor_after> flags, as these do not make sense for the |
|
1365
|
|
|
|
|
|
|
range in which a tag is absent. |
|
1366
|
|
|
|
|
|
|
|
|
1367
|
|
|
|
|
|
|
=cut |
|
1368
|
|
|
|
|
|
|
|
|
1369
|
|
|
|
|
|
|
sub get_tag_missing_extent |
|
1370
|
|
|
|
|
|
|
{ |
|
1371
|
3
|
|
|
3
|
1
|
454
|
my $self = shift; |
|
1372
|
3
|
|
|
|
|
7
|
my ( $pos, $name ) = @_; |
|
1373
|
|
|
|
|
|
|
|
|
1374
|
3
|
|
|
|
|
5
|
my $tags = $self->{tags}; |
|
1375
|
|
|
|
|
|
|
|
|
1376
|
3
|
|
|
|
|
6
|
my $start = 0; |
|
1377
|
|
|
|
|
|
|
|
|
1378
|
3
|
|
|
|
|
6
|
foreach my $t ( @$tags ) { |
|
1379
|
6
|
|
|
|
|
13
|
my ( $ts, $te, $tn ) = @$t; |
|
1380
|
|
|
|
|
|
|
|
|
1381
|
6
|
100
|
|
|
|
15
|
next unless $tn eq $name; |
|
1382
|
|
|
|
|
|
|
|
|
1383
|
3
|
100
|
100
|
|
|
15
|
if( $ts <= $pos and $te > $pos ) { |
|
1384
|
1
|
|
|
|
|
4
|
return undef; |
|
1385
|
|
|
|
|
|
|
} |
|
1386
|
|
|
|
|
|
|
|
|
1387
|
2
|
100
|
|
|
|
6
|
if( $ts > $pos ) { |
|
1388
|
1
|
|
|
|
|
4
|
return $self->_mkextent( $start, $ts, 0 ); |
|
1389
|
|
|
|
|
|
|
} |
|
1390
|
|
|
|
|
|
|
|
|
1391
|
1
|
|
|
|
|
3
|
$start = $te; |
|
1392
|
|
|
|
|
|
|
} |
|
1393
|
|
|
|
|
|
|
|
|
1394
|
1
|
|
|
|
|
3
|
return $self->_mkextent( $start, $self->length, 0 ); |
|
1395
|
|
|
|
|
|
|
} |
|
1396
|
|
|
|
|
|
|
|
|
1397
|
|
|
|
|
|
|
=head2 set_substr |
|
1398
|
|
|
|
|
|
|
|
|
1399
|
|
|
|
|
|
|
$st->set_substr( $start, $len, $newstr ) |
|
1400
|
|
|
|
|
|
|
|
|
1401
|
|
|
|
|
|
|
Modifies an extent of the underlying plain string to that given. The extents of |
|
1402
|
|
|
|
|
|
|
tags in the string are adjusted to cope with the modified region, and the |
|
1403
|
|
|
|
|
|
|
adjustment in length. |
|
1404
|
|
|
|
|
|
|
|
|
1405
|
|
|
|
|
|
|
Tags entirely before the replaced extent remain unchanged. |
|
1406
|
|
|
|
|
|
|
|
|
1407
|
|
|
|
|
|
|
Tags entirely within the replaced extent are deleted. |
|
1408
|
|
|
|
|
|
|
|
|
1409
|
|
|
|
|
|
|
Tags entirely after the replaced extent are moved by appropriate amount to |
|
1410
|
|
|
|
|
|
|
ensure they still apply to the same characters as before. |
|
1411
|
|
|
|
|
|
|
|
|
1412
|
|
|
|
|
|
|
Tags that start before and end after the extent remain, and have their lengths |
|
1413
|
|
|
|
|
|
|
suitably adjusted. |
|
1414
|
|
|
|
|
|
|
|
|
1415
|
|
|
|
|
|
|
Tags that span just the start or end of the extent, but not both, are |
|
1416
|
|
|
|
|
|
|
truncated, so as to remove the part of the tag applied on the modified extent |
|
1417
|
|
|
|
|
|
|
but preserving that applied outside. |
|
1418
|
|
|
|
|
|
|
|
|
1419
|
|
|
|
|
|
|
If C<$newstr> is a C<String::Tagged> object, then its tags will be applied to |
|
1420
|
|
|
|
|
|
|
C<$st> as appropriate. Edge-anchored tags in C<$newstr> will not be extended |
|
1421
|
|
|
|
|
|
|
through C<$st>, though they will apply as edge-anchored if they now sit at the |
|
1422
|
|
|
|
|
|
|
edge of the new string. |
|
1423
|
|
|
|
|
|
|
|
|
1424
|
|
|
|
|
|
|
=cut |
|
1425
|
|
|
|
|
|
|
|
|
1426
|
|
|
|
|
|
|
sub set_substr |
|
1427
|
|
|
|
|
|
|
{ |
|
1428
|
56
|
|
|
56
|
1
|
14821
|
my $self = shift; |
|
1429
|
56
|
|
|
|
|
125
|
my ( $start, $len, $new ) = @_; |
|
1430
|
|
|
|
|
|
|
|
|
1431
|
56
|
|
|
|
|
134
|
my $limit = $self->length; |
|
1432
|
|
|
|
|
|
|
|
|
1433
|
56
|
50
|
|
|
|
182
|
$start = $limit if $start > $limit; |
|
1434
|
56
|
50
|
|
|
|
163
|
$len = ( $limit - $start ) if $len > ( $limit - $start ); |
|
1435
|
|
|
|
|
|
|
|
|
1436
|
56
|
|
|
|
|
208
|
CORE::substr( $self->{str}, $start, $len ) = $new; |
|
1437
|
|
|
|
|
|
|
|
|
1438
|
56
|
|
|
|
|
105
|
my $oldend = $start + $len; |
|
1439
|
56
|
|
|
|
|
107
|
my $newend = $start + CORE::length( $new ); |
|
1440
|
|
|
|
|
|
|
|
|
1441
|
56
|
|
|
|
|
88
|
my $delta = $newend - $oldend; |
|
1442
|
|
|
|
|
|
|
# Positions after $oldend have now moved up $delta places |
|
1443
|
|
|
|
|
|
|
|
|
1444
|
56
|
|
|
|
|
93
|
my $tags = $self->{tags}; |
|
1445
|
|
|
|
|
|
|
|
|
1446
|
56
|
|
|
|
|
87
|
my $i = 0; |
|
1447
|
|
|
|
|
|
|
|
|
1448
|
56
|
|
|
|
|
159
|
for( ; $i < @$tags; $i++ ) { |
|
1449
|
|
|
|
|
|
|
# In this loop we'll handle tags that start before the deleted section |
|
1450
|
|
|
|
|
|
|
|
|
1451
|
40
|
|
|
|
|
68
|
my $t = $tags->[$i]; |
|
1452
|
40
|
|
|
|
|
132
|
my ( $ts, $te, undef, undef, $tf ) = @$t; |
|
1453
|
|
|
|
|
|
|
|
|
1454
|
40
|
100
|
100
|
|
|
202
|
last if $ts >= $start and not( $tf & FLAG_ANCHOR_BEFORE ); |
|
1455
|
|
|
|
|
|
|
|
|
1456
|
|
|
|
|
|
|
# Two cases: |
|
1457
|
|
|
|
|
|
|
# A: Tag spans entirely outside deleted section - stretch/compress it |
|
1458
|
|
|
|
|
|
|
# We may have to collapse it to nothing, so delete it |
|
1459
|
|
|
|
|
|
|
# B: Tag starts before but ends within deleted section - truncate it |
|
1460
|
|
|
|
|
|
|
# Plus a case we don't care about |
|
1461
|
|
|
|
|
|
|
# Tag starts and ends entirely before the deleted section - ignore it |
|
1462
|
|
|
|
|
|
|
|
|
1463
|
28
|
100
|
100
|
|
|
219
|
if( $te > $oldend or |
|
|
|
100
|
100
|
|
|
|
|
|
1464
|
|
|
|
|
|
|
( $te == $oldend and $tf & FLAG_ANCHOR_AFTER ) ) { |
|
1465
|
|
|
|
|
|
|
# Case A |
|
1466
|
14
|
|
|
|
|
54
|
$t->[1] += $delta; |
|
1467
|
|
|
|
|
|
|
|
|
1468
|
14
|
50
|
|
|
|
65
|
if( $t->[0] == $t->[1] ) { |
|
1469
|
0
|
|
|
|
|
0
|
splice @$tags, $i, 1, (); |
|
1470
|
0
|
|
|
|
|
0
|
$i--; |
|
1471
|
0
|
|
|
|
|
0
|
next; |
|
1472
|
|
|
|
|
|
|
} |
|
1473
|
|
|
|
|
|
|
} |
|
1474
|
|
|
|
|
|
|
elsif( $te > $start ) { |
|
1475
|
|
|
|
|
|
|
# Case B |
|
1476
|
1
|
|
|
|
|
3
|
$t->[1] = $start; |
|
1477
|
|
|
|
|
|
|
} |
|
1478
|
|
|
|
|
|
|
} |
|
1479
|
|
|
|
|
|
|
|
|
1480
|
56
|
|
|
|
|
176
|
for( ; $i < @$tags; $i++ ) { |
|
1481
|
13
|
|
|
|
|
63
|
my $t = $tags->[$i]; |
|
1482
|
13
|
|
|
|
|
39
|
my ( $ts, $te ) = @$t; |
|
1483
|
|
|
|
|
|
|
|
|
1484
|
|
|
|
|
|
|
# In this loop we'll handle tags that start within the deleted section |
|
1485
|
13
|
100
|
|
|
|
52
|
last if $ts >= $oldend; |
|
1486
|
|
|
|
|
|
|
|
|
1487
|
|
|
|
|
|
|
# Two cases |
|
1488
|
|
|
|
|
|
|
# C: Tag contained entirely within deleted section - delete it |
|
1489
|
|
|
|
|
|
|
# D: Tag starts within but ends after the deleted section - truncate it |
|
1490
|
|
|
|
|
|
|
|
|
1491
|
3
|
100
|
|
|
|
16
|
if( $te <= $oldend ) { |
|
1492
|
|
|
|
|
|
|
# Case C |
|
1493
|
2
|
|
|
|
|
7
|
splice @$tags, $i, 1; |
|
1494
|
2
|
|
|
|
|
3
|
$i--; |
|
1495
|
2
|
|
|
|
|
7
|
next; |
|
1496
|
|
|
|
|
|
|
} |
|
1497
|
|
|
|
|
|
|
else { |
|
1498
|
|
|
|
|
|
|
# Case D |
|
1499
|
1
|
|
|
|
|
3
|
$t->[0] = $newend; |
|
1500
|
1
|
|
|
|
|
3
|
$t->[1] += $delta; |
|
1501
|
|
|
|
|
|
|
} |
|
1502
|
|
|
|
|
|
|
} |
|
1503
|
|
|
|
|
|
|
|
|
1504
|
56
|
|
|
|
|
139
|
for( ; $i < @$tags; $i++ ) { |
|
1505
|
12
|
|
|
|
|
25
|
my $t = $tags->[$i]; |
|
1506
|
12
|
|
|
|
|
55
|
my ( $ts, $te, undef, undef, $tf ) = @$t; |
|
1507
|
|
|
|
|
|
|
|
|
1508
|
|
|
|
|
|
|
# In this loop we'll handle tags that start after the deleted section |
|
1509
|
|
|
|
|
|
|
|
|
1510
|
|
|
|
|
|
|
# One case |
|
1511
|
|
|
|
|
|
|
# E: Tag starts and ends after the deleted section - move it |
|
1512
|
12
|
100
|
|
|
|
62
|
$t->[0] += $delta unless $tf & FLAG_ANCHOR_BEFORE; |
|
1513
|
12
|
|
|
|
|
47
|
$t->[1] += $delta; |
|
1514
|
|
|
|
|
|
|
|
|
1515
|
|
|
|
|
|
|
# If we've not moved the start (because it was FLAG_ANCHOR_BEFORE), we |
|
1516
|
|
|
|
|
|
|
# might now have an ordering constraint violation. Better fix it. |
|
1517
|
12
|
|
|
|
|
47
|
local $b = $t; |
|
1518
|
12
|
|
|
|
|
51
|
foreach my $new_i ( reverse 0 .. $i-1 ) { |
|
1519
|
7
|
|
|
|
|
14
|
local $a = $tags->[$new_i]; |
|
1520
|
|
|
|
|
|
|
|
|
1521
|
7
|
100
|
|
|
|
27
|
last if _cmp_tags() <= 0; |
|
1522
|
|
|
|
|
|
|
|
|
1523
|
1
|
|
|
|
|
6
|
splice @$tags, $new_i, 0, splice @$tags, $i, 1, (); |
|
1524
|
|
|
|
|
|
|
|
|
1525
|
1
|
|
|
|
|
3
|
last; |
|
1526
|
|
|
|
|
|
|
} |
|
1527
|
|
|
|
|
|
|
} |
|
1528
|
|
|
|
|
|
|
|
|
1529
|
56
|
100
|
|
|
|
115
|
if( is_string_tagged( $new ) ) { |
|
1530
|
21
|
|
|
|
|
67
|
my $atstart = $start == 0; |
|
1531
|
21
|
|
|
|
|
56
|
my $atend = $newend == $self->length; |
|
1532
|
|
|
|
|
|
|
|
|
1533
|
|
|
|
|
|
|
$new->iter_extents( sub { |
|
1534
|
21
|
|
|
21
|
|
50
|
my ( $e, $tn, $tv ) = @_; |
|
1535
|
21
|
50
|
66
|
|
|
126
|
$self->apply_tag( |
|
|
|
100
|
100
|
|
|
|
|
|
1536
|
|
|
|
|
|
|
( $atstart && $e->anchor_before ) ? -1 : $start + $e->start, |
|
1537
|
|
|
|
|
|
|
( $atend && $e->anchor_after ) ? -1 : $e->length, |
|
1538
|
|
|
|
|
|
|
$tn, $tv ); |
|
1539
|
21
|
|
|
|
|
253
|
} ); |
|
1540
|
|
|
|
|
|
|
} |
|
1541
|
|
|
|
|
|
|
|
|
1542
|
56
|
|
|
|
|
163
|
$self->_assert_sorted if DEBUG; |
|
1543
|
|
|
|
|
|
|
|
|
1544
|
56
|
|
|
|
|
222
|
return $self; |
|
1545
|
|
|
|
|
|
|
} |
|
1546
|
|
|
|
|
|
|
|
|
1547
|
|
|
|
|
|
|
=head2 insert |
|
1548
|
|
|
|
|
|
|
|
|
1549
|
|
|
|
|
|
|
$st->insert( $start, $newstr ) |
|
1550
|
|
|
|
|
|
|
|
|
1551
|
|
|
|
|
|
|
Insert the given string at the given position. A shortcut around |
|
1552
|
|
|
|
|
|
|
C<set_substr>. |
|
1553
|
|
|
|
|
|
|
|
|
1554
|
|
|
|
|
|
|
If C<$newstr> is a C<String::Tagged> object, then its tags will be applied to |
|
1555
|
|
|
|
|
|
|
C<$st> as appropriate. If C<$start> is 0, any before-anchored tags in C<$newstr> will |
|
1556
|
|
|
|
|
|
|
become before-anchored in C<$st>. |
|
1557
|
|
|
|
|
|
|
|
|
1558
|
|
|
|
|
|
|
=cut |
|
1559
|
|
|
|
|
|
|
|
|
1560
|
|
|
|
|
|
|
sub insert |
|
1561
|
|
|
|
|
|
|
{ |
|
1562
|
15
|
|
|
15
|
1
|
7863
|
my $self = shift; |
|
1563
|
15
|
|
|
|
|
47
|
my ( $at, $new ) = @_; |
|
1564
|
15
|
|
|
|
|
78
|
$self->set_substr( $at, 0, $new ); |
|
1565
|
|
|
|
|
|
|
} |
|
1566
|
|
|
|
|
|
|
|
|
1567
|
|
|
|
|
|
|
=head2 append |
|
1568
|
|
|
|
|
|
|
|
|
1569
|
|
|
|
|
|
|
$st->append( $newstr ) |
|
1570
|
|
|
|
|
|
|
|
|
1571
|
|
|
|
|
|
|
$st .= $newstr |
|
1572
|
|
|
|
|
|
|
|
|
1573
|
|
|
|
|
|
|
Append to the underlying plain string. A shortcut around C<set_substr>. |
|
1574
|
|
|
|
|
|
|
|
|
1575
|
|
|
|
|
|
|
If C<$newstr> is a C<String::Tagged> object, then its tags will be applied to |
|
1576
|
|
|
|
|
|
|
C<$st> as appropriate. Any after-anchored tags in C<$newstr> will become after-anchored |
|
1577
|
|
|
|
|
|
|
in C<$st>. |
|
1578
|
|
|
|
|
|
|
|
|
1579
|
|
|
|
|
|
|
=cut |
|
1580
|
|
|
|
|
|
|
|
|
1581
|
20
|
|
|
20
|
|
81524
|
use overload '.=' => 'append'; |
|
|
20
|
|
|
|
|
51
|
|
|
|
20
|
|
|
|
|
139
|
|
|
1582
|
|
|
|
|
|
|
|
|
1583
|
|
|
|
|
|
|
sub append |
|
1584
|
|
|
|
|
|
|
{ |
|
1585
|
66
|
|
|
66
|
1
|
1465
|
my $self = shift; |
|
1586
|
66
|
|
|
|
|
143
|
my ( $new ) = @_; |
|
1587
|
|
|
|
|
|
|
|
|
1588
|
66
|
100
|
|
|
|
131
|
return $self->set_substr( $self->length, 0, $new ) if is_string_tagged( $new ); |
|
1589
|
|
|
|
|
|
|
|
|
1590
|
|
|
|
|
|
|
# Optimised version |
|
1591
|
48
|
|
|
|
|
146
|
$self->{str} .= $new; |
|
1592
|
|
|
|
|
|
|
|
|
1593
|
48
|
|
|
|
|
119
|
my $newend = $self->length; |
|
1594
|
|
|
|
|
|
|
|
|
1595
|
48
|
|
|
|
|
83
|
my $tags = $self->{tags}; |
|
1596
|
|
|
|
|
|
|
|
|
1597
|
48
|
|
|
|
|
88
|
my $i = 0; |
|
1598
|
|
|
|
|
|
|
|
|
1599
|
|
|
|
|
|
|
# Adjust boundaries of ANCHOR_AFTER tags |
|
1600
|
48
|
|
|
|
|
123
|
for( ; $i < @$tags; $i++ ) { |
|
1601
|
58
|
|
|
|
|
98
|
my $t = $tags->[$i]; |
|
1602
|
58
|
100
|
|
|
|
177
|
$t->[1] = $newend if $t->[4] & FLAG_ANCHOR_AFTER; |
|
1603
|
|
|
|
|
|
|
} |
|
1604
|
|
|
|
|
|
|
|
|
1605
|
48
|
|
|
|
|
150
|
return $self; |
|
1606
|
|
|
|
|
|
|
} |
|
1607
|
|
|
|
|
|
|
|
|
1608
|
|
|
|
|
|
|
=head2 append_tagged |
|
1609
|
|
|
|
|
|
|
|
|
1610
|
|
|
|
|
|
|
$st->append_tagged( $newstr, %tags ) |
|
1611
|
|
|
|
|
|
|
|
|
1612
|
|
|
|
|
|
|
Append to the underlying plain string, and apply the given tags to the |
|
1613
|
|
|
|
|
|
|
newly-inserted extent. |
|
1614
|
|
|
|
|
|
|
|
|
1615
|
|
|
|
|
|
|
Returns C<$st> itself so that the method may be easily chained. |
|
1616
|
|
|
|
|
|
|
|
|
1617
|
|
|
|
|
|
|
=cut |
|
1618
|
|
|
|
|
|
|
|
|
1619
|
|
|
|
|
|
|
sub append_tagged |
|
1620
|
|
|
|
|
|
|
{ |
|
1621
|
19
|
|
|
19
|
1
|
52
|
my $self = shift; |
|
1622
|
19
|
|
|
|
|
59
|
my ( $new, %tags ) = @_; |
|
1623
|
|
|
|
|
|
|
|
|
1624
|
19
|
|
|
|
|
57
|
my $start = $self->length; |
|
1625
|
19
|
|
|
|
|
79
|
my $len = CORE::length( $new ); |
|
1626
|
|
|
|
|
|
|
|
|
1627
|
19
|
|
|
|
|
59
|
$self->append( $new ); |
|
1628
|
19
|
|
|
|
|
98
|
$self->apply_tag( $start, $len, $_, $tags{$_} ) for keys %tags; |
|
1629
|
|
|
|
|
|
|
|
|
1630
|
19
|
|
|
|
|
96
|
return $self; |
|
1631
|
|
|
|
|
|
|
} |
|
1632
|
|
|
|
|
|
|
|
|
1633
|
|
|
|
|
|
|
=head2 concat |
|
1634
|
|
|
|
|
|
|
|
|
1635
|
|
|
|
|
|
|
$ret = $st->concat( $other ) |
|
1636
|
|
|
|
|
|
|
|
|
1637
|
|
|
|
|
|
|
$ret = $st . $other |
|
1638
|
|
|
|
|
|
|
|
|
1639
|
|
|
|
|
|
|
Returns a new C<String::Tagged> containing the two strings concatenated |
|
1640
|
|
|
|
|
|
|
together, preserving any tags present. This method overloads the normal string |
|
1641
|
|
|
|
|
|
|
concatenation operator, so expressions involving C<String::Tagged> values |
|
1642
|
|
|
|
|
|
|
retain their tags. |
|
1643
|
|
|
|
|
|
|
|
|
1644
|
|
|
|
|
|
|
This method or operator tries to respect subclassing; preferring to return a |
|
1645
|
|
|
|
|
|
|
new object of a subclass if either argument or operand is a subclass of |
|
1646
|
|
|
|
|
|
|
C<String::Tagged>. If they are both subclasses, it will prefer the type of the |
|
1647
|
|
|
|
|
|
|
invocant or first operand. |
|
1648
|
|
|
|
|
|
|
|
|
1649
|
|
|
|
|
|
|
=cut |
|
1650
|
|
|
|
|
|
|
|
|
1651
|
20
|
|
|
20
|
|
6292
|
use overload '.' => 'concat'; |
|
|
20
|
|
|
|
|
93
|
|
|
|
20
|
|
|
|
|
127
|
|
|
1652
|
|
|
|
|
|
|
|
|
1653
|
|
|
|
|
|
|
sub concat |
|
1654
|
|
|
|
|
|
|
{ |
|
1655
|
17
|
|
|
17
|
1
|
8889
|
my $self = shift; |
|
1656
|
17
|
|
|
|
|
54
|
my ( $other, $swap ) = @_; |
|
1657
|
|
|
|
|
|
|
|
|
1658
|
|
|
|
|
|
|
# Try to find the "higher" subclass |
|
1659
|
17
|
100
|
100
|
|
|
117
|
my $class = ( ref $self eq __PACKAGE__ and is_string_tagged( $other ) ) |
|
1660
|
|
|
|
|
|
|
? ref $other : ref $self; |
|
1661
|
|
|
|
|
|
|
|
|
1662
|
17
|
|
|
|
|
87
|
my $ret = $class->new( $self ); |
|
1663
|
17
|
100
|
|
|
|
118
|
return $ret->insert( 0, $other ) if $swap; |
|
1664
|
6
|
|
|
|
|
24
|
return $ret->append( $other ); |
|
1665
|
|
|
|
|
|
|
} |
|
1666
|
|
|
|
|
|
|
|
|
1667
|
|
|
|
|
|
|
=head2 matches |
|
1668
|
|
|
|
|
|
|
|
|
1669
|
|
|
|
|
|
|
@subs = $st->matches( $regexp ) |
|
1670
|
|
|
|
|
|
|
|
|
1671
|
|
|
|
|
|
|
Returns a list of substrings (as C<String::Tagged> instances) for every |
|
1672
|
|
|
|
|
|
|
non-overlapping match of the given C<$regexp>. |
|
1673
|
|
|
|
|
|
|
|
|
1674
|
|
|
|
|
|
|
This could be used, for example, to build a formatted string from a formatted |
|
1675
|
|
|
|
|
|
|
template containing variable expansions: |
|
1676
|
|
|
|
|
|
|
|
|
1677
|
|
|
|
|
|
|
my $template = ... |
|
1678
|
|
|
|
|
|
|
my %vars = ... |
|
1679
|
|
|
|
|
|
|
|
|
1680
|
|
|
|
|
|
|
my $ret = String::Tagged->new; |
|
1681
|
|
|
|
|
|
|
foreach my $m ( $template->matches( qr/\$\w+|[^$]+/ ) ) { |
|
1682
|
|
|
|
|
|
|
if( $m =~ m/^\$(\w+)$/ ) { |
|
1683
|
|
|
|
|
|
|
$ret->append_tagged( $vars{$1}, %{ $m->get_tags_at( 0 ) } ); |
|
1684
|
|
|
|
|
|
|
} |
|
1685
|
|
|
|
|
|
|
else { |
|
1686
|
|
|
|
|
|
|
$ret->append( $m ); |
|
1687
|
|
|
|
|
|
|
} |
|
1688
|
|
|
|
|
|
|
} |
|
1689
|
|
|
|
|
|
|
|
|
1690
|
|
|
|
|
|
|
This iterates segments of the template containing variable expansions |
|
1691
|
|
|
|
|
|
|
starting with a C<$> symbol, and replaces them with values from the C<%vars> |
|
1692
|
|
|
|
|
|
|
hash, careful to preserve all the formatting tags from the original template |
|
1693
|
|
|
|
|
|
|
string. |
|
1694
|
|
|
|
|
|
|
|
|
1695
|
|
|
|
|
|
|
=cut |
|
1696
|
|
|
|
|
|
|
|
|
1697
|
|
|
|
|
|
|
sub matches |
|
1698
|
|
|
|
|
|
|
{ |
|
1699
|
1
|
|
|
1
|
1
|
9
|
my $self = shift; |
|
1700
|
1
|
|
|
|
|
3
|
my ( $re ) = @_; |
|
1701
|
|
|
|
|
|
|
|
|
1702
|
1
|
|
|
|
|
3
|
my $plain = $self->str; |
|
1703
|
|
|
|
|
|
|
|
|
1704
|
1
|
|
|
|
|
2
|
my @ret; |
|
1705
|
1
|
|
|
|
|
9
|
while( $plain =~ m/$re/g ) { |
|
1706
|
5
|
|
|
|
|
24
|
push @ret, $self->substr( $-[0], $+[0] - $-[0] ); |
|
1707
|
|
|
|
|
|
|
} |
|
1708
|
|
|
|
|
|
|
|
|
1709
|
1
|
|
|
|
|
6
|
return @ret; |
|
1710
|
|
|
|
|
|
|
} |
|
1711
|
|
|
|
|
|
|
|
|
1712
|
|
|
|
|
|
|
=head2 match_extents |
|
1713
|
|
|
|
|
|
|
|
|
1714
|
|
|
|
|
|
|
@extents = $st->match_extents( $regexp ) |
|
1715
|
|
|
|
|
|
|
|
|
1716
|
|
|
|
|
|
|
I |
|
1717
|
|
|
|
|
|
|
|
|
1718
|
|
|
|
|
|
|
Returns a list of extent objects for every non-overlapping match of the given |
|
1719
|
|
|
|
|
|
|
C<$regexp>. This is similar to L</matches>, except that the results are |
|
1720
|
|
|
|
|
|
|
returned as extent objects instead of substrings, allowing access to the |
|
1721
|
|
|
|
|
|
|
position information as well. |
|
1722
|
|
|
|
|
|
|
|
|
1723
|
|
|
|
|
|
|
If using the result of this method to find regions of a string to modify, |
|
1724
|
|
|
|
|
|
|
remember that any length alterations will not update positions in later extent |
|
1725
|
|
|
|
|
|
|
objects. However, since the extents are non-overlapping and in position order, |
|
1726
|
|
|
|
|
|
|
this can be handled by iterating them in reverse order so that the |
|
1727
|
|
|
|
|
|
|
modifications done first are later in the string. |
|
1728
|
|
|
|
|
|
|
|
|
1729
|
|
|
|
|
|
|
foreach my $e ( reverse $st->match_extents( $pattern ) ) { |
|
1730
|
|
|
|
|
|
|
$st->set_substr( $e->start, $e->length, $replacement ); |
|
1731
|
|
|
|
|
|
|
} |
|
1732
|
|
|
|
|
|
|
|
|
1733
|
|
|
|
|
|
|
=cut |
|
1734
|
|
|
|
|
|
|
|
|
1735
|
|
|
|
|
|
|
sub match_extents |
|
1736
|
|
|
|
|
|
|
{ |
|
1737
|
2
|
|
|
2
|
1
|
9287
|
my $self = shift; |
|
1738
|
2
|
|
|
|
|
5
|
my ( $re ) = @_; |
|
1739
|
|
|
|
|
|
|
|
|
1740
|
2
|
|
|
|
|
5
|
my $plain = $self->str; |
|
1741
|
|
|
|
|
|
|
|
|
1742
|
2
|
|
|
|
|
4
|
my @ret; |
|
1743
|
2
|
|
|
|
|
29
|
while( $plain =~ m/$re/g ) { |
|
1744
|
9
|
|
|
|
|
31
|
push @ret, $self->_mkextent( $-[0], $+[0], 0 ); |
|
1745
|
|
|
|
|
|
|
} |
|
1746
|
|
|
|
|
|
|
|
|
1747
|
2
|
|
|
|
|
9
|
return @ret; |
|
1748
|
|
|
|
|
|
|
} |
|
1749
|
|
|
|
|
|
|
|
|
1750
|
|
|
|
|
|
|
=head2 split |
|
1751
|
|
|
|
|
|
|
|
|
1752
|
|
|
|
|
|
|
@parts = $st->split( $regexp, $limit ) |
|
1753
|
|
|
|
|
|
|
|
|
1754
|
|
|
|
|
|
|
Returns a list of substrings by applying the regexp to the string content; |
|
1755
|
|
|
|
|
|
|
similar to the core perl C<split> function. If C<$limit> is supplied, the |
|
1756
|
|
|
|
|
|
|
method will stop at that number of elements, returning the entire remainder of |
|
1757
|
|
|
|
|
|
|
the input string as the final element. If the C<$regexp> contains a capture |
|
1758
|
|
|
|
|
|
|
group then the content of the first one will be added to the return list as |
|
1759
|
|
|
|
|
|
|
well. |
|
1760
|
|
|
|
|
|
|
|
|
1761
|
|
|
|
|
|
|
=cut |
|
1762
|
|
|
|
|
|
|
|
|
1763
|
|
|
|
|
|
|
sub split |
|
1764
|
|
|
|
|
|
|
{ |
|
1765
|
4
|
|
|
4
|
1
|
34
|
my $self = shift; |
|
1766
|
4
|
|
|
|
|
8
|
my ( $re, $limit ) = @_; |
|
1767
|
|
|
|
|
|
|
|
|
1768
|
4
|
|
|
|
|
7
|
my $plain = $self->str; |
|
1769
|
|
|
|
|
|
|
|
|
1770
|
4
|
|
|
|
|
7
|
my $prev = 0; |
|
1771
|
4
|
|
|
|
|
9
|
my @ret; |
|
1772
|
4
|
|
|
|
|
43
|
while( $plain =~ m/$re/g ) { |
|
1773
|
5
|
|
|
|
|
29
|
push @ret, $self->substr( $prev, $-[0]-$prev ); |
|
1774
|
5
|
100
|
|
|
|
28
|
push @ret, $self->substr( $-[1], $+[1]-$-[1] ) if @- > 1; |
|
1775
|
|
|
|
|
|
|
|
|
1776
|
5
|
|
|
|
|
13
|
$prev = $+[0]; |
|
1777
|
|
|
|
|
|
|
|
|
1778
|
5
|
100
|
66
|
|
|
32
|
last if defined $limit and @ret == $limit-1; |
|
1779
|
|
|
|
|
|
|
} |
|
1780
|
|
|
|
|
|
|
|
|
1781
|
4
|
100
|
|
|
|
13
|
if( CORE::length $plain > $prev ) { |
|
1782
|
3
|
|
|
|
|
8
|
push @ret, $self->substr( $prev, CORE::length( $plain ) - $prev ); |
|
1783
|
|
|
|
|
|
|
} |
|
1784
|
|
|
|
|
|
|
|
|
1785
|
4
|
|
|
|
|
17
|
return @ret; |
|
1786
|
|
|
|
|
|
|
} |
|
1787
|
|
|
|
|
|
|
|
|
1788
|
|
|
|
|
|
|
=head2 sprintf |
|
1789
|
|
|
|
|
|
|
|
|
1790
|
|
|
|
|
|
|
$ret = $st->sprintf( @args ) |
|
1791
|
|
|
|
|
|
|
|
|
1792
|
|
|
|
|
|
|
I |
|
1793
|
|
|
|
|
|
|
|
|
1794
|
|
|
|
|
|
|
Returns a new string by using the given instance as the format string for a |
|
1795
|
|
|
|
|
|
|
L</from_sprintf> constructor call. The returned instance will be of the same |
|
1796
|
|
|
|
|
|
|
class as the invocant. |
|
1797
|
|
|
|
|
|
|
|
|
1798
|
|
|
|
|
|
|
=cut |
|
1799
|
|
|
|
|
|
|
|
|
1800
|
|
|
|
|
|
|
sub sprintf |
|
1801
|
|
|
|
|
|
|
{ |
|
1802
|
1
|
|
|
1
|
1
|
2
|
my $self = shift; |
|
1803
|
|
|
|
|
|
|
|
|
1804
|
1
|
|
|
|
|
4
|
return ( ref $self )->from_sprintf( $self, @_ ); |
|
1805
|
|
|
|
|
|
|
} |
|
1806
|
|
|
|
|
|
|
|
|
1807
|
|
|
|
|
|
|
=head2 debug_sprintf |
|
1808
|
|
|
|
|
|
|
|
|
1809
|
|
|
|
|
|
|
$ret = $st->debug_sprintf |
|
1810
|
|
|
|
|
|
|
|
|
1811
|
|
|
|
|
|
|
Returns a representation of the string data and all the tags, suitable for |
|
1812
|
|
|
|
|
|
|
debug printing or other similar use. This is a format such as is given in the |
|
1813
|
|
|
|
|
|
|
DESCRIPTION section above. |
|
1814
|
|
|
|
|
|
|
|
|
1815
|
|
|
|
|
|
|
The output will consist of a number of lines, the first containing the plain |
|
1816
|
|
|
|
|
|
|
underlying string, then one line per tag. The line shows the extent of the tag |
|
1817
|
|
|
|
|
|
|
given by C<[---]> markers, or a C<|> in the special case of a tag covering |
|
1818
|
|
|
|
|
|
|
only a single character. Special markings of C<< < >> and C<< > >> indicate |
|
1819
|
|
|
|
|
|
|
tags which are "before" or "after" anchored. |
|
1820
|
|
|
|
|
|
|
|
|
1821
|
|
|
|
|
|
|
For example: |
|
1822
|
|
|
|
|
|
|
|
|
1823
|
|
|
|
|
|
|
Hello, world |
|
1824
|
|
|
|
|
|
|
[---] word => 1 |
|
1825
|
|
|
|
|
|
|
<[----------]> everywhere => 1 |
|
1826
|
|
|
|
|
|
|
| space => 1 |
|
1827
|
|
|
|
|
|
|
|
|
1828
|
|
|
|
|
|
|
=cut |
|
1829
|
|
|
|
|
|
|
|
|
1830
|
|
|
|
|
|
|
sub debug_sprintf |
|
1831
|
|
|
|
|
|
|
{ |
|
1832
|
5
|
|
|
5
|
1
|
777
|
my $self = shift; |
|
1833
|
|
|
|
|
|
|
|
|
1834
|
5
|
|
|
|
|
18
|
my $str = $self->str; |
|
1835
|
5
|
|
|
|
|
11
|
my $len = CORE::length( $str ); |
|
1836
|
|
|
|
|
|
|
|
|
1837
|
5
|
|
|
|
|
9
|
my $maxnamelen = 0; |
|
1838
|
|
|
|
|
|
|
|
|
1839
|
5
|
|
|
|
|
23
|
my $ret = " " . ( $str =~ s/\n/./gr ) . "\n"; |
|
1840
|
|
|
|
|
|
|
|
|
1841
|
|
|
|
|
|
|
$self->iter_tags( sub { |
|
1842
|
7
|
|
|
7
|
|
20
|
my ( undef, undef, $name, undef ) = @_; |
|
1843
|
7
|
100
|
|
|
|
24
|
CORE::length( $name ) > $maxnamelen and $maxnamelen = CORE::length( $name ); |
|
1844
|
5
|
|
|
|
|
32
|
} ); |
|
1845
|
|
|
|
|
|
|
|
|
1846
|
5
|
|
|
|
|
29
|
foreach my $t ( @{ $self->{tags} } ) { |
|
|
5
|
|
|
|
|
20
|
|
|
1847
|
7
|
|
|
|
|
30
|
my ( $ts, $te, $tn, $tv, $tf ) = @$t; |
|
1848
|
|
|
|
|
|
|
|
|
1849
|
7
|
100
|
|
|
|
26
|
$ret .= ( $tf & FLAG_ANCHOR_BEFORE ) ? " <" : " "; |
|
1850
|
|
|
|
|
|
|
|
|
1851
|
7
|
|
|
|
|
17
|
$ret .= " " x $ts; |
|
1852
|
|
|
|
|
|
|
|
|
1853
|
7
|
|
|
|
|
10
|
my $tl = $te - $ts; |
|
1854
|
|
|
|
|
|
|
|
|
1855
|
7
|
100
|
|
|
|
22
|
if( $tl == 0 ) { |
|
|
|
100
|
|
|
|
|
|
|
1856
|
1
|
|
|
|
|
9
|
$ret =~ s/ $/></; |
|
1857
|
1
|
|
|
|
|
2
|
$te++; # account for extra printed width |
|
1858
|
|
|
|
|
|
|
} |
|
1859
|
|
|
|
|
|
|
elsif( $tl == 1 ) { |
|
1860
|
2
|
|
|
|
|
4
|
$ret .= "|"; |
|
1861
|
|
|
|
|
|
|
} |
|
1862
|
|
|
|
|
|
|
else { |
|
1863
|
4
|
|
|
|
|
11
|
$ret .= "[" . ( "-" x ( $tl - 2 ) ) . "]"; |
|
1864
|
|
|
|
|
|
|
} |
|
1865
|
|
|
|
|
|
|
|
|
1866
|
7
|
|
|
|
|
14
|
$ret .= " " x ( $len - $te ); |
|
1867
|
|
|
|
|
|
|
|
|
1868
|
7
|
100
|
|
|
|
16
|
$ret .= ( $tf & FLAG_ANCHOR_AFTER ) ? "> " : " "; |
|
1869
|
|
|
|
|
|
|
|
|
1870
|
7
|
|
|
|
|
34
|
$ret .= CORE::sprintf "%-*s => %s\n", $maxnamelen, $tn, $tv; |
|
1871
|
|
|
|
|
|
|
} |
|
1872
|
|
|
|
|
|
|
|
|
1873
|
5
|
|
|
|
|
26
|
return $ret; |
|
1874
|
|
|
|
|
|
|
} |
|
1875
|
|
|
|
|
|
|
|
|
1876
|
|
|
|
|
|
|
=head1 TODO |
|
1877
|
|
|
|
|
|
|
|
|
1878
|
|
|
|
|
|
|
=over 4 |
|
1879
|
|
|
|
|
|
|
|
|
1880
|
|
|
|
|
|
|
=item * |
|
1881
|
|
|
|
|
|
|
|
|
1882
|
|
|
|
|
|
|
There are likely variations on the rules for C<set_substr> that could equally |
|
1883
|
|
|
|
|
|
|
apply to some uses of tagged strings. Consider whether the behaviour of |
|
1884
|
|
|
|
|
|
|
modification is chosen per-method, per-tag, or per-string. |
|
1885
|
|
|
|
|
|
|
|
|
1886
|
|
|
|
|
|
|
=item * |
|
1887
|
|
|
|
|
|
|
|
|
1888
|
|
|
|
|
|
|
Consider how to implement a clone from one tag format to another which wants |
|
1889
|
|
|
|
|
|
|
to merge multiple different source tags together into a single new one. |
|
1890
|
|
|
|
|
|
|
|
|
1891
|
|
|
|
|
|
|
=back |
|
1892
|
|
|
|
|
|
|
|
|
1893
|
|
|
|
|
|
|
=head1 AUTHOR |
|
1894
|
|
|
|
|
|
|
|
|
1895
|
|
|
|
|
|
|
Paul Evans <leonerd@leonerd.org.uk> |
|
1896
|
|
|
|
|
|
|
|
|
1897
|
|
|
|
|
|
|
=cut |
|
1898
|
|
|
|
|
|
|
|
|
1899
|
|
|
|
|
|
|
0x55AA; |