line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# You may distribute under the terms of either the GNU General Public License |
2
|
|
|
|
|
|
|
# or the Artistic License (the same terms as Perl itself) |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# (C) Paul Evans, 2008-2023 -- leonerd@leonerd.org.uk |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
package String::Tagged 0.22; |
7
|
|
|
|
|
|
|
|
8
|
20
|
|
|
20
|
|
4688169
|
use v5.14; |
|
20
|
|
|
|
|
188
|
|
9
|
20
|
|
|
20
|
|
118
|
use warnings; |
|
20
|
|
|
|
|
38
|
|
|
20
|
|
|
|
|
653
|
|
10
|
|
|
|
|
|
|
|
11
|
20
|
|
|
20
|
|
114
|
use Scalar::Util qw( blessed ); |
|
20
|
|
|
|
|
36
|
|
|
20
|
|
|
|
|
1336
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
require String::Tagged::Extent; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
use constant { |
16
|
20
|
|
|
|
|
1947
|
FLAG_ANCHOR_BEFORE => 0x01, |
17
|
|
|
|
|
|
|
FLAG_ANCHOR_AFTER => 0x02, |
18
|
|
|
|
|
|
|
FLAG_ITERATING => 0x04, |
19
|
|
|
|
|
|
|
FLAG_DELETED => 0x08, |
20
|
20
|
|
|
20
|
|
134
|
}; |
|
20
|
|
|
|
|
37
|
|
21
|
|
|
|
|
|
|
|
22
|
20
|
|
|
20
|
|
144
|
use constant DEBUG => 0; |
|
20
|
|
|
|
|
49
|
|
|
20
|
|
|
|
|
1080
|
|
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# Since we're providing overloading, we should set fallback by default |
25
|
20
|
|
|
20
|
|
126
|
use overload fallback => 1; |
|
20
|
|
|
|
|
38
|
|
|
20
|
|
|
|
|
171
|
|
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
=head1 NAME |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
C<String::Tagged> - string buffers with value tags on extents |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head1 SYNOPSIS |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
use String::Tagged; |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
my $st = String::Tagged->new( "An important message" ); |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
$st->apply_tag( 3, 9, bold => 1 ); |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
$st->iter_substr_nooverlap( |
40
|
|
|
|
|
|
|
sub { |
41
|
|
|
|
|
|
|
my ( $substring, %tags ) = @_; |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
print $tags{bold} ? "<b>$substring</b>" |
44
|
|
|
|
|
|
|
: $substring; |
45
|
|
|
|
|
|
|
} |
46
|
|
|
|
|
|
|
); |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=head1 DESCRIPTION |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
This module implements an object class, instances of which store a (mutable) |
51
|
|
|
|
|
|
|
string buffer that supports tags. A tag is a name/value pair that applies to |
52
|
|
|
|
|
|
|
some extent of the underlying string. |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
The types of tag names ought to be strings, or at least values that are |
55
|
|
|
|
|
|
|
well-behaved as strings, as the names will often be used as the keys in hashes |
56
|
|
|
|
|
|
|
or applied to the C<eq> operator. |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
The types of tag values are not restricted - any scalar will do. This could be |
59
|
|
|
|
|
|
|
a simple integer or string, ARRAY or HASH reference, or even a CODE reference |
60
|
|
|
|
|
|
|
containing an event handler of some kind. |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
Tags may be arbitrarily overlapped. Any given offset within the string has in |
63
|
|
|
|
|
|
|
effect, a set of uniquely named tags. Tags of different names are independent. |
64
|
|
|
|
|
|
|
For tags of the same name, only the latest, shortest tag takes effect. |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
For example, consider a string with three tags represented here: |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
Here is my string with tags |
69
|
|
|
|
|
|
|
[-------------------------] foo => 1 |
70
|
|
|
|
|
|
|
[-------] foo => 2 |
71
|
|
|
|
|
|
|
[---] bar => 3 |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
Every character in this string has a tag named C<foo>. The value of this tag |
74
|
|
|
|
|
|
|
is 2 for the words C<my> and C<string> and the space in between, and 1 |
75
|
|
|
|
|
|
|
elsewhere. Additionally, the words C<is> and C<my> and the space between them |
76
|
|
|
|
|
|
|
also have the tag C<bar> with a value 3. |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
Since C<String::Tagged> does not understand the significance of the tag values |
79
|
|
|
|
|
|
|
it therefore cannot detect if two neighbouring tags really contain the same |
80
|
|
|
|
|
|
|
semantic idea. Consider the following string: |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
A string with words |
83
|
|
|
|
|
|
|
[-------] type => "message" |
84
|
|
|
|
|
|
|
[--------] type => "message" |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
This string contains two tags. C<String::Tagged> will treat this as two |
87
|
|
|
|
|
|
|
different tag values as far as C<iter_tags_nooverlap> is concerned, even |
88
|
|
|
|
|
|
|
though C<get_tag_at> yields the same value for the C<type> tag at any position |
89
|
|
|
|
|
|
|
in the string. The C<merge_tags> method may be used to merge tag extents of |
90
|
|
|
|
|
|
|
tags that should be considered as equal. |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
=head1 NAMING |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
I spent a lot of time considering the name for this module. It seems that a |
95
|
|
|
|
|
|
|
number of people across a number of languages all created similar |
96
|
|
|
|
|
|
|
functionality, though named very differently. For the benefit of |
97
|
|
|
|
|
|
|
keyword-based search tools and similar, here's a list of some other names this |
98
|
|
|
|
|
|
|
sort of object might be known by: |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=over 4 |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
=item * |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
Extents |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=item * |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
Overlays |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=item * |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
Attribute or attributed strings |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=item * |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
Markup |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=item * |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
Out-of-band data |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=back |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=cut |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
# Install is_string_tagged(), a predicate reporting whether its argument is an
# instance of this package. On perl 5.34 and later the 'isa' operator is used;
# it is compiled from a string so that older perls never have to parse it.
# Older perls fall back on blessed() combined with the ->isa method.
*is_string_tagged = do {
   my $predicate;

   if( $] >= 5.034 ) {
      $predicate = eval( 'use experimental "isa"; sub { $_[0] isa __PACKAGE__ }' ) // die $@;
   }
   else {
      $predicate = sub { blessed $_[0] and $_[0]->isa( __PACKAGE__ ) };
   }

   $predicate;
};
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=head1 CONSTRUCTOR |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=cut |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=head2 new |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
$st = String::Tagged->new( $str ) |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
Returns a new instance of a C<String::Tagged> object. It will contain no tags. |
141
|
|
|
|
|
|
|
If the optional C<$str> argument is supplied, the string buffer will be |
142
|
|
|
|
|
|
|
initialised from this value. |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
If C<$str> is a C<String::Tagged> object then it will be cloned, as if calling |
145
|
|
|
|
|
|
|
the C<clone> method on it. |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=cut |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
sub new
{
   my ( $class, $str ) = @_;

   # Another tagged string is copied, tags included, rather than flattened
   if( is_string_tagged( $str ) ) {
      return $class->clone( $str );
   }

   # Anything else is stringified; a missing argument gives an empty buffer
   return bless {
      str  => ( defined $str ? "$str" : "" ),
      tags => [],
   }, $class;
}
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=head2 new_tagged |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
$st = String::Tagged->new_tagged( $str, %tags ) |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
Shortcut for creating a new C<String::Tagged> object with the given tags |
169
|
|
|
|
|
|
|
applied to the entire length. The tags will not be anchored at either end. |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
=cut |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
sub new_tagged
{
   my ( $class, $str, %tags ) = @_;

   my $self   = $class->new( $str );
   my $length = $self->length;

   # Every tag spans the whole string, from offset 0 for its full length
   foreach my $name ( keys %tags ) {
      $self->apply_tag( 0, $length, $name => $tags{$name} );
   }

   return $self;
}
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
=head2 clone (class) |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
$new = String::Tagged->clone( $orig, %opts ) |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
Returns a new instance of C<String::Tagged> made by cloning the original, |
191
|
|
|
|
|
|
|
subject to the options provided. The returned instance will be in the |
192
|
|
|
|
|
|
|
requested class, which need not match the class of the original. |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
The following options are recognised: |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
=over 4 |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
=item only_tags => ARRAY |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
If present, gives an ARRAY reference containing tag names. Only those tags |
201
|
|
|
|
|
|
|
named here will be copied; others will be ignored. |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=item except_tags => ARRAY |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
If present, gives an ARRAY reference containing tag names. All tags will be |
206
|
|
|
|
|
|
|
copied except those named here. |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
=item convert_tags => HASH |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
If present, gives a HASH reference containing tag conversion functions. For |
211
|
|
|
|
|
|
|
any tags in the original to be copied whose names appear in the hash, the |
212
|
|
|
|
|
|
|
name and value are passed into the corresponding function, which should return |
213
|
|
|
|
|
|
|
an even-sized key/value list giving a tag, or a list of tags, to apply to the |
214
|
|
|
|
|
|
|
new clone. |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
my @new_tags = $convert_tags->{$orig_name}->( $orig_name, $orig_value ) |
217
|
|
|
|
|
|
|
# Where @new_tags is ( $new_name, $new_value, $new_name_2, $new_value_2, ... ) |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
As a further convenience, if the value for a given tag name is a plain string |
220
|
|
|
|
|
|
|
instead of a code reference, it gives the new name for the tag, and will be |
221
|
|
|
|
|
|
|
applied with its existing value. |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
If C<only_tags> is being used too, then the source names of any tags to be |
224
|
|
|
|
|
|
|
converted must also be listed there, or they will not be copied. |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
=item start => INT |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
I |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
Start at the given position; defaults to 0. |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
=item end => INT |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
I |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
End after the given position; defaults to end of string. This option overrides |
237
|
|
|
|
|
|
|
C<len>. |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
=item len => INT |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
End after the given length beyond the start position; defaults to end of |
242
|
|
|
|
|
|
|
string. This option only applies if C<end> is not given. |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
=back |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
=head2 clone (instance) |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
$new = $orig->clone( %args ) |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
Called as an instance (rather than a class) method, the newly-cloned instance |
251
|
|
|
|
|
|
|
is returned in the same class as the original. |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
=cut |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
sub clone
{
   # May be invoked on a class, with the original as the next argument, or
   # on an instance, in which case the clone takes that instance's class.
   my ( $class, $orig );
   if( blessed $_[0] ) {
      $orig  = shift;
      $class = ref $orig;
   }
   else {
      $class = shift;
      $orig  = shift;
   }
   my %opts = @_;

   # Turn the name lists into lookup sets; undef means "no filter"
   my ( $only, $except );
   $only   = { map { $_ => 1 } @{ $opts{only_tags} } }   if exists $opts{only_tags};
   $except = { map { $_ => 1 } @{ $opts{except_tags} } } if exists $opts{except_tags};

   my $convert = $opts{convert_tags};

   my $origstr = $orig->str;

   # Window of the original to copy: 'end' wins over 'len', and with neither
   # the window runs to the end of the string
   my $start = $opts{start} // 0;
   my $end;
   if( defined $opts{end} ) {
      $end = $opts{end};
   }
   elsif( defined $opts{len} ) {
      $end = $start + $opts{len};
   }
   else {
      $end = length $origstr;
   }

   my $len = $end - $start;

   my $new = $class->new( substr $origstr, $start, $len );

   foreach my $t ( @{ $orig->{tags} } ) {
      my ( $ts, $te, $tn, $tv, $tf ) = @$t;

      # Skip tags ending before the window, and stop at the first tag that
      # starts at or beyond its end
      next if $te < $start;
      last if $ts >= $end;

      next if $only and not $only->{$tn};
      next if $except and $except->{$tn};

      # By default the tag is copied as-is; a convert_tags entry either
      # renames it (plain string) or computes a replacement list (CODE ref)
      my @tags = ( $tn, $tv );
      if( $convert and my $c = $convert->{$tn} ) {
         @tags = ( ref $c eq "CODE" ) ? $c->( $tn, $tv ) : ( $c, $tv );
      }

      # Rebase the extent onto the new string's coordinates
      $ts -= $start;
      $te -= $start;

      my $tl = $te - ( $ts < 0 ? 0 : $ts );

      next if $te <= 0;

      # -1 is apply_tag's marker for an edge anchored before / after the string
      $ts = -1 if $ts < 0 or $tf & FLAG_ANCHOR_BEFORE;
      $tl = -1 if $te > $len or $tf & FLAG_ANCHOR_AFTER;

      while( my ( $name, $value ) = splice @tags, 0, 2 ) {
         $new->apply_tag( $ts, $tl, $name, $value );
      }
   }

   return $new;
}
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
sub _mkextent
{
   my ( $self, $start, $end, $flags ) = @_;

   # Only the two anchor bits are carried into the extent object
   my $anchors = $flags & ( FLAG_ANCHOR_BEFORE | FLAG_ANCHOR_AFTER );

   return bless [ $self, $start, $end, $anchors ], 'String::Tagged::Extent';
}
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
=head2 from_sprintf |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
$str = String::Tagged->from_sprintf( $format, @args ) |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
I |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
Returns a new instance of a C<String::Tagged> object, initialised by |
341
|
|
|
|
|
|
|
formatting the supplied arguments using the supplied format. |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
The C<$format> string is similar to that supported by the core C<sprintf> |
344
|
|
|
|
|
|
|
operator, though a few features such as out-of-order argument indexing and |
345
|
|
|
|
|
|
|
vector formatting are missing. This format string may be a plain perl string, |
346
|
|
|
|
|
|
|
or an instance of C<String::Tagged>. In the latter case, any tags within it |
347
|
|
|
|
|
|
|
are preserved in the result. |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
In the case of a C<%s> conversion, the value of the argument consumed may |
350
|
|
|
|
|
|
|
itself be a C<String::Tagged> instance. In this case it will be appended to |
351
|
|
|
|
|
|
|
the returned object, preserving any tags within it. |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
All other conversions are handled individually by the core C<sprintf> |
354
|
|
|
|
|
|
|
operator and appended to the result. |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
=cut |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
sub from_sprintf
{
   my $class = shift;
   my ( $format, @args ) = @_;

   # Clone the format string into the candidate return value, and then
   # repeatedly replace %... expansions with their required value using
   # ->set_substr, so that embedded tags in the format will behave sensibly.

   my $ret = ( is_string_tagged( $format ) ) ?
      $class->clone( $format ) :
      $class->new( $format );

   # Offset within $ret up to which everything has already been expanded
   my $pos = 0;

   while( $pos < length $ret ) {
      my $str = "$ret";
      pos( $str ) = $pos;

      my $replacement;

      # NOTE: the code after this if/elsif chain reads @- and @+ as set by
      # whichever condition matched. Matches performed inside a branch body
      # are scoped to that body and do not disturb them.
      if( $str =~ m/\G[^%]+/gc ) {
         # A literal span
         $pos = $+[0];
         next;
      }
      elsif( $str =~ m/\G%%/gc ) {
         # A literal %% conversion
         $replacement = "%";
      }
      elsif( $str =~ m/\G%([-]?)(\d+|\*)?(?:\.(\d+|\*))?s/gc ) {
         # A string
         my ( $flags, $width, $precision ) = ( $1, $2, $3 );

         # A '*' width or precision consumes an argument ahead of the value
         $width     = shift @args if defined $width     and $width     eq "*";
         $precision = shift @args if defined $precision and $precision eq "*";
         my $arg = shift @args;

         defined $arg or do {
            warnings::warnif( uninitialized => "Use of uninitialized value in String::Tagged->from_sprintf" );
            $arg = "";
         };

         if( defined $precision ) {
            # Truncate, keeping tags when the argument is itself tagged
            if( is_string_tagged( $arg ) ) {
               $arg = $arg->substr( 0, $precision );
            }
            else {
               $arg = substr $arg, 0, $precision;
            }
         }

         my $leftalign = $flags =~ m/-/;

         my $padding = defined $width ? $width - length $arg : 0;
         $padding = 0 if $padding < 0;

         $replacement = "";

         $replacement .= " " x $padding if !$leftalign;

         $replacement .= $arg;

         $replacement .= " " x $padding if $leftalign;
      }
      elsif( $str =~ m/\G%(.*?)([cduoxefgXEGbBpaAiDUOF])/gc ) {
         # Another conversion format
         my ( $template, $flags ) = ( $2, $1 );

         # One argument for the value, plus one per '*' in the flags
         my $argc = 1;
         $argc += ( () = $flags =~ m/\*/g );

         $replacement = sprintf "%$flags$template", @args[0..$argc-1];
         splice @args, 0, $argc;
      }
      elsif( $str =~ m/\G%(.*?)([a-zA-Z])/gc ) {
         warn "Unrecognised sprintf conversion %$2";

         # Substitute an explicit empty string, so that the set_substr and
         # length calls below are not handed an undefined value
         $replacement = "";
      }
      else {
         # must be at EOF now
         last;
      }

      my $templatelen = $+[0] - $-[0];
      $ret->set_substr( $-[0], $templatelen, $replacement );

      $pos += length( $replacement );
   }

   return $ret;
}
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
=head2 join |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
$str = String::Tagged->join( $sep, @parts ) |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
I |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
Returns a new instance of a C<String::Tagged> object, formed by concatenating |
455
|
|
|
|
|
|
|
each of the component piece together, joined with the separator string. |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
The result will be much like the core C<join> function, except that it will |
458
|
|
|
|
|
|
|
preserve tags in the resulting string. |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
=cut |
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
sub join
{
   my $class = shift;
   my ( $sep, @parts ) = @_;

   is_string_tagged( $sep ) or
      $sep = $class->new( $sep );

   # Always build on a fresh object. A tagged first part is cloned, so the
   # caller's object is never the target of the '.=' below; a plain or
   # missing first part is wrapped, so the result is an object even for
   # zero or one parts.
   # NOTE(review): the '.' and '.=' overloads are defined outside this view;
   # confirm whether '.=' appends in place.
   my $first = shift @parts;
   my $ret = is_string_tagged( $first ) ? $first->clone
                                        : $class->new( $first );

   $ret .= $sep . $_ for @parts;

   return $ret;
}
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
=head1 METHODS |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
=cut |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
=head2 str |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
$str = $st->str |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
$str = "$st" |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
Returns the plain string contained within the object. |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
This method is also called for stringification; so the C<String::Tagged> |
489
|
|
|
|
|
|
|
object can be used in a plain string interpolation such as |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
my $message = String::Tagged->new( "Hello world" ); |
492
|
|
|
|
|
|
|
print "My message is $message\n"; |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
=cut |
495
|
|
|
|
|
|
|
|
496
|
20
|
|
|
20
|
|
32309
|
use overload '""' => 'str'; |
|
20
|
|
|
|
|
61
|
|
|
20
|
|
|
|
|
160
|
|
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
sub str
{
   # Also installed as the '""' overload, so extra arguments may follow
   my ( $self ) = @_;

   return $self->{str};
}
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
=head2 length |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
$len = $st->length |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
$len = length( $st ) |
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
Returns the length of the plain string. Because stringification works on this |
511
|
|
|
|
|
|
|
object class, the normal core C<length> function works correctly on it. |
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
=cut |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
sub length
{
   my ( $self ) = @_;

   # CORE:: is spelled out because this sub shares its name with the builtin
   return CORE::length( $self->{str} );
}
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
=head2 substr |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
$str = $st->substr( $start, $len ) |
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
Returns a C<String::Tagged> instance representing a section from within the |
526
|
|
|
|
|
|
|
given string, containing all the same tags at the same conceptual positions. |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
=cut |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
sub substr
{
   my ( $self, $start, $len ) = @_;

   # A substring is a clone restricted to the requested window
   return $self->clone( start => $start, len => $len );
}
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
=head2 plain_substr |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
$str = $st->plain_substr( $start, $len ) |
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
Returns as a plain perl string, the substring at the given position. This will |
543
|
|
|
|
|
|
|
be the same string data as returned by C<substr>, only as a plain string |
544
|
|
|
|
|
|
|
without the tags |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
=cut |
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
sub plain_substr
{
   my ( $self, $start, $len ) = @_;

   # Works on the raw buffer only; tags are not consulted
   my $plain = CORE::substr( $self->{str}, $start, $len );

   return $plain;
}
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
sub _cmp_tags
{
   # Comparison for use with the package variables $a and $b, each a tag
   # record whose first two elements are its start and end offsets.
   # Order by start; on a tie, the tag that ends sooner comes first.
   my $by_start = $a->[0] <=> $b->[0];
   return $by_start if $by_start;

   return $a->[1] <=> $b->[1];
}
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
sub _assert_sorted
{
   my ( $self ) = @_;
   my $tags = $self->{tags};

   # A list of zero or one tags cannot be out of order
   return if @$tags < 2;

   # Check each neighbouring pair; report and die on the first that is not
   # in _cmp_tags order
   foreach my $i ( 1 .. $#$tags ) {
      # _cmp_tags compares the package variables $a and $b
      local ( $a, $b ) = @{$tags}[ $i - 1, $i ];
      next if _cmp_tags() <= 0;

      print STDERR "Tag order violation at i=$i\n";
      print STDERR "[@{[ $i - 1 ]}] = [ $tags->[$i-1]->[0], $tags->[$i-1]->[1] ]\n";
      print STDERR "[@{[ $i ]}] = [ $tags->[$i]->[0], $tags->[$i]->[1] ]\n";
      die "Assert failure";
   }
}
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
sub _insert_tag
{
   my ( $self, $start, $end, $name, $value, $flags ) = @_;

   my $list  = $self->{tags};
   my $entry = [ $start, $end, $name => $value, $flags ];

   # Nothing stored yet, so there is no position to search for
   unless( @$list ) {
      push @$list, $entry;
      return;
   }

   # _cmp_tags compares the package variables $a and $b; $a stays fixed as
   # the new entry while $b is pointed at each candidate neighbour
   local $a = $entry;
   local $b;

   # Cheap checks of the two ends first: a tag that sorts at or before the
   # current first one, or at or after the current last one
   $b = $list->[0];
   if( _cmp_tags() <= 0 ) {
      unshift @$list, $entry;
      return;
   }

   $b = $list->[-1];
   if( _cmp_tags() >= 0 ) {
      push @$list, $entry;
      return;
   }

   # Otherwise binary-search for the insertion index
   my ( $lo, $hi ) = ( 0, $#$list );
   my $where;

   while( $lo < $hi ) {
      my $mid = int( ( $lo + $hi ) / 2 );

      $b = $list->[$mid];
      my $order = _cmp_tags();

      if( $order == 0 ) {
         # Sorts equal to an existing entry; insert right here
         $where = $mid;
         last;
      }

      if( $order > 0 ) {
         $lo = $mid + 1;
      }
      else {
         $hi = $mid; # open interval
      }

      if( $lo == $hi ) {
         $where = $lo;
         last;
      }
   }

   # Fall back on the upper bound, then clamp into the valid index range
   $where //= $hi;
   $where = 0      if $where < 0;
   $where = @$list if $where > @$list;

   splice @$list, $where, 0, $entry;

   $self->_assert_sorted if DEBUG;
}
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
=head2 apply_tag |
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
$st->apply_tag( $start, $len, $name, $value ) |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
Apply the named tag value to the given extent. The tag will start on the |
667
|
|
|
|
|
|
|
character at the C<$start> index, and continue for the next C<$len> |
668
|
|
|
|
|
|
|
characters. |
669
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
If C<$start> is given as -1, the tag will be considered to start "before" the |
671
|
|
|
|
|
|
|
actual string. If C<$len> is given as -1, the tag will be considered to |
672
|
|
|
|
|
|
|
end "after" end of the actual string. These special limits are used by |
673
|
|
|
|
|
|
|
C<set_substr> when deciding whether to move a tag boundary. The start of any |
674
|
|
|
|
|
|
|
tag that starts "before" the string is never moved, even if more text is |
675
|
|
|
|
|
|
|
inserted at the beginning. Similarly, a tag which ends "after" the end of the |
676
|
|
|
|
|
|
|
string, will continue to the end even if more text is appended. |
677
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
This method returns the C<$st> object. |
679
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
$st->apply_tag( $e, $name, $value ) |
681
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
Alternatively, an existing L<String::Tagged::Extent> object can be passed as |
683
|
|
|
|
|
|
|
the first argument instead of two integers. The new tag will apply at the |
684
|
|
|
|
|
|
|
given extent. |
685
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
=cut |
687
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
sub apply_tag
{
   my $self = shift;

   my ( $start, $end );
   my $flags = 0;

   if( blessed $_[0] ) {
      # Extent form: positions and anchoring are read from the object
      my $extent = shift;

      $start = $extent->start;
      $end   = $extent->end;

      $extent->anchor_before and $flags |= FLAG_ANCHOR_BEFORE;
      $extent->anchor_after  and $flags |= FLAG_ANCHOR_AFTER;
   }
   else {
      # ( $start, $len ) form
      $start = shift;
      my $len = shift;

      my $limit = $self->length;

      # A negative start means the tag is anchored before the string
      if( $start < 0 ) {
         $flags |= FLAG_ANCHOR_BEFORE;
         $start = 0;
      }

      # A length of -1 means the tag is anchored after the string;
      # any other length is clipped to the end of the string
      if( $len == -1 ) {
         $flags |= FLAG_ANCHOR_AFTER;
         $end = $limit;
      }
      else {
         my $wanted = $start + $len;
         $end = ( $wanted > $limit ) ? $limit : $wanted;
      }
   }

   my ( $name, $value ) = @_;

   $self->_insert_tag( $start, $end, $name, $value, $flags );

   # Returns the object itself
   return $self;
}
729
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
# Shared implementation behind unapply_tag ($keepends true) and delete_tag
# ($keepends false).
#
#   $self->_remove_tag( $keepends, $start, $len, $name )
#   $self->_remove_tag( $keepends, $e, $name )
#
# Removes every tag called $name that overlaps the extent. With $keepends
# true, the parts of a removed tag that lie outside the extent are
# re-inserted as new, shorter tags; with it false whole tags are dropped.
#
# Always returns $self, including when the tag is only marked for deferred
# deletion because it is the one currently being iterated.
sub _remove_tag
{
   my $self = shift;
   my $keepends = shift;
   my ( $start, $end );

   if( blessed $_[0] ) {
      my $e = shift;
      $start = $e->start;
      $end   = $e->end;
   }
   else {
      $start = shift;
      $end   = $start + shift;
   }

   my ( $name ) = @_;

   if( my $t = $self->{iterating} ) {
      my ( $ts, $te, $tn ) = @$t;
      if( $start == $ts and $end == $te and $name eq $tn ) {
         # The tag being removed is exactly the one iter_extents is
         # currently visiting. Splicing it out now would disturb that
         # loop's index, so just mark it; iter_extents removes it once the
         # callback returns.
         $t->[4] |= FLAG_DELETED;
         return $self;
      }
   }

   my $tags = $self->{tags};

   my $have_added = 0;

   # Can't foreach() because we modify $i
   for( my $i = 0; $i < @$tags; $i++ ) {
      my ( $ts, $te, $tn, $tv, $tf ) = @{ $tags->[$i] };

      # Skip tags ending at or before the extent; stop at the first tag
      # starting at or after its end
      next if $te <= $start;
      last if $ts >= $end;

      next if $tn ne $name;

      if( $keepends and $end < $te ) {
         # Preserve the fragment after the extent. Its start is no longer
         # the original start, so it must not stay anchored-before.
         $self->_insert_tag( $end, $te, $tn, $tv, $tf & ~(FLAG_ANCHOR_BEFORE|FLAG_ITERATING) );
         $have_added = 1;
      }

      if( $tf & FLAG_ITERATING ) {
         die "ARGH encountered FLAG_ITERATING while walking the list of tags during ->_remove_tag";
      }

      splice @$tags, $i, 1;

      if( $keepends and $ts < $start ) {
         # Preserve the fragment before the extent. Its end is no longer
         # the original end, so it must not stay anchored-after.
         # $i is deliberately not stepped back in this branch.
         $self->_insert_tag( $ts, $start, $tn, $tv, $tf & ~(FLAG_ANCHOR_AFTER|FLAG_ITERATING) );
         $have_added = 1;
      }
      else {
         # Nothing took the removed tag's slot; revisit this index
         $i--;
      }
   }

   if( DEBUG && $have_added ) {
      $self->_assert_sorted;
   }

   return $self;
}
795
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
=head2 unapply_tag |
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
$st->unapply_tag( $start, $len, $name ) |
799
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
Unapply the named tag value from the given extent. If the tag extends beyond |
801
|
|
|
|
|
|
|
this extent, then any partial fragment of the tag will be left in the string. |
802
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
This method returns the C<$st> object. |
804
|
|
|
|
|
|
|
|
805
|
|
|
|
|
|
|
$st->unapply_tag( $e, $name ) |
806
|
|
|
|
|
|
|
|
807
|
|
|
|
|
|
|
Alternatively, an existing L<String::Tagged::Extent> object can be passed as |
808
|
|
|
|
|
|
|
the first argument instead of two integers. |
809
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
=cut |
811
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
# Remove the named tag from the given extent only, by delegating to
# _remove_tag with its keep-ends argument true. All remaining arguments
# (either $start, $len, $name or $e, $name) are passed through untouched.
sub unapply_tag
{
   my ( $self, @extent_and_name ) = @_;

   my $keepends = 1;

   return $self->_remove_tag( $keepends, @extent_and_name );
}
817
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
=head2 delete_tag |
819
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
$st->delete_tag( $start, $len, $name ) |
821
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
Delete the named tag within the given extent. Entire tags are removed, even if |
823
|
|
|
|
|
|
|
they extend beyond this extent. |
824
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
This method returns the C<$st> object. |
826
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
$st->delete_tag( $e, $name ) |
828
|
|
|
|
|
|
|
|
829
|
|
|
|
|
|
|
Alternatively, an existing L<String::Tagged::Extent> object can be passed as |
830
|
|
|
|
|
|
|
the first argument instead of two integers. |
831
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
=cut |
833
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
# Delete whole tags of the given name that fall within the given extent, by
# delegating to _remove_tag with its keep-ends argument false. All remaining
# arguments (either $start, $len, $name or $e, $name) are passed through
# untouched.
sub delete_tag
{
   my ( $self, @extent_and_name ) = @_;

   my $keepends = 0;

   return $self->_remove_tag( $keepends, @extent_and_name );
}
839
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
=head2 delete_all_tag |
841
|
|
|
|
|
|
|
|
842
|
|
|
|
|
|
|
$st->delete_all_tag( $name ) |
843
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
I |
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
Deletes every tag with the given name. This is more efficient than calling |
847
|
|
|
|
|
|
|
C<iter_extents> to list the tags then C<delete_tag> on each one individually |
848
|
|
|
|
|
|
|
in the case of a simple name match. |
849
|
|
|
|
|
|
|
|
850
|
|
|
|
|
|
|
This method returns the C<$st> object. |
851
|
|
|
|
|
|
|
|
852
|
|
|
|
|
|
|
=cut |
853
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
# Delete every tag whose name is $name, wherever it applies; returns $self.
#
# The existing tag array is filtered in place, so the ARRAY reference held
# in $self->{tags} stays the same and the surviving tags keep their order.
sub delete_all_tag
{
   my ( $self, $name ) = @_;

   my $tags = $self->{tags};

   # Element [2] of each tag record is its name
   @$tags = grep { $_->[2] ne $name } @$tags;

   return $self;
}
872
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
=head2 merge_tags |
874
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
$st->merge_tags( $eqsub ) |
876
|
|
|
|
|
|
|
|
877
|
|
|
|
|
|
|
Merge neighbouring or overlapping tags of the same name and equal values. |
878
|
|
|
|
|
|
|
|
879
|
|
|
|
|
|
|
For each pair of tags of the same name that apply on neighbouring or |
880
|
|
|
|
|
|
|
overlapping extents, the C<$eqsub> callback is called, as |
881
|
|
|
|
|
|
|
|
882
|
|
|
|
|
|
|
$equal = $eqsub->( $name, $value_a, $value_b ) |
883
|
|
|
|
|
|
|
|
884
|
|
|
|
|
|
|
If this function returns true then the tags are merged. |
885
|
|
|
|
|
|
|
|
886
|
|
|
|
|
|
|
The equality test function is free to perform any comparison of the values |
887
|
|
|
|
|
|
|
that may be relevant to the application; for example it may deeply compare |
888
|
|
|
|
|
|
|
referred structures and check for equivalence in some application-defined |
889
|
|
|
|
|
|
|
manner. In this case, the first tag of a pair is retained, the second is |
890
|
|
|
|
|
|
|
deleted. This may be relevant if the tag value is a reference to some object. |
891
|
|
|
|
|
|
|
|
892
|
|
|
|
|
|
|
=cut |
893
|
|
|
|
|
|
|
|
894
|
|
|
|
|
|
|
# Merge neighbouring or overlapping tags that share a name and whose values
# the caller considers equal.
#
#   $st->merge_tags( $eqsub )
#
# $eqsub is called as $eqsub->( $name, $value_a, $value_b ) for each
# candidate pair; a true result merges the second tag into the first. The
# first tag of a pair is retained (keeping its value), the second is removed.
#
# The merged tag's end only ever grows: if the second tag lies entirely
# inside the first, the first tag's end is left alone. FLAG_ANCHOR_AFTER is
# inherited from the second tag.
sub merge_tags
{
   my $self = shift;
   my ( $eqsub ) = @_;

   my $tags = $self->{tags};

   # Can't foreach() because we modify @$tags
   OUTER: for( my $i = 0; $i < @$tags; $i++ ) {
      my ( $ts, $te, $tn, $tv, $tf ) = @{ $tags->[$i] };

      for( my $j = $i+1; $j < @$tags; $j++ ) {
         my ( $t2s, $t2e, $t2n, $t2v, $t2f ) = @{ $tags->[$j] };

         # Tags starting beyond our end neither touch nor overlap us
         last if $t2s > $te;
         next unless $t2n eq $tn;

         last unless $eqsub->( $tn, $tv, $t2v );

         # Need to delete the tag at $j, extend the end of the tag at $i, and
         # possibly move $i later
         splice @$tags, $j, 1, ();
         $j--;

         # Only extend; a tag nested inside ours must not shorten it
         $te = $tags->[$i][1] = $t2e if $t2e > $te;

         $tags->[$i][4] |= FLAG_ANCHOR_AFTER if $t2f & FLAG_ANCHOR_AFTER;

         # _cmp_tags() compares the package variables $a and $b; a positive
         # result means the tag in $a belongs after the one in $b
         local $a = $tags->[$i];

         if( local $b = $tags->[$i+1] and _cmp_tags() > 0 ) {
            # Find the first later tag that the merged one does not sort
            # after (or the end of the list)
            my $newpos = $i+1;
            while( local $b = $tags->[$newpos] and _cmp_tags() > 0 ) {
               $newpos++;
            }

            # Removing the tag at $i shifts everything after it down by one,
            # hence the insertion index of $newpos - 1
            splice @$tags, $newpos - 1, 0, splice @$tags, $i, 1, ();

            # A different tag now sits at index $i; process it from scratch
            redo OUTER;
         }
      }
   }
}
938
|
|
|
|
|
|
|
|
939
|
|
|
|
|
|
|
=head2 iter_extents |
940
|
|
|
|
|
|
|
|
941
|
|
|
|
|
|
|
$st->iter_extents( $callback, %opts ) |
942
|
|
|
|
|
|
|
|
943
|
|
|
|
|
|
|
Iterate the tags stored in the string. For each tag, the CODE reference in |
944
|
|
|
|
|
|
|
C<$callback> is invoked once, being passed a L<String::Tagged::Extent> object |
945
|
|
|
|
|
|
|
that represents the extent of the tag. |
946
|
|
|
|
|
|
|
|
947
|
|
|
|
|
|
|
$callback->( $extent, $tagname, $tagvalue ) |
948
|
|
|
|
|
|
|
|
949
|
|
|
|
|
|
|
Options passed in C<%opts> may include: |
950
|
|
|
|
|
|
|
|
951
|
|
|
|
|
|
|
=over 4 |
952
|
|
|
|
|
|
|
|
953
|
|
|
|
|
|
|
=item start => INT |
954
|
|
|
|
|
|
|
|
955
|
|
|
|
|
|
|
Start at the given position; defaults to 0. |
956
|
|
|
|
|
|
|
|
957
|
|
|
|
|
|
|
=item end => INT |
958
|
|
|
|
|
|
|
|
959
|
|
|
|
|
|
|
End after the given position; defaults to end of string. This option overrides |
960
|
|
|
|
|
|
|
C<len>. |
961
|
|
|
|
|
|
|
|
962
|
|
|
|
|
|
|
=item len => INT |
963
|
|
|
|
|
|
|
|
964
|
|
|
|
|
|
|
End after the given length beyond the start position; defaults to end of |
965
|
|
|
|
|
|
|
string. This option only applies if C<end> is not given. |
966
|
|
|
|
|
|
|
|
967
|
|
|
|
|
|
|
=item only => ARRAY |
968
|
|
|
|
|
|
|
|
969
|
|
|
|
|
|
|
Select only the tags named in the given ARRAY reference. |
970
|
|
|
|
|
|
|
|
971
|
|
|
|
|
|
|
=item except => ARRAY |
972
|
|
|
|
|
|
|
|
973
|
|
|
|
|
|
|
Select all the tags except those named in the given ARRAY reference. |
974
|
|
|
|
|
|
|
|
975
|
|
|
|
|
|
|
=back |
976
|
|
|
|
|
|
|
|
977
|
|
|
|
|
|
|
It is safe to call C<delete_tag> from within the |
978
|
|
|
|
|
|
|
callback function to remove the tag currently being iterated on. |
979
|
|
|
|
|
|
|
|
980
|
|
|
|
|
|
|
$str->iter_extents( sub { |
981
|
|
|
|
|
|
|
my ( $e, $n, $v ) = @_; |
982
|
|
|
|
|
|
|
$str->delete_tag( $e, $n ) if $n =~ m/^tmp_/; |
983
|
|
|
|
|
|
|
} ); |
984
|
|
|
|
|
|
|
|
985
|
|
|
|
|
|
|
Apart from this scenario, the tags in the string should not otherwise be added |
986
|
|
|
|
|
|
|
or removed while the iteration is occurring. |
987
|
|
|
|
|
|
|
|
988
|
|
|
|
|
|
|
=cut |
989
|
|
|
|
|
|
|
|
990
|
|
|
|
|
|
|
# Invoke $callback once per stored tag, as
#
#   $callback->( $extent, $tagname, $tagvalue )
#
# Recognised %opts: start, end, len (used only when end is absent), only
# and except (ARRAY refs of tag names). By default the range runs from 0 to
# one past the string length.
#
# While a callback runs, its tag carries FLAG_ITERATING and is published in
# $self->{iterating}. If the tag comes back flagged FLAG_DELETED it is
# spliced out of the list here, after the callback has returned.
sub iter_extents
{
   my ( $self, $callback, %opts ) = @_;

   my $start = 0;
   $start = $opts{start} if exists $opts{start};

   my $end;
   if( exists $opts{end} ) {
      $end = $opts{end};
   }
   elsif( exists $opts{len} ) {
      $end = $start + $opts{len};
   }
   else {
      $end = $self->length + 1; # so as to include zerolen at end
   }

   my ( $only, $except );
   $only   = { map { $_ => 1 } @{ $opts{only} } }   if exists $opts{only};
   $except = { map { $_ => 1 } @{ $opts{except} } } if exists $opts{except};

   my $tags = $self->{tags};

   # An index loop, because entries may be spliced out as we go
   my $idx = 0;
   while( $idx < @$tags ) {
      my $tag = $tags->[$idx];
      my ( $from, $to, $name, $value, $flags ) = @$tag;

      next if $to < $start;
      last if $from >= $end;

      next if $only   && !$only->{$name};
      next if $except && $except->{$name};

      $tag->[4] |= FLAG_ITERATING;
      local $self->{iterating} = $tag;

      # $flags was captured before FLAG_ITERATING was set
      $callback->( $self->_mkextent( $from, $to, $flags ), $name, $value );

      $tag->[4] &= ~FLAG_ITERATING;

      next unless $tag->[4] & FLAG_DELETED;

      # The callback asked for this tag to be removed
      splice @$tags, $idx, 1, ();
      $idx--;
   }
   continue {
      $idx++;
   }
}
1033
|
|
|
|
|
|
|
|
1034
|
|
|
|
|
|
|
=head2 iter_tags |
1035
|
|
|
|
|
|
|
|
1036
|
|
|
|
|
|
|
$st->iter_tags( $callback, %opts ) |
1037
|
|
|
|
|
|
|
|
1038
|
|
|
|
|
|
|
Iterate the tags stored in the string. For each tag, the CODE reference in |
1039
|
|
|
|
|
|
|
C<$callback> is invoked once, being passed the start point and length of the |
1040
|
|
|
|
|
|
|
tag. |
1041
|
|
|
|
|
|
|
|
1042
|
|
|
|
|
|
|
$callback->( $start, $length, $tagname, $tagvalue ) |
1043
|
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
Options passed in C<%opts> are the same as for C<iter_extents>. |
1045
|
|
|
|
|
|
|
|
1046
|
|
|
|
|
|
|
=cut |
1047
|
|
|
|
|
|
|
|
1048
|
|
|
|
|
|
|
# Iterate tags via iter_extents, but hand the callback the start position
# and length of each extent rather than the extent object:
#
#   $callback->( $start, $length, $tagname, $tagvalue )
#
# %opts is forwarded to iter_extents unchanged.
sub iter_tags
{
   my ( $self, $callback, %opts ) = @_;

   # Adapter that unpacks the extent object for the caller's callback
   my $unpack_extent = sub {
      my ( $extent, $name, $value ) = @_;
      $callback->( $extent->start, $extent->length, $name, $value );
   };

   $self->iter_extents( $unpack_extent, %opts );
}
1061
|
|
|
|
|
|
|
|
1062
|
|
|
|
|
|
|
=head2 iter_extents_nooverlap |
1063
|
|
|
|
|
|
|
|
1064
|
|
|
|
|
|
|
$st->iter_extents_nooverlap( $callback, %opts ) |
1065
|
|
|
|
|
|
|
|
1066
|
|
|
|
|
|
|
Iterate non-overlapping extents of tags stored in the string. The CODE |
1067
|
|
|
|
|
|
|
reference in C<$callback> is invoked for each extent in the string where no |
1068
|
|
|
|
|
|
|
tags change. The entire set of tags active in that extent is given to the |
1069
|
|
|
|
|
|
|
callback. Because the extent covers possibly-multiple tags, it will not define |
1070
|
|
|
|
|
|
|
the C<anchor_before> and C<anchor_after> flags. |
1071
|
|
|
|
|
|
|
|
1072
|
|
|
|
|
|
|
$callback->( $extent, %tags ) |
1073
|
|
|
|
|
|
|
|
1074
|
|
|
|
|
|
|
The callback will be invoked over the entire length of the string, including |
1075
|
|
|
|
|
|
|
any extents with no tags applied. |
1076
|
|
|
|
|
|
|
|
1077
|
|
|
|
|
|
|
Options may be passed in C<%opts> to control the range of the string iterated |
1078
|
|
|
|
|
|
|
over, in the same way as the C<iter_extents> method. |
1079
|
|
|
|
|
|
|
|
1080
|
|
|
|
|
|
|
If the C<only> or C<except> filters are applied, then only the tags that |
1081
|
|
|
|
|
|
|
survive filtering will be present in the C<%tags> hash. Tags that are excluded |
1082
|
|
|
|
|
|
|
by the filtering will not be present, nor will their bounds be used to split |
1083
|
|
|
|
|
|
|
the string into extents. |
1084
|
|
|
|
|
|
|
|
1085
|
|
|
|
|
|
|
=cut |
1086
|
|
|
|
|
|
|
|
1087
|
|
|
|
|
|
|
# Walk the requested range as a sequence of non-overlapping extents within
# which the set of active tags does not change, invoking
#
#   $callback->( $extent, %tags )
#
# for each one, including extents that have no tags at all. Recognised
# %opts: start, end, len (used only when end is absent), only and except
# (ARRAY refs of tag names). By default the range is the whole string.
# Extents are always built with flags of 0.
sub iter_extents_nooverlap
{
   my ( $self, $callback, %opts ) = @_;

   my $start = 0;
   $start = $opts{start} if exists $opts{start};

   my $end;
   if( exists $opts{end} ) {
      $end = $opts{end};
   }
   elsif( exists $opts{len} ) {
      $end = $start + $opts{len};
   }
   else {
      $end = $self->length;
   }

   my ( $only, $except );
   $only   = { map { $_ => 1 } @{ $opts{only} } }   if exists $opts{only};
   $except = { map { $_ => 1 } @{ $opts{except} } } if exists $opts{except};

   my $tags = $self->{tags};

   my @open; # ARRAY of [ $ts, $te, $tn, $tv ]
   my $cursor = $start;

   foreach my $tag ( @$tags ) {
      my ( $ts, $te, $tn, $tv ) = @$tag;

      next if $te < $start;
      last if $ts > $end;

      next if $only   && !$only->{$tn};
      next if $except && $except->{$tn};

      # Emit everything that lies before this tag's start
      until( $cursor >= $ts ) {
         my ( %values, %ends );
         my $stop = $ts;

         for my $entry ( @open ) {
            my ( undef, $e, $n, $v ) = @$entry;

            # The extent stops at the nearest end of any open tag
            $stop = $e if $e < $stop;

            # An earlier-listed tag of the same name with a sooner end wins
            next if $ends{$n} && $ends{$n} < $e;

            $values{$n} = $v;
            $ends{$n}   = $e;
         }

         $callback->( $self->_mkextent( $cursor, $stop, 0 ), %values );

         $cursor = $stop;
         @open = grep { $_->[1] > $cursor } @open;
      }

      push @open, [ $ts, $te, $tn, $tv ];
   }

   # Emit what remains between the last tag start and the end of the range
   until( $cursor >= $end ) {
      my ( %values, %ends );
      my $stop = $end;

      for my $entry ( @open ) {
         my ( undef, $e, $n, $v ) = @$entry;

         $stop = $e if $e < $stop;
         next if $ends{$n} && $ends{$n} < $e;

         $values{$n} = $v;
         $ends{$n}   = $e;
      }

      $callback->( $self->_mkextent( $cursor, $stop, 0 ), %values );

      $cursor = $stop;
      @open = grep { $_->[1] > $cursor } @open;
   }

   # We might have zero-length tags active at the very end of the range
   if( my @zerolen = grep { $_->[0] == $cursor and $_->[1] == $cursor } @open ) {
      my %values;
      for my $entry ( @open ) {
         my ( undef, undef, $n, $v ) = @$entry;

         $values{$n} = $v;
      }

      $callback->( $self->_mkextent( $cursor, $cursor, 0 ), %values );
   }
}
1176
|
|
|
|
|
|
|
|
1177
|
|
|
|
|
|
|
=head2 iter_tags_nooverlap |
1178
|
|
|
|
|
|
|
|
1179
|
|
|
|
|
|
|
$st->iter_tags_nooverlap( $callback, %opts ) |
1180
|
|
|
|
|
|
|
|
1181
|
|
|
|
|
|
|
Iterate extents of the string using C<iter_extents_nooverlap>, but passing |
1182
|
|
|
|
|
|
|
the start and length of each extent to the callback instead of the extent |
1183
|
|
|
|
|
|
|
object. |
1184
|
|
|
|
|
|
|
|
1185
|
|
|
|
|
|
|
$callback->( $start, $length, %tags ) |
1186
|
|
|
|
|
|
|
|
1187
|
|
|
|
|
|
|
Options may be passed in C<%opts> to control the range of the string iterated |
1188
|
|
|
|
|
|
|
over, in the same way as the C<iter_extents> method. |
1189
|
|
|
|
|
|
|
|
1190
|
|
|
|
|
|
|
=cut |
1191
|
|
|
|
|
|
|
|
1192
|
|
|
|
|
|
|
# Iterate via iter_extents_nooverlap, but hand the callback the start
# position and length of each extent rather than the extent object:
#
#   $callback->( $start, $length, %tags )
#
# %opts is forwarded to iter_extents_nooverlap unchanged.
sub iter_tags_nooverlap
{
   my ( $self, $callback, %opts ) = @_;

   # Adapter that unpacks the extent object for the caller's callback
   my $unpack_extent = sub {
      my ( $extent, %tags ) = @_;
      $callback->( $extent->start, $extent->length, %tags );
   };

   $self->iter_extents_nooverlap( $unpack_extent, %opts );
}
1205
|
|
|
|
|
|
|
|
1206
|
|
|
|
|
|
|
=head2 iter_substr_nooverlap |
1207
|
|
|
|
|
|
|
|
1208
|
|
|
|
|
|
|
$st->iter_substr_nooverlap( $callback, %opts ) |
1209
|
|
|
|
|
|
|
|
1210
|
|
|
|
|
|
|
Iterate extents of the string using C<iter_extents_nooverlap>, but passing the |
1211
|
|
|
|
|
|
|
substring of data instead of the extent object. |
1212
|
|
|
|
|
|
|
|
1213
|
|
|
|
|
|
|
$callback->( $substr, %tags ) |
1214
|
|
|
|
|
|
|
|
1215
|
|
|
|
|
|
|
Options may be passed in C<%opts> to control the range of the string iterated |
1216
|
|
|
|
|
|
|
over, in the same way as the C<iter_extents> method. |
1217
|
|
|
|
|
|
|
|
1218
|
|
|
|
|
|
|
=cut |
1219
|
|
|
|
|
|
|
|
1220
|
|
|
|
|
|
|
# Iterate via iter_extents_nooverlap, but hand the callback the result of
# each extent's plain_substr method rather than the extent object:
#
#   $callback->( $substr, %tags )
#
# %opts is forwarded to iter_extents_nooverlap unchanged.
sub iter_substr_nooverlap
{
   my ( $self, $callback, %opts ) = @_;

   # Adapter that swaps the extent object for its substring
   my $with_substr = sub {
      my ( $extent, %tags ) = @_;
      $callback->( $extent->plain_substr, %tags );
   };

   $self->iter_extents_nooverlap( $with_substr, %opts );
}
1233
|
|
|
|
|
|
|
|
1234
|
|
|
|
|
|
|
=head2 tagnames |
1235
|
|
|
|
|
|
|
|
1236
|
|
|
|
|
|
|
@names = $st->tagnames |
1237
|
|
|
|
|
|
|
|
1238
|
|
|
|
|
|
|
Returns the set of tag names used in the string, in no particular order. |
1239
|
|
|
|
|
|
|
|
1240
|
|
|
|
|
|
|
=cut |
1241
|
|
|
|
|
|
|
|
1242
|
|
|
|
|
|
|
# Return the distinct tag names present in the string, in no particular
# order (they come out as the keys of a hash).
sub tagnames
{
   my ( $self ) = @_;

   my $tags = $self->{tags};

   # Element [2] of each tag record is its name; count occurrences so that
   # each name appears once as a key
   my %seen;
   $seen{ $_->[2] }++ for @$tags;

   keys %seen;
}
1255
|
|
|
|
|
|
|
|
1256
|
|
|
|
|
|
|
=head2 get_tags_at |
1257
|
|
|
|
|
|
|
|
1258
|
|
|
|
|
|
|
$tags = $st->get_tags_at( $pos ) |
1259
|
|
|
|
|
|
|
|
1260
|
|
|
|
|
|
|
Returns a HASH reference of all the tag values active at the given position. |
1261
|
|
|
|
|
|
|
|
1262
|
|
|
|
|
|
|
=cut |
1263
|
|
|
|
|
|
|
|
1264
|
|
|
|
|
|
|
# Return a HASH reference mapping tag name to value for every tag covering
# position $pos, i.e. whose start is <= $pos and whose end is > $pos. If
# several tags of one name cover $pos, the one latest in the list wins.
sub get_tags_at
{
   my ( $self, $pos ) = @_;

   my $tags = $self->{tags};

   my %active;

   # TODO: turn this into a binary search
   for my $tag ( @$tags ) {
      my ( $from, $to, $name, $value ) = @$tag;

      # Stops at the first tag starting after $pos; this relies on the
      # list being kept in start order
      last if $from > $pos;

      $active{$name} = $value if $to > $pos;
   }

   return \%active;
}
1285
|
|
|
|
|
|
|
|
1286
|
|
|
|
|
|
|
=head2 get_tag_at |
1287
|
|
|
|
|
|
|
|
1288
|
|
|
|
|
|
|
$value = $st->get_tag_at( $pos, $name ) |
1289
|
|
|
|
|
|
|
|
1290
|
|
|
|
|
|
|
Returns the value of the named tag at the given position, or C<undef> if the |
1291
|
|
|
|
|
|
|
tag is not applied there. |
1292
|
|
|
|
|
|
|
|
1293
|
|
|
|
|
|
|
=cut |
1294
|
|
|
|
|
|
|
|
1295
|
|
|
|
|
|
|
# Return the value of the tag called $name at position $pos, or undef when
# no such tag covers that position. If several tags of that name cover
# $pos, the value of the one latest in the list is returned.
sub get_tag_at
{
   my ( $self, $pos, $name ) = @_;

   my $tags = $self->{tags};

   my $found;

   for my $tag ( @$tags ) {
      my ( $from, $to, $tagname, $tagvalue ) = @$tag;

      # Stops at the first tag starting after $pos; this relies on the
      # list being kept in start order
      last if $from > $pos;

      next unless $to > $pos;
      next unless $tagname eq $name;

      $found = $tagvalue;
   }

   return $found;
}
1315
|
|
|
|
|
|
|
|
1316
|
|
|
|
|
|
|
=head2 get_tag_extent |
1317
|
|
|
|
|
|
|
|
1318
|
|
|
|
|
|
|
$extent = $st->get_tag_extent( $pos, $name ) |
1319
|
|
|
|
|
|
|
|
1320
|
|
|
|
|
|
|
If the named tag applies to the given position, returns a |
1321
|
|
|
|
|
|
|
L<String::Tagged::Extent> object to represent the extent of the tag at that |
1322
|
|
|
|
|
|
|
position. If it does not, C<undef> is returned. If an extent is returned it |
1323
|
|
|
|
|
|
|
will define the C<anchor_before> and C<anchor_after> flags if appropriate. |
1324
|
|
|
|
|
|
|
|
1325
|
|
|
|
|
|
|
=cut |
1326
|
|
|
|
|
|
|
|
1327
|
|
|
|
|
|
|
# Return an extent (built by _mkextent from the tag's start, end and flags)
# for the tag called $name that covers position $pos, or undef if there is
# none. If several such tags cover $pos, the one latest in the list is used.
sub get_tag_extent
{
   my ( $self, $pos, $name ) = @_;

   my $tags = $self->{tags};

   # ( $start, $end, $flags ) of the most recent match; empty until found
   my @found;

   for my $tag ( @$tags ) {
      my ( $from, $to, $tagname, undef, $flags ) = @$tag;

      # Stops at the first tag starting after $pos; this relies on the
      # list being kept in start order
      last if $from > $pos;

      next if $to <= $pos or $tagname ne $name;

      @found = ( $from, $to, $flags );
   }

   # An explicit undef (not a bare return), matching the original
   return undef unless defined $found[0];

   return $self->_mkextent( @found );
}
1356
|
|
|
|
|
|
|
|
1357
|
|
|
|
|
|
|
=head2 get_tag_missing_extent |
1358
|
|
|
|
|
|
|
|
1359
|
|
|
|
|
|
|
$extent = $st->get_tag_missing_extent( $pos, $name ) |
1360
|
|
|
|
|
|
|
|
1361
|
|
|
|
|
|
|
If the named tag does not apply at the given position, returns the extent of |
1362
|
|
|
|
|
|
|
the string around that position that does not have the tag. If it does exist, |
1363
|
|
|
|
|
|
|
C<undef> is returned. If an extent is returned it will not define the |
1364
|
|
|
|
|
|
|
C<anchor_before> and C<anchor_after> flags, as these do not make sense for the |
1365
|
|
|
|
|
|
|
range in which a tag is absent. |
1366
|
|
|
|
|
|
|
|
1367
|
|
|
|
|
|
|
=cut |
1368
|
|
|
|
|
|
|
|
1369
|
|
|
|
|
|
|
# If no tag called $name covers position $pos, return an extent (built by
# _mkextent with flags of 0) for the surrounding gap in which the tag is
# absent; if such a tag does cover $pos, return undef.
#
# The gap runs from the end of the last same-named tag before $pos (or 0)
# to the start of the next same-named tag after $pos (or the string length).
sub get_tag_missing_extent
{
   my ( $self, $pos, $name ) = @_;

   my $tags = $self->{tags};

   my $gap_start = 0;

   for my $tag ( @$tags ) {
      my ( $from, $to, $tagname ) = @$tag;

      next if $tagname ne $name;

      # The tag is present at $pos, so there is no gap to report
      return undef if $from <= $pos and $to > $pos;

      # First same-named tag beyond $pos closes the gap
      return $self->_mkextent( $gap_start, $from, 0 ) if $from > $pos;

      # Tag lies wholly before $pos; the gap can start no earlier than its end
      $gap_start = $to;
   }

   return $self->_mkextent( $gap_start, $self->length, 0 );
}
1396
|
|
|
|
|
|
|
|
1397
|
|
|
|
|
|
|
=head2 set_substr |
1398
|
|
|
|
|
|
|
|
1399
|
|
|
|
|
|
|
$st->set_substr( $start, $len, $newstr ) |
1400
|
|
|
|
|
|
|
|
1401
|
|
|
|
|
|
|
Modifies an extent of the underlying plain string to that given. The extents of |
1402
|
|
|
|
|
|
|
tags in the string are adjusted to cope with the modified region, and the |
1403
|
|
|
|
|
|
|
adjustment in length. |
1404
|
|
|
|
|
|
|
|
1405
|
|
|
|
|
|
|
Tags entirely before the replaced extent remain unchanged. |
1406
|
|
|
|
|
|
|
|
1407
|
|
|
|
|
|
|
Tags entirely within the replaced extent are deleted. |
1408
|
|
|
|
|
|
|
|
1409
|
|
|
|
|
|
|
Tags entirely after the replaced extent are moved by appropriate amount to |
1410
|
|
|
|
|
|
|
ensure they still apply to the same characters as before. |
1411
|
|
|
|
|
|
|
|
1412
|
|
|
|
|
|
|
Tags that start before and end after the extent remain, and have their lengths |
1413
|
|
|
|
|
|
|
suitably adjusted. |
1414
|
|
|
|
|
|
|
|
1415
|
|
|
|
|
|
|
Tags that span just the start or end of the extent, but not both, are |
1416
|
|
|
|
|
|
|
truncated, so as to remove the part of the tag applied on the modified extent |
1417
|
|
|
|
|
|
|
but preserving that applied outside. |
1418
|
|
|
|
|
|
|
|
1419
|
|
|
|
|
|
|
If C<$newstr> is a C<String::Tagged> object, then its tags will be applied to |
1420
|
|
|
|
|
|
|
C<$st> as appropriate. Edge-anchored tags in C<$newstr> will not be extended |
1421
|
|
|
|
|
|
|
through C<$st>, though they will apply as edge-anchored if they now sit at the |
1422
|
|
|
|
|
|
|
edge of the new string. |
1423
|
|
|
|
|
|
|
|
1424
|
|
|
|
|
|
|
=cut |
1425
|
|
|
|
|
|
|
|
1426
|
|
|
|
|
|
|
sub set_substr |
1427
|
|
|
|
|
|
|
{ |
1428
|
56
|
|
|
56
|
1
|
14821
|
my $self = shift; |
1429
|
56
|
|
|
|
|
125
|
my ( $start, $len, $new ) = @_; |
1430
|
|
|
|
|
|
|
|
1431
|
56
|
|
|
|
|
134
|
my $limit = $self->length; |
1432
|
|
|
|
|
|
|
|
1433
|
56
|
50
|
|
|
|
182
|
$start = $limit if $start > $limit; |
1434
|
56
|
50
|
|
|
|
163
|
$len = ( $limit - $start ) if $len > ( $limit - $start ); |
1435
|
|
|
|
|
|
|
|
1436
|
56
|
|
|
|
|
208
|
CORE::substr( $self->{str}, $start, $len ) = $new; |
1437
|
|
|
|
|
|
|
|
1438
|
56
|
|
|
|
|
105
|
my $oldend = $start + $len; |
1439
|
56
|
|
|
|
|
107
|
my $newend = $start + CORE::length( $new ); |
1440
|
|
|
|
|
|
|
|
1441
|
56
|
|
|
|
|
88
|
my $delta = $newend - $oldend; |
1442
|
|
|
|
|
|
|
# Positions after $oldend have now moved up $delta places |
1443
|
|
|
|
|
|
|
|
1444
|
56
|
|
|
|
|
93
|
my $tags = $self->{tags}; |
1445
|
|
|
|
|
|
|
|
1446
|
56
|
|
|
|
|
87
|
my $i = 0; |
1447
|
|
|
|
|
|
|
|
1448
|
56
|
|
|
|
|
159
|
for( ; $i < @$tags; $i++ ) { |
1449
|
|
|
|
|
|
|
# In this loop we'll handle tags that start before the deleted section |
1450
|
|
|
|
|
|
|
|
1451
|
40
|
|
|
|
|
68
|
my $t = $tags->[$i]; |
1452
|
40
|
|
|
|
|
132
|
my ( $ts, $te, undef, undef, $tf ) = @$t; |
1453
|
|
|
|
|
|
|
|
1454
|
40
|
100
|
100
|
|
|
202
|
last if $ts >= $start and not( $tf & FLAG_ANCHOR_BEFORE ); |
1455
|
|
|
|
|
|
|
|
1456
|
|
|
|
|
|
|
# Two cases: |
1457
|
|
|
|
|
|
|
# A: Tag spans entirely outside deleted section - stretch/compress it |
1458
|
|
|
|
|
|
|
# We may have to collapse it to nothing, so delete it |
1459
|
|
|
|
|
|
|
# B: Tag starts before but ends within deleted section - truncate it |
1460
|
|
|
|
|
|
|
# Plus a case we don't care about |
1461
|
|
|
|
|
|
|
# Tag starts and ends entirely before the deleted section - ignore it |
1462
|
|
|
|
|
|
|
|
1463
|
28
|
100
|
100
|
|
|
219
|
if( $te > $oldend or |
|
|
100
|
100
|
|
|
|
|
1464
|
|
|
|
|
|
|
( $te == $oldend and $tf & FLAG_ANCHOR_AFTER ) ) { |
1465
|
|
|
|
|
|
|
# Case A |
1466
|
14
|
|
|
|
|
54
|
$t->[1] += $delta; |
1467
|
|
|
|
|
|
|
|
1468
|
14
|
50
|
|
|
|
65
|
if( $t->[0] == $t->[1] ) { |
1469
|
0
|
|
|
|
|
0
|
splice @$tags, $i, 1, (); |
1470
|
0
|
|
|
|
|
0
|
$i--; |
1471
|
0
|
|
|
|
|
0
|
next; |
1472
|
|
|
|
|
|
|
} |
1473
|
|
|
|
|
|
|
} |
1474
|
|
|
|
|
|
|
elsif( $te > $start ) { |
1475
|
|
|
|
|
|
|
# Case B |
1476
|
1
|
|
|
|
|
3
|
$t->[1] = $start; |
1477
|
|
|
|
|
|
|
} |
1478
|
|
|
|
|
|
|
} |
1479
|
|
|
|
|
|
|
|
1480
|
56
|
|
|
|
|
176
|
for( ; $i < @$tags; $i++ ) { |
1481
|
13
|
|
|
|
|
63
|
my $t = $tags->[$i]; |
1482
|
13
|
|
|
|
|
39
|
my ( $ts, $te ) = @$t; |
1483
|
|
|
|
|
|
|
|
1484
|
|
|
|
|
|
|
# In this loop we'll handle tags that start within the deleted section |
1485
|
13
|
100
|
|
|
|
52
|
last if $ts >= $oldend; |
1486
|
|
|
|
|
|
|
|
1487
|
|
|
|
|
|
|
# Two cases |
1488
|
|
|
|
|
|
|
# C: Tag contained entirely within deleted section - delete it |
1489
|
|
|
|
|
|
|
# D: Tag starts within but ends after the deleted section - truncate it |
1490
|
|
|
|
|
|
|
|
1491
|
3
|
100
|
|
|
|
16
|
if( $te <= $oldend ) { |
1492
|
|
|
|
|
|
|
# Case C |
1493
|
2
|
|
|
|
|
7
|
splice @$tags, $i, 1; |
1494
|
2
|
|
|
|
|
3
|
$i--; |
1495
|
2
|
|
|
|
|
7
|
next; |
1496
|
|
|
|
|
|
|
} |
1497
|
|
|
|
|
|
|
else { |
1498
|
|
|
|
|
|
|
# Case D |
1499
|
1
|
|
|
|
|
3
|
$t->[0] = $newend; |
1500
|
1
|
|
|
|
|
3
|
$t->[1] += $delta; |
1501
|
|
|
|
|
|
|
} |
1502
|
|
|
|
|
|
|
} |
1503
|
|
|
|
|
|
|
|
1504
|
56
|
|
|
|
|
139
|
for( ; $i < @$tags; $i++ ) { |
1505
|
12
|
|
|
|
|
25
|
my $t = $tags->[$i]; |
1506
|
12
|
|
|
|
|
55
|
my ( $ts, $te, undef, undef, $tf ) = @$t; |
1507
|
|
|
|
|
|
|
|
1508
|
|
|
|
|
|
|
# In this loop we'll handle tags that start after the deleted section |
1509
|
|
|
|
|
|
|
|
1510
|
|
|
|
|
|
|
# One case |
1511
|
|
|
|
|
|
|
# E: Tag starts and ends after the deleted section - move it |
1512
|
12
|
100
|
|
|
|
62
|
$t->[0] += $delta unless $tf & FLAG_ANCHOR_BEFORE; |
1513
|
12
|
|
|
|
|
47
|
$t->[1] += $delta; |
1514
|
|
|
|
|
|
|
|
1515
|
|
|
|
|
|
|
# If we've not moved the start (because it was FLAG_ANCHOR_BEFORE), we |
1516
|
|
|
|
|
|
|
# might now have an ordering constraint violation. Better fix it. |
1517
|
12
|
|
|
|
|
47
|
local $b = $t; |
1518
|
12
|
|
|
|
|
51
|
foreach my $new_i ( reverse 0 .. $i-1 ) { |
1519
|
7
|
|
|
|
|
14
|
local $a = $tags->[$new_i]; |
1520
|
|
|
|
|
|
|
|
1521
|
7
|
100
|
|
|
|
27
|
last if _cmp_tags() <= 0; |
1522
|
|
|
|
|
|
|
|
1523
|
1
|
|
|
|
|
6
|
splice @$tags, $new_i, 0, splice @$tags, $i, 1, (); |
1524
|
|
|
|
|
|
|
|
1525
|
1
|
|
|
|
|
3
|
last; |
1526
|
|
|
|
|
|
|
} |
1527
|
|
|
|
|
|
|
} |
1528
|
|
|
|
|
|
|
|
1529
|
56
|
100
|
|
|
|
115
|
if( is_string_tagged( $new ) ) { |
1530
|
21
|
|
|
|
|
67
|
my $atstart = $start == 0; |
1531
|
21
|
|
|
|
|
56
|
my $atend = $newend == $self->length; |
1532
|
|
|
|
|
|
|
|
1533
|
|
|
|
|
|
|
$new->iter_extents( sub { |
1534
|
21
|
|
|
21
|
|
50
|
my ( $e, $tn, $tv ) = @_; |
1535
|
21
|
50
|
66
|
|
|
126
|
$self->apply_tag( |
|
|
100
|
100
|
|
|
|
|
1536
|
|
|
|
|
|
|
( $atstart && $e->anchor_before ) ? -1 : $start + $e->start, |
1537
|
|
|
|
|
|
|
( $atend && $e->anchor_after ) ? -1 : $e->length, |
1538
|
|
|
|
|
|
|
$tn, $tv ); |
1539
|
21
|
|
|
|
|
253
|
} ); |
1540
|
|
|
|
|
|
|
} |
1541
|
|
|
|
|
|
|
|
1542
|
56
|
|
|
|
|
163
|
$self->_assert_sorted if DEBUG; |
1543
|
|
|
|
|
|
|
|
1544
|
56
|
|
|
|
|
222
|
return $self; |
1545
|
|
|
|
|
|
|
} |
1546
|
|
|
|
|
|
|
|
1547
|
|
|
|
|
|
|
=head2 insert |
1548
|
|
|
|
|
|
|
|
1549
|
|
|
|
|
|
|
$st->insert( $start, $newstr ) |
1550
|
|
|
|
|
|
|
|
1551
|
|
|
|
|
|
|
Insert the given string at the given position. A shortcut around |
1552
|
|
|
|
|
|
|
C<set_substr>. |
1553
|
|
|
|
|
|
|
|
1554
|
|
|
|
|
|
|
If C<$newstr> is a C<String::Tagged> object, then its tags will be applied to |
1555
|
|
|
|
|
|
|
C<$st> as appropriate. If C<$start> is 0, any before-anchored tags in it will |
1556
|
|
|
|
|
|
|
become before-anchored in C<$st>. |
1557
|
|
|
|
|
|
|
|
1558
|
|
|
|
|
|
|
=cut |
1559
|
|
|
|
|
|
|
|
1560
|
|
|
|
|
|
|
# Inserts $new at position $at. Nothing is removed: this is a set_substr call
# that replaces a zero-length extent, and its return value is passed straight
# back to the caller.
sub insert
{
   my ( $self, $pos, $content ) = @_;

   return $self->set_substr( $pos, 0, $content );
}
1566
|
|
|
|
|
|
|
|
1567
|
|
|
|
|
|
|
=head2 append |
1568
|
|
|
|
|
|
|
|
1569
|
|
|
|
|
|
|
$st->append( $newstr ) |
1570
|
|
|
|
|
|
|
|
1571
|
|
|
|
|
|
|
$st .= $newstr |
1572
|
|
|
|
|
|
|
|
1573
|
|
|
|
|
|
|
Append to the underlying plain string. A shortcut around C<set_substr>. |
1574
|
|
|
|
|
|
|
|
1575
|
|
|
|
|
|
|
If C<$newstr> is a C<String::Tagged> object, then its tags will be applied to |
1576
|
|
|
|
|
|
|
C<$st> as appropriate. Any after-anchored tags in it will become after-anchored |
1577
|
|
|
|
|
|
|
in C<$st>. |
1578
|
|
|
|
|
|
|
|
1579
|
|
|
|
|
|
|
=cut |
1580
|
|
|
|
|
|
|
|
1581
|
20
|
|
|
20
|
|
81524
|
use overload '.=' => 'append'; |
|
20
|
|
|
|
|
51
|
|
|
20
|
|
|
|
|
139
|
|
1582
|
|
|
|
|
|
|
|
1583
|
|
|
|
|
|
|
# Appends $new to the end of the string and returns $self.
#
# A tagged $new is handed to set_substr so that its tags are merged as well.
# Plain text takes a cheaper route: the buffer is extended in place and every
# tag carrying FLAG_ANCHOR_AFTER has its end position (element 1 of the tag
# record; the flags live in element 4) moved out to the new end of the string.
sub append
{
   my ( $self, $new ) = @_;

   if( is_string_tagged( $new ) ) {
      return $self->set_substr( $self->length, 0, $new );
   }

   # Optimised version
   $self->{str} .= $new;

   my $end = $self->length;

   # Adjust boundaries of ANCHOR_AFTER tags
   foreach my $tag ( @{ $self->{tags} } ) {
      next unless $tag->[4] & FLAG_ANCHOR_AFTER;
      $tag->[1] = $end;
   }

   return $self;
}
1607
|
|
|
|
|
|
|
|
1608
|
|
|
|
|
|
|
=head2 append_tagged |
1609
|
|
|
|
|
|
|
|
1610
|
|
|
|
|
|
|
$st->append_tagged( $newstr, %tags ) |
1611
|
|
|
|
|
|
|
|
1612
|
|
|
|
|
|
|
Append to the underlying plain string, and apply the given tags to the |
1613
|
|
|
|
|
|
|
newly-inserted extent. |
1614
|
|
|
|
|
|
|
|
1615
|
|
|
|
|
|
|
Returns C<$st> itself so that the method may be easily chained. |
1616
|
|
|
|
|
|
|
|
1617
|
|
|
|
|
|
|
=cut |
1618
|
|
|
|
|
|
|
|
1619
|
|
|
|
|
|
|
# Appends $new and then applies each name/value pair of %tags to exactly the
# extent that was just added. Returns $self so calls can be chained.
sub append_tagged
{
   my ( $self, $new, %tags ) = @_;

   # Measure the extent before the buffer grows
   my $from   = $self->length;
   my $extent = CORE::length( $new );

   $self->append( $new );

   foreach my $name ( keys %tags ) {
      $self->apply_tag( $from, $extent, $name, $tags{$name} );
   }

   return $self;
}
1632
|
|
|
|
|
|
|
|
1633
|
|
|
|
|
|
|
=head2 concat |
1634
|
|
|
|
|
|
|
|
1635
|
|
|
|
|
|
|
$ret = $st->concat( $other ) |
1636
|
|
|
|
|
|
|
|
1637
|
|
|
|
|
|
|
$ret = $st . $other |
1638
|
|
|
|
|
|
|
|
1639
|
|
|
|
|
|
|
Returns a new C<String::Tagged> containing the two strings concatenated |
1640
|
|
|
|
|
|
|
together, preserving any tags present. This method overloads normal string |
1641
|
|
|
|
|
|
|
concatenation operator, so expressions involving C<String::Tagged> values |
1642
|
|
|
|
|
|
|
retain their tags. |
1643
|
|
|
|
|
|
|
|
1644
|
|
|
|
|
|
|
This method or operator tries to respect subclassing; preferring to return a |
1645
|
|
|
|
|
|
|
new object of a subclass if either argument or operand is a subclass of |
1646
|
|
|
|
|
|
|
C<String::Tagged>. If they are both subclasses, it will prefer the type of the |
1647
|
|
|
|
|
|
|
invocant or first operand. |
1648
|
|
|
|
|
|
|
|
1649
|
|
|
|
|
|
|
=cut |
1650
|
|
|
|
|
|
|
|
1651
|
20
|
|
|
20
|
|
6292
|
use overload '.' => 'concat'; |
|
20
|
|
|
|
|
93
|
|
|
20
|
|
|
|
|
127
|
|
1652
|
|
|
|
|
|
|
|
1653
|
|
|
|
|
|
|
# Builds a new object holding $self and $other joined together; neither
# operand is modified. $swap is the operand-order flag that the overload
# mechanism passes: when true, $other goes in front of $self instead of
# after it.
#
# The result is constructed in the class of $self, unless $self is exactly
# this base class and $other is a tagged string, in which case the class of
# $other is used.
sub concat
{
   my ( $self, $other, $swap ) = @_;

   # Try to find the "higher" subclass
   my $class = ref $self;
   if( $class eq __PACKAGE__ and is_string_tagged( $other ) ) {
      $class = ref $other;
   }

   my $ret = $class->new( $self );

   return $swap ? $ret->insert( 0, $other )
                : $ret->append( $other );
}
1666
|
|
|
|
|
|
|
|
1667
|
|
|
|
|
|
|
=head2 matches |
1668
|
|
|
|
|
|
|
|
1669
|
|
|
|
|
|
|
@subs = $st->matches( $regexp ) |
1670
|
|
|
|
|
|
|
|
1671
|
|
|
|
|
|
|
Returns a list of substrings (as C<String::Tagged> instances) for every |
1672
|
|
|
|
|
|
|
non-overlapping match of the given C<$regexp>. |
1673
|
|
|
|
|
|
|
|
1674
|
|
|
|
|
|
|
This could be used, for example, to build a formatted string from a formatted |
1675
|
|
|
|
|
|
|
template containing variable expansions: |
1676
|
|
|
|
|
|
|
|
1677
|
|
|
|
|
|
|
my $template = ... |
1678
|
|
|
|
|
|
|
my %vars = ... |
1679
|
|
|
|
|
|
|
|
1680
|
|
|
|
|
|
|
my $ret = String::Tagged->new; |
1681
|
|
|
|
|
|
|
foreach my $m ( $template->matches( qr/\$\w+|[^$]+/ ) ) { |
1682
|
|
|
|
|
|
|
if( $m =~ m/^\$(\w+)$/ ) { |
1683
|
|
|
|
|
|
|
$ret->append_tagged( $vars{$1}, %{ $m->get_tags_at( 0 ) } ); |
1684
|
|
|
|
|
|
|
} |
1685
|
|
|
|
|
|
|
else { |
1686
|
|
|
|
|
|
|
$ret->append( $m ); |
1687
|
|
|
|
|
|
|
} |
1688
|
|
|
|
|
|
|
} |
1689
|
|
|
|
|
|
|
|
1690
|
|
|
|
|
|
|
This iterates segments of the template containing variable expansions |
1691
|
|
|
|
|
|
|
starting with a C<$> symbol, and replaces them with values from the C<%vars> |
1692
|
|
|
|
|
|
|
hash, careful to preserve all the formatting tags from the original template |
1693
|
|
|
|
|
|
|
string. |
1694
|
|
|
|
|
|
|
|
1695
|
|
|
|
|
|
|
=cut |
1696
|
|
|
|
|
|
|
|
1697
|
|
|
|
|
|
|
# Returns one substring per non-overlapping match of $re, in match order.
# Matching is performed on the plain text from ->str; each result is cut out
# of the invocant with ->substr using the match offsets.
sub matches
{
   my ( $self, $re ) = @_;

   my $text = $self->str;

   my @found;
   while( $text =~ m/$re/g ) {
      # @- and @+ give the start and end offsets of the whole match
      my ( $from, $to ) = ( $-[0], $+[0] );
      push @found, $self->substr( $from, $to - $from );
   }

   return @found;
}
1711
|
|
|
|
|
|
|
|
1712
|
|
|
|
|
|
|
=head2 match_extents |
1713
|
|
|
|
|
|
|
|
1714
|
|
|
|
|
|
|
@extents = $st->match_extents( $regexp ) |
1715
|
|
|
|
|
|
|
|
1716
|
|
|
|
|
|
|
I |
1717
|
|
|
|
|
|
|
|
1718
|
|
|
|
|
|
|
Returns a list of extent objects for every non-overlapping match of the given |
1719
|
|
|
|
|
|
|
C<$regexp>. This is similar to L</matches>, except that the results are |
1720
|
|
|
|
|
|
|
returned as extent objects instead of substrings, allowing access to the |
1721
|
|
|
|
|
|
|
position information as well. |
1722
|
|
|
|
|
|
|
|
1723
|
|
|
|
|
|
|
If using the result of this method to find regions of a string to modify, |
1724
|
|
|
|
|
|
|
remember that any length alterations will not update positions in later extent |
1725
|
|
|
|
|
|
|
objects. However, since the extents are non-overlapping and in position order, |
1726
|
|
|
|
|
|
|
this can be handled by iterating them in reverse order so that the |
1727
|
|
|
|
|
|
|
modifications done first are later in the string. |
1728
|
|
|
|
|
|
|
|
1729
|
|
|
|
|
|
|
foreach my $e ( reverse $st->match_extents( $pattern ) ) { |
1730
|
|
|
|
|
|
|
$st->set_substr( $e->start, $e->length, $replacement ); |
1731
|
|
|
|
|
|
|
} |
1732
|
|
|
|
|
|
|
|
1733
|
|
|
|
|
|
|
=cut |
1734
|
|
|
|
|
|
|
|
1735
|
|
|
|
|
|
|
# Returns one extent per non-overlapping match of $re, in match order.
# Matching is performed on the plain text from ->str; each extent is built by
# ->_mkextent from the start offset, the end offset and a flags value of 0.
sub match_extents
{
   my ( $self, $re ) = @_;

   my $text = $self->str;

   my @extents;
   while( $text =~ m/$re/g ) {
      my ( $from, $to ) = ( $-[0], $+[0] );
      push @extents, $self->_mkextent( $from, $to, 0 );
   }

   return @extents;
}
1749
|
|
|
|
|
|
|
|
1750
|
|
|
|
|
|
|
=head2 split |
1751
|
|
|
|
|
|
|
|
1752
|
|
|
|
|
|
|
@parts = $st->split( $regexp, $limit ) |
1753
|
|
|
|
|
|
|
|
1754
|
|
|
|
|
|
|
Returns a list of substrings by applying the regexp to the string content; |
1755
|
|
|
|
|
|
|
similar to the core perl C<split> function. If C<$limit> is supplied, the |
1756
|
|
|
|
|
|
|
method will stop at that number of elements, returning the entire remainder of |
1757
|
|
|
|
|
|
|
the input string as the final element. If the C<$regexp> contains a capture |
1758
|
|
|
|
|
|
|
group then the content of the first one will be added to the return list as |
1759
|
|
|
|
|
|
|
well. |
1760
|
|
|
|
|
|
|
|
1761
|
|
|
|
|
|
|
=cut |
1762
|
|
|
|
|
|
|
|
1763
|
|
|
|
|
|
|
# Splits the string on every match of $re and returns the pieces, each cut
# out of the invocant with ->substr. If $re has a capture group, the text of
# the first group is returned after the field that precedes each separator.
# A non-empty remainder after the last separator used becomes the final
# element.
#
# A positive $limit stops the splitting once at least $limit-1 elements have
# been collected (captured separators count as elements); whatever is left
# of the string is then returned whole. The check is made before a separator
# is consumed, so a limit of 1 returns the string unsplit. With a capturing
# pattern the result may be one element longer than $limit, because a field
# and its captured separator are always added together. An undefined, zero
# or negative $limit imposes no restriction.
sub split
{
   my ( $self, $re, $limit ) = @_;

   my $plain  = $self->str;
   my $length = CORE::length( $plain );

   # Number of elements allowed ahead of the final remainder, if restricted
   my $max_leading = ( defined $limit and $limit > 0 ) ? $limit - 1 : undef;

   my $prev = 0;
   my @ret;
   while( $plain =~ m/$re/g ) {
      last if defined $max_leading and @ret >= $max_leading;

      # Copy the match offsets before calling out to other methods
      my @starts = @-;
      my @ends   = @+;

      push @ret, $self->substr( $prev, $starts[0] - $prev );
      push @ret, $self->substr( $starts[1], $ends[1] - $starts[1] ) if @starts > 1;

      $prev = $ends[0];
   }

   if( $length > $prev ) {
      push @ret, $self->substr( $prev, $length - $prev );
   }

   return @ret;
}
1787
|
|
|
|
|
|
|
|
1788
|
|
|
|
|
|
|
=head2 sprintf |
1789
|
|
|
|
|
|
|
|
1790
|
|
|
|
|
|
|
$ret = $st->sprintf( @args ) |
1791
|
|
|
|
|
|
|
|
1792
|
|
|
|
|
|
|
I |
1793
|
|
|
|
|
|
|
|
1794
|
|
|
|
|
|
|
Returns a new string by using the given instance as the format string for a |
1795
|
|
|
|
|
|
|
L</from_sprintf> constructor call. The returned instance will be of the same |
1796
|
|
|
|
|
|
|
class as the invocant. |
1797
|
|
|
|
|
|
|
|
1798
|
|
|
|
|
|
|
=cut |
1799
|
|
|
|
|
|
|
|
1800
|
|
|
|
|
|
|
# Uses the invocant as the format string for the from_sprintf constructor,
# called on the invocant's own class, and returns what that constructor
# returns. All remaining arguments are passed through untouched.
sub sprintf
{
   my $self = shift;

   my $class = ref $self;

   return $class->from_sprintf( $self, @_ );
}
1806
|
|
|
|
|
|
|
|
1807
|
|
|
|
|
|
|
=head2 debug_sprintf |
1808
|
|
|
|
|
|
|
|
1809
|
|
|
|
|
|
|
$ret = $st->debug_sprintf |
1810
|
|
|
|
|
|
|
|
1811
|
|
|
|
|
|
|
Returns a representation of the string data and all the tags, suitable for |
1812
|
|
|
|
|
|
|
debug printing or other similar use. This is a format such as is given in the |
1813
|
|
|
|
|
|
|
DESCRIPTION section above. |
1814
|
|
|
|
|
|
|
|
1815
|
|
|
|
|
|
|
The output will consist of a number of lines, the first containing the plain |
1816
|
|
|
|
|
|
|
underlying string, then one line per tag. The line shows the extent of the tag |
1817
|
|
|
|
|
|
|
given by C<[---]> markers, or a C<|> in the special case of a tag covering |
1818
|
|
|
|
|
|
|
only a single character. Special markings of C<< < >> and C<< > >> indicate |
1819
|
|
|
|
|
|
|
tags which are "before" or "after" anchored. |
1820
|
|
|
|
|
|
|
|
1821
|
|
|
|
|
|
|
For example: |
1822
|
|
|
|
|
|
|
|
1823
|
|
|
|
|
|
|
Hello, world |
1824
|
|
|
|
|
|
|
[---] word => 1 |
1825
|
|
|
|
|
|
|
<[----------]> everywhere => 1 |
1826
|
|
|
|
|
|
|
| space => 1 |
1827
|
|
|
|
|
|
|
|
1828
|
|
|
|
|
|
|
=cut |
1829
|
|
|
|
|
|
|
|
1830
|
|
|
|
|
|
|
# Renders the string and its tags as multi-line text for debugging.
#
# The first line is the plain string with newlines shown as ".", behind a
# two-column margin. Each tag then gets one line: a two-column margin that
# holds "<" when the tag has FLAG_ANCHOR_BEFORE, padding up to the tag start,
# a marker for the extent, padding to the end of the string, two columns that
# hold ">" when the tag has FLAG_ANCHOR_AFTER, and finally "name => value"
# with names padded to the longest tag name.
#
# Extent markers: "[---]" for two or more characters, "|" for exactly one,
# and "><" for a zero-length tag.
sub debug_sprintf
{
   my $self = shift;

   my $str = $self->str;
   my $len = CORE::length( $str );

   my $maxnamelen = 0;

   my $ret = "  " . ( $str =~ s/\n/./gr ) . "\n";

   # First pass: find the widest tag name so the name column lines up
   $self->iter_tags( sub {
      my ( undef, undef, $name, undef ) = @_;
      CORE::length( $name ) > $maxnamelen and $maxnamelen = CORE::length( $name );
   } );

   foreach my $t ( @{ $self->{tags} } ) {
      my ( $ts, $te, $tn, $tv, $tf ) = @$t;

      $ret .= ( $tf & FLAG_ANCHOR_BEFORE ) ? " <" : "  ";

      $ret .= " " x $ts;

      my $tl = $te - $ts;

      if( $tl == 0 ) {
         # A zero-length tag is drawn as "><" over the boundary, taking the
         # place of the final padding space
         $ret =~ s/ $/></;
         $te++; # account for extra printed width
      }
      elsif( $tl == 1 ) {
         $ret .= "|";
      }
      else {
         $ret .= "[" . ( "-" x ( $tl - 2 ) ) . "]";
      }

      $ret .= " " x ( $len - $te );

      $ret .= ( $tf & FLAG_ANCHOR_AFTER ) ? "> " : "  ";

      $ret .= CORE::sprintf "%-*s => %s\n", $maxnamelen, $tn, $tv;
   }

   return $ret;
}
1875
|
|
|
|
|
|
|
|
1876
|
|
|
|
|
|
|
=head1 TODO |
1877
|
|
|
|
|
|
|
|
1878
|
|
|
|
|
|
|
=over 4 |
1879
|
|
|
|
|
|
|
|
1880
|
|
|
|
|
|
|
=item * |
1881
|
|
|
|
|
|
|
|
1882
|
|
|
|
|
|
|
There are likely variations on the rules for C<set_substr> that could equally |
1883
|
|
|
|
|
|
|
apply to some uses of tagged strings. Consider whether the behaviour of |
1884
|
|
|
|
|
|
|
modification is chosen per-method, per-tag, or per-string. |
1885
|
|
|
|
|
|
|
|
1886
|
|
|
|
|
|
|
=item * |
1887
|
|
|
|
|
|
|
|
1888
|
|
|
|
|
|
|
Consider how to implement a clone from one tag format to another which wants |
1889
|
|
|
|
|
|
|
to merge multiple different source tags together into a single new one. |
1890
|
|
|
|
|
|
|
|
1891
|
|
|
|
|
|
|
=back |
1892
|
|
|
|
|
|
|
|
1893
|
|
|
|
|
|
|
=head1 AUTHOR |
1894
|
|
|
|
|
|
|
|
1895
|
|
|
|
|
|
|
Paul Evans <leonerd@leonerd.org.uk> |
1896
|
|
|
|
|
|
|
|
1897
|
|
|
|
|
|
|
=cut |
1898
|
|
|
|
|
|
|
|
1899
|
|
|
|
|
|
|
0x55AA; |