File Coverage

blib/lib/Text/Fragment.pm
Criterion Covered Total %
statement 182 187 97.3
branch 85 98 86.7
condition 17 22 77.2
subroutine 21 21 100.0
pod 5 5 100.0
total 310 333 93.0


line stmt bran cond sub pod time code
1             package Text::Fragment;
2              
3 6     6   506069 use 5.010001;
  6         23  
4 6     6   64 use strict;
  6         10  
  6         180  
5 6     6   42 use warnings;
  6         12  
  6         351  
6 6     6   12829 use Log::ger;
  6         411  
  6         37  
7              
8 6     6   4905 use Data::Clone;
  6         8802  
  6         467  
9 6     6   47 use Exporter qw(import);
  6         9  
  6         27556  
10              
11             our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
12             our $DATE = '2025-10-15'; # DATE
13             our $DIST = 'Text-Fragment'; # DIST
14             our $VERSION = '0.111'; # VERSION
15              
16             our @EXPORT_OK = qw(
17             list_fragments
18             get_fragment
19             set_fragment_attrs
20             insert_fragment
21             delete_fragment
22             );
23              
24             our $re_id = qr/\A[A-Za-z0-9_.,:-]+\z/;
25              
26             our %SPEC;
27              
28             sub _format_quoted {
29 3     3   7 my $unquoted = shift;
30 3         8 my $res = "";
31 3         8 my $i = -1;
32 3         40 while (++$i < length($unquoted)) {
33 6         15 my $c = substr($unquoted, $i, 1);
34 6 50 33     46 if ($c eq '\\' or $c eq '"') {
    50          
35 0         0 $res .= "\\$c";
36             } elsif ($c !~ /[\x20-\x7F]/) {
37             # strip non-printables
38             } else {
39 6         21 $res .= $c;
40             }
41             }
42 3         17 qq("$res");
43             }
44              
45             sub _parse_quoted {
46 1     1   3 my $quoted = shift;
47 1         5 $quoted =~ s/\A"//; $quoted =~ s/"\z//;
  1         5  
48 1         2 my $res = "";
49 1         3 my $i = -1;
50 1         5 while (++$i < length($quoted)) {
51 2         6 my $c = substr($quoted, $i, 1);
52 2 50       7 if ($c eq '\\') {
53 0         0 $res .= substr($quoted, ++$i, 1);
54             } else {
55 2         6 $res .= $c;
56             }
57             }
58 1         8 $res;
59             }
60              
61             sub _format_attr_value {
62 8     8   18 my $val = shift;
63 8 100       56 $val =~ /\s|"|[^\x20-\x7f]/ ? _format_quoted($val) : $val;
64             }
65              
66             sub _label {
67 33     33   162 my %args = @_;
68 33   100     127 my $id = $args{id} // "";
69 33         72 my $label = $args{label}; # str
70 33         81 my $comment_style = $args{comment_style};
71 33         62 my $attrs = $args{attrs};
72              
73 33         166 my $quoted_re = qr/"(?:[^\n\r"\\]|\\[^\n\r])*"/;
74              
75 33         73 my $a_re; # regex to match attributes
76             my $ai_re; # also match attributes, but attribute id must be present
77 33 100       116 if (length $id) {
78 27         2991 $ai_re = qr/(?:\w+=\S*[ \t]+)*id=(?\Q$id\E)(?:[ \t]+\w+=(?:$quoted_re|\S+))*/;
79             } else {
80 6         353 $ai_re = qr/(?:\w+=\S*[ \t]+)*id=(?\S*)(?:[ \t]+\w+=(?:$quoted_re|\S+))*/;
81             }
82 33         4324 $a_re = qr/(?:\w+=\S*)?(?:[ \t]+\w+=(?:$quoted_re|\S+))*/;
83              
84 33         143 my ($ts, $te); # tag start and end
85 33 100       149 if ($comment_style eq 'shell') {
    100          
    100          
    100          
    50          
86 22         41 $ts = "#";
87 22         46 $te = "";
88             } elsif ($comment_style eq 'c') {
89 3         9 $ts = "/*";
90 3         9 $te = "*/";
91             } elsif ($comment_style eq 'cpp') {
92 3         9 $ts = "//";
93 3         10 $te = "";
94             } elsif ($comment_style eq 'html') {
95 2         7 $ts = "";
97             } elsif ($comment_style eq 'ini') {
98 3         9 $ts = ";";
99 3         8 $te = "";
100             }
101             # regex to detect fragment
102 33         4607 my $ore = qr!^(?.*?)[ \t]*\Q$ts\E[ \t]*
103             \Q$label\E[ \t]+
104             (?$ai_re)[ \t]*
105             \Q$te\E[ \t]*(?\R|\z)!mx;
106              
107 33         8538 my $mre = qr!^\Q$ts\E[ \t]*
108             BEGIN[ \t]+\Q$label\E[ \t]+
109             (?$ai_re)[ \t]*
110             \Q$te\E[ \t]*(?\R)
111             (?:
112             (?.*)
113             ^\Q$ts\E[ \t]*END[ \t]+\Q$label\E[ \t]+
114             (?:\w+=\S*[ \t]+)*id=\g{id}(?:[ \t]+\w+=\S+)*
115             [ \t]*\Q$te\E |
116             (?.*?) # without any ID at the ending comment
117             ^\Q$ts\E[ \t]*END[ \t]+\Q$label\E(?:[ \t]+$a_re)?[ \t]*
118             \Q$te\E
119             )
120             [ \t]*(?\R|\z)!msx;
121              
122             my $parse_attrs = sub {
123 18   100 18   143 my $s = shift // "";
124 18         32 my %a;
125 18         723 while ($s =~ /(\w+)=(?:($quoted_re)|(\S+))(?:\s+|\z)/g) {
126 28 100       268 $a{$1} = $2 ? _parse_quoted($2) : $3;
127             }
128 18         375 \%a;
129 33         339 };
130              
131             return {
132             one_line_pattern => $ore,
133             multi_line_pattern => $mre,
134             parse_attrs => $parse_attrs,
135             format_fragment => sub {
136 12     12   63 my %f = @_;
137              
138             # formatted attrs as string
139 12         25 my $as = "";
140 12 100       62 if (ref($f{attrs})) {
141 10         46 for (sort keys %{ $f{attrs} }) {
  10         49  
142 6         29 $as .= " " . "$_="._format_attr_value($f{attrs}{$_});
143             }
144             } else {
145 2         4 my $a = $parse_attrs->($f{attrs});
146 2         6 $as = join("", map {" $_="._format_attr_value($a->{$_})}
147 2         9 grep {$_ ne 'id'}
  4         11  
148             sort keys %$a);
149             }
150              
151 12         34 my $pl = $f{payload};
152              
153             # to keep things simple here, regardless of whether the replaced
154             # pattern contains ending newline (enl), we still format with ending
155             # newline. then we'll just need to strip ending newline if it's not
156             # needed.
157              
158 12 100 66     78 if ($f{is_multi} || $pl =~ /\R/) {
159 2 100       15 $pl .= "\n" unless $pl =~ /\R\z/;
160 2 50       31 "$ts BEGIN $label id=$id$as" . ($te ? " $te":"") . "\n" .
    50          
161             $pl .
162             "$ts END $label id=$id" . ($te ? " $te":"") . "\n";
163             } else {
164 10 100       136 "$pl $ts $label id=$id$as" . ($te ? " $te":"") . "\n";
165             }
166             },
167 33         594 };
168             }
169              
170             sub _doit {
171 35     35   199 my ($which, %args) = @_;
172              
173 35 50       972 die "BUG: invalid which"
174             unless $which =~ /\A(?:list|get|insert|delete|set_attrs)\z/;
175 35         87 my ($label_str, $label_sub);
176 35 50       2363 if (ref($args{label}) eq 'CODE') {
177 0         0 $label_str = "FRAGMENT";
178 0         0 $label_sub = $args{label};
179             } else {
180 35   100     186 $label_str = $args{label} || "FRAGMENT";
181 35         100 $label_sub = \&_label;
182             }
183              
184 35         103 my $text = $args{text};
185 35 50       97 defined($text) or return [400, "Please specify text"];
186 35         76 my $id = $args{id};
187 35 100       208 if ($which =~ /\A(?:get|insert|set_attrs|delete)\z/) {
188 29 50       96 defined($id) or return [400, "Please specify id"];
189             }
190 35 100       109 if (defined $id) {
191 29 100       334 $id =~ $re_id or return [400, "Invalid ID syntax '$id', please use ".
192             "letters/numbers/dots/dashes only"];
193             }
194 34   100     174 my $attrs = $args{attrs} // {};
195 34         145 for (keys %$attrs) {
196 12 100       61 /\A\w+\z/ or return [400, "Invalid attribute name '$_', please use ".
197             "letters/numbers only"];
198 11 100       34 if (!defined($attrs->{$_})) {
199 2 50       7 if ($which eq 'set_attrs') {
200 2         6 next;
201             } else {
202 0         0 return [400, "Undefined value for attribute name '$_'"];
203             }
204             }
205             }
206              
207 33         67 my $good_pattern = $args{good_pattern};
208 33         62 my $replace_pattern = $args{replace_pattern};
209 33         63 my $top_style = $args{top_style};
210 33   100     158 my $comment_style = $args{comment_style} // "shell";
211 33 50       183 $comment_style =~ /\A(cpp|c|shell|html|ini)\z/ or return [
212             400, "Invalid comment_style '$comment_style', ".
213             "please use cpp/c/shell/html/ini"];
214 33         150 my $res = $label_sub->(id=>$id, label=>$label_str,
215             comment_style=>$comment_style);
216 33         95 my $one_line_pattern = $res->{one_line_pattern};
217 33         70 my $multi_line_pattern = $res->{multi_line_pattern};
218 33         75 my $parse_attrs = $res->{parse_attrs};
219 33         63 my $format_fragment = $res->{format_fragment};
220 33         67 my $payload = $args{payload};
221 33 100       122 if ($which eq 'insert') {
222 12 50       33 defined($payload) or return [400, "Please specify payload"];
223             }
224              
225 33 100       157 if ($which eq 'list') {
    100          
    100          
    100          
226              
227 6         11 my @ff;
228 6         4313 while ($text =~ /($one_line_pattern|$multi_line_pattern)/xg) {
229             push @ff, {
230             raw => $1,
231             id => $+{id},
232             payload => $+{payload},
233 12         185 attrs => $parse_attrs->($+{attrs}),
234             };
235             }
236 6         315 return [200, "OK", \@ff];
237              
238             } elsif ($which eq 'get') {
239              
240 3 100       1186 if ($text =~ /($one_line_pattern|$multi_line_pattern)/x) {
241             return [200, "OK", {
242             raw => $1,
243             id => $+{id},
244             payload => $+{payload},
245 2         35 attrs => $parse_attrs->($+{attrs}),
246             }];
247             } else {
248 1         47 return [404, "Fragment with ID '$id' not found"];
249             }
250              
251             } elsif ($which eq 'set_attrs') {
252              
253 3         5 my $orig_attrs;
254             my $sub = sub {
255 2     2   29 my %f = @_;
256 2         10 $orig_attrs = $parse_attrs->($f{attrs});
257 2         13 my %a = %$orig_attrs; delete $a{id};
  2         5  
258 2         8 for my $k (keys %$attrs) {
259 6         14 my $v = $attrs->{$k};
260 6 100       15 if (defined $v) {
261 4         9 $a{$k} = $v;
262             } else {
263 2         6 delete $a{$k};
264             }
265             }
266 2         7 $f{attrs} = \%a;
267 2         8 $format_fragment->(%f);
268 3         15 };
269 3 100       1076 if ($text =~ s{$one_line_pattern | $multi_line_pattern}
270 2         41 {$sub->(%+)}egx) {
271 2         106 return [200, "OK", {text=>$text, orig_attrs=>$orig_attrs}];
272             } else {
273 1         59 return [404, "Fragment with ID '$id' not found"];
274             }
275              
276             } elsif ($which eq 'delete') {
277              
278 9         15 my %f;
279             my $sub = sub {
280 8     8   111 %f = @_;
281 8 100       153 $f{enl} ? $f{bnl} : "";
282 9         26 };
283 9 100       2164 if ($text =~ s{(?\R?)
284             (?$one_line_pattern | $multi_line_pattern)}
285 8         136 {$sub->(%+)}egx) {
286             return [200, "OK", {text=>$text,
287             orig_fragment=>$f{fragment},
288 7         271 orig_payload=>$f{payload}}];
289             } else {
290 2         70 return [304, "Fragment with ID '$id' already does not exist"];
291             }
292              
293             } else { # insert
294              
295 12         29 my $replaced;
296             my %f;
297             my $sub = sub {
298 3     3   33 %f = @_;
299 3 100       19 return $f{fragment} if $payload eq $f{payload};
300 2         3 $replaced++;
301 2         3 $f{orig_fragment} = $f{fragment};
302 2         4 $f{orig_payload} = $f{payload};
303 2         3 $f{payload} = $payload;
304 2         7 $format_fragment->(%f);
305 12         44 };
306 12 100 66     69 if ($good_pattern && $text =~ /$good_pattern/) {
307 1         26 return [304, "Text contains good pattern"];
308             }
309              
310 11 100       4647 if ($text =~ s{(?(?:$one_line_pattern | $multi_line_pattern))}
311 3         49 {$sub->(%+)}ex) {
312 3 100       8 if ($replaced) {
313             return [200, "Payload replaced", {
314             text=>$text, orig_fragment=>$f{orig_fragment},
315 2         72 orig_payload=>$f{orig_payload}}];
316             } else {
317 1         64 return [304, "Fragment with ID '$id' already exist with ".
318             "same content"];
319             }
320             }
321              
322 8         84 my $fragment = $format_fragment->(payload=>$payload, attrs=>$attrs);
323 8 100 66     49 if ($replace_pattern && $text =~ /($replace_pattern)/) {
324 1         5 my $orig_fragment = $1;
325 1         8 $text =~ s/$replace_pattern/$fragment/;
326 1         55 return [200, "Replace pattern replaced", {
327             text=>$text, orig_fragment=>$orig_fragment}];
328             }
329              
330 7 100       31 if ($top_style) {
    100          
331 1         4 $text = $fragment . $text;
332             } elsif (length($text)) {
333 5         26 my $enl = $text =~ /\R\z/; # text ends with newline
334 5 100       53 $fragment =~ s/\R\z// unless $enl;
335 5 100       22 $text .= ($enl ? "" : "\n") . $fragment;
336             } else {
337             # insert at bottom of empty string
338 1         3 $text = $fragment;
339             }
340 7 100       261 return [200, "Fragment inserted at the ".
341             ($top_style ? "top" : "bottom"), {text=>$text}];
342             }
343              
344             }
345              
346             $SPEC{':package'} = {
347             v => 1.1,
348             summary => 'Manipulate fragments in text',
349             description => <<'MARKDOWN',
350              
351             A fragment is a single line or a group of lines (called payload) with a metadata
352             encoded in the comment that is put adjacent to it (for a single line fragment)
353             or enclosing it (for a multiline fragment). Fragments are usually used in
354             configuration files or code. Here is the structure of a single-line fragment:
355              
356             #
357              
358             Here is the structure of a multi-line fragment:
359              
360             # BEGIN
361            
362             # END
363              
364             Label is by default `FRAGMENT` but can be other string. Attributes are a
365             sequence of `name=val` separated by whitespace, where name must be alphanums
366             only and val is zero or more non-whitespace characters. There must at least be
367             an attribute with name `id`, it is used to identify fragment and allow the
368             fragment to be easily replaced/modified/deleted from text. Attributes are
369             optional in the ending comment.
370              
371             Comment character used is by default `#` (`shell`-style comment), but other
372             comment styles are supported (see below).
373              
374             Examples of single-line fragments (the second example uses `c`-style comment and
375             the third uses `cpp`-style comment):
376              
377             RSYNC_ENABLE=1 # FRAGMENT id=enable
378             some text /* FRAGMENT id=id2 */
379             some text // FRAGMENT id=id3 foo=1 bar=2
380              
381             An example of multi-line fragment (using `html`-style comment instead of
382             `shell`):
383              
384            
385             some
386             lines
387             of
388             text
389            
390              
391             Another example (using `ini`-style comment):
392              
393             ; BEGIN FRAGMENT id=default-settings
394             register_globals=On
395             extension=mysql.so
396             extension=gd.so
397             memory_limit=256M
398             post_max_size=64M
399             upload_max_filesize=64M
400             browscap=/c/share/php/browscap.ini
401             allow_url_fopen=0
402             ; END FRAGMENT
403              
404             MARKDOWN
405             };
406              
407             my $arg_comment_style = {
408             summary => 'Comment style',
409             schema => ['str' => {
410             default => 'shell',
411             in => [qw/c cpp html shell ini/],
412             }],
413             };
414             my $arg_label = {
415             schema => [str => {default=>'FRAGMENT'}],
416             summary => 'Comment label',
417             };
418              
419             my $arg_id = {
420             summary => 'Fragment ID',
421             schema => ['str*' => { match => qr/\A[\w-]+\z/ }],
422             req => 1,
423             };
424              
425             my $arg_payload = {
426             summary => 'Fragment content',
427             schema => 'str*',
428             req => 1,
429             };
430              
431             $SPEC{list_fragments} = {
432             v => 1.1,
433             summary => 'List fragments in text',
434             args => {
435             text => {
436             summary => 'The text which contain fragments',
437             schema => 'str*',
438             req => 1,
439             pos => 0,
440             },
441             comment_style => $arg_comment_style,
442             label => $arg_label,
443             },
444             result => {
445             summary => 'List of fragments',
446             schema => 'array*',
447             description => <<'MARKDOWN',
448              
449             Will return status 200 if operation is successful. Result will be an array of
450             fragments, where each fragment is a hash containing these keys: `raw` (string),
451             `payload` (string), `attrs` (hash), `id` (string, can also be found in
452             attributes).
453              
454             MARKDOWN
455             },
456             };
457             sub list_fragments {
458 6     6 1 615683 _doit('list', @_);
459             }
460              
461             $SPEC{get_fragment} = {
462             v => 1.1,
463             summary => 'Get fragment with a certain ID in text',
464             description => <<'MARKDOWN',
465              
466             If there are multiple occurences of the fragment with the same ID ,
467              
468             MARKDOWN
469             args => {
470             text => {
471             summary => 'The text which contain fragments',
472             schema => 'str*',
473             req => 1,
474             pos => 0,
475             },
476             comment_style => $arg_comment_style,
477             label => $arg_label,
478             id => $arg_id,
479             },
480             result => {
481             summary => 'Fragment',
482             schema => 'array*',
483             description => <<'MARKDOWN',
484              
485             Will return status 200 if fragment is found. Result will be a hash with the
486             following keys: `raw` (string), `payload` (string), `attrs` (hash), `id`
487             (string, can also be found in attributes).
488              
489             Return 404 if fragment is not found.
490              
491             MARKDOWN
492             },
493             };
494             sub get_fragment {
495 3     3 1 533624 _doit('get', @_);
496             }
497              
498             $SPEC{set_fragment_attrs} = {
499             v => 1.1,
500             summary => 'Set/unset attributes of a fragment',
501             description => <<'MARKDOWN',
502              
503             If there are multiple occurences of the fragment with the same ID ,
504              
505             MARKDOWN
506             args => {
507             text => {
508             summary => 'The text which contain fragments',
509             schema => 'str*',
510             req => 1,
511             pos => 0,
512             },
513             comment_style => $arg_comment_style,
514             label => $arg_label,
515             id => $arg_id,
516             attrs => {
517             schema => 'hash*',
518             description => <<'MARKDOWN',
519              
520             To delete an attribute in the fragment, you can set the value to undef.
521              
522             MARKDOWN
523             req => 1,
524             },
525             },
526             result => {
527             summary => 'New text and other data',
528             schema => 'array*',
529             description => <<'MARKDOWN',
530              
531             Will return status 200 if fragment is found. Result will be a hash containing
532             these keys: `text` (string, the modified text), `orig_attrs` (hash, the old
533             attributes before being modified).
534              
535             Return 404 if fragment is not found.
536              
537             MARKDOWN
538             },
539             };
540             sub set_fragment_attrs {
541 4     4 1 658253 _doit('set_attrs', @_);
542             }
543              
544             $SPEC{insert_fragment} = {
545             v => 1.1,
546             summary => 'Insert or replace a fragment in text',
547             description => <<'MARKDOWN',
548              
549             Newline insertion behaviour: if fragment is inserted at the bottom and text does
550             not end with newline (which is considered bad style), the inserted fragment will
551             also not end with newline. Except when original text is an empty string, in
552             which case an ending newline will still be added.
553              
554             MARKDOWN
555             args => {
556             text => {
557             summary => 'The text to insert fragment into',
558             schema => 'str*',
559             req => 1,
560             pos => 0,
561             },
562             id => $arg_id,
563             payload => $arg_payload,
564             top_style => {
565             summary => 'Whether to append fragment at beginning of file '.
566             'instead of at the end',
567             schema => [bool => { default=>0 }],
568             description => <<'MARKDOWN',
569              
570             Default is false, which means to append at the end of file.
571              
572             Note that this only has effect if `replace_pattern` is not defined or replace
573             pattern is not found in file. Otherwise, fragment will be inserted to replace
574             the pattern.
575              
576             MARKDOWN
577             },
578             replace_pattern => {
579             summary => 'Regex pattern which if found will be used for '.
580             'placement of fragment',
581             schema => 'str',
582             description => <<'MARKDOWN',
583              
584             If fragment needs to be inserted into file, then if `replace_pattern` is defined
585             then it will be searched. If found, fragment will be placed to replace the
586             pattern. Otherwise, fragment will be inserted at the end (or beginning, see
587             `top_style`) of file.
588              
589             MARKDOWN
590             },
591             good_pattern => {
592             summary => 'Regex pattern which if found means fragment '.
593             'need not be inserted',
594             schema => 'str',
595             },
596             comment_style => $arg_comment_style,
597             label => $arg_label,
598             attrs => {
599             schema => [hash => {default=>{}}],
600             },
601             },
602             result => {
603             summary => 'A hash of result',
604             schema => 'hash*',
605             description => <<'MARKDOWN',
606              
607             Will return status 200 if operation is successful and text is changed. The
608             result is a hash with the following keys: `text` will contain the new text,
609             `orig_payload` will contain the original payload before being removed/replaced,
610             `orig_fragment` will contain the original fragment (or the text that matches
611             `replace_pattern`).
612              
613              
614             Will return status 304 if nothing is changed (i.e. if fragment with the
615             same payload that needs to be inserted already exists in the text).
616              
617             MARKDOWN
618             },
619             };
620             sub insert_fragment {
621 13     13 1 651472 _doit('insert', @_);
622             }
623              
624             $SPEC{delete_fragment} = {
625             v => 1.1,
626             summary => 'Delete fragment in text',
627             description => <<'MARKDOWN',
628              
629             If there are multiple occurences of fragment (which is considered an abnormal
630             condition), all occurences will be deleted.
631              
632             Newline deletion behaviour: if fragment at the bottom of text does not end with
633             newline (which is considered bad style), the text after the fragment is deleted
634             will also not end with newline.
635              
636             MARKDOWN
637             args => {
638             text => {
639             summary => 'The text to delete fragment from',
640             schema => 'str*',
641             req => 1,
642             pos => 0,
643             },
644             id => {
645             summary => 'Fragment ID',
646             schema => ['str*' => { match => qr/\A[\w-]+\z/ }],
647             req => 1,
648             pos => 1,
649             },
650             comment_style => $arg_comment_style,
651             label => {
652             schema => ['any' => {
653             of => ['str*', 'code*'],
654             default => 'FRAGMENT',
655             }],
656             summary => 'Comment label',
657             },
658             },
659             result => {
660             summary => 'A hash of result',
661             schema => 'hash*',
662             description => <<'MARKDOWN',
663              
664             Will return status 200 if operation is successful and text is deleted. The
665             result is a hash with the following keys: `text` will contain the new text,
666             `orig_payload` will contain the original fragment payload before being deleted,
667             `orig_fragment` will contain the original fragment. If there are multiple
668             occurences (which is considered an abnormal condition), only the last deleted
669             fragment will be returned in `orig_payload` and `orig_fragment`.
670              
671             Will return status 304 if nothing is changed (i.e. when the fragment that needs
672             to be deleted already does not exist in the text).
673              
674             MARKDOWN
675             },
676             };
677             sub delete_fragment {
678 9     9 1 510874 _doit('delete', @_);
679             }
680              
681             1;
682             # ABSTRACT: Manipulate fragments in text
683              
684             __END__