blib/lib/Mojo/DOM58/Entities.pm | |||
---|---|---|---|
Criterion | Covered | Total | % |
statement | 26 | 26 | 100.0 |
branch | 6 | 6 | 100.0 |
condition | 6 | 6 | 100.0 |
subroutine | 8 | 8 | 100.0 |
pod | 3 | 3 | 100.0 |
total | 49 | 49 | 100.0 |
line | stmt | bran | cond | sub | pod | time | code |
---|---|---|---|---|---|---|---|
1 | package Mojo::DOM58::Entities; | ||||||
2 | |||||||
3 | 3 | 3 | 79984 | use strict; | |||
3 | 17 | ||||||
3 | 94 | ||||||
4 | 3 | 3 | 16 | use warnings; | |||
3 | 6 | ||||||
3 | 85 | ||||||
5 | 3 | 3 | 27 | use Exporter 'import'; | |||
3 | 14 | ||||||
3 | 1851 | ||||||
6 | |||||||
7 | our $VERSION = '3.000'; | ||||||
8 | |||||||
9 | our @EXPORT_OK = qw(html_attr_unescape html_escape html_unescape); | ||||||
10 | |||||||
11 | # To generate a new HTML entity table run this command | ||||||
12 | # perl examples/entities.pl | ||||||
13 | my %ENTITIES; | ||||||
14 | for my $line (split "\n", join('', )) { | ||||||
15 | next unless $line =~ /^(\S+)\s+U\+(\S+)(?:\s+U\+(\S+))?/; | ||||||
16 | $ENTITIES{$1} = defined $3 ? (chr(hex $2) . chr(hex $3)) : chr(hex $2); | ||||||
17 | } | ||||||
18 | close DATA; | ||||||
19 | |||||||
20 | # Characters that should be escaped in HTML/XML | ||||||
21 | my %ESCAPE = ( | ||||||
22 | '&' => '&', | ||||||
23 | '<' => '<', | ||||||
24 | '>' => '>', | ||||||
25 | '"' => '"', | ||||||
26 | '\'' => ''' | ||||||
27 | ); | ||||||
28 | |||||||
29 | # HTML entities | ||||||
30 | my $ENTITY_RE = qr/&(?:\#((?:[0-9]{1,7}|x[0-9a-fA-F]{1,6}));|(\w+[;=]?))/; | ||||||
31 | |||||||
32 | sub html_escape { | ||||||
33 | 460 | 460 | 1 | 9101 | my $str = shift; | ||
34 | 460 | 963 | $str =~ s/([&<>"'])/$ESCAPE{$1}/ge; | ||||
15 | 64 | ||||||
35 | 460 | 1654 | return $str; | ||||
36 | } | ||||||
37 | |||||||
38 | 33035 | 33035 | 1 | 54810 | sub html_attr_unescape { _html(shift, 1) } | ||
39 | 1130 | 1130 | 1 | 24345 | sub html_unescape { _html(shift, 0) } | ||
40 | |||||||
41 | sub _entity { | ||||||
42 | 46 | 46 | 148 | my ($point, $name, $attr) = @_; | |||
43 | |||||||
44 | # Code point | ||||||
45 | 46 | 100 | 200 | return chr($point !~ /^x/ ? $point : hex $point) unless defined $name; | |||
100 | |||||||
46 | |||||||
47 | # Named character reference | ||||||
48 | 37 | 77 | my $rest = my $last = ''; | ||||
49 | 37 | 112 | while (length $name) { | ||||
50 | return $ENTITIES{$name} . reverse $rest | ||||||
51 | 75 | 100 | 100 | 531 | if exists $ENTITIES{$name} | ||
100 | |||||||
52 | && (!$attr || $name =~ /;$/ || $last !~ /[A-Za-z0-9=]/); | ||||||
53 | 48 | 111 | $rest .= $last = chop $name; | ||||
54 | } | ||||||
55 | 10 | 51 | return '&' . reverse $rest; | ||||
56 | } | ||||||
57 | |||||||
58 | sub _html { | ||||||
59 | 34165 | 34165 | 53638 | my ($str, $attr) = @_; | |||
60 | 34165 | 45591 | $str =~ s/$ENTITY_RE/_entity($1, $2, $attr)/geo; | ||||
46 | 127 | ||||||
61 | 34165 | 150101 | return $str; | ||||
62 | } | ||||||
63 | |||||||
64 | 1; | ||||||
65 | |||||||
66 | =encoding utf8 | ||||||
67 | |||||||
68 | =head1 NAME | ||||||
69 | |||||||
70 | Mojo::DOM58::Entities - Escape or unescape HTML entities in strings | ||||||
71 | |||||||
72 | =head1 SYNOPSIS | ||||||
73 | |||||||
74 | use Mojo::DOM58::Entities qw(html_escape html_unescape); | ||||||
75 | |||||||
76 | my $str = 'foo & bar'; | ||||||
77 | $str = html_unescape $str; # "foo & bar" | ||||||
78 | $str = html_escape $str; # "foo & bar" | ||||||
79 | |||||||
80 | =head1 DESCRIPTION | ||||||
81 | |||||||
82 | L |
||||||
83 | entities for L |
||||||
84 | functions are exported on demand. | ||||||
85 | |||||||
86 | =head1 FUNCTIONS | ||||||
87 | |||||||
88 | =head2 html_attr_unescape | ||||||
89 | |||||||
90 | my $str = html_attr_unescape $escaped; | ||||||
91 | |||||||
92 | Same as L"html_unescape">, but handles special rules from the | ||||||
93 | L for HTML attributes. | ||||||
94 | |||||||
95 | # "foo=bar<est=baz" | ||||||
96 | html_attr_unescape 'foo=bar<est=baz'; | ||||||
97 | |||||||
98 | # "foo=bar | ||||||
99 | html_attr_unescape 'foo=bar<est=baz'; | ||||||
100 | |||||||
101 | =head2 html_escape | ||||||
102 | |||||||
103 | my $escaped = html_escape $str; | ||||||
104 | |||||||
105 | Escape unsafe characters C<&>, C<< < >>, C<< > >>, C<">, and C<'> in string. | ||||||
106 | |||||||
107 | html_escape ' '; # "<div>" |
||||||
108 | |||||||
109 | =head2 html_unescape | ||||||
110 | |||||||
111 | my $str = html_unescape $escaped; | ||||||
112 | |||||||
113 | Unescape all HTML entities in string, according to the | ||||||
114 | L. | ||||||
115 | |||||||
116 | html_unescape '<div>'; # " " |
||||||
117 | |||||||
118 | =head1 BUGS | ||||||
119 | |||||||
120 | Report issues related to the format of this distribution or Perl 5.8 support to | ||||||
121 | the public bugtracker. Any other issues should be reported directly to the | ||||||
122 | upstream L |
||||||
123 | |||||||
124 | =head1 AUTHOR | ||||||
125 | |||||||
126 | Dan Book |
||||||
127 | |||||||
128 | Code and tests adapted from L |
||||||
129 | L |
||||||
130 | |||||||
131 | =head1 COPYRIGHT AND LICENSE | ||||||
132 | |||||||
133 | Copyright (c) 2008-2016 Sebastian Riedel and others. | ||||||
134 | |||||||
135 | Copyright (c) 2016 Dan Book for adaptation to standalone format. | ||||||
136 | |||||||
137 | This is free software, licensed under: | ||||||
138 | |||||||
139 | The Artistic License 2.0 (GPL Compatible) | ||||||
140 | |||||||
141 | =head1 SEE ALSO | ||||||
142 | |||||||
143 | L |
||||||
144 | |||||||
145 | =cut | ||||||
146 | |||||||
147 | __DATA__ |