blib/lib/String/Smart.pm | |||
---|---|---|---|
Criterion | Covered | Total | % |
statement | 58 | 58 | 100.0 |
branch | 14 | 16 | 87.5 |
condition | 13 | 18 | 72.2 |
subroutine | 15 | 15 | 100.0 |
pod | 7 | 7 | 100.0 |
total | 107 | 114 | 93.8 |
line | stmt | bran | cond | sub | pod | time | code |
---|---|---|---|---|---|---|---|
1 | package String::Smart; | ||||||
2 | |||||||
3 | 2 | 2 | 59361 | use warnings; | |||
2 | 5 | ||||||
2 | 157 | ||||||
4 | 2 | 2 | 11 | use strict; | |||
2 | 6 | ||||||
2 | 70 | ||||||
5 | 2 | 2 | 13 | use Carp; | |||
2 | 8 | ||||||
2 | 202 | ||||||
6 | 2 | 2 | 12 | use Exporter; | |||
2 | 5 | ||||||
2 | 111 | ||||||
7 | 2 | 2 | 16 | use Scalar::Util qw( blessed ); | |||
2 | 4 | ||||||
2 | 270 | ||||||
8 | |||||||
9 | 2 | 2 | 5062 | use overload '""' => \&str_val; | |||
2 | 2763 | ||||||
2 | 16 | ||||||
10 | |||||||
11 | =head1 NAME | ||||||
12 | |||||||
13 | String::Smart - Strings that know how to escape themselves. | ||||||
14 | |||||||
15 | =head1 VERSION | ||||||
16 | |||||||
17 | This document describes String::Smart version 0.4 | ||||||
18 | |||||||
19 | =cut | ||||||
20 | |||||||
21 | 2 | 2 | 132 | use vars qw( $VERSION @ISA @EXPORT_OK %EXPORT_TAGS ); | |||
2 | 3 | ||||||
2 | 2043 | ||||||
22 | |||||||
23 | $VERSION = '0.4'; | ||||||
24 | @ISA = qw( Exporter ); | ||||||
25 | @EXPORT_OK = qw( already as add_rep literal plain rep str_val ); | ||||||
26 | %EXPORT_TAGS = ( all => \@EXPORT_OK ); | ||||||
27 | |||||||
28 | my %rep_map = (); | ||||||
29 | |||||||
30 | =head1 SYNOPSIS | ||||||
31 | |||||||
32 | use String::Smart; | ||||||
33 | my $plain = " |
||||||
34 | my $html = as html => " <This is HTML> "; |
||||||
35 | |||||||
36 | print as html => $plain, as html => $html; | ||||||
37 | # Prints "<This is plain text> <This is HTML> " |
||||||
38 | |||||||
39 | print plain $html; | ||||||
40 | # Croaks: "Can't decode markup" | ||||||
41 | |||||||
42 | =head1 DESCRIPTION | ||||||
43 | |||||||
44 | String::Smart implements overloaded string values that know how they are | ||||||
45 | currently encoded or escaped and are capable of transforming themselves | ||||||
46 | into other encodings. | ||||||
47 | |||||||
48 | In many applications it is necessary to apply various escaping rules to | ||||||
49 | strings before they can safely be used. For example when building a SQL | ||||||
50 | query string literals must be escaped to avoid SQL injection | ||||||
51 | vulnerabilities. | ||||||
52 | |||||||
53 | Typically this is achieved by SQL escaping all strings that are passed | ||||||
54 | to the query builder. But what if you pass a string that has already | ||||||
55 | been SQL escaped? Or a string that is URL encoded? If you wish to pass a | ||||||
56 | mixture of already-encoded strings and plain string literals you have to | ||||||
57 | arrange some out of band means of communicating the encoding state of | ||||||
58 | each string. | ||||||
59 | |||||||
60 | With C |
||||||
61 | ask for SQL escaped strings and behind the scenes the appropriate | ||||||
62 | transformations will be applied to each string based on its | ||||||
63 | current encoding. | ||||||
64 | |||||||
65 | For example: | ||||||
66 | |||||||
67 | my $uri_enc = already uri => 'Spaces+are+evil'; | ||||||
68 | my $sql_enc = already sql => "\\'Quotes are backslashed\\'"; | ||||||
69 | my $not_enc = "Just some literal punctuation: %'+"; | ||||||
70 | |||||||
71 | print literal sql => $uri_enc; | ||||||
72 | # removes URI encoding | ||||||
73 | # applies SQL encoding | ||||||
74 | # prints | ||||||
75 | # Spaces are evil | ||||||
76 | |||||||
77 | print literal sql => $sql_enc; | ||||||
78 | # already sql encoded | ||||||
79 | # prints | ||||||
80 | # \'Quotes are backslashed\' | ||||||
81 | |||||||
82 | print literal sql => $not_enc; | ||||||
83 | # applies SQL encoding | ||||||
84 | # prints | ||||||
85 | # Just some literal punctuation: %\'+ | ||||||
86 | |||||||
87 | The important point is that the requested encoding is absolute rather | ||||||
88 | than relative. A C |
||||||
89 | can work out how to re-encode itself in the requested way. | ||||||
90 | |||||||
91 | =head2 A note on the examples | ||||||
92 | |||||||
93 | Throughout the documentation I assume that various encoding | ||||||
94 | representations (C |
||||||
95 | These are not defined by C |
||||||
96 | C |
||||||
97 | examples will run. | ||||||
98 | |||||||
99 | =head1 INTERFACE | ||||||
100 | |||||||
101 | =head2 C<< add_rep >> | ||||||
102 | |||||||
103 | Add an encoding representation. The namespace for encodings is global. | ||||||
104 | This may turn out to be a problem - and may therefore change. | ||||||
105 | |||||||
106 | add_rep reversed => sub { reverse $_[0] }, sub { reverse $_[0] }; | ||||||
107 | my $this = "Hello"; | ||||||
108 | my $that = reversed "Hello"; | ||||||
109 | print as reversed => $this, "\n"; | ||||||
110 | # prints "olleH" | ||||||
111 | print as reversed => $that, "\n"; | ||||||
112 | # also prints "olleH" | ||||||
113 | |||||||
114 | A representation consists of a name and two subroutine references. The | ||||||
115 | first subroutine applies the encoding, the second reverses it. If either | ||||||
116 | subroutine is undefined a boilerplate subroutine that throws a | ||||||
117 | descriptive error will be used in its place. | ||||||
118 | |||||||
119 | =cut | ||||||
120 | |||||||
121 | sub add_rep($$$) { | ||||||
122 | 6 | 6 | 1 | 677 | my ( $name, $to, $from ) = @_; | ||
123 | |||||||
124 | 6 | 100 | 321 | croak "$name contains an underscore" | |||
125 | if $name =~ /_/; | ||||||
126 | |||||||
127 | 5 | 15 | my %spec = ( from => $from, to => $to ); | ||||
128 | 5 | 15 | for my $dir ( keys %spec ) { | ||||
129 | 10 | 100 | 29 | unless ( defined $spec{$dir} ) { | |||
130 | $spec{$dir} = sub { | ||||||
131 | 2 | 2 | 295 | croak "Don't know how to convert $dir $name"; | |||
132 | 2 | 11 | }; | ||||
133 | } | ||||||
134 | } | ||||||
135 | |||||||
136 | 5 | 22 | $rep_map{$name} = \%spec; | ||||
137 | } | ||||||
138 | |||||||
139 | =head2 C<< as >> | ||||||
140 | |||||||
141 | Coerce a string into the specified encoding. | ||||||
142 | |||||||
143 | my $representation = as html => $some_string; | ||||||
144 | |||||||
145 | Optionally multiple encodings my be supplied either like this: | ||||||
146 | |||||||
147 | my $rep = as html_nl2br => $some_string; | ||||||
148 | |||||||
149 | Or like this: | ||||||
150 | |||||||
151 | my $rep = as ['html', 'nl2br'], $some_string; | ||||||
152 | |||||||
153 | The returned object (actually a hash blessed to C |
||||||
154 | will have the specified encoding irrespective of it's current | ||||||
155 | encoding. For example the sequence: | ||||||
156 | |||||||
157 | my $html1 = as html => $some_string; | ||||||
158 | my $html2 = as html => $html1; | ||||||
159 | |||||||
160 | Does I |
||||||
161 | 'absolute'. A path of transformations that will convert the string from | ||||||
162 | whatever its current encoding is will be computed and applied. | ||||||
163 | |||||||
164 | =cut | ||||||
165 | |||||||
166 | sub as($$) { | ||||||
167 | 16 | 16 | 1 | 1661 | my ( $desired, $str ) = @_; | ||
168 | |||||||
169 | my @desired | ||||||
170 | 16 | 100 | 58 | = map { split /_/ } 'ARRAY' eq ref $desired ? @$desired : $desired; | |||
11 | 48 | ||||||
171 | |||||||
172 | 16 | 100 | 66 | 129 | unless ( blessed $str && $str->isa( __PACKAGE__ ) ) { | ||
173 | 6 | 23 | $str = bless { val => $str, rep => [] }; | ||||
174 | } | ||||||
175 | |||||||
176 | 16 | 38 | my @got_rep = $str->rep; | ||||
177 | 16 | 65 | my @want_rep = @desired; | ||||
178 | |||||||
179 | # Prune common reps | ||||||
180 | 16 | 100 | 89 | while ( @got_rep && @want_rep && $got_rep[0] eq $want_rep[0] ) { | |||
100 | |||||||
181 | 7 | 10 | shift @got_rep; | ||||
182 | 7 | 33 | shift @want_rep; | ||||
183 | } | ||||||
184 | |||||||
185 | 16 | 27 | $str = $str->{val}; | ||||
186 | |||||||
187 | 16 | 65 | for my $spec ( [ 'from', reverse @got_rep ], [ 'to', @want_rep ] ) { | ||||
188 | 31 | 65 | my $dir = shift @$spec; | ||||
189 | 31 | 58 | for my $rep ( @$spec ) { | ||||
190 | 21 | 66 | 222 | my $handler = $rep_map{$rep} || croak "Don't know about $rep"; | |||
191 | 20 | 57 | $str = $handler->{$dir}->( $str ); | ||||
192 | } | ||||||
193 | } | ||||||
194 | |||||||
195 | 13 | 132 | return bless { | ||||
196 | val => $str, | ||||||
197 | rep => \@desired, | ||||||
198 | }; | ||||||
199 | } | ||||||
200 | |||||||
201 | =head2 C<< already >> | ||||||
202 | |||||||
203 | Declare that a string is already encoded in a particular way. For example: | ||||||
204 | |||||||
205 | my $html = already html => ' This is a paragraph '; |
||||||
206 | my $text = 'This is just << some text >>'; | ||||||
207 | |||||||
208 | print literal html => $html; | ||||||
209 | # already HTML encoded | ||||||
210 | # prints | ||||||
211 | # This is a paragraph |
||||||
212 | |||||||
213 | print literal html => $text; | ||||||
214 | # applies HTML encoding | ||||||
215 | # prints | ||||||
216 | # This is just << some text >> | ||||||
217 | |||||||
218 | =cut | ||||||
219 | |||||||
220 | sub already($$) { | ||||||
221 | 2 | 18 | return bless { | ||||
222 | val => $_[1], | ||||||
223 | 2 | 50 | 2 | 1 | 571 | rep => [ map { split /_/ } 'ARRAY' eq ref $_[0] ? @$_[0] : $_[0] ] | |
224 | }; | ||||||
225 | } | ||||||
226 | |||||||
227 | =head2 C<< literal >> | ||||||
228 | |||||||
229 | Convert a string to the specified encoding and return it as a normal | ||||||
230 | unblessed string. | ||||||
231 | |||||||
232 | =cut | ||||||
233 | |||||||
234 | 6 | 6 | 1 | 34 | sub literal($$) { as( $_[0], $_[1] )->{val} } | ||
235 | |||||||
236 | =head2 C<< plain >> | ||||||
237 | |||||||
238 | Remove any encoding from a string. | ||||||
239 | |||||||
240 | my $uri_enc = already uri => 'Spaces+are+evil%21'; | ||||||
241 | print plain $uri_enc; | ||||||
242 | # prints | ||||||
243 | # Spaces are evil! | ||||||
244 | |||||||
245 | =cut | ||||||
246 | |||||||
247 | 5 | 5 | 1 | 19 | sub plain($) { literal( [], $_[0] ) } | ||
248 | |||||||
249 | =head2 C<< str_val >> | ||||||
250 | |||||||
251 | Get the string representation of a C |
||||||
252 | coercion takes place; C |
||||||
253 | the current encodings. | ||||||
254 | |||||||
255 | =cut | ||||||
256 | |||||||
257 | sub str_val($) { | ||||||
258 | 10 | 10 | 1 | 1336 | my $str = $_[0]; | ||
259 | 10 | 50 | 33 | 116 | blessed $str && $str->isa( __PACKAGE__ ) ? $str->{val} : $str; | ||
260 | } | ||||||
261 | |||||||
262 | =head2 C<< rep >> | ||||||
263 | |||||||
264 | Return a list of encodings that currently applies to the specfied | ||||||
265 | string. | ||||||
266 | |||||||
267 | my $text = 'Just text'; | ||||||
268 | my @trep = rep $text; # @trep gets () | ||||||
269 | |||||||
270 | my $html = already html => ' Boo! '; |
||||||
271 | my @hrep = rep $html; # @hrep gets ( 'html' ) | ||||||
272 | |||||||
273 | =cut | ||||||
274 | |||||||
275 | sub rep { | ||||||
276 | 20 | 20 | 1 | 28 | my $str = $_[0]; | ||
277 | 20 | 100 | 66 | 138 | if ( blessed $str && $str->isa( __PACKAGE__ ) ) { | ||
278 | 19 | 26 | my @r = @{ $str->{rep} }; | ||||
19 | 105 | ||||||
279 | 19 | 100 | 81 | return wantarray ? @r : join '_', @r; | |||
280 | } | ||||||
281 | 1 | 6 | return; | ||||
282 | } | ||||||
283 | |||||||
284 | 1; | ||||||
285 | __END__ |