| blib/lib/String/Smart.pm | |||
|---|---|---|---|
| Criterion | Covered | Total | % |
| statement | 58 | 58 | 100.0 |
| branch | 14 | 16 | 87.5 |
| condition | 13 | 18 | 72.2 |
| subroutine | 15 | 15 | 100.0 |
| pod | 7 | 7 | 100.0 |
| total | 107 | 114 | 93.8 |
| line | stmt | bran | cond | sub | pod | time | code |
|---|---|---|---|---|---|---|---|
| 1 | package String::Smart; | ||||||
| 2 | |||||||
| 3 | 2 | 2 | 59361 | use warnings; | |||
| 2 | 5 | ||||||
| 2 | 157 | ||||||
| 4 | 2 | 2 | 11 | use strict; | |||
| 2 | 6 | ||||||
| 2 | 70 | ||||||
| 5 | 2 | 2 | 13 | use Carp; | |||
| 2 | 8 | ||||||
| 2 | 202 | ||||||
| 6 | 2 | 2 | 12 | use Exporter; | |||
| 2 | 5 | ||||||
| 2 | 111 | ||||||
| 7 | 2 | 2 | 16 | use Scalar::Util qw( blessed ); | |||
| 2 | 4 | ||||||
| 2 | 270 | ||||||
| 8 | |||||||
| 9 | 2 | 2 | 5062 | use overload '""' => \&str_val; | |||
| 2 | 2763 | ||||||
| 2 | 16 | ||||||
| 10 | |||||||
| 11 | =head1 NAME | ||||||
| 12 | |||||||
| 13 | String::Smart - Strings that know how to escape themselves. | ||||||
| 14 | |||||||
| 15 | =head1 VERSION | ||||||
| 16 | |||||||
| 17 | This document describes String::Smart version 0.4 | ||||||
| 18 | |||||||
| 19 | =cut | ||||||
| 20 | |||||||
| 21 | 2 | 2 | 132 | use vars qw( $VERSION @ISA @EXPORT_OK %EXPORT_TAGS ); | |||
| 2 | 3 | ||||||
| 2 | 2043 | ||||||
| 22 | |||||||
| 23 | $VERSION = '0.4'; | ||||||
| 24 | @ISA = qw( Exporter ); | ||||||
| 25 | @EXPORT_OK = qw( already as add_rep literal plain rep str_val ); | ||||||
| 26 | %EXPORT_TAGS = ( all => \@EXPORT_OK ); | ||||||
| 27 | |||||||
| 28 | my %rep_map = (); | ||||||
| 29 | |||||||
| 30 | =head1 SYNOPSIS | ||||||
| 31 | |||||||
| 32 | use String::Smart; | ||||||
| 33 | my $plain = " |
||||||
| 34 | my $html = as html => " <This is HTML> "; |
||||||
| 35 | |||||||
| 36 | print as html => $plain, as html => $html; | ||||||
| 37 | # Prints "<This is plain text> <This is HTML> " |
||||||
| 38 | |||||||
| 39 | print plain $html; | ||||||
| 40 | # Croaks: "Can't decode markup" | ||||||
| 41 | |||||||
| 42 | =head1 DESCRIPTION | ||||||
| 43 | |||||||
| 44 | String::Smart implements overloaded string values that know how they are | ||||||
| 45 | currently encoded or escaped and are capable of transforming themselves | ||||||
| 46 | into other encodings. | ||||||
| 47 | |||||||
| 48 | In many applications it is necessary to apply various escaping rules to | ||||||
| 49 | strings before they can safely be used. For example when building a SQL | ||||||
| 50 | query string literals must be escaped to avoid SQL injection | ||||||
| 51 | vulnerabilities. | ||||||
| 52 | |||||||
| 53 | Typically this is achieved by SQL escaping all strings that are passed | ||||||
| 54 | to the query builder. But what if you pass a string that has already | ||||||
| 55 | been SQL escaped? Or a string that is URL encoded? If you wish to pass a | ||||||
| 56 | mixture of already-encoded strings and plain string literals you have to | ||||||
| 57 | arrange some out of band means of communicating the encoding state of | ||||||
| 58 | each string. | ||||||
| 59 | |||||||
| 60 | With C |
||||||
| 61 | ask for SQL escaped strings and behind the scenes the appropriate | ||||||
| 62 | transformations will be applied to each string based on its | ||||||
| 63 | current encoding. | ||||||
| 64 | |||||||
| 65 | For example: | ||||||
| 66 | |||||||
| 67 | my $uri_enc = already uri => 'Spaces+are+evil'; | ||||||
| 68 | my $sql_enc = already sql => "\\'Quotes are backslashed\\'"; | ||||||
| 69 | my $not_enc = "Just some literal punctuation: %'+"; | ||||||
| 70 | |||||||
| 71 | print literal sql => $uri_enc; | ||||||
| 72 | # removes URI encoding | ||||||
| 73 | # applies SQL encoding | ||||||
| 74 | # prints | ||||||
| 75 | # Spaces are evil | ||||||
| 76 | |||||||
| 77 | print literal sql => $sql_enc; | ||||||
| 78 | # already sql encoded | ||||||
| 79 | # prints | ||||||
| 80 | # \'Quotes are backslashed\' | ||||||
| 81 | |||||||
| 82 | print literal sql => $not_enc; | ||||||
| 83 | # applies SQL encoding | ||||||
| 84 | # prints | ||||||
| 85 | # Just some literal punctuation: %\'+ | ||||||
| 86 | |||||||
| 87 | The important point is that the requested encoding is absolute rather | ||||||
| 88 | than relative. A C |
||||||
| 89 | can work out how to re-encode itself in the requested way. | ||||||
| 90 | |||||||
| 91 | =head2 A note on the examples | ||||||
| 92 | |||||||
| 93 | Throughout the documentation I assume that various encoding | ||||||
| 94 | representations (C |
||||||
| 95 | These are not defined by C |
||||||
| 96 | C |
||||||
| 97 | examples will run. | ||||||
| 98 | |||||||
| 99 | =head1 INTERFACE | ||||||
| 100 | |||||||
| 101 | =head2 C<< add_rep >> | ||||||
| 102 | |||||||
| 103 | Add an encoding representation. The namespace for encodings is global. | ||||||
| 104 | This may turn out to be a problem - and may therefore change. | ||||||
| 105 | |||||||
| 106 | add_rep reversed => sub { reverse $_[0] }, sub { reverse $_[0] }; | ||||||
| 107 | my $this = "Hello"; | ||||||
| 108 | my $that = reversed "Hello"; | ||||||
| 109 | print as reversed => $this, "\n"; | ||||||
| 110 | # prints "olleH" | ||||||
| 111 | print as reversed => $that, "\n"; | ||||||
| 112 | # also prints "olleH" | ||||||
| 113 | |||||||
| 114 | A representation consists of a name and two subroutine references. The | ||||||
| 115 | first subroutine applies the encoding, the second reverses it. If either | ||||||
| 116 | subroutine is undefined a boilerplate subroutine that throws a | ||||||
| 117 | descriptive error will be used in its place. | ||||||
| 118 | |||||||
| 119 | =cut | ||||||
| 120 | |||||||
| 121 | sub add_rep($$$) { | ||||||
| 122 | 6 | 6 | 1 | 677 | my ( $name, $to, $from ) = @_; | ||
| 123 | |||||||
| 124 | 6 | 100 | 321 | croak "$name contains an underscore" | |||
| 125 | if $name =~ /_/; | ||||||
| 126 | |||||||
| 127 | 5 | 15 | my %spec = ( from => $from, to => $to ); | ||||
| 128 | 5 | 15 | for my $dir ( keys %spec ) { | ||||
| 129 | 10 | 100 | 29 | unless ( defined $spec{$dir} ) { | |||
| 130 | $spec{$dir} = sub { | ||||||
| 131 | 2 | 2 | 295 | croak "Don't know how to convert $dir $name"; | |||
| 132 | 2 | 11 | }; | ||||
| 133 | } | ||||||
| 134 | } | ||||||
| 135 | |||||||
| 136 | 5 | 22 | $rep_map{$name} = \%spec; | ||||
| 137 | } | ||||||
| 138 | |||||||
| 139 | =head2 C<< as >> | ||||||
| 140 | |||||||
| 141 | Coerce a string into the specified encoding. | ||||||
| 142 | |||||||
| 143 | my $representation = as html => $some_string; | ||||||
| 144 | |||||||
| 145 | Optionally multiple encodings my be supplied either like this: | ||||||
| 146 | |||||||
| 147 | my $rep = as html_nl2br => $some_string; | ||||||
| 148 | |||||||
| 149 | Or like this: | ||||||
| 150 | |||||||
| 151 | my $rep = as ['html', 'nl2br'], $some_string; | ||||||
| 152 | |||||||
| 153 | The returned object (actually a hash blessed to C |
||||||
| 154 | will have the specified encoding irrespective of it's current | ||||||
| 155 | encoding. For example the sequence: | ||||||
| 156 | |||||||
| 157 | my $html1 = as html => $some_string; | ||||||
| 158 | my $html2 = as html => $html1; | ||||||
| 159 | |||||||
| 160 | Does I |
||||||
| 161 | 'absolute'. A path of transformations that will convert the string from | ||||||
| 162 | whatever its current encoding is will be computed and applied. | ||||||
| 163 | |||||||
| 164 | =cut | ||||||
| 165 | |||||||
| 166 | sub as($$) { | ||||||
| 167 | 16 | 16 | 1 | 1661 | my ( $desired, $str ) = @_; | ||
| 168 | |||||||
| 169 | my @desired | ||||||
| 170 | 16 | 100 | 58 | = map { split /_/ } 'ARRAY' eq ref $desired ? @$desired : $desired; | |||
| 11 | 48 | ||||||
| 171 | |||||||
| 172 | 16 | 100 | 66 | 129 | unless ( blessed $str && $str->isa( __PACKAGE__ ) ) { | ||
| 173 | 6 | 23 | $str = bless { val => $str, rep => [] }; | ||||
| 174 | } | ||||||
| 175 | |||||||
| 176 | 16 | 38 | my @got_rep = $str->rep; | ||||
| 177 | 16 | 65 | my @want_rep = @desired; | ||||
| 178 | |||||||
| 179 | # Prune common reps | ||||||
| 180 | 16 | 100 | 89 | while ( @got_rep && @want_rep && $got_rep[0] eq $want_rep[0] ) { | |||
| 100 | |||||||
| 181 | 7 | 10 | shift @got_rep; | ||||
| 182 | 7 | 33 | shift @want_rep; | ||||
| 183 | } | ||||||
| 184 | |||||||
| 185 | 16 | 27 | $str = $str->{val}; | ||||
| 186 | |||||||
| 187 | 16 | 65 | for my $spec ( [ 'from', reverse @got_rep ], [ 'to', @want_rep ] ) { | ||||
| 188 | 31 | 65 | my $dir = shift @$spec; | ||||
| 189 | 31 | 58 | for my $rep ( @$spec ) { | ||||
| 190 | 21 | 66 | 222 | my $handler = $rep_map{$rep} || croak "Don't know about $rep"; | |||
| 191 | 20 | 57 | $str = $handler->{$dir}->( $str ); | ||||
| 192 | } | ||||||
| 193 | } | ||||||
| 194 | |||||||
| 195 | 13 | 132 | return bless { | ||||
| 196 | val => $str, | ||||||
| 197 | rep => \@desired, | ||||||
| 198 | }; | ||||||
| 199 | } | ||||||
| 200 | |||||||
| 201 | =head2 C<< already >> | ||||||
| 202 | |||||||
| 203 | Declare that a string is already encoded in a particular way. For example: | ||||||
| 204 | |||||||
| 205 | my $html = already html => ' This is a paragraph '; |
||||||
| 206 | my $text = 'This is just << some text >>'; | ||||||
| 207 | |||||||
| 208 | print literal html => $html; | ||||||
| 209 | # already HTML encoded | ||||||
| 210 | # prints | ||||||
| 211 | # This is a paragraph |
||||||
| 212 | |||||||
| 213 | print literal html => $text; | ||||||
| 214 | # applies HTML encoding | ||||||
| 215 | # prints | ||||||
| 216 | # This is just << some text >> | ||||||
| 217 | |||||||
| 218 | =cut | ||||||
| 219 | |||||||
| 220 | sub already($$) { | ||||||
| 221 | 2 | 18 | return bless { | ||||
| 222 | val => $_[1], | ||||||
| 223 | 2 | 50 | 2 | 1 | 571 | rep => [ map { split /_/ } 'ARRAY' eq ref $_[0] ? @$_[0] : $_[0] ] | |
| 224 | }; | ||||||
| 225 | } | ||||||
| 226 | |||||||
| 227 | =head2 C<< literal >> | ||||||
| 228 | |||||||
| 229 | Convert a string to the specified encoding and return it as a normal | ||||||
| 230 | unblessed string. | ||||||
| 231 | |||||||
| 232 | =cut | ||||||
| 233 | |||||||
| 234 | 6 | 6 | 1 | 34 | sub literal($$) { as( $_[0], $_[1] )->{val} } | ||
| 235 | |||||||
| 236 | =head2 C<< plain >> | ||||||
| 237 | |||||||
| 238 | Remove any encoding from a string. | ||||||
| 239 | |||||||
| 240 | my $uri_enc = already uri => 'Spaces+are+evil%21'; | ||||||
| 241 | print plain $uri_enc; | ||||||
| 242 | # prints | ||||||
| 243 | # Spaces are evil! | ||||||
| 244 | |||||||
| 245 | =cut | ||||||
| 246 | |||||||
| 247 | 5 | 5 | 1 | 19 | sub plain($) { literal( [], $_[0] ) } | ||
| 248 | |||||||
| 249 | =head2 C<< str_val >> | ||||||
| 250 | |||||||
| 251 | Get the string representation of a C |
||||||
| 252 | coercion takes place; C |
||||||
| 253 | the current encodings. | ||||||
| 254 | |||||||
| 255 | =cut | ||||||
| 256 | |||||||
| 257 | sub str_val($) { | ||||||
| 258 | 10 | 10 | 1 | 1336 | my $str = $_[0]; | ||
| 259 | 10 | 50 | 33 | 116 | blessed $str && $str->isa( __PACKAGE__ ) ? $str->{val} : $str; | ||
| 260 | } | ||||||
| 261 | |||||||
| 262 | =head2 C<< rep >> | ||||||
| 263 | |||||||
| 264 | Return a list of encodings that currently applies to the specfied | ||||||
| 265 | string. | ||||||
| 266 | |||||||
| 267 | my $text = 'Just text'; | ||||||
| 268 | my @trep = rep $text; # @trep gets () | ||||||
| 269 | |||||||
| 270 | my $html = already html => ' Boo! '; |
||||||
| 271 | my @hrep = rep $html; # @hrep gets ( 'html' ) | ||||||
| 272 | |||||||
| 273 | =cut | ||||||
| 274 | |||||||
| 275 | sub rep { | ||||||
| 276 | 20 | 20 | 1 | 28 | my $str = $_[0]; | ||
| 277 | 20 | 100 | 66 | 138 | if ( blessed $str && $str->isa( __PACKAGE__ ) ) { | ||
| 278 | 19 | 26 | my @r = @{ $str->{rep} }; | ||||
| 19 | 105 | ||||||
| 279 | 19 | 100 | 81 | return wantarray ? @r : join '_', @r; | |||
| 280 | } | ||||||
| 281 | 1 | 6 | return; | ||||
| 282 | } | ||||||
| 283 | |||||||
| 284 | 1; | ||||||
| 285 | __END__ |