| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Perl::ToPerl6::Transformer::BasicTypes::Strings::Interpolation; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 1 |  |  | 1 |  | 842 | use 5.006001; | 
|  | 1 |  |  |  |  | 3 |  | 
| 4 | 1 |  |  | 1 |  | 4 | use strict; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 19 |  | 
| 5 | 1 |  |  | 1 |  | 4 | use warnings; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 25 |  | 
| 6 | 1 |  |  | 1 |  | 4 | use Readonly; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 41 |  | 
| 7 | 1 |  |  | 1 |  | 5 | use List::Util qw( max ); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 73 |  | 
| 8 | 1 |  |  | 1 |  | 15438 | use Text::Balanced qw( extract_variable ); | 
|  | 1 |  |  |  |  | 20245 |  | 
|  | 1 |  |  |  |  | 104 |  | 
| 9 |  |  |  |  |  |  |  | 
| 10 | 1 |  |  | 1 |  | 10 | use Perl::ToPerl6::Utils qw{ :severities }; | 
|  | 1 |  |  |  |  | 4 |  | 
|  | 1 |  |  |  |  | 83 |  | 
| 11 | 1 |  |  | 1 |  | 149 | use Perl::ToPerl6::Utils::PPI qw{ set_string }; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 57 |  | 
| 12 |  |  |  |  |  |  |  | 
| 13 | 1 |  |  | 1 |  | 10 | use base 'Perl::ToPerl6::Transformer'; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 2060 |  | 
| 14 |  |  |  |  |  |  |  | 
| 15 |  |  |  |  |  |  | #----------------------------------------------------------------------------- | 
| 16 |  |  |  |  |  |  |  | 
| 17 |  |  |  |  |  |  | Readonly::Scalar my $DESC => q{Rewrite interpolated strings}; | 
| 18 |  |  |  |  |  |  | Readonly::Scalar my $EXPL => q{Rewrite interpolated strings}; | 
| 19 |  |  |  |  |  |  |  | 
| 20 |  |  |  |  |  |  | #----------------------------------------------------------------------------- | 
| 21 |  |  |  |  |  |  |  | 
| 22 | 0 |  |  | 0 |  |  | sub supported_parameters { return ()                 } | 
| 23 | 0 |  |  | 0 |  |  | sub default_necessity    { return $NECESSITY_HIGHEST } | 
| 24 | 0 |  |  | 0 |  |  | sub default_themes       { return qw( core )         } | 
| 25 |  |  |  |  |  |  | sub applies_to           { | 
| 26 | 0 |  |  | 0 |  |  | return 'PPI::Token::Quote::Interpolate', | 
| 27 |  |  |  |  |  |  | 'PPI::Token::Quote::Double' | 
| 28 |  |  |  |  |  |  | } | 
| 29 |  |  |  |  |  |  |  | 
| 30 |  |  |  |  |  |  | #----------------------------------------------------------------------------- | 
| 31 |  |  |  |  |  |  | # | 
| 32 |  |  |  |  |  |  | # A fairly comprehensive list of edge case handling: | 
| 33 |  |  |  |  |  |  | # | 
| 34 |  |  |  |  |  |  | #   "\o" --> "o". It's illegal in Perl5, but may be encountered. | 
| 35 |  |  |  |  |  |  |  | 
| 36 |  |  |  |  |  |  | # "\o" is illegal in Perl5, so it shouldn't be encountered. | 
| 37 |  |  |  |  |  |  | # "\o1" is illegal in Perl5, so it shouldn't be encountered. | 
| 38 |  |  |  |  |  |  | # "\0" is legal though. | 
| 39 |  |  |  |  |  |  | # "\c" is illegal in Perl5, so it shouldn't be encountered. | 
| 40 |  |  |  |  |  |  | # | 
| 41 |  |  |  |  |  |  | #----------------------------------------------------------------------------- | 
| 42 |  |  |  |  |  |  |  | 
| 43 |  |  |  |  |  |  | # Tokenizer II: Electric Boogaloo. | 
| 44 |  |  |  |  |  |  | # | 
| 45 |  |  |  |  |  |  | # Since it'll eventually be needed... | 
| 46 |  |  |  |  |  |  |  | 
| 47 |  |  |  |  |  |  | sub tokenize { | 
| 48 | 0 |  |  | 0 |  |  | my ( $str ) = @_; | 
| 49 | 0 |  |  |  |  |  | my @c = split //, $str; | 
| 50 | 0 |  |  |  |  |  | my @token; | 
| 51 |  |  |  |  |  |  |  | 
| 52 | 0 |  |  |  |  |  | for ( my $i = 0; $i < @c; $i++ ) { | 
| 53 | 0 |  |  |  |  |  | my ( $v, $la1 ) = @c[ $i, $i + 1 ]; | 
| 54 |  |  |  |  |  |  |  | 
| 55 | 0 | 0 |  |  |  |  | if ( $v eq '\\' ) { | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 56 | 0 | 0 |  |  |  |  | if ( $la1 eq 'c' ) { | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 57 |  |  |  |  |  |  | } | 
| 58 |  |  |  |  |  |  | elsif ( $la1 eq 'l' ) { | 
| 59 |  |  |  |  |  |  | } | 
| 60 |  |  |  |  |  |  | elsif ( $la1 eq 'u' ) { | 
| 61 |  |  |  |  |  |  | } | 
| 62 |  |  |  |  |  |  | elsif ( $la1 eq 'E' ) { | 
| 63 |  |  |  |  |  |  | } | 
| 64 |  |  |  |  |  |  | elsif ( $la1 eq 'F' ) { | 
| 65 |  |  |  |  |  |  | } | 
| 66 |  |  |  |  |  |  | elsif ( $la1 eq 'L' ) { | 
| 67 |  |  |  |  |  |  | } | 
| 68 |  |  |  |  |  |  | elsif ( $la1 eq 'Q' ) { | 
| 69 |  |  |  |  |  |  | } | 
| 70 |  |  |  |  |  |  | elsif ( $la1 eq 'U' ) { | 
| 71 |  |  |  |  |  |  | } | 
| 72 |  |  |  |  |  |  | else { | 
| 73 |  |  |  |  |  |  | } | 
| 74 |  |  |  |  |  |  | } | 
| 75 |  |  |  |  |  |  | elsif ( $v eq '$' ) { | 
| 76 |  |  |  |  |  |  | } | 
| 77 |  |  |  |  |  |  | elsif ( $v eq '@' ) { | 
| 78 |  |  |  |  |  |  | } | 
| 79 |  |  |  |  |  |  | elsif ( $v eq '%' ) { | 
| 80 |  |  |  |  |  |  | } | 
| 81 |  |  |  |  |  |  | elsif ( $v eq '{' ) { | 
| 82 |  |  |  |  |  |  | } | 
| 83 |  |  |  |  |  |  | elsif ( $v eq '}' ) { | 
| 84 |  |  |  |  |  |  | } | 
| 85 |  |  |  |  |  |  | elsif ( $v eq '(' ) { | 
| 86 |  |  |  |  |  |  | } | 
| 87 |  |  |  |  |  |  | elsif ( $v eq ')' ) { | 
| 88 |  |  |  |  |  |  | } | 
| 89 |  |  |  |  |  |  | elsif ( $v eq '<' ) { | 
| 90 |  |  |  |  |  |  | } | 
| 91 |  |  |  |  |  |  | elsif ( $v eq '>' ) { | 
| 92 |  |  |  |  |  |  | } | 
| 93 |  |  |  |  |  |  | else { | 
| 94 |  |  |  |  |  |  | } | 
| 95 |  |  |  |  |  |  | } | 
| 96 |  |  |  |  |  |  |  | 
| 97 | 0 |  |  |  |  |  | return @token; | 
| 98 |  |  |  |  |  |  | } | 
| 99 |  |  |  |  |  |  |  | 
| 100 |  |  |  |  |  |  | #----------------------------------------------------------------------------- | 
| 101 |  |  |  |  |  |  | # | 
| 102 |  |  |  |  |  |  | # Some more cases get folded away here (hee.) | 
| 103 |  |  |  |  |  |  | # \U foo \L\E bar \E - 'bar' will get altered here. | 
| 104 |  |  |  |  |  |  | # \U foo \L$x\E bar \E - 'bar' will *not* get altered here, | 
| 105 |  |  |  |  |  |  | #                        even if $x is empty. | 
| 106 |  |  |  |  |  |  | # \U foo \Lxxx\E bar \E - 'bar' will *not* get altered here, | 
| 107 |  |  |  |  |  |  | # | 
| 108 |  |  |  |  |  |  | # So, \L..\E affects the rest of the string only if the contents | 
| 109 |  |  |  |  |  |  | # are empty. So it's effectively as if it never was there. | 
| 110 |  |  |  |  |  |  | # Get rid of it. | 
| 111 |  |  |  |  |  |  | # | 
| 112 |  |  |  |  |  |  | sub casefold { | 
| 113 | 0 |  |  | 0 |  |  | my ($self, $text) = @_; | 
| 114 | 0 |  |  |  |  |  | my @split = grep { $_ ne '' } split /( \\[luEFLQU] )/x, $text; | 
|  | 0 |  |  |  |  |  |  | 
| 115 |  |  |  |  |  |  |  | 
| 116 | 0 |  |  |  |  |  | my @token; | 
| 117 | 0 |  |  |  |  |  | for ( my $i = 0; $i < @split; $i++ ) { | 
| 118 | 0 |  |  |  |  |  | my ($v, $la1) = @split[$i,$i+1]; | 
| 119 | 0 | 0 | 0 |  |  |  | if ( $v =~ m< ^ \\[FLU] $ >x and | 
|  |  | 0 | 0 |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 120 |  |  |  |  |  |  | $la1 and $la1 eq '\\E' ) { | 
| 121 | 0 |  |  |  |  |  | $i+=2; | 
| 122 |  |  |  |  |  |  | } | 
| 123 |  |  |  |  |  |  | elsif ( $v eq '\\Q' ) { | 
| 124 | 0 |  |  |  |  |  | push @token, { type => 'quotemeta', content => $v }; | 
| 125 |  |  |  |  |  |  | } | 
| 126 |  |  |  |  |  |  | elsif ( $v =~ m{ \\[luEFLQU] }x ) { | 
| 127 | 0 |  |  |  |  |  | push @token, { type => 'casefold', content => $v }; | 
| 128 |  |  |  |  |  |  | } | 
| 129 |  |  |  |  |  |  | else { | 
| 130 | 0 |  |  |  |  |  | push @token, { type => 'uninterpolated', content => $v }; | 
| 131 |  |  |  |  |  |  | } | 
| 132 |  |  |  |  |  |  | } | 
| 133 | 0 |  |  |  |  |  | return @token; | 
| 134 |  |  |  |  |  |  | } | 
| 135 |  |  |  |  |  |  |  | 
| 136 |  |  |  |  |  |  | # At the end of this process, ideally we should only have these types of tokens: | 
| 137 |  |  |  |  |  |  | # | 
| 138 |  |  |  |  |  |  | #   Disambiguated variable - '${foo}' (needs separate handling) | 
| 139 |  |  |  |  |  |  | #   Variable - '$foo', '$foo[32]', '$foo{blah}' | 
| 140 |  |  |  |  |  |  | #   Case folding - '\l', '\E' | 
| 141 |  |  |  |  |  |  | #   Quotemeta - '\Q' | 
| 142 |  |  |  |  |  |  | #   Uninterpolated content - Anything that's not one of the above. | 
| 143 |  |  |  |  |  |  | # | 
| 144 |  |  |  |  |  |  | sub tokenize_variables { | 
| 145 | 0 |  |  | 0 |  |  | my ($self, $elem, $string) = @_; | 
| 146 | 0 |  |  |  |  |  | my $full_string = $string; | 
| 147 |  |  |  |  |  |  |  | 
| 148 | 0 |  |  |  |  |  | my @token; | 
| 149 |  |  |  |  |  |  |  | 
| 150 | 0 |  |  |  |  |  | my $iter = 100; | 
| 151 | 0 |  |  |  |  |  | while ( $string ) { | 
| 152 | 0 | 0 |  |  |  |  | unless ( --$iter  ) { | 
| 153 | 0 |  |  |  |  |  | my $line_number = $elem->line_number; | 
| 154 | 0 |  |  |  |  |  | die "Congratulations, you've broken string interpolation. Please report this message, along with the test file you were using to the author: <<$full_string>> on line $line_number\n"; | 
| 155 |  |  |  |  |  |  | } | 
| 156 |  |  |  |  |  |  |  | 
| 157 |  |  |  |  |  |  | # '${foo}', '@{foo}' is an interpolated value. | 
| 158 |  |  |  |  |  |  | # | 
| 159 | 0 | 0 |  |  |  |  | if ( $string =~ s< ^ ( [\$\@] ) \{ ( [^}]+ ) \} ><>x ) { | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 160 | 0 |  |  |  |  |  | push @token, { | 
| 161 |  |  |  |  |  |  | type => 'disambiguated variable', | 
| 162 |  |  |  |  |  |  | sigil => $1, | 
| 163 |  |  |  |  |  |  | content => $2 | 
| 164 |  |  |  |  |  |  | }; | 
| 165 |  |  |  |  |  |  | } | 
| 166 |  |  |  |  |  |  |  | 
| 167 |  |  |  |  |  |  | # extract_variable() doesn't handle most 'special' Perl variables, | 
| 168 |  |  |  |  |  |  | # so handle them specially. | 
| 169 |  |  |  |  |  |  | # | 
| 170 |  |  |  |  |  |  | elsif ( $string =~ s< ^ ( [\$\@] ) ( \s | [^a-zA-Z] ) }><>x ) { | 
| 171 | 0 |  |  |  |  |  | my ( $sigil, $content ) = ( $1, $2 ); | 
| 172 | 0 |  | 0 |  |  |  | while ( $string =~ s< ^ ( \[ [^\]]+ \] ) ><>sx or | 
| 173 |  |  |  |  |  |  | $string =~ s< ^ ( \{ [^\}]+ \} ) ><>sx ) { | 
| 174 | 0 |  |  |  |  |  | $content .= $1; | 
| 175 |  |  |  |  |  |  | } | 
| 176 | 0 |  |  |  |  |  | push @token, { | 
| 177 |  |  |  |  |  |  | type => 'variable', | 
| 178 |  |  |  |  |  |  | sigil => $sigil, | 
| 179 |  |  |  |  |  |  | content => $content | 
| 180 |  |  |  |  |  |  | }; | 
| 181 |  |  |  |  |  |  | } | 
| 182 |  |  |  |  |  |  |  | 
| 183 |  |  |  |  |  |  | # Anything else starting with a '$' or '@' is fair game. | 
| 184 |  |  |  |  |  |  | # | 
| 185 |  |  |  |  |  |  | elsif ( $string =~ m< ^ [\$\@] >x ) { | 
| 186 | 0 |  |  |  |  |  | my ( $var_name, $remainder, $prefix ) = | 
| 187 |  |  |  |  |  |  | extract_variable( $string ); | 
| 188 | 0 | 0 |  |  |  |  | if ( $var_name ) { | 
|  |  | 0 |  |  |  |  |  | 
| 189 | 0 |  |  |  |  |  | push @token, { type => 'variable', content => $var_name }; | 
| 190 | 0 |  |  |  |  |  | $string = $remainder; | 
| 191 |  |  |  |  |  |  | } | 
| 192 |  |  |  |  |  |  | # | 
| 193 |  |  |  |  |  |  | # XXX I"m betting that extract_variable() doesn't catch $] etc. | 
| 194 |  |  |  |  |  |  | # | 
| 195 |  |  |  |  |  |  | elsif ( $string =~ s< ^ ( [\$\@] [^\$\@]* ) ><>x ) { | 
| 196 | 0 | 0 |  |  |  |  | if ( @token ) { | 
| 197 | 0 |  |  |  |  |  | $token[-1]{content} .= $1; | 
| 198 |  |  |  |  |  |  | } | 
| 199 |  |  |  |  |  |  | else { | 
| 200 | 0 |  |  |  |  |  | push @token, { type => 'variable', content => $1 }; | 
| 201 |  |  |  |  |  |  | } | 
| 202 |  |  |  |  |  |  | } | 
| 203 |  |  |  |  |  |  | else { | 
| 204 | 0 |  |  |  |  |  | die "XXX String interpolation (leading variable) failed on '$string'! Please report this to the author.\n"; | 
| 205 |  |  |  |  |  |  | } | 
| 206 |  |  |  |  |  |  | } | 
| 207 |  |  |  |  |  |  |  | 
| 208 |  |  |  |  |  |  | # Anything that does *not* start with '$' or '@' is its own token, | 
| 209 |  |  |  |  |  |  | # at least up until the next '$' or '@' encountered. | 
| 210 |  |  |  |  |  |  | # | 
| 211 |  |  |  |  |  |  | elsif ( $string =~ s< ^ ( [^\$\@]+ ) ><>x ) { | 
| 212 | 0 |  |  |  |  |  | my $residue = $1; | 
| 213 |  |  |  |  |  |  |  | 
| 214 | 0 |  | 0 |  |  |  | while ( $residue and $residue =~ m< \\ $ >x ) { | 
| 215 | 0 | 0 |  |  |  |  | if ( $string =~ s< ^ ( \$\@ ) ><>x ) { | 
| 216 | 0 |  |  |  |  |  | $residue .= $1; | 
| 217 |  |  |  |  |  |  | } | 
| 218 | 0 |  |  |  |  |  | $string =~ s< (.) ><>x; | 
| 219 | 0 | 0 |  |  |  |  | $residue .= $1 if $1; | 
| 220 | 0 | 0 |  |  |  |  | if ( $string =~ s< ^ ( [^\$\@]+ ) ><>x ) { | 
| 221 | 0 |  |  |  |  |  | $residue .= $1; | 
| 222 |  |  |  |  |  |  | } | 
| 223 |  |  |  |  |  |  | else { | 
| 224 | 0 |  |  |  |  |  | last; | 
| 225 |  |  |  |  |  |  | } | 
| 226 |  |  |  |  |  |  | } | 
| 227 |  |  |  |  |  |  |  | 
| 228 |  |  |  |  |  |  | # Merge the first element of the residue with the last token if | 
| 229 |  |  |  |  |  |  | # possible. | 
| 230 |  |  |  |  |  |  | # | 
| 231 | 0 |  |  |  |  |  | my @result = $self->casefold($residue); | 
| 232 | 0 | 0 | 0 |  |  |  | if ( @token and $token[-1]{type} eq 'uninterpolated' ) { | 
| 233 | 0 |  |  |  |  |  | $token[-1]{content} .= shift(@result)->{content}; | 
| 234 |  |  |  |  |  |  | } | 
| 235 | 0 | 0 |  |  |  |  | push @token, @result if @result; | 
| 236 |  |  |  |  |  |  | } | 
| 237 |  |  |  |  |  |  |  | 
| 238 |  |  |  |  |  |  | else { | 
| 239 | 0 |  |  |  |  |  | die "XXX String interpolation failed on '$string'! Please report this to the author.\n"; | 
| 240 |  |  |  |  |  |  | } | 
| 241 |  |  |  |  |  |  | } | 
| 242 |  |  |  |  |  |  |  | 
| 243 | 0 |  |  |  |  |  | return grep { $_ ne '' } @token; | 
|  | 0 |  |  |  |  |  |  | 
| 244 |  |  |  |  |  |  | } | 
| 245 |  |  |  |  |  |  |  | 
| 246 |  |  |  |  |  |  | sub transform { | 
| 247 |  |  |  |  |  |  | my ($self, $elem, $doc) = @_; | 
| 248 |  |  |  |  |  |  |  | 
| 249 |  |  |  |  |  |  | my $old_string = $elem->string; | 
| 250 |  |  |  |  |  |  | my $new_string; | 
| 251 |  |  |  |  |  |  |  | 
| 252 |  |  |  |  |  |  | if ( index( $old_string, '@{[' ) >= 0 ) { | 
| 253 |  |  |  |  |  |  | warn "Interpolating perl code."; | 
| 254 |  |  |  |  |  |  | return; | 
| 255 |  |  |  |  |  |  | } | 
| 256 |  |  |  |  |  |  |  | 
| 257 |  |  |  |  |  |  | # Save the delimiters for later. Since they surrounded the original Perl5 | 
| 258 |  |  |  |  |  |  | # string, we can be certain that if we use these for the Perl6 equivalent | 
| 259 |  |  |  |  |  |  | # they'll be valid. | 
| 260 |  |  |  |  |  |  | # Yes, even in the case of: | 
| 261 |  |  |  |  |  |  | # | 
| 262 |  |  |  |  |  |  | # "\lfoo" -> "{lcfirst("f")}oo". | 
| 263 |  |  |  |  |  |  | # | 
| 264 |  |  |  |  |  |  | # Perl6 is smart enough to know which segment of the braid it's on, and | 
| 265 |  |  |  |  |  |  | # interprets the {..} boundary as a new Perl6 block. | 
| 266 |  |  |  |  |  |  | # | 
| 267 |  |  |  |  |  |  | my ( $start_delimiter ) = | 
| 268 |  |  |  |  |  |  | $elem->content =~ m{ ^ ( qq[ ]. | qq. | q[ ]. | q. | . ) }x; | 
| 269 |  |  |  |  |  |  | my $end_delimiter = substr( $elem->content, -1, 1 ); | 
| 270 |  |  |  |  |  |  |  | 
| 271 |  |  |  |  |  |  | # \l or \u followed *directly* by any \l or \u modifier simply ignores | 
| 272 |  |  |  |  |  |  | # the remaining \l or \u modifiers. | 
| 273 |  |  |  |  |  |  | # | 
| 274 |  |  |  |  |  |  | $old_string =~ s{ (\\[lu]) (\\[lu])+ }{$1}gx; | 
| 275 |  |  |  |  |  |  |  | 
| 276 |  |  |  |  |  |  | # \F, \L or \U followed *directly* by any \F, \L or \U modifier is a | 
| 277 |  |  |  |  |  |  | # syntax error in Perl5, and can be reduced to a single \F, \L or \U. | 
| 278 |  |  |  |  |  |  | # | 
| 279 |  |  |  |  |  |  | $old_string =~ s{ (\\[FLU]) (\\[FLU])+ }{$1}gx; | 
| 280 |  |  |  |  |  |  |  | 
| 281 |  |  |  |  |  |  | # \Q followed by anything is still a legal sequence. | 
| 282 |  |  |  |  |  |  |  | 
| 283 |  |  |  |  |  |  | # \t is unchanged in Perl6. | 
| 284 |  |  |  |  |  |  | # \n is unchanged in Perl6. | 
| 285 |  |  |  |  |  |  | # \r is unchanged in Perl6. | 
| 286 |  |  |  |  |  |  | # \f is unchanged in Perl6. | 
| 287 |  |  |  |  |  |  | # \b is unchanged in Perl6. | 
| 288 |  |  |  |  |  |  | # \a is unchanged in Perl6. | 
| 289 |  |  |  |  |  |  | # \e is unchanged in Perl6. | 
| 290 |  |  |  |  |  |  |  | 
| 291 |  |  |  |  |  |  | # \v is deprecated | 
| 292 |  |  |  |  |  |  | # | 
| 293 |  |  |  |  |  |  | $old_string =~ s{ \\v }{v}mgx; | 
| 294 |  |  |  |  |  |  |  | 
| 295 |  |  |  |  |  |  | # \x{263a} now looks like \x[263a]. | 
| 296 |  |  |  |  |  |  | # | 
| 297 |  |  |  |  |  |  | $old_string =~ s{ \\x \{ ([0-9a-fA-F]+) \} }{\\x[$1]}mgx; | 
| 298 |  |  |  |  |  |  |  | 
| 299 |  |  |  |  |  |  | # \x12 is unaltered. | 
| 300 |  |  |  |  |  |  | # \x1L is unaltered. | 
| 301 |  |  |  |  |  |  | # | 
| 302 |  |  |  |  |  |  | # '..\x' is illegal in Perl6. | 
| 303 |  |  |  |  |  |  | # | 
| 304 |  |  |  |  |  |  | $old_string =~ s{ \\x $ }{}mx; | 
| 305 |  |  |  |  |  |  |  | 
| 306 |  |  |  |  |  |  | # \N{U+1234} is now \x[1234]. | 
| 307 |  |  |  |  |  |  | # Variants with whitespace are illegal in Perl5, so don't worry about 'em | 
| 308 |  |  |  |  |  |  | # | 
| 309 |  |  |  |  |  |  | $old_string =~ s{ \\N \{ U \+ ([0-9a-fA-F]+) \} }{\\x[$1]}mgx; | 
| 310 |  |  |  |  |  |  |  | 
| 311 |  |  |  |  |  |  | # \N{LATIN CAPITAL LETTER X} is now \c[LATIN CAPITAL LETTER X] | 
| 312 |  |  |  |  |  |  | # | 
| 313 |  |  |  |  |  |  | $old_string =~ s{ \\N \{ ([^\}]+) \} }{\\c[$1]}mgx; | 
| 314 |  |  |  |  |  |  |  | 
| 315 |  |  |  |  |  |  | # \o{2637} now looks like \o[2637]. | 
| 316 |  |  |  |  |  |  | # | 
| 317 |  |  |  |  |  |  | # \o12 is unaltered. | 
| 318 |  |  |  |  |  |  | # | 
| 319 |  |  |  |  |  |  | $old_string =~ s{ \\o \{ ([0-7]*) \) }{\\o[$1]}mgx; | 
| 320 |  |  |  |  |  |  |  | 
| 321 |  |  |  |  |  |  | # \0123 is now \o[123]. | 
| 322 |  |  |  |  |  |  | # | 
| 323 |  |  |  |  |  |  | $old_string =~ s{ \\0 \{ ([0-7]*) \) }{\\o[$1]}mgx; | 
| 324 |  |  |  |  |  |  |  | 
| 325 |  |  |  |  |  |  | # \oL is now illegal, and in perl5 it generated nothing. | 
| 326 |  |  |  |  |  |  | # | 
| 327 |  |  |  |  |  |  | # "...\o" is a syntax error in both languages, so don't worry about it. | 
| 328 |  |  |  |  |  |  | # | 
| 329 |  |  |  |  |  |  | $old_string =~ s{ \\o \{ ([^\}]*) \} }{\\o[$1]}mgx; | 
| 330 |  |  |  |  |  |  |  | 
| 331 |  |  |  |  |  |  | # \c. is unchanged. Or at least I'll treat it as such. | 
| 332 |  |  |  |  |  |  |  | 
| 333 |  |  |  |  |  |  | # At this point, you'll notice that practically every '{' and '}' | 
| 334 |  |  |  |  |  |  | # character is out of our target string, with trivial exceptions like | 
| 335 |  |  |  |  |  |  | # 'c{', which is illegal anyway. | 
| 336 |  |  |  |  |  |  | # | 
| 337 |  |  |  |  |  |  | # This is important for two main reasons. The first is that anything inside | 
| 338 |  |  |  |  |  |  | # {} in Perl6 is considered valid Perl6 code, which is also why a trick | 
| 339 |  |  |  |  |  |  | # we use later works. | 
| 340 |  |  |  |  |  |  | # | 
| 341 |  |  |  |  |  |  | # So, unless {} are part of a variable that can be interpolated, we have | 
| 342 |  |  |  |  |  |  | # to escape it. And we can't do that if there are constructs like \x{123} | 
| 343 |  |  |  |  |  |  | # hanging around in the string, because those would get messed up. | 
| 344 |  |  |  |  |  |  | # | 
| 345 |  |  |  |  |  |  |  | 
| 346 |  |  |  |  |  |  | my @token = $self->tokenize_variables($elem,$old_string); | 
| 347 |  |  |  |  |  |  | #use YAML;warn Dump grep { $_->{type} eq 'a' } @token; | 
| 348 |  |  |  |  |  |  |  | 
| 349 |  |  |  |  |  |  | # Now on to rewriting \l, \u, \E, \F, \L, \Q, \U in Perl6. | 
| 350 |  |  |  |  |  |  | # | 
| 351 |  |  |  |  |  |  | # \F, \L, \Q and \U are "sort of" nested. | 
| 352 |  |  |  |  |  |  | # | 
| 353 |  |  |  |  |  |  | # You can see this by running C<print "Start \L lOWER \U Upper Me \E mE \E"> | 
| 354 |  |  |  |  |  |  | # > Start  lower  UPPER ME  mE | 
| 355 |  |  |  |  |  |  | # | 
| 356 |  |  |  |  |  |  | # Note how 'lOWER' is case-flattend, but after the \U..E, 'mE' isn't? | 
| 357 |  |  |  |  |  |  | # | 
| 358 |  |  |  |  |  |  | # So, rather than having to retain case settings, we can simply stop the | 
| 359 |  |  |  |  |  |  | # lc(..) block after the first... | 
| 360 |  |  |  |  |  |  | # | 
| 361 |  |  |  |  |  |  | my $new_content; | 
| 362 |  |  |  |  |  |  | for ( my $i = 0; $i < @token; $i++ ) { | 
| 363 |  |  |  |  |  |  | my ( $v, $la1 ) = @token[$i,$i+1]; | 
| 364 |  |  |  |  |  |  |  | 
| 365 |  |  |  |  |  |  | # '${a}' is mapped to '{$a}'. | 
| 366 |  |  |  |  |  |  | # '${$a}' is a varvar in Perl5, needs other techniques in perl6 | 
| 367 |  |  |  |  |  |  | # | 
| 368 |  |  |  |  |  |  | if ( $v->{type} eq 'disambiguated variable' ) { | 
| 369 |  |  |  |  |  |  | if ( $v->{content} =~ m{ ^ ( \$ | \@ ) }x ) { | 
| 370 |  |  |  |  |  |  | warn "Use of varvar in string, not translating.\n"; | 
| 371 |  |  |  |  |  |  | $v->{content} = $v->{sigil} . '{' . $v->{content} . '}'; | 
| 372 |  |  |  |  |  |  | } | 
| 373 |  |  |  |  |  |  | else { | 
| 374 |  |  |  |  |  |  | $v->{content} =~ s< [-][\>] ><.>gx; | 
| 375 |  |  |  |  |  |  | $v->{content} =~ s< \{ (\w+) (\s*) \} >< '{' . | 
| 376 |  |  |  |  |  |  | $start_delimiter . $1 . | 
| 377 |  |  |  |  |  |  | $end_delimiter . $2 . | 
| 378 |  |  |  |  |  |  | '}'>segx; | 
| 379 |  |  |  |  |  |  | $v->{content} = '{' . $v->{sigil} . $v->{content} . '}'; | 
| 380 |  |  |  |  |  |  | } | 
| 381 |  |  |  |  |  |  | } | 
| 382 |  |  |  |  |  |  |  | 
| 383 |  |  |  |  |  |  | # All the other variables, including those with {} and [] indices, | 
| 384 |  |  |  |  |  |  | # are grouped in this category. | 
| 385 |  |  |  |  |  |  | # | 
| 386 |  |  |  |  |  |  | elsif ( $v->{type} eq 'variable' ) { | 
| 387 |  |  |  |  |  |  | $v->{content} =~ s< [-][\>] ><.>gx; | 
| 388 |  |  |  |  |  |  | $v->{content} =~ s< \{ (\w+) (\s*) \} >< '{' . | 
| 389 |  |  |  |  |  |  | $start_delimiter . $1 . | 
| 390 |  |  |  |  |  |  | $end_delimiter . $2 . | 
| 391 |  |  |  |  |  |  | '}'>segx; | 
| 392 |  |  |  |  |  |  |  | 
| 393 |  |  |  |  |  |  | $v->{content} =~ s< ^ ( [(<>)] ) ><\\$1>sgx; | 
| 394 |  |  |  |  |  |  | $v->{content} =~ s< ( [^\\] ) ( [(<>)] ) ><$1\\$2>sgx; | 
| 395 |  |  |  |  |  |  | } | 
| 396 |  |  |  |  |  |  |  | 
| 397 |  |  |  |  |  |  | # Non-variables are handled down here. | 
| 398 |  |  |  |  |  |  | # | 
| 399 |  |  |  |  |  |  | else { | 
| 400 |  |  |  |  |  |  | # < > is now a pointy block, { } is now a code block, ( ) is also | 
| 401 |  |  |  |  |  |  | # used. | 
| 402 |  |  |  |  |  |  | # | 
| 403 |  |  |  |  |  |  | $v->{content} =~ s< ^ ( [{(<>)}] ) ><\\$1>sgx; | 
| 404 |  |  |  |  |  |  | $v->{content} =~ s< ( [^\\] ) ( [{(<>)}] ) ><$1\\$2>sgx; | 
| 405 |  |  |  |  |  |  | } | 
| 406 |  |  |  |  |  |  | $new_content .= $v->{content}; | 
| 407 |  |  |  |  |  |  | } | 
| 408 |  |  |  |  |  |  |  | 
| 409 |  |  |  |  |  |  | #        elsif ( $v eq '\\F' or $v eq '\\L' ) { | 
| 410 |  |  |  |  |  |  | #            $collected .= '{' if @manip == 0; | 
| 411 |  |  |  |  |  |  | #            if ( @manip == 0 ) { | 
| 412 |  |  |  |  |  |  | #                $collected .= 'lc(' . $start_delimiter; | 
| 413 |  |  |  |  |  |  | #            } | 
| 414 |  |  |  |  |  |  | #            else { | 
| 415 |  |  |  |  |  |  | #                $collected .= $end_delimiter . ')~lc(' . $start_delimiter; | 
| 416 |  |  |  |  |  |  | #            } | 
| 417 |  |  |  |  |  |  | #            push @manip, $v; | 
| 418 |  |  |  |  |  |  | #        } | 
| 419 |  |  |  |  |  |  | #        elsif ( $v eq '\\Q' ) { | 
| 420 |  |  |  |  |  |  | #            $collected .= '{' if @manip == 0; | 
| 421 |  |  |  |  |  |  | #            if ( @manip == 0 ) { | 
| 422 |  |  |  |  |  |  | #                $collected .= 'quotemeta(' . $start_delimiter; | 
| 423 |  |  |  |  |  |  | #            } | 
| 424 |  |  |  |  |  |  | #            else { | 
| 425 |  |  |  |  |  |  | #                $collected .= $end_delimiter . ')~quotemeta(' . $start_delimiter; | 
| 426 |  |  |  |  |  |  | #            } | 
| 427 |  |  |  |  |  |  | #            push @manip, $v; | 
| 428 |  |  |  |  |  |  | #        } | 
| 429 |  |  |  |  |  |  | #        elsif ( $v eq '\\U' ) { | 
| 430 |  |  |  |  |  |  | #            $collected .= '{' if @manip == 0; | 
| 431 |  |  |  |  |  |  | #            if ( @manip == 0 ) { | 
| 432 |  |  |  |  |  |  | #                $collected .= 'tc(' . $start_delimiter; | 
| 433 |  |  |  |  |  |  | #            } | 
| 434 |  |  |  |  |  |  | #            else { | 
| 435 |  |  |  |  |  |  | #                $collected .= $end_delimiter . ')~tc(' . $start_delimiter; | 
| 436 |  |  |  |  |  |  | #            } | 
| 437 |  |  |  |  |  |  | #            push @manip, $v; | 
| 438 |  |  |  |  |  |  | #        } | 
| 439 |  |  |  |  |  |  | #        elsif ( $v eq '\\E' ) { | 
| 440 |  |  |  |  |  |  | #            pop @manip; | 
| 441 |  |  |  |  |  |  | #            if ( @manip == 0 ) { | 
| 442 |  |  |  |  |  |  | #                $collected .= $end_delimiter . ')}'; | 
| 443 |  |  |  |  |  |  | #            } | 
| 444 |  |  |  |  |  |  | #            else { | 
| 445 |  |  |  |  |  |  | #                $collected .= $end_delimiter . ')'; | 
| 446 |  |  |  |  |  |  | #            } | 
| 447 |  |  |  |  |  |  | #        } | 
| 448 |  |  |  |  |  |  |  | 
| 449 |  |  |  |  |  |  | eval { | 
| 450 |  |  |  |  |  |  | set_string($elem,$new_content); | 
| 451 |  |  |  |  |  |  | }; | 
| 452 |  |  |  |  |  |  | if ( $@ ) { | 
| 453 | 1 |  |  | 1 |  | 565 | use YAML;die "set_string broke! Please report this: ".Dump($elem); | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 454 |  |  |  |  |  |  | } | 
| 455 |  |  |  |  |  |  |  | 
| 456 |  |  |  |  |  |  | return $self->transformation( $DESC, $EXPL, $elem ); | 
| 457 |  |  |  |  |  |  | } | 
| 458 |  |  |  |  |  |  |  | 
| 459 |  |  |  |  |  |  | 1; | 
| 460 |  |  |  |  |  |  |  | 
| 461 |  |  |  |  |  |  | #----------------------------------------------------------------------------- | 
| 462 |  |  |  |  |  |  |  | 
| 463 |  |  |  |  |  |  | __END__ | 
| 464 |  |  |  |  |  |  |  | 
| 465 |  |  |  |  |  |  | =pod | 
| 466 |  |  |  |  |  |  |  | 
| 467 |  |  |  |  |  |  | =head1 NAME | 
| 468 |  |  |  |  |  |  |  | 
| 469 |  |  |  |  |  |  | Perl::ToPerl6::Transformer::BasicTypes::Strings::Interpolation - Format C<${x}> correctly | 
| 470 |  |  |  |  |  |  |  | 
| 471 |  |  |  |  |  |  |  | 
| 472 |  |  |  |  |  |  | =head1 AFFILIATION | 
| 473 |  |  |  |  |  |  |  | 
| 474 |  |  |  |  |  |  | This Transformer is part of the core L<Perl::ToPerl6|Perl::ToPerl6> | 
| 475 |  |  |  |  |  |  | distribution. | 
| 476 |  |  |  |  |  |  |  | 
| 477 |  |  |  |  |  |  |  | 
| 478 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 479 |  |  |  |  |  |  |  | 
| 480 |  |  |  |  |  |  | In Perl6, contents inside {} are now executable code. That means that inside interpolated strings, C<"${x}"> will be parsed as C<"${x()}"> and throw an exception if C<x()> is not defined. As such, this transforms C<"${x}"> into C<"{$x}">: | 
| 481 |  |  |  |  |  |  |  | 
| 482 |  |  |  |  |  |  | "The $x bit"      --> "The $x bit" | 
| 483 |  |  |  |  |  |  | "The $x-30 bit"   --> "The $x\-30 bit" | 
| 484 |  |  |  |  |  |  | "\N{FOO}"         --> "\c[FOO]" | 
| 485 |  |  |  |  |  |  | "The \l$x bit"    --> "The {lc $x} bit" | 
| 486 |  |  |  |  |  |  | "The \v bit"      --> "The  bit" | 
| 487 |  |  |  |  |  |  | "The ${x}rd bit"  --> "The {$x}rd bit" | 
| 488 |  |  |  |  |  |  | "The \${x}rd bit" --> "The \$\{x\}rd bit" | 
| 489 |  |  |  |  |  |  |  | 
| 490 |  |  |  |  |  |  | Many other transforms are performed in this module, see the code for a better | 
| 491 |  |  |  |  |  |  | idea of how complex this transformation really is. | 
| 492 |  |  |  |  |  |  |  | 
| 493 |  |  |  |  |  |  | Transforms only interpolated strings outside of comments, heredocs and POD. | 
| 494 |  |  |  |  |  |  |  | 
| 495 |  |  |  |  |  |  | =head1 CONFIGURATION | 
| 496 |  |  |  |  |  |  |  | 
| 497 |  |  |  |  |  |  | This Transformer is not configurable except for the standard options. | 
| 498 |  |  |  |  |  |  |  | 
| 499 |  |  |  |  |  |  | =head1 AUTHOR | 
| 500 |  |  |  |  |  |  |  | 
| 501 |  |  |  |  |  |  | Jeffrey Goff <drforr@pobox.com> | 
| 502 |  |  |  |  |  |  |  | 
| 503 |  |  |  |  |  |  | =head1 COPYRIGHT | 
| 504 |  |  |  |  |  |  |  | 
| 505 |  |  |  |  |  |  | Copyright (c) 2015 Jeffrey Goff | 
| 506 |  |  |  |  |  |  |  | 
| 507 |  |  |  |  |  |  | This program is free software; you can redistribute it and/or modify | 
| 508 |  |  |  |  |  |  | it under the same terms as Perl itself. | 
| 509 |  |  |  |  |  |  |  | 
| 510 |  |  |  |  |  |  | =cut | 
| 511 |  |  |  |  |  |  |  | 
| 512 |  |  |  |  |  |  | ############################################################################## | 
| 513 |  |  |  |  |  |  | # Local Variables: | 
| 514 |  |  |  |  |  |  | #   mode: cperl | 
| 515 |  |  |  |  |  |  | #   cperl-indent-level: 4 | 
| 516 |  |  |  |  |  |  | #   fill-column: 78 | 
| 517 |  |  |  |  |  |  | #   indent-tabs-mode: nil | 
| 518 |  |  |  |  |  |  | #   c-indentation-style: bsd | 
| 519 |  |  |  |  |  |  | # End: | 
| 520 |  |  |  |  |  |  | # ex: set ts=8 sts=4 sw=4 tw=78 ft=perl expandtab shiftround : |