File Coverage

blib/lib/Code/Splice.pm
Criterion Covered Total %
statement 189 214 88.3
branch 89 138 64.4
condition 37 71 52.1
subroutine 24 25 96.0
pod 0 5 0.0
total 339 453 74.8


line stmt bran cond sub pod time code
1             package Code::Splice;
2              
3 4     4   101193 use 5.008;
  4         19  
  4         174  
4 4     4   24 use strict;
  4         8  
  4         155  
5 4     4   23 use warnings;
  4         13  
  4         403  
6              
7             our $VERSION = '0.02';
8              
9             <
10              
11             Todo:
12              
13             * Change the nextstate instructions in the code as we paste it:
14             Line number should be where it's inserted at, but filename should have info about the code
15             having been spliced.
16             * Option about whether to splice out the matching op or append/prepend to it.
17             * Option about whether to splice into an expression or to splice only at a nextstate/at the line level.
18             * Positional argument syntax, where arguments to the code being replced can be re-spliced into the
19             user provided code (needed to do real macroy stuff)
20             * Feature where certain subroutine names (or subroutines tagged with a certain attribute)
21             get replaced with their definitions at each point they appear, with their arguments spliced in
22              
23             comment
24              
25 4     4   24 use B qw< OPf_KIDS OPf_STACKED OPf_WANT OPf_WANT_VOID OPf_WANT_SCALAR OPf_WANT_LIST OPf_REF OPf_MOD OPf_SPECIAL OPf_KIDS >;
  4         13  
  4         526  
26 4     4   22 use B qw< OPpTARGET_MY ppname>;
  4         9  
  4         275  
27 4     4   19 use B qw< SVf_IOK SVf_NOK SVf_POK SVf_IVisUV >;
  4         8  
  4         218  
28 4     4   3789 use B::Generate;
  4         10324  
  4         124  
29 4     4   5153 use B::Concise;
  4         54359  
  4         293  
30 4     4   36 use B::Deparse;
  4         8  
  4         173  
31             # use B::Utils;
32             sub SVs_PADMY () { 0x00000400 } # use B qw< SVs_PADMY >;
33              
34 4     4   46 use strict;
  4         6  
  4         120  
35 4     4   19 use warnings;
  4         6  
  4         1354  
36              
37             #
38             # debugging
39             #
40              
41             my $debug;
42             # use Data::Dumper 'Dumper'; # debug
43             # use Carp 'confess';
44             # BEGIN { $SIG{USR1} = sub { use Carp; print confess("crap."); exit; }; };
45              
46             #
47             # api
48             #
49              
50             sub inject {
51              
52 3     3 0 69 my %args = @_;
53 3         11 my $code = delete $args{code}; # what to insert
54 3         12 my $package = delete $args{package}; # where to insert it
55 3         8 my $method = delete $args{method};
56              
57             # user-provided arrays-of-code specifications of where to inject at
58              
59 3   100     26 my $preconditions = delete $args{preconditions} || [ ];
60 3   50     21 my $postconditions = delete $args{postconditions} || [ ];
61              
62 3         23 for(my $i = 0; $i < @_; $i += 2) {
63 12 100       33 $_[$i] eq 'precondition' and push @$preconditions, $_[$i+1];
64 12 50       42 $_[$i] eq 'postcondition' and push @$postconditions, $_[$i+1];
65             }
66              
67 3         9 delete $args{precondition};
68 3         13 delete $args{postcondition};
69              
70             # specifications with which to build
71              
72 3         8 my $line = delete $args{line};
73 3         8 my $label = delete $args{label};
74              
75 3         7 $debug = delete $args{debug};
76              
77 3 50       13 %args and die "unknown arguments: " . join ', ', keys %args;
78              
79 3 50       22 UNIVERSAL::isa($code, 'CODE') or die;
80              
81             # Build list of conditions that must be true for the injection and list of things which cannot be true
82              
83             $line and push @$preconditions, sub {
84 22     22   31 my $op = shift;
85 22 100       575 $op->name eq 'nextstate' or return;
86 6 100 66     52 $line and $op->line == $line or return;
87 1         4 return 1;
88 3 100       16 };
89              
90             $label and push @$preconditions, sub {
91 0     0   0 my $op = shift;
92 0 0       0 $op->name eq 'nextstate' or return;
93 0 0 0     0 $line and $op->label eq $label or return;
94 0         0 return 1;
95 3 50       94 };
96              
97             # Look up the method we're supposed to insert into
98              
99 4   50 4   23 my $cv = do { no strict 'refs'; B::svref_2object(*{$package.'::'.$method}{CODE} or die "no such package/method"); };
  4         6  
  4         16554  
  3         10  
  3         6  
100 3 50       60 $cv->ROOT() or die "no code in $package\::method";
101 3 50       106 $cv->STASH()->isa('B::SPECIAL') and die "can't splice into binary compiled XS code you twit"; # Can't locate object method "NAME" via package "B::SPECIAL"
102 3 50       29 $cv->ROOT()->can('first') or die "$package\::$method cannot do ->ROOT->first\n";
103              
104             # Ready the code we're support to inject
105              
106             # Code we're to insert should have a structure as follows:
107              
108             # 5 <1> leavesub[1 ref] K/REFC,1 ->(end) <--- $newop
109             # - <@> lineseq KP ->5
110             # 1 <;> nextstate(splice 38 splice.pm:99) v/2 ->2
111             # 4 <@> print sK ->5 <--- $newopfirst ( $newop->first->first->sibling ); also $newlastop here
112             # 2 <0> pushmark s ->3
113             # 3 <$> const(PV "test!!\n") s ->4
114              
115             # We want the nextstate and all of its siblings (print, another nextstate perhaps, more stuff...)
116              
117 3         20 my $newcv = B::svref_2object($code);
118 3 50       11 $debug and do { print "\n\ncode to splice:\n"; B::Concise::concise_cv_obj('basic', $newcv); }; # dump the opcode tree of this code value
  0         0  
  0         0  
119 3         15 my $newop = $newcv->ROOT; # $newop points to a leavesub instruction
120 3 50       44 $newop->name eq 'leavesub' or die;
121 3 50 33     32 my $newopfirst = $newop->first->first; $newopfirst = $newopfirst->has_sibling if $newopfirst->has_sibling and $newopfirst->name eq 'nextstate'; # was causing coredumps when the nextstate was inserted into the wrong place
  3         28  
122 3         8 my $newoplast = do { my $x = $newopfirst; $x = $x->has_sibling while $x->has_sibling; $x; };
  3         7  
  3         19  
  3         10  
123              
124             # XXXX moved rewrite pad entries
125              
126 3         11 my @srcpad = lexicals($newcv);
127 3         22 my @destpad = lexicals($cv);
128              
129 3         32 my %destpad = map { ( $destpad[$_] => $_ ) } grep defined $destpad[$_], 0 .. $#destpad; # build a name-to-number index
  9         52  
130              
131             # map { ( $_ => $padnames[$_]->PVX) } grep { ! $padnames[$_]->isa('B::SPECIAL') } 0 .. $#padnames;
132              
133 3 50 0     15 $debug and do { print "debug: srcpad: ", join ', ', map $_||'(undef)', @srcpad; print "\n"; };
  0         0  
  0         0  
134 3 50 0     11 $debug and do { print "debug: destpad: ", join ', ', map $_||'(undef)', @destpad; print "\n"; };
  0         0  
  0         0  
135              
136             # Translate the spliced-in code's idea of lexicals to match where it's spliced in to
137              
138             walkoptree_slow($newcv->ROOT, sub {
139 32 50   32   70 my $op = shift or die; # op object
140             # warn "rewriting pad looking at an: " . B::class($op);
141 32 100       210 $op->can('targ') or return; # B::NULL cannot
142 23 100       125 $srcpad[$op->targ] or return;
143 8 50 0     21 $debug and print "debug: ", $op->name, " references pad slot ", $op->targ, " which contains ", $srcpad[$op->targ]||'', "\n";
144 8 50       35 exists $destpad{$srcpad[$op->targ]} or die "variable ``$srcpad[$op->targ]'' doesn't exist in target context";
145 8         32 $op->targ($destpad{$srcpad[$op->targ]});
146             # print "debug: variable name: $srcpad[$op->targ]\n";
147             # print "debug: index of same variable in dest: ", $destpad{$srcpad{$op->targ}}, "\n";
148 3         37 });
149              
150             my $redo_reverse_indices = sub {
151 6     6   11 my $siblings = { };
152             walkoptree_slow($cv->ROOT, sub {
153 548 100 66     576 my $self = shift; return unless $self and $$self;
  548         2063  
154 419         1252 my $next = $self->next;
155 419 50       1964 my $sibl = $self->can('sibling') ? $self->sibling : undef;
156 419 100 66     2491 $siblings->{$$sibl} = $self if $sibl and $$sibl;
157 6         52 });
158 6         40 return $siblings;
159 3         35 };
160              
161             # Get ready to recurse through the bytecode tree - build a reverse index, previous, from the next link
162              
163 3         15 my $siblings = $redo_reverse_indices->();
164              
165             # build a table of deparsed code to line number
166              
167 3         7 my @codelines;
168            
169             walkoptree_slow($cv->ROOT, sub {
170              
171 271     271   290 my $op = shift;
172 271 100       1214 return if $op->isa('B::NULL');
173 207 100       891 return unless $op->name eq 'nextstate';
174              
175 36 50       137 my $line = $op->line or die;
176 36         107 $op = $op->sibling;
177 36 50       187 return if $op->isa('B::NULL');
178              
179 36         823 my $dp = B::Deparse->new;
180 36         74 $dp->{curcv} = $cv;
181 36 50       76 $debug and print "debug: deparse: $line: ", $dp->deparse($op, 0), "\n";
182 36         6631 $codelines[$line] = $dp->deparse($op, 0);
183              
184 3         41 });
185              
186             # debugging for before we modify anything
187              
188 3 50       33 $debug and do { print "\n\nbefore:\n"; B::Concise::concise_cv_obj('basic', $cv); }; # dump the opcode tree of this code value
  0         0  
  0         0  
189              
190             # identify the pointcut and insert the target code in right there
191              
192 3         7 my $curcop;
193             my $codeline;
194              
195             my $look_for_things_to_diddle = sub {
196            
197 114 50   114   221 my $op = shift or die; # op object
198 114         110 my $level = shift;
199 114 50       210 my $parents = shift or die;
200            
201 114 100 66     454 return unless $op and $$op;
202 91 50       409 return if $op->isa('B::NULL');
203              
204 91 50       174 $debug and print "debug: look_for_things_to_diddle: doing an ", $op->name, "\n";
205            
206 91 100       177 return unless exists $parents->[0]; # root op isn't that interesting and we need a parent
207 88         111 my $parent = $parents->[-1];
208            
209             my $pointcut = sub {
210            
211             # When splicing bytecode, we must consider the parent's first, parent's last, our previous sibling, our next sibling
212             # That ignores threading next, which gets done later
213              
214             # print "modifying ", $op->name, " at addresss ", $$op, "\n";
215              
216             # XXX alternate between the two according to some test
217              
218 3         11 my $prev_sibling = $siblings->{$$op}; # may be undef
219 3         12 my $next_sibling = $op->sibling; # may be undef
220              
221 3 50 33     38 $prev_sibling->sibling($newopfirst) if $prev_sibling and $$prev_sibling;
222 3 100 66     19 $newoplast->sibling($op->sibling) if $op->sibling and ${$op->sibling};
  3         31  
223            
224 3 50       18 $debug and print "debug: splicing code, I think the parent is a ", $parent->name, "\n";
225            
226 3 50 33     23 $parent->first($newopfirst) if $parent->can('first') and ${$parent->first} == $$op;
  3         20  
227 3 100 66     18 $parent->last($newoplast) if $parent->can('last') and ${$parent->last} == $$op;
  3         28  
228            
229 3         19 $siblings = $redo_reverse_indices->(); # only one swath of code is injected at a time, so this isn't currently needed
230              
231             # One chunk of bytecode can only be spliced into one place unless we make a deep copy of it,
232             # which we don't know how to do yet, so we just bail.
233              
234 3         207 goto did_pointcut;
235            
236 88         366 };
237              
238 88 100       406 $curcop = $op if $op->name eq 'nextstate';
239 88 100 66     570 $codeline = $codelines[$curcop->line] if $curcop and defined $codelines[$curcop->line];
240            
241 88         155 for my $post (@$postconditions) {
242 0 0       0 if($post->($op, $codeline)) {
243 0         0 die "post condition true before insert point found: ". B::Deparse->new->coderef2text($post);
244             }
245             }
246              
247 88         161 for my $i (0 .. @$preconditions-1) {
248 88 100       207 if($preconditions->[$i]->($op, $codeline)) {
249 3         34 splice @$preconditions, $i, 1, ();
250             }
251             }
252            
253 88 100       583 if(! @$preconditions) {
254 3 100 66     14 $op = $op->has_sibling if $op->has_sibling and $op->name eq 'nextstate';
255 3         22 $pointcut->();
256 0         0 goto did_pointcut;
257             }
258            
259 85         451 return;
260            
261 3         36 };
262              
263 3         22 walkoptree_slow($cv->ROOT, $look_for_things_to_diddle);
264 0         0 die "pointcut failed";
265 3         42 did_pointcut:
266              
267             # re-thread next:
268              
269             fix($cv->ROOT->first, $cv->ROOT);
270              
271             # my @srcpad = lexicals($newcv);
272             # my @destpad = lexicals($cv);
273             #
274             # my %destpad = map { ( $destpad[$_] => $_ ) } grep defined $destpad[$_], 0 .. $#destpad; # build a name-to-number index
275             #
276             # # map { ( $_ => $padnames[$_]->PVX) } grep { ! $padnames[$_]->isa('B::SPECIAL') } 0 .. $#padnames;
277             #
278             # $debug and do { print "debug: srcpad: ", join ', ', map $_||'(undef)', @srcpad; print "\n"; };
279             # $debug and do { print "debug: destpad: ", join ', ', map $_||'(undef)', @destpad; print "\n"; };
280              
281             # original version of pad rewriting:
282             # walkoptree_slow($cv->ROOT, sub {
283             # my $op = shift or die; # op object
284             # $op->can('targ') or return; # B::NULL cannot
285             # $srcpad[$op->targ] or return;
286             # $debug and print "debug: ", $op->name, " references pad slot ", $op->targ, " which contains ", $srcpad[$op->targ]||'', "\n";
287             # exists $destpad{$srcpad[$op->targ]} or die "variable ``$srcpad[$op->targ]'' doesn't exist in target context";
288             # $op->targ($destpad{$srcpad[$op->targ]});
289             # # print "debug: variable name: $srcpad[$op->targ]\n";
290             # # print "debug: index of same variable in dest: ", $destpad{$srcpad{$op->targ}}, "\n";
291             # });
292             # that's not working either now...
293              
294 3 50       61 $debug and do { print "\n\nafter:\n"; B::Concise::concise_cv_obj('basic', $cv); }; # dump the opcode tree of this code value
  0         0  
  0         0  
295              
296 3         176 return 1;
297             }
298              
299              
300             #
301             # utility methods
302             #
303              
304             my @parents = ();
305              
306             sub walkoptree_slow {
307             # actually recurse the bytecode tree
308             # stolen from B.pm, modified
309 965     965 0 1109 my $op = shift;
310 965         969 my $sub = shift;
311 965         967 my $level = shift;
312              
313 965   100     1947 $level ||= 0;
314              
315             # warn "walkoptree_debug: $level " . $op->name if our($walkoptree_debug) and $op and $$op;
316              
317 965         1628 $sub->($op, $level, \@parents);
318 962 100 100     5335 if ($op->can('flags') and $op->flags() & OPf_KIDS) {
319             # print "debug: go: ", ' ' x $level, $op->name(), "\n"; # debug
320 232         308 push @parents, $op;
321 232         703 my $kid = $op->first();
322 232         244 my $next;
323 950         1061 next_kid:
324             # was being changed right out from under us, so pre-compute
325 950 100       3140 $next = 0; $next = $kid->sibling() if $$kid;
326 950         1987 walkoptree_slow($kid, $sub, $level + 1);
327 943         1394 $kid = $next;
328 943 100       2546 goto next_kid if $kid;
329 225         306 pop @parents;
330             }
331 955 0 33     7040 if (B::class($op) eq 'PMOP' && $op->pmreplroot() && ${$op->pmreplroot()}) {
  0   33     0  
332             # pattern-match operators
333 0         0 push @parents, $op;
334 0         0 walkoptree_slow($op->pmreplroot(), $sub, $level + 1);
335 0         0 pop @parents;
336             }
337             };
338              
339             sub fix {
340 209     209 0 279 my ($op, $parent) = @_;
341 209 0       390 $debug and print "fixing: ", $$op ? $op->name : '(null)', "\n";
    50          
342 209 50       959 if($op->isa('B::NULL')) {
343 0 0       0 $debug and print "skipping null\n";
344             #return fix($op->first, $parent);
345 0         0 return $op;
346             }
347             # $op = denull($op);
348 209 100       450 if($op->has_sibling) {
349 144 50       260 $debug and print "has sibling, fixing and hooking\n";
350 144         338 $op->next(fix($op->has_sibling, $parent));
351             } else {
352 65 50       120 $debug and print "no sibling, hooking to parent (if applicable)\n";
353 65 50       249 $op->next($parent) if $parent;
354             }
355 209 100       962 if($op->has_first) {
356 62 50       119 $debug and print "Fixing children, and getting lastmost first\n";
357 62         125 return fix($op->has_first, $op);
358             } else {
359 147 50       262 $debug and print "No kids... we are the lastmost first!\n";
360 147         510 return $op;
361             }
362             }
363              
364             sub B::OP::has_sibling {
365 371     371   412 my $op = shift;
366             # eval { warn 'has_sibling: ' . $op->sibling; };
367 371 100 33     2586 return unless $op->can('sibling') and $op->sibling and ${$op->sibling}; # and ref $op->sibling ne 'B::NULL';
  371   66     1764  
368 302         1057 return denull($op->sibling);
369             }
370              
371             sub B::OP::has_first {
372 271     271   319 my $op = shift;
373             # eval { warn 'has_first: ' . $op->first; };
374 271 50 66     1482 return unless $op->can('first') and $op->first and ${$op->first}; # and ref $op->first ne 'B::NULL';
  124   66     580  
375 124         474 return denull($op->first);
376             }
377              
378             sub denull {
379 426     426 0 546 my $op = shift;
380 426 50       1770 if( $op->isa('B::NULL') ) {
381 0         0 return denull($op->first);
382             } else {
383 426         1483 return $op;
384             }
385             }
386              
387             sub lexicals {
388 6     6 0 11 my $cv = shift;
389 6 100       107 map { $_->isa('B::SPECIAL') ? undef : $_->PVX } ($cv->PADLIST->ARRAY)[0]->ARRAY;
  62         276  
390             }
391              
392             1;
393              
394             =pod
395              
396             =head1 NAME
397              
398             Code::Splice - Injects the contents of one subroutine at a specified point elsewhere.
399              
400             =head1 SYNOPSIS
401              
402             use Code::Splice;
403              
404             Code::Splice::inject(
405             code => sub { print "fred\n"; },
406             package => 'main',
407             method => 'foo',
408             precondition => sub {
409             my $op = shift;
410             my $line = shift;
411             $line =~ m/print/ and $line =~ m/four/;
412             },
413             postcondition => sub {
414             my $op = shift;
415             my $line = shift;
416             $line =~ m/print/ and $line =~ m/five/;
417             },
418             );
419              
420             sub foo {
421             print "one\n";
422             print "two\n";
423             print "three\n";
424             print "four\n";
425             print "five\n";
426             }
427              
428             =head1 DESCRIPTION
429              
430             Removes the contents of a subroutine (usually an anonymous subroutine created just
431             for the purpose) and splices in into the program elsewhere.
432              
433             Why, you ask?
434              
435             =over 1
436              
437             =item Write stronger unit tests than the granularity of the API would otherwise allow
438              
439             =item Write unit tests for nasty, interdependant speghetti code (my motivation -- hey, you gotta have tests before you can start refactoring, and if you can't write tests for the code, you're screwed)
440              
441             =item Fix stupid bugs and remove stupid restrictions in other people's code in a way that's more resiliant across upgrades than editing files you don't own
442              
443             =item Be what "aspects" should be
444              
445             =item Screw with your cow-orkers by introducing monster heisenbugs
446              
447             =item Play with self-modifying code
448              
449             =item Write self-replicating code (but be nice, we're all friends here, right?)
450              
451             =back
452              
453             The specifics:
454              
455             The body of the C<< code { } >> block are extracted from the subroutine and inserted in a place
456             in the code specified by the call to the C function.
457             Where the new code is spliced in, the old code is spliced out.
458             The C and C arguments are required and tell the thing how to find the
459             code to be modified.
460             The C argument is required as it specifies the code to be spliced in.
461             That same code block should not be used for anything else under penalty of coredump.
462              
463             The rest of the argumets specify where the code is to be inserted.
464             Any number of C and C arguments provide callbacks
465             to help locate the exact area to splice the code in at.
466             Before the code can e spliced in, all of the C blocks must have returned
467             true, and none of the C blocks may have yet returned true.
468             If a C returns true before all of the C blocks have,
469             an error is raised.
470             Both blocks get called numerous times per line and get passed a reference to the C OP object currently under consideration
471             and the text of the current line:
472              
473             precondition => sub {
474             my $op = shift;
475             my $line = shift;
476             $line =~ m/print/ and $line =~ m/four/;
477             },
478              
479             ... or...
480              
481             precondition => sub { my $op = shift; $op->name eq 'padsv' and $op->sv->sv =~ m/fred/; },
482              
483             It's possible to insert code in the middle of an expression when testing ops, but when
484             testing the text of the line of code, the spliced in code will always replace the whole line.
485              
486             I'll probably drop sending in the opcode in a future version, at least for the
487             precondition/postcondition blocks, or maybe I'll swap them to the 2nd arg so they're
488             more optional.
489              
490             Do not attempt to match text in comments as it won't be there.
491             The code in C<$line> is re-generated from the bytecode using F and will
492             vary from the original source code in a few ways, including changes to formatting,
493             changes to some idioms and details of the expressions, and formatting of the code
494             with regards to whitespace.
495              
496             The splicing code will C if it fails for any reason.
497             This will likely change in possible future versions.
498              
499             There are also C
500             simple cases.
501             Of course, you shouldn't use C for anything other than simple experimentation.
502              
503             References to lexical variables in the code to be injected are replaced with references to the
504             lexical variables of the same name in the location the code is inserted into.
505             If a variable of the same name doesn't exist there, it's an error.
506             ... but it probably shouldn't be an error, at least in the cases where the code being
507             spliced in declares that lexical with C, or when the variable was initiailized entirely
508             outside of the sub block being spliced in and was merely closed over by it.
509              
510             See the comments in the source code (at the top, in a nice block) for my todo/desired features.
511             Let me know if there are any features in there or yet unsuggested that you want.
512             I won't promise them, but I would like to hear about them.
513              
514             =head1 BUGS
515              
516             The original code reference passed in cannot be used elsewhere.
517             It can't be called, and it should not be passed back to C<< inject() >> again.
518             Failure to heed these warnings will result in coredumps and strange behaviors.
519              
520             Until I get around to finishing reworking C, C needs
521             line 940 of C changed to read
522             C (the word C and an understore should be inserted).
523             This is in order to build C on newer Perls.
524             I have a fixed and slightly extended version in my area on CPAN, if you search
525             for SWALTERS.
526              
527             Should gracefully default to not fixing up lexicals where no direct equivilent exists.
528              
529             Should repair the provided subroutine reference so that if were to be accidentally
530             called, Perl wouldn't coredump.
531              
532              
533             =head1 HISTORY
534              
535             0.1 -- initial release.
536              
537             =head1 SEE ALSO
538              
539             L -- slightly updated B::Generate -- you'll need this
540              
541             L attempts to document the Perl internals I'm prodding so bluntly.
542              
543             =head1 AUTHORS
544              
545             Scott Walters L - http://slowass.net/
546              
547             Brock Wilcox L - http://thelackthereof.org/
548              
549             Code lifted from various B modules...
550              
551             =head1 COPYRIGHT AND LICENSE
552              
553             Copyright (C) 2007 by Scott Walters and Brock Wilcox
554              
555             This library is free software; you can redistribute it and/or modify
556             it under the same terms as Perl itself, either Perl version 5.8.8 or,
557             at your option, any later version of Perl 5 you may have available.
558              
559             =cut
560              
561              
562             __END__