File Coverage

blib/lib/Python/Bytecode.pm
Criterion Covered Total %
statement 168 264 63.6
branch 59 100 59.0
condition 10 12 83.3
subroutine 24 59 40.6
pod 4 28 14.2
total 265 463 57.2


line stmt bran cond sub pod time code
1             package Python::Bytecode;
2 1     1   32318 use 5.6.0;
  1         5  
  1         46  
3              
4 1     1   7 use strict;
  1         2  
  1         136  
5              
6             our $VERSION = "2.7";
7              
8 0     0   0 use overload '""' => sub { my $obj = shift;
9 0         0 "{name}.", file ".$obj->{filename}." line ".$obj->{lineno}." at ".sprintf('0x%x>',0+$obj);
10 1     1   1681 }, "0+" => sub { $_[0] }, fallback => 1;
  1     0   1141  
  1         12  
  0         0  
11              
12             sub new {
13 2     2 0 915 my ($class, $fh) = (@_);
14 2         5 my $self = bless { };
15 2 50       8 if (ref $fh) { $self->{fh} = $fh; }
  2         50  
16 0         0 else { $self->{stuff} = [ split //, $fh ] }
17              
18 2         7 my $magic = $self->r_long();
19 2         6 my $data = _get_data_by_magic($magic);
20 2         6 $self->{version} = $Python::Bytecode::versions{$magic};
21             # What we use to read words from the source. May be r_short or r_long
22 2         7 *Python::Bytecode::r_word = $Python::Bytecode::readword{$magic};
23 2         6 $self->r_long(); # Second magic number
24 2         5 $self->{mainobj} = $self->r_object();
25 2         8 $self->_init($data);
26 2         7 return $self;
27             }
28              
29             sub _get_data_by_magic {
30 2     2   1505 require Python::Bytecode::v21;
31 2         1402 require Python::Bytecode::v22;
32 2         1015 require Python::Bytecode::v23;
33 2         9 my $magic = shift;
34 2 50       7 unless (exists $Python::Bytecode::data{$magic}) {
35 0         0 require Carp;
36 0         0 Carp::croak("Unrecognised magic number $magic; Only know Python versions "
37 0         0 . join ", ", map { "$_ ($Python::Bytecode::versions{$_})" } keys %Python::Bytecode::versions
38             );
39             }
40 2         6 return $Python::Bytecode::data{$magic};
41             }
42              
43             sub r_byte {
44 170     170 0 264 my $self = shift;
45 170 50       637 if (exists $self->{stuff}) { ord shift @{$self->{stuff}};}
  0         0  
  0         0  
46 170         1592 else { ord getc $self->{fh} }
47             }
48              
49             sub r_long {
50 1     1   1451 use integer;
  1         12  
  1         6  
51 33     33 0 40 my $self = shift;
52 33         66 my $x = $self->r_byte;
53 33         71 $x |= $self->r_byte << 8;
54 33         123 $x |= $self->r_byte << 16;
55 33         61 $x |= $self->r_byte << 24;
56 33         73 return $x;
57             }
58              
59             sub r_short {
60 5     5 0 7 my $self = shift;
61 5         9 my $x = $self->r_byte;
62 5         10 $x |= $self->r_byte << 8;
63 5         7 $x |= -($x & 0x8000);
64 5         50 return $x;
65             }
66              
67             sub r_string {
68 12     12 0 15 my $self = shift;
69 12         21 my $length = $self->r_long;
70 12         13 my $buf;
71 12 50       26 if ( exists $self->{stuff}) {
72 0         0 $buf = join "", splice ( @{$self->{stuff}},0,$length,() );
  0         0  
73             } else {
74 12         31 read $self->{fh}, $buf, $length;
75             }
76 12         57 return $buf;
77             }
78              
79             # This really ought to return a real unicode string, rather than a plain
80             # binary string that we fib about
81             sub r_unicode {
82 0     0 0 0 my $self = shift;
83 0         0 my $length = $self->r_long;
84 0         0 my $buf;
85 0 0       0 if ( exists $self->{stuff}) {
86 0         0 $buf = join "", splice ( @{$self->{stuff}},0,$length,() );
  0         0  
87             } else {
88 0         0 read $self->{fh}, $buf, $length;
89             }
90 0         0 return $buf;
91             }
92              
93             sub r_float {
94 0     0 0 0 my $self = shift;
95 0         0 my $length = $self->r_byte;
96 0         0 my $buf;
97 0 0       0 if ( exists $self->{stuff}) {
98 0         0 $buf = join "", splice ( @{$self->{stuff}},0,$length,() );
  0         0  
99             } else {
100 0         0 read $self->{fh}, $buf, $length;
101             }
102 0         0 $buf += 0;
103 0         0 return $buf;
104             }
105              
106             sub r_complex {
107 0     0 0 0 my $self = shift;
108 0         0 my $length = $self->r_byte;
109 0         0 my $real;
110 0 0       0 if ( exists $self->{stuff}) {
111 0         0 $real = join "", splice ( @{$self->{stuff}},0,$length,() );
  0         0  
112             } else {
113 0         0 read $self->{fh}, $real, $length;
114             }
115 0         0 $real += 0;
116              
117 0         0 $length = $self->r_byte;
118 0         0 my $imag;
119 0 0       0 if ( exists $self->{stuff}) {
120 0         0 $imag = join "", splice ( @{$self->{stuff}},0,$length,() );
  0         0  
121             } else {
122 0         0 read $self->{fh}, $imag, $length;
123             }
124 0         0 $imag += 0;
125 0         0 return bless([$real, $imag], "Python::Bytecode::Complex");
126             }
127              
128             sub r_object {
129 28     28 0 35 my $self = shift;
130 28         131 my $cooked = shift;
131 28         111 my $type = chr $self->r_byte();
132 28 100       73 return $self->r_code() if $type eq "c";
133 26 100       45 if ($cooked) {
134 6 50       58 return bless \($self->r_string()), "Python::Bytecode::String" if $type eq "s";
135 6 100       18 return bless \($self->r_long()), "Python::Bytecode::Long" if $type eq "i";
136 4 100       10 return bless \do{my $x=undef}, "Python::Bytecode::Undef" if $type eq "N";
  2         15  
137             } else {
138 20 100       54 return $self->r_string if $type eq "s";
139 8 50       18 return $self->r_long() if $type eq "i";
140 8 50       16 return undef if $type eq "N"; # None indeed.
141             }
142 10 50       23 if ($type eq "(") {
143 10         21 my @tuple = $self->r_tuple($cooked);
144 10 50       83 return bless [@tuple], "Python::Bytecode::Tuple" unless wantarray;
145 0         0 return @tuple;
146             }
147 0 0       0 if ($type eq 'u') {
148 0         0 return bless\($self->r_unicode()), "Python::Bytecode::Unicode";
149             }
150 0 0       0 if ($type eq 'l') {
151 0         0 return bless \($self->r_extralong()), "Python::Bytecode::Extralong";
152             }
153 0 0       0 if ($type eq 'f') {
154 0         0 return bless \($self->r_float()), "Python::Bytecode::Float";
155             }
156 0 0       0 if ($type eq 'x') {
157 0         0 return $self->r_complex();
158             }
159 0         0 die "Oops! I didn't implement ".ord($type) . " (". length($type) . " bytes)";
160             }
161              
162             sub r_tuple {
163 10     10 0 13 my $self = shift;
164 10         12 my $cooked = shift;
165 10         210 my $n = $self->r_long;
166 10 100       25 return () unless $n;
167 6         7 my @rv;
168 6         24 push @rv, scalar $self->r_object($cooked) for (1..$n);
169 6         20 return @rv;
170             }
171              
172             # This is an extended precision long read. It'll likely be incorrect if the size of the
173             # long exceeds the precision of perl's double, and it really ought to generate a
174             # Math::BigInt instead.
175             sub r_extralong {
176 0     0 0 0 my $self = shift;
177 0         0 my $n = $self->r_long;
178 0 0       0 my $size = $n<0 ? -$n : $n;
179 0         0 my $num = 0;
180              
181 0         0 foreach (1..$size) {
182 0         0 my $digit = $self->r_short();
183 0         0 $num *= 32768;
184 0         0 $num += $digit;
185             }
186              
187 0         0 return $num;
188              
189             }
190              
191             sub r_code {
192 2     2 0 3 my $file = shift;
193 2         8 my $self = bless {bytecode => $file}, 'Python::Bytecode::Codeobj';
194 2         7 $self->{argcount} = $file->r_word; #
195 2         6 $self->{nlocals} = $file->r_word; #
196 2         7 $self->{stacksize}= $file->r_word; #
197 2         7 $self->{flags} = $file->r_word; #
198 2 50       9 if ($self->{code} = $file->r_object) {
199 2 50       6 if ($self->{constants}= $file->r_object(1)) {
200 2 50       6 if ($self->{names} = $file->r_object) {
201 2 50       6 if ($self->{varnames} = $file->r_object) {
202 2 50       5 if ($self->{freevars} = $file->r_object) {
203 2 50       6 if ($self->{cellvars} = $file->r_object) {
204 2 50       6 if ($self->{filename} = $file->r_object) {
205 2 50       5 if ($self->{name} = $file->r_object) {
206 2         5 $self->{lineno} = $file->r_word; #
207 2         5 $self->{lnotab} = $file->r_object;
208             }}}}}}}}
209 2         8 return $self;
210             }
211              
212             for (qw(argcount nlocals stacksize flags code constants names
213             varnames freevars cellvars filename name lineno lnotab)) {
214 1     1   1353 no strict q/subs/;
  1         2  
  1         5091  
215 0     0 0 0 eval "sub $_ { return \$_[0]->{mainobj}->$_ }";
  0     0 0 0  
  0     0 0 0  
  0     0 1 0  
  2     2 1 975  
  0     0 0 0  
  0     0 0 0  
  0     0 0 0  
  0     0 0 0  
  0     0 0 0  
  0     0 0 0  
  0     0 0 0  
  0     0 0 0  
  0     0 1 0  
216             }
217              
218             sub version {
219 0     0 0 0 return $_[0]->{version};
220             }
221              
222             $Parrot::Bytecode::DATA = <
223              
224             # This'll amuse you. It's actually lifted directly from dis.py :)
225             # Instruction opcodes for compiled code
226              
227             def_op('STOP_CODE', 0)
228             def_op('POP_TOP', 1)
229             def_op('ROT_TWO', 2)
230             def_op('ROT_THREE', 3)
231             def_op('DUP_TOP', 4)
232             def_op('ROT_FOUR', 5)
233              
234             def_op('UNARY_POSITIVE', 10)
235             def_op('UNARY_NEGATIVE', 11)
236             def_op('UNARY_NOT', 12)
237             def_op('UNARY_CONVERT', 13)
238              
239             def_op('UNARY_INVERT', 15)
240              
241             def_op('BINARY_POWER', 19)
242              
243             def_op('BINARY_MULTIPLY', 20)
244             def_op('BINARY_DIVIDE', 21)
245             def_op('BINARY_MODULO', 22)
246             def_op('BINARY_ADD', 23)
247             def_op('BINARY_SUBTRACT', 24)
248             def_op('BINARY_SUBSCR', 25)
249              
250             def_op('SLICE+0', 30)
251             def_op('SLICE+1', 31)
252             def_op('SLICE+2', 32)
253             def_op('SLICE+3', 33)
254              
255             def_op('STORE_SLICE+0', 40)
256             def_op('STORE_SLICE+1', 41)
257             def_op('STORE_SLICE+2', 42)
258             def_op('STORE_SLICE+3', 43)
259              
260             def_op('DELETE_SLICE+0', 50)
261             def_op('DELETE_SLICE+1', 51)
262             def_op('DELETE_SLICE+2', 52)
263             def_op('DELETE_SLICE+3', 53)
264              
265             def_op('INPLACE_ADD', 55)
266             def_op('INPLACE_SUBTRACT', 56)
267             def_op('INPLACE_MULTIPLY', 57)
268             def_op('INPLACE_DIVIDE', 58)
269             def_op('INPLACE_MODULO', 59)
270             def_op('STORE_SUBSCR', 60)
271             def_op('DELETE_SUBSCR', 61)
272              
273             def_op('BINARY_LSHIFT', 62)
274             def_op('BINARY_RSHIFT', 63)
275             def_op('BINARY_AND', 64)
276             def_op('BINARY_XOR', 65)
277             def_op('BINARY_OR', 66)
278             def_op('INPLACE_POWER', 67)
279              
280             def_op('PRINT_EXPR', 70)
281             def_op('PRINT_ITEM', 71)
282             def_op('PRINT_NEWLINE', 72)
283             def_op('PRINT_ITEM_TO', 73)
284             def_op('PRINT_NEWLINE_TO', 74)
285             def_op('INPLACE_LSHIFT', 75)
286             def_op('INPLACE_RSHIFT', 76)
287             def_op('INPLACE_AND', 77)
288             def_op('INPLACE_XOR', 78)
289             def_op('INPLACE_OR', 79)
290             def_op('BREAK_LOOP', 80)
291              
292             def_op('LOAD_LOCALS', 82)
293             def_op('RETURN_VALUE', 83)
294             def_op('IMPORT_STAR', 84)
295             def_op('EXEC_STMT', 85)
296              
297             def_op('POP_BLOCK', 87)
298             def_op('END_FINALLY', 88)
299             def_op('BUILD_CLASS', 89)
300              
301             HAVE_ARGUMENT = 90 # Opcodes from here have an argument:
302              
303             name_op('STORE_NAME', 90) # Index in name list
304             name_op('DELETE_NAME', 91) # ""
305             def_op('UNPACK_SEQUENCE', 92) # Number of tuple items
306              
307             name_op('STORE_ATTR', 95) # Index in name list
308             name_op('DELETE_ATTR', 96) # ""
309             name_op('STORE_GLOBAL', 97) # ""
310             name_op('DELETE_GLOBAL', 98) # ""
311             def_op('DUP_TOPX', 99) # number of items to duplicate
312             def_op('LOAD_CONST', 100) # Index in const list
313             hasconst.append(100)
314             name_op('LOAD_NAME', 101) # Index in name list
315             def_op('BUILD_TUPLE', 102) # Number of tuple items
316             def_op('BUILD_LIST', 103) # Number of list items
317             def_op('BUILD_MAP', 104) # Always zero for now
318             name_op('LOAD_ATTR', 105) # Index in name list
319             def_op('COMPARE_OP', 106) # Comparison operator
320             hascompare.append(106)
321             name_op('IMPORT_NAME', 107) # Index in name list
322             name_op('IMPORT_FROM', 108) # Index in name list
323              
324             jrel_op('JUMP_FORWARD', 110) # Number of bytes to skip
325             jrel_op('JUMP_IF_FALSE', 111) # ""
326             jrel_op('JUMP_IF_TRUE', 112) # ""
327             jabs_op('JUMP_ABSOLUTE', 113) # Target byte offset from beginning of code
328             jrel_op('FOR_LOOP', 114) # Number of bytes to skip
329              
330             name_op('LOAD_GLOBAL', 116) # Index in name list
331              
332             jrel_op('SETUP_LOOP', 120) # Distance to target address
333             jrel_op('SETUP_EXCEPT', 121) # ""
334             jrel_op('SETUP_FINALLY', 122) # ""
335              
336             def_op('LOAD_FAST', 124) # Local variable number
337             haslocal.append(124)
338             def_op('STORE_FAST', 125) # Local variable number
339             haslocal.append(125)
340             def_op('DELETE_FAST', 126) # Local variable number
341             haslocal.append(126)
342              
343             def_op('SET_LINENO', 127) # Current line number
344             SET_LINENO = 127
345              
346             def_op('RAISE_VARARGS', 130) # Number of raise arguments (1, 2, or 3)
347             def_op('CALL_FUNCTION', 131) # #args + (#kwargs << 8)
348             def_op('MAKE_FUNCTION', 132) # Number of args with default values
349             def_op('BUILD_SLICE', 133) # Number of items
350              
351             def_op('CALL_FUNCTION_VAR', 140) # #args + (#kwargs << 8)
352             def_op('CALL_FUNCTION_KW', 141) # #args + (#kwargs << 8)
353             def_op('CALL_FUNCTION_VAR_KW', 142) # #args + (#kwargs << 8)
354              
355             def_op('EXTENDED_ARG', 143)
356             EXTENDED_ARG = 143
357              
358             EOF
359              
360             # Set up op code data structures
361             sub _init {
362 2     2   3 my $self = shift;
363 2         2 my $data = shift;
364 2         3 my @opnames;
365             my %c; # Natty constants.
366 0         0 my %has;
367 2         88 for (split /\n/, $data) { # This ought to come predigested, but I am lazy
368 292 100 100     2078 next if /^#/ or not /\S/;
369 243 100       969 if (/^def_op\('([^']+)', (\d+)\)/) { $opnames[$2]=$1; }
  180 100       706  
    100          
    50          
370 41         89 elsif (/^(jrel|jabs|name)_op\('([^']+)', (\d+)\)/) { $opnames[$3]=$2; $has{$1}{$3}++ }
  41         113  
371 6         27 elsif (/(\w+)\s*=\s*(\d+)/) { $c{$1}=$2; }
372 16         57 elsif (/^has(\w+)\.append\((\d+)\)/) { $has{$1}{$2}++ }
373             }
374 2         26 $self->{opnames} = \@opnames;
375 2         5 $self->{has} = \%has;
376 2         6 $self->{c} = \%c;
377             }
378              
379             sub disassemble {
380 2     2 1 4 my $self = shift;
381 2         8 return $self->{mainobj}->disassemble;
382             }
383              
384             # Now we've read in the op tree, disassemble it.
385             package Python::Bytecode::Codeobj;
386              
387 0     0   0 use overload '""' => sub { my $obj = shift;
388 0         0 "{name}.", file ".$obj->{filename}." line ".$obj->{lineno}." at ".sprintf('0x%x>',0+$obj);
389 1     1   14 }, "0+" => sub { $_[0] }, fallback => 1;
  1     0   1  
  1         16  
  0         0  
390              
391             for (qw(argcount nlocals stacksize flags code constants names
392             varnames freevars cellvars filename name lineno lnotab)) {
393 1     1   107 no strict q/subs/;
  1         3  
  1         7465  
394 0     0   0 eval "sub $_ { return \$_[0]->{$_} }";
  0     0   0  
  0     0   0  
  0     0   0  
  2     2   11  
  0     0   0  
  0     0   0  
  0     0   0  
  0     0   0  
  0     0   0  
  0     0   0  
  0     0   0  
  0     0   0  
  0     0   0  
395             }
396              
397              
398             sub findlabels {
399 2     2   3 my $self = shift;
400 2         4 my $bytecode = $self->{bytecode};
401 2         4 my %labels = ();
402 2         7 my @code = @_;
403 2         3 my $offset = 0;
404 2         6 while (@code) {
405 28         32 my $c = shift @code;
406 28         30 $offset++;
407 28 100       72 if ($c>=$bytecode->{c}{HAVE_ARGUMENT}) {
408 18         84 my $arg = shift @code;
409 18         21 $arg += (256 * shift (@code));
410 18         19 $offset += 2;
411 18 100       48 if ($bytecode->{has}{jrel}{$c}) { $labels{$offset + $arg}++ };
  4         11  
412 18 50       62 if ($bytecode->{has}{jabs}{$c}) { $labels{$offset}++ };
  0         0  
413             }
414             }
415 2         12 return %labels;
416             }
417              
418             my @cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is', 'is not', 'exception match', 'BAD');
419              
420             sub __printconst {
421 4     4   5 my $thing = shift;
422              
423 4         7 my $class = ref $thing;
424 4 100 66     31 if ($class =~ /String/ || $class =~ /Long/) {
425 2         7 return $$thing;
426             }
427 2 50       8 if ($class =~ /Undef/) {
428 2         6 return "";
429             }
430 0         0 return $thing;
431            
432             }
433              
434             sub disassemble {
435 2     2   3 my $self = shift;
436 2         4 my $bytecode = $self->{bytecode};
437 2         196 my @code = map { ord } split //, $self->{code};
  64         76  
438 2         14 my %labels = $self->findlabels(@code);
439 2         4 my $offset = 0;
440 2         3 my $extarg = 0;
441 2         3 my @dis;
442 2         5 while (@code) {
443 28         109 my $c = shift @code;
444 28 100       56 my $text = (($labels{$offset}) ? ">>" : " ");
445 28         54 $text .= sprintf "%4i", $offset;
446 28         233 $text .= sprintf "%20s", $self->opname($c);
447 28         34 $offset++;
448 28         35 my $arg;
449 28 100       71 if ($c>=$bytecode->{c}{HAVE_ARGUMENT}) {
450 18         19 $arg = shift @code;
451 18         24 $arg += (256 * shift (@code)) + $extarg;
452 18         20 $extarg = 0;
453 18 50       39 $extarg = $arg * 65535 if ($c == $bytecode->{c}{EXTENDED_ARG});
454 18         20 $offset+=2;
455 18         32 $text .= sprintf "%5i", $arg;
456 18 100 66     121 $text .= " (".__printconst($self->{constants}->[$arg]).")" if (ref $self->{constants}->[$arg] && $bytecode->{has}{const} && $bytecode->{has}{const}{$c});
      100        
457 18 50       43 $text .= " (".$self->{varnames}->[$arg].")" if ($bytecode->{has}{"local"}{$c});
458 18 100       47 $text .= " [".$self->{names}->[$arg]."]" if ($bytecode->{has}{name}{$c});
459 18 50       103 $text .= " [".$cmp_op[$arg]."]" if ($bytecode->{has}{compare}{$c});
460 18 100       49 $text .= " (to ".($offset+$arg).")" if ($bytecode->{has}{jrel}{$c});
461             }
462 28         142 push @dis, [$text, $c, $arg];
463             }
464 2         13 return @dis;
465             }
466              
467 28     28   248 sub opname { $_[0]->{bytecode}{opnames}[$_[1]] }
468              
469             1;
470              
471             =head1 NAME
472              
473             Python::Bytecode - Disassemble and investigate Python bytecode
474              
475             =head1 SYNOPSIS
476              
477             use Python::Bytecode;
478             my $bytecode = Python::Bytecode->new($bytecode);
479             my $bytecode = Python::Bytecode->new(FH);
480             for ($bytecode->disassemble) {
481             print $_->[0],"\n"; # Textual representation of disassembly
482             }
483              
484             foreach my $constant (@{$bytecode->constants()}) {
485             if ($constant->can('disassemble')) {
486             print "code constant:\n";
487             for ($constant->disassemble) {
488             print $_->[0], "\n";
489             }
490             }
491             }
492              
493             =head1 DESCRIPTION
494              
495             C<Python::Bytecode> accepts a string or filehandle containing Python
496             bytecode and puts it into a format you can manipulate.
497              
498             =head1 METHODS
499              
500             =over 3
501              
502             =item C<disassemble>
503              
504             This is the basic method for getting at the actual code. It returns an
505             array representing the individual operations in the bytecode stream.
506             Each element is a reference to a three-element array containing
507             a textual representation of the disassembly, the opcode number, (the
508             C<opname> function can be used to turn this into an op name) and
509             the argument to the op, if any.
510              
511             =item C<constants>
512              
513             This returns an array reflecting the constants table of the code object.
514             Some operations such as C<LOAD_CONST> refer to constants by index in
515             this array.
516              
517             =item C
518              
519             Similar to C<constants>, some operations branch to labels by index
520             in this table.
521              
522             =item C
523              
524             Again, when variables are referred to by name, the names are stored
525             as an index into this table.
526              
527             =item C<filename>
528              
529             The filename from which this compiled bytecode is derived.
530              
531             =back
532              
533             There are other methods, but you can read the code to find them. It's
534             not hard, and besides, it's probably easiest to work off the textual
535             representation of the disassembly anyway.
536              
537             =head1 STRUCTURE
538              
539             The structure of the decoded bytecode file is reasonably simple.
540              
541             The output of the C<new> method is an object that represents the fully
542             parsed bytecode file. This object contains the information about the
543             bytecode file, as well as the top-level code object for the file.
544              
545             Each python code object in the bytecode file has its own perl object
546             that represents it. This object can be disassembled, has its own
547             constants (which themselves may be code objects) and its own variables.
548              
549             The module completely decodes the bytecode object when the bytecode
550             file is handed to the C<new> method, but to get all the pieces of the
551             bytecode may require digging into the constants of each code object.
552              
553             =head1 PERPETRATORS
554              
555             Simon Cozens, C. Mutation for Python 2.3 by Dan
556             Sugalski C
557              
558             =head1 LICENSE
559              
560             This code is licensed under the same terms as Perl itself.
561