line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Acme::Pythonic; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# Please, if you tested it in some earlier version of Perl and works let |
4
|
|
|
|
|
|
|
# me know! The versions of Filter::Simple, Text::Tabs, and Test::More |
5
|
|
|
|
|
|
|
# would be useful as well. |
6
|
23
|
|
|
23
|
|
637030
|
use 5.006_001; |
|
23
|
|
|
|
|
94
|
|
|
23
|
|
|
|
|
1219
|
|
7
|
23
|
|
|
23
|
|
147
|
use strict; |
|
23
|
|
|
|
|
45
|
|
|
23
|
|
|
|
|
881
|
|
8
|
23
|
|
|
23
|
|
118
|
use warnings; |
|
23
|
|
|
|
|
44
|
|
|
23
|
|
|
|
|
1709
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
our ($VERSION, $DEBUG, $CALLER); |
11
|
|
|
|
|
|
|
$VERSION = '0.47'; |
12
|
|
|
|
|
|
|
|
13
|
23
|
|
|
23
|
|
22535
|
use Text::Tabs; |
|
23
|
|
|
|
|
21070
|
|
|
23
|
|
|
|
|
4017
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
sub import { |
16
|
|
|
|
|
|
|
my ($package, %cfg) = @_; |
17
|
|
|
|
|
|
|
$DEBUG = $cfg{debug}; |
18
|
|
|
|
|
|
|
$CALLER = caller() # to be able to check sub prototypes |
19
|
|
|
|
|
|
|
} |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
|
22
|
23
|
|
|
23
|
|
30304
|
use Filter::Simple; |
|
23
|
|
|
|
|
829058
|
|
|
23
|
|
|
|
|
186
|
|
23
|
|
|
|
|
|
|
FILTER_ONLY code => sub { |
24
|
|
|
|
|
|
|
unpythonize(); |
25
|
|
|
|
|
|
|
cuddle_elses_and_friends(); |
26
|
|
|
|
|
|
|
if ($DEBUG) { |
27
|
|
|
|
|
|
|
s/$Filter::Simple::placeholder/BLANKED_OUT/g; |
28
|
|
|
|
|
|
|
print; |
29
|
|
|
|
|
|
|
$_ = '1;'; |
30
|
|
|
|
|
|
|
} |
31
|
|
|
|
|
|
|
}; |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
# This regexp matches a 7-bit ASCII identifier. We use atomic grouping |
35
|
|
|
|
|
|
|
# because an identifier cannot be backtracked. |
36
|
|
|
|
|
|
|
my $id = qr/(?>[_a-zA-Z](?:[_a-zA-Z0-9']|::)*)/; |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
# Shorthand to put an eventual trailing comment in some regexps. |
39
|
|
|
|
|
|
|
my $tc = qr/(?
|
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
# Tries its best at converting Pythonic code to Perl. |
43
|
|
|
|
|
|
|
sub unpythonize { |
44
|
|
|
|
|
|
|
# Sometimes Filter::Simple adds newlines blanking out stuff, which |
45
|
|
|
|
|
|
|
# interferes with Pythonic conventions. |
46
|
22
|
|
|
22
|
0
|
99
|
my %bos = (); # BlanketOutS |
47
|
22
|
|
|
|
|
51
|
my $count = 0; |
48
|
22
|
|
|
|
|
587
|
s<$Filter::Simple::placeholder> |
|
146
|
|
|
|
|
353
|
|
49
|
|
|
|
|
|
|
|
50
|
146
|
|
|
|
|
499
|
$bos{$bo} = $&; |
51
|
146
|
|
|
|
|
577
|
$bo>geo; |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
# In addition, we can now normalize newlines without breaking |
54
|
|
|
|
|
|
|
# Filter::Simple's identifiers. |
55
|
22
|
|
|
|
|
103
|
normalize_newlines(); |
56
|
22
|
|
|
|
|
733
|
my @lines = split /\n/; |
57
|
22
|
50
|
|
|
|
236
|
return unless @lines; |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# If unsure about the ending indentation level, add an extra |
60
|
|
|
|
|
|
|
# non-indented line to ensure the stack gets emptied. |
61
|
22
|
100
|
|
|
|
148
|
push @lines, '1; # added by Acme::Pythonic' if $lines[-1] =~ /^(?:\s|\s*#)/; |
62
|
|
|
|
|
|
|
|
63
|
22
|
|
|
|
|
49
|
my ($comment, # comment in the current line, if any |
64
|
|
|
|
|
|
|
$indent, # indentation of the current logical line |
65
|
|
|
|
|
|
|
$id_at_sob, # identifier at StartOfBlock, for instance "else", or "eval" |
66
|
|
|
|
|
|
|
$prev_line_with_code, # previous line with code |
67
|
|
|
|
|
|
|
$might_be_modifier, # flag: current logical line might be a modifier |
68
|
|
|
|
|
|
|
$line_with_modifier, # physical line which started the current modifier |
69
|
|
|
|
|
|
|
$joining, # flag: are we joining lines? |
70
|
|
|
|
|
|
|
$unbalanced_paren, # flag: we opened a paren that remains to be closed |
71
|
|
|
|
|
|
|
@stack, # keeps track of indentation stuff |
72
|
|
|
|
|
|
|
); |
73
|
|
|
|
|
|
|
|
74
|
22
|
|
|
|
|
62
|
@stack = (); |
75
|
22
|
|
|
|
|
66
|
foreach my $line (@lines) { |
76
|
|
|
|
|
|
|
# We remove any trailing comment so that we can assert stuff |
77
|
|
|
|
|
|
|
# easily about the end of the code in this line. It is later |
78
|
|
|
|
|
|
|
# appended back in the continue block below. |
79
|
2046
|
100
|
|
|
|
5823
|
$comment = $line =~ s/(\s*$tc)//o ? $1 : ''; |
80
|
2046
|
100
|
|
|
|
6438
|
next if $line =~ /^\s*$/; |
81
|
|
|
|
|
|
|
|
82
|
1306
|
100
|
|
|
|
2438
|
if (!$joining) { |
83
|
1231
|
|
|
|
|
2192
|
$unbalanced_paren = left_parenthesize($line); |
84
|
1231
|
|
|
|
|
3099
|
$might_be_modifier = $line =~ /^\s*(?:if|unless|while|until|for|foreach)\b/; |
85
|
1231
|
100
|
|
|
|
2435
|
$line_with_modifier = \$line if $might_be_modifier; |
86
|
1231
|
|
|
|
|
3907
|
($indent) = $line =~ /^(\s*)/; |
87
|
1231
|
|
|
|
|
3645
|
$indent = length(expand($indent)); |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
|
90
|
1306
|
100
|
100
|
|
|
24547
|
if ($line =~ /(?:,|=>)\s*$/ || $line =~ s/\\\s*$//) { |
91
|
75
|
|
|
|
|
82
|
++$joining; |
92
|
75
|
100
|
|
|
|
189
|
next if $joining > 1; # if 1 we need yet to handle indentation |
93
|
|
|
|
|
|
|
} else { |
94
|
1231
|
|
|
|
|
1593
|
$joining = 0; |
95
|
|
|
|
|
|
|
} |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
# Handle trailing colons, which can be Pythonic, mark a labeled |
98
|
|
|
|
|
|
|
# block, mean some map, or &-sub call, etc. |
99
|
|
|
|
|
|
|
# |
100
|
|
|
|
|
|
|
# We check the parity of the number of ending colons to try to |
101
|
|
|
|
|
|
|
# avoid breaking things like |
102
|
|
|
|
|
|
|
# |
103
|
|
|
|
|
|
|
# print for keys %main:: |
104
|
|
|
|
|
|
|
# |
105
|
1262
|
|
|
|
|
1818
|
my $bracket_opened_by = ''; |
106
|
1262
|
100
|
100
|
|
|
11584
|
if ($line =~ /(:+)$/ && length($1) % 2) { |
|
|
100
|
|
|
|
|
|
107
|
272
|
|
|
|
|
330
|
$might_be_modifier = 0; |
108
|
|
|
|
|
|
|
# We perform some checks because labels have to keep their colon. |
109
|
272
|
100
|
100
|
|
|
2551
|
if ($line !~ /^\s*$id:$/o || |
|
|
|
100
|
|
|
|
|
110
|
|
|
|
|
|
|
$line =~ /[[:lower:]]/ || # labels are not allowed to have lower-case letters |
111
|
|
|
|
|
|
|
$line =~ /^\s*(?:BEGIN|CHECK|INIT|END):$/) { |
112
|
264
|
|
|
|
|
475
|
chop $line; |
113
|
264
|
100
|
|
|
|
522
|
if ($unbalanced_paren) { |
114
|
165
|
|
|
|
|
237
|
$line .= ")"; |
115
|
165
|
|
|
|
|
308
|
$unbalanced_paren = 0; |
116
|
|
|
|
|
|
|
} else { |
117
|
99
|
|
|
|
|
1466
|
($bracket_opened_by) = $line =~ /($id)\s*$/o; |
118
|
|
|
|
|
|
|
} |
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
} elsif (!$joining) { |
121
|
959
|
100
|
|
|
|
1673
|
$$line_with_modifier =~ s/\(// if $might_be_modifier; |
122
|
959
|
|
|
|
|
1598
|
$unbalanced_paren = 0; |
123
|
959
|
|
|
|
|
1717
|
$line .= ';'; |
124
|
|
|
|
|
|
|
} |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
# Handle indentation. Language::Pythonesque was the basis of |
127
|
|
|
|
|
|
|
# this code. |
128
|
1262
|
100
|
|
|
|
2831
|
my $prev_indent = @stack ? $stack[-1]{indent} : 0; |
129
|
1262
|
100
|
|
|
|
3816
|
if ($prev_indent < $indent) { |
|
|
100
|
|
|
|
|
|
130
|
269
|
|
|
|
|
1245
|
push @stack, {indent => $indent, id_at_sob => $id_at_sob}; |
131
|
269
|
100
|
|
|
|
2007
|
$$prev_line_with_code .= " {" unless $$prev_line_with_code =~ s/(?=\s*$tc)/ {/o; |
132
|
|
|
|
|
|
|
} elsif ($prev_indent > $indent) { |
133
|
232
|
|
|
|
|
790
|
do { |
134
|
269
|
|
|
|
|
430
|
my $prev_id_at_sob = $stack[-1]{id_at_sob}; |
135
|
269
|
|
|
|
|
300
|
pop @stack; |
136
|
269
|
100
|
|
|
|
756
|
$prev_indent = @stack ? $stack[-1]{indent} : 0; |
137
|
269
|
|
|
|
|
706
|
$$prev_line_with_code .= "\n" . ((' ' x $prev_indent) . "}"); |
138
|
269
|
100
|
|
|
|
486
|
$$prev_line_with_code .= ";" if needs_semicolon($prev_id_at_sob); |
139
|
|
|
|
|
|
|
} while $prev_indent > $indent; |
140
|
232
|
100
|
|
|
|
487
|
$$prev_line_with_code =~ s/;$/ / if $might_be_modifier; |
141
|
|
|
|
|
|
|
} |
142
|
1262
|
|
|
|
|
2266
|
$id_at_sob = $bracket_opened_by; |
143
|
|
|
|
|
|
|
} continue { |
144
|
2046
|
|
|
|
|
2709
|
$line =~ s/^\s*pass;?\s*$//; |
145
|
2046
|
100
|
100
|
|
|
10917
|
$prev_line_with_code = \$line if !$joining && $line =~ /\S/; |
146
|
2046
|
|
|
|
|
4288
|
$line .= $comment; |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
|
149
|
22
|
|
|
|
|
488
|
$_ = join "\n", @lines; |
150
|
22
|
|
|
|
|
1104
|
s/$;BLANKED_OUT_\d+$;/$bos{$&}/go; |
151
|
|
|
|
|
|
|
} |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# In the trials I've done seems like the Python interpreter understands |
155
|
|
|
|
|
|
|
# any of the three conventions, even if they are not the ones in the |
156
|
|
|
|
|
|
|
# platform, and even if they are mixed in the same file. |
157
|
|
|
|
|
|
|
# |
158
|
|
|
|
|
|
|
# In addition, it guarantees make test works no matter the platform. |
159
|
|
|
|
|
|
|
sub normalize_newlines { |
160
|
22
|
|
|
22
|
0
|
126
|
s/\015\012/\n/g; |
161
|
22
|
|
|
|
|
172
|
tr/\015/\n/ unless "\n" eq "\015"; |
162
|
22
|
|
|
|
|
49
|
tr/\012/\n/ unless "\n" eq "\012"; |
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
# Put an opening paren in the places we forgive parens. It will be later |
167
|
|
|
|
|
|
|
# closed or removed as needed in the main subroutine. |
168
|
|
|
|
|
|
|
sub left_parenthesize { |
169
|
1231
|
100
|
100
|
1231
|
0
|
16991
|
$_[0] =~ s/^(\s*\b(?:if|elsif|unless)\b\s*)/$1(/ || |
170
|
44
|
100
|
|
|
|
366
|
$_[0] =~ s/^(\s*(?:$id\s*:)?\s*\b(?:while|until)\b(\s*))/$2 eq '' ? "$1 (" : "$1("/eo || |
171
|
69
|
|
|
|
|
166
|
$_[0] =~ s/^(\s*(?:$id\s*:\s*)?\bfor(?:each)?\b\s*)(.*)/fortype_guesser($1,$2)/oxe |
172
|
|
|
|
|
|
|
} |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
# Tries its best at guessing a for(each) type or, at least, where to put |
176
|
|
|
|
|
|
|
# the opening paren. |
177
|
|
|
|
|
|
|
# |
178
|
|
|
|
|
|
|
# Returns a string which is a copy of the original with the paren |
179
|
|
|
|
|
|
|
# inserted. |
180
|
|
|
|
|
|
|
sub fortype_guesser { |
181
|
69
|
|
|
69
|
0
|
235
|
my ($for, $rest) = @_; |
182
|
69
|
|
|
|
|
88
|
my $guess = ""; |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
# Try to match "for VAR in LIST", and "for VAR LIST" |
185
|
69
|
100
|
100
|
|
|
1481
|
if ($rest =~ m/^((?:my|our)? \s* \$ $id\s+) in\s* ((?: (?:[\$\@%&\\]) | (?:\b\w) ) .*)$/ox || |
186
|
|
|
|
|
|
|
$rest =~ m/^((?:my|our)? \s* \$ $id\s*) ((?: (?:[\$\@%&\\]) | (?:\b\w) ) .*)$/ox) { |
187
|
29
|
|
|
|
|
134
|
$guess = "$for$1($2"; |
188
|
|
|
|
|
|
|
} else { |
189
|
|
|
|
|
|
|
# We are not sure whether this is a for or a foreach, but it is |
190
|
|
|
|
|
|
|
# very likely that putting parens around gets it right. |
191
|
40
|
|
|
|
|
62
|
$rest =~ s/^\s*in\b//; # fixes "foreach in LIST" |
192
|
40
|
|
|
|
|
88
|
$guess = "$for($rest"; |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
|
195
|
69
|
|
|
|
|
350
|
return $guess; |
196
|
|
|
|
|
|
|
} |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
# Guesses whether a block started by $id_at_sob needs a semicolon after the |
200
|
|
|
|
|
|
|
# ending bracket. |
201
|
|
|
|
|
|
|
sub needs_semicolon { |
202
|
269
|
|
|
269
|
0
|
350
|
my $id_at_sob = shift; |
203
|
269
|
100
|
|
|
|
1022
|
return 0 if !$id_at_sob; |
204
|
91
|
100
|
|
|
|
413
|
return 1 if $id_at_sob =~ /^(do|sub|eval)$/; |
205
|
|
|
|
|
|
|
|
206
|
68
|
100
|
|
|
|
366
|
my $proto = $id_at_sob =~ /::/ ? prototype($id_at_sob) : prototype("${CALLER}::$id_at_sob"); |
207
|
68
|
100
|
|
|
|
387
|
return 0 if not defined $proto; |
208
|
6
|
|
|
|
|
61
|
return $proto =~ /^;?&$/; |
209
|
|
|
|
|
|
|
} |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
# We follow perlstyle here, as we did until now. |
213
|
|
|
|
|
|
|
sub cuddle_elses_and_friends { |
214
|
22
|
|
|
22
|
0
|
1406
|
s/^([ \t]*})\s*(?=(?:elsif|else|continue)\b)/$1 /gm; |
215
|
22
|
|
|
|
|
2075
|
s/^([ \t]*})\s*(?=(?:if|unless|while|until|for|foreach)\b(?!.*{$tc?$))/$1 /gm; |
216
|
|
|
|
|
|
|
} |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
1; |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
__END__ |