line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# vim: ts=2 sw=2 expandtab |
2
|
|
|
|
|
|
|
package Data::Transform::Line; |
3
|
2
|
|
|
2
|
|
2230
|
use strict; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
69
|
|
4
|
|
|
|
|
|
|
|
5
|
2
|
|
|
2
|
|
668
|
use Data::Transform; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
44
|
|
6
|
|
|
|
|
|
|
|
7
|
2
|
|
|
2
|
|
9
|
use vars qw($VERSION @ISA); |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
117
|
|
8
|
|
|
|
|
|
|
$VERSION = '0.01'; |
9
|
|
|
|
|
|
|
@ISA = qw(Data::Transform); |
10
|
|
|
|
|
|
|
|
11
|
2
|
|
|
2
|
|
10
|
use Carp qw(carp croak); |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
2289
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=head1 NAME |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
Data::Transform::Line - serialize and parse terminated records (lines) |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 SYNOPSIS |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
#!perl |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
use POE qw(Wheel::FollowTail Filter::Line); |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
POE::Session->create( |
24
|
|
|
|
|
|
|
inline_states => { |
25
|
|
|
|
|
|
|
_start => sub { |
26
|
|
|
|
|
|
|
$_[HEAP]{tailor} = POE::Wheel::FollowTail->new( |
27
|
|
|
|
|
|
|
Filename => "/var/log/system.log", |
28
|
|
|
|
|
|
|
InputEvent => "got_log_line", |
29
|
|
|
|
|
|
|
Filter => POE::Filter::Line->new(), |
30
|
|
|
|
|
|
|
); |
31
|
|
|
|
|
|
|
}, |
32
|
|
|
|
|
|
|
got_log_line => sub { |
33
|
|
|
|
|
|
|
print "Log: $_[ARG0]\n"; |
34
|
|
|
|
|
|
|
} |
35
|
|
|
|
|
|
|
} |
36
|
|
|
|
|
|
|
); |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
POE::Kernel->run(); |
39
|
|
|
|
|
|
|
exit; |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
=head1 DESCRIPTION |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
Data::Transform::Line parses stream data into terminated records. The |
44
|
|
|
|
|
|
|
default parser interprets newlines as the record terminator, and the |
45
|
|
|
|
|
|
|
default serializer appends network newlines (CR/LF, or "\x0D\x0A") to |
46
|
|
|
|
|
|
|
outbound records. |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
Data::Transform::Line supports a number of other ways to parse lines. |
49
|
|
|
|
|
|
|
Constructor parameters may specify literal newlines, regular |
50
|
|
|
|
|
|
|
expressions, or that the filter should detect newlines on its own. |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=head1 PUBLIC FILTER METHODS |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
Data::Transform::Line's new() method has some interesting parameters. |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=cut |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
# Diagnostic switch.  Statements of the form "DEBUG and warn ..." only
# produce output when this returns a true value.
sub DEBUG () { 0 }

# Slot indices into the blessed array that backs every filter object.
sub INPUT_BUFFER     () { 0 }  # array ref; new() and clone() start it empty
sub FRAMING_BUFFER   () { 1 }  # string of received data not yet split up
sub INPUT_REGEXP     () { 2 }  # pattern that terminates an input record
sub OUTPUT_LITERAL   () { 3 }  # string appended to every outgoing record
sub AUTODETECT_STATE () { 4 }  # one of the AUTO_STATE_* values below

# Progress of input newline autodetection (kept in AUTODETECT_STATE).
sub AUTO_STATE_DONE   () { 0x00 }  # not autodetecting, or already finished
sub AUTO_STATE_FIRST  () { 0x01 }  # waiting for the first line ending
sub AUTO_STATE_SECOND () { 0x02 }  # saw a 1-char newline; need the next char
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=head2 new |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
new() accepts a list of named parameters. |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
C<InputLiteral> may be used to parse records that are terminated by |
75
|
|
|
|
|
|
|
some literal string. For example, Data::Transform::Line may be used to |
76
|
|
|
|
|
|
|
parse and emit C-style lines, which are terminated with an ASCII NUL: |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
my $c_line_filter = Data::Transform::Line->new( |
79
|
|
|
|
|
|
|
InputLiteral => chr(0), |
80
|
|
|
|
|
|
|
OutputLiteral => chr(0), |
81
|
|
|
|
|
|
|
); |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
C<OutputLiteral> allows a filter to put() records with a different |
84
|
|
|
|
|
|
|
record terminator than it parses. This can be useful in applications |
85
|
|
|
|
|
|
|
that must translate record terminators. |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
C<Literal> is a shorthand for the common case where the input and |
88
|
|
|
|
|
|
|
output literals are identical. The previous example may be written |
89
|
|
|
|
|
|
|
as: |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
my $c_line_filter = Data::Transform::Line->new( |
92
|
|
|
|
|
|
|
Literal => chr(0), |
93
|
|
|
|
|
|
|
); |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
An application can also allow Data::Transform::Line to figure out which |
96
|
|
|
|
|
|
|
newline to use. This is done by specifying C<InputLiteral> to be |
97
|
|
|
|
|
|
|
undef: |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
my $whichever_line_filter = Data::Transform::Line->new( |
100
|
|
|
|
|
|
|
InputLiteral => undef, |
101
|
|
|
|
|
|
|
OutputLiteral => "\n", |
102
|
|
|
|
|
|
|
); |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
C<InputRegexp> may be used in place of C<InputLiteral> to recognize |
105
|
|
|
|
|
|
|
line terminators based on a regular expression. In this example, |
106
|
|
|
|
|
|
|
input is terminated by two or more consecutive newlines. On output, |
107
|
|
|
|
|
|
|
the paragraph separator is "---" on a line by itself. |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
my $paragraph_filter = Data::Transform::Line->new( |
110
|
|
|
|
|
|
|
InputRegexp => "([\x0D\x0A]{2,})", |
111
|
|
|
|
|
|
|
OutputLiteral => "\n---\n", |
112
|
|
|
|
|
|
|
); |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=cut |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
# new(%params) -- build a line filter.
#
# Recognized keys: Literal, InputLiteral, InputRegexp, OutputLiteral.
# Croaks on an odd-length parameter list and on conflicting keys; carps
# about any keys left over once the recognized ones have been removed.
# Returns an array-based object blessed into the invoking class.
sub new {
  my ($type, @args) = @_;

  croak "$type requires an even number of parameters" if @args % 2;
  my %params = @args;

  # NOTE(review): this tests the key "Regexp", which is not one of the
  # keys deleted as known parameters further down -- confirm whether
  # "InputRegexp" was intended.
  if (defined $params{Regexp} and defined $params{Literal}) {
    croak "$type cannot have both Regexp and Literal line endings";
  }

  my $autodetect = AUTO_STATE_DONE;
  my ($input_regexp, $output_literal);

  if (defined $params{Literal}) {

    # One literal terminator serves both directions, so it may not be
    # combined with any of the direction-specific parameters.
    my $literal = $params{Literal};
    croak "A defined Literal must have a nonzero length"
      unless length($literal);

    # An InputLiteral that merely exists counts as a conflict, because
    # an undefined InputLiteral is meaningful (it requests autodetect).
    croak "$type cannot have Literal with any other parameter"
      if exists  $params{InputLiteral}
      or defined $params{InputRegexp}
      or defined $params{OutputLiteral};

    $input_regexp   = quotemeta $literal;
    $output_literal = $literal;
  }
  else {

    # Input side: a literal, a regexp, or the generic newline pattern.
    if (exists $params{InputLiteral}) {
      my $input_literal = $params{InputLiteral};

      if (defined($input_literal) and length($input_literal)) {
        $input_regexp = quotemeta $input_literal;
      }
      else {
        # Undefined or empty: work out the newline from the input itself.
        $input_regexp = '';
        $autodetect   = AUTO_STATE_FIRST;
      }

      croak "$type cannot have both InputLiteral and InputRegexp"
        if defined $params{InputRegexp};
    }
    elsif (defined $params{InputRegexp}) {
      # May be compiled or a plain string; it is stored as given.
      $input_regexp = $params{InputRegexp};
    }
    else {
      $input_regexp = "(\\x0D\\x0A?|\\x0A\\x0D?)";
    }

    # Output side: caller's literal, or CR/LF by default.
    $output_literal = defined $params{OutputLiteral}
      ? $params{OutputLiteral}
      : "\x0D\x0A";
  }

  # Whatever remains after removing the known keys is unrecognized.
  delete @params{qw(Literal InputLiteral OutputLiteral InputRegexp)};
  if (scalar keys %params) {
    carp("$type ignores unknown parameters: ", join(', ', sort keys %params));
  }

  my $self = bless [
    [],               # INPUT_BUFFER
    '',               # FRAMING_BUFFER
    $input_regexp,    # INPUT_REGEXP
    $output_literal,  # OUTPUT_LITERAL
    $autodetect,      # AUTODETECT_STATE
  ], $type;

  DEBUG and warn join ':', @$self;

  return $self;
}
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
# clone() -- return a new filter of the same class as the invocant.
#
# The copy carries over the invocant's input pattern, output literal and
# autodetection state, but starts with empty input and framing buffers.
sub clone {
  my $self = shift;

  # Bless once, straight into the invocant's class, instead of blessing
  # into the current package first and re-blessing afterwards.
  return bless [
    [],                         # INPUT_BUFFER (fresh, empty)
    '',                         # FRAMING_BUFFER (fresh, empty)
    $self->[INPUT_REGEXP],
    $self->[OUTPUT_LITERAL],
    $self->[AUTODETECT_STATE],
  ], ref $self;
}
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
# get_pending() -- report data the filter is still holding.
#
# Returns a reference to a new array containing the framing buffer (only
# when it is non-empty) followed by a copy of the input buffer's
# elements, or undef when both are empty.  The filter is not modified.
sub get_pending {
  my $self = shift;

  my $framed  = $self->[FRAMING_BUFFER];
  my @pending = (
    (length($framed) ? $framed : ()),
    @{ $self->[INPUT_BUFFER] },
  );

  # An explicit undef is returned here (not a bare return), so the
  # "nothing pending" result is a single undef value in any context.
  return undef unless @pending;
  return \@pending;
}
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
# get() is inherited from Data::Transform. |
221
|
|
|
|
|
|
|
# get_one_start() is inherited from Data::Transform. |
222
|
|
|
|
|
|
|
# get_one() is inherited from Data::Transform. |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
# _handle_get_data($self, $data)
#
# Appends $data (when defined) to the framing buffer, then tries to take
# one complete record off the front of that buffer.
#
# Returns the record with its terminator removed, or an empty return
# when no complete record is available yet.  Dies with a "consistency
# error" message if AUTODETECT_STATE holds a value that none of the
# branches below handles.
#
# NOTE(review): presumably called by the get()/get_one() methods
# inherited from Data::Transform -- confirm against the base class.
sub _handle_get_data {
  my ($self, $data) = @_;

  $self->[FRAMING_BUFFER] .= $data if defined $data;

  # Each pass either returns or leaves the loop; the only branch that
  # loops again is the one that finishes newline autodetection.
  LINE: while (1) {

    # Autodetect is done, or it never started.  Parse some buffer!
    if (!$self->[AUTODETECT_STATE]) {
      DEBUG and warn unpack 'H*', $self->[INPUT_REGEXP];
      last LINE
        unless $self->[FRAMING_BUFFER] =~ s/^(.*?)$self->[INPUT_REGEXP]//s;

      # Copy the capture right away so later code cannot disturb it.
      my $line = $1;
      DEBUG and warn "got line: <<", unpack('H*', $line), ">>\n";

      return $line;
    }

    # Waiting for the first line ending.  Look for a generic newline.
    if ($self->[AUTODETECT_STATE] & AUTO_STATE_FIRST) {
      last LINE
        unless $self->[FRAMING_BUFFER] =~ s/^(.*?)(\x0D\x0A?|\x0A\x0D?)//;

      my ($line, $newline) = ($1, $2);
      $self->[INPUT_REGEXP] = $newline;

      # The newline is known to be complete in two cases: it is already
      # two characters long, or more data follows it in the framing
      # buffer.
      if (length($newline) == 2 or length $self->[FRAMING_BUFFER]) {
        DEBUG and warn "detected complete newline after line: <<$line>>\n";
        $self->[AUTODETECT_STATE] = AUTO_STATE_DONE;
      }

      # Otherwise this may be the first half of a two-character
      # newline, and the buffer is empty.  Remember it and decide when
      # the next chunk of data arrives.
      else {
        DEBUG and warn "detected suspicious newline after line: <<$line>>\n";
        $self->[AUTODETECT_STATE] = AUTO_STATE_SECOND;
      }

      return $line;
    }

    # Waiting for the second line beginning.  Bail out if we don't have
    # anything in the framing buffer.
    if ($self->[AUTODETECT_STATE] & AUTO_STATE_SECOND) {
      return unless length $self->[FRAMING_BUFFER];

      # Does the first buffered character complete the previous,
      # potentially partial, newline?  A lone CR is completed by LF;
      # anything else stored is checked against CR.
      my $next_char = substr($self->[FRAMING_BUFFER], 0, 1);
      my $completer = $self->[INPUT_REGEXP] eq "\x0D" ? "\x0A" : "\x0D";

      if ($next_char eq $completer) {

        # Fold the character into the stored newline and drop it from
        # the buffer.  No record capture exists in this branch, so the
        # debug message does not mention one.
        DEBUG and warn "completed prior suspicious newline\n";
        $self->[INPUT_REGEXP] .= $next_char;
        substr($self->[FRAMING_BUFFER], 0, 1) = '';
      }
      elsif (DEBUG) {
        warn "decided prior suspicious newline is okay\n";
      }

      # Either way INPUT_REGEXP now holds a complete newline.  End
      # autodetection and loop to look for a record in the buffer.
      $self->[AUTODETECT_STATE] = AUTO_STATE_DONE;
      next LINE;
    }

    die "consistency error: AUTODETECT_STATE = $self->[AUTODETECT_STATE]";
  }

  return;
}
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
# _handle_put_data($self, $line) -- serialize one outgoing record.
#
# Returns $line with the filter's output literal appended.  That is all
# this code does: an earlier comment here described translating "\n"
# inside the record and doing a split and join, but the implementation
# does not perform either step.
sub _handle_put_data {
  my ($self, $line) = @_;

  my $terminator = $self->[OUTPUT_LITERAL];
  return $line . $terminator;
}
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
1; |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
__END__ |