line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package ETL::Yertl::Command::ygrok; |
2
|
|
|
|
|
|
|
our $VERSION = '0.037'; |
3
|
|
|
|
|
|
|
# ABSTRACT: Parse lines of text into documents |
4
|
|
|
|
|
|
|
|
5
|
11
|
|
|
11
|
|
3106
|
use ETL::Yertl; |
|
11
|
|
|
|
|
22
|
|
|
11
|
|
|
|
|
53
|
|
6
|
11
|
|
|
11
|
|
330
|
use ETL::Yertl::Util qw( load_module ); |
|
11
|
|
|
|
|
17
|
|
|
11
|
|
|
|
|
372
|
|
7
|
11
|
|
|
11
|
|
4341
|
use Getopt::Long qw( GetOptionsFromArray ); |
|
11
|
|
|
|
|
76399
|
|
|
11
|
|
|
|
|
38
|
|
8
|
11
|
|
|
11
|
|
4768
|
use Regexp::Common; |
|
11
|
|
|
|
|
21300
|
|
|
11
|
|
|
|
|
43
|
|
9
|
11
|
|
|
11
|
|
1392265
|
use File::HomeDir; |
|
11
|
|
|
|
|
40260
|
|
|
11
|
|
|
|
|
659
|
|
10
|
11
|
|
|
11
|
|
2799
|
use Hash::Merge::Simple qw( merge ); |
|
11
|
|
|
|
|
3929
|
|
|
11
|
|
|
|
|
9593
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
our %PATTERNS = ( |
13
|
|
|
|
|
|
|
WORD => '\b\w+\b', |
14
|
|
|
|
|
|
|
DATA => '.*?', |
15
|
|
|
|
|
|
|
NUM => $RE{num}{real}."", # stringify to allow YAML serialization |
16
|
|
|
|
|
|
|
INT => $RE{num}{int}."", # stringify to allow YAML serialization |
17
|
|
|
|
|
|
|
VERSION => '\d+(?:[.]\d+)*', |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
DATE => { |
20
|
|
|
|
|
|
|
MONTH => '\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\b', |
21
|
|
|
|
|
|
|
ISO8601 => '\d{4}-?\d{2}-?\d{2}[T ]\d{2}:?\d{2}:?\d{2}(?:Z|[+-]\d{4})', |
22
|
|
|
|
|
|
|
HTTP => '\d{2}/\w{3}/\d{4}:\d{2}:\d{2}:\d{2} [+-]\d{4}', |
23
|
|
|
|
|
|
|
SYSLOG => '%{DATE.MONTH} +\d{1,2} \d{1,2}:\d{1,2}:\d{1,2}', |
24
|
|
|
|
|
|
|
}, |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
OS => { |
27
|
|
|
|
|
|
|
USER => '[a-zA-Z0-9._-]+', |
28
|
|
|
|
|
|
|
PROCNAME => '[\w._-]+', |
29
|
|
|
|
|
|
|
}, |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
NET => { |
32
|
|
|
|
|
|
|
HOSTNAME => join( "|", $RE{net}{IPv4}, $RE{net}{IPv6}, $RE{net}{domain}{-rfc1101} ), |
33
|
|
|
|
|
|
|
IPV6 => $RE{net}{IPv6}."", |
34
|
|
|
|
|
|
|
IPV4 => $RE{net}{IPv4}."", |
35
|
|
|
|
|
|
|
}, |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
URL => { |
38
|
|
|
|
|
|
|
PATH => '[^?#]*(?:\?[^#]*)?', |
39
|
|
|
|
|
|
|
# URL regex from URI.pm |
40
|
|
|
|
|
|
|
URL => '(?:[^:/?#]+:)?(?://[^/?#]*)?[^?#]*(?:\?[^#]*)?(?:#.*)?', |
41
|
|
|
|
|
|
|
}, |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
LOG => { |
44
|
|
|
|
|
|
|
HTTP_COMMON => join( " ", |
45
|
|
|
|
|
|
|
'%{NET.HOSTNAME:remote_addr}', '%{OS.USER:ident}', '%{OS.USER:user}', |
46
|
|
|
|
|
|
|
'\[%{DATE.HTTP:timestamp}]', |
47
|
|
|
|
|
|
|
'"%{WORD:method} %{URL.PATH:path} [^/]+/%{VERSION:http_version}"', |
48
|
|
|
|
|
|
|
'%{INT:status}', '(?\d+|-)', |
49
|
|
|
|
|
|
|
), |
50
|
|
|
|
|
|
|
HTTP_COMBINED => join( " ", |
51
|
|
|
|
|
|
|
'%{LOG.HTTP_COMMON}', |
52
|
|
|
|
|
|
|
'"%{URL:referer}"', '"%{DATA:user_agent}"', |
53
|
|
|
|
|
|
|
), |
54
|
|
|
|
|
|
|
SYSLOG => join( "", |
55
|
|
|
|
|
|
|
'%{DATE.SYSLOG:timestamp} ', |
56
|
|
|
|
|
|
|
'(?:<%{INT:facility}.%{INT:priority}> )?', |
57
|
|
|
|
|
|
|
'%{NET.HOSTNAME:host} ', |
58
|
|
|
|
|
|
|
'%{OS.PROCNAME:program}(?:\[%{INT:pid}\])?: ', |
59
|
|
|
|
|
|
|
'%{DATA:text}', |
60
|
|
|
|
|
|
|
), |
61
|
|
|
|
|
|
|
}, |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
POSIX => { |
64
|
|
|
|
|
|
|
LS => join( " +", |
65
|
|
|
|
|
|
|
'(?[bcdlsp-][rwxSsTt-]{9})', |
66
|
|
|
|
|
|
|
'%{INT:links}', |
67
|
|
|
|
|
|
|
'%{OS.USER:owner}', |
68
|
|
|
|
|
|
|
'%{OS.USER:group}', |
69
|
|
|
|
|
|
|
'%{INT:bytes}', |
70
|
|
|
|
|
|
|
'(?%{DATE.MONTH} +\d+ +\d+(?::\d+)?)', |
71
|
|
|
|
|
|
|
'%{DATA:name}', |
72
|
|
|
|
|
|
|
), |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
# -- Mac OSX |
75
|
|
|
|
|
|
|
# TTY field starts with "tty" |
76
|
|
|
|
|
|
|
# No STAT field |
77
|
|
|
|
|
|
|
# -- OpenBSD |
78
|
|
|
|
|
|
|
# STAT field |
79
|
|
|
|
|
|
|
# -- RHEL 5 |
80
|
|
|
|
|
|
|
# tty can contain / |
81
|
|
|
|
|
|
|
# Seconds time optional |
82
|
|
|
|
|
|
|
PS => join( " +", |
83
|
|
|
|
|
|
|
' *%{INT:pid}', |
84
|
|
|
|
|
|
|
'(?[\w?/]+)', |
85
|
|
|
|
|
|
|
'(?(?:[\w+]+))?', |
86
|
|
|
|
|
|
|
'(? |
87
|
|
|
|
|
|
|
'%{DATA:command}', |
88
|
|
|
|
|
|
|
), |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# Mac OSX and OpenBSD are the same |
91
|
|
|
|
|
|
|
PSU => join ( " +", |
92
|
|
|
|
|
|
|
'%{OS.USER:user}', |
93
|
|
|
|
|
|
|
'%{INT:pid}', |
94
|
|
|
|
|
|
|
'%{NUM:cpu}', |
95
|
|
|
|
|
|
|
'%{NUM:mem}', |
96
|
|
|
|
|
|
|
'%{INT:vsz}', |
97
|
|
|
|
|
|
|
'%{INT:rss}', |
98
|
|
|
|
|
|
|
'(?[\w?/]+)', |
99
|
|
|
|
|
|
|
'(?(?:[\w+]+))?', |
100
|
|
|
|
|
|
|
'(?[\w:]+)', |
101
|
|
|
|
|
|
|
'(? |
102
|
|
|
|
|
|
|
'%{DATA:command}', |
103
|
|
|
|
|
|
|
), |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
# Max OSX and OpenBSD are the same |
106
|
|
|
|
|
|
|
PSX => join ( " +", |
107
|
|
|
|
|
|
|
' *%{INT:pid}', |
108
|
|
|
|
|
|
|
'(?[\w?/]+)', |
109
|
|
|
|
|
|
|
'(?(?:[\w+]+))', |
110
|
|
|
|
|
|
|
'(? |
111
|
|
|
|
|
|
|
'%{DATA:command}', |
112
|
|
|
|
|
|
|
), |
113
|
|
|
|
|
|
|
}, |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
LINUX => { |
116
|
|
|
|
|
|
|
PROC => { |
117
|
|
|
|
|
|
|
UPTIME => '%{NUM:uptime}\s+%{NUM:idletime}', |
118
|
|
|
|
|
|
|
LOADAVG => '%{NUM:load1m}\s+%{NUM:load5m}\s+%{NUM:load15m}\s+%{INT:running}/%{INT:total}\s+%{INT:lastpid}', |
119
|
|
|
|
|
|
|
}, |
120
|
|
|
|
|
|
|
}, |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
); |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
sub main { |
125
|
89
|
|
|
89
|
0
|
578257
|
my $class = shift; |
126
|
|
|
|
|
|
|
|
127
|
89
|
|
|
|
|
166
|
my %opt; |
128
|
89
|
100
|
|
|
|
285
|
if ( ref $_[-1] eq 'HASH' ) { |
129
|
5
|
|
|
|
|
8
|
%opt = %{ pop @_ }; |
|
5
|
|
|
|
|
13
|
|
130
|
|
|
|
|
|
|
} |
131
|
|
|
|
|
|
|
|
132
|
89
|
|
|
|
|
194
|
my @args = @_; |
133
|
89
|
|
|
|
|
359
|
GetOptionsFromArray( \@args, \%opt, |
134
|
|
|
|
|
|
|
'pattern', |
135
|
|
|
|
|
|
|
'loose', |
136
|
|
|
|
|
|
|
); |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
# Manage patterns |
139
|
89
|
100
|
|
|
|
21114
|
if ( $opt{pattern} ) { |
140
|
11
|
|
|
|
|
25
|
my ( $pattern_name, $pattern ) = @args; |
141
|
|
|
|
|
|
|
|
142
|
11
|
100
|
|
|
|
23
|
if ( $pattern ) { |
143
|
|
|
|
|
|
|
# Edit a pattern |
144
|
7
|
|
|
|
|
19
|
config_pattern( $pattern_name, $pattern ); |
145
|
|
|
|
|
|
|
} |
146
|
|
|
|
|
|
|
else { |
147
|
4
|
|
|
|
|
9
|
my $patterns = $class->_all_patterns; |
148
|
|
|
|
|
|
|
|
149
|
4
|
100
|
|
|
|
186
|
if ( $pattern_name ) { |
150
|
|
|
|
|
|
|
# Show a single pattern |
151
|
3
|
|
|
|
|
5
|
my $pattern = $patterns; |
152
|
3
|
|
|
|
|
9
|
my @parts = split /[.]/, $pattern_name; |
153
|
3
|
|
|
|
|
6
|
for my $part ( @parts ) { |
154
|
4
|
|
50
|
|
|
12
|
$pattern = $pattern->{ $part } ||= {}; |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
|
157
|
3
|
100
|
|
|
|
7
|
if ( !ref $pattern ) { |
158
|
2
|
|
|
|
|
44
|
say $pattern; |
159
|
|
|
|
|
|
|
} |
160
|
|
|
|
|
|
|
else { |
161
|
1
|
|
|
|
|
3
|
my $out_fmt = load_module( format => 'default' )->new; |
162
|
1
|
|
|
|
|
5
|
say $out_fmt->write( $pattern ); |
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
else { |
166
|
|
|
|
|
|
|
# Show all patterns we know about |
167
|
1
|
|
|
|
|
4
|
my $out_fmt = load_module( format => 'default' )->new; |
168
|
1
|
|
|
|
|
4
|
say $out_fmt->write( $patterns ); |
169
|
|
|
|
|
|
|
} |
170
|
|
|
|
|
|
|
} |
171
|
|
|
|
|
|
|
|
172
|
11
|
|
|
|
|
50
|
return 0; |
173
|
|
|
|
|
|
|
} |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
# Grok incoming lines |
176
|
78
|
|
|
|
|
177
|
my ( $pattern, @files ) = @args; |
177
|
78
|
100
|
|
|
|
183
|
die "Must give a pattern\n" unless $pattern; |
178
|
|
|
|
|
|
|
|
179
|
77
|
|
|
|
|
221
|
my $re = $class->parse_pattern( $pattern ); |
180
|
77
|
100
|
|
|
|
196
|
if ( !$opt{loose} ) { |
181
|
75
|
|
|
|
|
7165
|
$re = qr{^$re$}; |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
|
184
|
77
|
|
|
|
|
442
|
my $out_formatter = load_module( format => 'default' )->new; |
185
|
77
|
100
|
|
|
|
218
|
push @files, "-" unless @files; |
186
|
77
|
|
|
|
|
163
|
for my $file ( @files ) { |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
# We're doing a similar behavior to <>, but manually for easier testing. |
189
|
77
|
|
|
|
|
96
|
my $fh; |
190
|
77
|
100
|
|
|
|
223
|
if ( $file eq '-' ) { |
191
|
|
|
|
|
|
|
# Use the existing STDIN so tests can fake it |
192
|
39
|
|
|
|
|
74
|
$fh = \*STDIN; |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
else { |
195
|
38
|
50
|
|
|
|
398
|
unless ( open $fh, '<', $file ) { |
196
|
0
|
|
|
|
|
0
|
warn "Could not open file '$file' for reading: $!\n"; |
197
|
0
|
|
|
|
|
0
|
next; |
198
|
|
|
|
|
|
|
} |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
|
201
|
77
|
|
|
|
|
2198
|
while ( my $line = <$fh> ) { |
202
|
|
|
|
|
|
|
#; say STDERR "$line =~ $re"; |
203
|
249
|
100
|
|
|
|
3471
|
if ( $line =~ $re ) { |
204
|
209
|
|
|
10
|
|
4016
|
print $out_formatter->write( { %+ } ); |
|
10
|
|
|
|
|
2692
|
|
|
10
|
|
|
|
|
3383
|
|
|
10
|
|
|
|
|
7739
|
|
205
|
|
|
|
|
|
|
} |
206
|
|
|
|
|
|
|
} |
207
|
|
|
|
|
|
|
} |
208
|
|
|
|
|
|
|
} |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
sub _all_patterns { |
211
|
371
|
|
|
371
|
|
551
|
my ( $class ) = @_; |
212
|
371
|
|
|
|
|
677
|
return merge( \%PATTERNS, config() ); |
213
|
|
|
|
|
|
|
} |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
sub _get_pattern { |
216
|
367
|
|
|
367
|
|
1124
|
my ( $class, $pattern_name, $field_name ) = @_; |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
#; say STDERR "_get_pattern( $pattern_name, $field_name )"; |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
# Handle nested patterns |
221
|
367
|
|
|
|
|
878
|
my @parts = split /[.]/, $pattern_name; |
222
|
367
|
|
|
|
|
670
|
my $pattern = $class->_all_patterns->{ shift @parts }; |
223
|
367
|
|
|
|
|
7486
|
for my $part ( @parts ) { |
224
|
134
|
50
|
|
|
|
350
|
if ( !$pattern->{ $part } ) { |
225
|
|
|
|
|
|
|
# warn "Could not find pattern $pattern_name for field $field_name\n"; |
226
|
0
|
0
|
|
|
|
0
|
if ( $field_name ) { |
227
|
0
|
|
|
|
|
0
|
return "%{$pattern_name:$field_name}"; |
228
|
|
|
|
|
|
|
} |
229
|
0
|
|
|
|
|
0
|
return "%{$pattern_name}"; |
230
|
|
|
|
|
|
|
} |
231
|
|
|
|
|
|
|
|
232
|
134
|
|
|
|
|
257
|
$pattern = $pattern->{ $part }; |
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
# Handle the "default" pattern for a pattern group |
236
|
367
|
100
|
|
|
|
722
|
if ( ref $pattern eq 'HASH' ) { |
237
|
4
|
|
33
|
|
|
23
|
$pattern = $pattern->{ $parts[-1] || $pattern_name }; |
238
|
|
|
|
|
|
|
} |
239
|
|
|
|
|
|
|
|
240
|
367
|
100
|
|
|
|
622
|
if ( $field_name ) { |
241
|
324
|
|
|
|
|
874
|
return "(?<$field_name>" . $class->parse_pattern( $pattern ) . ")"; |
242
|
|
|
|
|
|
|
} |
243
|
43
|
|
|
|
|
134
|
return "(?:" . $class->parse_pattern( $pattern ) . ")"; |
244
|
|
|
|
|
|
|
} |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
sub parse_pattern { |
247
|
444
|
|
|
444
|
0
|
761
|
my ( $class, $pattern ) = @_; |
248
|
444
|
|
|
|
|
1212
|
$pattern =~ s/\%\{([^:}]+)(?::([^:}]+))?\}/$class->_get_pattern( $1, $2 )/ge; |
|
367
|
|
|
|
|
823
|
|
249
|
|
|
|
|
|
|
#; say STDERR 'PATTERN: ' . $pattern; |
250
|
444
|
|
|
|
|
2571
|
return $pattern; |
251
|
|
|
|
|
|
|
} |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
sub config { |
254
|
378
|
|
|
378
|
0
|
1209
|
my $conf_file = path( File::HomeDir->my_home, '.yertl', 'ygrok.yml' ); |
255
|
378
|
|
|
|
|
17752
|
my $config = {}; |
256
|
378
|
100
|
|
|
|
893
|
if ( $conf_file->exists ) { |
257
|
31
|
|
|
|
|
296
|
my $yaml = load_module( format => 'yaml' )->new( input => $conf_file->openr ); |
258
|
31
|
|
|
|
|
85
|
( $config ) = $yaml->read; |
259
|
|
|
|
|
|
|
} |
260
|
378
|
|
|
|
|
2778
|
return $config; |
261
|
|
|
|
|
|
|
} |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
sub config_pattern { |
264
|
7
|
|
|
7
|
0
|
16
|
my ( $pattern_name, $pattern ) = @_; |
265
|
7
|
|
|
|
|
13
|
my $all_config = config(); |
266
|
7
|
|
|
|
|
12
|
my $pattern_category = $all_config; |
267
|
7
|
|
|
|
|
30
|
my @parts = split /[.]/, $pattern_name; |
268
|
7
|
|
|
|
|
27
|
for my $part ( @parts[0..$#parts-1] ) { |
269
|
5
|
|
100
|
|
|
27
|
$pattern_category = $pattern_category->{ $part } ||= {}; |
270
|
|
|
|
|
|
|
} |
271
|
|
|
|
|
|
|
|
272
|
7
|
50
|
|
|
|
17
|
if ( $pattern ) { |
273
|
7
|
|
|
|
|
28
|
my $conf_file = path( File::HomeDir->my_home, '.yertl', 'ygrok.yml' ); |
274
|
7
|
100
|
|
|
|
357
|
if ( !$conf_file->exists ) { |
275
|
4
|
|
|
|
|
32
|
$conf_file->touchpath; |
276
|
|
|
|
|
|
|
} |
277
|
7
|
|
|
|
|
1419
|
$pattern_category->{ $parts[-1] } = $pattern; |
278
|
7
|
|
|
|
|
25
|
my $yaml = load_module( format => 'yaml' )->new; |
279
|
7
|
|
|
|
|
23
|
$conf_file->spew( $yaml->write( $all_config ) ); |
280
|
7
|
|
|
|
|
2732
|
return; |
281
|
|
|
|
|
|
|
} |
282
|
0
|
|
0
|
|
|
0
|
return $pattern_category->{ $parts[-1] } || ''; |
283
|
|
|
|
|
|
|
} |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
1; |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
__END__ |