| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package ETL::Yertl::Command::ygrok; |
|
2
|
|
|
|
|
|
|
our $VERSION = '0.037'; |
|
3
|
|
|
|
|
|
|
# ABSTRACT: Parse lines of text into documents |
|
4
|
|
|
|
|
|
|
|
|
5
|
11
|
|
|
11
|
|
3106
|
use ETL::Yertl; |
|
|
11
|
|
|
|
|
22
|
|
|
|
11
|
|
|
|
|
53
|
|
|
6
|
11
|
|
|
11
|
|
330
|
use ETL::Yertl::Util qw( load_module ); |
|
|
11
|
|
|
|
|
17
|
|
|
|
11
|
|
|
|
|
372
|
|
|
7
|
11
|
|
|
11
|
|
4341
|
use Getopt::Long qw( GetOptionsFromArray ); |
|
|
11
|
|
|
|
|
76399
|
|
|
|
11
|
|
|
|
|
38
|
|
|
8
|
11
|
|
|
11
|
|
4768
|
use Regexp::Common; |
|
|
11
|
|
|
|
|
21300
|
|
|
|
11
|
|
|
|
|
43
|
|
|
9
|
11
|
|
|
11
|
|
1392265
|
use File::HomeDir; |
|
|
11
|
|
|
|
|
40260
|
|
|
|
11
|
|
|
|
|
659
|
|
|
10
|
11
|
|
|
11
|
|
2799
|
use Hash::Merge::Simple qw( merge ); |
|
|
11
|
|
|
|
|
3929
|
|
|
|
11
|
|
|
|
|
9593
|
|
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
our %PATTERNS = ( |
|
13
|
|
|
|
|
|
|
WORD => '\b\w+\b', |
|
14
|
|
|
|
|
|
|
DATA => '.*?', |
|
15
|
|
|
|
|
|
|
NUM => $RE{num}{real}."", # stringify to allow YAML serialization |
|
16
|
|
|
|
|
|
|
INT => $RE{num}{int}."", # stringify to allow YAML serialization |
|
17
|
|
|
|
|
|
|
VERSION => '\d+(?:[.]\d+)*', |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
DATE => { |
|
20
|
|
|
|
|
|
|
MONTH => '\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\b', |
|
21
|
|
|
|
|
|
|
ISO8601 => '\d{4}-?\d{2}-?\d{2}[T ]\d{2}:?\d{2}:?\d{2}(?:Z|[+-]\d{4})', |
|
22
|
|
|
|
|
|
|
HTTP => '\d{2}/\w{3}/\d{4}:\d{2}:\d{2}:\d{2} [+-]\d{4}', |
|
23
|
|
|
|
|
|
|
SYSLOG => '%{DATE.MONTH} +\d{1,2} \d{1,2}:\d{1,2}:\d{1,2}', |
|
24
|
|
|
|
|
|
|
}, |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
OS => { |
|
27
|
|
|
|
|
|
|
USER => '[a-zA-Z0-9._-]+', |
|
28
|
|
|
|
|
|
|
PROCNAME => '[\w._-]+', |
|
29
|
|
|
|
|
|
|
}, |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
NET => { |
|
32
|
|
|
|
|
|
|
HOSTNAME => join( "|", $RE{net}{IPv4}, $RE{net}{IPv6}, $RE{net}{domain}{-rfc1101} ), |
|
33
|
|
|
|
|
|
|
IPV6 => $RE{net}{IPv6}."", |
|
34
|
|
|
|
|
|
|
IPV4 => $RE{net}{IPv4}."", |
|
35
|
|
|
|
|
|
|
}, |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
URL => { |
|
38
|
|
|
|
|
|
|
PATH => '[^?#]*(?:\?[^#]*)?', |
|
39
|
|
|
|
|
|
|
# URL regex from URI.pm |
|
40
|
|
|
|
|
|
|
URL => '(?:[^:/?#]+:)?(?://[^/?#]*)?[^?#]*(?:\?[^#]*)?(?:#.*)?', |
|
41
|
|
|
|
|
|
|
}, |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
LOG => { |
|
44
|
|
|
|
|
|
|
HTTP_COMMON => join( " ", |
|
45
|
|
|
|
|
|
|
'%{NET.HOSTNAME:remote_addr}', '%{OS.USER:ident}', '%{OS.USER:user}', |
|
46
|
|
|
|
|
|
|
'\[%{DATE.HTTP:timestamp}]', |
|
47
|
|
|
|
|
|
|
'"%{WORD:method} %{URL.PATH:path} [^/]+/%{VERSION:http_version}"', |
|
48
|
|
|
|
|
|
|
'%{INT:status}', '(?\d+|-)', |
|
49
|
|
|
|
|
|
|
), |
|
50
|
|
|
|
|
|
|
HTTP_COMBINED => join( " ", |
|
51
|
|
|
|
|
|
|
'%{LOG.HTTP_COMMON}', |
|
52
|
|
|
|
|
|
|
'"%{URL:referer}"', '"%{DATA:user_agent}"', |
|
53
|
|
|
|
|
|
|
), |
|
54
|
|
|
|
|
|
|
SYSLOG => join( "", |
|
55
|
|
|
|
|
|
|
'%{DATE.SYSLOG:timestamp} ', |
|
56
|
|
|
|
|
|
|
'(?:<%{INT:facility}.%{INT:priority}> )?', |
|
57
|
|
|
|
|
|
|
'%{NET.HOSTNAME:host} ', |
|
58
|
|
|
|
|
|
|
'%{OS.PROCNAME:program}(?:\[%{INT:pid}\])?: ', |
|
59
|
|
|
|
|
|
|
'%{DATA:text}', |
|
60
|
|
|
|
|
|
|
), |
|
61
|
|
|
|
|
|
|
}, |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
POSIX => { |
|
64
|
|
|
|
|
|
|
LS => join( " +", |
|
65
|
|
|
|
|
|
|
'(?[bcdlsp-][rwxSsTt-]{9})', |
|
66
|
|
|
|
|
|
|
'%{INT:links}', |
|
67
|
|
|
|
|
|
|
'%{OS.USER:owner}', |
|
68
|
|
|
|
|
|
|
'%{OS.USER:group}', |
|
69
|
|
|
|
|
|
|
'%{INT:bytes}', |
|
70
|
|
|
|
|
|
|
'(?%{DATE.MONTH} +\d+ +\d+(?::\d+)?)', |
|
71
|
|
|
|
|
|
|
'%{DATA:name}', |
|
72
|
|
|
|
|
|
|
), |
|
73
|
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
# -- Mac OSX |
|
75
|
|
|
|
|
|
|
# TTY field starts with "tty" |
|
76
|
|
|
|
|
|
|
# No STAT field |
|
77
|
|
|
|
|
|
|
# -- OpenBSD |
|
78
|
|
|
|
|
|
|
# STAT field |
|
79
|
|
|
|
|
|
|
# -- RHEL 5 |
|
80
|
|
|
|
|
|
|
# tty can contain / |
|
81
|
|
|
|
|
|
|
# Seconds time optional |
|
82
|
|
|
|
|
|
|
PS => join( " +", |
|
83
|
|
|
|
|
|
|
' *%{INT:pid}', |
|
84
|
|
|
|
|
|
|
'(?[\w?/]+)', |
|
85
|
|
|
|
|
|
|
'(?(?:[\w+]+))?', |
|
86
|
|
|
|
|
|
|
'(? |
|
87
|
|
|
|
|
|
|
'%{DATA:command}', |
|
88
|
|
|
|
|
|
|
), |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# Mac OSX and OpenBSD are the same |
|
91
|
|
|
|
|
|
|
PSU => join ( " +", |
|
92
|
|
|
|
|
|
|
'%{OS.USER:user}', |
|
93
|
|
|
|
|
|
|
'%{INT:pid}', |
|
94
|
|
|
|
|
|
|
'%{NUM:cpu}', |
|
95
|
|
|
|
|
|
|
'%{NUM:mem}', |
|
96
|
|
|
|
|
|
|
'%{INT:vsz}', |
|
97
|
|
|
|
|
|
|
'%{INT:rss}', |
|
98
|
|
|
|
|
|
|
'(?[\w?/]+)', |
|
99
|
|
|
|
|
|
|
'(?(?:[\w+]+))?', |
|
100
|
|
|
|
|
|
|
'(?[\w:]+)', |
|
101
|
|
|
|
|
|
|
'(? |
|
102
|
|
|
|
|
|
|
'%{DATA:command}', |
|
103
|
|
|
|
|
|
|
), |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
# Max OSX and OpenBSD are the same |
|
106
|
|
|
|
|
|
|
PSX => join ( " +", |
|
107
|
|
|
|
|
|
|
' *%{INT:pid}', |
|
108
|
|
|
|
|
|
|
'(?[\w?/]+)', |
|
109
|
|
|
|
|
|
|
'(?(?:[\w+]+))', |
|
110
|
|
|
|
|
|
|
'(? |
|
111
|
|
|
|
|
|
|
'%{DATA:command}', |
|
112
|
|
|
|
|
|
|
), |
|
113
|
|
|
|
|
|
|
}, |
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
LINUX => { |
|
116
|
|
|
|
|
|
|
PROC => { |
|
117
|
|
|
|
|
|
|
UPTIME => '%{NUM:uptime}\s+%{NUM:idletime}', |
|
118
|
|
|
|
|
|
|
LOADAVG => '%{NUM:load1m}\s+%{NUM:load5m}\s+%{NUM:load15m}\s+%{INT:running}/%{INT:total}\s+%{INT:lastpid}', |
|
119
|
|
|
|
|
|
|
}, |
|
120
|
|
|
|
|
|
|
}, |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
); |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
sub main { |
|
125
|
89
|
|
|
89
|
0
|
578257
|
my $class = shift; |
|
126
|
|
|
|
|
|
|
|
|
127
|
89
|
|
|
|
|
166
|
my %opt; |
|
128
|
89
|
100
|
|
|
|
285
|
if ( ref $_[-1] eq 'HASH' ) { |
|
129
|
5
|
|
|
|
|
8
|
%opt = %{ pop @_ }; |
|
|
5
|
|
|
|
|
13
|
|
|
130
|
|
|
|
|
|
|
} |
|
131
|
|
|
|
|
|
|
|
|
132
|
89
|
|
|
|
|
194
|
my @args = @_; |
|
133
|
89
|
|
|
|
|
359
|
GetOptionsFromArray( \@args, \%opt, |
|
134
|
|
|
|
|
|
|
'pattern', |
|
135
|
|
|
|
|
|
|
'loose', |
|
136
|
|
|
|
|
|
|
); |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
# Manage patterns |
|
139
|
89
|
100
|
|
|
|
21114
|
if ( $opt{pattern} ) { |
|
140
|
11
|
|
|
|
|
25
|
my ( $pattern_name, $pattern ) = @args; |
|
141
|
|
|
|
|
|
|
|
|
142
|
11
|
100
|
|
|
|
23
|
if ( $pattern ) { |
|
143
|
|
|
|
|
|
|
# Edit a pattern |
|
144
|
7
|
|
|
|
|
19
|
config_pattern( $pattern_name, $pattern ); |
|
145
|
|
|
|
|
|
|
} |
|
146
|
|
|
|
|
|
|
else { |
|
147
|
4
|
|
|
|
|
9
|
my $patterns = $class->_all_patterns; |
|
148
|
|
|
|
|
|
|
|
|
149
|
4
|
100
|
|
|
|
186
|
if ( $pattern_name ) { |
|
150
|
|
|
|
|
|
|
# Show a single pattern |
|
151
|
3
|
|
|
|
|
5
|
my $pattern = $patterns; |
|
152
|
3
|
|
|
|
|
9
|
my @parts = split /[.]/, $pattern_name; |
|
153
|
3
|
|
|
|
|
6
|
for my $part ( @parts ) { |
|
154
|
4
|
|
50
|
|
|
12
|
$pattern = $pattern->{ $part } ||= {}; |
|
155
|
|
|
|
|
|
|
} |
|
156
|
|
|
|
|
|
|
|
|
157
|
3
|
100
|
|
|
|
7
|
if ( !ref $pattern ) { |
|
158
|
2
|
|
|
|
|
44
|
say $pattern; |
|
159
|
|
|
|
|
|
|
} |
|
160
|
|
|
|
|
|
|
else { |
|
161
|
1
|
|
|
|
|
3
|
my $out_fmt = load_module( format => 'default' )->new; |
|
162
|
1
|
|
|
|
|
5
|
say $out_fmt->write( $pattern ); |
|
163
|
|
|
|
|
|
|
} |
|
164
|
|
|
|
|
|
|
} |
|
165
|
|
|
|
|
|
|
else { |
|
166
|
|
|
|
|
|
|
# Show all patterns we know about |
|
167
|
1
|
|
|
|
|
4
|
my $out_fmt = load_module( format => 'default' )->new; |
|
168
|
1
|
|
|
|
|
4
|
say $out_fmt->write( $patterns ); |
|
169
|
|
|
|
|
|
|
} |
|
170
|
|
|
|
|
|
|
} |
|
171
|
|
|
|
|
|
|
|
|
172
|
11
|
|
|
|
|
50
|
return 0; |
|
173
|
|
|
|
|
|
|
} |
|
174
|
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
# Grok incoming lines |
|
176
|
78
|
|
|
|
|
177
|
my ( $pattern, @files ) = @args; |
|
177
|
78
|
100
|
|
|
|
183
|
die "Must give a pattern\n" unless $pattern; |
|
178
|
|
|
|
|
|
|
|
|
179
|
77
|
|
|
|
|
221
|
my $re = $class->parse_pattern( $pattern ); |
|
180
|
77
|
100
|
|
|
|
196
|
if ( !$opt{loose} ) { |
|
181
|
75
|
|
|
|
|
7165
|
$re = qr{^$re$}; |
|
182
|
|
|
|
|
|
|
} |
|
183
|
|
|
|
|
|
|
|
|
184
|
77
|
|
|
|
|
442
|
my $out_formatter = load_module( format => 'default' )->new; |
|
185
|
77
|
100
|
|
|
|
218
|
push @files, "-" unless @files; |
|
186
|
77
|
|
|
|
|
163
|
for my $file ( @files ) { |
|
187
|
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
# We're doing a similar behavior to <>, but manually for easier testing. |
|
189
|
77
|
|
|
|
|
96
|
my $fh; |
|
190
|
77
|
100
|
|
|
|
223
|
if ( $file eq '-' ) { |
|
191
|
|
|
|
|
|
|
# Use the existing STDIN so tests can fake it |
|
192
|
39
|
|
|
|
|
74
|
$fh = \*STDIN; |
|
193
|
|
|
|
|
|
|
} |
|
194
|
|
|
|
|
|
|
else { |
|
195
|
38
|
50
|
|
|
|
398
|
unless ( open $fh, '<', $file ) { |
|
196
|
0
|
|
|
|
|
0
|
warn "Could not open file '$file' for reading: $!\n"; |
|
197
|
0
|
|
|
|
|
0
|
next; |
|
198
|
|
|
|
|
|
|
} |
|
199
|
|
|
|
|
|
|
} |
|
200
|
|
|
|
|
|
|
|
|
201
|
77
|
|
|
|
|
2198
|
while ( my $line = <$fh> ) { |
|
202
|
|
|
|
|
|
|
#; say STDERR "$line =~ $re"; |
|
203
|
249
|
100
|
|
|
|
3471
|
if ( $line =~ $re ) { |
|
204
|
209
|
|
|
10
|
|
4016
|
print $out_formatter->write( { %+ } ); |
|
|
10
|
|
|
|
|
2692
|
|
|
|
10
|
|
|
|
|
3383
|
|
|
|
10
|
|
|
|
|
7739
|
|
|
205
|
|
|
|
|
|
|
} |
|
206
|
|
|
|
|
|
|
} |
|
207
|
|
|
|
|
|
|
} |
|
208
|
|
|
|
|
|
|
} |
|
209
|
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
sub _all_patterns { |
|
211
|
371
|
|
|
371
|
|
551
|
my ( $class ) = @_; |
|
212
|
371
|
|
|
|
|
677
|
return merge( \%PATTERNS, config() ); |
|
213
|
|
|
|
|
|
|
} |
|
214
|
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
sub _get_pattern { |
|
216
|
367
|
|
|
367
|
|
1124
|
my ( $class, $pattern_name, $field_name ) = @_; |
|
217
|
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
#; say STDERR "_get_pattern( $pattern_name, $field_name )"; |
|
219
|
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
# Handle nested patterns |
|
221
|
367
|
|
|
|
|
878
|
my @parts = split /[.]/, $pattern_name; |
|
222
|
367
|
|
|
|
|
670
|
my $pattern = $class->_all_patterns->{ shift @parts }; |
|
223
|
367
|
|
|
|
|
7486
|
for my $part ( @parts ) { |
|
224
|
134
|
50
|
|
|
|
350
|
if ( !$pattern->{ $part } ) { |
|
225
|
|
|
|
|
|
|
# warn "Could not find pattern $pattern_name for field $field_name\n"; |
|
226
|
0
|
0
|
|
|
|
0
|
if ( $field_name ) { |
|
227
|
0
|
|
|
|
|
0
|
return "%{$pattern_name:$field_name}"; |
|
228
|
|
|
|
|
|
|
} |
|
229
|
0
|
|
|
|
|
0
|
return "%{$pattern_name}"; |
|
230
|
|
|
|
|
|
|
} |
|
231
|
|
|
|
|
|
|
|
|
232
|
134
|
|
|
|
|
257
|
$pattern = $pattern->{ $part }; |
|
233
|
|
|
|
|
|
|
} |
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
# Handle the "default" pattern for a pattern group |
|
236
|
367
|
100
|
|
|
|
722
|
if ( ref $pattern eq 'HASH' ) { |
|
237
|
4
|
|
33
|
|
|
23
|
$pattern = $pattern->{ $parts[-1] || $pattern_name }; |
|
238
|
|
|
|
|
|
|
} |
|
239
|
|
|
|
|
|
|
|
|
240
|
367
|
100
|
|
|
|
622
|
if ( $field_name ) { |
|
241
|
324
|
|
|
|
|
874
|
return "(?<$field_name>" . $class->parse_pattern( $pattern ) . ")"; |
|
242
|
|
|
|
|
|
|
} |
|
243
|
43
|
|
|
|
|
134
|
return "(?:" . $class->parse_pattern( $pattern ) . ")"; |
|
244
|
|
|
|
|
|
|
} |
|
245
|
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
sub parse_pattern { |
|
247
|
444
|
|
|
444
|
0
|
761
|
my ( $class, $pattern ) = @_; |
|
248
|
444
|
|
|
|
|
1212
|
$pattern =~ s/\%\{([^:}]+)(?::([^:}]+))?\}/$class->_get_pattern( $1, $2 )/ge; |
|
|
367
|
|
|
|
|
823
|
|
|
249
|
|
|
|
|
|
|
#; say STDERR 'PATTERN: ' . $pattern; |
|
250
|
444
|
|
|
|
|
2571
|
return $pattern; |
|
251
|
|
|
|
|
|
|
} |
|
252
|
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
sub config { |
|
254
|
378
|
|
|
378
|
0
|
1209
|
my $conf_file = path( File::HomeDir->my_home, '.yertl', 'ygrok.yml' ); |
|
255
|
378
|
|
|
|
|
17752
|
my $config = {}; |
|
256
|
378
|
100
|
|
|
|
893
|
if ( $conf_file->exists ) { |
|
257
|
31
|
|
|
|
|
296
|
my $yaml = load_module( format => 'yaml' )->new( input => $conf_file->openr ); |
|
258
|
31
|
|
|
|
|
85
|
( $config ) = $yaml->read; |
|
259
|
|
|
|
|
|
|
} |
|
260
|
378
|
|
|
|
|
2778
|
return $config; |
|
261
|
|
|
|
|
|
|
} |
|
262
|
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
sub config_pattern { |
|
264
|
7
|
|
|
7
|
0
|
16
|
my ( $pattern_name, $pattern ) = @_; |
|
265
|
7
|
|
|
|
|
13
|
my $all_config = config(); |
|
266
|
7
|
|
|
|
|
12
|
my $pattern_category = $all_config; |
|
267
|
7
|
|
|
|
|
30
|
my @parts = split /[.]/, $pattern_name; |
|
268
|
7
|
|
|
|
|
27
|
for my $part ( @parts[0..$#parts-1] ) { |
|
269
|
5
|
|
100
|
|
|
27
|
$pattern_category = $pattern_category->{ $part } ||= {}; |
|
270
|
|
|
|
|
|
|
} |
|
271
|
|
|
|
|
|
|
|
|
272
|
7
|
50
|
|
|
|
17
|
if ( $pattern ) { |
|
273
|
7
|
|
|
|
|
28
|
my $conf_file = path( File::HomeDir->my_home, '.yertl', 'ygrok.yml' ); |
|
274
|
7
|
100
|
|
|
|
357
|
if ( !$conf_file->exists ) { |
|
275
|
4
|
|
|
|
|
32
|
$conf_file->touchpath; |
|
276
|
|
|
|
|
|
|
} |
|
277
|
7
|
|
|
|
|
1419
|
$pattern_category->{ $parts[-1] } = $pattern; |
|
278
|
7
|
|
|
|
|
25
|
my $yaml = load_module( format => 'yaml' )->new; |
|
279
|
7
|
|
|
|
|
23
|
$conf_file->spew( $yaml->write( $all_config ) ); |
|
280
|
7
|
|
|
|
|
2732
|
return; |
|
281
|
|
|
|
|
|
|
} |
|
282
|
0
|
|
0
|
|
|
0
|
return $pattern_category->{ $parts[-1] } || ''; |
|
283
|
|
|
|
|
|
|
} |
|
284
|
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
1; |
|
286
|
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
__END__ |