line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package App::RecordStream::Operation::normalizetime; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# We need to set DM5 for the backend with modern DateManip. |
4
|
|
|
|
|
|
|
# TODO: use backend 6 |
5
|
|
|
|
|
|
|
BEGIN { |
6
|
2
|
|
|
2
|
|
884
|
$Date::Manip::Backend = 'DM5'; |
7
|
|
|
|
|
|
|
} |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
our $VERSION = "4.0.25"; |
10
|
|
|
|
|
|
|
|
11
|
2
|
|
|
2
|
|
9
|
use strict; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
31
|
|
12
|
|
|
|
|
|
|
|
13
|
2
|
|
|
2
|
|
17
|
use base qw(App::RecordStream::Operation); |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
145
|
|
14
|
|
|
|
|
|
|
|
15
|
2
|
|
|
2
|
|
681
|
use Date::Manip qw (ParseDate UnixDate ParseDateDelta Delta_Format); |
|
2
|
|
|
|
|
86617
|
|
|
2
|
|
|
|
|
975
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# Parse the command-line options for this operation, validate them, and
# store the resulting configuration on the operation object.
#
# Arguments:
#   $this - the operation instance; must provide parse_options()
#   $args - the argument list, passed through to parse_options()
#
# Stores KEY, SANITIZED_KEY, STRICT, EPOCH and THRESHOLD on $this.
#
# Dies when --key or --threshold is missing, when --threshold is neither a
# decimal number nor a duration string Date::Manip can turn into seconds,
# or when the threshold is not greater than zero.
sub init {
  my $this = shift;
  my $args = shift;

  my ($key, $threshold, $strict, $epoch);

  my $spec = {
    "key|k=s"       => \$key,
    "strict|s"      => \$strict,
    "epoch|e"       => \$epoch,
    "threshold|n=s" => \$threshold,
  };

  $this->parse_options($args, $spec);

  die('Must specify --key') unless ( $key );
  die('Must specify --threshold') unless ( $threshold );

  # A plain non-negative decimal such as "300", "2.5" or ".5".  A bare
  # character class like [0-9.]+ would also accept "." and "1.2.3", which
  # are not numbers.
  my $decimal = qr/^(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)$/;

  # If the threshold is not a number, assume it is a parsable duration
  # string and convert it to a number of seconds.
  if ( $threshold !~ $decimal ) {
    my $delta = ParseDateDelta($threshold);
    $threshold = Delta_Format($delta, 0, '%st');

    unless ( defined $threshold && $threshold =~ $decimal ) {
      die "Threshold passed isn't a number or parsable, "
        . "see perldoc Date::Manip for parseable formats\n";
    }
  }

  # accept_record divides by the threshold, so a zero-length bucket must be
  # rejected here instead of failing later with a division-by-zero error.
  die "Threshold must be greater than zero\n" unless ( $threshold > 0 );

  # The output field is named after the key; replace every '/' so the new
  # field name is not itself read as a nested key spec.
  my $sanitized_key = $key;
  $sanitized_key =~ s!/!_!g;

  $this->{'KEY'}           = $key;
  $this->{'SANITIZED_KEY'} = $sanitized_key;
  $this->{'STRICT'}        = $strict;
  $this->{'EPOCH'}         = $epoch;
  $this->{'THRESHOLD'}     = $threshold;
}
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# Compute the normalized time for one record, store it in the field
# "n_<sanitized key>", and pass the record on via push_record().
#
# Arguments:
#   $this   - the operation instance, configured by init()
#   $record - the record; must provide guess_key_from_spec(), which is
#             expected to return a reference to the field's value
#
# Returns 1.
#
# Dies when EPOCH is not set and the field value cannot be converted to
# epoch seconds by Date::Manip.
sub accept_record {
  my $this   = shift;
  my $record = shift;

  my $key           = $this->{'KEY'};
  my $threshold     = $this->{'THRESHOLD'};
  my $strict        = $this->{'STRICT'};
  my $sanitized_key = $this->{'SANITIZED_KEY'};

  # Bucket start that was assigned to an earlier record (undef at first).
  my $prior_normalized_value = $this->{'PRIOR_NORMALIZED_VALUE'};

  my $value = ${$record->guess_key_from_spec($key)};

  my $time = $value;
  if ( ! $this->{'EPOCH'} ) {
    $time = UnixDate( ParseDate( $value ), "%s" );

    # Test for "no result" rather than truthiness: epoch second 0 is a
    # valid timestamp and must not be reported as unparsable.
    die "I can't understand Key: $key, with value: $value"
      unless ( defined $time && length $time );
  }

  # Start of the fixed-size bucket containing $time, and of the bucket
  # immediately before it.
  my $normalized_time_cur_period   = int( $time / $threshold ) * $threshold;
  my $normalized_time_prior_period = $normalized_time_cur_period - $threshold;

  my $normalized_time;
  if ( !$strict
    && defined( $prior_normalized_value )
    && $prior_normalized_value == $normalized_time_prior_period ) {
    # Non-strict mode: the stored bucket is the one directly before this
    # record's bucket, so the record joins it.  The stored value is left
    # unchanged, so a bucket can absorb at most one following bucket.
    $normalized_time = $prior_normalized_value;
  } else {
    $normalized_time = $normalized_time_cur_period;
    $this->{'PRIOR_NORMALIZED_VALUE'} = $normalized_time_cur_period;
  }

  $record->{"n_$sanitized_key"} = $normalized_time;
  $this->push_record($record);

  return 1;
}
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
# Register the help topics offered by this operation: the 'keyspecs' help
# type, plus a 'full' help type that is served by full_help().
#
# Arguments:
#   $this - the operation instance; must provide use_help_type() and
#           add_help_type()
sub add_help_types {
  my $this = shift;

  $this->use_help_type('keyspecs');

  # The description is shown to users, so its spelling matters.
  $this->add_help_type(
    'full',
    \&full_help,
    'In-depth description of normalization algorithm'
  );
}
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
sub full_help { |
109
|
0
|
|
|
0
|
0
|
|
print <
|
110
|
|
|
|
|
|
|
Full Help |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
This recs processor will generate normalized versions of date/time values and |
113
|
|
|
|
|
|
|
add this value as another attribute to the record stream. Used in conjunction |
114
|
|
|
|
|
|
|
with recs-collate you can aggregate information over the normalized time. For |
115
|
|
|
|
|
|
|
example if you use |
116
|
|
|
|
|
|
|
recs-normalizetime -k date --n 1 | recs-collate -k n_date -a firstrec |
117
|
|
|
|
|
|
|
then this picks a single record from a stream to serve in placement of lots of |
118
|
|
|
|
|
|
|
records which are close to each other in time. |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
The normalized time value generated depends on whether or not you are using |
121
|
|
|
|
|
|
|
strict normalization or not. The default is to use non-strict. |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
The use of the optional --epoch argument indicates that the date/time values |
124
|
|
|
|
|
|
|
are expressed in epoch seconds. This argument both speeds up the execution of |
125
|
|
|
|
|
|
|
an invocation (due to avoiding the expensive perl Date::Manip executions) and is |
126
|
|
|
|
|
|
|
required for correctness when the values are epoch seconds. |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
1. When using strict normalization then time is chunked up into fixed segments |
129
|
|
|
|
|
|
|
of --threshold seconds in each segment with the first segment occurring on |
130
|
|
|
|
|
|
|
January 1st 1970 at 0:00. So if the threshold is 60 seconds then the following |
131
|
|
|
|
|
|
|
record stream would be produced |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
date n_date |
134
|
|
|
|
|
|
|
1:00:00 1:00:00 |
135
|
|
|
|
|
|
|
1:00:14 1:00:00 |
136
|
|
|
|
|
|
|
1:00:59 1:00:00 |
137
|
|
|
|
|
|
|
1:02:05 1:02:00 |
138
|
|
|
|
|
|
|
1:02:55 1:02:00 |
139
|
|
|
|
|
|
|
1:03:15 1:03:00 |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
2. When not using strict normalization then the time is again chunked up into |
143
|
|
|
|
|
|
|
fixed segments however the actual segment assigned to a value depends on the |
144
|
|
|
|
|
|
|
segment chunk seen in the prior record. |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
The logic used is the following: |
147
|
|
|
|
|
|
|
- a time is distilled down to a representative sample where the precision is |
148
|
|
|
|
|
|
|
defined by the --threshold. For example if you said that the threshold is |
149
|
|
|
|
|
|
|
10 (seconds) then 10:22:01 and 10:22:09 would both become 10:22:00. 10:22:10 |
150
|
|
|
|
|
|
|
would be 10:22:10. |
151
|
|
|
|
|
|
|
- as you can tell the representative value is the first second within the range |
152
|
|
|
|
|
|
|
that you define, with one exception |
153
|
|
|
|
|
|
|
- if the representative value of the prior record is in the prior range to the |
154
|
|
|
|
|
|
|
current representative value then the prior record value will be used |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
So if the threshold is 60 seconds then the following record stream would be produced |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
date n_date |
159
|
|
|
|
|
|
|
1:00:00 1:00:00 |
160
|
|
|
|
|
|
|
1:00:59 1:00:00 |
161
|
|
|
|
|
|
|
1:02:05 1:02:00 |
162
|
|
|
|
|
|
|
1:02:55 1:02:00 |
163
|
|
|
|
|
|
|
1:03:15 1:02:00 ** Note - still matches prior representative value ** |
164
|
|
|
|
|
|
|
1:05:59 1:05:00 |
165
|
|
|
|
|
|
|
1:06:15 1:05:00 ** Note - matches prior entry ** |
166
|
|
|
|
|
|
|
1:07:01 1:07:00 ** Note - since the 1:05 and 1:06 had the same representative ** |
167
|
|
|
|
|
|
|
** value then this is considered a new representative time slice ** |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
Basically a 60 second threshold will match the current minute and the next minute unless |
170
|
|
|
|
|
|
|
the prior minute was seen and then the 60 second threshold matches the current minute and |
171
|
|
|
|
|
|
|
the prior minute. |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
Example usage: if you have log records for "out of memory" exceptions which may occur multiple |
175
|
|
|
|
|
|
|
times because of exception catching and logging then you can distill them all down to a |
176
|
|
|
|
|
|
|
single logical event and then count the number of occurrences for a host via: |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
grep "OutOfMemory" logs |\ |
179
|
|
|
|
|
|
|
recs-frommultire --re 'host=@([^:]*):' --re 'date=^[A-Za-z]* (.*) GMT ' |\ |
180
|
|
|
|
|
|
|
recs-normalizetime --key date --threshold 300 | \ |
181
|
|
|
|
|
|
|
recs-collate --perfect --key n_date -a firstrec | \ |
182
|
|
|
|
|
|
|
recs-collate --perfect --key firstrec_host -a count=count |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
FULL_HELP |
185
|
|
|
|
|
|
|
} |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
sub usage { |
188
|
0
|
|
|
0
|
0
|
|
my $this = shift; |
189
|
|
|
|
|
|
|
|
190
|
0
|
|
|
|
|
|
my $options = [ |
191
|
|
|
|
|
|
|
['key|-k ', 'Single Key field containing the date/time may be a key spec, see \'--help-keyspecs\' for more info'], |
192
|
|
|
|
|
|
|
['epoch|-e', 'Assumes date/time field is expressed in epoch seconds (optional, defaults to non-epoch)'], |
193
|
|
|
|
|
|
|
['threshold|-n |
194
|
|
|
|
|
|
|
['strict|-s', 'Apply strict normalization (defaults to non-strict)'], |
195
|
|
|
|
|
|
|
]; |
196
|
|
|
|
|
|
|
|
197
|
0
|
|
|
|
|
|
my $args_string = $this->options_string($options); |
198
|
|
|
|
|
|
|
|
199
|
0
|
|
|
|
|
|
return <
|
200
|
|
|
|
|
|
|
Usage: recs-normalizetime <args> [<files>] |
201
|
|
|
|
|
|
|
__FORMAT_TEXT__ |
202
|
|
|
|
|
|
|
Given a single key field containing a date/time value this recs processor |
203
|
|
|
|
|
|
|
will construct a normalized version of the value and place this new value |
204
|
|
|
|
|
|
|
into a field named "n_<key>" (where <key> is the key field appearing in |
205
|
|
|
|
|
|
|
the args). |
206
|
|
|
|
|
|
|
__FORMAT_TEXT__ |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
Arguments: |
209
|
|
|
|
|
|
|
$args_string |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
Examples: |
212
|
|
|
|
|
|
|
# Tag records with normalized time in 5 minute buckets from the date field |
213
|
|
|
|
|
|
|
... | recs-normalizetime --strict --key date -n 300 |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
# Normalize time with fuzzy normalization into 1 minute buckets from the |
216
|
|
|
|
|
|
|
# epoch-relative 'time' field |
217
|
|
|
|
|
|
|
... | recs-normalizetime --key time -e -n 60 |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
#Get 1 week buckets |
220
|
|
|
|
|
|
|
... | recs-normalizetime --key timestamp -n '1 week' |
221
|
|
|
|
|
|
|
USAGE |
222
|
|
|
|
|
|
|
} |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
1; |