| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package App::RecordStream::Operation::normalizetime; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# Modern Date::Manip must be told to use its DM5 backend; this has to be set before the module is loaded. |
|
4
|
|
|
|
|
|
|
# TODO: use backend 6 |
|
5
|
|
|
|
|
|
|
# Runs at compile time, i.e. before the later `use Date::Manip` statement is
# processed, so the backend choice is already in place when the module loads.
BEGIN { $Date::Manip::Backend = 'DM5' }
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
our $VERSION = "4.0.23"; |
|
10
|
|
|
|
|
|
|
|
|
11
|
2
|
|
|
2
|
|
12
|
use strict; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
39
|
|
|
12
|
|
|
|
|
|
|
|
|
13
|
2
|
|
|
2
|
|
9
|
use base qw(App::RecordStream::Operation); |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
115
|
|
|
14
|
|
|
|
|
|
|
|
|
15
|
2
|
|
|
2
|
|
709
|
use Date::Manip qw (ParseDate UnixDate ParseDateDelta Delta_Format); |
|
|
2
|
|
|
|
|
85789
|
|
|
|
2
|
|
|
|
|
1135
|
|
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# Parse the command-line options and store the operation's configuration.
#
# Arguments:
#   $this - the operation object; receives the KEY, SANITIZED_KEY, STRICT,
#           EPOCH and THRESHOLD entries
#   $args - array ref of command-line arguments, handed to parse_options()
#
# Dies when --key or --threshold is missing, when the threshold is neither a
# number nor a duration that ParseDateDelta/Delta_Format can turn into
# seconds, or when the resulting threshold is not greater than zero.
sub init {
  my $this = shift;
  my $args = shift;

  my ($key, $threshold, $strict, $epoch);

  my $spec = {
    "key|k=s"       => \$key,
    "strict|s"      => \$strict,
    "epoch|e"       => \$epoch,
    "threshold|n=s" => \$threshold,
  };

  $this->parse_options($args, $spec);

  # Test for presence rather than truthiness so a key literally named "0"
  # is accepted.
  die('Must specify --key') unless ( defined $key && length $key );
  die('Must specify --threshold') unless ( defined $threshold && length $threshold );

  # A plain decimal number: digits with at most one decimal point.
  my $number_re = qr/\A(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)\z/;

  # if threshold is not a number, assume it's a parsable duration string
  if ( $threshold !~ $number_re ) {
    my $delta = ParseDateDelta($threshold);
    $threshold = Delta_Format($delta, 0, '%st');

    unless ( defined $threshold && $threshold =~ $number_re ) {
      die "Threshold passed isn't a number or parsable, "
        . "see perldoc Date::Manip for parseable formats\n";
    }
  }

  # accept_record() divides by the threshold, so zero must be rejected here.
  die "Threshold must be greater than zero\n" unless ( $threshold > 0 );

  # Replace every '/' so the output field name contains no nested-keyspec
  # separators.
  ( my $sanitized_key = $key ) =~ s!/!_!g;

  $this->{'KEY'}           = $key;
  $this->{'SANITIZED_KEY'} = $sanitized_key;
  $this->{'STRICT'}        = $strict;
  $this->{'EPOCH'}         = $epoch;
  $this->{'THRESHOLD'}     = $threshold;
}
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# Compute the normalized time for one record, store it in the record under
# "n_<sanitized key>", and push the record downstream.
#
# Arguments:
#   $this   - the operation object configured by init()
#   $record - the record to annotate; its time value is looked up through
#             guess_key_from_spec() with the configured key
#
# Returns 1. Dies when the value is not in epoch mode and cannot be converted
# to epoch seconds.
sub accept_record {
  my $this   = shift;
  my $record = shift;

  my $key           = $this->{'KEY'};
  my $threshold     = $this->{'THRESHOLD'};
  my $strict        = $this->{'STRICT'};
  my $sanitized_key = $this->{'SANITIZED_KEY'};
  my $prior_normalized_value = $this->{'PRIOR_NORMALIZED_VALUE'};

  my $value = ${$record->guess_key_from_spec($key)};

  my $time = $value;
  if ( ! $this->{'EPOCH'} ) {
    $time = UnixDate( ParseDate( $value ), "%s" );

    # Test for definedness, not truth: epoch second 0 is a valid time.
    die "I can't understand Key: $key, with value: $value"
      unless ( defined $time && length $time );
  }

  # Round down to the start of the enclosing period. int() truncates toward
  # zero, so step back one period when truncation landed above the time
  # (which happens for negative, i.e. pre-1970, values).
  my $periods = int( $time / $threshold );
  $periods-- if ( $periods * $threshold > $time );

  my $normalized_time_cur_period   = $periods * $threshold;
  my $normalized_time_prior_period = $normalized_time_cur_period - $threshold;

  my $normalized_time;
  if( !$strict && defined( $prior_normalized_value ) && $prior_normalized_value == $normalized_time_prior_period ) {
    # Non-strict mode: the previous record was assigned the period directly
    # before this one, so this record is folded into that period. The stored
    # prior value is deliberately left unchanged.
    $normalized_time = $prior_normalized_value;
  } else {
    $normalized_time = $normalized_time_cur_period;
    $this->{'PRIOR_NORMALIZED_VALUE'} = $normalized_time_cur_period;
  }

  $record->{"n_$sanitized_key"} = $normalized_time;
  $this->push_record($record);

  return 1;
}
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
# Register the extra help topics for this operation: the shared 'keyspecs'
# topic and a 'full' topic whose text is printed by full_help().
sub add_help_types {
  my $this = shift;

  $this->use_help_type('keyspecs');
  $this->add_help_type(
    'full',
    \&full_help,
    'Indepth description of normalization algorithm'
  );
}
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
# Print the long-form description of the normalization algorithm to STDOUT.
# Registered as the handler for the 'full' help topic in add_help_types().
#
# NOTE(review): the column spacing of the example tables was not recoverable
# from the reviewed text and has been laid out afresh - confirm it is
# acceptable.
sub full_help {
  print <<FULL_HELP;
Full Help

This recs processor will generate normalized versions of date/time values and
add this value as another attribute to the record stream. Used in conjunction
with recs-collate you can aggregate information over the normalized time. For
example if you use
   recs-normalizetime -k date --n 1 | recs-collate -k n_date -a firstrec
then this picks a single record from a stream to serve in placement of lots of
records which are close to each other in time.

The normalized time value generated depends on whether or not you are using
strict normalization or not. The default is to use non-strict.

The use of the optional --epoch argument indicates that the date/time values
are expressed in epoch seconds. This argument both speeds up the execution of
an invocation (due to avoiding the expensive perl Date::Manip executions) and is
required for correctness when the values are epoch seconds.

1. When using strict normalization then time is chunked up into fixed segments
of --threshold seconds in each segment with the first segment occurring on
January 1st 1970 at 0:00. So if the threshold is 60 seconds then the following
record stream would be produced

  date       n_date
  1:00:00    1:00:00
  1:00:14    1:00:00
  1:00:59    1:00:00
  1:02:05    1:02:00
  1:02:55    1:02:00
  1:03:15    1:03:00


2. When not using strict normalization then the time is again chunked up into
fixed segments however the actual segment assigned to a value depends on the
segment chunk seen in the prior record.

The logic used is the following:
 - a time is distilled down to a representative sample where the precision is
   defined by the --threshold. For example if you said that the threshold is
   10 (seconds) then 10:22:01 and 10:22:09 would both become 10:22:00. 10:22:10
   would be 10:22:10.
 - as you can tell the representative values is the first second within the range
   that you define, with one exception
 - if the representative value of the prior record is in the prior range to the
   current representative value then the prior record value will be used

So if the threshold is 60 seconds then the following record stream would be produced

  date       n_date
  1:00:00    1:00:00
  1:00:59    1:00:00
  1:02:05    1:02:00
  1:02:55    1:02:00
  1:03:15    1:02:00  ** Note - still matches prior representative value **
  1:05:59    1:05:00
  1:06:15    1:05:00  ** Note - matches prior entry **
  1:07:01    1:07:00  ** Note - since the 1:05 and 1:06 had the same representative **
                      ** value then this is considered a new representative time slice **

Basically a 60 second threshold will match the current minute and the next minute unless
the prior minute was seen and then the 60 second threshold matches the current minute and
the prior minute.


Example usage: if you have log records for "out of memory" exceptions which may occur multiple
times because of exception catching and logging then you can distill them all down to a
single logical event and then count the number of occurrences for a host via:

grep "OutOfMemory" logs |\
 recs-frommultire --re 'host=@([^:]*):' --re 'date=^[A-Za-z]* (.*) GMT ' |\
 recs-normalizetime --key date --threshold 300 | \
 recs-collate --perfect --key n_date -a firstrec | \
 recs-collate --perfect --key firstrec_host -a count=count

FULL_HELP
}
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
# Build and return the usage text for recs-normalizetime.
#
# Arguments:
#   $this - the operation object; its options_string() method renders the
#           option table into the "Arguments:" section
#
# Returns the complete usage message as a single string.
#
# NOTE(review): in the reviewed text the '<...>' placeholders and the whole
# description of the threshold option were missing. The placeholders and the
# threshold wording below are reconstructed from what init() accepts (a number
# of seconds, or a duration string handed to ParseDateDelta) - confirm against
# the canonical source.
sub usage {
  my $this = shift;

  my $options = [
    ['key|-k <key>', 'Single Key field containing the date/time may be a key spec, see \'--help-keyspecs\' for more info'],
    ['epoch|-e', 'Assumes date/time field is expressed in epoch seconds (optional, defaults to non-epoch)'],
    ['threshold|-n <time range>', 'Number of seconds in the range.  May also be a duration string like \'1 week\' or \'5 minutes\', parsable by Date::Manip'],
    ['strict|-s', 'Apply strict normalization (defaults to non-strict)'],
  ];

  my $args_string = $this->options_string($options);

  return <<USAGE;
Usage: recs-normalizetime <args> [<files>]
   __FORMAT_TEXT__
   Given a single key field containing a date/time value this recs processor
   will construct a normalized version of the value and place this new value
   into a field named "n_<key>" (where <key> is the key field appearing in
   the args).
   __FORMAT_TEXT__

Arguments:
$args_string

Examples:
   # Tag records with normalized time in 5 minute buckets from the date field
   ... | recs-normalizetime --strict --key date -n 300

   # Normalize time with fuzzy normalization into 1 minute buckets from the
   # epoch-relative 'time' field
   ... | recs-normalizetime --key time -e -n 60

   #Get 1 week buckets
   ... | recs-normalizetime --key timestamp -n '1 week'
USAGE
}
|
223
|
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
1; |