File Coverage

blib/lib/Algorithm/History/Levels.pm

Criterion	Covered	Total	%
statement	99	99	100.0
branch	35	40	87.5
condition	8	10	80.0
subroutine	6	6	100.0
pod	1	1	100.0
total	149	156	95.5

line	stmt	bran	cond	sub	pod	time	code
1							package Algorithm::History::Levels;
2
3							our $DATE = '2017-06-14'; # DATE
4							our $VERSION = '0.001'; # VERSION
5
6	1			1		48750	use 5.010001;
	1					4
7	1			1		5	use strict;
	1					2
	1					22
8	1			1		4	use warnings;
	1					3
	1					24
9
10	1			1		4	use Exporter qw(import);
	1					2
	1					757
11							our @EXPORT_OK = qw(group_histories_into_levels);
12
13							our %SPEC;
14
15							sub _pick_history {
16	81			81		144	my ($histories, $min_time, $max_time) = @_;
17	81					112	for my $i (0..$#{$histories}) {
	81					155
18							#say "D:$histories->[$i][1] between $min_time & $max_time?";
19	265	100	100			927	if ($histories->[$i][1] >= $min_time &&
20							$histories->[$i][1] <= $max_time) {
21	54					122	return splice(@$histories, $i, 1);
22							}
23							}
24	27					46	undef;
25							}
26
27							$SPEC{group_histories_into_levels} = {
28							v => 1.1,
29							summary => 'Group histories into levels',
30							description => <<'_',
31
32							This routine can group a single, linear histories into levels. This is be better
33							explained by an example. Suppose you produce daily database backups. Your backup
34							files are named:
35
36							mydb.2017-06-13.sql.gz
37							mydb.2017-06-12.sql.gz
38							mydb.2017-06-11.sql.gz
39							mydb.2017-06-10.sql.gz
40							mydb.2017-06-09.sql.gz
41							...
42
43							After a while, your backups grow into tens and then hundreds of dump files. You
44							typically want to keep certain number of backups only, for example: 7 daily
45							backups, 4 weekly backups, 6 monthly backups (so you practically have 6 months
46							of history but do not need to store 6*30 = 180 dumps, only 7 + 4 + 6 = 17). This
47							is the routine you can use to select which files to keep and which to discard.
48
49							You provide the list of histories either in the form of Unix timestamps:
50
51							[1497286800, 1497200400, 1497114000, ...]
52
53							or in the form of `[name, timestamp]` pairs, e.g.:
54
55							[
56							['mydb.2017-06-13.sql.gz', 1497286800],
57							['mydb.2017-06-12.sql.gz', 1497200400],
58							['mydb.2017-06-11.sql.gz', 1497114000],
59							...
60							]
61
62							Duplicates of timestamps are allowed, but duplicates of names are not allowed.
63							If list of timestamps are given, the name is assumed to be the timestamp itself
64							and there must not be duplicates.
65
66							Then, you specify the levels with a list of `[period, num-in-this-level]` pairs.
67							For example, 7 daily + 4 weekly + 6 monthly can be specified using:
68
69							[
70							[86400, 7],
71							[7*86400, 4],
72							[30*86400, 6],
73							]
74
75							Subsequent level must have greater period than its previous.
76
77							This routine will return a hash. The `levels` key will contain the history
78							names, grouped into levels. The `discard` key will contain list of history names
79							to discard:
80
81							{
82							levels => [
83
84							# histories for the first level
85							['mydb.2017-06-13.sql.gz',
86							'mydb.2017-06-12.sql.gz',
87							'mydb.2017-06-11.sql.gz',
88							'mydb.2017-06-10.sql.gz',
89							'mydb.2017-06-09.sql.gz',
90							'mydb.2017-06-08.sql.gz',
91							'mydb.2017-06-07.sql.gz'],
92
93							# histories for the second level
94							['mydb.2017-06-06.sql.gz',
95							'mydb.2017-05-30.sql.gz',
96							'mydb.2017-05-23.sql.gz',
97							'mydb.2017-05-16.sql.gz'],
98
99							# histories for the third level
100							['mydb.2017-06-05.sql.gz',
101							'mydb.2017-05-06.sql.gz',
102							'mydb.2017-04-06.sql.gz',
103							...],
104
105							discard => [
106							'mydb.2017-06-04.sql.gz',
107							'mydb.2017-06-03.sql.gz',
108							...
109							],
110							}
111
112							_
113							args => {
114							histories => {
115							schema => ['array*', {
116							of=>['any*', {
117							of=>[
118							'int*',
119							['array', elems=>['str', 'float*']],
120							],
121							}],
122							}],
123							req => 1,
124							},
125							levels => {
126							schema => ['array*', {
127							of => ['array', elems => ['float', 'posint*']],
128							min_len => 1,
129							}],
130							req => 1,
131							},
132							now => {
133							schema => 'int*',
134							},
135							discard_old_histories => {
136							schema => ['bool*'],
137							default => 0,
138							},
139							discard_young_histories => {
140							schema => ['bool*'],
141							default => 0,
142							},
143							},
144							result_naked => 1,
145							};
146							sub group_histories_into_levels {
147	13			13	1	7317	require Array::Sample::Partition;
148
149	13					269	my %args = @_;
150
151	13		33			69	my $now = $args{now} // time();
152
153	13	100				47	my $histories0 = $args{histories} or die "Please specify histories";
154	12					20	my @histories;
155							{
156	12					21	my %seen;
	12					17
157	12					31	for my $h (@$histories0) {
158	103					154	my ($name, $time);
159	103	100				210	if (ref $h eq 'ARRAY') {
160	84					152	($name, $time) = @$h;
161							} else {
162	19					35	$name = $h;
163	19					27	$time = $h;
164							}
165	103	100				278	$seen{$name}++ and die "Duplicate history name '$name'";
166	101					236	push @histories, [$name, $time];
167							}
168							}
169
170	10	100				35	my $levels = $args{levels} or die "Please specify levels";
171	9	100				32	@$levels > 0 or die "Please specify at least one level";
172	8					13	my $i = 0;
173	8					15	my $min_period;
174	8					15	for my $l (@$levels) {
175	20	50				46	ref($l) eq 'ARRAY' or die "Level #$i: not an array";
176	20	100				52	@$l == 2 or die "Level #$i: not a 2-element array";
177	19	50				41	$l->[0] > 0 or die "Level #$i: period must be a positive number";
178	19	50				40	$l->[1] >= 1 or die "Level #$i: number of items must be at least 1";
179	19	100				44	if (defined $min_period) {
180	12	100				37	$l->[0] > $min_period or die "Level #$i: period must be larger than previous ($min_period)";
181							}
182	18					27	$min_period = $l->[0];
183	18					32	$i++;
184							}
185
186							# first, we sort the histories by timestamp (newer first)
187	6					28	@histories = sort { $b->[1] <=> $a->[1] } @histories;
	207					296
188
189							my $res = {
190	6					14	levels => [ map {[]} @$levels],
	17					43
191							discard => [],
192							};
193
194							LEVEL:
195	6					14	for my $l (0..$#{$levels}) {
	6					16
196	17					27	my ($period, $num_per_level) = @{ $levels->[$l] };
	17					38
197
198							# first, fill the level with histories that fit the time frame for each
199							# level's slot
200	17					38	for my $slot (0..$num_per_level-1) {
201	81					135	my $min_time = $now-($slot+1)*$period;
202	81					113	my $max_time = $now-($slot )*$period;
203	81	100				166	if ($l > 0) {
204	39					60	my ($lower_period, $lower_num_per_level) = @{ $levels->[$l-1] };
	39					66
205	39					59	$min_time -= $lower_num_per_level*$lower_period;
206	39					55	$max_time -= $lower_num_per_level*$lower_period;
207							}
208	81					159	my $h = _pick_history(\@histories, $min_time, $max_time);
209	81	100				178	push @{ $res->{levels}[$l] }, $h if $h;
	54					120
210							}
211
212							# if the level is not fully filled yet, fill it with young or old
213							# histories
214	17					25	my $num_filled = @{ $res->{levels}[$l] };
	17					32
215							#say "D:level=$l, num_filled=$num_filled";
216	17	100				54	unless ($num_filled >= $num_per_level) {
217	11					25	my @filler = @histories;
218	11	100	100			43	if ($args{discard_young_histories} // 0) {
219	4					6	my $time = $now-$num_per_level*$period;
220	4	50				12	if ($l > 0) {
221							my ($lower_period, $lower_num_per_level) =
222	4					7	@{ $levels->[$l-1] };
	4					7
223	4					9	$time -= $lower_num_per_level*$lower_period;
224							}
225	4					7	@filler = grep { $_->[1] <= $time }
	30					55
226							@filler;
227							}
228	11	100	100			37	if ($args{discard_old_histories} // 0) {
229	4					8	my $time = $now-$num_per_level*$period;
230	4	50				10	if ($l > 0) {
231							my ($lower_period, $lower_num_per_level) =
232	4					5	@{ $levels->[$l-1] };
	4					9
233	4					7	$time -= $lower_num_per_level*$lower_period;
234							}
235	4					7	@filler = grep { $_->[1] >= $time }
	16					30
236							@filler;
237							}
238	11					39	my @sample = Array::Sample::Partition::sample_partition(
239							\@filler, $num_per_level - $num_filled);
240							$res->{levels}[$l] = [
241	31					56	sort { $b->[1] <=> $a->[1] }
242	11					190	(@{ $res->{levels}[$l] }, @sample),
	11					29
243							];
244	11					30	for my $i (reverse 0..$#histories) {
245	73					126	for my $j (0..$#sample) {
246	88	100				226	if ($histories[$i] eq $sample[$j]) {
247	17					26	splice @histories, $i, 1;
248	17					33	last;
249							}
250							}
251							}
252							}
253
254							# only return names
255	17					32	$res->{levels}[$l] = [ map {$_->[0]} @{ $res->{levels}[$l] } ];
	71					196
	17					37
256							}
257
258	6					15	push @{ $res->{discard} }, $_->[0] for @histories;
	26					55
259
260	6					55	END:
261							$res;
262							}
263
264							1;
265							# ABSTRACT: Group histories into levels
266
267							__END__