line
stmt
bran
cond
sub
pod
time
code
1
#!/usr/bin/perl
2
package Acme::Tools;
3
4
our $VERSION = '0.20';
5
6
33
33
477084
use 5.008; #Perl 5.8 was released July 18th 2002
33
89
7
33
33
131
use strict;
33
45
33
710
8
33
33
100
use warnings;
33
39
33
764
9
33
33
115
use Carp;
33
28
33
8503
10
11
require Exporter;
12
our @ISA = qw(Exporter);
13
our %EXPORT_TAGS = ( all => [ qw() ] );
14
our @EXPORT_OK = ( @{ $EXPORT_TAGS{all} } );
15
our @EXPORT = qw(
16
min
17
max
18
mins
19
maxs
20
sum
21
avg
22
geomavg
23
harmonicavg
24
stddev
25
rstddev
26
median
27
percentile
28
$Resolve_iterations
29
$Resolve_last_estimate
30
$Resolve_time
31
resolve
32
resolve_equation
33
conv
34
rank
35
rankstr
36
eqarr
37
sorted
38
sortedstr
39
pushsort
40
pushsortstr
41
binsearch
42
binsearchstr
43
random
44
random_gauss
45
big
46
bigi
47
bigf
48
bigr
49
bigscale
50
nvl
51
repl
52
replace
53
decode
54
decode_num
55
between
56
curb
57
bound
58
log10
59
log2
60
distinct
61
in
62
in_num
63
uniq
64
union
65
union_all
66
minus
67
minus_all
68
intersect
69
intersect_all
70
not_intersect
71
mix
72
zip
73
subhash
74
hashtrans
75
zipb64
76
zipbin
77
unzipb64
78
unzipbin
79
gzip
80
gunzip
81
bzip2
82
bunzip2
83
ipaddr
84
ipnum
85
webparams
86
urlenc
87
urldec
88
ht2t
89
chall
90
makedir
91
qrlist
92
ansicolor
93
ccn_ok
94
KID_ok
95
writefile
96
readfile
97
readdirectory
98
basename
99
dirname
100
wipe
101
username
102
range
103
globr
104
permutations
105
trigram
106
sliding
107
chunks
108
chars
109
cart
110
reduce
111
int2roman
112
roman2int
113
num2code
114
code2num
115
gcd
116
lcm
117
pivot
118
tablestring
119
upper
120
lower
121
trim
122
rpad
123
lpad
124
cpad
125
dserialize
126
serialize
127
srlz
128
cnttbl
129
nicenum
130
bytes_readable
131
sec_readable
132
distance
133
tms
134
easter
135
time_fp
136
timems
137
sleep_fp
138
sleeps
139
sleepms
140
sleepus
141
sleepns
142
eta
143
sys
144
recursed
145
md5sum
146
pwgen
147
read_conf
148
openstr
149
ldist
150
$Re_isnum
151
isnum
152
part
153
parth
154
parta
155
a2h
156
h2a
157
refa
158
refh
159
refs
160
refaa
161
refah
162
refha
163
refhh
164
pushr
165
popr
166
shiftr
167
unshiftr
168
splicer
169
keysr
170
valuesr
171
eachr
172
ed
173
$Edcursor
174
brainfu
175
brainfu2perl
176
brainfu2perl_optimized
177
bfinit
178
bfsum
179
bfaddbf
180
bfadd
181
bfcheck
182
bfgrep
183
bfgrepnot
184
bfdelete
185
bfstore
186
bfretrieve
187
bfclone
188
bfdimensions
189
$PI
190
install_acme_command_tools
191
192
$Dbh
193
dlogin
194
dlogout
195
drow
196
drows
197
drowc
198
drowsc
199
dcols
200
dpk
201
dsel
202
ddo
203
dins
204
dupd
205
ddel
206
dcommit
207
drollback
208
);
209
210
our $PI = '3.141592653589793238462643383279502884197169399375105820974944592307816406286';
211
212
=head1 NAME
213
214
Acme::Tools - Lots of more or less useful subs lumped together and exported into your namespace
215
216
=head1 SYNOPSIS
217
218
use Acme::Tools;
219
220
print sum(1,2,3); # 6
221
print avg(2,3,4,6); # 3.75
222
223
my @list = minus(\@listA, \@listB); # set operations
224
my @list = union(\@listA, \@listB); # set operations
225
226
print length(gzip("abc" x 1000)); # far less than 3000
227
228
writefile("/dir/filename",$string); # convenient
229
my $s=readfile("/dir/filename"); # also conventient
230
231
print "yes!" if between($pi,3,4);
232
233
print percentile(0.05, @numbers);
234
235
my @even = range(1000,2000,2); # even numbers between 1000 and 2000
236
my @odd = range(1001,2001,2);
237
238
my $dice = random(1,6);
239
my $color = random(['red','green','blue','yellow','orange']);
240
241
...and more.
242
243
=encoding utf8
244
245
=head1 ABSTRACT
246
247
About 120 more or less useful perl subroutines lumped together and exported into your namespace.
248
249
=head1 DESCRIPTION
250
251
Subs created and collected since the mid-90s.
252
253
=head1 INSTALLATION
254
255
sudo cpan Acme::Tools
256
sudo cpanm Acme::Tools # after: sudo apt-get install cpanminus make # for Ubuntu 12.04
257
258
=head1 EXPORT
259
260
Almost every sub, about 90 of them.
261
262
Beware of namespace pollution. But what did you expect from an Acme module?
263
264
=head1 NUMBERS
265
266
=head2 num2code
267
268
See L
269
270
=head2 code2num
271
272
C convert numbers (integers) from the normal decimal system to some arbitrary other number system.
273
That can be binary (2), oct (8), hex (16) or others.
274
275
Example:
276
277
print num2code(255,2,"0123456789ABCDEF"); # prints FF
278
print num2code( 14,2,"0123456789ABCDEF"); # prints 0E
279
280
...because 255 are converted to hex FF (base C<< length("0123456789ABCDEF") >> ) which is 2 digits of 0-9 or A-F.
281
...and 14 are converted to 0E, with leading 0 because of the second argument 2.
282
283
Example:
284
285
print num2code(1234,16,"01")
286
287
Prints the 16 binary digits 0000010011010010 which is 1234 converted to binary zeros and ones.
288
289
To convert back:
290
291
print code2num("0000010011010010","01"); #prints 1234
292
293
C can be used to compress numeric IDs to something shorter:
294
295
$chars="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_";
296
print num2code("241274432",5,$chars); # prints EOOv0
297
print code2num("EOOv0",$chars); # prints 241274432
298
299
=cut
300
301
#Math::BaseCnv
302
303
sub num2code {
304
4
4
1
356
my($num,$digits,$validchars,$start)=@_;
305
4
6
my $l=length($validchars);
306
4
5
my $key;
307
33
33
141
no warnings;
33
41
33
160203
308
4
50
14
croak if $num<$start;
309
4
6
$num-=$start;
310
4
9
for(1..$digits){
311
25
33
$key=substr($validchars,$num%$l,1).$key;
312
25
27
$num=int($num/$l);
313
}
314
4
50
13
croak if $num>0;
315
4
14
return $key;
316
}
317
318
sub code2num {
319
1
50
1
1
3
my($code,$validchars,$start)=@_; $start=0 if !defined $start;
1
5
320
1
2
my $l=length($validchars);
321
1
2
my $num=0;
322
1
11
$num=$num*$l+index($validchars,$_) for split//,$code;
323
1
6
return $num+$start;
324
}
325
326
327
=head2 gcd
328
329
I< C<">The Euclidean algorithm (also called Euclid's algorithm) is an
330
algorithm to determine the greatest common divisor (gcd) of two
331
integers. It is one of the oldest algorithms known, since it appeared
332
in the classic Euclid's Elements around 300 BC. The algorithm does not
333
require factoring.C<"> >
334
335
B two or more positive numbers (integers, without decimals that is)
336
337
B an integer
338
339
B
340
341
print gcd(12, 8); # prints 4
342
343
Because the (prime number) factors of 12 is 2 * 2 * 3 and the factors of 8 is 2 * 2 * 2
344
and the common ('overlapping') factors for both 12 and 8 is then 2 * 2 and the result becomes 4.
345
346
B:
347
348
print gcd(90, 135, 315); # prints 45
349
print gcd(2*3*3*5, 3*3*3*5, 3*3*5*7); # prints 45 ( = 3*3*5 which is common to all three args)
350
351
Implementation:
352
353
sub gcd { my($a,$b,@r)=@_; @r ? gcd($a,gcd($b,@r)) : $b==0 ? $a : gcd($b, $a % $b) }
354
355
One way of putting it: Keep replacing the larger of the two numbers with the difference between them until you got two equal numbers. Then thats the answer.
356
357
L
358
359
L
360
361
=cut
362
363
26
100
26
1
273
sub gcd { my($a,$b,@r)=@_; @r ? gcd($a,gcd($b,@r)) : $b==0 ? $a : gcd($b, $a % $b) }
26
100
74
364
365
=head2 lcm
366
367
C finds the Least Common Multiple of two or more numbers (integers).
368
369
B two or more positive numbers (integers)
370
371
B an integer number
372
373
Example: C< 2/21 + 1/6 = 4/42 + 7/42 = 11/42>
374
375
Where 42 = lcm(21,6).
376
377
B
378
379
print lcm(45,120,75); # prints 1800
380
381
Because the factors are:
382
383
45 = 2^0 * 3^2 * 5^1
384
120 = 2^3 * 3^1 * 5^1
385
75 = 2^0 * 3^1 * 5^2
386
387
Take the bigest power of each primary number (2, 3 and 5 here).
388
Which is 2^3, 3^2 and 5^2. Multiplied this is 8 * 9 * 25 = 1800.
389
390
sub lcm { my($a,$b,@r)=@_; @r ? lcm($a,lcm($b,@r)) : $a*$b/gcd($a,$b) }
391
392
Seems to works with L as well: (C of all integers from 1 to 200)
393
394
perl -MAcme::Tools -MMath::BigInt -le'print lcm(map Math::BigInt->new($_),1..200)'
395
396
337293588832926264639465766794841407432394382785157234228847021917234018060677390066992000
397
398
=cut
399
400
3
100
3
1
5
sub lcm { my($a,$b,@r)=@_; @r ? lcm($a,lcm($b,@r)) : $a*$b/gcd($a,$b) }
3
10
401
402
=head2 resolve
403
404
Resolves an equation by Newtons method.
405
406
B 1-6 arguments. At least one argument.
407
408
First argument: must be a coderef to a subroutine (a function)
409
410
Second argument: if present, the target, f(x)=target. Default 0.
411
412
Third argument: a start position for x. Default 0.
413
414
Fourth argument: a small delta value. Default 1e-4 (0.0001).
415
416
Fifth argument: a maximum number of iterations before resolve gives up
417
and carps. Default 100 (if fifth argument is not given or is
418
undef). The number 0 means infinite here. If the derivative of the
419
start position is zero or close to zero more iterations are typically
420
needed.
421
422
Sixth argument: A number of seconds to run before giving up. If both
423
fifth and sixth argument is given and > 0, C stops at
424
whichever comes first.
425
426
B returns the number C for C = 0
427
428
...or equal to the second input argument if present.
429
430
B
431
432
The equation C<< x^2 - 4x - 21 = 0 >> has two solutions: -3 and 7.
433
434
The result of C will depend on the start position:
435
436
print resolve(sub{ $_**2 - 4*$_ - 21 }); # -3 with $_ as your x
437
print resolve(sub{ my $x=shift; $x**2 - 4*$x - 21 }); # -3 more elaborate call
438
print resolve(sub{ my $x=shift; $x**2 - 4*$x - 21 },0,3); # 7 with start position 3
439
print "Iterations: $Acme::Tools::Resolve_iterations\n"; # 3 or larger, about 10-15 is normal
440
441
The variable C< $Acme::Tools::Resolve_iterations > (which is exported) will be set
442
to the last number of iterations C used. Also if C dies (carps).
443
444
The variable C< $Acme::Tools::Resolve_last_estimate > (which is exported) will be
445
set to the last estimate. This number will often be close to the solution and can
446
be used even if C dies (carps).
447
448
B
449
450
If either second, third or fourth argument is an instance of L, so will the result be:
451
452
use Acme::Tools;
453
my $equation = sub{ $_ - 1 - 1/$_ };
454
my $gr1 = resolve( $equation, 0, 1 ); #
455
my $gr2 = resolve( $equation, 0, bigf(1) ); # 1/2 + sqrt(5)/2
456
bigscale(50);
457
my $gr3 = resolve( $equation, 0, bigf(1) ); # 1/2 + sqrt(5)/2
458
459
print 1/2 + sqrt(5)/2, "\n";
460
print "Golden ratio 1: $gr1\n";
461
print "Golden ratio 2: $gr2\n";
462
print "Golden ratio 3: $gr3\n";
463
464
Output:
465
466
1.61803398874989
467
Golden ratio 1: 1.61803398874989
468
Golden ratio 2: 1.61803398874989484820458683436563811772029300310882395927211731893236137472439025
469
Golden ratio 3: 1.6180339887498948482045868343656381177203091798057610016490334024184302360920167724737807104860909804
470
471
See:
472
473
L
474
475
L
476
477
L
478
479
=cut
480
481
our $Resolve_iterations;
482
our $Resolve_last_estimate;
483
our $Resolve_time;
484
485
#sub resolve(\[&$]@) {
486
#sub resolve(&@) { <=0.17
487
#todo: perl -MAcme::Tools -le 'print resolve(sub{$_[0]**2-9431**2});print$Acme::Tools::Resolve_iterations'
488
# =>Div by zero: df(x) = 0 at n'th iteration, n=0, delta=0.0001, fx=CODE(0xc81d470) at -e line 1
489
#todo: ren solve?
490
sub resolve {
491
16
16
1
1766
my($f,$goal,$start,$delta,$iters,$sec)=@_;
492
16
100
33
$goal=0 if !defined $goal;
493
16
100
26
$start=0 if !defined $start;
494
16
50
23
$delta=1e-4 if !defined $delta;
495
16
50
22
$iters=100 if !defined $iters;
496
16
100
29
$sec=0 if !defined $sec;
497
16
50
23
$iters=13e13 if $iters==0;
498
16
50
33
65
croak "Iterations ($iters) or seconds ($sec) can not be a negative number" if $iters<0 or $sec<0;
499
16
14
$Resolve_iterations=undef;
500
16
11
$Resolve_last_estimate=undef;
501
16
50
23
croak "Should have at least 1 argument, a coderef" if !@_;
502
16
50
30
croak "First argument should be a coderef" if ref($f) ne 'CODE';
503
504
16
22
my @x=($start);
505
16
100
24
my $time_start=$sec>0?time_fp():undef;
506
16
50
21
my $ds=ref($start) eq 'Math::BigFloat' ? Math::BigFloat->div_scale() : undef;
507
my $fx=sub{
508
559
559
370
local$_=$_[0];
509
559
790
my $fx=&$f($_);
510
559
100
66
2435
if($fx=~/x/ and $fx=~/^[ \(\)\.\d\+\-\*\/x\=\^]+$/){
511
1
8
$fx=~s/(\d)x/$1*x/g;
512
1
3
$fx=~s/\^/**/g;
513
1
2
$fx=~s/^(.*)=(.*)$/($1)-($2)/;
514
1
2
$fx=~s,x,\$_,g;
515
1
141
$f=eval"sub{$fx}";
516
1
16
$fx=&$f($_);
517
}
518
$fx
519
16
42
};
559
675
520
#warn "delta=$delta\n";
521
16
17
my $n=0;
522
16
28
while($n<=$iters-1){
523
182
222
my $fd= &$fx($x[$n]+$delta*0.5) - &$fx($x[$n]-$delta*0.5);
524
182
100
268
$fd = &$fx($x[$n]+$delta*0.7) - &$fx($x[$n]-$delta*0.3) if $fd==0;# and warn"wigle 1\n";
525
182
100
193
$fd = &$fx($x[$n]+$delta*0.2) - &$fx($x[$n]-$delta*0.8) if $fd==0;# and warn"wigle 2\n";
526
182
100
353
croak "Div by zero: df(x) = $x[$n] at n'th iteration, n=$n, delta=$delta, fx=$fx" if $fd==0;
527
181
161
$x[$n+1]=$x[$n]-(&$fx($x[$n])-$goal)/($fd/$delta);
528
181
139
$Resolve_last_estimate=$x[$n+1];
529
#warn "n=$n fd=$fd x=$x[$n+1]\n";
530
181
130
$Resolve_iterations=$n;
531
181
100
100
463
last if $n>3 and $x[$n+1]==$x[$n] and $x[$n]==$x[$n-1];
100
532
180
100
66
523
last if $n>4 and $x[$n]!=0 and abs(1-$x[$n+1]/$x[$n])<1e-13; #sub{3*$_+$_**4-12}
100
533
167
50
66
374
last if $n>3 and ref($x[$n+1]) eq 'Math::BigFloat' and substr($x[$n+1],0,$ds) eq substr($x[$n],0,$ds); #hm
33
534
167
100
66
233
croak "Could not resolve, perhaps too little time given ($sec), iteratons=$n"
535
if $sec>0 and ($Resolve_time=time_fp()-$time_start)>$sec;
536
#warn "$n: ".$x[$n+1]."\n";
537
166
195
$n++;
538
}
539
14
50
22
croak "Could not resolve, perhaps too few iterations ($iters)" if @x>=$iters;
540
14
113
return $x[-1];
541
}
542
543
=head2 resolve_equation
544
545
This prints 2:
546
547
print resolve_equation "x + 13*(3-x) = 17 - x"
548
549
A string containing at least one x is converted into a perl function.
550
Then x is found by using L. The string conversion is done by
551
replacing every x with $_ and if a C< = > char is present it converts
552
C< leftside = rightside > into C< (leftside) - (rightside) = 0 > which
553
is the default behaviour of L.
554
555
=cut
556
557
1
1
1
2
sub resolve_equation { my $e=shift;resolve(sub{$e},@_)}
1
1
2
1
5
558
559
=head2 conv
560
561
Converts between:
562
563
=over 4
564
565
=item * units of measurement
566
567
=item * number systems
568
569
=item * currencies
570
571
=back
572
573
B
574
575
print conv( 2000, "meters", "miles" ); #prints 1.24274238447467
576
print conv( 2.1, 'km', 'm'); #prints 2100
577
print conv( 70,"cm","in"); #prints 27.5590551181102
578
print conv( 4,"USD","EUR"); #prints 3.20481552905431 (depending on todays rates)
579
print conv( 4000,"b","kb"); #prints 3.90625 (1 kb = 1024 bytes)
580
print conv( 4000,"b","Kb"); #prints 4 (1 Kb = 1000 bytes)
581
print conv( 1000,"mb","kb"); #prints 1024000
582
print conv( 101010,"bin","roman"); #prints XLII
583
print conv( "DCCXLII","roman","oct"); #prints 1346
584
585
B are:>
586
587
Note: units starting with the symbol _ means that all metric
588
prefixes from yocto 10^-24 to yotta 10^+24 is supported, so _m means
589
km, mm, cm, µm and so on. And _N means kN, MN GN and so on.
590
591
Note2: Many units have synonyms: m, meter, meters ...
592
593
acceleration: g, g0, m/s2, mps2
594
595
angle: binary_degree, binary_radian, brad, deg, degree, degrees,
596
gon, grad, grade, gradian, gradians, hexacontade, hour,
597
new_degree, nygrad, point, quadrant, rad, radian, radians,
598
sextant, turn
599
600
area: a, ar, are, ares, bunder, ca, centiare, cho, cm2,
601
daa, decare, decares, deciare, dekar,
602
djerib, m2, dunam, dönüm, earths, feddan, ft2, gongqing, ha
603
ha, hectare, hectares, hektar, jerib, km2, m2, manzana,
604
mi2, mm2, mu, qing, rai, sotka,
605
sqcm, sqft, sqkm, sqm, sqmi, sqmm
606
stremmata, um2, µm2
607
608
bytes: Eb, Gb, Kb, KiB, Mb, Pb, Tb, Yb, Zb, b, byte,
609
kb, kilobyte, mb, megabyte,
610
gb, gigabyte, tb, terabyte,
611
pb, petabyte, eb, exabyte,
612
zb, zettabyte, yb, yottabyte
613
614
charge: As, C, _e, coulomb, e
615
616
current: A, _A, N/m2
617
618
energy: BTU, Btu, J, Nm, W/s, Wh, Wps, Ws, _J, _eV,
619
cal, calorie, calories, eV, electronvolt, BeV,
620
erg, ergs, foot-pound, foot-pounds, ftlb, joule, kWh,
621
kcal, kilocalorie, kilocalories,
622
newtonmeter, newtonmeters, th, thermie
623
624
force: N, _N, dyn, dyne, dynes, lb, newton
625
626
length: NM, _m, _pc, astronomical unit, au, chain, ft, furlong,
627
in, inch, inches, km, league, lightyear, ls, ly,
628
m, meter, meters, mi, mil, mile, miles,
629
nautical mile, nautical miles, nmi,
630
parsec, pc, planck, yard, yard_imperical, yd, Å, ångstrøm
631
632
mass: Da, _eV, _g, bag, carat, ct, dwt, eV, electronvolt, g,
633
grain, grains, gram, grams, kilo, kilos, kt, lb, lb_av,
634
lb_t, lb_troy, lbs, ounce, ounce_av, ounce_troy, oz, oz_av, oz_t,
635
pennyweight, pound, pound_av, pound_metric, pound_troy, pounds,
636
pwt, seer, sl, slug, solar_mass, st, stone, t, tonn, tonne, tonnes, u, wey
637
638
mileage: mpg, l/100km, l/km, l/10km, lp10km, l/mil, liter_pr_100km, liter_pr_km, lp100km
639
640
money: AED, ARS, AUD, BGN, BHD, BND, BRL, BWP, CAD, CHF, CLP, CNY,
641
COP, CZK, DKK, EUR, GBP, HKD, HRK, HUF, IDR, ILS, INR, IRR,
642
ISK, JPY, KRW, KWD, KZT, LKR, LTL, LVL, LYD, MUR, MXN, MYR,
643
NOK, NPR, NZD, OMR, PHP, PKR, PLN, QAR, RON, RUB, SAR, SEK,
644
SGD, THB, TRY, TTD, TWD, USD, VEF, ZAR, BTC, LTC, mBTC, XBT
645
Currency rates are automatically updated from the net
646
at least every 24h since last update (on linux/cygwin).
647
648
numbers: dec, hex, bin, oct, roman, dozen, doz, dz, dusin, gross, gro,
649
gr, great_gross, small_gross (not supported: decimal numbers)
650
651
power: BTU, BTU/h, BTU/s, BTUph, GWhpy, J/s, Jps, MWhpy, TWhpy,
652
W, Whpy, _W, ftlb/min, ftlb/s, hk, hp, kWh/yr, kWhpy
653
654
pressure: N/m2, Pa, _Pa, at, atm, bar, mbar, pascal, psi, torr
655
656
radioactivity: Bq, becquerel, curie
657
658
speed: _m/s, km/h, km/t, kmh, kmph, kmt, m/s, mi/h, mph, mps,
659
kn, knot, knots, kt, mach, machs, c, fps, ft/s, ftps
660
661
temperature: C, F, K, celsius, fahrenheit, kelvin
662
663
time: _s, biennium, century, d, day, days, decade, dy, fortnight,
664
h, hour, hours, hr, indiction, jubilee, ke, lustrum, m,
665
millennium, min, minute, minutes, mo, moment, mon, month,
666
olympiad, quarter, s, season, sec, second, seconds, shake,
667
tp, triennium, w, week, weeks, y, y365, ySI, ycommon,
668
year, years, ygregorian, yjulian, ysideral, ytropical
669
670
volume: l, L, _L, _l, cm3, m3, ft3, in3, liter, liters, litre, litres,
671
gal, gallon, gallon_imp, gallon_uk, gallon_us, gallons,
672
pint, pint_imp, pint_uk, pint_us, tsp, tablespoon, teaspoon,
673
floz, floz_uk, therm, thm, fat, bbl, Mbbl, MMbbl, drum,
674
container (or container20), container40, container40HC, container45HC
675
676
See: L
677
678
=cut
679
680
#TODO: @arr2=conv(\@arr1,"from","to") # should be way faster than:
681
#TODO: @arr2=map conv($_,"from","to"),@arr1
682
#TODO: conv(123456789,'b','h'); # h converts to something human-readable
683
684
our %conv=(
685
length=>{
686
m => 1,
687
_m => 1,
688
meter => 1,
689
meters => 1,
690
km => 1000,
691
mil => 10000, #scandinavian #also: inch/1000!
692
in => 0.0254,
693
inch => 0.0254,
694
inches => 0.0254,
695
ft => 0.0254*12, #0.3048 m
696
feet => 0.0254*12, #0.3048 m
697
yd => 0.0254*12*3, #0.9144 m
698
yard => 0.0254*12*3, #0.9144 m
699
yards => 0.0254*12*3, #0.9144 m
700
fathom => 0.0254*12*3*2, #1.8288 m
701
fathoms => 0.0254*12*3*2, #1.8288 m
702
chain => 0.0254*12*3*22, #20.1168 m
703
chains => 0.0254*12*3*22, #20.1168 m
704
furlong => 0.0254*12*3*22*10, #201.168 m
705
furlongs=> 0.0254*12*3*22*10, #201.168 m
706
mi => 0.0254*12*3*22*10*8, #1609.344 m
707
mile => 0.0254*12*3*22*10*8, #1609.344 m
708
miles => 0.0254*12*3*22*10*8,
709
league => 0.0254*12*3*22*10*8*3, #4828.032 m
710
leagues => 0.0254*12*3*22*10*8*3, #4828.032 m
711
yard_imp => 0.914398416,
712
yard_imperical => 0.914398416,
713
NM => 1852, #nautical mile
714
nmi => 1852, #nautical mile
715
'nautical mile' => 1852,
716
'nautical miles' => 1852,
717
'Å' => 1e-10,
718
'ångstrøm' => 1e-10,
719
ly => 299792458*3600*24*365.25,
720
lightyear => 299792458*3600*24*365.25, # = 9460730472580800 by def
721
ls => 299792458, #light-second
722
pc => 3.0857e16, #3.26156 ly
723
_pc => 3.0857e16, #3.26156 ly
724
parsec => 3.0857e16,
725
au => 149597870700, # by def, earth-sun
726
'astronomical unit' => 149597870700,
727
planck => 1.61619997e-35, #planck length
728
#Norwegian (old) lengths:
729
tomme => 0.0254,
730
tommer => 0.0254,
731
fot => 0.0254*12, #0.3048m
732
alen => 0.0254*12*2, #0.6096m
733
favn => 0.0254*12*2*3, #1.8288m
734
kvart => 0.0254*12*2/4, #0.1524m a quarter alen
735
},
736
mass =>{ #https://en.wikipedia.org/wiki/Unit_conversion#Mass
737
g => 1,
738
_g => 1,
739
gram => 1,
740
grams => 1,
741
kilo => 1000,
742
kilos => 1000,
743
t => 1000000,
744
tonn => 1000000,
745
tonne => 1000000,
746
tonnes => 1000000,
747
seer => 933.1,
748
lb => 453.59237,
749
lbs => 453.59237,
750
lb_av => 453.59237,
751
lb_t => 373.2417216, #5760 grains
752
lb_troy => 373.2417216,
753
pound => 453.59237,
754
pounds => 453.59237,
755
pound_av => 453.59237,
756
pound_troy => 373.2417216,
757
pound_metric => 500,
758
ounce => 28, # US food, 28g
759
ounce_av => 453.59237/16, # avoirdupois lb/16 = 28.349523125g
760
ounce_troy => 31.1034768, # lb_troy / 12
761
oz => 28, # US food, 28g
762
oz_av => 453.59237/16, # avoirdupois lb/16 = 28.349523125g
763
oz_t => 31.1034768, # lb_troy / 12,
764
grain => 64.79891/1000, # 453.59237/7000
765
grains => 64.79891/1000,
766
pennyweight => 31.1034768 / 20,
767
pwt => 31.1034768 / 20,
768
dwt => 31.1034768 / 20,
769
st => 6350.29318, # 14 lb_av
770
stone => 6350.29318,
771
wey => 114305.27724, # 252 lb = 18 stone
772
carat => 0.2,
773
ct => 0.2, #carat (metric)
774
kt => 64.79891/1000 * (3+1/6), #carat/karat
775
u => 1.66053892173e-30, #atomic mass carbon-12
776
Da => 1.66053892173e-30, #atomic mass carbon-12
777
slug => 14600,
778
sl => 14600,
779
eV => 1.78266172802679e-33, #e=mc2 = 1.60217646e-19 J / (2.99792458e8 m/s)**2
780
_eV => 1.78266172802679e-33,
781
electronvolt => 1.78266172802679e-33,
782
'solar mass' => 1.99e33,
783
solar_mass => 1.99e33,
784
bag => 60*1000, #60kg coffee
785
},
786
area =>{ # https://en.wikipedia.org/wiki/Unit_conversion#Area
787
m2 => 1,
788
dm2 => 0.1**2,
789
cm2 => 0.01**2,
790
mm2 => 0.001**2,
791
'µm2' => 1e-6**2,
792
um2 => 1e-6**2,
793
sqm => 1,
794
sqcm => 0.01**2,
795
sqmm => 0.001**2,
796
km2 => 1000**2,
797
sqkm => 1000**2,
798
a => 100,
799
ar => 100,
800
are => 100,
801
ares => 100,
802
dekar => 1000,
803
decare => 1000,
804
decares => 1000,
805
daa => 1000,
806
'mål' => 1000,
807
ha => 10000,
808
hektar => 10000,
809
hectare => 10000,
810
hectares=> 10000,
811
ft2 => (0.0254*12)**2,
812
sqft => (0.0254*12)**2,
813
mi2 => 1609.344**2,
814
sqmi => 1609.344**2,
815
yd2 => (0.0254*12*3)**2, #square yard
816
sqyd => (0.0254*12*3)**2,
817
yard2 => (0.0254*12*3)**2,
818
sqyard => (0.0254*12*3)**2,
819
rood => 1210*(0.0254*12*3)**2, # 1/4 acres
820
roods => 1210*(0.0254*12*3)**2, # 1/4 acres
821
ac => 4840*(0.0254*12*3)**2, # 4840 square yards = 1 chain x 1 furlong
822
acre => 4840*(0.0254*12*3)**2,
823
acres => 4840*(0.0254*12*3)**2,
824
homestead => 4840*(0.0254*12)**2 *160, #160 acres US Surveyors or 1/4 sqmiles
825
township => 4840*(0.0254*12)**2 *160*144, #144 homesteads or 36 sqmiles
826
perches => 4840*(0.0254*12)**2 /160, #160 perches = 1 acre in sri lanka
827
sotka => 100, #russian are
828
jerib => 10000, #iran hectare
829
djerib => 10000, #turkish hectare
830
gongqing => 10000, #chinese hectare
831
manzana => 10000, #argentinian hectare
832
bunder => 10000, #dutch hectare
833
centiare => 1,
834
deciare => 10,
835
ca => 1,
836
mu => 10000/15, #China
837
qing => 10000/0.15, #China
838
dunam => 10000/10, #Middle East
839
'dönüm' => 10000/10, #Middle East
840
stremmata => 10000/10, #Greece
841
rai => 10000/6.25, #Thailand
842
cho => 10000/1.008, #Japan
843
feddan => 10000/2.381, #Egypt
844
earths => 510072000e6, #510072000 km2, surface area of earth
845
barn => 1e-28, #physics
846
outhouse => 1e-34, #physics
847
shed => 1e-52, #physics
848
},
849
volume=>{
850
l => 1,
851
L => 1,
852
_L => 1,
853
_l => 1,
854
liter => 1,
855
liters => 1,
856
litre => 1,
857
litres => 1,
858
gal => 231*2.54**3/1000, #3.785411784, #231 cubic inches
859
gallon => 231*2.54**3/1000,
860
gallons => 231*2.54**3/1000,
861
gallon_us => 231*2.54**3/1000,
862
gallon_uk => 4.54609,
863
gallon_imp=> 4.54609,
864
gallon_us_dry => 4.40488377086, # ~ 9.25**2*pi*2.54**3/1000 L
865
m3 => 10**3, #1000 L
866
cm3 => 0.1**3, #0.001 L
867
in3 => 0.254**3, #0.016387064 L
868
ft3 => (0.254*12)**3,
869
tablespoon=> 3.785411784/256, #14.78676478125 mL
870
tsp => 3.785411784/256/3, #4.92892159375 mL
871
teaspoon => 3.785411784/256/3, #4.92892159375 mL
872
floz => 3.785411784/128, #fluid ounce US
873
floz_uk => 4.54609/160, #fluid ounce UK
874
pint => 4.54609/8, #0.56826125 L
875
pint_uk => 4.54609/8,
876
pint_imp => 4.54609/8,
877
pint_us => 3.785411784/8, #0.473176473
878
therm => 2.74e3, #? 100000BTUs? (!= thermie)
879
thm => 2.74e3, #? (!= th)
880
fat => 42*231*2.54**3/1000,
881
bbl => 42*231*2.54**3/1000, #oil barrel ~159 liters https://en.wikipedia.org/wiki/Barrel_(unit)
882
Mbbl => 42*231*2.54**3, #mille (thousand) oil barrels
883
MMbbl => 42*231*2.54**3*1000, #mille mille (million) oil barrels
884
drum => 200,
885
container => 33.1e3, #container20
886
container20 => 33.1e3,
887
container40 => 67.5e3,
888
container40HC => 75.3e3,
889
container45HC => 86.1e3,
890
#Norwegian:
891
meterfavn => 2 * 2 * 0.6, #ved 2.4 m3
892
storfavn => 2 * 2 * 3, #ved 12 m3
893
},
894
time =>{
895
s => 1,
896
_s => 1,
897
sec => 1,
898
second => 1,
899
seconds => 1,
900
m => 60,
901
min => 60,
902
minute => 60,
903
minutes => 60,
904
h => 60*60,
905
hr => 60*60,
906
hour => 60*60,
907
hours => 60*60,
908
d => 60*60*24,
909
dy => 60*60*24,
910
day => 60*60*24,
911
days => 60*60*24,
912
w => 60*60*24*7,
913
week => 60*60*24*7,
914
weeks => 60*60*24*7,
915
mo => 60*60*24 * 365.2425/12,
916
mon => 60*60*24 * 365.2425/12,
917
month => 60*60*24 * 365.2425/12,
918
quarter => 60*60*24 * 365.2425/12 * 3, #3 months
919
season => 60*60*24 * 365.2425/12 * 3, #3 months
920
y => 60*60*24 * 365.2425, # 365+97/400 #97 leap yers in 400 years
921
year => 60*60*24 * 365.2425,
922
years => 60*60*24 * 365.2425,
923
yjulian => 60*60*24 * 365.25, # 365+1/4
924
y365 => 60*60*24 * 365, # finance/science
925
ycommon => 60*60*24 * 365, # finance/science
926
ygregorian => 60*60*24 * 365.2425, # 365+97/400
927
#ygaussian => 365+(6*3600+9*60+56)/(24*3600), # 365+97/400
928
ytropical => 60*60*24 * 365.24219,
929
ysideral => 365.256363004,
930
ySI => 60*60*24*365.25, #31556925.9747
931
decade => 10 * 60*60*24*365.2425,
932
biennium => 2 * 60*60*24*365.2425,
933
triennium => 3 * 60*60*24*365.2425,
934
olympiad => 4 * 60*60*24*365.2425,
935
lustrum => 5 * 60*60*24*365.2425,
936
indiction => 15 * 60*60*24*365.2425,
937
jubilee => 50 * 60*60*24*365.2425,
938
century => 100 * 60*60*24*365.2425,
939
millennium => 1000 * 60*60*24*365.2425,
940
shake => 1e-8,
941
moment => 3600/40, #1/40th of an hour, used by Medieval Western European computists
942
ke => 864, #1/100th of a day, trad Chinese, 14m24s
943
fortnight => 14*24*3600,
944
tp => 5.3910632e-44, #planck time, time for ligth to travel 1 planck length
945
},
946
speed=>{
947
'm/s' => 1,
948
'_m/s' => 1,
949
mps => 1,
950
mph => 1609.344/3600,
951
'mi/h' => 1609.344/3600,
952
kmh => 1/3.6,
953
kmph => 1/3.6,
954
'km/h' => 1/3.6,
955
kmt => 1/3.6, # t=time or temps (scandinavian and french and dutch)
956
'km/t' => 1/3.6,
957
kn => 1852/3600,
958
kt => 1852/3600,
959
knot => 1852/3600,
960
knop => 1852/3600, #scandinavian
961
knots => 1852/3600,
962
c => 299792458, #speed of light
963
mach => 340.3, #speed of sound
964
machs => 340.3,
965
fps => 0.3048, #0.0254*12
966
ftps => 0.3048,
967
'ft/s' => 0.3048,
968
},
969
acceleration=>{
970
'm/s2' => 1,
971
'mps2' => 1,
972
g => 9.80665,
973
g0 => 9.80665,
974
#0-100kmh or ca 0-60 mph x seconds...
975
},
976
temperature=>{ #http://en.wikipedia.org/wiki/Temperature#Conversion
977
C=>1, F=>1, K=>1, celsius=>1, fahrenheit=>1, kelvin=>1
978
},
979
radioactivity=>{
980
Bq => 1,
981
becquerel => 1,
982
curie => 3.7e10,
983
},
984
current=> {
985
A => 1,
986
_A => 1,
987
'N/m2' => 2e-7,
988
},
989
charge=>{
990
e => 1,
991
_e => 1,
992
C => 6.24150964712042e+18,
993
coulomb => 6.24150964712042e+18,
994
As => 6.24150964712042e+18,
995
#Faraday unit of charge ???
996
},
997
power=> {
998
W => 1,
999
_W => 1,
1000
'J/s' => 1,
1001
Jps => 1,
1002
hp => 746,
1003
hk => 746, #hestekrefter (norwegian, scandinavian)
1004
PS => 746/1.014, #pferdestärken
1005
'kWh/yr' => 1000 * 3600/(24*365), #kWh annually
1006
Whpy => 3600/(24*365), #kWh annually
1007
kWhpy => 1000 * 3600/(24*365), #kWh annually
1008
MWhpy => 1000**2 * 3600/(24*365), #kWh annually
1009
GWhpy => 1000**3 * 3600/(24*365), #kWh annually
1010
TWhpy => 1000**4 * 3600/(24*365), #kWh annually
1011
BTU => 1055.05585262/3600, #
1012
BTUph => 1055.05585262/3600,
1013
'BTU/h' => 1055.05585262/3600,
1014
'BTU/s' => 1055.05585262,
1015
'ftlb/s' => 746/550,
1016
'ftlb/min'=> 746/550/60,
1017
},
1018
energy=>{
1019
joule => 1,
1020
J => 1,
1021
_J => 1,
1022
Ws => 1,
1023
Wps => 1,
1024
'W/s' => 1,
1025
Nm => 1,
1026
newtonmeter => 1,
1027
newtonmeters => 1,
1028
Wh => 3600,
1029
kWh => 3600000, #3.6 million J
1030
cal => 4.1868, # ~ 3600/860
1031
calorie => 4.1868,
1032
calories => 4.1868,
1033
kcal => 4.1868*1000,
1034
kilocalorie => 4.1868*1000,
1035
kilocalories => 4.1868*1000,
1036
BTU => 4.1868 * 252, # = 1055.0736 or is 1055.05585262 right?
1037
Btu => 4.1868 * 252,
1038
ftlb => 746/550, # ~ 1/0.7375621
1039
'foot-pound' => 746/550,
1040
'foot-pounds' => 746/550,
1041
erg => 1e-7,
1042
ergs => 1e-7,
1043
eV => 1.60217656535e-19,
1044
_eV => 1.60217656535e-19,
1045
BeV => 1.60217656535e-10,
1046
electronvolt => 1.60217656535e-19,
1047
thermie => 4.1868e6,
1048
th => 4.1868e6,
1049
hph => 3600*746,
1050
PSh => 3600*746/1.014,
1051
galatm_imp => 460.63256925,
1052
galatm_US => 383.5568490138,
1053
quad => 1.05505585262e18,
1054
Ry => 2.179872e-18,
1055
rydberg => 2.179872e-18,
1056
th => 4.1868e6,
1057
thermie => 4.1868e6,
1058
boe => 6.12e9, #barrel of oil equivalent
1059
TCE => 29.288e9, #ton of coal equivalent
1060
toe => 41.868e9, #tonne of oil equivalent
1061
tTNT => 4.184e9, #ton of TNT equivalent
1062
},
1063
force=> {
1064
newton=> 1,
1065
N => 1,
1066
_N => 1,
1067
dyn => 1e-5,
1068
dyne => 1e-5,
1069
dynes => 1e-5,
1070
lb => 4.448222,
1071
},
1072
pressure=>{
1073
Pa => 1,
1074
_Pa => 1,
1075
pascal => 1,
1076
'N/m2' => 1,
1077
bar => 100000.0,
1078
mbar => 100.0,
1079
at => 98066.5, #technical atmosphere
1080
atm => 101325.0, #standard atmosphere
1081
torr => 133.3224,
1082
psi => 6894.8, #pounds per square inch
1083
},
1084
bytes=> {
1085
b => 1,
1086
kb => 1024, #2**10
1087
mb => 1024**2, #2**20 = 1048576
1088
gb => 1024**3, #2**30 = 1073741824
1089
tb => 1024**4, #2**40 = 1099511627776
1090
pb => 1024**5, #2**50 = 1.12589990684262e+15
1091
eb => 1024**6, #2**60 =
1092
zb => 1024**7, #2**70 =
1093
yb => 1024**8, #2**80 =
1094
KiB => 1024, #2**10
1095
KiB => 1024**2, #2**20 = 1048576
1096
KiB => 1024**3, #2**30 = 1073741824
1097
KiB => 1024**4, #2**40 = 1099511627776
1098
KiB => 1024**5, #2**50 = 1.12589990684262e+15
1099
KiB => 1024**6, #2**60 =
1100
KiB => 1024**7, #2**70 =
1101
KiB => 1024**8, #2**80 =
1102
Kb => 1000, #2**10
1103
Mb => 1000**2, #2**20 = 1048576
1104
Gb => 1000**3, #2**30 = 1073741824
1105
Tb => 1000**4, #2**40 = 1099511627776
1106
Pb => 1000**5, #2**50 = 1.12589990684262e+15
1107
Eb => 1000**6, #2**60 =
1108
Zb => 1000**7, #2**70 =
1109
Yb => 1000**8, #2**80 =
1110
byte => 1,
1111
kilobyte => 1024, #2**10
1112
megabyte => 1024**2, #2**20 = 1048576
1113
gigabyte => 1024**3, #2**30 = 1073741824
1114
terabyte => 1024**4, #2**40 = 1099511627776
1115
petabyte => 1024**5, #2**50 = 1.12589990684262e+15
1116
exabyte => 1024**6, #2**60 =
1117
zettabyte => 1024**7, #2**70 =
1118
yottabyte => 1024**8, #2**80 =
1119
},
1120
milage=>{ #fuel consumption
1121
'l/mil' => 1,
1122
'l/10km' => 1,
1123
'lp10km' => 1,
1124
'l/km' => 10,
1125
'l/100km' => 1/10,
1126
lp100km => 1/10,
1127
liter_pr_100km => 1/10,
1128
liter_pr_km => 10,
1129
mpg => -23.5214584, #negative signals inverse
1130
},
1131
# light=> {
1132
# cd => 1,
1133
# candela => 1,
1134
# },
1135
# lumens
1136
# lux
1137
angle =>{
1138
turn => 1,
1139
rad => 1/(2*$PI), # 2 * pi
1140
radian => 1/(2*$PI), # 2 * pi
1141
radians => 1/(2*$PI), # 2 * pi
1142
deg => 1/360, # 4 * 90
1143
degree => 1/360, # 4 * 90
1144
degrees => 1/360, # 4 * 90
1145
grad => 1/400,
1146
gradian => 1/400,
1147
gradians => 1/400,
1148
grade => 1/400, #french revolutionary unit
1149
gon => 1/400,
1150
new_degree => 1/400,
1151
nygrad => 1/400, #scandinavian
1152
quadrant => 1/4,
1153
sextant => 1/6,
1154
hour => 1/24,
1155
point => 1/32, #used in navigation
1156
hexacontade => 1/60,
1157
binary_degree => 1/256,
1158
binary_radian => 1/256,
1159
brad => 1/256,
1160
},
1161
money =>{ # rates at dec 17 2015
1162
AED => 2.389117, #
1163
ARS => 0.895122, #
1164
AUD => 6.253619, #
1165
BGN => 4.847575, #
1166
BHD => 23.267384, #
1167
BND => 6.184624, #
1168
BRL => 2.260703, #
1169
BTC => 3910.932213547, #bitcoin
1170
BWP => 0.794654, #
1171
CAD => 6.289957, #
1172
CHF => 8.799974, #
1173
CLP => 0.012410, #
1174
CNY => 1.353406, #
1175
COP => 0.00262229, #
1176
CZK => 0.351171, #
1177
DKK => 1.271914, #
1178
EUR => 9.489926, #
1179
GBP => 13.069440, #
1180
HKD => 1.131783, #
1181
HRK => 1.240878, #
1182
HUF => 0.029947, #
1183
IDR => 0.00062471, #
1184
ILS => 2.254456, #
1185
INR => 0.132063, #
1186
IRR => 0.00029370, #
1187
ISK => 0.067245, #
1188
JPY => 0.071492, #
1189
KRW => 0.00739237, #
1190
KWD => 28.862497, #
1191
KZT => 0.027766, #
1192
LKR => 0.061173, #
1193
LTC => 31.78895354018, #litecoin
1194
LTL => 2.748472, #
1195
LVL => 13.503025, #
1196
LYD => 6.296978, #
1197
MUR => 0.240080, #
1198
MXN => 0.515159, #
1199
MYR => 2.032465, #
1200
NOK => 1.000000000, #norwegian kroner
1201
NPR => 0.084980, #
1202
NZD => 5.878331, #
1203
OMR => 22.795994, #
1204
PHP => 0.184839, #
1205
PKR => 0.083779, #
1206
PLN => 2.207243, #
1207
QAR => 2.409162, #
1208
RON => 2.101513, #
1209
RUB => 0.122991, #
1210
SAR => 2.339745, #
1211
SEK => 1.023591, #
1212
SGD => 6.184624, #
1213
THB => 0.242767, #
1214
TRY => 2.994338, #
1215
TTD => 1.374484, #
1216
TWD => 0.265806, #
1217
USD => 8.774159, #
1218
VEF => 1.395461, #
1219
ZAR => 0.576487, #
1220
XBT => 3910.932213547, # bitcoin
1221
mBTC => 3910.932213547, # bitcoin
1222
mXBT => 3910.932213547, # bitcoin
1223
},
1224
numbers =>{
1225
dec=>1,hex=>1,bin=>1,oct=>1,roman=>1, des=>1,#des: spelling error in v0.15-0.16
1226
dusin=>1,dozen=>1,doz=>1,dz=>1,gross=>144,gr=>144,gro=>144,great_gross=>12*144,small_gross=>10*12,
1227
}
1228
);
1229
our $conv_prepare_time=0;
1230
our $conv_prepare_money_time=0;
1231
sub conv_prepare {
1232
1
1
0
10
my %b =(da =>1e+1, h =>1e+2, k =>1e+3, M =>1e+6, G =>1e+9, T =>1e+12, P =>1e+15, E =>1e+18, Z =>1e+21, Y =>1e+24, H =>1e+27);
1233
1
8
my %big =(deca=>1e+1, hecto=>1e+2, kilo =>1e+3, mega =>1e+6, giga=>1e+9, tera=>1e+12, peta =>1e+15, exa =>1e+18, zetta=>1e+21, yotta=>1e+24, hella=>1e+27);
1234
1
7
my %s =(d =>1e-1, c =>1e-2, m =>1e-3,'µ' =>1e-6, u=>1e-6, n =>1e-9, p =>1e-12, f =>1e-15, a =>1e-18, z =>1e-21, y =>1e-24);
1235
1
6
my %small=(deci=>1e-1, centi=>1e-2, milli=>1e-3, micro =>1e-6, nano=>1e-9, pico=>1e-12, femto=>1e-15, atto=>1e-18, zepto=>1e-21, yocto=>1e-24);
1236
# myria=> 10000 #obsolete
1237
# demi => 1/2, double => 2 #obsolete
1238
# lakh => 1e5, crore => 1e7 #south asian
1239
1
12
my %x = (%s,%b);
1240
1
6
for my $type (keys%conv) {
1241
20
13
for(grep/^_/,keys%{$conv{$type}}) {
20
214
1242
15
14
my $c=$conv{$type}{$_};
1243
15
14
delete$conv{$type}{$_};
1244
15
16
my $unit=substr($_,1);
1245
15
319
$conv{$type}{$_.$unit}=$x{$_}*$c for keys%x;
1246
}
1247
}
1248
1
13
$conv_prepare_time=time();
1249
}
1250
1251
our $Currency_rates_url = 'http://calthis.com/currency-rates';
1252
our $Currency_rates_expire = 6*3600;
1253
sub conv_prepare_money {
1254
0
0
0
0
eval {
1255
0
0
require LWP::Simple;
1256
0
0
0
my $td=$^O=~/^(?:linux|cygwin)$/?"/tmp":"/tmp"; #hm wrong!
1257
0
0
my $fn="$td/acme-tools-currency-rates.data";
1258
0
0
0
0
if( !-e$fn or time() - (stat($fn))[9] >= $Currency_rates_expire){
1259
0
0
LWP::Simple::getstore($Currency_rates_url,"$fn.$$.tmp"); # get ... see getrates.cmd
1260
0
0
0
die "nothing downloaded" if !-s"$fn.$$.tmp";
1261
0
0
rename "$fn.$$.tmp",$fn;
1262
0
0
chmod 0666,$fn;
1263
}
1264
0
0
my $d=readfile($fn);
1265
0
0
my %r=$d=~/^\s*([A-Z]{3}) +(\d+\.\d+)\b/gm;
1266
0
0
$r{lc($_)}=$r{$_} for keys%r;
1267
#warn serialize([minus([sort keys(%r)],[sort keys(%{$conv{money}})])],'minus'); #ARS,AED,COP,BWP,LVL,BHD,NPR,LKR,QAR,KWD,LYD,SAR,KZT,CLP,IRR,VEF,TTD,OMR,MUR,BND
1268
#warn serialize([minus([sort keys(%{$conv{money}})],[sort keys(%r)])],'minus'); #LTC,I44,BTC,BYR,TWI,NOK,XDR
1269
0
0
0
$conv{money}={%{$conv{money}},%r} if keys(%r)>20;
0
0
1270
};
1271
0
0
0
carp "conv: conv_prepare_money (currency conversion automatic daily updated rates) - $@\n" if $@;
1272
0
0
$conv{money}{"m$_"}=$conv{money}{$_}/1000 for qw/BTC XBT/;
1273
0
0
$conv_prepare_money_time=time();
1274
0
0
1; #not yet
1275
}
1276
1277
sub conv {
1278
43
43
1
6692
my($num,$from,$to)=@_;
1279
43
50
80
croak "conf requires 3 args" if @_!=3;
1280
43
100
78
conv_prepare() if !$conv_prepare_time;
1281
43
86
116
my $types=sub{ my $unit=shift; [sort grep$conv{$_}{$unit}, keys%conv] };
86
61
86
913
1282
43
33
57
my @types=map{ my $ru=$_; my $r;$r=&$types($_) and @$r and $$ru=$_ and last for ($$ru,uc($$ru),lc($$ru)); $r }(\$from,\$to);
86
33
61
86
50
50
86
147
86
157
1283
43
44
my @err=map "Unit ".[$from,$to]->[$_]." is unknown",grep!@{$types[$_]},0..1;
86
91
1284
43
58
my @type=intersect(@types);
1285
43
50
60
push @err, "from=$from and to=$to has more than one possible conversions: ".join(", ", @type) if @type>1;
1286
push @err, "from $from (".(join(",",@{$types[0]})||'?').") and "
1287
43
50
0
55
."to $to (" .(join(",",@{$types[1]})||'?').") has no known common unit type.\n" if @type<1;
0
1288
43
50
53
croak join"\n",map"conv: $_",@err if @err;
1289
43
33
my $type=$type[0];
1290
43
50
66
79
conv_prepare_money() if $type eq 'money' and time() >= $conv_prepare_money_time + $Currency_rates_expire;
1291
43
100
60
return conv_temperature(@_) if $type eq 'temperature';
1292
36
100
47
return conv_numbers(@_) if $type eq 'numbers';
1293
31
25
my $c=$conv{$type};
1294
31
24
my($cf,$ct)=@{$conv{$type}}{$from,$to};
31
42
1295
31
100
100
141
my $r= $cf>0 && $ct<0 ? -$ct/$num/$cf
100
66
1296
: $cf<0 && $ct>0 ? -$cf/$num/$ct
1297
: $cf*$num/$ct;
1298
# print STDERR "$num $from => $to from=$ff to=$ft r=$r\n";
1299
31
105
return $r;
1300
}
1301
1302
sub conv_temperature { #http://en.wikipedia.org/wiki/Temperature#Conversion
1303
7
7
0
22
my($t,$from,$to)=(shift(),map uc(substr($_,0,1)),@_);
1304
7
100
16
$from=~s/K/C/ and $t-=273.15;
1305
#$from=~s/R/F/ and $t-=459.67; #rankine
1306
7
100
14
return $t if $from eq $to;
1307
1
1
7
{CK=>sub{$t+273.15},
1308
3
3
61
FC=>sub{($t-32)*5/9},
1309
2
2
13
CF=>sub{$t*9/5+32},
1310
0
0
0
FK=>sub{($t-32)*5/9+273.15},
1311
6
39
}->{$from.$to}->();
1312
}
1313
1314
sub conv_numbers {
1315
5
5
0
5
my($n,$fr,$to)=@_;
1316
5
0
29
my $dec=$fr eq 'dec' ? $n
50
50
50
50
50
50
100
50
100
50
1317
:$fr eq 'hex' ? hex($n)
1318
:$fr eq 'oct' ? oct($n)
1319
:$fr eq 'bin' ? oct("0b$n")
1320
:$fr =~ /^(dusin|dozen|doz|dz)$/ ? $n*12
1321
:$fr =~ /^(gross|gr|gro)$/ ? $n*144
1322
:$fr eq 'great_gross' ? $n*12*144
1323
:$fr eq 'small_gross' ? $n*12*10
1324
:$fr eq 'skokk' ? $n*60 #norwegian unit
1325
:$fr eq 'roman' ? roman2int($n)
1326
:$fr eq 'des' ? $n
1327
:croak "Conv from $fr not supported yet";
1328
5
50
41
my $ret=$to eq 'dec' ? $dec
100
50
50
50
50
50
100
100
50
50
1329
:$to eq 'hex' ? sprintf("%x",$dec)
1330
:$to eq 'oct' ? sprintf("%o",$dec)
1331
:$to eq 'bin' ? sprintf("%b",$dec)
1332
:$to =~ /^(dusin|dozen|doz|dz)$/ ? $dec/12
1333
:$to =~ /^(gross|gr|gro)$/ ? $dec/144
1334
:$to eq 'great_gross' ? $dec/(12*144)
1335
:$to eq 'small_gross' ? $dec/(12*10)
1336
:$to eq 'skokk' ? $dec/60
1337
:$to eq 'roman' ? int2roman($dec)
1338
:$to eq 'des' ? $dec
1339
:croak "Conv to $to not suppoerted yet";
1340
5
19
$ret;
1341
}
1342
#http://en.wikipedia.org/wiki/Norwegian_units_of_measurement
1343
1344
1345
=head2 bytes_readable
1346
1347
Converts a number of bytes to something human readable.
1348
1349
Input 1: a number
1350
1351
Input 2: optionally the number of decimals if >1000 B. Default is 2.
1352
1353
Output: a string containing:
1354
1355
the number with a B behind if the number is less than 1000
1356
1357
the number divided by 1024 with two decimals and "kB" behind if the number is less than 1024*1000
1358
1359
the number divided by 1048576 with two decimals and "MB" behind if the number is less than 1024*1024*1000
1360
1361
the number divided by 1073741824 with two decimals and "GB" behind if the number is less than 1024*1024*1024*1000
1362
1363
the number divided by 1099511627776 with two decimals and "TB" behind otherwise
1364
1365
Examples:
1366
1367
print bytes_readable(999); # 999 B
1368
print bytes_readable(1000); # 1000 B
1369
print bytes_readable(1001); # 0.98 kB
1370
print bytes_readable(1024); # 1.00 kB
1371
print bytes_readable(1153433.6); # 1.10 MB
1372
print bytes_readable(1181116006.4); # 1.10 GB
1373
print bytes_readable(1209462790553.6); # 1.10 TB
1374
print bytes_readable(1088516511498.24*1000); # 990.00 TB
1375
print bytes_readable(1088516511498.24*1000,3); # 990.000 TB
1376
print bytes_readable(1088516511498.24*1000,1); # 990.0 TB
1377
1378
=cut
1379
1380
sub bytes_readable {
1381
32
32
1
2644
my $bytes=shift();
1382
32
100
149
my $d=shift()||2; #decimals
1383
32
50
68
return undef if !defined $bytes;
1384
32
100
151
return "$bytes B" if abs($bytes) <= 2** 0*1000; #bytes
1385
24
100
352
return sprintf("%.*f kB",$d,$bytes/2**10) if abs($bytes) < 2**10*1000; #kilobyte
1386
8
100
37
return sprintf("%.*f MB",$d,$bytes/2**20) if abs($bytes) < 2**20*1000; #megabyte
1387
6
100
32
return sprintf("%.*f GB",$d,$bytes/2**30) if abs($bytes) < 2**30*1000; #gigabyte
1388
4
50
53
return sprintf("%.*f TB",$d,$bytes/2**40) if abs($bytes) < 2**40*1000; #terrabyte
1389
0
0
return sprintf("%.*f PB",$d,$bytes/2**50); #petabyte, exabyte, zettabyte, yottabyte
1390
}
1391
1392
=head2 sec_readable
1393
1394
Time written as C< 14h 37m > is often more humanly comprehensible than C< 52620 seconds >.
1395
1396
print sec_readable( 0 ); # 0s
1397
print sec_readable( 0.0123 ); # 0.0123s
1398
print sec_readable(-0.0123 ); # -0.0123s
1399
print sec_readable( 1.23 ); # 1.23s
1400
print sec_readable( 1 ); # 1s
1401
print sec_readable( 9.87 ); # 9.87s
1402
print sec_readable( 10 ); # 10s
1403
print sec_readable( 10.1 ); # 10.1s
1404
print sec_readable( 59 ); # 59s
1405
print sec_readable( 59.123 ); # 59.1s
1406
print sec_readable( 60 ); # 1m 0s
1407
print sec_readable( 60.1 ); # 1m 0s
1408
print sec_readable( 121 ); # 2m 1s
1409
print sec_readable( 131 ); # 2m 11s
1410
print sec_readable( 1331 ); # 22m 11s
1411
print sec_readable(-1331 ); # -22m 11s
1412
print sec_readable( 13331 ); # 3h 42m
1413
print sec_readable( 133331 ); # 1d 13h
1414
print sec_readable( 1333331 ); # 15d 10h
1415
print sec_readable( 13333331 ); # 154d 7h
1416
print sec_readable( 133333331 ); # 4yr 82d
1417
print sec_readable( 1333333331 ); # 42yr 91d
1418
1419
=cut
1420
1421
sub sec_readable {
1422
25
25
1
5097
my $s=shift();
1423
25
32
my($h,$d,$y)=(3600,24*3600,365.25*24*3600);
1424
25
100
100
358
!defined$s ? undef
100
100
100
100
100
50
50
1425
:!length($s) ? ''
1426
:$s<0 ? '-'.sec_readable(-$s)
1427
:$s<60 && int($s)==$s
1428
? $s."s"
1429
:$s<60 ? sprintf("%.*fs",int(3+-log($s)/log(10)),$s)
1430
:$s<3600 ? int($s/60)."m " .($s%60) ."s"
1431
:$s<24*3600 ? int($s/$h)."h " .int(($s%$h)/60)."m"
1432
:$s<366*24*3600 ? int($s/$d)."d " .int(($s%$d)/$h)."h"
1433
: int($s/$y)."yr ".int(($s%$y)/$d)."d";
1434
}
1435
1436
=head2 int2roman
1437
1438
Converts integers to roman numbers.
1439
1440
B
1441
1442
print int2roman(1234); # prints MCCXXXIV
1443
print int2roman(1971); # prints MCMLXXI
1444
1445
(Adapted subroutine from Peter J. Acklam, jacklam(&)math.uio.no)
1446
1447
I = 1
1448
V = 5
1449
X = 10
1450
L = 50
1451
C = 100 (centum)
1452
D = 500
1453
M = 1000 (mille)
1454
1455
See also L.
1456
1457
See L for more.
1458
1459
=head2 roman2int
1460
1461
roman2int("MCMLXXI") == 1971
1462
1463
=cut
1464
1465
#alternative algorithm: http://www.rapidtables.com/convert/number/how-number-to-roman-numerals.htm
1466
sub int2roman {
1467
1161
1161
1
5652
my($n,@p)=(shift,[],[1],[1,1],[1,1,1],[1,2],[2],[2,1],[2,1,1],[2,1,1,1],[1,3],[3]);
1468
!defined($n)? undef
1469
: !length($n) ? ""
1470
: int($n)!=$n ? croak"int2roman: $n is not an integer"
1471
: $n==0 ? ""
1472
: $n<0 ? "-".int2roman(-$n)
1473
: $n>3999 ? "M".int2roman($n-1000)
1474
1161
100
4034
: join'',@{[qw/I V X L C D M/]}[map{my$i=$_;map($_+5-$i*2,@{$p[$n/10**(3-$i)%10]})}(0..3)];
913
100
3804
3652
100
2123
3652
50
1948
3652
100
6098
100
1475
}
1476
1477
sub roman2int {
1478
1017
1017
1
4484
my($r,$n,%c)=(shift,0,'',0,qw/I 1 V 5 X 10 L 50 C 100 D 500 M 1000/);
1479
1017
100
66
2926
$r=~s/^-//?-roman2int($r):
1480
$r=~s/(C?)([DM])|(X?)([LCDM])|(I?)([VXLCDM])|(I)|(.)/
1481
croak "roman2int: Invalid number $r" if $8;
1482
$n += $c{$2||$4||$6||$7} - $c{$1||$3||$5||''}; ''/eg && $n
1483
}
1484
1485
#sub roman2int_slow {
1486
# my $r=shift;
1487
# $r=~s,^\-,, ? 0-roman2int($r)
1488
# : $r=~s,^M,,i ? 1000+roman2int($r)
1489
# : $r=~s,^CM,,i ? 900+roman2int($r)
1490
# : $r=~s,^D,,i ? 500+roman2int($r)
1491
# : $r=~s,^CD,,i ? 400+roman2int($r)
1492
# : $r=~s,^C,,i ? 100+roman2int($r)
1493
# : $r=~s,^XC,,i ? 90+roman2int($r)
1494
# : $r=~s,^L,,i ? 50+roman2int($r)
1495
# : $r=~s,^XL,,i ? 40+roman2int($r)
1496
# : $r=~s,^X,,i ? 10+roman2int($r)
1497
# : $r=~s,^IX,,i ? 9+roman2int($r)
1498
# : $r=~s,^V,,i ? 5+roman2int($r)
1499
# : $r=~s,^IV,,i ? 4+roman2int($r)
1500
# : $r=~s,^I,,i ? 1+roman2int($r)
1501
# : !length($r) ? 0
1502
# : croak "Invalid roman number $r";
1503
#}
1504
1505
=head2 distance
1506
1507
B the four decimal numbers of two GPS positions: latutude1, longitude1, latitude2, longitude2
1508
1509
B the air distance in meters between the two points
1510
1511
Calculation is done using the Haversine Formula for spherical distance:
1512
1513
a = sin((lat2-lat1)/2)^2
1514
+ sin((lon2-lon1)/2)^2 * cos(lat1) * cos(lat2);
1515
1516
c = 2 * atan2(min(1,sqrt(a)),
1517
min(1,sqrt(1-a)))
1518
1519
distance = c * R
1520
1521
With earth radius set to:
1522
1523
R = Re - (Re-Rp) * sin(abs(lat1+lat2)/2)
1524
1525
Where C (equatorial radius) and C (polar radius).
1526
1527
B
1528
1529
my @oslo = ( 59.93937, 10.75135); # oslo in norway
1530
my @rio = (-22.97673, -43.19508); # rio in brazil
1531
1532
printf "%.1f km\n", distance(@oslo,@rio)/1000; # 10431.7 km
1533
printf "%.1f km\n", distance(@rio,@oslo)/1000; # 10431.7 km
1534
printf "%.1f nmi\n", distance(@oslo,@rio)/1852.000; # 5632.7 nmi (nautical miles)
1535
printf "%.1f miles\n",distance(@oslo,@rio)/1609.344; # 6481.9 miles
1536
printf "%.1f miles\n",conv(distance(@oslo,@rio),"meters","miles");# 6481.9 miles
1537
1538
See L
1539
1540
and L
1541
1542
and L
1543
1544
and L, but Acme::Tools::distance() is about 8 times faster.
1545
1546
=cut
1547
1548
our $Distance_factor = $PI / 180;
1549
0
0
0
0
sub acos { atan2( sqrt(1 - $_[0] * $_[0]), $_[0] ) }
1550
sub distance_great_circle {
1551
0
0
0
0
my($lat1,$lon1,$lat2,$lon2)=map $Distance_factor*$_, @_;
1552
0
0
my($Re,$Rp)=( 6378137.0, 6356752.3 ); #earth equatorial and polar radius
1553
0
0
my $R=$Re-($Re-$Rp)*sin(abs($lat1+$lat2)/2); #approx
1554
0
0
return $R*acos(sin($lat1)*sin($lat2)+cos($lat1)*cos($lat2)*cos($lon2-$lon1))
1555
}
1556
1557
sub distance {
1558
4
4
1
786
my($lat1,$lon1,$lat2,$lon2)=map $Distance_factor*$_, @_;
1559
4
39
my $a= sin(($lat2-$lat1)/2)**2
1560
+ sin(($lon2-$lon1)/2)**2 * cos($lat1) * cos($lat2);
1561
4
50
4
my $sqrt_a =sqrt($a); $sqrt_a =1 if $sqrt_a >1;
4
9
1562
4
50
5
my $sqrt_1ma=sqrt(1-$a); $sqrt_1ma=1 if $sqrt_1ma>1;
4
8
1563
4
17
my $c=2*atan2($sqrt_a,$sqrt_1ma);
1564
4
5
my($Re,$Rp)=( 6378137.0, 6356752.3 ); #earth equatorial and polar radius
1565
4
7
my $R=$Re-($Re-$Rp)*sin(abs($lat1+$lat2)/2); #approx
1566
4
14
return $c*$R;
1567
}
1568
1569
1570
=head2 big
1571
1572
=head2 bigi
1573
1574
=head2 bigf
1575
1576
=head2 bigr
1577
1578
=head2 bigscale
1579
1580
big, bigi, bigf, bigr and bigscale are sometimes convenient shorthands for using
1581
C<< Math::BigInt->new() >>, C<< Math::BigFloat->new() >> and C<< Math::BigRat->new() >>
1582
(preferably with the GMP for faster calculations). Examples:
1583
1584
my $num1 = big(3); #returns a new Math::BigInt-object
1585
my $num2 = big('3.0'); #returns a new Math::BigFloat-object
1586
my $num3 = big(3.0); #returns a new Math::BigInt-object
1587
my $num4 = big(3.1); #returns a new Math::BigFloat-object
1588
my $num5 = big('2/7'); #returns a new Math::BigRat-object
1589
my($i1,$f1,$i2,$f2) = big(3,'3.0',3.0,3.1); #returns the four new numbers, as the above four lines
1590
#uses wantarray
1591
1592
print 2**200; # 1.60693804425899e+60
1593
print big(2)**200; # 1606938044258990275541962092341162602522202993782792835301376
1594
print 2**big(200); # 1606938044258990275541962092341162602522202993782792835301376
1595
print big(2**200); # 1606938044258990000000000000000000000000000000000000000000000
1596
1597
print 1/7; # 0.142857142857143
1598
print 1/big(7); # 0 because of integer arithmetics
1599
print 1/big(7.0); # 0 because 7.0 is viewed as an integer
1600
print 1/big('7.0'); # 0.1428571428571428571428571428571428571429
1601
print 1/bigf(7); # 0.1428571428571428571428571428571428571429
1602
print bigf(1/7); # 0.142857142857143 probably not what you wanted
1603
1604
print 1/bigf(7); # 0.1428571428571428571428571428571428571429
1605
bigscale(80); # for increased precesion (default is 40)
1606
print 1/bigf(7); # 0.14285714285714285714285714285714285714285714285714285714285714285714285714285714
1607
1608
In C the characters C<< . >> and C<< / >> will make it return a
1609
Math::BigFloat- and Math::BigRat-object accordingly. Or else a Math::BigInt-object is returned.
1610
1611
Instead of guessing, use C, C and C to return what you want.
1612
1613
B Acme::Tools does not depend on Math::BigInt and
1614
Math::BigFloat and GMP, but these four big*-subs do (by C).
1615
To use big, bigi, bigf and bigr effectively you should
1616
install Math::BigInt::GMP and Math::BigFloat::GMP like this:
1617
1618
sudo cpanm Math::BigFloat Math::GMP Math::BingInt::GMP # or
1619
sudo cpan Math::BigFloat Math::GMP Math::BingInt::GMP # or
1620
sudo yum install perl-Math-BigInt-GMP perl-Math-GMP # on RedHat, RHEL or
1621
sudo apt-get install libmath-bigint-gmp-perl libmath-gmp-perl # on Ubuntu or some other way
1622
1623
Unless GMP is installed for perl like this, the Math::Big*-modules
1624
will fall back to using similar but slower built in modules. See: L
1625
1626
=cut
1627
1628
sub bigi {
1629
9
100
9
1
185
eval q(use Math::BigInt try=>"GMP") if !$INC{'Math/BigInt.pm'};
2
2
948
2
16420
2
176
1630
9
100
10755
if (wantarray) { return (map Math::BigInt->new($_),@_) }
2
9
1631
7
20
else { return Math::BigInt->new($_[0]) }
1632
}
1633
sub bigf {
1634
7
100
7
1
1044
eval q(use Math::BigFloat try=>"GMP") if !$INC{'Math/BigFloat.pm'};
2
2
792
2
16045
2
369
1635
7
100
795
if (wantarray) { return (map Math::BigFloat->new($_),@_) }
2
6
1636
5
15
else { return Math::BigFloat->new($_[0]) }
1637
}
1638
sub bigr {
1639
17
100
17
1
9521
eval q(use Math::BigRat try=>"GMP") if !$INC{'Math/BigRat.pm'};
2
2
826
2
8477
2
308
1640
17
100
1018
if (wantarray) { return (map Math::BigRat->new($_),@_) }
7
23
1641
10
24
else { return Math::BigRat->new($_[0]) }
1642
}
1643
sub big {
1644
wantarray
1645
12
50
12
1
3198
? (map /\./ ? bigf($_) : /\// ? bigr($_) : bigi($_), @_)
100
100
100
100
1646
: $_[0]=~/\./ ? bigf($_[0]) : $_[0]=~/\// ? bigr($_[0]) : bigi($_[0]);
1647
}
1648
sub bigscale {
1649
4
100
4
1
2186
@_==1 or croak "bigscale requires one and only one argument";
1650
2
3
my $scale=shift();
1651
2
50
4
eval q(use Math::BigInt try=>"GMP") if !$INC{'Math/BigInt.pm'};
1652
2
50
5
eval q(use Math::BigFloat try=>"GMP") if !$INC{'Math/BigFloat.pm'};
1653
2
50
4
eval q(use Math::BigRat try=>"GMP") if !$INC{'Math/BigRat.pm'};
1654
2
6
Math::BigInt->div_scale($scale);
1655
2
24
Math::BigFloat->div_scale($scale);
1656
2
14
Math::BigRat->div_scale($scale);
1657
2
13
return;
1658
}
1659
1660
#my $R_authalic=6371007.2; #earth radius in meters, mean, Authalic radius, real R varies 6353-6384km, http://en.wikipedia.org/wiki/Earth_radius
1661
#*)
1662
# ( 6378157.5, 6356772.2 ) #hmm
1663
#my $e=0.081819218048345;#sqrt(1 - $b**2/$a**2); #eccentricity of the ellipsoid
1664
#my($a,$b)=( 6378137.0, 6356752.3 ); #earth equatorial and polar radius
1665
#warn "e=$e\n";
1666
#warn "t=".(1 - $e**2)."\n";
1667
#warn "n=".((1 - $e**2 * sin(($lat1+$lat1)/2)**2)**1.5)."\n";
1668
#my $t=1 - $e**2;
1669
#my $n=(1 - $e**2 * sin(($lat1+$lat1)/2)**2)**1.5;
1670
#warn "t=$t\n";
1671
#warn "n=$n\n";
1672
#$a * (1 - $e**2) / ((1 - $e**2 * sin(($lat1+$lat2)/2)**2)**1.5); #hmm avg lat
1673
#$R=$a * $t/$n;
1674
1675
#=head2 fractional
1676
#=cut
1677
1678
sub fractional { #http://mathcentral.uregina.ca/QQ/database/QQ.09.06/h/lil1.html
1679
0
0
0
0
carp "fractional: NOT FINISHED";
1680
0
0
my $n=shift;
1681
0
0
print "----fractional n=$n\n";
1682
0
0
my $nn=$n; my $dec;
0
0
1683
0
0
$nn=~s,\.(\d+)$,$dec=length($1);$1.,;
1684
0
0
my $l;
1685
0
0
my $max=0;
1686
0
0
my($te,$ne);
1687
0
0
for(1..length($nn)/2){
1688
0
0
0
if( $nn=~/^(\d*?)((.{$_})(\3)+)$/ ){
1689
0
0
print "_ = $_ ".length($2)."\n";
1690
0
0
0
if(length($2)>$max){
1691
0
0
$l=$_;
1692
0
0
$te="$1$3"-$1;
1693
0
0
$max=length($2);
1694
}
1695
}
1696
}
1697
0
0
0
0
return fractional($n) if !$l and !recursed() and $dec>6 and substr($n,-1) and substr($n,-1)--;
0
0
0
1698
0
0
print "l=$l max=$max\n";
1699
0
0
$ne="9" x $l;
1700
0
0
print log($n),"\n";
1701
0
0
0
my $st=sub{print "status: ".($te/$ne)." n=$n ".($n/$te*$ne)."\n"};
0
0
1702
0
0
while($n/$te*$ne<0.99){ &$st(); $ne*=10 }
0
0
0
0
1703
0
0
while($te/$n/$ne<0.99){ &$st(); $te*=10 }
0
0
0
0
1704
0
0
&$st();
1705
0
0
while(1){
1706
0
0
my $d=gcd($te,$ne); print "gcd=$d\n";
0
0
1707
0
0
0
last if $d==1;
1708
0
0
$te/=$d; $ne/=$d;
0
0
1709
}
1710
0
0
&$st();
1711
0
0
0
wantarray ? ($te,$ne) : "$te/$ne"; #gcd()
1712
}
1713
1714
=head2 isnum
1715
1716
B String to be tested on regexp C<< /^ \s* [\-\+]? (?: \d*\.\d+ | \d+ ) (?:[eE][\-\+]?\d+)?\s*$/x >>. If no argument is given isnum checks C<< $_ >>.
1717
1718
B True or false (1 or 0)
1719
1720
use Acme::Tools;
1721
my @e=(' +32.12354E-21 ', 2.2, '9' x 99999, ' -123.12', '29,323.31', '29 323.31');
1722
print isnum() ? 'num' : 'str' for @e; #prints num for every element except the last two
1723
print $_=~$Re_isnum ? 'num' : 'str' for @e; #same
1724
1725
=cut
1726
1727
our $Re_isnum=qr/^ \s* [\-\+]? (?: \d*\.\d+ | \d+ ) (?:[eE][\-\+]?\d+)?\s*$/x;
1728
20
100
20
1
3165
sub isnum {(@_?$_[0]:$_)=~$Re_isnum}
1729
1730
=head2 between
1731
1732
Input: Three arguments.
1733
1734
Returns: Something I if the first argument is numerically between the two next.
1735
1736
=cut
1737
1738
sub between {
1739
27
27
1
6369
my($test,$fom,$tom)=@_;
1740
33
33
210
no warnings;
33
41
33
6505
1741
27
100
66
427
return $fom<$tom ? $test>=$fom&&$test<=$tom
33
1742
: $test>=$tom&&$test<=$fom;
1743
}
1744
1745
=head2 curb
1746
1747
B Three arguments: value, minumum, maximum.
1748
1749
B Returns the value if its between the given minumum and maximum.
1750
Returns minimum if the value is less or maximum if the value is more.
1751
1752
my $v = 234;
1753
print curb( $v, 200, 250 ); #prints 234
1754
print curb( $v, 150, 200 ); #prints 200
1755
print curb( $v, 250, 300 ); #prints 250
1756
print curb(\$v, 250, 300 ); #prints 250 and changes $v
1757
print $v; #prints 250
1758
1759
In the last example $v is changed because the argument is a reference. (To keep backward compatability, C<< bound() >> is a synonym for C<< curb() >>)
1760
1761
=cut
1762
1763
sub curb {
1764
129
129
1
744
my($val,$min,$max)=@_;
1765
129
50
66
1096
croak "curb: wrong args" if @_!=3 or !defined$min or !defined$max or !defined$val or $min>$max;
100
66
66
1766
126
100
151
return $$val=curb($$val,$min,$max) if ref($val) eq 'SCALAR';
1767
125
100
192
$val < $min ? $min :
100
1768
$val > $max ? $max :
1769
$val;
1770
}
1771
0
0
0
0
sub bound { curb(@_) }
1772
0
0
0
0
sub log10 { log($_[0])/log(10) }
1773
0
0
0
0
sub log2 { log($_[0])/log(2) }
1774
1775
=head1 STRINGS
1776
1777
=head2 upper
1778
1779
=head2 lower
1780
1781
Returns input string as uppercase or lowercase.
1782
1783
Can be used if Perls build in C and C for some reason does not convert æøå or other latin letters outsize a-z.
1784
1785
Converts C<< æøåäëïöüÿâêîôûãõàèìòùáéíóúýñð >> to and from C<< ÆØÅÄËÏÖÜ?ÂÊÎÔÛÃÕÀÈÌÒÙÁÉÍÓÚÝÑÐ >>
1786
1787
See also C<< perldoc -f uc >> and C<< perldoc -f lc >>
1788
1789
=head2 trim
1790
1791
Removes space from the beginning and end of a string. Whitespace (C<< \s >>) that is.
1792
And removes any whitespace inside the string of more than one char, leaving the first whitespace char. Thus:
1793
1794
trim(" asdf \t\n 123 ") eq "asdf 123"
1795
trim(" asdf\t\n 123\n") eq "asdf\t123"
1796
1797
Works on C<< $_ >> if no argument i given:
1798
1799
print join",", map trim, " please ", " remove ", " my ", " spaces "; # please,remove,my,spaces
1800
print join",", trim(" please ", " remove ", " my ", " spaces "); # works on arrays as well
1801
my $s=' please '; trim(\$s); # now $s eq 'please'
1802
trim(\@untrimmedstrings); # trims array strings inplace
1803
@untrimmedstrings = map trim, @untrimmedstrings; # same, works on $_
1804
trim(\$_) for @untrimmedstrings; # same, works on \$_
1805
1806
=head2 lpad
1807
1808
=head2 rpad
1809
1810
Left or right pads a string to the given length by adding one or more spaces at the end for I or at the start for I.
1811
1812
B First argument: string to be padded. Second argument: length of the output. Optional third argument: character(s) used to pad.
1813
Default is space.
1814
1815
rpad('gomle',9); # 'gomle '
1816
lpad('gomle',9); # ' gomle'
1817
rpad('gomle',9,'-'); # 'gomle----'
1818
lpad('gomle',9,'+'); # '++++gomle'
1819
rpad('gomle',4); # 'goml'
1820
lpad('gomle',4); # 'goml'
1821
rpad('gomle',7,'xyz'); # 'gomlxy'
1822
lpad('gomle',10,'xyz'); # 'xyzxygoml'
1823
1824
=head2 cpad
1825
1826
Center pads. Pads the string both on left and right equal to the given length. Centers the string. Pads right side first.
1827
1828
cpad('mat',5) eq ' mat '
1829
cpad('mat',4) eq 'mat '
1830
cpad('mat',6) eq ' mat '
1831
cpad('mat',9) eq ' mat '
1832
cpad('mat',5,'+') eq '+mat+'
1833
cpad('MMMM',20,'xyzXYZ') eq 'xyzXYZxyMMMMxyzXYZxy'
1834
1835
=cut
1836
1837
33
50
33
1
138
sub upper {no warnings;my $s=@_?shift:$_;$s=~tr/a-zæøåäëïöüÿâêîôûãõàèìòùáéíóúýñð/A-ZÆØÅÄËÏÖÜÿÂÊÎÔÛÃÕÀÈÌÒÙÁÉÍÓÚÝÑÐ/;$s}
33
1
39
33
1980
1
6
1
5
1
5
1838
33
50
33
1
129
sub lower {no warnings;my $s=@_?shift:$_;$s=~tr/A-ZÆØÅÄËÏÖÜÿÂÊÎÔÛÃÕÀÈÌÒÙÁÉÍÓÚÝÑÐ/a-zæøåäëïöüÿâêîôûãõàèìòùáéíóúýñð/;$s}
33
1
37
33
86206
1
7
1
2
1
6
1839
1840
sub trim {
1841
24
100
24
1
58
return trim($_) if !@_;
1842
20
100
35
return map trim($_), @_ if @_>1;
1843
19
16
my $s=shift;
1844
19
100
30
if(ref($s) eq 'SCALAR'){ $$s=~s,^\s+|(?<=\s)\s+|\s+$,,g; return $$s}
5
19
5
7
1845
14
100
20
if(ref($s) eq 'ARRAY') { trim(\$_) for @$s; return $s }
1
3
1
2
1846
13
63
$s=~s,^\s+|(?<=\s)\s+|\s+$,,g;
1847
13
41
$s;
1848
}
1849
1850
sub rpad {
1851
17
17
1
536
my($s,$l,$p)=@_;
1852
17
100
66
38
$p=' ' if @_<3 or !length($p);
1853
17
50
$s.=$p while length($s)<$l;
1854
17
54
substr($s,0,$l);
1855
}
1856
1857
sub lpad {
1858
5
5
1
994
my($s,$l,$p)=@_;
1859
5
100
66
21
$p=' ' if @_<3 or !length($p);
1860
5
100
33
$l
1861
? substr($s,0,$l)
1862
: substr($p x (1+$l/length($p)), 0, $l-length($s)).$s;
1863
}
1864
1865
sub cpad {
1866
11
11
1
1788
my($s,$l,$p)=@_;
1867
11
100
66
46
$p=' ' if @_<3 or !length($p);
1868
11
11
my $ls=length($s);
1869
11
50
15
return substr($s,0,$l) if $l<$ls;
1870
11
23
$p=$p x (($l-$ls+2)/length($p));
1871
11
44
substr($p, 0, ($l-$ls )/2) . $s .
1872
substr($p, 0, ($l-$ls+1)/2);
1873
}
1874
1875
sub cpad_old {
1876
0
0
0
0
my($s,$l,$p)=@_;
1877
0
0
0
$p=' ' if !length($p);
1878
0
0
0
return substr($s,0,$l) if $l
1879
0
0
my $i=0;
1880
0
0
while($l>length($s)){
1881
0
0
0
my $pc=substr($p,($i==int($i)?1:-1)*($i%length($p)),1);
1882
0
0
0
$i==int($i) ? ($s.=$pc) : ($s=$pc.$s);
1883
0
0
$i+=1/2;
1884
}
1885
0
0
$s;
1886
}
1887
1888
=head2 trigram
1889
1890
B A string (i.e. a name). And an optional x (see example 2)
1891
1892
B A list of this strings trigrams (See examlpe)
1893
1894
B
1895
1896
print join ", ", trigram("Kjetil Skotheim");
1897
1898
Prints:
1899
1900
Kje, jet, eti, til, il , l S, Sk, Sko, kot, oth, the, hei, eim
1901
1902
B
1903
1904
Default is 3, but here 4 is used instead in the second optional input argument:
1905
1906
print join ", ", trigram("Kjetil Skotheim", 4);
1907
1908
And this prints:
1909
1910
Kjet, jeti, etil, til , il S, l Sk, Sko, Skot, koth, othe, thei, heim
1911
1912
C was created for "fuzzy" name searching. If you have a database of many names,
1913
addresses, phone numbers, customer numbers etc. You can use trigram() to search
1914
among all of those at the same time. If the search form only has one input field.
1915
One general search box.
1916
1917
Store all of the trigrams of the trigram-indexed input fields coupled
1918
with each person, and when you search, you take each trigram of you
1919
query string and adds the list of people that has that trigram. The
1920
search result should then be sorted so that the persons with most hits
1921
are listed first. Both the query strings and the indexed database
1922
fields should have a space added first and last before C-ing
1923
them.
1924
1925
This search algorithm is not includes here yet...
1926
1927
C should perhaps have been named ngram for obvious reasons.
1928
1929
=head2 sliding
1930
1931
Same as trigram (except there is no default width). Works also with arrayref instead of string.
1932
1933
Example:
1934
1935
sliding( ["Reven","rasker","over","isen"], 2 )
1936
1937
Result:
1938
1939
( ['Reven','rasker'], ['rasker','over'], ['over','isen'] )
1940
1941
=head2 chunks
1942
1943
Splits strings and arrays into chunks of given size:
1944
1945
my @a = chunks("Reven rasker over isen",7);
1946
my @b = chunks([qw/Og gubben satt i kveldinga og koste seg med skillinga/], 3);
1947
1948
Resulting arrays:
1949
1950
( 'Reven r', 'asker o', 'ver ise', 'n' )
1951
( ['Og','gubben','satt'], ['i','kveldinga','og'], ['koste','seg','med'], ['skillinga'] )
1952
1953
=head2 chars
1954
1955
chars("Tittentei"); # ('T','i','t','t','e','n','t','e','i')
1956
1957
=cut
1958
1959
2
100
2
1
248
sub trigram { sliding($_[0],$_[1]||3) }
1960
1961
sub sliding {
1962
3
3
1
6
my($s,$w)=@_;
1963
3
100
46
return map substr($s,$_,$w), 0..length($s)-$w if !ref($s);
1964
1
50
19
return map [@$s[$_..$_+$w-1]], 0..@$s-$w if ref($s) eq 'ARRAY';
1965
}
1966
1967
sub chunks {
1968
2
2
1
6
my($s,$w)=@_;
1969
2
100
52
return $s=~/(.{1,$w})/g if !ref($s);
1970
1
50
66
return map [@$s[$_*$w .. min($_*$w+$w-1,$#$s)]], 0..$#$s/$w if ref($s) eq 'ARRAY';
1971
}
1972
1973
0
0
1
0
sub chars { split//, shift }
1974
1975
=head2 repl
1976
1977
Synonym for replace().
1978
1979
=head2 replace
1980
1981
Return the string in the first input argument, but where pairs of search-replace strings (or rather regexes) has been run.
1982
1983
Works as C in Oracle, or rather regexp_replace() in Oracle 10 and onward. Except that this C accepts more than three arguments.
1984
1985
Examples:
1986
1987
print replace("water","ater","ine"); # Turns water into wine
1988
print replace("water","ater"); # w
1989
print replace("water","at","eath"); # weather
1990
print replace("water","wa","ju",
1991
"te","ic",
1992
"x","y", # No x is found, no y is returned
1993
'r$',"e"); # Turns water into juice. 'r$' says that the r it wants
1994
# to change should be the last letters. This reveals that
1995
# second, fourth, sixth and so on argument is really regexs,
1996
# not normal strings. So use \ (or \\ inside "") to protect
1997
# the special characters of regexes. You probably also
1998
# should write qr/regexp/ instead of 'regexp' if you make
1999
# use of regexps here, just to make it more clear that
2000
# these are really regexps, not strings.
2001
2002
print replace('JACK and JUE','J','BL'); # prints BLACK and BLUE
2003
print replace('JACK and JUE','J'); # prints ACK and UE
2004
print replace("abc","a","b","b","c"); # prints ccc (not bcc)
2005
2006
If the first argument is a reference to a scalar variable, that variable is changed "in place".
2007
2008
Example:
2009
2010
my $str="test";
2011
replace(\$str,'e','ee','s','S');
2012
print $str; # prints teeSt
2013
2014
=cut
2015
2016
46
46
1
76
sub replace { repl(@_) }
2017
sub repl {
2018
83
83
1
152
my $str=shift;
2019
83
100
187
return $$str=replace($$str,@_) if ref($str) eq 'SCALAR';
2020
#return ? if ref($str) eq 'ARRAY';
2021
#return ? if ref($str) eq 'HASH';
2022
82
149
while(@_){
2023
109
152
my($fra,$til)=(shift,shift);
2024
109
100
4181
defined $til ? $str=~s/$fra/$til/g : $str=~s/$fra//g;
2025
}
2026
82
456
return $str;
2027
}
2028
2029
=head1 ARRAYS
2030
2031
=head2 min
2032
2033
Returns the smallest number in a list. Undef is ignored.
2034
2035
@lengths=(2,3,5,2,10,undef,5,4);
2036
$shortest = min(@lengths); # returns 2
2037
2038
Note: The comparison operator is perls C<< < >>> which means empty strings is treated as C<0>, the number zero. The same goes for C, except of course C<< > >> is used instead.
2039
2040
min(3,4,5) # 3
2041
min(3,4,5,undef) # 3
2042
min(3,4,5,'') # returns the empty string
2043
2044
=head2 max
2045
2046
Returns the largest number in a list. Undef is ignored.
2047
2048
@heights=(123,90,134,undef,132);
2049
$highest = max(@heights); # 134
2050
2051
=head2 mins
2052
2053
Just as L, except for strings.
2054
2055
print min(2,7,10); # 2
2056
print mins("2","7","10"); # 10
2057
print mins(2,7,10); # 10
2058
2059
=head2 maxs
2060
2061
Just as L, except for strings.
2062
2063
print max(2,7,10); # 10
2064
print maxs("2","7","10"); # 7
2065
print maxs(2,7,10); # 7
2066
2067
=cut
2068
2069
35
100
100
35
1
1502
sub min {my $min;for(@_){ $min=$_ if defined($_) and !defined($min) || $_ < $min } $min }
35
66
56
7087
20529
35
191
2070
1
100
100
1
1
1
sub mins {my $min;for(@_){ $min=$_ if defined($_) and !defined($min) || $_ lt $min} $min }
1
33
3
3
17
1
3
2071
32
100
100
32
1
27
sub max {my $max;for(@_){ $max=$_ if defined($_) and !defined($max) || $_ > $max } $max }
32
66
52
7093
20654
32
378
2072
1
100
100
1
1
1
sub maxs {my $max;for(@_){ $max=$_ if defined($_) and !defined($max) || $_ gt $max} $max }
1
33
2
3
16
1
4
2073
2074
=head2 zip
2075
2076
B Two or more arrayrefs. A number of equal sized arrays
2077
containing numbers, strings or anything really.
2078
2079
B An array of those input arrays zipped (interlocked, merged) into each other.
2080
2081
print join " ", zip( [1,3,5], [2,4,6] ); # 1 2 3 4 5 6
2082
print join " ", zip( [1,4,7], [2,5,8], [3,6,9] ); # 1 2 3 4 5 6 7 8 9
2083
2084
Example:
2085
2086
zip() creates a hash where the keys are found in the first array and values in the secord in the correct order:
2087
2088
my @media = qw/CD DVD VHS LP Blueray/;
2089
my @count = qw/20 12 2 4 3/;
2090
my %count = zip(\@media,\@count); # or zip( [@media], [@count] )
2091
print "I got $count{DVD} DVDs\n"; # I got 12 DVDs
2092
2093
Dies (croaks) if the two lists are of different sizes
2094
2095
...or any input argument is not an array ref.
2096
2097
=cut
2098
2099
sub zip {
2100
7
7
1
45865
my @t=@_;
2101
7
66
255
ref($_) ne 'ARRAY' and croak "ERROR: zip should have arrayrefs as arguments" for @t;
2102
5
66
9
@{$t[$_]} != @{$t[0]} and croak "ERROR: zip should have equal sized arrays" for 1..$#t;
5
6
5
163
2103
3
4
my @res;
2104
3
3
for my $i (0..@{$t[0]}-1){
3
5
2105
9
15
push @res, $$_[$i] for @t;
2106
}
2107
3
18
return @res;
2108
}
2109
2110
2111
=head2 pushsort
2112
2113
Adds one or more element to a numerically sorted array and keeps it sorted.
2114
2115
pushsort @a, 13; # this...
2116
push @a, 13; @a = sort {$a<=>$b} @a; # is the same as this, but the former is faster if @a is large
2117
2118
=head2 pushsortstr
2119
2120
Same as pushsort except that the array is kept sorted alphanumerically (cmp) instead of numerically (<=>). See L.
2121
2122
pushsort @a, "abc"; # this...
2123
push @a, "abc"; @a = sort @a; # is the same as this, but the former is faster if @a is large
2124
2125
=cut
2126
2127
our $Pushsort_cmpsub=undef;
2128
sub pushsort (\@@) {
2129
10915
10915
1
8497
my $ar=shift;
2130
2131
#not needed but often faster
2132
10915
100
100
18499
if(!defined $Pushsort_cmpsub and @$ar+@_<100){ #hm speedup?
2133
2348
2382
@$ar=(sort {$a<=>$b} (@$ar,@_));
52064
30065
2134
2348
1807
return 0+@$ar;
2135
}
2136
2137
8567
6473
for my $v (@_){
2138
2139
#not needed but often faster
2140
7567
100
8354
if(!defined $Pushsort_cmpsub){ #faster rank() in most cases
2141
901
100
50
1048
push @$ar, $v and next if $v>=$$ar[-1];
2142
898
100
50
1025
unshift @$ar, $v and next if $v< $$ar[0];
2143
}
2144
2145
7562
7368
splice @$ar, binsearch($v,$ar,1,$Pushsort_cmpsub)+1, 0, $v;
2146
}
2147
8567
7598
0+@$ar
2148
}
2149
0
0
1
0
sub pushsortstr(\@@){ local $Pushsort_cmpsub=sub{$_[0]cmp$_[1]}; pushsort(@_) } #speedup: copy sub pushsort
1000
1000
1939
1000
943
2150
2151
=head2 binsearch
2152
2153
Returns the position of an element in a numerically sorted array. Returns undef if the element is not found.
2154
2155
B Two, three or four arguments
2156
2157
B the element to find. Usually a number.
2158
2159
B a reference to the array to search in. The array
2160
should be sorted in ascending numerical order (se exceptions below).
2161
2162
B Optional. Default false.
2163
2164
If true, whether result I should return undef or a fractional position.
2165
2166
If the third argument is false binsearch returns undef if the element is not found.
2167
2168
If the third argument is true binsearch returns 0.5 plus closest position below the searched value.
2169
2170
Returns C< last position + 0.5 > if the searched element is greater than all elements in the sorted array.
2171
2172
Returns C< -0.5 > if the searched element is less than all elements in the sorted array.
2173
2174
Fourth argument: Optional. Default C<< sub { $_[0] <=> $_[1] } >>.
2175
2176
If present, the fourth argument is either:
2177
2178
=over 4
2179
2180
=item * a code-ref that alters the way binsearch compares two elements, default is C<< sub{$_[0]<=>$_[1]} >>
2181
2182
=item * a string that works as a hash key (column name), see example below
2183
2184
=back
2185
2186
B
2187
2188
binsearch(10,[5,10,15,20]); # 1
2189
binsearch(10,[20,15,10,5],undef,sub{$_[1]<=>$_[0]}); # 2 search arrays sorted numerically in opposite order
2190
binsearch("c",["a","b","c","d"],undef,sub{$_[0]cmp$_[1]}); # 2 search arrays sorted alphanumerically
2191
binsearchstr("b",["a","b","c","d"]); # 1 search arrays sorted alphanumerically
2192
2193
my @data=( map { {num=>$_, sqrt=>sqrt($_), square=>$_**2} }
2194
grep !$_%7, 1..1000000 );
2195
my $i = binsearch( {num=>913374}, \@data, undef, sub {$_[0]{num} <=> $_[1]{num}} );
2196
my $i = binsearch( {num=>913374}, \@data, undef, 'num' ); #same as previous line
2197
my $found_hashref = defined $i ? $data[$i] : undef;
2198
2199
=head2 binsearchstr
2200
2201
Same as binsearch except that the arrays is sorted alphanumerically
2202
(cmp) instead of numerically (<=>) and the searched element is a
2203
string, not a number. See L.
2204
2205
=cut
2206
2207
our $Binsearch_steps;
2208
our $Binsearch_maxsteps=100;
2209
sub binsearch {
2210
14609
14609
1
28791
my($search,$aref,$insertpos,$cmpsub)=@_; #search pos of search in array
2211
14609
50
19850
croak "binsearch did not get arrayref as second arg" if ref($aref) ne 'ARRAY';
2212
14609
50
100
37024
croak "binsearch got fourth arg which is not a code-ref" if defined $cmpsub and ref($cmpsub) and ref($cmpsub) ne 'CODE';
66
2213
14609
100
100
26990
if(defined $cmpsub and !ref($cmpsub)){
2214
1
2
my $key=$cmpsub;
2215
1
17
4
$cmpsub = sub{ $_[0]{$key} <=> $_[1]{$key} };
17
47
2216
}
2217
14609
50
16724
return $insertpos ? -0.5 : undef if !@$aref;
100
2218
14597
11015
my($min,$max)=(0,$#$aref);
2219
14597
9574
$Binsearch_steps=0;
2220
14597
17240
while (++$Binsearch_steps <= $Binsearch_maxsteps) {
2221
128735
108476
my $middle=int(($min+$max+0.5)/2);
2222
128735
78797
my $middle_value=$$aref[$middle];
2223
2224
#croak "binsearch got non-sorted array" if !$cmpsub and $$aref[$min]>$$aref[$min]
2225
# or $cmpsub and &$cmpsub($$aref[$min],$$aref[$min])>0;
2226
2227
128735
100
100
453784
if( !$cmpsub and $search < $middle_value
100
100
66
100
100
66
2228
or $cmpsub and &$cmpsub($search,$middle_value) < 0 ) { #print "<\n";
2229
38944
50
66
51700
$max=$min, next if $middle == $max and $min != $max;
2230
38944
50
41473
return $insertpos ? $middle-0.5 : undef if $middle == $max;
100
2231
37494
44437
$max=$middle;
2232
}
2233
elsif( !$cmpsub and $search > $middle_value
2234
or $cmpsub and &$cmpsub($search,$middle_value) > 0 ) { #print ">\n";
2235
82752
100
100
131076
$min=$max, next if $middle == $min and $max != $min;
2236
75247
50
87387
return $insertpos ? $middle+0.5 : undef if $middle == $min;
100
2237
69139
85869
$min=$middle;
2238
}
2239
else { #print "=\n";
2240
7039
10191
return $middle;
2241
}
2242
}
2243
0
0
croak "binsearch exceded $Binsearch_maxsteps steps";
2244
}
2245
2246
sub binsearchfast { # binary search routine finds index just below value
2247
0
0
0
0
my ($x,$v)=@_;
2248
0
0
my ($klo,$khi)=(0,$#{$x});
0
0
2249
0
0
my $k;
2250
0
0
while (($khi-$klo)>1) {
2251
0
0
$k=int(($khi+$klo)/2);
2252
0
0
0
if ($$x[$k]>$v) { $khi=$k; } else { $klo=$k; }
0
0
0
0
2253
}
2254
0
0
return $klo;
2255
}
2256
2257
2258
2
2
1
8
sub binsearchstr {binsearch(@_[0..2],sub{$_[0]cmp$_[1]})}
1
1
8
2259
2260
=head2 rank
2261
2262
B Two or three arguments. N and an arrayref for the list to look at.
2263
2264
In scalar context: Returns the nth smallest number in an array. The array doesn't have to be sorted.
2265
2266
In array context: Returns the n smallest numbers in an array.
2267
2268
To return the n(th) largest number(s) instead of smallest, just negate n.
2269
2270
An optional third argument can be a sub that is used to compare the elements of the input array.
2271
2272
Examples:
2273
2274
my $second_smallest = rank(2, [11,12,13,14]); # 12
2275
my @top10 = rank(-10, [1..100]); # 100, 99, 98, 97, 96, 95, 94, 93, 92, 91
2276
my $max = rank(-1, [101,102,103,102,101]); #103
2277
my @contest = ({name=>"Alice",score=>14},{name=>"Bob",score=>13},{name=>"Eve",score=>12});
2278
my $second = rank(2, \@contest, sub{$_[1]{score}<=>$_[0]{score}})->{name}; #Bob
2279
2280
=head2 rankstr
2281
2282
Just as C but sorts alphanumerically (strings, cmp) instead of numerically.
2283
2284
=cut
2285
2286
sub rank {
2287
35
35
1
334
my($rank,$aref,$cmpsub)=@_;
2288
35
100
64
if($rank<0){
2289
10
100
30037
44
$cmpsub||=sub{$_[0]<=>$_[1]};
30037
83109
2290
10
60180
32
return rank(-$rank,$aref,sub{0-&$cmpsub});
60180
46990
2291
}
2292
25
19
my @sort;
2293
25
27
local $Pushsort_cmpsub=$cmpsub;
2294
25
32
for(@$aref){
2295
8914
8457
pushsort @sort, $_;
2296
8914
100
13664
pop @sort if @sort>$rank;
2297
}
2298
25
100
1206
return wantarray ? @sort : $sort[$rank-1];
2299
}
2300
0
50
0
1
0
sub rankstr {wantarray?(rank(@_,sub{$_[0]cmp$_[1]})):rank(@_,sub{$_[0]cmp$_[1]})}
60142
10
168873
10
54
2301
2302
=head2 eqarr
2303
2304
B Two or more references to arrays.
2305
2306
B True (1) or false (0) for whether or not the arrays are numerically I alphanumerically equal.
2307
Comparing each element in each array with both C< == > and C< eq >.
2308
2309
Examples:
2310
2311
eqarr([1,2,3],[1,2,3],[1,2,3]); # 1 (true)
2312
eqarr([1,2,3],[1,2,3],[1,2,4]); # 0 (false)
2313
eqarr([1,2,3],[1,2,3,4]); # undef (different size, false)
2314
eqarr([1,2,3]); # croak (should be two or more arrays)
2315
eqarr([1,2,3],1,2,3); # croak (not arraysrefs)
2316
2317
=cut
2318
2319
sub eqarr {
2320
27
27
1
1209
my @arefs=@_;
2321
27
100
229
croak if @arefs<2;
2322
26
66
174
ref($_) ne 'ARRAY' and croak for @arefs;
2323
25
100
55
@{$arefs[0]} != @{$arefs[$_]} and return undef for 1..$#arefs;
27
25
27
69
2324
24
24
my $ant;
2325
2326
24
75
for my $ar (@arefs[1..$#arefs]){
2327
26
36
for(0..@$ar-1){
2328
272
50
33
630
++$ant and $ant>100 and croak ">100"; #TODO: feiler ved sammenligning av to tabeller > 10000(?) tall
2329
272
100
66
771
return 0 if $arefs[0][$_] ne $$ar[$_]
2330
or $arefs[0][$_] != $$ar[$_];
2331
}
2332
}
2333
22
135
return 1;
2334
}
2335
2336
=head2 sorted
2337
2338
Return true if the input array is numerically sorted.
2339
2340
@a=(1..10); print "array is sorted" if sorted @a; #true
2341
2342
Optionally the last argument can be a comparison sub:
2343
2344
@person=({Rank=>1,Name=>'Amy'}, {Rank=>2,Name=>'Paula'}, {Rank=>3,Name=>'Ruth'});
2345
print "Persons are sorted" if sorted @person, sub{$_[0]{Rank}<=>$_[1]{Rank}};
2346
2347
=head2 sortedstr
2348
2349
Return true if the input array is Inumerically sorted.
2350
2351
@a=(1..10); print "array is sorted" if sortedstr @a; #false
2352
@a=("01".."10"); print "array is sorted" if sortedstr @a; #true
2353
2354
=cut
2355
2356
sub sorted (\@@) {
2357
4
4
1
11
my($a,$cmpsub)=@_;
2358
4
12
for(0..$#$a-1){
2359
1197
50
66
3729
return 0 if !$cmpsub and $$a[$_]>$$a[$_+1]
66
33
2360
or $cmpsub and &$cmpsub($$a[$_],$$a[$_+1])>0;
2361
}
2362
4
20
return 1;
2363
}
2364
99
99
1
458
sub sortedstr { sorted(@_,sub{$_[0]cmp$_[1]}) }
2
2
13
2365
2366
=head2 part
2367
2368
B A code-ref and a list
2369
2370
B Two array-refs
2371
2372
Like C but returns the false list as well. Partitions a list
2373
into two lists where each element goes into the first or second list
2374
whether the predicate (a code-ref) is true or false for that element.
2375
2376
my( $odd, $even ) = part {$_%2} (1..8);
2377
print for @$odd; #prints 1 3 5 7
2378
print for @$even; #prints 2 4 6 8
2379
2380
(Works like C< partition() > in the Scala programming language)
2381
2382
=head2 parth
2383
2384
Like C but returns any number of lists.
2385
2386
B A code-ref and a list
2387
2388
B A hash where the returned values from the code-ref are keys and the values are arrayrefs to the list elements which gave those keys.
2389
2390
my %hash = parth { uc(substr($_,0,1)) } ('These','are','the','words','of','this','array');
2391
print serialize(\%hash);
2392
2393
Result:
2394
2395
%hash = ( T=>['These','the','this'],
2396
A=>['are','array'],
2397
O=>['of'],
2398
W=>['words'] )
2399
2400
=head2 parta
2401
2402
Like L but returns an array of lists.
2403
2404
my @a = parta { length } qw/These are the words of this array/;
2405
2406
Result:
2407
2408
@a = ( undef, undef, ['of'], ['are','the'], ['this'], ['These','words','array'] )
2409
2410
Two undefs at first (index positions 0 and 1) since there are no words of length 0 or 1 in the input array.
2411
2412
=cut
2413
2414
1
100
1
1
12
sub part (&@) { my($c,@r)=(shift,[],[]); push @{ $r[ &$c?0:1 ] }, $_ for @_; @r }
1
2
8
19
1
5
2415
1
1
1
162
sub parth (&@) { my($c,%r)=(shift); push @{ $r{ &$c } }, $_ for @_; %r }
1
2
7
19
1
7
2416
1
1
1
234
sub parta (&@) { my($c,@r)=(shift); push @{ $r[ &$c ] }, $_ for @_; @r }
1
3
7
15
1
4
2417
#sub mapn (&$@) { ... } like map but @_ contains n elems at a time, n=1 is map
2418
2419
=head2 refa
2420
2421
=head2 refh
2422
2423
=head2 refs
2424
2425
=head2 refaa
2426
2427
=head2 refah
2428
2429
=head2 refha
2430
2431
=head2 refhh
2432
2433
Returns true or false (1 or 0) if the argument is an arrayref, hashref, scalarref, ref to an array of arrays, ref to an array of hashes
2434
2435
Examples:
2436
2437
my $ref_to_array = [1,2,3];
2438
my $ref_to_hash = {1,100,2,200,3,300};
2439
my $ref_to_scalar = \"String";
2440
print "arrayref" if ref($ref_to_array) eq 'ARRAY'; #true
2441
print "hashref" if ref($ref_to_hash) eq 'HASH'; #true
2442
print "scalarref" if ref($ref_to_scalar) eq 'SCALAR'; #true
2443
print "arrayref" if refa($ref_to_array); #also true, without: eq 'ARRAY'
2444
print "hashref" if refh($ref_to_hash); #also true, without: eq 'HASH'
2445
print "scalarref" if refs($ref_to_scalar); #also true, without: eq 'SCALAR'
2446
2447
my $ref_to_array_of_arrays = [ [1,2,3], [2,4,8], [10,100,1000] ];
2448
my $ref_to_array_of_hashes = [ {1=>10, 2=>100}, {first=>1, second=>2} ];
2449
my $ref_to_hash_of_arrays = { alice=>[1,2,3], bob=>[2,4,8], eve=>[10,100,1000] };
2450
my $ref_to_hash_of_hashes = { alice=>{a=>22,b=>11}, bob=>{a=>33,b=>66} };
2451
2452
print "aa" if refaa($ref_to_array_of_arrays); #true
2453
print "ah" if refah($ref_to_array_of_hashes); #true
2454
print "ha" if refha($ref_to_hash_of_arrays); #true
2455
print "hh" if refhh($ref_to_hash_of_hashes); #true
2456
2457
=cut
2458
2459
3
0
3
1
24
sub refa { ref($_[0]) eq 'ARRAY' ? 1 : ref($_[0]) ? 0 : undef }
50
2460
3
0
3
1
13
sub refh { ref($_[0]) eq 'HASH' ? 1 : ref($_[0]) ? 0 : undef }
50
2461
1
0
1
1
6
sub refs { ref($_[0]) eq 'SCALAR' ? 1 : ref($_[0]) ? 0 : undef }
50
2462
1
0
1
1
6
sub refaa { ref($_[0]) eq 'ARRAY' ? refa($_[0][0]) : ref($_[0]) ? 0 : undef }
50
2463
1
0
1
1
5
sub refah { ref($_[0]) eq 'ARRAY' ? refh($_[0][0]) : ref($_[0]) ? 0 : undef }
50
2464
1
0
1
1
5
sub refha { ref($_[0]) eq 'HASH' ? refa((values%{$_[0]})[0]) : ref($_[0]) ? 0 : undef }
1
50
4
2465
1
0
1
1
6
sub refhh { ref($_[0]) eq 'HASH' ? refh((values%{$_[0]})[0]) : ref($_[0]) ? 0 : undef }
1
50
4
2466
2467
2468
=head2 pushr
2469
2470
=head2 popr
2471
2472
=head2 shiftr
2473
2474
=head2 unshiftr
2475
2476
=head2 splicer
2477
2478
=head2 keysr
2479
2480
=head2 valuesr
2481
2482
=head2 eachr
2483
2484
In Perl versions 5.12 - 5.22 push, pop, shift, unshift, splice, keys, values and each
2485
handled references to arrays and references to hashes just as if they where arrays and hashes. Examples:
2486
2487
my $person={name=>'Gaga', array=>[1,2,3]};
2488
push $person{array} , 4; #works in perl 5.12-5.22 but not before and after
2489
push @{ $person{array} }, 4; #works in all perl5 versions
2490
pushr $person{array} , 4; #use Acme::Tools and this should work in perl >= 5.8
2491
popr $person{array}; #returns 4
2492
2493
=cut
2494
2495
3
3
1
258
sub pushr { push @{shift()}, @_ } # ? ($@)
3
7
2496
1
1
1
1
sub popr { pop @{shift()} }
1
3
2497
1
1
1
245
sub shiftr { shift @{shift()} }
1
3
2498
2
2
1
485
sub unshiftr { unshift @{shift()}, @_ }
2
7
2499
0
0
sub splicer { @_==1 ? splice( @{shift()} )
2500
1
4
:@_==2 ? splice( @{shift()}, shift() )
2501
2
7
:@_==3 ? splice( @{shift()}, shift(), shift() )
2502
4
50
4
1
1048
:@_>=4 ? splice( @{shift()}, shift(), shift(), @_ ) : die }
1
100
6
100
50
2503
1
1
1
246
sub keysr { keys( %{shift()} ) } #hm sort(keys%{shift()}) ?
1
9
2504
1
1
1
3
sub valuesr { values( %{shift()} ) }
1
4
2505
4
50
4
1
1265
sub eachr { ref($_[0]) eq 'HASH' ? each(%{shift()})
4
13
2506
#:ref($_[0]) eq 'ARRAY' ? each(@{shift()}) # perl 5.8.8 cannot compile each on array! eval?
2507
: croak("eachr needs hashref or arrayref got '".ref($_[0])."'") }
2508
#sub eachr { each(%{shift()}) }
2509
2510
=head1 STATISTICS
2511
2512
=head2 sum
2513
2514
Returns the sum of a list of numbers. Undef is ignored.
2515
2516
print sum(1,3,undef,8); # 12
2517
print sum(1..1000); # 500500
2518
print sum(undef); # undef
2519
2520
=cut
2521
2522
33
66
33
1
199
sub sum { my $sum; no warnings; defined($_) and $sum+=$_ for @_; $sum }
33
60
37
33
2560
60
2852
60
10708
60
464
2523
2524
=head2 avg
2525
2526
Returns the I number of a list of numbers. That is C
2527
2528
print avg( 2, 4, 9); # 5 (2+4+9) / 3 = 5
2529
print avg( [2, 4, 9] ); # 5 pass by reference, same result but faster for large arrays
2530
2531
Also known as I.
2532
2533
Pass by reference: If one argument is given and it is a reference to an array,
2534
this array is taken as the list of numbers. This mode is about twice as fast
2535
for 10000 numbers or more. It most likely also saves memory.
2536
2537
=cut
2538
2539
sub avg {
2540
880
880
1
2181
my($sum,$n,@a)=(0,0);
2541
33
33
359
no warnings;
33
46
33
11800
2542
880
50
1088
if( @_==0 ) { return undef }
0
0
2543
880
100
100
1231
if( @_==1 and ref($_[0]) eq 'ARRAY' ){ @a=grep defined,@{$_[0]} }
1
1
1
5
2544
879
2279
else { @a=grep defined,@_ }
2545
880
100
1047
if( @a==0 ) { return undef }
1
3
2546
879
2966
$sum+=$_ for @a;
2547
879
2149
return $sum/@a
2548
}
2549
2550
=head2 geomavg
2551
2552
Returns the I (a.k.a I) of a list of numbers.
2553
2554
print geomavg(10,100,1000,10000,100000); # 1000
2555
print 0+ (10*100*1000*10000*100000) ** (1/5); # 1000 same thing
2556
print exp(avg(map log($_),10,100,1000,10000,100000)); # 1000 same thing, this is how geomavg() works internally
2557
2558
=cut
2559
2560
771
771
1
1680
sub geomavg { exp(avg(map log($_), @_)) }
2561
2562
=head2 harmonicavg
2563
2564
Returns the I (a.k.a I) of a list of numbers. L
2565
2566
print harmonicavg(10,11,12); # 3 / ( 1/10 + 1/11 + 1/12) = 10.939226519337
2567
2568
=cut
2569
2570
0
0
1
0
sub harmonicavg { my $s; $s+=1/$_ for @_; @_/$s }
0
0
0
0
2571
2572
=head2 variance
2573
2574
C<< variance = ( sum (x[i]-Average)**2)/(n-1) >>
2575
2576
=cut
2577
2578
sub variance {
2579
0
0
1
0
my $sumx2; $sumx2+=$_*$_ for @_;
0
0
2580
0
0
my $sumx; $sumx+=$_ for @_;
0
0
2581
0
0
(@_*$sumx2-$sumx*$sumx)/(@_*(@_-1));
2582
}
2583
2584
=head2 stddev
2585
2586
C<< Standard_Deviation = sqrt(variance) >>
2587
2588
Standard deviation (stddev) is a measurement of the width of a normal
2589
distribution where one stddev on each side of the mean covers 68% and
2590
two stddevs 95%. Normal distributions are sometimes called Gauss curves
2591
or Bell shapes. L
2592
2593
stddev(4,5,6,5,6,4,3,5,5,6,7,6,5,7,5,6,4) # = 1.0914103126635
2594
avg(@testscores) + stddev(@testscores) # = the score for IQ = 115 (by one definition)
2595
avg(@testscores) - stddev(@testscores) # = the score for IQ = 85
2596
2597
=cut
2598
2599
sub stddev {
2600
14
100
14
1
378
return undef if @_==0;
2601
13
100
66
48
return stddev(\@_) if @_>0 and !ref($_[0]);
2602
8
9
my $ar=shift;
2603
8
50
10
return undef if @$ar==0;
2604
8
100
12
return 0 if @$ar==1;
2605
6
6
my $sumx2; $sumx2 += $_*$_ for @$ar;
6
52
2606
6
5
my $sumx; $sumx += $_ for @$ar;
6
41
2607
6
22
sqrt( (@$ar*$sumx2-$sumx*$sumx)/(@$ar*(@$ar-1)) );
2608
}
2609
2610
=head2 rstddev
2611
2612
Relative stddev = stddev / avg
2613
2614
=cut
2615
2616
0
0
1
0
sub rstddev { stddev(@_) / avg(@_) }
2617
2618
=head2 median
2619
2620
Returns the median value of a list of numbers. The list do not have to
2621
be sorted.
2622
2623
Example 1, list having an odd number of numbers:
2624
2625
print median(1, 100, 101); # 100
2626
2627
100 is the middlemost number after sorting.
2628
2629
Example 2, an even number of numbers:
2630
2631
print median(1005, 100, 101, 99); # 100.5
2632
2633
100.5 is the average of the two middlemost numbers.
2634
2635
=cut
2636
2637
sub median {
2638
33
33
143
no warnings;
33
50
33
11562
2639
6
6
1
217
my @list = sort {$a<=>$b} @_;
10035
5617
2640
6
7
my $n=@list;
2641
6
100
164
$n%2 ? $list[($n-1)/2]
2642
: ($list[$n/2-1] + $list[$n/2])/2;
2643
}
2644
2645
2646
=head2 percentile
2647
2648
Returns one or more percentiles of a list of numbers.
2649
2650
Percentile 50 is the same as the I, percentile 25 is the first
2651
quartile, 75 is the third quartile.
2652
2653
B
2654
2655
First argument is your wanted percentile, or a refrence to a list of percentiles you want from the dataset.
2656
2657
If the first argument to percentile() is a scalar, this percentile is returned.
2658
2659
If the first argument is a reference to an array, then all those percentiles are returned as an array.
2660
2661
Second, third, fourth and so on argument are the numbers from which you want to find the percentile(s).
2662
2663
B
2664
2665
This finds the 50-percentile (the median) to the four numbers 1, 2, 3 and 4:
2666
2667
print "Median = " . percentile(50, 1,2,3,4); # 2.5
2668
2669
This:
2670
2671
@data=(11, 5, 3, 5, 7, 3, 1, 17, 4, 2, 6, 4, 12, 9, 0, 5);
2672
@p = map percentile($_,@data), (25, 50, 75);
2673
2674
Is the same as this:
2675
2676
@p = percentile([25, 50, 75], @data);
2677
2678
But the latter is faster, especially if @data is large since it sorts
2679
the numbers only once internally.
2680
2681
B
2682
2683
Data: 1, 4, 6, 7, 8, 9, 22, 24, 39, 49, 555, 992
2684
2685
Average (or mean) is 143
2686
2687
Median is 15.5 (which is the average of 9 and 22 who both equally lays in the middle)
2688
2689
The 25-percentile is 6.25 which are between 6 and 7, but closer to 6.
2690
2691
The 75-percentile is 46.5, which are between 39 and 49 but close to 49.
2692
2693
Linear interpolation is used to find the 25- and 75-percentile and any
2694
other x-percentile which doesn't fall exactly on one of the numbers in
2695
the set.
2696
2697
B
2698
2699
As you saw, 6.25 are closer to 6 than to 7 because 25% along the set of
2700
the twelve numbers is closer to the third number (6) than to he fourth
2701
(7). The median (50-percentile) is also really interpolated, but it is
2702
always in the middle of the two center numbers if there are an even count
2703
of numbers.
2704
2705
However, there is two methods of interpolation:
2706
2707
Example, we have only three numbers: 5, 6 and 7.
2708
2709
Method 1: The most common is to say that 5 and 7 lays on the 25- and
2710
75-percentile. This method is used in Acme::Tools.
2711
2712
Method 2: In Oracle databases the least and greatest numbers
2713
always lay on the 0- and 100-percentile.
2714
2715
As an argument on why Oracles (and others?) definition is not the best way is to
2716
look at your data as for instance temperature measurements. If you
2717
place the highest temperature on the 100-percentile you are sort of
2718
saying that there can never be a higher temperatures in future measurements.
2719
2720
A quick non-exhaustive Google survey suggests that method 1 here is most used.
2721
2722
The larger the data sets, the less difference there is between the two methods.
2723
2724
B
2725
2726
In method one, when you want a percentile outside of any possible
2727
interpolation, you use the smallest and second smallest to extrapolate
2728
from. For instance in the data set C<5, 6, 7>, if you want an
2729
x-percentile of x < 25, this is below 5.
2730
2731
If you feel tempted to go below 0 or above 100, C will
2732
I (or I to be more precise)
2733
2734
Another method could be to use "soft curves" instead of "straight
2735
lines" in interpolation. Maybe B-splines or Bezier curves. This is not
2736
used here.
2737
2738
For large sets of data Hoares algorithm would be faster than the
2739
simple straightforward implementation used in C
2740
here. Hoares don't sort all the numbers fully.
2741
2742
B
2743
2744
Data: 1, 4, 6, 7, 8, 9, 22, 24, 39, 49, 555, 992
2745
2746
Percentile Method 1 Method 2
2747
(Acme::Tools::percentile (Oracle)
2748
and others)
2749
------------- ----------------------------- ---------
2750
0 -2 1
2751
1 -1.61 1.33
2752
25 6.25 6.75
2753
50 (median) 15.5 15.5
2754
75 46.5 41.5
2755
99 1372.19 943.93
2756
100 1429 992
2757
2758
Found like this:
2759
2760
perl -MAcme::Tools -le 'print for percentile([0,1,25,50,75,99,100], 1,4,6,7,8,9,22,24,39,49,555,992)'
2761
2762
And like this in Oracle-databases:
2763
2764
select
2765
percentile_cont(0.00) within group(order by n) per0,
2766
percentile_cont(0.01) within group(order by n) per1,
2767
percentile_cont(0.25) within group(order by n) per25,
2768
percentile_cont(0.50) within group(order by n) per50,
2769
percentile_cont(0.75) within group(order by n) per75,
2770
percentile_cont(0.99) within group(order by n) per99,
2771
percentile_cont(1.00) within group(order by n) per100
2772
from (
2773
select 0+regexp_substr('1,4,6,7,8,9,22,24,39,49,555,992','[^,]+',1,i) n
2774
from dual,(select level i from dual connect by level <= 12)
2775
);
2776
2777
(Oracle also provides a similar function: C where I
2778
is short for I, meaning no interpolation is taking
2779
place. Instead the closest number from the data set is picked.)
2780
2781
=cut
2782
2783
sub percentile {
2784
3
3
1
3
my(@p,@t,@ret);
2785
3
100
12
if(ref($_[0]) eq 'ARRAY'){ @p=@{shift()} }
1
50
9
1
3
2786
2
4
elsif(not ref($_[0])) { @p=(shift()) }
2787
0
0
else{croak()}
2788
3
5
@t=@_;
2789
3
50
6
return if !@p;
2790
3
50
6
croak if !@t;
2791
3
8
@t=sort{$a<=>$b}@t;
72
46
2792
3
50
8
push@t,$t[0] if @t==1;
2793
3
5
for(@p){
2794
9
50
33
34
croak if $_<0 or $_>100;
2795
9
13
my $i=(@t+1)*$_/100-1;
2796
9
50
49
push@ret,
100
100
2797
$i<0 ? $t[0]+($t[1]-$t[0])*$i:
2798
$i>$#t ? $t[-1]+($t[-1]-$t[-2])*($i-$#t):
2799
$i==int($i)? $t[$i]:
2800
$t[$i]*(int($i+1)-$i) + $t[$i+1]*($i-int($i));
2801
}
2802
3
100
42
return @p==1 ? $ret[0] : @ret;
2803
}
2804
2805
=head1 RANDOM
2806
2807
=head2 random
2808
2809
B One or two arguments.
2810
2811
B
2812
2813
If two integer arguments: returns a random integer between the integers in argument one and two.
2814
2815
If the first argument is an arrayref: returns a random member of that array without changing the array.
2816
2817
If the first argument is an arrayref and there is a second arg: return that many random members of that array
2818
2819
If the first argument is an hashref and there is no second arg: return a random key weighted by the values of that hash
2820
2821
If the first argument is an hashref and there is a second arg: return that many random keys weighted by the values of that hash
2822
2823
If there is no second argument and the first is an integer, a random integer between 0 and that number is returned. Including 0 and the number itself.
2824
2825
B
2826
2827
$dice=random(1,6); # 1, 2, 3, 4, 5 or 6
2828
$dice=random([1..6]); # same as previous
2829
@dice=random([1..6],10); # 10 dice tosses
2830
$dice=random({1=>1, 2=>1, 3=>1, 4=>1, 5=>1, 6=>2}); # weighted dice with 6 being twice as likely as the others
2831
@dice=random({1=>1, 2=>1, 3=>1, 4=>1, 5=>1, 6=>2},10); # 10 weighted dice tosses
2832
print random({head=>0.4999,tail=>0.4999,edge=>0.0002}); # coin toss (sum 1 here but not required to be)
2833
print random(2); # prints 0, 1 or 2
2834
print 2**random(7); # prints 1, 2, 4, 8, 16, 32, 64 or 128
2835
@dice=map random([1..6]), 1..10; # as third example above, but much slower
2836
perl -MAcme::Tools -le 'print for random({head=>0.499,tail=>0.499,edge=>0.002},10000);' | sort | uniq -c
2837
2838
=cut
2839
2840
sub random {
2841
21002
21002
1
29129
my($from,$to)=@_;
2842
21002
14989
my $ref=ref($from);
2843
21002
100
21822
if($ref eq 'ARRAY'){
100
2844
15001
100
41210
my @r=map $$from[rand@$from], 1..$to||1;
2845
15001
100
38073
return @_>1?@r:$r[0];
2846
}
2847
elsif($ref eq 'HASH') {
2848
4001
7162
my @k=keys%$from;
2849
33
66
33
142
my $max;do{no warnings 'uninitialized';$_>$max and $max=$_ or $_<0 and croak"negative weight" for values%$from};
33
33
39
33
66
25052
4001
2952
4001
2168
4001
34369
2850
4001
100
8624
my @r=map {my$r;1 while $$from{$r=$k[rand@k]}
4010
2219
4010
9691
4010
5493
2851
4001
100
9941
return @_>1?@r:$r[0];
2852
}
2853
2000
100
2323
($from,$to)=(0,$from) if @_==1;
2854
2000
50
2066
($from,$to)=($to,$from) if $from>$to;
2855
2000
2860
return int($from+rand(1+$to-$from));
2856
}
2857
#todo?: https://en.wikipedia.org/wiki/Irwin%E2%80%93Hall_distribution
2858
2859
=head2 random_gauss
2860
2861
Returns an pseudo-random number with a Gaussian distribution instead
2862
of the uniform distribution of perls C or C in this
2863
module. The algorithm is a variation of the one at
2864
L which is both faster
2865
and better than adding a long series of C.
2866
2867
Uses perls C function internally.
2868
2869
B 0 - 3 arguments.
2870
2871
First argument: the average of the distribution. Default 0.
2872
2873
Second argument: the standard deviation of the distribution. Default 1.
2874
2875
Third argument: If a third argument is present, C
2876
returns an array of that many pseudo-random numbers. If there is no
2877
third argument, a number (a scalar) is returned.
2878
2879
B One or more pseudo-random numbers with a Gaussian distribution. Also known as a Bell curve or Normal distribution.
2880
2881
Example:
2882
2883
my @I=random_gauss(100, 15, 100000); # produces 100000 pseudo-random numbers, average=100, stddev=15
2884
#my @I=map random_gauss(100, 15), 1..100000; # same but more than three times slower
2885
print "Average is: ".avg(@I)."\n"; # prints a number close to 100
2886
print "Stddev is: ".stddev(@I)."\n"; # prints a number close to 15
2887
2888
my @M=grep $_>100+15*2, @I; # those above 130
2889
print "Percent above two stddevs: ".(100*@M/@I)."%\n"; #prints a number close to 2.2%
2890
2891
Example 2:
2892
2893
my $num=1e6;
2894
my @h; $h[$_/2]++ for random_gauss(100,15, $num);
2895
$h[$_] and printf "%3d - %3d %6d %s\n",
2896
$_*2,$_*2+1,$h[$_],'=' x ($h[$_]*1000/$num)
2897
for 1..200/2;
2898
2899
...prints an example of the famous Bell curve:
2900
2901
44 - 45 70
2902
46 - 47 114
2903
48 - 49 168
2904
50 - 51 250
2905
52 - 53 395
2906
54 - 55 588
2907
56 - 57 871
2908
58 - 59 1238 =
2909
60 - 61 1807 =
2910
62 - 63 2553 ==
2911
64 - 65 3528 ===
2912
66 - 67 4797 ====
2913
68 - 69 6490 ======
2914
70 - 71 8202 ========
2915
72 - 73 10577 ==========
2916
74 - 75 13319 =============
2917
76 - 77 16283 ================
2918
78 - 79 20076 ====================
2919
80 - 81 23742 =======================
2920
82 - 83 27726 ===========================
2921
84 - 85 32205 ================================
2922
86 - 87 36577 ====================================
2923
88 - 89 40684 ========================================
2924
90 - 91 44515 ============================================
2925
92 - 93 47575 ===============================================
2926
94 - 95 50098 ==================================================
2927
96 - 97 52062 ====================================================
2928
98 - 99 53338 =====================================================
2929
100 - 101 52834 ====================================================
2930
102 - 103 52185 ====================================================
2931
104 - 105 50472 ==================================================
2932
106 - 107 47551 ===============================================
2933
108 - 109 44471 ============================================
2934
110 - 111 40704 ========================================
2935
112 - 113 36642 ====================================
2936
114 - 115 32171 ================================
2937
116 - 117 28166 ============================
2938
118 - 119 23618 =======================
2939
120 - 121 19873 ===================
2940
122 - 123 16360 ================
2941
124 - 125 13452 =============
2942
126 - 127 10575 ==========
2943
128 - 129 8283 ========
2944
130 - 131 6224 ======
2945
132 - 133 4661 ====
2946
134 - 135 3527 ===
2947
136 - 137 2516 ==
2948
138 - 139 1833 =
2949
140 - 141 1327 =
2950
142 - 143 860
2951
144 - 145 604
2952
146 - 147 428
2953
148 - 149 275
2954
150 - 151 184
2955
152 - 153 111
2956
154 - 155 67
2957
2958
=cut
2959
2960
sub random_gauss {
2961
1
1
1
2
my($avg,$stddev,$num)=@_;
2962
1
50
3
$avg=0 if !defined $avg;
2963
1
50
3
$stddev=1 if !defined $stddev;
2964
1
50
3
$num=1 if !defined $num;
2965
1
50
4
croak "random_gauss should not have more than 3 arguments" if @_>3;
2966
1
2
my @r;
2967
1
4
while (@r<$num) {
2968
2500
1384
my($x1,$x2,$w);
2969
2500
1359
do {
2970
3196
2058
$x1=2.0*rand()-1.0;
2971
3196
1739
$x2=2.0*rand()-1.0;
2972
3196
3830
$w=$x1*$x1+$x2*$x2;
2973
} while $w>=1.0;
2974
2500
1868
$w=sqrt(-2.0*log($w)/$w) * $stddev;
2975
2500
3726
push @r, $x1*$w + $avg,
2976
$x2*$w + $avg;
2977
}
2978
1
50
4
pop @r if @r > $num;
2979
1
50
6
return $r[0] if @_<3;
2980
1
503
return @r;
2981
}
2982
2983
=head2 mix
2984
2985
Mixes an array in random order. In-place if given an array reference or not if given an array.
2986
2987
C could also have been named C, as in shuffling a deck of cards.
2988
2989
Example:
2990
2991
This:
2992
2993
print mix("a".."z"),"\n" for 1..3;
2994
2995
...could write something like:
2996
2997
trgoykzfqsduphlbcmxejivnwa
2998
qycatilmpgxbhrdezfwsovujkn
2999
ytogrjialbewcpvndhkxfzqsmu
3000
3001
B
3002
3003
=over 4
3004
3005
=item 1.
3006
Either a reference to an array as the only input. This array will then be mixed I. The array will be changed:
3007
3008
This: C<< @a=mix(@a) >> is the same as: C<< mix(\@a) >>.
3009
3010
=item 2.
3011
Or an array of zero, one or more elements.
3012
3013
=back
3014
3015
Note that an input-array which COINCIDENTLY SOME TIMES has one element
3016
(but more other times), and that element is an array-ref, you will
3017
probably not get the expected result.
3018
3019
To check distribution:
3020
3021
perl -MAcme::Tools -le 'print mix("a".."z") for 1..26000'|cut -c1|sort|uniq -c|sort -n
3022
3023
The letters a-z should occur around 1000 times each.
3024
3025
Shuffles a deck of cards: (s=spaces, h=hearts, c=clubs, d=diamonds)
3026
3027
perl -MAcme::Tools -le '@cards=map join("",@$_),cart([qw/s h c d/],[2..10,qw/J Q K A/]); print join " ",mix(@cards)'
3028
3029
(Uses L, which is not a typo, see further down here)
3030
3031
Note: C is approximately four times faster. Both respects the Perl built-in C.
3032
3033
=cut
3034
3035
sub mix {
3036
1020
50
33
1020
1
2860
if(@_==1 and ref($_[0]) eq 'ARRAY'){ #kun ett arg, og det er ref array
3037
0
0
my $r=$_[0];
3038
0
0
push@$r,splice(@$r,rand(@$r-$_),1) for 0..(@$r-1);
3039
0
0
return $r;
3040
}
3041
else{
3042
1020
1085
my@e=@_;
3043
1020
7763
push@e,splice(@e,rand(@e-$_),1) for 0..$#e;
3044
1020
2717
return @e;
3045
}
3046
}
3047
3048
=head2 pwgen
3049
3050
Generates random passwords.
3051
3052
B 0-n args
3053
3054
* First arg: length of password(s), default 8
3055
3056
* Second arg: number of passwords, default 1
3057
3058
* Third arg: string containing legal chars in password, default A-Za-z0-9,-./&%_!
3059
3060
* Fourth to n'th arg: list of requirements for passwords, default if the third arg is false/undef (so default third arg is used) is:
3061
3062
sub{/^[a-zA-Z0-9].*[a-zA-Z0-9]$/ and /[a-z]/ and /[A-Z]/ and /\d/ and /[,-.\/&%_!]/}
3063
3064
...meaning the password should:
3065
* start and end with: a letter a-z (lower- or uppercase) or a digit 0-9
3066
* should contain at least one char from each of the groups lower, upper, digit and special char
3067
3068
To keep the default requirement-sub but add additional ones just set the fourth arg to false/undef
3069
and add your own requirements in the fifth arg and forward (examples below). Sub pwgen uses perls
3070
own C internally.
3071
3072
C<< $Acme::Tools::Pwgen_max_sec >> and C<< $Acme::Tools::Pwgen_max_trials >> can be set to adjust for how long
3073
pwgen tries to find a password. Defaults for those are 0.01 and 10000.
3074
Whenever one of the two limits is reached, a first generates a croak.
3075
3076
Examples:
3077
3078
my $pw=pwgen(); # a random 8 chars password A-Z a-z 0-9 ,-./&%!_ (8 is default length)
3079
my $pw=pwgen(12); # a random 12 chars password A-Z a-z 0-9 ,-./&%!_
3080
my @pw=pwgen(0,10); # 10 random 8 chars passwords, containing the same possible chars
3081
my @pw=pwgen(0,1000,'A-Z'); # 1000 random 8 chars passwords containing just uppercase letters from A to Z
3082
3083
pwgen(3); # dies, defaults require chars in each of 4 group (see above)
3084
pwgen(5,1,'A-C0-9', qr/^\D{3}\d{2}$/); # a 5 char string starting with three A, B or Cs and endring with two digits
3085
pwgen(5,1,'ABC0-9',sub{/^\D{3}\d{2}$/}); # same as above
3086
3087
Examples of adding additional requirements to the default ones:
3088
3089
my @pwreq = ( qr/^[A-C]/ );
3090
pwgen(8,1,'','',@pwreq); # use defaults for allowed chars and the standard requirements
3091
# but also demand that the password must start with A, B or C
3092
3093
push @pwreq, sub{ not /[a-z]{3}/i };
3094
pwgen(8,1,'','',@pwreq); # as above and in addition the password should not contain three
3095
# or more consecutive letters (to avoid "offensive" words perhaps)
3096
3097
=cut
3098
3099
our $Pwgen_max_sec=0.01; #max seconds/password before croak (for hard to find requirements)
3100
our $Pwgen_max_trials=10000; #max trials/password before croak (for hard to find requirements)
3101
our $Pwgen_sec=0; #seconds used in last call to pwgen()
3102
our $Pwgen_trials=0; #trials in last call to pwgen()
3103
3218
100
100
3218
0
23102
sub pwgendefreq{/^[a-z].*[a-z\d]$/i and /[a-z]/ and /[A-Z]/ and /\d/ and /[,-.\/&%_!]/}
100
100
3104
sub pwgen {
3105
60
60
1
3181
my($len,$num,$chars,@req)=@_;
3106
60
100
95
$len||=8;
3107
60
100
97
$num||=1;
3108
60
100
89
$chars||='A-Za-z0-9,-./&%_!';
3109
60
100
50
95
$req[0]||=\&pwgendefreq if !$_[2];
3110
60
1202
$chars=~s/([$_])-([$_])/join("","$1".."$2")/eg for ('a-z','A-Z','0-9');
125
1412
3111
60
87
my($c,$t,@pw,$d)=(length($chars),time_fp());
3112
60
55
($Pwgen_trials,$Pwgen_sec)=(0,0);
3113
TRIAL:
3114
60
89
while(@pw<$num){
3115
12286
100
66
33180
croak "pwgen timeout after $Pwgen_trials trials"
66
3116
if ++$Pwgen_trials >= $Pwgen_max_trials
3117
or ($d=time_fp()-$t) > $Pwgen_max_sec*$num
3118
and $d!~/^\d+$/; #jic int from time_fp
3119
12284
49598
my $pw=join"",map substr($chars,rand($c),1),1..$len;
3120
12284
15611
for my $r (@req){
3121
18216
100
25160
if (ref($r) eq 'CODE' ){ local$_=$pw; &$r() or next TRIAL }
7178
100
5326
7178
50
7033
3122
33
100
33
178
elsif(ref($r) eq 'Regexp'){ no warnings; $pw=~$r or next TRIAL }
33
48
33
5974
11038
33505
3123
0
0
else { croak "pwgen: invalid req type $r ".ref($r) }
3124
}
3125
651
1506
push@pw,$pw;
3126
}
3127
58
63
$Pwgen_sec=time_fp()-$t;
3128
58
100
284
return $pw[0] if $num==1;
3129
7
278
return @pw;
3130
}
3131
3132
# =head1 veci
3133
#
3134
# Perls C takes 1, 2, 4, 8, 16, 32 and possibly 64 as its third argument.
3135
#
3136
# This limitation is removed with C (vec improved, but much slower)
3137
#
3138
# The third argument still needs to be 32 or lower (or possibly 64 or lower).
3139
#
3140
# =cut
3141
#
3142
# sub vecibs ($) {
3143
# my($s,$o,$b,$new)=@_;
3144
# if($b=~/^(1|2|4|8|16|32|64)$/){
3145
# return vec($s,$o,$b)=$new if @_==4;
3146
# return vec($s,$o,$b);
3147
# }
3148
# my $bb=$b<4?4:$b<8?8:$b<16?16:$b<32?32:$b<64?64:die;
3149
# my $ob=int($o*$b/$bb);
3150
# my $v=vec($s,$ob,$bb)*2**$bb+vec($s,$ob+1,$bb);
3151
# $v & (2**$b-1)
3152
# }
3153
3154
3155
=head1 SETS
3156
3157
=head2 distinct
3158
3159
Returns the values of the input list, sorted alfanumerically, but only
3160
one of each value. This is the same as L except uniq does not
3161
sort the returned list.
3162
3163
Example:
3164
3165
print join(", ", distinct(4,9,3,4,"abc",3,"abc")); # 3, 4, 9, abc
3166
print join(", ", distinct(4,9,30,4,"abc",30,"abc")); # 30, 4, 9, abc note: alphanumeric sort
3167
3168
=cut
3169
3170
1
1
1
165
sub distinct { return sort keys %{{map {($_,1)} @_}} }
1
3
7
18
3171
3172
=head2 in
3173
3174
Returns I<1> (true) if first argument is in the list of the remaining arguments. Uses the perl-operator C<< eq >>.
3175
3176
Otherwise it returns I<0> (false).
3177
3178
print in( 5, 1,2,3,4,6); # 0
3179
print in( 4, 1,2,3,4,6); # 1
3180
print in( 'a', 'A','B','C','aa'); # 0
3181
print in( 'a', 'A','B','C','a'); # 1
3182
3183
I guess in perl 5.10 or perl 6 you could use the C<< ~~ >> operator instead.
3184
3185
=head2 in_num
3186
3187
Just as sub L, but for numbers. Internally uses the perl operator C<< == >> instead of C< eq >.
3188
3189
print in(5000, '5e3'); # 0
3190
print in(5000, 5e3); # 1 since 5e3 is converted to 5000 before the call
3191
print in_num(5000, 5e3); # 1
3192
print in_num(5000, '+5.0e03'); # 1
3193
3194
=cut
3195
3196
sub in {
3197
33
33
142
no warnings 'uninitialized';
33
50
33
2294
3198
95
95
1
101
my $val=shift;
3199
95
100
94
for(@_){ return 1 if $_ eq $val }
294
534
3200
6
19
return 0;
3201
}
3202
3203
sub in_num {
3204
33
33
138
no warnings 'uninitialized';
33
44
33
60742
3205
1
1
1
1
my $val=shift;
3206
1
100
3
for(@_){ return 1 if $_ == $val }
5000
5299
3207
0
0
return 0;
3208
}
3209
3210
=head2 union
3211
3212
Input: Two arrayrefs. (Two lists, that is)
3213
3214
Output: An array containing all elements from both input lists, but no element more than once even if it occurs twice or more in the input.
3215
3216
Example, prints 1,2,3,4:
3217
3218
perl -MAcme::Tools -le 'print join ",", union([1,2,3],[2,3,3,4,4])' # 1,2,3,4
3219
3220
=cut
3221
3222
1
1
1
1
sub union { my %seen; grep !$seen{$_}++, map @{shift()},@_ }
1
3
2
12
3223
=head2 minus
3224
3225
Input: Two arrayrefs.
3226
3227
Output: An array containing all elements in the first input array but not in the second.
3228
3229
Example:
3230
3231
perl -MAcme::Tools -le 'print join " ", minus( ["five", "FIVE", 1, 2, 3.0, 4], [4, 3, "FIVE"] )'
3232
3233
Output is C<< five 1 2 >>.
3234
3235
=cut
3236
3237
sub minus {
3238
1
1
0
1
my %seen;
3239
1
2
my %notme=map{($_=>1)}@{$_[1]};
3
6
1
2
3240
1
66
1
grep !$notme{$_}&&!$seen{$_}++, @{$_[0]};
1
25
3241
}
3242
3243
=head2 intersect
3244
3245
Input: Two arrayrefs
3246
3247
Output: An array containing all elements which exists in both input arrays.
3248
3249
Example:
3250
3251
perl -MAcme::Tools -le 'print join" ", intersect( ["five", 1, 2, 3.0, 4], [4, 2+1, "five"] )' # 4 3 five
3252
3253
Output: C<< 4 3 five >>
3254
3255
=cut
3256
3257
sub intersect {
3258
44
44
1
31
my %first=map{($_=>1)}@{$_[0]};
52
89
44
46
3259
44
34
my %seen;
3260
44
100
36
return grep{$first{$_}&&!$seen{$_}++}@{$_[1]};
50
193
44
37
3261
}
3262
3263
=head2 not_intersect
3264
3265
Input: Two arrayrefs
3266
3267
Output: An array containing all elements member of just one of the input arrays (not both).
3268
3269
Example:
3270
3271
perl -MAcme::Tools -le ' print join " ", not_intersect( ["five", 1, 2, 3.0, 4], [4, 2+1, "five"] )'
3272
3273
The output is C<< 1 2 >>.
3274
3275
=cut
3276
3277
sub not_intersect {
3278
1
1
1
1
my %code;
3279
my %seen;
3280
1
1
for(@{$_[0]}){$code{$_}|=1}
1
3
5
11
3281
1
1
for(@{$_[1]}){$code{$_}|=2}
1
3
3
3
3282
1
100
2
return grep{$code{$_}!=3&&!$seen{$_}++}(@{$_[0]},@{$_[1]});
8
22
1
2
1
2
3283
}
3284
3285
=head2 uniq
3286
3287
Input: An array of strings (or numbers)
3288
3289
Output: The same array in the same order, except elements which exists earlier in the list.
3290
3291
Same as L but distinct sorts the returned list, I does not.
3292
3293
Example:
3294
3295
my @t=(7,2,3,3,4,2,1,4,5,3,"x","xx","x",02,"07");
3296
print join " ", uniq @t; # prints 7 2 3 4 1 5 x xx 07
3297
3298
=cut
3299
3300
1
1
1
2
sub uniq(@) { my %seen; grep !$seen{$_}++, @_ }
1
17
3301
3302
=head1 HASHES
3303
3304
=head2 subhash
3305
3306
Copies a subset of keys/values from one hash to another.
3307
3308
B First argument is a reference to a hash. The rest of the arguments are a list of the keys of which key/value-pair you want to be copied.
3309
3310
B The hash consisting of the keys and values you specified.
3311
3312
Example:
3313
3314
%population = ( Norway=>5000000, Sweden=>9500000, Finland=>5400000,
3315
Denmark=>5600000, Iceland=>320000,
3316
India => 1.21e9, China=>1.35e9, USA=>313e6, UK=>62e6 );
3317
3318
%scandinavia = subhash( \%population , 'Norway', 'Sweden', 'Denmark' ); # this and
3319
%scandinavia = (Norway=>5000000,Sweden=>9500000,Denmark=>5600000); # this is the same
3320
3321
print "Population of $_ is $scandinavia{$_}\n" for keys %scandinavia;
3322
3323
...prints the populations of the three scandinavian countries.
3324
3325
Note: The values are NOT deep copied when they are references. (Use C<< Storable::dclone() >> to do that).
3326
3327
Note2: For perl version 5.20+ subhashes (hash slices returning keys as well as values) is built in like this:
3328
3329
%scandinavia = %population{'Norway','Sweden','Denmark'};
3330
3331
=cut
3332
3333
sub subhash {
3334
1
1
1
1
my $hr=shift;
3335
1
2
my @r;
3336
1
2
for(@_){ push@r,($_=>$$hr{$_}) }
3
9
3337
1
9
return @r;
3338
}
3339
3340
=head2 hashtrans
3341
3342
B a reference to a hash of hashes
3343
3344
B a hash like the input-hash, but matrix transposed (kind of). Think of it as if X and Y has swapped places.
3345
3346
%h = ( 1 => {a=>33,b=>55},
3347
2 => {a=>11,b=>22},
3348
3 => {a=>88,b=>99} );
3349
print serialize({hashtrans(\%h)},'v');
3350
3351
Gives:
3352
3353
%v=( 'a'=>{'1'=>'33','2'=>'11','3'=>'88'},
3354
'b'=>{'1'=>'55','2'=>'22','3'=>'99'} );
3355
3356
=cut
3357
3358
#Hashtrans brukes automatisk når første argument er -1 i sub hashtabell()
3359
3360
sub hashtrans {
3361
1
1
1
2
my $h=shift;
3362
1
1
my %new;
3363
1
4
for my $k (keys%$h){
3364
3
4
my $r=$$h{$k};
3365
3
4
for(keys%$r){
3366
6
8
$new{$_}{$k}=$$r{$_};
3367
}
3368
}
3369
1
11
return %new;
3370
}
3371
3372
=head2 a2h
3373
3374
B array of arrays
3375
3376
B array of hashes
3377
3378
Transforms an array of arrays (arrayrefs) to an array of hashes (hashrefs).
3379
3380
Example:
3381
3382
my @h = a2h( ['Name', 'Age', 'Gender'], #1st row become keys
3383
['Alice', 20, 'F'],
3384
['Bob', 30, 'M'],
3385
['Eve', undef, 'F'] );
3386
3387
Result array @h:
3388
3389
(
3390
{Name=>'Alice', Age=>20, Gender=>'F'},
3391
{Name=>'Bob', Age=>30, Gender=>'M'},
3392
{Name=>'Eve', Age=>undef, Gender=>'F'},
3393
);
3394
3395
=head2 h2a
3396
3397
B array of hashes
3398
3399
B array of arrays
3400
3401
Opposite of L
3402
3403
=cut
3404
3405
sub a2h {
3406
1
1
1
76
my @col=@{shift@_};
1
5
3407
1
3
map { my%h;@h{@col}=@$_;\%h} @_;
10
8
10
32
10
15
3408
}
3409
3410
sub h2a {
3411
1
1
1
5
my %c;
3412
1
18
map $c{$_}++, keys%$_ for @_;
3413
1
0
8
my @c=sort{$c{$a}<=>$c{$b} or $a cmp $b}keys%c;
4
11
3414
1
12
(\@c,map[@$_{@c}],@_);
3415
}
3416
3417
=head1 COMPRESSION
3418
3419
L, L, L, L, L, and L
3420
compresses and uncompresses strings to save space in disk, memory,
3421
database or network transfer. Trades time for space. (Beware of wormholes)
3422
3423
=head2 zipb64
3424
3425
Compresses the input (text or binary) and returns a base64-encoded string of the compressed binary data.
3426
No known limit on input length, several MB has been tested, as long as you've got the RAM...
3427
3428
B One or two strings.
3429
3430
First argument: The string to be compressed.
3431
3432
Second argument is optional: A I string.
3433
3434
B a base64-kodet string of the compressed input.
3435
3436
The use of an optional I string will result in an even
3437
further compressed output in the dictionary string is somewhat similar
3438
to the string that is compressed (the data in the first argument).
3439
3440
If x relatively similar string are to be compressed, i.e. x number
3441
automatic of email responses to some action by a user, it will pay of
3442
to choose one of those x as a dictionary string and store it as
3443
such. (You will also use the same dictionary string when decompressing
3444
using L.
3445
3446
The returned string is base64 encoded. That is, the output is 33%
3447
larger than it has to be. The advantage is that this string more
3448
easily can be stored in a database (without the hassles of CLOB/BLOB)
3449
or perhaps easier transfer in http POST requests (it still needs some
3450
url-encoding, normally). See L and L for the
3451
same without base 64 encoding.
3452
3453
Example 1, normal compression without dictionary:
3454
3455
$txt = "Test av komprimering, hva skjer? " x 10; # ten copies of this norwegian string, $txt is now 330 bytes (or chars rather...)
3456
print length($txt)," bytes input!\n"; # prints 330
3457
$zip = zipb64($txt); # compresses
3458
print length($zip)," bytes output!\n"; # prints 65
3459
print $zip; # prints the base64 string ("noise")
3460
3461
$output=unzipb64($zip); # decompresses
3462
print "Hurra\n" if $output eq $txt; # prints Hurra if everything went well
3463
print length($output),"\n"; # prints 330
3464
3465
Example 2, same compression, now with dictionary:
3466
3467
$txt = "Test av komprimering, hva skjer? " x 10; # Same original string as above
3468
$dict = "Testing av kompresjon, hva vil skje?"; # dictionary with certain similarities
3469
# of the text to be compressed
3470
$zip2 = zipb64($txt,$dict); # compressing with $dict as dictionary
3471
print length($zip2)," bytes output!\n"; # prints 49, which is less than 65 in ex. 1 above
3472
$output=unzipb64($zip2,$dict); # uses $dict in the decompressions too
3473
print "Hurra\n" if $output eq $txt; # prints Hurra if everything went well
3474
3475
3476
Example 3, dictionary = string to be compressed: (out of curiosity)
3477
3478
$txt = "Test av komprimering, hva skjer? " x 10; # Same original string as above
3479
$zip3 = zipb64($txt,$txt); # hmm
3480
print length($zip3)," bytes output!\n"; # prints 25
3481
print "Hurra\n" if unzipb64($zip3,$txt) eq $txt; # hipp hipp ...
3482
3483
zipb64() and zipbin() is really just wrappers around L and C & co there.
3484
3485
=cut
3486
3487
sub zipb64 {
3488
9
9
1
631
require MIME::Base64;
3489
9
593
return MIME::Base64::encode_base64(zipbin(@_));
3490
}
3491
3492
3493
=head2 zipbin
3494
3495
C does the same as C except that zipbin()
3496
does not base64 encode the result. Returns binary data.
3497
3498
See L for documentation.
3499
3500
=cut
3501
3502
sub zipbin {
3503
17
17
1
12426
require Compress::Zlib;
3504
17
23
my($data,$dict)=@_;
3505
17
50
100
99
my $x=Compress::Zlib::deflateInit(-Dictionary=>$dict||'',-Level=>Compress::Zlib::Z_BEST_COMPRESSION()) or croak();
3506
17
50
3561
my($output,$status)=$x->deflate($data); croak() if $status!=Compress::Zlib::Z_OK();
17
48061
3507
17
50
70
my($out,$status2)=$x->flush(); croak() if $status2!=Compress::Zlib::Z_OK();
17
1723
3508
17
415
return $output.$out;
3509
}
3510
3511
=head2 unzipb64
3512
3513
Opposite of L.
3514
3515
Input:
3516
3517
First argument: A string made by L
3518
3519
Second argument: (optional) a dictionary string which where used in L.
3520
3521
Output: The original string (be it text or binary).
3522
3523
See L.
3524
3525
=cut
3526
3527
sub unzipb64 {
3528
2
2
1
3
my($data,$dict)=@_;
3529
2
10
require MIME::Base64;
3530
2
31
unzipbin(MIME::Base64::decode_base64($data),$dict);
3531
}
3532
3533
=head2 unzipbin
3534
3535
C does the same as L except that C
3536
wants a pure binary compressed string as input, not base64.
3537
3538
See L for documentation.
3539
3540
=cut
3541
3542
sub unzipbin {
3543
5
5
1
215
require Compress::Zlib;
3544
5
12
require Carp;
3545
5
5
my($data,$dict)=@_;
3546
5
50
50
31
my $x=Compress::Zlib::inflateInit(-Dictionary=>$dict||'') or croak();
3547
5
420
my($output,$status)=$x->inflate($data);
3548
5
50
458
croak() if $status!=Compress::Zlib::Z_STREAM_END();
3549
5
61
return $output;
3550
}
3551
3552
=head2 gzip
3553
3554
B A string or reference to a string you want to compress. Text or binary.
3555
3556
B The binary compressed representation of that input string.
3557
3558
C is really just a wrapper for C< Compress:Zlib::memGzip() > and uses the same
3559
compression algorithm as the well known GNU program gzip found in most unix/linux/cygwin
3560
distros. Except C does this in-memory. (Both using the C-library C).
3561
3562
writefile( "file.gz", gzip("some string") );
3563
3564
=head2 gunzip
3565
3566
B A binary compressed string or a reference to such a string. I.e. something returned from
3567
C earlier or read from a C<< .gz >> file.
3568
3569
B The original larger non-compressed string. Text or binary.
3570
3571
C is a wrapper for Compress::Zlib::memGunzip()
3572
3573
print gunzip( gzip("some string") ); #some string
3574
3575
=head2 bzip2
3576
3577
Same as L and L except with a different compression algorithm (compresses more but is slower). Wrapper for Compress::Bzip2::memBzip.
3578
3579
Compared to gzip/gunzip, bzip2 compression is much slower, bunzip2 decompression not so much.
3580
3581
See also L, C, C, C.
3582
3583
writefile( "file.bz2", bzip2("some string") );
3584
print bunzip2( bzip2("some string") ); #some string
3585
3586
=head2 bunzip2
3587
3588
Decompressed something compressed by bzip2() or data from a C<.bz2> file. See L.
3589
3590
=cut
3591
3592
8
100
8
1
12654
sub gzip { my $s=shift(); eval"require Compress::Zlib" if !$INC{'Compress/Zlib.pm'}; croak "Compress::Zlib not found" if $@; Compress::Zlib::memGzip( ref($s)?$s:\$s ) }
8
50
204
8
50
85311
8
37
3593
3
50
3
1
12456
sub gunzip { my $s=shift(); eval"require Compress::Zlib" if !$INC{'Compress/Zlib.pm'}; croak "Compress::Zlib not found" if $@; Compress::Zlib::memGunzip( ref($s)?$s:\$s ) }
3
50
10
3
50
6
3
15
3594
0
0
0
1
0
sub bzip2 { my $s=shift(); eval"require Compress::Bzip2" if !$INC{'Compress/Bzip2.pm'}; croak "Compress::Bzip2 not found" if $@; Compress::Bzip2::memBzip( ref($s)?$s:\$s ) }
0
0
0
0
0
0
0
0
3595
0
0
0
1
0
sub bunzip2 { my $s=shift(); eval"require Compress::Bzip2" if !$INC{'Compress/Bzip2.pm'}; croak "Compress::Bzip2 not found" if $@; Compress::Bzip2::memBunzip( ref($s)?$s:\$s ) }
0
0
0
0
0
0
0
0
3596
3597
=head1 NET, WEB, CGI-STUFF
3598
3599
=head2 ipaddr
3600
3601
B an IP-number
3602
3603
B either an IP-address I or an empty string
3604
if the DNS lookup didn't find anything.
3605
3606
Example:
3607
3608
perl -MAcme::Tools -le 'print ipaddr("129.240.8.200")' # prints www.uio.no
3609
3610
Uses perls C internally.
3611
3612
C memoizes the results internally (using the
3613
C<%Acme::Tools::IPADDR_memo> hash) so only the first loopup on a
3614
particular IP number might take some time.
3615
3616
Some few DNS loopups can take several seconds.
3617
Most is done in a fraction of a second. Due to this slowness, medium to high traffic web servers should
3618
probably turn off hostname lookups in their logs and just log IP numbers by using
3619
C in Apache C and then use I afterwards if necessary.
3620
3621
=cut
3622
3623
our %IPADDR_memo;
3624
sub ipaddr {
3625
2
2
1
458
my $ipnr=shift;
3626
#NB, 2-tallet på neste kodelinje er ikke det samme på alle os,
3627
#men ser ut til å funke i linux og hpux. Den Riktige Måten(tm)
3628
#er konstanten AF_INET i Socket eller IO::Socket-pakken.
3629
2
66
207790
return $IPADDR_memo{$ipnr} ||= gethostbyaddr(pack("C4",split("\\.",$ipnr)),2);
3630
}
3631
3632
=head2 ipnum
3633
3634
C does the opposite of C
3635
3636
Does an attempt of converting an IP address (hostname) to an IP number.
3637
Uses DNS name servers via perls internal C.
3638
Return empty string (undef) if unsuccessful.
3639
3640
print ipnum("www.uio.no"); # prints 129.240.13.152
3641
3642
Does internal memoization via the hash C<%Acme::Tools::IPNUM_memo>.
3643
3644
=cut
3645
3646
our %IPNUM_memo;
3647
sub ipnum {
3648
1
1
1
2
my $ipaddr=shift;
3649
#croak "No $ipaddr" if !length($ipaddr);
3650
1
50
3
return $IPNUM_memo{$ipaddr} if exists $IPNUM_memo{$ipaddr};
3651
1
43124
my $h=gethostbyname($ipaddr);
3652
#croak "No ipnum for $ipaddr" if !$h;
3653
1
50
11
return if !defined $h;
3654
1
13
my $ipnum = join(".",unpack("C4",$h));
3655
1
50
25
$IPNUM_memo{$ipaddr} = $ipnum=~/^(\d+\.){3}\d+$/ ? $ipnum : undef;
3656
1
7
return $IPNUM_memo{$ipaddr};
3657
}
3658
3659
=head2 webparams
3660
3661
B (optional)
3662
3663
Zero or one input argument: A string of the same type often found behind the first question mark (C<< ? >>) in URLs.
3664
3665
This string can have one or more parts separated by C<&> chars.
3666
3667
Each part consists of C pairs (with the first C<=> char being the separation char).
3668
3669
Both C and C can be url-encoded.
3670
3671
If there is no input argument, C uses C<< $ENV{QUERY_STRING} >> instead.
3672
3673
If also C<< $ENV{QUERY_STRING} >> is lacking, C checks if C<< $ENV{REQUEST_METHOD} eq 'POST' >>.
3674
In that case C<< $ENV{CONTENT_LENGTH} >> is taken as the number of bytes to be read from C
3675
and those bytes are used as the missing input argument.
3676
3677
The environment variables QUERY_STRING, REQUEST_METHOD and CONTENT_LENGTH is
3678
typically set by a web server following the CGI standard (which Apache and
3679
most of them can do I guess) or in mod_perl by Apache. Although you are
3680
probably better off using L. Or C<< $R->args() >> or C<< $R->content() >> in mod_perl.
3681
3682
B
3683
3684
C returns a hash of the key/value pairs in the input argument. Url-decoded.
3685
3686
If an input string has more than one occurrence of the same key, that keys value in the returned hash will become concatenated each value separated by a C<,> char. (A comma char)
3687
3688
Examples:
3689
3690
use Acme::Tools;
3691
my %R=webparams();
3692
print "Content-Type: text/plain\n\n"; # or rather \cM\cJ\cM\cJ instead of \n\n to be http-compliant
3693
print "My name is $R{name}";
3694
3695
Storing those four lines in a file in the directory designated for CGI-scripts
3696
on your web server (or perhaps naming the file .cgi is enough), and C
3697
/.../cgi-bin/script> and the URL
3698
L will print
3699
C to the web page.
3700
3701
L will print C.
3702
3703
=cut
3704
3705
sub webparams {
3706
2
2
1
401
my $query=shift();
3707
2
50
9
$query=$ENV{QUERY_STRING} if !defined $query;
3708
2
50
33
7
if(!defined $query and $ENV{REQUEST_METHOD} eq "POST"){
3709
0
0
read(STDIN,$query , $ENV{CONTENT_LENGTH});
3710
0
0
$ENV{QUERY_STRING}=$query;
3711
}
3712
2
3
my %R;
3713
2
11
for(split("&",$query)){
3714
7
50
14
next if !length($_);
3715
7
17
my($nkl,$verdi)=map urldec($_),split("=",$_,2);
3716
7
100
27
$R{$nkl}=exists$R{$nkl}?"$R{$nkl},$verdi":$verdi;
3717
}
3718
2
17
return %R;
3719
}
3720
3721
=head2 urlenc
3722
3723
Input: a string
3724
3725
Output: the same string URL encoded so it can be sent in URLs or POST requests.
3726
3727
In URLs (web addresses) certain characters are illegal. For instance I and I.
3728
And certain other chars have special meaning, such as C<+>, C<%>, C<=>, C>, C<&>.
3729
3730
These illegal and special chars needs to be encoded to be sent in
3731
URLs. This is done by sending them as C<%> and two hex-digits. All
3732
chars can be URL encodes this way, but it's necessary just on some.
3733
3734
Example:
3735
3736
$search="Østdal, Åge";
3737
my $url="http://machine.somewhere.com/search?q=" . urlenc($search);
3738
print $url;
3739
3740
Prints C<< http://machine.somewhere.com/search?q=%D8stdal%2C%20%C5ge >>
3741
3742
=cut
3743
3744
sub urlenc {
3745
4
4
1
837
my $str=shift;
3746
4
13
$str=~s/([^\w\-\.\/\,\[\]])/sprintf("%%%02x",ord($1))/eg; #more chars is probably legal...
631
812
3747
4
18
return $str;
3748
}
3749
3750
=head2 urldec
3751
3752
Opposite of L.
3753
3754
Example, this returns 'C< ø>'. That is space and C<< ø >>.
3755
3756
urldec('+%C3')
3757
3758
=cut
3759
3760
sub urldec {
3761
14
14
1
11
my $str=shift;
3762
14
16
$str=~s/\+/ /gs;
3763
14
20
$str=~s/%([a-f\d]{2})/pack("C", hex($1))/egi;
632
1350
3764
14
30
return $str;
3765
}
3766
3767
=head2 ht2t
3768
3769
C is short for I.
3770
3771
This sub extracts an html-C<< >>s and returns its C<< s >>
3772
and C<< s >> as an array of arrayrefs. And strips away any html
3773
inside the C<< s >> as well.
3774
3775
my @table = ht2t($html,'some string occuring before the you want');
3776
3777
Input: One or two arguments.
3778
3779
First argument: the html where a C<< >> is to be found and converted.
3780
3781
Second argument: (optional) If the html contains more than one C<<
3782
>>, and you do not want the first one, applying a second
3783
argument is a way of telling C which to capture: the one with this word
3784
or string occurring before it.
3785
3786
Output: An array of arrayrefs.
3787
3788
C is a quick and dirty way of scraping (or harvesting as it is
3789
also called) data from a web page. Look too L to do this
3790
more accurate.
3791
3792
Example:
3793
3794
use Acme::Tools;
3795
use LWP::Simple;
3796
my $url = "http://en.wikipedia.org/wiki/List_of_countries_by_population";
3797
for( ht2t( get($url), "Countries" ) ) {
3798
my($rank, $country, $pop) = @$_;
3799
$pop =~ s/,//g;
3800
printf "%3d | %-32s | %9d\n", @$_ if $pop>0;
3801
}
3802
3803
Output:
3804
3805
1 | China | 1367740000
3806
2 | India | 1262090000
3807
3 | United States | 319043000
3808
4 | Indonesia | 252164800
3809
5 | Brazil | 203404000
3810
3811
...and so on.
3812
3813
=cut
3814
3815
sub ht2t {
3816
9
100
100
9
1
220
my($f,$s,$r)=@_; 1>@_||@_>3 and croak; $s='' if @_==1;
9
100
237
7
13
3817
7
87
$f=~s,.*?($s).*?(
3818
33
33
20548
my $e=0;$e++ while index($f,$s=chr($e))>=$[;
33
9956
33
96190
7
7
7
29
3819
7
67
$f=~s//\l$1$s/gsi;
3820
7
35
$f=~s/\s*<.*?>\s*/ /gsi;
3821
7
48
my @t=split("r$s",$f);shift @t;
7
5
3822
20
20
17
$r||=sub{s/&(#160|nbsp);/ /g;s/&/&/g;s/^\s*(.*?)\s*$/$1/s;
20
14
20
45
3823
7
100
100
30
s/(\d) (\d)/$1$2/g if /^[\d \.\,]+$/};
20
37
3824
7
9
for(@t){my @r=split/[dh]$s/;shift@r;$_=[map{&$r;$_}@r]}
9
38
9
5
9
10
20
22
20
37
3825
7
27
@t;
3826
}
3827
3828
=head1 FILES, DIRECTORIES
3829
3830
=head2 writefile
3831
3832
Justification:
3833
3834
Perl needs three or four operations to make a file out of a string:
3835
3836
open my $FILE, '>', $filename or die $!;
3837
print $FILE $text;
3838
close($FILE);
3839
3840
This is way simpler:
3841
3842
writefile($filename,$text);
3843
3844
Sub writefile opens the file i binary mode (C) and has two usage modes:
3845
3846
B Two arguments
3847
3848
B is the filename. If the file exists, its overwritten.
3849
If the file can not be opened for writing, a die (a croak really) happens.
3850
3851
B is one of:
3852
3853
=over 4
3854
3855
=item * Either a scaler. That is a normal string to be written to the file.
3856
3857
=item * Or a reference to a scalar. That referred text is written to the file.
3858
3859
=item * Or a reference to an array of scalars. This array is the written to the
3860
file element by element and C<< \n >> is automatically appended to each element.
3861
3862
=back
3863
3864
Alternativelly, you can write several files at once.
3865
3866
Example, this:
3867
3868
writefile('file1.txt','The text....tjo');
3869
writefile('file2.txt','The text....hip');
3870
writefile('file3.txt','The text....and hop');
3871
3872
...is the same as this:
3873
3874
writefile([
3875
['file1.txt','The text....tjo'],
3876
['file2.txt','The text....hip'],
3877
['file3.txt','The text....and hop'],
3878
]);
3879
3880
B Nothing (for the time being). Cs (C really) if something goes wrong.
3881
3882
=cut
3883
3884
#todo: use openstr() as in readfile(), transparently gzip .gz filenames and so on
3885
sub writefile {
3886
9
9
1
81967
my($filename,$text)=@_;
3887
9
50
73
if(ref($filename) eq 'ARRAY'){
3888
0
0
writefile(@$_) for @$filename;
3889
0
0
return;
3890
}
3891
9
50
33
1065
open(WRITEFILE,">",$filename) and binmode(WRITEFILE) or croak($!);
3892
9
50
33
88
if(!defined $text or !ref($text)){
0
0
3893
9
1300
print WRITEFILE $text;
3894
}
3895
elsif(ref($text) eq 'SCALAR'){
3896
0
0
print WRITEFILE $$text;
3897
}
3898
elsif(ref($text) eq 'ARRAY'){
3899
0
0
print WRITEFILE "$_\n" for @$text;
3900
}
3901
else {
3902
0
0
croak;
3903
}
3904
9
274
close(WRITEFILE);
3905
9
2731
return;
3906
}
3907
3908
=head2 readfile
3909
3910
Just as with L you can read in a whole file in one operation with C. Instead of:
3911
3912
open my $FILE,'<', $filename or die $!;
3913
my $data = join"",<$FILE>;
3914
close($FILE);
3915
3916
This is simpler:
3917
3918
my $data = readfile($filename);
3919
3920
B
3921
3922
Reading the content of the file to a scalar variable: (Any content in C<$data> will be overwritten)
3923
3924
my $data;
3925
readfile('filename.txt',\$data);
3926
3927
Reading the lines of a file into an array:
3928
3929
my @lines;
3930
readfile('filnavn.txt',\@lines);
3931
for(@lines){
3932
...
3933
}
3934
3935
Note: Chomp is done on each line. That is, any newlines (C<< \n >>) will be removed.
3936
If C<@lines> is non-empty, this will be lost.
3937
3938
Sub readfile is context aware. If an array is expected it returns an array of the lines without a trailing C<< \n >>.
3939
The last example can be rewritten:
3940
3941
for(readfile('filnavn.txt')){
3942
...
3943
}
3944
3945
With two input arguments, nothing (undef) is returned from C.
3946
3947
=cut
3948
3949
#http://blogs.perl.org/users/leon_timmermans/2013/05/why-you-dont-need-fileslurp.html
3950
#todo: readfile with grep-filter code ref in a third arg (avoid reading all into mem)
3951
3952
sub readfile {
3953
12
12
1
8040
my($filename,$ref)=@_;
3954
12
100
28
if(@_==1){
3955
6
100
13
if(wantarray){ my @data; readfile($filename,\@data); return @data }
1
2
1
4
1
3590
3956
5
5
else { my $data; readfile($filename,\$data); return $data }
5
20
5
418
3957
}
3958
else {
3959
6
50
49
open my $fh,openstr($filename) or croak("ERROR: readfile $! $?");
3960
6
100
34
if ( ref($ref) eq 'SCALAR') { $$ref=join"",<$fh> }
5
50
5934
3961
1
15
elsif( ref($ref) eq 'ARRAY' ) { while(my $l=<$fh>){ chomp($l); push @$ref, $l } }
20003
11907
20003
30113
3962
0
0
else { croak "ERROR: Second arg to readfile should be a ref to a scalar og array" }
3963
6
888
close($fh);
3964
6
43
return;#?
3965
}
3966
}
3967
3968
=head2 readdirectory
3969
3970
B
3971
3972
Name of a directory.
3973
3974
B
3975
3976
A list of all files in it, except of C<.> and C<..> (on linux/unix systems, all directories have a C<.> and C<..> directory).
3977
3978
The names of all types of files are returned: normal files, directories, symbolic links,
3979
pipes, semaphores. That is every thing shown by C except C<.> and C<..>
3980
3981
C do not recurce down into subdirectories (but see example below).
3982
3983
B
3984
3985
my @files = readdirectory("/tmp");
3986
3987
B
3988
3989
Sometimes calling the built ins C, C and C seems a tad tedious, since this:
3990
3991
my $dir="/usr/bin";
3992
opendir(D,$dir);
3993
my @files=map "$dir/$_", grep {!/^\.\.?$/} readdir(D);
3994
closedir(D);
3995
3996
Is the same as this:
3997
3998
my @files=readdirectory("/usr/bin");
3999
4000
See also: L
4001
4002
B
4003
4004
On huge directories with perhaps tens or houndreds of thousands of
4005
files, readdirectory() will consume more memory than perls
4006
opendir/readdir. This isn't usually a concern anymore for modern
4007
computers with gigabytes of RAM, but might be the rationale behind
4008
Perls more tedious way created in the 80s. The same argument goes for
4009
file slurping. On the other side it's also a good practice to never
4010
assume to much on available memory and the number of files if you
4011
don't know for certain that enough memory is available whereever your
4012
code is run or that the size of the directory is limited.
4013
4014
B
4015
4016
How to get all files in the C directory including all subdirectories below of any depth:
4017
4018
my @files=("/tmp");
4019
map {-d $_ and unshift @files,$_ or push @files,$_} readdirectory(shift(@files)) while -d $files[0];
4020
4021
...or to avoid symlinks and only get real files:
4022
4023
map {-d and !-l and unshift @files,$_ or -f and !-l and push @files,$_} readdirectory(shift(@files)) while -d $files[0];
4024
4025
=cut
4026
4027
sub readdirectory {
4028
0
0
1
0
my $dir=shift;
4029
0
0
opendir(my $D,$dir);
4030
0
0
my @filer=map "$dir/$_", grep {!/^\.\.?$/} readdir($D);
0
0
4031
0
0
closedir($D);
4032
0
0
return @filer;
4033
}
4034
4035
=head2 basename
4036
4037
The basename and dirname functions behaves like the *nix shell commands with the same names.
4038
4039
B One or two arguments: Filename and an optional suffix
4040
4041
B Returns the filename with any directory and (if given) the suffix removed.
4042
4043
basename('/usr/bin/perl') # returns 'perl'
4044
basename('/usr/local/bin/report.pl','.pl') # returns 'report' since .pl at the end is removed
4045
basename('report2.pl','.pl') # returns 'report2'
4046
basename('report2.pl','.\w+') # returns 'report2.pl', probably not what you meant
4047
basename('report2.pl',qr/.\w+/) # returns 'report2', use qr for regex
4048
4049
=head2 dirname
4050
4051
B A filename including path
4052
4053
B Removes the filename path and returns just the directory path up until but not including
4054
the last /. Return just a one char C<< . >> (period string) if there is no directory in the input.
4055
4056
dirname('/usr/bin/perl') # returns '/usr/bin'
4057
dirname('perl') # returns '.'
4058
4059
=head2 username
4060
4061
Returns the current linux/unix username, for example the string root
4062
4063
print username(); #just (getpwuid($<))[0] but more readable perhaps
4064
4065
=cut
4066
4067
8
100
8
1
1380
sub basename { my($f,$s)=(@_,'');$s=quotemeta($s)if!ref($s);$f=~m,^(.*/)?([^/]*?)($s)?$,;$2 }
8
20
8
156
8
22
4068
2
100
66
2
1
182
sub dirname { $_[0]=~m,^(.*)/,;defined($1) && length($1) ? $1 : '.' }
2
20
4069
0
0
1
0
sub username { (getpwuid($<))[0] }
4070
4071
=head2 wipe
4072
4073
Deletes a file by "wiping" it on the disk. Overwrites the file before deleting. (May not work properly on SSDs)
4074
4075
B
4076
* Arg 1: A filename
4077
* Optional arg 2: number of times to overwrite file. Default is 3 if omitted, 0 or undef
4078
* Optional arg 3: keep (true/false), wipe() but no delete of file
4079
4080
B Same as the C (remove file): 1 for success, 0 or false for failure.
4081
4082
See also: L, L
4083
4084
=cut
4085
4086
sub wipe {
4087
2
2
1
2945
my($file,$times,$keep)=@_;
4088
2
100
9
$times||=3;
4089
2
50
33
47
croak "ERROR: File $file nonexisting\n" if not -f $file or not -e $file;
4090
2
13
my $size=-s$file;
4091
2
50
49
open my $WIFH, '+<', $file or croak "ERROR: Unable to open $file: $!\n";
4092
2
3
binmode($WIFH);
4093
2
6
for(1..$times){
4094
4
12
my $block=chr(int(rand(256))) x 1024;#hm
4095
4
8
for(0..($size/1024)){
4096
73
237
seek($WIFH,$_*1024,0);
4097
73
71
print $WIFH $block;
4098
}
4099
}
4100
2
16
close($WIFH);
4101
2
100
114
$keep || unlink($file);
4102
}
4103
4104
=head2 chall
4105
4106
Does chmod + utime + chown on one or more files.
4107
4108
Returns the number of files of which those operations was successful.
4109
4110
Mode, uid, gid, atime and mtime are set from the array ref in the first argument.
4111
4112
The first argument references an array which is exactly like an array returned from perls internal C -function.
4113
4114
Example:
4115
4116
my @stat=stat($filenameA);
4117
chall( \@stat, $filenameB, $filenameC, ... ); # by stat-array
4118
chall( $filenameA, $filenameB, $filenameC, ... ); # by file name
4119
4120
Copies the chmod, owner, group, access time and modify time from file A to file B and C.
4121
4122
See C, C, C, C
4123
4124
=cut
4125
4126
4127
sub chall {
4128
my($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,$atime,$mtime,$ctime,$blksize,$blocks )
4129
12
100
12
1
12777
= ref($_[0]) ? @{shift()} : stat(shift());
1
4
4130
12
52
my $successful=0;
4131
12
50
33
70
for(@_){ chmod($mode,$_) && utime($atime,$mtime,$_) && chown($uid,$gid,$_) && $successful++ }
12
33
3654
4132
12
76
return $successful;
4133
}
4134
4135
=head2 makedir
4136
4137
Input: One or two arguments.
4138
4139
Works like perls C except that C will create nesessary parent directories if they dont exists.
4140
4141
First input argument: A directory name (absolute, starting with C< / > or relative).
4142
4143
Second input argument: (optional) permission bits. Using the normal C<< 0777^umask() >> as the default if no second input argument is provided.
4144
4145
Example:
4146
4147
makedir("dirB/dirC")
4148
4149
...will create directory C if it does not already exists, to be able to create C inside C.
4150
4151
Returns true on success, otherwise false.
4152
4153
C memoizes directories it has checked for existence before (trading memory and for speed).
4154
Thus directories removed during running the script is not discovered by makedir.
4155
4156
See also C<< perldoc -f mkdir >>, C<< man umask >>
4157
4158
=cut
4159
4160
our %MAKEDIR;
4161
4162
sub makedir {
4163
0
0
1
0
my($d,$p,$dd)=@_;
4164
0
0
0
$p=0777^umask() if !defined$p;
4165
(
4166
$MAKEDIR{$d} or -d$d or mkdir($d,$p) #or croak("mkdir $d, $p")
4167
or ($dd)=($d=~m,^(.+)/+([^/]+)$,) and makedir($dd,$p) and mkdir($d,$p) #or die;
4168
0
0
0
0
) and ++$MAKEDIR{$d};
0
0
0
0
4169
}
4170
4171
=head2 md5sum
4172
4173
B a filename.
4174
4175
B a string of 32 hexadecimal chars from 0-9 or a-f.
4176
4177
Example, the md5sum gnu/linux command without options could be implementet like this:
4178
4179
use Acme::Tools;
4180
print eval{ md5sum($_)." $_\n" } || $@ for @ARGV;
4181
4182
This sub requires L, which is a core perl-module since
4183
version 5.?.? It does not slurp the files or spawn new processes.
4184
4185
=cut
4186
4187
sub md5sum {
4188
2
2
1
15
require Digest::MD5;
4189
2
3
my $fn=shift;
4190
2
100
168
croak "md5sum: $fn is a directory (no md5sum)" if -d $fn;
4191
1
50
21
open my $FH, '<', $fn or croak "Could not open file $fn for md5sum() $!";
4192
1
2
binmode($FH);
4193
1
1
my $r = eval { Digest::MD5->new->addfile($FH)->hexdigest };
1
17
4194
1
50
3
croak "md5sum on $fn failed ($@)\n" if $@;
4195
1
12
$r;
4196
}
4197
4198
=head2 read_conf
4199
4200
B A file name or a reference to a string with settings in the format described below.
4201
4202
B A reference to a hash. This hash will have the settings from the file (or stringref).
4203
The hash do not have to be empty beforehand.
4204
4205
Returns a hash with the settings as in this examples:
4206
4207
my %conf = read_conf('/etc/your/thing.conf');
4208
print $conf{sectionA}{knobble}; #prints ABC if the file is as shown below
4209
print $conf{sectionA}{gobble}; #prints ZZZ, the last gobble
4210
print $conf{switch}; #prints OK here as well, unsectioned value
4211
print $conf{part2}{password}; #prints oh:no= x
4212
4213
File use for the above example:
4214
4215
switch: OK #before first section, the '' (empty) section
4216
[sectionA]
4217
knobble: ABC
4218
gobble: XYZ #this gobble is overwritten by the gobble on the next line
4219
gobble: ZZZ
4220
[part2]
4221
password: oh:no= x #should be better
4222
text: { values starting with { continues
4223
until reaching a line with }
4224
4225
Everything from # and behind is regarded comments and ignored. Comments can be on any line.
4226
To keep a # char, put a \ in front of it.
4227
4228
A C< : > or C< = > separates keys and values. Spaces at the beginning or end of lines are
4229
ignored (after removal of #comments), as are any spaces before and after : and = separators.
4230
4231
Empty lines or lines with no C< : > or C< = > is also ignored. Keys and values can contain
4232
internal spaces and tabs, but not at the beginning or end.
4233
4234
Multi-line values must start and end with { and }. Using { and } keep spaces at the start
4235
or end in both one-line and multi-line values.
4236
4237
Sections are marked with C<< [sectionname] >>. Section names, keys and values is case
4238
sensitive. C above the first section or below and empty C<< [] >> is placed
4239
both in the empty section in the returned hash and as top level key/values.
4240
4241
C can be a simpler alternative to the core module L which has
4242
its own hassles.
4243
4244
$Acme::Tools::Read_conf_empty_section=1; #default 0 (was 1 in version 0.16)
4245
my %conf = read_conf('/etc/your/thing.conf');
4246
print $conf{''}{switch}; #prints OK with the file above
4247
print $conf{switch}; #prints OK here as well
4248
4249
=cut
4250
4251
our $Read_conf_empty_section=0;
4252
sub read_conf {
4253
3
3
1
11
my($fn,$hr)=(@_,{});
4254
3
100
11
my $conf=ref($fn)?$$fn:readfile($fn);
4255
3
61
$conf=~s,\s*(?
4256
3
26
my($section,@l)=('',split"\n",$conf);
4257
3
9
while(@l) {
4258
63
44
my $l=shift@l;
4259
63
100
199
if( $l=~/^\s*\[\s*(.*?)\s*\]/ ) {
100
4260
12
13
$section=$1;
4261
12
100
40
$$hr{$1}||={};
4262
}
4263
elsif( $l=~/^\s*([^\:\=]+)[:=]\s*(.*?)\s*$/ ) {
4264
33
66
33
63
my $ml=sub{my$v=shift;$v.="\n".shift@l while $v=~/^\{[^\}]*$/&&@l;$v=~s/^\{(.*)\}\s*$/$1/s;$v=~s,\\#,#,g;$v};
33
38
33
106
33
39
33
30
33
33
4265
33
35
my $v=&$ml($2);
4266
33
100
66
81
$$hr{$section}{$1}=$v if length($section) or $Read_conf_empty_section;
4267
33
100
103
$$hr{$1}=$v if !length($section);
4268
}
4269
}
4270
3
19
%$hr;
4271
}
4272
# my $incfn=sub{return $1 if $_[0]=~m,^(/.+),;my$f=$fn;$f=~s,[^/]+$,$_[0],;$f};
4273
# s,]+)>,"".readfile(&$incfn($1)),eg; #todo
4274
4275
4276
=head2 openstr
4277
4278
# returned from openstr:
4279
open my $FH, openstr("fil.txt") or die; # fil.txt
4280
open my $FH, openstr("fil.gz") or die; # zcat fil.gz |
4281
open my $FH, openstr("fil.bz2") or die; # bzcat fil.bz2 |
4282
open my $FH, openstr("fil.xz") or die; # xzcat fil.xz |
4283
open my $FH, openstr(">fil.txt") or die; # > fil.txt
4284
open my $FH, openstr(">fil.gz") or die; # | gzip > fil.gz
4285
open my $FH, openstr(">fil.bz2") or die; # | bzip2 > fil.bz2
4286
open my $FH, openstr(">fil.xz") or die; # | xz > fil.bz2
4287
4288
Environment variable PATH is used. So in the examples above, /bin/gzip
4289
is returned instead of gzip if /bin is the first directory in
4290
$ENV{PATH} containing an executable file gzip. Dirs /usr/bin, /bin and
4291
/usr/local/bin is added to PATH in openstr(). They are checked even if
4292
PATH is empty.
4293
4294
=cut
4295
4296
our @Openstrpath=(grep$_,split(":",$ENV{PATH}),qw(/usr/bin /bin /usr/local/bin));
4297
sub openstr {
4298
22
22
1
2900
my($fn,$ext)=(shift()=~/^(.*?(?:\.(t?gz|bz2|xz))?)$/i);
4299
22
100
263
return $fn if !$ext;
4300
12
50
12
37
my $prog=sub{@Openstrpath or return $_[0];(grep -x$_, map "$_/$_[0]", @Openstrpath)[0] or croak"$_[0] not found"};
12
100
22
12
1441
4301
$fn =~ /^\s*>/
4302
? "| ".(&$prog({qw/gz gzip bz2 bzip2 xz xz tgz gzip/ }->{lc($ext)})).$fn
4303
12
100
65
: &$prog({qw/gz zcat bz2 bzcat xz xzcat tgz zcat/}->{lc($ext)})." $fn |";
4304
}
4305
4306
=head1 TIME FUNCTIONS
4307
4308
=head2 tms
4309
4310
Timestring, works somewhat like the Gnu/Linux C command and Oracle's C
4311
4312
Converts timestamps to more readable forms of time strings.
4313
4314
Converts seconds since I and time strings on the form C to other forms.
4315
4316
B One, two or three arguments.
4317
4318
B A format string.
4319
4320
B An epock C number or a time
4321
string of the form YYYYMMDD-HH24:MI:SS or YYYYMMDDTHH:MI:SS or
4322
YYYY-MM-DDTHH:MI:SS (in which T is litteral and HH is the 24-hour
4323
version of hours) or YYYYMMDD. Uses the current C if the
4324
second argument is missing.
4325
4326
TODO: Formats with % as in C (C<%Y%m%d> and so on)
4327
4328
B True or false. If true and first argument
4329
is eight digits: Its interpreted as a date like YYYYMMDD time string,
4330
not an epoch time. If true and first argument is six digits its
4331
interpreted as a date like DDMMYY (not YYMMDD!).
4332
4333
B a date or clock string on the wanted form.
4334
4335
B
4336
4337
Prints C<< 3. july 1997 >> if thats the dato today:
4338
4339
perl -MAcme::Tools -le 'print timestr("D. month YYYY")'
4340
4341
print tms("HH24:MI"); # prints 23:55 if thats the time now
4342
tms("HH24:MI",time()); # ...same,since time() is the default
4343
tms("HH:MI",time()-5*60); # 23:50 if that was the time 5 minutes ago
4344
tms("HH:MI",time()-5*60*60); # 18:55 if thats the time 5 hours ago
4345
tms("Day Month Dth YYYY HH:MI"); # Saturday July 1st 2004 23:55 (big S, big J)
4346
tms("Day D. Month YYYY HH:MI"); # Saturday 8. July 2004 23:55 (big S, big J)
4347
tms("DAY D. MONTH YYYY HH:MI"); # SATURDAY 8. JULY 2004 23:55 (upper)
4348
tms("dy D. month YYYY HH:MI"); # sat 8. july 2004 23:55 (small s, small j)
4349
tms("Dy DD. MON YYYY HH12:MI am"); # Sat 08. JUL 2004 11:55 pm (HH12, am becomes pm if after 12)
4350
tms("DD-MON-YYYY"); # 03-MAY-2004 (mon, english)
4351
4352
The following list of codes in the first argument will be replaced:
4353
4354
YYYY Year, four digits
4355
YY Year, two digits, i.e. 04 instead of 2004
4356
yyyy Year, four digits, but nothing if its the current year
4357
YYYY|HH:MI Year if its another year than the current, a time in hours and minutes elsewise
4358
MM Month, two digits. I.e. 08 for August
4359
DD Day of month, two digits. I.e. 01 (not 1) for the first day in a month
4360
D Day of month, one digit. I.e. 1 (not 01)
4361
HH Hour. From 00 to 23.
4362
HH24 Same as HH.
4363
HH12 12 becomes 12 (never 00), 13 becomes 01, 14 02 and so on.
4364
Note: 00 after midnight becomes 12 (am). Tip: always include the code
4365
am in a format string that uses HH12.
4366
MI Minutt. Fra 00 til 59.
4367
SS Sekund. Fra 00 til 59.
4368
am Becomes am or pm
4369
pm Same
4370
AM Becomes AM or PM (upper case)
4371
PM Same
4372
4373
Month The full name of the month in English from January to December
4374
MONTH Same in upper case (JANUARY)
4375
month Same in lower case (january)
4376
Mont Jan Feb Mars Apr May June July Aug Sep Oct Nov Dec
4377
Mont. Jan. Feb. Mars Apr. May June July Aug. Sep. Oct. Nov. Dec. (always four chars)
4378
Mon Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec (always three chars)
4379
4380
Day The full name of the weekday. Sunday to Saturday
4381
Dy Three letters: Sun Mon Tue Wed Thu Fri Sat
4382
DAY Upper case
4383
DY Upper case
4384
Dth 1st 2nd 3rd 4th 5th ... 11th 12th ... 20th 21st 22nd 23rd 24th ... 30th 31st
4385
4386
WW Week number of the year 01-53 according to the ISO8601-definition (which most countries uses)
4387
WWUS Week number of the year 01-53 according to the most used definition in the USA.
4388
Other definitions also exists.
4389
4390
epoch Converts a time string from YYYYMMDD-HH24:MI:SS, YYYYMMDD-HH24:MI:SS, YYYYMMDDTHH:MI:SS,
4391
YYYY-MM-DDTHH:MI:SS or YYYYMMDD to the number of seconds since January 1st 1970.
4392
Commonly known as the Unix epoch.
4393
4394
JDN Julian day number. Integer. The number of days since the day starting at noon on January 1 4713 BC
4395
JD Same as JDN but a float accounting for the time of day
4396
4397
TODO: sub smt() (tms backward... or something better named, converts the other way)
4398
As to_date and to_char in Oracle. Se maybe L instead
4399
4400
B (optional) Is_date. False|true, default false. If true, the second argument is
4401
interpreted as a date of the form YYYYMMDD, not as a number of seconds since epoch (January 1st 1970).
4402
4403
=cut
4404
4405
#Se også L og L
4406
4407
our $Tms_pattern;
4408
our %Tms_str=
4409
('MÅNED' => [4, 'JANUAR','FEBRUAR','MARS','APRIL','MAI','JUNI','JULI',
4410
'AUGUST','SEPTEMBER','OKTOBER','NOVEMBER','DESEMBER' ],
4411
'Måned' => [4, 'Januar','Februar','Mars','April','Mai','Juni','Juli',
4412
'August','September','Oktober','November','Desember'],
4413
'måned' => [4, 'januar','februar','mars','april','mai','juni','juli',
4414
'august','september','oktober','november','desember'],
4415
'MÅNE.' => [4, 'JAN.','FEB.','MARS','APR.','MAI','JUNI','JULI','AUG.','SEP.','OKT.','NOV.','DES.'],
4416
'Måne.' => [4, 'Jan.','Feb.','Mars','Apr.','Mai','Juni','Juli','Aug.','Sep.','Okt.','Nov.','Des.'],
4417
'måne.' => [4, 'jan.','feb.','mars','apr.','mai','juni','juli','aug.','sep.','okt.','nov.','des.'],
4418
'MÅNE' => [4, 'JAN','FEB','MARS','APR','MAI','JUNI','JULI','AUG','SEP','OKT','NOV','DES'],
4419
'Måne' => [4, 'Jan','Feb','Mars','Apr','Mai','Juni','Juli','Aug','Sep','Okt','Nov','Des'],
4420
'måne' => [4, 'jan','feb','mars','apr','mai','juni','juli','aug','sep','okt','nov','des'],
4421
'MÅN' => [4, 'JAN','FEB','MAR','APR','MAI','JUN','JUL','AUG','SEP','OKT','NOV','DES'],
4422
'Mån' => [4, 'Jan','Feb','Mar','Apr','Mai','Jun','Jul','Aug','Sep','Okt','Nov','Des'],
4423
'mån' => [4, 'jan','feb','mar','apr','mai','jun','jul','aug','sep','okt','nov','des'],
4424
'MONTH' => [4, 'JANUARY','FEBRUARY','MARCH','APRIL','MAY','JUNE','JULY',
4425
'AUGUST','SEPTEMBER','OCTOBER','NOVEMBER','DECEMBER'],
4426
'Month' => [4, 'January','February','March','April','May','June','July',
4427
'August','September','October','November','December'],
4428
'month' => [4, 'january','february','march','april','may','june','july',
4429
'august','september','october','november','december'],
4430
'MONT.' => [4, 'JAN.','FEB.','MAR.','APR.','MAY','JUNE','JULY','AUG.','SEP.','OCT.','NOV.','DEC.'],
4431
'Mont.' => [4, 'Jan.','Feb.','Mar.','Apr.','May','June','July','Aug.','Sep.','Oct.','Nov.','Dec.'],
4432
'mont.' => [4, 'jan.','feb.','mar.','apr.','may','june','july','aug.','sep.','oct.','nov.','dec.'],
4433
'MONT' => [4, 'JAN','FEB','MAR','APR','MAY','JUNE','JULY','AUG','SEP','OCT','NOV','DEC'],
4434
'Mont' => [4, 'Jan','Feb','Mar','Apr','May','June','July','Aug','Sep','Oct','Nov','Dec'],
4435
'mont' => [4, 'jan','feb','mar','apr','may','june','july','aug','sep','oct','nov','dec'],
4436
'MON' => [4, 'JAN','FEB','MAR','APR','MAY','JUN','JUL','AUG','SEP','OCT','NOV','DEC'],
4437
'Mon' => [4, 'Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'],
4438
'mon' => [4, 'jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec'],
4439
'DAY' => [6, 'SUNDAY','MONDAY','TUESDAY','WEDNESDAY','THURSDAY','FRIDAY','SATURDAY'],
4440
'Day' => [6, 'Sunday','Monday','Tuesday','Wednesday','Thursday','Friday','Saturday'],
4441
'day' => [6, 'sunday','monday','tuesday','wednesday','thursday','friday','saturday'],
4442
'DY' => [6, 'SUN','MON','TUE','WED','THU','FRI','SAT'],
4443
'Dy' => [6, 'Sun','Mon','Tue','Wed','Thu','Fri','Sat'],
4444
'dy' => [6, 'sun','mon','tue','wed','thu','fri','sat'],
4445
'DAG' => [6, 'SØNDAG','MANDAG','TIRSDAG','ONSDAG','TORSDAG','FREDAG','LØRDAG'],
4446
'Dag' => [6, 'Søndag','Mandag','Tirsdag','Onsdag','Torsdag','Fredag','Lørdag'],
4447
'dag' => [6, 'søndag','mandag','tirsdag','onsdag','torsdag','fredag','lørdag'],
4448
'DG' => [6, 'SØN','MAN','TIR','ONS','TOR','FRE','LØR'],
4449
'Dg' => [6, 'Søn','Man','Tir','Ons','Tor','Fre','Lør'],
4450
'dg' => [6, 'søn','man','tir','ons','tor','fre','lør'],
4451
);
4452
my $_tms_inited=0;
4453
sub tms_init {
4454
1
50
1
0
3
return if $_tms_inited++;
4455
1
2
for(qw(MAANED Maaned maaned MAAN Maan maan),'MAANE.','Maane.','maane.'){
4456
9
12
$Tms_str{$_}=$Tms_str{replace($_,"aa","å","AA","Å")};
4457
}
4458
45
40
$Tms_pattern=join("|",map{quotemeta($_)}
4459
1
13
sort{length($b)<=>length($a)}
197
115
4460
keys %Tms_str);
4461
#uten sort kan "måned" bli "mared", fordi "mån"=>"mar"
4462
}
4463
4464
0
0
sub totime {
4465
4466
}
4467
4468
sub date_ok {
4469
0
0
0
0
my($y,$m,$d)=@_;
4470
0
0
0
0
return date_ok($1,$2,$3) if @_==1 and $_[0]=~/^(\d{4})(\d\d)(\d\d)$/;
4471
0
0
0
return 0 if $y!~/^\d\d\d\d$/;
4472
0
0
0
0
return 0 if $m<1||$m>12||$d<1||$d>(31,$y%4||$y%100==0&&$y%400?28:29,31,30,31,30,31,31,30,31,30,31)[$m-1];
0
0
0
0
4473
0
0
return 1;
4474
}
4475
4476
sub weeknum {
4477
0
0
0
0
0
return weeknum(tms('YYYYMMDD')) if @_<1;
4478
0
0
0
0
return weeknum($1,$2,$3) if @_==1 and $_[0]=~/^(\d{4})(\d\d)(\d\d)$/;
4479
0
0
my($year,$month,$day)= @_;
4480
0
0
eval{
4481
0
0
0
if(@_<2){
0
4482
0
0
0
if($year=~/^\d{8}$/) { ($year,$month,$day)=unpack("A4A2A2",$year) }
0
0
0
4483
0
0
elsif($year>99999999){ ($year,$month,$day)=(localtime($year))[5,4,3]; $year+=1900; $month++ }
0
0
0
0
4484
0
0
else {die}
4485
}
4486
0
0
elsif(@_!=3){croak}
4487
0
0
0
croak if !date_ok(sprintf("%04d%02d%02d",$year,$month,$day));
4488
};
4489
0
0
0
croak "ERROR: Wrong args Acme::Tools::weeknum(".join(",",@_).")" if $@;
4490
33
33
14597
use integer;#heltallsdivisjon
33
275
33
127
4491
0
0
my $y=$year+4800-(14-$month)/12;
4492
0
0
my $j=$day+(153*($month+(14-$month)/12*12-3)+2)/5+365*$y+$y/4-$y/100+$y/400-32045;
4493
0
0
my $d=($j+31741-$j%7)%146097%36524%1461;
4494
0
0
return (($d-$d/1460)%365+$d/1460)/7+1;
4495
}
4496
4497
#perl -MAcme::Tools -le 'print "$_ ".tms($_."0501","day",1) for 2015..2026'
4498
4499
sub tms {
4500
42
50
66
42
1
5873
return undef if @_>1 and not defined $_[1]; #time=undef => undef
4501
42
100
57
if(@_==1){
4502
2
8
my @lt=localtime();
4503
2
50
5
$_[0] eq 'YYYY' and return 1900+$lt[5];
4504
2
50
4
$_[0] eq 'YYYYMMDD' and return sprintf("%04d%02d%02d",1900+$lt[5],1+$lt[4],$lt[3]);
4505
2
100
66
27
$_[0] =~ $Re_isnum and @lt=localtime($_[0]) and return sprintf("%04d%02d%02d-%02d:%02d:%02d",1900+$lt[5],1+$lt[4],@lt[3,2,1,0]);
4506
}
4507
41
42
my($format,$time,$is_date)=@_;
4508
41
100
80
$time=time_fp() if !defined$time;
4509
41
100
100
235
($time,$format)=($format,$time) if @_>=2 and $format=~/^[\d+\:\-\.]+$/; #swap /hm/
4510
41
168
my @lt=localtime($time);
4511
#todo? $is_date=0 if $time=~s/^\@(\-?\d)/$1/; #@n where n is sec since epoch makes it clear that its not a formatted, as in `date`
4512
#todo? date --date='TZ="America/Los_Angeles" 09:00 next Fri' #`info date`
4513
# Fri Nov 13 18:00:00 CET 2015
4514
#date --date="next Friday" #--date or -d
4515
#date --date="last friday"
4516
#date --date="2 days ago"
4517
#date --date="yesterday" #or tomorrow
4518
#date --date="-1 day" #date --date='10 week'
4519
4520
41
50
57
if( $is_date ){
4521
0
0
0
0
my $yy2c=sub{10+$_[0]>$lt[5]%100?"20":"19"}; #hm 10+
0
0
4522
0
0
0
$time=totime(&$yy2c($1)."$1$2$3")."000000" if $time=~/^(\d\d)(\d\d)(\d\d)$/;
4523
0
0
0
$time=totime("$1$2${3}000000") if $time=~/^((?:18|19|20)\d\d)(\d\d)(\d\d)$/; #hm 18-20?
4524
}
4525
else {
4526
41
50
86
$time = yyyymmddhh24miss_time("$1$2$3$4$5$6") #yyyymmddhh24miss_time ???
4527
if $time=~/^((?:19|20|18)\d\d) #yyyy
4528
(0[1-9]|1[012]) #mm
4529
(0[1-9]|[12]\d|3[01]) \-? #dd
4530
([01]\d|2[0-3]) \:? #hh24
4531
([0-5]\d) \:? #mi
4532
([0-5]\d) $/x; #ss
4533
}
4534
41
100
57
tms_init() if !$_tms_inited;
4535
41
100
58
return sprintf("%04d%02d%02d-%02d:%02d:%02d",1900+$lt[5],1+$lt[4],@lt[3,2,1,0]) if !$format;
4536
40
339
my %p=('%'=>'%',
4537
a=>'Dy',
4538
A=>'Day',
4539
b=>'Mon',
4540
b=>'Month',
4541
c=>'Dy Mon D HH:MI:SS YYYY',
4542
C=>'CC',
4543
d=>'DD',
4544
D=>'MM/DD/YY',
4545
e=>'D',
4546
F=>'YYYY-MM-DD',
4547
#G=>'',
4548
h=>'Month', H=>'HH24', I=>'HH12',
4549
j=>'DoY', #day of year
4550
k=>'H24', _H=>'H24',
4551
l=>'H12', _I=>'H12',
4552
m=>'MM', M=>'MI',
4553
n=>"\n",
4554
#N=>'NS', #sprintf%09d,1e9*(time_fp()-time()) #000000000..999999999
4555
p=>'AM', #AM|PM upper (yes, opposite: date +%H%M%S%P%p)
4556
P=>'am', #am|pm lower
4557
S=>'SS',
4558
t=>"\t",
4559
T=>'HH24:MI:SS',
4560
u=>'DoW', #day of week 1..7, 1=mon 7=sun
4561
w=>'DoW0', #day of week 0..6, 1=mon 0=sun
4562
#U=>'WoYs', #week num of year 00..53, sunday as first day of week
4563
#V=>'UKE', #ISO week num of year 01..53, monday as first day of week
4564
#W=>'WoYm', #week num of year 00..53, monday as first day of week, not ISO!
4565
#x=>$ENV{locale's date representation}, #e.g. MM/DD/YY
4566
#X=>$ENV{locale's time representation}, #e.g. HH/MI/SS
4567
y=>'YY',
4568
Y=>'YYYY',
4569
#z=>'TZHHMI', #time zone hour minute e.g. -0430
4570
#':z'=>'TZHH:MI',
4571
#'::z'=>'TZHH:MI:SS',
4572
#':::z'=>'TZ', #number of :'s necessary precision, e.g. -02 or +03:30
4573
#Z=>'TZN', #e.g. CET, EDT, ...
4574
);
4575
40
178
my $pkeys=join"|",keys%p;
4576
40
1229
$format=~s,\%($pkeys),$p{$1},g;
4577
40
285
$format=~s/($Tms_pattern)/$Tms_str{$1}[1+$lt[$Tms_str{$1}[0]]]/g;
4578
40
49
$format=~s/YYYY / 1900+$lt[5] /gxe;
13
27
4579
40
0
31
$format=~s/(\s?)yyyy / $lt[5]==(localtime)[5]?"":$1.(1900+$lt[5])/gxe;
0
0
4580
40
40
$format=~s/YY / sprintf("%02d",$lt[5]%100) /gxei;
1
5
4581
40
42
$format=~s|CC | sprintf("%02d",(1900+$lt[5])/100) |gxei;
2
9
4582
40
35
$format=~s/MM / sprintf("%02d",$lt[4]+1) /gxe;
5
16
4583
40
21
$format=~s/mm / sprintf("%d",$lt[4]+1) /gxe;
0
0
4584
40
33
$format=~s,M/ , ($lt[4]+1).'/' ,gxe;
2
4
4585
40
32
$format=~s,/M , '/'.($lt[4]+1) ,gxe;
1
2
4586
40
27
$format=~s/DD / sprintf("%02d",$lt[3]) /gxe;
6
15
4587
40
60
$format=~s/d0w|dow0 / $lt[6] /gxei;
4
9
4588
40
100
33
$format=~s/dow / $lt[6]?$lt[6]:7 /gxei;
2
5
4589
40
45
$format=~s/d0y|doy0 / $lt[7] /gxei; #0-364 (365 leap)
2
3
4590
40
37
$format=~s/doy / $lt[7]+1 /gxei; #1-365 (366 leap)
1
3
4591
40
56
$format=~s/D(?![AaGgYyEeNn]) / $lt[3] /gxe; #EN pga desember og wednesday
5
7
4592
40
24
$format=~s/dd / sprintf("%d",$lt[3]) /gxe;
0
0
4593
40
100
50
53
$format=~s/hh12|HH12 / sprintf("%02d",$lt[2]<13?$lt[2]||12:$lt[2]-12)/gxe;
8
41
4594
40
45
$format=~s/HH24|HH24|HH|hh / sprintf("%02d",$lt[2]) /gxe;
4
14
4595
40
45
$format=~s/MI / sprintf("%02d",$lt[1]) /gxei;
12
22
4596
40
100
36
$format=~s{SS\.([1-9]) }{ sprintf("%0*.$1f",3+$1,$lt[0]+(repl($time,qr/^[^\.]+/)||0)) }gxei;
4
23
4597
40
48
$format=~s/SS(?:\.0)? / sprintf("%02d",$lt[0]) /gxei;
5
14
4598
40
100
40
$format=~s/(?:am|pm|apm|xm) / $lt[2]<13 ? 'am' : 'pm' /gxe;
4
9
4599
40
100
45
$format=~s/(?:AM|PM|APM|XM) / $lt[2]<13 ? 'AM' : 'PM' /gxe;
4
8
4600
40
43
$format=~s/WWI|WW / sprintf("%02d",weeknum($time)) /gxei;
0
0
4601
40
27
$format=~s/W / weeknum($time) /gxei;
0
0
4602
40
204
$format;
4603
}
4604
4605
=head2 easter
4606
4607
Input: A year (a four digit number)
4608
4609
Output: array of two numbers: day and month of Easter Sunday that year. Month 3 means March and 4 means April.
4610
4611
sub easter { use integer;my$Y=shift;my$C=$Y/100;my$L=($C-$C/4-($C-($C-17)/25)/3+$Y%19*19+15)%30;
4612
(($L-=$L>28||($L>27?1-(21-$Y%19)/11:0))-=($Y+$Y/4+$L+2-$C+$C/4)%7)<4?($L+28,3):($L-3,4) }
4613
4614
...is a "golfed" version of Oudins algorithm (1940) L
4615
(see also http://www.smart.net/~mmontes/ec-cal.html )
4616
4617
Valid for any Gregorian year. Dates repeat themselves after 70499183
4618
lunations = 2081882250 days = ca 5699845 years. However, our planet will
4619
by then have a different rotation and spin time...
4620
4621
Example:
4622
4623
( $day, $month ) = easter( 2012 ); # $day == 8 and $month == 4
4624
4625
Example 2:
4626
4627
my @e=map sprintf("%02d%02d", reverse(easter($_))), 1800..300000;
4628
print "First: ".min(@e)." Last: ".max(@e)."\n"; # First: 0322 Last: 0425
4629
4630
Note: The Spencer Jones formula differs Oudins used in C in some years
4631
before 1498. However, in that period the Julian calendar with a different formula was
4632
used anyway. Countries introduced the current Gregorian calendar between 1583 and 1926.
4633
4634
=cut
4635
4636
33
33
1
43995
sub easter { use integer;my$Y=shift;my$C=$Y/100;my$L=($C-$C/4-($C-($C-17)/25)/3+$Y%19*19+15)%30;
33
13503
42
33
91
13503
86855
13503
9013
13503
13396
4637
13503
100
66
39767
(($L-=$L>28||($L>27?1-(21-$Y%19)/11:0))-=($Y+$Y/4+$L+2-$C+$C/4)%7)<4?($L+28,3):($L-3,4) }
4638
4639
4640
=head2 time_fp
4641
4642
No input arguments.
4643
4644
Return the same number as perls C except with decimals (fractions of a second, _fp as in floating point number).
4645
4646
print time_fp(),"\n";
4647
print time(),"\n";
4648
4649
Could write:
4650
4651
1116776232.38632
4652
4653
...if that is the time now.
4654
4655
Or just:
4656
4657
1116776232
4658
4659
...from perl's internal C if C isn't installed and available.
4660
4661
4662
=cut
4663
4664
sub time_fp { # {return 0+gettimeofday} is just as well?
4665
12446
50
12446
1
10467
eval{ require Time::HiRes } or return time();
12446
39223
4666
12446
27137
my($sec,$mic)=Time::HiRes::gettimeofday();
4667
12446
25878
return $sec+$mic/1e6; #1e6 not portable?
4668
}
4669
4670
sub timems {
4671
0
0
0
0
0
eval{ require Time::HiRes } or return time();
0
0
4672
0
0
my($sec,$mic)=Time::HiRes::gettimeofday();
4673
0
0
return $sec*1000+$mic/1e3;
4674
}
4675
4676
=head2 sleep_fp
4677
4678
sleep_fp() work as the built in C<< sleep() >> but also accepts fractional seconds:
4679
4680
sleep_fp(0.020); # sleeps for 20 milliseconds
4681
4682
Sub sleep_fp do a C, thus it might take some
4683
extra time the first call. To avoid that, add C<< use Time::HiRes >>
4684
to your code. Sleep_fp should not be trusted for accuracy to more than
4685
a tenth of a second. Virtual machines tend to be less accurate (sleep
4686
longer) than physical ones. This was tested on VMware and RHEL
4687
(Linux). See also L.
4688
4689
=head2 sleeps
4690
4691
=head2 sleepms
4692
4693
=head2 sleepus
4694
4695
=head2 sleepns
4696
4697
sleep_fp(0.020); #sleeps for 20 milliseconds
4698
sleeps(0.020); #sleeps for 20 milliseconds, sleeps() is a synonym to sleep_fp()
4699
sleepms(20); #sleeps for 20 milliseconds
4700
sleepus(20000); #sleeps for 20000 microseconds = 20 milliseconds
4701
sleepns(20000000); #sleeps for 20 million nanoseconds = 20 milliseconds
4702
4703
=cut
4704
4705
5
50
5
1
18
sub sleep_fp { eval{require Time::HiRes} or (sleep(shift()),return);Time::HiRes::sleep(shift()) }
5
30
5
170554
4706
1
50
1
1
5
sub sleeps { eval{require Time::HiRes} or (sleep(shift()),return);Time::HiRes::sleep(shift()) }
1
10
1
10093
4707
1
50
1
1
8
sub sleepms { eval{require Time::HiRes} or (sleep(shift()/1e3),return);Time::HiRes::sleep(shift()/1e3) }
1
9
1
10092
4708
1
50
1
1
6
sub sleepus { eval{require Time::HiRes} or (sleep(shift()/1e6),return);Time::HiRes::sleep(shift()/1e6) }
1
9
1
10088
4709
1
50
1
1
22
sub sleepns { eval{require Time::HiRes} or (sleep(shift()/1e9),return);Time::HiRes::sleep(shift()/1e9) }
1
9
1
10172
4710
4711
=head2 eta
4712
4713
Estimated time of arrival (ETA).
4714
4715
for(@files){
4716
...do work on file...
4717
my $eta = eta( ++$i, 0+@files ); # file now, number of files
4718
print "" . localtime($eta);
4719
}
4720
4721
..DOC MISSING..
4722
4723
=head2 etahhmm
4724
4725
...NOT YET
4726
4727
=cut
4728
4729
#http://en.wikipedia.org/wiki/Kalman_filter god idé?
4730
our %Eta;
4731
our $Eta_forgetfulness=2;
4732
sub eta {
4733
2012
100
2012
1
7796
my($id,$pos,$end,$time_fp)=( @_==2 ? (join(";",caller()),@_) : @_ );
4734
2012
66
2320
$time_fp||=time_fp();
4735
2012
100
2638
my $a=$Eta{$id}||=[];
4736
2012
2273
push @$a, [$pos,$time_fp];
4737
2012
100
3320
@$a=@$a[map$_*2,0..@$a/2] if @$a>40; #hm 40
4738
2012
100
100
5321
splice(@$a,-2,1) if @$a>1 and $$a[-2][0]==$$a[-1][0]; #same pos as last
4739
2012
100
2254
return undef if @$a<2;
4740
2001
1194
my @eta;
4741
2001
1636
for(2..@$a){
4742
58602
83049
push @eta, $$a[-1][1] + ($end-$$a[-1][0]) * ($$a[-1][1]-$$a[-$_][1])/($$a[-1][0]-$$a[-$_][0]);
4743
}
4744
2001
1566
my($sum,$sumw,$w)=(0,0,1);
4745
2001
1507
for(@eta){
4746
58602
35348
$sum+=$w*$_;
4747
58602
32676
$sumw+=$w;
4748
58602
38103
$w/=$Eta_forgetfulness;
4749
}
4750
2001
1409
my $avg=$sum/$sumw;
4751
2001
3351
return $avg;
4752
# return avg(@eta);
4753
#return $$a[-1][1] + ($end-$$a[-1][0]) * ($$a[-1][1]-$$a[-2][1])/($$a[-1][0]-$$a[-2][0]);
4754
0
0
1;
4755
}
4756
4757
=head2 sleep_until
4758
4759
sleep_until(0.5) sleeps until half a second has passed since the last
4760
call to sleep_until. This example starts the next job excactly ten
4761
seconds after the last job started even if the last job lasted for a
4762
while (but not more than ten seconds):
4763
4764
for(@jobs){
4765
sleep_until(10);
4766
print localtime()."\n";
4767
...heavy job....
4768
}
4769
4770
Might print:
4771
4772
Thu Jan 12 16:00:00 2012
4773
Thu Jan 12 16:00:10 2012
4774
Thu Jan 12 16:00:20 2012
4775
4776
...and so on even if the C<< ...heavy job... >>-part takes more than a
4777
second to complete. Whereas if sleep(10) was used, each job would
4778
spend more than ten seconds in average since the work time would be
4779
added to sleep(10).
4780
4781
Note: sleep_until() will remember the time of ANY last call of this sub,
4782
not just the one on the same line in the source code (this might change
4783
in the future). The first call to sleep_until() will be the same as
4784
sleep_fp() or Perl's own sleep() if the argument is an integer.
4785
4786
=cut
4787
4788
our $Time_last_sleep_until;
4789
sub sleep_until {
4790
0
0
0
1
0
my $s=@_==1?shift():0;
4791
0
0
my $time=time_fp();
4792
0
0
my $sleep=$s-($time-nvl($Time_last_sleep_until,0));
4793
0
0
$Time_last_sleep_until=time;
4794
0
0
0
sleep_fp($sleep) if $sleep>0;
4795
}
4796
4797
my %thr;
4798
sub throttle {
4799
0
0
0
0
my($times,$mintime,$what)=@_;
4800
0
0
0
$what||=join(":",@{[caller(1)]}[3,2]);
0
0
4801
0
0
0
$thr{$what}||=[];
4802
0
0
my $thr=$thr{$what};
4803
0
0
push @$thr,time_fp();
4804
0
0
0
return if @$thr<$times;
4805
0
0
my $since=$$thr[-1]-shift(@$thr);
4806
0
0
0
my $sleep=$since<$mintime?$mintime-$since:0;
4807
0
0
sleep_fp($sleep);
4808
0
0
return $sleep;
4809
}
4810
4811
=head2 leapyear
4812
4813
B A year. A four digit number.
4814
4815
B True (1) or false (0) of whether the year is a leap year or
4816
not. (Uses current calendar even for periods before leapyears was used).
4817
4818
print join(", ",grep leapyear($_), 1900..2014)."\n";
4819
4820
1904, 1908, 1912, 1916, 1920, 1924, 1928, 1932, 1936, 1940, 1944, 1948, 1952, 1956,
4821
1960, 1964, 1968, 1972, 1976, 1980, 1984, 1988, 1992, 1996, 2000, 2004, 2008, 2012
4822
4823
Note: 1900 is not a leap year, but 2000 is. Years divided by 100 is a leap year only
4824
if it can be divided by 400.
4825
4826
=cut
4827
4828
0
0
0
1
0
sub leapyear{$_[0]%400?$_[0]%100?$_[0]%4?0:1:0:1} #bool
0
0
4829
4830
#http://rosettacode.org/wiki/Levenshtein_distance#Perl
4831
our %ldist_cache;
4832
sub ldist {
4833
0
0
0
0
my($s,$t,$l) = @_;
4834
0
0
0
return length($t) if !$s;
4835
0
0
0
return length($s) if !$t;
4836
0
0
0
0
%ldist_cache=() if !$l and 1000<0+%ldist_cache;
4837
$ldist_cache{$s,$t} ||=
4838
0
0
0
do {
4839
0
0
my($s1,$t1) = ( substr($s,1), substr($t,1) );
4840
0
0
0
substr($s,0,1) eq substr($t,0,1)
4841
? ldist($s1,$t1)
4842
: 1 + min( ldist($s1,$t1,1+$l), ldist($s,$t1,1+$l), ldist($s1,$t,1+$l) );
4843
};
4844
}
4845
4846
=head1 OTHER
4847
4848
=head2 nvl
4849
4850
The I function (or I function)
4851
4852
C takes two or more arguments. (Oracles nvl-function take just two)
4853
4854
Returns the value of the first input argument with length() > 0.
4855
4856
Return I if there is no such input argument.
4857
4858
In perl 5.10 and perl 6 this will most often be easier with the C< //
4859
> operator, although C and C<< // >> treats empty strings C<"">
4860
differently. Sub nvl here considers empty strings and undef the same.
4861
4862
=cut
4863
4864
sub nvl {
4865
38
100
66
38
1
195
return $_[0] if defined $_[0] and length($_[0]) or @_==1;
100
4866
22
100
55
return $_[1] if @_==2;
4867
9
100
26
return nvl(@_[1..$#_]) if @_>2;
4868
1
3
return undef;
4869
}
4870
4871
=head2 decode_num
4872
4873
See L.
4874
4875
=head2 decode
4876
4877
C and C works just as Oracles C.
4878
4879
C and C accordingly uses perl operators C and C<==> for comparison.
4880
4881
Examples:
4882
4883
my $a=123;
4884
print decode($a, 123,3, 214,4, $a); # prints 3
4885
print decode($a, 123=>3, 214=>4, $a); # prints 3, same thing since => is synonymous to comma in Perl
4886
4887
The first argument is tested against the second, fourth, sixth and so on,
4888
and then the third, fifth, seventh and so on is
4889
returned if decode() finds an equal string or number.
4890
4891
In the above example: 123 maps to 3, 124 maps to 4 and the last argument $a is returned elsewise.
4892
4893
More examples:
4894
4895
my $a=123;
4896
print decode($a, 123=>3, 214=>7, $a); # also 3, note that => is synonym for , (comma) in perl
4897
print decode($a, 122=>3, 214=>7, $a); # prints 123
4898
print decode($a, 123.0 =>3, 214=>7); # prints 3
4899
print decode($a, '123.0'=>3, 214=>7); # prints nothing (undef), no last argument default value here
4900
print decode_num($a, 121=>3, 221=>7, '123.0','b'); # prints b
4901
4902
Sort of:
4903
4904
decode($string, %conversion, $default);
4905
4906
The last argument is returned as a default if none of the keys in the keys/value-pairs matched.
4907
4908
A more perl-ish and often faster way of doing the same:
4909
4910
{123=>3, 214=>7}->{$a} || $a # (beware of 0)
4911
4912
=cut
4913
4914
sub decode {
4915
4
50
4
1
9
croak "Must have a mimimum of two arguments" if @_<2;
4916
4
2
my $uttrykk=shift;
4917
4
50
100
8
if(defined$uttrykk){ shift eq $uttrykk and return shift or shift for 1..@_/2 }
4
33
28
4918
0
0
0
else { !defined shift and return shift or shift for 1..@_/2 }
0
4919
2
5
return shift;
4920
}
4921
4922
sub decode_num {
4923
1
50
1
1
4
croak "Must have a mimimum of two arguments" if @_<2;
4924
1
2
my $uttrykk=shift;
4925
1
50
100
3
if(defined$uttrykk){ shift == $uttrykk and return shift or shift for 1..@_/2 }
1
33
12
4926
0
0
0
else { !defined shift and return shift or shift for 1..@_/2 }
0
4927
0
0
return shift;
4928
}
4929
4930
=head2 qrlist
4931
4932
Input: An array of values to be used to test againts for existence.
4933
4934
Output: A reference to a regular expression. That is a C
4935
4936
The regex sets $1 if it match.
4937
4938
Example:
4939
4940
my @list=qw/ABc XY DEF DEFG XYZ/;
4941
my $filter=qrlist("ABC","DEF","XY."); # makes a regex of it qr/^(\QABC\E|\QDEF\E|\QXY.\E)$/
4942
my @filtered= grep { $_ =~ $filter } @list; # returns DEF and XYZ, but not XYZ because the . char is taken literally
4943
4944
Note: Filtering with hash lookups are WAY faster.
4945
4946
Source:
4947
4948
sub qrlist (@) { my $str=join"|",map quotemeta, @_; qr/^($str)$/ }
4949
4950
=cut
4951
4952
sub qrlist (@) {
4953
0
0
1
0
my $str=join"|",map quotemeta,@_;
4954
0
0
return qr/^($str)$/;
4955
}
4956
4957
=head2 ansicolor
4958
4959
Perhaps easier to use than L ?
4960
4961
B One argument. A string where the char C<¤> have special
4962
meaning and is replaced by color codings depending on the letter
4963
following the C<¤>.
4964
4965
B The same string, but with C<¤letter> replaced by ANSI color
4966
codes respected by many types terminal windows. (xterm, telnet, ssh,
4967
telnet, rlog, vt100, cygwin, rxvt and such...).
4968
4969
B
4970
4971
¤r red
4972
¤g green
4973
¤b blue
4974
¤y yellow
4975
¤m magenta
4976
¤B bold
4977
¤u underline
4978
¤c clear
4979
¤¤ reset, quits and returns to default text color.
4980
4981
B
4982
4983
print ansicolor("This is maybe ¤ggreen¤¤?");
4984
4985
Prints I where the word I is shown in green.
4986
4987
If L is not installed or not found, returns the input
4988
string with every C<¤> including the following code letters
4989
removed. (That is: ansicolor is safe to use even if Term::ANSIColor is
4990
not installed, you just don't get the colors).
4991
4992
See also L.
4993
4994
=cut
4995
4996
sub ansicolor {
4997
0
0
1
0
my $txt=shift;
4998
0
0
0
eval{require Term::ANSIColor} or return replace($txt,qr/¤./);
0
0
4999
0
0
my %h=qw/r red g green b blue y yellow m magenta B bold u underline c clear ¤ reset/;
5000
0
0
my $re=join"|",keys%h;
5001
0
0
$txt=~s/¤($re)/Term::ANSIColor::color($h{$1})/ge;
0
0
5002
0
0
return $txt;
5003
}
5004
5005
=head2 ccn_ok
5006
5007
Checks if a Credit Card number (CCN) has correct control digits according to the LUHN-algorithm from 1960.
5008
This method of control digits is used by MasterCard, Visa, American Express,
5009
Discover, Diners Club / Carte Blanche, JCB and others.
5010
5011
B
5012
5013
A credit card number. Can contain non-digits, but they are removed internally before checking.
5014
5015
B
5016
5017
Something true or false.
5018
5019
Or more accurately:
5020
5021
Returns C (false) if the input argument is missing digits.
5022
5023
Returns 0 (zero, which is false) is the digits is not correct according to the LUHN algorithm.
5024
5025
Returns 1 or the name of a credit card company (true either way) if the last digit is an ok control digit for this ccn.
5026
5027
The name of the credit card company is returned like this (without the C<'> character)
5028
5029
Returns (wo '') Starts on Number of digits
5030
------------------------------ ------------------------ ----------------
5031
'MasterCard' 51-55 16
5032
'Visa' 4 13 eller 16
5033
'American Express' 34 eller 37 15
5034
'Discover' 6011 16
5035
'Diners Club / Carte Blanche' 300-305, 36 eller 38 14
5036
'JCB' 3 16
5037
'JCB' 2131 eller 1800 15
5038
5039
And should perhaps have had:
5040
5041
'enRoute' 2014 eller 2149 15
5042
5043
...but that card uses either another control algorithm or no control
5044
digits at all. So C is never returned here.
5045
5046
If the control digits is valid, but the input does not match anything in the column C, 1 is returned.
5047
5048
(This is also the same control digit mechanism used in Norwegian KID numbers on payment bills)
5049
5050
The first digit in a credit card number is supposed to tell what "industry" the card is meant for:
5051
5052
MII Digit Value Issuer Category
5053
--------------------------- ----------------------------------------------------
5054
0 ISO/TC 68 and other industry assignments
5055
1 Airlines
5056
2 Airlines and other industry assignments
5057
3 Travel and entertainment
5058
4 Banking and financial
5059
5 Banking and financial
5060
6 Merchandizing and banking
5061
7 Petroleum
5062
8 Telecommunications and other industry assignments
5063
9 National assignment
5064
5065
...although this has no meaning to C.
5066
5067
The first six digits is I, that is the bank
5068
(probably). The rest in the "account number", except the last digits,
5069
which is the control digit. Max length on credit card numbers are 19
5070
digits.
5071
5072
=cut
5073
5074
sub ccn_ok {
5075
0
0
1
0
my $ccn=shift(); #credit card number
5076
0
0
$ccn=~s/\D+//g;
5077
0
0
0
if(KID_ok($ccn)){
5078
0
0
0
return "MasterCard" if $ccn=~/^5[1-5]\d{14}$/;
5079
0
0
0
return "Visa" if $ccn=~/^4\d{12}(?:\d{3})?$/;
5080
0
0
0
return "American Express" if $ccn=~/^3[47]\d{13}$/;
5081
0
0
0
return "Discover" if $ccn=~/^6011\d{12}$/;
5082
0
0
0
return "Diners Club / Carte Blanche" if $ccn=~/^3(?:0[0-5]\d{11}|[68]\d{12})$/;
5083
0
0
0
return "JCB" if $ccn=~/^(?:3\d{15}|(?:2131|1800)\d{11})$/;
5084
0
0
return 1;
5085
}
5086
#return "enRoute" if $ccn=~/^(?:2014|2149)\d{11}$/; #ikke LUHN-krav?
5087
0
0
return 0;
5088
}
5089
5090
=head2 KID_ok
5091
5092
Checks if a norwegian KID number has an ok control digit.
5093
5094
To check if a customer has typed the number correctly.
5095
5096
This uses the LUHN algorithm (also known as mod-10) from 1960 which is also used
5097
internationally in control digits for credit card numbers, and Canadian social security ID numbers as well.
5098
5099
The algorithm, as described in Phrack (47-8) (a long time hacker online publication):
5100
5101
"For a card with an even number of digits, double every odd numbered
5102
digit and subtract 9 if the product is greater than 9. Add up all the
5103
even digits as well as the doubled-odd digits, and the result must be
5104
a multiple of 10 or it's not a valid card. If the card has an odd
5105
number of digits, perform the same addition doubling the even numbered
5106
digits instead."
5107
5108
B A KID-nummer. Must consist of digits 0-9 only, otherwise a die (croak) happens.
5109
5110
B
5111
5112
- Returns undef if the input argument is missing.
5113
5114
- Returns 0 if the control digit (the last digit) does not satify the LUHN/mod-10 algorithm.
5115
5116
- Returns 1 if ok
5117
5118
B L
5119
5120
=cut
5121
5122
sub KID_ok {
5123
0
0
0
1
0
croak "Non-numeric argument" if $_[0]=~/\D/;
5124
0
0
0
my @k=split//,shift or return undef;
5125
0
0
my $s;$s+=pop(@k)+[qw/0 2 4 6 8 1 3 5 7 9/]->[pop@k] while @k;
0
0
5126
0
0
0
$s%10==0?1:0;
5127
}
5128
5129
5130
5131
=head2 range
5132
5133
B
5134
5135
One or more numeric arguments:
5136
5137
First: x (first returned element)
5138
5139
Second: y (last but not including)
5140
5141
Third: step, default 1. The step between each returned element
5142
5143
If a fourth, fifth and so on arguments are given, they change the step for each returned element. As first derivative, second derivative.
5144
5145
B
5146
5147
If one argument: returns the array C<(0 .. x-1)>
5148
5149
If two arguments: returns the array C<(x .. y-1)>
5150
5151
If three arguments: The default step is 1. Use a third argument to use a different step.
5152
5153
B
5154
5155
print join ",", range(11); # prints 0,1,2,3,4,5,6,7,8,9,10 (but not 11)
5156
print join ",", range(2,11); # 2,3,4,5,6,7,8,9,10 (but not 11)
5157
print join ",", range(11,2,-1); # 11,10,9,8,7,6,5,4,3
5158
print join ",", range(2,11,3); # 2,5,8
5159
print join ",", range(11,2,-3); # 11,8,5
5160
print join ",", range(11,2,+3); # prints nothing
5161
5162
print join ", ",range(2,11,1,0.1); # 2, 3, 4.1, 5.3, 6.6, 8, 9.5 adds 0.1 to step each time
5163
print join ", ",range(2,11,1,0.1,-0.01); # 2, 3, 4.1, 5.29, 6.56, 7.9, 9.3, 10.75
5164
5165
Note: In the Python language and others, C is a build in iterator (a
5166
generator), not an array. This saves memory for large sets and sometimes time.
5167
Use C in L to get a similar lazy generator in Perl.
5168
5169
=cut
5170
5171
sub range {
5172
8
100
8
1
26
return _range_accellerated(@_) if @_>3; #se under
5173
6
8
my($x,$y,$jump)=@_;
5174
6
100
19
return ( 0 .. $x-1 ) if @_==1;
5175
5
100
47
return ( $x .. $y-1 ) if @_==2;
5176
4
50
33
21
croak "Wrong number of arguments or jump==0" if @_!=3 or $jump==0;
5177
4
5
my @r;
5178
4
100
7
if($jump>0){ while($x<$y){ push @r, $x; $x+=$jump } }
2
6
772
452
772
834
5179
2
4
else { while($x>$y){ push @r, $x; $x+=$jump } }
12
10
12
14
5180
4
37
return @r;
5181
}
5182
5183
#jumps derivative, double der., trippled der usw
5184
sub _range_accellerated {
5185
2
2
3
my($x,$y,@jump)=@_;
5186
2
3
my @r;
5187
2
50
17
8
my $test = $jump[0]>=0 ? sub{$x<$y} : sub{$x>$y};
17
29
0
0
5188
2
4
while(&$test()){
5189
15
19
push @r, $x;
5190
15
11
$x+=$jump[0];
5191
15
26
$jump[$_-1]+=$jump[$_] for 1..$#jump;
5192
}
5193
2
13
return @r;
5194
}
5195
5196
=head2 globr
5197
5198
Works like and uses Perls builtin C<< glob() >> function but adds support for ranges
5199
with C<< {from..to} >> and C<< {from..to..step} >>. Like brace expansion in bash.
5200
5201
Examples:
5202
5203
my @arr = glob "X{a,b,c,d}Z"; # return four element array: XaZ XbZ XcZ XdZ
5204
my @arr = globr "X{a,b,c,d}Z"; # same as above
5205
my @arr = globr "X{a..d}Z"; # same as above
5206
my @arr = globr "X{a..d..2}Z"; # step 2, returns array: XaZ XcZ
5207
my @arr = globr "X{aa..bz..13}Z"; # XaaZ XanZ XbaZ XbnZ
5208
my @arr = globr "{1..12}b"; # 1b 2b 3b 4b 5b 6b 7b 8b 9b 10b 11b 12b
5209
my @arr = globr "{01..12}b"; # 01b 02b 03b 04b 05b 06b 07b 08b 09b 10b 11b 12b
5210
my @arr = globr "{01..12..3}b"; # 01b 04b 07b 10b
5211
5212
=cut
5213
5214
sub globr($) {
5215
8
8
1
11
my $p=shift;
5216
8
100
36
$p=~s/\{(\w+)\.\.(\w+)(\.\.(\d+))?\}/my$i=0;"{".join(",",grep{$4?!($i++%$4):1}$1..$2)."}";/eg;
7
6
7
38
133
199
5217
8
669
glob $p;
5218
}
5219
5220
=head2 permutations
5221
5222
How many ways (permutations) can six people be placed around a table:
5223
5224
If one person: one
5225
If two persons: two (they can swap places)
5226
If three persons: six
5227
If four persons: 24
5228
If five persons: 120
5229
If six persons: 720
5230
5231
The formula is C where the postfix unary operator C, also known as I is defined like:
5232
C. Example: C<5! = 5 * 4 * 3 * 2 * 1 = 120>.Run this to see the 100 first C<< n! >>
5233
5234
perl -MAcme::Tools -le'$i=big(1);print "$_!=",$i*=$_ for 1..100'
5235
5236
1! = 1
5237
2! = 2
5238
3! = 6
5239
4! = 24
5240
5! = 120
5241
6! = 720
5242
7! = 5040
5243
8! = 40320
5244
9! = 362880
5245
10! = 3628800
5246
.
5247
.
5248
.
5249
100! = 93326215443944152681699238856266700490715968264381621468592963895217599993229915608941463976156518286253697920827223758251185210916864000000000000000000000000
5250
5251
C takes a list and return a list of arrayrefs for each
5252
of the permutations of the input list:
5253
5254
permutations('a','b'); #returns (['a','b'],['b','a'])
5255
5256
permutations('a','b','c'); #returns (['a','b','c'],['a','c','b'],
5257
# ['b','a','c'],['b','c','a'],
5258
# ['c','a','b'],['c','b','a'])
5259
5260
Up to five input arguments C is probably as fast as it
5261
can be in this pure perl implementation (see source). For more than
5262
five, it could be faster. How fast is it now: Running with different
5263
n, this many time took that many seconds:
5264
5265
n times seconds
5266
-- ------- ---------
5267
2 100000 0.32
5268
3 10000 0.09
5269
4 10000 0.33
5270
5 1000 0.18
5271
6 100 0.27
5272
7 10 0.21
5273
8 1 0.17
5274
9 1 1.63
5275
10 1 17.00
5276
5277
If the first argument is a coderef, that sub will be called for each permutation and the return from those calls with be the real return from C. For example this:
5278
5279
print for permutations(sub{join"",@_},1..3);
5280
5281
...will print the same as:
5282
5283
print for map join("",@$_), permutations(1..3);
5284
5285
...but the first of those two uses less RAM if 3 has been say 9.
5286
Changing 3 with 10, and many computers hasn't enough memory
5287
for the latter.
5288
5289
The examples prints:
5290
5291
123
5292
132
5293
213
5294
231
5295
312
5296
321
5297
5298
If you just want to say calculate something on each permutation,
5299
but is not interested in the list of them, you just don't
5300
take the return. That is:
5301
5302
my $ant;
5303
permutations(sub{$ant++ if $_[-1]>=$_[0]*2},1..9);
5304
5305
...is the same as:
5306
5307
$$_[-1]>=$$_[0]*2 and $ant++ for permutations(1..9);
5308
5309
...but the first uses next to nothing of memory compared to the latter. They have about the same speed.
5310
(The examples just counts the permutations where the last number is at least twice as large as the first)
5311
5312
C was created to find all combinations of a persons
5313
name. This is useful in "fuzzy" name searches with
5314
L if you can not be certain what is first, middle
5315
and last names. In foreign or unfamiliar names it can be difficult to
5316
know that.
5317
5318
=cut
5319
5320
#TODO: se test_perl.pl
5321
5322
sub permutations {
5323
2
50
2
1
14
my $code=ref($_[0]) eq 'CODE' ? shift() : undef;
5324
2
50
33
11
$code and @_<6 and return map &$code(@$_),permutations(@_);
5325
5326
2
50
8
return [@_] if @_<2;
5327
5328
2
100
27
return ([@_[0,1]],[@_[1,0]]) if @_==2;
5329
5330
1
50
39
return ([@_[0,1,2]],[@_[0,2,1]],[@_[1,0,2]],
5331
[@_[1,2,0]],[@_[2,0,1]],[@_[2,1,0]]) if @_==3;
5332
5333
0
0
0
return ([@_[0,1,2,3]],[@_[0,1,3,2]],[@_[0,2,1,3]],[@_[0,2,3,1]],
5334
[@_[0,3,1,2]],[@_[0,3,2,1]],[@_[1,0,2,3]],[@_[1,0,3,2]],
5335
[@_[1,2,0,3]],[@_[1,2,3,0]],[@_[1,3,0,2]],[@_[1,3,2,0]],
5336
[@_[2,0,1,3]],[@_[2,0,3,1]],[@_[2,1,0,3]],[@_[2,1,3,0]],
5337
[@_[2,3,0,1]],[@_[2,3,1,0]],[@_[3,0,1,2]],[@_[3,0,2,1]],
5338
[@_[3,1,0,2]],[@_[3,1,2,0]],[@_[3,2,0,1]],[@_[3,2,1,0]]) if @_==4;
5339
5340
0
0
0
return ([@_[0,1,2,3,4]],[@_[0,1,2,4,3]],[@_[0,1,3,2,4]],[@_[0,1,3,4,2]],[@_[0,1,4,2,3]],
5341
[@_[0,1,4,3,2]],[@_[0,2,1,3,4]],[@_[0,2,1,4,3]],[@_[0,2,3,1,4]],[@_[0,2,3,4,1]],
5342
[@_[0,2,4,1,3]],[@_[0,2,4,3,1]],[@_[0,3,1,2,4]],[@_[0,3,1,4,2]],[@_[0,3,2,1,4]],
5343
[@_[0,3,2,4,1]],[@_[0,3,4,1,2]],[@_[0,3,4,2,1]],[@_[0,4,1,2,3]],[@_[0,4,1,3,2]],
5344
[@_[0,4,2,1,3]],[@_[0,4,2,3,1]],[@_[0,4,3,1,2]],[@_[0,4,3,2,1]],[@_[1,0,2,3,4]],
5345
[@_[1,0,2,4,3]],[@_[1,0,3,2,4]],[@_[1,0,3,4,2]],[@_[1,0,4,2,3]],[@_[1,0,4,3,2]],
5346
[@_[1,2,0,3,4]],[@_[1,2,0,4,3]],[@_[1,2,3,0,4]],[@_[1,2,3,4,0]],[@_[1,2,4,0,3]],
5347
[@_[1,2,4,3,0]],[@_[1,3,0,2,4]],[@_[1,3,0,4,2]],[@_[1,3,2,0,4]],[@_[1,3,2,4,0]],
5348
[@_[1,3,4,0,2]],[@_[1,3,4,2,0]],[@_[1,4,0,2,3]],[@_[1,4,0,3,2]],[@_[1,4,2,0,3]],
5349
[@_[1,4,2,3,0]],[@_[1,4,3,0,2]],[@_[1,4,3,2,0]],[@_[2,0,1,3,4]],[@_[2,0,1,4,3]],
5350
[@_[2,0,3,1,4]],[@_[2,0,3,4,1]],[@_[2,0,4,1,3]],[@_[2,0,4,3,1]],[@_[2,1,0,3,4]],
5351
[@_[2,1,0,4,3]],[@_[2,1,3,0,4]],[@_[2,1,3,4,0]],[@_[2,1,4,0,3]],[@_[2,1,4,3,0]],
5352
[@_[2,3,0,1,4]],[@_[2,3,0,4,1]],[@_[2,3,1,0,4]],[@_[2,3,1,4,0]],[@_[2,3,4,0,1]],
5353
[@_[2,3,4,1,0]],[@_[2,4,0,1,3]],[@_[2,4,0,3,1]],[@_[2,4,1,0,3]],[@_[2,4,1,3,0]],
5354
[@_[2,4,3,0,1]],[@_[2,4,3,1,0]],[@_[3,0,1,2,4]],[@_[3,0,1,4,2]],[@_[3,0,2,1,4]],
5355
[@_[3,0,2,4,1]],[@_[3,0,4,1,2]],[@_[3,0,4,2,1]],[@_[3,1,0,2,4]],[@_[3,1,0,4,2]],
5356
[@_[3,1,2,0,4]],[@_[3,1,2,4,0]],[@_[3,1,4,0,2]],[@_[3,1,4,2,0]],[@_[3,2,0,1,4]],
5357
[@_[3,2,0,4,1]],[@_[3,2,1,0,4]],[@_[3,2,1,4,0]],[@_[3,2,4,0,1]],[@_[3,2,4,1,0]],
5358
[@_[3,4,0,1,2]],[@_[3,4,0,2,1]],[@_[3,4,1,0,2]],[@_[3,4,1,2,0]],[@_[3,4,2,0,1]],
5359
[@_[3,4,2,1,0]],[@_[4,0,1,2,3]],[@_[4,0,1,3,2]],[@_[4,0,2,1,3]],[@_[4,0,2,3,1]],
5360
[@_[4,0,3,1,2]],[@_[4,0,3,2,1]],[@_[4,1,0,2,3]],[@_[4,1,0,3,2]],[@_[4,1,2,0,3]],
5361
[@_[4,1,2,3,0]],[@_[4,1,3,0,2]],[@_[4,1,3,2,0]],[@_[4,2,0,1,3]],[@_[4,2,0,3,1]],
5362
[@_[4,2,1,0,3]],[@_[4,2,1,3,0]],[@_[4,2,3,0,1]],[@_[4,2,3,1,0]],[@_[4,3,0,1,2]],
5363
[@_[4,3,0,2,1]],[@_[4,3,1,0,2]],[@_[4,3,1,2,0]],[@_[4,3,2,0,1]],[@_[4,3,2,1,0]]) if @_==5;
5364
5365
0
0
my(@r,@p,@c,@i,@n); @i=(0,@_); @p=@c=1..@_; @n=1..@_-1;
0
0
0
0
0
0
5366
PERM:
5367
0
0
while(1){
5368
0
0
0
if($code){if(defined wantarray){push(@r,&$code(@i[@p]))}else{&$code(@i[@p])}}else{push@r,[@i[@p]]}
0
0
0
0
0
0
0
0
0
5369
0
0
0
for my$i(@n){splice@p,$i,0,shift@p;next PERM if --$c[$i];$c[$i]=$i+1}
0
0
0
0
0
0
5370
return@r
5371
0
0
}
5372
}
5373
5374
=head2 cart
5375
5376
Cartesian product
5377
5378
B
5379
5380
Input: two or more arrayrefs with accordingly x, y, z and so on number of elements.
5381
5382
Output: An array of x * y * z number of arrayrefs. The arrays being the cartesian product of the input arrays.
5383
5384
It can be useful to think of this as joins in SQL. In C statements with
5385
more tables behind C, but without any C condition to join the tables.
5386
5387
B
5388
5389
B
5390
5391
- Either two or more arrayrefs with x, y, z and so on number of elements.
5392
5393
- Or coderefs to subs containing condition checks. Somewhat like C conditions in SQL.
5394
5395
B An array of x * y * z number of arrayrefs (the cartesian product)
5396
minus the ones that did not fulfill the condition(s).
5397
5398
This of is as joins with one or more where conditions as coderefs.
5399
5400
The coderef input arguments can be placed last or among the array refs
5401
to save both runtime and memory if the conditions depend on
5402
arrays further back.
5403
5404
B
5405
5406
for(cart(\@a1,\@a2,\@a3)){
5407
my($a1,$a2,$a3) = @$_;
5408
print "$a1,$a2,$a3\n";
5409
}
5410
5411
Prints the same as this:
5412
5413
for my $a1 (@a1){
5414
for my $a2 (@a2){
5415
for my $a3 (@a3){
5416
print "$a1,$a2,$a3\n";
5417
}
5418
}
5419
}
5420
5421
B (with a condition: the sum of the first two should be dividable with 3)
5422
5423
for( cart( \@a1, \@a2, sub{sum(@$_)%3==0}, \@a3 ) ) {
5424
my($a1,$a2,$a3)=@$_;
5425
print "$a1,$a2,$a3\n";
5426
}
5427
5428
Prints the same as this:
5429
5430
for my $a1 (@a1){
5431
for my $a2 (@a2){
5432
next if 0==($a1+$a2)%3;
5433
for my $a3 (@a3){
5434
print "$a1,$a2,$a3\n";
5435
}
5436
}
5437
}
5438
5439
B
5440
5441
my @a1 = (1,2);
5442
my @a2 = (10,20,30);
5443
my @a3 = (100,200,300,400);
5444
5445
my $s = join"", map "*".join(",",@$_), cart(\@a1,\@a2,\@a3);
5446
ok( $s eq "*1,10,100*1,10,200*1,10,300*1,10,400*1,20,100*1,20,200"
5447
."*1,20,300*1,20,400*1,30,100*1,30,200*1,30,300*1,30,400"
5448
."*2,10,100*2,10,200*2,10,300*2,10,400*2,20,100*2,20,200"
5449
."*2,20,300*2,20,400*2,30,100*2,30,200*2,30,300*2,30,400");
5450
5451
$s=join"",map "*".join(",",@$_), cart(\@a1,\@a2,\@a3,sub{sum(@$_)%3==0});
5452
ok( $s eq "*1,10,100*1,10,400*1,20,300*1,30,200*2,10,300*2,20,200*2,30,100*2,30,400");
5453
5454
B
5455
5456
Returns hashrefs instead of arrayrefs:
5457
5458
my @cards=cart( #5200 cards: 100 decks of 52 cards
5459
deck => [1..100],
5460
value => [qw/2 3 4 5 6 7 8 9 10 J Q K A/],
5461
col => [qw/heart diamond club star/],
5462
);
5463
for my $card ( mix(@cards) ) {
5464
print "From deck number $$card{deck} we got $$card{value} $$card{col}\n";
5465
}
5466
5467
Note: using sub-ref filters do not work (yet) in hash-mode. Use grep on result instead.
5468
5469
=cut
5470
5471
sub cart {
5472
5
5
1
437
my @ars=@_;
5473
5
100
12
if(!ref($_[0])){ #if hash-mode detected
5474
1
33
3
my(@k,@v); push@k,shift@ars and push@v,shift@ars while @ars;
1
11
5475
1
7
return map{my%h;@h{@k}=@$_;\%h}cart(@v);
24
14
24
31
24
25
5476
}
5477
4
6
my @res=map[$_],@{shift@ars};
4
14
5478
4
6
for my $ar (@ars){
5479
9
100
50
21
@res=grep{&$ar(@$_)}@res and next if ref($ar) eq 'CODE';
24
40
5480
8
7
@res=map{my$r=$_;map{[@$r,$_]}@$ar}@res;
34
24
34
17
120
147
5481
}
5482
4
49
return @res;
5483
}
5484
5485
sub cart_easy { #not tested/exported http://stackoverflow.com/questions/2457096/in-perl-how-can-i-get-the-cartesian-product-of-multiple-sets
5486
0
0
0
0
my $last = pop @_;
5487
0
0
0
@_ ? (map {my$left=$_; map [@$left, $_], @$last } cart_easy(@_) )
0
0
0
0
5488
: (map [$_], @$last);
5489
}
5490
5491
=head2 reduce
5492
5493
From: Why Functional Programming Matters: L
5494
5495
L
5496
5497
DON'T TRY THIS AT HOME, C PROGRAMMERS.
5498
5499
sub reduce (&@) {
5500
my ($proc, $first, @rest) = @_;
5501
return $first if @rest == 0;
5502
local ($a, $b) = ($first, reduce($proc, @rest));
5503
return $proc->();
5504
}
5505
5506
Many functions can then be implemented with very little code. Such as:
5507
5508
sub mean { (reduce {$a + $b} @_) / @_ }
5509
5510
=cut
5511
5512
sub reduce (&@) {
5513
0
0
1
0
my ($proc, $first, @rest) = @_;
5514
0
0
0
return $first if @rest == 0;
5515
33
33
102629
no warnings;
33
83
33
19623
5516
0
0
local ($a, $b) = ($first, reduce($proc, @rest));
5517
0
0
return $proc->();
5518
}
5519
5520
5521
=head2 pivot
5522
5523
Resembles the pivot table function in Excel.
5524
5525
C is used to spread out a slim and long table to a visually improved layout.
5526
5527
For instance spreading out the results of C-selects from SQL:
5528
5529
pivot( arrayref, columnname1, columnname2, ...)
5530
5531
pivot( ref_to_array_of_arrayrefs, @list_of_names_to_down_fields )
5532
5533
The first argument is a ref to a two dimensional table.
5534
5535
The rest of the arguments is a list which also signals the number of
5536
columns from left in each row that is ending up to the left of the
5537
data table, the rest ends up at the top and the last element of
5538
each row ends up as data.
5539
5540
top1 top1 top1 top1
5541
left1 left2 left3 top2 top2 top2 top2
5542
----- ----- ----- ---- ---- ---- ----
5543
data data data data
5544
data data data data
5545
data data data data
5546
5547
Example:
5548
5549
my @table=(
5550
["1997","Gerd", "Weight", "Summer",66],
5551
["1997","Gerd", "Height", "Summer",170],
5552
["1997","Per", "Weight", "Summer",75],
5553
["1997","Per", "Height", "Summer",182],
5554
["1997","Hilde","Weight", "Summer",62],
5555
["1997","Hilde","Height", "Summer",168],
5556
["1997","Tone", "Weight", "Summer",70],
5557
5558
["1997","Gerd", "Weight", "Winter",64],
5559
["1997","Gerd", "Height", "Winter",158],
5560
["1997","Per", "Weight", "Winter",73],
5561
["1997","Per", "Height", "Winter",180],
5562
["1997","Hilde","Weight", "Winter",61],
5563
["1997","Hilde","Height", "Winter",164],
5564
["1997","Tone", "Weight", "Winter",69],
5565
5566
["1998","Gerd", "Weight", "Summer",64],
5567
["1998","Gerd", "Height", "Summer",171],
5568
["1998","Per", "Weight", "Summer",76],
5569
["1998","Per", "Height", "Summer",182],
5570
["1998","Hilde","Weight", "Summer",62],
5571
["1998","Hilde","Height", "Summer",168],
5572
["1998","Tone", "Weight", "Summer",70],
5573
5574
["1998","Gerd", "Weight", "Winter",64],
5575
["1998","Gerd", "Height", "Winter",171],
5576
["1998","Per", "Weight", "Winter",74],
5577
["1998","Per", "Height", "Winter",183],
5578
["1998","Hilde","Weight", "Winter",62],
5579
["1998","Hilde","Height", "Winter",168],
5580
["1998","Tone", "Weight", "Winter",71],
5581
);
5582
5583
.
5584
5585
my @reportA=pivot(\@table,"Year","Name");
5586
print "\n\nReport A\n\n".tablestring(\@reportA);
5587
5588
Will print:
5589
5590
Report A
5591
5592
Year Name Height Height Weight Weight
5593
Summer Winter Summer Winter
5594
---- ----- ------ ------ ------ ------
5595
1997 Gerd 170 158 66 64
5596
1997 Hilde 168 164 62 61
5597
1997 Per 182 180 75 73
5598
1997 Tone 70 69
5599
1998 Gerd 171 171 64 64
5600
1998 Hilde 168 168 62 62
5601
1998 Per 182 183 76 74
5602
1998 Tone 70 71
5603
5604
.
5605
5606
my @reportB=pivot([map{$_=[@$_[0,3,2,1,4]]}(@t=@table)],"Year","Season");
5607
print "\n\nReport B\n\n".tablestring(\@reportB);
5608
5609
Will print:
5610
5611
Report B
5612
5613
Year Season Height Height Height Weight Weight Weight Weight
5614
Gerd Hilde Per Gerd Hilde Per Tone
5615
---- ------ ------ ------ ----- ----- ------ ------ ------
5616
1997 Summer 170 168 182 66 62 75 70
5617
1997 Winter 158 164 180 64 61 73 69
5618
1998 Summer 171 168 182 64 62 76 70
5619
1998 Winter 171 168 183 64 62 74 71
5620
5621
.
5622
5623
my @reportC=pivot([map{$_=[@$_[1,2,0,3,4]]}(@t=@table)],"Name","Attributt");
5624
print "\n\nReport C\n\n".tablestring(\@reportC);
5625
5626
Will print:
5627
5628
Report C
5629
5630
Name Attributt 1997 1997 1998 1998
5631
Summer Winter Summer Winter
5632
----- --------- ------ ------ ------ ------
5633
Gerd Height 170 158 171 171
5634
Gerd Weight 66 64 64 64
5635
Hilde Height 168 164 168 168
5636
Hilde Weight 62 61 62 62
5637
Per Height 182 180 182 183
5638
Per Weight 75 73 76 74
5639
Tone Weight 70 69 70 71
5640
5641
.
5642
5643
my @reportD=pivot([map{$_=[@$_[1,2,0,3,4]]}(@t=@table)],"Name");
5644
print "\n\nReport D\n\n".tablestring(\@reportD);
5645
5646
Will print:
5647
5648
Report D
5649
5650
Name Height Height Height Height Weight Weight Weight Weight
5651
1997 1997 1998 1998 1997 1997 1998 1998
5652
Summer Winter Summer Winter Summer Winter Summer Winter
5653
----- ------ ------ ------ ------ ------ ------ ------ ------
5654
Gerd 170 158 171 171 66 64 64 64
5655
Hilde 168 164 168 168 62 61 62 62
5656
Per 182 180 182 183 75 73 76 74
5657
Tone 70 69 70 71
5658
5659
Options:
5660
5661
Options to sort differently and show sums and percents are available. (...MORE DOC ON THAT LATER...)
5662
5663
See also L
5664
5665
=cut
5666
5667
sub pivot {
5668
4
4
1
12
my($tabref,@vertikalefelt)=@_;
5669
4
50
19
my %opt=ref($vertikalefelt[-1]) eq 'HASH' ? %{pop(@vertikalefelt)} : ();
0
0
5670
4
50
16
my $opt_sum=1 if $opt{sum};
5671
4
50
0
15
my $opt_pro=exists $opt{prosent}?$opt{prosent}||0:undef;
5672
4
50
47
my $sortsub = $opt{'sortsub'} || \&_sortsub;
5673
4
33
23
my $sortsub_bortover = $opt{'sortsub_bortover'} || $sortsub;
5674
4
33
19
my $sortsub_nedover = $opt{'sortsub_nedover'} || $sortsub;
5675
#print serialize(\%opt,'opt');
5676
#print serialize(\$opt_pro,'opt_pro');
5677
4
8
my $antned=0+@vertikalefelt;
5678
4
6
my $bakerst=-1+@{$$tabref[0]};
4
11
5679
4
8
my(%h,%feltfinnes,%sum);
5680
#print "Bakerst<$bakerst>\n";
5681
4
9
for(@$tabref){
5682
112
207
my $rad=join($;,@$_[0..($antned-1)]);
5683
112
162
my $felt=join($;,@$_[$antned..($bakerst-1)]);
5684
112
116
my $verdi=$$_[$bakerst];
5685
112
50
161
length($rad) or $rad=' ';
5686
112
50
135
length($felt) or $felt=' ';
5687
112
178
$h{$rad}{$felt}=$verdi;
5688
112
176
$h{$rad}{"%$felt"}=$verdi;
5689
112
50
33
602
if($opt_sum or defined $opt_pro){
5690
0
0
$h{$rad}{Sum}+=$verdi;
5691
0
0
$sum{$felt}+=$verdi;
5692
0
0
$sum{Sum}+=$verdi;
5693
}
5694
112
111
$feltfinnes{$felt}++;
5695
112
50
184
$feltfinnes{"%$felt"}++ if $opt_pro;
5696
}
5697
4
38
my @feltfinnes = sort $sortsub_bortover keys%feltfinnes;
5698
4
50
17
push @feltfinnes, "Sum" if $opt_sum;
5699
4
11
my @t=([@vertikalefelt,map{replace($_,$;,"\n")}@feltfinnes]);
23
36
5700
#print serialize(\@feltfinnes,'feltfinnes');
5701
#print serialize(\%h,'h');
5702
#print "H = ".join(", ",sort _sortsub keys%h)."\n";
5703
4
21
for my $rad (sort $sortsub_nedover keys(%h)){
5704
my @rad=(split($;,$rad),
5705
map{
5706
23
50
33
166
if(/^\%/ and defined $opt_pro){
120
225
5707
0
0
my $sum=$h{$rad}{Sum};
5708
0
0
my $verdi=$h{$rad}{$_};
5709
0
0
0
if($sum!=0){
5710
0
0
0
defined $verdi
5711
?sprintf("%*.*f",3+1+$opt_pro,$opt_pro,100*$verdi/$sum)
5712
:$verdi;
5713
}
5714
else{
5715
0
0
0
$verdi!=0?"div0":$verdi;
5716
}
5717
}
5718
else{
5719
120
149
$h{$rad}{$_};
5720
}
5721
}
5722
@feltfinnes);
5723
23
69
push(@t,[@rad]);
5724
}
5725
4
50
10
push(@t,"-",["Sum",(map{""}(2..$antned)),map{print "<$_>\n";$sum{$_}}@feltfinnes]) if $opt_sum;
0
0
0
0
0
0
5726
4
48
return @t;
5727
}
5728
5729
# default sortsub for pivot()
5730
5731
sub _sortsub {
5732
33
33
162
no warnings;
33
40
33
10401
5733
#my $c=($a<=>$b)||($a cmp $b);
5734
#return $c if $c;
5735
#printf "%-30s %-30s ",replace($a,$;,','),replace($b,$;,',');
5736
78
78
651
my @a=split $;,$a;
5737
78
491
my @b=split $;,$b;
5738
78
146
for(0..$#a){
5739
117
232
my $c=$a[$_]<=>$b[$_];
5740
117
100
66
307
return $c if $c and "$a[$_]$b[$_]"!~/[iI][nN][fF]|þ/i; # inf(inity)
5741
99
95
$c=$a[$_]cmp$b[$_];
5742
99
100
246
return $c if $c;
5743
}
5744
0
0
return 0;
5745
}
5746
5747
=head2 tablestring
5748
5749
B a reference to an array of arrayrefs -- a two dimensional table of strings and numbers
5750
5751
B a string containing the textual table -- a string of two or more lines
5752
5753
The first arrayref in the list refers to a list of either column headings (scalar)
5754
or ... (...more later...)
5755
5756
In this output table:
5757
5758
- the columns will not be wider than necessary by its widest value (any -tags are removed in every internal width-calculation)
5759
5760
- multi-lined cell values are handled also
5761
5762
- and so are html-tags, if the output is to be used inside -tags on a web page.
5763
5764
- columns with just numeric values are right justified (header row excepted)
5765
5766
Example:
5767
5768
print tablestring([
5769
[qw/AA BB CCCC/],
5770
[123,23,"d"],
5771
[12,23,34],
5772
[77,88,99],
5773
["lin\nes",12,"asdff\nfdsa\naa"],[0,22,"adf"]
5774
]);
5775
5776
Prints this string of 11 lines:
5777
5778
AA BB CCCC
5779
--- -- -----
5780
123 23 d
5781
12 23 34
5782
77 8 99
5783
5784
lin 12 asdff
5785
es fdsa
5786
aa
5787
5788
10 22 adf
5789
5790
As you can see, rows containing multi-lined cells gets an empty line before and after the row to separate it more clearly.
5791
5792
=cut
5793
5794
sub tablestring {
5795
5
5
1
31
my $tab=shift;
5796
5
50
15
my %o=$_[0] ? %{shift()} : ();
0
0
5797
5
8
my $fjern_tom=$o{fjern_tomme_kolonner};
5798
5
9
my $ikke_space=$o{ikke_space};
5799
5
50
24
my $nodup=$o{nodup}||0;
5800
5
9
my $ikke_hodestrek=$o{ikke_hodestrek};
5801
5
50
11
my $pagesize=exists $o{pagesize} ? $o{pagesize}-3 : 9999999;
5802
5
7
my $venstretvang=$o{venstre};
5803
5
5
my(@bredde,@venstre,@hoeyde,@ikketom,@nodup);
5804
5
7
my $hode=1;
5805
5
5
my $i=0;
5806
5
8
my $j;
5807
5
12
for(@$tab){
5808
33
27
$j=0;
5809
33
32
$hoeyde[$i]=0;
5810
33
29
my $nodup_rad=$nodup;
5811
33
50
66
if(ref($_) eq 'ARRAY'){
5812
33
43
for(@$_){
5813
210
184
my $celle=$_;
5814
210
100
352
$bredde[$j]||=0;
5815
210
50
33
429
if($nodup_rad and $i>0 and $$tab[$i][$j] eq $$tab[$i-1][$j] || ($nodup_rad=0)){
0
33
5816
0
0
0
$celle=$nodup==1?"":$nodup;
5817
0
0
$nodup[$i][$j]=1;
5818
}
5819
else{
5820
210
156
my $hoeyde=0;
5821
210
135
my $bredere;
5822
33
33
175
no warnings;
33
49
33
12903
5823
210
100
100
622
$ikketom[$j]=1 if !$hode && length($celle)>0;
5824
210
364
for(split("\n",$celle)){
5825
236
50
298
$bredere=/
5826
236
268
s/<[^>]+>//g;
5827
236
162
$hoeyde++;
5828
236
178
s/>/>/g;
5829
236
165
s/</
5830
236
100
391
$bredde[$j]=length($_)+1+$bredere if length($_)+1+$bredere>$bredde[$j];
5831
236
100
100
1252
$venstre[$j]=1 if $_ && !/^\s*[\-\+]?(\d+|\d*\.\d+)\s*\%?$/ && !$hode;
100
5832
}
5833
210
100
66
432
if( $hoeyde>1 && !$ikke_space){
5834
25
100
38
$hoeyde++ unless $hode;
5835
25
100
100
50
$hoeyde[$i-1]++ if $i>1 && $hoeyde[$i-1]==1;
5836
}
5837
210
100
329
$hoeyde[$i]=$hoeyde if $hoeyde>$hoeyde[$i];
5838
}
5839
210
196
$j++;
5840
}
5841
}
5842
else{
5843
0
0
$hoeyde[$i]=1;
5844
0
0
$ikke_hodestrek=1;
5845
}
5846
33
34
$hode=0;
5847
33
45
$i++;
5848
}
5849
5
7
$i=$#hoeyde;
5850
5
10
$j=$#bredde;
5851
5
50
33
26
if($i==0 or $venstretvang) { @venstre=map{1}(0..$j) }
0
0
0
0
5852
5
50
16
else { for(0..$j){ $venstre[$_]=1 if !$ikketom[$_] } }
33
56
5853
5
6
my @tabut;
5854
5
8
my $rad_startlinje=0;
5855
5
11
my @overskrift;
5856
my $overskrift_forrige;
5857
5
12
for my $x (0..$i){
5858
33
50
81
if($$tab[$x] eq '-'){
5859
0
0
0
my @tegn=map {$$tab[$x-1][$_]=~/\S/?"-":" "} (0..$j);
0
0
5860
0
0
$tabut[$rad_startlinje]=join(" ",map {$tegn[$_] x ($bredde[$_]-1)} (0..$j));
0
0
5861
}
5862
else{
5863
33
36
for my $y (0..$j){
5864
210
50
33
337
next if $fjern_tom && !$ikketom[$y];
5865
33
33
168
no warnings;
33
53
33
18322
5866
5867
210
0
33
1204
my @celle=
50
5868
!$overskrift_forrige&&$nodup&&$nodup[$x][$y]
5869
?($nodup>0?():((" " x (($bredde[$y]-length($nodup))/2)).$nodup))
5870
:split("\n",$$tab[$x][$y]);
5871
210
335
for(0..($hoeyde[$x]-1)){
5872
261
225
my $linje=$rad_startlinje+$_;
5873
261
100
430
my $txt=shift @celle || '';
5874
261
100
100
1225
$txt=sprintf("%*s",$bredde[$y]-1,$txt) if length($txt)>0 && !$venstre[$y] && ($x>0 || $ikke_hodestrek);
66
66
5875
261
302
$tabut[$linje].=$txt;
5876
261
100
313
if($y==$j){
5877
42
197
$tabut[$linje]=~s/\s+$//;
5878
}
5879
else{
5880
219
161
my $bredere;
5881
219
50
262
$bredere = $txt=~/
5882
219
230
$txt=~s/<[^>]+>//g;
5883
219
180
$txt=~s/>/>/g;
5884
219
170
$txt=~s/</
5885
219
474
$tabut[$linje].= ' ' x ($bredde[$y]-length($txt)-$bredere);
5886
}
5887
}
5888
}
5889
}
5890
33
40
$rad_startlinje+=$hoeyde[$x];
5891
5892
#--lage streker?
5893
33
50
58
if(not $ikke_hodestrek){
5894
33
100
33
136
if($x==0){
50
33
0
5895
5
12
for my $y (0..$j){
5896
33
50
33
99
next if $fjern_tom && !$ikketom[$y];
5897
33
64
$tabut[$rad_startlinje].=('-' x ($bredde[$y]-1))." ";
5898
}
5899
5
8
$rad_startlinje++;
5900
5
18
@overskrift=("",@tabut);
5901
}
5902
elsif(
5903
$x%$pagesize==0 || $nodup>0&&!$nodup[$x+1][$nodup-1]
5904
and $x+1<@$tab
5905
and !$ikke_hodestrek
5906
)
5907
{
5908
0
0
push(@tabut,@overskrift);
5909
0
0
$rad_startlinje+=@overskrift;
5910
0
0
$overskrift_forrige=1;
5911
}
5912
else{
5913
28
37
$overskrift_forrige=0;
5914
}
5915
}
5916
}#for x
5917
5
85
return join("\n",@tabut)."\n";
5918
}
5919
5920
=head2 serialize
5921
5922
Returns a data structure as a string. See also C
5923
(serialize was created long time ago before Data::Dumper appeared on
5924
CPAN, before CPAN even...)
5925
5926
B One to four arguments.
5927
5928
First argument: A reference to the structure you want.
5929
5930
Second argument: (optional) The name the structure will get in the output string.
5931
If second argument is missing or is undef or '', it will get no name in the output.
5932
5933
Third argument: (optional) The string that is returned is also put
5934
into a created file with the name given in this argument. Putting a
5935
C<< > >> char in from of the filename will append that file
5936
instead. Use C<''> or C to not write to a file if you want to
5937
use a fourth argument.
5938
5939
Fourth argument: (optional) A number signalling the depth on which newlines is used in the output.
5940
The default is infinite (some big number) so no extra newlines are output.
5941
5942
B A string containing the perl-code definition that makes that data structure.
5943
The input reference (first input argument) can be to an array, hash or a string.
5944
Those can contain other refs and strings in a deep data structure.
5945
5946
Limitations:
5947
5948
- Code refs are not handled (just returns C)
5949
5950
- Regex, class refs and circular recursive structures are also not handled.
5951
5952
B
5953
5954
$a = 'test';
5955
@b = (1,2,3);
5956
%c = (1=>2, 2=>3, 3=>5, 4=>7, 5=>11);
5957
%d = (1=>2, 2=>3, 3=>\5, 4=>7, 5=>11, 6=>[13,17,19,{1,2,3,'asdf\'\\\''}],7=>'x');
5958
print serialize(\$a,'a');
5959
print serialize(\@b,'tab');
5960
print serialize(\%c,'c');
5961
print serialize(\%d,'d');
5962
print serialize(\("test'n roll",'brb "brb"'));
5963
print serialize(\%d,'d',undef,1);
5964
5965
Prints accordingly:
5966
5967
$a='test';
5968
@tab=('1','2','3');
5969
%c=('1','2','2','3','3','5','4','7','5','11');
5970
%d=('1'=>'2','2'=>'3','3'=>\'5','4'=>'7','5'=>'11','6'=>['13','17','19',{'1'=>'2','3'=>'asdf\'\\\''}]);
5971
('test\'n roll','brb "brb"');
5972
%d=('1'=>'2',
5973
'2'=>'3',
5974
'3'=>\'5',
5975
'4'=>'7',
5976
'5'=>'11',
5977
'6'=>['13','17','19',{'1'=>'2','3'=>'asdf\'\\\''}],
5978
'7'=>'x');
5979
5980
Areas of use:
5981
5982
- Debugging (first and foremost)
5983
5984
- Storing arrays and hashes and data structures of those on file, database or sending them over the net
5985
5986
- eval earlier stored string to get back the data structure
5987
5988
Be aware of the security implications of Cing a perl code string
5989
stored somewhere that unauthorized users can change them! You are
5990
probably better of using L or L without
5991
enabling the CODE-options if you have such security issues.
5992
More on decompiling Perl-code: L or L.
5993
5994
=head2 dserialize
5995
5996
Debug-serialize, dumping data structures for you to look at.
5997
5998
Same as C but the output is given a newline every 80th character.
5999
(Every 80th or whatever C<$Acme::Tools::Dserialize_width> contains)
6000
6001
=cut
6002
6003
our $Dserialize_width=80;
6004
0
0
0
0
sub _kallstack { my $tilbake=shift||0; my @c; my $ret; $ret.=serialize(\@c,"caller$tilbake") while @c=caller(++$tilbake); $ret }
0
0
0
0
0
0
6005
0
0
1
0
sub dserialize{join "\n",serialize(@_)=~/(.{1,$Dserialize_width})/gs}
6006
sub serialize {
6007
33
33
163
no warnings;
33
42
33
109759
6008
1421
1421
1
3664
my($r,$name,$filename,$level)=@_;
6009
1421
100
2471
my @r=(undef,undef,($level||0)-1);
6010
1421
50
1628
if($filename){
6011
0
0
0
open my $fh, '>', $filename or croak("FEIL: could not open file $filename\n" . _kallstack());
6012
0
0
my $ret=serialize($r,$name,undef,$level);
6013
0
0
print $fh "$ret\n1;\n";
6014
0
0
close($fh);
6015
0
0
return $ret;
6016
}
6017
1421
100
2007
if(ref($r) eq 'SCALAR'){
100
100
100
50
0
0
0
0
6018
1151
50
1346
return "\$$name=".serialize($r,@r).";\n" if $name;
6019
1151
100
1367
return "undef" unless defined $$r;
6020
1143
862
my $ret=$$r;
6021
1143
1063
$ret=~s/\\/\\\\/g;
6022
1143
785
$ret=~s/\'/\\'/g;
6023
1143
2296
return "'$ret'";
6024
}
6025
elsif(ref($r) eq 'ARRAY'){
6026
76
100
125
return "\@$name=".serialize($r,@r).";\n" if $name;
6027
74
69
my $ret="(";
6028
74
97
for(@$r){
6029
392
535
$ret.=serialize(\$_,@r).",";
6030
392
100
690
$ret.="\n" if $level>=0;
6031
}
6032
74
199
$ret=~s/,$//;
6033
74
90
$ret.=")";
6034
74
50
116
$ret.=";\n" if $name;
6035
74
207
return $ret;
6036
}
6037
elsif(ref($r) eq 'HASH'){
6038
31
100
172
return "\%$name=".serialize($r,@r).";\n" if $name;
6039
21
23
my $ret="(";
6040
21
84
for(sort keys %$r){
6041
73
119
$ret.=serialize(\$_,@r)."=>".serialize(\$$r{$_},@r).",";
6042
73
50
129
$ret.="\n" if $level>=0;
6043
}
6044
21
75
$ret=~s/,$//;
6045
21
32
$ret.=")";
6046
21
50
34
$ret.=";\n" if $name;
6047
21
76
return $ret;
6048
}
6049
elsif(ref($$r) eq 'ARRAY'){
6050
70
50
90
return "\@$name=".serialize($r,@r).";\n" if $name;
6051
70
54
my $ret="[";
6052
70
72
for(@$$r){
6053
188
213
$ret.=serialize(\$_,@r).",";
6054
188
50
33
613
$ret.="\n" if !defined $level or $level>=0;
6055
}
6056
70
213
$ret=~s/,$//;
6057
70
58
$ret.="]";
6058
70
50
90
$ret.=";\n" if $name;
6059
70
129
return $ret;
6060
}
6061
elsif(ref($$r) eq 'HASH'){
6062
93
50
133
return "\%$name=".serialize($r,@r).";\n" if $name;
6063
93
76
my $ret="{";
6064
93
288
for(sort keys %$$r){
6065
294
367
$ret.=serialize(\$_,@r)."=>".serialize(\$$$r{$_},@r).",";
6066
294
50
519
$ret.="\n" if $level>=0;
6067
}
6068
93
223
$ret=~s/,$//;
6069
93
76
$ret.="}";
6070
93
50
117
$ret.=";\n" if $name;
6071
93
193
return $ret;
6072
}
6073
elsif(ref($$r) eq 'SCALAR'){
6074
0
0
return "\\".serialize($$r,@r);
6075
}
6076
elsif(ref($r) eq 'LVALUE'){
6077
0
0
my $s="$$r";
6078
0
0
return serialize(\$s,@r);
6079
}
6080
elsif(ref($$r) eq 'CODE'){
6081
#warn "Tried to serialize CODE";
6082
0
0
return 'sub{croak "Can not serialize CODE-references, see perhaps B::Deparse and Storable"}'
6083
}
6084
elsif(ref($$r) eq 'GLOB'){
6085
0
0
warn "Tried to serialize a GLOB";
6086
0
0
return '\*STDERR'
6087
}
6088
else{
6089
0
0
my $tilbake;
6090
0
0
my($pakke,$fil,$linje,$sub,$hasargs,$wantarray);
6091
0
0
0
($pakke,$fil,$linje,$sub,$hasargs,$wantarray)=caller($tilbake++) until $sub ne 'serialize' || $tilbake>20;
6092
0
0
croak("serialize() argument should be reference!\n".
6093
"\$r=$r\n".
6094
"ref(\$r) = ".ref($r)."\n".
6095
"ref(\$\$r) = ".ref($$r)."\n".
6096
"kallstack:\n". _kallstack());
6097
}
6098
}
6099
6100
=head2 srlz
6101
6102
Synonym to L, but remove unnecessary single quote chars around
6103
C<< \w+ >>-keys and number values (except numbers with leading zeros). Example:
6104
6105
serialize:
6106
6107
%s=('action'=>{'del'=>'0','ins'=>'0','upd'=>'18'},'post'=>'1348','pre'=>'1348',
6108
'updcol'=>{'Laerestednr'=>'18','Studietypenr'=>'18','Undervisningssted'=>'7','Url'=>'11'},
6109
'where'=>'where 1=1');
6110
6111
srlz:
6112
6113
%s=(action=>{del=>0,ins=>0,upd=>18},post=>1348,pre=>1348,
6114
updcol=>{Laerestednr=>18,Studietypenr=>18,Undervisningssted=>7,Url=>11},
6115
where=>'where 1=1');
6116
6117
Todo: update L to do the same, but in the right way. (For now
6118
srlz runs the string from serialize() through two C<< s/// >>, this will break
6119
in certain cases). L will be kept as a synonym (or the other way around).
6120
6121
=cut
6122
6123
sub srlz {
6124
0
0
1
0
my $s=serialize(@_);
6125
0
0
$s=~s,'(\w+)'=>,$1=>,g;
6126
0
0
$s=~s,=>'((0|[1-9]\d*)(\.\d+)?(e[-+]?\d+)?)',=>$1,gi; #ikke ledende null! hm
6127
0
0
$s;
6128
}
6129
6130
=head2 cnttbl
6131
6132
my %nordic_country_population=(Norway=>5214890,Sweden=>9845155,Denmark=>5699220,Finland=>5496907,Iceland=>331310);
6133
print cnttbl(\%nordic_country_population);
6134
Iceland 331310 1.25%
6135
Norway 5214890 19.61%
6136
Finland 5496907 20.67%
6137
Denmark 5699220 21.44%
6138
Sweden 9845155 37.03%
6139
SUM 26587482 100.00%
6140
6141
Todo: Levels...:
6142
6143
my %sales=(
6144
Toyota=>{Prius=>19,RAV=>12,Auris=>18,Avensis=>7},
6145
Volvo=>{V40=>14, XC90=>4},
6146
Nissan=>{Leaf=>19,Qashqai=>17},
6147
Tesla=>{ModelS=>8}
6148
);
6149
print cnttbl(\%sales);
6150
Toyota SUM 56
6151
Volvo SUM 18
6152
Nissan SUM 36
6153
Tesla SUM 8
6154
SUM SUM 56 100%
6155
6156
=cut
6157
6158
sub cnttbl {
6159
6
6
1
365
my $hr=shift;
6160
6
33
my $maxlen=max(3,map length($_),keys%$hr);
6161
join"",ref((values%$hr)[0])
6162
4
4
?do{ map {my$o=$_;join("",map rpad($$o[0],$maxlen)." $_\n",split("\n",$$o[1]))}
4
13
6163
1
8
map [$_,cnttbl($$hr{$_})],
6164
sort keys%$hr }
6165
6
100
14
:do{ my $sum=sum(values%$hr);
5
9
6166
5
12
my $fmt=repl("%-xs %yd %6.2f%%\n",x=>$maxlen,y=>length($sum));
6167
map sprintf($fmt,@$_,100*$$_[1]/$sum),
6168
5
50
27
(map[$_,$$hr{$_}],sort{$$hr{$a}<=>$$hr{$b} or $a cmp $b}keys%$hr),
15
156
6169
(['SUM',$sum]) }
6170
}
6171
6172
=head2 nicenum
6173
6174
print 14.3 - 14.0; # 0.300000000000001
6175
print 34.3 - 34.0; # 0.299999999999997
6176
print nicenum( 14.3 - 14.0 ); # 0.3
6177
print nicenum( 34.3 - 34.0 ); # 0.3
6178
6179
=cut
6180
6181
2
2
1
3
sub nicenum { my $n=shift; $n=~s,((\d)\2\2\2\2\2\2\2)\d$,$1$2$2$2$2$2$2$2$2$2$2$2,; 0+$n }
2
41
2
13
6182
6183
6184
=head2 sys
6185
6186
Call instead of C if you want C (Carp::croak) when something fails.
6187
6188
sub sys($){ my$s=shift; my$r=system($s); $r==0 or croak"ERROR: system($s)==$r ($!) ($?)" }
6189
6190
6191
=cut
6192
6193
11
50
11
1
21
sub sys($){ my$s=shift; my$r=system($s); $r==0 or croak"ERROR: system($s)==$r ($!) ($?)" }
11
169247
11
369
6194
6195
=head2 recursed
6196
6197
Returns true or false (actually 1 or 0) depending on whether the
6198
current sub has been called by itself or not.
6199
6200
sub xyz
6201
{
6202
xyz() if not recursed;
6203
6204
}
6205
6206
=cut
6207
6208
0
0
0
1
0
sub recursed {(caller(1))[3] eq (caller(2))[3]?1:0}
6209
6210
6211
6212
=head2 ed
6213
6214
String editor commands
6215
6216
literals: a-z 0-9 space
6217
move cursor: FBAEPN MF MB ME
6218
delete: D Md
6219
up/low/camelcase word U L C
6220
backspace: -
6221
search: S
6222
return/enter: R
6223
meta/esc/alt: M
6224
shift: T
6225
cut to eol: K
6226
caps lock: C
6227
yank: Y
6228
start and end: < >
6229
macro start/end/play: { } !
6230
times for next cmd: M (i.e. M24a inserts 24 a's)
6231
6232
(TODO: alfa...and more docs needed)
6233
6234
=cut
6235
6236
our $Edcursor;
6237
sub ed {
6238
18
18
1
3029
my($s,$cs,$p,$buf)=@_; #string, commands, point (or cursor)
6239
18
100
45
return $$s=ed($$s,$cs,$p,$buf) if ref($s);
6240
17
20
my($sh,$cl,$m,$t,@m)=(0,0,0,undef);
6241
17
29
while(length($cs)){
6242
121
75
my $n = 0;
6243
121
50
304
my $c = $cs=~s,^(M\d+|M.|""|".+?"|S.+?R|\\.|.),,s ? $1 : die;
6244
121
100
228
$p = curb($p||0,0,length($s));
6245
121
100
142
if(defined$t){$cs="".($c x $t).$cs;$t=undef;next}
1
4
1
1
1
2
6246
120
81
226
my $add=sub{substr($s,$p,0)=$_[0];$p+=length($_[0])};
81
86
81
61
6247
120
50
471
if ($c =~ /^([a-z0-9 ])/){ &$add($sh^$cl?uc($1):$1); $sh=0 }
65
100
87
65
100
44
100
100
100
100
100
50
100
100
100
100
100
50
100
100
100
50
50
50
50
50
50
50
100
100
100
50
6248
1
2
elsif($c =~ /^"(.+)"$/) { &$add($1) }
6249
1
2
elsif($c =~ /^\\(.)/) { &$add($1) }
6250
2
50
4
elsif($c =~ /^S(.+)R/) { my $i=index($s,$1,$p);$p=$i+length($1) if $i>=0 }
2
6
6251
1
2
elsif($c =~ /^M(\d+)/) { $t=$1; next }
1
3
6252
2
3
elsif($c eq 'F') { $p++ }
6253
1
1
elsif($c eq 'B') { $p-- }
6254
0
0
0
elsif($c eq 'A') { $p-- while $p>0 and substr($s,$p-1,2)!~/^\n/ }
6255
8
50
28
elsif($c eq 'E') { substr($s,$p)=~/(.*)/ and $p+=length($1) }
6256
4
4
elsif($c eq 'D') { substr($s,$p,1)='' }
6257
3
50
16
elsif($c eq 'MD'){ substr($s,$p)=~s/^(\W*\w+)// and $buf=$1 }
6258
3
50
10
elsif($c eq 'MF'){ substr($s,$p)=~/(\W*\w+)/ and $p+=length($1) }
6259
3
50
16
elsif($c eq 'MB'){ substr($s,0,$p)=~/(\w+\W*)$/ and $p-=length($1) }
6260
0
0
0
elsif($c eq '-') { substr($s,--$p,1)='' if $p }
6261
5
50
30
elsif($c eq 'M-'){ substr($s,0,$p)=~s/(\w+\W*)$// and $p-=length($buf=$1)}
6262
3
50
19
elsif($c eq 'K') { substr($s,$p)=~s/(\S.+|\s*?\n)// and $buf=$1 }
6263
12
14
elsif($c eq 'Y') { &$add($buf) }
6264
0
0
elsif($c eq 'U') { substr($s,$p)=~s/(\W*)(\w+)/$1\U$2\E/; $p+=length($1.$2) }
0
0
6265
0
0
elsif($c eq 'L') { substr($s,$p)=~s/(\W*)(\w+)/$1\L$2\E/; $p+=length($1.$2) }
0
0
6266
0
0
elsif($c eq 'C') { substr($s,$p)=~s/(\W*)(\w+)/$1\u\L$2\E/; $p+=length($1.$2) }
0
0
6267
0
0
elsif($c eq '<') { $p=0 }
6268
0
0
elsif($c eq '>') { $p=length($s) }
6269
0
0
elsif($c eq 'T') { $sh=1 }
6270
0
0
elsif($c eq 'C') { $cl^=1 }
6271
1
2
elsif($c eq '{') { $m=1; @m=() }
1
1
6272
1
2
elsif($c eq '}') { $m=0 }
6273
2
50
33
12
elsif($c eq '!') { $m||!@m and die"ed: no macro"; $cs=join("",@m).$cs }
2
3
6274
2
3
elsif($c eq '""'){ &$add('"') }
6275
0
0
else { croak "ed: Unknown cmd '$c'\n" }
6276
119
100
100
347
push @m, $c if $m and $c ne '{';
6277
#warn serialize([$c,$m,$cs],'d');
6278
}
6279
17
15
$Edcursor=$p;
6280
17
36
$s;
6281
}
6282
6283
#todo: sub unbless eller sub damn
6284
#todo: ..se også: use Data::Structure::Util qw/unbless/;
6285
#todo: ...og: Acme::Damn sin damn()
6286
#todo? sub swap($$) http://www.idg.no/computerworld/article242008.ece
6287
#todo? catal
6288
#todo?
6289
#void quicksort(int t, int u) int i, m; if (t >= u) return; swap(t, randint(t, u)); m = t; for (i = t + 1; i <= u; i++) if (x[i] < x[t]) swap(++m, i); swap(t, m) quicksort(t, m-1); quicksort(m+1, u);
6290
6291
6292
=head1 JUST FOR FUN
6293
6294
=head2 brainfu
6295
6296
B one or two arguments
6297
6298
First argument: a string, source code of the brainfu
6299
language. String containing the eight charachters + - < > [ ] . ,
6300
Every other char is ignored silently.
6301
6302
Second argument: if the source code contains commas (,) the second
6303
argument is the input characters in a string.
6304
6305
B The resulting output from the program.
6306
6307
Example:
6308
6309
print brainfu(<<""); #prints "Hallo Verden!\n"
6310
++++++++++[>+++++++>++++++++++>+++>+<<<<-]>++.>---.+++++++++++..+++.>++.<<++++++++++++++
6311
.>----------.+++++++++++++.--------------.+.+++++++++.>+.>.
6312
6313
See L
6314
6315
=head2 brainfu2perl
6316
6317
Just as L but instead it return the perl code to which the
6318
brainfu code is translated. Just C<< eval() >> this perl code to run.
6319
6320
Example:
6321
6322
print brainfu2perl('>++++++++[<++++++++>-]<++++++++.>++++++[<++++++>-]<---.');
6323
6324
Prints this string:
6325
6326
my($c,$o,@b)=(0); sub out{$o.=chr($b[$c]) for 1..$_[0]||1}
6327
++$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];
6328
while($b[$c]){--$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];
6329
++$b[$c];++$c;--$b[$c];}--$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];
6330
++$b[$c];++$b[$c];out;++$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];
6331
while($b[$c]){--$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$c;--$b[$c];}
6332
--$c;--$b[$c];--$b[$c];--$b[$c];out;$o;
6333
6334
=head2 brainfu2perl_optimized
6335
6336
Just as L but optimizes the perl code. The same
6337
example as above with brainfu2perl_optimized returns this equivalent
6338
but shorter perl code:
6339
6340
$b[++$c]+=8;while($b[$c]){$b[--$c]+=8;--$b[++$c]}$b[--$c]+=8;out;$b[++$c]+=6;
6341
while($b[$c]){$b[--$c]+=6;--$b[++$c]}$b[--$c]-=3;out;$o;
6342
6343
=cut
6344
6345
3
3
1
489
sub brainfu { eval(brainfu2perl(@_)) }
6346
6347
sub brainfu2perl {
6348
6
6
1
17
my($bf,$inp)=@_;
6349
17
48
my $perl='my($c,$inp,$o,@b)=(0,\''.$inp.'\'); no warnings; sub out{$o.=chr($b[$c]) for 1..$_[0]||1}'."\n";
6350
3
100
66
17
$perl.='sub inp{$inp=~s/(.)//s and $b[$c]=ord($1)}'."\n" if $inp and $bf=~/,/;
6351
3
100
516
$perl.=join("",map/\+/?'++$b[$c];':/\-/?'--$b[$c];':/\[/?'while($b[$c]){':/\]/?'}':/>/?'++$c;':/?'--$c;':/\./?'out;':/\,/?'inp;':'',split//,$bf).'$o;';
100
100
100
100
100
100
100
6352
3
245
$perl;
6353
}
6354
6355
sub brainfu2perl_optimized {
6356
0
14
1
0
my $perl=brainfu2perl(@_);
6357
0
0
$perl=~s{(((\+|\-)\3\$b\[\$c\];){2,})}{ '$b[$c]'.$3.'='.(grep/b/,split//,$1).';' }gisex;
0
0
6358
0
0
1 while $perl=~s/\+\+\$c;\-\-\$c;//g + $perl=~s/\-\-\$c;\+\+\$c;//g;
6359
0
0
$perl=~s{((([\-\+])\3\$c;){2,})}{"\$c$3=".(grep/c/,split//,$1).';'}gisex;
0
0
6360
0
0
0
$perl=~s{((\+\+|\-\-)\$c;([^;{}]+;))}{my($o,$s)=($2,$3);$s=~s/\$c/$o\$c/?$s:$1}ge;
0
0
0
0
6361
0
0
$perl=~s/\$c(\-|\+)=(\d+);(\+\+|\-\-)\$b\[\$c\]/$3.'$b[$c'.$1.'='.$2.'];'/ge;
0
0
6362
0
0
$perl=~s{((out;){2,})}{'out('.(grep/o/,split//,$1).');'}ge;
0
0
6363
0
0
$perl=~s/;}/}/g;$perl=~s/;+/;/g;
0
0
6364
0
0
$perl;
6365
}
6366
6367
6368
=head1 BLOOM FILTER SUBROUTINES
6369
6370
Bloom filters can be used to check whether an element (a string) is a
6371
member of a large set using much less memory or disk space than other
6372
data structures. Trading speed and accuracy for memory usage. While
6373
risking false positives, Bloom filters have a very strong space
6374
advantage over other data structures for representing sets.
6375
6376
In the example below, a set of 100000 phone numbers (or any string of
6377
any length) can be "stored" in just 91230 bytes if you accept that you
6378
can only check the data structure for existence of a string and accept
6379
false positives with an error rate of 0.03 (that is three percent, error
6380
rates are given in numbers larger than 0 and smaller than 1).
6381
6382
You can not retrieve the strings in the set without using "brute
6383
force" methods and even then you would get slightly more strings than
6384
you put in because of the error rate inaccuracy.
6385
6386
Bloom Filters have many uses.
6387
6388
See also: L
6389
6390
See also: L
6391
6392
=head2 bfinit
6393
6394
Initialize a new Bloom Filter:
6395
6396
my $bf = bfinit( error_rate=>0.01, capacity=>100000 );
6397
6398
The same:
6399
6400
my $bf = bfinit( 0.01, 100000 );
6401
6402
since two arguments is interpreted as error_rate and capacity accordingly.
6403
6404
6405
=head2 bfadd
6406
6407
bfadd($bf, $_) for @phone_numbers; # Adding strings one at a time
6408
6409
bfadd($bf, @phone_numbers); # ...or all at once (faster)
6410
6411
Returns 1 on success. Dies (croaks) if more strings than capacity is added.
6412
6413
=head2 bfcheck
6414
6415
my $phone_number="97713246";
6416
if ( bfcheck($bf, $phone_number) ) {
6417
print "Yes, $phone_number was PROBABLY added\n";
6418
}
6419
else{
6420
print "No, $phone_number was DEFINITELY NOT added\n";
6421
}
6422
6423
Returns true if C<$phone_number> exists in C<@phone_numbers>.
6424
6425
Returns false most of the times, but sometimes true*), if C<$phone_number> doesn't exists in C<@phone_numbers>.
6426
6427
*) This is called a false positive.
6428
6429
Checking more than one key:
6430
6431
@bools = bfcheck($bf, @keys); # or ...
6432
@bools = bfcheck($bf, \@keys); # better, uses less memory if @keys is large
6433
6434
Returns an array the same size as @keys where each element is true or false accordingly.
6435
6436
=head2 bfgrep
6437
6438
Same as C except it returns the keys that exists in the bloom filter
6439
6440
@found = bfgrep($bf, @keys); # or ...
6441
@found = bfgrep($bf, \@keys); # better, uses less memory if @keys is large, or ...
6442
@found = grep bfcheck($bf,$_), @keys; # same but slower
6443
6444
=head2 bfgrepnot
6445
6446
Same as C except it returns the keys that do NOT exists in the bloom filter:
6447
6448
@not_found = bfgrepnot($bf, @keys); # or ...
6449
@not_found = bfgrepnot($bf, \@keys); # better, uses less memory if @keys is large, or ...
6450
@not_found = grep !bfcheck($bf,$_), @keys); # same but slower
6451
6452
=head2 bfdelete
6453
6454
Deletes from a counting bloom filter.
6455
6456
To enable deleting be sure to initialize the bloom filter with the
6457
numeric C argument. The number of bits could be 2 or 3*)
6458
for small filters with a small capacity (a small number of keys), but
6459
setting the number to 4 ensures that even very large filters with very
6460
small error rates would not overflow.
6461
6462
*) Acme::Tools do not currently support C<< counting_bits => 3 >> so 4
6463
and 8 are the only practical alternatives where 8 is almost always overkill.
6464
6465
my $bf=bfinit(
6466
error_rate => 0.001,
6467
capacity => 10000000,
6468
counting_bits => 4 # power of 2, that is 2, 4, 8, 16 or 32
6469
);
6470
bfadd( $bf, @unique_phone_numbers);
6471
bfdelete($bf, @unique_phone_numbers);
6472
6473
Example: examine the frequency of the counters with 4 bit counters and 4 million keys:
6474
6475
my $bf=bfinit( error_rate=>0.001, capacity=>4e6, counting_bits=>4 );
6476
bfadd($bf,[1e3*$_+1 .. 1e3*($_+1)]) for 0..4000-1; # adding 4 million keys one thousand at a time
6477
my %c; $c{vec($$bf{filter},$_,$$bf{counting_bits})}++ for 0..$$bf{filterlength}-1;
6478
printf "%8d counters = %d\n",$c{$_},$_ for sort{$a<=>$b}keys%c;
6479
6480
The output:
6481
6482
28689562 counters = 0
6483
19947673 counters = 1
6484
6941082 counters = 2
6485
1608250 counters = 3
6486
280107 counters = 4
6487
38859 counters = 5
6488
4533 counters = 6
6489
445 counters = 7
6490
46 counters = 8
6491
1 counters = 9
6492
6493
Even after the error_rate is changed from 0.001 to a percent of that, 0.00001, the limit of 16 (4 bits) is still far away:
6494
6495
47162242 counters = 0
6496
33457237 counters = 1
6497
11865217 counters = 2
6498
2804447 counters = 3
6499
497308 counters = 4
6500
70608 counters = 5
6501
8359 counters = 6
6502
858 counters = 7
6503
65 counters = 8
6504
4 counters = 9
6505
6506
In algorithmic terms the number of bits needed is C. Thats why 4 bits (counters up
6507
to 15) is "always" good enough except for extremely large capasities or extremely small error rates.
6508
(Except when adding the same key many times, which should be avoided, and Acme::Tools::bfadd do not
6509
check for that, perhaps in future versions).
6510
6511
Bloom filters of the counting type are not very space efficient: The tables above shows that 84%-85%
6512
of the counters are 0 or 1. This means most bits are zero-bits. This doesn't have to be a problem if
6513
a counting bloom filter is used to be sent over slow networks because they are very compressable by
6514
common compression tools like I or L and such.
6515
6516
Deletion of non-existing keys makes C die (croak).
6517
6518
=head2 bfdelete
6519
6520
Deletes from a counting bloom filter:
6521
6522
bfdelete($bf, @keys);
6523
bfdelete($bf, \@keys);
6524
6525
Returns C<$bf> after deletion.
6526
6527
Croaks (dies) on deleting a non-existing key or deleting from an previouly overflown counter in a counting bloom filter.
6528
6529
=head2 bfaddbf
6530
6531
Adds another bloom filter to a bloom filter.
6532
6533
Bloom filters has the proberty that bit-wise I-ing the bit-filters
6534
of two filters with the same capacity and the same number and type of
6535
hash functions, adds the filters:
6536
6537
my $bf1=bfinit(error_rate=>0.01,capacity=>$cap,keys=>[1..500]);
6538
my $bf2=bfinit(error_rate=>0.01,capacity=>$cap,keys=>[501..1000]);
6539
6540
bfaddbf($bf1,$bf2);
6541
6542
print "Yes!" if bfgrep($bf1, 1..1000) == 1000;
6543
6544
Prints yes since C now returns an array of all the 1000 elements.
6545
6546
Croaks if the filters are of different dimensions.
6547
6548
Works for counting bloom filters as well (C<< counting_bits=>4 >> e.g.)
6549
6550
=head2 bfsum
6551
6552
Returns the number of 1's in the filter.
6553
6554
my $percent=100*bfsum($bf)/$$bf{filterlength};
6555
printf "The filter is %.1f%% filled\n",$percent; #prints 50.0% or so if filled to capacity
6556
6557
Sums the counters for counting bloom filters (much slower than for non counting).
6558
6559
=head2 bfdimensions
6560
6561
Input, two numeric arguments: Capacity and error_rate.
6562
6563
Outputs an array of two numbers: m and k.
6564
6565
m = - n * log(p) / log(2)**2 # n = capacity, m = bits in filter (divide by 8 to get bytes)
6566
k = log(1/p) / log(2) # p = error_rate, uses perls internal log() with base e (2.718)
6567
6568
...that is: m = the best number of bits in the filter and k = the best
6569
number of hash functions optimized for the given capacity (n) and
6570
error_rate (p). Note that k is a dependent only of the error_rate. At
6571
about two percent error rate the bloom filter needs just the same
6572
number of bytes as the number of keys.
6573
6574
Storage (bytes):
6575
Capacity Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate
6576
0.000000001 0.00000001 0.0000001 0.000001 0.00001 0.0001 0.001 0.01 0.02141585 0.1 0.5 0.99
6577
------------- ----------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------
6578
10 54.48 48.49 42.5 36.51 30.52 24.53 18.53 12.54 10.56 6.553 2.366 0.5886
6579
100 539.7 479.8 419.9 360 300.1 240.2 180.3 120.4 100.6 60.47 18.6 0.824
6580
1000 5392 4793 4194 3595 2996 2397 1798 1199 1001 599.6 180.9 3.177
6581
10000 5.392e+04 4.793e+04 4.194e+04 3.594e+04 2.995e+04 2.396e+04 1.797e+04 1.198e+04 1e+04 5991 1804 26.71
6582
100000 5.392e+05 4.793e+05 4.193e+05 3.594e+05 2.995e+05 2.396e+05 1.797e+05 1.198e+05 1e+05 5.991e+04 1.803e+04 262
6583
1000000 5.392e+06 4.793e+06 4.193e+06 3.594e+06 2.995e+06 2.396e+06 1.797e+06 1.198e+06 1e+06 5.991e+05 1.803e+05 2615
6584
10000000 5.392e+07 4.793e+07 4.193e+07 3.594e+07 2.995e+07 2.396e+07 1.797e+07 1.198e+07 1e+07 5.991e+06 1.803e+06 2.615e+04
6585
100000000 5.392e+08 4.793e+08 4.193e+08 3.594e+08 2.995e+08 2.396e+08 1.797e+08 1.198e+08 1e+08 5.991e+07 1.803e+07 2.615e+05
6586
1000000000 5.392e+09 4.793e+09 4.193e+09 3.594e+09 2.995e+09 2.396e+09 1.797e+09 1.198e+09 1e+09 5.991e+08 1.803e+08 2.615e+06
6587
10000000000 5.392e+10 4.793e+10 4.193e+10 3.594e+10 2.995e+10 2.396e+10 1.797e+10 1.198e+10 1e+10 5.991e+09 1.803e+09 2.615e+07
6588
100000000000 5.392e+11 4.793e+11 4.193e+11 3.594e+11 2.995e+11 2.396e+11 1.797e+11 1.198e+11 1e+11 5.991e+10 1.803e+10 2.615e+08
6589
1000000000000 5.392e+12 4.793e+12 4.193e+12 3.594e+12 2.995e+12 2.396e+12 1.797e+12 1.198e+12 1e+12 5.991e+11 1.803e+11 2.615e+09
6590
6591
Error rate: 0.99 Hash functions: 1
6592
Error rate: 0.5 Hash functions: 1
6593
Error rate: 0.1 Hash functions: 3
6594
Error rate: 0.0214158522653385 Hash functions: 6
6595
Error rate: 0.01 Hash functions: 7
6596
Error rate: 0.001 Hash functions: 10
6597
Error rate: 0.0001 Hash functions: 13
6598
Error rate: 0.00001 Hash functions: 17
6599
Error rate: 0.000001 Hash functions: 20
6600
Error rate: 0.0000001 Hash functions: 23
6601
Error rate: 0.00000001 Hash functions: 27
6602
Error rate: 0.000000001 Hash functions: 30
6603
6604
=head2 bfstore
6605
6606
Storing and retrieving bloom filters to and from disk uses Ls C and C. This:
6607
6608
bfstore($bf,'filename.bf');
6609
6610
It the same as:
6611
6612
use Storable qw(store retrieve);
6613
...
6614
store($bf,'filename.bf');
6615
6616
=head2 bfretrieve
6617
6618
This:
6619
6620
my $bf=bfretrieve('filename.bf');
6621
6622
Or this:
6623
6624
my $bf=bfinit('filename.bf');
6625
6626
Is the same as:
6627
6628
use Storable qw(store retrieve);
6629
my $bf=retrieve('filename.bf');
6630
6631
=head2 bfclone
6632
6633
Deep copies the bloom filter data structure. (Which btw is not very deep, two levels at most)
6634
6635
This:
6636
6637
my $bfc = bfclone($bf);
6638
6639
Works just as:
6640
6641
use Storable;
6642
my $bfc=Storable::dclone($bf);
6643
6644
=head2 Object oriented interface to bloom filters
6645
6646
use Acme::Tools;
6647
my $bf=new Acme::Tools::BloomFilter(0.1,1000); # the same as bfinit, see bfinit above
6648
print ref($bf),"\n"; # prints Acme::Tools::BloomFilter
6649
$bf->add(@keys);
6650
$bf->check($keys[0]) and print "ok\n"; # prints ok
6651
$bf->grep(\@keys)==@keys and print "ok\n"; # prints ok
6652
$bf->store('filename.bf');
6653
my $bf2=bfretrieve('filename.bf');
6654
$bf2->check($keys[0]) and print "ok\n"; # still ok
6655
6656
$bf2=$bf->clone();
6657
6658
To instantiate a previously stored bloom filter:
6659
6660
my $bf = Acme::Tools::BloomFilter->new( '/path/to/stored/bloomfilter.bf' );
6661
6662
The o.o. interface has the same methods as the C-subs without the
6663
C-prefix in the names. The C is not available as a
6664
method, although C, C and
6665
C are synonyms.
6666
6667
=head2 Internals and speed
6668
6669
The internal hash-functions are C<< md5( "$key$salt" ) >> from L.
6670
6671
Since C returns 128 bits and most medium to large sized bloom
6672
filters need only a 32 bit hash function, the result from md5() are
6673
split (C-ed) into 4 parts 32 bits each and are treated as if 4
6674
hash functions was called at once (speedup). Using different salts to
6675
the key on each md5 results in different hash functions.
6676
6677
Digest::SHA512 would have been even better since it returns more bits,
6678
if it werent for the fact that it's much slower than Digest::MD5.
6679
6680
String::CRC32::crc32 is faster than Digest::MD5, but not 4 times faster:
6681
6682
time perl -e'use Digest::MD5 qw(md5);md5("asdf$_") for 1..10e6' #5.56 sec
6683
time perl -e'use String::CRC32;crc32("asdf$_") for 1..10e6' #2.79 sec, faster but not per bit
6684
time perl -e'use Digest::SHA qw(sha512);sha512("asdf$_") for 1..10e6' #36.10 sec, too slow (sha1, sha224, sha256 and sha384 too)
6685
6686
Md5 seems to be an ok choice both for speed and avoiding collitions due to skewed data keys.
6687
6688
=head2 Theory and math behind bloom filters
6689
6690
L
6691
6692
L
6693
6694
L
6695
6696
See also Scaleable Bloom Filters: L (not implemented in Acme::Tools)
6697
6698
...and perhaps L
6699
6700
=cut
6701
6702
sub bfinit {
6703
24
100
24
1
8141
return bfretrieve(@_) if @_==1;
6704
23
50
66
90
return bfinit(error_rate=>$_[0], capacity=>$_[1]) if @_==2 and 0<$_[0] and $_[0]<1 and $_[1]>1;
66
33
6705
21
0
33
47
return bfinit(error_rate=>$_[1], capacity=>$_[0]) if @_==2 and 0<$_[1] and $_[1]<1 and $_[0]>1;
33
0
6706
21
113
require Digest::MD5;
6707
21
50
41
@_%2&&croak "Arguments should be a hash of equal number of keys and values";
6708
21
68
my %arg=@_;
6709
21
57
my @ok_param=qw/error_rate capacity min_hashfuncs max_hashfuncs hashfuncs counting_bits adaptive keys/;
6710
21
79
my @not_ok=sort(grep!in($_,@ok_param),keys%arg);
6711
21
100
212
croak "Not ok param to bfinit: ".join(", ",@not_ok) if @not_ok;
6712
20
50
66
71
croak "Not an arrayref in keys-param" if exists $arg{keys} and ref($arg{keys}) ne 'ARRAY';
6713
20
50
56
croak "Not implemented counting_bits=$arg{counting_bits}, should be 2, 4, 8, 16 or 32" if !in(nvl($arg{counting_bits},1),1,2,4,8,16,32);
6714
20
50
33
52
croak "An bloom filters here can not be in both adaptive and counting_bits modes" if $arg{adaptive} and $arg{counting_bits}>1;
6715
20
129
my $bf={error_rate => 0.001, #default p
6716
capacity => 100000, #default n
6717
min_hashfuncs => 1,
6718
max_hashfuncs => 100,
6719
counting_bits => 1, #default: not counting filter
6720
adaptive => 0,
6721
%arg, #arguments
6722
key_count => 0,
6723
overflow => {},
6724
version => $Acme::Tools::VERSION,
6725
};
6726
20
100
100
269
croak "Error rate ($$bf{error_rate}) should be larger than 0 and smaller than 1" if $$bf{error_rate}<=0 or $$bf{error_rate}>=1;
6727
18
50
32
@$bf{'min_hashfuncs','max_hashfuncs'}=(map$arg{hashfuncs},1..2) if $arg{hashfuncs};
6728
18
35
@$bf{'filterlength','hashfuncs'}=bfdimensions($bf); #m and k
6729
18
776
$$bf{filter}=pack("b*", '0' x ($$bf{filterlength}*$$bf{counting_bits}) ); #hm x new empty filter
6730
$$bf{unpack}= $$bf{filterlength}<=2**16/4 ? "n*" # /4 alleviates skewing if m just slightly < 2**x
6731
18
50
39
:$$bf{filterlength}<=2**32/4 ? "N*"
100
6732
: "Q*";
6733
18
100
36
bfadd($bf,@{$arg{keys}}) if $arg{keys};
8
42
6734
16
76
return $bf;
6735
}
6736
sub bfaddbf {
6737
2
2
1
3
my($bf,$bf2)=@_;
6738
my $differror=join"\n",
6739
map "Property $_ differs ($$bf{$_} vs $$bf2{$_})",
6740
2
37
grep $$bf{$_} ne $$bf2{$_},
6741
qw/capacity counting_bits adaptive hashfuncs filterlength/; #not error_rate
6742
2
50
6
croak $differror if $differror;
6743
2
50
7
croak "Can not add adaptive bloom filters" if $$bf{adaptive};
6744
2
3
my $count=$$bf{key_count}+$$bf2{key_count};
6745
croak "Exceeded filter capacity $$bf{key_count} + $$bf2{key_count} = $count > $$bf{capacity}"
6746
2
50
4
if $count > $$bf{capacity};
6747
2
4
$$bf{key_count}+=$$bf2{key_count};
6748
2
100
6
if($$bf{counting_bits}==1){
6749
1
4
$$bf{filter} |= $$bf2{filter};
6750
#$$bf{filter} = $$bf{filter} | $$bf2{filter}; #or-ing
6751
}
6752
else {
6753
1
2
my $cb=$$bf{counting_bits};
6754
1
4
for(0..$$bf{filterlength}-1){
6755
my $sum=
6756
vec($$bf{filter}, $_,$cb)+
6757
4793
3595
vec($$bf2{filter},$_,$cb);
6758
4793
50
5262
if( $sum>2**$cb-1 ){
6759
0
0
$sum=2**$cb-1;
6760
0
0
$$bf{overflow}{$_}++;
6761
}
6762
4793
4417
vec($$bf{filter}, $_,$cb)=$sum;
6763
33
33
188
no warnings;
33
41
33
73187
6764
$$bf{overflow}{$_}+=$$bf2{overflow}{$_}
6765
0
0
and keys(%{$$bf{overflow}})>10 #hmm, arbitrary limit
6766
and croak "Too many overflows, concider doubling counting_bits from $cb to ".(2*$cb)
6767
4793
50
0
6085
if exists $$bf2{overflow}{$_};
0
6768
}
6769
}
6770
2
7
return $bf; #for convenience
6771
}
6772
sub bfsum {
6773
8
8
1
48
my($bf)=@_;
6774
8
100
59
return unpack( "%32b*", $$bf{filter}) if $$bf{counting_bits}==1;
6775
3
10
my($sum,$cb)=(0,$$bf{counting_bits});
6776
3
2586
$sum+=vec($$bf{filter},$_,$cb) for 0..$$bf{filterlength}-1;
6777
3
24
return $sum;
6778
}
6779
sub bfadd {
6780
11
11
1
1405
require Digest::MD5;
6781
11
521
my($bf,@keys)=@_;
6782
11
50
28
return if !@keys;
6783
11
100
66
39
my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
6784
11
39
my($m,$k,$up,$n,$cb,$adaptive)=@$bf{'filterlength','hashfuncs','unpack','capacity','counting_bits','adaptive'};
6785
11
18
for(@$keysref){
6786
#croak "Key should be scalar" if ref($_);
6787
15084
50
66
21608
$$bf{key_count} >= $n and croak "Exceeded filter capacity $n" or $$bf{key_count}++;
6788
15083
8044
my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
15083
75694
6789
15083
100
66
32569
if ($cb==1 and !$adaptive) { # normal bloom filter
50
0
6790
11110
50389
vec($$bf{filter}, $h[$_] % $m, 1) = 1 for 0..$k-1;
6791
}
6792
elsif ($cb>1) { # counting bloom filter
6793
3973
3442
for(0..$k-1){
6794
11919
7956
my $pos=$h[$_] % $m;
6795
my $c=
6796
vec($$bf{filter}, $pos, $cb) =
6797
11919
13857
vec($$bf{filter}, $pos, $cb) + 1;
6798
11919
100
16590
if($c==0){
6799
26
25
vec($$bf{filter}, $pos, $cb) = -1;
6800
$$bf{overflow}{$pos}++
6801
26
100
100
74
and keys(%{$$bf{overflow}})>10 #hmm, arbitrary limit
2
283
6802
and croak "Too many overflows, concider doubling counting_bits from $cb to ".(2*$cb);
6803
}
6804
}
6805
}
6806
elsif ($adaptive) { # adaptive bloom filter
6807
0
0
my($i,$key,$bit)=(0+@h,$_);
6808
0
0
for(0..$$bf{filterlength}-1){
6809
0
0
0
$i+=push(@h, unpack $up, Digest::MD5::md5($key,$i)) if !@h;
6810
0
0
my $pos=shift(@h) % $m;
6811
0
0
$bit=vec($$bf{filter}, $pos, 1);
6812
0
0
vec($$bf{filter}, $pos, 1)=1;
6813
0
0
0
0
last if $_>=$k-1 and $bit==0;
6814
}
6815
}
6816
0
0
else {croak}
6817
}
6818
9
532
return 1;
6819
}
6820
sub bfcheck {
6821
5
5
1
77
require Digest::MD5;
6822
5
705
my($bf,@keys)=@_;
6823
5
50
18
return if !@keys;
6824
5
50
33
28
my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
6825
5
22
my($m,$k,$up,$cb,$adaptive)=@$bf{'filterlength','hashfuncs','unpack','counting_bits','adaptive'};
6826
5
9
my $wa=wantarray();
6827
5
50
14
if(!$adaptive){ # normal bloom filter or counting bloom filter
6828
return map {
6829
5
45
my $match = 1; # match if every bit is on
24001
13210
6830
24001
12786
my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
24001
125010
6831
24001
100
118525
vec($$bf{filter}, $h[$_] % $m, $cb) or $match=0 or last for 0..$k-1;
100
6832
24001
50
24049
return $match if !$wa;
6833
24001
25810
$match;
6834
} @$keysref;
6835
}
6836
else { # adaptive bloom filter
6837
return map {
6838
0
0
my($match,$i,$key,$bit,@h)=(1,0,$_);
0
0
6839
0
0
for(0..$$bf{filterlength}-1){
6840
0
0
0
$i+=push(@h, unpack $up, Digest::MD5::md5($key,$i)) if !@h;
6841
0
0
my $pos=shift(@h) % $m;
6842
0
0
$bit=vec($$bf{filter}, $pos, 1);
6843
0
0
0
0
$match++ if $_ > $k-1 and $bit==1;
6844
0
0
0
0
$match=0 if $_ <= $k-1 and $bit==0;
6845
0
0
0
last if $bit==0;
6846
}
6847
0
0
0
return $match if !$wa;
6848
0
0
$match;
6849
} @$keysref;
6850
}
6851
}
6852
sub bfgrep { # just a copy of bfcheck with map replaced by grep
6853
8
8
1
612
require Digest::MD5;
6854
8
12
my($bf,@keys)=@_;
6855
8
50
23
return if !@keys;
6856
8
50
33
51
my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
6857
8
27
my($m,$k,$up,$cb)=@$bf{'filterlength','hashfuncs','unpack','counting_bits'};
6858
return grep {
6859
8
24
my $match = 1; # match if every bit is on
6100
3386
6860
6100
3361
my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
6100
22568
6861
6100
100
22486
vec($$bf{filter}, $h[$_] % $m, $cb) or $match=0 or last for 0..$k-1;
100
6862
6100
5723
$match;
6863
} @$keysref;
6864
}
6865
sub bfgrepnot { # just a copy of bfgrep with $match replaced by not $match
6866
1
1
1
9
require Digest::MD5;
6867
1
3
my($bf,@keys)=@_;
6868
1
50
3
return if !@keys;
6869
1
50
33
13
my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
6870
1
4
my($m,$k,$up,$cb)=@$bf{'filterlength','hashfuncs','unpack','counting_bits'};
6871
return grep {
6872
1
4
my $match = 1; # match if every bit is on
1000
573
6873
1000
516
my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
1000
3646
6874
1000
50
3758
vec($$bf{filter}, $h[$_] % $m, $cb) or $match=0 or last for 0..$k-1;
50
6875
1000
1142
!$match;
6876
} @$keysref;
6877
}
6878
sub bfdelete {
6879
3
3
1
86
require Digest::MD5;
6880
3
14
my($bf,@keys)=@_;
6881
3
50
10
return if !@keys;
6882
3
100
100
18
my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
6883
3
10
my($m,$k,$up,$cb)=@$bf{'filterlength','hashfuncs','unpack','counting_bits'};
6884
3
50
6
croak "Cannot delete from non-counting bloom filter (use counting_bits 4 e.g.)" if $cb==1;
6885
3
5
for my $key (@$keysref){
6886
501
260
my @h; push @h, unpack $up, Digest::MD5::md5($key,0+@h) while @h<$k;
501
2005
6887
501
50
33
718
$$bf{key_count}==0 and croak "Deleted all and then some" or $$bf{key_count}--;
6888
501
387
my($ones,$croak,@pos)=(0);
6889
501
419
for(0..$k-1){
6890
1503
1068
my $pos=$h[$_] % $m;
6891
my $c=
6892
1503
1000
vec($$bf{filter}, $pos, $cb);
6893
1503
1494
vec($$bf{filter}, $pos, $cb)=$c-1;
6894
1503
100
1736
$croak="Cannot delete a non-existing key $key" if $c==0;
6895
$croak="Cannot delete with previously overflown position. Try doubleing counting_bits"
6896
1503
50
66
3619
if $c==1 and ++$ones and $$bf{overflow}{$pos};
66
6897
}
6898
501
100
677
if($croak){ #rollback
6899
vec($$bf{filter}, $h[$_] % $m, $cb)=
6900
1
8
vec($$bf{filter}, $h[$_] % $m, $cb)+1 for 0..$k-1;
6901
1
170
croak $croak;
6902
}
6903
}
6904
2
15
return $bf;
6905
}
6906
sub bfstore {
6907
1
1
1
14473
require Storable;
6908
1
7
Storable::store(@_);
6909
}
6910
sub bfretrieve {
6911
2
2
1
313
require Storable;
6912
2
9
my $bf=Storable::retrieve(@_);
6913
2
50
466
carp "Retrieved bloom filter was stored in version $$bf{version}, this is version $VERSION" if $$bf{version}>$VERSION;
6914
2
3
return $bf;
6915
}
6916
sub bfclone {
6917
1
1
1
822
require Storable;
6918
1
2634
return Storable::dclone(@_); #could be faster
6919
}
6920
sub bfdimensions_old {
6921
my($n,$p,$mink,$maxk, $k,$flen,$m)=
6922
0
0
0
0
0
@_==1 ? (@{$_[0]}{'capacity','error_rate','min_hashfuncs','max_hashfuncs'},1)
0
0
0
6923
:@_==2 ? (@_,1,100,1)
6924
: croak "Wrong number of arguments (".@_."), should be 2";
6925
0
0
0
0
croak "p ($p) should be > 0 and < 1" if not ( 0<$p && $p<1 );
6926
0
0
0
$m=-1*$_*$n/log(1-$p**(1/$_)) and (!defined $flen or $m<$flen) and ($flen,$k)=($m,$_) for $mink..$maxk;
0
0
6927
0
0
$flen = int(1+$flen);
6928
0
0
return ($flen,$k);
6929
}
6930
sub bfdimensions {
6931
my($n,$p,$mink,$maxk)=
6932
18
0
18
1
55
@_==1 ? (@{$_[0]}{'capacity','error_rate','min_hashfuncs','max_hashfuncs'})
18
50
36
6933
:@_==2 ? (@_,1,100)
6934
: croak "Wrong number of arguments (".@_."), should be 2";
6935
18
48
my $k=log(1/$p)/log(2); # k hash funcs
6936
18
33
my $m=-$n*log($p)/log(2)**2; # m bits in filter
6937
18
60
return ($m+0.5,min($maxk,max($mink,int($k+0.5))));
6938
}
6939
6940
#crontab -e
6941
#01 4,10,16,22 * * * /usr/bin/perl -MAcme::Tools -e'Acme::Tools::_update_currency_file("/var/www/html/currency-rates")' > /dev/null 2>&1
6942
6943
sub _update_currency_file { #call from cron
6944
0
0
0
0
my $fn=shift()||'/var/www/html/currency-rates';
6945
0
0
0
my %exe=map+($_=>"/usr/bin/$_"),qw/curl ci/;-x$_ or die for values %exe;
0
0
6946
0
0
0
open my $F, '>', $fn or die"ERROR: Could not write file $fn ($!)\n";
6947
0
0
print $F "#-- Currency rates ".localtime()." (".time().")\n";
6948
0
0
print $F "# File generated by Acme::Tools version $VERSION\n";
6949
0
0
print $F "# Updated every 6th hour on http://calthis.com/currency-rates\n";
6950
0
0
print $F "NOK 1.000000000\n";
6951
0
0
my $amount=1000;
6952
0
0
my $data=qx($exe{curl} -s "http://www.x-rates.com/table/?from=NOK&amount=$amount");
6953
0
0
$data=~s,to=([A-Z]{3})(.)>,$2>$1 ,g;
6954
0
0
my @data=ht2t($data,"Alphabetical order"); shift @data;
0
0
6955
0
0
0
@data=map "$$_[1] ".($$_[4]>1e-2?$$_[4]:$$_[2]?sprintf("%.8f",$amount/$$_[2]):0)."\n",@data;
0
6956
0
0
my %data=map split,@data;
6957
0
0
my@tc=qx($exe{curl} -s https://btc-e.com/api/3/ticker/btc_usd-ltc_usd)=~/avg.?:(\d+\.?\d*)/g;
6958
0
0
push @data,"BTC ".($tc[0]*$data{USD})."\n";
6959
0
0
push @data,"LTC ".($tc[1]*$data{USD})."\n";
6960
0
0
print $F sort(@data);
6961
0
0
close($F);
6962
0
0
0
qx($exe{ci} -l -m. -d $fn) if -w"$fn,v";
6963
}
6964
6965
sub ftype {
6966
0
0
0
0
my $f=shift;
6967
0
0
0
0
-e $f and
0
0
0
0
0
0
0
0
6968
-f$f ? 'file' # -f File is a plain file.
6969
:-d$f ? 'dir' # -d File is a directory.
6970
:-l$f ? 'symlink' # -l File is a symbolic link.
6971
:-p$f ? 'pipe' # -p File is a named pipe (FIFO), or Filehandle is a pipe.
6972
:-S$f ? 'socket' # -S File is a socket.
6973
:-b$f ? 'blockfile' # -b File is a block special file.
6974
:-c$f ? 'charfile' # -c File is a character special file.
6975
:-t$f ? 'ttyfile' # -t Filehandle is opened to a tty.
6976
: ''
6977
or undef;
6978
}
6979
6980
sub ext2mime {
6981
0
0
0
0
my $ext=shift(); #or filename
6982
#http://www.sitepoint.com/web-foundations/mime-types-complete-list/
6983
0
0
croak "todo: ext2mime not yet implemented";
6984
#return "application/json";#feks
6985
}
6986
6987
=head1 COMMANDS
6988
6989
=head2 install_acme_command_tools
6990
6991
sudo perl -MAcme::Tools -e install_acme_command_tools
6992
6993
Wrote executable /usr/local/bin/conv
6994
Wrote executable /usr/local/bin/due
6995
Wrote executable /usr/local/bin/xcat
6996
Wrote executable /usr/local/bin/freq
6997
Wrote executable /usr/local/bin/deldup
6998
Wrote executable /usr/local/bin/ccmd
6999
Wrote executable /usr/local/bin/z2z
7000
Wrote executable /usr/local/bin/2gz
7001
Wrote executable /usr/local/bin/2gzip
7002
Wrote executable /usr/local/bin/2bz2
7003
Wrote executable /usr/local/bin/2bzip2
7004
Wrote executable /usr/local/bin/2xz
7005
7006
Examples of commands then made available:
7007
7008
conv 1 USD EUR #might show 0.88029 if thats the current currency rate. Uses conv()
7009
conv .5 in cm #reveals that 1/2 inch is 1.27 cm, see doc on conv() for all supported units
7010
due [-h] /path/1/ /path/2/ #like du, but show statistics on file extentions instead of subdirs
7011
xcat file #like cat, zcat, bzcat or xzcat in one. Uses file extention to decide. Uses openstr()
7012
freq file #reads file(s) or stdin and view counts of each byte 0-255
7013
ccmd grep string /huge/file #caches stdout+stderr for 15 minutes (default) for much faster results later
7014
ccmd "sleep 2;echo hello" #slow first time. Note the quotes!
7015
ccmd "du -s ~/*|sort -n|tail" #ccmd store stdout+stderr in /tmp files (default)
7016
z2z [-pvk1-9o -t type] files #convert from/to .gz/bz2/xz files, -p progress, -v verbose (output result),
7017
#-k keep org file, -o overwrite, 1-9 compression degree
7018
#2xz and 2bz2 depends on xz and bzip2 being installed on system
7019
2xz #same as z2z with -t xz
7020
2bz2 #same as z2z with -t bz2
7021
2gz #same as z2z with -t gz
7022
7023
TODO :
7024
finddup [-v -d -s -h] path1/ path2/
7025
#reports (+deletes with -d) duplicate files
7026
#finddup is NOT IMPLEMENTED YET! Use -s for symlink dups, -h for hardlink
7027
rttop
7028
trunc file(s)
7029
wipe file(s)
7030
7031
=head3 z2z
7032
7033
=head3 2xz
7034
7035
=head3 2bz2
7036
7037
=head3 2gz
7038
7039
The commands C<2xz>, C<2bz2> and C<2gz> are just synonyms for C with an implicitly added option C<-t xz>, C<-t xz> or C<-t gz> accordingly.
7040
7041
z2z [-p -k -v -o -1 -2 -3 -4 -5 -6 -7 -8 -9 ] files
7042
7043
Converts (recompresses) files from one compression sc
7044
7045
7046
7047
=head3 due
7048
7049
Like C command but views space used by file extentions instead of dirs. Options:
7050
7051
due [-options] [dirs] [files]
7052
due -h View bytes "human readable", i.e. C<8.72 MB> instead of C<9145662 b> (bytes)
7053
due -k | -m View bytes in kilobytes | megabytes (1024 | 1048576)
7054
due -K Like -k but uses 1000 instead of 1024
7055
due -z View two extentions if .z .Z .gz .bz2 .rz or .xz (.tar.gz, not just .gz)
7056
due -M Also show min, medium and max date (mtime) of files, give an idea of their age
7057
due -P Also show 10, 50 (medium) and 90 percentile of file date
7058
due -MP Both -M and -P, shows min, 10p, 50p, 90p and max
7059
due -a Sort output alphabetically by extention (default order is by size)
7060
due -c Sort output by number of files
7061
due -i Ignore case, .GZ and .gz is the same, output in lower case
7062
due -t Adds time of day to -M and -P output
7063
due -e 'regex' Exclude files (full path) matching regex. Ex: due -e '\.git'
7064
TODO: due -l TODO: Exclude hardlinks (dont count "same" file more than once, "man du")
7065
ls -l | due Parses output of ls -l, find -ls, tar tvf for size+filename and reports
7066
find | due List of filenames from stdin produces same as just command 'due'
7067
ls | due Reports on just files in current dir without recursing into subdirs
7068
7069
=cut
7070
7071
sub install_acme_command_tools {
7072
0
0
1
0
my $dir=(grep -d$_, @_, '/usr/local/bin', '/usr/bin')[0];
7073
0
0
for( qw( conv due xcat freq finddup ccmd trunc wipe rttop z2z 2gz 2gzip 2bz2 2bzip2 2xz ) ){
7074
0
0
unlink("$dir/$_");
7075
0
0
writefile("$dir/$_", "#!$^X\nuse Acme::Tools;\nAcme::Tools::cmd_$_(\@ARGV);\n");
7076
0
0
sys("/bin/chmod +x $dir/$_"); #hm umask
7077
0
0
print "Wrote executable $dir/$_\n";
7078
}
7079
}
7080
0
0
0
0
sub cmd_conv { print conv(@ARGV)."\n" }
7081
7082
sub cmd_due { #TODO: output from tar tvf and ls and find -ls
7083
0
0
0
0
my %o=_go("zkKmhciMPate:l");
7084
0
0
require File::Find;
7085
33
33
161
no warnings 'uninitialized';
33
40
33
102944
7086
0
0
0
die"$0: -l not implemented yet\n" if $o{l}; #man du: default is not to count hardlinks more than once, with -l it does
7087
0
0
0
die"$0: -h, -k or -m can not be used together\n" if $o{h}+$o{k}+$o{m}>1;
7088
0
0
0
die"$0: -c and -a can not be used together\n" if $o{a}+$o{c}>1;
7089
0
0
0
die"$0: -k and -m can not be used together\n" if $o{k}+$o{m}>1;
7090
0
0
0
my @q=@ARGV; @q=('.') if !@q;
0
0
7091
0
0
my(%c,%b,$cnt,$bts,%mtime);
7092
0
0
0
my $zext=$o{z}?'(\.(z|Z|gz|bz2|xz|rz|kr))?':'';
7093
0
0
my $r=qr/(\.[^\.\/]{1,10}$zext)$/;
7094
0
0
0
my $qrexcl=exists$o{e}?qr/$o{e}/:0;
7095
#TODO: ought to work: tar cf - .|tar tvf -|due
7096
#my $qrstdin=qr/(^| )\-[rwx\-sS]{9} +\d+ \w+ +\w+ +(\d+) [a-zA-Z]+\.? +\d+ +(?:\d\d:\d\d|\d{4}) (.*)$/;
7097
0
0
my $qrstdin=qr/(^| )\-[rwx\-sS]{9} +(\d+ )?\w+[ \/]+\w+ +(\d+) [a-zA-Z]+\.? +\d+ +(?:\d\d:\d\d|\d{4}) (.*)$/;
7098
0
0
0
if(-p STDIN){
7099
0
0
while(<>){
7100
0
0
chomp;
7101
0
0
0
my($sz,$f)=/$qrstdin/?($2,$3):-f$_?(-s$_,$_):next;
0
7102
0
0
0
my $ext=$f=~$r?$1:'';
7103
0
0
0
$ext=lc($ext) if $o{i};
7104
0
0
$cnt++; $c{$ext}++;
0
0
7105
0
0
$bts+=$sz; $b{$ext}+=$sz;
0
0
7106
#$mtime{$ext}.=",$mtime" if
7107
0
0
0
0
$o{M} || $o{P} and die"due: -M and -P not yet implemented for STDIN";
7108
}
7109
}
7110
else { #hm DRY
7111
File::Find::find({wanted =>
7112
sub {
7113
0
0
0
0
return if !-f$_;
7114
0
0
0
0
return if $qrexcl and defined $File::Find::name and $File::Find::name=~$qrexcl;
0
7115
0
0
my($sz,$mtime)=(stat($_))[7,9];
7116
0
0
0
my $ext=m/$r/?$1:'';
7117
0
0
0
$ext=lc($ext) if $o{i};
7118
0
0
$cnt++; $c{$ext}++;
0
0
7119
0
0
$bts+=$sz; $b{$ext}+=$sz;
0
0
7120
0
0
0
0
$mtime{$ext}.=",$mtime" if $o{M} || $o{P};
7121
0
0
1;
7122
0
0
} },@q);
7123
}
7124
0
0
0
my($f,$s)=$o{k}?("%14.2f kb",sub{$_[0]/1024})
7125
0
0
0
:$o{K}?("%14.2f Kb",sub{$_[0]/1000})
7126
0
0
0
:$o{m}?("%14.2f mb",sub{$_[0]/1024**2})
7127
0
0
0
:$o{h}?("%14s", sub{bytes_readable($_[0])})
7128
0
0
0
0
: ("%14d b", sub{$_[0]});
0
0
0
0
0
7129
my @e=$o{a}?(sort(keys%c))
7130
0
0
0
:$o{c}?(sort{$c{$a}<=>$c{$b} or $a cmp $b}keys%c)
7131
0
0
0
: (sort{$b{$a}<=>$b{$b} or $a cmp $b}keys%c);
0
0
0
0
7132
0
0
0
my $perc=!$o{M}&&!$o{P}?sub{""}:
7133
sub{
7134
0
0
0
0
my @p=$o{P}?(10,50,90):(50);
7135
0
0
my @m=@_>0 ? do {grep$_, split",", $mtime{$_[0]}}
7136
0
0
0
: do {grep$_, map {split","} values %mtime};
0
0
0
0
7137
0
0
my @r=percentile(\@p,@m);
7138
0
0
0
@r=(min(@m),@r,max(@m)) if $o{M};
7139
0
0
@r=map int($_), @r;
7140
0
0
0
my $fmt=$o{t}?'YYYY/MM/DD-MM:MI:SS':'YYYY/MM/DD';
7141
0
0
@r=map tms($_,$fmt), @r;
7142
0
0
" ".join(" ",@r);
7143
0
0
0
0
};
7144
0
0
printf("%-11s %8d $f %7.2f%%%s\n",$_,$c{$_},&$s($b{$_}),100*$b{$_}/$bts,&$perc($_)) for @e;
7145
0
0
printf("%-11s %8d $f %7.2f%%%s\n","Sum",$cnt,&$s($bts),100,&$perc());
7146
}
7147
sub cmd_xcat {
7148
0
0
0
0
for my $fn (@_){
7149
0
0
my $os=openstr($fn);
7150
0
0
0
0
open my $FH, $os or warn "xcat: cannot open $os ($!)\n" and next;
7151
#binmode($FH);#hm?
7152
0
0
print while <$FH>;
7153
0
0
close($FH);
7154
}
7155
}
7156
sub cmd_freq {
7157
0
0
0
0
my(@f,$i);
7158
0
0
map $f[$_]++, unpack("C*",$_) while <>;
7159
0
0
my $s=" " x 12;map{print"$_$s$_$s$_\n"}("BYTE CHAR COUNT","---- ----- -------");
0
0
0
0
7160
0
0
my %m=(145,"DOS-æ",155,"DOS-ø",134,"DOS-å",146,"DOS-Æ",157,"DOS-Ø",143,"DOS-Å",map{($_," ")}0..31);
0
0
7161
0
0
0
0
printf("%4d %5s%8d".(++$i%3?$s:"\n"),$_,$m{$_}||chr,$f[$_]) for grep$f[$_],0..255;print "\n";
0
0
7162
0
0
my @no=grep!$f[$_],0..255; print "No bytes for these ".@no.": ".join(" ",@no)."\n";
0
0
7163
}
7164
sub cmd_deldup {
7165
0
0
0
0
cmd_finddup(@_);
7166
}
7167
sub cmd_finddup {
7168
# ~/test/deldup.pl #find and optionally delete duplicate files effiencently
7169
#http://www.commandlinefu.com/commands/view/3555/find-duplicate-files-based-on-size-first-then-md5-hash
7170
0
0
0
0
die "todo: finddup not ready yet"
7171
}
7172
#http://stackoverflow.com/questions/11900239/can-i-cache-the-output-of-a-command-on-linux-from-cli
7173
our $Ccmd_cache_dir='/tmp/acme-tools-ccmd-cache';
7174
our $Ccmd_cache_expire=15*60; #default 15 minutes
7175
sub cmd_ccmd {
7176
0
0
0
0
require Digest::MD5;
7177
0
0
my $cmd=join" ",@_;
7178
0
0
my $d="$Ccmd_cache_dir/".username();
7179
0
0
makedir($d);
7180
0
0
my $md5=Digest::MD5::md5_hex($cmd);
7181
0
0
my($fno,$fne)=map"$d/cmd.$md5.std$_","out","err";
7182
0
0
0
my $too_old=sub{time()-(stat(shift))[9] >= $Ccmd_cache_expire};
0
0
7183
0
0
unlink grep &$too_old($_), <$d/*.std???>;
7184
0
0
0
0
sys("($cmd) > $fno 2> $fne") if !-e$fno or &$too_old($fno);
7185
0
0
print STDOUT "".readfile($fno);
7186
0
0
print STDERR "".readfile($fne);
7187
}
7188
7189
0
0
0
0
sub cmd_trunc { die "todo: trunc not ready yet"} #truncate a file, size 0, keep all other attr
7190
7191
sub cmd_wipe {
7192
0
0
0
0
my %o=_go("n:k");
7193
0
0
wipe($_,$o{n},$o{k}) for @_;
7194
}
7195
7196
0
0
0
0
sub cmd_2gz {cmd_z2z("-t","gz", @_)}
7197
0
0
0
0
sub cmd_2gzip {cmd_z2z("-t","gz", @_)}
7198
0
0
0
0
sub cmd_2bz2 {cmd_z2z("-t","bz2",@_)}
7199
0
0
0
0
sub cmd_2bzip2 {cmd_z2z("-t","bz2",@_)}
7200
0
0
0
0
sub cmd_2xz {cmd_z2z("-t","xz", @_)}
7201
#todo?: sub cmd_7z
7202
sub cmd_z2z {
7203
8
8
0
54949
local @ARGV=@_;
7204
8
70
my %o=_go("pt:kvhon123456789");
7205
8
119
my $t=repl(lc$o{t},qw/gzip gz bzip2 bz2/);
7206
8
50
107
die "due: unknown compression type $o{t}, known are gz, bz2 and xz" if $t!~/^(gz|bz2|xz)$/;
7207
8
410
my $sum=sum(map -s$_,@ARGV);
7208
8
100
66
105
print "Converting ".@ARGV." files, total ".bytes_readable($sum)."\n" if $o{v} and @ARGV>1;
7209
8
35
my $cat='cat';
7210
8
0
37
if($o{p}){ if(qx(which pv)){ $cat='pv' } else { warn repl(<<"",qr/^\s+/) } }
0
50
0
0
0
0
0
7211
due: pv for -p not found, install with sudo yum install pv, sudo apt-get install pv or similar
7212
7213
8
19
my $sumnew=0;
7214
8
124
my $start=time_fp();
7215
8
17
my($i,$bsf)=(0,0);#bytes so far
7216
8
43
$Eta{'z2z'}=[];eta('z2z',0,$sum);
8
63
7217
8
28
for(@ARGV){
7218
11
50
43
my $new=$_; $new=~s/(\.(gz|bz2|xz))?$/.$t/i or die;
11
293
7219
11
100
69
my $ext=defined($2)?lc($2):'';
7220
11
50
33
370
my $same=/^$new$/; $new.=".tmp" if $same; die if $o{k} and $same;
11
50
37
11
45
7221
11
50
33
297
next if !-e$_ and warn"$_ do not exists\n";
7222
11
50
33
161
next if !-r$_ and warn"$_ is not readable\n";
7223
11
0
33
301
next if -e$new and !$o{o} and warn"$new already exists, skipping (use -o to overwrite)\n";
33
7224
11
100
192
my $unz={qw/gz gunzip bz2 bunzip2 xz unxz/}->{$ext}||'';
7225
#todo: my $cntfile="/tmp/acme-tools-z2z-wc-c.$$";
7226
#todo: my $cnt="tee >(wc -c>$cntfile)" if $ENV{SHELL}=~/bash/ and $o{v}; #hm dash vs bash
7227
11
80
my $z= {qw/gz gzip bz2 bzip2 xz xz/}->{$t};
7228
11
133
$z.=" -$_" for grep$o{$_},1..9;
7229
11
71
my $cmd="$cat $_|$unz|$z>$new";
7230
#todo: "$cat $_|$unz|$cnt|$z>$new";
7231
#cat /tmp/kontroll-linux.xz|unxz|tee >(wc -c>/tmp/p)|gzip|wc -c;cat /tmp/p
7232
11
103
$cmd=~s,\|+,|,g; #print "cmd: $cmd\n";
7233
11
81
sys($cmd);
7234
11
50
50
245
chall($_,$new)||die if !$o{n};
7235
11
96
my($szold,$sznew)=map{-s$_}($_,$new);
22
423
7236
11
153
$bsf+=-s$_;
7237
11
50
1435
unlink $_ if !$o{k};
7238
11
50
0
50
rename($new, replace($new,qr/.tmp$/)) or die if $same;
7239
11
50
45
if($o{v}){
7240
11
26
$sumnew+=$sznew;
7241
11
414
my $pr=sprintf"%0.1f%%",100*$sznew/$szold;
7242
#todo: my $szuncmp=-s$cntfile&&time()-(stat($cntfile))[9]<10 ? qx(cat $cntfile) : '';
7243
#todo: $o{h} ? printf("%6.1f%% %9s => %9s => %9s %s\n", $pr,(map bytes_readable($_),$szold,$szuncmp,$sznew),$_)
7244
#todo: : printf("%6.1f%% %11d b => %11d b => %11 b %s\n",$pr,$szold,$szuncmp,$sznew,$_)
7245
my $str= $o{h}
7246
11
100
163
? sprintf("%-7s %9s => %9s", $pr,(map bytes_readable($_),$szold,$sznew))
7247
: sprintf("%-7s %11d b => %11d b", $pr,$szold,$sznew);
7248
11
100
77
if(@ARGV>1){
7249
4
19
$i++;
7250
4
0
25
$str=$i<@ARGV
50
7251
? " ETA:".sec_readable(eta('z2z',$bsf,$sum)-time_fp())." $str"
7252
: " TA: 0s $str"
7253
if $sum>1e6;
7254
4
37
$str="$i/".@ARGV." $str";
7255
}
7256
11
7511
print "$str $new\n";
7257
}
7258
}
7259
8
100
66
363
if($o{v} and @ARGV>1){
7260
1
50
28
my $bytes=$o{h}?'':'bytes ';
7261
my $str=
7262
sprintf "%d files compressed in %.3f seconds from %s to %s $bytes (%s bytes) %.1f%% of original\n",
7263
0+@ARGV,
7264
time_fp()-$start,
7265
1
50
21
(map{$o{h}?bytes_readable($_):$_}($sum,$sumnew,$sumnew-$sum)),
3
17
7266
100*$sumnew/$sum;
7267
1
27
$str=~s,\((\d),(+$1,;
7268
1
31
print $str;
7269
}
7270
}
7271
7272
8
8
3205
sub _go { require Getopt::Std; my %o; Getopt::Std::getopts(shift() => \%o); %o }
8
103
8
101
8
1142
7273
7274
0
0
0
0
sub cmd_rttop { die "rttop: not implemented here yet.\n" }
7275
0
0
0
0
sub cmd_whichpm { die "whichpm: not implemented here yet.\n" } #-a (all, inkl VERSION og ls -l)
7276
0
0
0
0
sub cmd_catal { die "catal: not implemented here yet.\n" } #-a (all, inkl VERSION og ls -l)
7277
#todo: cmd_tabdiff (fra sonyk)
7278
#todo: cmd_catlog (ala catal med /etc/catlog.conf, default er access_log)
7279
7280
=head1 DATABASE STUFF - NOT IMPLEMENTED YET
7281
7282
Uses L. Comming soon...
7283
7284
$Dbh
7285
dlogin
7286
dlogout
7287
drow
7288
drows
7289
drowc
7290
drowsc
7291
dcols
7292
dpk
7293
dsel
7294
ddo
7295
dins
7296
dupd
7297
ddel
7298
dcommit
7299
drollback
7300
7301
=cut
7302
7303
#my$dummy=<<'SOON';
7304
sub dtype {
7305
0
0
0
0
my $connstr=shift;
7306
0
0
0
return 'SQLite' if $connstr=~/(\.sqlite|sqlite:.*\.db)$/i;
7307
0
0
0
return 'Oracle' if $connstr=~/\@/;
7308
0
0
return 'Pg' if 1==2;
7309
0
0
die;
7310
}
7311
7312
our($Dbh,@Dbh,%Sth);
7313
our %Dbattr=(RaiseError => 1, AutoCommit => 0); #defaults
7314
sub dlogin {
7315
0
0
0
0
my $connstr=shift();
7316
0
0
my %attr=(%Dbattr,@_);
7317
0
0
my $type=dtype($connstr);
7318
0
0
my($dsn,$u,$p)=('','','');
7319
0
0
0
if($type eq 'SQLite'){
0
0
7320
0
0
$dsn=$connstr;
7321
}
7322
elsif($type eq 'Oracle'){
7323
0
0
($u,$p,$dsn)=($connstr=~m,(.+?)(/.+?)?\@(.+),);
7324
}
7325
elsif($type eq 'Pg'){
7326
0
0
croak "todo";
7327
}
7328
else{
7329
0
0
croak "dblogin: unknown database type for connection string $connstr\n";
7330
}
7331
0
0
$dsn="dbi:$type:$dsn";
7332
0
0
0
push @Dbh, $Dbh if $Dbh; #local is better?
7333
0
0
require DBI;
7334
0
0
$Dbh=DBI->connect($dsn,$u,$p,\%attr); #connect_cached?
7335
}
7336
sub dlogout {
7337
0
0
0
0
$Dbh->disconnect;
7338
0
0
0
$Dbh=pop@Dbh if @Dbh;
7339
}
7340
sub drow {
7341
0
0
0
0
my($q,@b)=_dattrarg(@_);
7342
#my $sth=do{$Sth{$Dbh,$q} ||= $Dbh->prepare_cached($q)};
7343
0
0
my $sth=$Dbh->prepare_cached($q);
7344
0
0
$sth->execute(@b);
7345
0
0
my @r=$sth->fetchrow_array;
7346
0
0
0
$sth->finish if $$Dbh{Driver}{Name} eq 'SQLite';
7347
#$dbh->selectrow_array($statement);
7348
0
0
0
return @r==1?$r[0]:@r;
7349
}
7350
0
0
sub drows {
7351
}
7352
0
0
sub drowc {
7353
}
7354
0
0
sub drowsc {
7355
}
7356
0
0
sub dcols {
7357
}
7358
0
0
sub dpk {
7359
}
7360
0
0
sub dsel {
7361
}
7362
sub ddo {
7363
0
0
0
0
my @arg=_dattrarg(@_);
7364
#warn serialize(\@arg,'arg','',1);
7365
0
0
$Dbh->do(@arg); #hm cache?
7366
}
7367
0
0
sub dins {
7368
}
7369
0
0
sub dupd {
7370
}
7371
0
0
sub ddel {
7372
}
7373
0
0
0
0
sub dcommit { $Dbh->commit }
7374
0
0
0
0
sub drollback { $Dbh->rollback }
7375
7376
sub _dattrarg {
7377
0
0
0
my @arg=@_;
7378
0
0
0
splice @arg,1,0, ref($arg[-1]) eq 'HASH' ? pop(@arg) : {};
7379
0
0
@arg;
7380
}
7381
7382
=head2 self_update
7383
7384
Update Acme::Tools to newest version quick and dirty:
7385
7386
function pmview(){ ls -ld `perl -M$1 -le'$m=shift;$mi=$m;$mi=~s,::,/,g;print $INC{"$mi.pm"};warn"Version ".${$m."::VERSION"}."\n"' $1`;}
7387
7388
pmview Acme::Tools #view date and version before
7389
sudo perl -MAcme::Tools -e Acme::Tools::self_update #update to newest version
7390
pmview Acme::Tools #view date and version after
7391
7392
Does C to where Acme/Tools.pm are and then wget -N https://raw.githubusercontent.com/kjetillll/Acme-Tools/master/Tools.pm
7393
7394
TODO: cmd_acme_tools_self_update, accept --no-check-certificate to use on curl
7395
7396
=cut
7397
7398
our $Wget;
7399
our $Self_update_url='https://raw.githubusercontent.com/kjetillll/Acme-Tools/master/Tools.pm'; #todo: change site
7400
sub self_update {
7401
#in($^O,'linux','cygwin') or die"ERROR: self_update works on linux and cygwin only";
7402
0
0
0
1
0
$Wget||=(grep -x$_,map"$_/wget",'/usr/bin','/bin','/usr/local/bin','.')[0]; #hm --no-check-certificate
7403
0
0
0
-x$Wget or die"ERROR: wget ($Wget) executable not found\n";
7404
0
0
my $d=dirname(__FILE__);
7405
0
0
sys("cd $d; ls -l Tools.pm; md5sum Tools.pm");
7406
0
0
0
sys("cd $d; $Wget -N ".($ARGV[0]||$Self_update_url));
7407
0
0
sys("cd $d; ls -l Tools.pm; md5sum Tools.pm");
7408
}
7409
7410
1;
7411
7412
package Acme::Tools::BloomFilter;
7413
33
33
813
use 5.008; use strict; use warnings; use Carp;
33
33
103
33
33
129
33
33
38
33
757
33
105
33
35
33
946
33
113
33
33
33
13675
7414
2
2
180
sub new { my($class,@p)=@_; my $self=Acme::Tools::bfinit(@p); bless $self, $class }
2
9
2
8
7415
2
2
66
sub add { &Acme::Tools::bfadd }
7416
0
0
0
sub addbf { &Acme::Tools::bfaddbf }
7417
2
2
64
sub check { &Acme::Tools::bfcheck }
7418
1
1
3
sub grep { &Acme::Tools::bfgrep }
7419
0
0
0
sub grepnot { &Acme::Tools::bfgrepnot }
7420
0
0
0
sub delete { &Acme::Tools::bfdelete }
7421
0
0
0
sub store { &Acme::Tools::bfstore }
7422
0
0
0
sub retrieve { &Acme::Tools::bfretrieve }
7423
1
1
4
sub clone { &Acme::Tools::bfclone }
7424
0
0
0
sub sum { &Acme::Tools::bfsum }
7425
1;
7426
7427
# Ny versjon:
7428
# + c-s todo
7429
# + endre $VERSION
7430
# + endre Release history under HISTORY
7431
# + endre årstall under COPYRIGHT AND LICENSE
7432
# + oppd default valutakurser inkl datoen
7433
# + emacs Changes
7434
# + emacs README + aarstall
7435
# + emacs MANIFEST legg til ev nye t/*.t
7436
# + perl Makefile.PL;make test
7437
# + /usr/bin/perl Makefile.PL;make test
7438
# + perlbrew exec "perl ~/Acme-Tools/Makefile.PL ; time make test"
7439
# + perlbrew use perl-5.10.1; perl Makefile.PL; make test; perlbrew off
7440
# + test evt i cygwin og mingw-perl
7441
# + pod2html Tools.pm > Tools.html ; firefox Tools.html
7442
# + https://metacpan.org/pod/Acme::Tools
7443
# + http://cpants.cpanauthors.org/dist/Acme-Tools #kvalitee
7444
# + perl Makefile.PL ; make test && make dist
7445
# + cp -p *tar.gz /htdocs/
7446
# + ci -l -mversjon -d `cat MANIFEST`
7447
# + git add `cat MANIFEST`
7448
# + git status
7449
# + git commit -am versjon
7450
# + git push #eller:
7451
# + git push origin master
7452
# + http://pause.perl.org/
7453
# + tegnsett/utf8-kroell
7454
# http://en.wikipedia.org/wiki/Birthday_problem#Approximations
7455
7456
# memoize_expire() http://perldoc.perl.org/Memoize/Expire.html
7457
# memoize_file_expire()
7458
# memoize_limit_size() #lru
7459
# memoize_file_limit_size()
7460
# memoize_memcached http://search.cpan.org/~dtrischuk/Memoize-Memcached-0.03/lib/Memoize/Memcached.pm
7461
# hint on http://perl.jonallen.info/writing/articles/install-perl-modules-without-root
7462
7463
# sub mycrc32 { #http://billauer.co.il/blog/2011/05/perl-crc32-crc-xs-module/ eller String::CRC32::crc32 som er 100 x raskere enn Digest::CRC::crc32
7464
# my ($input, $init_value, $polynomial) = @_;
7465
# $init_value = 0 unless (defined $init_value);
7466
# $polynomial = 0xedb88320 unless (defined $polynomial);
7467
# my @lookup_table;
7468
# for (my $i=0; $i<256; $i++) {
7469
# my $x = $i;
7470
# for (my $j=0; $j<8; $j++) {
7471
# if ($x & 1) {
7472
# $x = ($x >> 1) ^ $polynomial;
7473
# } else {
7474
# $x = $x >> 1;
7475
# }
7476
# }
7477
# push @lookup_table, $x;
7478
# }
7479
# my $crc = $init_value ^ 0xffffffff;
7480
# foreach my $x (unpack ('C*', $input)) {
7481
# $crc = (($crc >> 8) & 0xffffff) ^ $lookup_table[ ($crc ^ $x) & 0xff ];
7482
# }
7483
# $crc = $crc ^ 0xffffffff;
7484
# return $crc;
7485
# }
7486
#
7487
7488
7489
7490
=head1 HISTORY
7491
7492
Release history
7493
7494
0.20 Mar 2017 Subs: a2h cnttbl h2a log10 log2 nicenum rstddev sec_readable
7495
throttle timems refa refaa refah refh refha refhh refs
7496
eachr globr keysr popr pushr shiftr splicer unshiftr valuesr
7497
Commands: 2bz2 2gz 2xz z2z
7498
7499
0.172 Dec 2015 Subs: curb openstr pwgen sleepms sleepnm srlz tms username
7500
self_update install_acme_command_tools
7501
Commands: conv due freq wipe xcat (see "Commands")
7502
7503
0.16 Feb 2015 bigr curb cpad isnum parta parth read_conf resolve_equation
7504
roman2int trim. Improved: conv (numbers currency) range ("derivatives")
7505
7506
0.15 Nov 2014 Improved doc
7507
0.14 Nov 2014 New subs, improved tests and doc
7508
0.13 Oct 2010 Non-linux test issue, resolve. improved: bloom filter, tests, doc
7509
0.12 Oct 2010 Improved tests, doc, bloom filter, random_gauss, bytes_readable
7510
0.11 Dec 2008 Improved doc
7511
0.10 Dec 2008
7512
7513
=head1 SEE ALSO
7514
7515
L
7516
7517
=head1 AUTHOR
7518
7519
Kjetil Skotheim, Ekjetil.skotheim@gmail.comE
7520
7521
=head1 COPYRIGHT
7522
7523
1995-2017, Kjetil Skotheim
7524
7525
=head1 LICENSE
7526
7527
This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
7528
7529
=cut