line
stmt
bran
cond
sub
pod
time
code
1
#!/usr/bin/perl
2
package Acme::Tools;
3
4
our $VERSION = '0.21';
5
6
33
33
487997
use 5.008; #Perl 5.8 was released July 18th 2002
33
85
7
33
33
139
use strict;
33
42
33
657
8
33
33
100
use warnings;
33
40
33
777
9
33
33
115
use Carp;
33
35
33
8604
10
11
require Exporter;
12
our @ISA = qw(Exporter);
13
our %EXPORT_TAGS = ( all => [ qw() ] );
14
our @EXPORT_OK = ( @{ $EXPORT_TAGS{all} } );
15
our @EXPORT = qw(
16
min
17
max
18
mins
19
maxs
20
sum
21
avg
22
geomavg
23
harmonicavg
24
stddev
25
rstddev
26
median
27
percentile
28
$Resolve_iterations
29
$Resolve_last_estimate
30
$Resolve_time
31
resolve
32
resolve_equation
33
conv
34
rank
35
rankstr
36
eqarr
37
sorted
38
sortedstr
39
pushsort
40
pushsortstr
41
binsearch
42
binsearchstr
43
random
44
random_gauss
45
big
46
bigi
47
bigf
48
bigr
49
bigscale
50
nvl
51
repl
52
replace
53
decode
54
decode_num
55
between
56
curb
57
bound
58
log10
59
log2
60
distinct
61
in
62
in_num
63
uniq
64
union
65
union_all
66
minus
67
minus_all
68
intersect
69
intersect_all
70
not_intersect
71
mix
72
zip
73
subhash
74
hashtrans
75
zipb64
76
zipbin
77
unzipb64
78
unzipbin
79
gzip
80
gunzip
81
bzip2
82
bunzip2
83
ipaddr
84
ipnum
85
webparams
86
urlenc
87
urldec
88
ht2t
89
chall
90
makedir
91
qrlist
92
ansicolor
93
ccn_ok
94
KID_ok
95
writefile
96
readfile
97
readdirectory
98
basename
99
dirname
100
wipe
101
username
102
range
103
globr
104
permutations
105
trigram
106
sliding
107
chunks
108
chars
109
cart
110
reduce
111
int2roman
112
roman2int
113
num2code
114
code2num
115
gcd
116
lcm
117
pivot
118
tablestring
119
upper
120
lower
121
trim
122
rpad
123
lpad
124
cpad
125
dserialize
126
serialize
127
srlz
128
cnttbl
129
nicenum
130
bytes_readable
131
sec_readable
132
distance
133
tms
134
easter
135
time_fp
136
timems
137
sleep_fp
138
sleeps
139
sleepms
140
sleepus
141
sleepns
142
eta
143
sys
144
recursed
145
md5sum
146
pwgen
147
read_conf
148
openstr
149
ldist
150
$Re_isnum
151
isnum
152
part
153
parth
154
parta
155
a2h
156
h2a
157
refa
158
refh
159
refs
160
refaa
161
refah
162
refha
163
refhh
164
pushr
165
popr
166
shiftr
167
unshiftr
168
splicer
169
keysr
170
valuesr
171
eachr
172
ed
173
$Edcursor
174
brainfu
175
brainfu2perl
176
brainfu2perl_optimized
177
bfinit
178
bfsum
179
bfaddbf
180
bfadd
181
bfcheck
182
bfgrep
183
bfgrepnot
184
bfdelete
185
bfstore
186
bfretrieve
187
bfclone
188
bfdimensions
189
$PI
190
install_acme_command_tools
191
192
$Dbh
193
dlogin
194
dlogout
195
drow
196
drows
197
drowc
198
drowsc
199
dcols
200
dpk
201
dsel
202
ddo
203
dins
204
dupd
205
ddel
206
dcommit
207
drollback
208
);
209
210
our $PI = '3.141592653589793238462643383279502884197169399375105820974944592307816406286';
211
212
=head1 NAME
213
214
Acme::Tools - Lots of more or less useful subs lumped together and exported into your namespace
215
216
=head1 SYNOPSIS
217
218
use Acme::Tools;
219
220
print sum(1,2,3); # 6
221
print avg(2,3,4,6); # 3.75
222
223
my @list = minus(\@listA, \@listB); # set operations
224
my @list = union(\@listA, \@listB); # set operations
225
226
print length(gzip("abc" x 1000)); # far less than 3000
227
228
writefile("/dir/filename",$string); # convenient
229
my $s=readfile("/dir/filename"); # also conventient
230
231
print "yes!" if between($pi,3,4);
232
233
print percentile(0.05, @numbers);
234
235
my @even = range(1000,2000,2); # even numbers between 1000 and 2000
236
my @odd = range(1001,2001,2);
237
238
my $dice = random(1,6);
239
my $color = random(['red','green','blue','yellow','orange']);
240
241
...and more.
242
243
=encoding utf8
244
245
=head1 ABSTRACT
246
247
About 120 more or less useful perl subroutines lumped together and exported into your namespace.
248
249
=head1 DESCRIPTION
250
251
Subs created and collected since the mid-90s.
252
253
=head1 INSTALLATION
254
255
sudo cpan Acme::Tools
256
sudo cpanm Acme::Tools # after: sudo apt-get install cpanminus make # for Ubuntu 12.04
257
258
=head1 EXPORT
259
260
Almost every sub, about 90 of them.
261
262
Beware of namespace pollution. But what did you expect from an Acme module?
263
264
=head1 NUMBERS
265
266
=head2 num2code
267
268
See L
269
270
=head2 code2num
271
272
C convert numbers (integers) from the normal decimal system to some arbitrary other number system.
273
That can be binary (2), oct (8), hex (16) or others.
274
275
Example:
276
277
print num2code(255,2,"0123456789ABCDEF"); # prints FF
278
print num2code( 14,2,"0123456789ABCDEF"); # prints 0E
279
280
...because 255 are converted to hex FF (base C<< length("0123456789ABCDEF") >> ) which is 2 digits of 0-9 or A-F.
281
...and 14 are converted to 0E, with leading 0 because of the second argument 2.
282
283
Example:
284
285
print num2code(1234,16,"01")
286
287
Prints the 16 binary digits 0000010011010010 which is 1234 converted to binary zeros and ones.
288
289
To convert back:
290
291
print code2num("0000010011010010","01"); #prints 1234
292
293
C can be used to compress numeric IDs to something shorter:
294
295
$chars="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_";
296
print num2code("241274432",5,$chars); # prints EOOv0
297
print code2num("EOOv0",$chars); # prints 241274432
298
299
=cut
300
301
#Math::BaseCnv
302
303
sub num2code {
304
4
4
1
350
my($num,$digits,$validchars,$start)=@_;
305
4
5
my $l=length($validchars);
306
4
5
my $key;
307
33
33
134
no warnings;
33
43
33
162608
308
4
50
13
croak if $num<$start;
309
4
4
$num-=$start;
310
4
7
for(1..$digits){
311
25
33
$key=substr($validchars,$num%$l,1).$key;
312
25
24
$num=int($num/$l);
313
}
314
4
50
10
croak if $num>0;
315
4
10
return $key;
316
}
317
318
sub code2num {
319
1
50
1
1
2
my($code,$validchars,$start)=@_; $start=0 if !defined $start;
1
4
320
1
1
my $l=length($validchars);
321
1
2
my $num=0;
322
1
10
$num=$num*$l+index($validchars,$_) for split//,$code;
323
1
5
return $num+$start;
324
}
325
326
327
=head2 gcd
328
329
I< C<">The Euclidean algorithm (also called Euclid's algorithm) is an
330
algorithm to determine the greatest common divisor (gcd) of two
331
integers. It is one of the oldest algorithms known, since it appeared
332
in the classic Euclid's Elements around 300 BC. The algorithm does not
333
require factoring.C<"> >
334
335
B two or more positive numbers (integers, without decimals that is)
336
337
B an integer
338
339
B
340
341
print gcd(12, 8); # prints 4
342
343
Because the (prime number) factors of 12 is 2 * 2 * 3 and the factors of 8 is 2 * 2 * 2
344
and the common ('overlapping') factors for both 12 and 8 is then 2 * 2 and the result becomes 4.
345
346
B:
347
348
print gcd(90, 135, 315); # prints 45
349
print gcd(2*3*3*5, 3*3*3*5, 3*3*5*7); # prints 45 ( = 3*3*5 which is common to all three args)
350
351
Implementation:
352
353
sub gcd { my($a,$b,@r)=@_; @r ? gcd($a,gcd($b,@r)) : $b==0 ? $a : gcd($b, $a % $b) }
354
355
One way of putting it: Keep replacing the larger of the two numbers with the difference between them until you got two equal numbers. Then thats the answer.
356
357
L
358
359
L
360
361
=cut
362
363
26
100
26
1
184
sub gcd { my($a,$b,@r)=@_; @r ? gcd($a,gcd($b,@r)) : $b==0 ? $a : gcd($b, $a % $b) }
26
100
151
364
365
=head2 lcm
366
367
C finds the Least Common Multiple of two or more numbers (integers).
368
369
B two or more positive numbers (integers)
370
371
B an integer number
372
373
Example: C< 2/21 + 1/6 = 4/42 + 7/42 = 11/42>
374
375
Where 42 = lcm(21,6).
376
377
B
378
379
print lcm(45,120,75); # prints 1800
380
381
Because the factors are:
382
383
45 = 2^0 * 3^2 * 5^1
384
120 = 2^3 * 3^1 * 5^1
385
75 = 2^0 * 3^1 * 5^2
386
387
Take the bigest power of each primary number (2, 3 and 5 here).
388
Which is 2^3, 3^2 and 5^2. Multiplied this is 8 * 9 * 25 = 1800.
389
390
sub lcm { my($a,$b,@r)=@_; @r ? lcm($a,lcm($b,@r)) : $a*$b/gcd($a,$b) }
391
392
Seems to works with L as well: (C of all integers from 1 to 200)
393
394
perl -MAcme::Tools -MMath::BigInt -le'print lcm(map Math::BigInt->new($_),1..200)'
395
396
337293588832926264639465766794841407432394382785157234228847021917234018060677390066992000
397
398
=cut
399
400
3
100
3
1
9
sub lcm { my($a,$b,@r)=@_; @r ? lcm($a,lcm($b,@r)) : $a*$b/gcd($a,$b) }
3
17
401
402
=head2 resolve
403
404
Resolves an equation by Newtons method.
405
406
B 1-6 arguments. At least one argument.
407
408
First argument: must be a coderef to a subroutine (a function)
409
410
Second argument: if present, the target, f(x)=target. Default 0.
411
412
Third argument: a start position for x. Default 0.
413
414
Fourth argument: a small delta value. Default 1e-4 (0.0001).
415
416
Fifth argument: a maximum number of iterations before resolve gives up
417
and carps. Default 100 (if fifth argument is not given or is
418
undef). The number 0 means infinite here. If the derivative of the
419
start position is zero or close to zero more iterations are typically
420
needed.
421
422
Sixth argument: A number of seconds to run before giving up. If both
423
fifth and sixth argument is given and > 0, C stops at
424
whichever comes first.
425
426
B returns the number C for C = 0
427
428
...or equal to the second input argument if present.
429
430
B
431
432
The equation C<< x^2 - 4x - 21 = 0 >> has two solutions: -3 and 7.
433
434
The result of C will depend on the start position:
435
436
print resolve(sub{ $_**2 - 4*$_ - 21 }); # -3 with $_ as your x
437
print resolve(sub{ my $x=shift; $x**2 - 4*$x - 21 }); # -3 more elaborate call
438
print resolve(sub{ my $x=shift; $x**2 - 4*$x - 21 },0,3); # 7 with start position 3
439
print "Iterations: $Acme::Tools::Resolve_iterations\n"; # 3 or larger, about 10-15 is normal
440
441
The variable C< $Acme::Tools::Resolve_iterations > (which is exported) will be set
442
to the last number of iterations C used. Also if C dies (carps).
443
444
The variable C< $Acme::Tools::Resolve_last_estimate > (which is exported) will be
445
set to the last estimate. This number will often be close to the solution and can
446
be used even if C dies (carps).
447
448
B
449
450
If either second, third or fourth argument is an instance of L, so will the result be:
451
452
use Acme::Tools;
453
my $equation = sub{ $_ - 1 - 1/$_ };
454
my $gr1 = resolve( $equation, 0, 1 ); #
455
my $gr2 = resolve( $equation, 0, bigf(1) ); # 1/2 + sqrt(5)/2
456
bigscale(50);
457
my $gr3 = resolve( $equation, 0, bigf(1) ); # 1/2 + sqrt(5)/2
458
459
print 1/2 + sqrt(5)/2, "\n";
460
print "Golden ratio 1: $gr1\n";
461
print "Golden ratio 2: $gr2\n";
462
print "Golden ratio 3: $gr3\n";
463
464
Output:
465
466
1.61803398874989
467
Golden ratio 1: 1.61803398874989
468
Golden ratio 2: 1.61803398874989484820458683436563811772029300310882395927211731893236137472439025
469
Golden ratio 3: 1.6180339887498948482045868343656381177203091798057610016490334024184302360920167724737807104860909804
470
471
See:
472
473
L
474
475
L
476
477
L
478
479
=cut
480
481
our $Resolve_iterations;
482
our $Resolve_last_estimate;
483
our $Resolve_time;
484
485
#sub resolve(\[&$]@) {
486
#sub resolve(&@) { <=0.17
487
#todo: perl -MAcme::Tools -le 'print resolve(sub{$_[0]**2-9431**2});print$Acme::Tools::Resolve_iterations'
488
# =>Div by zero: df(x) = 0 at n'th iteration, n=0, delta=0.0001, fx=CODE(0xc81d470) at -e line 1
489
#todo: ren solve?
490
sub resolve {
491
16
16
1
1030
my($f,$goal,$start,$delta,$iters,$sec)=@_;
492
16
100
32
$goal=0 if !defined $goal;
493
16
100
25
$start=0 if !defined $start;
494
16
50
20
$delta=1e-4 if !defined $delta;
495
16
50
27
$iters=100 if !defined $iters;
496
16
100
24
$sec=0 if !defined $sec;
497
16
50
24
$iters=13e13 if $iters==0;
498
16
50
33
53
croak "Iterations ($iters) or seconds ($sec) can not be a negative number" if $iters<0 or $sec<0;
499
16
10
$Resolve_iterations=undef;
500
16
11
$Resolve_last_estimate=undef;
501
16
50
24
croak "Should have at least 1 argument, a coderef" if !@_;
502
16
50
28
croak "First argument should be a coderef" if ref($f) ne 'CODE';
503
504
16
17
my @x=($start);
505
16
100
27
my $time_start=$sec>0?time_fp():undef;
506
16
50
23
my $ds=ref($start) eq 'Math::BigFloat' ? Math::BigFloat->div_scale() : undef;
507
my $fx=sub{
508
559
559
369
local$_=$_[0];
509
559
766
my $fx=&$f($_);
510
559
100
66
2261
if($fx=~/x/ and $fx=~/^[ \(\)\.\d\+\-\*\/x\=\^]+$/){
511
1
8
$fx=~s/(\d)x/$1*x/g;
512
1
4
$fx=~s/\^/**/g;
513
1
3
$fx=~s/^(.*)=(.*)$/($1)-($2)/;
514
1
4
$fx=~s,x,\$_,g;
515
1
100
$f=eval"sub{$fx}";
516
1
16
$fx=&$f($_);
517
}
518
$fx
519
16
33
};
559
689
520
#warn "delta=$delta\n";
521
16
16
my $n=0;
522
16
29
while($n<=$iters-1){
523
182
228
my $fd= &$fx($x[$n]+$delta*0.5) - &$fx($x[$n]-$delta*0.5);
524
182
100
241
$fd = &$fx($x[$n]+$delta*0.7) - &$fx($x[$n]-$delta*0.3) if $fd==0;# and warn"wigle 1\n";
525
182
100
193
$fd = &$fx($x[$n]+$delta*0.2) - &$fx($x[$n]-$delta*0.8) if $fd==0;# and warn"wigle 2\n";
526
182
100
369
croak "Div by zero: df(x) = $x[$n] at n'th iteration, n=$n, delta=$delta, fx=$fx" if $fd==0;
527
181
160
$x[$n+1]=$x[$n]-(&$fx($x[$n])-$goal)/($fd/$delta);
528
181
125
$Resolve_last_estimate=$x[$n+1];
529
#warn "n=$n fd=$fd x=$x[$n+1]\n";
530
181
125
$Resolve_iterations=$n;
531
181
100
100
462
last if $n>3 and $x[$n+1]==$x[$n] and $x[$n]==$x[$n-1];
100
532
180
100
66
524
last if $n>4 and $x[$n]!=0 and abs(1-$x[$n+1]/$x[$n])<1e-13; #sub{3*$_+$_**4-12}
100
533
167
50
66
379
last if $n>3 and ref($x[$n+1]) eq 'Math::BigFloat' and substr($x[$n+1],0,$ds) eq substr($x[$n],0,$ds); #hm
33
534
167
100
66
215
croak "Could not resolve, perhaps too little time given ($sec), iteratons=$n"
535
if $sec>0 and ($Resolve_time=time_fp()-$time_start)>$sec;
536
#warn "$n: ".$x[$n+1]."\n";
537
166
226
$n++;
538
}
539
14
50
37
croak "Could not resolve, perhaps too few iterations ($iters)" if @x>=$iters;
540
14
112
return $x[-1];
541
}
542
543
=head2 resolve_equation
544
545
This prints 2:
546
547
print resolve_equation "x + 13*(3-x) = 17 - x"
548
549
A string containing at least one x is converted into a perl function.
550
Then x is found by using L. The string conversion is done by
551
replacing every x with $_ and if a C< = > char is present it converts
552
C< leftside = rightside > into C< (leftside) - (rightside) = 0 > which
553
is the default behaviour of L.
554
555
=cut
556
557
1
1
1
2
sub resolve_equation { my $e=shift;resolve(sub{$e},@_)}
1
1
3
1
4
558
559
=head2 conv
560
561
Converts between:
562
563
=over 4
564
565
=item * units of measurement
566
567
=item * number systems
568
569
=item * currencies
570
571
=back
572
573
B
574
575
print conv( 2000, "meters", "miles" ); #prints 1.24274238447467
576
print conv( 2.1, 'km', 'm'); #prints 2100
577
print conv( 70,"cm","in"); #prints 27.5590551181102
578
print conv( 4,"USD","EUR"); #prints 3.20481552905431 (depending on todays rates)
579
print conv( 4000,"b","kb"); #prints 3.90625 (1 kb = 1024 bytes)
580
print conv( 4000,"b","Kb"); #prints 4 (1 Kb = 1000 bytes)
581
print conv( 1000,"mb","kb"); #prints 1024000
582
print conv( 101010,"bin","roman"); #prints XLII
583
print conv( "DCCXLII","roman","oct"); #prints 1346
584
585
B are:>
586
587
Note: units starting with the symbol _ means that all metric
588
prefixes from yocto 10^-24 to yotta 10^+24 is supported, so _m means
589
km, mm, cm, µm and so on. And _N means kN, MN GN and so on.
590
591
Note2: Many units have synonyms: m, meter, meters ...
592
593
acceleration: g, g0, m/s2, mps2
594
595
angle: binary_degree, binary_radian, brad, deg, degree, degrees,
596
gon, grad, grade, gradian, gradians, hexacontade, hour,
597
new_degree, nygrad, point, quadrant, rad, radian, radians,
598
sextant, turn
599
600
area: a, ar, are, ares, bunder, ca, centiare, cho, cm2,
601
daa, decare, decares, deciare, dekar,
602
djerib, m2, dunam, dönüm, earths, feddan, ft2, gongqing, ha
603
ha, hectare, hectares, hektar, jerib, km2, m2, manzana,
604
mi2, mm2, mu, qing, rai, sotka,
605
sqcm, sqft, sqkm, sqm, sqmi, sqmm
606
stremmata, um2, µm2
607
608
bytes: Eb, Gb, Kb, KiB, Mb, Pb, Tb, Yb, Zb, b, byte,
609
kb, kilobyte, mb, megabyte,
610
gb, gigabyte, tb, terabyte,
611
pb, petabyte, eb, exabyte,
612
zb, zettabyte, yb, yottabyte
613
614
charge: As, C, _e, coulomb, e
615
616
current: A, _A, N/m2
617
618
energy: BTU, Btu, J, Nm, W/s, Wh, Wps, Ws, _J, _eV,
619
cal, calorie, calories, eV, electronvolt, BeV,
620
erg, ergs, foot-pound, foot-pounds, ftlb, joule, kWh,
621
kcal, kilocalorie, kilocalories,
622
newtonmeter, newtonmeters, th, thermie
623
624
force: N, _N, dyn, dyne, dynes, lb, newton
625
626
length: NM, _m, _pc, astronomical unit, au, chain, ft, furlong,
627
in, inch, inches, km, league, lightyear, ls, ly,
628
m, meter, meters, mi, mil, mile, miles,
629
nautical mile, nautical miles, nmi,
630
parsec, pc, planck, yard, yard_imperical, yd, Å, ångstrøm
631
632
mass: Da, _eV, _g, bag, carat, ct, dwt, eV, electronvolt, g,
633
grain, grains, gram, grams, kilo, kilos, kt, lb, lb_av,
634
lb_t, lb_troy, lbs, ounce, ounce_av, ounce_troy, oz, oz_av, oz_t,
635
pennyweight, pound, pound_av, pound_metric, pound_troy, pounds,
636
pwt, seer, sl, slug, solar_mass, st, stone, t, tonn, tonne, tonnes, u, wey
637
638
mileage: mpg, l/100km, l/km, l/10km, lp10km, l/mil, liter_pr_100km, liter_pr_km, lp100km
639
640
money: AED, ARS, AUD, BGN, BHD, BND, BRL, BWP, CAD, CHF, CLP, CNY,
641
COP, CZK, DKK, EUR, GBP, HKD, HRK, HUF, IDR, ILS, INR, IRR,
642
ISK, JPY, KRW, KWD, KZT, LKR, LTL, LVL, LYD, MUR, MXN, MYR,
643
NOK, NPR, NZD, OMR, PHP, PKR, PLN, QAR, RON, RUB, SAR, SEK,
644
SGD, THB, TRY, TTD, TWD, USD, VEF, ZAR, BTC, LTC, mBTC, XBT
645
Currency rates are automatically updated from the net
646
at least every 24h since last update (on linux/cygwin).
647
648
numbers: dec, hex, bin, oct, roman, dozen, doz, dz, dusin, gross, gro,
649
gr, great_gross, small_gross (not supported: decimal numbers)
650
651
power: BTU, BTU/h, BTU/s, BTUph, GWhpy, J/s, Jps, MWhpy, TWhpy,
652
W, Whpy, _W, ftlb/min, ftlb/s, hk, hp, kWh/yr, kWhpy
653
654
pressure: N/m2, Pa, _Pa, at, atm, bar, mbar, pascal, psi, torr
655
656
radioactivity: Bq, becquerel, curie
657
658
speed: _m/s, km/h, km/t, kmh, kmph, kmt, m/s, mi/h, mph, mps,
659
kn, knot, knots, kt, kts, mach, machs, c, fps, ft/s, ftps
660
661
temperature: C, F, K, celsius, fahrenheit, kelvin
662
663
time: _s, biennium, century, d, day, days, decade, dy, fortnight,
664
h, hour, hours, hr, indiction, jubilee, ke, lustrum, m,
665
millennium, min, minute, minutes, mo, moment, mon, month,
666
olympiad, quarter, s, season, sec, second, seconds, shake,
667
tp, triennium, w, week, weeks, y, y365, ySI, ycommon,
668
year, years, ygregorian, yjulian, ysideral, ytropical
669
670
volume: l, L, _L, _l, cm3, m3, ft3, in3, liter, liters, litre, litres,
671
gal, gallon, gallon_imp, gallon_uk, gallon_us, gallons,
672
pint, pint_imp, pint_uk, pint_us, tsp, tablespoon, teaspoon,
673
floz, floz_uk, therm, thm, fat, bbl, Mbbl, MMbbl, drum,
674
container (or container20), container40, container40HC, container45HC
675
676
See: L
677
678
=cut
679
680
#TODO: @arr2=conv(\@arr1,"from","to") # should be way faster than:
681
#TODO: @arr2=map conv($_,"from","to"),@arr1
682
#TODO: conv(123456789,'b','h'); # h converts to something human-readable
683
684
our %conv=(
685
length=>{
686
m => 1,
687
_m => 1,
688
meter => 1,
689
meters => 1,
690
km => 1000,
691
mil => 10000, #scandinavian #also: inch/1000!
692
in => 0.0254,
693
inch => 0.0254,
694
inches => 0.0254,
695
ft => 0.0254*12, #0.3048 m
696
feet => 0.0254*12, #0.3048 m
697
yd => 0.0254*12*3, #0.9144 m
698
yard => 0.0254*12*3, #0.9144 m
699
yards => 0.0254*12*3, #0.9144 m
700
fathom => 0.0254*12*3*2, #1.8288 m
701
fathoms => 0.0254*12*3*2, #1.8288 m
702
chain => 0.0254*12*3*22, #20.1168 m
703
chains => 0.0254*12*3*22, #20.1168 m
704
furlong => 0.0254*12*3*22*10, #201.168 m
705
furlongs=> 0.0254*12*3*22*10, #201.168 m
706
mi => 0.0254*12*3*22*10*8, #1609.344 m
707
mile => 0.0254*12*3*22*10*8, #1609.344 m
708
miles => 0.0254*12*3*22*10*8,
709
league => 0.0254*12*3*22*10*8*3, #4828.032 m
710
leagues => 0.0254*12*3*22*10*8*3, #4828.032 m
711
yard_imp => 0.914398416,
712
yard_imperical => 0.914398416,
713
NM => 1852, #nautical mile
714
nmi => 1852, #nautical mile
715
'nautical mile' => 1852,
716
'nautical miles' => 1852,
717
'Å' => 1e-10,
718
'ångstrøm' => 1e-10,
719
ly => 299792458*3600*24*365.25,
720
lightyear => 299792458*3600*24*365.25, # = 9460730472580800 by def
721
ls => 299792458, #light-second
722
pc => 3.0857e16, #3.26156 ly
723
_pc => 3.0857e16, #3.26156 ly
724
parsec => 3.0857e16,
725
au => 149597870700, # by def, earth-sun
726
'astronomical unit' => 149597870700,
727
planck => 1.61619997e-35, #planck length
728
#Norwegian (old) lengths:
729
tomme => 0.0254,
730
tommer => 0.0254,
731
fot => 0.0254*12, #0.3048m
732
alen => 0.0254*12*2, #0.6096m
733
favn => 0.0254*12*2*3, #1.8288m
734
kvart => 0.0254*12*2/4, #0.1524m a quarter alen
735
},
736
mass =>{ #https://en.wikipedia.org/wiki/Unit_conversion#Mass
737
g => 1,
738
_g => 1,
739
gram => 1,
740
grams => 1,
741
kilo => 1000,
742
kilos => 1000,
743
t => 1000000,
744
tonn => 1000000,
745
tonne => 1000000,
746
tonnes => 1000000,
747
seer => 933.1,
748
lb => 453.59237,
749
lbs => 453.59237,
750
lb_av => 453.59237,
751
lb_t => 373.2417216, #5760 grains
752
lb_troy => 373.2417216,
753
pound => 453.59237,
754
pounds => 453.59237,
755
pound_av => 453.59237,
756
pound_troy => 373.2417216,
757
pound_metric => 500,
758
ounce => 28, # US food, 28g
759
ounce_av => 453.59237/16, # avoirdupois lb/16 = 28.349523125g
760
ounce_troy => 31.1034768, # lb_troy / 12
761
oz => 28, # US food, 28g
762
oz_av => 453.59237/16, # avoirdupois lb/16 = 28.349523125g
763
oz_t => 31.1034768, # lb_troy / 12,
764
grain => 64.79891/1000, # 453.59237/7000
765
grains => 64.79891/1000,
766
pennyweight => 31.1034768 / 20,
767
pwt => 31.1034768 / 20,
768
dwt => 31.1034768 / 20,
769
st => 6350.29318, # 14 lb_av
770
stone => 6350.29318,
771
wey => 114305.27724, # 252 lb = 18 stone
772
carat => 0.2,
773
ct => 0.2, #carat (metric)
774
kt => 64.79891/1000 * (3+1/6), #carat/karat
775
u => 1.66053892173e-30, #atomic mass carbon-12
776
Da => 1.66053892173e-30, #atomic mass carbon-12
777
slug => 14600,
778
sl => 14600,
779
eV => 1.78266172802679e-33, #e=mc2 = 1.60217646e-19 J / (2.99792458e8 m/s)**2
780
_eV => 1.78266172802679e-33,
781
electronvolt => 1.78266172802679e-33,
782
'solar mass' => 1.99e33,
783
solar_mass => 1.99e33,
784
bag => 60*1000, #60kg coffee
785
},
786
area =>{ # https://en.wikipedia.org/wiki/Unit_conversion#Area
787
m2 => 1,
788
dm2 => 0.1**2,
789
cm2 => 0.01**2,
790
mm2 => 0.001**2,
791
'µm2' => 1e-6**2,
792
um2 => 1e-6**2,
793
sqm => 1,
794
sqcm => 0.01**2,
795
sqmm => 0.001**2,
796
km2 => 1000**2,
797
sqkm => 1000**2,
798
a => 100,
799
ar => 100,
800
are => 100,
801
ares => 100,
802
dekar => 1000,
803
decare => 1000,
804
decares => 1000,
805
daa => 1000,
806
'mål' => 1000,
807
ha => 10000,
808
hektar => 10000,
809
hectare => 10000,
810
hectares=> 10000,
811
ft2 => (0.0254*12)**2,
812
sqft => (0.0254*12)**2,
813
mi2 => 1609.344**2,
814
sqmi => 1609.344**2,
815
yd2 => (0.0254*12*3)**2, #square yard
816
sqyd => (0.0254*12*3)**2,
817
yard2 => (0.0254*12*3)**2,
818
sqyard => (0.0254*12*3)**2,
819
rood => 1210*(0.0254*12*3)**2, # 1/4 acres
820
roods => 1210*(0.0254*12*3)**2, # 1/4 acres
821
ac => 4840*(0.0254*12*3)**2, # 4840 square yards = 1 chain x 1 furlong
822
acre => 4840*(0.0254*12*3)**2,
823
acres => 4840*(0.0254*12*3)**2,
824
homestead => 4840*(0.0254*12)**2 *160, #160 acres US Surveyors or 1/4 sqmiles
825
township => 4840*(0.0254*12)**2 *160*144, #144 homesteads or 36 sqmiles
826
perches => 4840*(0.0254*12)**2 /160, #160 perches = 1 acre in sri lanka
827
sotka => 100, #russian are
828
jerib => 10000, #iran hectare
829
djerib => 10000, #turkish hectare
830
gongqing => 10000, #chinese hectare
831
manzana => 10000, #argentinian hectare
832
bunder => 10000, #dutch hectare
833
centiare => 1,
834
deciare => 10,
835
ca => 1,
836
mu => 10000/15, #China
837
qing => 10000/0.15, #China
838
dunam => 10000/10, #Middle East
839
'dönüm' => 10000/10, #Middle East
840
stremmata => 10000/10, #Greece
841
rai => 10000/6.25, #Thailand
842
cho => 10000/1.008, #Japan
843
feddan => 10000/2.381, #Egypt
844
earths => 510072000e6, #510072000 km2, surface area of earth
845
barn => 1e-28, #physics
846
outhouse => 1e-34, #physics
847
shed => 1e-52, #physics
848
},
849
volume=>{
850
l => 1,
851
L => 1,
852
_L => 1,
853
_l => 1,
854
liter => 1,
855
liters => 1,
856
litre => 1,
857
litres => 1,
858
gal => 231*2.54**3/1000, #3.785411784, #231 cubic inches
859
gallon => 231*2.54**3/1000,
860
gallons => 231*2.54**3/1000,
861
gallon_us => 231*2.54**3/1000,
862
gallon_uk => 4.54609,
863
gallon_imp=> 4.54609,
864
gallon_us_dry => 4.40488377086, # ~ 9.25**2*pi*2.54**3/1000 L
865
m3 => 10**3, #1000 L
866
cm3 => 0.1**3, #0.001 L
867
in3 => 0.254**3, #0.016387064 L
868
ft3 => (0.254*12)**3,
869
tablespoon=> 3.785411784/256, #14.78676478125 mL
870
tsp => 3.785411784/256/3, #4.92892159375 mL
871
teaspoon => 3.785411784/256/3, #4.92892159375 mL
872
floz => 3.785411784/128, #fluid ounce US
873
floz_uk => 4.54609/160, #fluid ounce UK
874
pint => 4.54609/8, #0.56826125 L
875
pint_uk => 4.54609/8,
876
pint_imp => 4.54609/8,
877
pint_us => 3.785411784/8, #0.473176473
878
therm => 2.74e3, #? 100000BTUs? (!= thermie)
879
thm => 2.74e3, #? (!= th)
880
fat => 42*231*2.54**3/1000,
881
bbl => 42*231*2.54**3/1000, #oil barrel ~159 liters https://en.wikipedia.org/wiki/Barrel_(unit)
882
Mbbl => 42*231*2.54**3, #mille (thousand) oil barrels
883
MMbbl => 42*231*2.54**3*1000, #mille mille (million) oil barrels
884
drum => 200,
885
container => 33.1e3, #container20
886
container20 => 33.1e3,
887
container40 => 67.5e3,
888
container40HC => 75.3e3,
889
container45HC => 86.1e3,
890
#Norwegian:
891
meterfavn => 2 * 2 * 0.6, #ved 2.4 m3
892
storfavn => 2 * 2 * 3, #ved 12 m3
893
},
894
time =>{
895
s => 1,
896
_s => 1,
897
sec => 1,
898
second => 1,
899
seconds => 1,
900
m => 60,
901
min => 60,
902
minute => 60,
903
minutes => 60,
904
h => 60*60,
905
hr => 60*60,
906
hour => 60*60,
907
hours => 60*60,
908
d => 60*60*24,
909
dy => 60*60*24,
910
day => 60*60*24,
911
days => 60*60*24,
912
w => 60*60*24*7,
913
week => 60*60*24*7,
914
weeks => 60*60*24*7,
915
mo => 60*60*24 * 365.2425/12,
916
mon => 60*60*24 * 365.2425/12,
917
month => 60*60*24 * 365.2425/12,
918
quarter => 60*60*24 * 365.2425/12 * 3, #3 months
919
season => 60*60*24 * 365.2425/12 * 3, #3 months
920
y => 60*60*24 * 365.2425, # 365+97/400 #97 leap yers in 400 years
921
year => 60*60*24 * 365.2425,
922
years => 60*60*24 * 365.2425,
923
yjulian => 60*60*24 * 365.25, # 365+1/4
924
y365 => 60*60*24 * 365, # finance/science
925
ycommon => 60*60*24 * 365, # finance/science
926
ygregorian => 60*60*24 * 365.2425, # 365+97/400
927
#ygaussian => 365+(6*3600+9*60+56)/(24*3600), # 365+97/400
928
ytropical => 60*60*24 * 365.24219,
929
ysideral => 365.256363004,
930
ySI => 60*60*24*365.25, #31556925.9747
931
decade => 10 * 60*60*24*365.2425,
932
biennium => 2 * 60*60*24*365.2425,
933
triennium => 3 * 60*60*24*365.2425,
934
olympiad => 4 * 60*60*24*365.2425,
935
lustrum => 5 * 60*60*24*365.2425,
936
indiction => 15 * 60*60*24*365.2425,
937
jubilee => 50 * 60*60*24*365.2425,
938
century => 100 * 60*60*24*365.2425,
939
millennium => 1000 * 60*60*24*365.2425,
940
shake => 1e-8,
941
moment => 3600/40, #1/40th of an hour, used by Medieval Western European computists
942
ke => 864, #1/100th of a day, trad Chinese, 14m24s
943
fortnight => 14*24*3600,
944
tp => 5.3910632e-44, #planck time, time for ligth to travel 1 planck length
945
},
946
speed=>{
947
'm/s' => 1,
948
'_m/s' => 1,
949
mps => 1,
950
mph => 1609.344/3600,
951
'mi/h' => 1609.344/3600,
952
kmh => 1/3.6,
953
kmph => 1/3.6,
954
'km/h' => 1/3.6,
955
kmt => 1/3.6, # t=time or temps (scandinavian and french and dutch)
956
'km/t' => 1/3.6,
957
kt => 1852/3600,
958
kts => 1852/3600,
959
kn => 1852/3600,
960
knot => 1852/3600,
961
knots => 1852/3600,
962
knop => 1852/3600, #scandinavian
963
c => 299792458, #speed of light
964
mach => 340.3, #speed of sound
965
machs => 340.3,
966
fps => 0.3048, #0.0254*12
967
ftps => 0.3048,
968
'ft/s' => 0.3048,
969
},
970
acceleration=>{
971
'm/s2' => 1,
972
'mps2' => 1,
973
g => 9.80665,
974
g0 => 9.80665,
975
#0-100kmh or ca 0-60 mph x seconds...
976
},
977
temperature=>{ #http://en.wikipedia.org/wiki/Temperature#Conversion
978
C=>1, F=>1, K=>1, celsius=>1, fahrenheit=>1, kelvin=>1
979
},
980
radioactivity=>{
981
Bq => 1,
982
becquerel => 1,
983
curie => 3.7e10,
984
},
985
current=> {
986
A => 1,
987
_A => 1,
988
'N/m2' => 2e-7,
989
},
990
charge=>{
991
e => 1,
992
_e => 1,
993
C => 6.24150964712042e+18,
994
coulomb => 6.24150964712042e+18,
995
As => 6.24150964712042e+18,
996
#Faraday unit of charge ???
997
},
998
power=> {
999
W => 1,
1000
_W => 1,
1001
'J/s' => 1,
1002
Jps => 1,
1003
hp => 746,
1004
hk => 746, #hestekrefter (norwegian, scandinavian)
1005
PS => 746/1.014, #pferdestärken
1006
'kWh/yr' => 1000 * 3600/(24*365), #kWh annually
1007
Whpy => 3600/(24*365), #kWh annually
1008
kWhpy => 1000 * 3600/(24*365), #kWh annually
1009
MWhpy => 1000**2 * 3600/(24*365), #kWh annually
1010
GWhpy => 1000**3 * 3600/(24*365), #kWh annually
1011
TWhpy => 1000**4 * 3600/(24*365), #kWh annually
1012
BTU => 1055.05585262/3600, #
1013
BTUph => 1055.05585262/3600,
1014
'BTU/h' => 1055.05585262/3600,
1015
'BTU/s' => 1055.05585262,
1016
'ftlb/s' => 746/550,
1017
'ftlb/min'=> 746/550/60,
1018
},
1019
energy=>{
1020
joule => 1,
1021
J => 1,
1022
_J => 1,
1023
Ws => 1,
1024
Wps => 1,
1025
'W/s' => 1,
1026
Nm => 1,
1027
newtonmeter => 1,
1028
newtonmeters => 1,
1029
Wh => 3600,
1030
kWh => 3600000, #3.6 million J
1031
cal => 4.1868, # ~ 3600/860
1032
calorie => 4.1868,
1033
calories => 4.1868,
1034
kcal => 4.1868*1000,
1035
kilocalorie => 4.1868*1000,
1036
kilocalories => 4.1868*1000,
1037
BTU => 4.1868 * 252, # = 1055.0736 or is 1055.05585262 right?
1038
Btu => 4.1868 * 252,
1039
ftlb => 746/550, # ~ 1/0.7375621
1040
'foot-pound' => 746/550,
1041
'foot-pounds' => 746/550,
1042
erg => 1e-7,
1043
ergs => 1e-7,
1044
eV => 1.60217656535e-19,
1045
_eV => 1.60217656535e-19,
1046
BeV => 1.60217656535e-10,
1047
electronvolt => 1.60217656535e-19,
1048
thermie => 4.1868e6,
1049
th => 4.1868e6,
1050
hph => 3600*746,
1051
PSh => 3600*746/1.014,
1052
galatm_imp => 460.63256925,
1053
galatm_US => 383.5568490138,
1054
quad => 1.05505585262e18,
1055
Ry => 2.179872e-18,
1056
rydberg => 2.179872e-18,
1057
th => 4.1868e6,
1058
thermie => 4.1868e6,
1059
boe => 6.12e9, #barrel of oil equivalent
1060
TCE => 29.288e9, #ton of coal equivalent
1061
toe => 41.868e9, #tonne of oil equivalent
1062
tTNT => 4.184e9, #ton of TNT equivalent
1063
},
1064
force=> {
1065
newton=> 1,
1066
N => 1,
1067
_N => 1,
1068
dyn => 1e-5,
1069
dyne => 1e-5,
1070
dynes => 1e-5,
1071
lb => 4.448222,
1072
},
1073
pressure=>{
1074
Pa => 1,
1075
_Pa => 1,
1076
pascal => 1,
1077
'N/m2' => 1,
1078
bar => 100000.0,
1079
mbar => 100.0,
1080
at => 98066.5, #technical atmosphere
1081
atm => 101325.0, #standard atmosphere
1082
torr => 133.3224,
1083
psi => 6894.8, #pounds per square inch
1084
},
1085
bytes=> {
1086
b => 1,
1087
kb => 1024, #2**10
1088
mb => 1024**2, #2**20 = 1048576
1089
gb => 1024**3, #2**30 = 1073741824
1090
tb => 1024**4, #2**40 = 1099511627776
1091
pb => 1024**5, #2**50 = 1.12589990684262e+15
1092
eb => 1024**6, #2**60 =
1093
zb => 1024**7, #2**70 =
1094
yb => 1024**8, #2**80 =
1095
KiB => 1024, #2**10
1096
KiB => 1024**2, #2**20 = 1048576
1097
KiB => 1024**3, #2**30 = 1073741824
1098
KiB => 1024**4, #2**40 = 1099511627776
1099
KiB => 1024**5, #2**50 = 1.12589990684262e+15
1100
KiB => 1024**6, #2**60 =
1101
KiB => 1024**7, #2**70 =
1102
KiB => 1024**8, #2**80 =
1103
Kb => 1000, #2**10
1104
Mb => 1000**2, #2**20 = 1048576
1105
Gb => 1000**3, #2**30 = 1073741824
1106
Tb => 1000**4, #2**40 = 1099511627776
1107
Pb => 1000**5, #2**50 = 1.12589990684262e+15
1108
Eb => 1000**6, #2**60 =
1109
Zb => 1000**7, #2**70 =
1110
Yb => 1000**8, #2**80 =
1111
byte => 1,
1112
kilobyte => 1024, #2**10
1113
megabyte => 1024**2, #2**20 = 1048576
1114
gigabyte => 1024**3, #2**30 = 1073741824
1115
terabyte => 1024**4, #2**40 = 1099511627776
1116
petabyte => 1024**5, #2**50 = 1.12589990684262e+15
1117
exabyte => 1024**6, #2**60 =
1118
zettabyte => 1024**7, #2**70 =
1119
yottabyte => 1024**8, #2**80 =
1120
},
1121
milage=>{ #fuel consumption
1122
'l/mil' => 1,
1123
'l/10km' => 1,
1124
'lp10km' => 1,
1125
'l/km' => 10,
1126
'l/100km' => 1/10,
1127
lp100km => 1/10,
1128
liter_pr_100km => 1/10,
1129
liter_pr_km => 10,
1130
mpg => -23.5214584, #negative signals inverse
1131
},
1132
# light=> {
1133
# cd => 1,
1134
# candela => 1,
1135
# },
1136
# lumens
1137
# lux
1138
angle =>{
1139
turn => 1,
1140
rad => 1/(2*$PI), # 2 * pi
1141
radian => 1/(2*$PI), # 2 * pi
1142
radians => 1/(2*$PI), # 2 * pi
1143
deg => 1/360, # 4 * 90
1144
degree => 1/360, # 4 * 90
1145
degrees => 1/360, # 4 * 90
1146
grad => 1/400,
1147
gradian => 1/400,
1148
gradians => 1/400,
1149
grade => 1/400, #french revolutionary unit
1150
gon => 1/400,
1151
new_degree => 1/400,
1152
nygrad => 1/400, #scandinavian
1153
quadrant => 1/4,
1154
sextant => 1/6,
1155
hour => 1/24,
1156
point => 1/32, #used in navigation
1157
hexacontade => 1/60,
1158
binary_degree => 1/256,
1159
binary_radian => 1/256,
1160
brad => 1/256,
1161
},
1162
money =>{ # rates at dec 17 2015
1163
AED => 2.389117, #
1164
ARS => 0.895122, #
1165
AUD => 6.253619, #
1166
BGN => 4.847575, #
1167
BHD => 23.267384, #
1168
BND => 6.184624, #
1169
BRL => 2.260703, #
1170
BTC => 3910.932213547, #bitcoin
1171
BWP => 0.794654, #
1172
CAD => 6.289957, #
1173
CHF => 8.799974, #
1174
CLP => 0.012410, #
1175
CNY => 1.353406, #
1176
COP => 0.00262229, #
1177
CZK => 0.351171, #
1178
DKK => 1.271914, #
1179
EUR => 9.489926, #
1180
GBP => 13.069440, #
1181
HKD => 1.131783, #
1182
HRK => 1.240878, #
1183
HUF => 0.029947, #
1184
IDR => 0.00062471, #
1185
ILS => 2.254456, #
1186
INR => 0.132063, #
1187
IRR => 0.00029370, #
1188
ISK => 0.067245, #
1189
JPY => 0.071492, #
1190
KRW => 0.00739237, #
1191
KWD => 28.862497, #
1192
KZT => 0.027766, #
1193
LKR => 0.061173, #
1194
LTC => 31.78895354018, #litecoin
1195
LTL => 2.748472, #
1196
LVL => 13.503025, #
1197
LYD => 6.296978, #
1198
MUR => 0.240080, #
1199
MXN => 0.515159, #
1200
MYR => 2.032465, #
1201
NOK => 1.000000000, #norwegian kroner
1202
NPR => 0.084980, #
1203
NZD => 5.878331, #
1204
OMR => 22.795994, #
1205
PHP => 0.184839, #
1206
PKR => 0.083779, #
1207
PLN => 2.207243, #
1208
QAR => 2.409162, #
1209
RON => 2.101513, #
1210
RUB => 0.122991, #
1211
SAR => 2.339745, #
1212
SEK => 1.023591, #
1213
SGD => 6.184624, #
1214
THB => 0.242767, #
1215
TRY => 2.994338, #
1216
TTD => 1.374484, #
1217
TWD => 0.265806, #
1218
USD => 8.774159, #
1219
VEF => 1.395461, #
1220
ZAR => 0.576487, #
1221
XBT => 3910.932213547, # bitcoin
1222
mBTC => 3910.932213547, # bitcoin
1223
mXBT => 3910.932213547, # bitcoin
1224
},
1225
numbers =>{
1226
dec=>1,hex=>1,bin=>1,oct=>1,roman=>1, des=>1,#des: spelling error in v0.15-0.16
1227
dusin=>1,dozen=>1,doz=>1,dz=>1,gross=>144,gr=>144,gro=>144,great_gross=>12*144,small_gross=>10*12,
1228
}
1229
);
1230
our $conv_prepare_time=0;
1231
our $conv_prepare_money_time=0;
1232
sub conv_prepare {
1233
1
1
0
9
my %b =(da =>1e+1, h =>1e+2, k =>1e+3, M =>1e+6, G =>1e+9, T =>1e+12, P =>1e+15, E =>1e+18, Z =>1e+21, Y =>1e+24, H =>1e+27);
1234
1
6
my %big =(deca=>1e+1, hecto=>1e+2, kilo =>1e+3, mega =>1e+6, giga=>1e+9, tera=>1e+12, peta =>1e+15, exa =>1e+18, zetta=>1e+21, yotta=>1e+24, hella=>1e+27);
1235
1
6
my %s =(d =>1e-1, c =>1e-2, m =>1e-3,'µ' =>1e-6, u=>1e-6, n =>1e-9, p =>1e-12, f =>1e-15, a =>1e-18, z =>1e-21, y =>1e-24);
1236
1
8
my %small=(deci=>1e-1, centi=>1e-2, milli=>1e-3, micro =>1e-6, nano=>1e-9, pico=>1e-12, femto=>1e-15, atto=>1e-18, zepto=>1e-21, yocto=>1e-24);
1237
# myria=> 10000 #obsolete
1238
# demi => 1/2, double => 2 #obsolete
1239
# lakh => 1e5, crore => 1e7 #south asian
1240
1
8
my %x = (%s,%b);
1241
1
7
for my $type (keys%conv) {
1242
20
13
for(grep/^_/,keys%{$conv{$type}}) {
20
178
1243
15
15
my $c=$conv{$type}{$_};
1244
15
10
delete$conv{$type}{$_};
1245
15
14
my $unit=substr($_,1);
1246
15
288
$conv{$type}{$_.$unit}=$x{$_}*$c for keys%x;
1247
}
1248
}
1249
1
11
$conv_prepare_time=time();
1250
}
1251
1252
our $Currency_rates_url = 'http://calthis.com/currency-rates';
1253
our $Currency_rates_expire = 6*3600;
1254
sub conv_prepare_money {
1255
0
0
0
0
eval {
1256
0
0
require LWP::Simple;
1257
0
0
0
my $td=$^O=~/^(?:linux|cygwin)$/?"/tmp":"/tmp"; #hm wrong!
1258
0
0
my $fn="$td/acme-tools-currency-rates.data";
1259
0
0
0
0
if( !-e$fn or time() - (stat($fn))[9] >= $Currency_rates_expire){
1260
0
0
LWP::Simple::getstore($Currency_rates_url,"$fn.$$.tmp"); # get ... see getrates.cmd
1261
0
0
0
die "nothing downloaded" if !-s"$fn.$$.tmp";
1262
0
0
rename "$fn.$$.tmp",$fn;
1263
0
0
chmod 0666,$fn;
1264
}
1265
0
0
my $d=readfile($fn);
1266
0
0
my %r=$d=~/^\s*([A-Z]{3}) +(\d+\.\d+)\b/gm;
1267
0
0
$r{lc($_)}=$r{$_} for keys%r;
1268
#warn serialize([minus([sort keys(%r)],[sort keys(%{$conv{money}})])],'minus'); #ARS,AED,COP,BWP,LVL,BHD,NPR,LKR,QAR,KWD,LYD,SAR,KZT,CLP,IRR,VEF,TTD,OMR,MUR,BND
1269
#warn serialize([minus([sort keys(%{$conv{money}})],[sort keys(%r)])],'minus'); #LTC,I44,BTC,BYR,TWI,NOK,XDR
1270
0
0
0
$conv{money}={%{$conv{money}},%r} if keys(%r)>20;
0
0
1271
};
1272
0
0
0
carp "conv: conv_prepare_money (currency conversion automatic daily updated rates) - $@\n" if $@;
1273
0
0
$conv{money}{"m$_"}=$conv{money}{$_}/1000 for qw/BTC XBT/;
1274
0
0
$conv_prepare_money_time=time();
1275
0
0
1; #not yet
1276
}
1277
1278
sub conv {
1279
43
43
1
6725
my($num,$from,$to)=@_;
1280
43
50
76
croak "conf requires 3 args" if @_!=3;
1281
43
100
65
conv_prepare() if !$conv_prepare_time;
1282
43
86
103
my $types=sub{ my $unit=shift; [sort grep$conv{$_}{$unit}, keys%conv] };
86
78
86
911
1283
43
33
56
my @types=map{ my $ru=$_; my $r;$r=&$types($_) and @$r and $$ru=$_ and last for ($$ru,uc($$ru),lc($$ru)); $r }(\$from,\$to);
86
33
53
86
50
52
86
143
86
161
1284
43
50
my @err=map "Unit ".[$from,$to]->[$_]." is unknown",grep!@{$types[$_]},0..1;
86
84
1285
43
54
my @type=intersect(@types);
1286
43
50
62
push @err, "from=$from and to=$to has more than one possible conversions: ".join(", ", @type) if @type>1;
1287
push @err, "from $from (".(join(",",@{$types[0]})||'?').") and "
1288
43
50
0
51
."to $to (" .(join(",",@{$types[1]})||'?').") has no known common unit type.\n" if @type<1;
0
1289
43
50
58
croak join"\n",map"conv: $_",@err if @err;
1290
43
33
my $type=$type[0];
1291
43
50
66
80
conv_prepare_money() if $type eq 'money' and time() >= $conv_prepare_money_time + $Currency_rates_expire;
1292
43
100
60
return conv_temperature(@_) if $type eq 'temperature';
1293
36
100
43
return conv_numbers(@_) if $type eq 'numbers';
1294
31
20
my $c=$conv{$type};
1295
31
26
my($cf,$ct)=@{$conv{$type}}{$from,$to};
31
40
1296
31
100
100
141
my $r= $cf>0 && $ct<0 ? -$ct/$num/$cf
100
66
1297
: $cf<0 && $ct>0 ? -$cf/$num/$ct
1298
: $cf*$num/$ct;
1299
# print STDERR "$num $from => $to from=$ff to=$ft r=$r\n";
1300
31
106
return $r;
1301
}
1302
1303
sub conv_temperature { #http://en.wikipedia.org/wiki/Temperature#Conversion
1304
7
7
0
22
my($t,$from,$to)=(shift(),map uc(substr($_,0,1)),@_);
1305
7
100
16
$from=~s/K/C/ and $t-=273.15;
1306
#$from=~s/R/F/ and $t-=459.67; #rankine
1307
7
100
15
return $t if $from eq $to;
1308
1
1
7
{CK=>sub{$t+273.15},
1309
3
3
22
FC=>sub{($t-32)*5/9},
1310
2
2
15
CF=>sub{$t*9/5+32},
1311
0
0
0
FK=>sub{($t-32)*5/9+273.15},
1312
6
71
}->{$from.$to}->();
1313
}
1314
1315
sub conv_numbers {
1316
5
5
0
6
my($n,$fr,$to)=@_;
1317
5
0
34
my $dec=$fr eq 'dec' ? $n
50
50
50
50
50
50
100
50
100
50
1318
:$fr eq 'hex' ? hex($n)
1319
:$fr eq 'oct' ? oct($n)
1320
:$fr eq 'bin' ? oct("0b$n")
1321
:$fr =~ /^(dusin|dozen|doz|dz)$/ ? $n*12
1322
:$fr =~ /^(gross|gr|gro)$/ ? $n*144
1323
:$fr eq 'great_gross' ? $n*12*144
1324
:$fr eq 'small_gross' ? $n*12*10
1325
:$fr eq 'skokk' ? $n*60 #norwegian unit
1326
:$fr eq 'roman' ? roman2int($n)
1327
:$fr eq 'des' ? $n
1328
:croak "Conv from $fr not supported yet";
1329
5
50
40
my $ret=$to eq 'dec' ? $dec
100
50
50
50
50
50
100
100
50
50
1330
:$to eq 'hex' ? sprintf("%x",$dec)
1331
:$to eq 'oct' ? sprintf("%o",$dec)
1332
:$to eq 'bin' ? sprintf("%b",$dec)
1333
:$to =~ /^(dusin|dozen|doz|dz)$/ ? $dec/12
1334
:$to =~ /^(gross|gr|gro)$/ ? $dec/144
1335
:$to eq 'great_gross' ? $dec/(12*144)
1336
:$to eq 'small_gross' ? $dec/(12*10)
1337
:$to eq 'skokk' ? $dec/60
1338
:$to eq 'roman' ? int2roman($dec)
1339
:$to eq 'des' ? $dec
1340
:croak "Conv to $to not suppoerted yet";
1341
5
19
$ret;
1342
}
1343
#http://en.wikipedia.org/wiki/Norwegian_units_of_measurement
1344
1345
1346
=head2 bytes_readable
1347
1348
Converts a number of bytes to something human readable.
1349
1350
Input 1: a number
1351
1352
Input 2: optionally the number of decimals if >1000 B. Default is 2.
1353
1354
Output: a string containing:
1355
1356
the number with a B behind if the number is less than 1000
1357
1358
the number divided by 1024 with two decimals and "kB" behind if the number is less than 1024*1000
1359
1360
the number divided by 1048576 with two decimals and "MB" behind if the number is less than 1024*1024*1000
1361
1362
the number divided by 1073741824 with two decimals and "GB" behind if the number is less than 1024*1024*1024*1000
1363
1364
the number divided by 1099511627776 with two decimals and "TB" behind otherwise
1365
1366
Examples:
1367
1368
print bytes_readable(999); # 999 B
1369
print bytes_readable(1000); # 1000 B
1370
print bytes_readable(1001); # 0.98 kB
1371
print bytes_readable(1024); # 1.00 kB
1372
print bytes_readable(1153433.6); # 1.10 MB
1373
print bytes_readable(1181116006.4); # 1.10 GB
1374
print bytes_readable(1209462790553.6); # 1.10 TB
1375
print bytes_readable(1088516511498.24*1000); # 990.00 TB
1376
print bytes_readable(1088516511498.24*1000,3); # 990.000 TB
1377
print bytes_readable(1088516511498.24*1000,1); # 990.0 TB
1378
1379
=cut
1380
1381
sub bytes_readable {
1382
32
32
1
2292
my $bytes=shift();
1383
32
100
100
my $d=shift()||2; #decimals
1384
32
50
44
return undef if !defined $bytes;
1385
32
100
104
return "$bytes B" if abs($bytes) <= 2** 0*1000; #bytes
1386
24
100
177
return sprintf("%.*f kB",$d,$bytes/2**10) if abs($bytes) < 2**10*1000; #kilobyte
1387
8
100
25
return sprintf("%.*f MB",$d,$bytes/2**20) if abs($bytes) < 2**20*1000; #megabyte
1388
6
100
24
return sprintf("%.*f GB",$d,$bytes/2**30) if abs($bytes) < 2**30*1000; #gigabyte
1389
4
50
40
return sprintf("%.*f TB",$d,$bytes/2**40) if abs($bytes) < 2**40*1000; #terrabyte
1390
0
0
return sprintf("%.*f PB",$d,$bytes/2**50); #petabyte, exabyte, zettabyte, yottabyte
1391
}
1392
1393
=head2 sec_readable
1394
1395
Time written as C< 14h 37m > is often more humanly comprehensible than C< 52620 seconds >.
1396
1397
print sec_readable( 0 ); # 0s
1398
print sec_readable( 0.0123 ); # 0.0123s
1399
print sec_readable(-0.0123 ); # -0.0123s
1400
print sec_readable( 1.23 ); # 1.23s
1401
print sec_readable( 1 ); # 1s
1402
print sec_readable( 9.87 ); # 9.87s
1403
print sec_readable( 10 ); # 10s
1404
print sec_readable( 10.1 ); # 10.1s
1405
print sec_readable( 59 ); # 59s
1406
print sec_readable( 59.123 ); # 59.1s
1407
print sec_readable( 60 ); # 1m 0s
1408
print sec_readable( 60.1 ); # 1m 0s
1409
print sec_readable( 121 ); # 2m 1s
1410
print sec_readable( 131 ); # 2m 11s
1411
print sec_readable( 1331 ); # 22m 11s
1412
print sec_readable(-1331 ); # -22m 11s
1413
print sec_readable( 13331 ); # 3h 42m
1414
print sec_readable( 133331 ); # 1d 13h
1415
print sec_readable( 1333331 ); # 15d 10h
1416
print sec_readable( 13333331 ); # 154d 7h
1417
print sec_readable( 133333331 ); # 4yr 82d
1418
print sec_readable( 1333333331 ); # 42yr 91d
1419
1420
=cut
1421
1422
sub sec_readable {
1423
25
25
1
3759
my $s=shift();
1424
25
32
my($h,$d,$y)=(3600,24*3600,365.25*24*3600);
1425
25
100
100
275
!defined$s ? undef
100
100
100
100
100
50
50
1426
:!length($s) ? ''
1427
:$s<0 ? '-'.sec_readable(-$s)
1428
:$s<60 && int($s)==$s
1429
? $s."s"
1430
:$s<60 ? sprintf("%.*fs",int(3+-log($s)/log(10)),$s)
1431
:$s<3600 ? int($s/60)."m " .($s%60) ."s"
1432
:$s<24*3600 ? int($s/$h)."h " .int(($s%$h)/60)."m"
1433
:$s<366*24*3600 ? int($s/$d)."d " .int(($s%$d)/$h)."h"
1434
: int($s/$y)."yr ".int(($s%$y)/$d)."d";
1435
}
1436
1437
=head2 int2roman
1438
1439
Converts integers to roman numbers.
1440
1441
B
1442
1443
print int2roman(1234); # prints MCCXXXIV
1444
print int2roman(1971); # prints MCMLXXI
1445
1446
(Adapted subroutine from Peter J. Acklam, jacklam(&)math.uio.no)
1447
1448
I = 1
1449
V = 5
1450
X = 10
1451
L = 50
1452
C = 100 (centum)
1453
D = 500
1454
M = 1000 (mille)
1455
1456
See also L.
1457
1458
See L for more.
1459
1460
=head2 roman2int
1461
1462
roman2int("MCMLXXI") == 1971
1463
1464
=cut
1465
1466
#alternative algorithm: http://www.rapidtables.com/convert/number/how-number-to-roman-numerals.htm
1467
sub int2roman {
1468
1161
1161
1
5631
my($n,@p)=(shift,[],[1],[1,1],[1,1,1],[1,2],[2],[2,1],[2,1,1],[2,1,1,1],[1,3],[3]);
1469
!defined($n)? undef
1470
: !length($n) ? ""
1471
: int($n)!=$n ? croak"int2roman: $n is not an integer"
1472
: $n==0 ? ""
1473
: $n<0 ? "-".int2roman(-$n)
1474
: $n>3999 ? "M".int2roman($n-1000)
1475
1161
100
3823
: join'',@{[qw/I V X L C D M/]}[map{my$i=$_;map($_+5-$i*2,@{$p[$n/10**(3-$i)%10]})}(0..3)];
913
100
3597
3652
100
2158
3652
50
1918
3652
100
5908
100
1476
}
1477
1478
sub roman2int {
1479
1017
1017
1
4310
my($r,$n,%c)=(shift,0,'',0,qw/I 1 V 5 X 10 L 50 C 100 D 500 M 1000/);
1480
1017
100
66
2767
$r=~s/^-//?-roman2int($r):
1481
$r=~s/(C?)([DM])|(X?)([LCDM])|(I?)([VXLCDM])|(I)|(.)/
1482
croak "roman2int: Invalid number $r" if $8;
1483
$n += $c{$2||$4||$6||$7} - $c{$1||$3||$5||''}; ''/eg && $n
1484
}
1485
1486
#sub roman2int_slow {
1487
# my $r=shift;
1488
# $r=~s,^\-,, ? 0-roman2int($r)
1489
# : $r=~s,^M,,i ? 1000+roman2int($r)
1490
# : $r=~s,^CM,,i ? 900+roman2int($r)
1491
# : $r=~s,^D,,i ? 500+roman2int($r)
1492
# : $r=~s,^CD,,i ? 400+roman2int($r)
1493
# : $r=~s,^C,,i ? 100+roman2int($r)
1494
# : $r=~s,^XC,,i ? 90+roman2int($r)
1495
# : $r=~s,^L,,i ? 50+roman2int($r)
1496
# : $r=~s,^XL,,i ? 40+roman2int($r)
1497
# : $r=~s,^X,,i ? 10+roman2int($r)
1498
# : $r=~s,^IX,,i ? 9+roman2int($r)
1499
# : $r=~s,^V,,i ? 5+roman2int($r)
1500
# : $r=~s,^IV,,i ? 4+roman2int($r)
1501
# : $r=~s,^I,,i ? 1+roman2int($r)
1502
# : !length($r) ? 0
1503
# : croak "Invalid roman number $r";
1504
#}
1505
1506
=head2 distance
1507
1508
B the four decimal numbers of two GPS positions: latutude1, longitude1, latitude2, longitude2
1509
1510
B the air distance in meters between the two points
1511
1512
Calculation is done using the Haversine Formula for spherical distance:
1513
1514
a = sin((lat2-lat1)/2)^2
1515
+ sin((lon2-lon1)/2)^2 * cos(lat1) * cos(lat2);
1516
1517
c = 2 * atan2(min(1,sqrt(a)),
1518
min(1,sqrt(1-a)))
1519
1520
distance = c * R
1521
1522
With earth radius set to:
1523
1524
R = Re - (Re-Rp) * sin(abs(lat1+lat2)/2)
1525
1526
Where C (equatorial radius) and C (polar radius).
1527
1528
B
1529
1530
my @oslo = ( 59.93937, 10.75135); # oslo in norway
1531
my @rio = (-22.97673, -43.19508); # rio in brazil
1532
1533
printf "%.1f km\n", distance(@oslo,@rio)/1000; # 10431.7 km
1534
printf "%.1f km\n", distance(@rio,@oslo)/1000; # 10431.7 km
1535
printf "%.1f nmi\n", distance(@oslo,@rio)/1852.000; # 5632.7 nmi (nautical miles)
1536
printf "%.1f miles\n",distance(@oslo,@rio)/1609.344; # 6481.9 miles
1537
printf "%.1f miles\n",conv(distance(@oslo,@rio),"meters","miles");# 6481.9 miles
1538
1539
See L
1540
1541
and L
1542
1543
and L
1544
1545
and L, but Acme::Tools::distance() is about 8 times faster.
1546
1547
=cut
1548
1549
our $Distance_factor = $PI / 180;
1550
0
0
0
0
sub acos { atan2( sqrt(1 - $_[0] * $_[0]), $_[0] ) }
1551
sub distance_great_circle {
1552
0
0
0
0
my($lat1,$lon1,$lat2,$lon2)=map $Distance_factor*$_, @_;
1553
0
0
my($Re,$Rp)=( 6378137.0, 6356752.3 ); #earth equatorial and polar radius
1554
0
0
my $R=$Re-($Re-$Rp)*sin(abs($lat1+$lat2)/2); #approx
1555
0
0
return $R*acos(sin($lat1)*sin($lat2)+cos($lat1)*cos($lat2)*cos($lon2-$lon1))
1556
}
1557
1558
sub distance {
1559
4
4
1
786
my($lat1,$lon1,$lat2,$lon2)=map $Distance_factor*$_, @_;
1560
4
35
my $a= sin(($lat2-$lat1)/2)**2
1561
+ sin(($lon2-$lon1)/2)**2 * cos($lat1) * cos($lat2);
1562
4
50
5
my $sqrt_a =sqrt($a); $sqrt_a =1 if $sqrt_a >1;
4
10
1563
4
50
5
my $sqrt_1ma=sqrt(1-$a); $sqrt_1ma=1 if $sqrt_1ma>1;
4
8
1564
4
16
my $c=2*atan2($sqrt_a,$sqrt_1ma);
1565
4
4
my($Re,$Rp)=( 6378137.0, 6356752.3 ); #earth equatorial and polar radius
1566
4
6
my $R=$Re-($Re-$Rp)*sin(abs($lat1+$lat2)/2); #approx
1567
4
14
return $c*$R;
1568
}
1569
1570
1571
=head2 big
1572
1573
=head2 bigi
1574
1575
=head2 bigf
1576
1577
=head2 bigr
1578
1579
=head2 bigscale
1580
1581
big, bigi, bigf, bigr and bigscale are sometimes convenient shorthands for using
1582
C<< Math::BigInt->new() >>, C<< Math::BigFloat->new() >> and C<< Math::BigRat->new() >>
1583
(preferably with the GMP for faster calculations). Examples:
1584
1585
my $num1 = big(3); #returns a new Math::BigInt-object
1586
my $num2 = big('3.0'); #returns a new Math::BigFloat-object
1587
my $num3 = big(3.0); #returns a new Math::BigInt-object
1588
my $num4 = big(3.1); #returns a new Math::BigFloat-object
1589
my $num5 = big('2/7'); #returns a new Math::BigRat-object
1590
my($i1,$f1,$i2,$f2) = big(3,'3.0',3.0,3.1); #returns the four new numbers, as the above four lines
1591
#uses wantarray
1592
1593
print 2**200; # 1.60693804425899e+60
1594
print big(2)**200; # 1606938044258990275541962092341162602522202993782792835301376
1595
print 2**big(200); # 1606938044258990275541962092341162602522202993782792835301376
1596
print big(2**200); # 1606938044258990000000000000000000000000000000000000000000000
1597
1598
print 1/7; # 0.142857142857143
1599
print 1/big(7); # 0 because of integer arithmetics
1600
print 1/big(7.0); # 0 because 7.0 is viewed as an integer
1601
print 1/big('7.0'); # 0.1428571428571428571428571428571428571429
1602
print 1/bigf(7); # 0.1428571428571428571428571428571428571429
1603
print bigf(1/7); # 0.142857142857143 probably not what you wanted
1604
1605
print 1/bigf(7); # 0.1428571428571428571428571428571428571429
1606
bigscale(80); # for increased precesion (default is 40)
1607
print 1/bigf(7); # 0.14285714285714285714285714285714285714285714285714285714285714285714285714285714
1608
1609
In C the characters C<< . >> and C<< / >> will make it return a
1610
Math::BigFloat- and Math::BigRat-object accordingly. Or else a Math::BigInt-object is returned.
1611
1612
Instead of guessing, use C, C and C to return what you want.
1613
1614
B Acme::Tools does not depend on Math::BigInt and
1615
Math::BigFloat and GMP, but these four big*-subs do (by C).
1616
To use big, bigi, bigf and bigr effectively you should
1617
install Math::BigInt::GMP and Math::BigFloat::GMP like this:
1618
1619
sudo cpanm Math::BigFloat Math::GMP Math::BingInt::GMP # or
1620
sudo cpan Math::BigFloat Math::GMP Math::BingInt::GMP # or
1621
sudo yum install perl-Math-BigInt-GMP perl-Math-GMP # on RedHat, RHEL or
1622
sudo apt-get install libmath-bigint-gmp-perl libmath-gmp-perl # on Ubuntu or some other way
1623
1624
Unless GMP is installed for perl like this, the Math::Big*-modules
1625
will fall back to using similar but slower built in modules. See: L
1626
1627
=cut
1628
1629
sub bigi {
1630
9
100
9
1
284
eval q(use Math::BigInt try=>"GMP") if !$INC{'Math/BigInt.pm'};
2
2
1013
2
16907
2
229
1631
9
100
10344
if (wantarray) { return (map Math::BigInt->new($_),@_) }
2
13
1632
7
24
else { return Math::BigInt->new($_[0]) }
1633
}
1634
sub bigf {
1635
7
100
7
1
1677
eval q(use Math::BigFloat try=>"GMP") if !$INC{'Math/BigFloat.pm'};
2
2
894
2
16754
2
396
1636
7
100
1175
if (wantarray) { return (map Math::BigFloat->new($_),@_) }
2
11
1637
5
19
else { return Math::BigFloat->new($_[0]) }
1638
}
1639
sub bigr {
1640
17
100
17
1
15042
eval q(use Math::BigRat try=>"GMP") if !$INC{'Math/BigRat.pm'};
2
2
645
2
11870
2
406
1641
17
100
1657
if (wantarray) { return (map Math::BigRat->new($_),@_) }
7
33
1642
10
36
else { return Math::BigRat->new($_[0]) }
1643
}
1644
sub big {
1645
wantarray
1646
12
50
12
1
4904
? (map /\./ ? bigf($_) : /\// ? bigr($_) : bigi($_), @_)
100
100
100
100
1647
: $_[0]=~/\./ ? bigf($_[0]) : $_[0]=~/\// ? bigr($_[0]) : bigi($_[0]);
1648
}
1649
sub bigscale {
1650
4
100
4
1
3517
@_==1 or croak "bigscale requires one and only one argument";
1651
2
4
my $scale=shift();
1652
2
50
7
eval q(use Math::BigInt try=>"GMP") if !$INC{'Math/BigInt.pm'};
1653
2
50
8
eval q(use Math::BigFloat try=>"GMP") if !$INC{'Math/BigFloat.pm'};
1654
2
50
8
eval q(use Math::BigRat try=>"GMP") if !$INC{'Math/BigRat.pm'};
1655
2
9
Math::BigInt->div_scale($scale);
1656
2
37
Math::BigFloat->div_scale($scale);
1657
2
24
Math::BigRat->div_scale($scale);
1658
2
18
return;
1659
}
1660
1661
#my $R_authalic=6371007.2; #earth radius in meters, mean, Authalic radius, real R varies 6353-6384km, http://en.wikipedia.org/wiki/Earth_radius
1662
#*)
1663
# ( 6378157.5, 6356772.2 ) #hmm
1664
#my $e=0.081819218048345;#sqrt(1 - $b**2/$a**2); #eccentricity of the ellipsoid
1665
#my($a,$b)=( 6378137.0, 6356752.3 ); #earth equatorial and polar radius
1666
#warn "e=$e\n";
1667
#warn "t=".(1 - $e**2)."\n";
1668
#warn "n=".((1 - $e**2 * sin(($lat1+$lat1)/2)**2)**1.5)."\n";
1669
#my $t=1 - $e**2;
1670
#my $n=(1 - $e**2 * sin(($lat1+$lat1)/2)**2)**1.5;
1671
#warn "t=$t\n";
1672
#warn "n=$n\n";
1673
#$a * (1 - $e**2) / ((1 - $e**2 * sin(($lat1+$lat2)/2)**2)**1.5); #hmm avg lat
1674
#$R=$a * $t/$n;
1675
1676
#=head2 fractional
1677
#=cut
1678
1679
sub fractional { #http://mathcentral.uregina.ca/QQ/database/QQ.09.06/h/lil1.html
1680
0
0
0
0
carp "fractional: NOT FINISHED";
1681
0
0
my $n=shift;
1682
0
0
print "----fractional n=$n\n";
1683
0
0
my $nn=$n; my $dec;
0
0
1684
0
0
$nn=~s,\.(\d+)$,$dec=length($1);$1.,;
1685
0
0
my $l;
1686
0
0
my $max=0;
1687
0
0
my($te,$ne);
1688
0
0
for(1..length($nn)/2){
1689
0
0
0
if( $nn=~/^(\d*?)((.{$_})(\3)+)$/ ){
1690
0
0
print "_ = $_ ".length($2)."\n";
1691
0
0
0
if(length($2)>$max){
1692
0
0
$l=$_;
1693
0
0
$te="$1$3"-$1;
1694
0
0
$max=length($2);
1695
}
1696
}
1697
}
1698
0
0
0
0
return fractional($n) if !$l and !recursed() and $dec>6 and substr($n,-1) and substr($n,-1)--;
0
0
0
1699
0
0
print "l=$l max=$max\n";
1700
0
0
$ne="9" x $l;
1701
0
0
print log($n),"\n";
1702
0
0
0
my $st=sub{print "status: ".($te/$ne)." n=$n ".($n/$te*$ne)."\n"};
0
0
1703
0
0
while($n/$te*$ne<0.99){ &$st(); $ne*=10 }
0
0
0
0
1704
0
0
while($te/$n/$ne<0.99){ &$st(); $te*=10 }
0
0
0
0
1705
0
0
&$st();
1706
0
0
while(1){
1707
0
0
my $d=gcd($te,$ne); print "gcd=$d\n";
0
0
1708
0
0
0
last if $d==1;
1709
0
0
$te/=$d; $ne/=$d;
0
0
1710
}
1711
0
0
&$st();
1712
0
0
0
wantarray ? ($te,$ne) : "$te/$ne"; #gcd()
1713
}
1714
1715
=head2 isnum
1716
1717
B String to be tested on regexp C<< /^ \s* [\-\+]? (?: \d*\.\d+ | \d+ ) (?:[eE][\-\+]?\d+)?\s*$/x >>. If no argument is given isnum checks C<< $_ >>.
1718
1719
B True or false (1 or 0)
1720
1721
use Acme::Tools;
1722
my @e=(' +32.12354E-21 ', 2.2, '9' x 99999, ' -123.12', '29,323.31', '29 323.31');
1723
print isnum() ? 'num' : 'str' for @e; #prints num for every element except the last two
1724
print $_=~$Re_isnum ? 'num' : 'str' for @e; #same
1725
1726
=cut
1727
1728
our $Re_isnum=qr/^ \s* [\-\+]? (?: \d*\.\d+ | \d+ ) (?:[eE][\-\+]?\d+)?\s*$/x;
1729
20
100
20
1
3331
sub isnum {(@_?$_[0]:$_)=~$Re_isnum}
1730
1731
=head2 between
1732
1733
Input: Three arguments.
1734
1735
Returns: Something I if the first argument is numerically between the two next.
1736
1737
=cut
1738
1739
sub between {
1740
27
27
1
7173
my($test,$fom,$tom)=@_;
1741
33
33
225
no warnings;
33
48
33
6672
1742
27
100
66
588
return $fom<$tom ? $test>=$fom&&$test<=$tom
33
1743
: $test>=$tom&&$test<=$fom;
1744
}
1745
1746
=head2 curb
1747
1748
B Three arguments: value, minumum, maximum.
1749
1750
B Returns the value if its between the given minumum and maximum.
1751
Returns minimum if the value is less or maximum if the value is more.
1752
1753
my $v = 234;
1754
print curb( $v, 200, 250 ); #prints 234
1755
print curb( $v, 150, 200 ); #prints 200
1756
print curb( $v, 250, 300 ); #prints 250
1757
print curb(\$v, 250, 300 ); #prints 250 and changes $v
1758
print $v; #prints 250
1759
1760
In the last example $v is changed because the argument is a reference. (To keep backward compatability, C<< bound() >> is a synonym for C<< curb() >>)
1761
1762
=cut
1763
1764
sub curb {
1765
129
129
1
1114
my($val,$min,$max)=@_;
1766
129
50
66
1165
croak "curb: wrong args" if @_!=3 or !defined$min or !defined$max or !defined$val or $min>$max;
100
66
66
1767
126
100
149
return $$val=curb($$val,$min,$max) if ref($val) eq 'SCALAR';
1768
125
100
206
$val < $min ? $min :
100
1769
$val > $max ? $max :
1770
$val;
1771
}
1772
0
0
0
0
sub bound { curb(@_) }
1773
0
0
0
0
sub log10 { log($_[0])/log(10) }
1774
0
0
0
0
sub log2 { log($_[0])/log(2) }
1775
1776
=head1 STRINGS
1777
1778
=head2 upper
1779
1780
=head2 lower
1781
1782
Returns input string as uppercase or lowercase.
1783
1784
Can be used if Perls build in C and C for some reason does not convert æøå or other latin letters outsize a-z.
1785
1786
Converts C<< æøåäëïöüÿâêîôûãõàèìòùáéíóúýñð >> to and from C<< ÆØÅÄËÏÖÜ?ÂÊÎÔÛÃÕÀÈÌÒÙÁÉÍÓÚÝÑÐ >>
1787
1788
See also C<< perldoc -f uc >> and C<< perldoc -f lc >>
1789
1790
=head2 trim
1791
1792
Removes space from the beginning and end of a string. Whitespace (C<< \s >>) that is.
1793
And removes any whitespace inside the string of more than one char, leaving the first whitespace char. Thus:
1794
1795
trim(" asdf \t\n 123 ") eq "asdf 123"
1796
trim(" asdf\t\n 123\n") eq "asdf\t123"
1797
1798
Works on C<< $_ >> if no argument i given:
1799
1800
print join",", map trim, " please ", " remove ", " my ", " spaces "; # please,remove,my,spaces
1801
print join",", trim(" please ", " remove ", " my ", " spaces "); # works on arrays as well
1802
my $s=' please '; trim(\$s); # now $s eq 'please'
1803
trim(\@untrimmedstrings); # trims array strings inplace
1804
@untrimmedstrings = map trim, @untrimmedstrings; # same, works on $_
1805
trim(\$_) for @untrimmedstrings; # same, works on \$_
1806
1807
=head2 lpad
1808
1809
=head2 rpad
1810
1811
Left or right pads a string to the given length by adding one or more spaces at the end for I or at the start for I.
1812
1813
B First argument: string to be padded. Second argument: length of the output. Optional third argument: character(s) used to pad.
1814
Default is space.
1815
1816
rpad('gomle',9); # 'gomle '
1817
lpad('gomle',9); # ' gomle'
1818
rpad('gomle',9,'-'); # 'gomle----'
1819
lpad('gomle',9,'+'); # '++++gomle'
1820
rpad('gomle',4); # 'goml'
1821
lpad('gomle',4); # 'goml'
1822
rpad('gomle',7,'xyz'); # 'gomlxy'
1823
lpad('gomle',10,'xyz'); # 'xyzxygoml'
1824
1825
=head2 cpad
1826
1827
Center pads. Pads the string both on left and right equal to the given length. Centers the string. Pads right side first.
1828
1829
cpad('mat',5) eq ' mat '
1830
cpad('mat',4) eq 'mat '
1831
cpad('mat',6) eq ' mat '
1832
cpad('mat',9) eq ' mat '
1833
cpad('mat',5,'+') eq '+mat+'
1834
cpad('MMMM',20,'xyzXYZ') eq 'xyzXYZxyMMMMxyzXYZxy'
1835
1836
=cut
1837
1838
33
50
33
1
144
sub upper {no warnings;my $s=@_?shift:$_;$s=~tr/a-zæøåäëïöüÿâêîôûãõàèìòùáéíóúýñð/A-ZÆØÅÄËÏÖÜÿÂÊÎÔÛÃÕÀÈÌÒÙÁÉÍÓÚÝÑÐ/;$s}
33
1
46
33
2003
1
4
1
3
1
5
1839
33
50
33
1
120
sub lower {no warnings;my $s=@_?shift:$_;$s=~tr/A-ZÆØÅÄËÏÖÜÿÂÊÎÔÛÃÕÀÈÌÒÙÁÉÍÓÚÝÑÐ/a-zæøåäëïöüÿâêîôûãõàèìòùáéíóúýñð/;$s}
33
1
39
33
84542
1
4
1
3
1
4
1840
1841
sub trim {
1842
24
100
24
1
56
return trim($_) if !@_;
1843
20
100
31
return map trim($_), @_ if @_>1;
1844
19
18
my $s=shift;
1845
19
100
29
if(ref($s) eq 'SCALAR'){ $$s=~s,^\s+|(?<=\s)\s+|\s+$,,g; return $$s}
5
21
5
10
1846
14
100
23
if(ref($s) eq 'ARRAY') { trim(\$_) for @$s; return $s }
1
3
1
1
1847
13
62
$s=~s,^\s+|(?<=\s)\s+|\s+$,,g;
1848
13
41
$s;
1849
}
1850
1851
sub rpad {
1852
17
17
1
541
my($s,$l,$p)=@_;
1853
17
100
66
45
$p=' ' if @_<3 or !length($p);
1854
17
37
$s.=$p while length($s)<$l;
1855
17
52
substr($s,0,$l);
1856
}
1857
1858
sub lpad {
1859
5
5
1
938
my($s,$l,$p)=@_;
1860
5
100
66
19
$p=' ' if @_<3 or !length($p);
1861
5
100
32
$l
1862
? substr($s,0,$l)
1863
: substr($p x (1+$l/length($p)), 0, $l-length($s)).$s;
1864
}
1865
1866
sub cpad {
1867
11
11
1
1775
my($s,$l,$p)=@_;
1868
11
100
66
41
$p=' ' if @_<3 or !length($p);
1869
11
10
my $ls=length($s);
1870
11
50
18
return substr($s,0,$l) if $l<$ls;
1871
11
22
$p=$p x (($l-$ls+2)/length($p));
1872
11
46
substr($p, 0, ($l-$ls )/2) . $s .
1873
substr($p, 0, ($l-$ls+1)/2);
1874
}
1875
1876
sub cpad_old {
1877
0
0
0
0
my($s,$l,$p)=@_;
1878
0
0
0
$p=' ' if !length($p);
1879
0
0
0
return substr($s,0,$l) if $l
1880
0
0
my $i=0;
1881
0
0
while($l>length($s)){
1882
0
0
0
my $pc=substr($p,($i==int($i)?1:-1)*($i%length($p)),1);
1883
0
0
0
$i==int($i) ? ($s.=$pc) : ($s=$pc.$s);
1884
0
0
$i+=1/2;
1885
}
1886
0
0
$s;
1887
}
1888
1889
=head2 trigram
1890
1891
B A string (i.e. a name). And an optional x (see example 2)
1892
1893
B A list of this strings trigrams (See examlpe)
1894
1895
B
1896
1897
print join ", ", trigram("Kjetil Skotheim");
1898
1899
Prints:
1900
1901
Kje, jet, eti, til, il , l S, Sk, Sko, kot, oth, the, hei, eim
1902
1903
B
1904
1905
Default is 3, but here 4 is used instead in the second optional input argument:
1906
1907
print join ", ", trigram("Kjetil Skotheim", 4);
1908
1909
And this prints:
1910
1911
Kjet, jeti, etil, til , il S, l Sk, Sko, Skot, koth, othe, thei, heim
1912
1913
C was created for "fuzzy" name searching. If you have a database of many names,
1914
addresses, phone numbers, customer numbers etc. You can use trigram() to search
1915
among all of those at the same time. If the search form only has one input field.
1916
One general search box.
1917
1918
Store all of the trigrams of the trigram-indexed input fields coupled
1919
with each person, and when you search, you take each trigram of you
1920
query string and adds the list of people that has that trigram. The
1921
search result should then be sorted so that the persons with most hits
1922
are listed first. Both the query strings and the indexed database
1923
fields should have a space added first and last before C-ing
1924
them.
1925
1926
This search algorithm is not includes here yet...
1927
1928
C should perhaps have been named ngram for obvious reasons.
1929
1930
=head2 sliding
1931
1932
Same as trigram (except there is no default width). Works also with arrayref instead of string.
1933
1934
Example:
1935
1936
sliding( ["Reven","rasker","over","isen"], 2 )
1937
1938
Result:
1939
1940
( ['Reven','rasker'], ['rasker','over'], ['over','isen'] )
1941
1942
=head2 chunks
1943
1944
Splits strings and arrays into chunks of given size:
1945
1946
my @a = chunks("Reven rasker over isen",7);
1947
my @b = chunks([qw/Og gubben satt i kveldinga og koste seg med skillinga/], 3);
1948
1949
Resulting arrays:
1950
1951
( 'Reven r', 'asker o', 'ver ise', 'n' )
1952
( ['Og','gubben','satt'], ['i','kveldinga','og'], ['koste','seg','med'], ['skillinga'] )
1953
1954
=head2 chars
1955
1956
chars("Tittentei"); # ('T','i','t','t','e','n','t','e','i')
1957
1958
=cut
1959
1960
2
100
2
1
46
sub trigram { sliding($_[0],$_[1]||3) }
1961
1962
sub sliding {
1963
3
3
1
4
my($s,$w)=@_;
1964
3
100
32
return map substr($s,$_,$w), 0..length($s)-$w if !ref($s);
1965
1
50
16
return map [@$s[$_..$_+$w-1]], 0..@$s-$w if ref($s) eq 'ARRAY';
1966
}
1967
1968
sub chunks {
1969
2
2
1
4
my($s,$w)=@_;
1970
2
100
32
return $s=~/(.{1,$w})/g if !ref($s);
1971
1
50
17
return map [@$s[$_*$w .. min($_*$w+$w-1,$#$s)]], 0..$#$s/$w if ref($s) eq 'ARRAY';
1972
}
1973
1974
0
0
1
0
sub chars { split//, shift }
1975
1976
=head2 repl
1977
1978
Synonym for replace().
1979
1980
=head2 replace
1981
1982
Return the string in the first input argument, but where pairs of search-replace strings (or rather regexes) has been run.
1983
1984
Works as C in Oracle, or rather regexp_replace() in Oracle 10 and onward. Except that this C accepts more than three arguments.
1985
1986
Examples:
1987
1988
print replace("water","ater","ine"); # Turns water into wine
1989
print replace("water","ater"); # w
1990
print replace("water","at","eath"); # weather
1991
print replace("water","wa","ju",
1992
"te","ic",
1993
"x","y", # No x is found, no y is returned
1994
'r$',"e"); # Turns water into juice. 'r$' says that the r it wants
1995
# to change should be the last letters. This reveals that
1996
# second, fourth, sixth and so on argument is really regexs,
1997
# not normal strings. So use \ (or \\ inside "") to protect
1998
# the special characters of regexes. You probably also
1999
# should write qr/regexp/ instead of 'regexp' if you make
2000
# use of regexps here, just to make it more clear that
2001
# these are really regexps, not strings.
2002
2003
print replace('JACK and JUE','J','BL'); # prints BLACK and BLUE
2004
print replace('JACK and JUE','J'); # prints ACK and UE
2005
print replace("abc","a","b","b","c"); # prints ccc (not bcc)
2006
2007
If the first argument is a reference to a scalar variable, that variable is changed "in place".
2008
2009
Example:
2010
2011
my $str="test";
2012
replace(\$str,'e','ee','s','S');
2013
print $str; # prints teeSt
2014
2015
=cut
2016
2017
46
46
1
60
sub replace { repl(@_) }
2018
sub repl {
2019
83
83
1
134
my $str=shift;
2020
83
100
153
return $$str=replace($$str,@_) if ref($str) eq 'SCALAR';
2021
#return ? if ref($str) eq 'ARRAY';
2022
#return ? if ref($str) eq 'HASH';
2023
82
127
while(@_){
2024
109
131
my($fra,$til)=(shift,shift);
2025
109
100
3806
defined $til ? $str=~s/$fra/$til/g : $str=~s/$fra//g;
2026
}
2027
82
379
return $str;
2028
}
2029
2030
=head1 ARRAYS
2031
2032
=head2 min
2033
2034
Returns the smallest number in a list. Undef is ignored.
2035
2036
@lengths=(2,3,5,2,10,undef,5,4);
2037
$shortest = min(@lengths); # returns 2
2038
2039
Note: The comparison operator is perls C<< < >>> which means empty strings is treated as C<0>, the number zero. The same goes for C, except of course C<< > >> is used instead.
2040
2041
min(3,4,5) # 3
2042
min(3,4,5,undef) # 3
2043
min(3,4,5,'') # returns the empty string
2044
2045
=head2 max
2046
2047
Returns the largest number in a list. Undef is ignored.
2048
2049
@heights=(123,90,134,undef,132);
2050
$highest = max(@heights); # 134
2051
2052
=head2 mins
2053
2054
Just as L, except for strings.
2055
2056
print min(2,7,10); # 2
2057
print mins("2","7","10"); # 10
2058
print mins(2,7,10); # 10
2059
2060
=head2 maxs
2061
2062
Just as L, except for strings.
2063
2064
print max(2,7,10); # 10
2065
print maxs("2","7","10"); # 7
2066
print maxs(2,7,10); # 7
2067
2068
=cut
2069
2070
35
100
100
35
1
1604
sub min {my $min;for(@_){ $min=$_ if defined($_) and !defined($min) || $_ < $min } $min }
35
66
50
7087
20189
35
204
2071
1
100
100
1
1
2
sub mins {my $min;for(@_){ $min=$_ if defined($_) and !defined($min) || $_ lt $min} $min }
1
33
3
3
17
1
4
2072
32
100
100
32
1
27
sub max {my $max;for(@_){ $max=$_ if defined($_) and !defined($max) || $_ > $max } $max }
32
66
47
7093
19673
32
429
2073
1
100
100
1
1
1
sub maxs {my $max;for(@_){ $max=$_ if defined($_) and !defined($max) || $_ gt $max} $max }
1
33
3
3
17
1
3
2074
2075
=head2 zip
2076
2077
B Two or more arrayrefs. A number of equal sized arrays
2078
containing numbers, strings or anything really.
2079
2080
B An array of those input arrays zipped (interlocked, merged) into each other.
2081
2082
print join " ", zip( [1,3,5], [2,4,6] ); # 1 2 3 4 5 6
2083
print join " ", zip( [1,4,7], [2,5,8], [3,6,9] ); # 1 2 3 4 5 6 7 8 9
2084
2085
Example:
2086
2087
zip() creates a hash where the keys are found in the first array and values in the secord in the correct order:
2088
2089
my @media = qw/CD DVD VHS LP Blueray/;
2090
my @count = qw/20 12 2 4 3/;
2091
my %count = zip(\@media,\@count); # or zip( [@media], [@count] )
2092
print "I got $count{DVD} DVDs\n"; # I got 12 DVDs
2093
2094
Dies (croaks) if the two lists are of different sizes
2095
2096
...or any input argument is not an array ref.
2097
2098
=cut
2099
2100
sub zip {
2101
7
7
1
56279
my @t=@_;
2102
7
66
368
ref($_) ne 'ARRAY' and croak "ERROR: zip should have arrayrefs as arguments" for @t;
2103
5
66
14
@{$t[$_]} != @{$t[0]} and croak "ERROR: zip should have equal sized arrays" for 1..$#t;
5
6
5
222
2104
3
4
my @res;
2105
3
2
for my $i (0..@{$t[0]}-1){
3
9
2106
9
17
push @res, $$_[$i] for @t;
2107
}
2108
3
18
return @res;
2109
}
2110
2111
2112
=head2 pushsort
2113
2114
Adds one or more element to a numerically sorted array and keeps it sorted.
2115
2116
pushsort @a, 13; # this...
2117
push @a, 13; @a = sort {$a<=>$b} @a; # is the same as this, but the former is faster if @a is large
2118
2119
=head2 pushsortstr
2120
2121
Same as pushsort except that the array is kept sorted alphanumerically (cmp) instead of numerically (<=>). See L.
2122
2123
pushsort @a, "abc"; # this...
2124
push @a, "abc"; @a = sort @a; # is the same as this, but the former is faster if @a is large
2125
2126
=cut
2127
2128
our $Pushsort_cmpsub=undef;
2129
sub pushsort (\@@) {
2130
10915
10915
1
10367
my $ar=shift;
2131
2132
#not needed but often faster
2133
10915
100
100
20098
if(!defined $Pushsort_cmpsub and @$ar+@_<100){ #hm speedup?
2134
2348
2379
@$ar=(sort {$a<=>$b} (@$ar,@_));
51995
29892
2135
2348
1815
return 0+@$ar;
2136
}
2137
2138
8567
7879
for my $v (@_){
2139
2140
#not needed but often faster
2141
7567
100
9013
if(!defined $Pushsort_cmpsub){ #faster rank() in most cases
2142
901
50
0
1145
push @$ar, $v and next if $v>=$$ar[-1];
2143
901
100
50
1130
unshift @$ar, $v and next if $v< $$ar[0];
2144
}
2145
2146
7564
8278
splice @$ar, binsearch($v,$ar,1,$Pushsort_cmpsub)+1, 0, $v;
2147
}
2148
8567
8812
0+@$ar
2149
}
2150
0
0
1
0
sub pushsortstr(\@@){ local $Pushsort_cmpsub=sub{$_[0]cmp$_[1]}; pushsort(@_) } #speedup: copy sub pushsort
1000
1000
2229
1000
1102
2151
2152
=head2 binsearch
2153
2154
Returns the position of an element in a numerically sorted array. Returns undef if the element is not found.
2155
2156
B Two, three or four arguments
2157
2158
B the element to find. Usually a number.
2159
2160
B a reference to the array to search in. The array
2161
should be sorted in ascending numerical order (se exceptions below).
2162
2163
B Optional. Default false.
2164
2165
If true, whether result I should return undef or a fractional position.
2166
2167
If the third argument is false binsearch returns undef if the element is not found.
2168
2169
If the third argument is true binsearch returns 0.5 plus closest position below the searched value.
2170
2171
Returns C< last position + 0.5 > if the searched element is greater than all elements in the sorted array.
2172
2173
Returns C< -0.5 > if the searched element is less than all elements in the sorted array.
2174
2175
Fourth argument: Optional. Default C<< sub { $_[0] <=> $_[1] } >>.
2176
2177
If present, the fourth argument is either:
2178
2179
=over 4
2180
2181
=item * a code-ref that alters the way binsearch compares two elements, default is C<< sub{$_[0]<=>$_[1]} >>
2182
2183
=item * a string that works as a hash key (column name), see example below
2184
2185
=back
2186
2187
B
2188
2189
binsearch(10,[5,10,15,20]); # 1
2190
binsearch(10,[20,15,10,5],undef,sub{$_[1]<=>$_[0]}); # 2 search arrays sorted numerically in opposite order
2191
binsearch("c",["a","b","c","d"],undef,sub{$_[0]cmp$_[1]}); # 2 search arrays sorted alphanumerically
2192
binsearchstr("b",["a","b","c","d"]); # 1 search arrays sorted alphanumerically
2193
2194
my @data=( map { {num=>$_, sqrt=>sqrt($_), square=>$_**2} }
2195
grep !$_%7, 1..1000000 );
2196
my $i = binsearch( {num=>913374}, \@data, undef, sub {$_[0]{num} <=> $_[1]{num}} );
2197
my $i = binsearch( {num=>913374}, \@data, undef, 'num' ); #same as previous line
2198
my $found_hashref = defined $i ? $data[$i] : undef;
2199
2200
=head2 binsearchstr
2201
2202
Same as binsearch except that the arrays is sorted alphanumerically
2203
(cmp) instead of numerically (<=>) and the searched element is a
2204
string, not a number. See L.
2205
2206
=cut
2207
2208
our $Binsearch_steps;
2209
our $Binsearch_maxsteps=100;
2210
sub binsearch {
2211
14611
14611
1
28984
my($search,$aref,$insertpos,$cmpsub)=@_; #search pos of search in array
2212
14611
50
21646
croak "binsearch did not get arrayref as second arg" if ref($aref) ne 'ARRAY';
2213
14611
50
100
39645
croak "binsearch got fourth arg which is not a code-ref" if defined $cmpsub and ref($cmpsub) and ref($cmpsub) ne 'CODE';
66
2214
14611
100
100
27245
if(defined $cmpsub and !ref($cmpsub)){
2215
1
1
my $key=$cmpsub;
2216
1
17
4
$cmpsub = sub{ $_[0]{$key} <=> $_[1]{$key} };
17
46
2217
}
2218
14611
50
17600
return $insertpos ? -0.5 : undef if !@$aref;
100
2219
14599
12029
my($min,$max)=(0,$#$aref);
2220
14599
10208
$Binsearch_steps=0;
2221
14599
17224
while (++$Binsearch_steps <= $Binsearch_maxsteps) {
2222
128752
115639
my $middle=int(($min+$max+0.5)/2);
2223
128752
86838
my $middle_value=$$aref[$middle];
2224
2225
#croak "binsearch got non-sorted array" if !$cmpsub and $$aref[$min]>$$aref[$min]
2226
# or $cmpsub and &$cmpsub($$aref[$min],$$aref[$min])>0;
2227
2228
128752
100
100
464471
if( !$cmpsub and $search < $middle_value
100
100
66
100
100
66
2229
or $cmpsub and &$cmpsub($search,$middle_value) < 0 ) { #print "<\n";
2230
39007
50
66
50005
$max=$min, next if $middle == $max and $min != $max;
2231
39007
50
44940
return $insertpos ? $middle-0.5 : undef if $middle == $max;
100
2232
37554
42096
$max=$middle;
2233
}
2234
elsif( !$cmpsub and $search > $middle_value
2235
or $cmpsub and &$cmpsub($search,$middle_value) > 0 ) { #print ">\n";
2236
82706
100
100
133419
$min=$max, next if $middle == $min and $max != $min;
2237
75185
50
94906
return $insertpos ? $middle+0.5 : undef if $middle == $min;
100
2238
69078
88291
$min=$middle;
2239
}
2240
else { #print "=\n";
2241
7039
10494
return $middle;
2242
}
2243
}
2244
0
0
croak "binsearch exceded $Binsearch_maxsteps steps";
2245
}
2246
2247
sub binsearchfast { # binary search routine finds index just below value
2248
0
0
0
0
my ($x,$v)=@_;
2249
0
0
my ($klo,$khi)=(0,$#{$x});
0
0
2250
0
0
my $k;
2251
0
0
while (($khi-$klo)>1) {
2252
0
0
$k=int(($khi+$klo)/2);
2253
0
0
0
if ($$x[$k]>$v) { $khi=$k; } else { $klo=$k; }
0
0
0
0
2254
}
2255
0
0
return $klo;
2256
}
2257
2258
2259
2
2
1
7
sub binsearchstr {binsearch(@_[0..2],sub{$_[0]cmp$_[1]})}
1
1
9
2260
2261
=head2 rank
2262
2263
B Two or three arguments. N and an arrayref for the list to look at.
2264
2265
In scalar context: Returns the nth smallest number in an array. The array doesn't have to be sorted.
2266
2267
In array context: Returns the n smallest numbers in an array.
2268
2269
To return the n(th) largest number(s) instead of smallest, just negate n.
2270
2271
An optional third argument can be a sub that is used to compare the elements of the input array.
2272
2273
Examples:
2274
2275
my $second_smallest = rank(2, [11,12,13,14]); # 12
2276
my @top10 = rank(-10, [1..100]); # 100, 99, 98, 97, 96, 95, 94, 93, 92, 91
2277
my $max = rank(-1, [101,102,103,102,101]); #103
2278
my @contest = ({name=>"Alice",score=>14},{name=>"Bob",score=>13},{name=>"Eve",score=>12});
2279
my $second = rank(2, \@contest, sub{$_[1]{score}<=>$_[0]{score}})->{name}; #Bob
2280
2281
=head2 rankstr
2282
2283
Just as C but sorts alphanumerically (strings, cmp) instead of numerically.
2284
2285
=cut
2286
2287
sub rank {
2288
35
35
1
339
my($rank,$aref,$cmpsub)=@_;
2289
35
100
60
if($rank<0){
2290
10
100
30006
48
$cmpsub||=sub{$_[0]<=>$_[1]};
30006
93133
2291
10
60004
38
return rank(-$rank,$aref,sub{0-&$cmpsub});
60004
53615
2292
}
2293
25
26
my @sort;
2294
25
24
local $Pushsort_cmpsub=$cmpsub;
2295
25
29
for(@$aref){
2296
8914
10037
pushsort @sort, $_;
2297
8914
100
16641
pop @sort if @sort>$rank;
2298
}
2299
25
100
1552
return wantarray ? @sort : $sort[$rank-1];
2300
}
2301
0
50
0
1
0
sub rankstr {wantarray?(rank(@_,sub{$_[0]cmp$_[1]})):rank(@_,sub{$_[0]cmp$_[1]})}
60145
10
190098
10
57
2302
2303
=head2 eqarr
2304
2305
B Two or more references to arrays.
2306
2307
B True (1) or false (0) for whether or not the arrays are numerically I alphanumerically equal.
2308
Comparing each element in each array with both C< == > and C< eq >.
2309
2310
Examples:
2311
2312
eqarr([1,2,3],[1,2,3],[1,2,3]); # 1 (true)
2313
eqarr([1,2,3],[1,2,3],[1,2,4]); # 0 (false)
2314
eqarr([1,2,3],[1,2,3,4]); # undef (different size, false)
2315
eqarr([1,2,3]); # croak (should be two or more arrays)
2316
eqarr([1,2,3],1,2,3); # croak (not arraysrefs)
2317
2318
=cut
2319
2320
sub eqarr {
2321
27
27
1
1236
my @arefs=@_;
2322
27
100
235
croak if @arefs<2;
2323
26
66
186
ref($_) ne 'ARRAY' and croak for @arefs;
2324
25
100
67
@{$arefs[0]} != @{$arefs[$_]} and return undef for 1..$#arefs;
27
32
27
65
2325
24
31
my $ant;
2326
2327
24
56
for my $ar (@arefs[1..$#arefs]){
2328
26
39
for(0..@$ar-1){
2329
272
50
33
645
++$ant and $ant>100 and croak ">100"; #TODO: feiler ved sammenligning av to tabeller > 10000(?) tall
2330
272
100
66
862
return 0 if $arefs[0][$_] ne $$ar[$_]
2331
or $arefs[0][$_] != $$ar[$_];
2332
}
2333
}
2334
22
150
return 1;
2335
}
2336
2337
=head2 sorted
2338
2339
Return true if the input array is numerically sorted.
2340
2341
@a=(1..10); print "array is sorted" if sorted @a; #true
2342
2343
Optionally the last argument can be a comparison sub:
2344
2345
@person=({Rank=>1,Name=>'Amy'}, {Rank=>2,Name=>'Paula'}, {Rank=>3,Name=>'Ruth'});
2346
print "Persons are sorted" if sorted @person, sub{$_[0]{Rank}<=>$_[1]{Rank}};
2347
2348
=head2 sortedstr
2349
2350
Return true if the input array is Inumerically sorted.
2351
2352
@a=(1..10); print "array is sorted" if sortedstr @a; #false
2353
@a=("01".."10"); print "array is sorted" if sortedstr @a; #true
2354
2355
=cut
2356
2357
sub sorted (\@@) {
2358
4
4
1
20
my($a,$cmpsub)=@_;
2359
4
21
for(0..$#$a-1){
2360
1197
50
66
4251
return 0 if !$cmpsub and $$a[$_]>$$a[$_+1]
66
33
2361
or $cmpsub and &$cmpsub($$a[$_],$$a[$_+1])>0;
2362
}
2363
4
43
return 1;
2364
}
2365
99
99
1
548
sub sortedstr { sorted(@_,sub{$_[0]cmp$_[1]}) }
2
2
20
2366
2367
=head2 part
2368
2369
B A code-ref and a list
2370
2371
B Two array-refs
2372
2373
Like C but returns the false list as well. Partitions a list
2374
into two lists where each element goes into the first or second list
2375
whether the predicate (a code-ref) is true or false for that element.
2376
2377
my( $odd, $even ) = part {$_%2} (1..8);
2378
print for @$odd; #prints 1 3 5 7
2379
print for @$even; #prints 2 4 6 8
2380
2381
(Works like C< partition() > in the Scala programming language)
2382
2383
=head2 parth
2384
2385
Like C but returns any number of lists.
2386
2387
B A code-ref and a list
2388
2389
B A hash where the returned values from the code-ref are keys and the values are arrayrefs to the list elements which gave those keys.
2390
2391
my %hash = parth { uc(substr($_,0,1)) } ('These','are','the','words','of','this','array');
2392
print serialize(\%hash);
2393
2394
Result:
2395
2396
%hash = ( T=>['These','the','this'],
2397
A=>['are','array'],
2398
O=>['of'],
2399
W=>['words'] )
2400
2401
=head2 parta
2402
2403
Like L but returns an array of lists.
2404
2405
my @a = parta { length } qw/These are the words of this array/;
2406
2407
Result:
2408
2409
@a = ( undef, undef, ['of'], ['are','the'], ['this'], ['These','words','array'] )
2410
2411
Two undefs at first (index positions 0 and 1) since there are no words of length 0 or 1 in the input array.
2412
2413
=cut
2414
2415
1
100
1
1
14
sub part (&@) { my($c,@r)=(shift,[],[]); push @{ $r[ &$c?0:1 ] }, $_ for @_; @r }
1
3
8
20
1
4
2416
1
1
1
318
sub parth (&@) { my($c,%r)=(shift); push @{ $r{ &$c } }, $_ for @_; %r }
1
4
7
32
1
11
2417
1
1
1
203
sub parta (&@) { my($c,@r)=(shift); push @{ $r[ &$c ] }, $_ for @_; @r }
1
2
7
52
1
8
2418
#sub mapn (&$@) { ... } like map but @_ contains n elems at a time, n=1 is map
2419
2420
=head2 refa
2421
2422
=head2 refh
2423
2424
=head2 refs
2425
2426
=head2 refaa
2427
2428
=head2 refah
2429
2430
=head2 refha
2431
2432
=head2 refhh
2433
2434
Returns true or false (1 or 0) if the argument is an arrayref, hashref, scalarref, ref to an array of arrays, ref to an array of hashes
2435
2436
Examples:
2437
2438
my $ref_to_array = [1,2,3];
2439
my $ref_to_hash = {1,100,2,200,3,300};
2440
my $ref_to_scalar = \"String";
2441
print "arrayref" if ref($ref_to_array) eq 'ARRAY'; #true
2442
print "hashref" if ref($ref_to_hash) eq 'HASH'; #true
2443
print "scalarref" if ref($ref_to_scalar) eq 'SCALAR'; #true
2444
print "arrayref" if refa($ref_to_array); #also true, without: eq 'ARRAY'
2445
print "hashref" if refh($ref_to_hash); #also true, without: eq 'HASH'
2446
print "scalarref" if refs($ref_to_scalar); #also true, without: eq 'SCALAR'
2447
2448
my $ref_to_array_of_arrays = [ [1,2,3], [2,4,8], [10,100,1000] ];
2449
my $ref_to_array_of_hashes = [ {1=>10, 2=>100}, {first=>1, second=>2} ];
2450
my $ref_to_hash_of_arrays = { alice=>[1,2,3], bob=>[2,4,8], eve=>[10,100,1000] };
2451
my $ref_to_hash_of_hashes = { alice=>{a=>22,b=>11}, bob=>{a=>33,b=>66} };
2452
2453
print "aa" if refaa($ref_to_array_of_arrays); #true
2454
print "ah" if refah($ref_to_array_of_hashes); #true
2455
print "ha" if refha($ref_to_hash_of_arrays); #true
2456
print "hh" if refhh($ref_to_hash_of_hashes); #true
2457
2458
=cut
2459
2460
3
0
3
1
25
sub refa { ref($_[0]) eq 'ARRAY' ? 1 : ref($_[0]) ? 0 : undef }
50
2461
3
0
3
1
13
sub refh { ref($_[0]) eq 'HASH' ? 1 : ref($_[0]) ? 0 : undef }
50
2462
1
0
1
1
6
sub refs { ref($_[0]) eq 'SCALAR' ? 1 : ref($_[0]) ? 0 : undef }
50
2463
1
0
1
1
6
sub refaa { ref($_[0]) eq 'ARRAY' ? refa($_[0][0]) : ref($_[0]) ? 0 : undef }
50
2464
1
0
1
1
6
sub refah { ref($_[0]) eq 'ARRAY' ? refh($_[0][0]) : ref($_[0]) ? 0 : undef }
50
2465
1
0
1
1
5
sub refha { ref($_[0]) eq 'HASH' ? refa((values%{$_[0]})[0]) : ref($_[0]) ? 0 : undef }
1
50
4
2466
1
0
1
1
7
sub refhh { ref($_[0]) eq 'HASH' ? refh((values%{$_[0]})[0]) : ref($_[0]) ? 0 : undef }
1
50
7
2467
2468
2469
=head2 pushr
2470
2471
=head2 popr
2472
2473
=head2 shiftr
2474
2475
=head2 unshiftr
2476
2477
=head2 splicer
2478
2479
=head2 keysr
2480
2481
=head2 valuesr
2482
2483
=head2 eachr
2484
2485
In Perl versions 5.12 - 5.22 push, pop, shift, unshift, splice, keys, values and each
2486
handled references to arrays and references to hashes just as if they where arrays and hashes. Examples:
2487
2488
my $person={name=>'Gaga', array=>[1,2,3]};
2489
push $person{array} , 4; #works in perl 5.12-5.22 but not before and after
2490
push @{ $person{array} }, 4; #works in all perl5 versions
2491
pushr $person{array} , 4; #use Acme::Tools and this should work in perl >= 5.8
2492
popr $person{array}; #returns 4
2493
2494
=cut
2495
2496
3
3
1
262
sub pushr { push @{shift()}, @_ } # ? ($@)
3
8
2497
1
1
1
2
sub popr { pop @{shift()} }
1
2
2498
1
1
1
253
sub shiftr { shift @{shift()} }
1
2
2499
2
2
1
501
sub unshiftr { unshift @{shift()}, @_ }
2
10
2500
0
0
sub splicer { @_==1 ? splice( @{shift()} )
2501
1
5
:@_==2 ? splice( @{shift()}, shift() )
2502
2
8
:@_==3 ? splice( @{shift()}, shift(), shift() )
2503
4
50
4
1
1081
:@_>=4 ? splice( @{shift()}, shift(), shift(), @_ ) : die }
1
100
6
100
50
2504
1
1
1
264
sub keysr { keys( %{shift()} ) } #hm sort(keys%{shift()}) ?
1
10
2505
1
1
1
4
sub valuesr { values( %{shift()} ) }
1
4
2506
4
50
4
1
1289
sub eachr { ref($_[0]) eq 'HASH' ? each(%{shift()})
4
11
2507
#:ref($_[0]) eq 'ARRAY' ? each(@{shift()}) # perl 5.8.8 cannot compile each on array! eval?
2508
: croak("eachr needs hashref or arrayref got '".ref($_[0])."'") }
2509
#sub eachr { each(%{shift()}) }
2510
2511
=head1 STATISTICS
2512
2513
=head2 sum
2514
2515
Returns the sum of a list of numbers. Undef is ignored.
2516
2517
print sum(1,3,undef,8); # 12
2518
print sum(1..1000); # 500500
2519
print sum(undef); # undef
2520
2521
=cut
2522
2523
33
66
33
1
189
sub sum { my $sum; no warnings; defined($_) and $sum+=$_ for @_; $sum }
33
60
44
33
2566
60
2766
60
10096
60
408
2524
2525
=head2 avg
2526
2527
Returns the I number of a list of numbers. That is C
2528
2529
print avg( 2, 4, 9); # 5 (2+4+9) / 3 = 5
2530
print avg( [2, 4, 9] ); # 5 pass by reference, same result but faster for large arrays
2531
2532
Also known as I.
2533
2534
Pass by reference: If one argument is given and it is a reference to an array,
2535
this array is taken as the list of numbers. This mode is about twice as fast
2536
for 10000 numbers or more. It most likely also saves memory.
2537
2538
=cut
2539
2540
sub avg {
2541
880
880
1
2173
my($sum,$n,@a)=(0,0);
2542
33
33
371
no warnings;
33
43
33
11681
2543
880
50
1044
if( @_==0 ) { return undef }
0
0
2544
880
100
100
1255
if( @_==1 and ref($_[0]) eq 'ARRAY' ){ @a=grep defined,@{$_[0]} }
1
1
1
4
2545
879
2379
else { @a=grep defined,@_ }
2546
880
100
995
if( @a==0 ) { return undef }
1
4
2547
879
2972
$sum+=$_ for @a;
2548
879
2244
return $sum/@a
2549
}
2550
2551
=head2 geomavg
2552
2553
Returns the I (a.k.a I) of a list of numbers.
2554
2555
print geomavg(10,100,1000,10000,100000); # 1000
2556
print 0+ (10*100*1000*10000*100000) ** (1/5); # 1000 same thing
2557
print exp(avg(map log($_),10,100,1000,10000,100000)); # 1000 same thing, this is how geomavg() works internally
2558
2559
=cut
2560
2561
771
771
1
1820
sub geomavg { exp(avg(map log($_), @_)) }
2562
2563
=head2 harmonicavg
2564
2565
Returns the I (a.k.a I) of a list of numbers. L
2566
2567
print harmonicavg(10,11,12); # 3 / ( 1/10 + 1/11 + 1/12) = 10.939226519337
2568
2569
=cut
2570
2571
0
0
1
0
sub harmonicavg { my $s; $s+=1/$_ for @_; @_/$s }
0
0
0
0
2572
2573
=head2 variance
2574
2575
C<< variance = ( sum (x[i]-Average)**2)/(n-1) >>
2576
2577
=cut
2578
2579
sub variance {
2580
0
0
1
0
my $sumx2; $sumx2+=$_*$_ for @_;
0
0
2581
0
0
my $sumx; $sumx+=$_ for @_;
0
0
2582
0
0
(@_*$sumx2-$sumx*$sumx)/(@_*(@_-1));
2583
}
2584
2585
=head2 stddev
2586
2587
C<< Standard_Deviation = sqrt(variance) >>
2588
2589
Standard deviation (stddev) is a measurement of the width of a normal
2590
distribution where one stddev on each side of the mean covers 68% and
2591
two stddevs 95%. Normal distributions are sometimes called Gauss curves
2592
or Bell shapes. L
2593
2594
stddev(4,5,6,5,6,4,3,5,5,6,7,6,5,7,5,6,4) # = 1.0914103126635
2595
avg(@testscores) + stddev(@testscores) # = the score for IQ = 115 (by one definition)
2596
avg(@testscores) - stddev(@testscores) # = the score for IQ = 85
2597
2598
=cut
2599
2600
sub stddev {
2601
14
100
14
1
596
return undef if @_==0;
2602
13
100
66
52
return stddev(\@_) if @_>0 and !ref($_[0]);
2603
8
8
my $ar=shift;
2604
8
50
12
return undef if @$ar==0;
2605
8
100
16
return 0 if @$ar==1;
2606
6
3
my $sumx2; $sumx2 += $_*$_ for @$ar;
6
51
2607
6
6
my $sumx; $sumx += $_ for @$ar;
6
41
2608
6
30
sqrt( (@$ar*$sumx2-$sumx*$sumx)/(@$ar*(@$ar-1)) );
2609
}
2610
2611
=head2 rstddev
2612
2613
Relative stddev = stddev / avg
2614
2615
=cut
2616
2617
0
0
1
0
sub rstddev { stddev(@_) / avg(@_) }
2618
2619
=head2 median
2620
2621
Returns the median value of a list of numbers. The list do not have to
2622
be sorted.
2623
2624
Example 1, list having an odd number of numbers:
2625
2626
print median(1, 100, 101); # 100
2627
2628
100 is the middlemost number after sorting.
2629
2630
Example 2, an even number of numbers:
2631
2632
print median(1005, 100, 101, 99); # 100.5
2633
2634
100.5 is the average of the two middlemost numbers.
2635
2636
=cut
2637
2638
sub median {
2639
33
33
146
no warnings;
33
48
33
11928
2640
6
6
1
327
my @list = sort {$a<=>$b} @_;
10035
5776
2641
6
8
my $n=@list;
2642
6
100
167
$n%2 ? $list[($n-1)/2]
2643
: ($list[$n/2-1] + $list[$n/2])/2;
2644
}
2645
2646
2647
=head2 percentile
2648
2649
Returns one or more percentiles of a list of numbers.
2650
2651
Percentile 50 is the same as the I, percentile 25 is the first
2652
quartile, 75 is the third quartile.
2653
2654
B
2655
2656
First argument is your wanted percentile, or a refrence to a list of percentiles you want from the dataset.
2657
2658
If the first argument to percentile() is a scalar, this percentile is returned.
2659
2660
If the first argument is a reference to an array, then all those percentiles are returned as an array.
2661
2662
Second, third, fourth and so on argument are the numbers from which you want to find the percentile(s).
2663
2664
B
2665
2666
This finds the 50-percentile (the median) to the four numbers 1, 2, 3 and 4:
2667
2668
print "Median = " . percentile(50, 1,2,3,4); # 2.5
2669
2670
This:
2671
2672
@data=(11, 5, 3, 5, 7, 3, 1, 17, 4, 2, 6, 4, 12, 9, 0, 5);
2673
@p = map percentile($_,@data), (25, 50, 75);
2674
2675
Is the same as this:
2676
2677
@p = percentile([25, 50, 75], @data);
2678
2679
But the latter is faster, especially if @data is large since it sorts
2680
the numbers only once internally.
2681
2682
B
2683
2684
Data: 1, 4, 6, 7, 8, 9, 22, 24, 39, 49, 555, 992
2685
2686
Average (or mean) is 143
2687
2688
Median is 15.5 (which is the average of 9 and 22 who both equally lays in the middle)
2689
2690
The 25-percentile is 6.25 which are between 6 and 7, but closer to 6.
2691
2692
The 75-percentile is 46.5, which are between 39 and 49 but close to 49.
2693
2694
Linear interpolation is used to find the 25- and 75-percentile and any
2695
other x-percentile which doesn't fall exactly on one of the numbers in
2696
the set.
2697
2698
B
2699
2700
As you saw, 6.25 are closer to 6 than to 7 because 25% along the set of
2701
the twelve numbers is closer to the third number (6) than to he fourth
2702
(7). The median (50-percentile) is also really interpolated, but it is
2703
always in the middle of the two center numbers if there are an even count
2704
of numbers.
2705
2706
However, there is two methods of interpolation:
2707
2708
Example, we have only three numbers: 5, 6 and 7.
2709
2710
Method 1: The most common is to say that 5 and 7 lays on the 25- and
2711
75-percentile. This method is used in Acme::Tools.
2712
2713
Method 2: In Oracle databases the least and greatest numbers
2714
always lay on the 0- and 100-percentile.
2715
2716
As an argument on why Oracles (and others?) definition is not the best way is to
2717
look at your data as for instance temperature measurements. If you
2718
place the highest temperature on the 100-percentile you are sort of
2719
saying that there can never be a higher temperatures in future measurements.
2720
2721
A quick non-exhaustive Google survey suggests that method 1 here is most used.
2722
2723
The larger the data sets, the less difference there is between the two methods.
2724
2725
B
2726
2727
In method one, when you want a percentile outside of any possible
2728
interpolation, you use the smallest and second smallest to extrapolate
2729
from. For instance in the data set C<5, 6, 7>, if you want an
2730
x-percentile of x < 25, this is below 5.
2731
2732
If you feel tempted to go below 0 or above 100, C will
2733
I (or I to be more precise)
2734
2735
Another method could be to use "soft curves" instead of "straight
2736
lines" in interpolation. Maybe B-splines or Bezier curves. This is not
2737
used here.
2738
2739
For large sets of data Hoares algorithm would be faster than the
2740
simple straightforward implementation used in C
2741
here. Hoares don't sort all the numbers fully.
2742
2743
B
2744
2745
Data: 1, 4, 6, 7, 8, 9, 22, 24, 39, 49, 555, 992
2746
2747
Percentile Method 1 Method 2
2748
(Acme::Tools::percentile (Oracle)
2749
and others)
2750
------------- ----------------------------- ---------
2751
0 -2 1
2752
1 -1.61 1.33
2753
25 6.25 6.75
2754
50 (median) 15.5 15.5
2755
75 46.5 41.5
2756
99 1372.19 943.93
2757
100 1429 992
2758
2759
Found like this:
2760
2761
perl -MAcme::Tools -le 'print for percentile([0,1,25,50,75,99,100], 1,4,6,7,8,9,22,24,39,49,555,992)'
2762
2763
And like this in Oracle-databases:
2764
2765
select
2766
percentile_cont(0.00) within group(order by n) per0,
2767
percentile_cont(0.01) within group(order by n) per1,
2768
percentile_cont(0.25) within group(order by n) per25,
2769
percentile_cont(0.50) within group(order by n) per50,
2770
percentile_cont(0.75) within group(order by n) per75,
2771
percentile_cont(0.99) within group(order by n) per99,
2772
percentile_cont(1.00) within group(order by n) per100
2773
from (
2774
select 0+regexp_substr('1,4,6,7,8,9,22,24,39,49,555,992','[^,]+',1,i) n
2775
from dual,(select level i from dual connect by level <= 12)
2776
);
2777
2778
(Oracle also provides a similar function: C where I
2779
is short for I, meaning no interpolation is taking
2780
place. Instead the closest number from the data set is picked.)
2781
2782
=cut
2783
2784
sub percentile {
2785
3
3
1
4
my(@p,@t,@ret);
2786
3
100
9
if(ref($_[0]) eq 'ARRAY'){ @p=@{shift()} }
1
50
2
1
3
2787
2
4
elsif(not ref($_[0])) { @p=(shift()) }
2788
0
0
else{croak()}
2789
3
5
@t=@_;
2790
3
50
7
return if !@p;
2791
3
50
4
croak if !@t;
2792
3
9
@t=sort{$a<=>$b}@t;
72
44
2793
3
50
8
push@t,$t[0] if @t==1;
2794
3
4
for(@p){
2795
9
50
33
28
croak if $_<0 or $_>100;
2796
9
13
my $i=(@t+1)*$_/100-1;
2797
9
50
38
push@ret,
100
100
2798
$i<0 ? $t[0]+($t[1]-$t[0])*$i:
2799
$i>$#t ? $t[-1]+($t[-1]-$t[-2])*($i-$#t):
2800
$i==int($i)? $t[$i]:
2801
$t[$i]*(int($i+1)-$i) + $t[$i+1]*($i-int($i));
2802
}
2803
3
100
36
return @p==1 ? $ret[0] : @ret;
2804
}
2805
2806
=head1 RANDOM
2807
2808
=head2 random
2809
2810
B One or two arguments.
2811
2812
B
2813
2814
If two integer arguments: returns a random integer between the integers in argument one and two.
2815
2816
If the first argument is an arrayref: returns a random member of that array without changing the array.
2817
2818
If the first argument is an arrayref and there is a second arg: return that many random members of that array
2819
2820
If the first argument is an hashref and there is no second arg: return a random key weighted by the values of that hash
2821
2822
If the first argument is an hashref and there is a second arg: return that many random keys weighted by the values of that hash
2823
2824
If there is no second argument and the first is an integer, a random integer between 0 and that number is returned. Including 0 and the number itself.
2825
2826
B
2827
2828
$dice=random(1,6); # 1, 2, 3, 4, 5 or 6
2829
$dice=random([1..6]); # same as previous
2830
@dice=random([1..6],10); # 10 dice tosses
2831
$dice=random({1=>1, 2=>1, 3=>1, 4=>1, 5=>1, 6=>2}); # weighted dice with 6 being twice as likely as the others
2832
@dice=random({1=>1, 2=>1, 3=>1, 4=>1, 5=>1, 6=>2},10); # 10 weighted dice tosses
2833
print random({head=>0.4999,tail=>0.4999,edge=>0.0002}); # coin toss (sum 1 here but not required to be)
2834
print random(2); # prints 0, 1 or 2
2835
print 2**random(7); # prints 1, 2, 4, 8, 16, 32, 64 or 128
2836
@dice=map random([1..6]), 1..10; # as third example above, but much slower
2837
perl -MAcme::Tools -le 'print for random({head=>0.499,tail=>0.499,edge=>0.002},10000);' | sort | uniq -c
2838
2839
=cut
2840
2841
sub random {
2842
21002
21002
1
35376
my($from,$to)=@_;
2843
21002
18639
my $ref=ref($from);
2844
21002
100
26358
if($ref eq 'ARRAY'){
100
2845
15001
100
56326
my @r=map $$from[rand@$from], 1..$to||1;
2846
15001
100
54115
return @_>1?@r:$r[0];
2847
}
2848
elsif($ref eq 'HASH') {
2849
4001
7947
my @k=keys%$from;
2850
33
66
33
151
my $max;do{no warnings 'uninitialized';$_>$max and $max=$_ or $_<0 and croak"negative weight" for values%$from};
33
33
38
33
66
24984
4001
3073
4001
2228
4001
36591
2851
4001
100
9020
my @r=map {my$r;1 while $$from{$r=$k[rand@k]}
4010
2338
4010
10629
4010
6039
2852
4001
100
10993
return @_>1?@r:$r[0];
2853
}
2854
2000
100
3404
($from,$to)=(0,$from) if @_==1;
2855
2000
50
2957
($from,$to)=($to,$from) if $from>$to;
2856
2000
4899
return int($from+rand(1+$to-$from));
2857
}
2858
#todo?: https://en.wikipedia.org/wiki/Irwin%E2%80%93Hall_distribution
2859
2860
=head2 random_gauss
2861
2862
Returns an pseudo-random number with a Gaussian distribution instead
2863
of the uniform distribution of perls C or C in this
2864
module. The algorithm is a variation of the one at
2865
L which is both faster
2866
and better than adding a long series of C.
2867
2868
Uses perls C function internally.
2869
2870
B 0 - 3 arguments.
2871
2872
First argument: the average of the distribution. Default 0.
2873
2874
Second argument: the standard deviation of the distribution. Default 1.
2875
2876
Third argument: If a third argument is present, C
2877
returns an array of that many pseudo-random numbers. If there is no
2878
third argument, a number (a scalar) is returned.
2879
2880
B One or more pseudo-random numbers with a Gaussian distribution. Also known as a Bell curve or Normal distribution.
2881
2882
Example:
2883
2884
my @I=random_gauss(100, 15, 100000); # produces 100000 pseudo-random numbers, average=100, stddev=15
2885
#my @I=map random_gauss(100, 15), 1..100000; # same but more than three times slower
2886
print "Average is: ".avg(@I)."\n"; # prints a number close to 100
2887
print "Stddev is: ".stddev(@I)."\n"; # prints a number close to 15
2888
2889
my @M=grep $_>100+15*2, @I; # those above 130
2890
print "Percent above two stddevs: ".(100*@M/@I)."%\n"; #prints a number close to 2.2%
2891
2892
Example 2:
2893
2894
my $num=1e6;
2895
my @h; $h[$_/2]++ for random_gauss(100,15, $num);
2896
$h[$_] and printf "%3d - %3d %6d %s\n",
2897
$_*2,$_*2+1,$h[$_],'=' x ($h[$_]*1000/$num)
2898
for 1..200/2;
2899
2900
...prints an example of the famous Bell curve:
2901
2902
44 - 45 70
2903
46 - 47 114
2904
48 - 49 168
2905
50 - 51 250
2906
52 - 53 395
2907
54 - 55 588
2908
56 - 57 871
2909
58 - 59 1238 =
2910
60 - 61 1807 =
2911
62 - 63 2553 ==
2912
64 - 65 3528 ===
2913
66 - 67 4797 ====
2914
68 - 69 6490 ======
2915
70 - 71 8202 ========
2916
72 - 73 10577 ==========
2917
74 - 75 13319 =============
2918
76 - 77 16283 ================
2919
78 - 79 20076 ====================
2920
80 - 81 23742 =======================
2921
82 - 83 27726 ===========================
2922
84 - 85 32205 ================================
2923
86 - 87 36577 ====================================
2924
88 - 89 40684 ========================================
2925
90 - 91 44515 ============================================
2926
92 - 93 47575 ===============================================
2927
94 - 95 50098 ==================================================
2928
96 - 97 52062 ====================================================
2929
98 - 99 53338 =====================================================
2930
100 - 101 52834 ====================================================
2931
102 - 103 52185 ====================================================
2932
104 - 105 50472 ==================================================
2933
106 - 107 47551 ===============================================
2934
108 - 109 44471 ============================================
2935
110 - 111 40704 ========================================
2936
112 - 113 36642 ====================================
2937
114 - 115 32171 ================================
2938
116 - 117 28166 ============================
2939
118 - 119 23618 =======================
2940
120 - 121 19873 ===================
2941
122 - 123 16360 ================
2942
124 - 125 13452 =============
2943
126 - 127 10575 ==========
2944
128 - 129 8283 ========
2945
130 - 131 6224 ======
2946
132 - 133 4661 ====
2947
134 - 135 3527 ===
2948
136 - 137 2516 ==
2949
138 - 139 1833 =
2950
140 - 141 1327 =
2951
142 - 143 860
2952
144 - 145 604
2953
146 - 147 428
2954
148 - 149 275
2955
150 - 151 184
2956
152 - 153 111
2957
154 - 155 67
2958
2959
=cut
2960
2961
sub random_gauss {
2962
1
1
1
3
my($avg,$stddev,$num)=@_;
2963
1
50
4
$avg=0 if !defined $avg;
2964
1
50
3
$stddev=1 if !defined $stddev;
2965
1
50
3
$num=1 if !defined $num;
2966
1
50
5
croak "random_gauss should not have more than 3 arguments" if @_>3;
2967
1
1
my @r;
2968
1
4
while (@r<$num) {
2969
2500
1378
my($x1,$x2,$w);
2970
2500
1349
do {
2971
3215
2066
$x1=2.0*rand()-1.0;
2972
3215
1767
$x2=2.0*rand()-1.0;
2973
3215
3925
$w=$x1*$x1+$x2*$x2;
2974
} while $w>=1.0;
2975
2500
1927
$w=sqrt(-2.0*log($w)/$w) * $stddev;
2976
2500
3733
push @r, $x1*$w + $avg,
2977
$x2*$w + $avg;
2978
}
2979
1
50
10
pop @r if @r > $num;
2980
1
50
13
return $r[0] if @_<3;
2981
1
543
return @r;
2982
}
2983
2984
=head2 mix
2985
2986
Mixes an array in random order. In-place if given an array reference or not if given an array.
2987
2988
C could also have been named C, as in shuffling a deck of cards.
2989
2990
Example:
2991
2992
This:
2993
2994
print mix("a".."z"),"\n" for 1..3;
2995
2996
...could write something like:
2997
2998
trgoykzfqsduphlbcmxejivnwa
2999
qycatilmpgxbhrdezfwsovujkn
3000
ytogrjialbewcpvndhkxfzqsmu
3001
3002
B
3003
3004
=over 4
3005
3006
=item 1.
3007
Either a reference to an array as the only input. This array will then be mixed I. The array will be changed:
3008
3009
This: C<< @a=mix(@a) >> is the same as: C<< mix(\@a) >>.
3010
3011
=item 2.
3012
Or an array of zero, one or more elements.
3013
3014
=back
3015
3016
Note that an input-array which COINCIDENTLY SOME TIMES has one element
3017
(but more other times), and that element is an array-ref, you will
3018
probably not get the expected result.
3019
3020
To check distribution:
3021
3022
perl -MAcme::Tools -le 'print mix("a".."z") for 1..26000'|cut -c1|sort|uniq -c|sort -n
3023
3024
The letters a-z should occur around 1000 times each.
3025
3026
Shuffles a deck of cards: (s=spaces, h=hearts, c=clubs, d=diamonds)
3027
3028
perl -MAcme::Tools -le '@cards=map join("",@$_),cart([qw/s h c d/],[2..10,qw/J Q K A/]); print join " ",mix(@cards)'
3029
3030
(Uses L, which is not a typo, see further down here)
3031
3032
Note: C is approximately four times faster. Both respects the Perl built-in C.
3033
3034
=cut
3035
3036
sub mix {
3037
1020
50
33
1020
1
3947
if(@_==1 and ref($_[0]) eq 'ARRAY'){ #kun ett arg, og det er ref array
3038
0
0
my $r=$_[0];
3039
0
0
push@$r,splice(@$r,rand(@$r-$_),1) for 0..(@$r-1);
3040
0
0
return $r;
3041
}
3042
else{
3043
1020
1605
my@e=@_;
3044
1020
9491
push@e,splice(@e,rand(@e-$_),1) for 0..$#e;
3045
1020
4560
return @e;
3046
}
3047
}
3048
3049
=head2 pwgen
3050
3051
Generates random passwords.
3052
3053
B 0-n args
3054
3055
* First arg: length of password(s), default 8
3056
3057
* Second arg: number of passwords, default 1
3058
3059
* Third arg: string containing legal chars in password, default A-Za-z0-9,-./&%_!
3060
3061
* Fourth to n'th arg: list of requirements for passwords, default if the third arg is false/undef (so default third arg is used) is:
3062
3063
sub{/^[a-zA-Z0-9].*[a-zA-Z0-9]$/ and /[a-z]/ and /[A-Z]/ and /\d/ and /[,-.\/&%_!]/}
3064
3065
...meaning the password should:
3066
* start and end with: a letter a-z (lower- or uppercase) or a digit 0-9
3067
* should contain at least one char from each of the groups lower, upper, digit and special char
3068
3069
To keep the default requirement-sub but add additional ones just set the fourth arg to false/undef
3070
and add your own requirements in the fifth arg and forward (examples below). Sub pwgen uses perls
3071
own C internally.
3072
3073
C<< $Acme::Tools::Pwgen_max_sec >> and C<< $Acme::Tools::Pwgen_max_trials >> can be set to adjust for how long
3074
pwgen tries to find a password. Defaults for those are 0.01 and 10000.
3075
Whenever one of the two limits is reached, a first generates a croak.
3076
3077
Examples:
3078
3079
my $pw=pwgen(); # a random 8 chars password A-Z a-z 0-9 ,-./&%!_ (8 is default length)
3080
my $pw=pwgen(12); # a random 12 chars password A-Z a-z 0-9 ,-./&%!_
3081
my @pw=pwgen(0,10); # 10 random 8 chars passwords, containing the same possible chars
3082
my @pw=pwgen(0,1000,'A-Z'); # 1000 random 8 chars passwords containing just uppercase letters from A to Z
3083
3084
pwgen(3); # dies, defaults require chars in each of 4 group (see above)
3085
pwgen(5,1,'A-C0-9', qr/^\D{3}\d{2}$/); # a 5 char string starting with three A, B or Cs and endring with two digits
3086
pwgen(5,1,'ABC0-9',sub{/^\D{3}\d{2}$/}); # same as above
3087
3088
Examples of adding additional requirements to the default ones:
3089
3090
my @pwreq = ( qr/^[A-C]/ );
3091
pwgen(8,1,'','',@pwreq); # use defaults for allowed chars and the standard requirements
3092
# but also demand that the password must start with A, B or C
3093
3094
push @pwreq, sub{ not /[a-z]{3}/i };
3095
pwgen(8,1,'','',@pwreq); # as above and in addition the password should not contain three
3096
# or more consecutive letters (to avoid "offensive" words perhaps)
3097
3098
=cut
3099
3100
our $Pwgen_max_sec=0.01; #max seconds/password before croak (for hard to find requirements)
3101
our $Pwgen_max_trials=10000; #max trials/password before croak (for hard to find requirements)
3102
our $Pwgen_sec=0; #seconds used in last call to pwgen()
3103
our $Pwgen_trials=0; #trials in last call to pwgen()
3104
3515
100
100
3515
0
24889
sub pwgendefreq{/^[a-z].*[a-z\d]$/i and /[a-z]/ and /[A-Z]/ and /\d/ and /[,-.\/&%_!]/}
100
100
3105
sub pwgen {
3106
60
60
1
3107
my($len,$num,$chars,@req)=@_;
3107
60
100
101
$len||=8;
3108
60
100
85
$num||=1;
3109
60
100
87
$chars||='A-Za-z0-9,-./&%_!';
3110
60
100
50
105
$req[0]||=\&pwgendefreq if !$_[2];
3111
60
1583
$chars=~s/([$_])-([$_])/join("","$1".."$2")/eg for ('a-z','A-Z','0-9');
125
1499
3112
60
95
my($c,$t,@pw,$d)=(length($chars),time_fp());
3113
60
52
($Pwgen_trials,$Pwgen_sec)=(0,0);
3114
TRIAL:
3115
60
100
while(@pw<$num){
3116
13909
100
66
38540
croak "pwgen timeout after $Pwgen_trials trials"
66
3117
if ++$Pwgen_trials >= $Pwgen_max_trials
3118
or ($d=time_fp()-$t) > $Pwgen_max_sec*$num
3119
and $d!~/^\d+$/; #jic int from time_fp
3120
13907
63713
my $pw=join"",map substr($chars,rand($c),1),1..$len;
3121
13907
17403
for my $r (@req){
3122
20556
100
27608
if (ref($r) eq 'CODE' ){ local$_=$pw; &$r() or next TRIAL }
8179
100
6159
8179
50
8044
3123
33
100
33
170
elsif(ref($r) eq 'Regexp'){ no warnings; $pw=~$r or next TRIAL }
33
59
33
5983
12377
39004
3124
0
0
else { croak "pwgen: invalid req type $r ".ref($r) }
3125
}
3126
651
1519
push@pw,$pw;
3127
}
3128
58
65
$Pwgen_sec=time_fp()-$t;
3129
58
100
294
return $pw[0] if $num==1;
3130
7
269
return @pw;
3131
}
3132
3133
# =head1 veci
3134
#
3135
# Perls C takes 1, 2, 4, 8, 16, 32 and possibly 64 as its third argument.
3136
#
3137
# This limitation is removed with C (vec improved, but much slower)
3138
#
3139
# The third argument still needs to be 32 or lower (or possibly 64 or lower).
3140
#
3141
# =cut
3142
#
3143
# sub vecibs ($) {
3144
# my($s,$o,$b,$new)=@_;
3145
# if($b=~/^(1|2|4|8|16|32|64)$/){
3146
# return vec($s,$o,$b)=$new if @_==4;
3147
# return vec($s,$o,$b);
3148
# }
3149
# my $bb=$b<4?4:$b<8?8:$b<16?16:$b<32?32:$b<64?64:die;
3150
# my $ob=int($o*$b/$bb);
3151
# my $v=vec($s,$ob,$bb)*2**$bb+vec($s,$ob+1,$bb);
3152
# $v & (2**$b-1)
3153
# }
3154
3155
3156
=head1 SETS
3157
3158
=head2 distinct
3159
3160
Returns the values of the input list, sorted alfanumerically, but only
3161
one of each value. This is the same as L except uniq does not
3162
sort the returned list.
3163
3164
Example:
3165
3166
print join(", ", distinct(4,9,3,4,"abc",3,"abc")); # 3, 4, 9, abc
3167
print join(", ", distinct(4,9,30,4,"abc",30,"abc")); # 30, 4, 9, abc note: alphanumeric sort
3168
3169
=cut
3170
3171
1
1
1
282
sub distinct { return sort keys %{{map {($_,1)} @_}} }
1
2
7
20
3172
3173
=head2 in
3174
3175
Returns I<1> (true) if first argument is in the list of the remaining arguments. Uses the perl-operator C<< eq >>.
3176
3177
Otherwise it returns I<0> (false).
3178
3179
print in( 5, 1,2,3,4,6); # 0
3180
print in( 4, 1,2,3,4,6); # 1
3181
print in( 'a', 'A','B','C','aa'); # 0
3182
print in( 'a', 'A','B','C','a'); # 1
3183
3184
I guess in perl 5.10 or perl 6 you could use the C<< ~~ >> operator instead.
3185
3186
=head2 in_num
3187
3188
Just as sub L, but for numbers. Internally uses the perl operator C<< == >> instead of C< eq >.
3189
3190
print in(5000, '5e3'); # 0
3191
print in(5000, 5e3); # 1 since 5e3 is converted to 5000 before the call
3192
print in_num(5000, 5e3); # 1
3193
print in_num(5000, '+5.0e03'); # 1
3194
3195
=cut
3196
3197
sub in {
3198
33
33
151
no warnings 'uninitialized';
33
48
33
2292
3199
95
95
1
109
my $val=shift;
3200
95
100
90
for(@_){ return 1 if $_ eq $val }
294
471
3201
6
20
return 0;
3202
}
3203
3204
sub in_num {
3205
33
33
129
no warnings 'uninitialized';
33
45
33
59289
3206
1
1
1
2
my $val=shift;
3207
1
100
3
for(@_){ return 1 if $_ == $val }
5000
5246
3208
0
0
return 0;
3209
}
3210
3211
=head2 union
3212
3213
Input: Two arrayrefs. (Two lists, that is)
3214
3215
Output: An array containing all elements from both input lists, but no element more than once even if it occurs twice or more in the input.
3216
3217
Example, prints 1,2,3,4:
3218
3219
perl -MAcme::Tools -le 'print join ",", union([1,2,3],[2,3,3,4,4])' # 1,2,3,4
3220
3221
=cut
3222
3223
1
1
1
2
sub union { my %seen; grep !$seen{$_}++, map @{shift()},@_ }
1
2
2
12
3224
=head2 minus
3225
3226
Input: Two arrayrefs.
3227
3228
Output: An array containing all elements in the first input array but not in the second.
3229
3230
Example:
3231
3232
perl -MAcme::Tools -le 'print join " ", minus( ["five", "FIVE", 1, 2, 3.0, 4], [4, 3, "FIVE"] )'
3233
3234
Output is C<< five 1 2 >>.
3235
3236
=cut
3237
3238
sub minus {
3239
1
1
0
2
my %seen;
3240
1
2
my %notme=map{($_=>1)}@{$_[1]};
3
6
1
3
3241
1
66
1
grep !$notme{$_}&&!$seen{$_}++, @{$_[0]};
1
24
3242
}
3243
3244
=head2 intersect
3245
3246
Input: Two arrayrefs
3247
3248
Output: An array containing all elements which exists in both input arrays.
3249
3250
Example:
3251
3252
perl -MAcme::Tools -le 'print join" ", intersect( ["five", 1, 2, 3.0, 4], [4, 2+1, "five"] )' # 4 3 five
3253
3254
Output: C<< 4 3 five >>
3255
3256
=cut
3257
3258
sub intersect {
3259
44
44
1
33
my %first=map{($_=>1)}@{$_[0]};
52
92
44
69
3260
44
38
my %seen;
3261
44
100
30
return grep{$first{$_}&&!$seen{$_}++}@{$_[1]};
50
202
44
35
3262
}
3263
3264
=head2 not_intersect
3265
3266
Input: Two arrayrefs
3267
3268
Output: An array containing all elements member of just one of the input arrays (not both).
3269
3270
Example:
3271
3272
perl -MAcme::Tools -le ' print join " ", not_intersect( ["five", 1, 2, 3.0, 4], [4, 2+1, "five"] )'
3273
3274
The output is C<< 1 2 >>.
3275
3276
=cut
3277
3278
sub not_intersect {
3279
1
1
1
1
my %code;
3280
my %seen;
3281
1
2
for(@{$_[0]}){$code{$_}|=1}
1
3
5
11
3282
1
3
for(@{$_[1]}){$code{$_}|=2}
1
2
3
4
3283
1
100
1
return grep{$code{$_}!=3&&!$seen{$_}++}(@{$_[0]},@{$_[1]});
8
28
1
2
1
1
3284
}
3285
3286
=head2 uniq
3287
3288
Input: An array of strings (or numbers)
3289
3290
Output: The same array in the same order, except elements which exists earlier in the list.
3291
3292
Same as L but distinct sorts the returned list, I does not.
3293
3294
Example:
3295
3296
my @t=(7,2,3,3,4,2,1,4,5,3,"x","xx","x",02,"07");
3297
print join " ", uniq @t; # prints 7 2 3 4 1 5 x xx 07
3298
3299
=cut
3300
3301
1
1
1
2
sub uniq(@) { my %seen; grep !$seen{$_}++, @_ }
1
19
3302
3303
=head1 HASHES
3304
3305
=head2 subhash
3306
3307
Copies a subset of keys/values from one hash to another.
3308
3309
B First argument is a reference to a hash. The rest of the arguments are a list of the keys of which key/value-pair you want to be copied.
3310
3311
B The hash consisting of the keys and values you specified.
3312
3313
Example:
3314
3315
%population = ( Norway=>5000000, Sweden=>9500000, Finland=>5400000,
3316
Denmark=>5600000, Iceland=>320000,
3317
India => 1.21e9, China=>1.35e9, USA=>313e6, UK=>62e6 );
3318
3319
%scandinavia = subhash( \%population , 'Norway', 'Sweden', 'Denmark' ); # this and
3320
%scandinavia = (Norway=>5000000,Sweden=>9500000,Denmark=>5600000); # this is the same
3321
3322
print "Population of $_ is $scandinavia{$_}\n" for keys %scandinavia;
3323
3324
...prints the populations of the three scandinavian countries.
3325
3326
Note: The values are NOT deep copied when they are references. (Use C<< Storable::dclone() >> to do that).
3327
3328
Note2: For perl version 5.20+ subhashes (hash slices returning keys as well as values) is built in like this:
3329
3330
%scandinavia = %population{'Norway','Sweden','Denmark'};
3331
3332
=cut
3333
3334
sub subhash {
3335
1
1
1
2
my $hr=shift;
3336
1
1
my @r;
3337
1
3
for(@_){ push@r,($_=>$$hr{$_}) }
3
6
3338
1
7
return @r;
3339
}
3340
3341
=head2 hashtrans
3342
3343
B a reference to a hash of hashes
3344
3345
B a hash like the input-hash, but matrix transposed (kind of). Think of it as if X and Y has swapped places.
3346
3347
%h = ( 1 => {a=>33,b=>55},
3348
2 => {a=>11,b=>22},
3349
3 => {a=>88,b=>99} );
3350
print serialize({hashtrans(\%h)},'v');
3351
3352
Gives:
3353
3354
%v=( 'a'=>{'1'=>'33','2'=>'11','3'=>'88'},
3355
'b'=>{'1'=>'55','2'=>'22','3'=>'99'} );
3356
3357
=cut
3358
3359
#Hashtrans brukes automatisk når første argument er -1 i sub hashtabell()
3360
3361
sub hashtrans {
3362
1
1
1
5
my $h=shift;
3363
1
2
my %new;
3364
1
4
for my $k (keys%$h){
3365
3
4
my $r=$$h{$k};
3366
3
5
for(keys%$r){
3367
6
9
$new{$_}{$k}=$$r{$_};
3368
}
3369
}
3370
1
9
return %new;
3371
}
3372
3373
=head2 a2h
3374
3375
B array of arrays
3376
3377
B array of hashes
3378
3379
Transforms an array of arrays (arrayrefs) to an array of hashes (hashrefs).
3380
3381
Example:
3382
3383
my @h = a2h( ['Name', 'Age', 'Gender'], #1st row become keys
3384
['Alice', 20, 'F'],
3385
['Bob', 30, 'M'],
3386
['Eve', undef, 'F'] );
3387
3388
Result array @h:
3389
3390
(
3391
{Name=>'Alice', Age=>20, Gender=>'F'},
3392
{Name=>'Bob', Age=>30, Gender=>'M'},
3393
{Name=>'Eve', Age=>undef, Gender=>'F'},
3394
);
3395
3396
=head2 h2a
3397
3398
B array of hashes
3399
3400
B array of arrays
3401
3402
Opposite of L
3403
3404
=cut
3405
3406
sub a2h {
3407
1
1
1
25
my @col=@{shift@_};
1
4
3408
1
3
map { my%h;@h{@col}=@$_;\%h} @_;
10
6
10
15
10
15
3409
}
3410
3411
sub h2a {
3412
1
1
1
5
my %c;
3413
1
16
map $c{$_}++, keys%$_ for @_;
3414
1
0
8
my @c=sort{$c{$a}<=>$c{$b} or $a cmp $b}keys%c;
5
11
3415
1
11
(\@c,map[@$_{@c}],@_);
3416
}
3417
3418
=head1 COMPRESSION
3419
3420
L, L, L, L, L, and L
3421
compresses and uncompresses strings to save space in disk, memory,
3422
database or network transfer. Trades time for space. (Beware of wormholes)
3423
3424
=head2 zipb64
3425
3426
Compresses the input (text or binary) and returns a base64-encoded string of the compressed binary data.
3427
No known limit on input length, several MB has been tested, as long as you've got the RAM...
3428
3429
B One or two strings.
3430
3431
First argument: The string to be compressed.
3432
3433
Second argument is optional: A I string.
3434
3435
B a base64-kodet string of the compressed input.
3436
3437
The use of an optional I string will result in an even
3438
further compressed output in the dictionary string is somewhat similar
3439
to the string that is compressed (the data in the first argument).
3440
3441
If x relatively similar string are to be compressed, i.e. x number
3442
automatic of email responses to some action by a user, it will pay of
3443
to choose one of those x as a dictionary string and store it as
3444
such. (You will also use the same dictionary string when decompressing
3445
using L.
3446
3447
The returned string is base64 encoded. That is, the output is 33%
3448
larger than it has to be. The advantage is that this string more
3449
easily can be stored in a database (without the hassles of CLOB/BLOB)
3450
or perhaps easier transfer in http POST requests (it still needs some
3451
url-encoding, normally). See L and L for the
3452
same without base 64 encoding.
3453
3454
Example 1, normal compression without dictionary:
3455
3456
$txt = "Test av komprimering, hva skjer? " x 10; # ten copies of this norwegian string, $txt is now 330 bytes (or chars rather...)
3457
print length($txt)," bytes input!\n"; # prints 330
3458
$zip = zipb64($txt); # compresses
3459
print length($zip)," bytes output!\n"; # prints 65
3460
print $zip; # prints the base64 string ("noise")
3461
3462
$output=unzipb64($zip); # decompresses
3463
print "Hurra\n" if $output eq $txt; # prints Hurra if everything went well
3464
print length($output),"\n"; # prints 330
3465
3466
Example 2, same compression, now with dictionary:
3467
3468
$txt = "Test av komprimering, hva skjer? " x 10; # Same original string as above
3469
$dict = "Testing av kompresjon, hva vil skje?"; # dictionary with certain similarities
3470
# of the text to be compressed
3471
$zip2 = zipb64($txt,$dict); # compressing with $dict as dictionary
3472
print length($zip2)," bytes output!\n"; # prints 49, which is less than 65 in ex. 1 above
3473
$output=unzipb64($zip2,$dict); # uses $dict in the decompressions too
3474
print "Hurra\n" if $output eq $txt; # prints Hurra if everything went well
3475
3476
3477
Example 3, dictionary = string to be compressed: (out of curiosity)
3478
3479
$txt = "Test av komprimering, hva skjer? " x 10; # Same original string as above
3480
$zip3 = zipb64($txt,$txt); # hmm
3481
print length($zip3)," bytes output!\n"; # prints 25
3482
print "Hurra\n" if unzipb64($zip3,$txt) eq $txt; # hipp hipp ...
3483
3484
zipb64() and zipbin() is really just wrappers around L and C & co there.
3485
3486
=cut
3487
3488
sub zipb64 {
3489
9
9
1
1008
require MIME::Base64;
3490
9
898
return MIME::Base64::encode_base64(zipbin(@_));
3491
}
3492
3493
3494
=head2 zipbin
3495
3496
C does the same as C except that zipbin()
3497
does not base64 encode the result. Returns binary data.
3498
3499
See L for documentation.
3500
3501
=cut
3502
3503
sub zipbin {
3504
17
17
1
13395
require Compress::Zlib;
3505
17
30
my($data,$dict)=@_;
3506
17
50
100
145
my $x=Compress::Zlib::deflateInit(-Dictionary=>$dict||'',-Level=>Compress::Zlib::Z_BEST_COMPRESSION()) or croak();
3507
17
50
5433
my($output,$status)=$x->deflate($data); croak() if $status!=Compress::Zlib::Z_OK();
17
50749
3508
17
50
112
my($out,$status2)=$x->flush(); croak() if $status2!=Compress::Zlib::Z_OK();
17
2219
3509
17
609
return $output.$out;
3510
}
3511
3512
=head2 unzipb64
3513
3514
Opposite of L.
3515
3516
Input:
3517
3518
First argument: A string made by L
3519
3520
Second argument: (optional) a dictionary string which where used in L.
3521
3522
Output: The original string (be it text or binary).
3523
3524
See L.
3525
3526
=cut
3527
3528
sub unzipb64 {
3529
2
2
1
7
my($data,$dict)=@_;
3530
2
12
require MIME::Base64;
3531
2
32
unzipbin(MIME::Base64::decode_base64($data),$dict);
3532
}
3533
3534
=head2 unzipbin
3535
3536
C does the same as L except that C
3537
wants a pure binary compressed string as input, not base64.
3538
3539
See L for documentation.
3540
3541
=cut
3542
3543
sub unzipbin {
3544
5
5
1
291
require Compress::Zlib;
3545
5
15
require Carp;
3546
5
11
my($data,$dict)=@_;
3547
5
50
50
43
my $x=Compress::Zlib::inflateInit(-Dictionary=>$dict||'') or croak();
3548
5
594
my($output,$status)=$x->inflate($data);
3549
5
50
507
croak() if $status!=Compress::Zlib::Z_STREAM_END();
3550
5
92
return $output;
3551
}
3552
3553
=head2 gzip
3554
3555
B A string or reference to a string you want to compress. Text or binary.
3556
3557
B The binary compressed representation of that input string.
3558
3559
C is really just a wrapper for C< Compress:Zlib::memGzip() > and uses the same
3560
compression algorithm as the well known GNU program gzip found in most unix/linux/cygwin
3561
distros. Except C does this in-memory. (Both using the C-library C).
3562
3563
writefile( "file.gz", gzip("some string") );
3564
3565
=head2 gunzip
3566
3567
B A binary compressed string or a reference to such a string. I.e. something returned from
3568
C earlier or read from a C<< .gz >> file.
3569
3570
B The original larger non-compressed string. Text or binary.
3571
3572
C is a wrapper for Compress::Zlib::memGunzip()
3573
3574
print gunzip( gzip("some string") ); #some string
3575
3576
=head2 bzip2
3577
3578
Same as L and L except with a different compression algorithm (compresses more but is slower). Wrapper for Compress::Bzip2::memBzip.
3579
3580
Compared to gzip/gunzip, bzip2 compression is much slower, bunzip2 decompression not so much.
3581
3582
See also L, C, C, C.
3583
3584
writefile( "file.bz2", bzip2("some string") );
3585
print bunzip2( bzip2("some string") ); #some string
3586
3587
=head2 bunzip2
3588
3589
Decompressed something compressed by bzip2() or data from a C<.bz2> file. See L.
3590
3591
=cut
3592
3593
8
100
8
1
13352
sub gzip { my $s=shift(); eval"require Compress::Zlib" if !$INC{'Compress/Zlib.pm'}; croak "Compress::Zlib not found" if $@; Compress::Zlib::memGzip( ref($s)?$s:\$s ) }
8
50
263
8
50
108940
8
47
3594
3
50
3
1
12898
sub gunzip { my $s=shift(); eval"require Compress::Zlib" if !$INC{'Compress/Zlib.pm'}; croak "Compress::Zlib not found" if $@; Compress::Zlib::memGunzip( ref($s)?$s:\$s ) }
3
50
18
3
50
9
3
24
3595
0
0
0
1
0
sub bzip2 { my $s=shift(); eval"require Compress::Bzip2" if !$INC{'Compress/Bzip2.pm'}; croak "Compress::Bzip2 not found" if $@; Compress::Bzip2::memBzip( ref($s)?$s:\$s ) }
0
0
0
0
0
0
0
0
3596
0
0
0
1
0
sub bunzip2 { my $s=shift(); eval"require Compress::Bzip2" if !$INC{'Compress/Bzip2.pm'}; croak "Compress::Bzip2 not found" if $@; Compress::Bzip2::memBunzip( ref($s)?$s:\$s ) }
0
0
0
0
0
0
0
0
3597
3598
=head1 NET, WEB, CGI-STUFF
3599
3600
=head2 ipaddr
3601
3602
B an IP-number
3603
3604
B either an IP-address I or an empty string
3605
if the DNS lookup didn't find anything.
3606
3607
Example:
3608
3609
perl -MAcme::Tools -le 'print ipaddr("129.240.8.200")' # prints www.uio.no
3610
3611
Uses perls C internally.
3612
3613
C memoizes the results internally (using the
3614
C<%Acme::Tools::IPADDR_memo> hash) so only the first loopup on a
3615
particular IP number might take some time.
3616
3617
Some few DNS loopups can take several seconds.
3618
Most is done in a fraction of a second. Due to this slowness, medium to high traffic web servers should
3619
probably turn off hostname lookups in their logs and just log IP numbers by using
3620
C in Apache C and then use I afterwards if necessary.
3621
3622
=cut
3623
3624
our %IPADDR_memo;
3625
sub ipaddr {
3626
2
2
1
582
my $ipnr=shift;
3627
#NB, 2-tallet på neste kodelinje er ikke det samme på alle os,
3628
#men ser ut til å funke i linux og hpux. Den Riktige Måten(tm)
3629
#er konstanten AF_INET i Socket eller IO::Socket-pakken.
3630
2
66
113005
return $IPADDR_memo{$ipnr} ||= gethostbyaddr(pack("C4",split("\\.",$ipnr)),2);
3631
}
3632
3633
=head2 ipnum
3634
3635
C does the opposite of C
3636
3637
Does an attempt of converting an IP address (hostname) to an IP number.
3638
Uses DNS name servers via perls internal C.
3639
Return empty string (undef) if unsuccessful.
3640
3641
print ipnum("www.uio.no"); # prints 129.240.13.152
3642
3643
Does internal memoization via the hash C<%Acme::Tools::IPNUM_memo>.
3644
3645
=cut
3646
3647
our %IPNUM_memo;
3648
sub ipnum {
3649
1
1
1
2
my $ipaddr=shift;
3650
#croak "No $ipaddr" if !length($ipaddr);
3651
1
50
4
return $IPNUM_memo{$ipaddr} if exists $IPNUM_memo{$ipaddr};
3652
1
50704
my $h=gethostbyname($ipaddr);
3653
#croak "No ipnum for $ipaddr" if !$h;
3654
1
50
15
return if !defined $h;
3655
1
16
my $ipnum = join(".",unpack("C4",$h));
3656
1
50
35
$IPNUM_memo{$ipaddr} = $ipnum=~/^(\d+\.){3}\d+$/ ? $ipnum : undef;
3657
1
13
return $IPNUM_memo{$ipaddr};
3658
}
3659
3660
=head2 webparams
3661
3662
B (optional)
3663
3664
Zero or one input argument: A string of the same type often found behind the first question mark (C<< ? >>) in URLs.
3665
3666
This string can have one or more parts separated by C<&> chars.
3667
3668
Each part consists of C pairs (with the first C<=> char being the separation char).
3669
3670
Both C and C can be url-encoded.
3671
3672
If there is no input argument, C uses C<< $ENV{QUERY_STRING} >> instead.
3673
3674
If also C<< $ENV{QUERY_STRING} >> is lacking, C checks if C<< $ENV{REQUEST_METHOD} eq 'POST' >>.
3675
In that case C<< $ENV{CONTENT_LENGTH} >> is taken as the number of bytes to be read from C
3676
and those bytes are used as the missing input argument.
3677
3678
The environment variables QUERY_STRING, REQUEST_METHOD and CONTENT_LENGTH is
3679
typically set by a web server following the CGI standard (which Apache and
3680
most of them can do I guess) or in mod_perl by Apache. Although you are
3681
probably better off using L. Or C<< $R->args() >> or C<< $R->content() >> in mod_perl.
3682
3683
B
3684
3685
C returns a hash of the key/value pairs in the input argument. Url-decoded.
3686
3687
If an input string has more than one occurrence of the same key, that keys value in the returned hash will become concatenated each value separated by a C<,> char. (A comma char)
3688
3689
Examples:
3690
3691
use Acme::Tools;
3692
my %R=webparams();
3693
print "Content-Type: text/plain\n\n"; # or rather \cM\cJ\cM\cJ instead of \n\n to be http-compliant
3694
print "My name is $R{name}";
3695
3696
Storing those four lines in a file in the directory designated for CGI-scripts
3697
on your web server (or perhaps naming the file .cgi is enough), and C
3698
/.../cgi-bin/script> and the URL
3699
L will print
3700
C to the web page.
3701
3702
L will print C.
3703
3704
=cut
3705
3706
sub webparams {
3707
2
2
1
358
my $query=shift();
3708
2
50
7
$query=$ENV{QUERY_STRING} if !defined $query;
3709
2
50
33
7
if(!defined $query and $ENV{REQUEST_METHOD} eq "POST"){
3710
0
0
read(STDIN,$query , $ENV{CONTENT_LENGTH});
3711
0
0
$ENV{QUERY_STRING}=$query;
3712
}
3713
2
3
my %R;
3714
2
11
for(split("&",$query)){
3715
7
50
13
next if !length($_);
3716
7
15
my($nkl,$verdi)=map urldec($_),split("=",$_,2);
3717
7
100
19
$R{$nkl}=exists$R{$nkl}?"$R{$nkl},$verdi":$verdi;
3718
}
3719
2
13
return %R;
3720
}
3721
3722
=head2 urlenc
3723
3724
Input: a string
3725
3726
Output: the same string URL encoded so it can be sent in URLs or POST requests.
3727
3728
In URLs (web addresses) certain characters are illegal. For instance I and I.
3729
And certain other chars have special meaning, such as C<+>, C<%>, C<=>, C>, C<&>.
3730
3731
These illegal and special chars needs to be encoded to be sent in
3732
URLs. This is done by sending them as C<%> and two hex-digits. All
3733
chars can be URL encodes this way, but it's necessary just on some.
3734
3735
Example:
3736
3737
$search="Østdal, Åge";
3738
my $url="http://machine.somewhere.com/search?q=" . urlenc($search);
3739
print $url;
3740
3741
Prints C<< http://machine.somewhere.com/search?q=%D8stdal%2C%20%C5ge >>
3742
3743
=cut
3744
3745
sub urlenc {
3746
4
4
1
787
my $str=shift;
3747
4
13
$str=~s/([^\w\-\.\/\,\[\]])/sprintf("%%%02x",ord($1))/eg; #more chars is probably legal...
622
802
3748
4
21
return $str;
3749
}
3750
3751
=head2 urldec
3752
3753
Opposite of L.
3754
3755
Example, this returns 'C< ø>'. That is space and C<< ø >>.
3756
3757
urldec('+%C3')
3758
3759
=cut
3760
3761
sub urldec {
3762
14
14
1
11
my $str=shift;
3763
14
13
$str=~s/\+/ /gs;
3764
14
75
$str=~s/%([a-f\d]{2})/pack("C", hex($1))/egi;
623
823
3765
14
26
return $str;
3766
}
3767
3768
=head2 ht2t
3769
3770
C is short for I.
3771
3772
This sub extracts an html-C<< >>s and returns its C<< s >>
3773
and C<< s >> as an array of arrayrefs. And strips away any html
3774
inside the C<< s >> as well.
3775
3776
my @table = ht2t($html,'some string occuring before the you want');
3777
3778
Input: One or two arguments.
3779
3780
First argument: the html where a C<< >> is to be found and converted.
3781
3782
Second argument: (optional) If the html contains more than one C<<
3783
>>, and you do not want the first one, applying a second
3784
argument is a way of telling C which to capture: the one with this word
3785
or string occurring before it.
3786
3787
Output: An array of arrayrefs.
3788
3789
C is a quick and dirty way of scraping (or harvesting as it is
3790
also called) data from a web page. Look too L to do this
3791
more accurate.
3792
3793
Example:
3794
3795
use Acme::Tools;
3796
use LWP::Simple;
3797
my $url = "http://en.wikipedia.org/wiki/List_of_countries_by_population";
3798
for( ht2t( get($url), "Countries" ) ) {
3799
my($rank, $country, $pop) = @$_;
3800
$pop =~ s/,//g;
3801
printf "%3d | %-32s | %9d\n", @$_ if $pop>0;
3802
}
3803
3804
Output:
3805
3806
1 | China | 1367740000
3807
2 | India | 1262090000
3808
3 | United States | 319043000
3809
4 | Indonesia | 252164800
3810
5 | Brazil | 203404000
3811
3812
...and so on.
3813
3814
=cut
3815
3816
sub ht2t {
3817
9
100
100
9
1
274
my($f,$s,$r)=@_; 1>@_||@_>3 and croak; $s='' if @_==1;
9
100
362
7
19
3818
7
136
$f=~s,.*?($s).*?(
3819
33
33
22469
my $e=0;$e++ while index($f,$s=chr($e))>=$[;
33
9306
33
95235
7
9
7
49
3820
7
112
$f=~s//\l$1$s/gsi;
3821
7
45
$f=~s/\s*<.*?>\s*/ /gsi;
3822
7
81
my @t=split("r$s",$f);shift @t;
7
9
3823
20
20
19
$r||=sub{s/&(#160|nbsp);/ /g;s/&/&/g;s/^\s*(.*?)\s*$/$1/s;
20
21
20
53
3824
7
100
100
42
s/(\d) (\d)/$1$2/g if /^[\d \.\,]+$/};
20
46
3825
7
15
for(@t){my @r=split/[dh]$s/;shift@r;$_=[map{&$r;$_}@r]}
9
54
9
7
9
14
20
21
20
46
3826
7
46
@t;
3827
}
3828
3829
=head1 FILES, DIRECTORIES
3830
3831
=head2 writefile
3832
3833
Justification:
3834
3835
Perl needs three or four operations to make a file out of a string:
3836
3837
open my $FILE, '>', $filename or die $!;
3838
print $FILE $text;
3839
close($FILE);
3840
3841
This is way simpler:
3842
3843
writefile($filename,$text);
3844
3845
Sub writefile opens the file i binary mode (C) and has two usage modes:
3846
3847
B Two arguments
3848
3849
B is the filename. If the file exists, its overwritten.
3850
If the file can not be opened for writing, a die (a croak really) happens.
3851
3852
B is one of:
3853
3854
=over 4
3855
3856
=item * Either a scaler. That is a normal string to be written to the file.
3857
3858
=item * Or a reference to a scalar. That referred text is written to the file.
3859
3860
=item * Or a reference to an array of scalars. This array is the written to the
3861
file element by element and C<< \n >> is automatically appended to each element.
3862
3863
=back
3864
3865
Alternativelly, you can write several files at once.
3866
3867
Example, this:
3868
3869
writefile('file1.txt','The text....tjo');
3870
writefile('file2.txt','The text....hip');
3871
writefile('file3.txt','The text....and hop');
3872
3873
...is the same as this:
3874
3875
writefile([
3876
['file1.txt','The text....tjo'],
3877
['file2.txt','The text....hip'],
3878
['file3.txt','The text....and hop'],
3879
]);
3880
3881
B Nothing (for the time being). Cs (C really) if something goes wrong.
3882
3883
=cut
3884
3885
#todo: use openstr() as in readfile(), transparently gzip .gz filenames and so on
3886
sub writefile {
3887
9
9
1
70274
my($filename,$text)=@_;
3888
9
50
38
if(ref($filename) eq 'ARRAY'){
3889
0
0
writefile(@$_) for @$filename;
3890
0
0
return;
3891
}
3892
9
50
33
521
open(WRITEFILE,">",$filename) and binmode(WRITEFILE) or croak($!);
3893
9
50
33
54
if(!defined $text or !ref($text)){
0
0
3894
9
746
print WRITEFILE $text;
3895
}
3896
elsif(ref($text) eq 'SCALAR'){
3897
0
0
print WRITEFILE $$text;
3898
}
3899
elsif(ref($text) eq 'ARRAY'){
3900
0
0
print WRITEFILE "$_\n" for @$text;
3901
}
3902
else {
3903
0
0
croak;
3904
}
3905
9
201
close(WRITEFILE);
3906
9
1419
return;
3907
}
3908
3909
=head2 readfile
3910
3911
Just as with L you can read in a whole file in one operation with C. Instead of:
3912
3913
open my $FILE,'<', $filename or die $!;
3914
my $data = join"",<$FILE>;
3915
close($FILE);
3916
3917
This is simpler:
3918
3919
my $data = readfile($filename);
3920
3921
B
3922
3923
Reading the content of the file to a scalar variable: (Any content in C<$data> will be overwritten)
3924
3925
my $data;
3926
readfile('filename.txt',\$data);
3927
3928
Reading the lines of a file into an array:
3929
3930
my @lines;
3931
readfile('filnavn.txt',\@lines);
3932
for(@lines){
3933
...
3934
}
3935
3936
Note: Chomp is done on each line. That is, any newlines (C<< \n >>) will be removed.
3937
If C<@lines> is non-empty, this will be lost.
3938
3939
Sub readfile is context aware. If an array is expected it returns an array of the lines without a trailing C<< \n >>.
3940
The last example can be rewritten:
3941
3942
for(readfile('filnavn.txt')){
3943
...
3944
}
3945
3946
With two input arguments, nothing (undef) is returned from C.
3947
3948
=cut
3949
3950
#http://blogs.perl.org/users/leon_timmermans/2013/05/why-you-dont-need-fileslurp.html
3951
#todo: readfile with grep-filter code ref in a third arg (avoid reading all into mem)
3952
3953
sub readfile {
3954
12
12
1
8067
my($filename,$ref)=@_;
3955
12
100
25
if(@_==1){
3956
6
100
15
if(wantarray){ my @data; readfile($filename,\@data); return @data }
1
1
1
4
1
3855
3957
5
5
else { my $data; readfile($filename,\$data); return $data }
5
343
5
400
3958
}
3959
else {
3960
6
50
35
open my $fh,openstr($filename) or croak("ERROR: readfile $! $?");
3961
6
100
31
if ( ref($ref) eq 'SCALAR') { $$ref=join"",<$fh> }
5
50
5383
3962
1
12
elsif( ref($ref) eq 'ARRAY' ) { while(my $l=<$fh>){ chomp($l); push @$ref, $l } }
20003
12022
20003
31678
3963
0
0
else { croak "ERROR: Second arg to readfile should be a ref to a scalar og array" }
3964
6
766
close($fh);
3965
6
33
return;#?
3966
}
3967
}
3968
3969
=head2 readdirectory
3970
3971
B
3972
3973
Name of a directory.
3974
3975
B
3976
3977
A list of all files in it, except of C<.> and C<..> (on linux/unix systems, all directories have a C<.> and C<..> directory).
3978
3979
The names of all types of files are returned: normal files, directories, symbolic links,
3980
pipes, semaphores. That is every thing shown by C except C<.> and C<..>
3981
3982
C do not recurce down into subdirectories (but see example below).
3983
3984
B
3985
3986
my @files = readdirectory("/tmp");
3987
3988
B
3989
3990
Sometimes calling the built ins C, C and C seems a tad tedious, since this:
3991
3992
my $dir="/usr/bin";
3993
opendir(D,$dir);
3994
my @files=map "$dir/$_", grep {!/^\.\.?$/} readdir(D);
3995
closedir(D);
3996
3997
Is the same as this:
3998
3999
my @files=readdirectory("/usr/bin");
4000
4001
See also: L
4002
4003
B
4004
4005
On huge directories with perhaps tens or houndreds of thousands of
4006
files, readdirectory() will consume more memory than perls
4007
opendir/readdir. This isn't usually a concern anymore for modern
4008
computers with gigabytes of RAM, but might be the rationale behind
4009
Perls more tedious way created in the 80s. The same argument goes for
4010
file slurping. On the other side it's also a good practice to never
4011
assume to much on available memory and the number of files if you
4012
don't know for certain that enough memory is available whereever your
4013
code is run or that the size of the directory is limited.
4014
4015
B
4016
4017
How to get all files in the C directory including all subdirectories below of any depth:
4018
4019
my @files=("/tmp");
4020
map {-d $_ and unshift @files,$_ or push @files,$_} readdirectory(shift(@files)) while -d $files[0];
4021
4022
...or to avoid symlinks and only get real files:
4023
4024
map {-d and !-l and unshift @files,$_ or -f and !-l and push @files,$_} readdirectory(shift(@files)) while -d $files[0];
4025
4026
=cut
4027
4028
sub readdirectory {
4029
0
0
1
0
my $dir=shift;
4030
0
0
opendir(my $D,$dir);
4031
0
0
my @filer=map "$dir/$_", grep {!/^\.\.?$/} readdir($D);
0
0
4032
0
0
closedir($D);
4033
0
0
return @filer;
4034
}
4035
4036
=head2 basename
4037
4038
The basename and dirname functions behaves like the *nix shell commands with the same names.
4039
4040
B One or two arguments: Filename and an optional suffix
4041
4042
B Returns the filename with any directory and (if given) the suffix removed.
4043
4044
basename('/usr/bin/perl') # returns 'perl'
4045
basename('/usr/local/bin/report.pl','.pl') # returns 'report' since .pl at the end is removed
4046
basename('report2.pl','.pl') # returns 'report2'
4047
basename('report2.pl','.\w+') # returns 'report2.pl', probably not what you meant
4048
basename('report2.pl',qr/.\w+/) # returns 'report2', use qr for regex
4049
4050
=head2 dirname
4051
4052
B A filename including path
4053
4054
B Removes the filename path and returns just the directory path up until but not including
4055
the last /. Return just a one char C<< . >> (period string) if there is no directory in the input.
4056
4057
dirname('/usr/bin/perl') # returns '/usr/bin'
4058
dirname('perl') # returns '.'
4059
4060
=head2 username
4061
4062
Returns the current linux/unix username, for example the string root
4063
4064
print username(); #just (getpwuid($<))[0] but more readable perhaps
4065
4066
=cut
4067
4068
8
100
8
1
1407
sub basename { my($f,$s)=(@_,'');$s=quotemeta($s)if!ref($s);$f=~m,^(.*/)?([^/]*?)($s)?$,;$2 }
8
21
8
146
8
24
4069
2
100
66
2
1
178
sub dirname { $_[0]=~m,^(.*)/,;defined($1) && length($1) ? $1 : '.' }
2
17
4070
0
0
1
0
sub username { (getpwuid($<))[0] }
4071
4072
=head2 wipe
4073
4074
Deletes a file by "wiping" it on the disk. Overwrites the file before deleting. (May not work properly on SSDs)
4075
4076
B
4077
* Arg 1: A filename
4078
* Optional arg 2: number of times to overwrite file. Default is 3 if omitted, 0 or undef
4079
* Optional arg 3: keep (true/false), wipe() but no delete of file
4080
4081
B Same as the C (remove file): 1 for success, 0 or false for failure.
4082
4083
See also: L, L
4084
4085
=cut
4086
4087
sub wipe {
4088
2
2
1
3050
my($file,$times,$keep)=@_;
4089
2
100
8
$times||=3;
4090
2
50
33
48
croak "ERROR: File $file nonexisting\n" if not -f $file or not -e $file;
4091
2
13
my $size=-s$file;
4092
2
50
52
open my $WIFH, '+<', $file or croak "ERROR: Unable to open $file: $!\n";
4093
2
5
binmode($WIFH);
4094
2
5
for(1..$times){
4095
4
13
my $block=chr(int(rand(256))) x 1024;#hm
4096
4
8
for(0..($size/1024)){
4097
73
253
seek($WIFH,$_*1024,0);
4098
73
76
print $WIFH $block;
4099
}
4100
}
4101
2
41
close($WIFH);
4102
2
100
109
$keep || unlink($file);
4103
}
4104
4105
=head2 chall
4106
4107
Does chmod + utime + chown on one or more files.
4108
4109
Returns the number of files of which those operations was successful.
4110
4111
Mode, uid, gid, atime and mtime are set from the array ref in the first argument.
4112
4113
The first argument references an array which is exactly like an array returned from perls internal C -function.
4114
4115
Example:
4116
4117
my @stat=stat($filenameA);
4118
chall( \@stat, $filenameB, $filenameC, ... ); # by stat-array
4119
chall( $filenameA, $filenameB, $filenameC, ... ); # by file name
4120
4121
Copies the chmod, owner, group, access time and modify time from file A to file B and C.
4122
4123
See C, C, C, C
4124
4125
=cut
4126
4127
4128
sub chall {
4129
my($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,$atime,$mtime,$ctime,$blksize,$blocks )
4130
12
100
12
1
10856
= ref($_[0]) ? @{shift()} : stat(shift());
1
3
4131
12
20
my $successful=0;
4132
12
50
33
31
for(@_){ chmod($mode,$_) && utime($atime,$mtime,$_) && chown($uid,$gid,$_) && $successful++ }
12
33
607
4133
12
45
return $successful;
4134
}
4135
4136
=head2 makedir
4137
4138
Input: One or two arguments.
4139
4140
Works like perls C except that C will create nesessary parent directories if they dont exists.
4141
4142
First input argument: A directory name (absolute, starting with C< / > or relative).
4143
4144
Second input argument: (optional) permission bits. Using the normal C<< 0777^umask() >> as the default if no second input argument is provided.
4145
4146
Example:
4147
4148
makedir("dirB/dirC")
4149
4150
...will create directory C if it does not already exists, to be able to create C inside C.
4151
4152
Returns true on success, otherwise false.
4153
4154
C memoizes directories it has checked for existence before (trading memory and for speed).
4155
Thus directories removed during running the script is not discovered by makedir.
4156
4157
See also C<< perldoc -f mkdir >>, C<< man umask >>
4158
4159
=cut
4160
4161
our %MAKEDIR;
4162
4163
sub makedir {
4164
0
0
1
0
my($d,$p,$dd)=@_;
4165
0
0
0
$p=0777^umask() if !defined$p;
4166
(
4167
$MAKEDIR{$d} or -d$d or mkdir($d,$p) #or croak("mkdir $d, $p")
4168
or ($dd)=($d=~m,^(.+)/+([^/]+)$,) and makedir($dd,$p) and mkdir($d,$p) #or die;
4169
0
0
0
0
) and ++$MAKEDIR{$d};
0
0
0
0
4170
}
4171
4172
=head2 md5sum
4173
4174
B a filename.
4175
4176
B a string of 32 hexadecimal chars from 0-9 or a-f.
4177
4178
Example, the md5sum gnu/linux command without options could be implementet like this:
4179
4180
use Acme::Tools;
4181
print eval{ md5sum($_)." $_\n" } || $@ for @ARGV;
4182
4183
This sub requires L, which is a core perl-module since
4184
version 5.?.? It does not slurp the files or spawn new processes.
4185
4186
=cut
4187
4188
sub md5sum {
4189
2
2
1
16
require Digest::MD5;
4190
2
4
my $fn=shift;
4191
2
100
176
croak "md5sum: $fn is a directory (no md5sum)" if -d $fn;
4192
1
50
23
open my $FH, '<', $fn or croak "Could not open file $fn for md5sum() $!";
4193
1
2
binmode($FH);
4194
1
2
my $r = eval { Digest::MD5->new->addfile($FH)->hexdigest };
1
17
4195
1
50
3
croak "md5sum on $fn failed ($@)\n" if $@;
4196
1
13
$r;
4197
}
4198
4199
=head2 read_conf
4200
4201
B A file name or a reference to a string with settings in the format described below.
4202
4203
B A reference to a hash. This hash will have the settings from the file (or stringref).
4204
The hash do not have to be empty beforehand.
4205
4206
Returns a hash with the settings as in this examples:
4207
4208
my %conf = read_conf('/etc/your/thing.conf');
4209
print $conf{sectionA}{knobble}; #prints ABC if the file is as shown below
4210
print $conf{sectionA}{gobble}; #prints ZZZ, the last gobble
4211
print $conf{switch}; #prints OK here as well, unsectioned value
4212
print $conf{part2}{password}; #prints oh:no= x
4213
4214
File use for the above example:
4215
4216
switch: OK #before first section, the '' (empty) section
4217
[sectionA]
4218
knobble: ABC
4219
gobble: XYZ #this gobble is overwritten by the gobble on the next line
4220
gobble: ZZZ
4221
[part2]
4222
password: oh:no= x #should be better
4223
text: { values starting with { continues
4224
until reaching a line with }
4225
4226
Everything from # and behind is regarded comments and ignored. Comments can be on any line.
4227
To keep a # char, put a \ in front of it.
4228
4229
A C< : > or C< = > separates keys and values. Spaces at the beginning or end of lines are
4230
ignored (after removal of #comments), as are any spaces before and after : and = separators.
4231
4232
Empty lines or lines with no C< : > or C< = > is also ignored. Keys and values can contain
4233
internal spaces and tabs, but not at the beginning or end.
4234
4235
Multi-line values must start and end with { and }. Using { and } keep spaces at the start
4236
or end in both one-line and multi-line values.
4237
4238
Sections are marked with C<< [sectionname] >>. Section names, keys and values is case
4239
sensitive. C above the first section or below and empty C<< [] >> is placed
4240
both in the empty section in the returned hash and as top level key/values.
4241
4242
C can be a simpler alternative to the core module L which has
4243
its own hassles.
4244
4245
$Acme::Tools::Read_conf_empty_section=1; #default 0 (was 1 in version 0.16)
4246
my %conf = read_conf('/etc/your/thing.conf');
4247
print $conf{''}{switch}; #prints OK with the file above
4248
print $conf{switch}; #prints OK here as well
4249
4250
=cut
4251
4252
our $Read_conf_empty_section=0;
4253
sub read_conf {
4254
3
3
1
15
my($fn,$hr)=(@_,{});
4255
3
100
11
my $conf=ref($fn)?$$fn:readfile($fn);
4256
3
104
$conf=~s,\s*(?
4257
3
29
my($section,@l)=('',split"\n",$conf);
4258
3
9
while(@l) {
4259
63
42
my $l=shift@l;
4260
63
100
219
if( $l=~/^\s*\[\s*(.*?)\s*\]/ ) {
100
4261
12
12
$section=$1;
4262
12
100
41
$$hr{$1}||={};
4263
}
4264
elsif( $l=~/^\s*([^\:\=]+)[:=]\s*(.*?)\s*$/ ) {
4265
33
66
33
61
my $ml=sub{my$v=shift;$v.="\n".shift@l while $v=~/^\{[^\}]*$/&&@l;$v=~s/^\{(.*)\}\s*$/$1/s;$v=~s,\\#,#,g;$v};
33
34
33
106
33
42
33
22
33
41
4266
33
39
my $v=&$ml($2);
4267
33
100
66
89
$$hr{$section}{$1}=$v if length($section) or $Read_conf_empty_section;
4268
33
100
108
$$hr{$1}=$v if !length($section);
4269
}
4270
}
4271
3
20
%$hr;
4272
}
4273
# my $incfn=sub{return $1 if $_[0]=~m,^(/.+),;my$f=$fn;$f=~s,[^/]+$,$_[0],;$f};
4274
# s,]+)>,"".readfile(&$incfn($1)),eg; #todo
4275
4276
4277
=head2 openstr
4278
4279
# returned from openstr:
4280
open my $FH, openstr("fil.txt") or die; # fil.txt
4281
open my $FH, openstr("fil.gz") or die; # zcat fil.gz |
4282
open my $FH, openstr("fil.bz2") or die; # bzcat fil.bz2 |
4283
open my $FH, openstr("fil.xz") or die; # xzcat fil.xz |
4284
open my $FH, openstr(">fil.txt") or die; # > fil.txt
4285
open my $FH, openstr(">fil.gz") or die; # | gzip > fil.gz
4286
open my $FH, openstr(">fil.bz2") or die; # | bzip2 > fil.bz2
4287
open my $FH, openstr(">fil.xz") or die; # | xz > fil.bz2
4288
4289
Environment variable PATH is used. So in the examples above, /bin/gzip
4290
is returned instead of gzip if /bin is the first directory in
4291
$ENV{PATH} containing an executable file gzip. Dirs /usr/bin, /bin and
4292
/usr/local/bin is added to PATH in openstr(). They are checked even if
4293
PATH is empty.
4294
4295
=cut
4296
4297
our @Openstrpath=(grep$_,split(":",$ENV{PATH}),qw(/usr/bin /bin /usr/local/bin));
4298
sub openstr {
4299
22
22
1
2944
my($fn,$ext)=(shift()=~/^(.*?(?:\.(t?gz|bz2|xz))?)$/i);
4300
22
100
235
return $fn if !$ext;
4301
12
50
12
37
my $prog=sub{@Openstrpath or return $_[0];(grep -x$_, map "$_/$_[0]", @Openstrpath)[0] or croak"$_[0] not found"};
12
100
20
12
1316
4302
$fn =~ /^\s*>/
4303
? "| ".(&$prog({qw/gz gzip bz2 bzip2 xz xz tgz gzip/ }->{lc($ext)})).$fn
4304
12
100
68
: &$prog({qw/gz zcat bz2 bzcat xz xzcat tgz zcat/}->{lc($ext)})." $fn |";
4305
}
4306
4307
=head1 TIME FUNCTIONS
4308
4309
=head2 tms
4310
4311
Timestring, works somewhat like the Gnu/Linux C command and Oracle's C
4312
4313
Converts timestamps to more readable forms of time strings.
4314
4315
Converts seconds since I and time strings on the form C to other forms.
4316
4317
B One, two or three arguments.
4318
4319
B A format string.
4320
4321
B An epock C number or a time
4322
string of the form YYYYMMDD-HH24:MI:SS or YYYYMMDDTHH:MI:SS or
4323
YYYY-MM-DDTHH:MI:SS (in which T is litteral and HH is the 24-hour
4324
version of hours) or YYYYMMDD. Uses the current C if the
4325
second argument is missing.
4326
4327
TODO: Formats with % as in C (C<%Y%m%d> and so on)
4328
4329
B True or false. If true and first argument
4330
is eight digits: Its interpreted as a date like YYYYMMDD time string,
4331
not an epoch time. If true and first argument is six digits its
4332
interpreted as a date like DDMMYY (not YYMMDD!).
4333
4334
B a date or clock string on the wanted form.
4335
4336
B
4337
4338
Prints C<< 3. july 1997 >> if thats the dato today:
4339
4340
perl -MAcme::Tools -le 'print timestr("D. month YYYY")'
4341
4342
print tms("HH24:MI"); # prints 23:55 if thats the time now
4343
tms("HH24:MI",time()); # ...same,since time() is the default
4344
tms("HH:MI",time()-5*60); # 23:50 if that was the time 5 minutes ago
4345
tms("HH:MI",time()-5*60*60); # 18:55 if thats the time 5 hours ago
4346
tms("Day Month Dth YYYY HH:MI"); # Saturday July 1st 2004 23:55 (big S, big J)
4347
tms("Day D. Month YYYY HH:MI"); # Saturday 8. July 2004 23:55 (big S, big J)
4348
tms("DAY D. MONTH YYYY HH:MI"); # SATURDAY 8. JULY 2004 23:55 (upper)
4349
tms("dy D. month YYYY HH:MI"); # sat 8. july 2004 23:55 (small s, small j)
4350
tms("Dy DD. MON YYYY HH12:MI am"); # Sat 08. JUL 2004 11:55 pm (HH12, am becomes pm if after 12)
4351
tms("DD-MON-YYYY"); # 03-MAY-2004 (mon, english)
4352
4353
The following list of codes in the first argument will be replaced:
4354
4355
YYYY Year, four digits
4356
YY Year, two digits, i.e. 04 instead of 2004
4357
yyyy Year, four digits, but nothing if its the current year
4358
YYYY|HH:MI Year if its another year than the current, a time in hours and minutes elsewise
4359
MM Month, two digits. I.e. 08 for August
4360
DD Day of month, two digits. I.e. 01 (not 1) for the first day in a month
4361
D Day of month, one digit. I.e. 1 (not 01)
4362
HH Hour. From 00 to 23.
4363
HH24 Same as HH.
4364
HH12 12 becomes 12 (never 00), 13 becomes 01, 14 02 and so on.
4365
Note: 00 after midnight becomes 12 (am). Tip: always include the code
4366
am in a format string that uses HH12.
4367
MI Minutt. Fra 00 til 59.
4368
SS Sekund. Fra 00 til 59.
4369
am Becomes am or pm
4370
pm Same
4371
AM Becomes AM or PM (upper case)
4372
PM Same
4373
4374
Month The full name of the month in English from January to December
4375
MONTH Same in upper case (JANUARY)
4376
month Same in lower case (january)
4377
Mont Jan Feb Mars Apr May June July Aug Sep Oct Nov Dec
4378
Mont. Jan. Feb. Mars Apr. May June July Aug. Sep. Oct. Nov. Dec. (always four chars)
4379
Mon Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec (always three chars)
4380
4381
Day The full name of the weekday. Sunday to Saturday
4382
Dy Three letters: Sun Mon Tue Wed Thu Fri Sat
4383
DAY Upper case
4384
DY Upper case
4385
Dth 1st 2nd 3rd 4th 5th ... 11th 12th ... 20th 21st 22nd 23rd 24th ... 30th 31st
4386
4387
WW Week number of the year 01-53 according to the ISO8601-definition (which most countries uses)
4388
WWUS Week number of the year 01-53 according to the most used definition in the USA.
4389
Other definitions also exists.
4390
4391
epoch Converts a time string from YYYYMMDD-HH24:MI:SS, YYYYMMDD-HH24:MI:SS, YYYYMMDDTHH:MI:SS,
4392
YYYY-MM-DDTHH:MI:SS or YYYYMMDD to the number of seconds since January 1st 1970.
4393
Commonly known as the Unix epoch.
4394
4395
JDN Julian day number. Integer. The number of days since the day starting at noon on January 1 4713 BC
4396
JD Same as JDN but a float accounting for the time of day
4397
4398
TODO: sub smt() (tms backward... or something better named, converts the other way)
4399
As to_date and to_char in Oracle. Se maybe L instead
4400
4401
B (optional) Is_date. False|true, default false. If true, the second argument is
4402
interpreted as a date of the form YYYYMMDD, not as a number of seconds since epoch (January 1st 1970).
4403
4404
=cut
4405
4406
#Se også L og L
4407
4408
our $Tms_pattern;
4409
our %Tms_str=
4410
('MÅNED' => [4, 'JANUAR','FEBRUAR','MARS','APRIL','MAI','JUNI','JULI',
4411
'AUGUST','SEPTEMBER','OKTOBER','NOVEMBER','DESEMBER' ],
4412
'Måned' => [4, 'Januar','Februar','Mars','April','Mai','Juni','Juli',
4413
'August','September','Oktober','November','Desember'],
4414
'måned' => [4, 'januar','februar','mars','april','mai','juni','juli',
4415
'august','september','oktober','november','desember'],
4416
'MÅNE.' => [4, 'JAN.','FEB.','MARS','APR.','MAI','JUNI','JULI','AUG.','SEP.','OKT.','NOV.','DES.'],
4417
'Måne.' => [4, 'Jan.','Feb.','Mars','Apr.','Mai','Juni','Juli','Aug.','Sep.','Okt.','Nov.','Des.'],
4418
'måne.' => [4, 'jan.','feb.','mars','apr.','mai','juni','juli','aug.','sep.','okt.','nov.','des.'],
4419
'MÅNE' => [4, 'JAN','FEB','MARS','APR','MAI','JUNI','JULI','AUG','SEP','OKT','NOV','DES'],
4420
'Måne' => [4, 'Jan','Feb','Mars','Apr','Mai','Juni','Juli','Aug','Sep','Okt','Nov','Des'],
4421
'måne' => [4, 'jan','feb','mars','apr','mai','juni','juli','aug','sep','okt','nov','des'],
4422
'MÅN' => [4, 'JAN','FEB','MAR','APR','MAI','JUN','JUL','AUG','SEP','OKT','NOV','DES'],
4423
'Mån' => [4, 'Jan','Feb','Mar','Apr','Mai','Jun','Jul','Aug','Sep','Okt','Nov','Des'],
4424
'mån' => [4, 'jan','feb','mar','apr','mai','jun','jul','aug','sep','okt','nov','des'],
4425
'MONTH' => [4, 'JANUARY','FEBRUARY','MARCH','APRIL','MAY','JUNE','JULY',
4426
'AUGUST','SEPTEMBER','OCTOBER','NOVEMBER','DECEMBER'],
4427
'Month' => [4, 'January','February','March','April','May','June','July',
4428
'August','September','October','November','December'],
4429
'month' => [4, 'january','february','march','april','may','june','july',
4430
'august','september','october','november','december'],
4431
'MONT.' => [4, 'JAN.','FEB.','MAR.','APR.','MAY','JUNE','JULY','AUG.','SEP.','OCT.','NOV.','DEC.'],
4432
'Mont.' => [4, 'Jan.','Feb.','Mar.','Apr.','May','June','July','Aug.','Sep.','Oct.','Nov.','Dec.'],
4433
'mont.' => [4, 'jan.','feb.','mar.','apr.','may','june','july','aug.','sep.','oct.','nov.','dec.'],
4434
'MONT' => [4, 'JAN','FEB','MAR','APR','MAY','JUNE','JULY','AUG','SEP','OCT','NOV','DEC'],
4435
'Mont' => [4, 'Jan','Feb','Mar','Apr','May','June','July','Aug','Sep','Oct','Nov','Dec'],
4436
'mont' => [4, 'jan','feb','mar','apr','may','june','july','aug','sep','oct','nov','dec'],
4437
'MON' => [4, 'JAN','FEB','MAR','APR','MAY','JUN','JUL','AUG','SEP','OCT','NOV','DEC'],
4438
'Mon' => [4, 'Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'],
4439
'mon' => [4, 'jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec'],
4440
'DAY' => [6, 'SUNDAY','MONDAY','TUESDAY','WEDNESDAY','THURSDAY','FRIDAY','SATURDAY'],
4441
'Day' => [6, 'Sunday','Monday','Tuesday','Wednesday','Thursday','Friday','Saturday'],
4442
'day' => [6, 'sunday','monday','tuesday','wednesday','thursday','friday','saturday'],
4443
'DY' => [6, 'SUN','MON','TUE','WED','THU','FRI','SAT'],
4444
'Dy' => [6, 'Sun','Mon','Tue','Wed','Thu','Fri','Sat'],
4445
'dy' => [6, 'sun','mon','tue','wed','thu','fri','sat'],
4446
'DAG' => [6, 'SØNDAG','MANDAG','TIRSDAG','ONSDAG','TORSDAG','FREDAG','LØRDAG'],
4447
'Dag' => [6, 'Søndag','Mandag','Tirsdag','Onsdag','Torsdag','Fredag','Lørdag'],
4448
'dag' => [6, 'søndag','mandag','tirsdag','onsdag','torsdag','fredag','lørdag'],
4449
'DG' => [6, 'SØN','MAN','TIR','ONS','TOR','FRE','LØR'],
4450
'Dg' => [6, 'Søn','Man','Tir','Ons','Tor','Fre','Lør'],
4451
'dg' => [6, 'søn','man','tir','ons','tor','fre','lør'],
4452
);
4453
my $_tms_inited=0;
4454
sub tms_init {
4455
1
50
1
0
2
return if $_tms_inited++;
4456
1
3
for(qw(MAANED Maaned maaned MAAN Maan maan),'MAANE.','Maane.','maane.'){
4457
9
15
$Tms_str{$_}=$Tms_str{replace($_,"aa","å","AA","Å")};
4458
}
4459
45
40
$Tms_pattern=join("|",map{quotemeta($_)}
4460
1
12
sort{length($b)<=>length($a)}
194
115
4461
keys %Tms_str);
4462
#uten sort kan "måned" bli "mared", fordi "mån"=>"mar"
4463
}
4464
4465
0
0
sub totime {
4466
4467
}
4468
4469
sub date_ok {
4470
0
0
0
0
my($y,$m,$d)=@_;
4471
0
0
0
0
return date_ok($1,$2,$3) if @_==1 and $_[0]=~/^(\d{4})(\d\d)(\d\d)$/;
4472
0
0
0
return 0 if $y!~/^\d\d\d\d$/;
4473
0
0
0
0
return 0 if $m<1||$m>12||$d<1||$d>(31,$y%4||$y%100==0&&$y%400?28:29,31,30,31,30,31,31,30,31,30,31)[$m-1];
0
0
0
0
4474
0
0
return 1;
4475
}
4476
4477
sub weeknum {
4478
0
0
0
0
0
return weeknum(tms('YYYYMMDD')) if @_<1;
4479
0
0
0
0
return weeknum($1,$2,$3) if @_==1 and $_[0]=~/^(\d{4})(\d\d)(\d\d)$/;
4480
0
0
my($year,$month,$day)= @_;
4481
0
0
eval{
4482
0
0
0
if(@_<2){
0
4483
0
0
0
if($year=~/^\d{8}$/) { ($year,$month,$day)=unpack("A4A2A2",$year) }
0
0
0
4484
0
0
elsif($year>99999999){ ($year,$month,$day)=(localtime($year))[5,4,3]; $year+=1900; $month++ }
0
0
0
0
4485
0
0
else {die}
4486
}
4487
0
0
elsif(@_!=3){croak}
4488
0
0
0
croak if !date_ok(sprintf("%04d%02d%02d",$year,$month,$day));
4489
};
4490
0
0
0
croak "ERROR: Wrong args Acme::Tools::weeknum(".join(",",@_).")" if $@;
4491
33
33
15779
use integer;#heltallsdivisjon
33
276
33
126
4492
0
0
my $y=$year+4800-(14-$month)/12;
4493
0
0
my $j=$day+(153*($month+(14-$month)/12*12-3)+2)/5+365*$y+$y/4-$y/100+$y/400-32045;
4494
0
0
my $d=($j+31741-$j%7)%146097%36524%1461;
4495
0
0
return (($d-$d/1460)%365+$d/1460)/7+1;
4496
}
4497
4498
#perl -MAcme::Tools -le 'print "$_ ".tms($_."0501","day",1) for 2015..2026'
4499
4500
sub tms {
4501
42
50
66
42
1
5350
return undef if @_>1 and not defined $_[1]; #time=undef => undef
4502
42
100
63
if(@_==1){
4503
2
9
my @lt=localtime();
4504
2
50
5
$_[0] eq 'YYYY' and return 1900+$lt[5];
4505
2
50
4
$_[0] eq 'YYYYMMDD' and return sprintf("%04d%02d%02d",1900+$lt[5],1+$lt[4],$lt[3]);
4506
2
100
66
30
$_[0] =~ $Re_isnum and @lt=localtime($_[0]) and return sprintf("%04d%02d%02d-%02d:%02d:%02d",1900+$lt[5],1+$lt[4],@lt[3,2,1,0]);
4507
}
4508
41
44
my($format,$time,$is_date)=@_;
4509
41
100
60
$time=time_fp() if !defined$time;
4510
41
100
100
245
($time,$format)=($format,$time) if @_>=2 and $format=~/^[\d+\:\-\.]+$/; #swap /hm/
4511
41
193
my @lt=localtime($time);
4512
#todo? $is_date=0 if $time=~s/^\@(\-?\d)/$1/; #@n where n is sec since epoch makes it clear that its not a formatted, as in `date`
4513
#todo? date --date='TZ="America/Los_Angeles" 09:00 next Fri' #`info date`
4514
# Fri Nov 13 18:00:00 CET 2015
4515
#date --date="next Friday" #--date or -d
4516
#date --date="last friday"
4517
#date --date="2 days ago"
4518
#date --date="yesterday" #or tomorrow
4519
#date --date="-1 day" #date --date='10 week'
4520
4521
41
50
57
if( $is_date ){
4522
0
0
0
0
my $yy2c=sub{10+$_[0]>$lt[5]%100?"20":"19"}; #hm 10+
0
0
4523
0
0
0
$time=totime(&$yy2c($1)."$1$2$3")."000000" if $time=~/^(\d\d)(\d\d)(\d\d)$/;
4524
0
0
0
$time=totime("$1$2${3}000000") if $time=~/^((?:18|19|20)\d\d)(\d\d)(\d\d)$/; #hm 18-20?
4525
}
4526
else {
4527
41
50
90
$time = yyyymmddhh24miss_time("$1$2$3$4$5$6") #yyyymmddhh24miss_time ???
4528
if $time=~/^((?:19|20|18)\d\d) #yyyy
4529
(0[1-9]|1[012]) #mm
4530
(0[1-9]|[12]\d|3[01]) \-? #dd
4531
([01]\d|2[0-3]) \:? #hh24
4532
([0-5]\d) \:? #mi
4533
([0-5]\d) $/x; #ss
4534
}
4535
41
100
65
tms_init() if !$_tms_inited;
4536
41
100
60
return sprintf("%04d%02d%02d-%02d:%02d:%02d",1900+$lt[5],1+$lt[4],@lt[3,2,1,0]) if !$format;
4537
40
349
my %p=('%'=>'%',
4538
a=>'Dy',
4539
A=>'Day',
4540
b=>'Mon',
4541
b=>'Month',
4542
c=>'Dy Mon D HH:MI:SS YYYY',
4543
C=>'CC',
4544
d=>'DD',
4545
D=>'MM/DD/YY',
4546
e=>'D',
4547
F=>'YYYY-MM-DD',
4548
#G=>'',
4549
h=>'Month', H=>'HH24', I=>'HH12',
4550
j=>'DoY', #day of year
4551
k=>'H24', _H=>'H24',
4552
l=>'H12', _I=>'H12',
4553
m=>'MM', M=>'MI',
4554
n=>"\n",
4555
#N=>'NS', #sprintf%09d,1e9*(time_fp()-time()) #000000000..999999999
4556
p=>'AM', #AM|PM upper (yes, opposite: date +%H%M%S%P%p)
4557
P=>'am', #am|pm lower
4558
S=>'SS',
4559
t=>"\t",
4560
T=>'HH24:MI:SS',
4561
u=>'DoW', #day of week 1..7, 1=mon 7=sun
4562
w=>'DoW0', #day of week 0..6, 1=mon 0=sun
4563
#U=>'WoYs', #week num of year 00..53, sunday as first day of week
4564
#V=>'UKE', #ISO week num of year 01..53, monday as first day of week
4565
#W=>'WoYm', #week num of year 00..53, monday as first day of week, not ISO!
4566
#x=>$ENV{locale's date representation}, #e.g. MM/DD/YY
4567
#X=>$ENV{locale's time representation}, #e.g. HH/MI/SS
4568
y=>'YY',
4569
Y=>'YYYY',
4570
#z=>'TZHHMI', #time zone hour minute e.g. -0430
4571
#':z'=>'TZHH:MI',
4572
#'::z'=>'TZHH:MI:SS',
4573
#':::z'=>'TZ', #number of :'s necessary precision, e.g. -02 or +03:30
4574
#Z=>'TZN', #e.g. CET, EDT, ...
4575
);
4576
40
178
my $pkeys=join"|",keys%p;
4577
40
1218
$format=~s,\%($pkeys),$p{$1},g;
4578
40
274
$format=~s/($Tms_pattern)/$Tms_str{$1}[1+$lt[$Tms_str{$1}[0]]]/g;
4579
40
51
$format=~s/YYYY / 1900+$lt[5] /gxe;
13
26
4580
40
0
31
$format=~s/(\s?)yyyy / $lt[5]==(localtime)[5]?"":$1.(1900+$lt[5])/gxe;
0
0
4581
40
39
$format=~s/YY / sprintf("%02d",$lt[5]%100) /gxei;
1
5
4582
40
37
$format=~s|CC | sprintf("%02d",(1900+$lt[5])/100) |gxei;
2
10
4583
40
32
$format=~s/MM / sprintf("%02d",$lt[4]+1) /gxe;
5
15
4584
40
27
$format=~s/mm / sprintf("%d",$lt[4]+1) /gxe;
0
0
4585
40
36
$format=~s,M/ , ($lt[4]+1).'/' ,gxe;
2
5
4586
40
26
$format=~s,/M , '/'.($lt[4]+1) ,gxe;
1
3
4587
40
28
$format=~s/DD / sprintf("%02d",$lt[3]) /gxe;
6
15
4588
40
67
$format=~s/d0w|dow0 / $lt[6] /gxei;
4
7
4589
40
100
35
$format=~s/dow / $lt[6]?$lt[6]:7 /gxei;
2
7
4590
40
42
$format=~s/d0y|doy0 / $lt[7] /gxei; #0-364 (365 leap)
2
5
4591
40
60
$format=~s/doy / $lt[7]+1 /gxei; #1-365 (366 leap)
1
3
4592
40
38
$format=~s/D(?![AaGgYyEeNn]) / $lt[3] /gxe; #EN pga desember og wednesday
5
7
4593
40
26
$format=~s/dd / sprintf("%d",$lt[3]) /gxe;
0
0
4594
40
100
50
59
$format=~s/hh12|HH12 / sprintf("%02d",$lt[2]<13?$lt[2]||12:$lt[2]-12)/gxe;
8
40
4595
40
48
$format=~s/HH24|HH24|HH|hh / sprintf("%02d",$lt[2]) /gxe;
4
15
4596
40
42
$format=~s/MI / sprintf("%02d",$lt[1]) /gxei;
12
24
4597
40
100
38
$format=~s{SS\.([1-9]) }{ sprintf("%0*.$1f",3+$1,$lt[0]+(repl($time,qr/^[^\.]+/)||0)) }gxei;
4
22
4598
40
41
$format=~s/SS(?:\.0)? / sprintf("%02d",$lt[0]) /gxei;
5
12
4599
40
100
54
$format=~s/(?:am|pm|apm|xm) / $lt[2]<13 ? 'am' : 'pm' /gxe;
4
6
4600
40
100
40
$format=~s/(?:AM|PM|APM|XM) / $lt[2]<13 ? 'AM' : 'PM' /gxe;
4
9
4601
40
44
$format=~s/WWI|WW / sprintf("%02d",weeknum($time)) /gxei;
0
0
4602
40
31
$format=~s/W / weeknum($time) /gxei;
0
0
4603
40
203
$format;
4604
}
4605
4606
=head2 easter
4607
4608
Input: A year (a four digit number)
4609
4610
Output: array of two numbers: day and month of Easter Sunday that year. Month 3 means March and 4 means April.
4611
4612
sub easter { use integer;my$Y=shift;my$C=$Y/100;my$L=($C-$C/4-($C-($C-17)/25)/3+$Y%19*19+15)%30;
4613
(($L-=$L>28||($L>27?1-(21-$Y%19)/11:0))-=($Y+$Y/4+$L+2-$C+$C/4)%7)<4?($L+28,3):($L-3,4) }
4614
4615
...is a "golfed" version of Oudins algorithm (1940) L
4616
(see also http://www.smart.net/~mmontes/ec-cal.html )
4617
4618
Valid for any Gregorian year. Dates repeat themselves after 70499183
4619
lunations = 2081882250 days = ca 5699845 years. However, our planet will
4620
by then have a different rotation and spin time...
4621
4622
Example:
4623
4624
( $day, $month ) = easter( 2012 ); # $day == 8 and $month == 4
4625
4626
Example 2:
4627
4628
my @e=map sprintf("%02d%02d", reverse(easter($_))), 1800..300000;
4629
print "First: ".min(@e)." Last: ".max(@e)."\n"; # First: 0322 Last: 0425
4630
4631
Note: The Spencer Jones formula differs Oudins used in C in some years
4632
before 1498. However, in that period the Julian calendar with a different formula was
4633
used anyway. Countries introduced the current Gregorian calendar between 1583 and 1926.
4634
4635
=cut
4636
4637
33
33
1
42964
sub easter { use integer;my$Y=shift;my$C=$Y/100;my$L=($C-$C/4-($C-($C-17)/25)/3+$Y%19*19+15)%30;
33
13503
42
33
100
13503
84407
13503
8503
13503
12817
4638
13503
100
66
38672
(($L-=$L>28||($L>27?1-(21-$Y%19)/11:0))-=($Y+$Y/4+$L+2-$C+$C/4)%7)<4?($L+28,3):($L-3,4) }
4639
4640
4641
=head2 time_fp
4642
4643
No input arguments.
4644
4645
Return the same number as perls C except with decimals (fractions of a second, _fp as in floating point number).
4646
4647
print time_fp(),"\n";
4648
print time(),"\n";
4649
4650
Could write:
4651
4652
1116776232.38632
4653
4654
...if that is the time now.
4655
4656
Or just:
4657
4658
1116776232
4659
4660
...from perl's internal C if C isn't installed and available.
4661
4662
4663
=cut
4664
4665
sub time_fp { # {return 0+gettimeofday} is just as well?
4666
14069
50
14069
1
12371
eval{ require Time::HiRes } or return time();
14069
42604
4667
14069
28925
my($sec,$mic)=Time::HiRes::gettimeofday();
4668
14069
30858
return $sec+$mic/1e6; #1e6 not portable?
4669
}
4670
4671
sub timems {
4672
0
0
0
0
0
eval{ require Time::HiRes } or return time();
0
0
4673
0
0
my($sec,$mic)=Time::HiRes::gettimeofday();
4674
0
0
return $sec*1000+$mic/1e3;
4675
}
4676
4677
=head2 sleep_fp
4678
4679
sleep_fp() work as the built in C<< sleep() >> but also accepts fractional seconds:
4680
4681
sleep_fp(0.020); # sleeps for 20 milliseconds
4682
4683
Sub sleep_fp do a C, thus it might take some
4684
extra time the first call. To avoid that, add C<< use Time::HiRes >>
4685
to your code. Sleep_fp should not be trusted for accuracy to more than
4686
a tenth of a second. Virtual machines tend to be less accurate (sleep
4687
longer) than physical ones. This was tested on VMware and RHEL
4688
(Linux). See also L.
4689
4690
=head2 sleeps
4691
4692
=head2 sleepms
4693
4694
=head2 sleepus
4695
4696
=head2 sleepns
4697
4698
sleep_fp(0.020); #sleeps for 20 milliseconds
4699
sleeps(0.020); #sleeps for 20 milliseconds, sleeps() is a synonym to sleep_fp()
4700
sleepms(20); #sleeps for 20 milliseconds
4701
sleepus(20000); #sleeps for 20000 microseconds = 20 milliseconds
4702
sleepns(20000000); #sleeps for 20 million nanoseconds = 20 milliseconds
4703
4704
=cut
4705
4706
5
50
5
1
19
sub sleep_fp { eval{require Time::HiRes} or (sleep(shift()),return);Time::HiRes::sleep(shift()) }
5
33
5
170544
4707
1
50
1
1
7
sub sleeps { eval{require Time::HiRes} or (sleep(shift()),return);Time::HiRes::sleep(shift()) }
1
13
1
10106
4708
1
50
1
1
8
sub sleepms { eval{require Time::HiRes} or (sleep(shift()/1e3),return);Time::HiRes::sleep(shift()/1e3) }
1
17
1
10117
4709
1
50
1
1
10
sub sleepus { eval{require Time::HiRes} or (sleep(shift()/1e6),return);Time::HiRes::sleep(shift()/1e6) }
1
22
1
10111
4710
1
50
1
1
12
sub sleepns { eval{require Time::HiRes} or (sleep(shift()/1e9),return);Time::HiRes::sleep(shift()/1e9) }
1
17
1
10181
4711
4712
=head2 eta
4713
4714
Estimated time of arrival (ETA).
4715
4716
for(@files){
4717
...do work on file...
4718
my $eta = eta( ++$i, 0+@files ); # file now, number of files
4719
print "" . localtime($eta);
4720
}
4721
4722
..DOC MISSING..
4723
4724
=head2 etahhmm
4725
4726
...NOT YET
4727
4728
=cut
4729
4730
#http://en.wikipedia.org/wiki/Kalman_filter god idé?
4731
our %Eta;
4732
our $Eta_forgetfulness=2;
4733
sub eta {
4734
2012
100
2012
1
9355
my($id,$pos,$end,$time_fp)=( @_==2 ? (join(";",caller()),@_) : @_ );
4735
2012
66
2639
$time_fp||=time_fp();
4736
2012
100
3015
my $a=$Eta{$id}||=[];
4737
2012
2782
push @$a, [$pos,$time_fp];
4738
2012
100
3759
@$a=@$a[map$_*2,0..@$a/2] if @$a>40; #hm 40
4739
2012
100
100
6182
splice(@$a,-2,1) if @$a>1 and $$a[-2][0]==$$a[-1][0]; #same pos as last
4740
2012
100
2426
return undef if @$a<2;
4741
2001
1288
my @eta;
4742
2001
2125
for(2..@$a){
4743
58602
95585
push @eta, $$a[-1][1] + ($end-$$a[-1][0]) * ($$a[-1][1]-$$a[-$_][1])/($$a[-1][0]-$$a[-$_][0]);
4744
}
4745
2001
1785
my($sum,$sumw,$w)=(0,0,1);
4746
2001
1748
for(@eta){
4747
58602
36319
$sum+=$w*$_;
4748
58602
34498
$sumw+=$w;
4749
58602
40931
$w/=$Eta_forgetfulness;
4750
}
4751
2001
1570
my $avg=$sum/$sumw;
4752
2001
4295
return $avg;
4753
# return avg(@eta);
4754
#return $$a[-1][1] + ($end-$$a[-1][0]) * ($$a[-1][1]-$$a[-2][1])/($$a[-1][0]-$$a[-2][0]);
4755
0
0
1;
4756
}
4757
4758
=head2 sleep_until
4759
4760
sleep_until(0.5) sleeps until half a second has passed since the last
4761
call to sleep_until. This example starts the next job excactly ten
4762
seconds after the last job started even if the last job lasted for a
4763
while (but not more than ten seconds):
4764
4765
for(@jobs){
4766
sleep_until(10);
4767
print localtime()."\n";
4768
...heavy job....
4769
}
4770
4771
Might print:
4772
4773
Thu Jan 12 16:00:00 2012
4774
Thu Jan 12 16:00:10 2012
4775
Thu Jan 12 16:00:20 2012
4776
4777
...and so on even if the C<< ...heavy job... >>-part takes more than a
4778
second to complete. Whereas if sleep(10) was used, each job would
4779
spend more than ten seconds in average since the work time would be
4780
added to sleep(10).
4781
4782
Note: sleep_until() will remember the time of ANY last call of this sub,
4783
not just the one on the same line in the source code (this might change
4784
in the future). The first call to sleep_until() will be the same as
4785
sleep_fp() or Perl's own sleep() if the argument is an integer.
4786
4787
=cut
4788
4789
our $Time_last_sleep_until;
4790
sub sleep_until {
4791
0
0
0
1
0
my $s=@_==1?shift():0;
4792
0
0
my $time=time_fp();
4793
0
0
my $sleep=$s-($time-nvl($Time_last_sleep_until,0));
4794
0
0
$Time_last_sleep_until=time;
4795
0
0
0
sleep_fp($sleep) if $sleep>0;
4796
}
4797
4798
my %thr;
4799
sub throttle {
4800
0
0
0
0
my($times,$mintime,$what)=@_;
4801
0
0
0
$what||=join(":",@{[caller(1)]}[3,2]);
0
0
4802
0
0
0
$thr{$what}||=[];
4803
0
0
my $thr=$thr{$what};
4804
0
0
push @$thr,time_fp();
4805
0
0
0
return if @$thr<$times;
4806
0
0
my $since=$$thr[-1]-shift(@$thr);
4807
0
0
0
my $sleep=$since<$mintime?$mintime-$since:0;
4808
0
0
sleep_fp($sleep);
4809
0
0
return $sleep;
4810
}
4811
4812
=head2 leapyear
4813
4814
B A year. A four digit number.
4815
4816
B True (1) or false (0) of whether the year is a leap year or
4817
not. (Uses current calendar even for periods before leapyears was used).
4818
4819
print join(", ",grep leapyear($_), 1900..2014)."\n";
4820
4821
1904, 1908, 1912, 1916, 1920, 1924, 1928, 1932, 1936, 1940, 1944, 1948, 1952, 1956,
4822
1960, 1964, 1968, 1972, 1976, 1980, 1984, 1988, 1992, 1996, 2000, 2004, 2008, 2012
4823
4824
Note: 1900 is not a leap year, but 2000 is. Years divided by 100 is a leap year only
4825
if it can be divided by 400.
4826
4827
=cut
4828
4829
0
0
0
1
0
sub leapyear{$_[0]%400?$_[0]%100?$_[0]%4?0:1:0:1} #bool
0
0
4830
4831
#http://rosettacode.org/wiki/Levenshtein_distance#Perl
4832
our %ldist_cache;
4833
sub ldist {
4834
0
0
0
0
my($s,$t,$l) = @_;
4835
0
0
0
return length($t) if !$s;
4836
0
0
0
return length($s) if !$t;
4837
0
0
0
0
%ldist_cache=() if !$l and 1000<0+%ldist_cache;
4838
$ldist_cache{$s,$t} ||=
4839
0
0
0
do {
4840
0
0
my($s1,$t1) = ( substr($s,1), substr($t,1) );
4841
0
0
0
substr($s,0,1) eq substr($t,0,1)
4842
? ldist($s1,$t1)
4843
: 1 + min( ldist($s1,$t1,1+$l), ldist($s,$t1,1+$l), ldist($s1,$t,1+$l) );
4844
};
4845
}
4846
4847
=head1 OTHER
4848
4849
=head2 nvl
4850
4851
The I function (or I function)
4852
4853
C takes two or more arguments. (Oracles nvl-function take just two)
4854
4855
Returns the value of the first input argument with length() > 0.
4856
4857
Return I if there is no such input argument.
4858
4859
In perl 5.10 and perl 6 this will most often be easier with the C< //
4860
> operator, although C and C<< // >> treats empty strings C<"">
4861
differently. Sub nvl here considers empty strings and undef the same.
4862
4863
=cut
4864
4865
sub nvl {
4866
38
100
66
38
1
191
return $_[0] if defined $_[0] and length($_[0]) or @_==1;
100
4867
22
100
53
return $_[1] if @_==2;
4868
9
100
27
return nvl(@_[1..$#_]) if @_>2;
4869
1
3
return undef;
4870
}
4871
4872
=head2 decode_num
4873
4874
See L.
4875
4876
=head2 decode
4877
4878
C and C works just as Oracles C.
4879
4880
C and C accordingly uses perl operators C and C<==> for comparison.
4881
4882
Examples:
4883
4884
my $a=123;
4885
print decode($a, 123,3, 214,4, $a); # prints 3
4886
print decode($a, 123=>3, 214=>4, $a); # prints 3, same thing since => is synonymous to comma in Perl
4887
4888
The first argument is tested against the second, fourth, sixth and so on,
4889
and then the third, fifth, seventh and so on is
4890
returned if decode() finds an equal string or number.
4891
4892
In the above example: 123 maps to 3, 124 maps to 4 and the last argument $a is returned elsewise.
4893
4894
More examples:
4895
4896
my $a=123;
4897
print decode($a, 123=>3, 214=>7, $a); # also 3, note that => is synonym for , (comma) in perl
4898
print decode($a, 122=>3, 214=>7, $a); # prints 123
4899
print decode($a, 123.0 =>3, 214=>7); # prints 3
4900
print decode($a, '123.0'=>3, 214=>7); # prints nothing (undef), no last argument default value here
4901
print decode_num($a, 121=>3, 221=>7, '123.0','b'); # prints b
4902
4903
Sort of:
4904
4905
decode($string, %conversion, $default);
4906
4907
The last argument is returned as a default if none of the keys in the keys/value-pairs matched.
4908
4909
A more perl-ish and often faster way of doing the same:
4910
4911
{123=>3, 214=>7}->{$a} || $a # (beware of 0)
4912
4913
=cut
4914
4915
sub decode {
4916
4
50
4
1
10
croak "Must have a mimimum of two arguments" if @_<2;
4917
4
4
my $uttrykk=shift;
4918
4
50
100
6
if(defined$uttrykk){ shift eq $uttrykk and return shift or shift for 1..@_/2 }
4
33
30
4919
0
0
0
else { !defined shift and return shift or shift for 1..@_/2 }
0
4920
2
8
return shift;
4921
}
4922
4923
sub decode_num {
4924
1
50
1
1
8
croak "Must have a mimimum of two arguments" if @_<2;
4925
1
2
my $uttrykk=shift;
4926
1
50
100
3
if(defined$uttrykk){ shift == $uttrykk and return shift or shift for 1..@_/2 }
1
33
12
4927
0
0
0
else { !defined shift and return shift or shift for 1..@_/2 }
0
4928
0
0
return shift;
4929
}
4930
4931
=head2 qrlist
4932
4933
Input: An array of values to be used to test againts for existence.
4934
4935
Output: A reference to a regular expression. That is a C
4936
4937
The regex sets $1 if it match.
4938
4939
Example:
4940
4941
my @list=qw/ABc XY DEF DEFG XYZ/;
4942
my $filter=qrlist("ABC","DEF","XY."); # makes a regex of it qr/^(\QABC\E|\QDEF\E|\QXY.\E)$/
4943
my @filtered= grep { $_ =~ $filter } @list; # returns DEF and XYZ, but not XYZ because the . char is taken literally
4944
4945
Note: Filtering with hash lookups are WAY faster.
4946
4947
Source:
4948
4949
sub qrlist (@) { my $str=join"|",map quotemeta, @_; qr/^($str)$/ }
4950
4951
=cut
4952
4953
sub qrlist (@) {
4954
0
0
1
0
my $str=join"|",map quotemeta,@_;
4955
0
0
return qr/^($str)$/;
4956
}
4957
4958
=head2 ansicolor
4959
4960
Perhaps easier to use than L ?
4961
4962
B One argument. A string where the char C<¤> have special
4963
meaning and is replaced by color codings depending on the letter
4964
following the C<¤>.
4965
4966
B The same string, but with C<¤letter> replaced by ANSI color
4967
codes respected by many types terminal windows. (xterm, telnet, ssh,
4968
telnet, rlog, vt100, cygwin, rxvt and such...).
4969
4970
B
4971
4972
¤r red
4973
¤g green
4974
¤b blue
4975
¤y yellow
4976
¤m magenta
4977
¤B bold
4978
¤u underline
4979
¤c clear
4980
¤¤ reset, quits and returns to default text color.
4981
4982
B
4983
4984
print ansicolor("This is maybe ¤ggreen¤¤?");
4985
4986
Prints I where the word I is shown in green.
4987
4988
If L is not installed or not found, returns the input
4989
string with every C<¤> including the following code letters
4990
removed. (That is: ansicolor is safe to use even if Term::ANSIColor is
4991
not installed, you just don't get the colors).
4992
4993
See also L.
4994
4995
=cut
4996
4997
sub ansicolor {
4998
0
0
1
0
my $txt=shift;
4999
0
0
0
eval{require Term::ANSIColor} or return replace($txt,qr/¤./);
0
0
5000
0
0
my %h=qw/r red g green b blue y yellow m magenta B bold u underline c clear ¤ reset/;
5001
0
0
my $re=join"|",keys%h;
5002
0
0
$txt=~s/¤($re)/Term::ANSIColor::color($h{$1})/ge;
0
0
5003
0
0
return $txt;
5004
}
5005
5006
=head2 ccn_ok
5007
5008
Checks if a Credit Card number (CCN) has correct control digits according to the LUHN-algorithm from 1960.
5009
This method of control digits is used by MasterCard, Visa, American Express,
5010
Discover, Diners Club / Carte Blanche, JCB and others.
5011
5012
B
5013
5014
A credit card number. Can contain non-digits, but they are removed internally before checking.
5015
5016
B
5017
5018
Something true or false.
5019
5020
Or more accurately:
5021
5022
Returns C (false) if the input argument is missing digits.
5023
5024
Returns 0 (zero, which is false) is the digits is not correct according to the LUHN algorithm.
5025
5026
Returns 1 or the name of a credit card company (true either way) if the last digit is an ok control digit for this ccn.
5027
5028
The name of the credit card company is returned like this (without the C<'> character)
5029
5030
Returns (wo '') Starts on Number of digits
5031
------------------------------ ------------------------ ----------------
5032
'MasterCard' 51-55 16
5033
'Visa' 4 13 eller 16
5034
'American Express' 34 eller 37 15
5035
'Discover' 6011 16
5036
'Diners Club / Carte Blanche' 300-305, 36 eller 38 14
5037
'JCB' 3 16
5038
'JCB' 2131 eller 1800 15
5039
5040
And should perhaps have had:
5041
5042
'enRoute' 2014 eller 2149 15
5043
5044
...but that card uses either another control algorithm or no control
5045
digits at all. So C is never returned here.
5046
5047
If the control digits is valid, but the input does not match anything in the column C, 1 is returned.
5048
5049
(This is also the same control digit mechanism used in Norwegian KID numbers on payment bills)
5050
5051
The first digit in a credit card number is supposed to tell what "industry" the card is meant for:
5052
5053
MII Digit Value Issuer Category
5054
--------------------------- ----------------------------------------------------
5055
0 ISO/TC 68 and other industry assignments
5056
1 Airlines
5057
2 Airlines and other industry assignments
5058
3 Travel and entertainment
5059
4 Banking and financial
5060
5 Banking and financial
5061
6 Merchandizing and banking
5062
7 Petroleum
5063
8 Telecommunications and other industry assignments
5064
9 National assignment
5065
5066
...although this has no meaning to C.
5067
5068
The first six digits is I, that is the bank
5069
(probably). The rest in the "account number", except the last digits,
5070
which is the control digit. Max length on credit card numbers are 19
5071
digits.
5072
5073
=cut
5074
5075
sub ccn_ok {
5076
0
0
1
0
my $ccn=shift(); #credit card number
5077
0
0
$ccn=~s/\D+//g;
5078
0
0
0
if(KID_ok($ccn)){
5079
0
0
0
return "MasterCard" if $ccn=~/^5[1-5]\d{14}$/;
5080
0
0
0
return "Visa" if $ccn=~/^4\d{12}(?:\d{3})?$/;
5081
0
0
0
return "American Express" if $ccn=~/^3[47]\d{13}$/;
5082
0
0
0
return "Discover" if $ccn=~/^6011\d{12}$/;
5083
0
0
0
return "Diners Club / Carte Blanche" if $ccn=~/^3(?:0[0-5]\d{11}|[68]\d{12})$/;
5084
0
0
0
return "JCB" if $ccn=~/^(?:3\d{15}|(?:2131|1800)\d{11})$/;
5085
0
0
return 1;
5086
}
5087
#return "enRoute" if $ccn=~/^(?:2014|2149)\d{11}$/; #ikke LUHN-krav?
5088
0
0
return 0;
5089
}
5090
5091
=head2 KID_ok
5092
5093
Checks if a norwegian KID number has an ok control digit.
5094
5095
To check if a customer has typed the number correctly.
5096
5097
This uses the LUHN algorithm (also known as mod-10) from 1960 which is also used
5098
internationally in control digits for credit card numbers, and Canadian social security ID numbers as well.
5099
5100
The algorithm, as described in Phrack (47-8) (a long time hacker online publication):
5101
5102
"For a card with an even number of digits, double every odd numbered
5103
digit and subtract 9 if the product is greater than 9. Add up all the
5104
even digits as well as the doubled-odd digits, and the result must be
5105
a multiple of 10 or it's not a valid card. If the card has an odd
5106
number of digits, perform the same addition doubling the even numbered
5107
digits instead."
5108
5109
B A KID-nummer. Must consist of digits 0-9 only, otherwise a die (croak) happens.
5110
5111
B
5112
5113
- Returns undef if the input argument is missing.
5114
5115
- Returns 0 if the control digit (the last digit) does not satify the LUHN/mod-10 algorithm.
5116
5117
- Returns 1 if ok
5118
5119
B L
5120
5121
=cut
5122
5123
sub KID_ok {
5124
0
0
0
1
0
croak "Non-numeric argument" if $_[0]=~/\D/;
5125
0
0
0
my @k=split//,shift or return undef;
5126
0
0
my $s;$s+=pop(@k)+[qw/0 2 4 6 8 1 3 5 7 9/]->[pop@k] while @k;
0
0
5127
0
0
0
$s%10==0?1:0;
5128
}
5129
5130
5131
5132
=head2 range
5133
5134
B
5135
5136
One or more numeric arguments:
5137
5138
First: x (first returned element)
5139
5140
Second: y (last but not including)
5141
5142
Third: step, default 1. The step between each returned element
5143
5144
If a fourth, fifth and so on arguments are given, they change the step for each returned element. As first derivative, second derivative.
5145
5146
B
5147
5148
If one argument: returns the array C<(0 .. x-1)>
5149
5150
If two arguments: returns the array C<(x .. y-1)>
5151
5152
If three arguments: The default step is 1. Use a third argument to use a different step.
5153
5154
B
5155
5156
print join ",", range(11); # prints 0,1,2,3,4,5,6,7,8,9,10 (but not 11)
5157
print join ",", range(2,11); # 2,3,4,5,6,7,8,9,10 (but not 11)
5158
print join ",", range(11,2,-1); # 11,10,9,8,7,6,5,4,3
5159
print join ",", range(2,11,3); # 2,5,8
5160
print join ",", range(11,2,-3); # 11,8,5
5161
print join ",", range(11,2,+3); # prints nothing
5162
5163
print join ", ",range(2,11,1,0.1); # 2, 3, 4.1, 5.3, 6.6, 8, 9.5 adds 0.1 to step each time
5164
print join ", ",range(2,11,1,0.1,-0.01); # 2, 3, 4.1, 5.29, 6.56, 7.9, 9.3, 10.75
5165
5166
Note: In the Python language and others, C is a build in iterator (a
5167
generator), not an array. This saves memory for large sets and sometimes time.
5168
Use C in L to get a similar lazy generator in Perl.
5169
5170
=cut
5171
5172
sub range {
5173
8
100
8
1
28
return _range_accellerated(@_) if @_>3; #se under
5174
6
7
my($x,$y,$jump)=@_;
5175
6
100
51
return ( 0 .. $x-1 ) if @_==1;
5176
5
100
18
return ( $x .. $y-1 ) if @_==2;
5177
4
50
33
20
croak "Wrong number of arguments or jump==0" if @_!=3 or $jump==0;
5178
4
3
my @r;
5179
4
100
9
if($jump>0){ while($x<$y){ push @r, $x; $x+=$jump } }
2
6
772
435
772
795
5180
2
4
else { while($x>$y){ push @r, $x; $x+=$jump } }
12
10
12
13
5181
4
38
return @r;
5182
}
5183
5184
#jumps derivative, double der., trippled der usw
5185
sub _range_accellerated {
5186
2
2
3
my($x,$y,@jump)=@_;
5187
2
2
my @r;
5188
2
50
17
9
my $test = $jump[0]>=0 ? sub{$x<$y} : sub{$x>$y};
17
25
0
0
5189
2
3
while(&$test()){
5190
15
14
push @r, $x;
5191
15
10
$x+=$jump[0];
5192
15
28
$jump[$_-1]+=$jump[$_] for 1..$#jump;
5193
}
5194
2
13
return @r;
5195
}
5196
5197
=head2 globr
5198
5199
Works like and uses Perls builtin C<< glob() >> function but adds support for ranges
5200
with C<< {from..to} >> and C<< {from..to..step} >>. Like brace expansion in bash.
5201
5202
Examples:
5203
5204
my @arr = glob "X{a,b,c,d}Z"; # return four element array: XaZ XbZ XcZ XdZ
5205
my @arr = globr "X{a,b,c,d}Z"; # same as above
5206
my @arr = globr "X{a..d}Z"; # same as above
5207
my @arr = globr "X{a..d..2}Z"; # step 2, returns array: XaZ XcZ
5208
my @arr = globr "X{aa..bz..13}Z"; # XaaZ XanZ XbaZ XbnZ
5209
my @arr = globr "{1..12}b"; # 1b 2b 3b 4b 5b 6b 7b 8b 9b 10b 11b 12b
5210
my @arr = globr "{01..12}b"; # 01b 02b 03b 04b 05b 06b 07b 08b 09b 10b 11b 12b
5211
my @arr = globr "{01..12..3}b"; # 01b 04b 07b 10b
5212
5213
=cut
5214
5215
sub globr($) {
5216
8
8
1
9
my $p=shift;
5217
8
100
64
$p=~s/\{(\w+)\.\.(\w+)(\.\.(\d+))?\}/my$i=0;"{".join(",",grep{$4?!($i++%$4):1}$1..$2)."}";/eg;
7
5
7
44
133
196
5218
8
709
glob $p;
5219
}
5220
5221
=head2 permutations
5222
5223
How many ways (permutations) can six people be placed around a table:
5224
5225
If one person: one
5226
If two persons: two (they can swap places)
5227
If three persons: six
5228
If four persons: 24
5229
If five persons: 120
5230
If six persons: 720
5231
5232
The formula is C where the postfix unary operator C, also known as I is defined like:
5233
C. Example: C<5! = 5 * 4 * 3 * 2 * 1 = 120>.Run this to see the 100 first C<< n! >>
5234
5235
perl -MAcme::Tools -le'$i=big(1);print "$_!=",$i*=$_ for 1..100'
5236
5237
1! = 1
5238
2! = 2
5239
3! = 6
5240
4! = 24
5241
5! = 120
5242
6! = 720
5243
7! = 5040
5244
8! = 40320
5245
9! = 362880
5246
10! = 3628800
5247
.
5248
.
5249
.
5250
100! = 93326215443944152681699238856266700490715968264381621468592963895217599993229915608941463976156518286253697920827223758251185210916864000000000000000000000000
5251
5252
C takes a list and return a list of arrayrefs for each
5253
of the permutations of the input list:
5254
5255
permutations('a','b'); #returns (['a','b'],['b','a'])
5256
5257
permutations('a','b','c'); #returns (['a','b','c'],['a','c','b'],
5258
# ['b','a','c'],['b','c','a'],
5259
# ['c','a','b'],['c','b','a'])
5260
5261
Up to five input arguments C is probably as fast as it
5262
can be in this pure perl implementation (see source). For more than
5263
five, it could be faster. How fast is it now: Running with different
5264
n, this many time took that many seconds:
5265
5266
n times seconds
5267
-- ------- ---------
5268
2 100000 0.32
5269
3 10000 0.09
5270
4 10000 0.33
5271
5 1000 0.18
5272
6 100 0.27
5273
7 10 0.21
5274
8 1 0.17
5275
9 1 1.63
5276
10 1 17.00
5277
5278
If the first argument is a coderef, that sub will be called for each permutation and the return from those calls with be the real return from C. For example this:
5279
5280
print for permutations(sub{join"",@_},1..3);
5281
5282
...will print the same as:
5283
5284
print for map join("",@$_), permutations(1..3);
5285
5286
...but the first of those two uses less RAM if 3 has been say 9.
5287
Changing 3 with 10, and many computers hasn't enough memory
5288
for the latter.
5289
5290
The examples prints:
5291
5292
123
5293
132
5294
213
5295
231
5296
312
5297
321
5298
5299
If you just want to say calculate something on each permutation,
5300
but is not interested in the list of them, you just don't
5301
take the return. That is:
5302
5303
my $ant;
5304
permutations(sub{$ant++ if $_[-1]>=$_[0]*2},1..9);
5305
5306
...is the same as:
5307
5308
$$_[-1]>=$$_[0]*2 and $ant++ for permutations(1..9);
5309
5310
...but the first uses next to nothing of memory compared to the latter. They have about the same speed.
5311
(The examples just counts the permutations where the last number is at least twice as large as the first)
5312
5313
C was created to find all combinations of a persons
5314
name. This is useful in "fuzzy" name searches with
5315
L if you can not be certain what is first, middle
5316
and last names. In foreign or unfamiliar names it can be difficult to
5317
know that.
5318
5319
=cut
5320
5321
#TODO: se test_perl.pl
5322
5323
sub permutations {
5324
2
50
2
1
26
my $code=ref($_[0]) eq 'CODE' ? shift() : undef;
5325
2
50
33
6
$code and @_<6 and return map &$code(@$_),permutations(@_);
5326
5327
2
50
6
return [@_] if @_<2;
5328
5329
2
100
17
return ([@_[0,1]],[@_[1,0]]) if @_==2;
5330
5331
1
50
19
return ([@_[0,1,2]],[@_[0,2,1]],[@_[1,0,2]],
5332
[@_[1,2,0]],[@_[2,0,1]],[@_[2,1,0]]) if @_==3;
5333
5334
0
0
0
return ([@_[0,1,2,3]],[@_[0,1,3,2]],[@_[0,2,1,3]],[@_[0,2,3,1]],
5335
[@_[0,3,1,2]],[@_[0,3,2,1]],[@_[1,0,2,3]],[@_[1,0,3,2]],
5336
[@_[1,2,0,3]],[@_[1,2,3,0]],[@_[1,3,0,2]],[@_[1,3,2,0]],
5337
[@_[2,0,1,3]],[@_[2,0,3,1]],[@_[2,1,0,3]],[@_[2,1,3,0]],
5338
[@_[2,3,0,1]],[@_[2,3,1,0]],[@_[3,0,1,2]],[@_[3,0,2,1]],
5339
[@_[3,1,0,2]],[@_[3,1,2,0]],[@_[3,2,0,1]],[@_[3,2,1,0]]) if @_==4;
5340
5341
0
0
0
return ([@_[0,1,2,3,4]],[@_[0,1,2,4,3]],[@_[0,1,3,2,4]],[@_[0,1,3,4,2]],[@_[0,1,4,2,3]],
5342
[@_[0,1,4,3,2]],[@_[0,2,1,3,4]],[@_[0,2,1,4,3]],[@_[0,2,3,1,4]],[@_[0,2,3,4,1]],
5343
[@_[0,2,4,1,3]],[@_[0,2,4,3,1]],[@_[0,3,1,2,4]],[@_[0,3,1,4,2]],[@_[0,3,2,1,4]],
5344
[@_[0,3,2,4,1]],[@_[0,3,4,1,2]],[@_[0,3,4,2,1]],[@_[0,4,1,2,3]],[@_[0,4,1,3,2]],
5345
[@_[0,4,2,1,3]],[@_[0,4,2,3,1]],[@_[0,4,3,1,2]],[@_[0,4,3,2,1]],[@_[1,0,2,3,4]],
5346
[@_[1,0,2,4,3]],[@_[1,0,3,2,4]],[@_[1,0,3,4,2]],[@_[1,0,4,2,3]],[@_[1,0,4,3,2]],
5347
[@_[1,2,0,3,4]],[@_[1,2,0,4,3]],[@_[1,2,3,0,4]],[@_[1,2,3,4,0]],[@_[1,2,4,0,3]],
5348
[@_[1,2,4,3,0]],[@_[1,3,0,2,4]],[@_[1,3,0,4,2]],[@_[1,3,2,0,4]],[@_[1,3,2,4,0]],
5349
[@_[1,3,4,0,2]],[@_[1,3,4,2,0]],[@_[1,4,0,2,3]],[@_[1,4,0,3,2]],[@_[1,4,2,0,3]],
5350
[@_[1,4,2,3,0]],[@_[1,4,3,0,2]],[@_[1,4,3,2,0]],[@_[2,0,1,3,4]],[@_[2,0,1,4,3]],
5351
[@_[2,0,3,1,4]],[@_[2,0,3,4,1]],[@_[2,0,4,1,3]],[@_[2,0,4,3,1]],[@_[2,1,0,3,4]],
5352
[@_[2,1,0,4,3]],[@_[2,1,3,0,4]],[@_[2,1,3,4,0]],[@_[2,1,4,0,3]],[@_[2,1,4,3,0]],
5353
[@_[2,3,0,1,4]],[@_[2,3,0,4,1]],[@_[2,3,1,0,4]],[@_[2,3,1,4,0]],[@_[2,3,4,0,1]],
5354
[@_[2,3,4,1,0]],[@_[2,4,0,1,3]],[@_[2,4,0,3,1]],[@_[2,4,1,0,3]],[@_[2,4,1,3,0]],
5355
[@_[2,4,3,0,1]],[@_[2,4,3,1,0]],[@_[3,0,1,2,4]],[@_[3,0,1,4,2]],[@_[3,0,2,1,4]],
5356
[@_[3,0,2,4,1]],[@_[3,0,4,1,2]],[@_[3,0,4,2,1]],[@_[3,1,0,2,4]],[@_[3,1,0,4,2]],
5357
[@_[3,1,2,0,4]],[@_[3,1,2,4,0]],[@_[3,1,4,0,2]],[@_[3,1,4,2,0]],[@_[3,2,0,1,4]],
5358
[@_[3,2,0,4,1]],[@_[3,2,1,0,4]],[@_[3,2,1,4,0]],[@_[3,2,4,0,1]],[@_[3,2,4,1,0]],
5359
[@_[3,4,0,1,2]],[@_[3,4,0,2,1]],[@_[3,4,1,0,2]],[@_[3,4,1,2,0]],[@_[3,4,2,0,1]],
5360
[@_[3,4,2,1,0]],[@_[4,0,1,2,3]],[@_[4,0,1,3,2]],[@_[4,0,2,1,3]],[@_[4,0,2,3,1]],
5361
[@_[4,0,3,1,2]],[@_[4,0,3,2,1]],[@_[4,1,0,2,3]],[@_[4,1,0,3,2]],[@_[4,1,2,0,3]],
5362
[@_[4,1,2,3,0]],[@_[4,1,3,0,2]],[@_[4,1,3,2,0]],[@_[4,2,0,1,3]],[@_[4,2,0,3,1]],
5363
[@_[4,2,1,0,3]],[@_[4,2,1,3,0]],[@_[4,2,3,0,1]],[@_[4,2,3,1,0]],[@_[4,3,0,1,2]],
5364
[@_[4,3,0,2,1]],[@_[4,3,1,0,2]],[@_[4,3,1,2,0]],[@_[4,3,2,0,1]],[@_[4,3,2,1,0]]) if @_==5;
5365
5366
0
0
my(@r,@p,@c,@i,@n); @i=(0,@_); @p=@c=1..@_; @n=1..@_-1;
0
0
0
0
0
0
5367
PERM:
5368
0
0
while(1){
5369
0
0
0
if($code){if(defined wantarray){push(@r,&$code(@i[@p]))}else{&$code(@i[@p])}}else{push@r,[@i[@p]]}
0
0
0
0
0
0
0
0
0
5370
0
0
0
for my$i(@n){splice@p,$i,0,shift@p;next PERM if --$c[$i];$c[$i]=$i+1}
0
0
0
0
0
0
5371
return@r
5372
0
0
}
5373
}
5374
5375
=head2 cart
5376
5377
Cartesian product
5378
5379
B
5380
5381
Input: two or more arrayrefs with accordingly x, y, z and so on number of elements.
5382
5383
Output: An array of x * y * z number of arrayrefs. The arrays being the cartesian product of the input arrays.
5384
5385
It can be useful to think of this as joins in SQL. In C statements with
5386
more tables behind C, but without any C condition to join the tables.
5387
5388
B
5389
5390
B
5391
5392
- Either two or more arrayrefs with x, y, z and so on number of elements.
5393
5394
- Or coderefs to subs containing condition checks. Somewhat like C conditions in SQL.
5395
5396
B An array of x * y * z number of arrayrefs (the cartesian product)
5397
minus the ones that did not fulfill the condition(s).
5398
5399
This of is as joins with one or more where conditions as coderefs.
5400
5401
The coderef input arguments can be placed last or among the array refs
5402
to save both runtime and memory if the conditions depend on
5403
arrays further back.
5404
5405
B
5406
5407
for(cart(\@a1,\@a2,\@a3)){
5408
my($a1,$a2,$a3) = @$_;
5409
print "$a1,$a2,$a3\n";
5410
}
5411
5412
Prints the same as this:
5413
5414
for my $a1 (@a1){
5415
for my $a2 (@a2){
5416
for my $a3 (@a3){
5417
print "$a1,$a2,$a3\n";
5418
}
5419
}
5420
}
5421
5422
B (with a condition: the sum of the first two should be dividable with 3)
5423
5424
for( cart( \@a1, \@a2, sub{sum(@$_)%3==0}, \@a3 ) ) {
5425
my($a1,$a2,$a3)=@$_;
5426
print "$a1,$a2,$a3\n";
5427
}
5428
5429
Prints the same as this:
5430
5431
for my $a1 (@a1){
5432
for my $a2 (@a2){
5433
next if 0==($a1+$a2)%3;
5434
for my $a3 (@a3){
5435
print "$a1,$a2,$a3\n";
5436
}
5437
}
5438
}
5439
5440
B
5441
5442
my @a1 = (1,2);
5443
my @a2 = (10,20,30);
5444
my @a3 = (100,200,300,400);
5445
5446
my $s = join"", map "*".join(",",@$_), cart(\@a1,\@a2,\@a3);
5447
ok( $s eq "*1,10,100*1,10,200*1,10,300*1,10,400*1,20,100*1,20,200"
5448
."*1,20,300*1,20,400*1,30,100*1,30,200*1,30,300*1,30,400"
5449
."*2,10,100*2,10,200*2,10,300*2,10,400*2,20,100*2,20,200"
5450
."*2,20,300*2,20,400*2,30,100*2,30,200*2,30,300*2,30,400");
5451
5452
$s=join"",map "*".join(",",@$_), cart(\@a1,\@a2,\@a3,sub{sum(@$_)%3==0});
5453
ok( $s eq "*1,10,100*1,10,400*1,20,300*1,30,200*2,10,300*2,20,200*2,30,100*2,30,400");
5454
5455
B
5456
5457
Returns hashrefs instead of arrayrefs:
5458
5459
my @cards=cart( #5200 cards: 100 decks of 52 cards
5460
deck => [1..100],
5461
value => [qw/2 3 4 5 6 7 8 9 10 J Q K A/],
5462
col => [qw/heart diamond club star/],
5463
);
5464
for my $card ( mix(@cards) ) {
5465
print "From deck number $$card{deck} we got $$card{value} $$card{col}\n";
5466
}
5467
5468
Note: using sub-ref filters do not work (yet) in hash-mode. Use grep on result instead.
5469
5470
=cut
5471
5472
sub cart {
5473
5
5
1
518
my @ars=@_;
5474
5
100
16
if(!ref($_[0])){ #if hash-mode detected
5475
1
33
3
my(@k,@v); push@k,shift@ars and push@v,shift@ars while @ars;
1
14
5476
1
8
return map{my%h;@h{@k}=@$_;\%h}cart(@v);
24
25
24
37
24
37
5477
}
5478
4
3
my @res=map[$_],@{shift@ars};
4
16
5479
4
8
for my $ar (@ars){
5480
9
100
50
20
@res=grep{&$ar(@$_)}@res and next if ref($ar) eq 'CODE';
24
40
5481
8
10
@res=map{my$r=$_;map{[@$r,$_]}@$ar}@res;
34
21
34
27
120
165
5482
}
5483
4
45
return @res;
5484
}
5485
5486
sub cart_easy { #not tested/exported http://stackoverflow.com/questions/2457096/in-perl-how-can-i-get-the-cartesian-product-of-multiple-sets
5487
0
0
0
0
my $last = pop @_;
5488
0
0
0
@_ ? (map {my$left=$_; map [@$left, $_], @$last } cart_easy(@_) )
0
0
0
0
5489
: (map [$_], @$last);
5490
}
5491
5492
=head2 reduce
5493
5494
From: Why Functional Programming Matters: L
5495
5496
L
5497
5498
DON'T TRY THIS AT HOME, C PROGRAMMERS.
5499
5500
sub reduce (&@) {
5501
my ($proc, $first, @rest) = @_;
5502
return $first if @rest == 0;
5503
local ($a, $b) = ($first, reduce($proc, @rest));
5504
return $proc->();
5505
}
5506
5507
Many functions can then be implemented with very little code. Such as:
5508
5509
sub mean { (reduce {$a + $b} @_) / @_ }
5510
5511
=cut
5512
5513
sub reduce (&@) {
5514
0
0
1
0
my ($proc, $first, @rest) = @_;
5515
0
0
0
return $first if @rest == 0;
5516
33
33
101418
no warnings;
33
52
33
18868
5517
0
0
local ($a, $b) = ($first, reduce($proc, @rest));
5518
0
0
return $proc->();
5519
}
5520
5521
5522
=head2 pivot
5523
5524
Resembles the pivot table function in Excel.
5525
5526
C is used to spread out a slim and long table to a visually improved layout.
5527
5528
For instance spreading out the results of C-selects from SQL:
5529
5530
pivot( arrayref, columnname1, columnname2, ...)
5531
5532
pivot( ref_to_array_of_arrayrefs, @list_of_names_to_down_fields )
5533
5534
The first argument is a ref to a two dimensional table.
5535
5536
The rest of the arguments is a list which also signals the number of
5537
columns from left in each row that is ending up to the left of the
5538
data table, the rest ends up at the top and the last element of
5539
each row ends up as data.
5540
5541
top1 top1 top1 top1
5542
left1 left2 left3 top2 top2 top2 top2
5543
----- ----- ----- ---- ---- ---- ----
5544
data data data data
5545
data data data data
5546
data data data data
5547
5548
Example:
5549
5550
my @table=(
5551
["1997","Gerd", "Weight", "Summer",66],
5552
["1997","Gerd", "Height", "Summer",170],
5553
["1997","Per", "Weight", "Summer",75],
5554
["1997","Per", "Height", "Summer",182],
5555
["1997","Hilde","Weight", "Summer",62],
5556
["1997","Hilde","Height", "Summer",168],
5557
["1997","Tone", "Weight", "Summer",70],
5558
5559
["1997","Gerd", "Weight", "Winter",64],
5560
["1997","Gerd", "Height", "Winter",158],
5561
["1997","Per", "Weight", "Winter",73],
5562
["1997","Per", "Height", "Winter",180],
5563
["1997","Hilde","Weight", "Winter",61],
5564
["1997","Hilde","Height", "Winter",164],
5565
["1997","Tone", "Weight", "Winter",69],
5566
5567
["1998","Gerd", "Weight", "Summer",64],
5568
["1998","Gerd", "Height", "Summer",171],
5569
["1998","Per", "Weight", "Summer",76],
5570
["1998","Per", "Height", "Summer",182],
5571
["1998","Hilde","Weight", "Summer",62],
5572
["1998","Hilde","Height", "Summer",168],
5573
["1998","Tone", "Weight", "Summer",70],
5574
5575
["1998","Gerd", "Weight", "Winter",64],
5576
["1998","Gerd", "Height", "Winter",171],
5577
["1998","Per", "Weight", "Winter",74],
5578
["1998","Per", "Height", "Winter",183],
5579
["1998","Hilde","Weight", "Winter",62],
5580
["1998","Hilde","Height", "Winter",168],
5581
["1998","Tone", "Weight", "Winter",71],
5582
);
5583
5584
.
5585
5586
my @reportA=pivot(\@table,"Year","Name");
5587
print "\n\nReport A\n\n".tablestring(\@reportA);
5588
5589
Will print:
5590
5591
Report A
5592
5593
Year Name Height Height Weight Weight
5594
Summer Winter Summer Winter
5595
---- ----- ------ ------ ------ ------
5596
1997 Gerd 170 158 66 64
5597
1997 Hilde 168 164 62 61
5598
1997 Per 182 180 75 73
5599
1997 Tone 70 69
5600
1998 Gerd 171 171 64 64
5601
1998 Hilde 168 168 62 62
5602
1998 Per 182 183 76 74
5603
1998 Tone 70 71
5604
5605
.
5606
5607
my @reportB=pivot([map{$_=[@$_[0,3,2,1,4]]}(@t=@table)],"Year","Season");
5608
print "\n\nReport B\n\n".tablestring(\@reportB);
5609
5610
Will print:
5611
5612
Report B
5613
5614
Year Season Height Height Height Weight Weight Weight Weight
5615
Gerd Hilde Per Gerd Hilde Per Tone
5616
---- ------ ------ ------ ----- ----- ------ ------ ------
5617
1997 Summer 170 168 182 66 62 75 70
5618
1997 Winter 158 164 180 64 61 73 69
5619
1998 Summer 171 168 182 64 62 76 70
5620
1998 Winter 171 168 183 64 62 74 71
5621
5622
.
5623
5624
my @reportC=pivot([map{$_=[@$_[1,2,0,3,4]]}(@t=@table)],"Name","Attributt");
5625
print "\n\nReport C\n\n".tablestring(\@reportC);
5626
5627
Will print:
5628
5629
Report C
5630
5631
Name Attributt 1997 1997 1998 1998
5632
Summer Winter Summer Winter
5633
----- --------- ------ ------ ------ ------
5634
Gerd Height 170 158 171 171
5635
Gerd Weight 66 64 64 64
5636
Hilde Height 168 164 168 168
5637
Hilde Weight 62 61 62 62
5638
Per Height 182 180 182 183
5639
Per Weight 75 73 76 74
5640
Tone Weight 70 69 70 71
5641
5642
.
5643
5644
my @reportD=pivot([map{$_=[@$_[1,2,0,3,4]]}(@t=@table)],"Name");
5645
print "\n\nReport D\n\n".tablestring(\@reportD);
5646
5647
Will print:
5648
5649
Report D
5650
5651
Name Height Height Height Height Weight Weight Weight Weight
5652
1997 1997 1998 1998 1997 1997 1998 1998
5653
Summer Winter Summer Winter Summer Winter Summer Winter
5654
----- ------ ------ ------ ------ ------ ------ ------ ------
5655
Gerd 170 158 171 171 66 64 64 64
5656
Hilde 168 164 168 168 62 61 62 62
5657
Per 182 180 182 183 75 73 76 74
5658
Tone 70 69 70 71
5659
5660
Options:
5661
5662
Options to sort differently and show sums and percents are available. (...MORE DOC ON THAT LATER...)
5663
5664
See also L
5665
5666
=cut
5667
5668
sub pivot {
5669
4
4
1
9
my($tabref,@vertikalefelt)=@_;
5670
4
50
11
my %opt=ref($vertikalefelt[-1]) eq 'HASH' ? %{pop(@vertikalefelt)} : ();
0
0
5671
4
50
9
my $opt_sum=1 if $opt{sum};
5672
4
50
0
9
my $opt_pro=exists $opt{prosent}?$opt{prosent}||0:undef;
5673
4
50
32
my $sortsub = $opt{'sortsub'} || \&_sortsub;
5674
4
33
14
my $sortsub_bortover = $opt{'sortsub_bortover'} || $sortsub;
5675
4
33
35
my $sortsub_nedover = $opt{'sortsub_nedover'} || $sortsub;
5676
#print serialize(\%opt,'opt');
5677
#print serialize(\$opt_pro,'opt_pro');
5678
4
6
my $antned=0+@vertikalefelt;
5679
4
3
my $bakerst=-1+@{$$tabref[0]};
4
6
5680
4
5
my(%h,%feltfinnes,%sum);
5681
#print "Bakerst<$bakerst>\n";
5682
4
6
for(@$tabref){
5683
112
166
my $rad=join($;,@$_[0..($antned-1)]);
5684
112
121
my $felt=join($;,@$_[$antned..($bakerst-1)]);
5685
112
89
my $verdi=$$_[$bakerst];
5686
112
50
131
length($rad) or $rad=' ';
5687
112
50
115
length($felt) or $felt=' ';
5688
112
134
$h{$rad}{$felt}=$verdi;
5689
112
138
$h{$rad}{"%$felt"}=$verdi;
5690
112
50
33
261
if($opt_sum or defined $opt_pro){
5691
0
0
$h{$rad}{Sum}+=$verdi;
5692
0
0
$sum{$felt}+=$verdi;
5693
0
0
$sum{Sum}+=$verdi;
5694
}
5695
112
88
$feltfinnes{$felt}++;
5696
112
50
159
$feltfinnes{"%$felt"}++ if $opt_pro;
5697
}
5698
4
28
my @feltfinnes = sort $sortsub_bortover keys%feltfinnes;
5699
4
50
9
push @feltfinnes, "Sum" if $opt_sum;
5700
4
7
my @t=([@vertikalefelt,map{replace($_,$;,"\n")}@feltfinnes]);
23
31
5701
#print serialize(\@feltfinnes,'feltfinnes');
5702
#print serialize(\%h,'h');
5703
#print "H = ".join(", ",sort _sortsub keys%h)."\n";
5704
4
16
for my $rad (sort $sortsub_nedover keys(%h)){
5705
my @rad=(split($;,$rad),
5706
map{
5707
23
50
33
111
if(/^\%/ and defined $opt_pro){
120
211
5708
0
0
my $sum=$h{$rad}{Sum};
5709
0
0
my $verdi=$h{$rad}{$_};
5710
0
0
0
if($sum!=0){
5711
0
0
0
defined $verdi
5712
?sprintf("%*.*f",3+1+$opt_pro,$opt_pro,100*$verdi/$sum)
5713
:$verdi;
5714
}
5715
else{
5716
0
0
0
$verdi!=0?"div0":$verdi;
5717
}
5718
}
5719
else{
5720
120
122
$h{$rad}{$_};
5721
}
5722
}
5723
@feltfinnes);
5724
23
47
push(@t,[@rad]);
5725
}
5726
4
50
10
push(@t,"-",["Sum",(map{""}(2..$antned)),map{print "<$_>\n";$sum{$_}}@feltfinnes]) if $opt_sum;
0
0
0
0
0
0
5727
4
76
return @t;
5728
}
5729
5730
# default sortsub for pivot()
5731
5732
sub _sortsub {
5733
33
33
163
no warnings;
33
39
33
10145
5734
#my $c=($a<=>$b)||($a cmp $b);
5735
#return $c if $c;
5736
#printf "%-30s %-30s ",replace($a,$;,','),replace($b,$;,',');
5737
78
78
450
my @a=split $;,$a;
5738
78
314
my @b=split $;,$b;
5739
78
101
for(0..$#a){
5740
119
153
my $c=$a[$_]<=>$b[$_];
5741
119
100
66
281
return $c if $c and "$a[$_]$b[$_]"!~/[iI][nN][fF]|þ/i; # inf(inity)
5742
101
70
$c=$a[$_]cmp$b[$_];
5743
101
100
192
return $c if $c;
5744
}
5745
0
0
return 0;
5746
}
5747
5748
=head2 tablestring
5749
5750
B a reference to an array of arrayrefs -- a two dimensional table of strings and numbers
5751
5752
B a string containing the textual table -- a string of two or more lines
5753
5754
The first arrayref in the list refers to a list of either column headings (scalar)
5755
or ... (...more later...)
5756
5757
In this output table:
5758
5759
- the columns will not be wider than necessary by its widest value (any -tags are removed in every internal width-calculation)
5760
5761
- multi-lined cell values are handled also
5762
5763
- and so are html-tags, if the output is to be used inside -tags on a web page.
5764
5765
- columns with just numeric values are right justified (header row excepted)
5766
5767
Example:
5768
5769
print tablestring([
5770
[qw/AA BB CCCC/],
5771
[123,23,"d"],
5772
[12,23,34],
5773
[77,88,99],
5774
["lin\nes",12,"asdff\nfdsa\naa"],[0,22,"adf"]
5775
]);
5776
5777
Prints this string of 11 lines:
5778
5779
AA BB CCCC
5780
--- -- -----
5781
123 23 d
5782
12 23 34
5783
77 8 99
5784
5785
lin 12 asdff
5786
es fdsa
5787
aa
5788
5789
10 22 adf
5790
5791
As you can see, rows containing multi-lined cells gets an empty line before and after the row to separate it more clearly.
5792
5793
=cut
5794
5795
sub tablestring {
5796
5
5
1
20
my $tab=shift;
5797
5
50
13
my %o=$_[0] ? %{shift()} : ();
0
0
5798
5
6
my $fjern_tom=$o{fjern_tomme_kolonner};
5799
5
6
my $ikke_space=$o{ikke_space};
5800
5
50
17
my $nodup=$o{nodup}||0;
5801
5
4
my $ikke_hodestrek=$o{ikke_hodestrek};
5802
5
50
8
my $pagesize=exists $o{pagesize} ? $o{pagesize}-3 : 9999999;
5803
5
5
my $venstretvang=$o{venstre};
5804
5
4
my(@bredde,@venstre,@hoeyde,@ikketom,@nodup);
5805
5
6
my $hode=1;
5806
5
4
my $i=0;
5807
5
4
my $j;
5808
5
8
for(@$tab){
5809
33
22
$j=0;
5810
33
25
$hoeyde[$i]=0;
5811
33
23
my $nodup_rad=$nodup;
5812
33
50
46
if(ref($_) eq 'ARRAY'){
5813
33
31
for(@$_){
5814
210
147
my $celle=$_;
5815
210
100
307
$bredde[$j]||=0;
5816
210
50
33
343
if($nodup_rad and $i>0 and $$tab[$i][$j] eq $$tab[$i-1][$j] || ($nodup_rad=0)){
0
33
5817
0
0
0
$celle=$nodup==1?"":$nodup;
5818
0
0
$nodup[$i][$j]=1;
5819
}
5820
else{
5821
210
129
my $hoeyde=0;
5822
210
111
my $bredere;
5823
33
33
169
no warnings;
33
40
33
12590
5824
210
100
100
511
$ikketom[$j]=1 if !$hode && length($celle)>0;
5825
210
235
for(split("\n",$celle)){
5826
236
50
233
$bredere=/
5827
236
174
s/<[^>]+>//g;
5828
236
141
$hoeyde++;
5829
236
194
s/>/>/g;
5830
236
139
s/</
5831
236
100
321
$bredde[$j]=length($_)+1+$bredere if length($_)+1+$bredere>$bredde[$j];
5832
236
100
100
955
$venstre[$j]=1 if $_ && !/^\s*[\-\+]?(\d+|\d*\.\d+)\s*\%?$/ && !$hode;
100
5833
}
5834
210
100
66
349
if( $hoeyde>1 && !$ikke_space){
5835
25
100
30
$hoeyde++ unless $hode;
5836
25
100
100
42
$hoeyde[$i-1]++ if $i>1 && $hoeyde[$i-1]==1;
5837
}
5838
210
100
259
$hoeyde[$i]=$hoeyde if $hoeyde>$hoeyde[$i];
5839
}
5840
210
156
$j++;
5841
}
5842
}
5843
else{
5844
0
0
$hoeyde[$i]=1;
5845
0
0
$ikke_hodestrek=1;
5846
}
5847
33
17
$hode=0;
5848
33
27
$i++;
5849
}
5850
5
7
$i=$#hoeyde;
5851
5
5
$j=$#bredde;
5852
5
50
33
18
if($i==0 or $venstretvang) { @venstre=map{1}(0..$j) }
0
0
0
0
5853
5
50
7
else { for(0..$j){ $venstre[$_]=1 if !$ikketom[$_] } }
33
41
5854
5
8
my @tabut;
5855
5
2
my $rad_startlinje=0;
5856
5
4
my @overskrift;
5857
my $overskrift_forrige;
5858
5
7
for my $x (0..$i){
5859
33
50
54
if($$tab[$x] eq '-'){
5860
0
0
0
my @tegn=map {$$tab[$x-1][$_]=~/\S/?"-":" "} (0..$j);
0
0
5861
0
0
$tabut[$rad_startlinje]=join(" ",map {$tegn[$_] x ($bredde[$_]-1)} (0..$j));
0
0
5862
}
5863
else{
5864
33
32
for my $y (0..$j){
5865
210
50
33
278
next if $fjern_tom && !$ikketom[$y];
5866
33
33
165
no warnings;
33
46
33
17891
5867
5868
210
0
33
805
my @celle=
50
5869
!$overskrift_forrige&&$nodup&&$nodup[$x][$y]
5870
?($nodup>0?():((" " x (($bredde[$y]-length($nodup))/2)).$nodup))
5871
:split("\n",$$tab[$x][$y]);
5872
210
228
for(0..($hoeyde[$x]-1)){
5873
261
153
my $linje=$rad_startlinje+$_;
5874
261
100
373
my $txt=shift @celle || '';
5875
261
100
100
946
$txt=sprintf("%*s",$bredde[$y]-1,$txt) if length($txt)>0 && !$venstre[$y] && ($x>0 || $ikke_hodestrek);
66
66
5876
261
213
$tabut[$linje].=$txt;
5877
261
100
245
if($y==$j){
5878
42
115
$tabut[$linje]=~s/\s+$//;
5879
}
5880
else{
5881
219
124
my $bredere;
5882
219
50
197
$bredere = $txt=~/
5883
219
167
$txt=~s/<[^>]+>//g;
5884
219
132
$txt=~s/>/>/g;
5885
219
124
$txt=~s/</
5886
219
335
$tabut[$linje].= ' ' x ($bredde[$y]-length($txt)-$bredere);
5887
}
5888
}
5889
}
5890
}
5891
33
22
$rad_startlinje+=$hoeyde[$x];
5892
5893
#--lage streker?
5894
33
50
40
if(not $ikke_hodestrek){
5895
33
100
33
102
if($x==0){
50
33
0
5896
5
6
for my $y (0..$j){
5897
33
50
33
41
next if $fjern_tom && !$ikketom[$y];
5898
33
42
$tabut[$rad_startlinje].=('-' x ($bredde[$y]-1))." ";
5899
}
5900
5
3
$rad_startlinje++;
5901
5
11
@overskrift=("",@tabut);
5902
}
5903
elsif(
5904
$x%$pagesize==0 || $nodup>0&&!$nodup[$x+1][$nodup-1]
5905
and $x+1<@$tab
5906
and !$ikke_hodestrek
5907
)
5908
{
5909
0
0
push(@tabut,@overskrift);
5910
0
0
$rad_startlinje+=@overskrift;
5911
0
0
$overskrift_forrige=1;
5912
}
5913
else{
5914
28
26
$overskrift_forrige=0;
5915
}
5916
}
5917
}#for x
5918
5
49
return join("\n",@tabut)."\n";
5919
}
5920
5921
=head2 serialize
5922
5923
Returns a data structure as a string. See also C
5924
(serialize was created long time ago before Data::Dumper appeared on
5925
CPAN, before CPAN even...)
5926
5927
B One to four arguments.
5928
5929
First argument: A reference to the structure you want.
5930
5931
Second argument: (optional) The name the structure will get in the output string.
5932
If second argument is missing or is undef or '', it will get no name in the output.
5933
5934
Third argument: (optional) The string that is returned is also put
5935
into a created file with the name given in this argument. Putting a
5936
C<< > >> char in from of the filename will append that file
5937
instead. Use C<''> or C to not write to a file if you want to
5938
use a fourth argument.
5939
5940
Fourth argument: (optional) A number signalling the depth on which newlines is used in the output.
5941
The default is infinite (some big number) so no extra newlines are output.
5942
5943
B A string containing the perl-code definition that makes that data structure.
5944
The input reference (first input argument) can be to an array, hash or a string.
5945
Those can contain other refs and strings in a deep data structure.
5946
5947
Limitations:
5948
5949
- Code refs are not handled (just returns C)
5950
5951
- Regex, class refs and circular recursive structures are also not handled.
5952
5953
B
5954
5955
$a = 'test';
5956
@b = (1,2,3);
5957
%c = (1=>2, 2=>3, 3=>5, 4=>7, 5=>11);
5958
%d = (1=>2, 2=>3, 3=>\5, 4=>7, 5=>11, 6=>[13,17,19,{1,2,3,'asdf\'\\\''}],7=>'x');
5959
print serialize(\$a,'a');
5960
print serialize(\@b,'tab');
5961
print serialize(\%c,'c');
5962
print serialize(\%d,'d');
5963
print serialize(\("test'n roll",'brb "brb"'));
5964
print serialize(\%d,'d',undef,1);
5965
5966
Prints accordingly:
5967
5968
$a='test';
5969
@tab=('1','2','3');
5970
%c=('1','2','2','3','3','5','4','7','5','11');
5971
%d=('1'=>'2','2'=>'3','3'=>\'5','4'=>'7','5'=>'11','6'=>['13','17','19',{'1'=>'2','3'=>'asdf\'\\\''}]);
5972
('test\'n roll','brb "brb"');
5973
%d=('1'=>'2',
5974
'2'=>'3',
5975
'3'=>\'5',
5976
'4'=>'7',
5977
'5'=>'11',
5978
'6'=>['13','17','19',{'1'=>'2','3'=>'asdf\'\\\''}],
5979
'7'=>'x');
5980
5981
Areas of use:
5982
5983
- Debugging (first and foremost)
5984
5985
- Storing arrays and hashes and data structures of those on file, database or sending them over the net
5986
5987
- eval earlier stored string to get back the data structure
5988
5989
Be aware of the security implications of Cing a perl code string
5990
stored somewhere that unauthorized users can change them! You are
5991
probably better of using L or L without
5992
enabling the CODE-options if you have such security issues.
5993
More on decompiling Perl-code: L or L.
5994
5995
=head2 dserialize
5996
5997
Debug-serialize, dumping data structures for you to look at.
5998
5999
Same as C but the output is given a newline every 80th character.
6000
(Every 80th or whatever C<$Acme::Tools::Dserialize_width> contains)
6001
6002
=cut
6003
6004
our $Dserialize_width=80;
6005
0
0
0
0
sub _kallstack { my $tilbake=shift||0; my @c; my $ret; $ret.=serialize(\@c,"caller$tilbake") while @c=caller(++$tilbake); $ret }
0
0
0
0
0
0
6006
0
0
1
0
sub dserialize{join "\n",serialize(@_)=~/(.{1,$Dserialize_width})/gs}
6007
sub serialize {
6008
33
33
160
no warnings;
33
45
33
108811
6009
1421
1421
1
3391
my($r,$name,$filename,$level)=@_;
6010
1421
100
2346
my @r=(undef,undef,($level||0)-1);
6011
1421
50
1549
if($filename){
6012
0
0
0
open my $fh, '>', $filename or croak("FEIL: could not open file $filename\n" . _kallstack());
6013
0
0
my $ret=serialize($r,$name,undef,$level);
6014
0
0
print $fh "$ret\n1;\n";
6015
0
0
close($fh);
6016
0
0
return $ret;
6017
}
6018
1421
100
1977
if(ref($r) eq 'SCALAR'){
100
100
100
50
0
0
0
0
6019
1151
50
1295
return "\$$name=".serialize($r,@r).";\n" if $name;
6020
1151
100
1321
return "undef" unless defined $$r;
6021
1143
822
my $ret=$$r;
6022
1143
957
$ret=~s/\\/\\\\/g;
6023
1143
749
$ret=~s/\'/\\'/g;
6024
1143
2139
return "'$ret'";
6025
}
6026
elsif(ref($r) eq 'ARRAY'){
6027
76
100
141
return "\@$name=".serialize($r,@r).";\n" if $name;
6028
74
87
my $ret="(";
6029
74
99
for(@$r){
6030
392
525
$ret.=serialize(\$_,@r).",";
6031
392
100
753
$ret.="\n" if $level>=0;
6032
}
6033
74
176
$ret=~s/,$//;
6034
74
91
$ret.=")";
6035
74
50
110
$ret.=";\n" if $name;
6036
74
193
return $ret;
6037
}
6038
elsif(ref($r) eq 'HASH'){
6039
31
100
168
return "\%$name=".serialize($r,@r).";\n" if $name;
6040
21
20
my $ret="(";
6041
21
86
for(sort keys %$r){
6042
73
134
$ret.=serialize(\$_,@r)."=>".serialize(\$$r{$_},@r).",";
6043
73
50
131
$ret.="\n" if $level>=0;
6044
}
6045
21
65
$ret=~s/,$//;
6046
21
28
$ret.=")";
6047
21
50
34
$ret.=";\n" if $name;
6048
21
107
return $ret;
6049
}
6050
elsif(ref($$r) eq 'ARRAY'){
6051
70
50
111
return "\@$name=".serialize($r,@r).";\n" if $name;
6052
70
52
my $ret="[";
6053
70
81
for(@$$r){
6054
188
265
$ret.=serialize(\$_,@r).",";
6055
188
50
33
569
$ret.="\n" if !defined $level or $level>=0;
6056
}
6057
70
148
$ret=~s/,$//;
6058
70
52
$ret.="]";
6059
70
50
90
$ret.=";\n" if $name;
6060
70
138
return $ret;
6061
}
6062
elsif(ref($$r) eq 'HASH'){
6063
93
50
116
return "\%$name=".serialize($r,@r).";\n" if $name;
6064
93
68
my $ret="{";
6065
93
257
for(sort keys %$$r){
6066
294
349
$ret.=serialize(\$_,@r)."=>".serialize(\$$$r{$_},@r).",";
6067
294
50
478
$ret.="\n" if $level>=0;
6068
}
6069
93
165
$ret=~s/,$//;
6070
93
73
$ret.="}";
6071
93
50
115
$ret.=";\n" if $name;
6072
93
163
return $ret;
6073
}
6074
elsif(ref($$r) eq 'SCALAR'){
6075
0
0
return "\\".serialize($$r,@r);
6076
}
6077
elsif(ref($r) eq 'LVALUE'){
6078
0
0
my $s="$$r";
6079
0
0
return serialize(\$s,@r);
6080
}
6081
elsif(ref($$r) eq 'CODE'){
6082
#warn "Tried to serialize CODE";
6083
0
0
return 'sub{croak "Can not serialize CODE-references, see perhaps B::Deparse and Storable"}'
6084
}
6085
elsif(ref($$r) eq 'GLOB'){
6086
0
0
warn "Tried to serialize a GLOB";
6087
0
0
return '\*STDERR'
6088
}
6089
else{
6090
0
0
my $tilbake;
6091
0
0
my($pakke,$fil,$linje,$sub,$hasargs,$wantarray);
6092
0
0
0
($pakke,$fil,$linje,$sub,$hasargs,$wantarray)=caller($tilbake++) until $sub ne 'serialize' || $tilbake>20;
6093
0
0
croak("serialize() argument should be reference!\n".
6094
"\$r=$r\n".
6095
"ref(\$r) = ".ref($r)."\n".
6096
"ref(\$\$r) = ".ref($$r)."\n".
6097
"kallstack:\n". _kallstack());
6098
}
6099
}
6100
6101
=head2 srlz
6102
6103
Synonym to L, but remove unnecessary single quote chars around
6104
C<< \w+ >>-keys and number values (except numbers with leading zeros). Example:
6105
6106
serialize:
6107
6108
%s=('action'=>{'del'=>'0','ins'=>'0','upd'=>'18'},'post'=>'1348','pre'=>'1348',
6109
'updcol'=>{'Laerestednr'=>'18','Studietypenr'=>'18','Undervisningssted'=>'7','Url'=>'11'},
6110
'where'=>'where 1=1');
6111
6112
srlz:
6113
6114
%s=(action=>{del=>0,ins=>0,upd=>18},post=>1348,pre=>1348,
6115
updcol=>{Laerestednr=>18,Studietypenr=>18,Undervisningssted=>7,Url=>11},
6116
where=>'where 1=1');
6117
6118
Todo: update L to do the same, but in the right way. (For now
6119
srlz runs the string from serialize() through two C<< s/// >>, this will break
6120
in certain cases). L will be kept as a synonym (or the other way around).
6121
6122
=cut
6123
6124
sub srlz {
6125
0
0
1
0
my $s=serialize(@_);
6126
0
0
$s=~s,'(\w+)'=>,$1=>,g;
6127
0
0
$s=~s,=>'((0|[1-9]\d*)(\.\d+)?(e[-+]?\d+)?)',=>$1,gi; #ikke ledende null! hm
6128
0
0
$s;
6129
}
6130
6131
=head2 cnttbl
6132
6133
my %nordic_country_population=(Norway=>5214890,Sweden=>9845155,Denmark=>5699220,Finland=>5496907,Iceland=>331310);
6134
print cnttbl(\%nordic_country_population);
6135
Iceland 331310 1.25%
6136
Norway 5214890 19.61%
6137
Finland 5496907 20.67%
6138
Denmark 5699220 21.44%
6139
Sweden 9845155 37.03%
6140
SUM 26587482 100.00%
6141
6142
Todo: Levels...:
6143
6144
my %sales=(
6145
Toyota=>{Prius=>19,RAV=>12,Auris=>18,Avensis=>7},
6146
Volvo=>{V40=>14, XC90=>4},
6147
Nissan=>{Leaf=>19,Qashqai=>17},
6148
Tesla=>{ModelS=>8}
6149
);
6150
print cnttbl(\%sales);
6151
Toyota SUM 56
6152
Volvo SUM 18
6153
Nissan SUM 36
6154
Tesla SUM 8
6155
SUM SUM 56 100%
6156
6157
=cut
6158
6159
sub cnttbl {
6160
6
6
1
338
my $hr=shift;
6161
6
20
my $maxlen=max(3,map length($_),keys%$hr);
6162
join"",ref((values%$hr)[0])
6163
4
3
?do{ map {my$o=$_;join("",map rpad($$o[0],$maxlen)." $_\n",split("\n",$$o[1]))}
4
12
6164
1
8
map [$_,cnttbl($$hr{$_})],
6165
sort keys%$hr }
6166
6
100
14
:do{ my $sum=sum(values%$hr);
5
9
6167
5
8
my $fmt=repl("%-xs %yd %6.2f%%\n",x=>$maxlen,y=>length($sum));
6168
map sprintf($fmt,@$_,100*$$_[1]/$sum),
6169
5
50
26
(map[$_,$$hr{$_}],sort{$$hr{$a}<=>$$hr{$b} or $a cmp $b}keys%$hr),
16
127
6170
(['SUM',$sum]) }
6171
}
6172
6173
=head2 nicenum
6174
6175
print 14.3 - 14.0; # 0.300000000000001
6176
print 34.3 - 34.0; # 0.299999999999997
6177
print nicenum( 14.3 - 14.0 ); # 0.3
6178
print nicenum( 34.3 - 34.0 ); # 0.3
6179
6180
=cut
6181
6182
our $Nicenum;
6183
sub nicenum { #hm
6184
3
3
1
558
$Nicenum=$_[0];
6185
3
42
$Nicenum=~s/([\.,]\d*)((\d)\3\3\3\3\3)\d$/$1$2$3$3$3$3$3$3$3$3$3/;
6186
3
9
my $r=0+$Nicenum;
6187
#warn "nn $_[0] --> $Nicenum --> $r\n";
6188
3
6
$r;
6189
}
6190
6191
6192
=head2 sys
6193
6194
Call instead of C if you want C (Carp::croak) when something fails.
6195
6196
sub sys($){ my$s=shift; my$r=system($s); $r==0 or croak"ERROR: system($s)==$r ($!) ($?)" }
6197
6198
6199
=cut
6200
6201
11
50
11
1
13
sub sys($){ my$s=shift; my$r=system($s); $r==0 or croak"ERROR: system($s)==$r ($!) ($?)" }
11
58375
11
160
6202
6203
=head2 recursed
6204
6205
Returns true or false (actually 1 or 0) depending on whether the
6206
current sub has been called by itself or not.
6207
6208
sub xyz
6209
{
6210
xyz() if not recursed;
6211
6212
}
6213
6214
=cut
6215
6216
0
0
0
1
0
sub recursed {(caller(1))[3] eq (caller(2))[3]?1:0}
6217
6218
6219
6220
=head2 ed
6221
6222
String editor commands
6223
6224
literals: a-z 0-9 space
6225
move cursor: FBAEPN MF MB ME
6226
delete: D Md
6227
up/low/camelcase word U L C
6228
backspace: -
6229
search: S
6230
return/enter: R
6231
meta/esc/alt: M
6232
shift: T
6233
cut to eol: K
6234
caps lock: C
6235
yank: Y
6236
start and end: < >
6237
macro start/end/play: { } !
6238
times for next cmd: M (i.e. M24a inserts 24 a's)
6239
6240
(TODO: alfa...and more docs needed)
6241
6242
=cut
6243
6244
our $Edcursor;
6245
sub ed {
6246
18
18
1
2772
my($s,$cs,$p,$buf)=@_; #string, commands, point (or cursor)
6247
18
100
40
return $$s=ed($$s,$cs,$p,$buf) if ref($s);
6248
17
22
my($sh,$cl,$m,$t,@m)=(0,0,0,undef);
6249
17
26
while(length($cs)){
6250
121
79
my $n = 0;
6251
121
50
304
my $c = $cs=~s,^(M\d+|M.|""|".+?"|S.+?R|\\.|.),,s ? $1 : die;
6252
121
100
272
$p = curb($p||0,0,length($s));
6253
121
100
138
if(defined$t){$cs="".($c x $t).$cs;$t=undef;next}
1
5
1
1
1
2
6254
120
81
248
my $add=sub{substr($s,$p,0)=$_[0];$p+=length($_[0])};
81
87
81
61
6255
120
50
498
if ($c =~ /^([a-z0-9 ])/){ &$add($sh^$cl?uc($1):$1); $sh=0 }
65
100
87
65
100
46
100
100
100
100
100
50
100
100
100
100
100
50
100
100
100
50
50
50
50
50
50
50
100
100
100
50
6256
1
2
elsif($c =~ /^"(.+)"$/) { &$add($1) }
6257
1
2
elsif($c =~ /^\\(.)/) { &$add($1) }
6258
2
50
3
elsif($c =~ /^S(.+)R/) { my $i=index($s,$1,$p);$p=$i+length($1) if $i>=0 }
2
7
6259
1
6
elsif($c =~ /^M(\d+)/) { $t=$1; next }
1
4
6260
2
3
elsif($c eq 'F') { $p++ }
6261
1
2
elsif($c eq 'B') { $p-- }
6262
0
0
0
elsif($c eq 'A') { $p-- while $p>0 and substr($s,$p-1,2)!~/^\n/ }
6263
8
50
26
elsif($c eq 'E') { substr($s,$p)=~/(.*)/ and $p+=length($1) }
6264
4
4
elsif($c eq 'D') { substr($s,$p,1)='' }
6265
3
50
15
elsif($c eq 'MD'){ substr($s,$p)=~s/^(\W*\w+)// and $buf=$1 }
6266
3
50
21
elsif($c eq 'MF'){ substr($s,$p)=~/(\W*\w+)/ and $p+=length($1) }
6267
3
50
13
elsif($c eq 'MB'){ substr($s,0,$p)=~/(\w+\W*)$/ and $p-=length($1) }
6268
0
0
0
elsif($c eq '-') { substr($s,--$p,1)='' if $p }
6269
5
50
28
elsif($c eq 'M-'){ substr($s,0,$p)=~s/(\w+\W*)$// and $p-=length($buf=$1)}
6270
3
50
13
elsif($c eq 'K') { substr($s,$p)=~s/(\S.+|\s*?\n)// and $buf=$1 }
6271
12
15
elsif($c eq 'Y') { &$add($buf) }
6272
0
0
elsif($c eq 'U') { substr($s,$p)=~s/(\W*)(\w+)/$1\U$2\E/; $p+=length($1.$2) }
0
0
6273
0
0
elsif($c eq 'L') { substr($s,$p)=~s/(\W*)(\w+)/$1\L$2\E/; $p+=length($1.$2) }
0
0
6274
0
0
elsif($c eq 'C') { substr($s,$p)=~s/(\W*)(\w+)/$1\u\L$2\E/; $p+=length($1.$2) }
0
0
6275
0
0
elsif($c eq '<') { $p=0 }
6276
0
0
elsif($c eq '>') { $p=length($s) }
6277
0
0
elsif($c eq 'T') { $sh=1 }
6278
0
0
elsif($c eq 'C') { $cl^=1 }
6279
1
2
elsif($c eq '{') { $m=1; @m=() }
1
1
6280
1
2
elsif($c eq '}') { $m=0 }
6281
2
50
33
9
elsif($c eq '!') { $m||!@m and die"ed: no macro"; $cs=join("",@m).$cs }
2
4
6282
2
3
elsif($c eq '""'){ &$add('"') }
6283
0
0
else { croak "ed: Unknown cmd '$c'\n" }
6284
119
100
100
352
push @m, $c if $m and $c ne '{';
6285
#warn serialize([$c,$m,$cs],'d');
6286
}
6287
17
13
$Edcursor=$p;
6288
17
32
$s;
6289
}
6290
6291
#todo: sub unbless eller sub damn
6292
#todo: ..se også: use Data::Structure::Util qw/unbless/;
6293
#todo: ...og: Acme::Damn sin damn()
6294
#todo? sub swap($$) http://www.idg.no/computerworld/article242008.ece
6295
#todo? catal
6296
#todo?
6297
#void quicksort(int t, int u) int i, m; if (t >= u) return; swap(t, randint(t, u)); m = t; for (i = t + 1; i <= u; i++) if (x[i] < x[t]) swap(++m, i); swap(t, m) quicksort(t, m-1); quicksort(m+1, u);
6298
6299
6300
=head1 JUST FOR FUN
6301
6302
=head2 brainfu
6303
6304
B one or two arguments
6305
6306
First argument: a string, source code of the brainfu
6307
language. String containing the eight charachters + - < > [ ] . ,
6308
Every other char is ignored silently.
6309
6310
Second argument: if the source code contains commas (,) the second
6311
argument is the input characters in a string.
6312
6313
B The resulting output from the program.
6314
6315
Example:
6316
6317
print brainfu(<<""); #prints "Hallo Verden!\n"
6318
++++++++++[>+++++++>++++++++++>+++>+<<<<-]>++.>---.+++++++++++..+++.>++.<<++++++++++++++
6319
.>----------.+++++++++++++.--------------.+.+++++++++.>+.>.
6320
6321
See L
6322
6323
=head2 brainfu2perl
6324
6325
Just as L but instead it return the perl code to which the
6326
brainfu code is translated. Just C<< eval() >> this perl code to run.
6327
6328
Example:
6329
6330
print brainfu2perl('>++++++++[<++++++++>-]<++++++++.>++++++[<++++++>-]<---.');
6331
6332
Prints this string:
6333
6334
my($c,$o,@b)=(0); sub out{$o.=chr($b[$c]) for 1..$_[0]||1}
6335
++$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];
6336
while($b[$c]){--$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];
6337
++$b[$c];++$c;--$b[$c];}--$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];
6338
++$b[$c];++$b[$c];out;++$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];
6339
while($b[$c]){--$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$c;--$b[$c];}
6340
--$c;--$b[$c];--$b[$c];--$b[$c];out;$o;
6341
6342
=head2 brainfu2perl_optimized
6343
6344
Just as L but optimizes the perl code. The same
6345
example as above with brainfu2perl_optimized returns this equivalent
6346
but shorter perl code:
6347
6348
$b[++$c]+=8;while($b[$c]){$b[--$c]+=8;--$b[++$c]}$b[--$c]+=8;out;$b[++$c]+=6;
6349
while($b[$c]){$b[--$c]+=6;--$b[++$c]}$b[--$c]-=3;out;$o;
6350
6351
=cut
6352
6353
3
3
1
1210
sub brainfu { eval(brainfu2perl(@_)) }
6354
6355
sub brainfu2perl {
6356
6
6
1
18
my($bf,$inp)=@_;
6357
17
52
my $perl='my($c,$inp,$o,@b)=(0,\''.$inp.'\'); no warnings; sub out{$o.=chr($b[$c]) for 1..$_[0]||1}'."\n";
6358
3
100
66
25
$perl.='sub inp{$inp=~s/(.)//s and $b[$c]=ord($1)}'."\n" if $inp and $bf=~/,/;
6359
3
100
545
$perl.=join("",map/\+/?'++$b[$c];':/\-/?'--$b[$c];':/\[/?'while($b[$c]){':/\]/?'}':/>/?'++$c;':/?'--$c;':/\./?'out;':/\,/?'inp;':'',split//,$bf).'$o;';
100
100
100
100
100
100
100
6360
3
339
$perl;
6361
}
6362
6363
sub brainfu2perl_optimized {
6364
0
14
1
0
my $perl=brainfu2perl(@_);
6365
0
0
$perl=~s{(((\+|\-)\3\$b\[\$c\];){2,})}{ '$b[$c]'.$3.'='.(grep/b/,split//,$1).';' }gisex;
0
0
6366
0
0
1 while $perl=~s/\+\+\$c;\-\-\$c;//g + $perl=~s/\-\-\$c;\+\+\$c;//g;
6367
0
0
$perl=~s{((([\-\+])\3\$c;){2,})}{"\$c$3=".(grep/c/,split//,$1).';'}gisex;
0
0
6368
0
0
0
$perl=~s{((\+\+|\-\-)\$c;([^;{}]+;))}{my($o,$s)=($2,$3);$s=~s/\$c/$o\$c/?$s:$1}ge;
0
0
0
0
6369
0
0
$perl=~s/\$c(\-|\+)=(\d+);(\+\+|\-\-)\$b\[\$c\]/$3.'$b[$c'.$1.'='.$2.'];'/ge;
0
0
6370
0
0
$perl=~s{((out;){2,})}{'out('.(grep/o/,split//,$1).');'}ge;
0
0
6371
0
0
$perl=~s/;}/}/g;$perl=~s/;+/;/g;
0
0
6372
0
0
$perl;
6373
}
6374
6375
6376
=head1 BLOOM FILTER SUBROUTINES
6377
6378
Bloom filters can be used to check whether an element (a string) is a
6379
member of a large set using much less memory or disk space than other
6380
data structures. Trading speed and accuracy for memory usage. While
6381
risking false positives, Bloom filters have a very strong space
6382
advantage over other data structures for representing sets.
6383
6384
In the example below, a set of 100000 phone numbers (or any string of
6385
any length) can be "stored" in just 91230 bytes if you accept that you
6386
can only check the data structure for existence of a string and accept
6387
false positives with an error rate of 0.03 (that is three percent, error
6388
rates are given in numbers larger than 0 and smaller than 1).
6389
6390
You can not retrieve the strings in the set without using "brute
6391
force" methods and even then you would get slightly more strings than
6392
you put in because of the error rate inaccuracy.
6393
6394
Bloom Filters have many uses.
6395
6396
See also: L
6397
6398
See also: L
6399
6400
=head2 bfinit
6401
6402
Initialize a new Bloom Filter:
6403
6404
my $bf = bfinit( error_rate=>0.01, capacity=>100000 );
6405
6406
The same:
6407
6408
my $bf = bfinit( 0.01, 100000 );
6409
6410
since two arguments is interpreted as error_rate and capacity accordingly.
6411
6412
6413
=head2 bfadd
6414
6415
bfadd($bf, $_) for @phone_numbers; # Adding strings one at a time
6416
6417
bfadd($bf, @phone_numbers); # ...or all at once (faster)
6418
6419
Returns 1 on success. Dies (croaks) if more strings than capacity is added.
6420
6421
=head2 bfcheck
6422
6423
my $phone_number="97713246";
6424
if ( bfcheck($bf, $phone_number) ) {
6425
print "Yes, $phone_number was PROBABLY added\n";
6426
}
6427
else{
6428
print "No, $phone_number was DEFINITELY NOT added\n";
6429
}
6430
6431
Returns true if C<$phone_number> exists in C<@phone_numbers>.
6432
6433
Returns false most of the times, but sometimes true*), if C<$phone_number> doesn't exists in C<@phone_numbers>.
6434
6435
*) This is called a false positive.
6436
6437
Checking more than one key:
6438
6439
@bools = bfcheck($bf, @keys); # or ...
6440
@bools = bfcheck($bf, \@keys); # better, uses less memory if @keys is large
6441
6442
Returns an array the same size as @keys where each element is true or false accordingly.
6443
6444
=head2 bfgrep
6445
6446
Same as C except it returns the keys that exists in the bloom filter
6447
6448
@found = bfgrep($bf, @keys); # or ...
6449
@found = bfgrep($bf, \@keys); # better, uses less memory if @keys is large, or ...
6450
@found = grep bfcheck($bf,$_), @keys; # same but slower
6451
6452
=head2 bfgrepnot
6453
6454
Same as C except it returns the keys that do NOT exists in the bloom filter:
6455
6456
@not_found = bfgrepnot($bf, @keys); # or ...
6457
@not_found = bfgrepnot($bf, \@keys); # better, uses less memory if @keys is large, or ...
6458
@not_found = grep !bfcheck($bf,$_), @keys); # same but slower
6459
6460
=head2 bfdelete
6461
6462
Deletes from a counting bloom filter.
6463
6464
To enable deleting be sure to initialize the bloom filter with the
6465
numeric C argument. The number of bits could be 2 or 3*)
6466
for small filters with a small capacity (a small number of keys), but
6467
setting the number to 4 ensures that even very large filters with very
6468
small error rates would not overflow.
6469
6470
*) Acme::Tools do not currently support C<< counting_bits => 3 >> so 4
6471
and 8 are the only practical alternatives where 8 is almost always overkill.
6472
6473
my $bf=bfinit(
6474
error_rate => 0.001,
6475
capacity => 10000000,
6476
counting_bits => 4 # power of 2, that is 2, 4, 8, 16 or 32
6477
);
6478
bfadd( $bf, @unique_phone_numbers);
6479
bfdelete($bf, @unique_phone_numbers);
6480
6481
Example: examine the frequency of the counters with 4 bit counters and 4 million keys:
6482
6483
my $bf=bfinit( error_rate=>0.001, capacity=>4e6, counting_bits=>4 );
6484
bfadd($bf,[1e3*$_+1 .. 1e3*($_+1)]) for 0..4000-1; # adding 4 million keys one thousand at a time
6485
my %c; $c{vec($$bf{filter},$_,$$bf{counting_bits})}++ for 0..$$bf{filterlength}-1;
6486
printf "%8d counters = %d\n",$c{$_},$_ for sort{$a<=>$b}keys%c;
6487
6488
The output:
6489
6490
28689562 counters = 0
6491
19947673 counters = 1
6492
6941082 counters = 2
6493
1608250 counters = 3
6494
280107 counters = 4
6495
38859 counters = 5
6496
4533 counters = 6
6497
445 counters = 7
6498
46 counters = 8
6499
1 counters = 9
6500
6501
Even after the error_rate is changed from 0.001 to a percent of that, 0.00001, the limit of 16 (4 bits) is still far away:
6502
6503
47162242 counters = 0
6504
33457237 counters = 1
6505
11865217 counters = 2
6506
2804447 counters = 3
6507
497308 counters = 4
6508
70608 counters = 5
6509
8359 counters = 6
6510
858 counters = 7
6511
65 counters = 8
6512
4 counters = 9
6513
6514
In algorithmic terms the number of bits needed is C. Thats why 4 bits (counters up
6515
to 15) is "always" good enough except for extremely large capasities or extremely small error rates.
6516
(Except when adding the same key many times, which should be avoided, and Acme::Tools::bfadd do not
6517
check for that, perhaps in future versions).
6518
6519
Bloom filters of the counting type are not very space efficient: The tables above shows that 84%-85%
6520
of the counters are 0 or 1. This means most bits are zero-bits. This doesn't have to be a problem if
6521
a counting bloom filter is used to be sent over slow networks because they are very compressable by
6522
common compression tools like I or L and such.
6523
6524
Deletion of non-existing keys makes C die (croak).
6525
6526
=head2 bfdelete
6527
6528
Deletes from a counting bloom filter:
6529
6530
bfdelete($bf, @keys);
6531
bfdelete($bf, \@keys);
6532
6533
Returns C<$bf> after deletion.
6534
6535
Croaks (dies) on deleting a non-existing key or deleting from an previouly overflown counter in a counting bloom filter.
6536
6537
=head2 bfaddbf
6538
6539
Adds another bloom filter to a bloom filter.
6540
6541
Bloom filters has the proberty that bit-wise I-ing the bit-filters
6542
of two filters with the same capacity and the same number and type of
6543
hash functions, adds the filters:
6544
6545
my $bf1=bfinit(error_rate=>0.01,capacity=>$cap,keys=>[1..500]);
6546
my $bf2=bfinit(error_rate=>0.01,capacity=>$cap,keys=>[501..1000]);
6547
6548
bfaddbf($bf1,$bf2);
6549
6550
print "Yes!" if bfgrep($bf1, 1..1000) == 1000;
6551
6552
Prints yes since C now returns an array of all the 1000 elements.
6553
6554
Croaks if the filters are of different dimensions.
6555
6556
Works for counting bloom filters as well (C<< counting_bits=>4 >> e.g.)
6557
6558
=head2 bfsum
6559
6560
Returns the number of 1's in the filter.
6561
6562
my $percent=100*bfsum($bf)/$$bf{filterlength};
6563
printf "The filter is %.1f%% filled\n",$percent; #prints 50.0% or so if filled to capacity
6564
6565
Sums the counters for counting bloom filters (much slower than for non counting).
6566
6567
=head2 bfdimensions
6568
6569
Input, two numeric arguments: Capacity and error_rate.
6570
6571
Outputs an array of two numbers: m and k.
6572
6573
m = - n * log(p) / log(2)**2 # n = capacity, m = bits in filter (divide by 8 to get bytes)
6574
k = log(1/p) / log(2) # p = error_rate, uses perls internal log() with base e (2.718)
6575
6576
...that is: m = the best number of bits in the filter and k = the best
6577
number of hash functions optimized for the given capacity (n) and
6578
error_rate (p). Note that k is a dependent only of the error_rate. At
6579
about two percent error rate the bloom filter needs just the same
6580
number of bytes as the number of keys.
6581
6582
Storage (bytes):
6583
Capacity Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate
6584
0.000000001 0.00000001 0.0000001 0.000001 0.00001 0.0001 0.001 0.01 0.02141585 0.1 0.5 0.99
6585
------------- ----------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------
6586
10 54.48 48.49 42.5 36.51 30.52 24.53 18.53 12.54 10.56 6.553 2.366 0.5886
6587
100 539.7 479.8 419.9 360 300.1 240.2 180.3 120.4 100.6 60.47 18.6 0.824
6588
1000 5392 4793 4194 3595 2996 2397 1798 1199 1001 599.6 180.9 3.177
6589
10000 5.392e+04 4.793e+04 4.194e+04 3.594e+04 2.995e+04 2.396e+04 1.797e+04 1.198e+04 1e+04 5991 1804 26.71
6590
100000 5.392e+05 4.793e+05 4.193e+05 3.594e+05 2.995e+05 2.396e+05 1.797e+05 1.198e+05 1e+05 5.991e+04 1.803e+04 262
6591
1000000 5.392e+06 4.793e+06 4.193e+06 3.594e+06 2.995e+06 2.396e+06 1.797e+06 1.198e+06 1e+06 5.991e+05 1.803e+05 2615
6592
10000000 5.392e+07 4.793e+07 4.193e+07 3.594e+07 2.995e+07 2.396e+07 1.797e+07 1.198e+07 1e+07 5.991e+06 1.803e+06 2.615e+04
6593
100000000 5.392e+08 4.793e+08 4.193e+08 3.594e+08 2.995e+08 2.396e+08 1.797e+08 1.198e+08 1e+08 5.991e+07 1.803e+07 2.615e+05
6594
1000000000 5.392e+09 4.793e+09 4.193e+09 3.594e+09 2.995e+09 2.396e+09 1.797e+09 1.198e+09 1e+09 5.991e+08 1.803e+08 2.615e+06
6595
10000000000 5.392e+10 4.793e+10 4.193e+10 3.594e+10 2.995e+10 2.396e+10 1.797e+10 1.198e+10 1e+10 5.991e+09 1.803e+09 2.615e+07
6596
100000000000 5.392e+11 4.793e+11 4.193e+11 3.594e+11 2.995e+11 2.396e+11 1.797e+11 1.198e+11 1e+11 5.991e+10 1.803e+10 2.615e+08
6597
1000000000000 5.392e+12 4.793e+12 4.193e+12 3.594e+12 2.995e+12 2.396e+12 1.797e+12 1.198e+12 1e+12 5.991e+11 1.803e+11 2.615e+09
6598
6599
Error rate: 0.99 Hash functions: 1
6600
Error rate: 0.5 Hash functions: 1
6601
Error rate: 0.1 Hash functions: 3
6602
Error rate: 0.0214158522653385 Hash functions: 6
6603
Error rate: 0.01 Hash functions: 7
6604
Error rate: 0.001 Hash functions: 10
6605
Error rate: 0.0001 Hash functions: 13
6606
Error rate: 0.00001 Hash functions: 17
6607
Error rate: 0.000001 Hash functions: 20
6608
Error rate: 0.0000001 Hash functions: 23
6609
Error rate: 0.00000001 Hash functions: 27
6610
Error rate: 0.000000001 Hash functions: 30
6611
6612
=head2 bfstore
6613
6614
Storing and retrieving bloom filters to and from disk uses Ls C and C. This:
6615
6616
bfstore($bf,'filename.bf');
6617
6618
It the same as:
6619
6620
use Storable qw(store retrieve);
6621
...
6622
store($bf,'filename.bf');
6623
6624
=head2 bfretrieve
6625
6626
This:
6627
6628
my $bf=bfretrieve('filename.bf');
6629
6630
Or this:
6631
6632
my $bf=bfinit('filename.bf');
6633
6634
Is the same as:
6635
6636
use Storable qw(store retrieve);
6637
my $bf=retrieve('filename.bf');
6638
6639
=head2 bfclone
6640
6641
Deep copies the bloom filter data structure. (Which btw is not very deep, two levels at most)
6642
6643
This:
6644
6645
my $bfc = bfclone($bf);
6646
6647
Works just as:
6648
6649
use Storable;
6650
my $bfc=Storable::dclone($bf);
6651
6652
=head2 Object oriented interface to bloom filters
6653
6654
use Acme::Tools;
6655
my $bf=new Acme::Tools::BloomFilter(0.1,1000); # the same as bfinit, see bfinit above
6656
print ref($bf),"\n"; # prints Acme::Tools::BloomFilter
6657
$bf->add(@keys);
6658
$bf->check($keys[0]) and print "ok\n"; # prints ok
6659
$bf->grep(\@keys)==@keys and print "ok\n"; # prints ok
6660
$bf->store('filename.bf');
6661
my $bf2=bfretrieve('filename.bf');
6662
$bf2->check($keys[0]) and print "ok\n"; # still ok
6663
6664
$bf2=$bf->clone();
6665
6666
To instantiate a previously stored bloom filter:
6667
6668
my $bf = Acme::Tools::BloomFilter->new( '/path/to/stored/bloomfilter.bf' );
6669
6670
The o.o. interface has the same methods as the C-subs without the
6671
C-prefix in the names. The C is not available as a
6672
method, although C, C and
6673
C are synonyms.
6674
6675
=head2 Internals and speed
6676
6677
The internal hash-functions are C<< md5( "$key$salt" ) >> from L.
6678
6679
Since C returns 128 bits and most medium to large sized bloom
6680
filters need only a 32 bit hash function, the result from md5() are
6681
split (C-ed) into 4 parts 32 bits each and are treated as if 4
6682
hash functions was called at once (speedup). Using different salts to
6683
the key on each md5 results in different hash functions.
6684
6685
Digest::SHA512 would have been even better since it returns more bits,
6686
if it werent for the fact that it's much slower than Digest::MD5.
6687
6688
String::CRC32::crc32 is faster than Digest::MD5, but not 4 times faster:
6689
6690
time perl -e'use Digest::MD5 qw(md5);md5("asdf$_") for 1..10e6' #5.56 sec
6691
time perl -e'use String::CRC32;crc32("asdf$_") for 1..10e6' #2.79 sec, faster but not per bit
6692
time perl -e'use Digest::SHA qw(sha512);sha512("asdf$_") for 1..10e6' #36.10 sec, too slow (sha1, sha224, sha256 and sha384 too)
6693
6694
Md5 seems to be an ok choice both for speed and avoiding collitions due to skewed data keys.
6695
6696
=head2 Theory and math behind bloom filters
6697
6698
L
6699
6700
L
6701
6702
L
6703
6704
See also Scaleable Bloom Filters: L (not implemented in Acme::Tools)
6705
6706
...and perhaps L
6707
6708
=cut
6709
6710
sub bfinit {
6711
24
100
24
1
8064
return bfretrieve(@_) if @_==1;
6712
23
50
66
72
return bfinit(error_rate=>$_[0], capacity=>$_[1]) if @_==2 and 0<$_[0] and $_[0]<1 and $_[1]>1;
66
33
6713
21
0
33
40
return bfinit(error_rate=>$_[1], capacity=>$_[0]) if @_==2 and 0<$_[1] and $_[1]<1 and $_[0]>1;
33
0
6714
21
82
require Digest::MD5;
6715
21
50
33
@_%2&&croak "Arguments should be a hash of equal number of keys and values";
6716
21
60
my %arg=@_;
6717
21
47
my @ok_param=qw/error_rate capacity min_hashfuncs max_hashfuncs hashfuncs counting_bits adaptive keys/;
6718
21
62
my @not_ok=sort(grep!in($_,@ok_param),keys%arg);
6719
21
100
216
croak "Not ok param to bfinit: ".join(", ",@not_ok) if @not_ok;
6720
20
50
66
58
croak "Not an arrayref in keys-param" if exists $arg{keys} and ref($arg{keys}) ne 'ARRAY';
6721
20
50
47
croak "Not implemented counting_bits=$arg{counting_bits}, should be 2, 4, 8, 16 or 32" if !in(nvl($arg{counting_bits},1),1,2,4,8,16,32);
6722
20
50
33
56
croak "An bloom filters here can not be in both adaptive and counting_bits modes" if $arg{adaptive} and $arg{counting_bits}>1;
6723
20
103
my $bf={error_rate => 0.001, #default p
6724
capacity => 100000, #default n
6725
min_hashfuncs => 1,
6726
max_hashfuncs => 100,
6727
counting_bits => 1, #default: not counting filter
6728
adaptive => 0,
6729
%arg, #arguments
6730
key_count => 0,
6731
overflow => {},
6732
version => $Acme::Tools::VERSION,
6733
};
6734
20
100
100
278
croak "Error rate ($$bf{error_rate}) should be larger than 0 and smaller than 1" if $$bf{error_rate}<=0 or $$bf{error_rate}>=1;
6735
18
50
29
@$bf{'min_hashfuncs','max_hashfuncs'}=(map$arg{hashfuncs},1..2) if $arg{hashfuncs};
6736
18
34
@$bf{'filterlength','hashfuncs'}=bfdimensions($bf); #m and k
6737
18
756
$$bf{filter}=pack("b*", '0' x ($$bf{filterlength}*$$bf{counting_bits}) ); #hm x new empty filter
6738
$$bf{unpack}= $$bf{filterlength}<=2**16/4 ? "n*" # /4 alleviates skewing if m just slightly < 2**x
6739
18
50
37
:$$bf{filterlength}<=2**32/4 ? "N*"
100
6740
: "Q*";
6741
18
100
30
bfadd($bf,@{$arg{keys}}) if $arg{keys};
8
34
6742
16
60
return $bf;
6743
}
6744
sub bfaddbf {
6745
2
2
1
3
my($bf,$bf2)=@_;
6746
my $differror=join"\n",
6747
map "Property $_ differs ($$bf{$_} vs $$bf2{$_})",
6748
2
24
grep $$bf{$_} ne $$bf2{$_},
6749
qw/capacity counting_bits adaptive hashfuncs filterlength/; #not error_rate
6750
2
50
5
croak $differror if $differror;
6751
2
50
5
croak "Can not add adaptive bloom filters" if $$bf{adaptive};
6752
2
3
my $count=$$bf{key_count}+$$bf2{key_count};
6753
croak "Exceeded filter capacity $$bf{key_count} + $$bf2{key_count} = $count > $$bf{capacity}"
6754
2
50
4
if $count > $$bf{capacity};
6755
2
3
$$bf{key_count}+=$$bf2{key_count};
6756
2
100
3
if($$bf{counting_bits}==1){
6757
1
3
$$bf{filter} |= $$bf2{filter};
6758
#$$bf{filter} = $$bf{filter} | $$bf2{filter}; #or-ing
6759
}
6760
else {
6761
1
2
my $cb=$$bf{counting_bits};
6762
1
4
for(0..$$bf{filterlength}-1){
6763
my $sum=
6764
vec($$bf{filter}, $_,$cb)+
6765
4793
3625
vec($$bf2{filter},$_,$cb);
6766
4793
50
5265
if( $sum>2**$cb-1 ){
6767
0
0
$sum=2**$cb-1;
6768
0
0
$$bf{overflow}{$_}++;
6769
}
6770
4793
3911
vec($$bf{filter}, $_,$cb)=$sum;
6771
33
33
191
no warnings;
33
46
33
73296
6772
$$bf{overflow}{$_}+=$$bf2{overflow}{$_}
6773
0
0
and keys(%{$$bf{overflow}})>10 #hmm, arbitrary limit
6774
and croak "Too many overflows, concider doubling counting_bits from $cb to ".(2*$cb)
6775
4793
50
0
6180
if exists $$bf2{overflow}{$_};
0
6776
}
6777
}
6778
2
2
return $bf; #for convenience
6779
}
6780
sub bfsum {
6781
8
8
1
26
my($bf)=@_;
6782
8
100
55
return unpack( "%32b*", $$bf{filter}) if $$bf{counting_bits}==1;
6783
3
5
my($sum,$cb)=(0,$$bf{counting_bits});
6784
3
2549
$sum+=vec($$bf{filter},$_,$cb) for 0..$$bf{filterlength}-1;
6785
3
12
return $sum;
6786
}
6787
sub bfadd {
6788
11
11
1
1434
require Digest::MD5;
6789
11
508
my($bf,@keys)=@_;
6790
11
50
21
return if !@keys;
6791
11
100
66
34
my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
6792
11
31
my($m,$k,$up,$n,$cb,$adaptive)=@$bf{'filterlength','hashfuncs','unpack','capacity','counting_bits','adaptive'};
6793
11
15
for(@$keysref){
6794
#croak "Key should be scalar" if ref($_);
6795
15084
50
66
21894
$$bf{key_count} >= $n and croak "Exceeded filter capacity $n" or $$bf{key_count}++;
6796
15083
7960
my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
15083
76699
6797
15083
100
66
32584
if ($cb==1 and !$adaptive) { # normal bloom filter
50
0
6798
11110
51315
vec($$bf{filter}, $h[$_] % $m, 1) = 1 for 0..$k-1;
6799
}
6800
elsif ($cb>1) { # counting bloom filter
6801
3973
3607
for(0..$k-1){
6802
11919
7943
my $pos=$h[$_] % $m;
6803
my $c=
6804
vec($$bf{filter}, $pos, $cb) =
6805
11919
13826
vec($$bf{filter}, $pos, $cb) + 1;
6806
11919
100
17050
if($c==0){
6807
26
26
vec($$bf{filter}, $pos, $cb) = -1;
6808
$$bf{overflow}{$pos}++
6809
26
100
100
69
and keys(%{$$bf{overflow}})>10 #hmm, arbitrary limit
2
129
6810
and croak "Too many overflows, concider doubling counting_bits from $cb to ".(2*$cb);
6811
}
6812
}
6813
}
6814
elsif ($adaptive) { # adaptive bloom filter
6815
0
0
my($i,$key,$bit)=(0+@h,$_);
6816
0
0
for(0..$$bf{filterlength}-1){
6817
0
0
0
$i+=push(@h, unpack $up, Digest::MD5::md5($key,$i)) if !@h;
6818
0
0
my $pos=shift(@h) % $m;
6819
0
0
$bit=vec($$bf{filter}, $pos, 1);
6820
0
0
vec($$bf{filter}, $pos, 1)=1;
6821
0
0
0
0
last if $_>=$k-1 and $bit==0;
6822
}
6823
}
6824
0
0
else {croak}
6825
}
6826
9
546
return 1;
6827
}
6828
sub bfcheck {
6829
5
5
1
55
require Digest::MD5;
6830
5
710
my($bf,@keys)=@_;
6831
5
50
12
return if !@keys;
6832
5
50
33
20
my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
6833
5
15
my($m,$k,$up,$cb,$adaptive)=@$bf{'filterlength','hashfuncs','unpack','counting_bits','adaptive'};
6834
5
5
my $wa=wantarray();
6835
5
50
7
if(!$adaptive){ # normal bloom filter or counting bloom filter
6836
return map {
6837
5
42
my $match = 1; # match if every bit is on
24001
13484
6838
24001
12762
my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
24001
128523
6839
24001
100
117531
vec($$bf{filter}, $h[$_] % $m, $cb) or $match=0 or last for 0..$k-1;
100
6840
24001
50
24041
return $match if !$wa;
6841
24001
25453
$match;
6842
} @$keysref;
6843
}
6844
else { # adaptive bloom filter
6845
return map {
6846
0
0
my($match,$i,$key,$bit,@h)=(1,0,$_);
0
0
6847
0
0
for(0..$$bf{filterlength}-1){
6848
0
0
0
$i+=push(@h, unpack $up, Digest::MD5::md5($key,$i)) if !@h;
6849
0
0
my $pos=shift(@h) % $m;
6850
0
0
$bit=vec($$bf{filter}, $pos, 1);
6851
0
0
0
0
$match++ if $_ > $k-1 and $bit==1;
6852
0
0
0
0
$match=0 if $_ <= $k-1 and $bit==0;
6853
0
0
0
last if $bit==0;
6854
}
6855
0
0
0
return $match if !$wa;
6856
0
0
$match;
6857
} @$keysref;
6858
}
6859
}
6860
sub bfgrep { # just a copy of bfcheck with map replaced by grep
6861
8
8
1
430
require Digest::MD5;
6862
8
13
my($bf,@keys)=@_;
6863
8
50
16
return if !@keys;
6864
8
50
33
37
my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
6865
8
20
my($m,$k,$up,$cb)=@$bf{'filterlength','hashfuncs','unpack','counting_bits'};
6866
return grep {
6867
8
19
my $match = 1; # match if every bit is on
6100
3595
6868
6100
3408
my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
6100
22704
6869
6100
100
22555
vec($$bf{filter}, $h[$_] % $m, $cb) or $match=0 or last for 0..$k-1;
100
6870
6100
5861
$match;
6871
} @$keysref;
6872
}
6873
sub bfgrepnot { # just a copy of bfgrep with $match replaced by not $match
6874
1
1
1
5
require Digest::MD5;
6875
1
3
my($bf,@keys)=@_;
6876
1
50
3
return if !@keys;
6877
1
50
33
10
my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
6878
1
4
my($m,$k,$up,$cb)=@$bf{'filterlength','hashfuncs','unpack','counting_bits'};
6879
return grep {
6880
1
4
my $match = 1; # match if every bit is on
1000
540
6881
1000
516
my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
1000
3726
6882
1000
50
3730
vec($$bf{filter}, $h[$_] % $m, $cb) or $match=0 or last for 0..$k-1;
50
6883
1000
1115
!$match;
6884
} @$keysref;
6885
}
6886
sub bfdelete {
6887
3
3
1
60
require Digest::MD5;
6888
3
11
my($bf,@keys)=@_;
6889
3
50
7
return if !@keys;
6890
3
100
100
15
my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
6891
3
7
my($m,$k,$up,$cb)=@$bf{'filterlength','hashfuncs','unpack','counting_bits'};
6892
3
50
5
croak "Cannot delete from non-counting bloom filter (use counting_bits 4 e.g.)" if $cb==1;
6893
3
5
for my $key (@$keysref){
6894
501
254
my @h; push @h, unpack $up, Digest::MD5::md5($key,0+@h) while @h<$k;
501
2016
6895
501
50
33
751
$$bf{key_count}==0 and croak "Deleted all and then some" or $$bf{key_count}--;
6896
501
369
my($ones,$croak,@pos)=(0);
6897
501
431
for(0..$k-1){
6898
1503
1067
my $pos=$h[$_] % $m;
6899
my $c=
6900
1503
1002
vec($$bf{filter}, $pos, $cb);
6901
1503
1411
vec($$bf{filter}, $pos, $cb)=$c-1;
6902
1503
100
1746
$croak="Cannot delete a non-existing key $key" if $c==0;
6903
$croak="Cannot delete with previously overflown position. Try doubleing counting_bits"
6904
1503
50
66
3602
if $c==1 and ++$ones and $$bf{overflow}{$pos};
66
6905
}
6906
501
100
662
if($croak){ #rollback
6907
vec($$bf{filter}, $h[$_] % $m, $cb)=
6908
1
6
vec($$bf{filter}, $h[$_] % $m, $cb)+1 for 0..$k-1;
6909
1
142
croak $croak;
6910
}
6911
}
6912
2
14
return $bf;
6913
}
6914
sub bfstore {
6915
1
1
1
13818
require Storable;
6916
1
5
Storable::store(@_);
6917
}
6918
sub bfretrieve {
6919
2
2
1
301
require Storable;
6920
2
6
my $bf=Storable::retrieve(@_);
6921
2
50
344
carp "Retrieved bloom filter was stored in version $$bf{version}, this is version $VERSION" if $$bf{version}>$VERSION;
6922
2
5
return $bf;
6923
}
6924
sub bfclone {
6925
1
1
1
792
require Storable;
6926
1
2578
return Storable::dclone(@_); #could be faster
6927
}
6928
sub bfdimensions_old {
6929
my($n,$p,$mink,$maxk, $k,$flen,$m)=
6930
0
0
0
0
0
@_==1 ? (@{$_[0]}{'capacity','error_rate','min_hashfuncs','max_hashfuncs'},1)
0
0
0
6931
:@_==2 ? (@_,1,100,1)
6932
: croak "Wrong number of arguments (".@_."), should be 2";
6933
0
0
0
0
croak "p ($p) should be > 0 and < 1" if not ( 0<$p && $p<1 );
6934
0
0
0
$m=-1*$_*$n/log(1-$p**(1/$_)) and (!defined $flen or $m<$flen) and ($flen,$k)=($m,$_) for $mink..$maxk;
0
0
6935
0
0
$flen = int(1+$flen);
6936
0
0
return ($flen,$k);
6937
}
6938
sub bfdimensions {
6939
my($n,$p,$mink,$maxk)=
6940
18
0
18
1
35
@_==1 ? (@{$_[0]}{'capacity','error_rate','min_hashfuncs','max_hashfuncs'})
18
50
32
6941
:@_==2 ? (@_,1,100)
6942
: croak "Wrong number of arguments (".@_."), should be 2";
6943
18
42
my $k=log(1/$p)/log(2); # k hash funcs
6944
18
30
my $m=-$n*log($p)/log(2)**2; # m bits in filter
6945
18
38
return ($m+0.5,min($maxk,max($mink,int($k+0.5))));
6946
}
6947
6948
#crontab -e
6949
#01 4,10,16,22 * * * /usr/bin/perl -MAcme::Tools -e'Acme::Tools::_update_currency_file("/var/www/html/currency-rates")' > /dev/null 2>&1
6950
6951
sub _update_currency_file { #call from cron
6952
0
0
0
0
my $fn=shift()||'/var/www/html/currency-rates';
6953
0
0
0
my %exe=map+($_=>"/usr/bin/$_"),qw/curl ci/;-x$_ or die for values %exe;
0
0
6954
0
0
0
open my $F, '>', $fn or die"ERROR: Could not write file $fn ($!)\n";
6955
0
0
print $F "#-- Currency rates ".localtime()." (".time().")\n";
6956
0
0
print $F "# File generated by Acme::Tools version $VERSION\n";
6957
0
0
print $F "# Updated every 6th hour on http://calthis.com/currency-rates\n";
6958
0
0
print $F "NOK 1.000000000\n";
6959
0
0
my $amount=1000;
6960
0
0
my $data=qx($exe{curl} -s "http://www.x-rates.com/table/?from=NOK&amount=$amount");
6961
0
0
$data=~s,to=([A-Z]{3})(.)>,$2>$1 ,g;
6962
0
0
my @data=ht2t($data,"Alphabetical order"); shift @data;
0
0
6963
0
0
0
@data=map "$$_[1] ".($$_[4]>1e-2?$$_[4]:$$_[2]?sprintf("%.8f",$amount/$$_[2]):0)."\n",@data;
0
6964
0
0
my %data=map split,@data;
6965
0
0
my@tc=qx($exe{curl} -s https://btc-e.com/api/3/ticker/btc_usd-ltc_usd)=~/avg.?:(\d+\.?\d*)/g;
6966
0
0
push @data,"BTC ".($tc[0]*$data{USD})."\n";
6967
0
0
push @data,"LTC ".($tc[1]*$data{USD})."\n";
6968
0
0
print $F sort(@data);
6969
0
0
close($F);
6970
0
0
0
qx($exe{ci} -l -m. -d $fn) if -w"$fn,v";
6971
}
6972
6973
sub ftype {
6974
0
0
0
0
my $f=shift;
6975
0
0
0
0
-e $f and
0
0
0
0
0
0
0
0
6976
-f$f ? 'file' # -f File is a plain file.
6977
:-d$f ? 'dir' # -d File is a directory.
6978
:-l$f ? 'symlink' # -l File is a symbolic link.
6979
:-p$f ? 'pipe' # -p File is a named pipe (FIFO), or Filehandle is a pipe.
6980
:-S$f ? 'socket' # -S File is a socket.
6981
:-b$f ? 'blockfile' # -b File is a block special file.
6982
:-c$f ? 'charfile' # -c File is a character special file.
6983
:-t$f ? 'ttyfile' # -t Filehandle is opened to a tty.
6984
: ''
6985
or undef;
6986
}
6987
6988
sub ext2mime {
6989
0
0
0
0
my $ext=shift(); #or filename
6990
#http://www.sitepoint.com/web-foundations/mime-types-complete-list/
6991
0
0
croak "todo: ext2mime not yet implemented";
6992
#return "application/json";#feks
6993
}
6994
6995
=head1 COMMANDS
6996
6997
=head2 install_acme_command_tools
6998
6999
sudo perl -MAcme::Tools -e install_acme_command_tools
7000
7001
Wrote executable /usr/local/bin/conv
7002
Wrote executable /usr/local/bin/due
7003
Wrote executable /usr/local/bin/xcat
7004
Wrote executable /usr/local/bin/freq
7005
Wrote executable /usr/local/bin/deldup
7006
Wrote executable /usr/local/bin/ccmd
7007
Wrote executable /usr/local/bin/z2z
7008
Wrote executable /usr/local/bin/2gz
7009
Wrote executable /usr/local/bin/2gzip
7010
Wrote executable /usr/local/bin/2bz2
7011
Wrote executable /usr/local/bin/2bzip2
7012
Wrote executable /usr/local/bin/2xz
7013
7014
Examples of commands then made available:
7015
7016
conv 1 USD EUR #might show 0.88029 if thats the current currency rate. Uses conv()
7017
conv .5 in cm #reveals that 1/2 inch is 1.27 cm, see doc on conv() for all supported units
7018
due [-h] /path/1/ /path/2/ #like du, but show statistics on file extentions instead of subdirs
7019
xcat file #like cat, zcat, bzcat or xzcat in one. Uses file extention to decide. Uses openstr()
7020
freq file #reads file(s) or stdin and view counts of each byte 0-255
7021
ccmd grep string /huge/file #caches stdout+stderr for 15 minutes (default) for much faster results later
7022
ccmd "sleep 2;echo hello" #slow first time. Note the quotes!
7023
ccmd "du -s ~/*|sort -n|tail" #ccmd store stdout+stderr in /tmp files (default)
7024
z2z [-pvk1-9o -t type] files #convert from/to .gz/bz2/xz files, -p progress, -v verbose (output result),
7025
#-k keep org file, -o overwrite, 1-9 compression degree
7026
#2xz and 2bz2 depends on xz and bzip2 being installed on system
7027
2xz #same as z2z with -t xz
7028
2bz2 #same as z2z with -t bz2
7029
2gz #same as z2z with -t gz
7030
7031
TODO :
7032
finddup [-v -d -s -h] path1/ path2/
7033
#reports (+deletes with -d) duplicate files
7034
#finddup is NOT IMPLEMENTED YET! Use -s for symlink dups, -h for hardlink
7035
rttop
7036
trunc file(s)
7037
wipe file(s)
7038
7039
=head3 z2z
7040
7041
=head3 2xz
7042
7043
=head3 2bz2
7044
7045
=head3 2gz
7046
7047
The commands C<2xz>, C<2bz2> and C<2gz> are just synonyms for C with an implicitly added option C<-t xz>, C<-t xz> or C<-t gz> accordingly.
7048
7049
z2z [-p -k -v -o -1 -2 -3 -4 -5 -6 -7 -8 -9 ] files
7050
7051
Converts (recompresses) files from one compression sc
7052
7053
7054
7055
=head3 due
7056
7057
Like C command but views space used by file extentions instead of dirs. Options:
7058
7059
due [-options] [dirs] [files]
7060
due -h View bytes "human readable", i.e. C<8.72 MB> instead of C<9145662 b> (bytes)
7061
due -k | -m View bytes in kilobytes | megabytes (1024 | 1048576)
7062
due -K Like -k but uses 1000 instead of 1024
7063
due -z View two extentions if .z .Z .gz .bz2 .rz or .xz (.tar.gz, not just .gz)
7064
due -M Also show min, medium and max date (mtime) of files, give an idea of their age
7065
due -P Also show 10, 50 (medium) and 90 percentile of file date
7066
due -MP Both -M and -P, shows min, 10p, 50p, 90p and max
7067
due -a Sort output alphabetically by extention (default order is by size)
7068
due -c Sort output by number of files
7069
due -i Ignore case, .GZ and .gz is the same, output in lower case
7070
due -t Adds time of day to -M and -P output
7071
due -e 'regex' Exclude files (full path) matching regex. Ex: due -e '\.git'
7072
TODO: due -l TODO: Exclude hardlinks (dont count "same" file more than once, "man du")
7073
ls -l | due Parses output of ls -l, find -ls, tar tvf for size+filename and reports
7074
find | due List of filenames from stdin produces same as just command 'due'
7075
ls | due Reports on just files in current dir without recursing into subdirs
7076
7077
=cut
7078
7079
sub install_acme_command_tools {
7080
0
0
1
0
my $dir=(grep -d$_, @_, '/usr/local/bin', '/usr/bin')[0];
7081
0
0
for( qw( conv due xcat freq finddup ccmd trunc wipe rttop z2z 2gz 2gzip 2bz2 2bzip2 2xz ) ){
7082
0
0
unlink("$dir/$_");
7083
0
0
writefile("$dir/$_", "#!$^X\nuse Acme::Tools;\nAcme::Tools::cmd_$_(\@ARGV);\n");
7084
0
0
sys("/bin/chmod +x $dir/$_"); #hm umask
7085
0
0
print "Wrote executable $dir/$_\n";
7086
}
7087
}
7088
0
0
0
0
sub cmd_conv { print conv(@ARGV)."\n" }
7089
7090
sub cmd_due { #TODO: output from tar tvf and ls and find -ls
7091
0
0
0
0
my %o=_go("zkKmhciMPate:l");
7092
0
0
require File::Find;
7093
33
33
169
no warnings 'uninitialized';
33
43
33
102984
7094
0
0
0
die"$0: -l not implemented yet\n" if $o{l}; #man du: default is not to count hardlinks more than once, with -l it does
7095
0
0
0
die"$0: -h, -k or -m can not be used together\n" if $o{h}+$o{k}+$o{m}>1;
7096
0
0
0
die"$0: -c and -a can not be used together\n" if $o{a}+$o{c}>1;
7097
0
0
0
die"$0: -k and -m can not be used together\n" if $o{k}+$o{m}>1;
7098
0
0
0
my @q=@ARGV; @q=('.') if !@q;
0
0
7099
0
0
my(%c,%b,$cnt,$bts,%mtime);
7100
0
0
0
my $zext=$o{z}?'(\.(z|Z|gz|bz2|xz|rz|kr))?':'';
7101
0
0
my $r=qr/(\.[^\.\/]{1,10}$zext)$/;
7102
0
0
0
my $qrexcl=exists$o{e}?qr/$o{e}/:0;
7103
#TODO: ought to work: tar cf - .|tar tvf -|due
7104
#my $qrstdin=qr/(^| )\-[rwx\-sS]{9} +\d+ \w+ +\w+ +(\d+) [a-zA-Z]+\.? +\d+ +(?:\d\d:\d\d|\d{4}) (.*)$/;
7105
0
0
my $qrstdin=qr/(^| )\-[rwx\-sS]{9} +(\d+ )?\w+[ \/]+\w+ +(\d+) [a-zA-Z]+\.? +\d+ +(?:\d\d:\d\d|\d{4}) (.*)$/;
7106
0
0
0
if(-p STDIN){
7107
0
0
while(<>){
7108
0
0
chomp;
7109
0
0
0
my($sz,$f)=/$qrstdin/?($2,$3):-f$_?(-s$_,$_):next;
0
7110
0
0
0
my $ext=$f=~$r?$1:'';
7111
0
0
0
$ext=lc($ext) if $o{i};
7112
0
0
$cnt++; $c{$ext}++;
0
0
7113
0
0
$bts+=$sz; $b{$ext}+=$sz;
0
0
7114
#$mtime{$ext}.=",$mtime" if
7115
0
0
0
0
$o{M} || $o{P} and die"due: -M and -P not yet implemented for STDIN";
7116
}
7117
}
7118
else { #hm DRY
7119
File::Find::find({wanted =>
7120
sub {
7121
0
0
0
0
return if !-f$_;
7122
0
0
0
0
return if $qrexcl and defined $File::Find::name and $File::Find::name=~$qrexcl;
0
7123
0
0
my($sz,$mtime)=(stat($_))[7,9];
7124
0
0
0
my $ext=m/$r/?$1:'';
7125
0
0
0
$ext=lc($ext) if $o{i};
7126
0
0
$cnt++; $c{$ext}++;
0
0
7127
0
0
$bts+=$sz; $b{$ext}+=$sz;
0
0
7128
0
0
0
0
$mtime{$ext}.=",$mtime" if $o{M} || $o{P};
7129
0
0
1;
7130
0
0
} },@q);
7131
}
7132
0
0
0
my($f,$s)=$o{k}?("%14.2f kb",sub{$_[0]/1024})
7133
0
0
0
:$o{K}?("%14.2f Kb",sub{$_[0]/1000})
7134
0
0
0
:$o{m}?("%14.2f mb",sub{$_[0]/1024**2})
7135
0
0
0
:$o{h}?("%14s", sub{bytes_readable($_[0])})
7136
0
0
0
0
: ("%14d b", sub{$_[0]});
0
0
0
0
0
7137
my @e=$o{a}?(sort(keys%c))
7138
0
0
0
:$o{c}?(sort{$c{$a}<=>$c{$b} or $a cmp $b}keys%c)
7139
0
0
0
: (sort{$b{$a}<=>$b{$b} or $a cmp $b}keys%c);
0
0
0
0
7140
0
0
0
my $perc=!$o{M}&&!$o{P}?sub{""}:
7141
sub{
7142
0
0
0
0
my @p=$o{P}?(10,50,90):(50);
7143
0
0
my @m=@_>0 ? do {grep$_, split",", $mtime{$_[0]}}
7144
0
0
0
: do {grep$_, map {split","} values %mtime};
0
0
0
0
7145
0
0
my @r=percentile(\@p,@m);
7146
0
0
0
@r=(min(@m),@r,max(@m)) if $o{M};
7147
0
0
@r=map int($_), @r;
7148
0
0
0
my $fmt=$o{t}?'YYYY/MM/DD-MM:MI:SS':'YYYY/MM/DD';
7149
0
0
@r=map tms($_,$fmt), @r;
7150
0
0
" ".join(" ",@r);
7151
0
0
0
0
};
7152
0
0
printf("%-11s %8d $f %7.2f%%%s\n",$_,$c{$_},&$s($b{$_}),100*$b{$_}/$bts,&$perc($_)) for @e;
7153
0
0
printf("%-11s %8d $f %7.2f%%%s\n","Sum",$cnt,&$s($bts),100,&$perc());
7154
}
7155
sub cmd_xcat {
7156
0
0
0
0
for my $fn (@_){
7157
0
0
my $os=openstr($fn);
7158
0
0
0
0
open my $FH, $os or warn "xcat: cannot open $os ($!)\n" and next;
7159
#binmode($FH);#hm?
7160
0
0
print while <$FH>;
7161
0
0
close($FH);
7162
}
7163
}
7164
sub cmd_freq {
7165
0
0
0
0
my(@f,$i);
7166
0
0
map $f[$_]++, unpack("C*",$_) while <>;
7167
0
0
my $s=" " x 12;map{print"$_$s$_$s$_\n"}("BYTE CHAR COUNT","---- ----- -------");
0
0
0
0
7168
0
0
my %m=(145,"DOS-æ",155,"DOS-ø",134,"DOS-å",146,"DOS-Æ",157,"DOS-Ø",143,"DOS-Å",map{($_," ")}0..31);
0
0
7169
0
0
0
0
printf("%4d %5s%8d".(++$i%3?$s:"\n"),$_,$m{$_}||chr,$f[$_]) for grep$f[$_],0..255;print "\n";
0
0
7170
0
0
my @no=grep!$f[$_],0..255; print "No bytes for these ".@no.": ".join(" ",@no)."\n";
0
0
7171
}
7172
sub cmd_deldup {
7173
0
0
0
0
cmd_finddup(@_);
7174
}
7175
sub cmd_finddup {
7176
# ~/test/deldup.pl #find and optionally delete duplicate files effiencently
7177
#http://www.commandlinefu.com/commands/view/3555/find-duplicate-files-based-on-size-first-then-md5-hash
7178
0
0
0
0
die "todo: finddup not ready yet"
7179
}
7180
#http://stackoverflow.com/questions/11900239/can-i-cache-the-output-of-a-command-on-linux-from-cli
7181
our $Ccmd_cache_dir='/tmp/acme-tools-ccmd-cache';
7182
our $Ccmd_cache_expire=15*60; #default 15 minutes
7183
sub cmd_ccmd {
7184
0
0
0
0
require Digest::MD5;
7185
0
0
my $cmd=join" ",@_;
7186
0
0
my $d="$Ccmd_cache_dir/".username();
7187
0
0
makedir($d);
7188
0
0
my $md5=Digest::MD5::md5_hex($cmd);
7189
0
0
my($fno,$fne)=map"$d/cmd.$md5.std$_","out","err";
7190
0
0
0
my $too_old=sub{time()-(stat(shift))[9] >= $Ccmd_cache_expire};
0
0
7191
0
0
unlink grep &$too_old($_), <$d/*.std???>;
7192
0
0
0
0
sys("($cmd) > $fno 2> $fne") if !-e$fno or &$too_old($fno);
7193
0
0
print STDOUT "".readfile($fno);
7194
0
0
print STDERR "".readfile($fne);
7195
}
7196
7197
0
0
0
0
sub cmd_trunc { die "todo: trunc not ready yet"} #truncate a file, size 0, keep all other attr
7198
7199
sub cmd_wipe {
7200
0
0
0
0
my %o=_go("n:k");
7201
0
0
wipe($_,$o{n},$o{k}) for @_;
7202
}
7203
7204
0
0
0
0
sub cmd_2gz {cmd_z2z("-t","gz", @_)}
7205
0
0
0
0
sub cmd_2gzip {cmd_z2z("-t","gz", @_)}
7206
0
0
0
0
sub cmd_2bz2 {cmd_z2z("-t","bz2",@_)}
7207
0
0
0
0
sub cmd_2bzip2 {cmd_z2z("-t","bz2",@_)}
7208
0
0
0
0
sub cmd_2xz {cmd_z2z("-t","xz", @_)}
7209
#todo?: sub cmd_7z
7210
sub cmd_z2z {
7211
8
8
0
18171
local @ARGV=@_;
7212
8
35
my %o=_go("pt:kvhon123456789");
7213
8
60
my $t=repl(lc$o{t},qw/gzip gz bzip2 bz2/);
7214
8
50
60
die "due: unknown compression type $o{t}, known are gz, bz2 and xz" if $t!~/^(gz|bz2|xz)$/;
7215
8
180
my $sum=sum(map -s$_,@ARGV);
7216
8
100
66
60
print "Converting ".@ARGV." files, total ".bytes_readable($sum)."\n" if $o{v} and @ARGV>1;
7217
8
17
my $cat='cat';
7218
8
0
20
if($o{p}){ if(qx(which pv)){ $cat='pv' } else { warn repl(<<"",qr/^\s+/) } }
0
50
0
0
0
0
0
7219
due: pv for -p not found, install with sudo yum install pv, sudo apt-get install pv or similar
7220
7221
8
16
my $sumnew=0;
7222
8
34
my $start=time_fp();
7223
8
11
my($i,$bsf)=(0,0);#bytes so far
7224
8
24
$Eta{'z2z'}=[];eta('z2z',0,$sum);
8
36
7225
8
13
for(@ARGV){
7226
11
50
17
my $new=$_; $new=~s/(\.(gz|bz2|xz))?$/.$t/i or die;
11
138
7227
11
100
37
my $ext=defined($2)?lc($2):'';
7228
11
50
33
172
my $same=/^$new$/; $new.=".tmp" if $same; die if $o{k} and $same;
11
50
18
11
69
7229
11
50
33
141
next if !-e$_ and warn"$_ do not exists\n";
7230
11
50
33
84
next if !-r$_ and warn"$_ is not readable\n";
7231
11
0
33
149
next if -e$new and !$o{o} and warn"$new already exists, skipping (use -o to overwrite)\n";
33
7232
11
100
167
my $unz={qw/gz gunzip bz2 bunzip2 xz unxz/}->{$ext}||'';
7233
#todo: my $cntfile="/tmp/acme-tools-z2z-wc-c.$$";
7234
#todo: my $cnt="tee >(wc -c>$cntfile)" if $ENV{SHELL}=~/bash/ and $o{v}; #hm dash vs bash
7235
11
44
my $z= {qw/gz gzip bz2 bzip2 xz xz/}->{$t};
7236
11
68
$z.=" -$_" for grep$o{$_},1..9;
7237
11
38
my $cmd="$cat $_|$unz|$z>$new";
7238
#todo: "$cat $_|$unz|$cnt|$z>$new";
7239
#cat /tmp/kontroll-linux.xz|unxz|tee >(wc -c>/tmp/p)|gzip|wc -c;cat /tmp/p
7240
11
56
$cmd=~s,\|+,|,g; #print "cmd: $cmd\n";
7241
11
50
sys($cmd);
7242
11
50
50
118
chall($_,$new)||die if !$o{n};
7243
11
42
my($szold,$sznew)=map{-s$_}($_,$new);
22
206
7244
11
80
$bsf+=-s$_;
7245
11
50
692
unlink $_ if !$o{k};
7246
11
50
0
24
rename($new, replace($new,qr/.tmp$/)) or die if $same;
7247
11
50
25
if($o{v}){
7248
11
21
$sumnew+=$sznew;
7249
11
168
my $pr=sprintf"%0.1f%%",100*$sznew/$szold;
7250
#todo: my $szuncmp=-s$cntfile&&time()-(stat($cntfile))[9]<10 ? qx(cat $cntfile) : '';
7251
#todo: $o{h} ? printf("%6.1f%% %9s => %9s => %9s %s\n", $pr,(map bytes_readable($_),$szold,$szuncmp,$sznew),$_)
7252
#todo: : printf("%6.1f%% %11d b => %11d b => %11 b %s\n",$pr,$szold,$szuncmp,$sznew,$_)
7253
my $str= $o{h}
7254
11
100
87
? sprintf("%-7s %9s => %9s", $pr,(map bytes_readable($_),$szold,$sznew))
7255
: sprintf("%-7s %11d b => %11d b", $pr,$szold,$sznew);
7256
11
100
30
if(@ARGV>1){
7257
4
10
$i++;
7258
4
0
12
$str=$i<@ARGV
50
7259
? " ETA:".sec_readable(eta('z2z',$bsf,$sum)-time_fp())." $str"
7260
: " TA: 0s $str"
7261
if $sum>1e6;
7262
4
19
$str="$i/".@ARGV." $str";
7263
}
7264
11
341
print "$str $new\n";
7265
}
7266
}
7267
8
100
66
142
if($o{v} and @ARGV>1){
7268
1
50
17
my $bytes=$o{h}?'':'bytes ';
7269
my $str=
7270
sprintf "%d files compressed in %.3f seconds from %s to %s $bytes (%s bytes) %.1f%% of original\n",
7271
0+@ARGV,
7272
time_fp()-$start,
7273
1
50
11
(map{$o{h}?bytes_readable($_):$_}($sum,$sumnew,$sumnew-$sum)),
3
10
7274
100*$sumnew/$sum;
7275
1
8
$str=~s,\((\d),(+$1,;
7276
1
30
print $str;
7277
}
7278
}
7279
7280
8
8
1802
sub _go { require Getopt::Std; my %o; Getopt::Std::getopts(shift() => \%o); %o }
8
51
8
48
8
560
7281
7282
0
0
0
0
sub cmd_rttop { die "rttop: not implemented here yet.\n" }
7283
0
0
0
0
sub cmd_whichpm { die "whichpm: not implemented here yet.\n" } #-a (all, inkl VERSION og ls -l)
7284
0
0
0
0
sub cmd_catal { die "catal: not implemented here yet.\n" } #-a (all, inkl VERSION og ls -l)
7285
#todo: cmd_tabdiff (fra sonyk)
7286
#todo: cmd_catlog (ala catal med /etc/catlog.conf, default er access_log)
7287
7288
=head1 DATABASE STUFF - NOT IMPLEMENTED YET
7289
7290
Uses L. Comming soon...
7291
7292
$Dbh
7293
dlogin
7294
dlogout
7295
drow
7296
drows
7297
drowc
7298
drowsc
7299
dcols
7300
dpk
7301
dsel
7302
ddo
7303
dins
7304
dupd
7305
ddel
7306
dcommit
7307
drollback
7308
7309
=cut
7310
7311
#my$dummy=<<'SOON';
7312
sub dtype {
7313
0
0
0
0
my $connstr=shift;
7314
0
0
0
return 'SQLite' if $connstr=~/(\.sqlite|sqlite:.*\.db)$/i;
7315
0
0
0
return 'Oracle' if $connstr=~/\@/;
7316
0
0
return 'Pg' if 1==2;
7317
0
0
die;
7318
}
7319
7320
our($Dbh,@Dbh,%Sth);
7321
our %Dbattr=(RaiseError => 1, AutoCommit => 0); #defaults
7322
sub dlogin {
7323
0
0
0
0
my $connstr=shift();
7324
0
0
my %attr=(%Dbattr,@_);
7325
0
0
my $type=dtype($connstr);
7326
0
0
my($dsn,$u,$p)=('','','');
7327
0
0
0
if($type eq 'SQLite'){
0
0
7328
0
0
$dsn=$connstr;
7329
}
7330
elsif($type eq 'Oracle'){
7331
0
0
($u,$p,$dsn)=($connstr=~m,(.+?)(/.+?)?\@(.+),);
7332
}
7333
elsif($type eq 'Pg'){
7334
0
0
croak "todo";
7335
}
7336
else{
7337
0
0
croak "dblogin: unknown database type for connection string $connstr\n";
7338
}
7339
0
0
$dsn="dbi:$type:$dsn";
7340
0
0
0
push @Dbh, $Dbh if $Dbh; #local is better?
7341
0
0
require DBI;
7342
0
0
$Dbh=DBI->connect($dsn,$u,$p,\%attr); #connect_cached?
7343
}
7344
sub dlogout {
7345
0
0
0
0
$Dbh->disconnect;
7346
0
0
0
$Dbh=pop@Dbh if @Dbh;
7347
}
7348
sub drow {
7349
0
0
0
0
my($q,@b)=_dattrarg(@_);
7350
#my $sth=do{$Sth{$Dbh,$q} ||= $Dbh->prepare_cached($q)};
7351
0
0
my $sth=$Dbh->prepare_cached($q);
7352
0
0
$sth->execute(@b);
7353
0
0
my @r=$sth->fetchrow_array;
7354
0
0
0
$sth->finish if $$Dbh{Driver}{Name} eq 'SQLite';
7355
#$dbh->selectrow_array($statement);
7356
0
0
0
return @r==1?$r[0]:@r;
7357
}
7358
0
0
sub drows {
7359
}
7360
0
0
sub drowc {
7361
}
7362
0
0
sub drowsc {
7363
}
7364
0
0
sub dcols {
7365
}
7366
0
0
sub dpk {
7367
}
7368
0
0
sub dsel {
7369
}
7370
sub ddo {
7371
0
0
0
0
my @arg=_dattrarg(@_);
7372
#warn serialize(\@arg,'arg','',1);
7373
0
0
$Dbh->do(@arg); #hm cache?
7374
}
7375
0
0
sub dins {
7376
}
7377
0
0
sub dupd {
7378
}
7379
0
0
sub ddel {
7380
}
7381
0
0
0
0
sub dcommit { $Dbh->commit }
7382
0
0
0
0
sub drollback { $Dbh->rollback }
7383
7384
sub _dattrarg {
7385
0
0
0
my @arg=@_;
7386
0
0
0
splice @arg,1,0, ref($arg[-1]) eq 'HASH' ? pop(@arg) : {};
7387
0
0
@arg;
7388
}
7389
7390
=head2 self_update
7391
7392
Update Acme::Tools to newest version quick and dirty:
7393
7394
function pmview(){ ls -ld `perl -M$1 -le'$m=shift;$mi=$m;$mi=~s,::,/,g;print $INC{"$mi.pm"};warn"Version ".${$m."::VERSION"}."\n"' $1`;}
7395
7396
pmview Acme::Tools #view date and version before
7397
sudo perl -MAcme::Tools -e Acme::Tools::self_update #update to newest version
7398
pmview Acme::Tools #view date and version after
7399
7400
Does C to where Acme/Tools.pm are and then wget -N https://raw.githubusercontent.com/kjetillll/Acme-Tools/master/Tools.pm
7401
7402
TODO: cmd_acme_tools_self_update, accept --no-check-certificate to use on curl
7403
7404
=cut
7405
7406
our $Wget;
7407
our $Self_update_url='https://raw.githubusercontent.com/kjetillll/Acme-Tools/master/Tools.pm'; #todo: change site
7408
sub self_update {
7409
#in($^O,'linux','cygwin') or die"ERROR: self_update works on linux and cygwin only";
7410
0
0
0
1
0
$Wget||=(grep -x$_,map"$_/wget",'/usr/bin','/bin','/usr/local/bin','.')[0]; #hm --no-check-certificate
7411
0
0
0
-x$Wget or die"ERROR: wget ($Wget) executable not found\n";
7412
0
0
my $d=dirname(__FILE__);
7413
0
0
sys("cd $d; ls -l Tools.pm; md5sum Tools.pm");
7414
0
0
0
sys("cd $d; $Wget -N ".($ARGV[0]||$Self_update_url));
7415
0
0
sys("cd $d; ls -l Tools.pm; md5sum Tools.pm");
7416
}
7417
7418
1;
7419
7420
package Acme::Tools::BloomFilter;
7421
33
33
848
use 5.008; use strict; use warnings; use Carp;
33
33
101
33
33
138
33
33
40
33
793
33
104
33
39
33
916
33
110
33
29
33
13885
7422
2
2
185
sub new { my($class,@p)=@_; my $self=Acme::Tools::bfinit(@p); bless $self, $class }
2
6
2
8
7423
2
2
55
sub add { &Acme::Tools::bfadd }
7424
0
0
0
sub addbf { &Acme::Tools::bfaddbf }
7425
2
2
52
sub check { &Acme::Tools::bfcheck }
7426
1
1
3
sub grep { &Acme::Tools::bfgrep }
7427
0
0
0
sub grepnot { &Acme::Tools::bfgrepnot }
7428
0
0
0
sub delete { &Acme::Tools::bfdelete }
7429
0
0
0
sub store { &Acme::Tools::bfstore }
7430
0
0
0
sub retrieve { &Acme::Tools::bfretrieve }
7431
1
1
3
sub clone { &Acme::Tools::bfclone }
7432
0
0
0
sub sum { &Acme::Tools::bfsum }
7433
1;
7434
7435
# Ny versjon:
7436
# + c-s todo
7437
# + endre $VERSION
7438
# + endre Release history under HISTORY
7439
# + endre årstall under COPYRIGHT AND LICENSE
7440
# + oppd default valutakurser inkl datoen
7441
# + emacs Changes
7442
# + emacs README + aarstall
7443
# + emacs MANIFEST legg til ev nye t/*.t
7444
# + perl Makefile.PL;make test
7445
# + /usr/bin/perl Makefile.PL;make test
7446
# + perlbrew exec "perl ~/Acme-Tools/Makefile.PL ; time make test"
7447
# + perlbrew use perl-5.10.1; perl Makefile.PL; make test; perlbrew off
7448
# + test evt i cygwin og mingw-perl
7449
# + pod2html Tools.pm > Tools.html ; firefox Tools.html
7450
# + https://metacpan.org/pod/Acme::Tools
7451
# + http://cpants.cpanauthors.org/dist/Acme-Tools #kvalitee
7452
# + perl Makefile.PL ; make test && make dist
7453
# + cp -p *tar.gz /htdocs/
7454
# + ci -l -mversjon -d `cat MANIFEST`
7455
# + git add `cat MANIFEST`
7456
# + git status
7457
# + git commit -am versjon
7458
# + git push #eller:
7459
# + git push origin master
7460
# + http://pause.perl.org/
7461
# + tegnsett/utf8-kroell
7462
# http://en.wikipedia.org/wiki/Birthday_problem#Approximations
7463
7464
# memoize_expire() http://perldoc.perl.org/Memoize/Expire.html
7465
# memoize_file_expire()
7466
# memoize_limit_size() #lru
7467
# memoize_file_limit_size()
7468
# memoize_memcached http://search.cpan.org/~dtrischuk/Memoize-Memcached-0.03/lib/Memoize/Memcached.pm
7469
# hint on http://perl.jonallen.info/writing/articles/install-perl-modules-without-root
7470
7471
# sub mycrc32 { #http://billauer.co.il/blog/2011/05/perl-crc32-crc-xs-module/ eller String::CRC32::crc32 som er 100 x raskere enn Digest::CRC::crc32
7472
# my ($input, $init_value, $polynomial) = @_;
7473
# $init_value = 0 unless (defined $init_value);
7474
# $polynomial = 0xedb88320 unless (defined $polynomial);
7475
# my @lookup_table;
7476
# for (my $i=0; $i<256; $i++) {
7477
# my $x = $i;
7478
# for (my $j=0; $j<8; $j++) {
7479
# if ($x & 1) {
7480
# $x = ($x >> 1) ^ $polynomial;
7481
# } else {
7482
# $x = $x >> 1;
7483
# }
7484
# }
7485
# push @lookup_table, $x;
7486
# }
7487
# my $crc = $init_value ^ 0xffffffff;
7488
# foreach my $x (unpack ('C*', $input)) {
7489
# $crc = (($crc >> 8) & 0xffffff) ^ $lookup_table[ ($crc ^ $x) & 0xff ];
7490
# }
7491
# $crc = $crc ^ 0xffffffff;
7492
# return $crc;
7493
# }
7494
#
7495
7496
7497
7498
=head1 HISTORY
7499
7500
Release history
7501
7502
0.21 Mar 2017 Improved nicenum() and its tests
7503
7504
0.20 Mar 2017 Subs: a2h cnttbl h2a log10 log2 nicenum rstddev sec_readable
7505
throttle timems refa refaa refah refh refha refhh refs
7506
eachr globr keysr popr pushr shiftr splicer unshiftr valuesr
7507
Commands: 2bz2 2gz 2xz z2z
7508
7509
0.172 Dec 2015 Subs: curb openstr pwgen sleepms sleepnm srlz tms username
7510
self_update install_acme_command_tools
7511
Commands: conv due freq wipe xcat (see "Commands")
7512
7513
0.16 Feb 2015 bigr curb cpad isnum parta parth read_conf resolve_equation
7514
roman2int trim. Improved: conv (numbers currency) range ("derivatives")
7515
7516
0.15 Nov 2014 Improved doc
7517
0.14 Nov 2014 New subs, improved tests and doc
7518
0.13 Oct 2010 Non-linux test issue, resolve. improved: bloom filter, tests, doc
7519
0.12 Oct 2010 Improved tests, doc, bloom filter, random_gauss, bytes_readable
7520
0.11 Dec 2008 Improved doc
7521
0.10 Dec 2008
7522
7523
=head1 SEE ALSO
7524
7525
L
7526
7527
=head1 AUTHOR
7528
7529
Kjetil Skotheim, Ekjetil.skotheim@gmail.comE
7530
7531
=head1 COPYRIGHT
7532
7533
2008-2017, Kjetil Skotheim
7534
7535
=head1 LICENSE
7536
7537
This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
7538
7539
=cut