File Coverage

blib/lib/Acme/Tools.pm
Criterion Covered Total %
statement 1360 1897 71.6
branch 708 1280 55.3
condition 333 658 50.6
subroutine 206 302 68.2
pod 164 223 73.5
total 2771 4360 63.5


line stmt bran cond sub pod time code
1             #!/usr/bin/perl
2             package Acme::Tools;
3              
4             our $VERSION = '0.20';
5              
6 33     33   477084 use 5.008; #Perl 5.8 was released July 18th 2002
  33         89  
7 33     33   131 use strict;
  33         45  
  33         710  
8 33     33   100 use warnings;
  33         39  
  33         764  
9 33     33   115 use Carp;
  33         28  
  33         8503  
10              
11             require Exporter;
12             our @ISA = qw(Exporter);
13             our %EXPORT_TAGS = ( all => [ qw() ] );
14             our @EXPORT_OK = ( @{ $EXPORT_TAGS{all} } );
15             our @EXPORT = qw(
16             min
17             max
18             mins
19             maxs
20             sum
21             avg
22             geomavg
23             harmonicavg
24             stddev
25             rstddev
26             median
27             percentile
28             $Resolve_iterations
29             $Resolve_last_estimate
30             $Resolve_time
31             resolve
32             resolve_equation
33             conv
34             rank
35             rankstr
36             eqarr
37             sorted
38             sortedstr
39             pushsort
40             pushsortstr
41             binsearch
42             binsearchstr
43             random
44             random_gauss
45             big
46             bigi
47             bigf
48             bigr
49             bigscale
50             nvl
51             repl
52             replace
53             decode
54             decode_num
55             between
56             curb
57             bound
58             log10
59             log2
60             distinct
61             in
62             in_num
63             uniq
64             union
65             union_all
66             minus
67             minus_all
68             intersect
69             intersect_all
70             not_intersect
71             mix
72             zip
73             subhash
74             hashtrans
75             zipb64
76             zipbin
77             unzipb64
78             unzipbin
79             gzip
80             gunzip
81             bzip2
82             bunzip2
83             ipaddr
84             ipnum
85             webparams
86             urlenc
87             urldec
88             ht2t
89             chall
90             makedir
91             qrlist
92             ansicolor
93             ccn_ok
94             KID_ok
95             writefile
96             readfile
97             readdirectory
98             basename
99             dirname
100             wipe
101             username
102             range
103             globr
104             permutations
105             trigram
106             sliding
107             chunks
108             chars
109             cart
110             reduce
111             int2roman
112             roman2int
113             num2code
114             code2num
115             gcd
116             lcm
117             pivot
118             tablestring
119             upper
120             lower
121             trim
122             rpad
123             lpad
124             cpad
125             dserialize
126             serialize
127             srlz
128             cnttbl
129             nicenum
130             bytes_readable
131             sec_readable
132             distance
133             tms
134             easter
135             time_fp
136             timems
137             sleep_fp
138             sleeps
139             sleepms
140             sleepus
141             sleepns
142             eta
143             sys
144             recursed
145             md5sum
146             pwgen
147             read_conf
148             openstr
149             ldist
150             $Re_isnum
151             isnum
152             part
153             parth
154             parta
155             a2h
156             h2a
157             refa
158             refh
159             refs
160             refaa
161             refah
162             refha
163             refhh
164             pushr
165             popr
166             shiftr
167             unshiftr
168             splicer
169             keysr
170             valuesr
171             eachr
172             ed
173             $Edcursor
174             brainfu
175             brainfu2perl
176             brainfu2perl_optimized
177             bfinit
178             bfsum
179             bfaddbf
180             bfadd
181             bfcheck
182             bfgrep
183             bfgrepnot
184             bfdelete
185             bfstore
186             bfretrieve
187             bfclone
188             bfdimensions
189             $PI
190             install_acme_command_tools
191              
192             $Dbh
193             dlogin
194             dlogout
195             drow
196             drows
197             drowc
198             drowsc
199             dcols
200             dpk
201             dsel
202             ddo
203             dins
204             dupd
205             ddel
206             dcommit
207             drollback
208             );
209              
210             our $PI = '3.141592653589793238462643383279502884197169399375105820974944592307816406286';
211              
212             =head1 NAME
213              
214             Acme::Tools - Lots of more or less useful subs lumped together and exported into your namespace
215              
216             =head1 SYNOPSIS
217              
218             use Acme::Tools;
219              
220             print sum(1,2,3); # 6
221             print avg(2,3,4,6); # 3.75
222              
223             my @list = minus(\@listA, \@listB); # set operations
224             my @list = union(\@listA, \@listB); # set operations
225              
226             print length(gzip("abc" x 1000)); # far less than 3000
227              
228             writefile("/dir/filename",$string); # convenient
229             my $s=readfile("/dir/filename"); # also conventient
230              
231             print "yes!" if between($pi,3,4);
232              
233             print percentile(0.05, @numbers);
234              
235             my @even = range(1000,2000,2); # even numbers between 1000 and 2000
236             my @odd = range(1001,2001,2);
237              
238             my $dice = random(1,6);
239             my $color = random(['red','green','blue','yellow','orange']);
240              
241             ...and more.
242              
243             =encoding utf8
244              
245             =head1 ABSTRACT
246              
247             About 120 more or less useful perl subroutines lumped together and exported into your namespace.
248              
249             =head1 DESCRIPTION
250              
251             Subs created and collected since the mid-90s.
252              
253             =head1 INSTALLATION
254              
255             sudo cpan Acme::Tools
256             sudo cpanm Acme::Tools # after: sudo apt-get install cpanminus make # for Ubuntu 12.04
257              
258             =head1 EXPORT
259              
260             Almost every sub, about 90 of them.
261              
262             Beware of namespace pollution. But what did you expect from an Acme module?
263              
264             =head1 NUMBERS
265              
266             =head2 num2code
267              
268             See L
269              
270             =head2 code2num
271              
272             C convert numbers (integers) from the normal decimal system to some arbitrary other number system.
273             That can be binary (2), oct (8), hex (16) or others.
274              
275             Example:
276              
277             print num2code(255,2,"0123456789ABCDEF"); # prints FF
278             print num2code( 14,2,"0123456789ABCDEF"); # prints 0E
279              
280             ...because 255 are converted to hex FF (base C<< length("0123456789ABCDEF") >> ) which is 2 digits of 0-9 or A-F.
281             ...and 14 are converted to 0E, with leading 0 because of the second argument 2.
282              
283             Example:
284              
285             print num2code(1234,16,"01")
286              
287             Prints the 16 binary digits 0000010011010010 which is 1234 converted to binary zeros and ones.
288              
289             To convert back:
290              
291             print code2num("0000010011010010","01"); #prints 1234
292              
293             C can be used to compress numeric IDs to something shorter:
294              
295             $chars="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_";
296             print num2code("241274432",5,$chars); # prints EOOv0
297             print code2num("EOOv0",$chars); # prints 241274432
298              
299             =cut
300              
301             #Math::BaseCnv
302              
303             sub num2code {
304 4     4 1 356 my($num,$digits,$validchars,$start)=@_;
305 4         6 my $l=length($validchars);
306 4         5 my $key;
307 33     33   141 no warnings;
  33         41  
  33         160203  
308 4 50       14 croak if $num<$start;
309 4         6 $num-=$start;
310 4         9 for(1..$digits){
311 25         33 $key=substr($validchars,$num%$l,1).$key;
312 25         27 $num=int($num/$l);
313             }
314 4 50       13 croak if $num>0;
315 4         14 return $key;
316             }
317              
318             sub code2num {
319 1 50   1 1 3 my($code,$validchars,$start)=@_; $start=0 if !defined $start;
  1         5  
320 1         2 my $l=length($validchars);
321 1         2 my $num=0;
322 1         11 $num=$num*$l+index($validchars,$_) for split//,$code;
323 1         6 return $num+$start;
324             }
325              
326              
327             =head2 gcd
328              
329             I< C<">The Euclidean algorithm (also called Euclid's algorithm) is an
330             algorithm to determine the greatest common divisor (gcd) of two
331             integers. It is one of the oldest algorithms known, since it appeared
332             in the classic Euclid's Elements around 300 BC. The algorithm does not
333             require factoring.C<"> >
334              
335             B two or more positive numbers (integers, without decimals that is)
336              
337             B an integer
338              
339             B
340              
341             print gcd(12, 8); # prints 4
342              
343             Because the (prime number) factors of 12 is 2 * 2 * 3 and the factors of 8 is 2 * 2 * 2
344             and the common ('overlapping') factors for both 12 and 8 is then 2 * 2 and the result becomes 4.
345              
346             B:
347              
348             print gcd(90, 135, 315); # prints 45
349             print gcd(2*3*3*5, 3*3*3*5, 3*3*5*7); # prints 45 ( = 3*3*5 which is common to all three args)
350              
351             Implementation:
352              
353             sub gcd { my($a,$b,@r)=@_; @r ? gcd($a,gcd($b,@r)) : $b==0 ? $a : gcd($b, $a % $b) }
354              
355             One way of putting it: Keep replacing the larger of the two numbers with the difference between them until you got two equal numbers. Then thats the answer.
356              
357             L
358              
359             L
360              
361             =cut
362              
363 26 100   26 1 273 sub gcd { my($a,$b,@r)=@_; @r ? gcd($a,gcd($b,@r)) : $b==0 ? $a : gcd($b, $a % $b) }
  26 100       74  
364              
365             =head2 lcm
366              
367             C finds the Least Common Multiple of two or more numbers (integers).
368              
369             B two or more positive numbers (integers)
370              
371             B an integer number
372              
373             Example: C< 2/21 + 1/6 = 4/42 + 7/42 = 11/42>
374              
375             Where 42 = lcm(21,6).
376              
377             B
378              
379             print lcm(45,120,75); # prints 1800
380              
381             Because the factors are:
382              
383             45 = 2^0 * 3^2 * 5^1
384             120 = 2^3 * 3^1 * 5^1
385             75 = 2^0 * 3^1 * 5^2
386              
387             Take the bigest power of each primary number (2, 3 and 5 here).
388             Which is 2^3, 3^2 and 5^2. Multiplied this is 8 * 9 * 25 = 1800.
389              
390             sub lcm { my($a,$b,@r)=@_; @r ? lcm($a,lcm($b,@r)) : $a*$b/gcd($a,$b) }
391              
392             Seems to works with L as well: (C of all integers from 1 to 200)
393              
394             perl -MAcme::Tools -MMath::BigInt -le'print lcm(map Math::BigInt->new($_),1..200)'
395              
396             337293588832926264639465766794841407432394382785157234228847021917234018060677390066992000
397              
398             =cut
399              
400 3 100   3 1 5 sub lcm { my($a,$b,@r)=@_; @r ? lcm($a,lcm($b,@r)) : $a*$b/gcd($a,$b) }
  3         10  
401              
402             =head2 resolve
403              
404             Resolves an equation by Newtons method.
405              
406             B 1-6 arguments. At least one argument.
407              
408             First argument: must be a coderef to a subroutine (a function)
409              
410             Second argument: if present, the target, f(x)=target. Default 0.
411              
412             Third argument: a start position for x. Default 0.
413              
414             Fourth argument: a small delta value. Default 1e-4 (0.0001).
415              
416             Fifth argument: a maximum number of iterations before resolve gives up
417             and carps. Default 100 (if fifth argument is not given or is
418             undef). The number 0 means infinite here. If the derivative of the
419             start position is zero or close to zero more iterations are typically
420             needed.
421              
422             Sixth argument: A number of seconds to run before giving up. If both
423             fifth and sixth argument is given and > 0, C stops at
424             whichever comes first.
425              
426             B returns the number C for C = 0
427              
428             ...or equal to the second input argument if present.
429              
430             B
431              
432             The equation C<< x^2 - 4x - 21 = 0 >> has two solutions: -3 and 7.
433              
434             The result of C will depend on the start position:
435              
436             print resolve(sub{ $_**2 - 4*$_ - 21 }); # -3 with $_ as your x
437             print resolve(sub{ my $x=shift; $x**2 - 4*$x - 21 }); # -3 more elaborate call
438             print resolve(sub{ my $x=shift; $x**2 - 4*$x - 21 },0,3); # 7 with start position 3
439             print "Iterations: $Acme::Tools::Resolve_iterations\n"; # 3 or larger, about 10-15 is normal
440              
441             The variable C< $Acme::Tools::Resolve_iterations > (which is exported) will be set
442             to the last number of iterations C used. Also if C dies (carps).
443              
444             The variable C< $Acme::Tools::Resolve_last_estimate > (which is exported) will be
445             set to the last estimate. This number will often be close to the solution and can
446             be used even if C dies (carps).
447              
448             B
449              
450             If either second, third or fourth argument is an instance of L, so will the result be:
451              
452             use Acme::Tools;
453             my $equation = sub{ $_ - 1 - 1/$_ };
454             my $gr1 = resolve( $equation, 0, 1 ); #
455             my $gr2 = resolve( $equation, 0, bigf(1) ); # 1/2 + sqrt(5)/2
456             bigscale(50);
457             my $gr3 = resolve( $equation, 0, bigf(1) ); # 1/2 + sqrt(5)/2
458            
459             print 1/2 + sqrt(5)/2, "\n";
460             print "Golden ratio 1: $gr1\n";
461             print "Golden ratio 2: $gr2\n";
462             print "Golden ratio 3: $gr3\n";
463              
464             Output:
465              
466             1.61803398874989
467             Golden ratio 1: 1.61803398874989
468             Golden ratio 2: 1.61803398874989484820458683436563811772029300310882395927211731893236137472439025
469             Golden ratio 3: 1.6180339887498948482045868343656381177203091798057610016490334024184302360920167724737807104860909804
470              
471             See:
472              
473             L
474              
475             L
476              
477             L
478              
479             =cut
480              
481             our $Resolve_iterations;
482             our $Resolve_last_estimate;
483             our $Resolve_time;
484              
485             #sub resolve(\[&$]@) {
486             #sub resolve(&@) { <=0.17
487             #todo: perl -MAcme::Tools -le 'print resolve(sub{$_[0]**2-9431**2});print$Acme::Tools::Resolve_iterations'
488             # =>Div by zero: df(x) = 0 at n'th iteration, n=0, delta=0.0001, fx=CODE(0xc81d470) at -e line 1
489             #todo: ren solve?
490             sub resolve {
491 16     16 1 1766 my($f,$goal,$start,$delta,$iters,$sec)=@_;
492 16 100       33 $goal=0 if !defined $goal;
493 16 100       26 $start=0 if !defined $start;
494 16 50       23 $delta=1e-4 if !defined $delta;
495 16 50       22 $iters=100 if !defined $iters;
496 16 100       29 $sec=0 if !defined $sec;
497 16 50       23 $iters=13e13 if $iters==0;
498 16 50 33     65 croak "Iterations ($iters) or seconds ($sec) can not be a negative number" if $iters<0 or $sec<0;
499 16         14 $Resolve_iterations=undef;
500 16         11 $Resolve_last_estimate=undef;
501 16 50       23 croak "Should have at least 1 argument, a coderef" if !@_;
502 16 50       30 croak "First argument should be a coderef" if ref($f) ne 'CODE';
503              
504 16         22 my @x=($start);
505 16 100       24 my $time_start=$sec>0?time_fp():undef;
506 16 50       21 my $ds=ref($start) eq 'Math::BigFloat' ? Math::BigFloat->div_scale() : undef;
507             my $fx=sub{
508 559     559   370 local$_=$_[0];
509 559         790 my $fx=&$f($_);
510 559 100 66     2435 if($fx=~/x/ and $fx=~/^[ \(\)\.\d\+\-\*\/x\=\^]+$/){
511 1         8 $fx=~s/(\d)x/$1*x/g;
512 1         3 $fx=~s/\^/**/g;
513 1         2 $fx=~s/^(.*)=(.*)$/($1)-($2)/;
514 1         2 $fx=~s,x,\$_,g;
515 1         141 $f=eval"sub{$fx}";
516 1         16 $fx=&$f($_);
517             }
518             $fx
519 16         42 };
  559         675  
520             #warn "delta=$delta\n";
521 16         17 my $n=0;
522 16         28 while($n<=$iters-1){
523 182         222 my $fd= &$fx($x[$n]+$delta*0.5) - &$fx($x[$n]-$delta*0.5);
524 182 100       268 $fd = &$fx($x[$n]+$delta*0.7) - &$fx($x[$n]-$delta*0.3) if $fd==0;# and warn"wigle 1\n";
525 182 100       193 $fd = &$fx($x[$n]+$delta*0.2) - &$fx($x[$n]-$delta*0.8) if $fd==0;# and warn"wigle 2\n";
526 182 100       353 croak "Div by zero: df(x) = $x[$n] at n'th iteration, n=$n, delta=$delta, fx=$fx" if $fd==0;
527 181         161 $x[$n+1]=$x[$n]-(&$fx($x[$n])-$goal)/($fd/$delta);
528 181         139 $Resolve_last_estimate=$x[$n+1];
529             #warn "n=$n fd=$fd x=$x[$n+1]\n";
530 181         130 $Resolve_iterations=$n;
531 181 100 100     463 last if $n>3 and $x[$n+1]==$x[$n] and $x[$n]==$x[$n-1];
      100        
532 180 100 66     523 last if $n>4 and $x[$n]!=0 and abs(1-$x[$n+1]/$x[$n])<1e-13; #sub{3*$_+$_**4-12}
      100        
533 167 50 66     374 last if $n>3 and ref($x[$n+1]) eq 'Math::BigFloat' and substr($x[$n+1],0,$ds) eq substr($x[$n],0,$ds); #hm
      33        
534 167 100 66     233 croak "Could not resolve, perhaps too little time given ($sec), iteratons=$n"
535             if $sec>0 and ($Resolve_time=time_fp()-$time_start)>$sec;
536             #warn "$n: ".$x[$n+1]."\n";
537 166         195 $n++;
538             }
539 14 50       22 croak "Could not resolve, perhaps too few iterations ($iters)" if @x>=$iters;
540 14         113 return $x[-1];
541             }
542              
543             =head2 resolve_equation
544              
545             This prints 2:
546              
547             print resolve_equation "x + 13*(3-x) = 17 - x"
548              
549             A string containing at least one x is converted into a perl function.
550             Then x is found by using L. The string conversion is done by
551             replacing every x with $_ and if a C< = > char is present it converts
552             C< leftside = rightside > into C< (leftside) - (rightside) = 0 > which
553             is the default behaviour of L.
554              
555             =cut
556              
557 1     1 1 2 sub resolve_equation { my $e=shift;resolve(sub{$e},@_)}
  1     1   2  
  1         5  
558              
559             =head2 conv
560              
561             Converts between:
562              
563             =over 4
564              
565             =item * units of measurement
566              
567             =item * number systems
568              
569             =item * currencies
570              
571             =back
572              
573             B
574              
575             print conv( 2000, "meters", "miles" ); #prints 1.24274238447467
576             print conv( 2.1, 'km', 'm'); #prints 2100
577             print conv( 70,"cm","in"); #prints 27.5590551181102
578             print conv( 4,"USD","EUR"); #prints 3.20481552905431 (depending on todays rates)
579             print conv( 4000,"b","kb"); #prints 3.90625 (1 kb = 1024 bytes)
580             print conv( 4000,"b","Kb"); #prints 4 (1 Kb = 1000 bytes)
581             print conv( 1000,"mb","kb"); #prints 1024000
582             print conv( 101010,"bin","roman"); #prints XLII
583             print conv( "DCCXLII","roman","oct"); #prints 1346
584              
585             B are:>
586              
587             Note: units starting with the symbol _ means that all metric
588             prefixes from yocto 10^-24 to yotta 10^+24 is supported, so _m means
589             km, mm, cm, µm and so on. And _N means kN, MN GN and so on.
590              
591             Note2: Many units have synonyms: m, meter, meters ...
592              
593             acceleration: g, g0, m/s2, mps2
594            
595             angle: binary_degree, binary_radian, brad, deg, degree, degrees,
596             gon, grad, grade, gradian, gradians, hexacontade, hour,
597             new_degree, nygrad, point, quadrant, rad, radian, radians,
598             sextant, turn
599            
600             area: a, ar, are, ares, bunder, ca, centiare, cho, cm2,
601             daa, decare, decares, deciare, dekar,
602             djerib, m2, dunam, dönüm, earths, feddan, ft2, gongqing, ha
603             ha, hectare, hectares, hektar, jerib, km2, m2, manzana,
604             mi2, mm2, mu, qing, rai, sotka,
605             sqcm, sqft, sqkm, sqm, sqmi, sqmm
606             stremmata, um2, µm2
607            
608             bytes: Eb, Gb, Kb, KiB, Mb, Pb, Tb, Yb, Zb, b, byte,
609             kb, kilobyte, mb, megabyte,
610             gb, gigabyte, tb, terabyte,
611             pb, petabyte, eb, exabyte,
612             zb, zettabyte, yb, yottabyte
613            
614             charge: As, C, _e, coulomb, e
615            
616             current: A, _A, N/m2
617            
618             energy: BTU, Btu, J, Nm, W/s, Wh, Wps, Ws, _J, _eV,
619             cal, calorie, calories, eV, electronvolt, BeV,
620             erg, ergs, foot-pound, foot-pounds, ftlb, joule, kWh,
621             kcal, kilocalorie, kilocalories,
622             newtonmeter, newtonmeters, th, thermie
623            
624             force: N, _N, dyn, dyne, dynes, lb, newton
625            
626             length: NM, _m, _pc, astronomical unit, au, chain, ft, furlong,
627             in, inch, inches, km, league, lightyear, ls, ly,
628             m, meter, meters, mi, mil, mile, miles,
629             nautical mile, nautical miles, nmi,
630             parsec, pc, planck, yard, yard_imperical, yd, Å, ångstrøm
631            
632             mass: Da, _eV, _g, bag, carat, ct, dwt, eV, electronvolt, g,
633             grain, grains, gram, grams, kilo, kilos, kt, lb, lb_av,
634             lb_t, lb_troy, lbs, ounce, ounce_av, ounce_troy, oz, oz_av, oz_t,
635             pennyweight, pound, pound_av, pound_metric, pound_troy, pounds,
636             pwt, seer, sl, slug, solar_mass, st, stone, t, tonn, tonne, tonnes, u, wey
637            
638             mileage: mpg, l/100km, l/km, l/10km, lp10km, l/mil, liter_pr_100km, liter_pr_km, lp100km
639              
640             money: AED, ARS, AUD, BGN, BHD, BND, BRL, BWP, CAD, CHF, CLP, CNY,
641             COP, CZK, DKK, EUR, GBP, HKD, HRK, HUF, IDR, ILS, INR, IRR,
642             ISK, JPY, KRW, KWD, KZT, LKR, LTL, LVL, LYD, MUR, MXN, MYR,
643             NOK, NPR, NZD, OMR, PHP, PKR, PLN, QAR, RON, RUB, SAR, SEK,
644             SGD, THB, TRY, TTD, TWD, USD, VEF, ZAR, BTC, LTC, mBTC, XBT
645             Currency rates are automatically updated from the net
646             at least every 24h since last update (on linux/cygwin).
647              
648             numbers: dec, hex, bin, oct, roman, dozen, doz, dz, dusin, gross, gro,
649             gr, great_gross, small_gross (not supported: decimal numbers)
650              
651             power: BTU, BTU/h, BTU/s, BTUph, GWhpy, J/s, Jps, MWhpy, TWhpy,
652             W, Whpy, _W, ftlb/min, ftlb/s, hk, hp, kWh/yr, kWhpy
653            
654             pressure: N/m2, Pa, _Pa, at, atm, bar, mbar, pascal, psi, torr
655            
656             radioactivity: Bq, becquerel, curie
657            
658             speed: _m/s, km/h, km/t, kmh, kmph, kmt, m/s, mi/h, mph, mps,
659             kn, knot, knots, kt, mach, machs, c, fps, ft/s, ftps
660            
661             temperature: C, F, K, celsius, fahrenheit, kelvin
662            
663             time: _s, biennium, century, d, day, days, decade, dy, fortnight,
664             h, hour, hours, hr, indiction, jubilee, ke, lustrum, m,
665             millennium, min, minute, minutes, mo, moment, mon, month,
666             olympiad, quarter, s, season, sec, second, seconds, shake,
667             tp, triennium, w, week, weeks, y, y365, ySI, ycommon,
668             year, years, ygregorian, yjulian, ysideral, ytropical
669            
670             volume: l, L, _L, _l, cm3, m3, ft3, in3, liter, liters, litre, litres,
671             gal, gallon, gallon_imp, gallon_uk, gallon_us, gallons,
672             pint, pint_imp, pint_uk, pint_us, tsp, tablespoon, teaspoon,
673             floz, floz_uk, therm, thm, fat, bbl, Mbbl, MMbbl, drum,
674             container (or container20), container40, container40HC, container45HC
675              
676             See: L
677              
678             =cut
679              
680             #TODO: @arr2=conv(\@arr1,"from","to") # should be way faster than:
681             #TODO: @arr2=map conv($_,"from","to"),@arr1
682             #TODO: conv(123456789,'b','h'); # h converts to something human-readable
683              
684             our %conv=(
685             length=>{
686             m => 1,
687             _m => 1,
688             meter => 1,
689             meters => 1,
690             km => 1000,
691             mil => 10000, #scandinavian #also: inch/1000!
692             in => 0.0254,
693             inch => 0.0254,
694             inches => 0.0254,
695             ft => 0.0254*12, #0.3048 m
696             feet => 0.0254*12, #0.3048 m
697             yd => 0.0254*12*3, #0.9144 m
698             yard => 0.0254*12*3, #0.9144 m
699             yards => 0.0254*12*3, #0.9144 m
700             fathom => 0.0254*12*3*2, #1.8288 m
701             fathoms => 0.0254*12*3*2, #1.8288 m
702             chain => 0.0254*12*3*22, #20.1168 m
703             chains => 0.0254*12*3*22, #20.1168 m
704             furlong => 0.0254*12*3*22*10, #201.168 m
705             furlongs=> 0.0254*12*3*22*10, #201.168 m
706             mi => 0.0254*12*3*22*10*8, #1609.344 m
707             mile => 0.0254*12*3*22*10*8, #1609.344 m
708             miles => 0.0254*12*3*22*10*8,
709             league => 0.0254*12*3*22*10*8*3, #4828.032 m
710             leagues => 0.0254*12*3*22*10*8*3, #4828.032 m
711             yard_imp => 0.914398416,
712             yard_imperical => 0.914398416,
713             NM => 1852, #nautical mile
714             nmi => 1852, #nautical mile
715             'nautical mile' => 1852,
716             'nautical miles' => 1852,
717             'Å' => 1e-10,
718             'ångstrøm' => 1e-10,
719             ly => 299792458*3600*24*365.25,
720             lightyear => 299792458*3600*24*365.25, # = 9460730472580800 by def
721             ls => 299792458, #light-second
722             pc => 3.0857e16, #3.26156 ly
723             _pc => 3.0857e16, #3.26156 ly
724             parsec => 3.0857e16,
725             au => 149597870700, # by def, earth-sun
726             'astronomical unit' => 149597870700,
727             planck => 1.61619997e-35, #planck length
728             #Norwegian (old) lengths:
729             tomme => 0.0254,
730             tommer => 0.0254,
731             fot => 0.0254*12, #0.3048m
732             alen => 0.0254*12*2, #0.6096m
733             favn => 0.0254*12*2*3, #1.8288m
734             kvart => 0.0254*12*2/4, #0.1524m a quarter alen
735             },
736             mass =>{ #https://en.wikipedia.org/wiki/Unit_conversion#Mass
737             g => 1,
738             _g => 1,
739             gram => 1,
740             grams => 1,
741             kilo => 1000,
742             kilos => 1000,
743             t => 1000000,
744             tonn => 1000000,
745             tonne => 1000000,
746             tonnes => 1000000,
747             seer => 933.1,
748             lb => 453.59237,
749             lbs => 453.59237,
750             lb_av => 453.59237,
751             lb_t => 373.2417216, #5760 grains
752             lb_troy => 373.2417216,
753             pound => 453.59237,
754             pounds => 453.59237,
755             pound_av => 453.59237,
756             pound_troy => 373.2417216,
757             pound_metric => 500,
758             ounce => 28, # US food, 28g
759             ounce_av => 453.59237/16, # avoirdupois lb/16 = 28.349523125g
760             ounce_troy => 31.1034768, # lb_troy / 12
761             oz => 28, # US food, 28g
762             oz_av => 453.59237/16, # avoirdupois lb/16 = 28.349523125g
763             oz_t => 31.1034768, # lb_troy / 12,
764             grain => 64.79891/1000, # 453.59237/7000
765             grains => 64.79891/1000,
766             pennyweight => 31.1034768 / 20,
767             pwt => 31.1034768 / 20,
768             dwt => 31.1034768 / 20,
769             st => 6350.29318, # 14 lb_av
770             stone => 6350.29318,
771             wey => 114305.27724, # 252 lb = 18 stone
772             carat => 0.2,
773             ct => 0.2, #carat (metric)
774             kt => 64.79891/1000 * (3+1/6), #carat/karat
775             u => 1.66053892173e-30, #atomic mass carbon-12
776             Da => 1.66053892173e-30, #atomic mass carbon-12
777             slug => 14600,
778             sl => 14600,
779             eV => 1.78266172802679e-33, #e=mc2 = 1.60217646e-19 J / (2.99792458e8 m/s)**2
780             _eV => 1.78266172802679e-33,
781             electronvolt => 1.78266172802679e-33,
782             'solar mass' => 1.99e33,
783             solar_mass => 1.99e33,
784             bag => 60*1000, #60kg coffee
785             },
786             area =>{ # https://en.wikipedia.org/wiki/Unit_conversion#Area
787             m2 => 1,
788             dm2 => 0.1**2,
789             cm2 => 0.01**2,
790             mm2 => 0.001**2,
791             'µm2' => 1e-6**2,
792             um2 => 1e-6**2,
793             sqm => 1,
794             sqcm => 0.01**2,
795             sqmm => 0.001**2,
796             km2 => 1000**2,
797             sqkm => 1000**2,
798             a => 100,
799             ar => 100,
800             are => 100,
801             ares => 100,
802             dekar => 1000,
803             decare => 1000,
804             decares => 1000,
805             daa => 1000,
806             'mål' => 1000,
807             ha => 10000,
808             hektar => 10000,
809             hectare => 10000,
810             hectares=> 10000,
811             ft2 => (0.0254*12)**2,
812             sqft => (0.0254*12)**2,
813             mi2 => 1609.344**2,
814             sqmi => 1609.344**2,
815             yd2 => (0.0254*12*3)**2, #square yard
816             sqyd => (0.0254*12*3)**2,
817             yard2 => (0.0254*12*3)**2,
818             sqyard => (0.0254*12*3)**2,
819             rood => 1210*(0.0254*12*3)**2, # 1/4 acres
820             roods => 1210*(0.0254*12*3)**2, # 1/4 acres
821             ac => 4840*(0.0254*12*3)**2, # 4840 square yards = 1 chain x 1 furlong
822             acre => 4840*(0.0254*12*3)**2,
823             acres => 4840*(0.0254*12*3)**2,
824             homestead => 4840*(0.0254*12)**2 *160, #160 acres US Surveyors or 1/4 sqmiles
825             township => 4840*(0.0254*12)**2 *160*144, #144 homesteads or 36 sqmiles
826             perches => 4840*(0.0254*12)**2 /160, #160 perches = 1 acre in sri lanka
827             sotka => 100, #russian are
828             jerib => 10000, #iran hectare
829             djerib => 10000, #turkish hectare
830             gongqing => 10000, #chinese hectare
831             manzana => 10000, #argentinian hectare
832             bunder => 10000, #dutch hectare
833             centiare => 1,
834             deciare => 10,
835             ca => 1,
836             mu => 10000/15, #China
837             qing => 10000/0.15, #China
838             dunam => 10000/10, #Middle East
839             'dönüm' => 10000/10, #Middle East
840             stremmata => 10000/10, #Greece
841             rai => 10000/6.25, #Thailand
842             cho => 10000/1.008, #Japan
843             feddan => 10000/2.381, #Egypt
844             earths => 510072000e6, #510072000 km2, surface area of earth
845             barn => 1e-28, #physics
846             outhouse => 1e-34, #physics
847             shed => 1e-52, #physics
848             },
849             volume=>{
850             l => 1,
851             L => 1,
852             _L => 1,
853             _l => 1,
854             liter => 1,
855             liters => 1,
856             litre => 1,
857             litres => 1,
858             gal => 231*2.54**3/1000, #3.785411784, #231 cubic inches
859             gallon => 231*2.54**3/1000,
860             gallons => 231*2.54**3/1000,
861             gallon_us => 231*2.54**3/1000,
862             gallon_uk => 4.54609,
863             gallon_imp=> 4.54609,
864             gallon_us_dry => 4.40488377086, # ~ 9.25**2*pi*2.54**3/1000 L
865             m3 => 10**3, #1000 L
866             cm3 => 0.1**3, #0.001 L
867             in3 => 0.254**3, #0.016387064 L
868             ft3 => (0.254*12)**3,
869             tablespoon=> 3.785411784/256, #14.78676478125 mL
870             tsp => 3.785411784/256/3, #4.92892159375 mL
871             teaspoon => 3.785411784/256/3, #4.92892159375 mL
872             floz => 3.785411784/128, #fluid ounce US
873             floz_uk => 4.54609/160, #fluid ounce UK
874             pint => 4.54609/8, #0.56826125 L
875             pint_uk => 4.54609/8,
876             pint_imp => 4.54609/8,
877             pint_us => 3.785411784/8, #0.473176473
878             therm => 2.74e3, #? 100000BTUs? (!= thermie)
879             thm => 2.74e3, #? (!= th)
880             fat => 42*231*2.54**3/1000,
881             bbl => 42*231*2.54**3/1000, #oil barrel ~159 liters https://en.wikipedia.org/wiki/Barrel_(unit)
882             Mbbl => 42*231*2.54**3, #mille (thousand) oil barrels
883             MMbbl => 42*231*2.54**3*1000, #mille mille (million) oil barrels
884             drum => 200,
885             container => 33.1e3, #container20
886             container20 => 33.1e3,
887             container40 => 67.5e3,
888             container40HC => 75.3e3,
889             container45HC => 86.1e3,
890             #Norwegian:
891             meterfavn => 2 * 2 * 0.6, #ved 2.4 m3
892             storfavn => 2 * 2 * 3, #ved 12 m3
893             },
894             time =>{
895             s => 1,
896             _s => 1,
897             sec => 1,
898             second => 1,
899             seconds => 1,
900             m => 60,
901             min => 60,
902             minute => 60,
903             minutes => 60,
904             h => 60*60,
905             hr => 60*60,
906             hour => 60*60,
907             hours => 60*60,
908             d => 60*60*24,
909             dy => 60*60*24,
910             day => 60*60*24,
911             days => 60*60*24,
912             w => 60*60*24*7,
913             week => 60*60*24*7,
914             weeks => 60*60*24*7,
915             mo => 60*60*24 * 365.2425/12,
916             mon => 60*60*24 * 365.2425/12,
917             month => 60*60*24 * 365.2425/12,
918             quarter => 60*60*24 * 365.2425/12 * 3, #3 months
919             season => 60*60*24 * 365.2425/12 * 3, #3 months
920             y => 60*60*24 * 365.2425, # 365+97/400 #97 leap yers in 400 years
921             year => 60*60*24 * 365.2425,
922             years => 60*60*24 * 365.2425,
923             yjulian => 60*60*24 * 365.25, # 365+1/4
924             y365 => 60*60*24 * 365, # finance/science
925             ycommon => 60*60*24 * 365, # finance/science
926             ygregorian => 60*60*24 * 365.2425, # 365+97/400
927             #ygaussian => 365+(6*3600+9*60+56)/(24*3600), # 365+97/400
928             ytropical => 60*60*24 * 365.24219,
929             ysideral => 365.256363004,
930             ySI => 60*60*24*365.25, #31556925.9747
931             decade => 10 * 60*60*24*365.2425,
932             biennium => 2 * 60*60*24*365.2425,
933             triennium => 3 * 60*60*24*365.2425,
934             olympiad => 4 * 60*60*24*365.2425,
935             lustrum => 5 * 60*60*24*365.2425,
936             indiction => 15 * 60*60*24*365.2425,
937             jubilee => 50 * 60*60*24*365.2425,
938             century => 100 * 60*60*24*365.2425,
939             millennium => 1000 * 60*60*24*365.2425,
940             shake => 1e-8,
941             moment => 3600/40, #1/40th of an hour, used by Medieval Western European computists
942             ke => 864, #1/100th of a day, trad Chinese, 14m24s
943             fortnight => 14*24*3600,
944             tp => 5.3910632e-44, #planck time, time for ligth to travel 1 planck length
945             },
946             speed=>{
947             'm/s' => 1,
948             '_m/s' => 1,
949             mps => 1,
950             mph => 1609.344/3600,
951             'mi/h' => 1609.344/3600,
952             kmh => 1/3.6,
953             kmph => 1/3.6,
954             'km/h' => 1/3.6,
955             kmt => 1/3.6, # t=time or temps (scandinavian and french and dutch)
956             'km/t' => 1/3.6,
957             kn => 1852/3600,
958             kt => 1852/3600,
959             knot => 1852/3600,
960             knop => 1852/3600, #scandinavian
961             knots => 1852/3600,
962             c => 299792458, #speed of light
963             mach => 340.3, #speed of sound
964             machs => 340.3,
965             fps => 0.3048, #0.0254*12
966             ftps => 0.3048,
967             'ft/s' => 0.3048,
968             },
969             acceleration=>{
970             'm/s2' => 1,
971             'mps2' => 1,
972             g => 9.80665,
973             g0 => 9.80665,
974             #0-100kmh or ca 0-60 mph x seconds...
975             },
976             temperature=>{ #http://en.wikipedia.org/wiki/Temperature#Conversion
977             C=>1, F=>1, K=>1, celsius=>1, fahrenheit=>1, kelvin=>1
978             },
979             radioactivity=>{
980             Bq => 1,
981             becquerel => 1,
982             curie => 3.7e10,
983             },
984             current=> {
985             A => 1,
986             _A => 1,
987             'N/m2' => 2e-7,
988             },
989             charge=>{
990             e => 1,
991             _e => 1,
992             C => 6.24150964712042e+18,
993             coulomb => 6.24150964712042e+18,
994             As => 6.24150964712042e+18,
995             #Faraday unit of charge ???
996             },
997             power=> {
998             W => 1,
999             _W => 1,
1000             'J/s' => 1,
1001             Jps => 1,
1002             hp => 746,
1003             hk => 746, #hestekrefter (norwegian, scandinavian)
1004             PS => 746/1.014, #pferdestärken
1005             'kWh/yr' => 1000 * 3600/(24*365), #kWh annually
1006             Whpy => 3600/(24*365), #kWh annually
1007             kWhpy => 1000 * 3600/(24*365), #kWh annually
1008             MWhpy => 1000**2 * 3600/(24*365), #kWh annually
1009             GWhpy => 1000**3 * 3600/(24*365), #kWh annually
1010             TWhpy => 1000**4 * 3600/(24*365), #kWh annually
1011             BTU => 1055.05585262/3600, #
1012             BTUph => 1055.05585262/3600,
1013             'BTU/h' => 1055.05585262/3600,
1014             'BTU/s' => 1055.05585262,
1015             'ftlb/s' => 746/550,
1016             'ftlb/min'=> 746/550/60,
1017             },
1018             energy=>{
1019             joule => 1,
1020             J => 1,
1021             _J => 1,
1022             Ws => 1,
1023             Wps => 1,
1024             'W/s' => 1,
1025             Nm => 1,
1026             newtonmeter => 1,
1027             newtonmeters => 1,
1028             Wh => 3600,
1029             kWh => 3600000, #3.6 million J
1030             cal => 4.1868, # ~ 3600/860
1031             calorie => 4.1868,
1032             calories => 4.1868,
1033             kcal => 4.1868*1000,
1034             kilocalorie => 4.1868*1000,
1035             kilocalories => 4.1868*1000,
1036             BTU => 4.1868 * 252, # = 1055.0736 or is 1055.05585262 right?
1037             Btu => 4.1868 * 252,
1038             ftlb => 746/550, # ~ 1/0.7375621
1039             'foot-pound' => 746/550,
1040             'foot-pounds' => 746/550,
1041             erg => 1e-7,
1042             ergs => 1e-7,
1043             eV => 1.60217656535e-19,
1044             _eV => 1.60217656535e-19,
1045             BeV => 1.60217656535e-10,
1046             electronvolt => 1.60217656535e-19,
1047             thermie => 4.1868e6,
1048             th => 4.1868e6,
1049             hph => 3600*746,
1050             PSh => 3600*746/1.014,
1051             galatm_imp => 460.63256925,
1052             galatm_US => 383.5568490138,
1053             quad => 1.05505585262e18,
1054             Ry => 2.179872e-18,
1055             rydberg => 2.179872e-18,
1056             th => 4.1868e6,
1057             thermie => 4.1868e6,
1058             boe => 6.12e9, #barrel of oil equivalent
1059             TCE => 29.288e9, #ton of coal equivalent
1060             toe => 41.868e9, #tonne of oil equivalent
1061             tTNT => 4.184e9, #ton of TNT equivalent
1062             },
1063             force=> {
1064             newton=> 1,
1065             N => 1,
1066             _N => 1,
1067             dyn => 1e-5,
1068             dyne => 1e-5,
1069             dynes => 1e-5,
1070             lb => 4.448222,
1071             },
1072             pressure=>{
1073             Pa => 1,
1074             _Pa => 1,
1075             pascal => 1,
1076             'N/m2' => 1,
1077             bar => 100000.0,
1078             mbar => 100.0,
1079             at => 98066.5, #technical atmosphere
1080             atm => 101325.0, #standard atmosphere
1081             torr => 133.3224,
1082             psi => 6894.8, #pounds per square inch
1083             },
1084             bytes=> {
1085             b => 1,
1086             kb => 1024, #2**10
1087             mb => 1024**2, #2**20 = 1048576
1088             gb => 1024**3, #2**30 = 1073741824
1089             tb => 1024**4, #2**40 = 1099511627776
1090             pb => 1024**5, #2**50 = 1.12589990684262e+15
1091             eb => 1024**6, #2**60 =
1092             zb => 1024**7, #2**70 =
1093             yb => 1024**8, #2**80 =
1094             KiB => 1024, #2**10
1095             KiB => 1024**2, #2**20 = 1048576
1096             KiB => 1024**3, #2**30 = 1073741824
1097             KiB => 1024**4, #2**40 = 1099511627776
1098             KiB => 1024**5, #2**50 = 1.12589990684262e+15
1099             KiB => 1024**6, #2**60 =
1100             KiB => 1024**7, #2**70 =
1101             KiB => 1024**8, #2**80 =
1102             Kb => 1000, #2**10
1103             Mb => 1000**2, #2**20 = 1048576
1104             Gb => 1000**3, #2**30 = 1073741824
1105             Tb => 1000**4, #2**40 = 1099511627776
1106             Pb => 1000**5, #2**50 = 1.12589990684262e+15
1107             Eb => 1000**6, #2**60 =
1108             Zb => 1000**7, #2**70 =
1109             Yb => 1000**8, #2**80 =
1110             byte => 1,
1111             kilobyte => 1024, #2**10
1112             megabyte => 1024**2, #2**20 = 1048576
1113             gigabyte => 1024**3, #2**30 = 1073741824
1114             terabyte => 1024**4, #2**40 = 1099511627776
1115             petabyte => 1024**5, #2**50 = 1.12589990684262e+15
1116             exabyte => 1024**6, #2**60 =
1117             zettabyte => 1024**7, #2**70 =
1118             yottabyte => 1024**8, #2**80 =
1119             },
1120             milage=>{ #fuel consumption
1121             'l/mil' => 1,
1122             'l/10km' => 1,
1123             'lp10km' => 1,
1124             'l/km' => 10,
1125             'l/100km' => 1/10,
1126             lp100km => 1/10,
1127             liter_pr_100km => 1/10,
1128             liter_pr_km => 10,
1129             mpg => -23.5214584, #negative signals inverse
1130             },
1131             # light=> {
1132             # cd => 1,
1133             # candela => 1,
1134             # },
1135             # lumens
1136             # lux
1137             angle =>{
1138             turn => 1,
1139             rad => 1/(2*$PI), # 2 * pi
1140             radian => 1/(2*$PI), # 2 * pi
1141             radians => 1/(2*$PI), # 2 * pi
1142             deg => 1/360, # 4 * 90
1143             degree => 1/360, # 4 * 90
1144             degrees => 1/360, # 4 * 90
1145             grad => 1/400,
1146             gradian => 1/400,
1147             gradians => 1/400,
1148             grade => 1/400, #french revolutionary unit
1149             gon => 1/400,
1150             new_degree => 1/400,
1151             nygrad => 1/400, #scandinavian
1152             quadrant => 1/4,
1153             sextant => 1/6,
1154             hour => 1/24,
1155             point => 1/32, #used in navigation
1156             hexacontade => 1/60,
1157             binary_degree => 1/256,
1158             binary_radian => 1/256,
1159             brad => 1/256,
1160             },
1161             money =>{ # rates at dec 17 2015
1162             AED => 2.389117, #
1163             ARS => 0.895122, #
1164             AUD => 6.253619, #
1165             BGN => 4.847575, #
1166             BHD => 23.267384, #
1167             BND => 6.184624, #
1168             BRL => 2.260703, #
1169             BTC => 3910.932213547, #bitcoin
1170             BWP => 0.794654, #
1171             CAD => 6.289957, #
1172             CHF => 8.799974, #
1173             CLP => 0.012410, #
1174             CNY => 1.353406, #
1175             COP => 0.00262229, #
1176             CZK => 0.351171, #
1177             DKK => 1.271914, #
1178             EUR => 9.489926, #
1179             GBP => 13.069440, #
1180             HKD => 1.131783, #
1181             HRK => 1.240878, #
1182             HUF => 0.029947, #
1183             IDR => 0.00062471, #
1184             ILS => 2.254456, #
1185             INR => 0.132063, #
1186             IRR => 0.00029370, #
1187             ISK => 0.067245, #
1188             JPY => 0.071492, #
1189             KRW => 0.00739237, #
1190             KWD => 28.862497, #
1191             KZT => 0.027766, #
1192             LKR => 0.061173, #
1193             LTC => 31.78895354018, #litecoin
1194             LTL => 2.748472, #
1195             LVL => 13.503025, #
1196             LYD => 6.296978, #
1197             MUR => 0.240080, #
1198             MXN => 0.515159, #
1199             MYR => 2.032465, #
1200             NOK => 1.000000000, #norwegian kroner
1201             NPR => 0.084980, #
1202             NZD => 5.878331, #
1203             OMR => 22.795994, #
1204             PHP => 0.184839, #
1205             PKR => 0.083779, #
1206             PLN => 2.207243, #
1207             QAR => 2.409162, #
1208             RON => 2.101513, #
1209             RUB => 0.122991, #
1210             SAR => 2.339745, #
1211             SEK => 1.023591, #
1212             SGD => 6.184624, #
1213             THB => 0.242767, #
1214             TRY => 2.994338, #
1215             TTD => 1.374484, #
1216             TWD => 0.265806, #
1217             USD => 8.774159, #
1218             VEF => 1.395461, #
1219             ZAR => 0.576487, #
1220             XBT => 3910.932213547, # bitcoin
1221             mBTC => 3910.932213547, # bitcoin
1222             mXBT => 3910.932213547, # bitcoin
1223             },
1224             numbers =>{
1225             dec=>1,hex=>1,bin=>1,oct=>1,roman=>1, des=>1,#des: spelling error in v0.15-0.16
1226             dusin=>1,dozen=>1,doz=>1,dz=>1,gross=>144,gr=>144,gro=>144,great_gross=>12*144,small_gross=>10*12,
1227             }
1228             );
1229             our $conv_prepare_time=0;
1230             our $conv_prepare_money_time=0;
1231             sub conv_prepare {
1232 1     1 0 10 my %b =(da =>1e+1, h =>1e+2, k =>1e+3, M =>1e+6, G =>1e+9, T =>1e+12, P =>1e+15, E =>1e+18, Z =>1e+21, Y =>1e+24, H =>1e+27);
1233 1         8 my %big =(deca=>1e+1, hecto=>1e+2, kilo =>1e+3, mega =>1e+6, giga=>1e+9, tera=>1e+12, peta =>1e+15, exa =>1e+18, zetta=>1e+21, yotta=>1e+24, hella=>1e+27);
1234 1         7 my %s =(d =>1e-1, c =>1e-2, m =>1e-3,'µ' =>1e-6, u=>1e-6, n =>1e-9, p =>1e-12, f =>1e-15, a =>1e-18, z =>1e-21, y =>1e-24);
1235 1         6 my %small=(deci=>1e-1, centi=>1e-2, milli=>1e-3, micro =>1e-6, nano=>1e-9, pico=>1e-12, femto=>1e-15, atto=>1e-18, zepto=>1e-21, yocto=>1e-24);
1236             # myria=> 10000 #obsolete
1237             # demi => 1/2, double => 2 #obsolete
1238             # lakh => 1e5, crore => 1e7 #south asian
1239 1         12 my %x = (%s,%b);
1240 1         6 for my $type (keys%conv) {
1241 20         13 for(grep/^_/,keys%{$conv{$type}}) {
  20         214  
1242 15         14 my $c=$conv{$type}{$_};
1243 15         14 delete$conv{$type}{$_};
1244 15         16 my $unit=substr($_,1);
1245 15         319 $conv{$type}{$_.$unit}=$x{$_}*$c for keys%x;
1246             }
1247             }
1248 1         13 $conv_prepare_time=time();
1249             }
1250              
1251             our $Currency_rates_url = 'http://calthis.com/currency-rates';
1252             our $Currency_rates_expire = 6*3600;
1253             sub conv_prepare_money {
1254 0     0 0 0 eval {
1255 0         0 require LWP::Simple;
1256 0 0       0 my $td=$^O=~/^(?:linux|cygwin)$/?"/tmp":"/tmp"; #hm wrong!
1257 0         0 my $fn="$td/acme-tools-currency-rates.data";
1258 0 0 0     0 if( !-e$fn or time() - (stat($fn))[9] >= $Currency_rates_expire){
1259 0         0 LWP::Simple::getstore($Currency_rates_url,"$fn.$$.tmp"); # get ... see getrates.cmd
1260 0 0       0 die "nothing downloaded" if !-s"$fn.$$.tmp";
1261 0         0 rename "$fn.$$.tmp",$fn;
1262 0         0 chmod 0666,$fn;
1263             }
1264 0         0 my $d=readfile($fn);
1265 0         0 my %r=$d=~/^\s*([A-Z]{3}) +(\d+\.\d+)\b/gm;
1266 0         0 $r{lc($_)}=$r{$_} for keys%r;
1267             #warn serialize([minus([sort keys(%r)],[sort keys(%{$conv{money}})])],'minus'); #ARS,AED,COP,BWP,LVL,BHD,NPR,LKR,QAR,KWD,LYD,SAR,KZT,CLP,IRR,VEF,TTD,OMR,MUR,BND
1268             #warn serialize([minus([sort keys(%{$conv{money}})],[sort keys(%r)])],'minus'); #LTC,I44,BTC,BYR,TWI,NOK,XDR
1269 0 0       0 $conv{money}={%{$conv{money}},%r} if keys(%r)>20;
  0         0  
1270             };
1271 0 0       0 carp "conv: conv_prepare_money (currency conversion automatic daily updated rates) - $@\n" if $@;
1272 0         0 $conv{money}{"m$_"}=$conv{money}{$_}/1000 for qw/BTC XBT/;
1273 0         0 $conv_prepare_money_time=time();
1274 0         0 1; #not yet
1275             }
1276              
1277             sub conv {
1278 43     43 1 6692 my($num,$from,$to)=@_;
1279 43 50       80 croak "conf requires 3 args" if @_!=3;
1280 43 100       78 conv_prepare() if !$conv_prepare_time;
1281 43     86   116 my $types=sub{ my $unit=shift; [sort grep$conv{$_}{$unit}, keys%conv] };
  86         61  
  86         913  
1282 43   33     57 my @types=map{ my $ru=$_; my $r;$r=&$types($_) and @$r and $$ru=$_ and last for ($$ru,uc($$ru),lc($$ru)); $r }(\$from,\$to);
  86   33     61  
  86   50     50  
  86         147  
  86         157  
1283 43         44 my @err=map "Unit ".[$from,$to]->[$_]." is unknown",grep!@{$types[$_]},0..1;
  86         91  
1284 43         58 my @type=intersect(@types);
1285 43 50       60 push @err, "from=$from and to=$to has more than one possible conversions: ".join(", ", @type) if @type>1;
1286             push @err, "from $from (".(join(",",@{$types[0]})||'?').") and "
1287 43 50 0     55 ."to $to (" .(join(",",@{$types[1]})||'?').") has no known common unit type.\n" if @type<1;
      0        
1288 43 50       53 croak join"\n",map"conv: $_",@err if @err;
1289 43         33 my $type=$type[0];
1290 43 50 66     79 conv_prepare_money() if $type eq 'money' and time() >= $conv_prepare_money_time + $Currency_rates_expire;
1291 43 100       60 return conv_temperature(@_) if $type eq 'temperature';
1292 36 100       47 return conv_numbers(@_) if $type eq 'numbers';
1293 31         25 my $c=$conv{$type};
1294 31         24 my($cf,$ct)=@{$conv{$type}}{$from,$to};
  31         42  
1295 31 100 100     141 my $r= $cf>0 && $ct<0 ? -$ct/$num/$cf
    100 66        
1296             : $cf<0 && $ct>0 ? -$cf/$num/$ct
1297             : $cf*$num/$ct;
1298             # print STDERR "$num $from => $to from=$ff to=$ft r=$r\n";
1299 31         105 return $r;
1300             }
1301              
1302             sub conv_temperature { #http://en.wikipedia.org/wiki/Temperature#Conversion
1303 7     7 0 22 my($t,$from,$to)=(shift(),map uc(substr($_,0,1)),@_);
1304 7 100       16 $from=~s/K/C/ and $t-=273.15;
1305             #$from=~s/R/F/ and $t-=459.67; #rankine
1306 7 100       14 return $t if $from eq $to;
1307 1     1   7 {CK=>sub{$t+273.15},
1308 3     3   61 FC=>sub{($t-32)*5/9},
1309 2     2   13 CF=>sub{$t*9/5+32},
1310 0     0   0 FK=>sub{($t-32)*5/9+273.15},
1311 6         39 }->{$from.$to}->();
1312             }
1313              
1314             sub conv_numbers {
1315 5     5 0 5 my($n,$fr,$to)=@_;
1316 5 0       29 my $dec=$fr eq 'dec' ? $n
    50          
    50          
    50          
    50          
    50          
    50          
    100          
    50          
    100          
    50          
1317             :$fr eq 'hex' ? hex($n)
1318             :$fr eq 'oct' ? oct($n)
1319             :$fr eq 'bin' ? oct("0b$n")
1320             :$fr =~ /^(dusin|dozen|doz|dz)$/ ? $n*12
1321             :$fr =~ /^(gross|gr|gro)$/ ? $n*144
1322             :$fr eq 'great_gross' ? $n*12*144
1323             :$fr eq 'small_gross' ? $n*12*10
1324             :$fr eq 'skokk' ? $n*60 #norwegian unit
1325             :$fr eq 'roman' ? roman2int($n)
1326             :$fr eq 'des' ? $n
1327             :croak "Conv from $fr not supported yet";
1328 5 50       41 my $ret=$to eq 'dec' ? $dec
    100          
    50          
    50          
    50          
    50          
    50          
    100          
    100          
    50          
    50          
1329             :$to eq 'hex' ? sprintf("%x",$dec)
1330             :$to eq 'oct' ? sprintf("%o",$dec)
1331             :$to eq 'bin' ? sprintf("%b",$dec)
1332             :$to =~ /^(dusin|dozen|doz|dz)$/ ? $dec/12
1333             :$to =~ /^(gross|gr|gro)$/ ? $dec/144
1334             :$to eq 'great_gross' ? $dec/(12*144)
1335             :$to eq 'small_gross' ? $dec/(12*10)
1336             :$to eq 'skokk' ? $dec/60
1337             :$to eq 'roman' ? int2roman($dec)
1338             :$to eq 'des' ? $dec
1339             :croak "Conv to $to not suppoerted yet";
1340 5         19 $ret;
1341             }
1342             #http://en.wikipedia.org/wiki/Norwegian_units_of_measurement
1343              
1344              
1345             =head2 bytes_readable
1346              
1347             Converts a number of bytes to something human readable.
1348              
1349             Input 1: a number
1350              
1351             Input 2: optionally the number of decimals if >1000 B. Default is 2.
1352              
1353             Output: a string containing:
1354              
1355             the number with a B behind if the number is less than 1000
1356              
1357             the number divided by 1024 with two decimals and "kB" behind if the number is less than 1024*1000
1358              
1359             the number divided by 1048576 with two decimals and "MB" behind if the number is less than 1024*1024*1000
1360              
1361             the number divided by 1073741824 with two decimals and "GB" behind if the number is less than 1024*1024*1024*1000
1362              
1363             the number divided by 1099511627776 with two decimals and "TB" behind otherwise
1364              
1365             Examples:
1366              
1367             print bytes_readable(999); # 999 B
1368             print bytes_readable(1000); # 1000 B
1369             print bytes_readable(1001); # 0.98 kB
1370             print bytes_readable(1024); # 1.00 kB
1371             print bytes_readable(1153433.6); # 1.10 MB
1372             print bytes_readable(1181116006.4); # 1.10 GB
1373             print bytes_readable(1209462790553.6); # 1.10 TB
1374             print bytes_readable(1088516511498.24*1000); # 990.00 TB
1375             print bytes_readable(1088516511498.24*1000,3); # 990.000 TB
1376             print bytes_readable(1088516511498.24*1000,1); # 990.0 TB
1377              
1378             =cut
1379              
1380             sub bytes_readable {
1381 32     32 1 2644 my $bytes=shift();
1382 32   100     149 my $d=shift()||2; #decimals
1383 32 50       68 return undef if !defined $bytes;
1384 32 100       151 return "$bytes B" if abs($bytes) <= 2** 0*1000; #bytes
1385 24 100       352 return sprintf("%.*f kB",$d,$bytes/2**10) if abs($bytes) < 2**10*1000; #kilobyte
1386 8 100       37 return sprintf("%.*f MB",$d,$bytes/2**20) if abs($bytes) < 2**20*1000; #megabyte
1387 6 100       32 return sprintf("%.*f GB",$d,$bytes/2**30) if abs($bytes) < 2**30*1000; #gigabyte
1388 4 50       53 return sprintf("%.*f TB",$d,$bytes/2**40) if abs($bytes) < 2**40*1000; #terrabyte
1389 0         0 return sprintf("%.*f PB",$d,$bytes/2**50); #petabyte, exabyte, zettabyte, yottabyte
1390             }
1391              
1392             =head2 sec_readable
1393              
1394             Time written as C< 14h 37m > is often more humanly comprehensible than C< 52620 seconds >.
1395              
1396             print sec_readable( 0 ); # 0s
1397             print sec_readable( 0.0123 ); # 0.0123s
1398             print sec_readable(-0.0123 ); # -0.0123s
1399             print sec_readable( 1.23 ); # 1.23s
1400             print sec_readable( 1 ); # 1s
1401             print sec_readable( 9.87 ); # 9.87s
1402             print sec_readable( 10 ); # 10s
1403             print sec_readable( 10.1 ); # 10.1s
1404             print sec_readable( 59 ); # 59s
1405             print sec_readable( 59.123 ); # 59.1s
1406             print sec_readable( 60 ); # 1m 0s
1407             print sec_readable( 60.1 ); # 1m 0s
1408             print sec_readable( 121 ); # 2m 1s
1409             print sec_readable( 131 ); # 2m 11s
1410             print sec_readable( 1331 ); # 22m 11s
1411             print sec_readable(-1331 ); # -22m 11s
1412             print sec_readable( 13331 ); # 3h 42m
1413             print sec_readable( 133331 ); # 1d 13h
1414             print sec_readable( 1333331 ); # 15d 10h
1415             print sec_readable( 13333331 ); # 154d 7h
1416             print sec_readable( 133333331 ); # 4yr 82d
1417             print sec_readable( 1333333331 ); # 42yr 91d
1418              
1419             =cut
1420              
1421             sub sec_readable {
1422 25     25 1 5097 my $s=shift();
1423 25         32 my($h,$d,$y)=(3600,24*3600,365.25*24*3600);
1424 25 100 100     358 !defined$s ? undef
    100          
    100          
    100          
    100          
    100          
    50          
    50          
1425             :!length($s) ? ''
1426             :$s<0 ? '-'.sec_readable(-$s)
1427             :$s<60 && int($s)==$s
1428             ? $s."s"
1429             :$s<60 ? sprintf("%.*fs",int(3+-log($s)/log(10)),$s)
1430             :$s<3600 ? int($s/60)."m " .($s%60) ."s"
1431             :$s<24*3600 ? int($s/$h)."h " .int(($s%$h)/60)."m"
1432             :$s<366*24*3600 ? int($s/$d)."d " .int(($s%$d)/$h)."h"
1433             : int($s/$y)."yr ".int(($s%$y)/$d)."d";
1434             }
1435              
1436             =head2 int2roman
1437              
1438             Converts integers to roman numbers.
1439              
1440             B
1441              
1442             print int2roman(1234); # prints MCCXXXIV
1443             print int2roman(1971); # prints MCMLXXI
1444              
1445             (Adapted subroutine from Peter J. Acklam, jacklam(&)math.uio.no)
1446              
1447             I = 1
1448             V = 5
1449             X = 10
1450             L = 50
1451             C = 100 (centum)
1452             D = 500
1453             M = 1000 (mille)
1454              
1455             See also L.
1456              
1457             See L for more.
1458              
1459             =head2 roman2int
1460              
1461             roman2int("MCMLXXI") == 1971
1462              
1463             =cut
1464              
1465             #alternative algorithm: http://www.rapidtables.com/convert/number/how-number-to-roman-numerals.htm
1466             sub int2roman {
1467 1161     1161 1 5652 my($n,@p)=(shift,[],[1],[1,1],[1,1,1],[1,2],[2],[2,1],[2,1,1],[2,1,1,1],[1,3],[3]);
1468             !defined($n)? undef
1469             : !length($n) ? ""
1470             : int($n)!=$n ? croak"int2roman: $n is not an integer"
1471             : $n==0 ? ""
1472             : $n<0 ? "-".int2roman(-$n)
1473             : $n>3999 ? "M".int2roman($n-1000)
1474 1161 100       4034 : join'',@{[qw/I V X L C D M/]}[map{my$i=$_;map($_+5-$i*2,@{$p[$n/10**(3-$i)%10]})}(0..3)];
  913 100       3804  
  3652 100       2123  
  3652 50       1948  
  3652 100       6098  
    100          
1475             }
1476              
1477             sub roman2int {
1478 1017     1017 1 4484 my($r,$n,%c)=(shift,0,'',0,qw/I 1 V 5 X 10 L 50 C 100 D 500 M 1000/);
1479 1017 100 66     2926 $r=~s/^-//?-roman2int($r):
1480             $r=~s/(C?)([DM])|(X?)([LCDM])|(I?)([VXLCDM])|(I)|(.)/
1481             croak "roman2int: Invalid number $r" if $8;
1482             $n += $c{$2||$4||$6||$7} - $c{$1||$3||$5||''}; ''/eg && $n
1483             }
1484              
1485             #sub roman2int_slow {
1486             # my $r=shift;
1487             # $r=~s,^\-,, ? 0-roman2int($r)
1488             # : $r=~s,^M,,i ? 1000+roman2int($r)
1489             # : $r=~s,^CM,,i ? 900+roman2int($r)
1490             # : $r=~s,^D,,i ? 500+roman2int($r)
1491             # : $r=~s,^CD,,i ? 400+roman2int($r)
1492             # : $r=~s,^C,,i ? 100+roman2int($r)
1493             # : $r=~s,^XC,,i ? 90+roman2int($r)
1494             # : $r=~s,^L,,i ? 50+roman2int($r)
1495             # : $r=~s,^XL,,i ? 40+roman2int($r)
1496             # : $r=~s,^X,,i ? 10+roman2int($r)
1497             # : $r=~s,^IX,,i ? 9+roman2int($r)
1498             # : $r=~s,^V,,i ? 5+roman2int($r)
1499             # : $r=~s,^IV,,i ? 4+roman2int($r)
1500             # : $r=~s,^I,,i ? 1+roman2int($r)
1501             # : !length($r) ? 0
1502             # : croak "Invalid roman number $r";
1503             #}
1504              
1505             =head2 distance
1506              
1507             B the four decimal numbers of two GPS positions: latutude1, longitude1, latitude2, longitude2
1508              
1509             B the air distance in meters between the two points
1510              
1511             Calculation is done using the Haversine Formula for spherical distance:
1512              
1513             a = sin((lat2-lat1)/2)^2
1514             + sin((lon2-lon1)/2)^2 * cos(lat1) * cos(lat2);
1515              
1516             c = 2 * atan2(min(1,sqrt(a)),
1517             min(1,sqrt(1-a)))
1518              
1519             distance = c * R
1520              
1521             With earth radius set to:
1522              
1523             R = Re - (Re-Rp) * sin(abs(lat1+lat2)/2)
1524              
1525             Where C (equatorial radius) and C (polar radius).
1526              
1527             B
1528              
1529             my @oslo = ( 59.93937, 10.75135); # oslo in norway
1530             my @rio = (-22.97673, -43.19508); # rio in brazil
1531              
1532             printf "%.1f km\n", distance(@oslo,@rio)/1000; # 10431.7 km
1533             printf "%.1f km\n", distance(@rio,@oslo)/1000; # 10431.7 km
1534             printf "%.1f nmi\n", distance(@oslo,@rio)/1852.000; # 5632.7 nmi (nautical miles)
1535             printf "%.1f miles\n",distance(@oslo,@rio)/1609.344; # 6481.9 miles
1536             printf "%.1f miles\n",conv(distance(@oslo,@rio),"meters","miles");# 6481.9 miles
1537              
1538             See L
1539              
1540             and L
1541              
1542             and L
1543              
1544             and L, but Acme::Tools::distance() is about 8 times faster.
1545              
1546             =cut
1547              
1548             our $Distance_factor = $PI / 180;
1549 0     0 0 0 sub acos { atan2( sqrt(1 - $_[0] * $_[0]), $_[0] ) }
1550             sub distance_great_circle {
1551 0     0 0 0 my($lat1,$lon1,$lat2,$lon2)=map $Distance_factor*$_, @_;
1552 0         0 my($Re,$Rp)=( 6378137.0, 6356752.3 ); #earth equatorial and polar radius
1553 0         0 my $R=$Re-($Re-$Rp)*sin(abs($lat1+$lat2)/2); #approx
1554 0         0 return $R*acos(sin($lat1)*sin($lat2)+cos($lat1)*cos($lat2)*cos($lon2-$lon1))
1555             }
1556              
1557             sub distance {
1558 4     4 1 786 my($lat1,$lon1,$lat2,$lon2)=map $Distance_factor*$_, @_;
1559 4         39 my $a= sin(($lat2-$lat1)/2)**2
1560             + sin(($lon2-$lon1)/2)**2 * cos($lat1) * cos($lat2);
1561 4 50       4 my $sqrt_a =sqrt($a); $sqrt_a =1 if $sqrt_a >1;
  4         9  
1562 4 50       5 my $sqrt_1ma=sqrt(1-$a); $sqrt_1ma=1 if $sqrt_1ma>1;
  4         8  
1563 4         17 my $c=2*atan2($sqrt_a,$sqrt_1ma);
1564 4         5 my($Re,$Rp)=( 6378137.0, 6356752.3 ); #earth equatorial and polar radius
1565 4         7 my $R=$Re-($Re-$Rp)*sin(abs($lat1+$lat2)/2); #approx
1566 4         14 return $c*$R;
1567             }
1568              
1569              
1570             =head2 big
1571              
1572             =head2 bigi
1573              
1574             =head2 bigf
1575              
1576             =head2 bigr
1577              
1578             =head2 bigscale
1579              
1580             big, bigi, bigf, bigr and bigscale are sometimes convenient shorthands for using
1581             C<< Math::BigInt->new() >>, C<< Math::BigFloat->new() >> and C<< Math::BigRat->new() >>
1582             (preferably with the GMP for faster calculations). Examples:
1583              
1584             my $num1 = big(3); #returns a new Math::BigInt-object
1585             my $num2 = big('3.0'); #returns a new Math::BigFloat-object
1586             my $num3 = big(3.0); #returns a new Math::BigInt-object
1587             my $num4 = big(3.1); #returns a new Math::BigFloat-object
1588             my $num5 = big('2/7'); #returns a new Math::BigRat-object
1589             my($i1,$f1,$i2,$f2) = big(3,'3.0',3.0,3.1); #returns the four new numbers, as the above four lines
1590             #uses wantarray
1591              
1592             print 2**200; # 1.60693804425899e+60
1593             print big(2)**200; # 1606938044258990275541962092341162602522202993782792835301376
1594             print 2**big(200); # 1606938044258990275541962092341162602522202993782792835301376
1595             print big(2**200); # 1606938044258990000000000000000000000000000000000000000000000
1596              
1597             print 1/7; # 0.142857142857143
1598             print 1/big(7); # 0 because of integer arithmetics
1599             print 1/big(7.0); # 0 because 7.0 is viewed as an integer
1600             print 1/big('7.0'); # 0.1428571428571428571428571428571428571429
1601             print 1/bigf(7); # 0.1428571428571428571428571428571428571429
1602             print bigf(1/7); # 0.142857142857143 probably not what you wanted
1603              
1604             print 1/bigf(7); # 0.1428571428571428571428571428571428571429
1605             bigscale(80); # for increased precesion (default is 40)
1606             print 1/bigf(7); # 0.14285714285714285714285714285714285714285714285714285714285714285714285714285714
1607              
1608             In C the characters C<< . >> and C<< / >> will make it return a
1609             Math::BigFloat- and Math::BigRat-object accordingly. Or else a Math::BigInt-object is returned.
1610              
1611             Instead of guessing, use C, C and C to return what you want.
1612              
1613             B Acme::Tools does not depend on Math::BigInt and
1614             Math::BigFloat and GMP, but these four big*-subs do (by C).
1615             To use big, bigi, bigf and bigr effectively you should
1616             install Math::BigInt::GMP and Math::BigFloat::GMP like this:
1617              
1618             sudo cpanm Math::BigFloat Math::GMP Math::BingInt::GMP # or
1619             sudo cpan Math::BigFloat Math::GMP Math::BingInt::GMP # or
1620             sudo yum install perl-Math-BigInt-GMP perl-Math-GMP # on RedHat, RHEL or
1621             sudo apt-get install libmath-bigint-gmp-perl libmath-gmp-perl # on Ubuntu or some other way
1622              
1623             Unless GMP is installed for perl like this, the Math::Big*-modules
1624             will fall back to using similar but slower built in modules. See: L
1625              
1626             =cut
1627              
1628             sub bigi {
1629 9 100   9 1 185 eval q(use Math::BigInt try=>"GMP") if !$INC{'Math/BigInt.pm'};
  2     2   948  
  2         16420  
  2         176  
1630 9 100       10755 if (wantarray) { return (map Math::BigInt->new($_),@_) }
  2         9  
1631 7         20 else { return Math::BigInt->new($_[0]) }
1632             }
1633             sub bigf {
1634 7 100   7 1 1044 eval q(use Math::BigFloat try=>"GMP") if !$INC{'Math/BigFloat.pm'};
  2     2   792  
  2         16045  
  2         369  
1635 7 100       795 if (wantarray) { return (map Math::BigFloat->new($_),@_) }
  2         6  
1636 5         15 else { return Math::BigFloat->new($_[0]) }
1637             }
1638             sub bigr {
1639 17 100   17 1 9521 eval q(use Math::BigRat try=>"GMP") if !$INC{'Math/BigRat.pm'};
  2     2   826  
  2         8477  
  2         308  
1640 17 100       1018 if (wantarray) { return (map Math::BigRat->new($_),@_) }
  7         23  
1641 10         24 else { return Math::BigRat->new($_[0]) }
1642             }
1643             sub big {
1644             wantarray
1645 12 50   12 1 3198 ? (map /\./ ? bigf($_) : /\// ? bigr($_) : bigi($_), @_)
    100          
    100          
    100          
    100          
1646             : $_[0]=~/\./ ? bigf($_[0]) : $_[0]=~/\// ? bigr($_[0]) : bigi($_[0]);
1647             }
1648             sub bigscale {
1649 4 100   4 1 2186 @_==1 or croak "bigscale requires one and only one argument";
1650 2         3 my $scale=shift();
1651 2 50       4 eval q(use Math::BigInt try=>"GMP") if !$INC{'Math/BigInt.pm'};
1652 2 50       5 eval q(use Math::BigFloat try=>"GMP") if !$INC{'Math/BigFloat.pm'};
1653 2 50       4 eval q(use Math::BigRat try=>"GMP") if !$INC{'Math/BigRat.pm'};
1654 2         6 Math::BigInt->div_scale($scale);
1655 2         24 Math::BigFloat->div_scale($scale);
1656 2         14 Math::BigRat->div_scale($scale);
1657 2         13 return;
1658             }
1659              
1660             #my $R_authalic=6371007.2; #earth radius in meters, mean, Authalic radius, real R varies 6353-6384km, http://en.wikipedia.org/wiki/Earth_radius
1661             #*)
1662             # ( 6378157.5, 6356772.2 ) #hmm
1663             #my $e=0.081819218048345;#sqrt(1 - $b**2/$a**2); #eccentricity of the ellipsoid
1664             #my($a,$b)=( 6378137.0, 6356752.3 ); #earth equatorial and polar radius
1665             #warn "e=$e\n";
1666             #warn "t=".(1 - $e**2)."\n";
1667             #warn "n=".((1 - $e**2 * sin(($lat1+$lat1)/2)**2)**1.5)."\n";
1668             #my $t=1 - $e**2;
1669             #my $n=(1 - $e**2 * sin(($lat1+$lat1)/2)**2)**1.5;
1670             #warn "t=$t\n";
1671             #warn "n=$n\n";
1672             #$a * (1 - $e**2) / ((1 - $e**2 * sin(($lat1+$lat2)/2)**2)**1.5); #hmm avg lat
1673             #$R=$a * $t/$n;
1674              
1675             #=head2 fractional
1676             #=cut
1677              
1678             sub fractional { #http://mathcentral.uregina.ca/QQ/database/QQ.09.06/h/lil1.html
1679 0     0 0 0 carp "fractional: NOT FINISHED";
1680 0         0 my $n=shift;
1681 0         0 print "----fractional n=$n\n";
1682 0         0 my $nn=$n; my $dec;
  0         0  
1683 0         0 $nn=~s,\.(\d+)$,$dec=length($1);$1.,;
1684 0         0 my $l;
1685 0         0 my $max=0;
1686 0         0 my($te,$ne);
1687 0         0 for(1..length($nn)/2){
1688 0 0       0 if( $nn=~/^(\d*?)((.{$_})(\3)+)$/ ){
1689 0         0 print "_ = $_ ".length($2)."\n";
1690 0 0       0 if(length($2)>$max){
1691 0         0 $l=$_;
1692 0         0 $te="$1$3"-$1;
1693 0         0 $max=length($2);
1694             }
1695             }
1696             }
1697 0 0 0     0 return fractional($n) if !$l and !recursed() and $dec>6 and substr($n,-1) and substr($n,-1)--;
      0        
      0        
      0        
1698 0         0 print "l=$l max=$max\n";
1699 0         0 $ne="9" x $l;
1700 0         0 print log($n),"\n";
1701 0     0   0 my $st=sub{print "status: ".($te/$ne)." n=$n ".($n/$te*$ne)."\n"};
  0         0  
1702 0         0 while($n/$te*$ne<0.99){ &$st(); $ne*=10 }
  0         0  
  0         0  
1703 0         0 while($te/$n/$ne<0.99){ &$st(); $te*=10 }
  0         0  
  0         0  
1704 0         0 &$st();
1705 0         0 while(1){
1706 0         0 my $d=gcd($te,$ne); print "gcd=$d\n";
  0         0  
1707 0 0       0 last if $d==1;
1708 0         0 $te/=$d; $ne/=$d;
  0         0  
1709             }
1710 0         0 &$st();
1711 0 0       0 wantarray ? ($te,$ne) : "$te/$ne"; #gcd()
1712             }
1713              
1714             =head2 isnum
1715              
1716             B String to be tested on regexp C<< /^ \s* [\-\+]? (?: \d*\.\d+ | \d+ ) (?:[eE][\-\+]?\d+)?\s*$/x >>. If no argument is given isnum checks C<< $_ >>.
1717              
1718             B True or false (1 or 0)
1719              
1720             use Acme::Tools;
1721             my @e=(' +32.12354E-21 ', 2.2, '9' x 99999, ' -123.12', '29,323.31', '29 323.31');
1722             print isnum() ? 'num' : 'str' for @e; #prints num for every element except the last two
1723             print $_=~$Re_isnum ? 'num' : 'str' for @e; #same
1724              
1725             =cut
1726              
1727             our $Re_isnum=qr/^ \s* [\-\+]? (?: \d*\.\d+ | \d+ ) (?:[eE][\-\+]?\d+)?\s*$/x;
1728 20 100   20 1 3165 sub isnum {(@_?$_[0]:$_)=~$Re_isnum}
1729              
1730             =head2 between
1731              
1732             Input: Three arguments.
1733              
1734             Returns: Something I if the first argument is numerically between the two next.
1735              
1736             =cut
1737              
1738             sub between {
1739 27     27 1 6369 my($test,$fom,$tom)=@_;
1740 33     33   210 no warnings;
  33         41  
  33         6505  
1741 27 100 66     427 return $fom<$tom ? $test>=$fom&&$test<=$tom
      33        
1742             : $test>=$tom&&$test<=$fom;
1743             }
1744              
1745             =head2 curb
1746              
1747             B Three arguments: value, minumum, maximum.
1748              
1749             B Returns the value if its between the given minumum and maximum.
1750             Returns minimum if the value is less or maximum if the value is more.
1751              
1752             my $v = 234;
1753             print curb( $v, 200, 250 ); #prints 234
1754             print curb( $v, 150, 200 ); #prints 200
1755             print curb( $v, 250, 300 ); #prints 250
1756             print curb(\$v, 250, 300 ); #prints 250 and changes $v
1757             print $v; #prints 250
1758              
1759             In the last example $v is changed because the argument is a reference. (To keep backward compatability, C<< bound() >> is a synonym for C<< curb() >>)
1760              
1761             =cut
1762              
1763             sub curb {
1764 129     129 1 744 my($val,$min,$max)=@_;
1765 129 50 66     1096 croak "curb: wrong args" if @_!=3 or !defined$min or !defined$max or !defined$val or $min>$max;
      100        
      66        
      66        
1766 126 100       151 return $$val=curb($$val,$min,$max) if ref($val) eq 'SCALAR';
1767 125 100       192 $val < $min ? $min :
    100          
1768             $val > $max ? $max :
1769             $val;
1770             }
1771 0     0 0 0 sub bound { curb(@_) }
1772 0     0 0 0 sub log10 { log($_[0])/log(10) }
1773 0     0 0 0 sub log2 { log($_[0])/log(2) }
1774              
1775             =head1 STRINGS
1776              
1777             =head2 upper
1778              
1779             =head2 lower
1780              
1781             Returns input string as uppercase or lowercase.
1782              
1783             Can be used if Perls build in C and C for some reason does not convert æøå or other latin letters outsize a-z.
1784              
1785             Converts C<< æøåäëïöüÿâêîôûãõàèìòùáéíóúýñð >> to and from C<< ÆØÅÄËÏÖÜ?ÂÊÎÔÛÃÕÀÈÌÒÙÁÉÍÓÚÝÑÐ >>
1786              
1787             See also C<< perldoc -f uc >> and C<< perldoc -f lc >>
1788              
1789             =head2 trim
1790              
1791             Removes space from the beginning and end of a string. Whitespace (C<< \s >>) that is.
1792             And removes any whitespace inside the string of more than one char, leaving the first whitespace char. Thus:
1793              
1794             trim(" asdf \t\n 123 ") eq "asdf 123"
1795             trim(" asdf\t\n 123\n") eq "asdf\t123"
1796              
1797             Works on C<< $_ >> if no argument i given:
1798              
1799             print join",", map trim, " please ", " remove ", " my ", " spaces "; # please,remove,my,spaces
1800             print join",", trim(" please ", " remove ", " my ", " spaces "); # works on arrays as well
1801             my $s=' please '; trim(\$s); # now $s eq 'please'
1802             trim(\@untrimmedstrings); # trims array strings inplace
1803             @untrimmedstrings = map trim, @untrimmedstrings; # same, works on $_
1804             trim(\$_) for @untrimmedstrings; # same, works on \$_
1805              
1806             =head2 lpad
1807              
1808             =head2 rpad
1809              
1810             Left or right pads a string to the given length by adding one or more spaces at the end for I or at the start for I.
1811              
1812             B First argument: string to be padded. Second argument: length of the output. Optional third argument: character(s) used to pad.
1813             Default is space.
1814              
1815             rpad('gomle',9); # 'gomle '
1816             lpad('gomle',9); # ' gomle'
1817             rpad('gomle',9,'-'); # 'gomle----'
1818             lpad('gomle',9,'+'); # '++++gomle'
1819             rpad('gomle',4); # 'goml'
1820             lpad('gomle',4); # 'goml'
1821             rpad('gomle',7,'xyz'); # 'gomlxy'
1822             lpad('gomle',10,'xyz'); # 'xyzxygoml'
1823              
1824             =head2 cpad
1825              
1826             Center pads. Pads the string both on left and right equal to the given length. Centers the string. Pads right side first.
1827              
1828             cpad('mat',5) eq ' mat '
1829             cpad('mat',4) eq 'mat '
1830             cpad('mat',6) eq ' mat '
1831             cpad('mat',9) eq ' mat '
1832             cpad('mat',5,'+') eq '+mat+'
1833             cpad('MMMM',20,'xyzXYZ') eq 'xyzXYZxyMMMMxyzXYZxy'
1834              
1835             =cut
1836              
1837 33 50   33 1 138 sub upper {no warnings;my $s=@_?shift:$_;$s=~tr/a-zæøåäëïöüÿâêîôûãõàèìòùáéíóúýñð/A-ZÆØÅÄËÏÖÜÿÂÊÎÔÛÃÕÀÈÌÒÙÁÉÍÓÚÝÑÐ/;$s}
  33     1   39  
  33         1980  
  1         6  
  1         5  
  1         5  
1838 33 50   33 1 129 sub lower {no warnings;my $s=@_?shift:$_;$s=~tr/A-ZÆØÅÄËÏÖÜÿÂÊÎÔÛÃÕÀÈÌÒÙÁÉÍÓÚÝÑÐ/a-zæøåäëïöüÿâêîôûãõàèìòùáéíóúýñð/;$s}
  33     1   37  
  33         86206  
  1         7  
  1         2  
  1         6  
1839              
1840             sub trim {
1841 24 100   24 1 58 return trim($_) if !@_;
1842 20 100       35 return map trim($_), @_ if @_>1;
1843 19         16 my $s=shift;
1844 19 100       30 if(ref($s) eq 'SCALAR'){ $$s=~s,^\s+|(?<=\s)\s+|\s+$,,g; return $$s}
  5         19  
  5         7  
1845 14 100       20 if(ref($s) eq 'ARRAY') { trim(\$_) for @$s; return $s }
  1         3  
  1         2  
1846 13         63 $s=~s,^\s+|(?<=\s)\s+|\s+$,,g;
1847 13         41 $s;
1848             }
1849              
1850             sub rpad {
1851 17     17 1 536 my($s,$l,$p)=@_;
1852 17 100 66     38 $p=' ' if @_<3 or !length($p);
1853 17         50 $s.=$p while length($s)<$l;
1854 17         54 substr($s,0,$l);
1855             }
1856              
1857             sub lpad {
1858 5     5 1 994 my($s,$l,$p)=@_;
1859 5 100 66     21 $p=' ' if @_<3 or !length($p);
1860 5 100       33 $l
1861             ? substr($s,0,$l)
1862             : substr($p x (1+$l/length($p)), 0, $l-length($s)).$s;
1863             }
1864              
1865             sub cpad {
1866 11     11 1 1788 my($s,$l,$p)=@_;
1867 11 100 66     46 $p=' ' if @_<3 or !length($p);
1868 11         11 my $ls=length($s);
1869 11 50       15 return substr($s,0,$l) if $l<$ls;
1870 11         23 $p=$p x (($l-$ls+2)/length($p));
1871 11         44 substr($p, 0, ($l-$ls )/2) . $s .
1872             substr($p, 0, ($l-$ls+1)/2);
1873             }
1874              
1875             sub cpad_old {
1876 0     0 0 0 my($s,$l,$p)=@_;
1877 0 0       0 $p=' ' if !length($p);
1878 0 0       0 return substr($s,0,$l) if $l
1879 0         0 my $i=0;
1880 0         0 while($l>length($s)){
1881 0 0       0 my $pc=substr($p,($i==int($i)?1:-1)*($i%length($p)),1);
1882 0 0       0 $i==int($i) ? ($s.=$pc) : ($s=$pc.$s);
1883 0         0 $i+=1/2;
1884             }
1885 0         0 $s;
1886             }
1887              
1888             =head2 trigram
1889              
1890             B A string (i.e. a name). And an optional x (see example 2)
1891              
1892             B A list of this strings trigrams (See examlpe)
1893              
1894             B
1895              
1896             print join ", ", trigram("Kjetil Skotheim");
1897              
1898             Prints:
1899              
1900             Kje, jet, eti, til, il , l S, Sk, Sko, kot, oth, the, hei, eim
1901              
1902             B
1903              
1904             Default is 3, but here 4 is used instead in the second optional input argument:
1905              
1906             print join ", ", trigram("Kjetil Skotheim", 4);
1907              
1908             And this prints:
1909              
1910             Kjet, jeti, etil, til , il S, l Sk, Sko, Skot, koth, othe, thei, heim
1911              
1912             C was created for "fuzzy" name searching. If you have a database of many names,
1913             addresses, phone numbers, customer numbers etc. You can use trigram() to search
1914             among all of those at the same time. If the search form only has one input field.
1915             One general search box.
1916              
1917             Store all of the trigrams of the trigram-indexed input fields coupled
1918             with each person, and when you search, you take each trigram of you
1919             query string and adds the list of people that has that trigram. The
1920             search result should then be sorted so that the persons with most hits
1921             are listed first. Both the query strings and the indexed database
1922             fields should have a space added first and last before C-ing
1923             them.
1924              
1925             This search algorithm is not includes here yet...
1926              
1927             C should perhaps have been named ngram for obvious reasons.
1928              
1929             =head2 sliding
1930              
1931             Same as trigram (except there is no default width). Works also with arrayref instead of string.
1932              
1933             Example:
1934              
1935             sliding( ["Reven","rasker","over","isen"], 2 )
1936              
1937             Result:
1938              
1939             ( ['Reven','rasker'], ['rasker','over'], ['over','isen'] )
1940              
1941             =head2 chunks
1942              
1943             Splits strings and arrays into chunks of given size:
1944              
1945             my @a = chunks("Reven rasker over isen",7);
1946             my @b = chunks([qw/Og gubben satt i kveldinga og koste seg med skillinga/], 3);
1947              
1948             Resulting arrays:
1949              
1950             ( 'Reven r', 'asker o', 'ver ise', 'n' )
1951             ( ['Og','gubben','satt'], ['i','kveldinga','og'], ['koste','seg','med'], ['skillinga'] )
1952              
1953             =head2 chars
1954              
1955             chars("Tittentei"); # ('T','i','t','t','e','n','t','e','i')
1956              
1957             =cut
1958              
1959 2   100 2 1 248 sub trigram { sliding($_[0],$_[1]||3) }
1960              
1961             sub sliding {
1962 3     3 1 6 my($s,$w)=@_;
1963 3 100       46 return map substr($s,$_,$w), 0..length($s)-$w if !ref($s);
1964 1 50       19 return map [@$s[$_..$_+$w-1]], 0..@$s-$w if ref($s) eq 'ARRAY';
1965             }
1966              
1967             sub chunks {
1968 2     2 1 6 my($s,$w)=@_;
1969 2 100       52 return $s=~/(.{1,$w})/g if !ref($s);
1970 1 50       66 return map [@$s[$_*$w .. min($_*$w+$w-1,$#$s)]], 0..$#$s/$w if ref($s) eq 'ARRAY';
1971             }
1972              
1973 0     0 1 0 sub chars { split//, shift }
1974              
1975             =head2 repl
1976              
1977             Synonym for replace().
1978              
1979             =head2 replace
1980              
1981             Return the string in the first input argument, but where pairs of search-replace strings (or rather regexes) has been run.
1982              
1983             Works as C in Oracle, or rather regexp_replace() in Oracle 10 and onward. Except that this C accepts more than three arguments.
1984              
1985             Examples:
1986              
1987             print replace("water","ater","ine"); # Turns water into wine
1988             print replace("water","ater"); # w
1989             print replace("water","at","eath"); # weather
1990             print replace("water","wa","ju",
1991             "te","ic",
1992             "x","y", # No x is found, no y is returned
1993             'r$',"e"); # Turns water into juice. 'r$' says that the r it wants
1994             # to change should be the last letters. This reveals that
1995             # second, fourth, sixth and so on argument is really regexs,
1996             # not normal strings. So use \ (or \\ inside "") to protect
1997             # the special characters of regexes. You probably also
1998             # should write qr/regexp/ instead of 'regexp' if you make
1999             # use of regexps here, just to make it more clear that
2000             # these are really regexps, not strings.
2001              
2002             print replace('JACK and JUE','J','BL'); # prints BLACK and BLUE
2003             print replace('JACK and JUE','J'); # prints ACK and UE
2004             print replace("abc","a","b","b","c"); # prints ccc (not bcc)
2005              
2006             If the first argument is a reference to a scalar variable, that variable is changed "in place".
2007              
2008             Example:
2009              
2010             my $str="test";
2011             replace(\$str,'e','ee','s','S');
2012             print $str; # prints teeSt
2013              
2014             =cut
2015              
2016 46     46 1 76 sub replace { repl(@_) }
2017             sub repl {
2018 83     83 1 152 my $str=shift;
2019 83 100       187 return $$str=replace($$str,@_) if ref($str) eq 'SCALAR';
2020             #return ? if ref($str) eq 'ARRAY';
2021             #return ? if ref($str) eq 'HASH';
2022 82         149 while(@_){
2023 109         152 my($fra,$til)=(shift,shift);
2024 109 100       4181 defined $til ? $str=~s/$fra/$til/g : $str=~s/$fra//g;
2025             }
2026 82         456 return $str;
2027             }
2028              
2029             =head1 ARRAYS
2030              
2031             =head2 min
2032              
2033             Returns the smallest number in a list. Undef is ignored.
2034              
2035             @lengths=(2,3,5,2,10,undef,5,4);
2036             $shortest = min(@lengths); # returns 2
2037              
2038             Note: The comparison operator is perls C<< < >>> which means empty strings is treated as C<0>, the number zero. The same goes for C, except of course C<< > >> is used instead.
2039              
2040             min(3,4,5) # 3
2041             min(3,4,5,undef) # 3
2042             min(3,4,5,'') # returns the empty string
2043              
2044             =head2 max
2045              
2046             Returns the largest number in a list. Undef is ignored.
2047              
2048             @heights=(123,90,134,undef,132);
2049             $highest = max(@heights); # 134
2050              
2051             =head2 mins
2052              
2053             Just as L, except for strings.
2054              
2055             print min(2,7,10); # 2
2056             print mins("2","7","10"); # 10
2057             print mins(2,7,10); # 10
2058              
2059             =head2 maxs
2060              
2061             Just as L, except for strings.
2062              
2063             print max(2,7,10); # 10
2064             print maxs("2","7","10"); # 7
2065             print maxs(2,7,10); # 7
2066              
2067             =cut
2068              
2069 35 100 100 35 1 1502 sub min {my $min;for(@_){ $min=$_ if defined($_) and !defined($min) || $_ < $min } $min }
  35   66     56  
  7087         20529  
  35         191  
2070 1 100 100 1 1 1 sub mins {my $min;for(@_){ $min=$_ if defined($_) and !defined($min) || $_ lt $min} $min }
  1   33     3  
  3         17  
  1         3  
2071 32 100 100 32 1 27 sub max {my $max;for(@_){ $max=$_ if defined($_) and !defined($max) || $_ > $max } $max }
  32   66     52  
  7093         20654  
  32         378  
2072 1 100 100 1 1 1 sub maxs {my $max;for(@_){ $max=$_ if defined($_) and !defined($max) || $_ gt $max} $max }
  1   33     2  
  3         16  
  1         4  
2073              
2074             =head2 zip
2075              
2076             B Two or more arrayrefs. A number of equal sized arrays
2077             containing numbers, strings or anything really.
2078              
2079             B An array of those input arrays zipped (interlocked, merged) into each other.
2080              
2081             print join " ", zip( [1,3,5], [2,4,6] ); # 1 2 3 4 5 6
2082             print join " ", zip( [1,4,7], [2,5,8], [3,6,9] ); # 1 2 3 4 5 6 7 8 9
2083              
2084             Example:
2085              
2086             zip() creates a hash where the keys are found in the first array and values in the secord in the correct order:
2087              
2088             my @media = qw/CD DVD VHS LP Blueray/;
2089             my @count = qw/20 12 2 4 3/;
2090             my %count = zip(\@media,\@count); # or zip( [@media], [@count] )
2091             print "I got $count{DVD} DVDs\n"; # I got 12 DVDs
2092              
2093             Dies (croaks) if the two lists are of different sizes
2094              
2095             ...or any input argument is not an array ref.
2096              
2097             =cut
2098              
2099             sub zip {
2100 7     7 1 45865 my @t=@_;
2101 7   66     255 ref($_) ne 'ARRAY' and croak "ERROR: zip should have arrayrefs as arguments" for @t;
2102 5   66     9 @{$t[$_]} != @{$t[0]} and croak "ERROR: zip should have equal sized arrays" for 1..$#t;
  5         6  
  5         163  
2103 3         4 my @res;
2104 3         3 for my $i (0..@{$t[0]}-1){
  3         5  
2105 9         15 push @res, $$_[$i] for @t;
2106             }
2107 3         18 return @res;
2108             }
2109              
2110              
2111             =head2 pushsort
2112              
2113             Adds one or more element to a numerically sorted array and keeps it sorted.
2114              
2115             pushsort @a, 13; # this...
2116             push @a, 13; @a = sort {$a<=>$b} @a; # is the same as this, but the former is faster if @a is large
2117              
2118             =head2 pushsortstr
2119              
2120             Same as pushsort except that the array is kept sorted alphanumerically (cmp) instead of numerically (<=>). See L.
2121              
2122             pushsort @a, "abc"; # this...
2123             push @a, "abc"; @a = sort @a; # is the same as this, but the former is faster if @a is large
2124              
2125             =cut
2126              
2127             our $Pushsort_cmpsub=undef;
2128             sub pushsort (\@@) {
2129 10915     10915 1 8497 my $ar=shift;
2130              
2131             #not needed but often faster
2132 10915 100 100     18499 if(!defined $Pushsort_cmpsub and @$ar+@_<100){ #hm speedup?
2133 2348         2382 @$ar=(sort {$a<=>$b} (@$ar,@_));
  52064         30065  
2134 2348         1807 return 0+@$ar;
2135             }
2136              
2137 8567         6473 for my $v (@_){
2138              
2139             #not needed but often faster
2140 7567 100       8354 if(!defined $Pushsort_cmpsub){ #faster rank() in most cases
2141 901 100 50     1048 push @$ar, $v and next if $v>=$$ar[-1];
2142 898 100 50     1025 unshift @$ar, $v and next if $v< $$ar[0];
2143             }
2144              
2145 7562         7368 splice @$ar, binsearch($v,$ar,1,$Pushsort_cmpsub)+1, 0, $v;
2146             }
2147 8567         7598 0+@$ar
2148             }
2149 0     0 1 0 sub pushsortstr(\@@){ local $Pushsort_cmpsub=sub{$_[0]cmp$_[1]}; pushsort(@_) } #speedup: copy sub pushsort
  1000     1000   1939  
  1000         943  
2150              
2151             =head2 binsearch
2152              
2153             Returns the position of an element in a numerically sorted array. Returns undef if the element is not found.
2154              
2155             B Two, three or four arguments
2156              
2157             B the element to find. Usually a number.
2158              
2159             B a reference to the array to search in. The array
2160             should be sorted in ascending numerical order (se exceptions below).
2161              
2162             B Optional. Default false.
2163              
2164             If true, whether result I should return undef or a fractional position.
2165              
2166             If the third argument is false binsearch returns undef if the element is not found.
2167              
2168             If the third argument is true binsearch returns 0.5 plus closest position below the searched value.
2169              
2170             Returns C< last position + 0.5 > if the searched element is greater than all elements in the sorted array.
2171              
2172             Returns C< -0.5 > if the searched element is less than all elements in the sorted array.
2173              
2174             Fourth argument: Optional. Default C<< sub { $_[0] <=> $_[1] } >>.
2175              
2176             If present, the fourth argument is either:
2177              
2178             =over 4
2179              
2180             =item * a code-ref that alters the way binsearch compares two elements, default is C<< sub{$_[0]<=>$_[1]} >>
2181              
2182             =item * a string that works as a hash key (column name), see example below
2183              
2184             =back
2185              
2186             B
2187              
2188             binsearch(10,[5,10,15,20]); # 1
2189             binsearch(10,[20,15,10,5],undef,sub{$_[1]<=>$_[0]}); # 2 search arrays sorted numerically in opposite order
2190             binsearch("c",["a","b","c","d"],undef,sub{$_[0]cmp$_[1]}); # 2 search arrays sorted alphanumerically
2191             binsearchstr("b",["a","b","c","d"]); # 1 search arrays sorted alphanumerically
2192              
2193             my @data=( map { {num=>$_, sqrt=>sqrt($_), square=>$_**2} }
2194             grep !$_%7, 1..1000000 );
2195             my $i = binsearch( {num=>913374}, \@data, undef, sub {$_[0]{num} <=> $_[1]{num}} );
2196             my $i = binsearch( {num=>913374}, \@data, undef, 'num' ); #same as previous line
2197             my $found_hashref = defined $i ? $data[$i] : undef;
2198              
2199             =head2 binsearchstr
2200              
2201             Same as binsearch except that the arrays is sorted alphanumerically
2202             (cmp) instead of numerically (<=>) and the searched element is a
2203             string, not a number. See L.
2204              
2205             =cut
2206              
2207             our $Binsearch_steps;
2208             our $Binsearch_maxsteps=100;
2209             sub binsearch {
2210 14609     14609 1 28791 my($search,$aref,$insertpos,$cmpsub)=@_; #search pos of search in array
2211 14609 50       19850 croak "binsearch did not get arrayref as second arg" if ref($aref) ne 'ARRAY';
2212 14609 50 100     37024 croak "binsearch got fourth arg which is not a code-ref" if defined $cmpsub and ref($cmpsub) and ref($cmpsub) ne 'CODE';
      66        
2213 14609 100 100     26990 if(defined $cmpsub and !ref($cmpsub)){
2214 1         2 my $key=$cmpsub;
2215 1     17   4 $cmpsub = sub{ $_[0]{$key} <=> $_[1]{$key} };
  17         47  
2216             }
2217 14609 50       16724 return $insertpos ? -0.5 : undef if !@$aref;
    100          
2218 14597         11015 my($min,$max)=(0,$#$aref);
2219 14597         9574 $Binsearch_steps=0;
2220 14597         17240 while (++$Binsearch_steps <= $Binsearch_maxsteps) {
2221 128735         108476 my $middle=int(($min+$max+0.5)/2);
2222 128735         78797 my $middle_value=$$aref[$middle];
2223              
2224             #croak "binsearch got non-sorted array" if !$cmpsub and $$aref[$min]>$$aref[$min]
2225             # or $cmpsub and &$cmpsub($$aref[$min],$$aref[$min])>0;
2226              
2227 128735 100 100     453784 if( !$cmpsub and $search < $middle_value
    100 100        
      66        
      100        
      100        
      66        
2228             or $cmpsub and &$cmpsub($search,$middle_value) < 0 ) { #print "<\n";
2229 38944 50 66     51700 $max=$min, next if $middle == $max and $min != $max;
2230 38944 50       41473 return $insertpos ? $middle-0.5 : undef if $middle == $max;
    100          
2231 37494         44437 $max=$middle;
2232             }
2233             elsif( !$cmpsub and $search > $middle_value
2234             or $cmpsub and &$cmpsub($search,$middle_value) > 0 ) { #print ">\n";
2235 82752 100 100     131076 $min=$max, next if $middle == $min and $max != $min;
2236 75247 50       87387 return $insertpos ? $middle+0.5 : undef if $middle == $min;
    100          
2237 69139         85869 $min=$middle;
2238             }
2239             else { #print "=\n";
2240 7039         10191 return $middle;
2241             }
2242             }
2243 0         0 croak "binsearch exceded $Binsearch_maxsteps steps";
2244             }
2245              
2246             sub binsearchfast { # binary search routine finds index just below value
2247 0     0 0 0 my ($x,$v)=@_;
2248 0         0 my ($klo,$khi)=(0,$#{$x});
  0         0  
2249 0         0 my $k;
2250 0         0 while (($khi-$klo)>1) {
2251 0         0 $k=int(($khi+$klo)/2);
2252 0 0       0 if ($$x[$k]>$v) { $khi=$k; } else { $klo=$k; }
  0         0  
  0         0  
2253             }
2254 0         0 return $klo;
2255             }
2256              
2257              
2258 2     2 1 8 sub binsearchstr {binsearch(@_[0..2],sub{$_[0]cmp$_[1]})}
  1     1   8  
2259              
2260             =head2 rank
2261              
2262             B Two or three arguments. N and an arrayref for the list to look at.
2263              
2264             In scalar context: Returns the nth smallest number in an array. The array doesn't have to be sorted.
2265              
2266             In array context: Returns the n smallest numbers in an array.
2267              
2268             To return the n(th) largest number(s) instead of smallest, just negate n.
2269              
2270             An optional third argument can be a sub that is used to compare the elements of the input array.
2271              
2272             Examples:
2273              
2274             my $second_smallest = rank(2, [11,12,13,14]); # 12
2275             my @top10 = rank(-10, [1..100]); # 100, 99, 98, 97, 96, 95, 94, 93, 92, 91
2276             my $max = rank(-1, [101,102,103,102,101]); #103
2277             my @contest = ({name=>"Alice",score=>14},{name=>"Bob",score=>13},{name=>"Eve",score=>12});
2278             my $second = rank(2, \@contest, sub{$_[1]{score}<=>$_[0]{score}})->{name}; #Bob
2279              
2280             =head2 rankstr
2281              
2282             Just as C but sorts alphanumerically (strings, cmp) instead of numerically.
2283              
2284             =cut
2285              
2286             sub rank {
2287 35     35 1 334 my($rank,$aref,$cmpsub)=@_;
2288 35 100       64 if($rank<0){
2289 10   100 30037   44 $cmpsub||=sub{$_[0]<=>$_[1]};
  30037         83109  
2290 10     60180   32 return rank(-$rank,$aref,sub{0-&$cmpsub});
  60180         46990  
2291             }
2292 25         19 my @sort;
2293 25         27 local $Pushsort_cmpsub=$cmpsub;
2294 25         32 for(@$aref){
2295 8914         8457 pushsort @sort, $_;
2296 8914 100       13664 pop @sort if @sort>$rank;
2297             }
2298 25 100       1206 return wantarray ? @sort : $sort[$rank-1];
2299             }
2300 0 50   0 1 0 sub rankstr {wantarray?(rank(@_,sub{$_[0]cmp$_[1]})):rank(@_,sub{$_[0]cmp$_[1]})}
  60142     10   168873  
  10         54  
2301              
2302             =head2 eqarr
2303              
2304             B Two or more references to arrays.
2305              
2306             B True (1) or false (0) for whether or not the arrays are numerically I alphanumerically equal.
2307             Comparing each element in each array with both C< == > and C< eq >.
2308              
2309             Examples:
2310              
2311             eqarr([1,2,3],[1,2,3],[1,2,3]); # 1 (true)
2312             eqarr([1,2,3],[1,2,3],[1,2,4]); # 0 (false)
2313             eqarr([1,2,3],[1,2,3,4]); # undef (different size, false)
2314             eqarr([1,2,3]); # croak (should be two or more arrays)
2315             eqarr([1,2,3],1,2,3); # croak (not arraysrefs)
2316              
2317             =cut
2318              
2319             sub eqarr {
2320 27     27 1 1209 my @arefs=@_;
2321 27 100       229 croak if @arefs<2;
2322 26   66     174 ref($_) ne 'ARRAY' and croak for @arefs;
2323 25   100     55 @{$arefs[0]} != @{$arefs[$_]} and return undef for 1..$#arefs;
  27         25  
  27         69  
2324 24         24 my $ant;
2325            
2326 24         75 for my $ar (@arefs[1..$#arefs]){
2327 26         36 for(0..@$ar-1){
2328 272 50 33     630 ++$ant and $ant>100 and croak ">100"; #TODO: feiler ved sammenligning av to tabeller > 10000(?) tall
2329 272 100 66     771 return 0 if $arefs[0][$_] ne $$ar[$_]
2330             or $arefs[0][$_] != $$ar[$_];
2331             }
2332             }
2333 22         135 return 1;
2334             }
2335              
2336             =head2 sorted
2337              
2338             Return true if the input array is numerically sorted.
2339              
2340             @a=(1..10); print "array is sorted" if sorted @a; #true
2341              
2342             Optionally the last argument can be a comparison sub:
2343              
2344             @person=({Rank=>1,Name=>'Amy'}, {Rank=>2,Name=>'Paula'}, {Rank=>3,Name=>'Ruth'});
2345             print "Persons are sorted" if sorted @person, sub{$_[0]{Rank}<=>$_[1]{Rank}};
2346              
2347             =head2 sortedstr
2348              
2349             Return true if the input array is Inumerically sorted.
2350              
2351             @a=(1..10); print "array is sorted" if sortedstr @a; #false
2352             @a=("01".."10"); print "array is sorted" if sortedstr @a; #true
2353              
2354             =cut
2355              
2356             sub sorted (\@@) {
2357 4     4 1 11 my($a,$cmpsub)=@_;
2358 4         12 for(0..$#$a-1){
2359 1197 50 66     3729 return 0 if !$cmpsub and $$a[$_]>$$a[$_+1]
      66        
      33        
2360             or $cmpsub and &$cmpsub($$a[$_],$$a[$_+1])>0;
2361             }
2362 4         20 return 1;
2363             }
2364 99     99 1 458 sub sortedstr { sorted(@_,sub{$_[0]cmp$_[1]}) }
  2     2   13  
2365              
2366             =head2 part
2367              
2368             B A code-ref and a list
2369              
2370             B Two array-refs
2371              
2372             Like C but returns the false list as well. Partitions a list
2373             into two lists where each element goes into the first or second list
2374             whether the predicate (a code-ref) is true or false for that element.
2375              
2376             my( $odd, $even ) = part {$_%2} (1..8);
2377             print for @$odd; #prints 1 3 5 7
2378             print for @$even; #prints 2 4 6 8
2379              
2380             (Works like C< partition() > in the Scala programming language)
2381              
2382             =head2 parth
2383              
2384             Like C but returns any number of lists.
2385              
2386             B A code-ref and a list
2387              
2388             B A hash where the returned values from the code-ref are keys and the values are arrayrefs to the list elements which gave those keys.
2389              
2390             my %hash = parth { uc(substr($_,0,1)) } ('These','are','the','words','of','this','array');
2391             print serialize(\%hash);
2392              
2393             Result:
2394              
2395             %hash = ( T=>['These','the','this'],
2396             A=>['are','array'],
2397             O=>['of'],
2398             W=>['words'] )
2399              
2400             =head2 parta
2401              
2402             Like L but returns an array of lists.
2403              
2404             my @a = parta { length } qw/These are the words of this array/;
2405              
2406             Result:
2407              
2408             @a = ( undef, undef, ['of'], ['are','the'], ['this'], ['These','words','array'] )
2409              
2410             Two undefs at first (index positions 0 and 1) since there are no words of length 0 or 1 in the input array.
2411              
2412             =cut
2413              
2414 1 100   1 1 12 sub part (&@) { my($c,@r)=(shift,[],[]); push @{ $r[ &$c?0:1 ] }, $_ for @_; @r }
  1         2  
  8         19  
  1         5  
2415 1     1 1 162 sub parth (&@) { my($c,%r)=(shift); push @{ $r{ &$c } }, $_ for @_; %r }
  1         2  
  7         19  
  1         7  
2416 1     1 1 234 sub parta (&@) { my($c,@r)=(shift); push @{ $r[ &$c ] }, $_ for @_; @r }
  1         3  
  7         15  
  1         4  
2417             #sub mapn (&$@) { ... } like map but @_ contains n elems at a time, n=1 is map
2418              
2419             =head2 refa
2420              
2421             =head2 refh
2422              
2423             =head2 refs
2424              
2425             =head2 refaa
2426              
2427             =head2 refah
2428              
2429             =head2 refha
2430              
2431             =head2 refhh
2432              
2433             Returns true or false (1 or 0) if the argument is an arrayref, hashref, scalarref, ref to an array of arrays, ref to an array of hashes
2434              
2435             Examples:
2436              
2437             my $ref_to_array = [1,2,3];
2438             my $ref_to_hash = {1,100,2,200,3,300};
2439             my $ref_to_scalar = \"String";
2440             print "arrayref" if ref($ref_to_array) eq 'ARRAY'; #true
2441             print "hashref" if ref($ref_to_hash) eq 'HASH'; #true
2442             print "scalarref" if ref($ref_to_scalar) eq 'SCALAR'; #true
2443             print "arrayref" if refa($ref_to_array); #also true, without: eq 'ARRAY'
2444             print "hashref" if refh($ref_to_hash); #also true, without: eq 'HASH'
2445             print "scalarref" if refs($ref_to_scalar); #also true, without: eq 'SCALAR'
2446              
2447             my $ref_to_array_of_arrays = [ [1,2,3], [2,4,8], [10,100,1000] ];
2448             my $ref_to_array_of_hashes = [ {1=>10, 2=>100}, {first=>1, second=>2} ];
2449             my $ref_to_hash_of_arrays = { alice=>[1,2,3], bob=>[2,4,8], eve=>[10,100,1000] };
2450             my $ref_to_hash_of_hashes = { alice=>{a=>22,b=>11}, bob=>{a=>33,b=>66} };
2451              
2452             print "aa" if refaa($ref_to_array_of_arrays); #true
2453             print "ah" if refah($ref_to_array_of_hashes); #true
2454             print "ha" if refha($ref_to_hash_of_arrays); #true
2455             print "hh" if refhh($ref_to_hash_of_hashes); #true
2456              
2457             =cut
2458              
2459 3 0   3 1 24 sub refa { ref($_[0]) eq 'ARRAY' ? 1 : ref($_[0]) ? 0 : undef }
    50          
2460 3 0   3 1 13 sub refh { ref($_[0]) eq 'HASH' ? 1 : ref($_[0]) ? 0 : undef }
    50          
2461 1 0   1 1 6 sub refs { ref($_[0]) eq 'SCALAR' ? 1 : ref($_[0]) ? 0 : undef }
    50          
2462 1 0   1 1 6 sub refaa { ref($_[0]) eq 'ARRAY' ? refa($_[0][0]) : ref($_[0]) ? 0 : undef }
    50          
2463 1 0   1 1 5 sub refah { ref($_[0]) eq 'ARRAY' ? refh($_[0][0]) : ref($_[0]) ? 0 : undef }
    50          
2464 1 0   1 1 5 sub refha { ref($_[0]) eq 'HASH' ? refa((values%{$_[0]})[0]) : ref($_[0]) ? 0 : undef }
  1 50       4  
2465 1 0   1 1 6 sub refhh { ref($_[0]) eq 'HASH' ? refh((values%{$_[0]})[0]) : ref($_[0]) ? 0 : undef }
  1 50       4  
2466              
2467              
2468             =head2 pushr
2469              
2470             =head2 popr
2471              
2472             =head2 shiftr
2473              
2474             =head2 unshiftr
2475              
2476             =head2 splicer
2477              
2478             =head2 keysr
2479              
2480             =head2 valuesr
2481              
2482             =head2 eachr
2483              
2484             In Perl versions 5.12 - 5.22 push, pop, shift, unshift, splice, keys, values and each
2485             handled references to arrays and references to hashes just as if they where arrays and hashes. Examples:
2486              
2487             my $person={name=>'Gaga', array=>[1,2,3]};
2488             push $person{array} , 4; #works in perl 5.12-5.22 but not before and after
2489             push @{ $person{array} }, 4; #works in all perl5 versions
2490             pushr $person{array} , 4; #use Acme::Tools and this should work in perl >= 5.8
2491             popr $person{array}; #returns 4
2492              
2493             =cut
2494              
2495 3     3 1 258 sub pushr { push @{shift()}, @_ } # ? ($@)
  3         7  
2496 1     1 1 1 sub popr { pop @{shift()} }
  1         3  
2497 1     1 1 245 sub shiftr { shift @{shift()} }
  1         3  
2498 2     2 1 485 sub unshiftr { unshift @{shift()}, @_ }
  2         7  
2499 0         0 sub splicer { @_==1 ? splice( @{shift()} )
2500 1         4 :@_==2 ? splice( @{shift()}, shift() )
2501 2         7 :@_==3 ? splice( @{shift()}, shift(), shift() )
2502 4 50   4 1 1048 :@_>=4 ? splice( @{shift()}, shift(), shift(), @_ ) : die }
  1 100       6  
    100          
    50          
2503 1     1 1 246 sub keysr { keys( %{shift()} ) } #hm sort(keys%{shift()}) ?
  1         9  
2504 1     1 1 3 sub valuesr { values( %{shift()} ) }
  1         4  
2505 4 50   4 1 1265 sub eachr { ref($_[0]) eq 'HASH' ? each(%{shift()})
  4         13  
2506             #:ref($_[0]) eq 'ARRAY' ? each(@{shift()}) # perl 5.8.8 cannot compile each on array! eval?
2507             : croak("eachr needs hashref or arrayref got '".ref($_[0])."'") }
2508             #sub eachr { each(%{shift()}) }
2509              
2510             =head1 STATISTICS
2511              
2512             =head2 sum
2513              
2514             Returns the sum of a list of numbers. Undef is ignored.
2515              
2516             print sum(1,3,undef,8); # 12
2517             print sum(1..1000); # 500500
2518             print sum(undef); # undef
2519              
2520             =cut
2521              
2522 33   66 33 1 199 sub sum { my $sum; no warnings; defined($_) and $sum+=$_ for @_; $sum }
  33     60   37  
  33         2560  
  60         2852  
  60         10708  
  60         464  
2523              
2524             =head2 avg
2525              
2526             Returns the I number of a list of numbers. That is C
2527              
2528             print avg( 2, 4, 9); # 5 (2+4+9) / 3 = 5
2529             print avg( [2, 4, 9] ); # 5 pass by reference, same result but faster for large arrays
2530              
2531             Also known as I.
2532              
2533             Pass by reference: If one argument is given and it is a reference to an array,
2534             this array is taken as the list of numbers. This mode is about twice as fast
2535             for 10000 numbers or more. It most likely also saves memory.
2536              
2537             =cut
2538              
2539             sub avg {
2540 880     880 1 2181 my($sum,$n,@a)=(0,0);
2541 33     33   359 no warnings;
  33         46  
  33         11800  
2542 880 50       1088 if( @_==0 ) { return undef }
  0         0  
2543 880 100 100     1231 if( @_==1 and ref($_[0]) eq 'ARRAY' ){ @a=grep defined,@{$_[0]} }
  1         1  
  1         5  
2544 879         2279 else { @a=grep defined,@_ }
2545 880 100       1047 if( @a==0 ) { return undef }
  1         3  
2546 879         2966 $sum+=$_ for @a;
2547 879         2149 return $sum/@a
2548             }
2549              
2550             =head2 geomavg
2551              
2552             Returns the I (a.k.a I) of a list of numbers.
2553              
2554             print geomavg(10,100,1000,10000,100000); # 1000
2555             print 0+ (10*100*1000*10000*100000) ** (1/5); # 1000 same thing
2556             print exp(avg(map log($_),10,100,1000,10000,100000)); # 1000 same thing, this is how geomavg() works internally
2557              
2558             =cut
2559              
2560 771     771 1 1680 sub geomavg { exp(avg(map log($_), @_)) }
2561              
2562             =head2 harmonicavg
2563              
2564             Returns the I (a.k.a I) of a list of numbers. L
2565              
2566             print harmonicavg(10,11,12); # 3 / ( 1/10 + 1/11 + 1/12) = 10.939226519337
2567              
2568             =cut
2569              
2570 0     0 1 0 sub harmonicavg { my $s; $s+=1/$_ for @_; @_/$s }
  0         0  
  0         0  
2571              
2572             =head2 variance
2573              
2574             C<< variance = ( sum (x[i]-Average)**2)/(n-1) >>
2575              
2576             =cut
2577              
2578             sub variance {
2579 0     0 1 0 my $sumx2; $sumx2+=$_*$_ for @_;
  0         0  
2580 0         0 my $sumx; $sumx+=$_ for @_;
  0         0  
2581 0         0 (@_*$sumx2-$sumx*$sumx)/(@_*(@_-1));
2582             }
2583              
2584             =head2 stddev
2585              
2586             C<< Standard_Deviation = sqrt(variance) >>
2587              
2588             Standard deviation (stddev) is a measurement of the width of a normal
2589             distribution where one stddev on each side of the mean covers 68% and
2590             two stddevs 95%. Normal distributions are sometimes called Gauss curves
2591             or Bell shapes. L
2592              
2593             stddev(4,5,6,5,6,4,3,5,5,6,7,6,5,7,5,6,4) # = 1.0914103126635
2594             avg(@testscores) + stddev(@testscores) # = the score for IQ = 115 (by one definition)
2595             avg(@testscores) - stddev(@testscores) # = the score for IQ = 85
2596              
2597             =cut
2598              
2599             sub stddev {
2600 14 100   14 1 378 return undef if @_==0;
2601 13 100 66     48 return stddev(\@_) if @_>0 and !ref($_[0]);
2602 8         9 my $ar=shift;
2603 8 50       10 return undef if @$ar==0;
2604 8 100       12 return 0 if @$ar==1;
2605 6         6 my $sumx2; $sumx2 += $_*$_ for @$ar;
  6         52  
2606 6         5 my $sumx; $sumx += $_ for @$ar;
  6         41  
2607 6         22 sqrt( (@$ar*$sumx2-$sumx*$sumx)/(@$ar*(@$ar-1)) );
2608             }
2609              
2610             =head2 rstddev
2611              
2612             Relative stddev = stddev / avg
2613              
2614             =cut
2615              
2616 0     0 1 0 sub rstddev { stddev(@_) / avg(@_) }
2617              
2618             =head2 median
2619              
2620             Returns the median value of a list of numbers. The list do not have to
2621             be sorted.
2622              
2623             Example 1, list having an odd number of numbers:
2624              
2625             print median(1, 100, 101); # 100
2626              
2627             100 is the middlemost number after sorting.
2628              
2629             Example 2, an even number of numbers:
2630              
2631             print median(1005, 100, 101, 99); # 100.5
2632              
2633             100.5 is the average of the two middlemost numbers.
2634              
2635             =cut
2636              
2637             sub median {
2638 33     33   143 no warnings;
  33         50  
  33         11562  
2639 6     6 1 217 my @list = sort {$a<=>$b} @_;
  10035         5617  
2640 6         7 my $n=@list;
2641 6 100       164 $n%2 ? $list[($n-1)/2]
2642             : ($list[$n/2-1] + $list[$n/2])/2;
2643             }
2644              
2645              
2646             =head2 percentile
2647              
2648             Returns one or more percentiles of a list of numbers.
2649              
2650             Percentile 50 is the same as the I, percentile 25 is the first
2651             quartile, 75 is the third quartile.
2652              
2653             B
2654              
2655             First argument is your wanted percentile, or a refrence to a list of percentiles you want from the dataset.
2656              
2657             If the first argument to percentile() is a scalar, this percentile is returned.
2658              
2659             If the first argument is a reference to an array, then all those percentiles are returned as an array.
2660              
2661             Second, third, fourth and so on argument are the numbers from which you want to find the percentile(s).
2662              
2663             B
2664              
2665             This finds the 50-percentile (the median) to the four numbers 1, 2, 3 and 4:
2666              
2667             print "Median = " . percentile(50, 1,2,3,4); # 2.5
2668              
2669             This:
2670              
2671             @data=(11, 5, 3, 5, 7, 3, 1, 17, 4, 2, 6, 4, 12, 9, 0, 5);
2672             @p = map percentile($_,@data), (25, 50, 75);
2673              
2674             Is the same as this:
2675              
2676             @p = percentile([25, 50, 75], @data);
2677              
2678             But the latter is faster, especially if @data is large since it sorts
2679             the numbers only once internally.
2680              
2681             B
2682              
2683             Data: 1, 4, 6, 7, 8, 9, 22, 24, 39, 49, 555, 992
2684              
2685             Average (or mean) is 143
2686              
2687             Median is 15.5 (which is the average of 9 and 22 who both equally lays in the middle)
2688              
2689             The 25-percentile is 6.25 which are between 6 and 7, but closer to 6.
2690              
2691             The 75-percentile is 46.5, which are between 39 and 49 but close to 49.
2692              
2693             Linear interpolation is used to find the 25- and 75-percentile and any
2694             other x-percentile which doesn't fall exactly on one of the numbers in
2695             the set.
2696              
2697             B
2698              
2699             As you saw, 6.25 are closer to 6 than to 7 because 25% along the set of
2700             the twelve numbers is closer to the third number (6) than to he fourth
2701             (7). The median (50-percentile) is also really interpolated, but it is
2702             always in the middle of the two center numbers if there are an even count
2703             of numbers.
2704              
2705             However, there is two methods of interpolation:
2706              
2707             Example, we have only three numbers: 5, 6 and 7.
2708              
2709             Method 1: The most common is to say that 5 and 7 lays on the 25- and
2710             75-percentile. This method is used in Acme::Tools.
2711              
2712             Method 2: In Oracle databases the least and greatest numbers
2713             always lay on the 0- and 100-percentile.
2714              
2715             As an argument on why Oracles (and others?) definition is not the best way is to
2716             look at your data as for instance temperature measurements. If you
2717             place the highest temperature on the 100-percentile you are sort of
2718             saying that there can never be a higher temperatures in future measurements.
2719              
2720             A quick non-exhaustive Google survey suggests that method 1 here is most used.
2721              
2722             The larger the data sets, the less difference there is between the two methods.
2723              
2724             B
2725              
2726             In method one, when you want a percentile outside of any possible
2727             interpolation, you use the smallest and second smallest to extrapolate
2728             from. For instance in the data set C<5, 6, 7>, if you want an
2729             x-percentile of x < 25, this is below 5.
2730              
2731             If you feel tempted to go below 0 or above 100, C will
2732             I (or I to be more precise)
2733              
2734             Another method could be to use "soft curves" instead of "straight
2735             lines" in interpolation. Maybe B-splines or Bezier curves. This is not
2736             used here.
2737              
2738             For large sets of data Hoares algorithm would be faster than the
2739             simple straightforward implementation used in C
2740             here. Hoares don't sort all the numbers fully.
2741              
2742             B
2743              
2744             Data: 1, 4, 6, 7, 8, 9, 22, 24, 39, 49, 555, 992
2745              
2746             Percentile Method 1 Method 2
2747             (Acme::Tools::percentile (Oracle)
2748             and others)
2749             ------------- ----------------------------- ---------
2750             0 -2 1
2751             1 -1.61 1.33
2752             25 6.25 6.75
2753             50 (median) 15.5 15.5
2754             75 46.5 41.5
2755             99 1372.19 943.93
2756             100 1429 992
2757              
2758             Found like this:
2759              
2760             perl -MAcme::Tools -le 'print for percentile([0,1,25,50,75,99,100], 1,4,6,7,8,9,22,24,39,49,555,992)'
2761              
2762             And like this in Oracle-databases:
2763              
2764             select
2765             percentile_cont(0.00) within group(order by n) per0,
2766             percentile_cont(0.01) within group(order by n) per1,
2767             percentile_cont(0.25) within group(order by n) per25,
2768             percentile_cont(0.50) within group(order by n) per50,
2769             percentile_cont(0.75) within group(order by n) per75,
2770             percentile_cont(0.99) within group(order by n) per99,
2771             percentile_cont(1.00) within group(order by n) per100
2772             from (
2773             select 0+regexp_substr('1,4,6,7,8,9,22,24,39,49,555,992','[^,]+',1,i) n
2774             from dual,(select level i from dual connect by level <= 12)
2775             );
2776              
2777             (Oracle also provides a similar function: C where I
2778             is short for I, meaning no interpolation is taking
2779             place. Instead the closest number from the data set is picked.)
2780              
2781             =cut
2782              
2783             sub percentile {
2784 3     3 1 3 my(@p,@t,@ret);
2785 3 100       12 if(ref($_[0]) eq 'ARRAY'){ @p=@{shift()} }
  1 50       9  
  1         3  
2786 2         4 elsif(not ref($_[0])) { @p=(shift()) }
2787 0         0 else{croak()}
2788 3         5 @t=@_;
2789 3 50       6 return if !@p;
2790 3 50       6 croak if !@t;
2791 3         8 @t=sort{$a<=>$b}@t;
  72         46  
2792 3 50       8 push@t,$t[0] if @t==1;
2793 3         5 for(@p){
2794 9 50 33     34 croak if $_<0 or $_>100;
2795 9         13 my $i=(@t+1)*$_/100-1;
2796 9 50       49 push@ret,
    100          
    100          
2797             $i<0 ? $t[0]+($t[1]-$t[0])*$i:
2798             $i>$#t ? $t[-1]+($t[-1]-$t[-2])*($i-$#t):
2799             $i==int($i)? $t[$i]:
2800             $t[$i]*(int($i+1)-$i) + $t[$i+1]*($i-int($i));
2801             }
2802 3 100       42 return @p==1 ? $ret[0] : @ret;
2803             }
2804              
2805             =head1 RANDOM
2806              
2807             =head2 random
2808              
2809             B One or two arguments.
2810              
2811             B
2812              
2813             If two integer arguments: returns a random integer between the integers in argument one and two.
2814              
2815             If the first argument is an arrayref: returns a random member of that array without changing the array.
2816              
2817             If the first argument is an arrayref and there is a second arg: return that many random members of that array
2818              
2819             If the first argument is an hashref and there is no second arg: return a random key weighted by the values of that hash
2820              
2821             If the first argument is an hashref and there is a second arg: return that many random keys weighted by the values of that hash
2822              
2823             If there is no second argument and the first is an integer, a random integer between 0 and that number is returned. Including 0 and the number itself.
2824              
2825             B
2826              
2827             $dice=random(1,6); # 1, 2, 3, 4, 5 or 6
2828             $dice=random([1..6]); # same as previous
2829             @dice=random([1..6],10); # 10 dice tosses
2830             $dice=random({1=>1, 2=>1, 3=>1, 4=>1, 5=>1, 6=>2}); # weighted dice with 6 being twice as likely as the others
2831             @dice=random({1=>1, 2=>1, 3=>1, 4=>1, 5=>1, 6=>2},10); # 10 weighted dice tosses
2832             print random({head=>0.4999,tail=>0.4999,edge=>0.0002}); # coin toss (sum 1 here but not required to be)
2833             print random(2); # prints 0, 1 or 2
2834             print 2**random(7); # prints 1, 2, 4, 8, 16, 32, 64 or 128
2835             @dice=map random([1..6]), 1..10; # as third example above, but much slower
2836             perl -MAcme::Tools -le 'print for random({head=>0.499,tail=>0.499,edge=>0.002},10000);' | sort | uniq -c
2837              
2838             =cut
2839              
2840             sub random {
2841 21002     21002 1 29129 my($from,$to)=@_;
2842 21002         14989 my $ref=ref($from);
2843 21002 100       21822 if($ref eq 'ARRAY'){
    100          
2844 15001   100     41210 my @r=map $$from[rand@$from], 1..$to||1;
2845 15001 100       38073 return @_>1?@r:$r[0];
2846             }
2847             elsif($ref eq 'HASH') {
2848 4001         7162 my @k=keys%$from;
2849 33   66 33   142 my $max;do{no warnings 'uninitialized';$_>$max and $max=$_ or $_<0 and croak"negative weight" for values%$from};
  33   33     39  
  33   66     25052  
  4001         2952  
  4001         2168  
  4001         34369  
2850 4001   100     8624 my @r=map {my$r;1 while $$from{$r=$k[rand@k]}
  4010         2219  
  4010         9691  
  4010         5493  
2851 4001 100       9941 return @_>1?@r:$r[0];
2852             }
2853 2000 100       2323 ($from,$to)=(0,$from) if @_==1;
2854 2000 50       2066 ($from,$to)=($to,$from) if $from>$to;
2855 2000         2860 return int($from+rand(1+$to-$from));
2856             }
2857             #todo?: https://en.wikipedia.org/wiki/Irwin%E2%80%93Hall_distribution
2858              
2859             =head2 random_gauss
2860              
2861             Returns an pseudo-random number with a Gaussian distribution instead
2862             of the uniform distribution of perls C or C in this
2863             module. The algorithm is a variation of the one at
2864             L which is both faster
2865             and better than adding a long series of C.
2866              
2867             Uses perls C function internally.
2868              
2869             B 0 - 3 arguments.
2870              
2871             First argument: the average of the distribution. Default 0.
2872              
2873             Second argument: the standard deviation of the distribution. Default 1.
2874              
2875             Third argument: If a third argument is present, C
2876             returns an array of that many pseudo-random numbers. If there is no
2877             third argument, a number (a scalar) is returned.
2878              
2879             B One or more pseudo-random numbers with a Gaussian distribution. Also known as a Bell curve or Normal distribution.
2880              
2881             Example:
2882              
2883             my @I=random_gauss(100, 15, 100000); # produces 100000 pseudo-random numbers, average=100, stddev=15
2884             #my @I=map random_gauss(100, 15), 1..100000; # same but more than three times slower
2885             print "Average is: ".avg(@I)."\n"; # prints a number close to 100
2886             print "Stddev is: ".stddev(@I)."\n"; # prints a number close to 15
2887              
2888             my @M=grep $_>100+15*2, @I; # those above 130
2889             print "Percent above two stddevs: ".(100*@M/@I)."%\n"; #prints a number close to 2.2%
2890              
2891             Example 2:
2892              
2893             my $num=1e6;
2894             my @h; $h[$_/2]++ for random_gauss(100,15, $num);
2895             $h[$_] and printf "%3d - %3d %6d %s\n",
2896             $_*2,$_*2+1,$h[$_],'=' x ($h[$_]*1000/$num)
2897             for 1..200/2;
2898              
2899             ...prints an example of the famous Bell curve:
2900              
2901             44 - 45 70
2902             46 - 47 114
2903             48 - 49 168
2904             50 - 51 250
2905             52 - 53 395
2906             54 - 55 588
2907             56 - 57 871
2908             58 - 59 1238 =
2909             60 - 61 1807 =
2910             62 - 63 2553 ==
2911             64 - 65 3528 ===
2912             66 - 67 4797 ====
2913             68 - 69 6490 ======
2914             70 - 71 8202 ========
2915             72 - 73 10577 ==========
2916             74 - 75 13319 =============
2917             76 - 77 16283 ================
2918             78 - 79 20076 ====================
2919             80 - 81 23742 =======================
2920             82 - 83 27726 ===========================
2921             84 - 85 32205 ================================
2922             86 - 87 36577 ====================================
2923             88 - 89 40684 ========================================
2924             90 - 91 44515 ============================================
2925             92 - 93 47575 ===============================================
2926             94 - 95 50098 ==================================================
2927             96 - 97 52062 ====================================================
2928             98 - 99 53338 =====================================================
2929             100 - 101 52834 ====================================================
2930             102 - 103 52185 ====================================================
2931             104 - 105 50472 ==================================================
2932             106 - 107 47551 ===============================================
2933             108 - 109 44471 ============================================
2934             110 - 111 40704 ========================================
2935             112 - 113 36642 ====================================
2936             114 - 115 32171 ================================
2937             116 - 117 28166 ============================
2938             118 - 119 23618 =======================
2939             120 - 121 19873 ===================
2940             122 - 123 16360 ================
2941             124 - 125 13452 =============
2942             126 - 127 10575 ==========
2943             128 - 129 8283 ========
2944             130 - 131 6224 ======
2945             132 - 133 4661 ====
2946             134 - 135 3527 ===
2947             136 - 137 2516 ==
2948             138 - 139 1833 =
2949             140 - 141 1327 =
2950             142 - 143 860
2951             144 - 145 604
2952             146 - 147 428
2953             148 - 149 275
2954             150 - 151 184
2955             152 - 153 111
2956             154 - 155 67
2957              
2958             =cut
2959              
2960             sub random_gauss {
2961 1     1 1 2 my($avg,$stddev,$num)=@_;
2962 1 50       3 $avg=0 if !defined $avg;
2963 1 50       3 $stddev=1 if !defined $stddev;
2964 1 50       3 $num=1 if !defined $num;
2965 1 50       4 croak "random_gauss should not have more than 3 arguments" if @_>3;
2966 1         2 my @r;
2967 1         4 while (@r<$num) {
2968 2500         1384 my($x1,$x2,$w);
2969 2500         1359 do {
2970 3196         2058 $x1=2.0*rand()-1.0;
2971 3196         1739 $x2=2.0*rand()-1.0;
2972 3196         3830 $w=$x1*$x1+$x2*$x2;
2973             } while $w>=1.0;
2974 2500         1868 $w=sqrt(-2.0*log($w)/$w) * $stddev;
2975 2500         3726 push @r, $x1*$w + $avg,
2976             $x2*$w + $avg;
2977             }
2978 1 50       4 pop @r if @r > $num;
2979 1 50       6 return $r[0] if @_<3;
2980 1         503 return @r;
2981             }
2982              
2983             =head2 mix
2984              
2985             Mixes an array in random order. In-place if given an array reference or not if given an array.
2986              
2987             C could also have been named C, as in shuffling a deck of cards.
2988              
2989             Example:
2990              
2991             This:
2992              
2993             print mix("a".."z"),"\n" for 1..3;
2994              
2995             ...could write something like:
2996              
2997             trgoykzfqsduphlbcmxejivnwa
2998             qycatilmpgxbhrdezfwsovujkn
2999             ytogrjialbewcpvndhkxfzqsmu
3000              
3001             B
3002              
3003             =over 4
3004              
3005             =item 1.
3006             Either a reference to an array as the only input. This array will then be mixed I. The array will be changed:
3007              
3008             This: C<< @a=mix(@a) >> is the same as: C<< mix(\@a) >>.
3009              
3010             =item 2.
3011             Or an array of zero, one or more elements.
3012              
3013             =back
3014              
3015             Note that an input-array which COINCIDENTLY SOME TIMES has one element
3016             (but more other times), and that element is an array-ref, you will
3017             probably not get the expected result.
3018              
3019             To check distribution:
3020              
3021             perl -MAcme::Tools -le 'print mix("a".."z") for 1..26000'|cut -c1|sort|uniq -c|sort -n
3022              
3023             The letters a-z should occur around 1000 times each.
3024              
3025             Shuffles a deck of cards: (s=spaces, h=hearts, c=clubs, d=diamonds)
3026              
3027             perl -MAcme::Tools -le '@cards=map join("",@$_),cart([qw/s h c d/],[2..10,qw/J Q K A/]); print join " ",mix(@cards)'
3028              
3029             (Uses L, which is not a typo, see further down here)
3030              
3031             Note: C is approximately four times faster. Both respects the Perl built-in C.
3032              
3033             =cut
3034              
3035             sub mix {
3036 1020 50 33 1020 1 2860 if(@_==1 and ref($_[0]) eq 'ARRAY'){ #kun ett arg, og det er ref array
3037 0         0 my $r=$_[0];
3038 0         0 push@$r,splice(@$r,rand(@$r-$_),1) for 0..(@$r-1);
3039 0         0 return $r;
3040             }
3041             else{
3042 1020         1085 my@e=@_;
3043 1020         7763 push@e,splice(@e,rand(@e-$_),1) for 0..$#e;
3044 1020         2717 return @e;
3045             }
3046             }
3047              
3048             =head2 pwgen
3049              
3050             Generates random passwords.
3051              
3052             B 0-n args
3053              
3054             * First arg: length of password(s), default 8
3055              
3056             * Second arg: number of passwords, default 1
3057              
3058             * Third arg: string containing legal chars in password, default A-Za-z0-9,-./&%_!
3059              
3060             * Fourth to n'th arg: list of requirements for passwords, default if the third arg is false/undef (so default third arg is used) is:
3061              
3062             sub{/^[a-zA-Z0-9].*[a-zA-Z0-9]$/ and /[a-z]/ and /[A-Z]/ and /\d/ and /[,-.\/&%_!]/}
3063              
3064             ...meaning the password should:
3065             * start and end with: a letter a-z (lower- or uppercase) or a digit 0-9
3066             * should contain at least one char from each of the groups lower, upper, digit and special char
3067              
3068             To keep the default requirement-sub but add additional ones just set the fourth arg to false/undef
3069             and add your own requirements in the fifth arg and forward (examples below). Sub pwgen uses perls
3070             own C internally.
3071              
3072             C<< $Acme::Tools::Pwgen_max_sec >> and C<< $Acme::Tools::Pwgen_max_trials >> can be set to adjust for how long
3073             pwgen tries to find a password. Defaults for those are 0.01 and 10000.
3074             Whenever one of the two limits is reached, a first generates a croak.
3075              
3076             Examples:
3077              
3078             my $pw=pwgen(); # a random 8 chars password A-Z a-z 0-9 ,-./&%!_ (8 is default length)
3079             my $pw=pwgen(12); # a random 12 chars password A-Z a-z 0-9 ,-./&%!_
3080             my @pw=pwgen(0,10); # 10 random 8 chars passwords, containing the same possible chars
3081             my @pw=pwgen(0,1000,'A-Z'); # 1000 random 8 chars passwords containing just uppercase letters from A to Z
3082              
3083             pwgen(3); # dies, defaults require chars in each of 4 group (see above)
3084             pwgen(5,1,'A-C0-9', qr/^\D{3}\d{2}$/); # a 5 char string starting with three A, B or Cs and endring with two digits
3085             pwgen(5,1,'ABC0-9',sub{/^\D{3}\d{2}$/}); # same as above
3086              
3087             Examples of adding additional requirements to the default ones:
3088              
3089             my @pwreq = ( qr/^[A-C]/ );
3090             pwgen(8,1,'','',@pwreq); # use defaults for allowed chars and the standard requirements
3091             # but also demand that the password must start with A, B or C
3092              
3093             push @pwreq, sub{ not /[a-z]{3}/i };
3094             pwgen(8,1,'','',@pwreq); # as above and in addition the password should not contain three
3095             # or more consecutive letters (to avoid "offensive" words perhaps)
3096              
3097             =cut
3098              
3099             our $Pwgen_max_sec=0.01; #max seconds/password before croak (for hard to find requirements)
3100             our $Pwgen_max_trials=10000; #max trials/password before croak (for hard to find requirements)
3101             our $Pwgen_sec=0; #seconds used in last call to pwgen()
3102             our $Pwgen_trials=0; #trials in last call to pwgen()
3103 3218 100 100 3218 0 23102 sub pwgendefreq{/^[a-z].*[a-z\d]$/i and /[a-z]/ and /[A-Z]/ and /\d/ and /[,-.\/&%_!]/}
      100        
      100        
3104             sub pwgen {
3105 60     60 1 3181 my($len,$num,$chars,@req)=@_;
3106 60   100     95 $len||=8;
3107 60   100     97 $num||=1;
3108 60   100     89 $chars||='A-Za-z0-9,-./&%_!';
3109 60 100 50     95 $req[0]||=\&pwgendefreq if !$_[2];
3110 60         1202 $chars=~s/([$_])-([$_])/join("","$1".."$2")/eg for ('a-z','A-Z','0-9');
  125         1412  
3111 60         87 my($c,$t,@pw,$d)=(length($chars),time_fp());
3112 60         55 ($Pwgen_trials,$Pwgen_sec)=(0,0);
3113             TRIAL:
3114 60         89 while(@pw<$num){
3115 12286 100 66     33180 croak "pwgen timeout after $Pwgen_trials trials"
      66        
3116             if ++$Pwgen_trials >= $Pwgen_max_trials
3117             or ($d=time_fp()-$t) > $Pwgen_max_sec*$num
3118             and $d!~/^\d+$/; #jic int from time_fp
3119 12284         49598 my $pw=join"",map substr($chars,rand($c),1),1..$len;
3120 12284         15611 for my $r (@req){
3121 18216 100       25160 if (ref($r) eq 'CODE' ){ local$_=$pw; &$r() or next TRIAL }
  7178 100       5326  
  7178 50       7033  
3122 33 100   33   178 elsif(ref($r) eq 'Regexp'){ no warnings; $pw=~$r or next TRIAL }
  33         48  
  33         5974  
  11038         33505  
3123 0         0 else { croak "pwgen: invalid req type $r ".ref($r) }
3124             }
3125 651         1506 push@pw,$pw;
3126             }
3127 58         63 $Pwgen_sec=time_fp()-$t;
3128 58 100       284 return $pw[0] if $num==1;
3129 7         278 return @pw;
3130             }
3131              
3132             # =head1 veci
3133             #
3134             # Perls C takes 1, 2, 4, 8, 16, 32 and possibly 64 as its third argument.
3135             #
3136             # This limitation is removed with C (vec improved, but much slower)
3137             #
3138             # The third argument still needs to be 32 or lower (or possibly 64 or lower).
3139             #
3140             # =cut
3141             #
3142             # sub vecibs ($) {
3143             # my($s,$o,$b,$new)=@_;
3144             # if($b=~/^(1|2|4|8|16|32|64)$/){
3145             # return vec($s,$o,$b)=$new if @_==4;
3146             # return vec($s,$o,$b);
3147             # }
3148             # my $bb=$b<4?4:$b<8?8:$b<16?16:$b<32?32:$b<64?64:die;
3149             # my $ob=int($o*$b/$bb);
3150             # my $v=vec($s,$ob,$bb)*2**$bb+vec($s,$ob+1,$bb);
3151             # $v & (2**$b-1)
3152             # }
3153              
3154              
3155             =head1 SETS
3156              
3157             =head2 distinct
3158              
3159             Returns the values of the input list, sorted alfanumerically, but only
3160             one of each value. This is the same as L except uniq does not
3161             sort the returned list.
3162              
3163             Example:
3164              
3165             print join(", ", distinct(4,9,3,4,"abc",3,"abc")); # 3, 4, 9, abc
3166             print join(", ", distinct(4,9,30,4,"abc",30,"abc")); # 30, 4, 9, abc note: alphanumeric sort
3167              
3168             =cut
3169              
3170 1     1 1 165 sub distinct { return sort keys %{{map {($_,1)} @_}} }
  1         3  
  7         18  
3171              
3172             =head2 in
3173              
3174             Returns I<1> (true) if first argument is in the list of the remaining arguments. Uses the perl-operator C<< eq >>.
3175              
3176             Otherwise it returns I<0> (false).
3177              
3178             print in( 5, 1,2,3,4,6); # 0
3179             print in( 4, 1,2,3,4,6); # 1
3180             print in( 'a', 'A','B','C','aa'); # 0
3181             print in( 'a', 'A','B','C','a'); # 1
3182              
3183             I guess in perl 5.10 or perl 6 you could use the C<< ~~ >> operator instead.
3184              
3185             =head2 in_num
3186              
3187             Just as sub L, but for numbers. Internally uses the perl operator C<< == >> instead of C< eq >.
3188              
3189             print in(5000, '5e3'); # 0
3190             print in(5000, 5e3); # 1 since 5e3 is converted to 5000 before the call
3191             print in_num(5000, 5e3); # 1
3192             print in_num(5000, '+5.0e03'); # 1
3193              
3194             =cut
3195              
3196             sub in {
3197 33     33   142 no warnings 'uninitialized';
  33         50  
  33         2294  
3198 95     95 1 101 my $val=shift;
3199 95 100       94 for(@_){ return 1 if $_ eq $val }
  294         534  
3200 6         19 return 0;
3201             }
3202              
3203             sub in_num {
3204 33     33   138 no warnings 'uninitialized';
  33         44  
  33         60742  
3205 1     1 1 1 my $val=shift;
3206 1 100       3 for(@_){ return 1 if $_ == $val }
  5000         5299  
3207 0         0 return 0;
3208             }
3209              
3210             =head2 union
3211              
3212             Input: Two arrayrefs. (Two lists, that is)
3213              
3214             Output: An array containing all elements from both input lists, but no element more than once even if it occurs twice or more in the input.
3215              
3216             Example, prints 1,2,3,4:
3217              
3218             perl -MAcme::Tools -le 'print join ",", union([1,2,3],[2,3,3,4,4])' # 1,2,3,4
3219              
3220             =cut
3221              
3222 1     1 1 1 sub union { my %seen; grep !$seen{$_}++, map @{shift()},@_ }
  1         3  
  2         12  
3223             =head2 minus
3224              
3225             Input: Two arrayrefs.
3226              
3227             Output: An array containing all elements in the first input array but not in the second.
3228              
3229             Example:
3230              
3231             perl -MAcme::Tools -le 'print join " ", minus( ["five", "FIVE", 1, 2, 3.0, 4], [4, 3, "FIVE"] )'
3232              
3233             Output is C<< five 1 2 >>.
3234              
3235             =cut
3236              
3237             sub minus {
3238 1     1 0 1 my %seen;
3239 1         2 my %notme=map{($_=>1)}@{$_[1]};
  3         6  
  1         2  
3240 1   66     1 grep !$notme{$_}&&!$seen{$_}++, @{$_[0]};
  1         25  
3241             }
3242              
3243             =head2 intersect
3244              
3245             Input: Two arrayrefs
3246              
3247             Output: An array containing all elements which exists in both input arrays.
3248              
3249             Example:
3250              
3251             perl -MAcme::Tools -le 'print join" ", intersect( ["five", 1, 2, 3.0, 4], [4, 2+1, "five"] )' # 4 3 five
3252              
3253             Output: C<< 4 3 five >>
3254              
3255             =cut
3256              
3257             sub intersect {
3258 44     44 1 31 my %first=map{($_=>1)}@{$_[0]};
  52         89  
  44         46  
3259 44         34 my %seen;
3260 44 100       36 return grep{$first{$_}&&!$seen{$_}++}@{$_[1]};
  50         193  
  44         37  
3261             }
3262              
3263             =head2 not_intersect
3264              
3265             Input: Two arrayrefs
3266              
3267             Output: An array containing all elements member of just one of the input arrays (not both).
3268              
3269             Example:
3270              
3271             perl -MAcme::Tools -le ' print join " ", not_intersect( ["five", 1, 2, 3.0, 4], [4, 2+1, "five"] )'
3272              
3273             The output is C<< 1 2 >>.
3274              
3275             =cut
3276              
3277             sub not_intersect {
3278 1     1 1 1 my %code;
3279             my %seen;
3280 1         1 for(@{$_[0]}){$code{$_}|=1}
  1         3  
  5         11  
3281 1         1 for(@{$_[1]}){$code{$_}|=2}
  1         3  
  3         3  
3282 1 100       2 return grep{$code{$_}!=3&&!$seen{$_}++}(@{$_[0]},@{$_[1]});
  8         22  
  1         2  
  1         2  
3283             }
3284              
3285             =head2 uniq
3286              
3287             Input: An array of strings (or numbers)
3288              
3289             Output: The same array in the same order, except elements which exists earlier in the list.
3290              
3291             Same as L but distinct sorts the returned list, I does not.
3292              
3293             Example:
3294              
3295             my @t=(7,2,3,3,4,2,1,4,5,3,"x","xx","x",02,"07");
3296             print join " ", uniq @t; # prints 7 2 3 4 1 5 x xx 07
3297              
3298             =cut
3299              
3300 1     1 1 2 sub uniq(@) { my %seen; grep !$seen{$_}++, @_ }
  1         17  
3301              
3302             =head1 HASHES
3303              
3304             =head2 subhash
3305              
3306             Copies a subset of keys/values from one hash to another.
3307              
3308             B First argument is a reference to a hash. The rest of the arguments are a list of the keys of which key/value-pair you want to be copied.
3309              
3310             B The hash consisting of the keys and values you specified.
3311              
3312             Example:
3313              
3314             %population = ( Norway=>5000000, Sweden=>9500000, Finland=>5400000,
3315             Denmark=>5600000, Iceland=>320000,
3316             India => 1.21e9, China=>1.35e9, USA=>313e6, UK=>62e6 );
3317              
3318             %scandinavia = subhash( \%population , 'Norway', 'Sweden', 'Denmark' ); # this and
3319             %scandinavia = (Norway=>5000000,Sweden=>9500000,Denmark=>5600000); # this is the same
3320              
3321             print "Population of $_ is $scandinavia{$_}\n" for keys %scandinavia;
3322              
3323             ...prints the populations of the three scandinavian countries.
3324              
3325             Note: The values are NOT deep copied when they are references. (Use C<< Storable::dclone() >> to do that).
3326              
3327             Note2: For perl version 5.20+ subhashes (hash slices returning keys as well as values) is built in like this:
3328              
3329             %scandinavia = %population{'Norway','Sweden','Denmark'};
3330              
3331             =cut
3332              
3333             sub subhash {
3334 1     1 1 1 my $hr=shift;
3335 1         2 my @r;
3336 1         2 for(@_){ push@r,($_=>$$hr{$_}) }
  3         9  
3337 1         9 return @r;
3338             }
3339              
3340             =head2 hashtrans
3341              
3342             B a reference to a hash of hashes
3343              
3344             B a hash like the input-hash, but matrix transposed (kind of). Think of it as if X and Y has swapped places.
3345              
3346             %h = ( 1 => {a=>33,b=>55},
3347             2 => {a=>11,b=>22},
3348             3 => {a=>88,b=>99} );
3349             print serialize({hashtrans(\%h)},'v');
3350              
3351             Gives:
3352              
3353             %v=( 'a'=>{'1'=>'33','2'=>'11','3'=>'88'},
3354             'b'=>{'1'=>'55','2'=>'22','3'=>'99'} );
3355              
3356             =cut
3357              
3358             #Hashtrans brukes automatisk når første argument er -1 i sub hashtabell()
3359              
3360             sub hashtrans {
3361 1     1 1 2 my $h=shift;
3362 1         1 my %new;
3363 1         4 for my $k (keys%$h){
3364 3         4 my $r=$$h{$k};
3365 3         4 for(keys%$r){
3366 6         8 $new{$_}{$k}=$$r{$_};
3367             }
3368             }
3369 1         11 return %new;
3370             }
3371              
3372             =head2 a2h
3373              
3374             B array of arrays
3375              
3376             B array of hashes
3377              
3378             Transforms an array of arrays (arrayrefs) to an array of hashes (hashrefs).
3379              
3380             Example:
3381              
3382             my @h = a2h( ['Name', 'Age', 'Gender'], #1st row become keys
3383             ['Alice', 20, 'F'],
3384             ['Bob', 30, 'M'],
3385             ['Eve', undef, 'F'] );
3386              
3387             Result array @h:
3388              
3389             (
3390             {Name=>'Alice', Age=>20, Gender=>'F'},
3391             {Name=>'Bob', Age=>30, Gender=>'M'},
3392             {Name=>'Eve', Age=>undef, Gender=>'F'},
3393             );
3394              
3395             =head2 h2a
3396              
3397             B array of hashes
3398              
3399             B array of arrays
3400              
3401             Opposite of L
3402              
3403             =cut
3404              
3405             sub a2h {
3406 1     1 1 76 my @col=@{shift@_};
  1         5  
3407 1         3 map { my%h;@h{@col}=@$_;\%h} @_;
  10         8  
  10         32  
  10         15  
3408             }
3409              
3410             sub h2a {
3411 1     1 1 5 my %c;
3412 1         18 map $c{$_}++, keys%$_ for @_;
3413 1 0       8 my @c=sort{$c{$a}<=>$c{$b} or $a cmp $b}keys%c;
  4         11  
3414 1         12 (\@c,map[@$_{@c}],@_);
3415             }
3416              
3417             =head1 COMPRESSION
3418              
3419             L, L, L, L, L, and L
3420             compresses and uncompresses strings to save space in disk, memory,
3421             database or network transfer. Trades time for space. (Beware of wormholes)
3422              
3423             =head2 zipb64
3424              
3425             Compresses the input (text or binary) and returns a base64-encoded string of the compressed binary data.
3426             No known limit on input length, several MB has been tested, as long as you've got the RAM...
3427              
3428             B One or two strings.
3429              
3430             First argument: The string to be compressed.
3431              
3432             Second argument is optional: A I string.
3433              
3434             B a base64-kodet string of the compressed input.
3435              
3436             The use of an optional I string will result in an even
3437             further compressed output in the dictionary string is somewhat similar
3438             to the string that is compressed (the data in the first argument).
3439              
3440             If x relatively similar string are to be compressed, i.e. x number
3441             automatic of email responses to some action by a user, it will pay of
3442             to choose one of those x as a dictionary string and store it as
3443             such. (You will also use the same dictionary string when decompressing
3444             using L.
3445              
3446             The returned string is base64 encoded. That is, the output is 33%
3447             larger than it has to be. The advantage is that this string more
3448             easily can be stored in a database (without the hassles of CLOB/BLOB)
3449             or perhaps easier transfer in http POST requests (it still needs some
3450             url-encoding, normally). See L and L for the
3451             same without base 64 encoding.
3452              
3453             Example 1, normal compression without dictionary:
3454              
3455             $txt = "Test av komprimering, hva skjer? " x 10; # ten copies of this norwegian string, $txt is now 330 bytes (or chars rather...)
3456             print length($txt)," bytes input!\n"; # prints 330
3457             $zip = zipb64($txt); # compresses
3458             print length($zip)," bytes output!\n"; # prints 65
3459             print $zip; # prints the base64 string ("noise")
3460              
3461             $output=unzipb64($zip); # decompresses
3462             print "Hurra\n" if $output eq $txt; # prints Hurra if everything went well
3463             print length($output),"\n"; # prints 330
3464              
3465             Example 2, same compression, now with dictionary:
3466              
3467             $txt = "Test av komprimering, hva skjer? " x 10; # Same original string as above
3468             $dict = "Testing av kompresjon, hva vil skje?"; # dictionary with certain similarities
3469             # of the text to be compressed
3470             $zip2 = zipb64($txt,$dict); # compressing with $dict as dictionary
3471             print length($zip2)," bytes output!\n"; # prints 49, which is less than 65 in ex. 1 above
3472             $output=unzipb64($zip2,$dict); # uses $dict in the decompressions too
3473             print "Hurra\n" if $output eq $txt; # prints Hurra if everything went well
3474              
3475              
3476             Example 3, dictionary = string to be compressed: (out of curiosity)
3477              
3478             $txt = "Test av komprimering, hva skjer? " x 10; # Same original string as above
3479             $zip3 = zipb64($txt,$txt); # hmm
3480             print length($zip3)," bytes output!\n"; # prints 25
3481             print "Hurra\n" if unzipb64($zip3,$txt) eq $txt; # hipp hipp ...
3482              
3483             zipb64() and zipbin() is really just wrappers around L and C & co there.
3484              
3485             =cut
3486              
3487             sub zipb64 {
3488 9     9 1 631 require MIME::Base64;
3489 9         593 return MIME::Base64::encode_base64(zipbin(@_));
3490             }
3491              
3492              
3493             =head2 zipbin
3494              
3495             C does the same as C except that zipbin()
3496             does not base64 encode the result. Returns binary data.
3497              
3498             See L for documentation.
3499              
3500             =cut
3501              
3502             sub zipbin {
3503 17     17 1 12426 require Compress::Zlib;
3504 17         23 my($data,$dict)=@_;
3505 17 50 100     99 my $x=Compress::Zlib::deflateInit(-Dictionary=>$dict||'',-Level=>Compress::Zlib::Z_BEST_COMPRESSION()) or croak();
3506 17 50       3561 my($output,$status)=$x->deflate($data); croak() if $status!=Compress::Zlib::Z_OK();
  17         48061  
3507 17 50       70 my($out,$status2)=$x->flush(); croak() if $status2!=Compress::Zlib::Z_OK();
  17         1723  
3508 17         415 return $output.$out;
3509             }
3510              
3511             =head2 unzipb64
3512              
3513             Opposite of L.
3514              
3515             Input:
3516              
3517             First argument: A string made by L
3518              
3519             Second argument: (optional) a dictionary string which where used in L.
3520              
3521             Output: The original string (be it text or binary).
3522              
3523             See L.
3524              
3525             =cut
3526              
3527             sub unzipb64 {
3528 2     2 1 3 my($data,$dict)=@_;
3529 2         10 require MIME::Base64;
3530 2         31 unzipbin(MIME::Base64::decode_base64($data),$dict);
3531             }
3532              
3533             =head2 unzipbin
3534              
3535             C does the same as L except that C
3536             wants a pure binary compressed string as input, not base64.
3537              
3538             See L for documentation.
3539              
3540             =cut
3541              
3542             sub unzipbin {
3543 5     5 1 215 require Compress::Zlib;
3544 5         12 require Carp;
3545 5         5 my($data,$dict)=@_;
3546 5 50 50     31 my $x=Compress::Zlib::inflateInit(-Dictionary=>$dict||'') or croak();
3547 5         420 my($output,$status)=$x->inflate($data);
3548 5 50       458 croak() if $status!=Compress::Zlib::Z_STREAM_END();
3549 5         61 return $output;
3550             }
3551              
3552             =head2 gzip
3553              
3554             B A string or reference to a string you want to compress. Text or binary.
3555              
3556             B The binary compressed representation of that input string.
3557              
3558             C is really just a wrapper for C< Compress:Zlib::memGzip() > and uses the same
3559             compression algorithm as the well known GNU program gzip found in most unix/linux/cygwin
3560             distros. Except C does this in-memory. (Both using the C-library C).
3561              
3562             writefile( "file.gz", gzip("some string") );
3563              
3564             =head2 gunzip
3565              
3566             B A binary compressed string or a reference to such a string. I.e. something returned from
3567             C earlier or read from a C<< .gz >> file.
3568              
3569             B The original larger non-compressed string. Text or binary.
3570              
3571             C is a wrapper for Compress::Zlib::memGunzip()
3572              
3573             print gunzip( gzip("some string") ); #some string
3574              
3575             =head2 bzip2
3576              
3577             Same as L and L except with a different compression algorithm (compresses more but is slower). Wrapper for Compress::Bzip2::memBzip.
3578              
3579             Compared to gzip/gunzip, bzip2 compression is much slower, bunzip2 decompression not so much.
3580              
3581             See also L, C, C, C.
3582              
3583             writefile( "file.bz2", bzip2("some string") );
3584             print bunzip2( bzip2("some string") ); #some string
3585              
3586             =head2 bunzip2
3587              
3588             Decompressed something compressed by bzip2() or data from a C<.bz2> file. See L.
3589              
3590             =cut
3591              
3592 8 100   8 1 12654 sub gzip { my $s=shift(); eval"require Compress::Zlib" if !$INC{'Compress/Zlib.pm'}; croak "Compress::Zlib not found" if $@; Compress::Zlib::memGzip( ref($s)?$s:\$s ) }
  8 50       204  
  8 50       85311  
  8         37  
3593 3 50   3 1 12456 sub gunzip { my $s=shift(); eval"require Compress::Zlib" if !$INC{'Compress/Zlib.pm'}; croak "Compress::Zlib not found" if $@; Compress::Zlib::memGunzip( ref($s)?$s:\$s ) }
  3 50       10  
  3 50       6  
  3         15  
3594 0 0   0 1 0 sub bzip2 { my $s=shift(); eval"require Compress::Bzip2" if !$INC{'Compress/Bzip2.pm'}; croak "Compress::Bzip2 not found" if $@; Compress::Bzip2::memBzip( ref($s)?$s:\$s ) }
  0 0       0  
  0 0       0  
  0         0  
3595 0 0   0 1 0 sub bunzip2 { my $s=shift(); eval"require Compress::Bzip2" if !$INC{'Compress/Bzip2.pm'}; croak "Compress::Bzip2 not found" if $@; Compress::Bzip2::memBunzip( ref($s)?$s:\$s ) }
  0 0       0  
  0 0       0  
  0         0  
3596              
3597             =head1 NET, WEB, CGI-STUFF
3598              
3599             =head2 ipaddr
3600              
3601             B an IP-number
3602              
3603             B either an IP-address I or an empty string
3604             if the DNS lookup didn't find anything.
3605              
3606             Example:
3607              
3608             perl -MAcme::Tools -le 'print ipaddr("129.240.8.200")' # prints www.uio.no
3609              
3610             Uses perls C internally.
3611              
3612             C memoizes the results internally (using the
3613             C<%Acme::Tools::IPADDR_memo> hash) so only the first loopup on a
3614             particular IP number might take some time.
3615              
3616             Some few DNS loopups can take several seconds.
3617             Most is done in a fraction of a second. Due to this slowness, medium to high traffic web servers should
3618             probably turn off hostname lookups in their logs and just log IP numbers by using
3619             C in Apache C and then use I afterwards if necessary.
3620              
3621             =cut
3622              
3623             our %IPADDR_memo;
3624             sub ipaddr {
3625 2     2 1 458 my $ipnr=shift;
3626             #NB, 2-tallet på neste kodelinje er ikke det samme på alle os,
3627             #men ser ut til å funke i linux og hpux. Den Riktige Måten(tm)
3628             #er konstanten AF_INET i Socket eller IO::Socket-pakken.
3629 2   66     207790 return $IPADDR_memo{$ipnr} ||= gethostbyaddr(pack("C4",split("\\.",$ipnr)),2);
3630             }
3631              
3632             =head2 ipnum
3633              
3634             C does the opposite of C
3635              
3636             Does an attempt of converting an IP address (hostname) to an IP number.
3637             Uses DNS name servers via perls internal C.
3638             Return empty string (undef) if unsuccessful.
3639              
3640             print ipnum("www.uio.no"); # prints 129.240.13.152
3641              
3642             Does internal memoization via the hash C<%Acme::Tools::IPNUM_memo>.
3643              
3644             =cut
3645              
3646             our %IPNUM_memo;
3647             sub ipnum {
3648 1     1 1 2 my $ipaddr=shift;
3649             #croak "No $ipaddr" if !length($ipaddr);
3650 1 50       3 return $IPNUM_memo{$ipaddr} if exists $IPNUM_memo{$ipaddr};
3651 1         43124 my $h=gethostbyname($ipaddr);
3652             #croak "No ipnum for $ipaddr" if !$h;
3653 1 50       11 return if !defined $h;
3654 1         13 my $ipnum = join(".",unpack("C4",$h));
3655 1 50       25 $IPNUM_memo{$ipaddr} = $ipnum=~/^(\d+\.){3}\d+$/ ? $ipnum : undef;
3656 1         7 return $IPNUM_memo{$ipaddr};
3657             }
3658              
3659             =head2 webparams
3660              
3661             B (optional)
3662              
3663             Zero or one input argument: A string of the same type often found behind the first question mark (C<< ? >>) in URLs.
3664              
3665             This string can have one or more parts separated by C<&> chars.
3666              
3667             Each part consists of C pairs (with the first C<=> char being the separation char).
3668              
3669             Both C and C can be url-encoded.
3670              
3671             If there is no input argument, C uses C<< $ENV{QUERY_STRING} >> instead.
3672              
3673             If also C<< $ENV{QUERY_STRING} >> is lacking, C checks if C<< $ENV{REQUEST_METHOD} eq 'POST' >>.
3674             In that case C<< $ENV{CONTENT_LENGTH} >> is taken as the number of bytes to be read from C
3675             and those bytes are used as the missing input argument.
3676              
3677             The environment variables QUERY_STRING, REQUEST_METHOD and CONTENT_LENGTH is
3678             typically set by a web server following the CGI standard (which Apache and
3679             most of them can do I guess) or in mod_perl by Apache. Although you are
3680             probably better off using L. Or C<< $R->args() >> or C<< $R->content() >> in mod_perl.
3681              
3682             B
3683              
3684             C returns a hash of the key/value pairs in the input argument. Url-decoded.
3685              
3686             If an input string has more than one occurrence of the same key, that keys value in the returned hash will become concatenated each value separated by a C<,> char. (A comma char)
3687              
3688             Examples:
3689              
3690             use Acme::Tools;
3691             my %R=webparams();
3692             print "Content-Type: text/plain\n\n"; # or rather \cM\cJ\cM\cJ instead of \n\n to be http-compliant
3693             print "My name is $R{name}";
3694              
3695             Storing those four lines in a file in the directory designated for CGI-scripts
3696             on your web server (or perhaps naming the file .cgi is enough), and C
3697             /.../cgi-bin/script> and the URL
3698             L will print
3699             C to the web page.
3700              
3701             L will print C.
3702              
3703             =cut
3704              
3705             sub webparams {
3706 2     2 1 401 my $query=shift();
3707 2 50       9 $query=$ENV{QUERY_STRING} if !defined $query;
3708 2 50 33     7 if(!defined $query and $ENV{REQUEST_METHOD} eq "POST"){
3709 0         0 read(STDIN,$query , $ENV{CONTENT_LENGTH});
3710 0         0 $ENV{QUERY_STRING}=$query;
3711             }
3712 2         3 my %R;
3713 2         11 for(split("&",$query)){
3714 7 50       14 next if !length($_);
3715 7         17 my($nkl,$verdi)=map urldec($_),split("=",$_,2);
3716 7 100       27 $R{$nkl}=exists$R{$nkl}?"$R{$nkl},$verdi":$verdi;
3717             }
3718 2         17 return %R;
3719             }
3720              
3721             =head2 urlenc
3722              
3723             Input: a string
3724              
3725             Output: the same string URL encoded so it can be sent in URLs or POST requests.
3726              
3727             In URLs (web addresses) certain characters are illegal. For instance I and I.
3728             And certain other chars have special meaning, such as C<+>, C<%>, C<=>, C, C<&>.
3729              
3730             These illegal and special chars needs to be encoded to be sent in
3731             URLs. This is done by sending them as C<%> and two hex-digits. All
3732             chars can be URL encodes this way, but it's necessary just on some.
3733              
3734             Example:
3735              
3736             $search="Østdal, Åge";
3737             my $url="http://machine.somewhere.com/search?q=" . urlenc($search);
3738             print $url;
3739              
3740             Prints C<< http://machine.somewhere.com/search?q=%D8stdal%2C%20%C5ge >>
3741              
3742             =cut
3743              
3744             sub urlenc {
3745 4     4 1 837 my $str=shift;
3746 4         13 $str=~s/([^\w\-\.\/\,\[\]])/sprintf("%%%02x",ord($1))/eg; #more chars is probably legal...
  631         812  
3747 4         18 return $str;
3748             }
3749              
3750             =head2 urldec
3751              
3752             Opposite of L.
3753              
3754             Example, this returns 'C< ø>'. That is space and C<< ø >>.
3755              
3756             urldec('+%C3')
3757              
3758             =cut
3759              
3760             sub urldec {
3761 14     14 1 11 my $str=shift;
3762 14         16 $str=~s/\+/ /gs;
3763 14         20 $str=~s/%([a-f\d]{2})/pack("C", hex($1))/egi;
  632         1350  
3764 14         30 return $str;
3765             }
3766              
3767             =head2 ht2t
3768              
3769             C is short for I.
3770              
3771             This sub extracts an html-C<< >>s and returns its C<< s >>
3772             and C<< s >> as an array of arrayrefs. And strips away any html
3773             inside the C<< s >> as well.
3774              
3775             my @table = ht2t($html,'some string occuring before the you want');
3776              
3777             Input: One or two arguments.
3778              
3779             First argument: the html where a C<< >> is to be found and converted.
3780              
3781             Second argument: (optional) If the html contains more than one C<<
3782             >>, and you do not want the first one, applying a second
3783             argument is a way of telling C which to capture: the one with this word
3784             or string occurring before it.
3785              
3786             Output: An array of arrayrefs.
3787              
3788             C is a quick and dirty way of scraping (or harvesting as it is
3789             also called) data from a web page. Look too L to do this
3790             more accurate.
3791              
3792             Example:
3793              
3794             use Acme::Tools;
3795             use LWP::Simple;
3796             my $url = "http://en.wikipedia.org/wiki/List_of_countries_by_population";
3797             for( ht2t( get($url), "Countries" ) ) {
3798             my($rank, $country, $pop) = @$_;
3799             $pop =~ s/,//g;
3800             printf "%3d | %-32s | %9d\n", @$_ if $pop>0;
3801             }
3802              
3803             Output:
3804              
3805             1 | China | 1367740000
3806             2 | India | 1262090000
3807             3 | United States | 319043000
3808             4 | Indonesia | 252164800
3809             5 | Brazil | 203404000
3810              
3811             ...and so on.
3812              
3813             =cut
3814              
3815             sub ht2t {
3816 9 100 100 9 1 220 my($f,$s,$r)=@_; 1>@_||@_>3 and croak; $s='' if @_==1;
  9 100       237  
  7         13  
3817 7         87 $f=~s,.*?($s).*?(
3818 33     33   20548 my $e=0;$e++ while index($f,$s=chr($e))>=$[;
  33         9956  
  33         96190  
  7         7  
  7         29  
3819 7         67 $f=~s//\l$1$s/gsi;
3820 7         35 $f=~s/\s*<.*?>\s*/ /gsi;
3821 7         48 my @t=split("r$s",$f);shift @t;
  7         5  
3822 20     20   17 $r||=sub{s/&(#160|nbsp);/ /g;s/&/&/g;s/^\s*(.*?)\s*$/$1/s;
  20         14  
  20         45  
3823 7 100 100     30 s/(\d) (\d)/$1$2/g if /^[\d \.\,]+$/};
  20         37  
3824 7         9 for(@t){my @r=split/[dh]$s/;shift@r;$_=[map{&$r;$_}@r]}
  9         38  
  9         5  
  9         10  
  20         22  
  20         37  
3825 7         27 @t;
3826             }
3827              
3828             =head1 FILES, DIRECTORIES
3829              
3830             =head2 writefile
3831              
3832             Justification:
3833              
3834             Perl needs three or four operations to make a file out of a string:
3835              
3836             open my $FILE, '>', $filename or die $!;
3837             print $FILE $text;
3838             close($FILE);
3839              
3840             This is way simpler:
3841              
3842             writefile($filename,$text);
3843              
3844             Sub writefile opens the file i binary mode (C) and has two usage modes:
3845              
3846             B Two arguments
3847              
3848             B is the filename. If the file exists, its overwritten.
3849             If the file can not be opened for writing, a die (a croak really) happens.
3850              
3851             B is one of:
3852              
3853             =over 4
3854              
3855             =item * Either a scaler. That is a normal string to be written to the file.
3856              
3857             =item * Or a reference to a scalar. That referred text is written to the file.
3858              
3859             =item * Or a reference to an array of scalars. This array is the written to the
3860             file element by element and C<< \n >> is automatically appended to each element.
3861              
3862             =back
3863              
3864             Alternativelly, you can write several files at once.
3865              
3866             Example, this:
3867              
3868             writefile('file1.txt','The text....tjo');
3869             writefile('file2.txt','The text....hip');
3870             writefile('file3.txt','The text....and hop');
3871              
3872             ...is the same as this:
3873              
3874             writefile([
3875             ['file1.txt','The text....tjo'],
3876             ['file2.txt','The text....hip'],
3877             ['file3.txt','The text....and hop'],
3878             ]);
3879              
3880             B Nothing (for the time being). Cs (C really) if something goes wrong.
3881              
3882             =cut
3883              
3884             #todo: use openstr() as in readfile(), transparently gzip .gz filenames and so on
3885             sub writefile {
3886 9     9 1 81967 my($filename,$text)=@_;
3887 9 50       73 if(ref($filename) eq 'ARRAY'){
3888 0         0 writefile(@$_) for @$filename;
3889 0         0 return;
3890             }
3891 9 50 33     1065 open(WRITEFILE,">",$filename) and binmode(WRITEFILE) or croak($!);
3892 9 50 33     88 if(!defined $text or !ref($text)){
    0          
    0          
3893 9         1300 print WRITEFILE $text;
3894             }
3895             elsif(ref($text) eq 'SCALAR'){
3896 0         0 print WRITEFILE $$text;
3897             }
3898             elsif(ref($text) eq 'ARRAY'){
3899 0         0 print WRITEFILE "$_\n" for @$text;
3900             }
3901             else {
3902 0         0 croak;
3903             }
3904 9         274 close(WRITEFILE);
3905 9         2731 return;
3906             }
3907              
3908             =head2 readfile
3909              
3910             Just as with L you can read in a whole file in one operation with C. Instead of:
3911              
3912             open my $FILE,'<', $filename or die $!;
3913             my $data = join"",<$FILE>;
3914             close($FILE);
3915              
3916             This is simpler:
3917              
3918             my $data = readfile($filename);
3919              
3920             B
3921              
3922             Reading the content of the file to a scalar variable: (Any content in C<$data> will be overwritten)
3923              
3924             my $data;
3925             readfile('filename.txt',\$data);
3926              
3927             Reading the lines of a file into an array:
3928              
3929             my @lines;
3930             readfile('filnavn.txt',\@lines);
3931             for(@lines){
3932             ...
3933             }
3934              
3935             Note: Chomp is done on each line. That is, any newlines (C<< \n >>) will be removed.
3936             If C<@lines> is non-empty, this will be lost.
3937              
3938             Sub readfile is context aware. If an array is expected it returns an array of the lines without a trailing C<< \n >>.
3939             The last example can be rewritten:
3940              
3941             for(readfile('filnavn.txt')){
3942             ...
3943             }
3944              
3945             With two input arguments, nothing (undef) is returned from C.
3946              
3947             =cut
3948              
3949             #http://blogs.perl.org/users/leon_timmermans/2013/05/why-you-dont-need-fileslurp.html
3950             #todo: readfile with grep-filter code ref in a third arg (avoid reading all into mem)
3951              
3952             sub readfile {
3953 12     12 1 8040 my($filename,$ref)=@_;
3954 12 100       28 if(@_==1){
3955 6 100       13 if(wantarray){ my @data; readfile($filename,\@data); return @data }
  1         2  
  1         4  
  1         3590  
3956 5         5 else { my $data; readfile($filename,\$data); return $data }
  5         20  
  5         418  
3957             }
3958             else {
3959 6 50       49 open my $fh,openstr($filename) or croak("ERROR: readfile $! $?");
3960 6 100       34 if ( ref($ref) eq 'SCALAR') { $$ref=join"",<$fh> }
  5 50       5934  
3961 1         15 elsif( ref($ref) eq 'ARRAY' ) { while(my $l=<$fh>){ chomp($l); push @$ref, $l } }
  20003         11907  
  20003         30113  
3962 0         0 else { croak "ERROR: Second arg to readfile should be a ref to a scalar og array" }
3963 6         888 close($fh);
3964 6         43 return;#?
3965             }
3966             }
3967              
3968             =head2 readdirectory
3969              
3970             B
3971              
3972             Name of a directory.
3973              
3974             B
3975              
3976             A list of all files in it, except of C<.> and C<..> (on linux/unix systems, all directories have a C<.> and C<..> directory).
3977              
3978             The names of all types of files are returned: normal files, directories, symbolic links,
3979             pipes, semaphores. That is every thing shown by C except C<.> and C<..>
3980              
3981             C do not recurce down into subdirectories (but see example below).
3982              
3983             B
3984              
3985             my @files = readdirectory("/tmp");
3986              
3987             B
3988              
3989             Sometimes calling the built ins C, C and C seems a tad tedious, since this:
3990              
3991             my $dir="/usr/bin";
3992             opendir(D,$dir);
3993             my @files=map "$dir/$_", grep {!/^\.\.?$/} readdir(D);
3994             closedir(D);
3995              
3996             Is the same as this:
3997              
3998             my @files=readdirectory("/usr/bin");
3999              
4000             See also: L
4001              
4002             B
4003              
4004             On huge directories with perhaps tens or houndreds of thousands of
4005             files, readdirectory() will consume more memory than perls
4006             opendir/readdir. This isn't usually a concern anymore for modern
4007             computers with gigabytes of RAM, but might be the rationale behind
4008             Perls more tedious way created in the 80s. The same argument goes for
4009             file slurping. On the other side it's also a good practice to never
4010             assume to much on available memory and the number of files if you
4011             don't know for certain that enough memory is available whereever your
4012             code is run or that the size of the directory is limited.
4013              
4014             B
4015              
4016             How to get all files in the C directory including all subdirectories below of any depth:
4017              
4018             my @files=("/tmp");
4019             map {-d $_ and unshift @files,$_ or push @files,$_} readdirectory(shift(@files)) while -d $files[0];
4020              
4021             ...or to avoid symlinks and only get real files:
4022              
4023             map {-d and !-l and unshift @files,$_ or -f and !-l and push @files,$_} readdirectory(shift(@files)) while -d $files[0];
4024              
4025             =cut
4026              
4027             sub readdirectory {
4028 0     0 1 0 my $dir=shift;
4029 0         0 opendir(my $D,$dir);
4030 0         0 my @filer=map "$dir/$_", grep {!/^\.\.?$/} readdir($D);
  0         0  
4031 0         0 closedir($D);
4032 0         0 return @filer;
4033             }
4034              
4035             =head2 basename
4036              
4037             The basename and dirname functions behaves like the *nix shell commands with the same names.
4038              
4039             B One or two arguments: Filename and an optional suffix
4040              
4041             B Returns the filename with any directory and (if given) the suffix removed.
4042              
4043             basename('/usr/bin/perl') # returns 'perl'
4044             basename('/usr/local/bin/report.pl','.pl') # returns 'report' since .pl at the end is removed
4045             basename('report2.pl','.pl') # returns 'report2'
4046             basename('report2.pl','.\w+') # returns 'report2.pl', probably not what you meant
4047             basename('report2.pl',qr/.\w+/) # returns 'report2', use qr for regex
4048              
4049             =head2 dirname
4050              
4051             B A filename including path
4052              
4053             B Removes the filename path and returns just the directory path up until but not including
4054             the last /. Return just a one char C<< . >> (period string) if there is no directory in the input.
4055              
4056             dirname('/usr/bin/perl') # returns '/usr/bin'
4057             dirname('perl') # returns '.'
4058              
4059             =head2 username
4060              
4061             Returns the current linux/unix username, for example the string root
4062              
4063             print username(); #just (getpwuid($<))[0] but more readable perhaps
4064              
4065             =cut
4066              
4067 8 100   8 1 1380 sub basename { my($f,$s)=(@_,'');$s=quotemeta($s)if!ref($s);$f=~m,^(.*/)?([^/]*?)($s)?$,;$2 }
  8         20  
  8         156  
  8         22  
4068 2 100 66 2 1 182 sub dirname { $_[0]=~m,^(.*)/,;defined($1) && length($1) ? $1 : '.' }
  2         20  
4069 0     0 1 0 sub username { (getpwuid($<))[0] }
4070              
4071             =head2 wipe
4072              
4073             Deletes a file by "wiping" it on the disk. Overwrites the file before deleting. (May not work properly on SSDs)
4074              
4075             B
4076             * Arg 1: A filename
4077             * Optional arg 2: number of times to overwrite file. Default is 3 if omitted, 0 or undef
4078             * Optional arg 3: keep (true/false), wipe() but no delete of file
4079              
4080             B Same as the C (remove file): 1 for success, 0 or false for failure.
4081              
4082             See also: L, L
4083              
4084             =cut
4085              
4086             sub wipe {
4087 2     2 1 2945 my($file,$times,$keep)=@_;
4088 2   100     9 $times||=3;
4089 2 50 33     47 croak "ERROR: File $file nonexisting\n" if not -f $file or not -e $file;
4090 2         13 my $size=-s$file;
4091 2 50       49 open my $WIFH, '+<', $file or croak "ERROR: Unable to open $file: $!\n";
4092 2         3 binmode($WIFH);
4093 2         6 for(1..$times){
4094 4         12 my $block=chr(int(rand(256))) x 1024;#hm
4095 4         8 for(0..($size/1024)){
4096 73         237 seek($WIFH,$_*1024,0);
4097 73         71 print $WIFH $block;
4098             }
4099             }
4100 2         16 close($WIFH);
4101 2 100       114 $keep || unlink($file);
4102             }
4103              
4104             =head2 chall
4105              
4106             Does chmod + utime + chown on one or more files.
4107              
4108             Returns the number of files of which those operations was successful.
4109              
4110             Mode, uid, gid, atime and mtime are set from the array ref in the first argument.
4111              
4112             The first argument references an array which is exactly like an array returned from perls internal C -function.
4113              
4114             Example:
4115              
4116             my @stat=stat($filenameA);
4117             chall( \@stat, $filenameB, $filenameC, ... ); # by stat-array
4118             chall( $filenameA, $filenameB, $filenameC, ... ); # by file name
4119              
4120             Copies the chmod, owner, group, access time and modify time from file A to file B and C.
4121              
4122             See C, C, C, C
4123              
4124             =cut
4125              
4126              
4127             sub chall {
4128             my($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,$atime,$mtime,$ctime,$blksize,$blocks )
4129 12 100   12 1 12777 = ref($_[0]) ? @{shift()} : stat(shift());
  1         4  
4130 12         52 my $successful=0;
4131 12 50 33     70 for(@_){ chmod($mode,$_) && utime($atime,$mtime,$_) && chown($uid,$gid,$_) && $successful++ }
  12   33     3654  
4132 12         76 return $successful;
4133             }
4134              
4135             =head2 makedir
4136              
4137             Input: One or two arguments.
4138              
4139             Works like perls C except that C will create nesessary parent directories if they dont exists.
4140              
4141             First input argument: A directory name (absolute, starting with C< / > or relative).
4142              
4143             Second input argument: (optional) permission bits. Using the normal C<< 0777^umask() >> as the default if no second input argument is provided.
4144              
4145             Example:
4146              
4147             makedir("dirB/dirC")
4148              
4149             ...will create directory C if it does not already exists, to be able to create C inside C.
4150              
4151             Returns true on success, otherwise false.
4152              
4153             C memoizes directories it has checked for existence before (trading memory and for speed).
4154             Thus directories removed during running the script is not discovered by makedir.
4155              
4156             See also C<< perldoc -f mkdir >>, C<< man umask >>
4157              
4158             =cut
4159              
4160             our %MAKEDIR;
4161              
4162             sub makedir {
4163 0     0 1 0 my($d,$p,$dd)=@_;
4164 0 0       0 $p=0777^umask() if !defined$p;
4165             (
4166             $MAKEDIR{$d} or -d$d or mkdir($d,$p) #or croak("mkdir $d, $p")
4167             or ($dd)=($d=~m,^(.+)/+([^/]+)$,) and makedir($dd,$p) and mkdir($d,$p) #or die;
4168 0 0 0     0 ) and ++$MAKEDIR{$d};
      0        
      0        
      0        
      0        
4169             }
4170              
4171             =head2 md5sum
4172              
4173             B a filename.
4174              
4175             B a string of 32 hexadecimal chars from 0-9 or a-f.
4176              
4177             Example, the md5sum gnu/linux command without options could be implementet like this:
4178              
4179             use Acme::Tools;
4180             print eval{ md5sum($_)." $_\n" } || $@ for @ARGV;
4181              
4182             This sub requires L, which is a core perl-module since
4183             version 5.?.? It does not slurp the files or spawn new processes.
4184              
4185             =cut
4186              
4187             sub md5sum {
4188 2     2 1 15 require Digest::MD5;
4189 2         3 my $fn=shift;
4190 2 100       168 croak "md5sum: $fn is a directory (no md5sum)" if -d $fn;
4191 1 50       21 open my $FH, '<', $fn or croak "Could not open file $fn for md5sum() $!";
4192 1         2 binmode($FH);
4193 1         1 my $r = eval { Digest::MD5->new->addfile($FH)->hexdigest };
  1         17  
4194 1 50       3 croak "md5sum on $fn failed ($@)\n" if $@;
4195 1         12 $r;
4196             }
4197              
4198             =head2 read_conf
4199              
4200             B A file name or a reference to a string with settings in the format described below.
4201              
4202             B A reference to a hash. This hash will have the settings from the file (or stringref).
4203             The hash do not have to be empty beforehand.
4204              
4205             Returns a hash with the settings as in this examples:
4206              
4207             my %conf = read_conf('/etc/your/thing.conf');
4208             print $conf{sectionA}{knobble}; #prints ABC if the file is as shown below
4209             print $conf{sectionA}{gobble}; #prints ZZZ, the last gobble
4210             print $conf{switch}; #prints OK here as well, unsectioned value
4211             print $conf{part2}{password}; #prints oh:no= x
4212              
4213             File use for the above example:
4214              
4215             switch: OK #before first section, the '' (empty) section
4216             [sectionA]
4217             knobble: ABC
4218             gobble: XYZ #this gobble is overwritten by the gobble on the next line
4219             gobble: ZZZ
4220             [part2]
4221             password: oh:no= x #should be better
4222             text: { values starting with { continues
4223             until reaching a line with }
4224              
4225             Everything from # and behind is regarded comments and ignored. Comments can be on any line.
4226             To keep a # char, put a \ in front of it.
4227              
4228             A C< : > or C< = > separates keys and values. Spaces at the beginning or end of lines are
4229             ignored (after removal of #comments), as are any spaces before and after : and = separators.
4230              
4231             Empty lines or lines with no C< : > or C< = > is also ignored. Keys and values can contain
4232             internal spaces and tabs, but not at the beginning or end.
4233              
4234             Multi-line values must start and end with { and }. Using { and } keep spaces at the start
4235             or end in both one-line and multi-line values.
4236              
4237             Sections are marked with C<< [sectionname] >>. Section names, keys and values is case
4238             sensitive. C above the first section or below and empty C<< [] >> is placed
4239             both in the empty section in the returned hash and as top level key/values.
4240              
4241             C can be a simpler alternative to the core module L which has
4242             its own hassles.
4243              
4244             $Acme::Tools::Read_conf_empty_section=1; #default 0 (was 1 in version 0.16)
4245             my %conf = read_conf('/etc/your/thing.conf');
4246             print $conf{''}{switch}; #prints OK with the file above
4247             print $conf{switch}; #prints OK here as well
4248              
4249             =cut
4250              
4251             our $Read_conf_empty_section=0;
4252             sub read_conf {
4253 3     3 1 11 my($fn,$hr)=(@_,{});
4254 3 100       11 my $conf=ref($fn)?$$fn:readfile($fn);
4255 3         61 $conf=~s,\s*(?
4256 3         26 my($section,@l)=('',split"\n",$conf);
4257 3         9 while(@l) {
4258 63         44 my $l=shift@l;
4259 63 100       199 if( $l=~/^\s*\[\s*(.*?)\s*\]/ ) {
    100          
4260 12         13 $section=$1;
4261 12   100     40 $$hr{$1}||={};
4262             }
4263             elsif( $l=~/^\s*([^\:\=]+)[:=]\s*(.*?)\s*$/ ) {
4264 33   66 33   63 my $ml=sub{my$v=shift;$v.="\n".shift@l while $v=~/^\{[^\}]*$/&&@l;$v=~s/^\{(.*)\}\s*$/$1/s;$v=~s,\\#,#,g;$v};
  33         38  
  33         106  
  33         39  
  33         30  
  33         33  
4265 33         35 my $v=&$ml($2);
4266 33 100 66     81 $$hr{$section}{$1}=$v if length($section) or $Read_conf_empty_section;
4267 33 100       103 $$hr{$1}=$v if !length($section);
4268             }
4269             }
4270 3         19 %$hr;
4271             }
4272             # my $incfn=sub{return $1 if $_[0]=~m,^(/.+),;my$f=$fn;$f=~s,[^/]+$,$_[0],;$f};
4273             # s,]+)>,"".readfile(&$incfn($1)),eg; #todo
4274              
4275              
4276             =head2 openstr
4277              
4278             # returned from openstr:
4279             open my $FH, openstr("fil.txt") or die; # fil.txt
4280             open my $FH, openstr("fil.gz") or die; # zcat fil.gz |
4281             open my $FH, openstr("fil.bz2") or die; # bzcat fil.bz2 |
4282             open my $FH, openstr("fil.xz") or die; # xzcat fil.xz |
4283             open my $FH, openstr(">fil.txt") or die; # > fil.txt
4284             open my $FH, openstr(">fil.gz") or die; # | gzip > fil.gz
4285             open my $FH, openstr(">fil.bz2") or die; # | bzip2 > fil.bz2
4286             open my $FH, openstr(">fil.xz") or die; # | xz > fil.bz2
4287              
4288             Environment variable PATH is used. So in the examples above, /bin/gzip
4289             is returned instead of gzip if /bin is the first directory in
4290             $ENV{PATH} containing an executable file gzip. Dirs /usr/bin, /bin and
4291             /usr/local/bin is added to PATH in openstr(). They are checked even if
4292             PATH is empty.
4293              
4294             =cut
4295              
4296             our @Openstrpath=(grep$_,split(":",$ENV{PATH}),qw(/usr/bin /bin /usr/local/bin));
4297             sub openstr {
4298 22     22 1 2900 my($fn,$ext)=(shift()=~/^(.*?(?:\.(t?gz|bz2|xz))?)$/i);
4299 22 100       263 return $fn if !$ext;
4300 12 50   12   37 my $prog=sub{@Openstrpath or return $_[0];(grep -x$_, map "$_/$_[0]", @Openstrpath)[0] or croak"$_[0] not found"};
  12 100       22  
  12         1441  
4301             $fn =~ /^\s*>/
4302             ? "| ".(&$prog({qw/gz gzip bz2 bzip2 xz xz tgz gzip/ }->{lc($ext)})).$fn
4303 12 100       65 : &$prog({qw/gz zcat bz2 bzcat xz xzcat tgz zcat/}->{lc($ext)})." $fn |";
4304             }
4305              
4306             =head1 TIME FUNCTIONS
4307              
4308             =head2 tms
4309              
4310             Timestring, works somewhat like the Gnu/Linux C command and Oracle's C
4311              
4312             Converts timestamps to more readable forms of time strings.
4313              
4314             Converts seconds since I and time strings on the form C to other forms.
4315              
4316             B One, two or three arguments.
4317              
4318             B A format string.
4319              
4320             B An epock C number or a time
4321             string of the form YYYYMMDD-HH24:MI:SS or YYYYMMDDTHH:MI:SS or
4322             YYYY-MM-DDTHH:MI:SS (in which T is litteral and HH is the 24-hour
4323             version of hours) or YYYYMMDD. Uses the current C if the
4324             second argument is missing.
4325              
4326             TODO: Formats with % as in C (C<%Y%m%d> and so on)
4327              
4328             B True or false. If true and first argument
4329             is eight digits: Its interpreted as a date like YYYYMMDD time string,
4330             not an epoch time. If true and first argument is six digits its
4331             interpreted as a date like DDMMYY (not YYMMDD!).
4332              
4333             B a date or clock string on the wanted form.
4334              
4335             B
4336              
4337             Prints C<< 3. july 1997 >> if thats the dato today:
4338              
4339             perl -MAcme::Tools -le 'print timestr("D. month YYYY")'
4340              
4341             print tms("HH24:MI"); # prints 23:55 if thats the time now
4342             tms("HH24:MI",time()); # ...same,since time() is the default
4343             tms("HH:MI",time()-5*60); # 23:50 if that was the time 5 minutes ago
4344             tms("HH:MI",time()-5*60*60); # 18:55 if thats the time 5 hours ago
4345             tms("Day Month Dth YYYY HH:MI"); # Saturday July 1st 2004 23:55 (big S, big J)
4346             tms("Day D. Month YYYY HH:MI"); # Saturday 8. July 2004 23:55 (big S, big J)
4347             tms("DAY D. MONTH YYYY HH:MI"); # SATURDAY 8. JULY 2004 23:55 (upper)
4348             tms("dy D. month YYYY HH:MI"); # sat 8. july 2004 23:55 (small s, small j)
4349             tms("Dy DD. MON YYYY HH12:MI am"); # Sat 08. JUL 2004 11:55 pm (HH12, am becomes pm if after 12)
4350             tms("DD-MON-YYYY"); # 03-MAY-2004 (mon, english)
4351              
4352             The following list of codes in the first argument will be replaced:
4353              
4354             YYYY Year, four digits
4355             YY Year, two digits, i.e. 04 instead of 2004
4356             yyyy Year, four digits, but nothing if its the current year
4357             YYYY|HH:MI Year if its another year than the current, a time in hours and minutes elsewise
4358             MM Month, two digits. I.e. 08 for August
4359             DD Day of month, two digits. I.e. 01 (not 1) for the first day in a month
4360             D Day of month, one digit. I.e. 1 (not 01)
4361             HH Hour. From 00 to 23.
4362             HH24 Same as HH.
4363             HH12 12 becomes 12 (never 00), 13 becomes 01, 14 02 and so on.
4364             Note: 00 after midnight becomes 12 (am). Tip: always include the code
4365             am in a format string that uses HH12.
4366             MI Minutt. Fra 00 til 59.
4367             SS Sekund. Fra 00 til 59.
4368             am Becomes am or pm
4369             pm Same
4370             AM Becomes AM or PM (upper case)
4371             PM Same
4372            
4373             Month The full name of the month in English from January to December
4374             MONTH Same in upper case (JANUARY)
4375             month Same in lower case (january)
4376             Mont Jan Feb Mars Apr May June July Aug Sep Oct Nov Dec
4377             Mont. Jan. Feb. Mars Apr. May June July Aug. Sep. Oct. Nov. Dec. (always four chars)
4378             Mon Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec (always three chars)
4379            
4380             Day The full name of the weekday. Sunday to Saturday
4381             Dy Three letters: Sun Mon Tue Wed Thu Fri Sat
4382             DAY Upper case
4383             DY Upper case
4384             Dth 1st 2nd 3rd 4th 5th ... 11th 12th ... 20th 21st 22nd 23rd 24th ... 30th 31st
4385            
4386             WW Week number of the year 01-53 according to the ISO8601-definition (which most countries uses)
4387             WWUS Week number of the year 01-53 according to the most used definition in the USA.
4388             Other definitions also exists.
4389              
4390             epoch Converts a time string from YYYYMMDD-HH24:MI:SS, YYYYMMDD-HH24:MI:SS, YYYYMMDDTHH:MI:SS,
4391             YYYY-MM-DDTHH:MI:SS or YYYYMMDD to the number of seconds since January 1st 1970.
4392             Commonly known as the Unix epoch.
4393            
4394             JDN Julian day number. Integer. The number of days since the day starting at noon on January 1 4713 BC
4395             JD Same as JDN but a float accounting for the time of day
4396            
4397             TODO: sub smt() (tms backward... or something better named, converts the other way)
4398             As to_date and to_char in Oracle. Se maybe L instead
4399              
4400             B (optional) Is_date. False|true, default false. If true, the second argument is
4401             interpreted as a date of the form YYYYMMDD, not as a number of seconds since epoch (January 1st 1970).
4402              
4403             =cut
4404              
4405             #Se også L og L
4406              
4407             our $Tms_pattern;
4408             our %Tms_str=
4409             ('MÅNED' => [4, 'JANUAR','FEBRUAR','MARS','APRIL','MAI','JUNI','JULI',
4410             'AUGUST','SEPTEMBER','OKTOBER','NOVEMBER','DESEMBER' ],
4411             'Måned' => [4, 'Januar','Februar','Mars','April','Mai','Juni','Juli',
4412             'August','September','Oktober','November','Desember'],
4413             'måned' => [4, 'januar','februar','mars','april','mai','juni','juli',
4414             'august','september','oktober','november','desember'],
4415             'MÅNE.' => [4, 'JAN.','FEB.','MARS','APR.','MAI','JUNI','JULI','AUG.','SEP.','OKT.','NOV.','DES.'],
4416             'Måne.' => [4, 'Jan.','Feb.','Mars','Apr.','Mai','Juni','Juli','Aug.','Sep.','Okt.','Nov.','Des.'],
4417             'måne.' => [4, 'jan.','feb.','mars','apr.','mai','juni','juli','aug.','sep.','okt.','nov.','des.'],
4418             'MÅNE' => [4, 'JAN','FEB','MARS','APR','MAI','JUNI','JULI','AUG','SEP','OKT','NOV','DES'],
4419             'Måne' => [4, 'Jan','Feb','Mars','Apr','Mai','Juni','Juli','Aug','Sep','Okt','Nov','Des'],
4420             'måne' => [4, 'jan','feb','mars','apr','mai','juni','juli','aug','sep','okt','nov','des'],
4421             'MÅN' => [4, 'JAN','FEB','MAR','APR','MAI','JUN','JUL','AUG','SEP','OKT','NOV','DES'],
4422             'Mån' => [4, 'Jan','Feb','Mar','Apr','Mai','Jun','Jul','Aug','Sep','Okt','Nov','Des'],
4423             'mån' => [4, 'jan','feb','mar','apr','mai','jun','jul','aug','sep','okt','nov','des'],
4424             'MONTH' => [4, 'JANUARY','FEBRUARY','MARCH','APRIL','MAY','JUNE','JULY',
4425             'AUGUST','SEPTEMBER','OCTOBER','NOVEMBER','DECEMBER'],
4426             'Month' => [4, 'January','February','March','April','May','June','July',
4427             'August','September','October','November','December'],
4428             'month' => [4, 'january','february','march','april','may','june','july',
4429             'august','september','october','november','december'],
4430             'MONT.' => [4, 'JAN.','FEB.','MAR.','APR.','MAY','JUNE','JULY','AUG.','SEP.','OCT.','NOV.','DEC.'],
4431             'Mont.' => [4, 'Jan.','Feb.','Mar.','Apr.','May','June','July','Aug.','Sep.','Oct.','Nov.','Dec.'],
4432             'mont.' => [4, 'jan.','feb.','mar.','apr.','may','june','july','aug.','sep.','oct.','nov.','dec.'],
4433             'MONT' => [4, 'JAN','FEB','MAR','APR','MAY','JUNE','JULY','AUG','SEP','OCT','NOV','DEC'],
4434             'Mont' => [4, 'Jan','Feb','Mar','Apr','May','June','July','Aug','Sep','Oct','Nov','Dec'],
4435             'mont' => [4, 'jan','feb','mar','apr','may','june','july','aug','sep','oct','nov','dec'],
4436             'MON' => [4, 'JAN','FEB','MAR','APR','MAY','JUN','JUL','AUG','SEP','OCT','NOV','DEC'],
4437             'Mon' => [4, 'Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'],
4438             'mon' => [4, 'jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec'],
4439             'DAY' => [6, 'SUNDAY','MONDAY','TUESDAY','WEDNESDAY','THURSDAY','FRIDAY','SATURDAY'],
4440             'Day' => [6, 'Sunday','Monday','Tuesday','Wednesday','Thursday','Friday','Saturday'],
4441             'day' => [6, 'sunday','monday','tuesday','wednesday','thursday','friday','saturday'],
4442             'DY' => [6, 'SUN','MON','TUE','WED','THU','FRI','SAT'],
4443             'Dy' => [6, 'Sun','Mon','Tue','Wed','Thu','Fri','Sat'],
4444             'dy' => [6, 'sun','mon','tue','wed','thu','fri','sat'],
4445             'DAG' => [6, 'SØNDAG','MANDAG','TIRSDAG','ONSDAG','TORSDAG','FREDAG','LØRDAG'],
4446             'Dag' => [6, 'Søndag','Mandag','Tirsdag','Onsdag','Torsdag','Fredag','Lørdag'],
4447             'dag' => [6, 'søndag','mandag','tirsdag','onsdag','torsdag','fredag','lørdag'],
4448             'DG' => [6, 'SØN','MAN','TIR','ONS','TOR','FRE','LØR'],
4449             'Dg' => [6, 'Søn','Man','Tir','Ons','Tor','Fre','Lør'],
4450             'dg' => [6, 'søn','man','tir','ons','tor','fre','lør'],
4451             );
4452             my $_tms_inited=0;
4453             sub tms_init {
4454 1 50   1 0 3 return if $_tms_inited++;
4455 1         2 for(qw(MAANED Maaned maaned MAAN Maan maan),'MAANE.','Maane.','maane.'){
4456 9         12 $Tms_str{$_}=$Tms_str{replace($_,"aa","å","AA","Å")};
4457             }
4458 45         40 $Tms_pattern=join("|",map{quotemeta($_)}
4459 1         13 sort{length($b)<=>length($a)}
  197         115  
4460             keys %Tms_str);
4461             #uten sort kan "måned" bli "mared", fordi "mån"=>"mar"
4462             }
4463              
4464       0 0   sub totime {
4465              
4466             }
4467              
4468             sub date_ok {
4469 0     0 0 0 my($y,$m,$d)=@_;
4470 0 0 0     0 return date_ok($1,$2,$3) if @_==1 and $_[0]=~/^(\d{4})(\d\d)(\d\d)$/;
4471 0 0       0 return 0 if $y!~/^\d\d\d\d$/;
4472 0 0 0     0 return 0 if $m<1||$m>12||$d<1||$d>(31,$y%4||$y%100==0&&$y%400?28:29,31,30,31,30,31,31,30,31,30,31)[$m-1];
    0 0        
      0        
      0        
4473 0         0 return 1;
4474             }
4475              
4476             sub weeknum {
4477 0 0   0 0 0 return weeknum(tms('YYYYMMDD')) if @_<1;
4478 0 0 0     0 return weeknum($1,$2,$3) if @_==1 and $_[0]=~/^(\d{4})(\d\d)(\d\d)$/;
4479 0         0 my($year,$month,$day)= @_;
4480 0         0 eval{
4481 0 0       0 if(@_<2){
    0          
4482 0 0       0 if($year=~/^\d{8}$/) { ($year,$month,$day)=unpack("A4A2A2",$year) }
  0 0       0  
4483 0         0 elsif($year>99999999){ ($year,$month,$day)=(localtime($year))[5,4,3]; $year+=1900; $month++ }
  0         0  
  0         0  
4484 0         0 else {die}
4485             }
4486 0         0 elsif(@_!=3){croak}
4487 0 0       0 croak if !date_ok(sprintf("%04d%02d%02d",$year,$month,$day));
4488             };
4489 0 0       0 croak "ERROR: Wrong args Acme::Tools::weeknum(".join(",",@_).")" if $@;
4490 33     33   14597 use integer;#heltallsdivisjon
  33         275  
  33         127  
4491 0         0 my $y=$year+4800-(14-$month)/12;
4492 0         0 my $j=$day+(153*($month+(14-$month)/12*12-3)+2)/5+365*$y+$y/4-$y/100+$y/400-32045;
4493 0         0 my $d=($j+31741-$j%7)%146097%36524%1461;
4494 0         0 return (($d-$d/1460)%365+$d/1460)/7+1;
4495             }
4496              
4497             #perl -MAcme::Tools -le 'print "$_ ".tms($_."0501","day",1) for 2015..2026'
4498              
4499             sub tms {
4500 42 50 66 42 1 5873 return undef if @_>1 and not defined $_[1]; #time=undef => undef
4501 42 100       57 if(@_==1){
4502 2         8 my @lt=localtime();
4503 2 50       5 $_[0] eq 'YYYY' and return 1900+$lt[5];
4504 2 50       4 $_[0] eq 'YYYYMMDD' and return sprintf("%04d%02d%02d",1900+$lt[5],1+$lt[4],$lt[3]);
4505 2 100 66     27 $_[0] =~ $Re_isnum and @lt=localtime($_[0]) and return sprintf("%04d%02d%02d-%02d:%02d:%02d",1900+$lt[5],1+$lt[4],@lt[3,2,1,0]);
4506             }
4507 41         42 my($format,$time,$is_date)=@_;
4508 41 100       80 $time=time_fp() if !defined$time;
4509 41 100 100     235 ($time,$format)=($format,$time) if @_>=2 and $format=~/^[\d+\:\-\.]+$/; #swap /hm/
4510 41         168 my @lt=localtime($time);
4511             #todo? $is_date=0 if $time=~s/^\@(\-?\d)/$1/; #@n where n is sec since epoch makes it clear that its not a formatted, as in `date`
4512             #todo? date --date='TZ="America/Los_Angeles" 09:00 next Fri' #`info date`
4513             # Fri Nov 13 18:00:00 CET 2015
4514             #date --date="next Friday" #--date or -d
4515             #date --date="last friday"
4516             #date --date="2 days ago"
4517             #date --date="yesterday" #or tomorrow
4518             #date --date="-1 day" #date --date='10 week'
4519              
4520 41 50       57 if( $is_date ){
4521 0 0   0   0 my $yy2c=sub{10+$_[0]>$lt[5]%100?"20":"19"}; #hm 10+
  0         0  
4522 0 0       0 $time=totime(&$yy2c($1)."$1$2$3")."000000" if $time=~/^(\d\d)(\d\d)(\d\d)$/;
4523 0 0       0 $time=totime("$1$2${3}000000") if $time=~/^((?:18|19|20)\d\d)(\d\d)(\d\d)$/; #hm 18-20?
4524             }
4525             else {
4526 41 50       86 $time = yyyymmddhh24miss_time("$1$2$3$4$5$6") #yyyymmddhh24miss_time ???
4527             if $time=~/^((?:19|20|18)\d\d) #yyyy
4528             (0[1-9]|1[012]) #mm
4529             (0[1-9]|[12]\d|3[01]) \-? #dd
4530             ([01]\d|2[0-3]) \:? #hh24
4531             ([0-5]\d) \:? #mi
4532             ([0-5]\d) $/x; #ss
4533             }
4534 41 100       57 tms_init() if !$_tms_inited;
4535 41 100       58 return sprintf("%04d%02d%02d-%02d:%02d:%02d",1900+$lt[5],1+$lt[4],@lt[3,2,1,0]) if !$format;
4536 40         339 my %p=('%'=>'%',
4537             a=>'Dy',
4538             A=>'Day',
4539             b=>'Mon',
4540             b=>'Month',
4541             c=>'Dy Mon D HH:MI:SS YYYY',
4542             C=>'CC',
4543             d=>'DD',
4544             D=>'MM/DD/YY',
4545             e=>'D',
4546             F=>'YYYY-MM-DD',
4547             #G=>'',
4548             h=>'Month', H=>'HH24', I=>'HH12',
4549             j=>'DoY', #day of year
4550             k=>'H24', _H=>'H24',
4551             l=>'H12', _I=>'H12',
4552             m=>'MM', M=>'MI',
4553             n=>"\n",
4554             #N=>'NS', #sprintf%09d,1e9*(time_fp()-time()) #000000000..999999999
4555             p=>'AM', #AM|PM upper (yes, opposite: date +%H%M%S%P%p)
4556             P=>'am', #am|pm lower
4557             S=>'SS',
4558             t=>"\t",
4559             T=>'HH24:MI:SS',
4560             u=>'DoW', #day of week 1..7, 1=mon 7=sun
4561             w=>'DoW0', #day of week 0..6, 1=mon 0=sun
4562             #U=>'WoYs', #week num of year 00..53, sunday as first day of week
4563             #V=>'UKE', #ISO week num of year 01..53, monday as first day of week
4564             #W=>'WoYm', #week num of year 00..53, monday as first day of week, not ISO!
4565             #x=>$ENV{locale's date representation}, #e.g. MM/DD/YY
4566             #X=>$ENV{locale's time representation}, #e.g. HH/MI/SS
4567             y=>'YY',
4568             Y=>'YYYY',
4569             #z=>'TZHHMI', #time zone hour minute e.g. -0430
4570             #':z'=>'TZHH:MI',
4571             #'::z'=>'TZHH:MI:SS',
4572             #':::z'=>'TZ', #number of :'s necessary precision, e.g. -02 or +03:30
4573             #Z=>'TZN', #e.g. CET, EDT, ...
4574             );
4575 40         178 my $pkeys=join"|",keys%p;
4576 40         1229 $format=~s,\%($pkeys),$p{$1},g;
4577 40         285 $format=~s/($Tms_pattern)/$Tms_str{$1}[1+$lt[$Tms_str{$1}[0]]]/g;
4578 40         49 $format=~s/YYYY / 1900+$lt[5] /gxe;
  13         27  
4579 40 0       31 $format=~s/(\s?)yyyy / $lt[5]==(localtime)[5]?"":$1.(1900+$lt[5])/gxe;
  0         0  
4580 40         40 $format=~s/YY / sprintf("%02d",$lt[5]%100) /gxei;
  1         5  
4581 40         42 $format=~s|CC | sprintf("%02d",(1900+$lt[5])/100) |gxei;
  2         9  
4582 40         35 $format=~s/MM / sprintf("%02d",$lt[4]+1) /gxe;
  5         16  
4583 40         21 $format=~s/mm / sprintf("%d",$lt[4]+1) /gxe;
  0         0  
4584 40         33 $format=~s,M/ , ($lt[4]+1).'/' ,gxe;
  2         4  
4585 40         32 $format=~s,/M , '/'.($lt[4]+1) ,gxe;
  1         2  
4586 40         27 $format=~s/DD / sprintf("%02d",$lt[3]) /gxe;
  6         15  
4587 40         60 $format=~s/d0w|dow0 / $lt[6] /gxei;
  4         9  
4588 40 100       33 $format=~s/dow / $lt[6]?$lt[6]:7 /gxei;
  2         5  
4589 40         45 $format=~s/d0y|doy0 / $lt[7] /gxei; #0-364 (365 leap)
  2         3  
4590 40         37 $format=~s/doy / $lt[7]+1 /gxei; #1-365 (366 leap)
  1         3  
4591 40         56 $format=~s/D(?![AaGgYyEeNn]) / $lt[3] /gxe; #EN pga desember og wednesday
  5         7  
4592 40         24 $format=~s/dd / sprintf("%d",$lt[3]) /gxe;
  0         0  
4593 40 100 50     53 $format=~s/hh12|HH12 / sprintf("%02d",$lt[2]<13?$lt[2]||12:$lt[2]-12)/gxe;
  8         41  
4594 40         45 $format=~s/HH24|HH24|HH|hh / sprintf("%02d",$lt[2]) /gxe;
  4         14  
4595 40         45 $format=~s/MI / sprintf("%02d",$lt[1]) /gxei;
  12         22  
4596 40   100     36 $format=~s{SS\.([1-9]) }{ sprintf("%0*.$1f",3+$1,$lt[0]+(repl($time,qr/^[^\.]+/)||0)) }gxei;
  4         23  
4597 40         48 $format=~s/SS(?:\.0)? / sprintf("%02d",$lt[0]) /gxei;
  5         14  
4598 40 100       40 $format=~s/(?:am|pm|apm|xm) / $lt[2]<13 ? 'am' : 'pm' /gxe;
  4         9  
4599 40 100       45 $format=~s/(?:AM|PM|APM|XM) / $lt[2]<13 ? 'AM' : 'PM' /gxe;
  4         8  
4600 40         43 $format=~s/WWI|WW / sprintf("%02d",weeknum($time)) /gxei;
  0         0  
4601 40         27 $format=~s/W / weeknum($time) /gxei;
  0         0  
4602 40         204 $format;
4603             }
4604              
4605             =head2 easter
4606              
4607             Input: A year (a four digit number)
4608              
4609             Output: array of two numbers: day and month of Easter Sunday that year. Month 3 means March and 4 means April.
4610              
4611             sub easter { use integer;my$Y=shift;my$C=$Y/100;my$L=($C-$C/4-($C-($C-17)/25)/3+$Y%19*19+15)%30;
4612             (($L-=$L>28||($L>27?1-(21-$Y%19)/11:0))-=($Y+$Y/4+$L+2-$C+$C/4)%7)<4?($L+28,3):($L-3,4) }
4613              
4614             ...is a "golfed" version of Oudins algorithm (1940) L
4615             (see also http://www.smart.net/~mmontes/ec-cal.html )
4616              
4617             Valid for any Gregorian year. Dates repeat themselves after 70499183
4618             lunations = 2081882250 days = ca 5699845 years. However, our planet will
4619             by then have a different rotation and spin time...
4620              
4621             Example:
4622              
4623             ( $day, $month ) = easter( 2012 ); # $day == 8 and $month == 4
4624              
4625             Example 2:
4626              
4627             my @e=map sprintf("%02d%02d", reverse(easter($_))), 1800..300000;
4628             print "First: ".min(@e)." Last: ".max(@e)."\n"; # First: 0322 Last: 0425
4629              
4630             Note: The Spencer Jones formula differs Oudins used in C in some years
4631             before 1498. However, in that period the Julian calendar with a different formula was
4632             used anyway. Countries introduced the current Gregorian calendar between 1583 and 1926.
4633              
4634             =cut
4635              
4636 33     33 1 43995 sub easter { use integer;my$Y=shift;my$C=$Y/100;my$L=($C-$C/4-($C-($C-17)/25)/3+$Y%19*19+15)%30;
  33     13503   42  
  33         91  
  13503         86855  
  13503         9013  
  13503         13396  
4637 13503 100 66     39767 (($L-=$L>28||($L>27?1-(21-$Y%19)/11:0))-=($Y+$Y/4+$L+2-$C+$C/4)%7)<4?($L+28,3):($L-3,4) }
4638              
4639              
4640             =head2 time_fp
4641              
4642             No input arguments.
4643              
4644             Return the same number as perls C except with decimals (fractions of a second, _fp as in floating point number).
4645              
4646             print time_fp(),"\n";
4647             print time(),"\n";
4648              
4649             Could write:
4650              
4651             1116776232.38632
4652              
4653             ...if that is the time now.
4654              
4655             Or just:
4656              
4657             1116776232
4658              
4659             ...from perl's internal C if C isn't installed and available.
4660              
4661              
4662             =cut
4663              
4664             sub time_fp { # {return 0+gettimeofday} is just as well?
4665 12446 50   12446 1 10467 eval{ require Time::HiRes } or return time();
  12446         39223  
4666 12446         27137 my($sec,$mic)=Time::HiRes::gettimeofday();
4667 12446         25878 return $sec+$mic/1e6; #1e6 not portable?
4668             }
4669              
4670             sub timems {
4671 0 0   0 0 0 eval{ require Time::HiRes } or return time();
  0         0  
4672 0         0 my($sec,$mic)=Time::HiRes::gettimeofday();
4673 0         0 return $sec*1000+$mic/1e3;
4674             }
4675              
4676             =head2 sleep_fp
4677              
4678             sleep_fp() work as the built in C<< sleep() >> but also accepts fractional seconds:
4679              
4680             sleep_fp(0.020); # sleeps for 20 milliseconds
4681              
4682             Sub sleep_fp do a C, thus it might take some
4683             extra time the first call. To avoid that, add C<< use Time::HiRes >>
4684             to your code. Sleep_fp should not be trusted for accuracy to more than
4685             a tenth of a second. Virtual machines tend to be less accurate (sleep
4686             longer) than physical ones. This was tested on VMware and RHEL
4687             (Linux). See also L.
4688              
4689             =head2 sleeps
4690              
4691             =head2 sleepms
4692              
4693             =head2 sleepus
4694              
4695             =head2 sleepns
4696              
4697             sleep_fp(0.020); #sleeps for 20 milliseconds
4698             sleeps(0.020); #sleeps for 20 milliseconds, sleeps() is a synonym to sleep_fp()
4699             sleepms(20); #sleeps for 20 milliseconds
4700             sleepus(20000); #sleeps for 20000 microseconds = 20 milliseconds
4701             sleepns(20000000); #sleeps for 20 million nanoseconds = 20 milliseconds
4702              
4703             =cut
4704              
4705 5 50   5 1 18 sub sleep_fp { eval{require Time::HiRes} or (sleep(shift()),return);Time::HiRes::sleep(shift()) }
  5         30  
  5         170554  
4706 1 50   1 1 5 sub sleeps { eval{require Time::HiRes} or (sleep(shift()),return);Time::HiRes::sleep(shift()) }
  1         10  
  1         10093  
4707 1 50   1 1 8 sub sleepms { eval{require Time::HiRes} or (sleep(shift()/1e3),return);Time::HiRes::sleep(shift()/1e3) }
  1         9  
  1         10092  
4708 1 50   1 1 6 sub sleepus { eval{require Time::HiRes} or (sleep(shift()/1e6),return);Time::HiRes::sleep(shift()/1e6) }
  1         9  
  1         10088  
4709 1 50   1 1 22 sub sleepns { eval{require Time::HiRes} or (sleep(shift()/1e9),return);Time::HiRes::sleep(shift()/1e9) }
  1         9  
  1         10172  
4710              
4711             =head2 eta
4712              
4713             Estimated time of arrival (ETA).
4714              
4715             for(@files){
4716             ...do work on file...
4717             my $eta = eta( ++$i, 0+@files ); # file now, number of files
4718             print "" . localtime($eta);
4719             }
4720              
4721             ..DOC MISSING..
4722              
4723             =head2 etahhmm
4724              
4725             ...NOT YET
4726              
4727             =cut
4728              
4729             #http://en.wikipedia.org/wiki/Kalman_filter god idé?
4730             our %Eta;
4731             our $Eta_forgetfulness=2;
4732             sub eta {
4733 2012 100   2012 1 7796 my($id,$pos,$end,$time_fp)=( @_==2 ? (join(";",caller()),@_) : @_ );
4734 2012   66     2320 $time_fp||=time_fp();
4735 2012   100     2638 my $a=$Eta{$id}||=[];
4736 2012         2273 push @$a, [$pos,$time_fp];
4737 2012 100       3320 @$a=@$a[map$_*2,0..@$a/2] if @$a>40; #hm 40
4738 2012 100 100     5321 splice(@$a,-2,1) if @$a>1 and $$a[-2][0]==$$a[-1][0]; #same pos as last
4739 2012 100       2254 return undef if @$a<2;
4740 2001         1194 my @eta;
4741 2001         1636 for(2..@$a){
4742 58602         83049 push @eta, $$a[-1][1] + ($end-$$a[-1][0]) * ($$a[-1][1]-$$a[-$_][1])/($$a[-1][0]-$$a[-$_][0]);
4743             }
4744 2001         1566 my($sum,$sumw,$w)=(0,0,1);
4745 2001         1507 for(@eta){
4746 58602         35348 $sum+=$w*$_;
4747 58602         32676 $sumw+=$w;
4748 58602         38103 $w/=$Eta_forgetfulness;
4749             }
4750 2001         1409 my $avg=$sum/$sumw;
4751 2001         3351 return $avg;
4752             # return avg(@eta);
4753             #return $$a[-1][1] + ($end-$$a[-1][0]) * ($$a[-1][1]-$$a[-2][1])/($$a[-1][0]-$$a[-2][0]);
4754 0         0 1;
4755             }
4756              
4757             =head2 sleep_until
4758              
4759             sleep_until(0.5) sleeps until half a second has passed since the last
4760             call to sleep_until. This example starts the next job excactly ten
4761             seconds after the last job started even if the last job lasted for a
4762             while (but not more than ten seconds):
4763              
4764             for(@jobs){
4765             sleep_until(10);
4766             print localtime()."\n";
4767             ...heavy job....
4768             }
4769              
4770             Might print:
4771              
4772             Thu Jan 12 16:00:00 2012
4773             Thu Jan 12 16:00:10 2012
4774             Thu Jan 12 16:00:20 2012
4775              
4776             ...and so on even if the C<< ...heavy job... >>-part takes more than a
4777             second to complete. Whereas if sleep(10) was used, each job would
4778             spend more than ten seconds in average since the work time would be
4779             added to sleep(10).
4780              
4781             Note: sleep_until() will remember the time of ANY last call of this sub,
4782             not just the one on the same line in the source code (this might change
4783             in the future). The first call to sleep_until() will be the same as
4784             sleep_fp() or Perl's own sleep() if the argument is an integer.
4785              
4786             =cut
4787              
4788             our $Time_last_sleep_until;
4789             sub sleep_until {
4790 0 0   0 1 0 my $s=@_==1?shift():0;
4791 0         0 my $time=time_fp();
4792 0         0 my $sleep=$s-($time-nvl($Time_last_sleep_until,0));
4793 0         0 $Time_last_sleep_until=time;
4794 0 0       0 sleep_fp($sleep) if $sleep>0;
4795             }
4796              
4797             my %thr;
4798             sub throttle {
4799 0     0 0 0 my($times,$mintime,$what)=@_;
4800 0   0     0 $what||=join(":",@{[caller(1)]}[3,2]);
  0         0  
4801 0   0     0 $thr{$what}||=[];
4802 0         0 my $thr=$thr{$what};
4803 0         0 push @$thr,time_fp();
4804 0 0       0 return if @$thr<$times;
4805 0         0 my $since=$$thr[-1]-shift(@$thr);
4806 0 0       0 my $sleep=$since<$mintime?$mintime-$since:0;
4807 0         0 sleep_fp($sleep);
4808 0         0 return $sleep;
4809             }
4810              
4811             =head2 leapyear
4812              
4813             B A year. A four digit number.
4814              
4815             B True (1) or false (0) of whether the year is a leap year or
4816             not. (Uses current calendar even for periods before leapyears was used).
4817              
4818             print join(", ",grep leapyear($_), 1900..2014)."\n";
4819              
4820             1904, 1908, 1912, 1916, 1920, 1924, 1928, 1932, 1936, 1940, 1944, 1948, 1952, 1956,
4821             1960, 1964, 1968, 1972, 1976, 1980, 1984, 1988, 1992, 1996, 2000, 2004, 2008, 2012
4822              
4823             Note: 1900 is not a leap year, but 2000 is. Years divided by 100 is a leap year only
4824             if it can be divided by 400.
4825              
4826             =cut
4827              
4828 0 0   0 1 0 sub leapyear{$_[0]%400?$_[0]%100?$_[0]%4?0:1:0:1} #bool
    0          
    0          
4829              
4830             #http://rosettacode.org/wiki/Levenshtein_distance#Perl
4831             our %ldist_cache;
4832             sub ldist {
4833 0     0 0 0 my($s,$t,$l) = @_;
4834 0 0       0 return length($t) if !$s;
4835 0 0       0 return length($s) if !$t;
4836 0 0 0     0 %ldist_cache=() if !$l and 1000<0+%ldist_cache;
4837             $ldist_cache{$s,$t} ||=
4838 0   0     0 do {
4839 0         0 my($s1,$t1) = ( substr($s,1), substr($t,1) );
4840 0 0       0 substr($s,0,1) eq substr($t,0,1)
4841             ? ldist($s1,$t1)
4842             : 1 + min( ldist($s1,$t1,1+$l), ldist($s,$t1,1+$l), ldist($s1,$t,1+$l) );
4843             };
4844             }
4845              
4846             =head1 OTHER
4847              
4848             =head2 nvl
4849              
4850             The I function (or I function)
4851              
4852             C takes two or more arguments. (Oracles nvl-function take just two)
4853              
4854             Returns the value of the first input argument with length() > 0.
4855              
4856             Return I if there is no such input argument.
4857              
4858             In perl 5.10 and perl 6 this will most often be easier with the C< //
4859             > operator, although C and C<< // >> treats empty strings C<"">
4860             differently. Sub nvl here considers empty strings and undef the same.
4861              
4862             =cut
4863              
4864             sub nvl {
4865 38 100 66 38 1 195 return $_[0] if defined $_[0] and length($_[0]) or @_==1;
      100        
4866 22 100       55 return $_[1] if @_==2;
4867 9 100       26 return nvl(@_[1..$#_]) if @_>2;
4868 1         3 return undef;
4869             }
4870              
4871             =head2 decode_num
4872              
4873             See L.
4874              
4875             =head2 decode
4876              
4877             C and C works just as Oracles C.
4878              
4879             C and C accordingly uses perl operators C and C<==> for comparison.
4880              
4881             Examples:
4882              
4883             my $a=123;
4884             print decode($a, 123,3, 214,4, $a); # prints 3
4885             print decode($a, 123=>3, 214=>4, $a); # prints 3, same thing since => is synonymous to comma in Perl
4886              
4887             The first argument is tested against the second, fourth, sixth and so on,
4888             and then the third, fifth, seventh and so on is
4889             returned if decode() finds an equal string or number.
4890              
4891             In the above example: 123 maps to 3, 124 maps to 4 and the last argument $a is returned elsewise.
4892              
4893             More examples:
4894              
4895             my $a=123;
4896             print decode($a, 123=>3, 214=>7, $a); # also 3, note that => is synonym for , (comma) in perl
4897             print decode($a, 122=>3, 214=>7, $a); # prints 123
4898             print decode($a, 123.0 =>3, 214=>7); # prints 3
4899             print decode($a, '123.0'=>3, 214=>7); # prints nothing (undef), no last argument default value here
4900             print decode_num($a, 121=>3, 221=>7, '123.0','b'); # prints b
4901              
4902             Sort of:
4903              
4904             decode($string, %conversion, $default);
4905              
4906             The last argument is returned as a default if none of the keys in the keys/value-pairs matched.
4907              
4908             A more perl-ish and often faster way of doing the same:
4909              
4910             {123=>3, 214=>7}->{$a} || $a # (beware of 0)
4911              
4912             =cut
4913              
4914             sub decode {
4915 4 50   4 1 9 croak "Must have a mimimum of two arguments" if @_<2;
4916 4         2 my $uttrykk=shift;
4917 4 50 100     8 if(defined$uttrykk){ shift eq $uttrykk and return shift or shift for 1..@_/2 }
  4   33     28  
4918 0   0     0 else { !defined shift and return shift or shift for 1..@_/2 }
      0        
4919 2         5 return shift;
4920             }
4921              
4922             sub decode_num {
4923 1 50   1 1 4 croak "Must have a mimimum of two arguments" if @_<2;
4924 1         2 my $uttrykk=shift;
4925 1 50 100     3 if(defined$uttrykk){ shift == $uttrykk and return shift or shift for 1..@_/2 }
  1   33     12  
4926 0   0     0 else { !defined shift and return shift or shift for 1..@_/2 }
      0        
4927 0         0 return shift;
4928             }
4929              
4930             =head2 qrlist
4931              
4932             Input: An array of values to be used to test againts for existence.
4933              
4934             Output: A reference to a regular expression. That is a C
4935              
4936             The regex sets $1 if it match.
4937              
4938             Example:
4939              
4940             my @list=qw/ABc XY DEF DEFG XYZ/;
4941             my $filter=qrlist("ABC","DEF","XY."); # makes a regex of it qr/^(\QABC\E|\QDEF\E|\QXY.\E)$/
4942             my @filtered= grep { $_ =~ $filter } @list; # returns DEF and XYZ, but not XYZ because the . char is taken literally
4943              
4944             Note: Filtering with hash lookups are WAY faster.
4945              
4946             Source:
4947              
4948             sub qrlist (@) { my $str=join"|",map quotemeta, @_; qr/^($str)$/ }
4949              
4950             =cut
4951              
4952             sub qrlist (@) {
4953 0     0 1 0 my $str=join"|",map quotemeta,@_;
4954 0         0 return qr/^($str)$/;
4955             }
4956              
4957             =head2 ansicolor
4958              
4959             Perhaps easier to use than L ?
4960              
4961             B One argument. A string where the char C<¤> have special
4962             meaning and is replaced by color codings depending on the letter
4963             following the C<¤>.
4964              
4965             B The same string, but with C<¤letter> replaced by ANSI color
4966             codes respected by many types terminal windows. (xterm, telnet, ssh,
4967             telnet, rlog, vt100, cygwin, rxvt and such...).
4968              
4969             B
4970              
4971             ¤r red
4972             ¤g green
4973             ¤b blue
4974             ¤y yellow
4975             ¤m magenta
4976             ¤B bold
4977             ¤u underline
4978             ¤c clear
4979             ¤¤ reset, quits and returns to default text color.
4980              
4981             B
4982              
4983             print ansicolor("This is maybe ¤ggreen¤¤?");
4984              
4985             Prints I where the word I is shown in green.
4986              
4987             If L is not installed or not found, returns the input
4988             string with every C<¤> including the following code letters
4989             removed. (That is: ansicolor is safe to use even if Term::ANSIColor is
4990             not installed, you just don't get the colors).
4991              
4992             See also L.
4993              
4994             =cut
4995              
4996             sub ansicolor {
4997 0     0 1 0 my $txt=shift;
4998 0 0       0 eval{require Term::ANSIColor} or return replace($txt,qr/¤./);
  0         0  
4999 0         0 my %h=qw/r red g green b blue y yellow m magenta B bold u underline c clear ¤ reset/;
5000 0         0 my $re=join"|",keys%h;
5001 0         0 $txt=~s/¤($re)/Term::ANSIColor::color($h{$1})/ge;
  0         0  
5002 0         0 return $txt;
5003             }
5004              
5005             =head2 ccn_ok
5006              
5007             Checks if a Credit Card number (CCN) has correct control digits according to the LUHN-algorithm from 1960.
5008             This method of control digits is used by MasterCard, Visa, American Express,
5009             Discover, Diners Club / Carte Blanche, JCB and others.
5010              
5011             B
5012              
5013             A credit card number. Can contain non-digits, but they are removed internally before checking.
5014              
5015             B
5016              
5017             Something true or false.
5018              
5019             Or more accurately:
5020              
5021             Returns C (false) if the input argument is missing digits.
5022              
5023             Returns 0 (zero, which is false) is the digits is not correct according to the LUHN algorithm.
5024              
5025             Returns 1 or the name of a credit card company (true either way) if the last digit is an ok control digit for this ccn.
5026              
5027             The name of the credit card company is returned like this (without the C<'> character)
5028              
5029             Returns (wo '') Starts on Number of digits
5030             ------------------------------ ------------------------ ----------------
5031             'MasterCard' 51-55 16
5032             'Visa' 4 13 eller 16
5033             'American Express' 34 eller 37 15
5034             'Discover' 6011 16
5035             'Diners Club / Carte Blanche' 300-305, 36 eller 38 14
5036             'JCB' 3 16
5037             'JCB' 2131 eller 1800 15
5038              
5039             And should perhaps have had:
5040              
5041             'enRoute' 2014 eller 2149 15
5042              
5043             ...but that card uses either another control algorithm or no control
5044             digits at all. So C is never returned here.
5045              
5046             If the control digits is valid, but the input does not match anything in the column C, 1 is returned.
5047              
5048             (This is also the same control digit mechanism used in Norwegian KID numbers on payment bills)
5049              
5050             The first digit in a credit card number is supposed to tell what "industry" the card is meant for:
5051              
5052             MII Digit Value Issuer Category
5053             --------------------------- ----------------------------------------------------
5054             0 ISO/TC 68 and other industry assignments
5055             1 Airlines
5056             2 Airlines and other industry assignments
5057             3 Travel and entertainment
5058             4 Banking and financial
5059             5 Banking and financial
5060             6 Merchandizing and banking
5061             7 Petroleum
5062             8 Telecommunications and other industry assignments
5063             9 National assignment
5064              
5065             ...although this has no meaning to C.
5066              
5067             The first six digits is I, that is the bank
5068             (probably). The rest in the "account number", except the last digits,
5069             which is the control digit. Max length on credit card numbers are 19
5070             digits.
5071              
5072             =cut
5073              
5074             sub ccn_ok {
5075 0     0 1 0 my $ccn=shift(); #credit card number
5076 0         0 $ccn=~s/\D+//g;
5077 0 0       0 if(KID_ok($ccn)){
5078 0 0       0 return "MasterCard" if $ccn=~/^5[1-5]\d{14}$/;
5079 0 0       0 return "Visa" if $ccn=~/^4\d{12}(?:\d{3})?$/;
5080 0 0       0 return "American Express" if $ccn=~/^3[47]\d{13}$/;
5081 0 0       0 return "Discover" if $ccn=~/^6011\d{12}$/;
5082 0 0       0 return "Diners Club / Carte Blanche" if $ccn=~/^3(?:0[0-5]\d{11}|[68]\d{12})$/;
5083 0 0       0 return "JCB" if $ccn=~/^(?:3\d{15}|(?:2131|1800)\d{11})$/;
5084 0         0 return 1;
5085             }
5086             #return "enRoute" if $ccn=~/^(?:2014|2149)\d{11}$/; #ikke LUHN-krav?
5087 0         0 return 0;
5088             }
5089              
5090             =head2 KID_ok
5091              
5092             Checks if a norwegian KID number has an ok control digit.
5093              
5094             To check if a customer has typed the number correctly.
5095              
5096             This uses the LUHN algorithm (also known as mod-10) from 1960 which is also used
5097             internationally in control digits for credit card numbers, and Canadian social security ID numbers as well.
5098              
5099             The algorithm, as described in Phrack (47-8) (a long time hacker online publication):
5100              
5101             "For a card with an even number of digits, double every odd numbered
5102             digit and subtract 9 if the product is greater than 9. Add up all the
5103             even digits as well as the doubled-odd digits, and the result must be
5104             a multiple of 10 or it's not a valid card. If the card has an odd
5105             number of digits, perform the same addition doubling the even numbered
5106             digits instead."
5107              
5108             B A KID-nummer. Must consist of digits 0-9 only, otherwise a die (croak) happens.
5109              
5110             B
5111              
5112             - Returns undef if the input argument is missing.
5113              
5114             - Returns 0 if the control digit (the last digit) does not satify the LUHN/mod-10 algorithm.
5115              
5116             - Returns 1 if ok
5117              
5118             B L
5119              
5120             =cut
5121              
5122             sub KID_ok {
5123 0 0   0 1 0 croak "Non-numeric argument" if $_[0]=~/\D/;
5124 0 0       0 my @k=split//,shift or return undef;
5125 0         0 my $s;$s+=pop(@k)+[qw/0 2 4 6 8 1 3 5 7 9/]->[pop@k] while @k;
  0         0  
5126 0 0       0 $s%10==0?1:0;
5127             }
5128              
5129              
5130              
5131             =head2 range
5132              
5133             B
5134              
5135             One or more numeric arguments:
5136              
5137             First: x (first returned element)
5138              
5139             Second: y (last but not including)
5140              
5141             Third: step, default 1. The step between each returned element
5142              
5143             If a fourth, fifth and so on arguments are given, they change the step for each returned element. As first derivative, second derivative.
5144              
5145             B
5146              
5147             If one argument: returns the array C<(0 .. x-1)>
5148              
5149             If two arguments: returns the array C<(x .. y-1)>
5150              
5151             If three arguments: The default step is 1. Use a third argument to use a different step.
5152              
5153             B
5154              
5155             print join ",", range(11); # prints 0,1,2,3,4,5,6,7,8,9,10 (but not 11)
5156             print join ",", range(2,11); # 2,3,4,5,6,7,8,9,10 (but not 11)
5157             print join ",", range(11,2,-1); # 11,10,9,8,7,6,5,4,3
5158             print join ",", range(2,11,3); # 2,5,8
5159             print join ",", range(11,2,-3); # 11,8,5
5160             print join ",", range(11,2,+3); # prints nothing
5161              
5162             print join ", ",range(2,11,1,0.1); # 2, 3, 4.1, 5.3, 6.6, 8, 9.5 adds 0.1 to step each time
5163             print join ", ",range(2,11,1,0.1,-0.01); # 2, 3, 4.1, 5.29, 6.56, 7.9, 9.3, 10.75
5164              
5165             Note: In the Python language and others, C is a build in iterator (a
5166             generator), not an array. This saves memory for large sets and sometimes time.
5167             Use C in L to get a similar lazy generator in Perl.
5168              
5169             =cut
5170              
5171             sub range {
5172 8 100   8 1 26 return _range_accellerated(@_) if @_>3; #se under
5173 6         8 my($x,$y,$jump)=@_;
5174 6 100       19 return ( 0 .. $x-1 ) if @_==1;
5175 5 100       47 return ( $x .. $y-1 ) if @_==2;
5176 4 50 33     21 croak "Wrong number of arguments or jump==0" if @_!=3 or $jump==0;
5177 4         5 my @r;
5178 4 100       7 if($jump>0){ while($x<$y){ push @r, $x; $x+=$jump } }
  2         6  
  772         452  
  772         834  
5179 2         4 else { while($x>$y){ push @r, $x; $x+=$jump } }
  12         10  
  12         14  
5180 4         37 return @r;
5181             }
5182              
5183             #jumps derivative, double der., trippled der usw
5184             sub _range_accellerated {
5185 2     2   3 my($x,$y,@jump)=@_;
5186 2         3 my @r;
5187 2 50   17   8 my $test = $jump[0]>=0 ? sub{$x<$y} : sub{$x>$y};
  17         29  
  0         0  
5188 2         4 while(&$test()){
5189 15         19 push @r, $x;
5190 15         11 $x+=$jump[0];
5191 15         26 $jump[$_-1]+=$jump[$_] for 1..$#jump;
5192             }
5193 2         13 return @r;
5194             }
5195              
5196             =head2 globr
5197              
5198             Works like and uses Perls builtin C<< glob() >> function but adds support for ranges
5199             with C<< {from..to} >> and C<< {from..to..step} >>. Like brace expansion in bash.
5200              
5201             Examples:
5202              
5203             my @arr = glob "X{a,b,c,d}Z"; # return four element array: XaZ XbZ XcZ XdZ
5204             my @arr = globr "X{a,b,c,d}Z"; # same as above
5205             my @arr = globr "X{a..d}Z"; # same as above
5206             my @arr = globr "X{a..d..2}Z"; # step 2, returns array: XaZ XcZ
5207             my @arr = globr "X{aa..bz..13}Z"; # XaaZ XanZ XbaZ XbnZ
5208             my @arr = globr "{1..12}b"; # 1b 2b 3b 4b 5b 6b 7b 8b 9b 10b 11b 12b
5209             my @arr = globr "{01..12}b"; # 01b 02b 03b 04b 05b 06b 07b 08b 09b 10b 11b 12b
5210             my @arr = globr "{01..12..3}b"; # 01b 04b 07b 10b
5211              
5212             =cut
5213              
5214             sub globr($) {
5215 8     8 1 11 my $p=shift;
5216 8 100       36 $p=~s/\{(\w+)\.\.(\w+)(\.\.(\d+))?\}/my$i=0;"{".join(",",grep{$4?!($i++%$4):1}$1..$2)."}";/eg;
  7         6  
  7         38  
  133         199  
5217 8         669 glob $p;
5218             }
5219              
5220             =head2 permutations
5221              
5222             How many ways (permutations) can six people be placed around a table:
5223              
5224             If one person: one
5225             If two persons: two (they can swap places)
5226             If three persons: six
5227             If four persons: 24
5228             If five persons: 120
5229             If six persons: 720
5230              
5231             The formula is C where the postfix unary operator C, also known as I is defined like:
5232             C. Example: C<5! = 5 * 4 * 3 * 2 * 1 = 120>.Run this to see the 100 first C<< n! >>
5233              
5234             perl -MAcme::Tools -le'$i=big(1);print "$_!=",$i*=$_ for 1..100'
5235              
5236             1! = 1
5237             2! = 2
5238             3! = 6
5239             4! = 24
5240             5! = 120
5241             6! = 720
5242             7! = 5040
5243             8! = 40320
5244             9! = 362880
5245             10! = 3628800
5246             .
5247             .
5248             .
5249             100! = 93326215443944152681699238856266700490715968264381621468592963895217599993229915608941463976156518286253697920827223758251185210916864000000000000000000000000
5250              
5251             C takes a list and return a list of arrayrefs for each
5252             of the permutations of the input list:
5253              
5254             permutations('a','b'); #returns (['a','b'],['b','a'])
5255              
5256             permutations('a','b','c'); #returns (['a','b','c'],['a','c','b'],
5257             # ['b','a','c'],['b','c','a'],
5258             # ['c','a','b'],['c','b','a'])
5259              
5260             Up to five input arguments C is probably as fast as it
5261             can be in this pure perl implementation (see source). For more than
5262             five, it could be faster. How fast is it now: Running with different
5263             n, this many time took that many seconds:
5264              
5265             n times seconds
5266             -- ------- ---------
5267             2 100000 0.32
5268             3 10000 0.09
5269             4 10000 0.33
5270             5 1000 0.18
5271             6 100 0.27
5272             7 10 0.21
5273             8 1 0.17
5274             9 1 1.63
5275             10 1 17.00
5276              
5277             If the first argument is a coderef, that sub will be called for each permutation and the return from those calls with be the real return from C. For example this:
5278              
5279             print for permutations(sub{join"",@_},1..3);
5280              
5281             ...will print the same as:
5282              
5283             print for map join("",@$_), permutations(1..3);
5284              
5285             ...but the first of those two uses less RAM if 3 has been say 9.
5286             Changing 3 with 10, and many computers hasn't enough memory
5287             for the latter.
5288              
5289             The examples prints:
5290              
5291             123
5292             132
5293             213
5294             231
5295             312
5296             321
5297              
5298             If you just want to say calculate something on each permutation,
5299             but is not interested in the list of them, you just don't
5300             take the return. That is:
5301              
5302             my $ant;
5303             permutations(sub{$ant++ if $_[-1]>=$_[0]*2},1..9);
5304              
5305             ...is the same as:
5306              
5307             $$_[-1]>=$$_[0]*2 and $ant++ for permutations(1..9);
5308              
5309             ...but the first uses next to nothing of memory compared to the latter. They have about the same speed.
5310             (The examples just counts the permutations where the last number is at least twice as large as the first)
5311              
5312             C was created to find all combinations of a persons
5313             name. This is useful in "fuzzy" name searches with
5314             L if you can not be certain what is first, middle
5315             and last names. In foreign or unfamiliar names it can be difficult to
5316             know that.
5317              
5318             =cut
5319              
5320             #TODO: se test_perl.pl
5321              
5322             sub permutations {
5323 2 50   2 1 14 my $code=ref($_[0]) eq 'CODE' ? shift() : undef;
5324 2 50 33     11 $code and @_<6 and return map &$code(@$_),permutations(@_);
5325              
5326 2 50       8 return [@_] if @_<2;
5327              
5328 2 100       27 return ([@_[0,1]],[@_[1,0]]) if @_==2;
5329              
5330 1 50       39 return ([@_[0,1,2]],[@_[0,2,1]],[@_[1,0,2]],
5331             [@_[1,2,0]],[@_[2,0,1]],[@_[2,1,0]]) if @_==3;
5332              
5333 0 0       0 return ([@_[0,1,2,3]],[@_[0,1,3,2]],[@_[0,2,1,3]],[@_[0,2,3,1]],
5334             [@_[0,3,1,2]],[@_[0,3,2,1]],[@_[1,0,2,3]],[@_[1,0,3,2]],
5335             [@_[1,2,0,3]],[@_[1,2,3,0]],[@_[1,3,0,2]],[@_[1,3,2,0]],
5336             [@_[2,0,1,3]],[@_[2,0,3,1]],[@_[2,1,0,3]],[@_[2,1,3,0]],
5337             [@_[2,3,0,1]],[@_[2,3,1,0]],[@_[3,0,1,2]],[@_[3,0,2,1]],
5338             [@_[3,1,0,2]],[@_[3,1,2,0]],[@_[3,2,0,1]],[@_[3,2,1,0]]) if @_==4;
5339              
5340 0 0       0 return ([@_[0,1,2,3,4]],[@_[0,1,2,4,3]],[@_[0,1,3,2,4]],[@_[0,1,3,4,2]],[@_[0,1,4,2,3]],
5341             [@_[0,1,4,3,2]],[@_[0,2,1,3,4]],[@_[0,2,1,4,3]],[@_[0,2,3,1,4]],[@_[0,2,3,4,1]],
5342             [@_[0,2,4,1,3]],[@_[0,2,4,3,1]],[@_[0,3,1,2,4]],[@_[0,3,1,4,2]],[@_[0,3,2,1,4]],
5343             [@_[0,3,2,4,1]],[@_[0,3,4,1,2]],[@_[0,3,4,2,1]],[@_[0,4,1,2,3]],[@_[0,4,1,3,2]],
5344             [@_[0,4,2,1,3]],[@_[0,4,2,3,1]],[@_[0,4,3,1,2]],[@_[0,4,3,2,1]],[@_[1,0,2,3,4]],
5345             [@_[1,0,2,4,3]],[@_[1,0,3,2,4]],[@_[1,0,3,4,2]],[@_[1,0,4,2,3]],[@_[1,0,4,3,2]],
5346             [@_[1,2,0,3,4]],[@_[1,2,0,4,3]],[@_[1,2,3,0,4]],[@_[1,2,3,4,0]],[@_[1,2,4,0,3]],
5347             [@_[1,2,4,3,0]],[@_[1,3,0,2,4]],[@_[1,3,0,4,2]],[@_[1,3,2,0,4]],[@_[1,3,2,4,0]],
5348             [@_[1,3,4,0,2]],[@_[1,3,4,2,0]],[@_[1,4,0,2,3]],[@_[1,4,0,3,2]],[@_[1,4,2,0,3]],
5349             [@_[1,4,2,3,0]],[@_[1,4,3,0,2]],[@_[1,4,3,2,0]],[@_[2,0,1,3,4]],[@_[2,0,1,4,3]],
5350             [@_[2,0,3,1,4]],[@_[2,0,3,4,1]],[@_[2,0,4,1,3]],[@_[2,0,4,3,1]],[@_[2,1,0,3,4]],
5351             [@_[2,1,0,4,3]],[@_[2,1,3,0,4]],[@_[2,1,3,4,0]],[@_[2,1,4,0,3]],[@_[2,1,4,3,0]],
5352             [@_[2,3,0,1,4]],[@_[2,3,0,4,1]],[@_[2,3,1,0,4]],[@_[2,3,1,4,0]],[@_[2,3,4,0,1]],
5353             [@_[2,3,4,1,0]],[@_[2,4,0,1,3]],[@_[2,4,0,3,1]],[@_[2,4,1,0,3]],[@_[2,4,1,3,0]],
5354             [@_[2,4,3,0,1]],[@_[2,4,3,1,0]],[@_[3,0,1,2,4]],[@_[3,0,1,4,2]],[@_[3,0,2,1,4]],
5355             [@_[3,0,2,4,1]],[@_[3,0,4,1,2]],[@_[3,0,4,2,1]],[@_[3,1,0,2,4]],[@_[3,1,0,4,2]],
5356             [@_[3,1,2,0,4]],[@_[3,1,2,4,0]],[@_[3,1,4,0,2]],[@_[3,1,4,2,0]],[@_[3,2,0,1,4]],
5357             [@_[3,2,0,4,1]],[@_[3,2,1,0,4]],[@_[3,2,1,4,0]],[@_[3,2,4,0,1]],[@_[3,2,4,1,0]],
5358             [@_[3,4,0,1,2]],[@_[3,4,0,2,1]],[@_[3,4,1,0,2]],[@_[3,4,1,2,0]],[@_[3,4,2,0,1]],
5359             [@_[3,4,2,1,0]],[@_[4,0,1,2,3]],[@_[4,0,1,3,2]],[@_[4,0,2,1,3]],[@_[4,0,2,3,1]],
5360             [@_[4,0,3,1,2]],[@_[4,0,3,2,1]],[@_[4,1,0,2,3]],[@_[4,1,0,3,2]],[@_[4,1,2,0,3]],
5361             [@_[4,1,2,3,0]],[@_[4,1,3,0,2]],[@_[4,1,3,2,0]],[@_[4,2,0,1,3]],[@_[4,2,0,3,1]],
5362             [@_[4,2,1,0,3]],[@_[4,2,1,3,0]],[@_[4,2,3,0,1]],[@_[4,2,3,1,0]],[@_[4,3,0,1,2]],
5363             [@_[4,3,0,2,1]],[@_[4,3,1,0,2]],[@_[4,3,1,2,0]],[@_[4,3,2,0,1]],[@_[4,3,2,1,0]]) if @_==5;
5364              
5365 0         0 my(@r,@p,@c,@i,@n); @i=(0,@_); @p=@c=1..@_; @n=1..@_-1;
  0         0  
  0         0  
  0         0  
5366             PERM:
5367 0         0 while(1){
5368 0 0       0 if($code){if(defined wantarray){push(@r,&$code(@i[@p]))}else{&$code(@i[@p])}}else{push@r,[@i[@p]]}
  0 0       0  
  0         0  
  0         0  
  0         0  
5369 0 0       0 for my$i(@n){splice@p,$i,0,shift@p;next PERM if --$c[$i];$c[$i]=$i+1}
  0         0  
  0         0  
  0         0  
5370             return@r
5371 0         0 }
5372             }
5373              
5374             =head2 cart
5375              
5376             Cartesian product
5377              
5378             B
5379              
5380             Input: two or more arrayrefs with accordingly x, y, z and so on number of elements.
5381              
5382             Output: An array of x * y * z number of arrayrefs. The arrays being the cartesian product of the input arrays.
5383              
5384             It can be useful to think of this as joins in SQL. In C
5385             more tables behind C, but without any C condition to join the tables.
5386              
5387             B
5388              
5389             B
5390              
5391             - Either two or more arrayrefs with x, y, z and so on number of elements.
5392              
5393             - Or coderefs to subs containing condition checks. Somewhat like C conditions in SQL.
5394              
5395             B An array of x * y * z number of arrayrefs (the cartesian product)
5396             minus the ones that did not fulfill the condition(s).
5397              
5398             This of is as joins with one or more where conditions as coderefs.
5399              
5400             The coderef input arguments can be placed last or among the array refs
5401             to save both runtime and memory if the conditions depend on
5402             arrays further back.
5403              
5404             B
5405              
5406             for(cart(\@a1,\@a2,\@a3)){
5407             my($a1,$a2,$a3) = @$_;
5408             print "$a1,$a2,$a3\n";
5409             }
5410              
5411             Prints the same as this:
5412              
5413             for my $a1 (@a1){
5414             for my $a2 (@a2){
5415             for my $a3 (@a3){
5416             print "$a1,$a2,$a3\n";
5417             }
5418             }
5419             }
5420              
5421             B (with a condition: the sum of the first two should be dividable with 3)
5422              
5423             for( cart( \@a1, \@a2, sub{sum(@$_)%3==0}, \@a3 ) ) {
5424             my($a1,$a2,$a3)=@$_;
5425             print "$a1,$a2,$a3\n";
5426             }
5427              
5428             Prints the same as this:
5429              
5430             for my $a1 (@a1){
5431             for my $a2 (@a2){
5432             next if 0==($a1+$a2)%3;
5433             for my $a3 (@a3){
5434             print "$a1,$a2,$a3\n";
5435             }
5436             }
5437             }
5438              
5439             B
5440              
5441             my @a1 = (1,2);
5442             my @a2 = (10,20,30);
5443             my @a3 = (100,200,300,400);
5444              
5445             my $s = join"", map "*".join(",",@$_), cart(\@a1,\@a2,\@a3);
5446             ok( $s eq "*1,10,100*1,10,200*1,10,300*1,10,400*1,20,100*1,20,200"
5447             ."*1,20,300*1,20,400*1,30,100*1,30,200*1,30,300*1,30,400"
5448             ."*2,10,100*2,10,200*2,10,300*2,10,400*2,20,100*2,20,200"
5449             ."*2,20,300*2,20,400*2,30,100*2,30,200*2,30,300*2,30,400");
5450              
5451             $s=join"",map "*".join(",",@$_), cart(\@a1,\@a2,\@a3,sub{sum(@$_)%3==0});
5452             ok( $s eq "*1,10,100*1,10,400*1,20,300*1,30,200*2,10,300*2,20,200*2,30,100*2,30,400");
5453              
5454             B
5455              
5456             Returns hashrefs instead of arrayrefs:
5457              
5458             my @cards=cart( #5200 cards: 100 decks of 52 cards
5459             deck => [1..100],
5460             value => [qw/2 3 4 5 6 7 8 9 10 J Q K A/],
5461             col => [qw/heart diamond club star/],
5462             );
5463             for my $card ( mix(@cards) ) {
5464             print "From deck number $$card{deck} we got $$card{value} $$card{col}\n";
5465             }
5466              
5467             Note: using sub-ref filters do not work (yet) in hash-mode. Use grep on result instead.
5468              
5469             =cut
5470              
5471             sub cart {
5472 5     5 1 437 my @ars=@_;
5473 5 100       12 if(!ref($_[0])){ #if hash-mode detected
5474 1   33     3 my(@k,@v); push@k,shift@ars and push@v,shift@ars while @ars;
  1         11  
5475 1         7 return map{my%h;@h{@k}=@$_;\%h}cart(@v);
  24         14  
  24         31  
  24         25  
5476             }
5477 4         6 my @res=map[$_],@{shift@ars};
  4         14  
5478 4         6 for my $ar (@ars){
5479 9 100 50     21 @res=grep{&$ar(@$_)}@res and next if ref($ar) eq 'CODE';
  24         40  
5480 8         7 @res=map{my$r=$_;map{[@$r,$_]}@$ar}@res;
  34         24  
  34         17  
  120         147  
5481             }
5482 4         49 return @res;
5483             }
5484              
5485             sub cart_easy { #not tested/exported http://stackoverflow.com/questions/2457096/in-perl-how-can-i-get-the-cartesian-product-of-multiple-sets
5486 0     0 0 0 my $last = pop @_;
5487 0 0       0 @_ ? (map {my$left=$_; map [@$left, $_], @$last } cart_easy(@_) )
  0         0  
  0         0  
5488             : (map [$_], @$last);
5489             }
5490              
5491             =head2 reduce
5492              
5493             From: Why Functional Programming Matters: L
5494              
5495             L
5496              
5497             DON'T TRY THIS AT HOME, C PROGRAMMERS.
5498              
5499             sub reduce (&@) {
5500             my ($proc, $first, @rest) = @_;
5501             return $first if @rest == 0;
5502             local ($a, $b) = ($first, reduce($proc, @rest));
5503             return $proc->();
5504             }
5505              
5506             Many functions can then be implemented with very little code. Such as:
5507              
5508             sub mean { (reduce {$a + $b} @_) / @_ }
5509              
5510             =cut
5511              
5512             sub reduce (&@) {
5513 0     0 1 0 my ($proc, $first, @rest) = @_;
5514 0 0       0 return $first if @rest == 0;
5515 33     33   102629 no warnings;
  33         83  
  33         19623  
5516 0         0 local ($a, $b) = ($first, reduce($proc, @rest));
5517 0         0 return $proc->();
5518             }
5519              
5520              
5521             =head2 pivot
5522              
5523             Resembles the pivot table function in Excel.
5524              
5525             C is used to spread out a slim and long table to a visually improved layout.
5526              
5527             For instance spreading out the results of C-selects from SQL:
5528              
5529             pivot( arrayref, columnname1, columnname2, ...)
5530              
5531             pivot( ref_to_array_of_arrayrefs, @list_of_names_to_down_fields )
5532              
5533             The first argument is a ref to a two dimensional table.
5534              
5535             The rest of the arguments is a list which also signals the number of
5536             columns from left in each row that is ending up to the left of the
5537             data table, the rest ends up at the top and the last element of
5538             each row ends up as data.
5539              
5540             top1 top1 top1 top1
5541             left1 left2 left3 top2 top2 top2 top2
5542             ----- ----- ----- ---- ---- ---- ----
5543             data data data data
5544             data data data data
5545             data data data data
5546              
5547             Example:
5548              
5549             my @table=(
5550             ["1997","Gerd", "Weight", "Summer",66],
5551             ["1997","Gerd", "Height", "Summer",170],
5552             ["1997","Per", "Weight", "Summer",75],
5553             ["1997","Per", "Height", "Summer",182],
5554             ["1997","Hilde","Weight", "Summer",62],
5555             ["1997","Hilde","Height", "Summer",168],
5556             ["1997","Tone", "Weight", "Summer",70],
5557            
5558             ["1997","Gerd", "Weight", "Winter",64],
5559             ["1997","Gerd", "Height", "Winter",158],
5560             ["1997","Per", "Weight", "Winter",73],
5561             ["1997","Per", "Height", "Winter",180],
5562             ["1997","Hilde","Weight", "Winter",61],
5563             ["1997","Hilde","Height", "Winter",164],
5564             ["1997","Tone", "Weight", "Winter",69],
5565            
5566             ["1998","Gerd", "Weight", "Summer",64],
5567             ["1998","Gerd", "Height", "Summer",171],
5568             ["1998","Per", "Weight", "Summer",76],
5569             ["1998","Per", "Height", "Summer",182],
5570             ["1998","Hilde","Weight", "Summer",62],
5571             ["1998","Hilde","Height", "Summer",168],
5572             ["1998","Tone", "Weight", "Summer",70],
5573            
5574             ["1998","Gerd", "Weight", "Winter",64],
5575             ["1998","Gerd", "Height", "Winter",171],
5576             ["1998","Per", "Weight", "Winter",74],
5577             ["1998","Per", "Height", "Winter",183],
5578             ["1998","Hilde","Weight", "Winter",62],
5579             ["1998","Hilde","Height", "Winter",168],
5580             ["1998","Tone", "Weight", "Winter",71],
5581             );
5582              
5583             .
5584              
5585             my @reportA=pivot(\@table,"Year","Name");
5586             print "\n\nReport A\n\n".tablestring(\@reportA);
5587              
5588             Will print:
5589              
5590             Report A
5591            
5592             Year Name Height Height Weight Weight
5593             Summer Winter Summer Winter
5594             ---- ----- ------ ------ ------ ------
5595             1997 Gerd 170 158 66 64
5596             1997 Hilde 168 164 62 61
5597             1997 Per 182 180 75 73
5598             1997 Tone 70 69
5599             1998 Gerd 171 171 64 64
5600             1998 Hilde 168 168 62 62
5601             1998 Per 182 183 76 74
5602             1998 Tone 70 71
5603              
5604             .
5605              
5606             my @reportB=pivot([map{$_=[@$_[0,3,2,1,4]]}(@t=@table)],"Year","Season");
5607             print "\n\nReport B\n\n".tablestring(\@reportB);
5608              
5609             Will print:
5610              
5611             Report B
5612            
5613             Year Season Height Height Height Weight Weight Weight Weight
5614             Gerd Hilde Per Gerd Hilde Per Tone
5615             ---- ------ ------ ------ ----- ----- ------ ------ ------
5616             1997 Summer 170 168 182 66 62 75 70
5617             1997 Winter 158 164 180 64 61 73 69
5618             1998 Summer 171 168 182 64 62 76 70
5619             1998 Winter 171 168 183 64 62 74 71
5620              
5621             .
5622              
5623             my @reportC=pivot([map{$_=[@$_[1,2,0,3,4]]}(@t=@table)],"Name","Attributt");
5624             print "\n\nReport C\n\n".tablestring(\@reportC);
5625              
5626             Will print:
5627              
5628             Report C
5629            
5630             Name Attributt 1997 1997 1998 1998
5631             Summer Winter Summer Winter
5632             ----- --------- ------ ------ ------ ------
5633             Gerd Height 170 158 171 171
5634             Gerd Weight 66 64 64 64
5635             Hilde Height 168 164 168 168
5636             Hilde Weight 62 61 62 62
5637             Per Height 182 180 182 183
5638             Per Weight 75 73 76 74
5639             Tone Weight 70 69 70 71
5640              
5641             .
5642              
5643             my @reportD=pivot([map{$_=[@$_[1,2,0,3,4]]}(@t=@table)],"Name");
5644             print "\n\nReport D\n\n".tablestring(\@reportD);
5645              
5646             Will print:
5647              
5648             Report D
5649            
5650             Name Height Height Height Height Weight Weight Weight Weight
5651             1997 1997 1998 1998 1997 1997 1998 1998
5652             Summer Winter Summer Winter Summer Winter Summer Winter
5653             ----- ------ ------ ------ ------ ------ ------ ------ ------
5654             Gerd 170 158 171 171 66 64 64 64
5655             Hilde 168 164 168 168 62 61 62 62
5656             Per 182 180 182 183 75 73 76 74
5657             Tone 70 69 70 71
5658              
5659             Options:
5660              
5661             Options to sort differently and show sums and percents are available. (...MORE DOC ON THAT LATER...)
5662              
5663             See also L
5664              
5665             =cut
5666              
5667             sub pivot {
5668 4     4 1 12 my($tabref,@vertikalefelt)=@_;
5669 4 50       19 my %opt=ref($vertikalefelt[-1]) eq 'HASH' ? %{pop(@vertikalefelt)} : ();
  0         0  
5670 4 50       16 my $opt_sum=1 if $opt{sum};
5671 4 50 0     15 my $opt_pro=exists $opt{prosent}?$opt{prosent}||0:undef;
5672 4   50     47 my $sortsub = $opt{'sortsub'} || \&_sortsub;
5673 4   33     23 my $sortsub_bortover = $opt{'sortsub_bortover'} || $sortsub;
5674 4   33     19 my $sortsub_nedover = $opt{'sortsub_nedover'} || $sortsub;
5675             #print serialize(\%opt,'opt');
5676             #print serialize(\$opt_pro,'opt_pro');
5677 4         8 my $antned=0+@vertikalefelt;
5678 4         6 my $bakerst=-1+@{$$tabref[0]};
  4         11  
5679 4         8 my(%h,%feltfinnes,%sum);
5680             #print "Bakerst<$bakerst>\n";
5681 4         9 for(@$tabref){
5682 112         207 my $rad=join($;,@$_[0..($antned-1)]);
5683 112         162 my $felt=join($;,@$_[$antned..($bakerst-1)]);
5684 112         116 my $verdi=$$_[$bakerst];
5685 112 50       161 length($rad) or $rad=' ';
5686 112 50       135 length($felt) or $felt=' ';
5687 112         178 $h{$rad}{$felt}=$verdi;
5688 112         176 $h{$rad}{"%$felt"}=$verdi;
5689 112 50 33     602 if($opt_sum or defined $opt_pro){
5690 0         0 $h{$rad}{Sum}+=$verdi;
5691 0         0 $sum{$felt}+=$verdi;
5692 0         0 $sum{Sum}+=$verdi;
5693             }
5694 112         111 $feltfinnes{$felt}++;
5695 112 50       184 $feltfinnes{"%$felt"}++ if $opt_pro;
5696             }
5697 4         38 my @feltfinnes = sort $sortsub_bortover keys%feltfinnes;
5698 4 50       17 push @feltfinnes, "Sum" if $opt_sum;
5699 4         11 my @t=([@vertikalefelt,map{replace($_,$;,"\n")}@feltfinnes]);
  23         36  
5700             #print serialize(\@feltfinnes,'feltfinnes');
5701             #print serialize(\%h,'h');
5702             #print "H = ".join(", ",sort _sortsub keys%h)."\n";
5703 4         21 for my $rad (sort $sortsub_nedover keys(%h)){
5704             my @rad=(split($;,$rad),
5705             map{
5706 23 50 33     166 if(/^\%/ and defined $opt_pro){
  120         225  
5707 0         0 my $sum=$h{$rad}{Sum};
5708 0         0 my $verdi=$h{$rad}{$_};
5709 0 0       0 if($sum!=0){
5710 0 0       0 defined $verdi
5711             ?sprintf("%*.*f",3+1+$opt_pro,$opt_pro,100*$verdi/$sum)
5712             :$verdi;
5713             }
5714             else{
5715 0 0       0 $verdi!=0?"div0":$verdi;
5716             }
5717             }
5718             else{
5719 120         149 $h{$rad}{$_};
5720             }
5721             }
5722             @feltfinnes);
5723 23         69 push(@t,[@rad]);
5724             }
5725 4 50       10 push(@t,"-",["Sum",(map{""}(2..$antned)),map{print "<$_>\n";$sum{$_}}@feltfinnes]) if $opt_sum;
  0         0  
  0         0  
  0         0  
5726 4         48 return @t;
5727             }
5728              
5729             # default sortsub for pivot()
5730              
5731             sub _sortsub {
5732 33     33   162 no warnings;
  33         40  
  33         10401  
5733             #my $c=($a<=>$b)||($a cmp $b);
5734             #return $c if $c;
5735             #printf "%-30s %-30s ",replace($a,$;,','),replace($b,$;,',');
5736 78     78   651 my @a=split $;,$a;
5737 78         491 my @b=split $;,$b;
5738 78         146 for(0..$#a){
5739 117         232 my $c=$a[$_]<=>$b[$_];
5740 117 100 66     307 return $c if $c and "$a[$_]$b[$_]"!~/[iI][nN][fF]|þ/i; # inf(inity)
5741 99         95 $c=$a[$_]cmp$b[$_];
5742 99 100       246 return $c if $c;
5743             }
5744 0         0 return 0;
5745             }
5746              
5747             =head2 tablestring
5748              
5749             B a reference to an array of arrayrefs -- a two dimensional table of strings and numbers
5750              
5751             B a string containing the textual table -- a string of two or more lines
5752              
5753             The first arrayref in the list refers to a list of either column headings (scalar)
5754             or ... (...more later...)
5755              
5756             In this output table:
5757              
5758             - the columns will not be wider than necessary by its widest value (any -tags are removed in every internal width-calculation)
5759              
5760             - multi-lined cell values are handled also
5761              
5762             - and so are html-tags, if the output is to be used inside
-tags on a web page. 
5763              
5764             - columns with just numeric values are right justified (header row excepted)
5765              
5766             Example:
5767              
5768             print tablestring([
5769             [qw/AA BB CCCC/],
5770             [123,23,"d"],
5771             [12,23,34],
5772             [77,88,99],
5773             ["lin\nes",12,"asdff\nfdsa\naa"],[0,22,"adf"]
5774             ]);
5775              
5776             Prints this string of 11 lines:
5777              
5778             AA BB CCCC
5779             --- -- -----
5780             123 23 d
5781             12 23 34
5782             77 8 99
5783            
5784             lin 12 asdff
5785             es fdsa
5786             aa
5787            
5788             10 22 adf
5789              
5790             As you can see, rows containing multi-lined cells gets an empty line before and after the row to separate it more clearly.
5791              
5792             =cut
5793              
5794             sub tablestring {
5795 5     5 1 31 my $tab=shift;
5796 5 50       15 my %o=$_[0] ? %{shift()} : ();
  0         0  
5797 5         8 my $fjern_tom=$o{fjern_tomme_kolonner};
5798 5         9 my $ikke_space=$o{ikke_space};
5799 5   50     24 my $nodup=$o{nodup}||0;
5800 5         9 my $ikke_hodestrek=$o{ikke_hodestrek};
5801 5 50       11 my $pagesize=exists $o{pagesize} ? $o{pagesize}-3 : 9999999;
5802 5         7 my $venstretvang=$o{venstre};
5803 5         5 my(@bredde,@venstre,@hoeyde,@ikketom,@nodup);
5804 5         7 my $hode=1;
5805 5         5 my $i=0;
5806 5         8 my $j;
5807 5         12 for(@$tab){
5808 33         27 $j=0;
5809 33         32 $hoeyde[$i]=0;
5810 33         29 my $nodup_rad=$nodup;
5811 33 50       66 if(ref($_) eq 'ARRAY'){
5812 33         43 for(@$_){
5813 210         184 my $celle=$_;
5814 210   100     352 $bredde[$j]||=0;
5815 210 50 33     429 if($nodup_rad and $i>0 and $$tab[$i][$j] eq $$tab[$i-1][$j] || ($nodup_rad=0)){
      0        
      33        
5816 0 0       0 $celle=$nodup==1?"":$nodup;
5817 0         0 $nodup[$i][$j]=1;
5818             }
5819             else{
5820 210         156 my $hoeyde=0;
5821 210         135 my $bredere;
5822 33     33   175 no warnings;
  33         49  
  33         12903  
5823 210 100 100     622 $ikketom[$j]=1 if !$hode && length($celle)>0;
5824 210         364 for(split("\n",$celle)){
5825 236 50       298 $bredere=/
5826 236         268 s/<[^>]+>//g;
5827 236         162 $hoeyde++;
5828 236         178 s/>/>/g;
5829 236         165 s/</
5830 236 100       391 $bredde[$j]=length($_)+1+$bredere if length($_)+1+$bredere>$bredde[$j];
5831 236 100 100     1252 $venstre[$j]=1 if $_ && !/^\s*[\-\+]?(\d+|\d*\.\d+)\s*\%?$/ && !$hode;
      100        
5832             }
5833 210 100 66     432 if( $hoeyde>1 && !$ikke_space){
5834 25 100       38 $hoeyde++ unless $hode;
5835 25 100 100     50 $hoeyde[$i-1]++ if $i>1 && $hoeyde[$i-1]==1;
5836             }
5837 210 100       329 $hoeyde[$i]=$hoeyde if $hoeyde>$hoeyde[$i];
5838             }
5839 210         196 $j++;
5840             }
5841             }
5842             else{
5843 0         0 $hoeyde[$i]=1;
5844 0         0 $ikke_hodestrek=1;
5845             }
5846 33         34 $hode=0;
5847 33         45 $i++;
5848             }
5849 5         7 $i=$#hoeyde;
5850 5         10 $j=$#bredde;
5851 5 50 33     26 if($i==0 or $venstretvang) { @venstre=map{1}(0..$j) }
  0         0  
  0         0  
5852 5 50       16 else { for(0..$j){ $venstre[$_]=1 if !$ikketom[$_] } }
  33         56  
5853 5         6 my @tabut;
5854 5         8 my $rad_startlinje=0;
5855 5         11 my @overskrift;
5856             my $overskrift_forrige;
5857 5         12 for my $x (0..$i){
5858 33 50       81 if($$tab[$x] eq '-'){
5859 0 0       0 my @tegn=map {$$tab[$x-1][$_]=~/\S/?"-":" "} (0..$j);
  0         0  
5860 0         0 $tabut[$rad_startlinje]=join(" ",map {$tegn[$_] x ($bredde[$_]-1)} (0..$j));
  0         0  
5861             }
5862             else{
5863 33         36 for my $y (0..$j){
5864 210 50 33     337 next if $fjern_tom && !$ikketom[$y];
5865 33     33   168 no warnings;
  33         53  
  33         18322  
5866            
5867 210 0 33     1204 my @celle=
    50          
5868             !$overskrift_forrige&&$nodup&&$nodup[$x][$y]
5869             ?($nodup>0?():((" " x (($bredde[$y]-length($nodup))/2)).$nodup))
5870             :split("\n",$$tab[$x][$y]);
5871 210         335 for(0..($hoeyde[$x]-1)){
5872 261         225 my $linje=$rad_startlinje+$_;
5873 261   100     430 my $txt=shift @celle || '';
5874 261 100 100     1225 $txt=sprintf("%*s",$bredde[$y]-1,$txt) if length($txt)>0 && !$venstre[$y] && ($x>0 || $ikke_hodestrek);
      66        
      66        
5875 261         302 $tabut[$linje].=$txt;
5876 261 100       313 if($y==$j){
5877 42         197 $tabut[$linje]=~s/\s+$//;
5878             }
5879             else{
5880 219         161 my $bredere;
5881 219 50       262 $bredere = $txt=~/
5882 219         230 $txt=~s/<[^>]+>//g;
5883 219         180 $txt=~s/>/>/g;
5884 219         170 $txt=~s/</
5885 219         474 $tabut[$linje].= ' ' x ($bredde[$y]-length($txt)-$bredere);
5886             }
5887             }
5888             }
5889             }
5890 33         40 $rad_startlinje+=$hoeyde[$x];
5891              
5892             #--lage streker?
5893 33 50       58 if(not $ikke_hodestrek){
5894 33 100 33     136 if($x==0){
    50 33        
      0        
5895 5         12 for my $y (0..$j){
5896 33 50 33     99 next if $fjern_tom && !$ikketom[$y];
5897 33         64 $tabut[$rad_startlinje].=('-' x ($bredde[$y]-1))." ";
5898             }
5899 5         8 $rad_startlinje++;
5900 5         18 @overskrift=("",@tabut);
5901             }
5902             elsif(
5903             $x%$pagesize==0 || $nodup>0&&!$nodup[$x+1][$nodup-1]
5904             and $x+1<@$tab
5905             and !$ikke_hodestrek
5906             )
5907             {
5908 0         0 push(@tabut,@overskrift);
5909 0         0 $rad_startlinje+=@overskrift;
5910 0         0 $overskrift_forrige=1;
5911             }
5912             else{
5913 28         37 $overskrift_forrige=0;
5914             }
5915             }
5916             }#for x
5917 5         85 return join("\n",@tabut)."\n";
5918             }
5919              
5920             =head2 serialize
5921              
5922             Returns a data structure as a string. See also C
5923             (serialize was created long time ago before Data::Dumper appeared on
5924             CPAN, before CPAN even...)
5925              
5926             B One to four arguments.
5927              
5928             First argument: A reference to the structure you want.
5929              
5930             Second argument: (optional) The name the structure will get in the output string.
5931             If second argument is missing or is undef or '', it will get no name in the output.
5932              
5933             Third argument: (optional) The string that is returned is also put
5934             into a created file with the name given in this argument. Putting a
5935             C<< > >> char in from of the filename will append that file
5936             instead. Use C<''> or C to not write to a file if you want to
5937             use a fourth argument.
5938              
5939             Fourth argument: (optional) A number signalling the depth on which newlines is used in the output.
5940             The default is infinite (some big number) so no extra newlines are output.
5941              
5942             B A string containing the perl-code definition that makes that data structure.
5943             The input reference (first input argument) can be to an array, hash or a string.
5944             Those can contain other refs and strings in a deep data structure.
5945              
5946             Limitations:
5947              
5948             - Code refs are not handled (just returns C)
5949              
5950             - Regex, class refs and circular recursive structures are also not handled.
5951              
5952             B
5953              
5954             $a = 'test';
5955             @b = (1,2,3);
5956             %c = (1=>2, 2=>3, 3=>5, 4=>7, 5=>11);
5957             %d = (1=>2, 2=>3, 3=>\5, 4=>7, 5=>11, 6=>[13,17,19,{1,2,3,'asdf\'\\\''}],7=>'x');
5958             print serialize(\$a,'a');
5959             print serialize(\@b,'tab');
5960             print serialize(\%c,'c');
5961             print serialize(\%d,'d');
5962             print serialize(\("test'n roll",'brb "brb"'));
5963             print serialize(\%d,'d',undef,1);
5964              
5965             Prints accordingly:
5966              
5967             $a='test';
5968             @tab=('1','2','3');
5969             %c=('1','2','2','3','3','5','4','7','5','11');
5970             %d=('1'=>'2','2'=>'3','3'=>\'5','4'=>'7','5'=>'11','6'=>['13','17','19',{'1'=>'2','3'=>'asdf\'\\\''}]);
5971             ('test\'n roll','brb "brb"');
5972             %d=('1'=>'2',
5973             '2'=>'3',
5974             '3'=>\'5',
5975             '4'=>'7',
5976             '5'=>'11',
5977             '6'=>['13','17','19',{'1'=>'2','3'=>'asdf\'\\\''}],
5978             '7'=>'x');
5979              
5980             Areas of use:
5981              
5982             - Debugging (first and foremost)
5983              
5984             - Storing arrays and hashes and data structures of those on file, database or sending them over the net
5985              
5986             - eval earlier stored string to get back the data structure
5987              
5988             Be aware of the security implications of Cing a perl code string
5989             stored somewhere that unauthorized users can change them! You are
5990             probably better of using L or L without
5991             enabling the CODE-options if you have such security issues.
5992             More on decompiling Perl-code: L or L.
5993              
5994             =head2 dserialize
5995              
5996             Debug-serialize, dumping data structures for you to look at.
5997              
5998             Same as C but the output is given a newline every 80th character.
5999             (Every 80th or whatever C<$Acme::Tools::Dserialize_width> contains)
6000              
6001             =cut
6002              
6003             our $Dserialize_width=80;
6004 0   0 0   0 sub _kallstack { my $tilbake=shift||0; my @c; my $ret; $ret.=serialize(\@c,"caller$tilbake") while @c=caller(++$tilbake); $ret }
  0         0  
  0         0  
  0         0  
6005 0     0 1 0 sub dserialize{join "\n",serialize(@_)=~/(.{1,$Dserialize_width})/gs}
6006             sub serialize {
6007 33     33   163 no warnings;
  33         42  
  33         109759  
6008 1421     1421 1 3664 my($r,$name,$filename,$level)=@_;
6009 1421   100     2471 my @r=(undef,undef,($level||0)-1);
6010 1421 50       1628 if($filename){
6011 0 0       0 open my $fh, '>', $filename or croak("FEIL: could not open file $filename\n" . _kallstack());
6012 0         0 my $ret=serialize($r,$name,undef,$level);
6013 0         0 print $fh "$ret\n1;\n";
6014 0         0 close($fh);
6015 0         0 return $ret;
6016             }
6017 1421 100       2007 if(ref($r) eq 'SCALAR'){
    100          
    100          
    100          
    50          
    0          
    0          
    0          
    0          
6018 1151 50       1346 return "\$$name=".serialize($r,@r).";\n" if $name;
6019 1151 100       1367 return "undef" unless defined $$r;
6020 1143         862 my $ret=$$r;
6021 1143         1063 $ret=~s/\\/\\\\/g;
6022 1143         785 $ret=~s/\'/\\'/g;
6023 1143         2296 return "'$ret'";
6024             }
6025             elsif(ref($r) eq 'ARRAY'){
6026 76 100       125 return "\@$name=".serialize($r,@r).";\n" if $name;
6027 74         69 my $ret="(";
6028 74         97 for(@$r){
6029 392         535 $ret.=serialize(\$_,@r).",";
6030 392 100       690 $ret.="\n" if $level>=0;
6031             }
6032 74         199 $ret=~s/,$//;
6033 74         90 $ret.=")";
6034 74 50       116 $ret.=";\n" if $name;
6035 74         207 return $ret;
6036             }
6037             elsif(ref($r) eq 'HASH'){
6038 31 100       172 return "\%$name=".serialize($r,@r).";\n" if $name;
6039 21         23 my $ret="(";
6040 21         84 for(sort keys %$r){
6041 73         119 $ret.=serialize(\$_,@r)."=>".serialize(\$$r{$_},@r).",";
6042 73 50       129 $ret.="\n" if $level>=0;
6043             }
6044 21         75 $ret=~s/,$//;
6045 21         32 $ret.=")";
6046 21 50       34 $ret.=";\n" if $name;
6047 21         76 return $ret;
6048             }
6049             elsif(ref($$r) eq 'ARRAY'){
6050 70 50       90 return "\@$name=".serialize($r,@r).";\n" if $name;
6051 70         54 my $ret="[";
6052 70         72 for(@$$r){
6053 188         213 $ret.=serialize(\$_,@r).",";
6054 188 50 33     613 $ret.="\n" if !defined $level or $level>=0;
6055             }
6056 70         213 $ret=~s/,$//;
6057 70         58 $ret.="]";
6058 70 50       90 $ret.=";\n" if $name;
6059 70         129 return $ret;
6060             }
6061             elsif(ref($$r) eq 'HASH'){
6062 93 50       133 return "\%$name=".serialize($r,@r).";\n" if $name;
6063 93         76 my $ret="{";
6064 93         288 for(sort keys %$$r){
6065 294         367 $ret.=serialize(\$_,@r)."=>".serialize(\$$$r{$_},@r).",";
6066 294 50       519 $ret.="\n" if $level>=0;
6067             }
6068 93         223 $ret=~s/,$//;
6069 93         76 $ret.="}";
6070 93 50       117 $ret.=";\n" if $name;
6071 93         193 return $ret;
6072             }
6073             elsif(ref($$r) eq 'SCALAR'){
6074 0         0 return "\\".serialize($$r,@r);
6075             }
6076             elsif(ref($r) eq 'LVALUE'){
6077 0         0 my $s="$$r";
6078 0         0 return serialize(\$s,@r);
6079             }
6080             elsif(ref($$r) eq 'CODE'){
6081             #warn "Tried to serialize CODE";
6082 0         0 return 'sub{croak "Can not serialize CODE-references, see perhaps B::Deparse and Storable"}'
6083             }
6084             elsif(ref($$r) eq 'GLOB'){
6085 0         0 warn "Tried to serialize a GLOB";
6086 0         0 return '\*STDERR'
6087             }
6088             else{
6089 0         0 my $tilbake;
6090 0         0 my($pakke,$fil,$linje,$sub,$hasargs,$wantarray);
6091 0   0     0 ($pakke,$fil,$linje,$sub,$hasargs,$wantarray)=caller($tilbake++) until $sub ne 'serialize' || $tilbake>20;
6092 0         0 croak("serialize() argument should be reference!\n".
6093             "\$r=$r\n".
6094             "ref(\$r) = ".ref($r)."\n".
6095             "ref(\$\$r) = ".ref($$r)."\n".
6096             "kallstack:\n". _kallstack());
6097             }
6098             }
6099              
6100             =head2 srlz
6101              
6102             Synonym to L, but remove unnecessary single quote chars around
6103             C<< \w+ >>-keys and number values (except numbers with leading zeros). Example:
6104              
6105             serialize:
6106              
6107             %s=('action'=>{'del'=>'0','ins'=>'0','upd'=>'18'},'post'=>'1348','pre'=>'1348',
6108             'updcol'=>{'Laerestednr'=>'18','Studietypenr'=>'18','Undervisningssted'=>'7','Url'=>'11'},
6109             'where'=>'where 1=1');
6110              
6111             srlz:
6112              
6113             %s=(action=>{del=>0,ins=>0,upd=>18},post=>1348,pre=>1348,
6114             updcol=>{Laerestednr=>18,Studietypenr=>18,Undervisningssted=>7,Url=>11},
6115             where=>'where 1=1');
6116              
6117             Todo: update L to do the same, but in the right way. (For now
6118             srlz runs the string from serialize() through two C<< s/// >>, this will break
6119             in certain cases). L will be kept as a synonym (or the other way around).
6120              
6121             =cut
6122              
6123             sub srlz {
6124 0     0 1 0 my $s=serialize(@_);
6125 0         0 $s=~s,'(\w+)'=>,$1=>,g;
6126 0         0 $s=~s,=>'((0|[1-9]\d*)(\.\d+)?(e[-+]?\d+)?)',=>$1,gi; #ikke ledende null! hm
6127 0         0 $s;
6128             }
6129              
6130             =head2 cnttbl
6131              
6132             my %nordic_country_population=(Norway=>5214890,Sweden=>9845155,Denmark=>5699220,Finland=>5496907,Iceland=>331310);
6133             print cnttbl(\%nordic_country_population);
6134             Iceland 331310 1.25%
6135             Norway 5214890 19.61%
6136             Finland 5496907 20.67%
6137             Denmark 5699220 21.44%
6138             Sweden 9845155 37.03%
6139             SUM 26587482 100.00%
6140              
6141             Todo: Levels...:
6142              
6143             my %sales=(
6144             Toyota=>{Prius=>19,RAV=>12,Auris=>18,Avensis=>7},
6145             Volvo=>{V40=>14, XC90=>4},
6146             Nissan=>{Leaf=>19,Qashqai=>17},
6147             Tesla=>{ModelS=>8}
6148             );
6149             print cnttbl(\%sales);
6150             Toyota SUM 56
6151             Volvo SUM 18
6152             Nissan SUM 36
6153             Tesla SUM 8
6154             SUM SUM 56 100%
6155              
6156             =cut
6157              
6158             sub cnttbl {
6159 6     6 1 365 my $hr=shift;
6160 6         33 my $maxlen=max(3,map length($_),keys%$hr);
6161             join"",ref((values%$hr)[0])
6162 4         4 ?do{ map {my$o=$_;join("",map rpad($$o[0],$maxlen)." $_\n",split("\n",$$o[1]))}
  4         13  
6163 1         8 map [$_,cnttbl($$hr{$_})],
6164             sort keys%$hr }
6165 6 100       14 :do{ my $sum=sum(values%$hr);
  5         9  
6166 5         12 my $fmt=repl("%-xs %yd %6.2f%%\n",x=>$maxlen,y=>length($sum));
6167             map sprintf($fmt,@$_,100*$$_[1]/$sum),
6168 5 50       27 (map[$_,$$hr{$_}],sort{$$hr{$a}<=>$$hr{$b} or $a cmp $b}keys%$hr),
  15         156  
6169             (['SUM',$sum]) }
6170             }
6171              
6172             =head2 nicenum
6173              
6174             print 14.3 - 14.0; # 0.300000000000001
6175             print 34.3 - 34.0; # 0.299999999999997
6176             print nicenum( 14.3 - 14.0 ); # 0.3
6177             print nicenum( 34.3 - 34.0 ); # 0.3
6178              
6179             =cut
6180              
6181 2     2 1 3 sub nicenum { my $n=shift; $n=~s,((\d)\2\2\2\2\2\2\2)\d$,$1$2$2$2$2$2$2$2$2$2$2$2,; 0+$n }
  2         41  
  2         13  
6182              
6183              
6184             =head2 sys
6185              
6186             Call instead of C if you want C (Carp::croak) when something fails.
6187              
6188             sub sys($){ my$s=shift; my$r=system($s); $r==0 or croak"ERROR: system($s)==$r ($!) ($?)" }
6189              
6190              
6191             =cut
6192              
6193 11 50   11 1 21 sub sys($){ my$s=shift; my$r=system($s); $r==0 or croak"ERROR: system($s)==$r ($!) ($?)" }
  11         169247  
  11         369  
6194              
6195             =head2 recursed
6196              
6197             Returns true or false (actually 1 or 0) depending on whether the
6198             current sub has been called by itself or not.
6199              
6200             sub xyz
6201             {
6202             xyz() if not recursed;
6203              
6204             }
6205              
6206             =cut
6207              
6208 0 0   0 1 0 sub recursed {(caller(1))[3] eq (caller(2))[3]?1:0}
6209              
6210              
6211              
6212             =head2 ed
6213              
6214             String editor commands
6215              
6216             literals: a-z 0-9 space
6217             move cursor: FBAEPN MF MB ME
6218             delete: D Md
6219             up/low/camelcase word U L C
6220             backspace: -
6221             search: S
6222             return/enter: R
6223             meta/esc/alt: M
6224             shift: T
6225             cut to eol: K
6226             caps lock: C
6227             yank: Y
6228             start and end: < >
6229             macro start/end/play: { } !
6230             times for next cmd: M (i.e. M24a inserts 24 a's)
6231              
6232             (TODO: alfa...and more docs needed)
6233              
6234             =cut
6235              
6236             our $Edcursor;
6237             sub ed {
6238 18     18 1 3029 my($s,$cs,$p,$buf)=@_; #string, commands, point (or cursor)
6239 18 100       45 return $$s=ed($$s,$cs,$p,$buf) if ref($s);
6240 17         20 my($sh,$cl,$m,$t,@m)=(0,0,0,undef);
6241 17         29 while(length($cs)){
6242 121         75 my $n = 0;
6243 121 50       304 my $c = $cs=~s,^(M\d+|M.|""|".+?"|S.+?R|\\.|.),,s ? $1 : die;
6244 121   100     228 $p = curb($p||0,0,length($s));
6245 121 100       142 if(defined$t){$cs="".($c x $t).$cs;$t=undef;next}
  1         4  
  1         1  
  1         2  
6246 120     81   226 my $add=sub{substr($s,$p,0)=$_[0];$p+=length($_[0])};
  81         86  
  81         61  
6247 120 50       471 if ($c =~ /^([a-z0-9 ])/){ &$add($sh^$cl?uc($1):$1); $sh=0 }
  65 100       87  
  65 100       44  
    100          
    100          
    100          
    100          
    100          
    50          
    100          
    100          
    100          
    100          
    100          
    50          
    100          
    100          
    100          
    50          
    50          
    50          
    50          
    50          
    50          
    50          
    100          
    100          
    100          
    50          
6248 1         2 elsif($c =~ /^"(.+)"$/) { &$add($1) }
6249 1         2 elsif($c =~ /^\\(.)/) { &$add($1) }
6250 2 50       4 elsif($c =~ /^S(.+)R/) { my $i=index($s,$1,$p);$p=$i+length($1) if $i>=0 }
  2         6  
6251 1         2 elsif($c =~ /^M(\d+)/) { $t=$1; next }
  1         3  
6252 2         3 elsif($c eq 'F') { $p++ }
6253 1         1 elsif($c eq 'B') { $p-- }
6254 0   0     0 elsif($c eq 'A') { $p-- while $p>0 and substr($s,$p-1,2)!~/^\n/ }
6255 8 50       28 elsif($c eq 'E') { substr($s,$p)=~/(.*)/ and $p+=length($1) }
6256 4         4 elsif($c eq 'D') { substr($s,$p,1)='' }
6257 3 50       16 elsif($c eq 'MD'){ substr($s,$p)=~s/^(\W*\w+)// and $buf=$1 }
6258 3 50       10 elsif($c eq 'MF'){ substr($s,$p)=~/(\W*\w+)/ and $p+=length($1) }
6259 3 50       16 elsif($c eq 'MB'){ substr($s,0,$p)=~/(\w+\W*)$/ and $p-=length($1) }
6260 0 0       0 elsif($c eq '-') { substr($s,--$p,1)='' if $p }
6261 5 50       30 elsif($c eq 'M-'){ substr($s,0,$p)=~s/(\w+\W*)$// and $p-=length($buf=$1)}
6262 3 50       19 elsif($c eq 'K') { substr($s,$p)=~s/(\S.+|\s*?\n)// and $buf=$1 }
6263 12         14 elsif($c eq 'Y') { &$add($buf) }
6264 0         0 elsif($c eq 'U') { substr($s,$p)=~s/(\W*)(\w+)/$1\U$2\E/; $p+=length($1.$2) }
  0         0  
6265 0         0 elsif($c eq 'L') { substr($s,$p)=~s/(\W*)(\w+)/$1\L$2\E/; $p+=length($1.$2) }
  0         0  
6266 0         0 elsif($c eq 'C') { substr($s,$p)=~s/(\W*)(\w+)/$1\u\L$2\E/; $p+=length($1.$2) }
  0         0  
6267 0         0 elsif($c eq '<') { $p=0 }
6268 0         0 elsif($c eq '>') { $p=length($s) }
6269 0         0 elsif($c eq 'T') { $sh=1 }
6270 0         0 elsif($c eq 'C') { $cl^=1 }
6271 1         2 elsif($c eq '{') { $m=1; @m=() }
  1         1  
6272 1         2 elsif($c eq '}') { $m=0 }
6273 2 50 33     12 elsif($c eq '!') { $m||!@m and die"ed: no macro"; $cs=join("",@m).$cs }
  2         3  
6274 2         3 elsif($c eq '""'){ &$add('"') }
6275 0         0 else { croak "ed: Unknown cmd '$c'\n" }
6276 119 100 100     347 push @m, $c if $m and $c ne '{';
6277             #warn serialize([$c,$m,$cs],'d');
6278             }
6279 17         15 $Edcursor=$p;
6280 17         36 $s;
6281             }
6282              
6283             #todo: sub unbless eller sub damn
6284             #todo: ..se også: use Data::Structure::Util qw/unbless/;
6285             #todo: ...og: Acme::Damn sin damn()
6286             #todo? sub swap($$) http://www.idg.no/computerworld/article242008.ece
6287             #todo? catal
6288             #todo?
6289             #void quicksort(int t, int u) int i, m; if (t >= u) return; swap(t, randint(t, u)); m = t; for (i = t + 1; i <= u; i++) if (x[i] < x[t]) swap(++m, i); swap(t, m) quicksort(t, m-1); quicksort(m+1, u);
6290              
6291              
6292             =head1 JUST FOR FUN
6293              
6294             =head2 brainfu
6295              
6296             B one or two arguments
6297              
6298             First argument: a string, source code of the brainfu
6299             language. String containing the eight charachters + - < > [ ] . ,
6300             Every other char is ignored silently.
6301              
6302             Second argument: if the source code contains commas (,) the second
6303             argument is the input characters in a string.
6304              
6305             B The resulting output from the program.
6306              
6307             Example:
6308              
6309             print brainfu(<<""); #prints "Hallo Verden!\n"
6310             ++++++++++[>+++++++>++++++++++>+++>+<<<<-]>++.>---.+++++++++++..+++.>++.<<++++++++++++++
6311             .>----------.+++++++++++++.--------------.+.+++++++++.>+.>.
6312              
6313             See L
6314              
6315             =head2 brainfu2perl
6316              
6317             Just as L but instead it return the perl code to which the
6318             brainfu code is translated. Just C<< eval() >> this perl code to run.
6319              
6320             Example:
6321              
6322             print brainfu2perl('>++++++++[<++++++++>-]<++++++++.>++++++[<++++++>-]<---.');
6323              
6324             Prints this string:
6325              
6326             my($c,$o,@b)=(0); sub out{$o.=chr($b[$c]) for 1..$_[0]||1}
6327             ++$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];
6328             while($b[$c]){--$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];
6329             ++$b[$c];++$c;--$b[$c];}--$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];
6330             ++$b[$c];++$b[$c];out;++$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];
6331             while($b[$c]){--$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$c;--$b[$c];}
6332             --$c;--$b[$c];--$b[$c];--$b[$c];out;$o;
6333              
6334             =head2 brainfu2perl_optimized
6335              
6336             Just as L but optimizes the perl code. The same
6337             example as above with brainfu2perl_optimized returns this equivalent
6338             but shorter perl code:
6339              
6340             $b[++$c]+=8;while($b[$c]){$b[--$c]+=8;--$b[++$c]}$b[--$c]+=8;out;$b[++$c]+=6;
6341             while($b[$c]){$b[--$c]+=6;--$b[++$c]}$b[--$c]-=3;out;$o;
6342              
6343             =cut
6344              
6345 3     3 1 489 sub brainfu { eval(brainfu2perl(@_)) }
6346              
6347             sub brainfu2perl {
6348 6     6 1 17 my($bf,$inp)=@_;
6349 17         48 my $perl='my($c,$inp,$o,@b)=(0,\''.$inp.'\'); no warnings; sub out{$o.=chr($b[$c]) for 1..$_[0]||1}'."\n";
6350 3 100 66     17 $perl.='sub inp{$inp=~s/(.)//s and $b[$c]=ord($1)}'."\n" if $inp and $bf=~/,/;
6351 3 100       516 $perl.=join("",map/\+/?'++$b[$c];':/\-/?'--$b[$c];':/\[/?'while($b[$c]){':/\]/?'}':/>/?'++$c;':/
    100          
    100          
    100          
    100          
    100          
    100          
    100          
6352 3         245 $perl;
6353             }
6354              
6355             sub brainfu2perl_optimized {
6356 0     14 1 0 my $perl=brainfu2perl(@_);
6357 0         0 $perl=~s{(((\+|\-)\3\$b\[\$c\];){2,})}{ '$b[$c]'.$3.'='.(grep/b/,split//,$1).';' }gisex;
  0         0  
6358 0         0 1 while $perl=~s/\+\+\$c;\-\-\$c;//g + $perl=~s/\-\-\$c;\+\+\$c;//g;
6359 0         0 $perl=~s{((([\-\+])\3\$c;){2,})}{"\$c$3=".(grep/c/,split//,$1).';'}gisex;
  0         0  
6360 0 0       0 $perl=~s{((\+\+|\-\-)\$c;([^;{}]+;))}{my($o,$s)=($2,$3);$s=~s/\$c/$o\$c/?$s:$1}ge;
  0         0  
  0         0  
6361 0         0 $perl=~s/\$c(\-|\+)=(\d+);(\+\+|\-\-)\$b\[\$c\]/$3.'$b[$c'.$1.'='.$2.'];'/ge;
  0         0  
6362 0         0 $perl=~s{((out;){2,})}{'out('.(grep/o/,split//,$1).');'}ge;
  0         0  
6363 0         0 $perl=~s/;}/}/g;$perl=~s/;+/;/g;
  0         0  
6364 0         0 $perl;
6365             }
6366              
6367              
6368             =head1 BLOOM FILTER SUBROUTINES
6369              
6370             Bloom filters can be used to check whether an element (a string) is a
6371             member of a large set using much less memory or disk space than other
6372             data structures. Trading speed and accuracy for memory usage. While
6373             risking false positives, Bloom filters have a very strong space
6374             advantage over other data structures for representing sets.
6375              
6376             In the example below, a set of 100000 phone numbers (or any string of
6377             any length) can be "stored" in just 91230 bytes if you accept that you
6378             can only check the data structure for existence of a string and accept
6379             false positives with an error rate of 0.03 (that is three percent, error
6380             rates are given in numbers larger than 0 and smaller than 1).
6381              
6382             You can not retrieve the strings in the set without using "brute
6383             force" methods and even then you would get slightly more strings than
6384             you put in because of the error rate inaccuracy.
6385              
6386             Bloom Filters have many uses.
6387              
6388             See also: L
6389              
6390             See also: L
6391              
6392             =head2 bfinit
6393              
6394             Initialize a new Bloom Filter:
6395              
6396             my $bf = bfinit( error_rate=>0.01, capacity=>100000 );
6397              
6398             The same:
6399              
6400             my $bf = bfinit( 0.01, 100000 );
6401              
6402             since two arguments is interpreted as error_rate and capacity accordingly.
6403              
6404              
6405             =head2 bfadd
6406              
6407             bfadd($bf, $_) for @phone_numbers; # Adding strings one at a time
6408              
6409             bfadd($bf, @phone_numbers); # ...or all at once (faster)
6410              
6411             Returns 1 on success. Dies (croaks) if more strings than capacity is added.
6412              
6413             =head2 bfcheck
6414              
6415             my $phone_number="97713246";
6416             if ( bfcheck($bf, $phone_number) ) {
6417             print "Yes, $phone_number was PROBABLY added\n";
6418             }
6419             else{
6420             print "No, $phone_number was DEFINITELY NOT added\n";
6421             }
6422              
6423             Returns true if C<$phone_number> exists in C<@phone_numbers>.
6424              
6425             Returns false most of the times, but sometimes true*), if C<$phone_number> doesn't exists in C<@phone_numbers>.
6426              
6427             *) This is called a false positive.
6428              
6429             Checking more than one key:
6430              
6431             @bools = bfcheck($bf, @keys); # or ...
6432             @bools = bfcheck($bf, \@keys); # better, uses less memory if @keys is large
6433              
6434             Returns an array the same size as @keys where each element is true or false accordingly.
6435              
6436             =head2 bfgrep
6437              
6438             Same as C except it returns the keys that exists in the bloom filter
6439              
6440             @found = bfgrep($bf, @keys); # or ...
6441             @found = bfgrep($bf, \@keys); # better, uses less memory if @keys is large, or ...
6442             @found = grep bfcheck($bf,$_), @keys; # same but slower
6443              
6444             =head2 bfgrepnot
6445              
6446             Same as C except it returns the keys that do NOT exists in the bloom filter:
6447              
6448             @not_found = bfgrepnot($bf, @keys); # or ...
6449             @not_found = bfgrepnot($bf, \@keys); # better, uses less memory if @keys is large, or ...
6450             @not_found = grep !bfcheck($bf,$_), @keys); # same but slower
6451              
6452             =head2 bfdelete
6453              
6454             Deletes from a counting bloom filter.
6455              
6456             To enable deleting be sure to initialize the bloom filter with the
6457             numeric C argument. The number of bits could be 2 or 3*)
6458             for small filters with a small capacity (a small number of keys), but
6459             setting the number to 4 ensures that even very large filters with very
6460             small error rates would not overflow.
6461              
6462             *) Acme::Tools do not currently support C<< counting_bits => 3 >> so 4
6463             and 8 are the only practical alternatives where 8 is almost always overkill.
6464              
6465             my $bf=bfinit(
6466             error_rate => 0.001,
6467             capacity => 10000000,
6468             counting_bits => 4 # power of 2, that is 2, 4, 8, 16 or 32
6469             );
6470             bfadd( $bf, @unique_phone_numbers);
6471             bfdelete($bf, @unique_phone_numbers);
6472              
6473             Example: examine the frequency of the counters with 4 bit counters and 4 million keys:
6474              
6475             my $bf=bfinit( error_rate=>0.001, capacity=>4e6, counting_bits=>4 );
6476             bfadd($bf,[1e3*$_+1 .. 1e3*($_+1)]) for 0..4000-1; # adding 4 million keys one thousand at a time
6477             my %c; $c{vec($$bf{filter},$_,$$bf{counting_bits})}++ for 0..$$bf{filterlength}-1;
6478             printf "%8d counters = %d\n",$c{$_},$_ for sort{$a<=>$b}keys%c;
6479              
6480             The output:
6481              
6482             28689562 counters = 0
6483             19947673 counters = 1
6484             6941082 counters = 2
6485             1608250 counters = 3
6486             280107 counters = 4
6487             38859 counters = 5
6488             4533 counters = 6
6489             445 counters = 7
6490             46 counters = 8
6491             1 counters = 9
6492              
6493             Even after the error_rate is changed from 0.001 to a percent of that, 0.00001, the limit of 16 (4 bits) is still far away:
6494              
6495             47162242 counters = 0
6496             33457237 counters = 1
6497             11865217 counters = 2
6498             2804447 counters = 3
6499             497308 counters = 4
6500             70608 counters = 5
6501             8359 counters = 6
6502             858 counters = 7
6503             65 counters = 8
6504             4 counters = 9
6505              
6506             In algorithmic terms the number of bits needed is C. Thats why 4 bits (counters up
6507             to 15) is "always" good enough except for extremely large capasities or extremely small error rates.
6508             (Except when adding the same key many times, which should be avoided, and Acme::Tools::bfadd do not
6509             check for that, perhaps in future versions).
6510              
6511             Bloom filters of the counting type are not very space efficient: The tables above shows that 84%-85%
6512             of the counters are 0 or 1. This means most bits are zero-bits. This doesn't have to be a problem if
6513             a counting bloom filter is used to be sent over slow networks because they are very compressable by
6514             common compression tools like I or L and such.
6515              
6516             Deletion of non-existing keys makes C die (croak).
6517              
6518             =head2 bfdelete
6519              
6520             Deletes from a counting bloom filter:
6521              
6522             bfdelete($bf, @keys);
6523             bfdelete($bf, \@keys);
6524              
6525             Returns C<$bf> after deletion.
6526              
6527             Croaks (dies) on deleting a non-existing key or deleting from an previouly overflown counter in a counting bloom filter.
6528              
6529             =head2 bfaddbf
6530              
6531             Adds another bloom filter to a bloom filter.
6532              
6533             Bloom filters has the proberty that bit-wise I-ing the bit-filters
6534             of two filters with the same capacity and the same number and type of
6535             hash functions, adds the filters:
6536              
6537             my $bf1=bfinit(error_rate=>0.01,capacity=>$cap,keys=>[1..500]);
6538             my $bf2=bfinit(error_rate=>0.01,capacity=>$cap,keys=>[501..1000]);
6539              
6540             bfaddbf($bf1,$bf2);
6541              
6542             print "Yes!" if bfgrep($bf1, 1..1000) == 1000;
6543              
6544             Prints yes since C now returns an array of all the 1000 elements.
6545              
6546             Croaks if the filters are of different dimensions.
6547              
6548             Works for counting bloom filters as well (C<< counting_bits=>4 >> e.g.)
6549              
6550             =head2 bfsum
6551              
6552             Returns the number of 1's in the filter.
6553              
6554             my $percent=100*bfsum($bf)/$$bf{filterlength};
6555             printf "The filter is %.1f%% filled\n",$percent; #prints 50.0% or so if filled to capacity
6556              
6557             Sums the counters for counting bloom filters (much slower than for non counting).
6558              
6559             =head2 bfdimensions
6560              
6561             Input, two numeric arguments: Capacity and error_rate.
6562              
6563             Outputs an array of two numbers: m and k.
6564              
6565             m = - n * log(p) / log(2)**2 # n = capacity, m = bits in filter (divide by 8 to get bytes)
6566             k = log(1/p) / log(2) # p = error_rate, uses perls internal log() with base e (2.718)
6567              
6568             ...that is: m = the best number of bits in the filter and k = the best
6569             number of hash functions optimized for the given capacity (n) and
6570             error_rate (p). Note that k is a dependent only of the error_rate. At
6571             about two percent error rate the bloom filter needs just the same
6572             number of bytes as the number of keys.
6573              
6574             Storage (bytes):
6575             Capacity Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate
6576             0.000000001 0.00000001 0.0000001 0.000001 0.00001 0.0001 0.001 0.01 0.02141585 0.1 0.5 0.99
6577             ------------- ----------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------
6578             10 54.48 48.49 42.5 36.51 30.52 24.53 18.53 12.54 10.56 6.553 2.366 0.5886
6579             100 539.7 479.8 419.9 360 300.1 240.2 180.3 120.4 100.6 60.47 18.6 0.824
6580             1000 5392 4793 4194 3595 2996 2397 1798 1199 1001 599.6 180.9 3.177
6581             10000 5.392e+04 4.793e+04 4.194e+04 3.594e+04 2.995e+04 2.396e+04 1.797e+04 1.198e+04 1e+04 5991 1804 26.71
6582             100000 5.392e+05 4.793e+05 4.193e+05 3.594e+05 2.995e+05 2.396e+05 1.797e+05 1.198e+05 1e+05 5.991e+04 1.803e+04 262
6583             1000000 5.392e+06 4.793e+06 4.193e+06 3.594e+06 2.995e+06 2.396e+06 1.797e+06 1.198e+06 1e+06 5.991e+05 1.803e+05 2615
6584             10000000 5.392e+07 4.793e+07 4.193e+07 3.594e+07 2.995e+07 2.396e+07 1.797e+07 1.198e+07 1e+07 5.991e+06 1.803e+06 2.615e+04
6585             100000000 5.392e+08 4.793e+08 4.193e+08 3.594e+08 2.995e+08 2.396e+08 1.797e+08 1.198e+08 1e+08 5.991e+07 1.803e+07 2.615e+05
6586             1000000000 5.392e+09 4.793e+09 4.193e+09 3.594e+09 2.995e+09 2.396e+09 1.797e+09 1.198e+09 1e+09 5.991e+08 1.803e+08 2.615e+06
6587             10000000000 5.392e+10 4.793e+10 4.193e+10 3.594e+10 2.995e+10 2.396e+10 1.797e+10 1.198e+10 1e+10 5.991e+09 1.803e+09 2.615e+07
6588             100000000000 5.392e+11 4.793e+11 4.193e+11 3.594e+11 2.995e+11 2.396e+11 1.797e+11 1.198e+11 1e+11 5.991e+10 1.803e+10 2.615e+08
6589             1000000000000 5.392e+12 4.793e+12 4.193e+12 3.594e+12 2.995e+12 2.396e+12 1.797e+12 1.198e+12 1e+12 5.991e+11 1.803e+11 2.615e+09
6590              
6591             Error rate: 0.99 Hash functions: 1
6592             Error rate: 0.5 Hash functions: 1
6593             Error rate: 0.1 Hash functions: 3
6594             Error rate: 0.0214158522653385 Hash functions: 6
6595             Error rate: 0.01 Hash functions: 7
6596             Error rate: 0.001 Hash functions: 10
6597             Error rate: 0.0001 Hash functions: 13
6598             Error rate: 0.00001 Hash functions: 17
6599             Error rate: 0.000001 Hash functions: 20
6600             Error rate: 0.0000001 Hash functions: 23
6601             Error rate: 0.00000001 Hash functions: 27
6602             Error rate: 0.000000001 Hash functions: 30
6603              
6604             =head2 bfstore
6605              
6606             Storing and retrieving bloom filters to and from disk uses Ls C and C. This:
6607              
6608             bfstore($bf,'filename.bf');
6609              
6610             It the same as:
6611              
6612             use Storable qw(store retrieve);
6613             ...
6614             store($bf,'filename.bf');
6615              
6616             =head2 bfretrieve
6617              
6618             This:
6619              
6620             my $bf=bfretrieve('filename.bf');
6621              
6622             Or this:
6623              
6624             my $bf=bfinit('filename.bf');
6625              
6626             Is the same as:
6627              
6628             use Storable qw(store retrieve);
6629             my $bf=retrieve('filename.bf');
6630              
6631             =head2 bfclone
6632              
6633             Deep copies the bloom filter data structure. (Which btw is not very deep, two levels at most)
6634              
6635             This:
6636              
6637             my $bfc = bfclone($bf);
6638              
6639             Works just as:
6640              
6641             use Storable;
6642             my $bfc=Storable::dclone($bf);
6643              
6644             =head2 Object oriented interface to bloom filters
6645              
6646             use Acme::Tools;
6647             my $bf=new Acme::Tools::BloomFilter(0.1,1000); # the same as bfinit, see bfinit above
6648             print ref($bf),"\n"; # prints Acme::Tools::BloomFilter
6649             $bf->add(@keys);
6650             $bf->check($keys[0]) and print "ok\n"; # prints ok
6651             $bf->grep(\@keys)==@keys and print "ok\n"; # prints ok
6652             $bf->store('filename.bf');
6653             my $bf2=bfretrieve('filename.bf');
6654             $bf2->check($keys[0]) and print "ok\n"; # still ok
6655              
6656             $bf2=$bf->clone();
6657              
6658             To instantiate a previously stored bloom filter:
6659              
6660             my $bf = Acme::Tools::BloomFilter->new( '/path/to/stored/bloomfilter.bf' );
6661              
6662             The o.o. interface has the same methods as the C-subs without the
6663             C-prefix in the names. The C is not available as a
6664             method, although C, C and
6665             C are synonyms.
6666              
6667             =head2 Internals and speed
6668              
6669             The internal hash-functions are C<< md5( "$key$salt" ) >> from L.
6670              
6671             Since C returns 128 bits and most medium to large sized bloom
6672             filters need only a 32 bit hash function, the result from md5() are
6673             split (C-ed) into 4 parts 32 bits each and are treated as if 4
6674             hash functions was called at once (speedup). Using different salts to
6675             the key on each md5 results in different hash functions.
6676              
6677             Digest::SHA512 would have been even better since it returns more bits,
6678             if it werent for the fact that it's much slower than Digest::MD5.
6679              
6680             String::CRC32::crc32 is faster than Digest::MD5, but not 4 times faster:
6681              
6682             time perl -e'use Digest::MD5 qw(md5);md5("asdf$_") for 1..10e6' #5.56 sec
6683             time perl -e'use String::CRC32;crc32("asdf$_") for 1..10e6' #2.79 sec, faster but not per bit
6684             time perl -e'use Digest::SHA qw(sha512);sha512("asdf$_") for 1..10e6' #36.10 sec, too slow (sha1, sha224, sha256 and sha384 too)
6685              
6686             Md5 seems to be an ok choice both for speed and avoiding collitions due to skewed data keys.
6687              
6688             =head2 Theory and math behind bloom filters
6689              
6690             L
6691              
6692             L
6693              
6694             L
6695              
6696             See also Scaleable Bloom Filters: L (not implemented in Acme::Tools)
6697              
6698             ...and perhaps L
6699              
6700             =cut
6701              
6702             sub bfinit {
6703 24 100   24 1 8141 return bfretrieve(@_) if @_==1;
6704 23 50 66     90 return bfinit(error_rate=>$_[0], capacity=>$_[1]) if @_==2 and 0<$_[0] and $_[0]<1 and $_[1]>1;
      66        
      33        
6705 21 0 33     47 return bfinit(error_rate=>$_[1], capacity=>$_[0]) if @_==2 and 0<$_[1] and $_[1]<1 and $_[0]>1;
      33        
      0        
6706 21         113 require Digest::MD5;
6707 21 50       41 @_%2&&croak "Arguments should be a hash of equal number of keys and values";
6708 21         68 my %arg=@_;
6709 21         57 my @ok_param=qw/error_rate capacity min_hashfuncs max_hashfuncs hashfuncs counting_bits adaptive keys/;
6710 21         79 my @not_ok=sort(grep!in($_,@ok_param),keys%arg);
6711 21 100       212 croak "Not ok param to bfinit: ".join(", ",@not_ok) if @not_ok;
6712 20 50 66     71 croak "Not an arrayref in keys-param" if exists $arg{keys} and ref($arg{keys}) ne 'ARRAY';
6713 20 50       56 croak "Not implemented counting_bits=$arg{counting_bits}, should be 2, 4, 8, 16 or 32" if !in(nvl($arg{counting_bits},1),1,2,4,8,16,32);
6714 20 50 33     52 croak "An bloom filters here can not be in both adaptive and counting_bits modes" if $arg{adaptive} and $arg{counting_bits}>1;
6715 20         129 my $bf={error_rate => 0.001, #default p
6716             capacity => 100000, #default n
6717             min_hashfuncs => 1,
6718             max_hashfuncs => 100,
6719             counting_bits => 1, #default: not counting filter
6720             adaptive => 0,
6721             %arg, #arguments
6722             key_count => 0,
6723             overflow => {},
6724             version => $Acme::Tools::VERSION,
6725             };
6726 20 100 100     269 croak "Error rate ($$bf{error_rate}) should be larger than 0 and smaller than 1" if $$bf{error_rate}<=0 or $$bf{error_rate}>=1;
6727 18 50       32 @$bf{'min_hashfuncs','max_hashfuncs'}=(map$arg{hashfuncs},1..2) if $arg{hashfuncs};
6728 18         35 @$bf{'filterlength','hashfuncs'}=bfdimensions($bf); #m and k
6729 18         776 $$bf{filter}=pack("b*", '0' x ($$bf{filterlength}*$$bf{counting_bits}) ); #hm x new empty filter
6730             $$bf{unpack}= $$bf{filterlength}<=2**16/4 ? "n*" # /4 alleviates skewing if m just slightly < 2**x
6731 18 50       39 :$$bf{filterlength}<=2**32/4 ? "N*"
    100          
6732             : "Q*";
6733 18 100       36 bfadd($bf,@{$arg{keys}}) if $arg{keys};
  8         42  
6734 16         76 return $bf;
6735             }
6736             sub bfaddbf {
6737 2     2 1 3 my($bf,$bf2)=@_;
6738             my $differror=join"\n",
6739             map "Property $_ differs ($$bf{$_} vs $$bf2{$_})",
6740 2         37 grep $$bf{$_} ne $$bf2{$_},
6741             qw/capacity counting_bits adaptive hashfuncs filterlength/; #not error_rate
6742 2 50       6 croak $differror if $differror;
6743 2 50       7 croak "Can not add adaptive bloom filters" if $$bf{adaptive};
6744 2         3 my $count=$$bf{key_count}+$$bf2{key_count};
6745             croak "Exceeded filter capacity $$bf{key_count} + $$bf2{key_count} = $count > $$bf{capacity}"
6746 2 50       4 if $count > $$bf{capacity};
6747 2         4 $$bf{key_count}+=$$bf2{key_count};
6748 2 100       6 if($$bf{counting_bits}==1){
6749 1         4 $$bf{filter} |= $$bf2{filter};
6750             #$$bf{filter} = $$bf{filter} | $$bf2{filter}; #or-ing
6751             }
6752             else {
6753 1         2 my $cb=$$bf{counting_bits};
6754 1         4 for(0..$$bf{filterlength}-1){
6755             my $sum=
6756             vec($$bf{filter}, $_,$cb)+
6757 4793         3595 vec($$bf2{filter},$_,$cb);
6758 4793 50       5262 if( $sum>2**$cb-1 ){
6759 0         0 $sum=2**$cb-1;
6760 0         0 $$bf{overflow}{$_}++;
6761             }
6762 4793         4417 vec($$bf{filter}, $_,$cb)=$sum;
6763 33     33   188 no warnings;
  33         41  
  33         73187  
6764             $$bf{overflow}{$_}+=$$bf2{overflow}{$_}
6765 0         0 and keys(%{$$bf{overflow}})>10 #hmm, arbitrary limit
6766             and croak "Too many overflows, concider doubling counting_bits from $cb to ".(2*$cb)
6767 4793 50 0     6085 if exists $$bf2{overflow}{$_};
      0        
6768             }
6769             }
6770 2         7 return $bf; #for convenience
6771             }
6772             sub bfsum {
6773 8     8 1 48 my($bf)=@_;
6774 8 100       59 return unpack( "%32b*", $$bf{filter}) if $$bf{counting_bits}==1;
6775 3         10 my($sum,$cb)=(0,$$bf{counting_bits});
6776 3         2586 $sum+=vec($$bf{filter},$_,$cb) for 0..$$bf{filterlength}-1;
6777 3         24 return $sum;
6778             }
6779             sub bfadd {
6780 11     11 1 1405 require Digest::MD5;
6781 11         521 my($bf,@keys)=@_;
6782 11 50       28 return if !@keys;
6783 11 100 66     39 my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
6784 11         39 my($m,$k,$up,$n,$cb,$adaptive)=@$bf{'filterlength','hashfuncs','unpack','capacity','counting_bits','adaptive'};
6785 11         18 for(@$keysref){
6786             #croak "Key should be scalar" if ref($_);
6787 15084 50 66     21608 $$bf{key_count} >= $n and croak "Exceeded filter capacity $n" or $$bf{key_count}++;
6788 15083         8044 my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
  15083         75694  
6789 15083 100 66     32569 if ($cb==1 and !$adaptive) { # normal bloom filter
    50          
    0          
6790 11110         50389 vec($$bf{filter}, $h[$_] % $m, 1) = 1 for 0..$k-1;
6791             }
6792             elsif ($cb>1) { # counting bloom filter
6793 3973         3442 for(0..$k-1){
6794 11919         7956 my $pos=$h[$_] % $m;
6795             my $c=
6796             vec($$bf{filter}, $pos, $cb) =
6797 11919         13857 vec($$bf{filter}, $pos, $cb) + 1;
6798 11919 100       16590 if($c==0){
6799 26         25 vec($$bf{filter}, $pos, $cb) = -1;
6800             $$bf{overflow}{$pos}++
6801 26 100 100     74 and keys(%{$$bf{overflow}})>10 #hmm, arbitrary limit
  2         283  
6802             and croak "Too many overflows, concider doubling counting_bits from $cb to ".(2*$cb);
6803             }
6804             }
6805             }
6806             elsif ($adaptive) { # adaptive bloom filter
6807 0         0 my($i,$key,$bit)=(0+@h,$_);
6808 0         0 for(0..$$bf{filterlength}-1){
6809 0 0       0 $i+=push(@h, unpack $up, Digest::MD5::md5($key,$i)) if !@h;
6810 0         0 my $pos=shift(@h) % $m;
6811 0         0 $bit=vec($$bf{filter}, $pos, 1);
6812 0         0 vec($$bf{filter}, $pos, 1)=1;
6813 0 0 0     0 last if $_>=$k-1 and $bit==0;
6814             }
6815             }
6816 0         0 else {croak}
6817             }
6818 9         532 return 1;
6819             }
6820             sub bfcheck {
6821 5     5 1 77 require Digest::MD5;
6822 5         705 my($bf,@keys)=@_;
6823 5 50       18 return if !@keys;
6824 5 50 33     28 my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
6825 5         22 my($m,$k,$up,$cb,$adaptive)=@$bf{'filterlength','hashfuncs','unpack','counting_bits','adaptive'};
6826 5         9 my $wa=wantarray();
6827 5 50       14 if(!$adaptive){ # normal bloom filter or counting bloom filter
6828             return map {
6829 5         45 my $match = 1; # match if every bit is on
  24001         13210  
6830 24001         12786 my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
  24001         125010  
6831 24001   100     118525 vec($$bf{filter}, $h[$_] % $m, $cb) or $match=0 or last for 0..$k-1;
      100        
6832 24001 50       24049 return $match if !$wa;
6833 24001         25810 $match;
6834             } @$keysref;
6835             }
6836             else { # adaptive bloom filter
6837             return map {
6838 0         0 my($match,$i,$key,$bit,@h)=(1,0,$_);
  0         0  
6839 0         0 for(0..$$bf{filterlength}-1){
6840 0 0       0 $i+=push(@h, unpack $up, Digest::MD5::md5($key,$i)) if !@h;
6841 0         0 my $pos=shift(@h) % $m;
6842 0         0 $bit=vec($$bf{filter}, $pos, 1);
6843 0 0 0     0 $match++ if $_ > $k-1 and $bit==1;
6844 0 0 0     0 $match=0 if $_ <= $k-1 and $bit==0;
6845 0 0       0 last if $bit==0;
6846             }
6847 0 0       0 return $match if !$wa;
6848 0         0 $match;
6849             } @$keysref;
6850             }
6851             }
6852             sub bfgrep { # just a copy of bfcheck with map replaced by grep
6853 8     8 1 612 require Digest::MD5;
6854 8         12 my($bf,@keys)=@_;
6855 8 50       23 return if !@keys;
6856 8 50 33     51 my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
6857 8         27 my($m,$k,$up,$cb)=@$bf{'filterlength','hashfuncs','unpack','counting_bits'};
6858             return grep {
6859 8         24 my $match = 1; # match if every bit is on
  6100         3386  
6860 6100         3361 my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
  6100         22568  
6861 6100   100     22486 vec($$bf{filter}, $h[$_] % $m, $cb) or $match=0 or last for 0..$k-1;
      100        
6862 6100         5723 $match;
6863             } @$keysref;
6864             }
6865             sub bfgrepnot { # just a copy of bfgrep with $match replaced by not $match
6866 1     1 1 9 require Digest::MD5;
6867 1         3 my($bf,@keys)=@_;
6868 1 50       3 return if !@keys;
6869 1 50 33     13 my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
6870 1         4 my($m,$k,$up,$cb)=@$bf{'filterlength','hashfuncs','unpack','counting_bits'};
6871             return grep {
6872 1         4 my $match = 1; # match if every bit is on
  1000         573  
6873 1000         516 my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
  1000         3646  
6874 1000   50     3758 vec($$bf{filter}, $h[$_] % $m, $cb) or $match=0 or last for 0..$k-1;
      50        
6875 1000         1142 !$match;
6876             } @$keysref;
6877             }
6878             sub bfdelete {
6879 3     3 1 86 require Digest::MD5;
6880 3         14 my($bf,@keys)=@_;
6881 3 50       10 return if !@keys;
6882 3 100 100     18 my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
6883 3         10 my($m,$k,$up,$cb)=@$bf{'filterlength','hashfuncs','unpack','counting_bits'};
6884 3 50       6 croak "Cannot delete from non-counting bloom filter (use counting_bits 4 e.g.)" if $cb==1;
6885 3         5 for my $key (@$keysref){
6886 501         260 my @h; push @h, unpack $up, Digest::MD5::md5($key,0+@h) while @h<$k;
  501         2005  
6887 501 50 33     718 $$bf{key_count}==0 and croak "Deleted all and then some" or $$bf{key_count}--;
6888 501         387 my($ones,$croak,@pos)=(0);
6889 501         419 for(0..$k-1){
6890 1503         1068 my $pos=$h[$_] % $m;
6891             my $c=
6892 1503         1000 vec($$bf{filter}, $pos, $cb);
6893 1503         1494 vec($$bf{filter}, $pos, $cb)=$c-1;
6894 1503 100       1736 $croak="Cannot delete a non-existing key $key" if $c==0;
6895             $croak="Cannot delete with previously overflown position. Try doubleing counting_bits"
6896 1503 50 66     3619 if $c==1 and ++$ones and $$bf{overflow}{$pos};
      66        
6897             }
6898 501 100       677 if($croak){ #rollback
6899             vec($$bf{filter}, $h[$_] % $m, $cb)=
6900 1         8 vec($$bf{filter}, $h[$_] % $m, $cb)+1 for 0..$k-1;
6901 1         170 croak $croak;
6902             }
6903             }
6904 2         15 return $bf;
6905             }
6906             sub bfstore {
6907 1     1 1 14473 require Storable;
6908 1         7 Storable::store(@_);
6909             }
6910             sub bfretrieve {
6911 2     2 1 313 require Storable;
6912 2         9 my $bf=Storable::retrieve(@_);
6913 2 50       466 carp "Retrieved bloom filter was stored in version $$bf{version}, this is version $VERSION" if $$bf{version}>$VERSION;
6914 2         3 return $bf;
6915             }
6916             sub bfclone {
6917 1     1 1 822 require Storable;
6918 1         2634 return Storable::dclone(@_); #could be faster
6919             }
6920             sub bfdimensions_old {
6921             my($n,$p,$mink,$maxk, $k,$flen,$m)=
6922 0 0   0 0 0 @_==1 ? (@{$_[0]}{'capacity','error_rate','min_hashfuncs','max_hashfuncs'},1)
  0 0       0  
6923             :@_==2 ? (@_,1,100,1)
6924             : croak "Wrong number of arguments (".@_."), should be 2";
6925 0 0 0     0 croak "p ($p) should be > 0 and < 1" if not ( 0<$p && $p<1 );
6926 0   0     0 $m=-1*$_*$n/log(1-$p**(1/$_)) and (!defined $flen or $m<$flen) and ($flen,$k)=($m,$_) for $mink..$maxk;
      0        
      0        
6927 0         0 $flen = int(1+$flen);
6928 0         0 return ($flen,$k);
6929             }
6930             sub bfdimensions {
6931             my($n,$p,$mink,$maxk)=
6932 18 0   18 1 55 @_==1 ? (@{$_[0]}{'capacity','error_rate','min_hashfuncs','max_hashfuncs'})
  18 50       36  
6933             :@_==2 ? (@_,1,100)
6934             : croak "Wrong number of arguments (".@_."), should be 2";
6935 18         48 my $k=log(1/$p)/log(2); # k hash funcs
6936 18         33 my $m=-$n*log($p)/log(2)**2; # m bits in filter
6937 18         60 return ($m+0.5,min($maxk,max($mink,int($k+0.5))));
6938             }
6939              
6940             #crontab -e
6941             #01 4,10,16,22 * * * /usr/bin/perl -MAcme::Tools -e'Acme::Tools::_update_currency_file("/var/www/html/currency-rates")' > /dev/null 2>&1
6942              
6943             sub _update_currency_file { #call from cron
6944 0   0 0   0 my $fn=shift()||'/var/www/html/currency-rates';
6945 0   0     0 my %exe=map+($_=>"/usr/bin/$_"),qw/curl ci/;-x$_ or die for values %exe;
  0         0  
6946 0 0       0 open my $F, '>', $fn or die"ERROR: Could not write file $fn ($!)\n";
6947 0         0 print $F "#-- Currency rates ".localtime()." (".time().")\n";
6948 0         0 print $F "# File generated by Acme::Tools version $VERSION\n";
6949 0         0 print $F "# Updated every 6th hour on http://calthis.com/currency-rates\n";
6950 0         0 print $F "NOK 1.000000000\n";
6951 0         0 my $amount=1000;
6952 0         0 my $data=qx($exe{curl} -s "http://www.x-rates.com/table/?from=NOK&amount=$amount");
6953 0         0 $data=~s,to=([A-Z]{3})(.)>,$2>$1,g;
6954 0         0 my @data=ht2t($data,"Alphabetical order"); shift @data;
  0         0  
6955 0 0       0 @data=map "$$_[1] ".($$_[4]>1e-2?$$_[4]:$$_[2]?sprintf("%.8f",$amount/$$_[2]):0)."\n",@data;
    0          
6956 0         0 my %data=map split,@data;
6957 0         0 my@tc=qx($exe{curl} -s https://btc-e.com/api/3/ticker/btc_usd-ltc_usd)=~/avg.?:(\d+\.?\d*)/g;
6958 0         0 push @data,"BTC ".($tc[0]*$data{USD})."\n";
6959 0         0 push @data,"LTC ".($tc[1]*$data{USD})."\n";
6960 0         0 print $F sort(@data);
6961 0         0 close($F);
6962 0 0       0 qx($exe{ci} -l -m. -d $fn) if -w"$fn,v";
6963             }
6964              
6965             sub ftype {
6966 0     0 0 0 my $f=shift;
6967 0 0 0     0 -e $f and
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
6968             -f$f ? 'file' # -f File is a plain file.
6969             :-d$f ? 'dir' # -d File is a directory.
6970             :-l$f ? 'symlink' # -l File is a symbolic link.
6971             :-p$f ? 'pipe' # -p File is a named pipe (FIFO), or Filehandle is a pipe.
6972             :-S$f ? 'socket' # -S File is a socket.
6973             :-b$f ? 'blockfile' # -b File is a block special file.
6974             :-c$f ? 'charfile' # -c File is a character special file.
6975             :-t$f ? 'ttyfile' # -t Filehandle is opened to a tty.
6976             : ''
6977             or undef;
6978             }
6979              
6980             sub ext2mime {
6981 0     0 0 0 my $ext=shift(); #or filename
6982             #http://www.sitepoint.com/web-foundations/mime-types-complete-list/
6983 0         0 croak "todo: ext2mime not yet implemented";
6984             #return "application/json";#feks
6985             }
6986              
6987             =head1 COMMANDS
6988              
6989             =head2 install_acme_command_tools
6990              
6991             sudo perl -MAcme::Tools -e install_acme_command_tools
6992              
6993             Wrote executable /usr/local/bin/conv
6994             Wrote executable /usr/local/bin/due
6995             Wrote executable /usr/local/bin/xcat
6996             Wrote executable /usr/local/bin/freq
6997             Wrote executable /usr/local/bin/deldup
6998             Wrote executable /usr/local/bin/ccmd
6999             Wrote executable /usr/local/bin/z2z
7000             Wrote executable /usr/local/bin/2gz
7001             Wrote executable /usr/local/bin/2gzip
7002             Wrote executable /usr/local/bin/2bz2
7003             Wrote executable /usr/local/bin/2bzip2
7004             Wrote executable /usr/local/bin/2xz
7005              
7006             Examples of commands then made available:
7007              
7008             conv 1 USD EUR #might show 0.88029 if thats the current currency rate. Uses conv()
7009             conv .5 in cm #reveals that 1/2 inch is 1.27 cm, see doc on conv() for all supported units
7010             due [-h] /path/1/ /path/2/ #like du, but show statistics on file extentions instead of subdirs
7011             xcat file #like cat, zcat, bzcat or xzcat in one. Uses file extention to decide. Uses openstr()
7012             freq file #reads file(s) or stdin and view counts of each byte 0-255
7013             ccmd grep string /huge/file #caches stdout+stderr for 15 minutes (default) for much faster results later
7014             ccmd "sleep 2;echo hello" #slow first time. Note the quotes!
7015             ccmd "du -s ~/*|sort -n|tail" #ccmd store stdout+stderr in /tmp files (default)
7016             z2z [-pvk1-9o -t type] files #convert from/to .gz/bz2/xz files, -p progress, -v verbose (output result),
7017             #-k keep org file, -o overwrite, 1-9 compression degree
7018             #2xz and 2bz2 depends on xz and bzip2 being installed on system
7019             2xz #same as z2z with -t xz
7020             2bz2 #same as z2z with -t bz2
7021             2gz #same as z2z with -t gz
7022              
7023             TODO :
7024             finddup [-v -d -s -h] path1/ path2/
7025             #reports (+deletes with -d) duplicate files
7026             #finddup is NOT IMPLEMENTED YET! Use -s for symlink dups, -h for hardlink
7027             rttop
7028             trunc file(s)
7029             wipe file(s)
7030              
7031             =head3 z2z
7032              
7033             =head3 2xz
7034              
7035             =head3 2bz2
7036              
7037             =head3 2gz
7038              
7039             The commands C<2xz>, C<2bz2> and C<2gz> are just synonyms for C with an implicitly added option C<-t xz>, C<-t xz> or C<-t gz> accordingly.
7040              
7041             z2z [-p -k -v -o -1 -2 -3 -4 -5 -6 -7 -8 -9 ] files
7042              
7043             Converts (recompresses) files from one compression sc
7044              
7045              
7046              
7047             =head3 due
7048              
7049             Like C command but views space used by file extentions instead of dirs. Options:
7050              
7051             due [-options] [dirs] [files]
7052             due -h View bytes "human readable", i.e. C<8.72 MB> instead of C<9145662 b> (bytes)
7053             due -k | -m View bytes in kilobytes | megabytes (1024 | 1048576)
7054             due -K Like -k but uses 1000 instead of 1024
7055             due -z View two extentions if .z .Z .gz .bz2 .rz or .xz (.tar.gz, not just .gz)
7056             due -M Also show min, medium and max date (mtime) of files, give an idea of their age
7057             due -P Also show 10, 50 (medium) and 90 percentile of file date
7058             due -MP Both -M and -P, shows min, 10p, 50p, 90p and max
7059             due -a Sort output alphabetically by extention (default order is by size)
7060             due -c Sort output by number of files
7061             due -i Ignore case, .GZ and .gz is the same, output in lower case
7062             due -t Adds time of day to -M and -P output
7063             due -e 'regex' Exclude files (full path) matching regex. Ex: due -e '\.git'
7064             TODO: due -l TODO: Exclude hardlinks (dont count "same" file more than once, "man du")
7065             ls -l | due Parses output of ls -l, find -ls, tar tvf for size+filename and reports
7066             find | due List of filenames from stdin produces same as just command 'due'
7067             ls | due Reports on just files in current dir without recursing into subdirs
7068              
7069             =cut
7070              
7071             sub install_acme_command_tools {
7072 0     0 1 0 my $dir=(grep -d$_, @_, '/usr/local/bin', '/usr/bin')[0];
7073 0         0 for( qw( conv due xcat freq finddup ccmd trunc wipe rttop z2z 2gz 2gzip 2bz2 2bzip2 2xz ) ){
7074 0         0 unlink("$dir/$_");
7075 0         0 writefile("$dir/$_", "#!$^X\nuse Acme::Tools;\nAcme::Tools::cmd_$_(\@ARGV);\n");
7076 0         0 sys("/bin/chmod +x $dir/$_"); #hm umask
7077 0         0 print "Wrote executable $dir/$_\n";
7078             }
7079             }
7080 0     0 0 0 sub cmd_conv { print conv(@ARGV)."\n" }
7081              
7082             sub cmd_due { #TODO: output from tar tvf and ls and find -ls
7083 0     0 0 0 my %o=_go("zkKmhciMPate:l");
7084 0         0 require File::Find;
7085 33     33   161 no warnings 'uninitialized';
  33         40  
  33         102944  
7086 0 0       0 die"$0: -l not implemented yet\n" if $o{l}; #man du: default is not to count hardlinks more than once, with -l it does
7087 0 0       0 die"$0: -h, -k or -m can not be used together\n" if $o{h}+$o{k}+$o{m}>1;
7088 0 0       0 die"$0: -c and -a can not be used together\n" if $o{a}+$o{c}>1;
7089 0 0       0 die"$0: -k and -m can not be used together\n" if $o{k}+$o{m}>1;
7090 0 0       0 my @q=@ARGV; @q=('.') if !@q;
  0         0  
7091 0         0 my(%c,%b,$cnt,$bts,%mtime);
7092 0 0       0 my $zext=$o{z}?'(\.(z|Z|gz|bz2|xz|rz|kr))?':'';
7093 0         0 my $r=qr/(\.[^\.\/]{1,10}$zext)$/;
7094 0 0       0 my $qrexcl=exists$o{e}?qr/$o{e}/:0;
7095             #TODO: ought to work: tar cf - .|tar tvf -|due
7096             #my $qrstdin=qr/(^| )\-[rwx\-sS]{9} +\d+ \w+ +\w+ +(\d+) [a-zA-Z]+\.? +\d+ +(?:\d\d:\d\d|\d{4}) (.*)$/;
7097 0         0 my $qrstdin=qr/(^| )\-[rwx\-sS]{9} +(\d+ )?\w+[ \/]+\w+ +(\d+) [a-zA-Z]+\.? +\d+ +(?:\d\d:\d\d|\d{4}) (.*)$/;
7098 0 0       0 if(-p STDIN){
7099 0         0 while(<>){
7100 0         0 chomp;
7101 0 0       0 my($sz,$f)=/$qrstdin/?($2,$3):-f$_?(-s$_,$_):next;
    0          
7102 0 0       0 my $ext=$f=~$r?$1:'';
7103 0 0       0 $ext=lc($ext) if $o{i};
7104 0         0 $cnt++; $c{$ext}++;
  0         0  
7105 0         0 $bts+=$sz; $b{$ext}+=$sz;
  0         0  
7106             #$mtime{$ext}.=",$mtime" if
7107 0 0 0     0 $o{M} || $o{P} and die"due: -M and -P not yet implemented for STDIN";
7108             }
7109             }
7110             else { #hm DRY
7111             File::Find::find({wanted =>
7112             sub {
7113 0 0   0   0 return if !-f$_;
7114 0 0 0     0 return if $qrexcl and defined $File::Find::name and $File::Find::name=~$qrexcl;
      0        
7115 0         0 my($sz,$mtime)=(stat($_))[7,9];
7116 0 0       0 my $ext=m/$r/?$1:'';
7117 0 0       0 $ext=lc($ext) if $o{i};
7118 0         0 $cnt++; $c{$ext}++;
  0         0  
7119 0         0 $bts+=$sz; $b{$ext}+=$sz;
  0         0  
7120 0 0 0     0 $mtime{$ext}.=",$mtime" if $o{M} || $o{P};
7121 0         0 1;
7122 0         0 } },@q);
7123             }
7124 0     0   0 my($f,$s)=$o{k}?("%14.2f kb",sub{$_[0]/1024})
7125 0     0   0 :$o{K}?("%14.2f Kb",sub{$_[0]/1000})
7126 0     0   0 :$o{m}?("%14.2f mb",sub{$_[0]/1024**2})
7127 0     0   0 :$o{h}?("%14s", sub{bytes_readable($_[0])})
7128 0 0   0   0 : ("%14d b", sub{$_[0]});
  0 0       0  
    0          
    0          
7129             my @e=$o{a}?(sort(keys%c))
7130 0 0       0 :$o{c}?(sort{$c{$a}<=>$c{$b} or $a cmp $b}keys%c)
7131 0 0       0 : (sort{$b{$a}<=>$b{$b} or $a cmp $b}keys%c);
  0 0       0  
    0          
7132 0     0   0 my $perc=!$o{M}&&!$o{P}?sub{""}:
7133             sub{
7134 0 0   0   0 my @p=$o{P}?(10,50,90):(50);
7135 0         0 my @m=@_>0 ? do {grep$_, split",", $mtime{$_[0]}}
7136 0 0       0 : do {grep$_, map {split","} values %mtime};
  0         0  
  0         0  
7137 0         0 my @r=percentile(\@p,@m);
7138 0 0       0 @r=(min(@m),@r,max(@m)) if $o{M};
7139 0         0 @r=map int($_), @r;
7140 0 0       0 my $fmt=$o{t}?'YYYY/MM/DD-MM:MI:SS':'YYYY/MM/DD';
7141 0         0 @r=map tms($_,$fmt), @r;
7142 0         0 " ".join(" ",@r);
7143 0 0 0     0 };
7144 0         0 printf("%-11s %8d $f %7.2f%%%s\n",$_,$c{$_},&$s($b{$_}),100*$b{$_}/$bts,&$perc($_)) for @e;
7145 0         0 printf("%-11s %8d $f %7.2f%%%s\n","Sum",$cnt,&$s($bts),100,&$perc());
7146             }
7147             sub cmd_xcat {
7148 0     0 0 0 for my $fn (@_){
7149 0         0 my $os=openstr($fn);
7150 0 0 0     0 open my $FH, $os or warn "xcat: cannot open $os ($!)\n" and next;
7151             #binmode($FH);#hm?
7152 0         0 print while <$FH>;
7153 0         0 close($FH);
7154             }
7155             }
7156             sub cmd_freq {
7157 0     0 0 0 my(@f,$i);
7158 0         0 map $f[$_]++, unpack("C*",$_) while <>;
7159 0         0 my $s=" " x 12;map{print"$_$s$_$s$_\n"}("BYTE CHAR COUNT","---- ----- -------");
  0         0  
  0         0  
7160 0         0 my %m=(145,"DOS-æ",155,"DOS-ø",134,"DOS-å",146,"DOS-Æ",157,"DOS-Ø",143,"DOS-Å",map{($_," ")}0..31);
  0         0  
7161 0 0 0     0 printf("%4d %5s%8d".(++$i%3?$s:"\n"),$_,$m{$_}||chr,$f[$_]) for grep$f[$_],0..255;print "\n";
  0         0  
7162 0         0 my @no=grep!$f[$_],0..255; print "No bytes for these ".@no.": ".join(" ",@no)."\n";
  0         0  
7163             }
7164             sub cmd_deldup {
7165 0     0 0 0 cmd_finddup(@_);
7166             }
7167             sub cmd_finddup {
7168             # ~/test/deldup.pl #find and optionally delete duplicate files effiencently
7169             #http://www.commandlinefu.com/commands/view/3555/find-duplicate-files-based-on-size-first-then-md5-hash
7170 0     0 0 0 die "todo: finddup not ready yet"
7171             }
7172             #http://stackoverflow.com/questions/11900239/can-i-cache-the-output-of-a-command-on-linux-from-cli
7173             our $Ccmd_cache_dir='/tmp/acme-tools-ccmd-cache';
7174             our $Ccmd_cache_expire=15*60; #default 15 minutes
7175             sub cmd_ccmd {
7176 0     0 0 0 require Digest::MD5;
7177 0         0 my $cmd=join" ",@_;
7178 0         0 my $d="$Ccmd_cache_dir/".username();
7179 0         0 makedir($d);
7180 0         0 my $md5=Digest::MD5::md5_hex($cmd);
7181 0         0 my($fno,$fne)=map"$d/cmd.$md5.std$_","out","err";
7182 0     0   0 my $too_old=sub{time()-(stat(shift))[9] >= $Ccmd_cache_expire};
  0         0  
7183 0         0 unlink grep &$too_old($_), <$d/*.std???>;
7184 0 0 0     0 sys("($cmd) > $fno 2> $fne") if !-e$fno or &$too_old($fno);
7185 0         0 print STDOUT "".readfile($fno);
7186 0         0 print STDERR "".readfile($fne);
7187             }
7188              
7189 0     0 0 0 sub cmd_trunc { die "todo: trunc not ready yet"} #truncate a file, size 0, keep all other attr
7190              
7191             sub cmd_wipe {
7192 0     0 0 0 my %o=_go("n:k");
7193 0         0 wipe($_,$o{n},$o{k}) for @_;
7194             }
7195              
7196 0     0 0 0 sub cmd_2gz {cmd_z2z("-t","gz", @_)}
7197 0     0 0 0 sub cmd_2gzip {cmd_z2z("-t","gz", @_)}
7198 0     0 0 0 sub cmd_2bz2 {cmd_z2z("-t","bz2",@_)}
7199 0     0 0 0 sub cmd_2bzip2 {cmd_z2z("-t","bz2",@_)}
7200 0     0 0 0 sub cmd_2xz {cmd_z2z("-t","xz", @_)}
7201             #todo?: sub cmd_7z
7202             sub cmd_z2z {
7203 8     8 0 54949 local @ARGV=@_;
7204 8         70 my %o=_go("pt:kvhon123456789");
7205 8         119 my $t=repl(lc$o{t},qw/gzip gz bzip2 bz2/);
7206 8 50       107 die "due: unknown compression type $o{t}, known are gz, bz2 and xz" if $t!~/^(gz|bz2|xz)$/;
7207 8         410 my $sum=sum(map -s$_,@ARGV);
7208 8 100 66     105 print "Converting ".@ARGV." files, total ".bytes_readable($sum)."\n" if $o{v} and @ARGV>1;
7209 8         35 my $cat='cat';
7210 8 0       37 if($o{p}){ if(qx(which pv)){ $cat='pv' } else { warn repl(<<"",qr/^\s+/) } }
  0 50       0  
  0         0  
  0         0  
7211             due: pv for -p not found, install with sudo yum install pv, sudo apt-get install pv or similar
7212              
7213 8         19 my $sumnew=0;
7214 8         124 my $start=time_fp();
7215 8         17 my($i,$bsf)=(0,0);#bytes so far
7216 8         43 $Eta{'z2z'}=[];eta('z2z',0,$sum);
  8         63  
7217 8         28 for(@ARGV){
7218 11 50       43 my $new=$_; $new=~s/(\.(gz|bz2|xz))?$/.$t/i or die;
  11         293  
7219 11 100       69 my $ext=defined($2)?lc($2):'';
7220 11 50 33     370 my $same=/^$new$/; $new.=".tmp" if $same; die if $o{k} and $same;
  11 50       37  
  11         45  
7221 11 50 33     297 next if !-e$_ and warn"$_ do not exists\n";
7222 11 50 33     161 next if !-r$_ and warn"$_ is not readable\n";
7223 11 0 33     301 next if -e$new and !$o{o} and warn"$new already exists, skipping (use -o to overwrite)\n";
      33        
7224 11   100     192 my $unz={qw/gz gunzip bz2 bunzip2 xz unxz/}->{$ext}||'';
7225             #todo: my $cntfile="/tmp/acme-tools-z2z-wc-c.$$";
7226             #todo: my $cnt="tee >(wc -c>$cntfile)" if $ENV{SHELL}=~/bash/ and $o{v}; #hm dash vs bash
7227 11         80 my $z= {qw/gz gzip bz2 bzip2 xz xz/}->{$t};
7228 11         133 $z.=" -$_" for grep$o{$_},1..9;
7229 11         71 my $cmd="$cat $_|$unz|$z>$new";
7230             #todo: "$cat $_|$unz|$cnt|$z>$new";
7231             #cat /tmp/kontroll-linux.xz|unxz|tee >(wc -c>/tmp/p)|gzip|wc -c;cat /tmp/p
7232 11         103 $cmd=~s,\|+,|,g; #print "cmd: $cmd\n";
7233 11         81 sys($cmd);
7234 11 50 50     245 chall($_,$new)||die if !$o{n};
7235 11         96 my($szold,$sznew)=map{-s$_}($_,$new);
  22         423  
7236 11         153 $bsf+=-s$_;
7237 11 50       1435 unlink $_ if !$o{k};
7238 11 50 0     50 rename($new, replace($new,qr/.tmp$/)) or die if $same;
7239 11 50       45 if($o{v}){
7240 11         26 $sumnew+=$sznew;
7241 11         414 my $pr=sprintf"%0.1f%%",100*$sznew/$szold;
7242             #todo: my $szuncmp=-s$cntfile&&time()-(stat($cntfile))[9]<10 ? qx(cat $cntfile) : '';
7243             #todo: $o{h} ? printf("%6.1f%% %9s => %9s => %9s %s\n", $pr,(map bytes_readable($_),$szold,$szuncmp,$sznew),$_)
7244             #todo: : printf("%6.1f%% %11d b => %11d b => %11 b %s\n",$pr,$szold,$szuncmp,$sznew,$_)
7245             my $str= $o{h}
7246 11 100       163 ? sprintf("%-7s %9s => %9s", $pr,(map bytes_readable($_),$szold,$sznew))
7247             : sprintf("%-7s %11d b => %11d b", $pr,$szold,$sznew);
7248 11 100       77 if(@ARGV>1){
7249 4         19 $i++;
7250 4 0       25 $str=$i<@ARGV
    50          
7251             ? " ETA:".sec_readable(eta('z2z',$bsf,$sum)-time_fp())." $str"
7252             : " TA: 0s $str"
7253             if $sum>1e6;
7254 4         37 $str="$i/".@ARGV." $str";
7255             }
7256 11         7511 print "$str $new\n";
7257             }
7258             }
7259 8 100 66     363 if($o{v} and @ARGV>1){
7260 1 50       28 my $bytes=$o{h}?'':'bytes ';
7261             my $str=
7262             sprintf "%d files compressed in %.3f seconds from %s to %s $bytes (%s bytes) %.1f%% of original\n",
7263             0+@ARGV,
7264             time_fp()-$start,
7265 1 50       21 (map{$o{h}?bytes_readable($_):$_}($sum,$sumnew,$sumnew-$sum)),
  3         17  
7266             100*$sumnew/$sum;
7267 1         27 $str=~s,\((\d),(+$1,;
7268 1         31 print $str;
7269             }
7270             }
7271              
7272 8     8   3205 sub _go { require Getopt::Std; my %o; Getopt::Std::getopts(shift() => \%o); %o }
  8         103  
  8         101  
  8         1142  
7273              
7274 0     0 0 0 sub cmd_rttop { die "rttop: not implemented here yet.\n" }
7275 0     0 0 0 sub cmd_whichpm { die "whichpm: not implemented here yet.\n" } #-a (all, inkl VERSION og ls -l)
7276 0     0 0 0 sub cmd_catal { die "catal: not implemented here yet.\n" } #-a (all, inkl VERSION og ls -l)
7277             #todo: cmd_tabdiff (fra sonyk)
7278             #todo: cmd_catlog (ala catal med /etc/catlog.conf, default er access_log)
7279              
7280             =head1 DATABASE STUFF - NOT IMPLEMENTED YET
7281              
7282             Uses L. Comming soon...
7283              
7284             $Dbh
7285             dlogin
7286             dlogout
7287             drow
7288             drows
7289             drowc
7290             drowsc
7291             dcols
7292             dpk
7293             dsel
7294             ddo
7295             dins
7296             dupd
7297             ddel
7298             dcommit
7299             drollback
7300              
7301             =cut
7302              
7303             #my$dummy=<<'SOON';
7304             sub dtype {
7305 0     0 0 0 my $connstr=shift;
7306 0 0       0 return 'SQLite' if $connstr=~/(\.sqlite|sqlite:.*\.db)$/i;
7307 0 0       0 return 'Oracle' if $connstr=~/\@/;
7308 0         0 return 'Pg' if 1==2;
7309 0         0 die;
7310             }
7311              
7312             our($Dbh,@Dbh,%Sth);
7313             our %Dbattr=(RaiseError => 1, AutoCommit => 0); #defaults
7314             sub dlogin {
7315 0     0 0 0 my $connstr=shift();
7316 0         0 my %attr=(%Dbattr,@_);
7317 0         0 my $type=dtype($connstr);
7318 0         0 my($dsn,$u,$p)=('','','');
7319 0 0       0 if($type eq 'SQLite'){
    0          
    0          
7320 0         0 $dsn=$connstr;
7321             }
7322             elsif($type eq 'Oracle'){
7323 0         0 ($u,$p,$dsn)=($connstr=~m,(.+?)(/.+?)?\@(.+),);
7324             }
7325             elsif($type eq 'Pg'){
7326 0         0 croak "todo";
7327             }
7328             else{
7329 0         0 croak "dblogin: unknown database type for connection string $connstr\n";
7330             }
7331 0         0 $dsn="dbi:$type:$dsn";
7332 0 0       0 push @Dbh, $Dbh if $Dbh; #local is better?
7333 0         0 require DBI;
7334 0         0 $Dbh=DBI->connect($dsn,$u,$p,\%attr); #connect_cached?
7335             }
7336             sub dlogout {
7337 0     0 0 0 $Dbh->disconnect;
7338 0 0       0 $Dbh=pop@Dbh if @Dbh;
7339             }
7340             sub drow {
7341 0     0 0 0 my($q,@b)=_dattrarg(@_);
7342             #my $sth=do{$Sth{$Dbh,$q} ||= $Dbh->prepare_cached($q)};
7343 0         0 my $sth=$Dbh->prepare_cached($q);
7344 0         0 $sth->execute(@b);
7345 0         0 my @r=$sth->fetchrow_array;
7346 0 0       0 $sth->finish if $$Dbh{Driver}{Name} eq 'SQLite';
7347             #$dbh->selectrow_array($statement);
7348 0 0       0 return @r==1?$r[0]:@r;
7349             }
7350       0 0   sub drows {
7351             }
7352       0 0   sub drowc {
7353             }
7354       0 0   sub drowsc {
7355             }
7356       0 0   sub dcols {
7357             }
7358       0 0   sub dpk {
7359             }
7360       0 0   sub dsel {
7361             }
7362             sub ddo {
7363 0     0 0 0 my @arg=_dattrarg(@_);
7364             #warn serialize(\@arg,'arg','',1);
7365 0         0 $Dbh->do(@arg); #hm cache?
7366             }
7367       0 0   sub dins {
7368             }
7369       0 0   sub dupd {
7370             }
7371       0 0   sub ddel {
7372             }
7373 0     0 0 0 sub dcommit { $Dbh->commit }
7374 0     0 0 0 sub drollback { $Dbh->rollback }
7375              
7376             sub _dattrarg {
7377 0     0   0 my @arg=@_;
7378 0 0       0 splice @arg,1,0, ref($arg[-1]) eq 'HASH' ? pop(@arg) : {};
7379 0         0 @arg;
7380             }
7381              
7382             =head2 self_update
7383              
7384             Update Acme::Tools to newest version quick and dirty:
7385              
7386             function pmview(){ ls -ld `perl -M$1 -le'$m=shift;$mi=$m;$mi=~s,::,/,g;print $INC{"$mi.pm"};warn"Version ".${$m."::VERSION"}."\n"' $1`;}
7387              
7388             pmview Acme::Tools #view date and version before
7389             sudo perl -MAcme::Tools -e Acme::Tools::self_update #update to newest version
7390             pmview Acme::Tools #view date and version after
7391              
7392             Does C to where Acme/Tools.pm are and then wget -N https://raw.githubusercontent.com/kjetillll/Acme-Tools/master/Tools.pm
7393              
7394             TODO: cmd_acme_tools_self_update, accept --no-check-certificate to use on curl
7395              
7396             =cut
7397              
7398             our $Wget;
7399             our $Self_update_url='https://raw.githubusercontent.com/kjetillll/Acme-Tools/master/Tools.pm'; #todo: change site
7400             sub self_update {
7401             #in($^O,'linux','cygwin') or die"ERROR: self_update works on linux and cygwin only";
7402 0   0 0 1 0 $Wget||=(grep -x$_,map"$_/wget",'/usr/bin','/bin','/usr/local/bin','.')[0]; #hm --no-check-certificate
7403 0 0       0 -x$Wget or die"ERROR: wget ($Wget) executable not found\n";
7404 0         0 my $d=dirname(__FILE__);
7405 0         0 sys("cd $d; ls -l Tools.pm; md5sum Tools.pm");
7406 0   0     0 sys("cd $d; $Wget -N ".($ARGV[0]||$Self_update_url));
7407 0         0 sys("cd $d; ls -l Tools.pm; md5sum Tools.pm");
7408             }
7409              
7410             1;
7411              
7412             package Acme::Tools::BloomFilter;
7413 33     33   813 use 5.008; use strict; use warnings; use Carp;
  33     33   103  
  33     33   129  
  33     33   38  
  33         757  
  33         105  
  33         35  
  33         946  
  33         113  
  33         33  
  33         13675  
7414 2     2   180 sub new { my($class,@p)=@_; my $self=Acme::Tools::bfinit(@p); bless $self, $class }
  2         9  
  2         8  
7415 2     2   66 sub add { &Acme::Tools::bfadd }
7416 0     0   0 sub addbf { &Acme::Tools::bfaddbf }
7417 2     2   64 sub check { &Acme::Tools::bfcheck }
7418 1     1   3 sub grep { &Acme::Tools::bfgrep }
7419 0     0   0 sub grepnot { &Acme::Tools::bfgrepnot }
7420 0     0   0 sub delete { &Acme::Tools::bfdelete }
7421 0     0   0 sub store { &Acme::Tools::bfstore }
7422 0     0   0 sub retrieve { &Acme::Tools::bfretrieve }
7423 1     1   4 sub clone { &Acme::Tools::bfclone }
7424 0     0   0 sub sum { &Acme::Tools::bfsum }
7425             1;
7426              
7427             # Ny versjon:
7428             # + c-s todo
7429             # + endre $VERSION
7430             # + endre Release history under HISTORY
7431             # + endre årstall under COPYRIGHT AND LICENSE
7432             # + oppd default valutakurser inkl datoen
7433             # + emacs Changes
7434             # + emacs README + aarstall
7435             # + emacs MANIFEST legg til ev nye t/*.t
7436             # + perl Makefile.PL;make test
7437             # + /usr/bin/perl Makefile.PL;make test
7438             # + perlbrew exec "perl ~/Acme-Tools/Makefile.PL ; time make test"
7439             # + perlbrew use perl-5.10.1; perl Makefile.PL; make test; perlbrew off
7440             # + test evt i cygwin og mingw-perl
7441             # + pod2html Tools.pm > Tools.html ; firefox Tools.html
7442             # + https://metacpan.org/pod/Acme::Tools
7443             # + http://cpants.cpanauthors.org/dist/Acme-Tools #kvalitee
7444             # + perl Makefile.PL ; make test && make dist
7445             # + cp -p *tar.gz /htdocs/
7446             # + ci -l -mversjon -d `cat MANIFEST`
7447             # + git add `cat MANIFEST`
7448             # + git status
7449             # + git commit -am versjon
7450             # + git push #eller:
7451             # + git push origin master
7452             # + http://pause.perl.org/
7453             # + tegnsett/utf8-kroell
7454             # http://en.wikipedia.org/wiki/Birthday_problem#Approximations
7455              
7456             # memoize_expire() http://perldoc.perl.org/Memoize/Expire.html
7457             # memoize_file_expire()
7458             # memoize_limit_size() #lru
7459             # memoize_file_limit_size()
7460             # memoize_memcached http://search.cpan.org/~dtrischuk/Memoize-Memcached-0.03/lib/Memoize/Memcached.pm
7461             # hint on http://perl.jonallen.info/writing/articles/install-perl-modules-without-root
7462              
7463             # sub mycrc32 { #http://billauer.co.il/blog/2011/05/perl-crc32-crc-xs-module/ eller String::CRC32::crc32 som er 100 x raskere enn Digest::CRC::crc32
7464             # my ($input, $init_value, $polynomial) = @_;
7465             # $init_value = 0 unless (defined $init_value);
7466             # $polynomial = 0xedb88320 unless (defined $polynomial);
7467             # my @lookup_table;
7468             # for (my $i=0; $i<256; $i++) {
7469             # my $x = $i;
7470             # for (my $j=0; $j<8; $j++) {
7471             # if ($x & 1) {
7472             # $x = ($x >> 1) ^ $polynomial;
7473             # } else {
7474             # $x = $x >> 1;
7475             # }
7476             # }
7477             # push @lookup_table, $x;
7478             # }
7479             # my $crc = $init_value ^ 0xffffffff;
7480             # foreach my $x (unpack ('C*', $input)) {
7481             # $crc = (($crc >> 8) & 0xffffff) ^ $lookup_table[ ($crc ^ $x) & 0xff ];
7482             # }
7483             # $crc = $crc ^ 0xffffffff;
7484             # return $crc;
7485             # }
7486             #
7487              
7488              
7489              
7490             =head1 HISTORY
7491              
7492             Release history
7493              
7494             0.20 Mar 2017 Subs: a2h cnttbl h2a log10 log2 nicenum rstddev sec_readable
7495             throttle timems refa refaa refah refh refha refhh refs
7496             eachr globr keysr popr pushr shiftr splicer unshiftr valuesr
7497             Commands: 2bz2 2gz 2xz z2z
7498              
7499             0.172 Dec 2015 Subs: curb openstr pwgen sleepms sleepnm srlz tms username
7500             self_update install_acme_command_tools
7501             Commands: conv due freq wipe xcat (see "Commands")
7502              
7503             0.16 Feb 2015 bigr curb cpad isnum parta parth read_conf resolve_equation
7504             roman2int trim. Improved: conv (numbers currency) range ("derivatives")
7505              
7506             0.15 Nov 2014 Improved doc
7507             0.14 Nov 2014 New subs, improved tests and doc
7508             0.13 Oct 2010 Non-linux test issue, resolve. improved: bloom filter, tests, doc
7509             0.12 Oct 2010 Improved tests, doc, bloom filter, random_gauss, bytes_readable
7510             0.11 Dec 2008 Improved doc
7511             0.10 Dec 2008
7512              
7513             =head1 SEE ALSO
7514              
7515             L
7516              
7517             =head1 AUTHOR
7518              
7519             Kjetil Skotheim, Ekjetil.skotheim@gmail.comE
7520              
7521             =head1 COPYRIGHT
7522              
7523             1995-2017, Kjetil Skotheim
7524              
7525             =head1 LICENSE
7526              
7527             This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
7528              
7529             =cut