File Coverage

blib/lib/Acme/Tools.pm
Criterion Covered Total %
statement 1361 1898 71.7
branch 707 1280 55.2
condition 332 658 50.4
subroutine 206 302 68.2
pod 164 223 73.5
total 2770 4361 63.5


line stmt bran cond sub pod time code
1             #!/usr/bin/perl
2             package Acme::Tools;
3              
4             our $VERSION = '0.21';
5              
6 33     33   487997 use 5.008; #Perl 5.8 was released July 18th 2002
  33         85  
7 33     33   139 use strict;
  33         42  
  33         657  
8 33     33   100 use warnings;
  33         40  
  33         777  
9 33     33   115 use Carp;
  33         35  
  33         8604  
10              
11             require Exporter;
12             our @ISA = qw(Exporter);
13             our %EXPORT_TAGS = ( all => [ qw() ] );
14             our @EXPORT_OK = ( @{ $EXPORT_TAGS{all} } );
15             our @EXPORT = qw(
16             min
17             max
18             mins
19             maxs
20             sum
21             avg
22             geomavg
23             harmonicavg
24             stddev
25             rstddev
26             median
27             percentile
28             $Resolve_iterations
29             $Resolve_last_estimate
30             $Resolve_time
31             resolve
32             resolve_equation
33             conv
34             rank
35             rankstr
36             eqarr
37             sorted
38             sortedstr
39             pushsort
40             pushsortstr
41             binsearch
42             binsearchstr
43             random
44             random_gauss
45             big
46             bigi
47             bigf
48             bigr
49             bigscale
50             nvl
51             repl
52             replace
53             decode
54             decode_num
55             between
56             curb
57             bound
58             log10
59             log2
60             distinct
61             in
62             in_num
63             uniq
64             union
65             union_all
66             minus
67             minus_all
68             intersect
69             intersect_all
70             not_intersect
71             mix
72             zip
73             subhash
74             hashtrans
75             zipb64
76             zipbin
77             unzipb64
78             unzipbin
79             gzip
80             gunzip
81             bzip2
82             bunzip2
83             ipaddr
84             ipnum
85             webparams
86             urlenc
87             urldec
88             ht2t
89             chall
90             makedir
91             qrlist
92             ansicolor
93             ccn_ok
94             KID_ok
95             writefile
96             readfile
97             readdirectory
98             basename
99             dirname
100             wipe
101             username
102             range
103             globr
104             permutations
105             trigram
106             sliding
107             chunks
108             chars
109             cart
110             reduce
111             int2roman
112             roman2int
113             num2code
114             code2num
115             gcd
116             lcm
117             pivot
118             tablestring
119             upper
120             lower
121             trim
122             rpad
123             lpad
124             cpad
125             dserialize
126             serialize
127             srlz
128             cnttbl
129             nicenum
130             bytes_readable
131             sec_readable
132             distance
133             tms
134             easter
135             time_fp
136             timems
137             sleep_fp
138             sleeps
139             sleepms
140             sleepus
141             sleepns
142             eta
143             sys
144             recursed
145             md5sum
146             pwgen
147             read_conf
148             openstr
149             ldist
150             $Re_isnum
151             isnum
152             part
153             parth
154             parta
155             a2h
156             h2a
157             refa
158             refh
159             refs
160             refaa
161             refah
162             refha
163             refhh
164             pushr
165             popr
166             shiftr
167             unshiftr
168             splicer
169             keysr
170             valuesr
171             eachr
172             ed
173             $Edcursor
174             brainfu
175             brainfu2perl
176             brainfu2perl_optimized
177             bfinit
178             bfsum
179             bfaddbf
180             bfadd
181             bfcheck
182             bfgrep
183             bfgrepnot
184             bfdelete
185             bfstore
186             bfretrieve
187             bfclone
188             bfdimensions
189             $PI
190             install_acme_command_tools
191              
192             $Dbh
193             dlogin
194             dlogout
195             drow
196             drows
197             drowc
198             drowsc
199             dcols
200             dpk
201             dsel
202             ddo
203             dins
204             dupd
205             ddel
206             dcommit
207             drollback
208             );
209              
210             our $PI = '3.141592653589793238462643383279502884197169399375105820974944592307816406286';
211              
212             =head1 NAME
213              
214             Acme::Tools - Lots of more or less useful subs lumped together and exported into your namespace
215              
216             =head1 SYNOPSIS
217              
218             use Acme::Tools;
219              
220             print sum(1,2,3); # 6
221             print avg(2,3,4,6); # 3.75
222              
223             my @list = minus(\@listA, \@listB); # set operations
224             my @list = union(\@listA, \@listB); # set operations
225              
226             print length(gzip("abc" x 1000)); # far less than 3000
227              
228             writefile("/dir/filename",$string); # convenient
229             my $s=readfile("/dir/filename"); # also conventient
230              
231             print "yes!" if between($pi,3,4);
232              
233             print percentile(0.05, @numbers);
234              
235             my @even = range(1000,2000,2); # even numbers between 1000 and 2000
236             my @odd = range(1001,2001,2);
237              
238             my $dice = random(1,6);
239             my $color = random(['red','green','blue','yellow','orange']);
240              
241             ...and more.
242              
243             =encoding utf8
244              
245             =head1 ABSTRACT
246              
247             About 120 more or less useful perl subroutines lumped together and exported into your namespace.
248              
249             =head1 DESCRIPTION
250              
251             Subs created and collected since the mid-90s.
252              
253             =head1 INSTALLATION
254              
255             sudo cpan Acme::Tools
256             sudo cpanm Acme::Tools # after: sudo apt-get install cpanminus make # for Ubuntu 12.04
257              
258             =head1 EXPORT
259              
260             Almost every sub, about 90 of them.
261              
262             Beware of namespace pollution. But what did you expect from an Acme module?
263              
264             =head1 NUMBERS
265              
266             =head2 num2code
267              
268             See L
269              
270             =head2 code2num
271              
272             C convert numbers (integers) from the normal decimal system to some arbitrary other number system.
273             That can be binary (2), oct (8), hex (16) or others.
274              
275             Example:
276              
277             print num2code(255,2,"0123456789ABCDEF"); # prints FF
278             print num2code( 14,2,"0123456789ABCDEF"); # prints 0E
279              
280             ...because 255 are converted to hex FF (base C<< length("0123456789ABCDEF") >> ) which is 2 digits of 0-9 or A-F.
281             ...and 14 are converted to 0E, with leading 0 because of the second argument 2.
282              
283             Example:
284              
285             print num2code(1234,16,"01")
286              
287             Prints the 16 binary digits 0000010011010010 which is 1234 converted to binary zeros and ones.
288              
289             To convert back:
290              
291             print code2num("0000010011010010","01"); #prints 1234
292              
293             C can be used to compress numeric IDs to something shorter:
294              
295             $chars="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_";
296             print num2code("241274432",5,$chars); # prints EOOv0
297             print code2num("EOOv0",$chars); # prints 241274432
298              
299             =cut
300              
301             #Math::BaseCnv
302              
303             sub num2code {
304 4     4 1 350 my($num,$digits,$validchars,$start)=@_;
305 4         5 my $l=length($validchars);
306 4         5 my $key;
307 33     33   134 no warnings;
  33         43  
  33         162608  
308 4 50       13 croak if $num<$start;
309 4         4 $num-=$start;
310 4         7 for(1..$digits){
311 25         33 $key=substr($validchars,$num%$l,1).$key;
312 25         24 $num=int($num/$l);
313             }
314 4 50       10 croak if $num>0;
315 4         10 return $key;
316             }
317              
318             sub code2num {
319 1 50   1 1 2 my($code,$validchars,$start)=@_; $start=0 if !defined $start;
  1         4  
320 1         1 my $l=length($validchars);
321 1         2 my $num=0;
322 1         10 $num=$num*$l+index($validchars,$_) for split//,$code;
323 1         5 return $num+$start;
324             }
325              
326              
327             =head2 gcd
328              
329             I< C<">The Euclidean algorithm (also called Euclid's algorithm) is an
330             algorithm to determine the greatest common divisor (gcd) of two
331             integers. It is one of the oldest algorithms known, since it appeared
332             in the classic Euclid's Elements around 300 BC. The algorithm does not
333             require factoring.C<"> >
334              
335             B two or more positive numbers (integers, without decimals that is)
336              
337             B an integer
338              
339             B
340              
341             print gcd(12, 8); # prints 4
342              
343             Because the (prime number) factors of 12 is 2 * 2 * 3 and the factors of 8 is 2 * 2 * 2
344             and the common ('overlapping') factors for both 12 and 8 is then 2 * 2 and the result becomes 4.
345              
346             B:
347              
348             print gcd(90, 135, 315); # prints 45
349             print gcd(2*3*3*5, 3*3*3*5, 3*3*5*7); # prints 45 ( = 3*3*5 which is common to all three args)
350              
351             Implementation:
352              
353             sub gcd { my($a,$b,@r)=@_; @r ? gcd($a,gcd($b,@r)) : $b==0 ? $a : gcd($b, $a % $b) }
354              
355             One way of putting it: Keep replacing the larger of the two numbers with the difference between them until you got two equal numbers. Then thats the answer.
356              
357             L
358              
359             L
360              
361             =cut
362              
363 26 100   26 1 184 sub gcd { my($a,$b,@r)=@_; @r ? gcd($a,gcd($b,@r)) : $b==0 ? $a : gcd($b, $a % $b) }
  26 100       151  
364              
365             =head2 lcm
366              
367             C finds the Least Common Multiple of two or more numbers (integers).
368              
369             B two or more positive numbers (integers)
370              
371             B an integer number
372              
373             Example: C< 2/21 + 1/6 = 4/42 + 7/42 = 11/42>
374              
375             Where 42 = lcm(21,6).
376              
377             B
378              
379             print lcm(45,120,75); # prints 1800
380              
381             Because the factors are:
382              
383             45 = 2^0 * 3^2 * 5^1
384             120 = 2^3 * 3^1 * 5^1
385             75 = 2^0 * 3^1 * 5^2
386              
387             Take the bigest power of each primary number (2, 3 and 5 here).
388             Which is 2^3, 3^2 and 5^2. Multiplied this is 8 * 9 * 25 = 1800.
389              
390             sub lcm { my($a,$b,@r)=@_; @r ? lcm($a,lcm($b,@r)) : $a*$b/gcd($a,$b) }
391              
392             Seems to works with L as well: (C of all integers from 1 to 200)
393              
394             perl -MAcme::Tools -MMath::BigInt -le'print lcm(map Math::BigInt->new($_),1..200)'
395              
396             337293588832926264639465766794841407432394382785157234228847021917234018060677390066992000
397              
398             =cut
399              
400 3 100   3 1 9 sub lcm { my($a,$b,@r)=@_; @r ? lcm($a,lcm($b,@r)) : $a*$b/gcd($a,$b) }
  3         17  
401              
402             =head2 resolve
403              
404             Resolves an equation by Newtons method.
405              
406             B 1-6 arguments. At least one argument.
407              
408             First argument: must be a coderef to a subroutine (a function)
409              
410             Second argument: if present, the target, f(x)=target. Default 0.
411              
412             Third argument: a start position for x. Default 0.
413              
414             Fourth argument: a small delta value. Default 1e-4 (0.0001).
415              
416             Fifth argument: a maximum number of iterations before resolve gives up
417             and carps. Default 100 (if fifth argument is not given or is
418             undef). The number 0 means infinite here. If the derivative of the
419             start position is zero or close to zero more iterations are typically
420             needed.
421              
422             Sixth argument: A number of seconds to run before giving up. If both
423             fifth and sixth argument is given and > 0, C stops at
424             whichever comes first.
425              
426             B returns the number C for C = 0
427              
428             ...or equal to the second input argument if present.
429              
430             B
431              
432             The equation C<< x^2 - 4x - 21 = 0 >> has two solutions: -3 and 7.
433              
434             The result of C will depend on the start position:
435              
436             print resolve(sub{ $_**2 - 4*$_ - 21 }); # -3 with $_ as your x
437             print resolve(sub{ my $x=shift; $x**2 - 4*$x - 21 }); # -3 more elaborate call
438             print resolve(sub{ my $x=shift; $x**2 - 4*$x - 21 },0,3); # 7 with start position 3
439             print "Iterations: $Acme::Tools::Resolve_iterations\n"; # 3 or larger, about 10-15 is normal
440              
441             The variable C< $Acme::Tools::Resolve_iterations > (which is exported) will be set
442             to the last number of iterations C used. Also if C dies (carps).
443              
444             The variable C< $Acme::Tools::Resolve_last_estimate > (which is exported) will be
445             set to the last estimate. This number will often be close to the solution and can
446             be used even if C dies (carps).
447              
448             B
449              
450             If either second, third or fourth argument is an instance of L, so will the result be:
451              
452             use Acme::Tools;
453             my $equation = sub{ $_ - 1 - 1/$_ };
454             my $gr1 = resolve( $equation, 0, 1 ); #
455             my $gr2 = resolve( $equation, 0, bigf(1) ); # 1/2 + sqrt(5)/2
456             bigscale(50);
457             my $gr3 = resolve( $equation, 0, bigf(1) ); # 1/2 + sqrt(5)/2
458            
459             print 1/2 + sqrt(5)/2, "\n";
460             print "Golden ratio 1: $gr1\n";
461             print "Golden ratio 2: $gr2\n";
462             print "Golden ratio 3: $gr3\n";
463              
464             Output:
465              
466             1.61803398874989
467             Golden ratio 1: 1.61803398874989
468             Golden ratio 2: 1.61803398874989484820458683436563811772029300310882395927211731893236137472439025
469             Golden ratio 3: 1.6180339887498948482045868343656381177203091798057610016490334024184302360920167724737807104860909804
470              
471             See:
472              
473             L
474              
475             L
476              
477             L
478              
479             =cut
480              
481             our $Resolve_iterations;
482             our $Resolve_last_estimate;
483             our $Resolve_time;
484              
485             #sub resolve(\[&$]@) {
486             #sub resolve(&@) { <=0.17
487             #todo: perl -MAcme::Tools -le 'print resolve(sub{$_[0]**2-9431**2});print$Acme::Tools::Resolve_iterations'
488             # =>Div by zero: df(x) = 0 at n'th iteration, n=0, delta=0.0001, fx=CODE(0xc81d470) at -e line 1
489             #todo: ren solve?
490             sub resolve {
491 16     16 1 1030 my($f,$goal,$start,$delta,$iters,$sec)=@_;
492 16 100       32 $goal=0 if !defined $goal;
493 16 100       25 $start=0 if !defined $start;
494 16 50       20 $delta=1e-4 if !defined $delta;
495 16 50       27 $iters=100 if !defined $iters;
496 16 100       24 $sec=0 if !defined $sec;
497 16 50       24 $iters=13e13 if $iters==0;
498 16 50 33     53 croak "Iterations ($iters) or seconds ($sec) can not be a negative number" if $iters<0 or $sec<0;
499 16         10 $Resolve_iterations=undef;
500 16         11 $Resolve_last_estimate=undef;
501 16 50       24 croak "Should have at least 1 argument, a coderef" if !@_;
502 16 50       28 croak "First argument should be a coderef" if ref($f) ne 'CODE';
503              
504 16         17 my @x=($start);
505 16 100       27 my $time_start=$sec>0?time_fp():undef;
506 16 50       23 my $ds=ref($start) eq 'Math::BigFloat' ? Math::BigFloat->div_scale() : undef;
507             my $fx=sub{
508 559     559   369 local$_=$_[0];
509 559         766 my $fx=&$f($_);
510 559 100 66     2261 if($fx=~/x/ and $fx=~/^[ \(\)\.\d\+\-\*\/x\=\^]+$/){
511 1         8 $fx=~s/(\d)x/$1*x/g;
512 1         4 $fx=~s/\^/**/g;
513 1         3 $fx=~s/^(.*)=(.*)$/($1)-($2)/;
514 1         4 $fx=~s,x,\$_,g;
515 1         100 $f=eval"sub{$fx}";
516 1         16 $fx=&$f($_);
517             }
518             $fx
519 16         33 };
  559         689  
520             #warn "delta=$delta\n";
521 16         16 my $n=0;
522 16         29 while($n<=$iters-1){
523 182         228 my $fd= &$fx($x[$n]+$delta*0.5) - &$fx($x[$n]-$delta*0.5);
524 182 100       241 $fd = &$fx($x[$n]+$delta*0.7) - &$fx($x[$n]-$delta*0.3) if $fd==0;# and warn"wigle 1\n";
525 182 100       193 $fd = &$fx($x[$n]+$delta*0.2) - &$fx($x[$n]-$delta*0.8) if $fd==0;# and warn"wigle 2\n";
526 182 100       369 croak "Div by zero: df(x) = $x[$n] at n'th iteration, n=$n, delta=$delta, fx=$fx" if $fd==0;
527 181         160 $x[$n+1]=$x[$n]-(&$fx($x[$n])-$goal)/($fd/$delta);
528 181         125 $Resolve_last_estimate=$x[$n+1];
529             #warn "n=$n fd=$fd x=$x[$n+1]\n";
530 181         125 $Resolve_iterations=$n;
531 181 100 100     462 last if $n>3 and $x[$n+1]==$x[$n] and $x[$n]==$x[$n-1];
      100        
532 180 100 66     524 last if $n>4 and $x[$n]!=0 and abs(1-$x[$n+1]/$x[$n])<1e-13; #sub{3*$_+$_**4-12}
      100        
533 167 50 66     379 last if $n>3 and ref($x[$n+1]) eq 'Math::BigFloat' and substr($x[$n+1],0,$ds) eq substr($x[$n],0,$ds); #hm
      33        
534 167 100 66     215 croak "Could not resolve, perhaps too little time given ($sec), iteratons=$n"
535             if $sec>0 and ($Resolve_time=time_fp()-$time_start)>$sec;
536             #warn "$n: ".$x[$n+1]."\n";
537 166         226 $n++;
538             }
539 14 50       37 croak "Could not resolve, perhaps too few iterations ($iters)" if @x>=$iters;
540 14         112 return $x[-1];
541             }
542              
543             =head2 resolve_equation
544              
545             This prints 2:
546              
547             print resolve_equation "x + 13*(3-x) = 17 - x"
548              
549             A string containing at least one x is converted into a perl function.
550             Then x is found by using L. The string conversion is done by
551             replacing every x with $_ and if a C< = > char is present it converts
552             C< leftside = rightside > into C< (leftside) - (rightside) = 0 > which
553             is the default behaviour of L.
554              
555             =cut
556              
557 1     1 1 2 sub resolve_equation { my $e=shift;resolve(sub{$e},@_)}
  1     1   3  
  1         4  
558              
559             =head2 conv
560              
561             Converts between:
562              
563             =over 4
564              
565             =item * units of measurement
566              
567             =item * number systems
568              
569             =item * currencies
570              
571             =back
572              
573             B
574              
575             print conv( 2000, "meters", "miles" ); #prints 1.24274238447467
576             print conv( 2.1, 'km', 'm'); #prints 2100
577             print conv( 70,"cm","in"); #prints 27.5590551181102
578             print conv( 4,"USD","EUR"); #prints 3.20481552905431 (depending on todays rates)
579             print conv( 4000,"b","kb"); #prints 3.90625 (1 kb = 1024 bytes)
580             print conv( 4000,"b","Kb"); #prints 4 (1 Kb = 1000 bytes)
581             print conv( 1000,"mb","kb"); #prints 1024000
582             print conv( 101010,"bin","roman"); #prints XLII
583             print conv( "DCCXLII","roman","oct"); #prints 1346
584              
585             B are:>
586              
587             Note: units starting with the symbol _ means that all metric
588             prefixes from yocto 10^-24 to yotta 10^+24 is supported, so _m means
589             km, mm, cm, µm and so on. And _N means kN, MN GN and so on.
590              
591             Note2: Many units have synonyms: m, meter, meters ...
592              
593             acceleration: g, g0, m/s2, mps2
594            
595             angle: binary_degree, binary_radian, brad, deg, degree, degrees,
596             gon, grad, grade, gradian, gradians, hexacontade, hour,
597             new_degree, nygrad, point, quadrant, rad, radian, radians,
598             sextant, turn
599            
600             area: a, ar, are, ares, bunder, ca, centiare, cho, cm2,
601             daa, decare, decares, deciare, dekar,
602             djerib, m2, dunam, dönüm, earths, feddan, ft2, gongqing, ha
603             ha, hectare, hectares, hektar, jerib, km2, m2, manzana,
604             mi2, mm2, mu, qing, rai, sotka,
605             sqcm, sqft, sqkm, sqm, sqmi, sqmm
606             stremmata, um2, µm2
607            
608             bytes: Eb, Gb, Kb, KiB, Mb, Pb, Tb, Yb, Zb, b, byte,
609             kb, kilobyte, mb, megabyte,
610             gb, gigabyte, tb, terabyte,
611             pb, petabyte, eb, exabyte,
612             zb, zettabyte, yb, yottabyte
613            
614             charge: As, C, _e, coulomb, e
615            
616             current: A, _A, N/m2
617            
618             energy: BTU, Btu, J, Nm, W/s, Wh, Wps, Ws, _J, _eV,
619             cal, calorie, calories, eV, electronvolt, BeV,
620             erg, ergs, foot-pound, foot-pounds, ftlb, joule, kWh,
621             kcal, kilocalorie, kilocalories,
622             newtonmeter, newtonmeters, th, thermie
623            
624             force: N, _N, dyn, dyne, dynes, lb, newton
625            
626             length: NM, _m, _pc, astronomical unit, au, chain, ft, furlong,
627             in, inch, inches, km, league, lightyear, ls, ly,
628             m, meter, meters, mi, mil, mile, miles,
629             nautical mile, nautical miles, nmi,
630             parsec, pc, planck, yard, yard_imperical, yd, Å, ångstrøm
631            
632             mass: Da, _eV, _g, bag, carat, ct, dwt, eV, electronvolt, g,
633             grain, grains, gram, grams, kilo, kilos, kt, lb, lb_av,
634             lb_t, lb_troy, lbs, ounce, ounce_av, ounce_troy, oz, oz_av, oz_t,
635             pennyweight, pound, pound_av, pound_metric, pound_troy, pounds,
636             pwt, seer, sl, slug, solar_mass, st, stone, t, tonn, tonne, tonnes, u, wey
637            
638             mileage: mpg, l/100km, l/km, l/10km, lp10km, l/mil, liter_pr_100km, liter_pr_km, lp100km
639              
640             money: AED, ARS, AUD, BGN, BHD, BND, BRL, BWP, CAD, CHF, CLP, CNY,
641             COP, CZK, DKK, EUR, GBP, HKD, HRK, HUF, IDR, ILS, INR, IRR,
642             ISK, JPY, KRW, KWD, KZT, LKR, LTL, LVL, LYD, MUR, MXN, MYR,
643             NOK, NPR, NZD, OMR, PHP, PKR, PLN, QAR, RON, RUB, SAR, SEK,
644             SGD, THB, TRY, TTD, TWD, USD, VEF, ZAR, BTC, LTC, mBTC, XBT
645             Currency rates are automatically updated from the net
646             at least every 24h since last update (on linux/cygwin).
647              
648             numbers: dec, hex, bin, oct, roman, dozen, doz, dz, dusin, gross, gro,
649             gr, great_gross, small_gross (not supported: decimal numbers)
650              
651             power: BTU, BTU/h, BTU/s, BTUph, GWhpy, J/s, Jps, MWhpy, TWhpy,
652             W, Whpy, _W, ftlb/min, ftlb/s, hk, hp, kWh/yr, kWhpy
653            
654             pressure: N/m2, Pa, _Pa, at, atm, bar, mbar, pascal, psi, torr
655            
656             radioactivity: Bq, becquerel, curie
657            
658             speed: _m/s, km/h, km/t, kmh, kmph, kmt, m/s, mi/h, mph, mps,
659             kn, knot, knots, kt, kts, mach, machs, c, fps, ft/s, ftps
660            
661             temperature: C, F, K, celsius, fahrenheit, kelvin
662            
663             time: _s, biennium, century, d, day, days, decade, dy, fortnight,
664             h, hour, hours, hr, indiction, jubilee, ke, lustrum, m,
665             millennium, min, minute, minutes, mo, moment, mon, month,
666             olympiad, quarter, s, season, sec, second, seconds, shake,
667             tp, triennium, w, week, weeks, y, y365, ySI, ycommon,
668             year, years, ygregorian, yjulian, ysideral, ytropical
669            
670             volume: l, L, _L, _l, cm3, m3, ft3, in3, liter, liters, litre, litres,
671             gal, gallon, gallon_imp, gallon_uk, gallon_us, gallons,
672             pint, pint_imp, pint_uk, pint_us, tsp, tablespoon, teaspoon,
673             floz, floz_uk, therm, thm, fat, bbl, Mbbl, MMbbl, drum,
674             container (or container20), container40, container40HC, container45HC
675              
676             See: L
677              
678             =cut
679              
680             #TODO: @arr2=conv(\@arr1,"from","to") # should be way faster than:
681             #TODO: @arr2=map conv($_,"from","to"),@arr1
682             #TODO: conv(123456789,'b','h'); # h converts to something human-readable
683              
684             our %conv=(
685             length=>{
686             m => 1,
687             _m => 1,
688             meter => 1,
689             meters => 1,
690             km => 1000,
691             mil => 10000, #scandinavian #also: inch/1000!
692             in => 0.0254,
693             inch => 0.0254,
694             inches => 0.0254,
695             ft => 0.0254*12, #0.3048 m
696             feet => 0.0254*12, #0.3048 m
697             yd => 0.0254*12*3, #0.9144 m
698             yard => 0.0254*12*3, #0.9144 m
699             yards => 0.0254*12*3, #0.9144 m
700             fathom => 0.0254*12*3*2, #1.8288 m
701             fathoms => 0.0254*12*3*2, #1.8288 m
702             chain => 0.0254*12*3*22, #20.1168 m
703             chains => 0.0254*12*3*22, #20.1168 m
704             furlong => 0.0254*12*3*22*10, #201.168 m
705             furlongs=> 0.0254*12*3*22*10, #201.168 m
706             mi => 0.0254*12*3*22*10*8, #1609.344 m
707             mile => 0.0254*12*3*22*10*8, #1609.344 m
708             miles => 0.0254*12*3*22*10*8,
709             league => 0.0254*12*3*22*10*8*3, #4828.032 m
710             leagues => 0.0254*12*3*22*10*8*3, #4828.032 m
711             yard_imp => 0.914398416,
712             yard_imperical => 0.914398416,
713             NM => 1852, #nautical mile
714             nmi => 1852, #nautical mile
715             'nautical mile' => 1852,
716             'nautical miles' => 1852,
717             'Å' => 1e-10,
718             'ångstrøm' => 1e-10,
719             ly => 299792458*3600*24*365.25,
720             lightyear => 299792458*3600*24*365.25, # = 9460730472580800 by def
721             ls => 299792458, #light-second
722             pc => 3.0857e16, #3.26156 ly
723             _pc => 3.0857e16, #3.26156 ly
724             parsec => 3.0857e16,
725             au => 149597870700, # by def, earth-sun
726             'astronomical unit' => 149597870700,
727             planck => 1.61619997e-35, #planck length
728             #Norwegian (old) lengths:
729             tomme => 0.0254,
730             tommer => 0.0254,
731             fot => 0.0254*12, #0.3048m
732             alen => 0.0254*12*2, #0.6096m
733             favn => 0.0254*12*2*3, #1.8288m
734             kvart => 0.0254*12*2/4, #0.1524m a quarter alen
735             },
736             mass =>{ #https://en.wikipedia.org/wiki/Unit_conversion#Mass
737             g => 1,
738             _g => 1,
739             gram => 1,
740             grams => 1,
741             kilo => 1000,
742             kilos => 1000,
743             t => 1000000,
744             tonn => 1000000,
745             tonne => 1000000,
746             tonnes => 1000000,
747             seer => 933.1,
748             lb => 453.59237,
749             lbs => 453.59237,
750             lb_av => 453.59237,
751             lb_t => 373.2417216, #5760 grains
752             lb_troy => 373.2417216,
753             pound => 453.59237,
754             pounds => 453.59237,
755             pound_av => 453.59237,
756             pound_troy => 373.2417216,
757             pound_metric => 500,
758             ounce => 28, # US food, 28g
759             ounce_av => 453.59237/16, # avoirdupois lb/16 = 28.349523125g
760             ounce_troy => 31.1034768, # lb_troy / 12
761             oz => 28, # US food, 28g
762             oz_av => 453.59237/16, # avoirdupois lb/16 = 28.349523125g
763             oz_t => 31.1034768, # lb_troy / 12,
764             grain => 64.79891/1000, # 453.59237/7000
765             grains => 64.79891/1000,
766             pennyweight => 31.1034768 / 20,
767             pwt => 31.1034768 / 20,
768             dwt => 31.1034768 / 20,
769             st => 6350.29318, # 14 lb_av
770             stone => 6350.29318,
771             wey => 114305.27724, # 252 lb = 18 stone
772             carat => 0.2,
773             ct => 0.2, #carat (metric)
774             kt => 64.79891/1000 * (3+1/6), #carat/karat
775             u => 1.66053892173e-30, #atomic mass carbon-12
776             Da => 1.66053892173e-30, #atomic mass carbon-12
777             slug => 14600,
778             sl => 14600,
779             eV => 1.78266172802679e-33, #e=mc2 = 1.60217646e-19 J / (2.99792458e8 m/s)**2
780             _eV => 1.78266172802679e-33,
781             electronvolt => 1.78266172802679e-33,
782             'solar mass' => 1.99e33,
783             solar_mass => 1.99e33,
784             bag => 60*1000, #60kg coffee
785             },
786             area =>{ # https://en.wikipedia.org/wiki/Unit_conversion#Area
787             m2 => 1,
788             dm2 => 0.1**2,
789             cm2 => 0.01**2,
790             mm2 => 0.001**2,
791             'µm2' => 1e-6**2,
792             um2 => 1e-6**2,
793             sqm => 1,
794             sqcm => 0.01**2,
795             sqmm => 0.001**2,
796             km2 => 1000**2,
797             sqkm => 1000**2,
798             a => 100,
799             ar => 100,
800             are => 100,
801             ares => 100,
802             dekar => 1000,
803             decare => 1000,
804             decares => 1000,
805             daa => 1000,
806             'mål' => 1000,
807             ha => 10000,
808             hektar => 10000,
809             hectare => 10000,
810             hectares=> 10000,
811             ft2 => (0.0254*12)**2,
812             sqft => (0.0254*12)**2,
813             mi2 => 1609.344**2,
814             sqmi => 1609.344**2,
815             yd2 => (0.0254*12*3)**2, #square yard
816             sqyd => (0.0254*12*3)**2,
817             yard2 => (0.0254*12*3)**2,
818             sqyard => (0.0254*12*3)**2,
819             rood => 1210*(0.0254*12*3)**2, # 1/4 acres
820             roods => 1210*(0.0254*12*3)**2, # 1/4 acres
821             ac => 4840*(0.0254*12*3)**2, # 4840 square yards = 1 chain x 1 furlong
822             acre => 4840*(0.0254*12*3)**2,
823             acres => 4840*(0.0254*12*3)**2,
824             homestead => 4840*(0.0254*12)**2 *160, #160 acres US Surveyors or 1/4 sqmiles
825             township => 4840*(0.0254*12)**2 *160*144, #144 homesteads or 36 sqmiles
826             perches => 4840*(0.0254*12)**2 /160, #160 perches = 1 acre in sri lanka
827             sotka => 100, #russian are
828             jerib => 10000, #iran hectare
829             djerib => 10000, #turkish hectare
830             gongqing => 10000, #chinese hectare
831             manzana => 10000, #argentinian hectare
832             bunder => 10000, #dutch hectare
833             centiare => 1,
834             deciare => 10,
835             ca => 1,
836             mu => 10000/15, #China
837             qing => 10000/0.15, #China
838             dunam => 10000/10, #Middle East
839             'dönüm' => 10000/10, #Middle East
840             stremmata => 10000/10, #Greece
841             rai => 10000/6.25, #Thailand
842             cho => 10000/1.008, #Japan
843             feddan => 10000/2.381, #Egypt
844             earths => 510072000e6, #510072000 km2, surface area of earth
845             barn => 1e-28, #physics
846             outhouse => 1e-34, #physics
847             shed => 1e-52, #physics
848             },
849             volume=>{
850             l => 1,
851             L => 1,
852             _L => 1,
853             _l => 1,
854             liter => 1,
855             liters => 1,
856             litre => 1,
857             litres => 1,
858             gal => 231*2.54**3/1000, #3.785411784, #231 cubic inches
859             gallon => 231*2.54**3/1000,
860             gallons => 231*2.54**3/1000,
861             gallon_us => 231*2.54**3/1000,
862             gallon_uk => 4.54609,
863             gallon_imp=> 4.54609,
864             gallon_us_dry => 4.40488377086, # ~ 9.25**2*pi*2.54**3/1000 L
865             m3 => 10**3, #1000 L
866             cm3 => 0.1**3, #0.001 L
867             in3 => 0.254**3, #0.016387064 L
868             ft3 => (0.254*12)**3,
869             tablespoon=> 3.785411784/256, #14.78676478125 mL
870             tsp => 3.785411784/256/3, #4.92892159375 mL
871             teaspoon => 3.785411784/256/3, #4.92892159375 mL
872             floz => 3.785411784/128, #fluid ounce US
873             floz_uk => 4.54609/160, #fluid ounce UK
874             pint => 4.54609/8, #0.56826125 L
875             pint_uk => 4.54609/8,
876             pint_imp => 4.54609/8,
877             pint_us => 3.785411784/8, #0.473176473
878             therm => 2.74e3, #? 100000BTUs? (!= thermie)
879             thm => 2.74e3, #? (!= th)
880             fat => 42*231*2.54**3/1000,
881             bbl => 42*231*2.54**3/1000, #oil barrel ~159 liters https://en.wikipedia.org/wiki/Barrel_(unit)
882             Mbbl => 42*231*2.54**3, #mille (thousand) oil barrels
883             MMbbl => 42*231*2.54**3*1000, #mille mille (million) oil barrels
884             drum => 200,
885             container => 33.1e3, #container20
886             container20 => 33.1e3,
887             container40 => 67.5e3,
888             container40HC => 75.3e3,
889             container45HC => 86.1e3,
890             #Norwegian:
891             meterfavn => 2 * 2 * 0.6, #ved 2.4 m3
892             storfavn => 2 * 2 * 3, #ved 12 m3
893             },
894             time =>{
895             s => 1,
896             _s => 1,
897             sec => 1,
898             second => 1,
899             seconds => 1,
900             m => 60,
901             min => 60,
902             minute => 60,
903             minutes => 60,
904             h => 60*60,
905             hr => 60*60,
906             hour => 60*60,
907             hours => 60*60,
908             d => 60*60*24,
909             dy => 60*60*24,
910             day => 60*60*24,
911             days => 60*60*24,
912             w => 60*60*24*7,
913             week => 60*60*24*7,
914             weeks => 60*60*24*7,
915             mo => 60*60*24 * 365.2425/12,
916             mon => 60*60*24 * 365.2425/12,
917             month => 60*60*24 * 365.2425/12,
918             quarter => 60*60*24 * 365.2425/12 * 3, #3 months
919             season => 60*60*24 * 365.2425/12 * 3, #3 months
920             y => 60*60*24 * 365.2425, # 365+97/400 #97 leap yers in 400 years
921             year => 60*60*24 * 365.2425,
922             years => 60*60*24 * 365.2425,
923             yjulian => 60*60*24 * 365.25, # 365+1/4
924             y365 => 60*60*24 * 365, # finance/science
925             ycommon => 60*60*24 * 365, # finance/science
926             ygregorian => 60*60*24 * 365.2425, # 365+97/400
927             #ygaussian => 365+(6*3600+9*60+56)/(24*3600), # 365+97/400
928             ytropical => 60*60*24 * 365.24219,
929             ysideral => 365.256363004,
930             ySI => 60*60*24*365.25, #31556925.9747
931             decade => 10 * 60*60*24*365.2425,
932             biennium => 2 * 60*60*24*365.2425,
933             triennium => 3 * 60*60*24*365.2425,
934             olympiad => 4 * 60*60*24*365.2425,
935             lustrum => 5 * 60*60*24*365.2425,
936             indiction => 15 * 60*60*24*365.2425,
937             jubilee => 50 * 60*60*24*365.2425,
938             century => 100 * 60*60*24*365.2425,
939             millennium => 1000 * 60*60*24*365.2425,
940             shake => 1e-8,
941             moment => 3600/40, #1/40th of an hour, used by Medieval Western European computists
942             ke => 864, #1/100th of a day, trad Chinese, 14m24s
943             fortnight => 14*24*3600,
944             tp => 5.3910632e-44, #planck time, time for ligth to travel 1 planck length
945             },
946             speed=>{
947             'm/s' => 1,
948             '_m/s' => 1,
949             mps => 1,
950             mph => 1609.344/3600,
951             'mi/h' => 1609.344/3600,
952             kmh => 1/3.6,
953             kmph => 1/3.6,
954             'km/h' => 1/3.6,
955             kmt => 1/3.6, # t=time or temps (scandinavian and french and dutch)
956             'km/t' => 1/3.6,
957             kt => 1852/3600,
958             kts => 1852/3600,
959             kn => 1852/3600,
960             knot => 1852/3600,
961             knots => 1852/3600,
962             knop => 1852/3600, #scandinavian
963             c => 299792458, #speed of light
964             mach => 340.3, #speed of sound
965             machs => 340.3,
966             fps => 0.3048, #0.0254*12
967             ftps => 0.3048,
968             'ft/s' => 0.3048,
969             },
970             acceleration=>{
971             'm/s2' => 1,
972             'mps2' => 1,
973             g => 9.80665,
974             g0 => 9.80665,
975             #0-100kmh or ca 0-60 mph x seconds...
976             },
977             temperature=>{ #http://en.wikipedia.org/wiki/Temperature#Conversion
978             C=>1, F=>1, K=>1, celsius=>1, fahrenheit=>1, kelvin=>1
979             },
980             radioactivity=>{
981             Bq => 1,
982             becquerel => 1,
983             curie => 3.7e10,
984             },
985             current=> {
986             A => 1,
987             _A => 1,
988             'N/m2' => 2e-7,
989             },
990             charge=>{
991             e => 1,
992             _e => 1,
993             C => 6.24150964712042e+18,
994             coulomb => 6.24150964712042e+18,
995             As => 6.24150964712042e+18,
996             #Faraday unit of charge ???
997             },
998             power=> {
999             W => 1,
1000             _W => 1,
1001             'J/s' => 1,
1002             Jps => 1,
1003             hp => 746,
1004             hk => 746, #hestekrefter (norwegian, scandinavian)
1005             PS => 746/1.014, #pferdestärken
1006             'kWh/yr' => 1000 * 3600/(24*365), #kWh annually
1007             Whpy => 3600/(24*365), #kWh annually
1008             kWhpy => 1000 * 3600/(24*365), #kWh annually
1009             MWhpy => 1000**2 * 3600/(24*365), #kWh annually
1010             GWhpy => 1000**3 * 3600/(24*365), #kWh annually
1011             TWhpy => 1000**4 * 3600/(24*365), #kWh annually
1012             BTU => 1055.05585262/3600, #
1013             BTUph => 1055.05585262/3600,
1014             'BTU/h' => 1055.05585262/3600,
1015             'BTU/s' => 1055.05585262,
1016             'ftlb/s' => 746/550,
1017             'ftlb/min'=> 746/550/60,
1018             },
1019             energy=>{
1020             joule => 1,
1021             J => 1,
1022             _J => 1,
1023             Ws => 1,
1024             Wps => 1,
1025             'W/s' => 1,
1026             Nm => 1,
1027             newtonmeter => 1,
1028             newtonmeters => 1,
1029             Wh => 3600,
1030             kWh => 3600000, #3.6 million J
1031             cal => 4.1868, # ~ 3600/860
1032             calorie => 4.1868,
1033             calories => 4.1868,
1034             kcal => 4.1868*1000,
1035             kilocalorie => 4.1868*1000,
1036             kilocalories => 4.1868*1000,
1037             BTU => 4.1868 * 252, # = 1055.0736 or is 1055.05585262 right?
1038             Btu => 4.1868 * 252,
1039             ftlb => 746/550, # ~ 1/0.7375621
1040             'foot-pound' => 746/550,
1041             'foot-pounds' => 746/550,
1042             erg => 1e-7,
1043             ergs => 1e-7,
1044             eV => 1.60217656535e-19,
1045             _eV => 1.60217656535e-19,
1046             BeV => 1.60217656535e-10,
1047             electronvolt => 1.60217656535e-19,
1048             thermie => 4.1868e6,
1049             th => 4.1868e6,
1050             hph => 3600*746,
1051             PSh => 3600*746/1.014,
1052             galatm_imp => 460.63256925,
1053             galatm_US => 383.5568490138,
1054             quad => 1.05505585262e18,
1055             Ry => 2.179872e-18,
1056             rydberg => 2.179872e-18,
1057             th => 4.1868e6,
1058             thermie => 4.1868e6,
1059             boe => 6.12e9, #barrel of oil equivalent
1060             TCE => 29.288e9, #ton of coal equivalent
1061             toe => 41.868e9, #tonne of oil equivalent
1062             tTNT => 4.184e9, #ton of TNT equivalent
1063             },
1064             force=> {
1065             newton=> 1,
1066             N => 1,
1067             _N => 1,
1068             dyn => 1e-5,
1069             dyne => 1e-5,
1070             dynes => 1e-5,
1071             lb => 4.448222,
1072             },
1073             pressure=>{
1074             Pa => 1,
1075             _Pa => 1,
1076             pascal => 1,
1077             'N/m2' => 1,
1078             bar => 100000.0,
1079             mbar => 100.0,
1080             at => 98066.5, #technical atmosphere
1081             atm => 101325.0, #standard atmosphere
1082             torr => 133.3224,
1083             psi => 6894.8, #pounds per square inch
1084             },
1085             bytes=> {
1086             b => 1,
1087             kb => 1024, #2**10
1088             mb => 1024**2, #2**20 = 1048576
1089             gb => 1024**3, #2**30 = 1073741824
1090             tb => 1024**4, #2**40 = 1099511627776
1091             pb => 1024**5, #2**50 = 1.12589990684262e+15
1092             eb => 1024**6, #2**60 =
1093             zb => 1024**7, #2**70 =
1094             yb => 1024**8, #2**80 =
1095             KiB => 1024, #2**10
1096             KiB => 1024**2, #2**20 = 1048576
1097             KiB => 1024**3, #2**30 = 1073741824
1098             KiB => 1024**4, #2**40 = 1099511627776
1099             KiB => 1024**5, #2**50 = 1.12589990684262e+15
1100             KiB => 1024**6, #2**60 =
1101             KiB => 1024**7, #2**70 =
1102             KiB => 1024**8, #2**80 =
1103             Kb => 1000, #2**10
1104             Mb => 1000**2, #2**20 = 1048576
1105             Gb => 1000**3, #2**30 = 1073741824
1106             Tb => 1000**4, #2**40 = 1099511627776
1107             Pb => 1000**5, #2**50 = 1.12589990684262e+15
1108             Eb => 1000**6, #2**60 =
1109             Zb => 1000**7, #2**70 =
1110             Yb => 1000**8, #2**80 =
1111             byte => 1,
1112             kilobyte => 1024, #2**10
1113             megabyte => 1024**2, #2**20 = 1048576
1114             gigabyte => 1024**3, #2**30 = 1073741824
1115             terabyte => 1024**4, #2**40 = 1099511627776
1116             petabyte => 1024**5, #2**50 = 1.12589990684262e+15
1117             exabyte => 1024**6, #2**60 =
1118             zettabyte => 1024**7, #2**70 =
1119             yottabyte => 1024**8, #2**80 =
1120             },
1121             milage=>{ #fuel consumption
1122             'l/mil' => 1,
1123             'l/10km' => 1,
1124             'lp10km' => 1,
1125             'l/km' => 10,
1126             'l/100km' => 1/10,
1127             lp100km => 1/10,
1128             liter_pr_100km => 1/10,
1129             liter_pr_km => 10,
1130             mpg => -23.5214584, #negative signals inverse
1131             },
1132             # light=> {
1133             # cd => 1,
1134             # candela => 1,
1135             # },
1136             # lumens
1137             # lux
1138             angle =>{
1139             turn => 1,
1140             rad => 1/(2*$PI), # 2 * pi
1141             radian => 1/(2*$PI), # 2 * pi
1142             radians => 1/(2*$PI), # 2 * pi
1143             deg => 1/360, # 4 * 90
1144             degree => 1/360, # 4 * 90
1145             degrees => 1/360, # 4 * 90
1146             grad => 1/400,
1147             gradian => 1/400,
1148             gradians => 1/400,
1149             grade => 1/400, #french revolutionary unit
1150             gon => 1/400,
1151             new_degree => 1/400,
1152             nygrad => 1/400, #scandinavian
1153             quadrant => 1/4,
1154             sextant => 1/6,
1155             hour => 1/24,
1156             point => 1/32, #used in navigation
1157             hexacontade => 1/60,
1158             binary_degree => 1/256,
1159             binary_radian => 1/256,
1160             brad => 1/256,
1161             },
1162             money =>{ # rates at dec 17 2015
1163             AED => 2.389117, #
1164             ARS => 0.895122, #
1165             AUD => 6.253619, #
1166             BGN => 4.847575, #
1167             BHD => 23.267384, #
1168             BND => 6.184624, #
1169             BRL => 2.260703, #
1170             BTC => 3910.932213547, #bitcoin
1171             BWP => 0.794654, #
1172             CAD => 6.289957, #
1173             CHF => 8.799974, #
1174             CLP => 0.012410, #
1175             CNY => 1.353406, #
1176             COP => 0.00262229, #
1177             CZK => 0.351171, #
1178             DKK => 1.271914, #
1179             EUR => 9.489926, #
1180             GBP => 13.069440, #
1181             HKD => 1.131783, #
1182             HRK => 1.240878, #
1183             HUF => 0.029947, #
1184             IDR => 0.00062471, #
1185             ILS => 2.254456, #
1186             INR => 0.132063, #
1187             IRR => 0.00029370, #
1188             ISK => 0.067245, #
1189             JPY => 0.071492, #
1190             KRW => 0.00739237, #
1191             KWD => 28.862497, #
1192             KZT => 0.027766, #
1193             LKR => 0.061173, #
1194             LTC => 31.78895354018, #litecoin
1195             LTL => 2.748472, #
1196             LVL => 13.503025, #
1197             LYD => 6.296978, #
1198             MUR => 0.240080, #
1199             MXN => 0.515159, #
1200             MYR => 2.032465, #
1201             NOK => 1.000000000, #norwegian kroner
1202             NPR => 0.084980, #
1203             NZD => 5.878331, #
1204             OMR => 22.795994, #
1205             PHP => 0.184839, #
1206             PKR => 0.083779, #
1207             PLN => 2.207243, #
1208             QAR => 2.409162, #
1209             RON => 2.101513, #
1210             RUB => 0.122991, #
1211             SAR => 2.339745, #
1212             SEK => 1.023591, #
1213             SGD => 6.184624, #
1214             THB => 0.242767, #
1215             TRY => 2.994338, #
1216             TTD => 1.374484, #
1217             TWD => 0.265806, #
1218             USD => 8.774159, #
1219             VEF => 1.395461, #
1220             ZAR => 0.576487, #
1221             XBT => 3910.932213547, # bitcoin
1222             mBTC => 3910.932213547, # bitcoin
1223             mXBT => 3910.932213547, # bitcoin
1224             },
1225             numbers =>{
1226             dec=>1,hex=>1,bin=>1,oct=>1,roman=>1, des=>1,#des: spelling error in v0.15-0.16
1227             dusin=>1,dozen=>1,doz=>1,dz=>1,gross=>144,gr=>144,gro=>144,great_gross=>12*144,small_gross=>10*12,
1228             }
1229             );
1230             our $conv_prepare_time=0;
1231             our $conv_prepare_money_time=0;
1232             sub conv_prepare {
1233 1     1 0 9 my %b =(da =>1e+1, h =>1e+2, k =>1e+3, M =>1e+6, G =>1e+9, T =>1e+12, P =>1e+15, E =>1e+18, Z =>1e+21, Y =>1e+24, H =>1e+27);
1234 1         6 my %big =(deca=>1e+1, hecto=>1e+2, kilo =>1e+3, mega =>1e+6, giga=>1e+9, tera=>1e+12, peta =>1e+15, exa =>1e+18, zetta=>1e+21, yotta=>1e+24, hella=>1e+27);
1235 1         6 my %s =(d =>1e-1, c =>1e-2, m =>1e-3,'µ' =>1e-6, u=>1e-6, n =>1e-9, p =>1e-12, f =>1e-15, a =>1e-18, z =>1e-21, y =>1e-24);
1236 1         8 my %small=(deci=>1e-1, centi=>1e-2, milli=>1e-3, micro =>1e-6, nano=>1e-9, pico=>1e-12, femto=>1e-15, atto=>1e-18, zepto=>1e-21, yocto=>1e-24);
1237             # myria=> 10000 #obsolete
1238             # demi => 1/2, double => 2 #obsolete
1239             # lakh => 1e5, crore => 1e7 #south asian
1240 1         8 my %x = (%s,%b);
1241 1         7 for my $type (keys%conv) {
1242 20         13 for(grep/^_/,keys%{$conv{$type}}) {
  20         178  
1243 15         15 my $c=$conv{$type}{$_};
1244 15         10 delete$conv{$type}{$_};
1245 15         14 my $unit=substr($_,1);
1246 15         288 $conv{$type}{$_.$unit}=$x{$_}*$c for keys%x;
1247             }
1248             }
1249 1         11 $conv_prepare_time=time();
1250             }
1251              
1252             our $Currency_rates_url = 'http://calthis.com/currency-rates';
1253             our $Currency_rates_expire = 6*3600;
1254             sub conv_prepare_money {
1255 0     0 0 0 eval {
1256 0         0 require LWP::Simple;
1257 0 0       0 my $td=$^O=~/^(?:linux|cygwin)$/?"/tmp":"/tmp"; #hm wrong!
1258 0         0 my $fn="$td/acme-tools-currency-rates.data";
1259 0 0 0     0 if( !-e$fn or time() - (stat($fn))[9] >= $Currency_rates_expire){
1260 0         0 LWP::Simple::getstore($Currency_rates_url,"$fn.$$.tmp"); # get ... see getrates.cmd
1261 0 0       0 die "nothing downloaded" if !-s"$fn.$$.tmp";
1262 0         0 rename "$fn.$$.tmp",$fn;
1263 0         0 chmod 0666,$fn;
1264             }
1265 0         0 my $d=readfile($fn);
1266 0         0 my %r=$d=~/^\s*([A-Z]{3}) +(\d+\.\d+)\b/gm;
1267 0         0 $r{lc($_)}=$r{$_} for keys%r;
1268             #warn serialize([minus([sort keys(%r)],[sort keys(%{$conv{money}})])],'minus'); #ARS,AED,COP,BWP,LVL,BHD,NPR,LKR,QAR,KWD,LYD,SAR,KZT,CLP,IRR,VEF,TTD,OMR,MUR,BND
1269             #warn serialize([minus([sort keys(%{$conv{money}})],[sort keys(%r)])],'minus'); #LTC,I44,BTC,BYR,TWI,NOK,XDR
1270 0 0       0 $conv{money}={%{$conv{money}},%r} if keys(%r)>20;
  0         0  
1271             };
1272 0 0       0 carp "conv: conv_prepare_money (currency conversion automatic daily updated rates) - $@\n" if $@;
1273 0         0 $conv{money}{"m$_"}=$conv{money}{$_}/1000 for qw/BTC XBT/;
1274 0         0 $conv_prepare_money_time=time();
1275 0         0 1; #not yet
1276             }
1277              
1278             sub conv {
1279 43     43 1 6725 my($num,$from,$to)=@_;
1280 43 50       76 croak "conf requires 3 args" if @_!=3;
1281 43 100       65 conv_prepare() if !$conv_prepare_time;
1282 43     86   103 my $types=sub{ my $unit=shift; [sort grep$conv{$_}{$unit}, keys%conv] };
  86         78  
  86         911  
1283 43   33     56 my @types=map{ my $ru=$_; my $r;$r=&$types($_) and @$r and $$ru=$_ and last for ($$ru,uc($$ru),lc($$ru)); $r }(\$from,\$to);
  86   33     53  
  86   50     52  
  86         143  
  86         161  
1284 43         50 my @err=map "Unit ".[$from,$to]->[$_]." is unknown",grep!@{$types[$_]},0..1;
  86         84  
1285 43         54 my @type=intersect(@types);
1286 43 50       62 push @err, "from=$from and to=$to has more than one possible conversions: ".join(", ", @type) if @type>1;
1287             push @err, "from $from (".(join(",",@{$types[0]})||'?').") and "
1288 43 50 0     51 ."to $to (" .(join(",",@{$types[1]})||'?').") has no known common unit type.\n" if @type<1;
      0        
1289 43 50       58 croak join"\n",map"conv: $_",@err if @err;
1290 43         33 my $type=$type[0];
1291 43 50 66     80 conv_prepare_money() if $type eq 'money' and time() >= $conv_prepare_money_time + $Currency_rates_expire;
1292 43 100       60 return conv_temperature(@_) if $type eq 'temperature';
1293 36 100       43 return conv_numbers(@_) if $type eq 'numbers';
1294 31         20 my $c=$conv{$type};
1295 31         26 my($cf,$ct)=@{$conv{$type}}{$from,$to};
  31         40  
1296 31 100 100     141 my $r= $cf>0 && $ct<0 ? -$ct/$num/$cf
    100 66        
1297             : $cf<0 && $ct>0 ? -$cf/$num/$ct
1298             : $cf*$num/$ct;
1299             # print STDERR "$num $from => $to from=$ff to=$ft r=$r\n";
1300 31         106 return $r;
1301             }
1302              
1303             sub conv_temperature { #http://en.wikipedia.org/wiki/Temperature#Conversion
1304 7     7 0 22 my($t,$from,$to)=(shift(),map uc(substr($_,0,1)),@_);
1305 7 100       16 $from=~s/K/C/ and $t-=273.15;
1306             #$from=~s/R/F/ and $t-=459.67; #rankine
1307 7 100       15 return $t if $from eq $to;
1308 1     1   7 {CK=>sub{$t+273.15},
1309 3     3   22 FC=>sub{($t-32)*5/9},
1310 2     2   15 CF=>sub{$t*9/5+32},
1311 0     0   0 FK=>sub{($t-32)*5/9+273.15},
1312 6         71 }->{$from.$to}->();
1313             }
1314              
1315             sub conv_numbers {
1316 5     5 0 6 my($n,$fr,$to)=@_;
1317 5 0       34 my $dec=$fr eq 'dec' ? $n
    50          
    50          
    50          
    50          
    50          
    50          
    100          
    50          
    100          
    50          
1318             :$fr eq 'hex' ? hex($n)
1319             :$fr eq 'oct' ? oct($n)
1320             :$fr eq 'bin' ? oct("0b$n")
1321             :$fr =~ /^(dusin|dozen|doz|dz)$/ ? $n*12
1322             :$fr =~ /^(gross|gr|gro)$/ ? $n*144
1323             :$fr eq 'great_gross' ? $n*12*144
1324             :$fr eq 'small_gross' ? $n*12*10
1325             :$fr eq 'skokk' ? $n*60 #norwegian unit
1326             :$fr eq 'roman' ? roman2int($n)
1327             :$fr eq 'des' ? $n
1328             :croak "Conv from $fr not supported yet";
1329 5 50       40 my $ret=$to eq 'dec' ? $dec
    100          
    50          
    50          
    50          
    50          
    50          
    100          
    100          
    50          
    50          
1330             :$to eq 'hex' ? sprintf("%x",$dec)
1331             :$to eq 'oct' ? sprintf("%o",$dec)
1332             :$to eq 'bin' ? sprintf("%b",$dec)
1333             :$to =~ /^(dusin|dozen|doz|dz)$/ ? $dec/12
1334             :$to =~ /^(gross|gr|gro)$/ ? $dec/144
1335             :$to eq 'great_gross' ? $dec/(12*144)
1336             :$to eq 'small_gross' ? $dec/(12*10)
1337             :$to eq 'skokk' ? $dec/60
1338             :$to eq 'roman' ? int2roman($dec)
1339             :$to eq 'des' ? $dec
1340             :croak "Conv to $to not suppoerted yet";
1341 5         19 $ret;
1342             }
1343             #http://en.wikipedia.org/wiki/Norwegian_units_of_measurement
1344              
1345              
1346             =head2 bytes_readable
1347              
1348             Converts a number of bytes to something human readable.
1349              
1350             Input 1: a number
1351              
1352             Input 2: optionally the number of decimals if >1000 B. Default is 2.
1353              
1354             Output: a string containing:
1355              
1356             the number with a B behind if the number is less than 1000
1357              
1358             the number divided by 1024 with two decimals and "kB" behind if the number is less than 1024*1000
1359              
1360             the number divided by 1048576 with two decimals and "MB" behind if the number is less than 1024*1024*1000
1361              
1362             the number divided by 1073741824 with two decimals and "GB" behind if the number is less than 1024*1024*1024*1000
1363              
1364             the number divided by 1099511627776 with two decimals and "TB" behind otherwise
1365              
1366             Examples:
1367              
1368             print bytes_readable(999); # 999 B
1369             print bytes_readable(1000); # 1000 B
1370             print bytes_readable(1001); # 0.98 kB
1371             print bytes_readable(1024); # 1.00 kB
1372             print bytes_readable(1153433.6); # 1.10 MB
1373             print bytes_readable(1181116006.4); # 1.10 GB
1374             print bytes_readable(1209462790553.6); # 1.10 TB
1375             print bytes_readable(1088516511498.24*1000); # 990.00 TB
1376             print bytes_readable(1088516511498.24*1000,3); # 990.000 TB
1377             print bytes_readable(1088516511498.24*1000,1); # 990.0 TB
1378              
1379             =cut
1380              
1381             sub bytes_readable {
1382 32     32 1 2292 my $bytes=shift();
1383 32   100     100 my $d=shift()||2; #decimals
1384 32 50       44 return undef if !defined $bytes;
1385 32 100       104 return "$bytes B" if abs($bytes) <= 2** 0*1000; #bytes
1386 24 100       177 return sprintf("%.*f kB",$d,$bytes/2**10) if abs($bytes) < 2**10*1000; #kilobyte
1387 8 100       25 return sprintf("%.*f MB",$d,$bytes/2**20) if abs($bytes) < 2**20*1000; #megabyte
1388 6 100       24 return sprintf("%.*f GB",$d,$bytes/2**30) if abs($bytes) < 2**30*1000; #gigabyte
1389 4 50       40 return sprintf("%.*f TB",$d,$bytes/2**40) if abs($bytes) < 2**40*1000; #terrabyte
1390 0         0 return sprintf("%.*f PB",$d,$bytes/2**50); #petabyte, exabyte, zettabyte, yottabyte
1391             }
1392              
1393             =head2 sec_readable
1394              
1395             Time written as C< 14h 37m > is often more humanly comprehensible than C< 52620 seconds >.
1396              
1397             print sec_readable( 0 ); # 0s
1398             print sec_readable( 0.0123 ); # 0.0123s
1399             print sec_readable(-0.0123 ); # -0.0123s
1400             print sec_readable( 1.23 ); # 1.23s
1401             print sec_readable( 1 ); # 1s
1402             print sec_readable( 9.87 ); # 9.87s
1403             print sec_readable( 10 ); # 10s
1404             print sec_readable( 10.1 ); # 10.1s
1405             print sec_readable( 59 ); # 59s
1406             print sec_readable( 59.123 ); # 59.1s
1407             print sec_readable( 60 ); # 1m 0s
1408             print sec_readable( 60.1 ); # 1m 0s
1409             print sec_readable( 121 ); # 2m 1s
1410             print sec_readable( 131 ); # 2m 11s
1411             print sec_readable( 1331 ); # 22m 11s
1412             print sec_readable(-1331 ); # -22m 11s
1413             print sec_readable( 13331 ); # 3h 42m
1414             print sec_readable( 133331 ); # 1d 13h
1415             print sec_readable( 1333331 ); # 15d 10h
1416             print sec_readable( 13333331 ); # 154d 7h
1417             print sec_readable( 133333331 ); # 4yr 82d
1418             print sec_readable( 1333333331 ); # 42yr 91d
1419              
1420             =cut
1421              
1422             sub sec_readable {
1423 25     25 1 3759 my $s=shift();
1424 25         32 my($h,$d,$y)=(3600,24*3600,365.25*24*3600);
1425 25 100 100     275 !defined$s ? undef
    100          
    100          
    100          
    100          
    100          
    50          
    50          
1426             :!length($s) ? ''
1427             :$s<0 ? '-'.sec_readable(-$s)
1428             :$s<60 && int($s)==$s
1429             ? $s."s"
1430             :$s<60 ? sprintf("%.*fs",int(3+-log($s)/log(10)),$s)
1431             :$s<3600 ? int($s/60)."m " .($s%60) ."s"
1432             :$s<24*3600 ? int($s/$h)."h " .int(($s%$h)/60)."m"
1433             :$s<366*24*3600 ? int($s/$d)."d " .int(($s%$d)/$h)."h"
1434             : int($s/$y)."yr ".int(($s%$y)/$d)."d";
1435             }
1436              
1437             =head2 int2roman
1438              
1439             Converts integers to roman numbers.
1440              
1441             B
1442              
1443             print int2roman(1234); # prints MCCXXXIV
1444             print int2roman(1971); # prints MCMLXXI
1445              
1446             (Adapted subroutine from Peter J. Acklam, jacklam(&)math.uio.no)
1447              
1448             I = 1
1449             V = 5
1450             X = 10
1451             L = 50
1452             C = 100 (centum)
1453             D = 500
1454             M = 1000 (mille)
1455              
1456             See also L.
1457              
1458             See L for more.
1459              
1460             =head2 roman2int
1461              
1462             roman2int("MCMLXXI") == 1971
1463              
1464             =cut
1465              
1466             #alternative algorithm: http://www.rapidtables.com/convert/number/how-number-to-roman-numerals.htm
1467             sub int2roman {
1468 1161     1161 1 5631 my($n,@p)=(shift,[],[1],[1,1],[1,1,1],[1,2],[2],[2,1],[2,1,1],[2,1,1,1],[1,3],[3]);
1469             !defined($n)? undef
1470             : !length($n) ? ""
1471             : int($n)!=$n ? croak"int2roman: $n is not an integer"
1472             : $n==0 ? ""
1473             : $n<0 ? "-".int2roman(-$n)
1474             : $n>3999 ? "M".int2roman($n-1000)
1475 1161 100       3823 : join'',@{[qw/I V X L C D M/]}[map{my$i=$_;map($_+5-$i*2,@{$p[$n/10**(3-$i)%10]})}(0..3)];
  913 100       3597  
  3652 100       2158  
  3652 50       1918  
  3652 100       5908  
    100          
1476             }
1477              
1478             sub roman2int {
1479 1017     1017 1 4310 my($r,$n,%c)=(shift,0,'',0,qw/I 1 V 5 X 10 L 50 C 100 D 500 M 1000/);
1480 1017 100 66     2767 $r=~s/^-//?-roman2int($r):
1481             $r=~s/(C?)([DM])|(X?)([LCDM])|(I?)([VXLCDM])|(I)|(.)/
1482             croak "roman2int: Invalid number $r" if $8;
1483             $n += $c{$2||$4||$6||$7} - $c{$1||$3||$5||''}; ''/eg && $n
1484             }
1485              
1486             #sub roman2int_slow {
1487             # my $r=shift;
1488             # $r=~s,^\-,, ? 0-roman2int($r)
1489             # : $r=~s,^M,,i ? 1000+roman2int($r)
1490             # : $r=~s,^CM,,i ? 900+roman2int($r)
1491             # : $r=~s,^D,,i ? 500+roman2int($r)
1492             # : $r=~s,^CD,,i ? 400+roman2int($r)
1493             # : $r=~s,^C,,i ? 100+roman2int($r)
1494             # : $r=~s,^XC,,i ? 90+roman2int($r)
1495             # : $r=~s,^L,,i ? 50+roman2int($r)
1496             # : $r=~s,^XL,,i ? 40+roman2int($r)
1497             # : $r=~s,^X,,i ? 10+roman2int($r)
1498             # : $r=~s,^IX,,i ? 9+roman2int($r)
1499             # : $r=~s,^V,,i ? 5+roman2int($r)
1500             # : $r=~s,^IV,,i ? 4+roman2int($r)
1501             # : $r=~s,^I,,i ? 1+roman2int($r)
1502             # : !length($r) ? 0
1503             # : croak "Invalid roman number $r";
1504             #}
1505              
1506             =head2 distance
1507              
1508             B the four decimal numbers of two GPS positions: latutude1, longitude1, latitude2, longitude2
1509              
1510             B the air distance in meters between the two points
1511              
1512             Calculation is done using the Haversine Formula for spherical distance:
1513              
1514             a = sin((lat2-lat1)/2)^2
1515             + sin((lon2-lon1)/2)^2 * cos(lat1) * cos(lat2);
1516              
1517             c = 2 * atan2(min(1,sqrt(a)),
1518             min(1,sqrt(1-a)))
1519              
1520             distance = c * R
1521              
1522             With earth radius set to:
1523              
1524             R = Re - (Re-Rp) * sin(abs(lat1+lat2)/2)
1525              
1526             Where C (equatorial radius) and C (polar radius).
1527              
1528             B
1529              
1530             my @oslo = ( 59.93937, 10.75135); # oslo in norway
1531             my @rio = (-22.97673, -43.19508); # rio in brazil
1532              
1533             printf "%.1f km\n", distance(@oslo,@rio)/1000; # 10431.7 km
1534             printf "%.1f km\n", distance(@rio,@oslo)/1000; # 10431.7 km
1535             printf "%.1f nmi\n", distance(@oslo,@rio)/1852.000; # 5632.7 nmi (nautical miles)
1536             printf "%.1f miles\n",distance(@oslo,@rio)/1609.344; # 6481.9 miles
1537             printf "%.1f miles\n",conv(distance(@oslo,@rio),"meters","miles");# 6481.9 miles
1538              
1539             See L
1540              
1541             and L
1542              
1543             and L
1544              
1545             and L, but Acme::Tools::distance() is about 8 times faster.
1546              
1547             =cut
1548              
1549             our $Distance_factor = $PI / 180;
1550 0     0 0 0 sub acos { atan2( sqrt(1 - $_[0] * $_[0]), $_[0] ) }
1551             sub distance_great_circle {
1552 0     0 0 0 my($lat1,$lon1,$lat2,$lon2)=map $Distance_factor*$_, @_;
1553 0         0 my($Re,$Rp)=( 6378137.0, 6356752.3 ); #earth equatorial and polar radius
1554 0         0 my $R=$Re-($Re-$Rp)*sin(abs($lat1+$lat2)/2); #approx
1555 0         0 return $R*acos(sin($lat1)*sin($lat2)+cos($lat1)*cos($lat2)*cos($lon2-$lon1))
1556             }
1557              
1558             sub distance {
1559 4     4 1 786 my($lat1,$lon1,$lat2,$lon2)=map $Distance_factor*$_, @_;
1560 4         35 my $a= sin(($lat2-$lat1)/2)**2
1561             + sin(($lon2-$lon1)/2)**2 * cos($lat1) * cos($lat2);
1562 4 50       5 my $sqrt_a =sqrt($a); $sqrt_a =1 if $sqrt_a >1;
  4         10  
1563 4 50       5 my $sqrt_1ma=sqrt(1-$a); $sqrt_1ma=1 if $sqrt_1ma>1;
  4         8  
1564 4         16 my $c=2*atan2($sqrt_a,$sqrt_1ma);
1565 4         4 my($Re,$Rp)=( 6378137.0, 6356752.3 ); #earth equatorial and polar radius
1566 4         6 my $R=$Re-($Re-$Rp)*sin(abs($lat1+$lat2)/2); #approx
1567 4         14 return $c*$R;
1568             }
1569              
1570              
1571             =head2 big
1572              
1573             =head2 bigi
1574              
1575             =head2 bigf
1576              
1577             =head2 bigr
1578              
1579             =head2 bigscale
1580              
1581             big, bigi, bigf, bigr and bigscale are sometimes convenient shorthands for using
1582             C<< Math::BigInt->new() >>, C<< Math::BigFloat->new() >> and C<< Math::BigRat->new() >>
1583             (preferably with the GMP for faster calculations). Examples:
1584              
1585             my $num1 = big(3); #returns a new Math::BigInt-object
1586             my $num2 = big('3.0'); #returns a new Math::BigFloat-object
1587             my $num3 = big(3.0); #returns a new Math::BigInt-object
1588             my $num4 = big(3.1); #returns a new Math::BigFloat-object
1589             my $num5 = big('2/7'); #returns a new Math::BigRat-object
1590             my($i1,$f1,$i2,$f2) = big(3,'3.0',3.0,3.1); #returns the four new numbers, as the above four lines
1591             #uses wantarray
1592              
1593             print 2**200; # 1.60693804425899e+60
1594             print big(2)**200; # 1606938044258990275541962092341162602522202993782792835301376
1595             print 2**big(200); # 1606938044258990275541962092341162602522202993782792835301376
1596             print big(2**200); # 1606938044258990000000000000000000000000000000000000000000000
1597              
1598             print 1/7; # 0.142857142857143
1599             print 1/big(7); # 0 because of integer arithmetics
1600             print 1/big(7.0); # 0 because 7.0 is viewed as an integer
1601             print 1/big('7.0'); # 0.1428571428571428571428571428571428571429
1602             print 1/bigf(7); # 0.1428571428571428571428571428571428571429
1603             print bigf(1/7); # 0.142857142857143 probably not what you wanted
1604              
1605             print 1/bigf(7); # 0.1428571428571428571428571428571428571429
1606             bigscale(80); # for increased precesion (default is 40)
1607             print 1/bigf(7); # 0.14285714285714285714285714285714285714285714285714285714285714285714285714285714
1608              
1609             In C the characters C<< . >> and C<< / >> will make it return a
1610             Math::BigFloat- and Math::BigRat-object accordingly. Or else a Math::BigInt-object is returned.
1611              
1612             Instead of guessing, use C, C and C to return what you want.
1613              
1614             B Acme::Tools does not depend on Math::BigInt and
1615             Math::BigFloat and GMP, but these four big*-subs do (by C).
1616             To use big, bigi, bigf and bigr effectively you should
1617             install Math::BigInt::GMP and Math::BigFloat::GMP like this:
1618              
1619             sudo cpanm Math::BigFloat Math::GMP Math::BingInt::GMP # or
1620             sudo cpan Math::BigFloat Math::GMP Math::BingInt::GMP # or
1621             sudo yum install perl-Math-BigInt-GMP perl-Math-GMP # on RedHat, RHEL or
1622             sudo apt-get install libmath-bigint-gmp-perl libmath-gmp-perl # on Ubuntu or some other way
1623              
1624             Unless GMP is installed for perl like this, the Math::Big*-modules
1625             will fall back to using similar but slower built in modules. See: L
1626              
1627             =cut
1628              
1629             sub bigi {
1630 9 100   9 1 284 eval q(use Math::BigInt try=>"GMP") if !$INC{'Math/BigInt.pm'};
  2     2   1013  
  2         16907  
  2         229  
1631 9 100       10344 if (wantarray) { return (map Math::BigInt->new($_),@_) }
  2         13  
1632 7         24 else { return Math::BigInt->new($_[0]) }
1633             }
1634             sub bigf {
1635 7 100   7 1 1677 eval q(use Math::BigFloat try=>"GMP") if !$INC{'Math/BigFloat.pm'};
  2     2   894  
  2         16754  
  2         396  
1636 7 100       1175 if (wantarray) { return (map Math::BigFloat->new($_),@_) }
  2         11  
1637 5         19 else { return Math::BigFloat->new($_[0]) }
1638             }
1639             sub bigr {
1640 17 100   17 1 15042 eval q(use Math::BigRat try=>"GMP") if !$INC{'Math/BigRat.pm'};
  2     2   645  
  2         11870  
  2         406  
1641 17 100       1657 if (wantarray) { return (map Math::BigRat->new($_),@_) }
  7         33  
1642 10         36 else { return Math::BigRat->new($_[0]) }
1643             }
1644             sub big {
1645             wantarray
1646 12 50   12 1 4904 ? (map /\./ ? bigf($_) : /\// ? bigr($_) : bigi($_), @_)
    100          
    100          
    100          
    100          
1647             : $_[0]=~/\./ ? bigf($_[0]) : $_[0]=~/\// ? bigr($_[0]) : bigi($_[0]);
1648             }
1649             sub bigscale {
1650 4 100   4 1 3517 @_==1 or croak "bigscale requires one and only one argument";
1651 2         4 my $scale=shift();
1652 2 50       7 eval q(use Math::BigInt try=>"GMP") if !$INC{'Math/BigInt.pm'};
1653 2 50       8 eval q(use Math::BigFloat try=>"GMP") if !$INC{'Math/BigFloat.pm'};
1654 2 50       8 eval q(use Math::BigRat try=>"GMP") if !$INC{'Math/BigRat.pm'};
1655 2         9 Math::BigInt->div_scale($scale);
1656 2         37 Math::BigFloat->div_scale($scale);
1657 2         24 Math::BigRat->div_scale($scale);
1658 2         18 return;
1659             }
1660              
1661             #my $R_authalic=6371007.2; #earth radius in meters, mean, Authalic radius, real R varies 6353-6384km, http://en.wikipedia.org/wiki/Earth_radius
1662             #*)
1663             # ( 6378157.5, 6356772.2 ) #hmm
1664             #my $e=0.081819218048345;#sqrt(1 - $b**2/$a**2); #eccentricity of the ellipsoid
1665             #my($a,$b)=( 6378137.0, 6356752.3 ); #earth equatorial and polar radius
1666             #warn "e=$e\n";
1667             #warn "t=".(1 - $e**2)."\n";
1668             #warn "n=".((1 - $e**2 * sin(($lat1+$lat1)/2)**2)**1.5)."\n";
1669             #my $t=1 - $e**2;
1670             #my $n=(1 - $e**2 * sin(($lat1+$lat1)/2)**2)**1.5;
1671             #warn "t=$t\n";
1672             #warn "n=$n\n";
1673             #$a * (1 - $e**2) / ((1 - $e**2 * sin(($lat1+$lat2)/2)**2)**1.5); #hmm avg lat
1674             #$R=$a * $t/$n;
1675              
1676             #=head2 fractional
1677             #=cut
1678              
1679             sub fractional { #http://mathcentral.uregina.ca/QQ/database/QQ.09.06/h/lil1.html
1680 0     0 0 0 carp "fractional: NOT FINISHED";
1681 0         0 my $n=shift;
1682 0         0 print "----fractional n=$n\n";
1683 0         0 my $nn=$n; my $dec;
  0         0  
1684 0         0 $nn=~s,\.(\d+)$,$dec=length($1);$1.,;
1685 0         0 my $l;
1686 0         0 my $max=0;
1687 0         0 my($te,$ne);
1688 0         0 for(1..length($nn)/2){
1689 0 0       0 if( $nn=~/^(\d*?)((.{$_})(\3)+)$/ ){
1690 0         0 print "_ = $_ ".length($2)."\n";
1691 0 0       0 if(length($2)>$max){
1692 0         0 $l=$_;
1693 0         0 $te="$1$3"-$1;
1694 0         0 $max=length($2);
1695             }
1696             }
1697             }
1698 0 0 0     0 return fractional($n) if !$l and !recursed() and $dec>6 and substr($n,-1) and substr($n,-1)--;
      0        
      0        
      0        
1699 0         0 print "l=$l max=$max\n";
1700 0         0 $ne="9" x $l;
1701 0         0 print log($n),"\n";
1702 0     0   0 my $st=sub{print "status: ".($te/$ne)." n=$n ".($n/$te*$ne)."\n"};
  0         0  
1703 0         0 while($n/$te*$ne<0.99){ &$st(); $ne*=10 }
  0         0  
  0         0  
1704 0         0 while($te/$n/$ne<0.99){ &$st(); $te*=10 }
  0         0  
  0         0  
1705 0         0 &$st();
1706 0         0 while(1){
1707 0         0 my $d=gcd($te,$ne); print "gcd=$d\n";
  0         0  
1708 0 0       0 last if $d==1;
1709 0         0 $te/=$d; $ne/=$d;
  0         0  
1710             }
1711 0         0 &$st();
1712 0 0       0 wantarray ? ($te,$ne) : "$te/$ne"; #gcd()
1713             }
1714              
1715             =head2 isnum
1716              
1717             B String to be tested on regexp C<< /^ \s* [\-\+]? (?: \d*\.\d+ | \d+ ) (?:[eE][\-\+]?\d+)?\s*$/x >>. If no argument is given isnum checks C<< $_ >>.
1718              
1719             B True or false (1 or 0)
1720              
1721             use Acme::Tools;
1722             my @e=(' +32.12354E-21 ', 2.2, '9' x 99999, ' -123.12', '29,323.31', '29 323.31');
1723             print isnum() ? 'num' : 'str' for @e; #prints num for every element except the last two
1724             print $_=~$Re_isnum ? 'num' : 'str' for @e; #same
1725              
1726             =cut
1727              
1728             our $Re_isnum=qr/^ \s* [\-\+]? (?: \d*\.\d+ | \d+ ) (?:[eE][\-\+]?\d+)?\s*$/x;
1729 20 100   20 1 3331 sub isnum {(@_?$_[0]:$_)=~$Re_isnum}
1730              
1731             =head2 between
1732              
1733             Input: Three arguments.
1734              
1735             Returns: Something I if the first argument is numerically between the two next.
1736              
1737             =cut
1738              
1739             sub between {
1740 27     27 1 7173 my($test,$fom,$tom)=@_;
1741 33     33   225 no warnings;
  33         48  
  33         6672  
1742 27 100 66     588 return $fom<$tom ? $test>=$fom&&$test<=$tom
      33        
1743             : $test>=$tom&&$test<=$fom;
1744             }
1745              
1746             =head2 curb
1747              
1748             B Three arguments: value, minumum, maximum.
1749              
1750             B Returns the value if its between the given minumum and maximum.
1751             Returns minimum if the value is less or maximum if the value is more.
1752              
1753             my $v = 234;
1754             print curb( $v, 200, 250 ); #prints 234
1755             print curb( $v, 150, 200 ); #prints 200
1756             print curb( $v, 250, 300 ); #prints 250
1757             print curb(\$v, 250, 300 ); #prints 250 and changes $v
1758             print $v; #prints 250
1759              
1760             In the last example $v is changed because the argument is a reference. (To keep backward compatability, C<< bound() >> is a synonym for C<< curb() >>)
1761              
1762             =cut
1763              
1764             sub curb {
1765 129     129 1 1114 my($val,$min,$max)=@_;
1766 129 50 66     1165 croak "curb: wrong args" if @_!=3 or !defined$min or !defined$max or !defined$val or $min>$max;
      100        
      66        
      66        
1767 126 100       149 return $$val=curb($$val,$min,$max) if ref($val) eq 'SCALAR';
1768 125 100       206 $val < $min ? $min :
    100          
1769             $val > $max ? $max :
1770             $val;
1771             }
1772 0     0 0 0 sub bound { curb(@_) }
1773 0     0 0 0 sub log10 { log($_[0])/log(10) }
1774 0     0 0 0 sub log2 { log($_[0])/log(2) }
1775              
1776             =head1 STRINGS
1777              
1778             =head2 upper
1779              
1780             =head2 lower
1781              
1782             Returns input string as uppercase or lowercase.
1783              
1784             Can be used if Perls build in C and C for some reason does not convert æøå or other latin letters outsize a-z.
1785              
1786             Converts C<< æøåäëïöüÿâêîôûãõàèìòùáéíóúýñð >> to and from C<< ÆØÅÄËÏÖÜ?ÂÊÎÔÛÃÕÀÈÌÒÙÁÉÍÓÚÝÑÐ >>
1787              
1788             See also C<< perldoc -f uc >> and C<< perldoc -f lc >>
1789              
1790             =head2 trim
1791              
1792             Removes space from the beginning and end of a string. Whitespace (C<< \s >>) that is.
1793             And removes any whitespace inside the string of more than one char, leaving the first whitespace char. Thus:
1794              
1795             trim(" asdf \t\n 123 ") eq "asdf 123"
1796             trim(" asdf\t\n 123\n") eq "asdf\t123"
1797              
1798             Works on C<< $_ >> if no argument i given:
1799              
1800             print join",", map trim, " please ", " remove ", " my ", " spaces "; # please,remove,my,spaces
1801             print join",", trim(" please ", " remove ", " my ", " spaces "); # works on arrays as well
1802             my $s=' please '; trim(\$s); # now $s eq 'please'
1803             trim(\@untrimmedstrings); # trims array strings inplace
1804             @untrimmedstrings = map trim, @untrimmedstrings; # same, works on $_
1805             trim(\$_) for @untrimmedstrings; # same, works on \$_
1806              
1807             =head2 lpad
1808              
1809             =head2 rpad
1810              
1811             Left or right pads a string to the given length by adding one or more spaces at the end for I or at the start for I.
1812              
1813             B First argument: string to be padded. Second argument: length of the output. Optional third argument: character(s) used to pad.
1814             Default is space.
1815              
1816             rpad('gomle',9); # 'gomle '
1817             lpad('gomle',9); # ' gomle'
1818             rpad('gomle',9,'-'); # 'gomle----'
1819             lpad('gomle',9,'+'); # '++++gomle'
1820             rpad('gomle',4); # 'goml'
1821             lpad('gomle',4); # 'goml'
1822             rpad('gomle',7,'xyz'); # 'gomlxy'
1823             lpad('gomle',10,'xyz'); # 'xyzxygoml'
1824              
1825             =head2 cpad
1826              
1827             Center pads. Pads the string both on left and right equal to the given length. Centers the string. Pads right side first.
1828              
1829             cpad('mat',5) eq ' mat '
1830             cpad('mat',4) eq 'mat '
1831             cpad('mat',6) eq ' mat '
1832             cpad('mat',9) eq ' mat '
1833             cpad('mat',5,'+') eq '+mat+'
1834             cpad('MMMM',20,'xyzXYZ') eq 'xyzXYZxyMMMMxyzXYZxy'
1835              
1836             =cut
1837              
1838 33 50   33 1 144 sub upper {no warnings;my $s=@_?shift:$_;$s=~tr/a-zæøåäëïöüÿâêîôûãõàèìòùáéíóúýñð/A-ZÆØÅÄËÏÖÜÿÂÊÎÔÛÃÕÀÈÌÒÙÁÉÍÓÚÝÑÐ/;$s}
  33     1   46  
  33         2003  
  1         4  
  1         3  
  1         5  
1839 33 50   33 1 120 sub lower {no warnings;my $s=@_?shift:$_;$s=~tr/A-ZÆØÅÄËÏÖÜÿÂÊÎÔÛÃÕÀÈÌÒÙÁÉÍÓÚÝÑÐ/a-zæøåäëïöüÿâêîôûãõàèìòùáéíóúýñð/;$s}
  33     1   39  
  33         84542  
  1         4  
  1         3  
  1         4  
1840              
1841             sub trim {
1842 24 100   24 1 56 return trim($_) if !@_;
1843 20 100       31 return map trim($_), @_ if @_>1;
1844 19         18 my $s=shift;
1845 19 100       29 if(ref($s) eq 'SCALAR'){ $$s=~s,^\s+|(?<=\s)\s+|\s+$,,g; return $$s}
  5         21  
  5         10  
1846 14 100       23 if(ref($s) eq 'ARRAY') { trim(\$_) for @$s; return $s }
  1         3  
  1         1  
1847 13         62 $s=~s,^\s+|(?<=\s)\s+|\s+$,,g;
1848 13         41 $s;
1849             }
1850              
1851             sub rpad {
1852 17     17 1 541 my($s,$l,$p)=@_;
1853 17 100 66     45 $p=' ' if @_<3 or !length($p);
1854 17         37 $s.=$p while length($s)<$l;
1855 17         52 substr($s,0,$l);
1856             }
1857              
1858             sub lpad {
1859 5     5 1 938 my($s,$l,$p)=@_;
1860 5 100 66     19 $p=' ' if @_<3 or !length($p);
1861 5 100       32 $l
1862             ? substr($s,0,$l)
1863             : substr($p x (1+$l/length($p)), 0, $l-length($s)).$s;
1864             }
1865              
1866             sub cpad {
1867 11     11 1 1775 my($s,$l,$p)=@_;
1868 11 100 66     41 $p=' ' if @_<3 or !length($p);
1869 11         10 my $ls=length($s);
1870 11 50       18 return substr($s,0,$l) if $l<$ls;
1871 11         22 $p=$p x (($l-$ls+2)/length($p));
1872 11         46 substr($p, 0, ($l-$ls )/2) . $s .
1873             substr($p, 0, ($l-$ls+1)/2);
1874             }
1875              
1876             sub cpad_old {
1877 0     0 0 0 my($s,$l,$p)=@_;
1878 0 0       0 $p=' ' if !length($p);
1879 0 0       0 return substr($s,0,$l) if $l
1880 0         0 my $i=0;
1881 0         0 while($l>length($s)){
1882 0 0       0 my $pc=substr($p,($i==int($i)?1:-1)*($i%length($p)),1);
1883 0 0       0 $i==int($i) ? ($s.=$pc) : ($s=$pc.$s);
1884 0         0 $i+=1/2;
1885             }
1886 0         0 $s;
1887             }
1888              
1889             =head2 trigram
1890              
1891             B A string (i.e. a name). And an optional x (see example 2)
1892              
1893             B A list of this strings trigrams (See examlpe)
1894              
1895             B
1896              
1897             print join ", ", trigram("Kjetil Skotheim");
1898              
1899             Prints:
1900              
1901             Kje, jet, eti, til, il , l S, Sk, Sko, kot, oth, the, hei, eim
1902              
1903             B
1904              
1905             Default is 3, but here 4 is used instead in the second optional input argument:
1906              
1907             print join ", ", trigram("Kjetil Skotheim", 4);
1908              
1909             And this prints:
1910              
1911             Kjet, jeti, etil, til , il S, l Sk, Sko, Skot, koth, othe, thei, heim
1912              
1913             C was created for "fuzzy" name searching. If you have a database of many names,
1914             addresses, phone numbers, customer numbers etc. You can use trigram() to search
1915             among all of those at the same time. If the search form only has one input field.
1916             One general search box.
1917              
1918             Store all of the trigrams of the trigram-indexed input fields coupled
1919             with each person, and when you search, you take each trigram of you
1920             query string and adds the list of people that has that trigram. The
1921             search result should then be sorted so that the persons with most hits
1922             are listed first. Both the query strings and the indexed database
1923             fields should have a space added first and last before C-ing
1924             them.
1925              
1926             This search algorithm is not includes here yet...
1927              
1928             C should perhaps have been named ngram for obvious reasons.
1929              
1930             =head2 sliding
1931              
1932             Same as trigram (except there is no default width). Works also with arrayref instead of string.
1933              
1934             Example:
1935              
1936             sliding( ["Reven","rasker","over","isen"], 2 )
1937              
1938             Result:
1939              
1940             ( ['Reven','rasker'], ['rasker','over'], ['over','isen'] )
1941              
1942             =head2 chunks
1943              
1944             Splits strings and arrays into chunks of given size:
1945              
1946             my @a = chunks("Reven rasker over isen",7);
1947             my @b = chunks([qw/Og gubben satt i kveldinga og koste seg med skillinga/], 3);
1948              
1949             Resulting arrays:
1950              
1951             ( 'Reven r', 'asker o', 'ver ise', 'n' )
1952             ( ['Og','gubben','satt'], ['i','kveldinga','og'], ['koste','seg','med'], ['skillinga'] )
1953              
1954             =head2 chars
1955              
1956             chars("Tittentei"); # ('T','i','t','t','e','n','t','e','i')
1957              
1958             =cut
1959              
1960 2   100 2 1 46 sub trigram { sliding($_[0],$_[1]||3) }
1961              
1962             sub sliding {
1963 3     3 1 4 my($s,$w)=@_;
1964 3 100       32 return map substr($s,$_,$w), 0..length($s)-$w if !ref($s);
1965 1 50       16 return map [@$s[$_..$_+$w-1]], 0..@$s-$w if ref($s) eq 'ARRAY';
1966             }
1967              
1968             sub chunks {
1969 2     2 1 4 my($s,$w)=@_;
1970 2 100       32 return $s=~/(.{1,$w})/g if !ref($s);
1971 1 50       17 return map [@$s[$_*$w .. min($_*$w+$w-1,$#$s)]], 0..$#$s/$w if ref($s) eq 'ARRAY';
1972             }
1973              
1974 0     0 1 0 sub chars { split//, shift }
1975              
1976             =head2 repl
1977              
1978             Synonym for replace().
1979              
1980             =head2 replace
1981              
1982             Return the string in the first input argument, but where pairs of search-replace strings (or rather regexes) has been run.
1983              
1984             Works as C in Oracle, or rather regexp_replace() in Oracle 10 and onward. Except that this C accepts more than three arguments.
1985              
1986             Examples:
1987              
1988             print replace("water","ater","ine"); # Turns water into wine
1989             print replace("water","ater"); # w
1990             print replace("water","at","eath"); # weather
1991             print replace("water","wa","ju",
1992             "te","ic",
1993             "x","y", # No x is found, no y is returned
1994             'r$',"e"); # Turns water into juice. 'r$' says that the r it wants
1995             # to change should be the last letters. This reveals that
1996             # second, fourth, sixth and so on argument is really regexs,
1997             # not normal strings. So use \ (or \\ inside "") to protect
1998             # the special characters of regexes. You probably also
1999             # should write qr/regexp/ instead of 'regexp' if you make
2000             # use of regexps here, just to make it more clear that
2001             # these are really regexps, not strings.
2002              
2003             print replace('JACK and JUE','J','BL'); # prints BLACK and BLUE
2004             print replace('JACK and JUE','J'); # prints ACK and UE
2005             print replace("abc","a","b","b","c"); # prints ccc (not bcc)
2006              
2007             If the first argument is a reference to a scalar variable, that variable is changed "in place".
2008              
2009             Example:
2010              
2011             my $str="test";
2012             replace(\$str,'e','ee','s','S');
2013             print $str; # prints teeSt
2014              
2015             =cut
2016              
2017 46     46 1 60 sub replace { repl(@_) }
2018             sub repl {
2019 83     83 1 134 my $str=shift;
2020 83 100       153 return $$str=replace($$str,@_) if ref($str) eq 'SCALAR';
2021             #return ? if ref($str) eq 'ARRAY';
2022             #return ? if ref($str) eq 'HASH';
2023 82         127 while(@_){
2024 109         131 my($fra,$til)=(shift,shift);
2025 109 100       3806 defined $til ? $str=~s/$fra/$til/g : $str=~s/$fra//g;
2026             }
2027 82         379 return $str;
2028             }
2029              
2030             =head1 ARRAYS
2031              
2032             =head2 min
2033              
2034             Returns the smallest number in a list. Undef is ignored.
2035              
2036             @lengths=(2,3,5,2,10,undef,5,4);
2037             $shortest = min(@lengths); # returns 2
2038              
2039             Note: The comparison operator is perls C<< < >>> which means empty strings is treated as C<0>, the number zero. The same goes for C, except of course C<< > >> is used instead.
2040              
2041             min(3,4,5) # 3
2042             min(3,4,5,undef) # 3
2043             min(3,4,5,'') # returns the empty string
2044              
2045             =head2 max
2046              
2047             Returns the largest number in a list. Undef is ignored.
2048              
2049             @heights=(123,90,134,undef,132);
2050             $highest = max(@heights); # 134
2051              
2052             =head2 mins
2053              
2054             Just as L, except for strings.
2055              
2056             print min(2,7,10); # 2
2057             print mins("2","7","10"); # 10
2058             print mins(2,7,10); # 10
2059              
2060             =head2 maxs
2061              
2062             Just as L, except for strings.
2063              
2064             print max(2,7,10); # 10
2065             print maxs("2","7","10"); # 7
2066             print maxs(2,7,10); # 7
2067              
2068             =cut
2069              
2070 35 100 100 35 1 1604 sub min {my $min;for(@_){ $min=$_ if defined($_) and !defined($min) || $_ < $min } $min }
  35   66     50  
  7087         20189  
  35         204  
2071 1 100 100 1 1 2 sub mins {my $min;for(@_){ $min=$_ if defined($_) and !defined($min) || $_ lt $min} $min }
  1   33     3  
  3         17  
  1         4  
2072 32 100 100 32 1 27 sub max {my $max;for(@_){ $max=$_ if defined($_) and !defined($max) || $_ > $max } $max }
  32   66     47  
  7093         19673  
  32         429  
2073 1 100 100 1 1 1 sub maxs {my $max;for(@_){ $max=$_ if defined($_) and !defined($max) || $_ gt $max} $max }
  1   33     3  
  3         17  
  1         3  
2074              
2075             =head2 zip
2076              
2077             B Two or more arrayrefs. A number of equal sized arrays
2078             containing numbers, strings or anything really.
2079              
2080             B An array of those input arrays zipped (interlocked, merged) into each other.
2081              
2082             print join " ", zip( [1,3,5], [2,4,6] ); # 1 2 3 4 5 6
2083             print join " ", zip( [1,4,7], [2,5,8], [3,6,9] ); # 1 2 3 4 5 6 7 8 9
2084              
2085             Example:
2086              
2087             zip() creates a hash where the keys are found in the first array and values in the secord in the correct order:
2088              
2089             my @media = qw/CD DVD VHS LP Blueray/;
2090             my @count = qw/20 12 2 4 3/;
2091             my %count = zip(\@media,\@count); # or zip( [@media], [@count] )
2092             print "I got $count{DVD} DVDs\n"; # I got 12 DVDs
2093              
2094             Dies (croaks) if the two lists are of different sizes
2095              
2096             ...or any input argument is not an array ref.
2097              
2098             =cut
2099              
2100             sub zip {
2101 7     7 1 56279 my @t=@_;
2102 7   66     368 ref($_) ne 'ARRAY' and croak "ERROR: zip should have arrayrefs as arguments" for @t;
2103 5   66     14 @{$t[$_]} != @{$t[0]} and croak "ERROR: zip should have equal sized arrays" for 1..$#t;
  5         6  
  5         222  
2104 3         4 my @res;
2105 3         2 for my $i (0..@{$t[0]}-1){
  3         9  
2106 9         17 push @res, $$_[$i] for @t;
2107             }
2108 3         18 return @res;
2109             }
2110              
2111              
2112             =head2 pushsort
2113              
2114             Adds one or more element to a numerically sorted array and keeps it sorted.
2115              
2116             pushsort @a, 13; # this...
2117             push @a, 13; @a = sort {$a<=>$b} @a; # is the same as this, but the former is faster if @a is large
2118              
2119             =head2 pushsortstr
2120              
2121             Same as pushsort except that the array is kept sorted alphanumerically (cmp) instead of numerically (<=>). See L.
2122              
2123             pushsort @a, "abc"; # this...
2124             push @a, "abc"; @a = sort @a; # is the same as this, but the former is faster if @a is large
2125              
2126             =cut
2127              
2128             our $Pushsort_cmpsub=undef;
2129             sub pushsort (\@@) {
2130 10915     10915 1 10367 my $ar=shift;
2131              
2132             #not needed but often faster
2133 10915 100 100     20098 if(!defined $Pushsort_cmpsub and @$ar+@_<100){ #hm speedup?
2134 2348         2379 @$ar=(sort {$a<=>$b} (@$ar,@_));
  51995         29892  
2135 2348         1815 return 0+@$ar;
2136             }
2137              
2138 8567         7879 for my $v (@_){
2139              
2140             #not needed but often faster
2141 7567 100       9013 if(!defined $Pushsort_cmpsub){ #faster rank() in most cases
2142 901 50 0     1145 push @$ar, $v and next if $v>=$$ar[-1];
2143 901 100 50     1130 unshift @$ar, $v and next if $v< $$ar[0];
2144             }
2145              
2146 7564         8278 splice @$ar, binsearch($v,$ar,1,$Pushsort_cmpsub)+1, 0, $v;
2147             }
2148 8567         8812 0+@$ar
2149             }
2150 0     0 1 0 sub pushsortstr(\@@){ local $Pushsort_cmpsub=sub{$_[0]cmp$_[1]}; pushsort(@_) } #speedup: copy sub pushsort
  1000     1000   2229  
  1000         1102  
2151              
2152             =head2 binsearch
2153              
2154             Returns the position of an element in a numerically sorted array. Returns undef if the element is not found.
2155              
2156             B Two, three or four arguments
2157              
2158             B the element to find. Usually a number.
2159              
2160             B a reference to the array to search in. The array
2161             should be sorted in ascending numerical order (se exceptions below).
2162              
2163             B Optional. Default false.
2164              
2165             If true, whether result I should return undef or a fractional position.
2166              
2167             If the third argument is false binsearch returns undef if the element is not found.
2168              
2169             If the third argument is true binsearch returns 0.5 plus closest position below the searched value.
2170              
2171             Returns C< last position + 0.5 > if the searched element is greater than all elements in the sorted array.
2172              
2173             Returns C< -0.5 > if the searched element is less than all elements in the sorted array.
2174              
2175             Fourth argument: Optional. Default C<< sub { $_[0] <=> $_[1] } >>.
2176              
2177             If present, the fourth argument is either:
2178              
2179             =over 4
2180              
2181             =item * a code-ref that alters the way binsearch compares two elements, default is C<< sub{$_[0]<=>$_[1]} >>
2182              
2183             =item * a string that works as a hash key (column name), see example below
2184              
2185             =back
2186              
2187             B
2188              
2189             binsearch(10,[5,10,15,20]); # 1
2190             binsearch(10,[20,15,10,5],undef,sub{$_[1]<=>$_[0]}); # 2 search arrays sorted numerically in opposite order
2191             binsearch("c",["a","b","c","d"],undef,sub{$_[0]cmp$_[1]}); # 2 search arrays sorted alphanumerically
2192             binsearchstr("b",["a","b","c","d"]); # 1 search arrays sorted alphanumerically
2193              
2194             my @data=( map { {num=>$_, sqrt=>sqrt($_), square=>$_**2} }
2195             grep !$_%7, 1..1000000 );
2196             my $i = binsearch( {num=>913374}, \@data, undef, sub {$_[0]{num} <=> $_[1]{num}} );
2197             my $i = binsearch( {num=>913374}, \@data, undef, 'num' ); #same as previous line
2198             my $found_hashref = defined $i ? $data[$i] : undef;
2199              
2200             =head2 binsearchstr
2201              
2202             Same as binsearch except that the arrays is sorted alphanumerically
2203             (cmp) instead of numerically (<=>) and the searched element is a
2204             string, not a number. See L.
2205              
2206             =cut
2207              
2208             our $Binsearch_steps;
2209             our $Binsearch_maxsteps=100;
2210             sub binsearch {
2211 14611     14611 1 28984 my($search,$aref,$insertpos,$cmpsub)=@_; #search pos of search in array
2212 14611 50       21646 croak "binsearch did not get arrayref as second arg" if ref($aref) ne 'ARRAY';
2213 14611 50 100     39645 croak "binsearch got fourth arg which is not a code-ref" if defined $cmpsub and ref($cmpsub) and ref($cmpsub) ne 'CODE';
      66        
2214 14611 100 100     27245 if(defined $cmpsub and !ref($cmpsub)){
2215 1         1 my $key=$cmpsub;
2216 1     17   4 $cmpsub = sub{ $_[0]{$key} <=> $_[1]{$key} };
  17         46  
2217             }
2218 14611 50       17600 return $insertpos ? -0.5 : undef if !@$aref;
    100          
2219 14599         12029 my($min,$max)=(0,$#$aref);
2220 14599         10208 $Binsearch_steps=0;
2221 14599         17224 while (++$Binsearch_steps <= $Binsearch_maxsteps) {
2222 128752         115639 my $middle=int(($min+$max+0.5)/2);
2223 128752         86838 my $middle_value=$$aref[$middle];
2224              
2225             #croak "binsearch got non-sorted array" if !$cmpsub and $$aref[$min]>$$aref[$min]
2226             # or $cmpsub and &$cmpsub($$aref[$min],$$aref[$min])>0;
2227              
2228 128752 100 100     464471 if( !$cmpsub and $search < $middle_value
    100 100        
      66        
      100        
      100        
      66        
2229             or $cmpsub and &$cmpsub($search,$middle_value) < 0 ) { #print "<\n";
2230 39007 50 66     50005 $max=$min, next if $middle == $max and $min != $max;
2231 39007 50       44940 return $insertpos ? $middle-0.5 : undef if $middle == $max;
    100          
2232 37554         42096 $max=$middle;
2233             }
2234             elsif( !$cmpsub and $search > $middle_value
2235             or $cmpsub and &$cmpsub($search,$middle_value) > 0 ) { #print ">\n";
2236 82706 100 100     133419 $min=$max, next if $middle == $min and $max != $min;
2237 75185 50       94906 return $insertpos ? $middle+0.5 : undef if $middle == $min;
    100          
2238 69078         88291 $min=$middle;
2239             }
2240             else { #print "=\n";
2241 7039         10494 return $middle;
2242             }
2243             }
2244 0         0 croak "binsearch exceded $Binsearch_maxsteps steps";
2245             }
2246              
2247             sub binsearchfast { # binary search routine finds index just below value
2248 0     0 0 0 my ($x,$v)=@_;
2249 0         0 my ($klo,$khi)=(0,$#{$x});
  0         0  
2250 0         0 my $k;
2251 0         0 while (($khi-$klo)>1) {
2252 0         0 $k=int(($khi+$klo)/2);
2253 0 0       0 if ($$x[$k]>$v) { $khi=$k; } else { $klo=$k; }
  0         0  
  0         0  
2254             }
2255 0         0 return $klo;
2256             }
2257              
2258              
2259 2     2 1 7 sub binsearchstr {binsearch(@_[0..2],sub{$_[0]cmp$_[1]})}
  1     1   9  
2260              
2261             =head2 rank
2262              
2263             B Two or three arguments. N and an arrayref for the list to look at.
2264              
2265             In scalar context: Returns the nth smallest number in an array. The array doesn't have to be sorted.
2266              
2267             In array context: Returns the n smallest numbers in an array.
2268              
2269             To return the n(th) largest number(s) instead of smallest, just negate n.
2270              
2271             An optional third argument can be a sub that is used to compare the elements of the input array.
2272              
2273             Examples:
2274              
2275             my $second_smallest = rank(2, [11,12,13,14]); # 12
2276             my @top10 = rank(-10, [1..100]); # 100, 99, 98, 97, 96, 95, 94, 93, 92, 91
2277             my $max = rank(-1, [101,102,103,102,101]); #103
2278             my @contest = ({name=>"Alice",score=>14},{name=>"Bob",score=>13},{name=>"Eve",score=>12});
2279             my $second = rank(2, \@contest, sub{$_[1]{score}<=>$_[0]{score}})->{name}; #Bob
2280              
2281             =head2 rankstr
2282              
2283             Just as C but sorts alphanumerically (strings, cmp) instead of numerically.
2284              
2285             =cut
2286              
2287             sub rank {
2288 35     35 1 339 my($rank,$aref,$cmpsub)=@_;
2289 35 100       60 if($rank<0){
2290 10   100 30006   48 $cmpsub||=sub{$_[0]<=>$_[1]};
  30006         93133  
2291 10     60004   38 return rank(-$rank,$aref,sub{0-&$cmpsub});
  60004         53615  
2292             }
2293 25         26 my @sort;
2294 25         24 local $Pushsort_cmpsub=$cmpsub;
2295 25         29 for(@$aref){
2296 8914         10037 pushsort @sort, $_;
2297 8914 100       16641 pop @sort if @sort>$rank;
2298             }
2299 25 100       1552 return wantarray ? @sort : $sort[$rank-1];
2300             }
2301 0 50   0 1 0 sub rankstr {wantarray?(rank(@_,sub{$_[0]cmp$_[1]})):rank(@_,sub{$_[0]cmp$_[1]})}
  60145     10   190098  
  10         57  
2302              
2303             =head2 eqarr
2304              
2305             B Two or more references to arrays.
2306              
2307             B True (1) or false (0) for whether or not the arrays are numerically I alphanumerically equal.
2308             Comparing each element in each array with both C< == > and C< eq >.
2309              
2310             Examples:
2311              
2312             eqarr([1,2,3],[1,2,3],[1,2,3]); # 1 (true)
2313             eqarr([1,2,3],[1,2,3],[1,2,4]); # 0 (false)
2314             eqarr([1,2,3],[1,2,3,4]); # undef (different size, false)
2315             eqarr([1,2,3]); # croak (should be two or more arrays)
2316             eqarr([1,2,3],1,2,3); # croak (not arraysrefs)
2317              
2318             =cut
2319              
2320             sub eqarr {
2321 27     27 1 1236 my @arefs=@_;
2322 27 100       235 croak if @arefs<2;
2323 26   66     186 ref($_) ne 'ARRAY' and croak for @arefs;
2324 25   100     67 @{$arefs[0]} != @{$arefs[$_]} and return undef for 1..$#arefs;
  27         32  
  27         65  
2325 24         31 my $ant;
2326            
2327 24         56 for my $ar (@arefs[1..$#arefs]){
2328 26         39 for(0..@$ar-1){
2329 272 50 33     645 ++$ant and $ant>100 and croak ">100"; #TODO: feiler ved sammenligning av to tabeller > 10000(?) tall
2330 272 100 66     862 return 0 if $arefs[0][$_] ne $$ar[$_]
2331             or $arefs[0][$_] != $$ar[$_];
2332             }
2333             }
2334 22         150 return 1;
2335             }
2336              
2337             =head2 sorted
2338              
2339             Return true if the input array is numerically sorted.
2340              
2341             @a=(1..10); print "array is sorted" if sorted @a; #true
2342              
2343             Optionally the last argument can be a comparison sub:
2344              
2345             @person=({Rank=>1,Name=>'Amy'}, {Rank=>2,Name=>'Paula'}, {Rank=>3,Name=>'Ruth'});
2346             print "Persons are sorted" if sorted @person, sub{$_[0]{Rank}<=>$_[1]{Rank}};
2347              
2348             =head2 sortedstr
2349              
2350             Return true if the input array is Inumerically sorted.
2351              
2352             @a=(1..10); print "array is sorted" if sortedstr @a; #false
2353             @a=("01".."10"); print "array is sorted" if sortedstr @a; #true
2354              
2355             =cut
2356              
2357             sub sorted (\@@) {
2358 4     4 1 20 my($a,$cmpsub)=@_;
2359 4         21 for(0..$#$a-1){
2360 1197 50 66     4251 return 0 if !$cmpsub and $$a[$_]>$$a[$_+1]
      66        
      33        
2361             or $cmpsub and &$cmpsub($$a[$_],$$a[$_+1])>0;
2362             }
2363 4         43 return 1;
2364             }
2365 99     99 1 548 sub sortedstr { sorted(@_,sub{$_[0]cmp$_[1]}) }
  2     2   20  
2366              
2367             =head2 part
2368              
2369             B A code-ref and a list
2370              
2371             B Two array-refs
2372              
2373             Like C but returns the false list as well. Partitions a list
2374             into two lists where each element goes into the first or second list
2375             whether the predicate (a code-ref) is true or false for that element.
2376              
2377             my( $odd, $even ) = part {$_%2} (1..8);
2378             print for @$odd; #prints 1 3 5 7
2379             print for @$even; #prints 2 4 6 8
2380              
2381             (Works like C< partition() > in the Scala programming language)
2382              
2383             =head2 parth
2384              
2385             Like C but returns any number of lists.
2386              
2387             B A code-ref and a list
2388              
2389             B A hash where the returned values from the code-ref are keys and the values are arrayrefs to the list elements which gave those keys.
2390              
2391             my %hash = parth { uc(substr($_,0,1)) } ('These','are','the','words','of','this','array');
2392             print serialize(\%hash);
2393              
2394             Result:
2395              
2396             %hash = ( T=>['These','the','this'],
2397             A=>['are','array'],
2398             O=>['of'],
2399             W=>['words'] )
2400              
2401             =head2 parta
2402              
2403             Like L but returns an array of lists.
2404              
2405             my @a = parta { length } qw/These are the words of this array/;
2406              
2407             Result:
2408              
2409             @a = ( undef, undef, ['of'], ['are','the'], ['this'], ['These','words','array'] )
2410              
2411             Two undefs at first (index positions 0 and 1) since there are no words of length 0 or 1 in the input array.
2412              
2413             =cut
2414              
2415 1 100   1 1 14 sub part (&@) { my($c,@r)=(shift,[],[]); push @{ $r[ &$c?0:1 ] }, $_ for @_; @r }
  1         3  
  8         20  
  1         4  
2416 1     1 1 318 sub parth (&@) { my($c,%r)=(shift); push @{ $r{ &$c } }, $_ for @_; %r }
  1         4  
  7         32  
  1         11  
2417 1     1 1 203 sub parta (&@) { my($c,@r)=(shift); push @{ $r[ &$c ] }, $_ for @_; @r }
  1         2  
  7         52  
  1         8  
2418             #sub mapn (&$@) { ... } like map but @_ contains n elems at a time, n=1 is map
2419              
2420             =head2 refa
2421              
2422             =head2 refh
2423              
2424             =head2 refs
2425              
2426             =head2 refaa
2427              
2428             =head2 refah
2429              
2430             =head2 refha
2431              
2432             =head2 refhh
2433              
2434             Returns true or false (1 or 0) if the argument is an arrayref, hashref, scalarref, ref to an array of arrays, ref to an array of hashes
2435              
2436             Examples:
2437              
2438             my $ref_to_array = [1,2,3];
2439             my $ref_to_hash = {1,100,2,200,3,300};
2440             my $ref_to_scalar = \"String";
2441             print "arrayref" if ref($ref_to_array) eq 'ARRAY'; #true
2442             print "hashref" if ref($ref_to_hash) eq 'HASH'; #true
2443             print "scalarref" if ref($ref_to_scalar) eq 'SCALAR'; #true
2444             print "arrayref" if refa($ref_to_array); #also true, without: eq 'ARRAY'
2445             print "hashref" if refh($ref_to_hash); #also true, without: eq 'HASH'
2446             print "scalarref" if refs($ref_to_scalar); #also true, without: eq 'SCALAR'
2447              
2448             my $ref_to_array_of_arrays = [ [1,2,3], [2,4,8], [10,100,1000] ];
2449             my $ref_to_array_of_hashes = [ {1=>10, 2=>100}, {first=>1, second=>2} ];
2450             my $ref_to_hash_of_arrays = { alice=>[1,2,3], bob=>[2,4,8], eve=>[10,100,1000] };
2451             my $ref_to_hash_of_hashes = { alice=>{a=>22,b=>11}, bob=>{a=>33,b=>66} };
2452              
2453             print "aa" if refaa($ref_to_array_of_arrays); #true
2454             print "ah" if refah($ref_to_array_of_hashes); #true
2455             print "ha" if refha($ref_to_hash_of_arrays); #true
2456             print "hh" if refhh($ref_to_hash_of_hashes); #true
2457              
2458             =cut
2459              
2460 3 0   3 1 25 sub refa { ref($_[0]) eq 'ARRAY' ? 1 : ref($_[0]) ? 0 : undef }
    50          
2461 3 0   3 1 13 sub refh { ref($_[0]) eq 'HASH' ? 1 : ref($_[0]) ? 0 : undef }
    50          
2462 1 0   1 1 6 sub refs { ref($_[0]) eq 'SCALAR' ? 1 : ref($_[0]) ? 0 : undef }
    50          
2463 1 0   1 1 6 sub refaa { ref($_[0]) eq 'ARRAY' ? refa($_[0][0]) : ref($_[0]) ? 0 : undef }
    50          
2464 1 0   1 1 6 sub refah { ref($_[0]) eq 'ARRAY' ? refh($_[0][0]) : ref($_[0]) ? 0 : undef }
    50          
2465 1 0   1 1 5 sub refha { ref($_[0]) eq 'HASH' ? refa((values%{$_[0]})[0]) : ref($_[0]) ? 0 : undef }
  1 50       4  
2466 1 0   1 1 7 sub refhh { ref($_[0]) eq 'HASH' ? refh((values%{$_[0]})[0]) : ref($_[0]) ? 0 : undef }
  1 50       7  
2467              
2468              
2469             =head2 pushr
2470              
2471             =head2 popr
2472              
2473             =head2 shiftr
2474              
2475             =head2 unshiftr
2476              
2477             =head2 splicer
2478              
2479             =head2 keysr
2480              
2481             =head2 valuesr
2482              
2483             =head2 eachr
2484              
2485             In Perl versions 5.12 - 5.22 push, pop, shift, unshift, splice, keys, values and each
2486             handled references to arrays and references to hashes just as if they where arrays and hashes. Examples:
2487              
2488             my $person={name=>'Gaga', array=>[1,2,3]};
2489             push $person{array} , 4; #works in perl 5.12-5.22 but not before and after
2490             push @{ $person{array} }, 4; #works in all perl5 versions
2491             pushr $person{array} , 4; #use Acme::Tools and this should work in perl >= 5.8
2492             popr $person{array}; #returns 4
2493              
2494             =cut
2495              
2496 3     3 1 262 sub pushr { push @{shift()}, @_ } # ? ($@)
  3         8  
2497 1     1 1 2 sub popr { pop @{shift()} }
  1         2  
2498 1     1 1 253 sub shiftr { shift @{shift()} }
  1         2  
2499 2     2 1 501 sub unshiftr { unshift @{shift()}, @_ }
  2         10  
2500 0         0 sub splicer { @_==1 ? splice( @{shift()} )
2501 1         5 :@_==2 ? splice( @{shift()}, shift() )
2502 2         8 :@_==3 ? splice( @{shift()}, shift(), shift() )
2503 4 50   4 1 1081 :@_>=4 ? splice( @{shift()}, shift(), shift(), @_ ) : die }
  1 100       6  
    100          
    50          
2504 1     1 1 264 sub keysr { keys( %{shift()} ) } #hm sort(keys%{shift()}) ?
  1         10  
2505 1     1 1 4 sub valuesr { values( %{shift()} ) }
  1         4  
2506 4 50   4 1 1289 sub eachr { ref($_[0]) eq 'HASH' ? each(%{shift()})
  4         11  
2507             #:ref($_[0]) eq 'ARRAY' ? each(@{shift()}) # perl 5.8.8 cannot compile each on array! eval?
2508             : croak("eachr needs hashref or arrayref got '".ref($_[0])."'") }
2509             #sub eachr { each(%{shift()}) }
2510              
2511             =head1 STATISTICS
2512              
2513             =head2 sum
2514              
2515             Returns the sum of a list of numbers. Undef is ignored.
2516              
2517             print sum(1,3,undef,8); # 12
2518             print sum(1..1000); # 500500
2519             print sum(undef); # undef
2520              
2521             =cut
2522              
2523 33   66 33 1 189 sub sum { my $sum; no warnings; defined($_) and $sum+=$_ for @_; $sum }
  33     60   44  
  33         2566  
  60         2766  
  60         10096  
  60         408  
2524              
2525             =head2 avg
2526              
2527             Returns the I number of a list of numbers. That is C
2528              
2529             print avg( 2, 4, 9); # 5 (2+4+9) / 3 = 5
2530             print avg( [2, 4, 9] ); # 5 pass by reference, same result but faster for large arrays
2531              
2532             Also known as I.
2533              
2534             Pass by reference: If one argument is given and it is a reference to an array,
2535             this array is taken as the list of numbers. This mode is about twice as fast
2536             for 10000 numbers or more. It most likely also saves memory.
2537              
2538             =cut
2539              
2540             sub avg {
2541 880     880 1 2173 my($sum,$n,@a)=(0,0);
2542 33     33   371 no warnings;
  33         43  
  33         11681  
2543 880 50       1044 if( @_==0 ) { return undef }
  0         0  
2544 880 100 100     1255 if( @_==1 and ref($_[0]) eq 'ARRAY' ){ @a=grep defined,@{$_[0]} }
  1         1  
  1         4  
2545 879         2379 else { @a=grep defined,@_ }
2546 880 100       995 if( @a==0 ) { return undef }
  1         4  
2547 879         2972 $sum+=$_ for @a;
2548 879         2244 return $sum/@a
2549             }
2550              
2551             =head2 geomavg
2552              
2553             Returns the I (a.k.a I) of a list of numbers.
2554              
2555             print geomavg(10,100,1000,10000,100000); # 1000
2556             print 0+ (10*100*1000*10000*100000) ** (1/5); # 1000 same thing
2557             print exp(avg(map log($_),10,100,1000,10000,100000)); # 1000 same thing, this is how geomavg() works internally
2558              
2559             =cut
2560              
2561 771     771 1 1820 sub geomavg { exp(avg(map log($_), @_)) }
2562              
2563             =head2 harmonicavg
2564              
2565             Returns the I (a.k.a I) of a list of numbers. L
2566              
2567             print harmonicavg(10,11,12); # 3 / ( 1/10 + 1/11 + 1/12) = 10.939226519337
2568              
2569             =cut
2570              
2571 0     0 1 0 sub harmonicavg { my $s; $s+=1/$_ for @_; @_/$s }
  0         0  
  0         0  
2572              
2573             =head2 variance
2574              
2575             C<< variance = ( sum (x[i]-Average)**2)/(n-1) >>
2576              
2577             =cut
2578              
2579             sub variance {
2580 0     0 1 0 my $sumx2; $sumx2+=$_*$_ for @_;
  0         0  
2581 0         0 my $sumx; $sumx+=$_ for @_;
  0         0  
2582 0         0 (@_*$sumx2-$sumx*$sumx)/(@_*(@_-1));
2583             }
2584              
2585             =head2 stddev
2586              
2587             C<< Standard_Deviation = sqrt(variance) >>
2588              
2589             Standard deviation (stddev) is a measurement of the width of a normal
2590             distribution where one stddev on each side of the mean covers 68% and
2591             two stddevs 95%. Normal distributions are sometimes called Gauss curves
2592             or Bell shapes. L
2593              
2594             stddev(4,5,6,5,6,4,3,5,5,6,7,6,5,7,5,6,4) # = 1.0914103126635
2595             avg(@testscores) + stddev(@testscores) # = the score for IQ = 115 (by one definition)
2596             avg(@testscores) - stddev(@testscores) # = the score for IQ = 85
2597              
2598             =cut
2599              
2600             sub stddev {
2601 14 100   14 1 596 return undef if @_==0;
2602 13 100 66     52 return stddev(\@_) if @_>0 and !ref($_[0]);
2603 8         8 my $ar=shift;
2604 8 50       12 return undef if @$ar==0;
2605 8 100       16 return 0 if @$ar==1;
2606 6         3 my $sumx2; $sumx2 += $_*$_ for @$ar;
  6         51  
2607 6         6 my $sumx; $sumx += $_ for @$ar;
  6         41  
2608 6         30 sqrt( (@$ar*$sumx2-$sumx*$sumx)/(@$ar*(@$ar-1)) );
2609             }
2610              
2611             =head2 rstddev
2612              
2613             Relative stddev = stddev / avg
2614              
2615             =cut
2616              
2617 0     0 1 0 sub rstddev { stddev(@_) / avg(@_) }
2618              
2619             =head2 median
2620              
2621             Returns the median value of a list of numbers. The list do not have to
2622             be sorted.
2623              
2624             Example 1, list having an odd number of numbers:
2625              
2626             print median(1, 100, 101); # 100
2627              
2628             100 is the middlemost number after sorting.
2629              
2630             Example 2, an even number of numbers:
2631              
2632             print median(1005, 100, 101, 99); # 100.5
2633              
2634             100.5 is the average of the two middlemost numbers.
2635              
2636             =cut
2637              
2638             sub median {
2639 33     33   146 no warnings;
  33         48  
  33         11928  
2640 6     6 1 327 my @list = sort {$a<=>$b} @_;
  10035         5776  
2641 6         8 my $n=@list;
2642 6 100       167 $n%2 ? $list[($n-1)/2]
2643             : ($list[$n/2-1] + $list[$n/2])/2;
2644             }
2645              
2646              
2647             =head2 percentile
2648              
2649             Returns one or more percentiles of a list of numbers.
2650              
2651             Percentile 50 is the same as the I, percentile 25 is the first
2652             quartile, 75 is the third quartile.
2653              
2654             B
2655              
2656             First argument is your wanted percentile, or a refrence to a list of percentiles you want from the dataset.
2657              
2658             If the first argument to percentile() is a scalar, this percentile is returned.
2659              
2660             If the first argument is a reference to an array, then all those percentiles are returned as an array.
2661              
2662             Second, third, fourth and so on argument are the numbers from which you want to find the percentile(s).
2663              
2664             B
2665              
2666             This finds the 50-percentile (the median) to the four numbers 1, 2, 3 and 4:
2667              
2668             print "Median = " . percentile(50, 1,2,3,4); # 2.5
2669              
2670             This:
2671              
2672             @data=(11, 5, 3, 5, 7, 3, 1, 17, 4, 2, 6, 4, 12, 9, 0, 5);
2673             @p = map percentile($_,@data), (25, 50, 75);
2674              
2675             Is the same as this:
2676              
2677             @p = percentile([25, 50, 75], @data);
2678              
2679             But the latter is faster, especially if @data is large since it sorts
2680             the numbers only once internally.
2681              
2682             B
2683              
2684             Data: 1, 4, 6, 7, 8, 9, 22, 24, 39, 49, 555, 992
2685              
2686             Average (or mean) is 143
2687              
2688             Median is 15.5 (which is the average of 9 and 22 who both equally lays in the middle)
2689              
2690             The 25-percentile is 6.25 which are between 6 and 7, but closer to 6.
2691              
2692             The 75-percentile is 46.5, which are between 39 and 49 but close to 49.
2693              
2694             Linear interpolation is used to find the 25- and 75-percentile and any
2695             other x-percentile which doesn't fall exactly on one of the numbers in
2696             the set.
2697              
2698             B
2699              
2700             As you saw, 6.25 are closer to 6 than to 7 because 25% along the set of
2701             the twelve numbers is closer to the third number (6) than to he fourth
2702             (7). The median (50-percentile) is also really interpolated, but it is
2703             always in the middle of the two center numbers if there are an even count
2704             of numbers.
2705              
2706             However, there is two methods of interpolation:
2707              
2708             Example, we have only three numbers: 5, 6 and 7.
2709              
2710             Method 1: The most common is to say that 5 and 7 lays on the 25- and
2711             75-percentile. This method is used in Acme::Tools.
2712              
2713             Method 2: In Oracle databases the least and greatest numbers
2714             always lay on the 0- and 100-percentile.
2715              
2716             As an argument on why Oracles (and others?) definition is not the best way is to
2717             look at your data as for instance temperature measurements. If you
2718             place the highest temperature on the 100-percentile you are sort of
2719             saying that there can never be a higher temperatures in future measurements.
2720              
2721             A quick non-exhaustive Google survey suggests that method 1 here is most used.
2722              
2723             The larger the data sets, the less difference there is between the two methods.
2724              
2725             B
2726              
2727             In method one, when you want a percentile outside of any possible
2728             interpolation, you use the smallest and second smallest to extrapolate
2729             from. For instance in the data set C<5, 6, 7>, if you want an
2730             x-percentile of x < 25, this is below 5.
2731              
2732             If you feel tempted to go below 0 or above 100, C will
2733             I (or I to be more precise)
2734              
2735             Another method could be to use "soft curves" instead of "straight
2736             lines" in interpolation. Maybe B-splines or Bezier curves. This is not
2737             used here.
2738              
2739             For large sets of data Hoares algorithm would be faster than the
2740             simple straightforward implementation used in C
2741             here. Hoares don't sort all the numbers fully.
2742              
2743             B
2744              
2745             Data: 1, 4, 6, 7, 8, 9, 22, 24, 39, 49, 555, 992
2746              
2747             Percentile Method 1 Method 2
2748             (Acme::Tools::percentile (Oracle)
2749             and others)
2750             ------------- ----------------------------- ---------
2751             0 -2 1
2752             1 -1.61 1.33
2753             25 6.25 6.75
2754             50 (median) 15.5 15.5
2755             75 46.5 41.5
2756             99 1372.19 943.93
2757             100 1429 992
2758              
2759             Found like this:
2760              
2761             perl -MAcme::Tools -le 'print for percentile([0,1,25,50,75,99,100], 1,4,6,7,8,9,22,24,39,49,555,992)'
2762              
2763             And like this in Oracle-databases:
2764              
2765             select
2766             percentile_cont(0.00) within group(order by n) per0,
2767             percentile_cont(0.01) within group(order by n) per1,
2768             percentile_cont(0.25) within group(order by n) per25,
2769             percentile_cont(0.50) within group(order by n) per50,
2770             percentile_cont(0.75) within group(order by n) per75,
2771             percentile_cont(0.99) within group(order by n) per99,
2772             percentile_cont(1.00) within group(order by n) per100
2773             from (
2774             select 0+regexp_substr('1,4,6,7,8,9,22,24,39,49,555,992','[^,]+',1,i) n
2775             from dual,(select level i from dual connect by level <= 12)
2776             );
2777              
2778             (Oracle also provides a similar function: C where I
2779             is short for I, meaning no interpolation is taking
2780             place. Instead the closest number from the data set is picked.)
2781              
2782             =cut
2783              
2784             sub percentile {
2785 3     3 1 4 my(@p,@t,@ret);
2786 3 100       9 if(ref($_[0]) eq 'ARRAY'){ @p=@{shift()} }
  1 50       2  
  1         3  
2787 2         4 elsif(not ref($_[0])) { @p=(shift()) }
2788 0         0 else{croak()}
2789 3         5 @t=@_;
2790 3 50       7 return if !@p;
2791 3 50       4 croak if !@t;
2792 3         9 @t=sort{$a<=>$b}@t;
  72         44  
2793 3 50       8 push@t,$t[0] if @t==1;
2794 3         4 for(@p){
2795 9 50 33     28 croak if $_<0 or $_>100;
2796 9         13 my $i=(@t+1)*$_/100-1;
2797 9 50       38 push@ret,
    100          
    100          
2798             $i<0 ? $t[0]+($t[1]-$t[0])*$i:
2799             $i>$#t ? $t[-1]+($t[-1]-$t[-2])*($i-$#t):
2800             $i==int($i)? $t[$i]:
2801             $t[$i]*(int($i+1)-$i) + $t[$i+1]*($i-int($i));
2802             }
2803 3 100       36 return @p==1 ? $ret[0] : @ret;
2804             }
2805              
2806             =head1 RANDOM
2807              
2808             =head2 random
2809              
2810             B One or two arguments.
2811              
2812             B
2813              
2814             If two integer arguments: returns a random integer between the integers in argument one and two.
2815              
2816             If the first argument is an arrayref: returns a random member of that array without changing the array.
2817              
2818             If the first argument is an arrayref and there is a second arg: return that many random members of that array
2819              
2820             If the first argument is an hashref and there is no second arg: return a random key weighted by the values of that hash
2821              
2822             If the first argument is an hashref and there is a second arg: return that many random keys weighted by the values of that hash
2823              
2824             If there is no second argument and the first is an integer, a random integer between 0 and that number is returned. Including 0 and the number itself.
2825              
2826             B
2827              
2828             $dice=random(1,6); # 1, 2, 3, 4, 5 or 6
2829             $dice=random([1..6]); # same as previous
2830             @dice=random([1..6],10); # 10 dice tosses
2831             $dice=random({1=>1, 2=>1, 3=>1, 4=>1, 5=>1, 6=>2}); # weighted dice with 6 being twice as likely as the others
2832             @dice=random({1=>1, 2=>1, 3=>1, 4=>1, 5=>1, 6=>2},10); # 10 weighted dice tosses
2833             print random({head=>0.4999,tail=>0.4999,edge=>0.0002}); # coin toss (sum 1 here but not required to be)
2834             print random(2); # prints 0, 1 or 2
2835             print 2**random(7); # prints 1, 2, 4, 8, 16, 32, 64 or 128
2836             @dice=map random([1..6]), 1..10; # as third example above, but much slower
2837             perl -MAcme::Tools -le 'print for random({head=>0.499,tail=>0.499,edge=>0.002},10000);' | sort | uniq -c
2838              
2839             =cut
2840              
2841             sub random {
2842 21002     21002 1 35376 my($from,$to)=@_;
2843 21002         18639 my $ref=ref($from);
2844 21002 100       26358 if($ref eq 'ARRAY'){
    100          
2845 15001   100     56326 my @r=map $$from[rand@$from], 1..$to||1;
2846 15001 100       54115 return @_>1?@r:$r[0];
2847             }
2848             elsif($ref eq 'HASH') {
2849 4001         7947 my @k=keys%$from;
2850 33   66 33   151 my $max;do{no warnings 'uninitialized';$_>$max and $max=$_ or $_<0 and croak"negative weight" for values%$from};
  33   33     38  
  33   66     24984  
  4001         3073  
  4001         2228  
  4001         36591  
2851 4001   100     9020 my @r=map {my$r;1 while $$from{$r=$k[rand@k]}
  4010         2338  
  4010         10629  
  4010         6039  
2852 4001 100       10993 return @_>1?@r:$r[0];
2853             }
2854 2000 100       3404 ($from,$to)=(0,$from) if @_==1;
2855 2000 50       2957 ($from,$to)=($to,$from) if $from>$to;
2856 2000         4899 return int($from+rand(1+$to-$from));
2857             }
2858             #todo?: https://en.wikipedia.org/wiki/Irwin%E2%80%93Hall_distribution
2859              
2860             =head2 random_gauss
2861              
2862             Returns an pseudo-random number with a Gaussian distribution instead
2863             of the uniform distribution of perls C or C in this
2864             module. The algorithm is a variation of the one at
2865             L which is both faster
2866             and better than adding a long series of C.
2867              
2868             Uses perls C function internally.
2869              
2870             B 0 - 3 arguments.
2871              
2872             First argument: the average of the distribution. Default 0.
2873              
2874             Second argument: the standard deviation of the distribution. Default 1.
2875              
2876             Third argument: If a third argument is present, C
2877             returns an array of that many pseudo-random numbers. If there is no
2878             third argument, a number (a scalar) is returned.
2879              
2880             B One or more pseudo-random numbers with a Gaussian distribution. Also known as a Bell curve or Normal distribution.
2881              
2882             Example:
2883              
2884             my @I=random_gauss(100, 15, 100000); # produces 100000 pseudo-random numbers, average=100, stddev=15
2885             #my @I=map random_gauss(100, 15), 1..100000; # same but more than three times slower
2886             print "Average is: ".avg(@I)."\n"; # prints a number close to 100
2887             print "Stddev is: ".stddev(@I)."\n"; # prints a number close to 15
2888              
2889             my @M=grep $_>100+15*2, @I; # those above 130
2890             print "Percent above two stddevs: ".(100*@M/@I)."%\n"; #prints a number close to 2.2%
2891              
2892             Example 2:
2893              
2894             my $num=1e6;
2895             my @h; $h[$_/2]++ for random_gauss(100,15, $num);
2896             $h[$_] and printf "%3d - %3d %6d %s\n",
2897             $_*2,$_*2+1,$h[$_],'=' x ($h[$_]*1000/$num)
2898             for 1..200/2;
2899              
2900             ...prints an example of the famous Bell curve:
2901              
2902             44 - 45 70
2903             46 - 47 114
2904             48 - 49 168
2905             50 - 51 250
2906             52 - 53 395
2907             54 - 55 588
2908             56 - 57 871
2909             58 - 59 1238 =
2910             60 - 61 1807 =
2911             62 - 63 2553 ==
2912             64 - 65 3528 ===
2913             66 - 67 4797 ====
2914             68 - 69 6490 ======
2915             70 - 71 8202 ========
2916             72 - 73 10577 ==========
2917             74 - 75 13319 =============
2918             76 - 77 16283 ================
2919             78 - 79 20076 ====================
2920             80 - 81 23742 =======================
2921             82 - 83 27726 ===========================
2922             84 - 85 32205 ================================
2923             86 - 87 36577 ====================================
2924             88 - 89 40684 ========================================
2925             90 - 91 44515 ============================================
2926             92 - 93 47575 ===============================================
2927             94 - 95 50098 ==================================================
2928             96 - 97 52062 ====================================================
2929             98 - 99 53338 =====================================================
2930             100 - 101 52834 ====================================================
2931             102 - 103 52185 ====================================================
2932             104 - 105 50472 ==================================================
2933             106 - 107 47551 ===============================================
2934             108 - 109 44471 ============================================
2935             110 - 111 40704 ========================================
2936             112 - 113 36642 ====================================
2937             114 - 115 32171 ================================
2938             116 - 117 28166 ============================
2939             118 - 119 23618 =======================
2940             120 - 121 19873 ===================
2941             122 - 123 16360 ================
2942             124 - 125 13452 =============
2943             126 - 127 10575 ==========
2944             128 - 129 8283 ========
2945             130 - 131 6224 ======
2946             132 - 133 4661 ====
2947             134 - 135 3527 ===
2948             136 - 137 2516 ==
2949             138 - 139 1833 =
2950             140 - 141 1327 =
2951             142 - 143 860
2952             144 - 145 604
2953             146 - 147 428
2954             148 - 149 275
2955             150 - 151 184
2956             152 - 153 111
2957             154 - 155 67
2958              
2959             =cut
2960              
2961             sub random_gauss {
2962 1     1 1 3 my($avg,$stddev,$num)=@_;
2963 1 50       4 $avg=0 if !defined $avg;
2964 1 50       3 $stddev=1 if !defined $stddev;
2965 1 50       3 $num=1 if !defined $num;
2966 1 50       5 croak "random_gauss should not have more than 3 arguments" if @_>3;
2967 1         1 my @r;
2968 1         4 while (@r<$num) {
2969 2500         1378 my($x1,$x2,$w);
2970 2500         1349 do {
2971 3215         2066 $x1=2.0*rand()-1.0;
2972 3215         1767 $x2=2.0*rand()-1.0;
2973 3215         3925 $w=$x1*$x1+$x2*$x2;
2974             } while $w>=1.0;
2975 2500         1927 $w=sqrt(-2.0*log($w)/$w) * $stddev;
2976 2500         3733 push @r, $x1*$w + $avg,
2977             $x2*$w + $avg;
2978             }
2979 1 50       10 pop @r if @r > $num;
2980 1 50       13 return $r[0] if @_<3;
2981 1         543 return @r;
2982             }
2983              
2984             =head2 mix
2985              
2986             Mixes an array in random order. In-place if given an array reference or not if given an array.
2987              
2988             C could also have been named C, as in shuffling a deck of cards.
2989              
2990             Example:
2991              
2992             This:
2993              
2994             print mix("a".."z"),"\n" for 1..3;
2995              
2996             ...could write something like:
2997              
2998             trgoykzfqsduphlbcmxejivnwa
2999             qycatilmpgxbhrdezfwsovujkn
3000             ytogrjialbewcpvndhkxfzqsmu
3001              
3002             B
3003              
3004             =over 4
3005              
3006             =item 1.
3007             Either a reference to an array as the only input. This array will then be mixed I. The array will be changed:
3008              
3009             This: C<< @a=mix(@a) >> is the same as: C<< mix(\@a) >>.
3010              
3011             =item 2.
3012             Or an array of zero, one or more elements.
3013              
3014             =back
3015              
3016             Note that an input-array which COINCIDENTLY SOME TIMES has one element
3017             (but more other times), and that element is an array-ref, you will
3018             probably not get the expected result.
3019              
3020             To check distribution:
3021              
3022             perl -MAcme::Tools -le 'print mix("a".."z") for 1..26000'|cut -c1|sort|uniq -c|sort -n
3023              
3024             The letters a-z should occur around 1000 times each.
3025              
3026             Shuffles a deck of cards: (s=spaces, h=hearts, c=clubs, d=diamonds)
3027              
3028             perl -MAcme::Tools -le '@cards=map join("",@$_),cart([qw/s h c d/],[2..10,qw/J Q K A/]); print join " ",mix(@cards)'
3029              
3030             (Uses L, which is not a typo, see further down here)
3031              
3032             Note: C is approximately four times faster. Both respects the Perl built-in C.
3033              
3034             =cut
3035              
3036             sub mix {
3037 1020 50 33 1020 1 3947 if(@_==1 and ref($_[0]) eq 'ARRAY'){ #kun ett arg, og det er ref array
3038 0         0 my $r=$_[0];
3039 0         0 push@$r,splice(@$r,rand(@$r-$_),1) for 0..(@$r-1);
3040 0         0 return $r;
3041             }
3042             else{
3043 1020         1605 my@e=@_;
3044 1020         9491 push@e,splice(@e,rand(@e-$_),1) for 0..$#e;
3045 1020         4560 return @e;
3046             }
3047             }
3048              
3049             =head2 pwgen
3050              
3051             Generates random passwords.
3052              
3053             B 0-n args
3054              
3055             * First arg: length of password(s), default 8
3056              
3057             * Second arg: number of passwords, default 1
3058              
3059             * Third arg: string containing legal chars in password, default A-Za-z0-9,-./&%_!
3060              
3061             * Fourth to n'th arg: list of requirements for passwords, default if the third arg is false/undef (so default third arg is used) is:
3062              
3063             sub{/^[a-zA-Z0-9].*[a-zA-Z0-9]$/ and /[a-z]/ and /[A-Z]/ and /\d/ and /[,-.\/&%_!]/}
3064              
3065             ...meaning the password should:
3066             * start and end with: a letter a-z (lower- or uppercase) or a digit 0-9
3067             * should contain at least one char from each of the groups lower, upper, digit and special char
3068              
3069             To keep the default requirement-sub but add additional ones just set the fourth arg to false/undef
3070             and add your own requirements in the fifth arg and forward (examples below). Sub pwgen uses perls
3071             own C internally.
3072              
3073             C<< $Acme::Tools::Pwgen_max_sec >> and C<< $Acme::Tools::Pwgen_max_trials >> can be set to adjust for how long
3074             pwgen tries to find a password. Defaults for those are 0.01 and 10000.
3075             Whenever one of the two limits is reached, a first generates a croak.
3076              
3077             Examples:
3078              
3079             my $pw=pwgen(); # a random 8 chars password A-Z a-z 0-9 ,-./&%!_ (8 is default length)
3080             my $pw=pwgen(12); # a random 12 chars password A-Z a-z 0-9 ,-./&%!_
3081             my @pw=pwgen(0,10); # 10 random 8 chars passwords, containing the same possible chars
3082             my @pw=pwgen(0,1000,'A-Z'); # 1000 random 8 chars passwords containing just uppercase letters from A to Z
3083              
3084             pwgen(3); # dies, defaults require chars in each of 4 group (see above)
3085             pwgen(5,1,'A-C0-9', qr/^\D{3}\d{2}$/); # a 5 char string starting with three A, B or Cs and endring with two digits
3086             pwgen(5,1,'ABC0-9',sub{/^\D{3}\d{2}$/}); # same as above
3087              
3088             Examples of adding additional requirements to the default ones:
3089              
3090             my @pwreq = ( qr/^[A-C]/ );
3091             pwgen(8,1,'','',@pwreq); # use defaults for allowed chars and the standard requirements
3092             # but also demand that the password must start with A, B or C
3093              
3094             push @pwreq, sub{ not /[a-z]{3}/i };
3095             pwgen(8,1,'','',@pwreq); # as above and in addition the password should not contain three
3096             # or more consecutive letters (to avoid "offensive" words perhaps)
3097              
3098             =cut
3099              
3100             our $Pwgen_max_sec=0.01; #max seconds/password before croak (for hard to find requirements)
3101             our $Pwgen_max_trials=10000; #max trials/password before croak (for hard to find requirements)
3102             our $Pwgen_sec=0; #seconds used in last call to pwgen()
3103             our $Pwgen_trials=0; #trials in last call to pwgen()
3104 3515 100 100 3515 0 24889 sub pwgendefreq{/^[a-z].*[a-z\d]$/i and /[a-z]/ and /[A-Z]/ and /\d/ and /[,-.\/&%_!]/}
      100        
      100        
3105             sub pwgen {
3106 60     60 1 3107 my($len,$num,$chars,@req)=@_;
3107 60   100     101 $len||=8;
3108 60   100     85 $num||=1;
3109 60   100     87 $chars||='A-Za-z0-9,-./&%_!';
3110 60 100 50     105 $req[0]||=\&pwgendefreq if !$_[2];
3111 60         1583 $chars=~s/([$_])-([$_])/join("","$1".."$2")/eg for ('a-z','A-Z','0-9');
  125         1499  
3112 60         95 my($c,$t,@pw,$d)=(length($chars),time_fp());
3113 60         52 ($Pwgen_trials,$Pwgen_sec)=(0,0);
3114             TRIAL:
3115 60         100 while(@pw<$num){
3116 13909 100 66     38540 croak "pwgen timeout after $Pwgen_trials trials"
      66        
3117             if ++$Pwgen_trials >= $Pwgen_max_trials
3118             or ($d=time_fp()-$t) > $Pwgen_max_sec*$num
3119             and $d!~/^\d+$/; #jic int from time_fp
3120 13907         63713 my $pw=join"",map substr($chars,rand($c),1),1..$len;
3121 13907         17403 for my $r (@req){
3122 20556 100       27608 if (ref($r) eq 'CODE' ){ local$_=$pw; &$r() or next TRIAL }
  8179 100       6159  
  8179 50       8044  
3123 33 100   33   170 elsif(ref($r) eq 'Regexp'){ no warnings; $pw=~$r or next TRIAL }
  33         59  
  33         5983  
  12377         39004  
3124 0         0 else { croak "pwgen: invalid req type $r ".ref($r) }
3125             }
3126 651         1519 push@pw,$pw;
3127             }
3128 58         65 $Pwgen_sec=time_fp()-$t;
3129 58 100       294 return $pw[0] if $num==1;
3130 7         269 return @pw;
3131             }
3132              
3133             # =head1 veci
3134             #
3135             # Perls C takes 1, 2, 4, 8, 16, 32 and possibly 64 as its third argument.
3136             #
3137             # This limitation is removed with C (vec improved, but much slower)
3138             #
3139             # The third argument still needs to be 32 or lower (or possibly 64 or lower).
3140             #
3141             # =cut
3142             #
3143             # sub vecibs ($) {
3144             # my($s,$o,$b,$new)=@_;
3145             # if($b=~/^(1|2|4|8|16|32|64)$/){
3146             # return vec($s,$o,$b)=$new if @_==4;
3147             # return vec($s,$o,$b);
3148             # }
3149             # my $bb=$b<4?4:$b<8?8:$b<16?16:$b<32?32:$b<64?64:die;
3150             # my $ob=int($o*$b/$bb);
3151             # my $v=vec($s,$ob,$bb)*2**$bb+vec($s,$ob+1,$bb);
3152             # $v & (2**$b-1)
3153             # }
3154              
3155              
3156             =head1 SETS
3157              
3158             =head2 distinct
3159              
3160             Returns the values of the input list, sorted alfanumerically, but only
3161             one of each value. This is the same as L except uniq does not
3162             sort the returned list.
3163              
3164             Example:
3165              
3166             print join(", ", distinct(4,9,3,4,"abc",3,"abc")); # 3, 4, 9, abc
3167             print join(", ", distinct(4,9,30,4,"abc",30,"abc")); # 30, 4, 9, abc note: alphanumeric sort
3168              
3169             =cut
3170              
3171 1     1 1 282 sub distinct { return sort keys %{{map {($_,1)} @_}} }
  1         2  
  7         20  
3172              
3173             =head2 in
3174              
3175             Returns I<1> (true) if first argument is in the list of the remaining arguments. Uses the perl-operator C<< eq >>.
3176              
3177             Otherwise it returns I<0> (false).
3178              
3179             print in( 5, 1,2,3,4,6); # 0
3180             print in( 4, 1,2,3,4,6); # 1
3181             print in( 'a', 'A','B','C','aa'); # 0
3182             print in( 'a', 'A','B','C','a'); # 1
3183              
3184             I guess in perl 5.10 or perl 6 you could use the C<< ~~ >> operator instead.
3185              
3186             =head2 in_num
3187              
3188             Just as sub L, but for numbers. Internally uses the perl operator C<< == >> instead of C< eq >.
3189              
3190             print in(5000, '5e3'); # 0
3191             print in(5000, 5e3); # 1 since 5e3 is converted to 5000 before the call
3192             print in_num(5000, 5e3); # 1
3193             print in_num(5000, '+5.0e03'); # 1
3194              
3195             =cut
3196              
3197             sub in {
3198 33     33   151 no warnings 'uninitialized';
  33         48  
  33         2292  
3199 95     95 1 109 my $val=shift;
3200 95 100       90 for(@_){ return 1 if $_ eq $val }
  294         471  
3201 6         20 return 0;
3202             }
3203              
3204             sub in_num {
3205 33     33   129 no warnings 'uninitialized';
  33         45  
  33         59289  
3206 1     1 1 2 my $val=shift;
3207 1 100       3 for(@_){ return 1 if $_ == $val }
  5000         5246  
3208 0         0 return 0;
3209             }
3210              
3211             =head2 union
3212              
3213             Input: Two arrayrefs. (Two lists, that is)
3214              
3215             Output: An array containing all elements from both input lists, but no element more than once even if it occurs twice or more in the input.
3216              
3217             Example, prints 1,2,3,4:
3218              
3219             perl -MAcme::Tools -le 'print join ",", union([1,2,3],[2,3,3,4,4])' # 1,2,3,4
3220              
3221             =cut
3222              
3223 1     1 1 2 sub union { my %seen; grep !$seen{$_}++, map @{shift()},@_ }
  1         2  
  2         12  
3224             =head2 minus
3225              
3226             Input: Two arrayrefs.
3227              
3228             Output: An array containing all elements in the first input array but not in the second.
3229              
3230             Example:
3231              
3232             perl -MAcme::Tools -le 'print join " ", minus( ["five", "FIVE", 1, 2, 3.0, 4], [4, 3, "FIVE"] )'
3233              
3234             Output is C<< five 1 2 >>.
3235              
3236             =cut
3237              
3238             sub minus {
3239 1     1 0 2 my %seen;
3240 1         2 my %notme=map{($_=>1)}@{$_[1]};
  3         6  
  1         3  
3241 1   66     1 grep !$notme{$_}&&!$seen{$_}++, @{$_[0]};
  1         24  
3242             }
3243              
3244             =head2 intersect
3245              
3246             Input: Two arrayrefs
3247              
3248             Output: An array containing all elements which exists in both input arrays.
3249              
3250             Example:
3251              
3252             perl -MAcme::Tools -le 'print join" ", intersect( ["five", 1, 2, 3.0, 4], [4, 2+1, "five"] )' # 4 3 five
3253              
3254             Output: C<< 4 3 five >>
3255              
3256             =cut
3257              
3258             sub intersect {
3259 44     44 1 33 my %first=map{($_=>1)}@{$_[0]};
  52         92  
  44         69  
3260 44         38 my %seen;
3261 44 100       30 return grep{$first{$_}&&!$seen{$_}++}@{$_[1]};
  50         202  
  44         35  
3262             }
3263              
3264             =head2 not_intersect
3265              
3266             Input: Two arrayrefs
3267              
3268             Output: An array containing all elements member of just one of the input arrays (not both).
3269              
3270             Example:
3271              
3272             perl -MAcme::Tools -le ' print join " ", not_intersect( ["five", 1, 2, 3.0, 4], [4, 2+1, "five"] )'
3273              
3274             The output is C<< 1 2 >>.
3275              
3276             =cut
3277              
3278             sub not_intersect {
3279 1     1 1 1 my %code;
3280             my %seen;
3281 1         2 for(@{$_[0]}){$code{$_}|=1}
  1         3  
  5         11  
3282 1         3 for(@{$_[1]}){$code{$_}|=2}
  1         2  
  3         4  
3283 1 100       1 return grep{$code{$_}!=3&&!$seen{$_}++}(@{$_[0]},@{$_[1]});
  8         28  
  1         2  
  1         1  
3284             }
3285              
3286             =head2 uniq
3287              
3288             Input: An array of strings (or numbers)
3289              
3290             Output: The same array in the same order, except elements which exists earlier in the list.
3291              
3292             Same as L but distinct sorts the returned list, I does not.
3293              
3294             Example:
3295              
3296             my @t=(7,2,3,3,4,2,1,4,5,3,"x","xx","x",02,"07");
3297             print join " ", uniq @t; # prints 7 2 3 4 1 5 x xx 07
3298              
3299             =cut
3300              
3301 1     1 1 2 sub uniq(@) { my %seen; grep !$seen{$_}++, @_ }
  1         19  
3302              
3303             =head1 HASHES
3304              
3305             =head2 subhash
3306              
3307             Copies a subset of keys/values from one hash to another.
3308              
3309             B First argument is a reference to a hash. The rest of the arguments are a list of the keys of which key/value-pair you want to be copied.
3310              
3311             B The hash consisting of the keys and values you specified.
3312              
3313             Example:
3314              
3315             %population = ( Norway=>5000000, Sweden=>9500000, Finland=>5400000,
3316             Denmark=>5600000, Iceland=>320000,
3317             India => 1.21e9, China=>1.35e9, USA=>313e6, UK=>62e6 );
3318              
3319             %scandinavia = subhash( \%population , 'Norway', 'Sweden', 'Denmark' ); # this and
3320             %scandinavia = (Norway=>5000000,Sweden=>9500000,Denmark=>5600000); # this is the same
3321              
3322             print "Population of $_ is $scandinavia{$_}\n" for keys %scandinavia;
3323              
3324             ...prints the populations of the three scandinavian countries.
3325              
3326             Note: The values are NOT deep copied when they are references. (Use C<< Storable::dclone() >> to do that).
3327              
3328             Note2: For perl version 5.20+ subhashes (hash slices returning keys as well as values) is built in like this:
3329              
3330             %scandinavia = %population{'Norway','Sweden','Denmark'};
3331              
3332             =cut
3333              
3334             sub subhash {
3335 1     1 1 2 my $hr=shift;
3336 1         1 my @r;
3337 1         3 for(@_){ push@r,($_=>$$hr{$_}) }
  3         6  
3338 1         7 return @r;
3339             }
3340              
3341             =head2 hashtrans
3342              
3343             B a reference to a hash of hashes
3344              
3345             B a hash like the input-hash, but matrix transposed (kind of). Think of it as if X and Y has swapped places.
3346              
3347             %h = ( 1 => {a=>33,b=>55},
3348             2 => {a=>11,b=>22},
3349             3 => {a=>88,b=>99} );
3350             print serialize({hashtrans(\%h)},'v');
3351              
3352             Gives:
3353              
3354             %v=( 'a'=>{'1'=>'33','2'=>'11','3'=>'88'},
3355             'b'=>{'1'=>'55','2'=>'22','3'=>'99'} );
3356              
3357             =cut
3358              
3359             #Hashtrans brukes automatisk når første argument er -1 i sub hashtabell()
3360              
3361             sub hashtrans {
3362 1     1 1 5 my $h=shift;
3363 1         2 my %new;
3364 1         4 for my $k (keys%$h){
3365 3         4 my $r=$$h{$k};
3366 3         5 for(keys%$r){
3367 6         9 $new{$_}{$k}=$$r{$_};
3368             }
3369             }
3370 1         9 return %new;
3371             }
3372              
3373             =head2 a2h
3374              
3375             B array of arrays
3376              
3377             B array of hashes
3378              
3379             Transforms an array of arrays (arrayrefs) to an array of hashes (hashrefs).
3380              
3381             Example:
3382              
3383             my @h = a2h( ['Name', 'Age', 'Gender'], #1st row become keys
3384             ['Alice', 20, 'F'],
3385             ['Bob', 30, 'M'],
3386             ['Eve', undef, 'F'] );
3387              
3388             Result array @h:
3389              
3390             (
3391             {Name=>'Alice', Age=>20, Gender=>'F'},
3392             {Name=>'Bob', Age=>30, Gender=>'M'},
3393             {Name=>'Eve', Age=>undef, Gender=>'F'},
3394             );
3395              
3396             =head2 h2a
3397              
3398             B array of hashes
3399              
3400             B array of arrays
3401              
3402             Opposite of L
3403              
3404             =cut
3405              
3406             sub a2h {
3407 1     1 1 25 my @col=@{shift@_};
  1         4  
3408 1         3 map { my%h;@h{@col}=@$_;\%h} @_;
  10         6  
  10         15  
  10         15  
3409             }
3410              
3411             sub h2a {
3412 1     1 1 5 my %c;
3413 1         16 map $c{$_}++, keys%$_ for @_;
3414 1 0       8 my @c=sort{$c{$a}<=>$c{$b} or $a cmp $b}keys%c;
  5         11  
3415 1         11 (\@c,map[@$_{@c}],@_);
3416             }
3417              
3418             =head1 COMPRESSION
3419              
3420             L, L, L, L, L, and L
3421             compresses and uncompresses strings to save space in disk, memory,
3422             database or network transfer. Trades time for space. (Beware of wormholes)
3423              
3424             =head2 zipb64
3425              
3426             Compresses the input (text or binary) and returns a base64-encoded string of the compressed binary data.
3427             No known limit on input length, several MB has been tested, as long as you've got the RAM...
3428              
3429             B One or two strings.
3430              
3431             First argument: The string to be compressed.
3432              
3433             Second argument is optional: A I string.
3434              
3435             B a base64-kodet string of the compressed input.
3436              
3437             The use of an optional I string will result in an even
3438             further compressed output in the dictionary string is somewhat similar
3439             to the string that is compressed (the data in the first argument).
3440              
3441             If x relatively similar string are to be compressed, i.e. x number
3442             automatic of email responses to some action by a user, it will pay of
3443             to choose one of those x as a dictionary string and store it as
3444             such. (You will also use the same dictionary string when decompressing
3445             using L.
3446              
3447             The returned string is base64 encoded. That is, the output is 33%
3448             larger than it has to be. The advantage is that this string more
3449             easily can be stored in a database (without the hassles of CLOB/BLOB)
3450             or perhaps easier transfer in http POST requests (it still needs some
3451             url-encoding, normally). See L and L for the
3452             same without base 64 encoding.
3453              
3454             Example 1, normal compression without dictionary:
3455              
3456             $txt = "Test av komprimering, hva skjer? " x 10; # ten copies of this norwegian string, $txt is now 330 bytes (or chars rather...)
3457             print length($txt)," bytes input!\n"; # prints 330
3458             $zip = zipb64($txt); # compresses
3459             print length($zip)," bytes output!\n"; # prints 65
3460             print $zip; # prints the base64 string ("noise")
3461              
3462             $output=unzipb64($zip); # decompresses
3463             print "Hurra\n" if $output eq $txt; # prints Hurra if everything went well
3464             print length($output),"\n"; # prints 330
3465              
3466             Example 2, same compression, now with dictionary:
3467              
3468             $txt = "Test av komprimering, hva skjer? " x 10; # Same original string as above
3469             $dict = "Testing av kompresjon, hva vil skje?"; # dictionary with certain similarities
3470             # of the text to be compressed
3471             $zip2 = zipb64($txt,$dict); # compressing with $dict as dictionary
3472             print length($zip2)," bytes output!\n"; # prints 49, which is less than 65 in ex. 1 above
3473             $output=unzipb64($zip2,$dict); # uses $dict in the decompressions too
3474             print "Hurra\n" if $output eq $txt; # prints Hurra if everything went well
3475              
3476              
3477             Example 3, dictionary = string to be compressed: (out of curiosity)
3478              
3479             $txt = "Test av komprimering, hva skjer? " x 10; # Same original string as above
3480             $zip3 = zipb64($txt,$txt); # hmm
3481             print length($zip3)," bytes output!\n"; # prints 25
3482             print "Hurra\n" if unzipb64($zip3,$txt) eq $txt; # hipp hipp ...
3483              
3484             zipb64() and zipbin() is really just wrappers around L and C & co there.
3485              
3486             =cut
3487              
3488             sub zipb64 {
3489 9     9 1 1008 require MIME::Base64;
3490 9         898 return MIME::Base64::encode_base64(zipbin(@_));
3491             }
3492              
3493              
3494             =head2 zipbin
3495              
3496             C does the same as C except that zipbin()
3497             does not base64 encode the result. Returns binary data.
3498              
3499             See L for documentation.
3500              
3501             =cut
3502              
3503             sub zipbin {
3504 17     17 1 13395 require Compress::Zlib;
3505 17         30 my($data,$dict)=@_;
3506 17 50 100     145 my $x=Compress::Zlib::deflateInit(-Dictionary=>$dict||'',-Level=>Compress::Zlib::Z_BEST_COMPRESSION()) or croak();
3507 17 50       5433 my($output,$status)=$x->deflate($data); croak() if $status!=Compress::Zlib::Z_OK();
  17         50749  
3508 17 50       112 my($out,$status2)=$x->flush(); croak() if $status2!=Compress::Zlib::Z_OK();
  17         2219  
3509 17         609 return $output.$out;
3510             }
3511              
3512             =head2 unzipb64
3513              
3514             Opposite of L.
3515              
3516             Input:
3517              
3518             First argument: A string made by L
3519              
3520             Second argument: (optional) a dictionary string which where used in L.
3521              
3522             Output: The original string (be it text or binary).
3523              
3524             See L.
3525              
3526             =cut
3527              
3528             sub unzipb64 {
3529 2     2 1 7 my($data,$dict)=@_;
3530 2         12 require MIME::Base64;
3531 2         32 unzipbin(MIME::Base64::decode_base64($data),$dict);
3532             }
3533              
3534             =head2 unzipbin
3535              
3536             C does the same as L except that C
3537             wants a pure binary compressed string as input, not base64.
3538              
3539             See L for documentation.
3540              
3541             =cut
3542              
3543             sub unzipbin {
3544 5     5 1 291 require Compress::Zlib;
3545 5         15 require Carp;
3546 5         11 my($data,$dict)=@_;
3547 5 50 50     43 my $x=Compress::Zlib::inflateInit(-Dictionary=>$dict||'') or croak();
3548 5         594 my($output,$status)=$x->inflate($data);
3549 5 50       507 croak() if $status!=Compress::Zlib::Z_STREAM_END();
3550 5         92 return $output;
3551             }
3552              
3553             =head2 gzip
3554              
3555             B A string or reference to a string you want to compress. Text or binary.
3556              
3557             B The binary compressed representation of that input string.
3558              
3559             C is really just a wrapper for C< Compress:Zlib::memGzip() > and uses the same
3560             compression algorithm as the well known GNU program gzip found in most unix/linux/cygwin
3561             distros. Except C does this in-memory. (Both using the C-library C).
3562              
3563             writefile( "file.gz", gzip("some string") );
3564              
3565             =head2 gunzip
3566              
3567             B A binary compressed string or a reference to such a string. I.e. something returned from
3568             C earlier or read from a C<< .gz >> file.
3569              
3570             B The original larger non-compressed string. Text or binary.
3571              
3572             C is a wrapper for Compress::Zlib::memGunzip()
3573              
3574             print gunzip( gzip("some string") ); #some string
3575              
3576             =head2 bzip2
3577              
3578             Same as L and L except with a different compression algorithm (compresses more but is slower). Wrapper for Compress::Bzip2::memBzip.
3579              
3580             Compared to gzip/gunzip, bzip2 compression is much slower, bunzip2 decompression not so much.
3581              
3582             See also L, C, C, C.
3583              
3584             writefile( "file.bz2", bzip2("some string") );
3585             print bunzip2( bzip2("some string") ); #some string
3586              
3587             =head2 bunzip2
3588              
3589             Decompressed something compressed by bzip2() or data from a C<.bz2> file. See L.
3590              
3591             =cut
3592              
3593 8 100   8 1 13352 sub gzip { my $s=shift(); eval"require Compress::Zlib" if !$INC{'Compress/Zlib.pm'}; croak "Compress::Zlib not found" if $@; Compress::Zlib::memGzip( ref($s)?$s:\$s ) }
  8 50       263  
  8 50       108940  
  8         47  
3594 3 50   3 1 12898 sub gunzip { my $s=shift(); eval"require Compress::Zlib" if !$INC{'Compress/Zlib.pm'}; croak "Compress::Zlib not found" if $@; Compress::Zlib::memGunzip( ref($s)?$s:\$s ) }
  3 50       18  
  3 50       9  
  3         24  
3595 0 0   0 1 0 sub bzip2 { my $s=shift(); eval"require Compress::Bzip2" if !$INC{'Compress/Bzip2.pm'}; croak "Compress::Bzip2 not found" if $@; Compress::Bzip2::memBzip( ref($s)?$s:\$s ) }
  0 0       0  
  0 0       0  
  0         0  
3596 0 0   0 1 0 sub bunzip2 { my $s=shift(); eval"require Compress::Bzip2" if !$INC{'Compress/Bzip2.pm'}; croak "Compress::Bzip2 not found" if $@; Compress::Bzip2::memBunzip( ref($s)?$s:\$s ) }
  0 0       0  
  0 0       0  
  0         0  
3597              
3598             =head1 NET, WEB, CGI-STUFF
3599              
3600             =head2 ipaddr
3601              
3602             B an IP-number
3603              
3604             B either an IP-address I or an empty string
3605             if the DNS lookup didn't find anything.
3606              
3607             Example:
3608              
3609             perl -MAcme::Tools -le 'print ipaddr("129.240.8.200")' # prints www.uio.no
3610              
3611             Uses perls C internally.
3612              
3613             C memoizes the results internally (using the
3614             C<%Acme::Tools::IPADDR_memo> hash) so only the first loopup on a
3615             particular IP number might take some time.
3616              
3617             Some few DNS loopups can take several seconds.
3618             Most is done in a fraction of a second. Due to this slowness, medium to high traffic web servers should
3619             probably turn off hostname lookups in their logs and just log IP numbers by using
3620             C in Apache C and then use I afterwards if necessary.
3621              
3622             =cut
3623              
3624             our %IPADDR_memo;
3625             sub ipaddr {
3626 2     2 1 582 my $ipnr=shift;
3627             #NB, 2-tallet på neste kodelinje er ikke det samme på alle os,
3628             #men ser ut til å funke i linux og hpux. Den Riktige Måten(tm)
3629             #er konstanten AF_INET i Socket eller IO::Socket-pakken.
3630 2   66     113005 return $IPADDR_memo{$ipnr} ||= gethostbyaddr(pack("C4",split("\\.",$ipnr)),2);
3631             }
3632              
3633             =head2 ipnum
3634              
3635             C does the opposite of C
3636              
3637             Does an attempt of converting an IP address (hostname) to an IP number.
3638             Uses DNS name servers via perls internal C.
3639             Return empty string (undef) if unsuccessful.
3640              
3641             print ipnum("www.uio.no"); # prints 129.240.13.152
3642              
3643             Does internal memoization via the hash C<%Acme::Tools::IPNUM_memo>.
3644              
3645             =cut
3646              
3647             our %IPNUM_memo;
3648             sub ipnum {
3649 1     1 1 2 my $ipaddr=shift;
3650             #croak "No $ipaddr" if !length($ipaddr);
3651 1 50       4 return $IPNUM_memo{$ipaddr} if exists $IPNUM_memo{$ipaddr};
3652 1         50704 my $h=gethostbyname($ipaddr);
3653             #croak "No ipnum for $ipaddr" if !$h;
3654 1 50       15 return if !defined $h;
3655 1         16 my $ipnum = join(".",unpack("C4",$h));
3656 1 50       35 $IPNUM_memo{$ipaddr} = $ipnum=~/^(\d+\.){3}\d+$/ ? $ipnum : undef;
3657 1         13 return $IPNUM_memo{$ipaddr};
3658             }
3659              
3660             =head2 webparams
3661              
3662             B (optional)
3663              
3664             Zero or one input argument: A string of the same type often found behind the first question mark (C<< ? >>) in URLs.
3665              
3666             This string can have one or more parts separated by C<&> chars.
3667              
3668             Each part consists of C pairs (with the first C<=> char being the separation char).
3669              
3670             Both C and C can be url-encoded.
3671              
3672             If there is no input argument, C uses C<< $ENV{QUERY_STRING} >> instead.
3673              
3674             If also C<< $ENV{QUERY_STRING} >> is lacking, C checks if C<< $ENV{REQUEST_METHOD} eq 'POST' >>.
3675             In that case C<< $ENV{CONTENT_LENGTH} >> is taken as the number of bytes to be read from C
3676             and those bytes are used as the missing input argument.
3677              
3678             The environment variables QUERY_STRING, REQUEST_METHOD and CONTENT_LENGTH is
3679             typically set by a web server following the CGI standard (which Apache and
3680             most of them can do I guess) or in mod_perl by Apache. Although you are
3681             probably better off using L. Or C<< $R->args() >> or C<< $R->content() >> in mod_perl.
3682              
3683             B
3684              
3685             C returns a hash of the key/value pairs in the input argument. Url-decoded.
3686              
3687             If an input string has more than one occurrence of the same key, that keys value in the returned hash will become concatenated each value separated by a C<,> char. (A comma char)
3688              
3689             Examples:
3690              
3691             use Acme::Tools;
3692             my %R=webparams();
3693             print "Content-Type: text/plain\n\n"; # or rather \cM\cJ\cM\cJ instead of \n\n to be http-compliant
3694             print "My name is $R{name}";
3695              
3696             Storing those four lines in a file in the directory designated for CGI-scripts
3697             on your web server (or perhaps naming the file .cgi is enough), and C
3698             /.../cgi-bin/script> and the URL
3699             L will print
3700             C to the web page.
3701              
3702             L will print C.
3703              
3704             =cut
3705              
3706             sub webparams {
3707 2     2 1 358 my $query=shift();
3708 2 50       7 $query=$ENV{QUERY_STRING} if !defined $query;
3709 2 50 33     7 if(!defined $query and $ENV{REQUEST_METHOD} eq "POST"){
3710 0         0 read(STDIN,$query , $ENV{CONTENT_LENGTH});
3711 0         0 $ENV{QUERY_STRING}=$query;
3712             }
3713 2         3 my %R;
3714 2         11 for(split("&",$query)){
3715 7 50       13 next if !length($_);
3716 7         15 my($nkl,$verdi)=map urldec($_),split("=",$_,2);
3717 7 100       19 $R{$nkl}=exists$R{$nkl}?"$R{$nkl},$verdi":$verdi;
3718             }
3719 2         13 return %R;
3720             }
3721              
3722             =head2 urlenc
3723              
3724             Input: a string
3725              
3726             Output: the same string URL encoded so it can be sent in URLs or POST requests.
3727              
3728             In URLs (web addresses) certain characters are illegal. For instance I and I.
3729             And certain other chars have special meaning, such as C<+>, C<%>, C<=>, C, C<&>.
3730              
3731             These illegal and special chars needs to be encoded to be sent in
3732             URLs. This is done by sending them as C<%> and two hex-digits. All
3733             chars can be URL encodes this way, but it's necessary just on some.
3734              
3735             Example:
3736              
3737             $search="Østdal, Åge";
3738             my $url="http://machine.somewhere.com/search?q=" . urlenc($search);
3739             print $url;
3740              
3741             Prints C<< http://machine.somewhere.com/search?q=%D8stdal%2C%20%C5ge >>
3742              
3743             =cut
3744              
3745             sub urlenc {
3746 4     4 1 787 my $str=shift;
3747 4         13 $str=~s/([^\w\-\.\/\,\[\]])/sprintf("%%%02x",ord($1))/eg; #more chars is probably legal...
  622         802  
3748 4         21 return $str;
3749             }
3750              
3751             =head2 urldec
3752              
3753             Opposite of L.
3754              
3755             Example, this returns 'C< ø>'. That is space and C<< ø >>.
3756              
3757             urldec('+%C3')
3758              
3759             =cut
3760              
3761             sub urldec {
3762 14     14 1 11 my $str=shift;
3763 14         13 $str=~s/\+/ /gs;
3764 14         75 $str=~s/%([a-f\d]{2})/pack("C", hex($1))/egi;
  623         823  
3765 14         26 return $str;
3766             }
3767              
3768             =head2 ht2t
3769              
3770             C is short for I.
3771              
3772             This sub extracts an html-C<< >>s and returns its C<< s >>
3773             and C<< s >> as an array of arrayrefs. And strips away any html
3774             inside the C<< s >> as well.
3775              
3776             my @table = ht2t($html,'some string occuring before the you want');
3777              
3778             Input: One or two arguments.
3779              
3780             First argument: the html where a C<< >> is to be found and converted.
3781              
3782             Second argument: (optional) If the html contains more than one C<<
3783             >>, and you do not want the first one, applying a second
3784             argument is a way of telling C which to capture: the one with this word
3785             or string occurring before it.
3786              
3787             Output: An array of arrayrefs.
3788              
3789             C is a quick and dirty way of scraping (or harvesting as it is
3790             also called) data from a web page. Look too L to do this
3791             more accurate.
3792              
3793             Example:
3794              
3795             use Acme::Tools;
3796             use LWP::Simple;
3797             my $url = "http://en.wikipedia.org/wiki/List_of_countries_by_population";
3798             for( ht2t( get($url), "Countries" ) ) {
3799             my($rank, $country, $pop) = @$_;
3800             $pop =~ s/,//g;
3801             printf "%3d | %-32s | %9d\n", @$_ if $pop>0;
3802             }
3803              
3804             Output:
3805              
3806             1 | China | 1367740000
3807             2 | India | 1262090000
3808             3 | United States | 319043000
3809             4 | Indonesia | 252164800
3810             5 | Brazil | 203404000
3811              
3812             ...and so on.
3813              
3814             =cut
3815              
3816             sub ht2t {
3817 9 100 100 9 1 274 my($f,$s,$r)=@_; 1>@_||@_>3 and croak; $s='' if @_==1;
  9 100       362  
  7         19  
3818 7         136 $f=~s,.*?($s).*?(
3819 33     33   22469 my $e=0;$e++ while index($f,$s=chr($e))>=$[;
  33         9306  
  33         95235  
  7         9  
  7         49  
3820 7         112 $f=~s//\l$1$s/gsi;
3821 7         45 $f=~s/\s*<.*?>\s*/ /gsi;
3822 7         81 my @t=split("r$s",$f);shift @t;
  7         9  
3823 20     20   19 $r||=sub{s/&(#160|nbsp);/ /g;s/&/&/g;s/^\s*(.*?)\s*$/$1/s;
  20         21  
  20         53  
3824 7 100 100     42 s/(\d) (\d)/$1$2/g if /^[\d \.\,]+$/};
  20         46  
3825 7         15 for(@t){my @r=split/[dh]$s/;shift@r;$_=[map{&$r;$_}@r]}
  9         54  
  9         7  
  9         14  
  20         21  
  20         46  
3826 7         46 @t;
3827             }
3828              
3829             =head1 FILES, DIRECTORIES
3830              
3831             =head2 writefile
3832              
3833             Justification:
3834              
3835             Perl needs three or four operations to make a file out of a string:
3836              
3837             open my $FILE, '>', $filename or die $!;
3838             print $FILE $text;
3839             close($FILE);
3840              
3841             This is way simpler:
3842              
3843             writefile($filename,$text);
3844              
3845             Sub writefile opens the file i binary mode (C) and has two usage modes:
3846              
3847             B Two arguments
3848              
3849             B is the filename. If the file exists, its overwritten.
3850             If the file can not be opened for writing, a die (a croak really) happens.
3851              
3852             B is one of:
3853              
3854             =over 4
3855              
3856             =item * Either a scaler. That is a normal string to be written to the file.
3857              
3858             =item * Or a reference to a scalar. That referred text is written to the file.
3859              
3860             =item * Or a reference to an array of scalars. This array is the written to the
3861             file element by element and C<< \n >> is automatically appended to each element.
3862              
3863             =back
3864              
3865             Alternativelly, you can write several files at once.
3866              
3867             Example, this:
3868              
3869             writefile('file1.txt','The text....tjo');
3870             writefile('file2.txt','The text....hip');
3871             writefile('file3.txt','The text....and hop');
3872              
3873             ...is the same as this:
3874              
3875             writefile([
3876             ['file1.txt','The text....tjo'],
3877             ['file2.txt','The text....hip'],
3878             ['file3.txt','The text....and hop'],
3879             ]);
3880              
3881             B Nothing (for the time being). Cs (C really) if something goes wrong.
3882              
3883             =cut
3884              
3885             #todo: use openstr() as in readfile(), transparently gzip .gz filenames and so on
3886             sub writefile {
3887 9     9 1 70274 my($filename,$text)=@_;
3888 9 50       38 if(ref($filename) eq 'ARRAY'){
3889 0         0 writefile(@$_) for @$filename;
3890 0         0 return;
3891             }
3892 9 50 33     521 open(WRITEFILE,">",$filename) and binmode(WRITEFILE) or croak($!);
3893 9 50 33     54 if(!defined $text or !ref($text)){
    0          
    0          
3894 9         746 print WRITEFILE $text;
3895             }
3896             elsif(ref($text) eq 'SCALAR'){
3897 0         0 print WRITEFILE $$text;
3898             }
3899             elsif(ref($text) eq 'ARRAY'){
3900 0         0 print WRITEFILE "$_\n" for @$text;
3901             }
3902             else {
3903 0         0 croak;
3904             }
3905 9         201 close(WRITEFILE);
3906 9         1419 return;
3907             }
3908              
3909             =head2 readfile
3910              
3911             Just as with L you can read in a whole file in one operation with C. Instead of:
3912              
3913             open my $FILE,'<', $filename or die $!;
3914             my $data = join"",<$FILE>;
3915             close($FILE);
3916              
3917             This is simpler:
3918              
3919             my $data = readfile($filename);
3920              
3921             B
3922              
3923             Reading the content of the file to a scalar variable: (Any content in C<$data> will be overwritten)
3924              
3925             my $data;
3926             readfile('filename.txt',\$data);
3927              
3928             Reading the lines of a file into an array:
3929              
3930             my @lines;
3931             readfile('filnavn.txt',\@lines);
3932             for(@lines){
3933             ...
3934             }
3935              
3936             Note: Chomp is done on each line. That is, any newlines (C<< \n >>) will be removed.
3937             If C<@lines> is non-empty, this will be lost.
3938              
3939             Sub readfile is context aware. If an array is expected it returns an array of the lines without a trailing C<< \n >>.
3940             The last example can be rewritten:
3941              
3942             for(readfile('filnavn.txt')){
3943             ...
3944             }
3945              
3946             With two input arguments, nothing (undef) is returned from C.
3947              
3948             =cut
3949              
3950             #http://blogs.perl.org/users/leon_timmermans/2013/05/why-you-dont-need-fileslurp.html
3951             #todo: readfile with grep-filter code ref in a third arg (avoid reading all into mem)
3952              
3953             sub readfile {
3954 12     12 1 8067 my($filename,$ref)=@_;
3955 12 100       25 if(@_==1){
3956 6 100       15 if(wantarray){ my @data; readfile($filename,\@data); return @data }
  1         1  
  1         4  
  1         3855  
3957 5         5 else { my $data; readfile($filename,\$data); return $data }
  5         343  
  5         400  
3958             }
3959             else {
3960 6 50       35 open my $fh,openstr($filename) or croak("ERROR: readfile $! $?");
3961 6 100       31 if ( ref($ref) eq 'SCALAR') { $$ref=join"",<$fh> }
  5 50       5383  
3962 1         12 elsif( ref($ref) eq 'ARRAY' ) { while(my $l=<$fh>){ chomp($l); push @$ref, $l } }
  20003         12022  
  20003         31678  
3963 0         0 else { croak "ERROR: Second arg to readfile should be a ref to a scalar og array" }
3964 6         766 close($fh);
3965 6         33 return;#?
3966             }
3967             }
3968              
3969             =head2 readdirectory
3970              
3971             B
3972              
3973             Name of a directory.
3974              
3975             B
3976              
3977             A list of all files in it, except of C<.> and C<..> (on linux/unix systems, all directories have a C<.> and C<..> directory).
3978              
3979             The names of all types of files are returned: normal files, directories, symbolic links,
3980             pipes, semaphores. That is every thing shown by C except C<.> and C<..>
3981              
3982             C do not recurce down into subdirectories (but see example below).
3983              
3984             B
3985              
3986             my @files = readdirectory("/tmp");
3987              
3988             B
3989              
3990             Sometimes calling the built ins C, C and C seems a tad tedious, since this:
3991              
3992             my $dir="/usr/bin";
3993             opendir(D,$dir);
3994             my @files=map "$dir/$_", grep {!/^\.\.?$/} readdir(D);
3995             closedir(D);
3996              
3997             Is the same as this:
3998              
3999             my @files=readdirectory("/usr/bin");
4000              
4001             See also: L
4002              
4003             B
4004              
4005             On huge directories with perhaps tens or houndreds of thousands of
4006             files, readdirectory() will consume more memory than perls
4007             opendir/readdir. This isn't usually a concern anymore for modern
4008             computers with gigabytes of RAM, but might be the rationale behind
4009             Perls more tedious way created in the 80s. The same argument goes for
4010             file slurping. On the other side it's also a good practice to never
4011             assume to much on available memory and the number of files if you
4012             don't know for certain that enough memory is available whereever your
4013             code is run or that the size of the directory is limited.
4014              
4015             B
4016              
4017             How to get all files in the C directory including all subdirectories below of any depth:
4018              
4019             my @files=("/tmp");
4020             map {-d $_ and unshift @files,$_ or push @files,$_} readdirectory(shift(@files)) while -d $files[0];
4021              
4022             ...or to avoid symlinks and only get real files:
4023              
4024             map {-d and !-l and unshift @files,$_ or -f and !-l and push @files,$_} readdirectory(shift(@files)) while -d $files[0];
4025              
4026             =cut
4027              
4028             sub readdirectory {
4029 0     0 1 0 my $dir=shift;
4030 0         0 opendir(my $D,$dir);
4031 0         0 my @filer=map "$dir/$_", grep {!/^\.\.?$/} readdir($D);
  0         0  
4032 0         0 closedir($D);
4033 0         0 return @filer;
4034             }
4035              
4036             =head2 basename
4037              
4038             The basename and dirname functions behaves like the *nix shell commands with the same names.
4039              
4040             B One or two arguments: Filename and an optional suffix
4041              
4042             B Returns the filename with any directory and (if given) the suffix removed.
4043              
4044             basename('/usr/bin/perl') # returns 'perl'
4045             basename('/usr/local/bin/report.pl','.pl') # returns 'report' since .pl at the end is removed
4046             basename('report2.pl','.pl') # returns 'report2'
4047             basename('report2.pl','.\w+') # returns 'report2.pl', probably not what you meant
4048             basename('report2.pl',qr/.\w+/) # returns 'report2', use qr for regex
4049              
4050             =head2 dirname
4051              
4052             B A filename including path
4053              
4054             B Removes the filename path and returns just the directory path up until but not including
4055             the last /. Return just a one char C<< . >> (period string) if there is no directory in the input.
4056              
4057             dirname('/usr/bin/perl') # returns '/usr/bin'
4058             dirname('perl') # returns '.'
4059              
4060             =head2 username
4061              
4062             Returns the current linux/unix username, for example the string root
4063              
4064             print username(); #just (getpwuid($<))[0] but more readable perhaps
4065              
4066             =cut
4067              
4068 8 100   8 1 1407 sub basename { my($f,$s)=(@_,'');$s=quotemeta($s)if!ref($s);$f=~m,^(.*/)?([^/]*?)($s)?$,;$2 }
  8         21  
  8         146  
  8         24  
4069 2 100 66 2 1 178 sub dirname { $_[0]=~m,^(.*)/,;defined($1) && length($1) ? $1 : '.' }
  2         17  
4070 0     0 1 0 sub username { (getpwuid($<))[0] }
4071              
4072             =head2 wipe
4073              
4074             Deletes a file by "wiping" it on the disk. Overwrites the file before deleting. (May not work properly on SSDs)
4075              
4076             B
4077             * Arg 1: A filename
4078             * Optional arg 2: number of times to overwrite file. Default is 3 if omitted, 0 or undef
4079             * Optional arg 3: keep (true/false), wipe() but no delete of file
4080              
4081             B Same as the C (remove file): 1 for success, 0 or false for failure.
4082              
4083             See also: L, L
4084              
4085             =cut
4086              
4087             sub wipe {
4088 2     2 1 3050 my($file,$times,$keep)=@_;
4089 2   100     8 $times||=3;
4090 2 50 33     48 croak "ERROR: File $file nonexisting\n" if not -f $file or not -e $file;
4091 2         13 my $size=-s$file;
4092 2 50       52 open my $WIFH, '+<', $file or croak "ERROR: Unable to open $file: $!\n";
4093 2         5 binmode($WIFH);
4094 2         5 for(1..$times){
4095 4         13 my $block=chr(int(rand(256))) x 1024;#hm
4096 4         8 for(0..($size/1024)){
4097 73         253 seek($WIFH,$_*1024,0);
4098 73         76 print $WIFH $block;
4099             }
4100             }
4101 2         41 close($WIFH);
4102 2 100       109 $keep || unlink($file);
4103             }
4104              
4105             =head2 chall
4106              
4107             Does chmod + utime + chown on one or more files.
4108              
4109             Returns the number of files of which those operations was successful.
4110              
4111             Mode, uid, gid, atime and mtime are set from the array ref in the first argument.
4112              
4113             The first argument references an array which is exactly like an array returned from perls internal C -function.
4114              
4115             Example:
4116              
4117             my @stat=stat($filenameA);
4118             chall( \@stat, $filenameB, $filenameC, ... ); # by stat-array
4119             chall( $filenameA, $filenameB, $filenameC, ... ); # by file name
4120              
4121             Copies the chmod, owner, group, access time and modify time from file A to file B and C.
4122              
4123             See C, C, C, C
4124              
4125             =cut
4126              
4127              
4128             sub chall {
4129             my($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,$atime,$mtime,$ctime,$blksize,$blocks )
4130 12 100   12 1 10856 = ref($_[0]) ? @{shift()} : stat(shift());
  1         3  
4131 12         20 my $successful=0;
4132 12 50 33     31 for(@_){ chmod($mode,$_) && utime($atime,$mtime,$_) && chown($uid,$gid,$_) && $successful++ }
  12   33     607  
4133 12         45 return $successful;
4134             }
4135              
4136             =head2 makedir
4137              
4138             Input: One or two arguments.
4139              
4140             Works like perls C except that C will create nesessary parent directories if they dont exists.
4141              
4142             First input argument: A directory name (absolute, starting with C< / > or relative).
4143              
4144             Second input argument: (optional) permission bits. Using the normal C<< 0777^umask() >> as the default if no second input argument is provided.
4145              
4146             Example:
4147              
4148             makedir("dirB/dirC")
4149              
4150             ...will create directory C if it does not already exists, to be able to create C inside C.
4151              
4152             Returns true on success, otherwise false.
4153              
4154             C memoizes directories it has checked for existence before (trading memory and for speed).
4155             Thus directories removed during running the script is not discovered by makedir.
4156              
4157             See also C<< perldoc -f mkdir >>, C<< man umask >>
4158              
4159             =cut
4160              
4161             our %MAKEDIR;
4162              
4163             sub makedir {
4164 0     0 1 0 my($d,$p,$dd)=@_;
4165 0 0       0 $p=0777^umask() if !defined$p;
4166             (
4167             $MAKEDIR{$d} or -d$d or mkdir($d,$p) #or croak("mkdir $d, $p")
4168             or ($dd)=($d=~m,^(.+)/+([^/]+)$,) and makedir($dd,$p) and mkdir($d,$p) #or die;
4169 0 0 0     0 ) and ++$MAKEDIR{$d};
      0        
      0        
      0        
      0        
4170             }
4171              
4172             =head2 md5sum
4173              
4174             B a filename.
4175              
4176             B a string of 32 hexadecimal chars from 0-9 or a-f.
4177              
4178             Example, the md5sum gnu/linux command without options could be implementet like this:
4179              
4180             use Acme::Tools;
4181             print eval{ md5sum($_)." $_\n" } || $@ for @ARGV;
4182              
4183             This sub requires L, which is a core perl-module since
4184             version 5.?.? It does not slurp the files or spawn new processes.
4185              
4186             =cut
4187              
4188             sub md5sum {
4189 2     2 1 16 require Digest::MD5;
4190 2         4 my $fn=shift;
4191 2 100       176 croak "md5sum: $fn is a directory (no md5sum)" if -d $fn;
4192 1 50       23 open my $FH, '<', $fn or croak "Could not open file $fn for md5sum() $!";
4193 1         2 binmode($FH);
4194 1         2 my $r = eval { Digest::MD5->new->addfile($FH)->hexdigest };
  1         17  
4195 1 50       3 croak "md5sum on $fn failed ($@)\n" if $@;
4196 1         13 $r;
4197             }
4198              
4199             =head2 read_conf
4200              
4201             B A file name or a reference to a string with settings in the format described below.
4202              
4203             B A reference to a hash. This hash will have the settings from the file (or stringref).
4204             The hash do not have to be empty beforehand.
4205              
4206             Returns a hash with the settings as in this examples:
4207              
4208             my %conf = read_conf('/etc/your/thing.conf');
4209             print $conf{sectionA}{knobble}; #prints ABC if the file is as shown below
4210             print $conf{sectionA}{gobble}; #prints ZZZ, the last gobble
4211             print $conf{switch}; #prints OK here as well, unsectioned value
4212             print $conf{part2}{password}; #prints oh:no= x
4213              
4214             File use for the above example:
4215              
4216             switch: OK #before first section, the '' (empty) section
4217             [sectionA]
4218             knobble: ABC
4219             gobble: XYZ #this gobble is overwritten by the gobble on the next line
4220             gobble: ZZZ
4221             [part2]
4222             password: oh:no= x #should be better
4223             text: { values starting with { continues
4224             until reaching a line with }
4225              
4226             Everything from # and behind is regarded comments and ignored. Comments can be on any line.
4227             To keep a # char, put a \ in front of it.
4228              
4229             A C< : > or C< = > separates keys and values. Spaces at the beginning or end of lines are
4230             ignored (after removal of #comments), as are any spaces before and after : and = separators.
4231              
4232             Empty lines or lines with no C< : > or C< = > is also ignored. Keys and values can contain
4233             internal spaces and tabs, but not at the beginning or end.
4234              
4235             Multi-line values must start and end with { and }. Using { and } keep spaces at the start
4236             or end in both one-line and multi-line values.
4237              
4238             Sections are marked with C<< [sectionname] >>. Section names, keys and values is case
4239             sensitive. C above the first section or below and empty C<< [] >> is placed
4240             both in the empty section in the returned hash and as top level key/values.
4241              
4242             C can be a simpler alternative to the core module L which has
4243             its own hassles.
4244              
4245             $Acme::Tools::Read_conf_empty_section=1; #default 0 (was 1 in version 0.16)
4246             my %conf = read_conf('/etc/your/thing.conf');
4247             print $conf{''}{switch}; #prints OK with the file above
4248             print $conf{switch}; #prints OK here as well
4249              
4250             =cut
4251              
4252             our $Read_conf_empty_section=0;
4253             sub read_conf {
4254 3     3 1 15 my($fn,$hr)=(@_,{});
4255 3 100       11 my $conf=ref($fn)?$$fn:readfile($fn);
4256 3         104 $conf=~s,\s*(?
4257 3         29 my($section,@l)=('',split"\n",$conf);
4258 3         9 while(@l) {
4259 63         42 my $l=shift@l;
4260 63 100       219 if( $l=~/^\s*\[\s*(.*?)\s*\]/ ) {
    100          
4261 12         12 $section=$1;
4262 12   100     41 $$hr{$1}||={};
4263             }
4264             elsif( $l=~/^\s*([^\:\=]+)[:=]\s*(.*?)\s*$/ ) {
4265 33   66 33   61 my $ml=sub{my$v=shift;$v.="\n".shift@l while $v=~/^\{[^\}]*$/&&@l;$v=~s/^\{(.*)\}\s*$/$1/s;$v=~s,\\#,#,g;$v};
  33         34  
  33         106  
  33         42  
  33         22  
  33         41  
4266 33         39 my $v=&$ml($2);
4267 33 100 66     89 $$hr{$section}{$1}=$v if length($section) or $Read_conf_empty_section;
4268 33 100       108 $$hr{$1}=$v if !length($section);
4269             }
4270             }
4271 3         20 %$hr;
4272             }
4273             # my $incfn=sub{return $1 if $_[0]=~m,^(/.+),;my$f=$fn;$f=~s,[^/]+$,$_[0],;$f};
4274             # s,]+)>,"".readfile(&$incfn($1)),eg; #todo
4275              
4276              
4277             =head2 openstr
4278              
4279             # returned from openstr:
4280             open my $FH, openstr("fil.txt") or die; # fil.txt
4281             open my $FH, openstr("fil.gz") or die; # zcat fil.gz |
4282             open my $FH, openstr("fil.bz2") or die; # bzcat fil.bz2 |
4283             open my $FH, openstr("fil.xz") or die; # xzcat fil.xz |
4284             open my $FH, openstr(">fil.txt") or die; # > fil.txt
4285             open my $FH, openstr(">fil.gz") or die; # | gzip > fil.gz
4286             open my $FH, openstr(">fil.bz2") or die; # | bzip2 > fil.bz2
4287             open my $FH, openstr(">fil.xz") or die; # | xz > fil.bz2
4288              
4289             Environment variable PATH is used. So in the examples above, /bin/gzip
4290             is returned instead of gzip if /bin is the first directory in
4291             $ENV{PATH} containing an executable file gzip. Dirs /usr/bin, /bin and
4292             /usr/local/bin is added to PATH in openstr(). They are checked even if
4293             PATH is empty.
4294              
4295             =cut
4296              
4297             our @Openstrpath=(grep$_,split(":",$ENV{PATH}),qw(/usr/bin /bin /usr/local/bin));
4298             sub openstr {
4299 22     22 1 2944 my($fn,$ext)=(shift()=~/^(.*?(?:\.(t?gz|bz2|xz))?)$/i);
4300 22 100       235 return $fn if !$ext;
4301 12 50   12   37 my $prog=sub{@Openstrpath or return $_[0];(grep -x$_, map "$_/$_[0]", @Openstrpath)[0] or croak"$_[0] not found"};
  12 100       20  
  12         1316  
4302             $fn =~ /^\s*>/
4303             ? "| ".(&$prog({qw/gz gzip bz2 bzip2 xz xz tgz gzip/ }->{lc($ext)})).$fn
4304 12 100       68 : &$prog({qw/gz zcat bz2 bzcat xz xzcat tgz zcat/}->{lc($ext)})." $fn |";
4305             }
4306              
4307             =head1 TIME FUNCTIONS
4308              
4309             =head2 tms
4310              
4311             Timestring, works somewhat like the Gnu/Linux C command and Oracle's C
4312              
4313             Converts timestamps to more readable forms of time strings.
4314              
4315             Converts seconds since I and time strings on the form C to other forms.
4316              
4317             B One, two or three arguments.
4318              
4319             B A format string.
4320              
4321             B An epock C number or a time
4322             string of the form YYYYMMDD-HH24:MI:SS or YYYYMMDDTHH:MI:SS or
4323             YYYY-MM-DDTHH:MI:SS (in which T is litteral and HH is the 24-hour
4324             version of hours) or YYYYMMDD. Uses the current C if the
4325             second argument is missing.
4326              
4327             TODO: Formats with % as in C (C<%Y%m%d> and so on)
4328              
4329             B True or false. If true and first argument
4330             is eight digits: Its interpreted as a date like YYYYMMDD time string,
4331             not an epoch time. If true and first argument is six digits its
4332             interpreted as a date like DDMMYY (not YYMMDD!).
4333              
4334             B a date or clock string on the wanted form.
4335              
4336             B
4337              
4338             Prints C<< 3. july 1997 >> if thats the dato today:
4339              
4340             perl -MAcme::Tools -le 'print timestr("D. month YYYY")'
4341              
4342             print tms("HH24:MI"); # prints 23:55 if thats the time now
4343             tms("HH24:MI",time()); # ...same,since time() is the default
4344             tms("HH:MI",time()-5*60); # 23:50 if that was the time 5 minutes ago
4345             tms("HH:MI",time()-5*60*60); # 18:55 if thats the time 5 hours ago
4346             tms("Day Month Dth YYYY HH:MI"); # Saturday July 1st 2004 23:55 (big S, big J)
4347             tms("Day D. Month YYYY HH:MI"); # Saturday 8. July 2004 23:55 (big S, big J)
4348             tms("DAY D. MONTH YYYY HH:MI"); # SATURDAY 8. JULY 2004 23:55 (upper)
4349             tms("dy D. month YYYY HH:MI"); # sat 8. july 2004 23:55 (small s, small j)
4350             tms("Dy DD. MON YYYY HH12:MI am"); # Sat 08. JUL 2004 11:55 pm (HH12, am becomes pm if after 12)
4351             tms("DD-MON-YYYY"); # 03-MAY-2004 (mon, english)
4352              
4353             The following list of codes in the first argument will be replaced:
4354              
4355             YYYY Year, four digits
4356             YY Year, two digits, i.e. 04 instead of 2004
4357             yyyy Year, four digits, but nothing if its the current year
4358             YYYY|HH:MI Year if its another year than the current, a time in hours and minutes elsewise
4359             MM Month, two digits. I.e. 08 for August
4360             DD Day of month, two digits. I.e. 01 (not 1) for the first day in a month
4361             D Day of month, one digit. I.e. 1 (not 01)
4362             HH Hour. From 00 to 23.
4363             HH24 Same as HH.
4364             HH12 12 becomes 12 (never 00), 13 becomes 01, 14 02 and so on.
4365             Note: 00 after midnight becomes 12 (am). Tip: always include the code
4366             am in a format string that uses HH12.
4367             MI Minutt. Fra 00 til 59.
4368             SS Sekund. Fra 00 til 59.
4369             am Becomes am or pm
4370             pm Same
4371             AM Becomes AM or PM (upper case)
4372             PM Same
4373            
4374             Month The full name of the month in English from January to December
4375             MONTH Same in upper case (JANUARY)
4376             month Same in lower case (january)
4377             Mont Jan Feb Mars Apr May June July Aug Sep Oct Nov Dec
4378             Mont. Jan. Feb. Mars Apr. May June July Aug. Sep. Oct. Nov. Dec. (always four chars)
4379             Mon Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec (always three chars)
4380            
4381             Day The full name of the weekday. Sunday to Saturday
4382             Dy Three letters: Sun Mon Tue Wed Thu Fri Sat
4383             DAY Upper case
4384             DY Upper case
4385             Dth 1st 2nd 3rd 4th 5th ... 11th 12th ... 20th 21st 22nd 23rd 24th ... 30th 31st
4386            
4387             WW Week number of the year 01-53 according to the ISO8601-definition (which most countries uses)
4388             WWUS Week number of the year 01-53 according to the most used definition in the USA.
4389             Other definitions also exists.
4390              
4391             epoch Converts a time string from YYYYMMDD-HH24:MI:SS, YYYYMMDD-HH24:MI:SS, YYYYMMDDTHH:MI:SS,
4392             YYYY-MM-DDTHH:MI:SS or YYYYMMDD to the number of seconds since January 1st 1970.
4393             Commonly known as the Unix epoch.
4394            
4395             JDN Julian day number. Integer. The number of days since the day starting at noon on January 1 4713 BC
4396             JD Same as JDN but a float accounting for the time of day
4397            
4398             TODO: sub smt() (tms backward... or something better named, converts the other way)
4399             As to_date and to_char in Oracle. Se maybe L instead
4400              
4401             B (optional) Is_date. False|true, default false. If true, the second argument is
4402             interpreted as a date of the form YYYYMMDD, not as a number of seconds since epoch (January 1st 1970).
4403              
4404             =cut
4405              
4406             #Se også L og L
4407              
4408             our $Tms_pattern;
4409             our %Tms_str=
4410             ('MÅNED' => [4, 'JANUAR','FEBRUAR','MARS','APRIL','MAI','JUNI','JULI',
4411             'AUGUST','SEPTEMBER','OKTOBER','NOVEMBER','DESEMBER' ],
4412             'Måned' => [4, 'Januar','Februar','Mars','April','Mai','Juni','Juli',
4413             'August','September','Oktober','November','Desember'],
4414             'måned' => [4, 'januar','februar','mars','april','mai','juni','juli',
4415             'august','september','oktober','november','desember'],
4416             'MÅNE.' => [4, 'JAN.','FEB.','MARS','APR.','MAI','JUNI','JULI','AUG.','SEP.','OKT.','NOV.','DES.'],
4417             'Måne.' => [4, 'Jan.','Feb.','Mars','Apr.','Mai','Juni','Juli','Aug.','Sep.','Okt.','Nov.','Des.'],
4418             'måne.' => [4, 'jan.','feb.','mars','apr.','mai','juni','juli','aug.','sep.','okt.','nov.','des.'],
4419             'MÅNE' => [4, 'JAN','FEB','MARS','APR','MAI','JUNI','JULI','AUG','SEP','OKT','NOV','DES'],
4420             'Måne' => [4, 'Jan','Feb','Mars','Apr','Mai','Juni','Juli','Aug','Sep','Okt','Nov','Des'],
4421             'måne' => [4, 'jan','feb','mars','apr','mai','juni','juli','aug','sep','okt','nov','des'],
4422             'MÅN' => [4, 'JAN','FEB','MAR','APR','MAI','JUN','JUL','AUG','SEP','OKT','NOV','DES'],
4423             'Mån' => [4, 'Jan','Feb','Mar','Apr','Mai','Jun','Jul','Aug','Sep','Okt','Nov','Des'],
4424             'mån' => [4, 'jan','feb','mar','apr','mai','jun','jul','aug','sep','okt','nov','des'],
4425             'MONTH' => [4, 'JANUARY','FEBRUARY','MARCH','APRIL','MAY','JUNE','JULY',
4426             'AUGUST','SEPTEMBER','OCTOBER','NOVEMBER','DECEMBER'],
4427             'Month' => [4, 'January','February','March','April','May','June','July',
4428             'August','September','October','November','December'],
4429             'month' => [4, 'january','february','march','april','may','june','july',
4430             'august','september','october','november','december'],
4431             'MONT.' => [4, 'JAN.','FEB.','MAR.','APR.','MAY','JUNE','JULY','AUG.','SEP.','OCT.','NOV.','DEC.'],
4432             'Mont.' => [4, 'Jan.','Feb.','Mar.','Apr.','May','June','July','Aug.','Sep.','Oct.','Nov.','Dec.'],
4433             'mont.' => [4, 'jan.','feb.','mar.','apr.','may','june','july','aug.','sep.','oct.','nov.','dec.'],
4434             'MONT' => [4, 'JAN','FEB','MAR','APR','MAY','JUNE','JULY','AUG','SEP','OCT','NOV','DEC'],
4435             'Mont' => [4, 'Jan','Feb','Mar','Apr','May','June','July','Aug','Sep','Oct','Nov','Dec'],
4436             'mont' => [4, 'jan','feb','mar','apr','may','june','july','aug','sep','oct','nov','dec'],
4437             'MON' => [4, 'JAN','FEB','MAR','APR','MAY','JUN','JUL','AUG','SEP','OCT','NOV','DEC'],
4438             'Mon' => [4, 'Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'],
4439             'mon' => [4, 'jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec'],
4440             'DAY' => [6, 'SUNDAY','MONDAY','TUESDAY','WEDNESDAY','THURSDAY','FRIDAY','SATURDAY'],
4441             'Day' => [6, 'Sunday','Monday','Tuesday','Wednesday','Thursday','Friday','Saturday'],
4442             'day' => [6, 'sunday','monday','tuesday','wednesday','thursday','friday','saturday'],
4443             'DY' => [6, 'SUN','MON','TUE','WED','THU','FRI','SAT'],
4444             'Dy' => [6, 'Sun','Mon','Tue','Wed','Thu','Fri','Sat'],
4445             'dy' => [6, 'sun','mon','tue','wed','thu','fri','sat'],
4446             'DAG' => [6, 'SØNDAG','MANDAG','TIRSDAG','ONSDAG','TORSDAG','FREDAG','LØRDAG'],
4447             'Dag' => [6, 'Søndag','Mandag','Tirsdag','Onsdag','Torsdag','Fredag','Lørdag'],
4448             'dag' => [6, 'søndag','mandag','tirsdag','onsdag','torsdag','fredag','lørdag'],
4449             'DG' => [6, 'SØN','MAN','TIR','ONS','TOR','FRE','LØR'],
4450             'Dg' => [6, 'Søn','Man','Tir','Ons','Tor','Fre','Lør'],
4451             'dg' => [6, 'søn','man','tir','ons','tor','fre','lør'],
4452             );
4453             my $_tms_inited=0;
4454             sub tms_init {
4455 1 50   1 0 2 return if $_tms_inited++;
4456 1         3 for(qw(MAANED Maaned maaned MAAN Maan maan),'MAANE.','Maane.','maane.'){
4457 9         15 $Tms_str{$_}=$Tms_str{replace($_,"aa","å","AA","Å")};
4458             }
4459 45         40 $Tms_pattern=join("|",map{quotemeta($_)}
4460 1         12 sort{length($b)<=>length($a)}
  194         115  
4461             keys %Tms_str);
4462             #uten sort kan "måned" bli "mared", fordi "mån"=>"mar"
4463             }
4464              
4465       0 0   sub totime {
4466              
4467             }
4468              
4469             sub date_ok {
4470 0     0 0 0 my($y,$m,$d)=@_;
4471 0 0 0     0 return date_ok($1,$2,$3) if @_==1 and $_[0]=~/^(\d{4})(\d\d)(\d\d)$/;
4472 0 0       0 return 0 if $y!~/^\d\d\d\d$/;
4473 0 0 0     0 return 0 if $m<1||$m>12||$d<1||$d>(31,$y%4||$y%100==0&&$y%400?28:29,31,30,31,30,31,31,30,31,30,31)[$m-1];
    0 0        
      0        
      0        
4474 0         0 return 1;
4475             }
4476              
4477             sub weeknum {
4478 0 0   0 0 0 return weeknum(tms('YYYYMMDD')) if @_<1;
4479 0 0 0     0 return weeknum($1,$2,$3) if @_==1 and $_[0]=~/^(\d{4})(\d\d)(\d\d)$/;
4480 0         0 my($year,$month,$day)= @_;
4481 0         0 eval{
4482 0 0       0 if(@_<2){
    0          
4483 0 0       0 if($year=~/^\d{8}$/) { ($year,$month,$day)=unpack("A4A2A2",$year) }
  0 0       0  
4484 0         0 elsif($year>99999999){ ($year,$month,$day)=(localtime($year))[5,4,3]; $year+=1900; $month++ }
  0         0  
  0         0  
4485 0         0 else {die}
4486             }
4487 0         0 elsif(@_!=3){croak}
4488 0 0       0 croak if !date_ok(sprintf("%04d%02d%02d",$year,$month,$day));
4489             };
4490 0 0       0 croak "ERROR: Wrong args Acme::Tools::weeknum(".join(",",@_).")" if $@;
4491 33     33   15779 use integer;#heltallsdivisjon
  33         276  
  33         126  
4492 0         0 my $y=$year+4800-(14-$month)/12;
4493 0         0 my $j=$day+(153*($month+(14-$month)/12*12-3)+2)/5+365*$y+$y/4-$y/100+$y/400-32045;
4494 0         0 my $d=($j+31741-$j%7)%146097%36524%1461;
4495 0         0 return (($d-$d/1460)%365+$d/1460)/7+1;
4496             }
4497              
4498             #perl -MAcme::Tools -le 'print "$_ ".tms($_."0501","day",1) for 2015..2026'
4499              
4500             sub tms {
4501 42 50 66 42 1 5350 return undef if @_>1 and not defined $_[1]; #time=undef => undef
4502 42 100       63 if(@_==1){
4503 2         9 my @lt=localtime();
4504 2 50       5 $_[0] eq 'YYYY' and return 1900+$lt[5];
4505 2 50       4 $_[0] eq 'YYYYMMDD' and return sprintf("%04d%02d%02d",1900+$lt[5],1+$lt[4],$lt[3]);
4506 2 100 66     30 $_[0] =~ $Re_isnum and @lt=localtime($_[0]) and return sprintf("%04d%02d%02d-%02d:%02d:%02d",1900+$lt[5],1+$lt[4],@lt[3,2,1,0]);
4507             }
4508 41         44 my($format,$time,$is_date)=@_;
4509 41 100       60 $time=time_fp() if !defined$time;
4510 41 100 100     245 ($time,$format)=($format,$time) if @_>=2 and $format=~/^[\d+\:\-\.]+$/; #swap /hm/
4511 41         193 my @lt=localtime($time);
4512             #todo? $is_date=0 if $time=~s/^\@(\-?\d)/$1/; #@n where n is sec since epoch makes it clear that its not a formatted, as in `date`
4513             #todo? date --date='TZ="America/Los_Angeles" 09:00 next Fri' #`info date`
4514             # Fri Nov 13 18:00:00 CET 2015
4515             #date --date="next Friday" #--date or -d
4516             #date --date="last friday"
4517             #date --date="2 days ago"
4518             #date --date="yesterday" #or tomorrow
4519             #date --date="-1 day" #date --date='10 week'
4520              
4521 41 50       57 if( $is_date ){
4522 0 0   0   0 my $yy2c=sub{10+$_[0]>$lt[5]%100?"20":"19"}; #hm 10+
  0         0  
4523 0 0       0 $time=totime(&$yy2c($1)."$1$2$3")."000000" if $time=~/^(\d\d)(\d\d)(\d\d)$/;
4524 0 0       0 $time=totime("$1$2${3}000000") if $time=~/^((?:18|19|20)\d\d)(\d\d)(\d\d)$/; #hm 18-20?
4525             }
4526             else {
4527 41 50       90 $time = yyyymmddhh24miss_time("$1$2$3$4$5$6") #yyyymmddhh24miss_time ???
4528             if $time=~/^((?:19|20|18)\d\d) #yyyy
4529             (0[1-9]|1[012]) #mm
4530             (0[1-9]|[12]\d|3[01]) \-? #dd
4531             ([01]\d|2[0-3]) \:? #hh24
4532             ([0-5]\d) \:? #mi
4533             ([0-5]\d) $/x; #ss
4534             }
4535 41 100       65 tms_init() if !$_tms_inited;
4536 41 100       60 return sprintf("%04d%02d%02d-%02d:%02d:%02d",1900+$lt[5],1+$lt[4],@lt[3,2,1,0]) if !$format;
4537 40         349 my %p=('%'=>'%',
4538             a=>'Dy',
4539             A=>'Day',
4540             b=>'Mon',
4541             b=>'Month',
4542             c=>'Dy Mon D HH:MI:SS YYYY',
4543             C=>'CC',
4544             d=>'DD',
4545             D=>'MM/DD/YY',
4546             e=>'D',
4547             F=>'YYYY-MM-DD',
4548             #G=>'',
4549             h=>'Month', H=>'HH24', I=>'HH12',
4550             j=>'DoY', #day of year
4551             k=>'H24', _H=>'H24',
4552             l=>'H12', _I=>'H12',
4553             m=>'MM', M=>'MI',
4554             n=>"\n",
4555             #N=>'NS', #sprintf%09d,1e9*(time_fp()-time()) #000000000..999999999
4556             p=>'AM', #AM|PM upper (yes, opposite: date +%H%M%S%P%p)
4557             P=>'am', #am|pm lower
4558             S=>'SS',
4559             t=>"\t",
4560             T=>'HH24:MI:SS',
4561             u=>'DoW', #day of week 1..7, 1=mon 7=sun
4562             w=>'DoW0', #day of week 0..6, 1=mon 0=sun
4563             #U=>'WoYs', #week num of year 00..53, sunday as first day of week
4564             #V=>'UKE', #ISO week num of year 01..53, monday as first day of week
4565             #W=>'WoYm', #week num of year 00..53, monday as first day of week, not ISO!
4566             #x=>$ENV{locale's date representation}, #e.g. MM/DD/YY
4567             #X=>$ENV{locale's time representation}, #e.g. HH/MI/SS
4568             y=>'YY',
4569             Y=>'YYYY',
4570             #z=>'TZHHMI', #time zone hour minute e.g. -0430
4571             #':z'=>'TZHH:MI',
4572             #'::z'=>'TZHH:MI:SS',
4573             #':::z'=>'TZ', #number of :'s necessary precision, e.g. -02 or +03:30
4574             #Z=>'TZN', #e.g. CET, EDT, ...
4575             );
4576 40         178 my $pkeys=join"|",keys%p;
4577 40         1218 $format=~s,\%($pkeys),$p{$1},g;
4578 40         274 $format=~s/($Tms_pattern)/$Tms_str{$1}[1+$lt[$Tms_str{$1}[0]]]/g;
4579 40         51 $format=~s/YYYY / 1900+$lt[5] /gxe;
  13         26  
4580 40 0       31 $format=~s/(\s?)yyyy / $lt[5]==(localtime)[5]?"":$1.(1900+$lt[5])/gxe;
  0         0  
4581 40         39 $format=~s/YY / sprintf("%02d",$lt[5]%100) /gxei;
  1         5  
4582 40         37 $format=~s|CC | sprintf("%02d",(1900+$lt[5])/100) |gxei;
  2         10  
4583 40         32 $format=~s/MM / sprintf("%02d",$lt[4]+1) /gxe;
  5         15  
4584 40         27 $format=~s/mm / sprintf("%d",$lt[4]+1) /gxe;
  0         0  
4585 40         36 $format=~s,M/ , ($lt[4]+1).'/' ,gxe;
  2         5  
4586 40         26 $format=~s,/M , '/'.($lt[4]+1) ,gxe;
  1         3  
4587 40         28 $format=~s/DD / sprintf("%02d",$lt[3]) /gxe;
  6         15  
4588 40         67 $format=~s/d0w|dow0 / $lt[6] /gxei;
  4         7  
4589 40 100       35 $format=~s/dow / $lt[6]?$lt[6]:7 /gxei;
  2         7  
4590 40         42 $format=~s/d0y|doy0 / $lt[7] /gxei; #0-364 (365 leap)
  2         5  
4591 40         60 $format=~s/doy / $lt[7]+1 /gxei; #1-365 (366 leap)
  1         3  
4592 40         38 $format=~s/D(?![AaGgYyEeNn]) / $lt[3] /gxe; #EN pga desember og wednesday
  5         7  
4593 40         26 $format=~s/dd / sprintf("%d",$lt[3]) /gxe;
  0         0  
4594 40 100 50     59 $format=~s/hh12|HH12 / sprintf("%02d",$lt[2]<13?$lt[2]||12:$lt[2]-12)/gxe;
  8         40  
4595 40         48 $format=~s/HH24|HH24|HH|hh / sprintf("%02d",$lt[2]) /gxe;
  4         15  
4596 40         42 $format=~s/MI / sprintf("%02d",$lt[1]) /gxei;
  12         24  
4597 40   100     38 $format=~s{SS\.([1-9]) }{ sprintf("%0*.$1f",3+$1,$lt[0]+(repl($time,qr/^[^\.]+/)||0)) }gxei;
  4         22  
4598 40         41 $format=~s/SS(?:\.0)? / sprintf("%02d",$lt[0]) /gxei;
  5         12  
4599 40 100       54 $format=~s/(?:am|pm|apm|xm) / $lt[2]<13 ? 'am' : 'pm' /gxe;
  4         6  
4600 40 100       40 $format=~s/(?:AM|PM|APM|XM) / $lt[2]<13 ? 'AM' : 'PM' /gxe;
  4         9  
4601 40         44 $format=~s/WWI|WW / sprintf("%02d",weeknum($time)) /gxei;
  0         0  
4602 40         31 $format=~s/W / weeknum($time) /gxei;
  0         0  
4603 40         203 $format;
4604             }
4605              
4606             =head2 easter
4607              
4608             Input: A year (a four digit number)
4609              
4610             Output: array of two numbers: day and month of Easter Sunday that year. Month 3 means March and 4 means April.
4611              
4612             sub easter { use integer;my$Y=shift;my$C=$Y/100;my$L=($C-$C/4-($C-($C-17)/25)/3+$Y%19*19+15)%30;
4613             (($L-=$L>28||($L>27?1-(21-$Y%19)/11:0))-=($Y+$Y/4+$L+2-$C+$C/4)%7)<4?($L+28,3):($L-3,4) }
4614              
4615             ...is a "golfed" version of Oudins algorithm (1940) L
4616             (see also http://www.smart.net/~mmontes/ec-cal.html )
4617              
4618             Valid for any Gregorian year. Dates repeat themselves after 70499183
4619             lunations = 2081882250 days = ca 5699845 years. However, our planet will
4620             by then have a different rotation and spin time...
4621              
4622             Example:
4623              
4624             ( $day, $month ) = easter( 2012 ); # $day == 8 and $month == 4
4625              
4626             Example 2:
4627              
4628             my @e=map sprintf("%02d%02d", reverse(easter($_))), 1800..300000;
4629             print "First: ".min(@e)." Last: ".max(@e)."\n"; # First: 0322 Last: 0425
4630              
4631             Note: The Spencer Jones formula differs Oudins used in C in some years
4632             before 1498. However, in that period the Julian calendar with a different formula was
4633             used anyway. Countries introduced the current Gregorian calendar between 1583 and 1926.
4634              
4635             =cut
4636              
4637 33     33 1 42964 sub easter { use integer;my$Y=shift;my$C=$Y/100;my$L=($C-$C/4-($C-($C-17)/25)/3+$Y%19*19+15)%30;
  33     13503   42  
  33         100  
  13503         84407  
  13503         8503  
  13503         12817  
4638 13503 100 66     38672 (($L-=$L>28||($L>27?1-(21-$Y%19)/11:0))-=($Y+$Y/4+$L+2-$C+$C/4)%7)<4?($L+28,3):($L-3,4) }
4639              
4640              
4641             =head2 time_fp
4642              
4643             No input arguments.
4644              
4645             Return the same number as perls C except with decimals (fractions of a second, _fp as in floating point number).
4646              
4647             print time_fp(),"\n";
4648             print time(),"\n";
4649              
4650             Could write:
4651              
4652             1116776232.38632
4653              
4654             ...if that is the time now.
4655              
4656             Or just:
4657              
4658             1116776232
4659              
4660             ...from perl's internal C if C isn't installed and available.
4661              
4662              
4663             =cut
4664              
4665             sub time_fp { # {return 0+gettimeofday} is just as well?
4666 14069 50   14069 1 12371 eval{ require Time::HiRes } or return time();
  14069         42604  
4667 14069         28925 my($sec,$mic)=Time::HiRes::gettimeofday();
4668 14069         30858 return $sec+$mic/1e6; #1e6 not portable?
4669             }
4670              
4671             sub timems {
4672 0 0   0 0 0 eval{ require Time::HiRes } or return time();
  0         0  
4673 0         0 my($sec,$mic)=Time::HiRes::gettimeofday();
4674 0         0 return $sec*1000+$mic/1e3;
4675             }
4676              
4677             =head2 sleep_fp
4678              
4679             sleep_fp() work as the built in C<< sleep() >> but also accepts fractional seconds:
4680              
4681             sleep_fp(0.020); # sleeps for 20 milliseconds
4682              
4683             Sub sleep_fp do a C, thus it might take some
4684             extra time the first call. To avoid that, add C<< use Time::HiRes >>
4685             to your code. Sleep_fp should not be trusted for accuracy to more than
4686             a tenth of a second. Virtual machines tend to be less accurate (sleep
4687             longer) than physical ones. This was tested on VMware and RHEL
4688             (Linux). See also L.
4689              
4690             =head2 sleeps
4691              
4692             =head2 sleepms
4693              
4694             =head2 sleepus
4695              
4696             =head2 sleepns
4697              
4698             sleep_fp(0.020); #sleeps for 20 milliseconds
4699             sleeps(0.020); #sleeps for 20 milliseconds, sleeps() is a synonym to sleep_fp()
4700             sleepms(20); #sleeps for 20 milliseconds
4701             sleepus(20000); #sleeps for 20000 microseconds = 20 milliseconds
4702             sleepns(20000000); #sleeps for 20 million nanoseconds = 20 milliseconds
4703              
4704             =cut
4705              
4706 5 50   5 1 19 sub sleep_fp { eval{require Time::HiRes} or (sleep(shift()),return);Time::HiRes::sleep(shift()) }
  5         33  
  5         170544  
4707 1 50   1 1 7 sub sleeps { eval{require Time::HiRes} or (sleep(shift()),return);Time::HiRes::sleep(shift()) }
  1         13  
  1         10106  
4708 1 50   1 1 8 sub sleepms { eval{require Time::HiRes} or (sleep(shift()/1e3),return);Time::HiRes::sleep(shift()/1e3) }
  1         17  
  1         10117  
4709 1 50   1 1 10 sub sleepus { eval{require Time::HiRes} or (sleep(shift()/1e6),return);Time::HiRes::sleep(shift()/1e6) }
  1         22  
  1         10111  
4710 1 50   1 1 12 sub sleepns { eval{require Time::HiRes} or (sleep(shift()/1e9),return);Time::HiRes::sleep(shift()/1e9) }
  1         17  
  1         10181  
4711              
4712             =head2 eta
4713              
4714             Estimated time of arrival (ETA).
4715              
4716             for(@files){
4717             ...do work on file...
4718             my $eta = eta( ++$i, 0+@files ); # file now, number of files
4719             print "" . localtime($eta);
4720             }
4721              
4722             ..DOC MISSING..
4723              
4724             =head2 etahhmm
4725              
4726             ...NOT YET
4727              
4728             =cut
4729              
4730             #http://en.wikipedia.org/wiki/Kalman_filter god idé?
4731             our %Eta;
4732             our $Eta_forgetfulness=2;
4733             sub eta {
4734 2012 100   2012 1 9355 my($id,$pos,$end,$time_fp)=( @_==2 ? (join(";",caller()),@_) : @_ );
4735 2012   66     2639 $time_fp||=time_fp();
4736 2012   100     3015 my $a=$Eta{$id}||=[];
4737 2012         2782 push @$a, [$pos,$time_fp];
4738 2012 100       3759 @$a=@$a[map$_*2,0..@$a/2] if @$a>40; #hm 40
4739 2012 100 100     6182 splice(@$a,-2,1) if @$a>1 and $$a[-2][0]==$$a[-1][0]; #same pos as last
4740 2012 100       2426 return undef if @$a<2;
4741 2001         1288 my @eta;
4742 2001         2125 for(2..@$a){
4743 58602         95585 push @eta, $$a[-1][1] + ($end-$$a[-1][0]) * ($$a[-1][1]-$$a[-$_][1])/($$a[-1][0]-$$a[-$_][0]);
4744             }
4745 2001         1785 my($sum,$sumw,$w)=(0,0,1);
4746 2001         1748 for(@eta){
4747 58602         36319 $sum+=$w*$_;
4748 58602         34498 $sumw+=$w;
4749 58602         40931 $w/=$Eta_forgetfulness;
4750             }
4751 2001         1570 my $avg=$sum/$sumw;
4752 2001         4295 return $avg;
4753             # return avg(@eta);
4754             #return $$a[-1][1] + ($end-$$a[-1][0]) * ($$a[-1][1]-$$a[-2][1])/($$a[-1][0]-$$a[-2][0]);
4755 0         0 1;
4756             }
4757              
4758             =head2 sleep_until
4759              
4760             sleep_until(0.5) sleeps until half a second has passed since the last
4761             call to sleep_until. This example starts the next job excactly ten
4762             seconds after the last job started even if the last job lasted for a
4763             while (but not more than ten seconds):
4764              
4765             for(@jobs){
4766             sleep_until(10);
4767             print localtime()."\n";
4768             ...heavy job....
4769             }
4770              
4771             Might print:
4772              
4773             Thu Jan 12 16:00:00 2012
4774             Thu Jan 12 16:00:10 2012
4775             Thu Jan 12 16:00:20 2012
4776              
4777             ...and so on even if the C<< ...heavy job... >>-part takes more than a
4778             second to complete. Whereas if sleep(10) was used, each job would
4779             spend more than ten seconds in average since the work time would be
4780             added to sleep(10).
4781              
4782             Note: sleep_until() will remember the time of ANY last call of this sub,
4783             not just the one on the same line in the source code (this might change
4784             in the future). The first call to sleep_until() will be the same as
4785             sleep_fp() or Perl's own sleep() if the argument is an integer.
4786              
4787             =cut
4788              
4789             our $Time_last_sleep_until;
4790             sub sleep_until {
4791 0 0   0 1 0 my $s=@_==1?shift():0;
4792 0         0 my $time=time_fp();
4793 0         0 my $sleep=$s-($time-nvl($Time_last_sleep_until,0));
4794 0         0 $Time_last_sleep_until=time;
4795 0 0       0 sleep_fp($sleep) if $sleep>0;
4796             }
4797              
4798             my %thr;
4799             sub throttle {
4800 0     0 0 0 my($times,$mintime,$what)=@_;
4801 0   0     0 $what||=join(":",@{[caller(1)]}[3,2]);
  0         0  
4802 0   0     0 $thr{$what}||=[];
4803 0         0 my $thr=$thr{$what};
4804 0         0 push @$thr,time_fp();
4805 0 0       0 return if @$thr<$times;
4806 0         0 my $since=$$thr[-1]-shift(@$thr);
4807 0 0       0 my $sleep=$since<$mintime?$mintime-$since:0;
4808 0         0 sleep_fp($sleep);
4809 0         0 return $sleep;
4810             }
4811              
4812             =head2 leapyear
4813              
4814             B A year. A four digit number.
4815              
4816             B True (1) or false (0) of whether the year is a leap year or
4817             not. (Uses current calendar even for periods before leapyears was used).
4818              
4819             print join(", ",grep leapyear($_), 1900..2014)."\n";
4820              
4821             1904, 1908, 1912, 1916, 1920, 1924, 1928, 1932, 1936, 1940, 1944, 1948, 1952, 1956,
4822             1960, 1964, 1968, 1972, 1976, 1980, 1984, 1988, 1992, 1996, 2000, 2004, 2008, 2012
4823              
4824             Note: 1900 is not a leap year, but 2000 is. Years divided by 100 is a leap year only
4825             if it can be divided by 400.
4826              
4827             =cut
4828              
4829 0 0   0 1 0 sub leapyear{$_[0]%400?$_[0]%100?$_[0]%4?0:1:0:1} #bool
    0          
    0          
4830              
4831             #http://rosettacode.org/wiki/Levenshtein_distance#Perl
4832             our %ldist_cache;
4833             sub ldist {
4834 0     0 0 0 my($s,$t,$l) = @_;
4835 0 0       0 return length($t) if !$s;
4836 0 0       0 return length($s) if !$t;
4837 0 0 0     0 %ldist_cache=() if !$l and 1000<0+%ldist_cache;
4838             $ldist_cache{$s,$t} ||=
4839 0   0     0 do {
4840 0         0 my($s1,$t1) = ( substr($s,1), substr($t,1) );
4841 0 0       0 substr($s,0,1) eq substr($t,0,1)
4842             ? ldist($s1,$t1)
4843             : 1 + min( ldist($s1,$t1,1+$l), ldist($s,$t1,1+$l), ldist($s1,$t,1+$l) );
4844             };
4845             }
4846              
4847             =head1 OTHER
4848              
4849             =head2 nvl
4850              
4851             The I function (or I function)
4852              
4853             C takes two or more arguments. (Oracles nvl-function take just two)
4854              
4855             Returns the value of the first input argument with length() > 0.
4856              
4857             Return I if there is no such input argument.
4858              
4859             In perl 5.10 and perl 6 this will most often be easier with the C< //
4860             > operator, although C and C<< // >> treats empty strings C<"">
4861             differently. Sub nvl here considers empty strings and undef the same.
4862              
4863             =cut
4864              
4865             sub nvl {
4866 38 100 66 38 1 191 return $_[0] if defined $_[0] and length($_[0]) or @_==1;
      100        
4867 22 100       53 return $_[1] if @_==2;
4868 9 100       27 return nvl(@_[1..$#_]) if @_>2;
4869 1         3 return undef;
4870             }
4871              
4872             =head2 decode_num
4873              
4874             See L.
4875              
4876             =head2 decode
4877              
4878             C and C works just as Oracles C.
4879              
4880             C and C accordingly uses perl operators C and C<==> for comparison.
4881              
4882             Examples:
4883              
4884             my $a=123;
4885             print decode($a, 123,3, 214,4, $a); # prints 3
4886             print decode($a, 123=>3, 214=>4, $a); # prints 3, same thing since => is synonymous to comma in Perl
4887              
4888             The first argument is tested against the second, fourth, sixth and so on,
4889             and then the third, fifth, seventh and so on is
4890             returned if decode() finds an equal string or number.
4891              
4892             In the above example: 123 maps to 3, 124 maps to 4 and the last argument $a is returned elsewise.
4893              
4894             More examples:
4895              
4896             my $a=123;
4897             print decode($a, 123=>3, 214=>7, $a); # also 3, note that => is synonym for , (comma) in perl
4898             print decode($a, 122=>3, 214=>7, $a); # prints 123
4899             print decode($a, 123.0 =>3, 214=>7); # prints 3
4900             print decode($a, '123.0'=>3, 214=>7); # prints nothing (undef), no last argument default value here
4901             print decode_num($a, 121=>3, 221=>7, '123.0','b'); # prints b
4902              
4903             Sort of:
4904              
4905             decode($string, %conversion, $default);
4906              
4907             The last argument is returned as a default if none of the keys in the keys/value-pairs matched.
4908              
4909             A more perl-ish and often faster way of doing the same:
4910              
4911             {123=>3, 214=>7}->{$a} || $a # (beware of 0)
4912              
4913             =cut
4914              
4915             sub decode {
4916 4 50   4 1 10 croak "Must have a mimimum of two arguments" if @_<2;
4917 4         4 my $uttrykk=shift;
4918 4 50 100     6 if(defined$uttrykk){ shift eq $uttrykk and return shift or shift for 1..@_/2 }
  4   33     30  
4919 0   0     0 else { !defined shift and return shift or shift for 1..@_/2 }
      0        
4920 2         8 return shift;
4921             }
4922              
4923             sub decode_num {
4924 1 50   1 1 8 croak "Must have a mimimum of two arguments" if @_<2;
4925 1         2 my $uttrykk=shift;
4926 1 50 100     3 if(defined$uttrykk){ shift == $uttrykk and return shift or shift for 1..@_/2 }
  1   33     12  
4927 0   0     0 else { !defined shift and return shift or shift for 1..@_/2 }
      0        
4928 0         0 return shift;
4929             }
4930              
4931             =head2 qrlist
4932              
4933             Input: An array of values to be used to test againts for existence.
4934              
4935             Output: A reference to a regular expression. That is a C
4936              
4937             The regex sets $1 if it match.
4938              
4939             Example:
4940              
4941             my @list=qw/ABc XY DEF DEFG XYZ/;
4942             my $filter=qrlist("ABC","DEF","XY."); # makes a regex of it qr/^(\QABC\E|\QDEF\E|\QXY.\E)$/
4943             my @filtered= grep { $_ =~ $filter } @list; # returns DEF and XYZ, but not XYZ because the . char is taken literally
4944              
4945             Note: Filtering with hash lookups are WAY faster.
4946              
4947             Source:
4948              
4949             sub qrlist (@) { my $str=join"|",map quotemeta, @_; qr/^($str)$/ }
4950              
4951             =cut
4952              
4953             sub qrlist (@) {
4954 0     0 1 0 my $str=join"|",map quotemeta,@_;
4955 0         0 return qr/^($str)$/;
4956             }
4957              
4958             =head2 ansicolor
4959              
4960             Perhaps easier to use than L ?
4961              
4962             B One argument. A string where the char C<¤> have special
4963             meaning and is replaced by color codings depending on the letter
4964             following the C<¤>.
4965              
4966             B The same string, but with C<¤letter> replaced by ANSI color
4967             codes respected by many types terminal windows. (xterm, telnet, ssh,
4968             telnet, rlog, vt100, cygwin, rxvt and such...).
4969              
4970             B
4971              
4972             ¤r red
4973             ¤g green
4974             ¤b blue
4975             ¤y yellow
4976             ¤m magenta
4977             ¤B bold
4978             ¤u underline
4979             ¤c clear
4980             ¤¤ reset, quits and returns to default text color.
4981              
4982             B
4983              
4984             print ansicolor("This is maybe ¤ggreen¤¤?");
4985              
4986             Prints I where the word I is shown in green.
4987              
4988             If L is not installed or not found, returns the input
4989             string with every C<¤> including the following code letters
4990             removed. (That is: ansicolor is safe to use even if Term::ANSIColor is
4991             not installed, you just don't get the colors).
4992              
4993             See also L.
4994              
4995             =cut
4996              
4997             sub ansicolor {
4998 0     0 1 0 my $txt=shift;
4999 0 0       0 eval{require Term::ANSIColor} or return replace($txt,qr/¤./);
  0         0  
5000 0         0 my %h=qw/r red g green b blue y yellow m magenta B bold u underline c clear ¤ reset/;
5001 0         0 my $re=join"|",keys%h;
5002 0         0 $txt=~s/¤($re)/Term::ANSIColor::color($h{$1})/ge;
  0         0  
5003 0         0 return $txt;
5004             }
5005              
5006             =head2 ccn_ok
5007              
5008             Checks if a Credit Card number (CCN) has correct control digits according to the LUHN-algorithm from 1960.
5009             This method of control digits is used by MasterCard, Visa, American Express,
5010             Discover, Diners Club / Carte Blanche, JCB and others.
5011              
5012             B
5013              
5014             A credit card number. Can contain non-digits, but they are removed internally before checking.
5015              
5016             B
5017              
5018             Something true or false.
5019              
5020             Or more accurately:
5021              
5022             Returns C (false) if the input argument is missing digits.
5023              
5024             Returns 0 (zero, which is false) is the digits is not correct according to the LUHN algorithm.
5025              
5026             Returns 1 or the name of a credit card company (true either way) if the last digit is an ok control digit for this ccn.
5027              
5028             The name of the credit card company is returned like this (without the C<'> character)
5029              
5030             Returns (wo '') Starts on Number of digits
5031             ------------------------------ ------------------------ ----------------
5032             'MasterCard' 51-55 16
5033             'Visa' 4 13 eller 16
5034             'American Express' 34 eller 37 15
5035             'Discover' 6011 16
5036             'Diners Club / Carte Blanche' 300-305, 36 eller 38 14
5037             'JCB' 3 16
5038             'JCB' 2131 eller 1800 15
5039              
5040             And should perhaps have had:
5041              
5042             'enRoute' 2014 eller 2149 15
5043              
5044             ...but that card uses either another control algorithm or no control
5045             digits at all. So C is never returned here.
5046              
5047             If the control digits is valid, but the input does not match anything in the column C, 1 is returned.
5048              
5049             (This is also the same control digit mechanism used in Norwegian KID numbers on payment bills)
5050              
5051             The first digit in a credit card number is supposed to tell what "industry" the card is meant for:
5052              
5053             MII Digit Value Issuer Category
5054             --------------------------- ----------------------------------------------------
5055             0 ISO/TC 68 and other industry assignments
5056             1 Airlines
5057             2 Airlines and other industry assignments
5058             3 Travel and entertainment
5059             4 Banking and financial
5060             5 Banking and financial
5061             6 Merchandizing and banking
5062             7 Petroleum
5063             8 Telecommunications and other industry assignments
5064             9 National assignment
5065              
5066             ...although this has no meaning to C.
5067              
5068             The first six digits is I, that is the bank
5069             (probably). The rest in the "account number", except the last digits,
5070             which is the control digit. Max length on credit card numbers are 19
5071             digits.
5072              
5073             =cut
5074              
5075             sub ccn_ok {
5076 0     0 1 0 my $ccn=shift(); #credit card number
5077 0         0 $ccn=~s/\D+//g;
5078 0 0       0 if(KID_ok($ccn)){
5079 0 0       0 return "MasterCard" if $ccn=~/^5[1-5]\d{14}$/;
5080 0 0       0 return "Visa" if $ccn=~/^4\d{12}(?:\d{3})?$/;
5081 0 0       0 return "American Express" if $ccn=~/^3[47]\d{13}$/;
5082 0 0       0 return "Discover" if $ccn=~/^6011\d{12}$/;
5083 0 0       0 return "Diners Club / Carte Blanche" if $ccn=~/^3(?:0[0-5]\d{11}|[68]\d{12})$/;
5084 0 0       0 return "JCB" if $ccn=~/^(?:3\d{15}|(?:2131|1800)\d{11})$/;
5085 0         0 return 1;
5086             }
5087             #return "enRoute" if $ccn=~/^(?:2014|2149)\d{11}$/; #ikke LUHN-krav?
5088 0         0 return 0;
5089             }
5090              
5091             =head2 KID_ok
5092              
5093             Checks if a norwegian KID number has an ok control digit.
5094              
5095             To check if a customer has typed the number correctly.
5096              
5097             This uses the LUHN algorithm (also known as mod-10) from 1960 which is also used
5098             internationally in control digits for credit card numbers, and Canadian social security ID numbers as well.
5099              
5100             The algorithm, as described in Phrack (47-8) (a long time hacker online publication):
5101              
5102             "For a card with an even number of digits, double every odd numbered
5103             digit and subtract 9 if the product is greater than 9. Add up all the
5104             even digits as well as the doubled-odd digits, and the result must be
5105             a multiple of 10 or it's not a valid card. If the card has an odd
5106             number of digits, perform the same addition doubling the even numbered
5107             digits instead."
5108              
5109             B A KID-nummer. Must consist of digits 0-9 only, otherwise a die (croak) happens.
5110              
5111             B
5112              
5113             - Returns undef if the input argument is missing.
5114              
5115             - Returns 0 if the control digit (the last digit) does not satify the LUHN/mod-10 algorithm.
5116              
5117             - Returns 1 if ok
5118              
5119             B L
5120              
5121             =cut
5122              
5123             sub KID_ok {
5124 0 0   0 1 0 croak "Non-numeric argument" if $_[0]=~/\D/;
5125 0 0       0 my @k=split//,shift or return undef;
5126 0         0 my $s;$s+=pop(@k)+[qw/0 2 4 6 8 1 3 5 7 9/]->[pop@k] while @k;
  0         0  
5127 0 0       0 $s%10==0?1:0;
5128             }
5129              
5130              
5131              
5132             =head2 range
5133              
5134             B
5135              
5136             One or more numeric arguments:
5137              
5138             First: x (first returned element)
5139              
5140             Second: y (last but not including)
5141              
5142             Third: step, default 1. The step between each returned element
5143              
5144             If a fourth, fifth and so on arguments are given, they change the step for each returned element. As first derivative, second derivative.
5145              
5146             B
5147              
5148             If one argument: returns the array C<(0 .. x-1)>
5149              
5150             If two arguments: returns the array C<(x .. y-1)>
5151              
5152             If three arguments: The default step is 1. Use a third argument to use a different step.
5153              
5154             B
5155              
5156             print join ",", range(11); # prints 0,1,2,3,4,5,6,7,8,9,10 (but not 11)
5157             print join ",", range(2,11); # 2,3,4,5,6,7,8,9,10 (but not 11)
5158             print join ",", range(11,2,-1); # 11,10,9,8,7,6,5,4,3
5159             print join ",", range(2,11,3); # 2,5,8
5160             print join ",", range(11,2,-3); # 11,8,5
5161             print join ",", range(11,2,+3); # prints nothing
5162              
5163             print join ", ",range(2,11,1,0.1); # 2, 3, 4.1, 5.3, 6.6, 8, 9.5 adds 0.1 to step each time
5164             print join ", ",range(2,11,1,0.1,-0.01); # 2, 3, 4.1, 5.29, 6.56, 7.9, 9.3, 10.75
5165              
5166             Note: In the Python language and others, C is a build in iterator (a
5167             generator), not an array. This saves memory for large sets and sometimes time.
5168             Use C in L to get a similar lazy generator in Perl.
5169              
5170             =cut
5171              
5172             sub range {
5173 8 100   8 1 28 return _range_accellerated(@_) if @_>3; #se under
5174 6         7 my($x,$y,$jump)=@_;
5175 6 100       51 return ( 0 .. $x-1 ) if @_==1;
5176 5 100       18 return ( $x .. $y-1 ) if @_==2;
5177 4 50 33     20 croak "Wrong number of arguments or jump==0" if @_!=3 or $jump==0;
5178 4         3 my @r;
5179 4 100       9 if($jump>0){ while($x<$y){ push @r, $x; $x+=$jump } }
  2         6  
  772         435  
  772         795  
5180 2         4 else { while($x>$y){ push @r, $x; $x+=$jump } }
  12         10  
  12         13  
5181 4         38 return @r;
5182             }
5183              
5184             #jumps derivative, double der., trippled der usw
5185             sub _range_accellerated {
5186 2     2   3 my($x,$y,@jump)=@_;
5187 2         2 my @r;
5188 2 50   17   9 my $test = $jump[0]>=0 ? sub{$x<$y} : sub{$x>$y};
  17         25  
  0         0  
5189 2         3 while(&$test()){
5190 15         14 push @r, $x;
5191 15         10 $x+=$jump[0];
5192 15         28 $jump[$_-1]+=$jump[$_] for 1..$#jump;
5193             }
5194 2         13 return @r;
5195             }
5196              
5197             =head2 globr
5198              
5199             Works like and uses Perls builtin C<< glob() >> function but adds support for ranges
5200             with C<< {from..to} >> and C<< {from..to..step} >>. Like brace expansion in bash.
5201              
5202             Examples:
5203              
5204             my @arr = glob "X{a,b,c,d}Z"; # return four element array: XaZ XbZ XcZ XdZ
5205             my @arr = globr "X{a,b,c,d}Z"; # same as above
5206             my @arr = globr "X{a..d}Z"; # same as above
5207             my @arr = globr "X{a..d..2}Z"; # step 2, returns array: XaZ XcZ
5208             my @arr = globr "X{aa..bz..13}Z"; # XaaZ XanZ XbaZ XbnZ
5209             my @arr = globr "{1..12}b"; # 1b 2b 3b 4b 5b 6b 7b 8b 9b 10b 11b 12b
5210             my @arr = globr "{01..12}b"; # 01b 02b 03b 04b 05b 06b 07b 08b 09b 10b 11b 12b
5211             my @arr = globr "{01..12..3}b"; # 01b 04b 07b 10b
5212              
5213             =cut
5214              
5215             sub globr($) {
5216 8     8 1 9 my $p=shift;
5217 8 100       64 $p=~s/\{(\w+)\.\.(\w+)(\.\.(\d+))?\}/my$i=0;"{".join(",",grep{$4?!($i++%$4):1}$1..$2)."}";/eg;
  7         5  
  7         44  
  133         196  
5218 8         709 glob $p;
5219             }
5220              
5221             =head2 permutations
5222              
5223             How many ways (permutations) can six people be placed around a table:
5224              
5225             If one person: one
5226             If two persons: two (they can swap places)
5227             If three persons: six
5228             If four persons: 24
5229             If five persons: 120
5230             If six persons: 720
5231              
5232             The formula is C where the postfix unary operator C, also known as I is defined like:
5233             C. Example: C<5! = 5 * 4 * 3 * 2 * 1 = 120>.Run this to see the 100 first C<< n! >>
5234              
5235             perl -MAcme::Tools -le'$i=big(1);print "$_!=",$i*=$_ for 1..100'
5236              
5237             1! = 1
5238             2! = 2
5239             3! = 6
5240             4! = 24
5241             5! = 120
5242             6! = 720
5243             7! = 5040
5244             8! = 40320
5245             9! = 362880
5246             10! = 3628800
5247             .
5248             .
5249             .
5250             100! = 93326215443944152681699238856266700490715968264381621468592963895217599993229915608941463976156518286253697920827223758251185210916864000000000000000000000000
5251              
5252             C takes a list and return a list of arrayrefs for each
5253             of the permutations of the input list:
5254              
5255             permutations('a','b'); #returns (['a','b'],['b','a'])
5256              
5257             permutations('a','b','c'); #returns (['a','b','c'],['a','c','b'],
5258             # ['b','a','c'],['b','c','a'],
5259             # ['c','a','b'],['c','b','a'])
5260              
5261             Up to five input arguments C is probably as fast as it
5262             can be in this pure perl implementation (see source). For more than
5263             five, it could be faster. How fast is it now: Running with different
5264             n, this many time took that many seconds:
5265              
5266             n times seconds
5267             -- ------- ---------
5268             2 100000 0.32
5269             3 10000 0.09
5270             4 10000 0.33
5271             5 1000 0.18
5272             6 100 0.27
5273             7 10 0.21
5274             8 1 0.17
5275             9 1 1.63
5276             10 1 17.00
5277              
5278             If the first argument is a coderef, that sub will be called for each permutation and the return from those calls with be the real return from C. For example this:
5279              
5280             print for permutations(sub{join"",@_},1..3);
5281              
5282             ...will print the same as:
5283              
5284             print for map join("",@$_), permutations(1..3);
5285              
5286             ...but the first of those two uses less RAM if 3 has been say 9.
5287             Changing 3 with 10, and many computers hasn't enough memory
5288             for the latter.
5289              
5290             The examples prints:
5291              
5292             123
5293             132
5294             213
5295             231
5296             312
5297             321
5298              
5299             If you just want to say calculate something on each permutation,
5300             but is not interested in the list of them, you just don't
5301             take the return. That is:
5302              
5303             my $ant;
5304             permutations(sub{$ant++ if $_[-1]>=$_[0]*2},1..9);
5305              
5306             ...is the same as:
5307              
5308             $$_[-1]>=$$_[0]*2 and $ant++ for permutations(1..9);
5309              
5310             ...but the first uses next to nothing of memory compared to the latter. They have about the same speed.
5311             (The examples just counts the permutations where the last number is at least twice as large as the first)
5312              
5313             C was created to find all combinations of a persons
5314             name. This is useful in "fuzzy" name searches with
5315             L if you can not be certain what is first, middle
5316             and last names. In foreign or unfamiliar names it can be difficult to
5317             know that.
5318              
5319             =cut
5320              
5321             #TODO: se test_perl.pl
5322              
5323             sub permutations {
5324 2 50   2 1 26 my $code=ref($_[0]) eq 'CODE' ? shift() : undef;
5325 2 50 33     6 $code and @_<6 and return map &$code(@$_),permutations(@_);
5326              
5327 2 50       6 return [@_] if @_<2;
5328              
5329 2 100       17 return ([@_[0,1]],[@_[1,0]]) if @_==2;
5330              
5331 1 50       19 return ([@_[0,1,2]],[@_[0,2,1]],[@_[1,0,2]],
5332             [@_[1,2,0]],[@_[2,0,1]],[@_[2,1,0]]) if @_==3;
5333              
5334 0 0       0 return ([@_[0,1,2,3]],[@_[0,1,3,2]],[@_[0,2,1,3]],[@_[0,2,3,1]],
5335             [@_[0,3,1,2]],[@_[0,3,2,1]],[@_[1,0,2,3]],[@_[1,0,3,2]],
5336             [@_[1,2,0,3]],[@_[1,2,3,0]],[@_[1,3,0,2]],[@_[1,3,2,0]],
5337             [@_[2,0,1,3]],[@_[2,0,3,1]],[@_[2,1,0,3]],[@_[2,1,3,0]],
5338             [@_[2,3,0,1]],[@_[2,3,1,0]],[@_[3,0,1,2]],[@_[3,0,2,1]],
5339             [@_[3,1,0,2]],[@_[3,1,2,0]],[@_[3,2,0,1]],[@_[3,2,1,0]]) if @_==4;
5340              
5341 0 0       0 return ([@_[0,1,2,3,4]],[@_[0,1,2,4,3]],[@_[0,1,3,2,4]],[@_[0,1,3,4,2]],[@_[0,1,4,2,3]],
5342             [@_[0,1,4,3,2]],[@_[0,2,1,3,4]],[@_[0,2,1,4,3]],[@_[0,2,3,1,4]],[@_[0,2,3,4,1]],
5343             [@_[0,2,4,1,3]],[@_[0,2,4,3,1]],[@_[0,3,1,2,4]],[@_[0,3,1,4,2]],[@_[0,3,2,1,4]],
5344             [@_[0,3,2,4,1]],[@_[0,3,4,1,2]],[@_[0,3,4,2,1]],[@_[0,4,1,2,3]],[@_[0,4,1,3,2]],
5345             [@_[0,4,2,1,3]],[@_[0,4,2,3,1]],[@_[0,4,3,1,2]],[@_[0,4,3,2,1]],[@_[1,0,2,3,4]],
5346             [@_[1,0,2,4,3]],[@_[1,0,3,2,4]],[@_[1,0,3,4,2]],[@_[1,0,4,2,3]],[@_[1,0,4,3,2]],
5347             [@_[1,2,0,3,4]],[@_[1,2,0,4,3]],[@_[1,2,3,0,4]],[@_[1,2,3,4,0]],[@_[1,2,4,0,3]],
5348             [@_[1,2,4,3,0]],[@_[1,3,0,2,4]],[@_[1,3,0,4,2]],[@_[1,3,2,0,4]],[@_[1,3,2,4,0]],
5349             [@_[1,3,4,0,2]],[@_[1,3,4,2,0]],[@_[1,4,0,2,3]],[@_[1,4,0,3,2]],[@_[1,4,2,0,3]],
5350             [@_[1,4,2,3,0]],[@_[1,4,3,0,2]],[@_[1,4,3,2,0]],[@_[2,0,1,3,4]],[@_[2,0,1,4,3]],
5351             [@_[2,0,3,1,4]],[@_[2,0,3,4,1]],[@_[2,0,4,1,3]],[@_[2,0,4,3,1]],[@_[2,1,0,3,4]],
5352             [@_[2,1,0,4,3]],[@_[2,1,3,0,4]],[@_[2,1,3,4,0]],[@_[2,1,4,0,3]],[@_[2,1,4,3,0]],
5353             [@_[2,3,0,1,4]],[@_[2,3,0,4,1]],[@_[2,3,1,0,4]],[@_[2,3,1,4,0]],[@_[2,3,4,0,1]],
5354             [@_[2,3,4,1,0]],[@_[2,4,0,1,3]],[@_[2,4,0,3,1]],[@_[2,4,1,0,3]],[@_[2,4,1,3,0]],
5355             [@_[2,4,3,0,1]],[@_[2,4,3,1,0]],[@_[3,0,1,2,4]],[@_[3,0,1,4,2]],[@_[3,0,2,1,4]],
5356             [@_[3,0,2,4,1]],[@_[3,0,4,1,2]],[@_[3,0,4,2,1]],[@_[3,1,0,2,4]],[@_[3,1,0,4,2]],
5357             [@_[3,1,2,0,4]],[@_[3,1,2,4,0]],[@_[3,1,4,0,2]],[@_[3,1,4,2,0]],[@_[3,2,0,1,4]],
5358             [@_[3,2,0,4,1]],[@_[3,2,1,0,4]],[@_[3,2,1,4,0]],[@_[3,2,4,0,1]],[@_[3,2,4,1,0]],
5359             [@_[3,4,0,1,2]],[@_[3,4,0,2,1]],[@_[3,4,1,0,2]],[@_[3,4,1,2,0]],[@_[3,4,2,0,1]],
5360             [@_[3,4,2,1,0]],[@_[4,0,1,2,3]],[@_[4,0,1,3,2]],[@_[4,0,2,1,3]],[@_[4,0,2,3,1]],
5361             [@_[4,0,3,1,2]],[@_[4,0,3,2,1]],[@_[4,1,0,2,3]],[@_[4,1,0,3,2]],[@_[4,1,2,0,3]],
5362             [@_[4,1,2,3,0]],[@_[4,1,3,0,2]],[@_[4,1,3,2,0]],[@_[4,2,0,1,3]],[@_[4,2,0,3,1]],
5363             [@_[4,2,1,0,3]],[@_[4,2,1,3,0]],[@_[4,2,3,0,1]],[@_[4,2,3,1,0]],[@_[4,3,0,1,2]],
5364             [@_[4,3,0,2,1]],[@_[4,3,1,0,2]],[@_[4,3,1,2,0]],[@_[4,3,2,0,1]],[@_[4,3,2,1,0]]) if @_==5;
5365              
5366 0         0 my(@r,@p,@c,@i,@n); @i=(0,@_); @p=@c=1..@_; @n=1..@_-1;
  0         0  
  0         0  
  0         0  
5367             PERM:
5368 0         0 while(1){
5369 0 0       0 if($code){if(defined wantarray){push(@r,&$code(@i[@p]))}else{&$code(@i[@p])}}else{push@r,[@i[@p]]}
  0 0       0  
  0         0  
  0         0  
  0         0  
5370 0 0       0 for my$i(@n){splice@p,$i,0,shift@p;next PERM if --$c[$i];$c[$i]=$i+1}
  0         0  
  0         0  
  0         0  
5371             return@r
5372 0         0 }
5373             }
5374              
5375             =head2 cart
5376              
5377             Cartesian product
5378              
5379             B
5380              
5381             Input: two or more arrayrefs with accordingly x, y, z and so on number of elements.
5382              
5383             Output: An array of x * y * z number of arrayrefs. The arrays being the cartesian product of the input arrays.
5384              
5385             It can be useful to think of this as joins in SQL. In C
5386             more tables behind C, but without any C condition to join the tables.
5387              
5388             B
5389              
5390             B
5391              
5392             - Either two or more arrayrefs with x, y, z and so on number of elements.
5393              
5394             - Or coderefs to subs containing condition checks. Somewhat like C conditions in SQL.
5395              
5396             B An array of x * y * z number of arrayrefs (the cartesian product)
5397             minus the ones that did not fulfill the condition(s).
5398              
5399             This of is as joins with one or more where conditions as coderefs.
5400              
5401             The coderef input arguments can be placed last or among the array refs
5402             to save both runtime and memory if the conditions depend on
5403             arrays further back.
5404              
5405             B
5406              
5407             for(cart(\@a1,\@a2,\@a3)){
5408             my($a1,$a2,$a3) = @$_;
5409             print "$a1,$a2,$a3\n";
5410             }
5411              
5412             Prints the same as this:
5413              
5414             for my $a1 (@a1){
5415             for my $a2 (@a2){
5416             for my $a3 (@a3){
5417             print "$a1,$a2,$a3\n";
5418             }
5419             }
5420             }
5421              
5422             B (with a condition: the sum of the first two should be dividable with 3)
5423              
5424             for( cart( \@a1, \@a2, sub{sum(@$_)%3==0}, \@a3 ) ) {
5425             my($a1,$a2,$a3)=@$_;
5426             print "$a1,$a2,$a3\n";
5427             }
5428              
5429             Prints the same as this:
5430              
5431             for my $a1 (@a1){
5432             for my $a2 (@a2){
5433             next if 0==($a1+$a2)%3;
5434             for my $a3 (@a3){
5435             print "$a1,$a2,$a3\n";
5436             }
5437             }
5438             }
5439              
5440             B
5441              
5442             my @a1 = (1,2);
5443             my @a2 = (10,20,30);
5444             my @a3 = (100,200,300,400);
5445              
5446             my $s = join"", map "*".join(",",@$_), cart(\@a1,\@a2,\@a3);
5447             ok( $s eq "*1,10,100*1,10,200*1,10,300*1,10,400*1,20,100*1,20,200"
5448             ."*1,20,300*1,20,400*1,30,100*1,30,200*1,30,300*1,30,400"
5449             ."*2,10,100*2,10,200*2,10,300*2,10,400*2,20,100*2,20,200"
5450             ."*2,20,300*2,20,400*2,30,100*2,30,200*2,30,300*2,30,400");
5451              
5452             $s=join"",map "*".join(",",@$_), cart(\@a1,\@a2,\@a3,sub{sum(@$_)%3==0});
5453             ok( $s eq "*1,10,100*1,10,400*1,20,300*1,30,200*2,10,300*2,20,200*2,30,100*2,30,400");
5454              
5455             B
5456              
5457             Returns hashrefs instead of arrayrefs:
5458              
5459             my @cards=cart( #5200 cards: 100 decks of 52 cards
5460             deck => [1..100],
5461             value => [qw/2 3 4 5 6 7 8 9 10 J Q K A/],
5462             col => [qw/heart diamond club star/],
5463             );
5464             for my $card ( mix(@cards) ) {
5465             print "From deck number $$card{deck} we got $$card{value} $$card{col}\n";
5466             }
5467              
5468             Note: using sub-ref filters do not work (yet) in hash-mode. Use grep on result instead.
5469              
5470             =cut
5471              
5472             sub cart {
5473 5     5 1 518 my @ars=@_;
5474 5 100       16 if(!ref($_[0])){ #if hash-mode detected
5475 1   33     3 my(@k,@v); push@k,shift@ars and push@v,shift@ars while @ars;
  1         14  
5476 1         8 return map{my%h;@h{@k}=@$_;\%h}cart(@v);
  24         25  
  24         37  
  24         37  
5477             }
5478 4         3 my @res=map[$_],@{shift@ars};
  4         16  
5479 4         8 for my $ar (@ars){
5480 9 100 50     20 @res=grep{&$ar(@$_)}@res and next if ref($ar) eq 'CODE';
  24         40  
5481 8         10 @res=map{my$r=$_;map{[@$r,$_]}@$ar}@res;
  34         21  
  34         27  
  120         165  
5482             }
5483 4         45 return @res;
5484             }
5485              
5486             sub cart_easy { #not tested/exported http://stackoverflow.com/questions/2457096/in-perl-how-can-i-get-the-cartesian-product-of-multiple-sets
5487 0     0 0 0 my $last = pop @_;
5488 0 0       0 @_ ? (map {my$left=$_; map [@$left, $_], @$last } cart_easy(@_) )
  0         0  
  0         0  
5489             : (map [$_], @$last);
5490             }
5491              
5492             =head2 reduce
5493              
5494             From: Why Functional Programming Matters: L
5495              
5496             L
5497              
5498             DON'T TRY THIS AT HOME, C PROGRAMMERS.
5499              
5500             sub reduce (&@) {
5501             my ($proc, $first, @rest) = @_;
5502             return $first if @rest == 0;
5503             local ($a, $b) = ($first, reduce($proc, @rest));
5504             return $proc->();
5505             }
5506              
5507             Many functions can then be implemented with very little code. Such as:
5508              
5509             sub mean { (reduce {$a + $b} @_) / @_ }
5510              
5511             =cut
5512              
5513             sub reduce (&@) {
5514 0     0 1 0 my ($proc, $first, @rest) = @_;
5515 0 0       0 return $first if @rest == 0;
5516 33     33   101418 no warnings;
  33         52  
  33         18868  
5517 0         0 local ($a, $b) = ($first, reduce($proc, @rest));
5518 0         0 return $proc->();
5519             }
5520              
5521              
5522             =head2 pivot
5523              
5524             Resembles the pivot table function in Excel.
5525              
5526             C is used to spread out a slim and long table to a visually improved layout.
5527              
5528             For instance spreading out the results of C-selects from SQL:
5529              
5530             pivot( arrayref, columnname1, columnname2, ...)
5531              
5532             pivot( ref_to_array_of_arrayrefs, @list_of_names_to_down_fields )
5533              
5534             The first argument is a ref to a two dimensional table.
5535              
5536             The rest of the arguments is a list which also signals the number of
5537             columns from left in each row that is ending up to the left of the
5538             data table, the rest ends up at the top and the last element of
5539             each row ends up as data.
5540              
5541             top1 top1 top1 top1
5542             left1 left2 left3 top2 top2 top2 top2
5543             ----- ----- ----- ---- ---- ---- ----
5544             data data data data
5545             data data data data
5546             data data data data
5547              
5548             Example:
5549              
5550             my @table=(
5551             ["1997","Gerd", "Weight", "Summer",66],
5552             ["1997","Gerd", "Height", "Summer",170],
5553             ["1997","Per", "Weight", "Summer",75],
5554             ["1997","Per", "Height", "Summer",182],
5555             ["1997","Hilde","Weight", "Summer",62],
5556             ["1997","Hilde","Height", "Summer",168],
5557             ["1997","Tone", "Weight", "Summer",70],
5558            
5559             ["1997","Gerd", "Weight", "Winter",64],
5560             ["1997","Gerd", "Height", "Winter",158],
5561             ["1997","Per", "Weight", "Winter",73],
5562             ["1997","Per", "Height", "Winter",180],
5563             ["1997","Hilde","Weight", "Winter",61],
5564             ["1997","Hilde","Height", "Winter",164],
5565             ["1997","Tone", "Weight", "Winter",69],
5566            
5567             ["1998","Gerd", "Weight", "Summer",64],
5568             ["1998","Gerd", "Height", "Summer",171],
5569             ["1998","Per", "Weight", "Summer",76],
5570             ["1998","Per", "Height", "Summer",182],
5571             ["1998","Hilde","Weight", "Summer",62],
5572             ["1998","Hilde","Height", "Summer",168],
5573             ["1998","Tone", "Weight", "Summer",70],
5574            
5575             ["1998","Gerd", "Weight", "Winter",64],
5576             ["1998","Gerd", "Height", "Winter",171],
5577             ["1998","Per", "Weight", "Winter",74],
5578             ["1998","Per", "Height", "Winter",183],
5579             ["1998","Hilde","Weight", "Winter",62],
5580             ["1998","Hilde","Height", "Winter",168],
5581             ["1998","Tone", "Weight", "Winter",71],
5582             );
5583              
5584             .
5585              
5586             my @reportA=pivot(\@table,"Year","Name");
5587             print "\n\nReport A\n\n".tablestring(\@reportA);
5588              
5589             Will print:
5590              
5591             Report A
5592            
5593             Year Name Height Height Weight Weight
5594             Summer Winter Summer Winter
5595             ---- ----- ------ ------ ------ ------
5596             1997 Gerd 170 158 66 64
5597             1997 Hilde 168 164 62 61
5598             1997 Per 182 180 75 73
5599             1997 Tone 70 69
5600             1998 Gerd 171 171 64 64
5601             1998 Hilde 168 168 62 62
5602             1998 Per 182 183 76 74
5603             1998 Tone 70 71
5604              
5605             .
5606              
5607             my @reportB=pivot([map{$_=[@$_[0,3,2,1,4]]}(@t=@table)],"Year","Season");
5608             print "\n\nReport B\n\n".tablestring(\@reportB);
5609              
5610             Will print:
5611              
5612             Report B
5613            
5614             Year Season Height Height Height Weight Weight Weight Weight
5615             Gerd Hilde Per Gerd Hilde Per Tone
5616             ---- ------ ------ ------ ----- ----- ------ ------ ------
5617             1997 Summer 170 168 182 66 62 75 70
5618             1997 Winter 158 164 180 64 61 73 69
5619             1998 Summer 171 168 182 64 62 76 70
5620             1998 Winter 171 168 183 64 62 74 71
5621              
5622             .
5623              
5624             my @reportC=pivot([map{$_=[@$_[1,2,0,3,4]]}(@t=@table)],"Name","Attributt");
5625             print "\n\nReport C\n\n".tablestring(\@reportC);
5626              
5627             Will print:
5628              
5629             Report C
5630            
5631             Name Attributt 1997 1997 1998 1998
5632             Summer Winter Summer Winter
5633             ----- --------- ------ ------ ------ ------
5634             Gerd Height 170 158 171 171
5635             Gerd Weight 66 64 64 64
5636             Hilde Height 168 164 168 168
5637             Hilde Weight 62 61 62 62
5638             Per Height 182 180 182 183
5639             Per Weight 75 73 76 74
5640             Tone Weight 70 69 70 71
5641              
5642             .
5643              
5644             my @reportD=pivot([map{$_=[@$_[1,2,0,3,4]]}(@t=@table)],"Name");
5645             print "\n\nReport D\n\n".tablestring(\@reportD);
5646              
5647             Will print:
5648              
5649             Report D
5650            
5651             Name Height Height Height Height Weight Weight Weight Weight
5652             1997 1997 1998 1998 1997 1997 1998 1998
5653             Summer Winter Summer Winter Summer Winter Summer Winter
5654             ----- ------ ------ ------ ------ ------ ------ ------ ------
5655             Gerd 170 158 171 171 66 64 64 64
5656             Hilde 168 164 168 168 62 61 62 62
5657             Per 182 180 182 183 75 73 76 74
5658             Tone 70 69 70 71
5659              
5660             Options:
5661              
5662             Options to sort differently and show sums and percents are available. (...MORE DOC ON THAT LATER...)
5663              
5664             See also L
5665              
5666             =cut
5667              
5668             sub pivot {
5669 4     4 1 9 my($tabref,@vertikalefelt)=@_;
5670 4 50       11 my %opt=ref($vertikalefelt[-1]) eq 'HASH' ? %{pop(@vertikalefelt)} : ();
  0         0  
5671 4 50       9 my $opt_sum=1 if $opt{sum};
5672 4 50 0     9 my $opt_pro=exists $opt{prosent}?$opt{prosent}||0:undef;
5673 4   50     32 my $sortsub = $opt{'sortsub'} || \&_sortsub;
5674 4   33     14 my $sortsub_bortover = $opt{'sortsub_bortover'} || $sortsub;
5675 4   33     35 my $sortsub_nedover = $opt{'sortsub_nedover'} || $sortsub;
5676             #print serialize(\%opt,'opt');
5677             #print serialize(\$opt_pro,'opt_pro');
5678 4         6 my $antned=0+@vertikalefelt;
5679 4         3 my $bakerst=-1+@{$$tabref[0]};
  4         6  
5680 4         5 my(%h,%feltfinnes,%sum);
5681             #print "Bakerst<$bakerst>\n";
5682 4         6 for(@$tabref){
5683 112         166 my $rad=join($;,@$_[0..($antned-1)]);
5684 112         121 my $felt=join($;,@$_[$antned..($bakerst-1)]);
5685 112         89 my $verdi=$$_[$bakerst];
5686 112 50       131 length($rad) or $rad=' ';
5687 112 50       115 length($felt) or $felt=' ';
5688 112         134 $h{$rad}{$felt}=$verdi;
5689 112         138 $h{$rad}{"%$felt"}=$verdi;
5690 112 50 33     261 if($opt_sum or defined $opt_pro){
5691 0         0 $h{$rad}{Sum}+=$verdi;
5692 0         0 $sum{$felt}+=$verdi;
5693 0         0 $sum{Sum}+=$verdi;
5694             }
5695 112         88 $feltfinnes{$felt}++;
5696 112 50       159 $feltfinnes{"%$felt"}++ if $opt_pro;
5697             }
5698 4         28 my @feltfinnes = sort $sortsub_bortover keys%feltfinnes;
5699 4 50       9 push @feltfinnes, "Sum" if $opt_sum;
5700 4         7 my @t=([@vertikalefelt,map{replace($_,$;,"\n")}@feltfinnes]);
  23         31  
5701             #print serialize(\@feltfinnes,'feltfinnes');
5702             #print serialize(\%h,'h');
5703             #print "H = ".join(", ",sort _sortsub keys%h)."\n";
5704 4         16 for my $rad (sort $sortsub_nedover keys(%h)){
5705             my @rad=(split($;,$rad),
5706             map{
5707 23 50 33     111 if(/^\%/ and defined $opt_pro){
  120         211  
5708 0         0 my $sum=$h{$rad}{Sum};
5709 0         0 my $verdi=$h{$rad}{$_};
5710 0 0       0 if($sum!=0){
5711 0 0       0 defined $verdi
5712             ?sprintf("%*.*f",3+1+$opt_pro,$opt_pro,100*$verdi/$sum)
5713             :$verdi;
5714             }
5715             else{
5716 0 0       0 $verdi!=0?"div0":$verdi;
5717             }
5718             }
5719             else{
5720 120         122 $h{$rad}{$_};
5721             }
5722             }
5723             @feltfinnes);
5724 23         47 push(@t,[@rad]);
5725             }
5726 4 50       10 push(@t,"-",["Sum",(map{""}(2..$antned)),map{print "<$_>\n";$sum{$_}}@feltfinnes]) if $opt_sum;
  0         0  
  0         0  
  0         0  
5727 4         76 return @t;
5728             }
5729              
5730             # default sortsub for pivot()
5731              
5732             sub _sortsub {
5733 33     33   163 no warnings;
  33         39  
  33         10145  
5734             #my $c=($a<=>$b)||($a cmp $b);
5735             #return $c if $c;
5736             #printf "%-30s %-30s ",replace($a,$;,','),replace($b,$;,',');
5737 78     78   450 my @a=split $;,$a;
5738 78         314 my @b=split $;,$b;
5739 78         101 for(0..$#a){
5740 119         153 my $c=$a[$_]<=>$b[$_];
5741 119 100 66     281 return $c if $c and "$a[$_]$b[$_]"!~/[iI][nN][fF]|þ/i; # inf(inity)
5742 101         70 $c=$a[$_]cmp$b[$_];
5743 101 100       192 return $c if $c;
5744             }
5745 0         0 return 0;
5746             }
5747              
5748             =head2 tablestring
5749              
5750             B a reference to an array of arrayrefs -- a two dimensional table of strings and numbers
5751              
5752             B a string containing the textual table -- a string of two or more lines
5753              
5754             The first arrayref in the list refers to a list of either column headings (scalar)
5755             or ... (...more later...)
5756              
5757             In this output table:
5758              
5759             - the columns will not be wider than necessary by its widest value (any -tags are removed in every internal width-calculation)
5760              
5761             - multi-lined cell values are handled also
5762              
5763             - and so are html-tags, if the output is to be used inside
-tags on a web page. 
5764              
5765             - columns with just numeric values are right justified (header row excepted)
5766              
5767             Example:
5768              
5769             print tablestring([
5770             [qw/AA BB CCCC/],
5771             [123,23,"d"],
5772             [12,23,34],
5773             [77,88,99],
5774             ["lin\nes",12,"asdff\nfdsa\naa"],[0,22,"adf"]
5775             ]);
5776              
5777             Prints this string of 11 lines:
5778              
5779             AA BB CCCC
5780             --- -- -----
5781             123 23 d
5782             12 23 34
5783             77 8 99
5784            
5785             lin 12 asdff
5786             es fdsa
5787             aa
5788            
5789             10 22 adf
5790              
5791             As you can see, rows containing multi-lined cells gets an empty line before and after the row to separate it more clearly.
5792              
5793             =cut
5794              
5795             sub tablestring {
5796 5     5 1 20 my $tab=shift;
5797 5 50       13 my %o=$_[0] ? %{shift()} : ();
  0         0  
5798 5         6 my $fjern_tom=$o{fjern_tomme_kolonner};
5799 5         6 my $ikke_space=$o{ikke_space};
5800 5   50     17 my $nodup=$o{nodup}||0;
5801 5         4 my $ikke_hodestrek=$o{ikke_hodestrek};
5802 5 50       8 my $pagesize=exists $o{pagesize} ? $o{pagesize}-3 : 9999999;
5803 5         5 my $venstretvang=$o{venstre};
5804 5         4 my(@bredde,@venstre,@hoeyde,@ikketom,@nodup);
5805 5         6 my $hode=1;
5806 5         4 my $i=0;
5807 5         4 my $j;
5808 5         8 for(@$tab){
5809 33         22 $j=0;
5810 33         25 $hoeyde[$i]=0;
5811 33         23 my $nodup_rad=$nodup;
5812 33 50       46 if(ref($_) eq 'ARRAY'){
5813 33         31 for(@$_){
5814 210         147 my $celle=$_;
5815 210   100     307 $bredde[$j]||=0;
5816 210 50 33     343 if($nodup_rad and $i>0 and $$tab[$i][$j] eq $$tab[$i-1][$j] || ($nodup_rad=0)){
      0        
      33        
5817 0 0       0 $celle=$nodup==1?"":$nodup;
5818 0         0 $nodup[$i][$j]=1;
5819             }
5820             else{
5821 210         129 my $hoeyde=0;
5822 210         111 my $bredere;
5823 33     33   169 no warnings;
  33         40  
  33         12590  
5824 210 100 100     511 $ikketom[$j]=1 if !$hode && length($celle)>0;
5825 210         235 for(split("\n",$celle)){
5826 236 50       233 $bredere=/
5827 236         174 s/<[^>]+>//g;
5828 236         141 $hoeyde++;
5829 236         194 s/>/>/g;
5830 236         139 s/</
5831 236 100       321 $bredde[$j]=length($_)+1+$bredere if length($_)+1+$bredere>$bredde[$j];
5832 236 100 100     955 $venstre[$j]=1 if $_ && !/^\s*[\-\+]?(\d+|\d*\.\d+)\s*\%?$/ && !$hode;
      100        
5833             }
5834 210 100 66     349 if( $hoeyde>1 && !$ikke_space){
5835 25 100       30 $hoeyde++ unless $hode;
5836 25 100 100     42 $hoeyde[$i-1]++ if $i>1 && $hoeyde[$i-1]==1;
5837             }
5838 210 100       259 $hoeyde[$i]=$hoeyde if $hoeyde>$hoeyde[$i];
5839             }
5840 210         156 $j++;
5841             }
5842             }
5843             else{
5844 0         0 $hoeyde[$i]=1;
5845 0         0 $ikke_hodestrek=1;
5846             }
5847 33         17 $hode=0;
5848 33         27 $i++;
5849             }
5850 5         7 $i=$#hoeyde;
5851 5         5 $j=$#bredde;
5852 5 50 33     18 if($i==0 or $venstretvang) { @venstre=map{1}(0..$j) }
  0         0  
  0         0  
5853 5 50       7 else { for(0..$j){ $venstre[$_]=1 if !$ikketom[$_] } }
  33         41  
5854 5         8 my @tabut;
5855 5         2 my $rad_startlinje=0;
5856 5         4 my @overskrift;
5857             my $overskrift_forrige;
5858 5         7 for my $x (0..$i){
5859 33 50       54 if($$tab[$x] eq '-'){
5860 0 0       0 my @tegn=map {$$tab[$x-1][$_]=~/\S/?"-":" "} (0..$j);
  0         0  
5861 0         0 $tabut[$rad_startlinje]=join(" ",map {$tegn[$_] x ($bredde[$_]-1)} (0..$j));
  0         0  
5862             }
5863             else{
5864 33         32 for my $y (0..$j){
5865 210 50 33     278 next if $fjern_tom && !$ikketom[$y];
5866 33     33   165 no warnings;
  33         46  
  33         17891  
5867            
5868 210 0 33     805 my @celle=
    50          
5869             !$overskrift_forrige&&$nodup&&$nodup[$x][$y]
5870             ?($nodup>0?():((" " x (($bredde[$y]-length($nodup))/2)).$nodup))
5871             :split("\n",$$tab[$x][$y]);
5872 210         228 for(0..($hoeyde[$x]-1)){
5873 261         153 my $linje=$rad_startlinje+$_;
5874 261   100     373 my $txt=shift @celle || '';
5875 261 100 100     946 $txt=sprintf("%*s",$bredde[$y]-1,$txt) if length($txt)>0 && !$venstre[$y] && ($x>0 || $ikke_hodestrek);
      66        
      66        
5876 261         213 $tabut[$linje].=$txt;
5877 261 100       245 if($y==$j){
5878 42         115 $tabut[$linje]=~s/\s+$//;
5879             }
5880             else{
5881 219         124 my $bredere;
5882 219 50       197 $bredere = $txt=~/
5883 219         167 $txt=~s/<[^>]+>//g;
5884 219         132 $txt=~s/>/>/g;
5885 219         124 $txt=~s/</
5886 219         335 $tabut[$linje].= ' ' x ($bredde[$y]-length($txt)-$bredere);
5887             }
5888             }
5889             }
5890             }
5891 33         22 $rad_startlinje+=$hoeyde[$x];
5892              
5893             #--lage streker?
5894 33 50       40 if(not $ikke_hodestrek){
5895 33 100 33     102 if($x==0){
    50 33        
      0        
5896 5         6 for my $y (0..$j){
5897 33 50 33     41 next if $fjern_tom && !$ikketom[$y];
5898 33         42 $tabut[$rad_startlinje].=('-' x ($bredde[$y]-1))." ";
5899             }
5900 5         3 $rad_startlinje++;
5901 5         11 @overskrift=("",@tabut);
5902             }
5903             elsif(
5904             $x%$pagesize==0 || $nodup>0&&!$nodup[$x+1][$nodup-1]
5905             and $x+1<@$tab
5906             and !$ikke_hodestrek
5907             )
5908             {
5909 0         0 push(@tabut,@overskrift);
5910 0         0 $rad_startlinje+=@overskrift;
5911 0         0 $overskrift_forrige=1;
5912             }
5913             else{
5914 28         26 $overskrift_forrige=0;
5915             }
5916             }
5917             }#for x
5918 5         49 return join("\n",@tabut)."\n";
5919             }
5920              
5921             =head2 serialize
5922              
5923             Returns a data structure as a string. See also C
5924             (serialize was created long time ago before Data::Dumper appeared on
5925             CPAN, before CPAN even...)
5926              
5927             B One to four arguments.
5928              
5929             First argument: A reference to the structure you want.
5930              
5931             Second argument: (optional) The name the structure will get in the output string.
5932             If second argument is missing or is undef or '', it will get no name in the output.
5933              
5934             Third argument: (optional) The string that is returned is also put
5935             into a created file with the name given in this argument. Putting a
5936             C<< > >> char in from of the filename will append that file
5937             instead. Use C<''> or C to not write to a file if you want to
5938             use a fourth argument.
5939              
5940             Fourth argument: (optional) A number signalling the depth on which newlines is used in the output.
5941             The default is infinite (some big number) so no extra newlines are output.
5942              
5943             B A string containing the perl-code definition that makes that data structure.
5944             The input reference (first input argument) can be to an array, hash or a string.
5945             Those can contain other refs and strings in a deep data structure.
5946              
5947             Limitations:
5948              
5949             - Code refs are not handled (just returns C)
5950              
5951             - Regex, class refs and circular recursive structures are also not handled.
5952              
5953             B
5954              
5955             $a = 'test';
5956             @b = (1,2,3);
5957             %c = (1=>2, 2=>3, 3=>5, 4=>7, 5=>11);
5958             %d = (1=>2, 2=>3, 3=>\5, 4=>7, 5=>11, 6=>[13,17,19,{1,2,3,'asdf\'\\\''}],7=>'x');
5959             print serialize(\$a,'a');
5960             print serialize(\@b,'tab');
5961             print serialize(\%c,'c');
5962             print serialize(\%d,'d');
5963             print serialize(\("test'n roll",'brb "brb"'));
5964             print serialize(\%d,'d',undef,1);
5965              
5966             Prints accordingly:
5967              
5968             $a='test';
5969             @tab=('1','2','3');
5970             %c=('1','2','2','3','3','5','4','7','5','11');
5971             %d=('1'=>'2','2'=>'3','3'=>\'5','4'=>'7','5'=>'11','6'=>['13','17','19',{'1'=>'2','3'=>'asdf\'\\\''}]);
5972             ('test\'n roll','brb "brb"');
5973             %d=('1'=>'2',
5974             '2'=>'3',
5975             '3'=>\'5',
5976             '4'=>'7',
5977             '5'=>'11',
5978             '6'=>['13','17','19',{'1'=>'2','3'=>'asdf\'\\\''}],
5979             '7'=>'x');
5980              
5981             Areas of use:
5982              
5983             - Debugging (first and foremost)
5984              
5985             - Storing arrays and hashes and data structures of those on file, database or sending them over the net
5986              
5987             - eval earlier stored string to get back the data structure
5988              
5989             Be aware of the security implications of Cing a perl code string
5990             stored somewhere that unauthorized users can change them! You are
5991             probably better of using L or L without
5992             enabling the CODE-options if you have such security issues.
5993             More on decompiling Perl-code: L or L.
5994              
5995             =head2 dserialize
5996              
5997             Debug-serialize, dumping data structures for you to look at.
5998              
5999             Same as C but the output is given a newline every 80th character.
6000             (Every 80th or whatever C<$Acme::Tools::Dserialize_width> contains)
6001              
6002             =cut
6003              
6004             our $Dserialize_width=80;
6005 0   0 0   0 sub _kallstack { my $tilbake=shift||0; my @c; my $ret; $ret.=serialize(\@c,"caller$tilbake") while @c=caller(++$tilbake); $ret }
  0         0  
  0         0  
  0         0  
6006 0     0 1 0 sub dserialize{join "\n",serialize(@_)=~/(.{1,$Dserialize_width})/gs}
6007             sub serialize {
6008 33     33   160 no warnings;
  33         45  
  33         108811  
6009 1421     1421 1 3391 my($r,$name,$filename,$level)=@_;
6010 1421   100     2346 my @r=(undef,undef,($level||0)-1);
6011 1421 50       1549 if($filename){
6012 0 0       0 open my $fh, '>', $filename or croak("FEIL: could not open file $filename\n" . _kallstack());
6013 0         0 my $ret=serialize($r,$name,undef,$level);
6014 0         0 print $fh "$ret\n1;\n";
6015 0         0 close($fh);
6016 0         0 return $ret;
6017             }
6018 1421 100       1977 if(ref($r) eq 'SCALAR'){
    100          
    100          
    100          
    50          
    0          
    0          
    0          
    0          
6019 1151 50       1295 return "\$$name=".serialize($r,@r).";\n" if $name;
6020 1151 100       1321 return "undef" unless defined $$r;
6021 1143         822 my $ret=$$r;
6022 1143         957 $ret=~s/\\/\\\\/g;
6023 1143         749 $ret=~s/\'/\\'/g;
6024 1143         2139 return "'$ret'";
6025             }
6026             elsif(ref($r) eq 'ARRAY'){
6027 76 100       141 return "\@$name=".serialize($r,@r).";\n" if $name;
6028 74         87 my $ret="(";
6029 74         99 for(@$r){
6030 392         525 $ret.=serialize(\$_,@r).",";
6031 392 100       753 $ret.="\n" if $level>=0;
6032             }
6033 74         176 $ret=~s/,$//;
6034 74         91 $ret.=")";
6035 74 50       110 $ret.=";\n" if $name;
6036 74         193 return $ret;
6037             }
6038             elsif(ref($r) eq 'HASH'){
6039 31 100       168 return "\%$name=".serialize($r,@r).";\n" if $name;
6040 21         20 my $ret="(";
6041 21         86 for(sort keys %$r){
6042 73         134 $ret.=serialize(\$_,@r)."=>".serialize(\$$r{$_},@r).",";
6043 73 50       131 $ret.="\n" if $level>=0;
6044             }
6045 21         65 $ret=~s/,$//;
6046 21         28 $ret.=")";
6047 21 50       34 $ret.=";\n" if $name;
6048 21         107 return $ret;
6049             }
6050             elsif(ref($$r) eq 'ARRAY'){
6051 70 50       111 return "\@$name=".serialize($r,@r).";\n" if $name;
6052 70         52 my $ret="[";
6053 70         81 for(@$$r){
6054 188         265 $ret.=serialize(\$_,@r).",";
6055 188 50 33     569 $ret.="\n" if !defined $level or $level>=0;
6056             }
6057 70         148 $ret=~s/,$//;
6058 70         52 $ret.="]";
6059 70 50       90 $ret.=";\n" if $name;
6060 70         138 return $ret;
6061             }
6062             elsif(ref($$r) eq 'HASH'){
6063 93 50       116 return "\%$name=".serialize($r,@r).";\n" if $name;
6064 93         68 my $ret="{";
6065 93         257 for(sort keys %$$r){
6066 294         349 $ret.=serialize(\$_,@r)."=>".serialize(\$$$r{$_},@r).",";
6067 294 50       478 $ret.="\n" if $level>=0;
6068             }
6069 93         165 $ret=~s/,$//;
6070 93         73 $ret.="}";
6071 93 50       115 $ret.=";\n" if $name;
6072 93         163 return $ret;
6073             }
6074             elsif(ref($$r) eq 'SCALAR'){
6075 0         0 return "\\".serialize($$r,@r);
6076             }
6077             elsif(ref($r) eq 'LVALUE'){
6078 0         0 my $s="$$r";
6079 0         0 return serialize(\$s,@r);
6080             }
6081             elsif(ref($$r) eq 'CODE'){
6082             #warn "Tried to serialize CODE";
6083 0         0 return 'sub{croak "Can not serialize CODE-references, see perhaps B::Deparse and Storable"}'
6084             }
6085             elsif(ref($$r) eq 'GLOB'){
6086 0         0 warn "Tried to serialize a GLOB";
6087 0         0 return '\*STDERR'
6088             }
6089             else{
6090 0         0 my $tilbake;
6091 0         0 my($pakke,$fil,$linje,$sub,$hasargs,$wantarray);
6092 0   0     0 ($pakke,$fil,$linje,$sub,$hasargs,$wantarray)=caller($tilbake++) until $sub ne 'serialize' || $tilbake>20;
6093 0         0 croak("serialize() argument should be reference!\n".
6094             "\$r=$r\n".
6095             "ref(\$r) = ".ref($r)."\n".
6096             "ref(\$\$r) = ".ref($$r)."\n".
6097             "kallstack:\n". _kallstack());
6098             }
6099             }
6100              
6101             =head2 srlz
6102              
6103             Synonym to L, but remove unnecessary single quote chars around
6104             C<< \w+ >>-keys and number values (except numbers with leading zeros). Example:
6105              
6106             serialize:
6107              
6108             %s=('action'=>{'del'=>'0','ins'=>'0','upd'=>'18'},'post'=>'1348','pre'=>'1348',
6109             'updcol'=>{'Laerestednr'=>'18','Studietypenr'=>'18','Undervisningssted'=>'7','Url'=>'11'},
6110             'where'=>'where 1=1');
6111              
6112             srlz:
6113              
6114             %s=(action=>{del=>0,ins=>0,upd=>18},post=>1348,pre=>1348,
6115             updcol=>{Laerestednr=>18,Studietypenr=>18,Undervisningssted=>7,Url=>11},
6116             where=>'where 1=1');
6117              
6118             Todo: update L to do the same, but in the right way. (For now
6119             srlz runs the string from serialize() through two C<< s/// >>, this will break
6120             in certain cases). L will be kept as a synonym (or the other way around).
6121              
6122             =cut
6123              
6124             sub srlz {
6125 0     0 1 0 my $s=serialize(@_);
6126 0         0 $s=~s,'(\w+)'=>,$1=>,g;
6127 0         0 $s=~s,=>'((0|[1-9]\d*)(\.\d+)?(e[-+]?\d+)?)',=>$1,gi; #ikke ledende null! hm
6128 0         0 $s;
6129             }
6130              
6131             =head2 cnttbl
6132              
6133             my %nordic_country_population=(Norway=>5214890,Sweden=>9845155,Denmark=>5699220,Finland=>5496907,Iceland=>331310);
6134             print cnttbl(\%nordic_country_population);
6135             Iceland 331310 1.25%
6136             Norway 5214890 19.61%
6137             Finland 5496907 20.67%
6138             Denmark 5699220 21.44%
6139             Sweden 9845155 37.03%
6140             SUM 26587482 100.00%
6141              
6142             Todo: Levels...:
6143              
6144             my %sales=(
6145             Toyota=>{Prius=>19,RAV=>12,Auris=>18,Avensis=>7},
6146             Volvo=>{V40=>14, XC90=>4},
6147             Nissan=>{Leaf=>19,Qashqai=>17},
6148             Tesla=>{ModelS=>8}
6149             );
6150             print cnttbl(\%sales);
6151             Toyota SUM 56
6152             Volvo SUM 18
6153             Nissan SUM 36
6154             Tesla SUM 8
6155             SUM SUM 56 100%
6156              
6157             =cut
6158              
6159             sub cnttbl {
6160 6     6 1 338 my $hr=shift;
6161 6         20 my $maxlen=max(3,map length($_),keys%$hr);
6162             join"",ref((values%$hr)[0])
6163 4         3 ?do{ map {my$o=$_;join("",map rpad($$o[0],$maxlen)." $_\n",split("\n",$$o[1]))}
  4         12  
6164 1         8 map [$_,cnttbl($$hr{$_})],
6165             sort keys%$hr }
6166 6 100       14 :do{ my $sum=sum(values%$hr);
  5         9  
6167 5         8 my $fmt=repl("%-xs %yd %6.2f%%\n",x=>$maxlen,y=>length($sum));
6168             map sprintf($fmt,@$_,100*$$_[1]/$sum),
6169 5 50       26 (map[$_,$$hr{$_}],sort{$$hr{$a}<=>$$hr{$b} or $a cmp $b}keys%$hr),
  16         127  
6170             (['SUM',$sum]) }
6171             }
6172              
6173             =head2 nicenum
6174              
6175             print 14.3 - 14.0; # 0.300000000000001
6176             print 34.3 - 34.0; # 0.299999999999997
6177             print nicenum( 14.3 - 14.0 ); # 0.3
6178             print nicenum( 34.3 - 34.0 ); # 0.3
6179              
6180             =cut
6181              
6182             our $Nicenum;
6183             sub nicenum { #hm
6184 3     3 1 558 $Nicenum=$_[0];
6185 3         42 $Nicenum=~s/([\.,]\d*)((\d)\3\3\3\3\3)\d$/$1$2$3$3$3$3$3$3$3$3$3/;
6186 3         9 my $r=0+$Nicenum;
6187             #warn "nn $_[0] --> $Nicenum --> $r\n";
6188 3         6 $r;
6189             }
6190              
6191              
6192             =head2 sys
6193              
6194             Call instead of C if you want C (Carp::croak) when something fails.
6195              
6196             sub sys($){ my$s=shift; my$r=system($s); $r==0 or croak"ERROR: system($s)==$r ($!) ($?)" }
6197              
6198              
6199             =cut
6200              
6201 11 50   11 1 13 sub sys($){ my$s=shift; my$r=system($s); $r==0 or croak"ERROR: system($s)==$r ($!) ($?)" }
  11         58375  
  11         160  
6202              
6203             =head2 recursed
6204              
6205             Returns true or false (actually 1 or 0) depending on whether the
6206             current sub has been called by itself or not.
6207              
6208             sub xyz
6209             {
6210             xyz() if not recursed;
6211              
6212             }
6213              
6214             =cut
6215              
6216 0 0   0 1 0 sub recursed {(caller(1))[3] eq (caller(2))[3]?1:0}
6217              
6218              
6219              
6220             =head2 ed
6221              
6222             String editor commands
6223              
6224             literals: a-z 0-9 space
6225             move cursor: FBAEPN MF MB ME
6226             delete: D Md
6227             up/low/camelcase word U L C
6228             backspace: -
6229             search: S
6230             return/enter: R
6231             meta/esc/alt: M
6232             shift: T
6233             cut to eol: K
6234             caps lock: C
6235             yank: Y
6236             start and end: < >
6237             macro start/end/play: { } !
6238             times for next cmd: M (i.e. M24a inserts 24 a's)
6239              
6240             (TODO: alfa...and more docs needed)
6241              
6242             =cut
6243              
6244             our $Edcursor;
6245             sub ed {
6246 18     18 1 2772 my($s,$cs,$p,$buf)=@_; #string, commands, point (or cursor)
6247 18 100       40 return $$s=ed($$s,$cs,$p,$buf) if ref($s);
6248 17         22 my($sh,$cl,$m,$t,@m)=(0,0,0,undef);
6249 17         26 while(length($cs)){
6250 121         79 my $n = 0;
6251 121 50       304 my $c = $cs=~s,^(M\d+|M.|""|".+?"|S.+?R|\\.|.),,s ? $1 : die;
6252 121   100     272 $p = curb($p||0,0,length($s));
6253 121 100       138 if(defined$t){$cs="".($c x $t).$cs;$t=undef;next}
  1         5  
  1         1  
  1         2  
6254 120     81   248 my $add=sub{substr($s,$p,0)=$_[0];$p+=length($_[0])};
  81         87  
  81         61  
6255 120 50       498 if ($c =~ /^([a-z0-9 ])/){ &$add($sh^$cl?uc($1):$1); $sh=0 }
  65 100       87  
  65 100       46  
    100          
    100          
    100          
    100          
    100          
    50          
    100          
    100          
    100          
    100          
    100          
    50          
    100          
    100          
    100          
    50          
    50          
    50          
    50          
    50          
    50          
    50          
    100          
    100          
    100          
    50          
6256 1         2 elsif($c =~ /^"(.+)"$/) { &$add($1) }
6257 1         2 elsif($c =~ /^\\(.)/) { &$add($1) }
6258 2 50       3 elsif($c =~ /^S(.+)R/) { my $i=index($s,$1,$p);$p=$i+length($1) if $i>=0 }
  2         7  
6259 1         6 elsif($c =~ /^M(\d+)/) { $t=$1; next }
  1         4  
6260 2         3 elsif($c eq 'F') { $p++ }
6261 1         2 elsif($c eq 'B') { $p-- }
6262 0   0     0 elsif($c eq 'A') { $p-- while $p>0 and substr($s,$p-1,2)!~/^\n/ }
6263 8 50       26 elsif($c eq 'E') { substr($s,$p)=~/(.*)/ and $p+=length($1) }
6264 4         4 elsif($c eq 'D') { substr($s,$p,1)='' }
6265 3 50       15 elsif($c eq 'MD'){ substr($s,$p)=~s/^(\W*\w+)// and $buf=$1 }
6266 3 50       21 elsif($c eq 'MF'){ substr($s,$p)=~/(\W*\w+)/ and $p+=length($1) }
6267 3 50       13 elsif($c eq 'MB'){ substr($s,0,$p)=~/(\w+\W*)$/ and $p-=length($1) }
6268 0 0       0 elsif($c eq '-') { substr($s,--$p,1)='' if $p }
6269 5 50       28 elsif($c eq 'M-'){ substr($s,0,$p)=~s/(\w+\W*)$// and $p-=length($buf=$1)}
6270 3 50       13 elsif($c eq 'K') { substr($s,$p)=~s/(\S.+|\s*?\n)// and $buf=$1 }
6271 12         15 elsif($c eq 'Y') { &$add($buf) }
6272 0         0 elsif($c eq 'U') { substr($s,$p)=~s/(\W*)(\w+)/$1\U$2\E/; $p+=length($1.$2) }
  0         0  
6273 0         0 elsif($c eq 'L') { substr($s,$p)=~s/(\W*)(\w+)/$1\L$2\E/; $p+=length($1.$2) }
  0         0  
6274 0         0 elsif($c eq 'C') { substr($s,$p)=~s/(\W*)(\w+)/$1\u\L$2\E/; $p+=length($1.$2) }
  0         0  
6275 0         0 elsif($c eq '<') { $p=0 }
6276 0         0 elsif($c eq '>') { $p=length($s) }
6277 0         0 elsif($c eq 'T') { $sh=1 }
6278 0         0 elsif($c eq 'C') { $cl^=1 }
6279 1         2 elsif($c eq '{') { $m=1; @m=() }
  1         1  
6280 1         2 elsif($c eq '}') { $m=0 }
6281 2 50 33     9 elsif($c eq '!') { $m||!@m and die"ed: no macro"; $cs=join("",@m).$cs }
  2         4  
6282 2         3 elsif($c eq '""'){ &$add('"') }
6283 0         0 else { croak "ed: Unknown cmd '$c'\n" }
6284 119 100 100     352 push @m, $c if $m and $c ne '{';
6285             #warn serialize([$c,$m,$cs],'d');
6286             }
6287 17         13 $Edcursor=$p;
6288 17         32 $s;
6289             }
6290              
6291             #todo: sub unbless eller sub damn
6292             #todo: ..se også: use Data::Structure::Util qw/unbless/;
6293             #todo: ...og: Acme::Damn sin damn()
6294             #todo? sub swap($$) http://www.idg.no/computerworld/article242008.ece
6295             #todo? catal
6296             #todo?
6297             #void quicksort(int t, int u) int i, m; if (t >= u) return; swap(t, randint(t, u)); m = t; for (i = t + 1; i <= u; i++) if (x[i] < x[t]) swap(++m, i); swap(t, m) quicksort(t, m-1); quicksort(m+1, u);
6298              
6299              
6300             =head1 JUST FOR FUN
6301              
6302             =head2 brainfu
6303              
6304             B one or two arguments
6305              
6306             First argument: a string, source code of the brainfu
6307             language. String containing the eight charachters + - < > [ ] . ,
6308             Every other char is ignored silently.
6309              
6310             Second argument: if the source code contains commas (,) the second
6311             argument is the input characters in a string.
6312              
6313             B The resulting output from the program.
6314              
6315             Example:
6316              
6317             print brainfu(<<""); #prints "Hallo Verden!\n"
6318             ++++++++++[>+++++++>++++++++++>+++>+<<<<-]>++.>---.+++++++++++..+++.>++.<<++++++++++++++
6319             .>----------.+++++++++++++.--------------.+.+++++++++.>+.>.
6320              
6321             See L
6322              
6323             =head2 brainfu2perl
6324              
6325             Just as L but instead it return the perl code to which the
6326             brainfu code is translated. Just C<< eval() >> this perl code to run.
6327              
6328             Example:
6329              
6330             print brainfu2perl('>++++++++[<++++++++>-]<++++++++.>++++++[<++++++>-]<---.');
6331              
6332             Prints this string:
6333              
6334             my($c,$o,@b)=(0); sub out{$o.=chr($b[$c]) for 1..$_[0]||1}
6335             ++$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];
6336             while($b[$c]){--$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];
6337             ++$b[$c];++$c;--$b[$c];}--$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];
6338             ++$b[$c];++$b[$c];out;++$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];
6339             while($b[$c]){--$c;++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$b[$c];++$c;--$b[$c];}
6340             --$c;--$b[$c];--$b[$c];--$b[$c];out;$o;
6341              
6342             =head2 brainfu2perl_optimized
6343              
6344             Just as L but optimizes the perl code. The same
6345             example as above with brainfu2perl_optimized returns this equivalent
6346             but shorter perl code:
6347              
6348             $b[++$c]+=8;while($b[$c]){$b[--$c]+=8;--$b[++$c]}$b[--$c]+=8;out;$b[++$c]+=6;
6349             while($b[$c]){$b[--$c]+=6;--$b[++$c]}$b[--$c]-=3;out;$o;
6350              
6351             =cut
6352              
6353 3     3 1 1210 sub brainfu { eval(brainfu2perl(@_)) }
6354              
6355             sub brainfu2perl {
6356 6     6 1 18 my($bf,$inp)=@_;
6357 17         52 my $perl='my($c,$inp,$o,@b)=(0,\''.$inp.'\'); no warnings; sub out{$o.=chr($b[$c]) for 1..$_[0]||1}'."\n";
6358 3 100 66     25 $perl.='sub inp{$inp=~s/(.)//s and $b[$c]=ord($1)}'."\n" if $inp and $bf=~/,/;
6359 3 100       545 $perl.=join("",map/\+/?'++$b[$c];':/\-/?'--$b[$c];':/\[/?'while($b[$c]){':/\]/?'}':/>/?'++$c;':/
    100          
    100          
    100          
    100          
    100          
    100          
    100          
6360 3         339 $perl;
6361             }
6362              
6363             sub brainfu2perl_optimized {
6364 0     14 1 0 my $perl=brainfu2perl(@_);
6365 0         0 $perl=~s{(((\+|\-)\3\$b\[\$c\];){2,})}{ '$b[$c]'.$3.'='.(grep/b/,split//,$1).';' }gisex;
  0         0  
6366 0         0 1 while $perl=~s/\+\+\$c;\-\-\$c;//g + $perl=~s/\-\-\$c;\+\+\$c;//g;
6367 0         0 $perl=~s{((([\-\+])\3\$c;){2,})}{"\$c$3=".(grep/c/,split//,$1).';'}gisex;
  0         0  
6368 0 0       0 $perl=~s{((\+\+|\-\-)\$c;([^;{}]+;))}{my($o,$s)=($2,$3);$s=~s/\$c/$o\$c/?$s:$1}ge;
  0         0  
  0         0  
6369 0         0 $perl=~s/\$c(\-|\+)=(\d+);(\+\+|\-\-)\$b\[\$c\]/$3.'$b[$c'.$1.'='.$2.'];'/ge;
  0         0  
6370 0         0 $perl=~s{((out;){2,})}{'out('.(grep/o/,split//,$1).');'}ge;
  0         0  
6371 0         0 $perl=~s/;}/}/g;$perl=~s/;+/;/g;
  0         0  
6372 0         0 $perl;
6373             }
6374              
6375              
6376             =head1 BLOOM FILTER SUBROUTINES
6377              
6378             Bloom filters can be used to check whether an element (a string) is a
6379             member of a large set using much less memory or disk space than other
6380             data structures. Trading speed and accuracy for memory usage. While
6381             risking false positives, Bloom filters have a very strong space
6382             advantage over other data structures for representing sets.
6383              
6384             In the example below, a set of 100000 phone numbers (or any string of
6385             any length) can be "stored" in just 91230 bytes if you accept that you
6386             can only check the data structure for existence of a string and accept
6387             false positives with an error rate of 0.03 (that is three percent, error
6388             rates are given in numbers larger than 0 and smaller than 1).
6389              
6390             You can not retrieve the strings in the set without using "brute
6391             force" methods and even then you would get slightly more strings than
6392             you put in because of the error rate inaccuracy.
6393              
6394             Bloom Filters have many uses.
6395              
6396             See also: L
6397              
6398             See also: L
6399              
6400             =head2 bfinit
6401              
6402             Initialize a new Bloom Filter:
6403              
6404             my $bf = bfinit( error_rate=>0.01, capacity=>100000 );
6405              
6406             The same:
6407              
6408             my $bf = bfinit( 0.01, 100000 );
6409              
6410             since two arguments is interpreted as error_rate and capacity accordingly.
6411              
6412              
6413             =head2 bfadd
6414              
6415             bfadd($bf, $_) for @phone_numbers; # Adding strings one at a time
6416              
6417             bfadd($bf, @phone_numbers); # ...or all at once (faster)
6418              
6419             Returns 1 on success. Dies (croaks) if more strings than capacity is added.
6420              
6421             =head2 bfcheck
6422              
6423             my $phone_number="97713246";
6424             if ( bfcheck($bf, $phone_number) ) {
6425             print "Yes, $phone_number was PROBABLY added\n";
6426             }
6427             else{
6428             print "No, $phone_number was DEFINITELY NOT added\n";
6429             }
6430              
6431             Returns true if C<$phone_number> exists in C<@phone_numbers>.
6432              
6433             Returns false most of the times, but sometimes true*), if C<$phone_number> doesn't exists in C<@phone_numbers>.
6434              
6435             *) This is called a false positive.
6436              
6437             Checking more than one key:
6438              
6439             @bools = bfcheck($bf, @keys); # or ...
6440             @bools = bfcheck($bf, \@keys); # better, uses less memory if @keys is large
6441              
6442             Returns an array the same size as @keys where each element is true or false accordingly.
6443              
6444             =head2 bfgrep
6445              
6446             Same as C except it returns the keys that exists in the bloom filter
6447              
6448             @found = bfgrep($bf, @keys); # or ...
6449             @found = bfgrep($bf, \@keys); # better, uses less memory if @keys is large, or ...
6450             @found = grep bfcheck($bf,$_), @keys; # same but slower
6451              
6452             =head2 bfgrepnot
6453              
6454             Same as C except it returns the keys that do NOT exists in the bloom filter:
6455              
6456             @not_found = bfgrepnot($bf, @keys); # or ...
6457             @not_found = bfgrepnot($bf, \@keys); # better, uses less memory if @keys is large, or ...
6458             @not_found = grep !bfcheck($bf,$_), @keys); # same but slower
6459              
6460             =head2 bfdelete
6461              
6462             Deletes from a counting bloom filter.
6463              
6464             To enable deleting be sure to initialize the bloom filter with the
6465             numeric C argument. The number of bits could be 2 or 3*)
6466             for small filters with a small capacity (a small number of keys), but
6467             setting the number to 4 ensures that even very large filters with very
6468             small error rates would not overflow.
6469              
6470             *) Acme::Tools do not currently support C<< counting_bits => 3 >> so 4
6471             and 8 are the only practical alternatives where 8 is almost always overkill.
6472              
6473             my $bf=bfinit(
6474             error_rate => 0.001,
6475             capacity => 10000000,
6476             counting_bits => 4 # power of 2, that is 2, 4, 8, 16 or 32
6477             );
6478             bfadd( $bf, @unique_phone_numbers);
6479             bfdelete($bf, @unique_phone_numbers);
6480              
6481             Example: examine the frequency of the counters with 4 bit counters and 4 million keys:
6482              
6483             my $bf=bfinit( error_rate=>0.001, capacity=>4e6, counting_bits=>4 );
6484             bfadd($bf,[1e3*$_+1 .. 1e3*($_+1)]) for 0..4000-1; # adding 4 million keys one thousand at a time
6485             my %c; $c{vec($$bf{filter},$_,$$bf{counting_bits})}++ for 0..$$bf{filterlength}-1;
6486             printf "%8d counters = %d\n",$c{$_},$_ for sort{$a<=>$b}keys%c;
6487              
6488             The output:
6489              
6490             28689562 counters = 0
6491             19947673 counters = 1
6492             6941082 counters = 2
6493             1608250 counters = 3
6494             280107 counters = 4
6495             38859 counters = 5
6496             4533 counters = 6
6497             445 counters = 7
6498             46 counters = 8
6499             1 counters = 9
6500              
6501             Even after the error_rate is changed from 0.001 to a percent of that, 0.00001, the limit of 16 (4 bits) is still far away:
6502              
6503             47162242 counters = 0
6504             33457237 counters = 1
6505             11865217 counters = 2
6506             2804447 counters = 3
6507             497308 counters = 4
6508             70608 counters = 5
6509             8359 counters = 6
6510             858 counters = 7
6511             65 counters = 8
6512             4 counters = 9
6513              
6514             In algorithmic terms the number of bits needed is C. Thats why 4 bits (counters up
6515             to 15) is "always" good enough except for extremely large capasities or extremely small error rates.
6516             (Except when adding the same key many times, which should be avoided, and Acme::Tools::bfadd do not
6517             check for that, perhaps in future versions).
6518              
6519             Bloom filters of the counting type are not very space efficient: The tables above shows that 84%-85%
6520             of the counters are 0 or 1. This means most bits are zero-bits. This doesn't have to be a problem if
6521             a counting bloom filter is used to be sent over slow networks because they are very compressable by
6522             common compression tools like I or L and such.
6523              
6524             Deletion of non-existing keys makes C die (croak).
6525              
6526             =head2 bfdelete
6527              
6528             Deletes from a counting bloom filter:
6529              
6530             bfdelete($bf, @keys);
6531             bfdelete($bf, \@keys);
6532              
6533             Returns C<$bf> after deletion.
6534              
6535             Croaks (dies) on deleting a non-existing key or deleting from an previouly overflown counter in a counting bloom filter.
6536              
6537             =head2 bfaddbf
6538              
6539             Adds another bloom filter to a bloom filter.
6540              
6541             Bloom filters has the proberty that bit-wise I-ing the bit-filters
6542             of two filters with the same capacity and the same number and type of
6543             hash functions, adds the filters:
6544              
6545             my $bf1=bfinit(error_rate=>0.01,capacity=>$cap,keys=>[1..500]);
6546             my $bf2=bfinit(error_rate=>0.01,capacity=>$cap,keys=>[501..1000]);
6547              
6548             bfaddbf($bf1,$bf2);
6549              
6550             print "Yes!" if bfgrep($bf1, 1..1000) == 1000;
6551              
6552             Prints yes since C now returns an array of all the 1000 elements.
6553              
6554             Croaks if the filters are of different dimensions.
6555              
6556             Works for counting bloom filters as well (C<< counting_bits=>4 >> e.g.)
6557              
6558             =head2 bfsum
6559              
6560             Returns the number of 1's in the filter.
6561              
6562             my $percent=100*bfsum($bf)/$$bf{filterlength};
6563             printf "The filter is %.1f%% filled\n",$percent; #prints 50.0% or so if filled to capacity
6564              
6565             Sums the counters for counting bloom filters (much slower than for non counting).
6566              
6567             =head2 bfdimensions
6568              
6569             Input, two numeric arguments: Capacity and error_rate.
6570              
6571             Outputs an array of two numbers: m and k.
6572              
6573             m = - n * log(p) / log(2)**2 # n = capacity, m = bits in filter (divide by 8 to get bytes)
6574             k = log(1/p) / log(2) # p = error_rate, uses perls internal log() with base e (2.718)
6575              
6576             ...that is: m = the best number of bits in the filter and k = the best
6577             number of hash functions optimized for the given capacity (n) and
6578             error_rate (p). Note that k is a dependent only of the error_rate. At
6579             about two percent error rate the bloom filter needs just the same
6580             number of bytes as the number of keys.
6581              
6582             Storage (bytes):
6583             Capacity Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate Error-rate
6584             0.000000001 0.00000001 0.0000001 0.000001 0.00001 0.0001 0.001 0.01 0.02141585 0.1 0.5 0.99
6585             ------------- ----------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------
6586             10 54.48 48.49 42.5 36.51 30.52 24.53 18.53 12.54 10.56 6.553 2.366 0.5886
6587             100 539.7 479.8 419.9 360 300.1 240.2 180.3 120.4 100.6 60.47 18.6 0.824
6588             1000 5392 4793 4194 3595 2996 2397 1798 1199 1001 599.6 180.9 3.177
6589             10000 5.392e+04 4.793e+04 4.194e+04 3.594e+04 2.995e+04 2.396e+04 1.797e+04 1.198e+04 1e+04 5991 1804 26.71
6590             100000 5.392e+05 4.793e+05 4.193e+05 3.594e+05 2.995e+05 2.396e+05 1.797e+05 1.198e+05 1e+05 5.991e+04 1.803e+04 262
6591             1000000 5.392e+06 4.793e+06 4.193e+06 3.594e+06 2.995e+06 2.396e+06 1.797e+06 1.198e+06 1e+06 5.991e+05 1.803e+05 2615
6592             10000000 5.392e+07 4.793e+07 4.193e+07 3.594e+07 2.995e+07 2.396e+07 1.797e+07 1.198e+07 1e+07 5.991e+06 1.803e+06 2.615e+04
6593             100000000 5.392e+08 4.793e+08 4.193e+08 3.594e+08 2.995e+08 2.396e+08 1.797e+08 1.198e+08 1e+08 5.991e+07 1.803e+07 2.615e+05
6594             1000000000 5.392e+09 4.793e+09 4.193e+09 3.594e+09 2.995e+09 2.396e+09 1.797e+09 1.198e+09 1e+09 5.991e+08 1.803e+08 2.615e+06
6595             10000000000 5.392e+10 4.793e+10 4.193e+10 3.594e+10 2.995e+10 2.396e+10 1.797e+10 1.198e+10 1e+10 5.991e+09 1.803e+09 2.615e+07
6596             100000000000 5.392e+11 4.793e+11 4.193e+11 3.594e+11 2.995e+11 2.396e+11 1.797e+11 1.198e+11 1e+11 5.991e+10 1.803e+10 2.615e+08
6597             1000000000000 5.392e+12 4.793e+12 4.193e+12 3.594e+12 2.995e+12 2.396e+12 1.797e+12 1.198e+12 1e+12 5.991e+11 1.803e+11 2.615e+09
6598              
6599             Error rate: 0.99 Hash functions: 1
6600             Error rate: 0.5 Hash functions: 1
6601             Error rate: 0.1 Hash functions: 3
6602             Error rate: 0.0214158522653385 Hash functions: 6
6603             Error rate: 0.01 Hash functions: 7
6604             Error rate: 0.001 Hash functions: 10
6605             Error rate: 0.0001 Hash functions: 13
6606             Error rate: 0.00001 Hash functions: 17
6607             Error rate: 0.000001 Hash functions: 20
6608             Error rate: 0.0000001 Hash functions: 23
6609             Error rate: 0.00000001 Hash functions: 27
6610             Error rate: 0.000000001 Hash functions: 30
6611              
6612             =head2 bfstore
6613              
6614             Storing and retrieving bloom filters to and from disk uses Ls C and C. This:
6615              
6616             bfstore($bf,'filename.bf');
6617              
6618             It the same as:
6619              
6620             use Storable qw(store retrieve);
6621             ...
6622             store($bf,'filename.bf');
6623              
6624             =head2 bfretrieve
6625              
6626             This:
6627              
6628             my $bf=bfretrieve('filename.bf');
6629              
6630             Or this:
6631              
6632             my $bf=bfinit('filename.bf');
6633              
6634             Is the same as:
6635              
6636             use Storable qw(store retrieve);
6637             my $bf=retrieve('filename.bf');
6638              
6639             =head2 bfclone
6640              
6641             Deep copies the bloom filter data structure. (Which btw is not very deep, two levels at most)
6642              
6643             This:
6644              
6645             my $bfc = bfclone($bf);
6646              
6647             Works just as:
6648              
6649             use Storable;
6650             my $bfc=Storable::dclone($bf);
6651              
6652             =head2 Object oriented interface to bloom filters
6653              
6654             use Acme::Tools;
6655             my $bf=new Acme::Tools::BloomFilter(0.1,1000); # the same as bfinit, see bfinit above
6656             print ref($bf),"\n"; # prints Acme::Tools::BloomFilter
6657             $bf->add(@keys);
6658             $bf->check($keys[0]) and print "ok\n"; # prints ok
6659             $bf->grep(\@keys)==@keys and print "ok\n"; # prints ok
6660             $bf->store('filename.bf');
6661             my $bf2=bfretrieve('filename.bf');
6662             $bf2->check($keys[0]) and print "ok\n"; # still ok
6663              
6664             $bf2=$bf->clone();
6665              
6666             To instantiate a previously stored bloom filter:
6667              
6668             my $bf = Acme::Tools::BloomFilter->new( '/path/to/stored/bloomfilter.bf' );
6669              
6670             The o.o. interface has the same methods as the C-subs without the
6671             C-prefix in the names. The C is not available as a
6672             method, although C, C and
6673             C are synonyms.
6674              
6675             =head2 Internals and speed
6676              
6677             The internal hash-functions are C<< md5( "$key$salt" ) >> from L.
6678              
6679             Since C returns 128 bits and most medium to large sized bloom
6680             filters need only a 32 bit hash function, the result from md5() are
6681             split (C-ed) into 4 parts 32 bits each and are treated as if 4
6682             hash functions was called at once (speedup). Using different salts to
6683             the key on each md5 results in different hash functions.
6684              
6685             Digest::SHA512 would have been even better since it returns more bits,
6686             if it werent for the fact that it's much slower than Digest::MD5.
6687              
6688             String::CRC32::crc32 is faster than Digest::MD5, but not 4 times faster:
6689              
6690             time perl -e'use Digest::MD5 qw(md5);md5("asdf$_") for 1..10e6' #5.56 sec
6691             time perl -e'use String::CRC32;crc32("asdf$_") for 1..10e6' #2.79 sec, faster but not per bit
6692             time perl -e'use Digest::SHA qw(sha512);sha512("asdf$_") for 1..10e6' #36.10 sec, too slow (sha1, sha224, sha256 and sha384 too)
6693              
6694             Md5 seems to be an ok choice both for speed and avoiding collitions due to skewed data keys.
6695              
6696             =head2 Theory and math behind bloom filters
6697              
6698             L
6699              
6700             L
6701              
6702             L
6703              
6704             See also Scaleable Bloom Filters: L (not implemented in Acme::Tools)
6705              
6706             ...and perhaps L
6707              
6708             =cut
6709              
6710             sub bfinit {
6711 24 100   24 1 8064 return bfretrieve(@_) if @_==1;
6712 23 50 66     72 return bfinit(error_rate=>$_[0], capacity=>$_[1]) if @_==2 and 0<$_[0] and $_[0]<1 and $_[1]>1;
      66        
      33        
6713 21 0 33     40 return bfinit(error_rate=>$_[1], capacity=>$_[0]) if @_==2 and 0<$_[1] and $_[1]<1 and $_[0]>1;
      33        
      0        
6714 21         82 require Digest::MD5;
6715 21 50       33 @_%2&&croak "Arguments should be a hash of equal number of keys and values";
6716 21         60 my %arg=@_;
6717 21         47 my @ok_param=qw/error_rate capacity min_hashfuncs max_hashfuncs hashfuncs counting_bits adaptive keys/;
6718 21         62 my @not_ok=sort(grep!in($_,@ok_param),keys%arg);
6719 21 100       216 croak "Not ok param to bfinit: ".join(", ",@not_ok) if @not_ok;
6720 20 50 66     58 croak "Not an arrayref in keys-param" if exists $arg{keys} and ref($arg{keys}) ne 'ARRAY';
6721 20 50       47 croak "Not implemented counting_bits=$arg{counting_bits}, should be 2, 4, 8, 16 or 32" if !in(nvl($arg{counting_bits},1),1,2,4,8,16,32);
6722 20 50 33     56 croak "An bloom filters here can not be in both adaptive and counting_bits modes" if $arg{adaptive} and $arg{counting_bits}>1;
6723 20         103 my $bf={error_rate => 0.001, #default p
6724             capacity => 100000, #default n
6725             min_hashfuncs => 1,
6726             max_hashfuncs => 100,
6727             counting_bits => 1, #default: not counting filter
6728             adaptive => 0,
6729             %arg, #arguments
6730             key_count => 0,
6731             overflow => {},
6732             version => $Acme::Tools::VERSION,
6733             };
6734 20 100 100     278 croak "Error rate ($$bf{error_rate}) should be larger than 0 and smaller than 1" if $$bf{error_rate}<=0 or $$bf{error_rate}>=1;
6735 18 50       29 @$bf{'min_hashfuncs','max_hashfuncs'}=(map$arg{hashfuncs},1..2) if $arg{hashfuncs};
6736 18         34 @$bf{'filterlength','hashfuncs'}=bfdimensions($bf); #m and k
6737 18         756 $$bf{filter}=pack("b*", '0' x ($$bf{filterlength}*$$bf{counting_bits}) ); #hm x new empty filter
6738             $$bf{unpack}= $$bf{filterlength}<=2**16/4 ? "n*" # /4 alleviates skewing if m just slightly < 2**x
6739 18 50       37 :$$bf{filterlength}<=2**32/4 ? "N*"
    100          
6740             : "Q*";
6741 18 100       30 bfadd($bf,@{$arg{keys}}) if $arg{keys};
  8         34  
6742 16         60 return $bf;
6743             }
6744             sub bfaddbf {
6745 2     2 1 3 my($bf,$bf2)=@_;
6746             my $differror=join"\n",
6747             map "Property $_ differs ($$bf{$_} vs $$bf2{$_})",
6748 2         24 grep $$bf{$_} ne $$bf2{$_},
6749             qw/capacity counting_bits adaptive hashfuncs filterlength/; #not error_rate
6750 2 50       5 croak $differror if $differror;
6751 2 50       5 croak "Can not add adaptive bloom filters" if $$bf{adaptive};
6752 2         3 my $count=$$bf{key_count}+$$bf2{key_count};
6753             croak "Exceeded filter capacity $$bf{key_count} + $$bf2{key_count} = $count > $$bf{capacity}"
6754 2 50       4 if $count > $$bf{capacity};
6755 2         3 $$bf{key_count}+=$$bf2{key_count};
6756 2 100       3 if($$bf{counting_bits}==1){
6757 1         3 $$bf{filter} |= $$bf2{filter};
6758             #$$bf{filter} = $$bf{filter} | $$bf2{filter}; #or-ing
6759             }
6760             else {
6761 1         2 my $cb=$$bf{counting_bits};
6762 1         4 for(0..$$bf{filterlength}-1){
6763             my $sum=
6764             vec($$bf{filter}, $_,$cb)+
6765 4793         3625 vec($$bf2{filter},$_,$cb);
6766 4793 50       5265 if( $sum>2**$cb-1 ){
6767 0         0 $sum=2**$cb-1;
6768 0         0 $$bf{overflow}{$_}++;
6769             }
6770 4793         3911 vec($$bf{filter}, $_,$cb)=$sum;
6771 33     33   191 no warnings;
  33         46  
  33         73296  
6772             $$bf{overflow}{$_}+=$$bf2{overflow}{$_}
6773 0         0 and keys(%{$$bf{overflow}})>10 #hmm, arbitrary limit
6774             and croak "Too many overflows, concider doubling counting_bits from $cb to ".(2*$cb)
6775 4793 50 0     6180 if exists $$bf2{overflow}{$_};
      0        
6776             }
6777             }
6778 2         2 return $bf; #for convenience
6779             }
6780             sub bfsum {
6781 8     8 1 26 my($bf)=@_;
6782 8 100       55 return unpack( "%32b*", $$bf{filter}) if $$bf{counting_bits}==1;
6783 3         5 my($sum,$cb)=(0,$$bf{counting_bits});
6784 3         2549 $sum+=vec($$bf{filter},$_,$cb) for 0..$$bf{filterlength}-1;
6785 3         12 return $sum;
6786             }
6787             sub bfadd {
6788 11     11 1 1434 require Digest::MD5;
6789 11         508 my($bf,@keys)=@_;
6790 11 50       21 return if !@keys;
6791 11 100 66     34 my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
6792 11         31 my($m,$k,$up,$n,$cb,$adaptive)=@$bf{'filterlength','hashfuncs','unpack','capacity','counting_bits','adaptive'};
6793 11         15 for(@$keysref){
6794             #croak "Key should be scalar" if ref($_);
6795 15084 50 66     21894 $$bf{key_count} >= $n and croak "Exceeded filter capacity $n" or $$bf{key_count}++;
6796 15083         7960 my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
  15083         76699  
6797 15083 100 66     32584 if ($cb==1 and !$adaptive) { # normal bloom filter
    50          
    0          
6798 11110         51315 vec($$bf{filter}, $h[$_] % $m, 1) = 1 for 0..$k-1;
6799             }
6800             elsif ($cb>1) { # counting bloom filter
6801 3973         3607 for(0..$k-1){
6802 11919         7943 my $pos=$h[$_] % $m;
6803             my $c=
6804             vec($$bf{filter}, $pos, $cb) =
6805 11919         13826 vec($$bf{filter}, $pos, $cb) + 1;
6806 11919 100       17050 if($c==0){
6807 26         26 vec($$bf{filter}, $pos, $cb) = -1;
6808             $$bf{overflow}{$pos}++
6809 26 100 100     69 and keys(%{$$bf{overflow}})>10 #hmm, arbitrary limit
  2         129  
6810             and croak "Too many overflows, concider doubling counting_bits from $cb to ".(2*$cb);
6811             }
6812             }
6813             }
6814             elsif ($adaptive) { # adaptive bloom filter
6815 0         0 my($i,$key,$bit)=(0+@h,$_);
6816 0         0 for(0..$$bf{filterlength}-1){
6817 0 0       0 $i+=push(@h, unpack $up, Digest::MD5::md5($key,$i)) if !@h;
6818 0         0 my $pos=shift(@h) % $m;
6819 0         0 $bit=vec($$bf{filter}, $pos, 1);
6820 0         0 vec($$bf{filter}, $pos, 1)=1;
6821 0 0 0     0 last if $_>=$k-1 and $bit==0;
6822             }
6823             }
6824 0         0 else {croak}
6825             }
6826 9         546 return 1;
6827             }
6828             sub bfcheck {
6829 5     5 1 55 require Digest::MD5;
6830 5         710 my($bf,@keys)=@_;
6831 5 50       12 return if !@keys;
6832 5 50 33     20 my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
6833 5         15 my($m,$k,$up,$cb,$adaptive)=@$bf{'filterlength','hashfuncs','unpack','counting_bits','adaptive'};
6834 5         5 my $wa=wantarray();
6835 5 50       7 if(!$adaptive){ # normal bloom filter or counting bloom filter
6836             return map {
6837 5         42 my $match = 1; # match if every bit is on
  24001         13484  
6838 24001         12762 my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
  24001         128523  
6839 24001   100     117531 vec($$bf{filter}, $h[$_] % $m, $cb) or $match=0 or last for 0..$k-1;
      100        
6840 24001 50       24041 return $match if !$wa;
6841 24001         25453 $match;
6842             } @$keysref;
6843             }
6844             else { # adaptive bloom filter
6845             return map {
6846 0         0 my($match,$i,$key,$bit,@h)=(1,0,$_);
  0         0  
6847 0         0 for(0..$$bf{filterlength}-1){
6848 0 0       0 $i+=push(@h, unpack $up, Digest::MD5::md5($key,$i)) if !@h;
6849 0         0 my $pos=shift(@h) % $m;
6850 0         0 $bit=vec($$bf{filter}, $pos, 1);
6851 0 0 0     0 $match++ if $_ > $k-1 and $bit==1;
6852 0 0 0     0 $match=0 if $_ <= $k-1 and $bit==0;
6853 0 0       0 last if $bit==0;
6854             }
6855 0 0       0 return $match if !$wa;
6856 0         0 $match;
6857             } @$keysref;
6858             }
6859             }
6860             sub bfgrep { # just a copy of bfcheck with map replaced by grep
6861 8     8 1 430 require Digest::MD5;
6862 8         13 my($bf,@keys)=@_;
6863 8 50       16 return if !@keys;
6864 8 50 33     37 my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
6865 8         20 my($m,$k,$up,$cb)=@$bf{'filterlength','hashfuncs','unpack','counting_bits'};
6866             return grep {
6867 8         19 my $match = 1; # match if every bit is on
  6100         3595  
6868 6100         3408 my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
  6100         22704  
6869 6100   100     22555 vec($$bf{filter}, $h[$_] % $m, $cb) or $match=0 or last for 0..$k-1;
      100        
6870 6100         5861 $match;
6871             } @$keysref;
6872             }
6873             sub bfgrepnot { # just a copy of bfgrep with $match replaced by not $match
6874 1     1 1 5 require Digest::MD5;
6875 1         3 my($bf,@keys)=@_;
6876 1 50       3 return if !@keys;
6877 1 50 33     10 my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
6878 1         4 my($m,$k,$up,$cb)=@$bf{'filterlength','hashfuncs','unpack','counting_bits'};
6879             return grep {
6880 1         4 my $match = 1; # match if every bit is on
  1000         540  
6881 1000         516 my @h; push @h, unpack $up, Digest::MD5::md5($_,0+@h) while @h<$k;
  1000         3726  
6882 1000   50     3730 vec($$bf{filter}, $h[$_] % $m, $cb) or $match=0 or last for 0..$k-1;
      50        
6883 1000         1115 !$match;
6884             } @$keysref;
6885             }
6886             sub bfdelete {
6887 3     3 1 60 require Digest::MD5;
6888 3         11 my($bf,@keys)=@_;
6889 3 50       7 return if !@keys;
6890 3 100 100     15 my $keysref=@keys==1 && ref($keys[0]) eq 'ARRAY' ? $keys[0] : \@keys;
6891 3         7 my($m,$k,$up,$cb)=@$bf{'filterlength','hashfuncs','unpack','counting_bits'};
6892 3 50       5 croak "Cannot delete from non-counting bloom filter (use counting_bits 4 e.g.)" if $cb==1;
6893 3         5 for my $key (@$keysref){
6894 501         254 my @h; push @h, unpack $up, Digest::MD5::md5($key,0+@h) while @h<$k;
  501         2016  
6895 501 50 33     751 $$bf{key_count}==0 and croak "Deleted all and then some" or $$bf{key_count}--;
6896 501         369 my($ones,$croak,@pos)=(0);
6897 501         431 for(0..$k-1){
6898 1503         1067 my $pos=$h[$_] % $m;
6899             my $c=
6900 1503         1002 vec($$bf{filter}, $pos, $cb);
6901 1503         1411 vec($$bf{filter}, $pos, $cb)=$c-1;
6902 1503 100       1746 $croak="Cannot delete a non-existing key $key" if $c==0;
6903             $croak="Cannot delete with previously overflown position. Try doubleing counting_bits"
6904 1503 50 66     3602 if $c==1 and ++$ones and $$bf{overflow}{$pos};
      66        
6905             }
6906 501 100       662 if($croak){ #rollback
6907             vec($$bf{filter}, $h[$_] % $m, $cb)=
6908 1         6 vec($$bf{filter}, $h[$_] % $m, $cb)+1 for 0..$k-1;
6909 1         142 croak $croak;
6910             }
6911             }
6912 2         14 return $bf;
6913             }
6914             sub bfstore {
6915 1     1 1 13818 require Storable;
6916 1         5 Storable::store(@_);
6917             }
6918             sub bfretrieve {
6919 2     2 1 301 require Storable;
6920 2         6 my $bf=Storable::retrieve(@_);
6921 2 50       344 carp "Retrieved bloom filter was stored in version $$bf{version}, this is version $VERSION" if $$bf{version}>$VERSION;
6922 2         5 return $bf;
6923             }
6924             sub bfclone {
6925 1     1 1 792 require Storable;
6926 1         2578 return Storable::dclone(@_); #could be faster
6927             }
6928             sub bfdimensions_old {
6929             my($n,$p,$mink,$maxk, $k,$flen,$m)=
6930 0 0   0 0 0 @_==1 ? (@{$_[0]}{'capacity','error_rate','min_hashfuncs','max_hashfuncs'},1)
  0 0       0  
6931             :@_==2 ? (@_,1,100,1)
6932             : croak "Wrong number of arguments (".@_."), should be 2";
6933 0 0 0     0 croak "p ($p) should be > 0 and < 1" if not ( 0<$p && $p<1 );
6934 0   0     0 $m=-1*$_*$n/log(1-$p**(1/$_)) and (!defined $flen or $m<$flen) and ($flen,$k)=($m,$_) for $mink..$maxk;
      0        
      0        
6935 0         0 $flen = int(1+$flen);
6936 0         0 return ($flen,$k);
6937             }
6938             sub bfdimensions {
6939             my($n,$p,$mink,$maxk)=
6940 18 0   18 1 35 @_==1 ? (@{$_[0]}{'capacity','error_rate','min_hashfuncs','max_hashfuncs'})
  18 50       32  
6941             :@_==2 ? (@_,1,100)
6942             : croak "Wrong number of arguments (".@_."), should be 2";
6943 18         42 my $k=log(1/$p)/log(2); # k hash funcs
6944 18         30 my $m=-$n*log($p)/log(2)**2; # m bits in filter
6945 18         38 return ($m+0.5,min($maxk,max($mink,int($k+0.5))));
6946             }
6947              
6948             #crontab -e
6949             #01 4,10,16,22 * * * /usr/bin/perl -MAcme::Tools -e'Acme::Tools::_update_currency_file("/var/www/html/currency-rates")' > /dev/null 2>&1
6950              
6951             sub _update_currency_file { #call from cron
6952 0   0 0   0 my $fn=shift()||'/var/www/html/currency-rates';
6953 0   0     0 my %exe=map+($_=>"/usr/bin/$_"),qw/curl ci/;-x$_ or die for values %exe;
  0         0  
6954 0 0       0 open my $F, '>', $fn or die"ERROR: Could not write file $fn ($!)\n";
6955 0         0 print $F "#-- Currency rates ".localtime()." (".time().")\n";
6956 0         0 print $F "# File generated by Acme::Tools version $VERSION\n";
6957 0         0 print $F "# Updated every 6th hour on http://calthis.com/currency-rates\n";
6958 0         0 print $F "NOK 1.000000000\n";
6959 0         0 my $amount=1000;
6960 0         0 my $data=qx($exe{curl} -s "http://www.x-rates.com/table/?from=NOK&amount=$amount");
6961 0         0 $data=~s,to=([A-Z]{3})(.)>,$2>$1,g;
6962 0         0 my @data=ht2t($data,"Alphabetical order"); shift @data;
  0         0  
6963 0 0       0 @data=map "$$_[1] ".($$_[4]>1e-2?$$_[4]:$$_[2]?sprintf("%.8f",$amount/$$_[2]):0)."\n",@data;
    0          
6964 0         0 my %data=map split,@data;
6965 0         0 my@tc=qx($exe{curl} -s https://btc-e.com/api/3/ticker/btc_usd-ltc_usd)=~/avg.?:(\d+\.?\d*)/g;
6966 0         0 push @data,"BTC ".($tc[0]*$data{USD})."\n";
6967 0         0 push @data,"LTC ".($tc[1]*$data{USD})."\n";
6968 0         0 print $F sort(@data);
6969 0         0 close($F);
6970 0 0       0 qx($exe{ci} -l -m. -d $fn) if -w"$fn,v";
6971             }
6972              
6973             sub ftype {
6974 0     0 0 0 my $f=shift;
6975 0 0 0     0 -e $f and
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
6976             -f$f ? 'file' # -f File is a plain file.
6977             :-d$f ? 'dir' # -d File is a directory.
6978             :-l$f ? 'symlink' # -l File is a symbolic link.
6979             :-p$f ? 'pipe' # -p File is a named pipe (FIFO), or Filehandle is a pipe.
6980             :-S$f ? 'socket' # -S File is a socket.
6981             :-b$f ? 'blockfile' # -b File is a block special file.
6982             :-c$f ? 'charfile' # -c File is a character special file.
6983             :-t$f ? 'ttyfile' # -t Filehandle is opened to a tty.
6984             : ''
6985             or undef;
6986             }
6987              
6988             sub ext2mime {
6989 0     0 0 0 my $ext=shift(); #or filename
6990             #http://www.sitepoint.com/web-foundations/mime-types-complete-list/
6991 0         0 croak "todo: ext2mime not yet implemented";
6992             #return "application/json";#feks
6993             }
6994              
6995             =head1 COMMANDS
6996              
6997             =head2 install_acme_command_tools
6998              
6999             sudo perl -MAcme::Tools -e install_acme_command_tools
7000              
7001             Wrote executable /usr/local/bin/conv
7002             Wrote executable /usr/local/bin/due
7003             Wrote executable /usr/local/bin/xcat
7004             Wrote executable /usr/local/bin/freq
7005             Wrote executable /usr/local/bin/deldup
7006             Wrote executable /usr/local/bin/ccmd
7007             Wrote executable /usr/local/bin/z2z
7008             Wrote executable /usr/local/bin/2gz
7009             Wrote executable /usr/local/bin/2gzip
7010             Wrote executable /usr/local/bin/2bz2
7011             Wrote executable /usr/local/bin/2bzip2
7012             Wrote executable /usr/local/bin/2xz
7013              
7014             Examples of commands then made available:
7015              
7016             conv 1 USD EUR #might show 0.88029 if thats the current currency rate. Uses conv()
7017             conv .5 in cm #reveals that 1/2 inch is 1.27 cm, see doc on conv() for all supported units
7018             due [-h] /path/1/ /path/2/ #like du, but show statistics on file extentions instead of subdirs
7019             xcat file #like cat, zcat, bzcat or xzcat in one. Uses file extention to decide. Uses openstr()
7020             freq file #reads file(s) or stdin and view counts of each byte 0-255
7021             ccmd grep string /huge/file #caches stdout+stderr for 15 minutes (default) for much faster results later
7022             ccmd "sleep 2;echo hello" #slow first time. Note the quotes!
7023             ccmd "du -s ~/*|sort -n|tail" #ccmd store stdout+stderr in /tmp files (default)
7024             z2z [-pvk1-9o -t type] files #convert from/to .gz/bz2/xz files, -p progress, -v verbose (output result),
7025             #-k keep org file, -o overwrite, 1-9 compression degree
7026             #2xz and 2bz2 depends on xz and bzip2 being installed on system
7027             2xz #same as z2z with -t xz
7028             2bz2 #same as z2z with -t bz2
7029             2gz #same as z2z with -t gz
7030              
7031             TODO :
7032             finddup [-v -d -s -h] path1/ path2/
7033             #reports (+deletes with -d) duplicate files
7034             #finddup is NOT IMPLEMENTED YET! Use -s for symlink dups, -h for hardlink
7035             rttop
7036             trunc file(s)
7037             wipe file(s)
7038              
7039             =head3 z2z
7040              
7041             =head3 2xz
7042              
7043             =head3 2bz2
7044              
7045             =head3 2gz
7046              
7047             The commands C<2xz>, C<2bz2> and C<2gz> are just synonyms for C with an implicitly added option C<-t xz>, C<-t xz> or C<-t gz> accordingly.
7048              
7049             z2z [-p -k -v -o -1 -2 -3 -4 -5 -6 -7 -8 -9 ] files
7050              
7051             Converts (recompresses) files from one compression sc
7052              
7053              
7054              
7055             =head3 due
7056              
7057             Like C command but views space used by file extentions instead of dirs. Options:
7058              
7059             due [-options] [dirs] [files]
7060             due -h View bytes "human readable", i.e. C<8.72 MB> instead of C<9145662 b> (bytes)
7061             due -k | -m View bytes in kilobytes | megabytes (1024 | 1048576)
7062             due -K Like -k but uses 1000 instead of 1024
7063             due -z View two extentions if .z .Z .gz .bz2 .rz or .xz (.tar.gz, not just .gz)
7064             due -M Also show min, medium and max date (mtime) of files, give an idea of their age
7065             due -P Also show 10, 50 (medium) and 90 percentile of file date
7066             due -MP Both -M and -P, shows min, 10p, 50p, 90p and max
7067             due -a Sort output alphabetically by extention (default order is by size)
7068             due -c Sort output by number of files
7069             due -i Ignore case, .GZ and .gz is the same, output in lower case
7070             due -t Adds time of day to -M and -P output
7071             due -e 'regex' Exclude files (full path) matching regex. Ex: due -e '\.git'
7072             TODO: due -l TODO: Exclude hardlinks (dont count "same" file more than once, "man du")
7073             ls -l | due Parses output of ls -l, find -ls, tar tvf for size+filename and reports
7074             find | due List of filenames from stdin produces same as just command 'due'
7075             ls | due Reports on just files in current dir without recursing into subdirs
7076              
7077             =cut
7078              
7079             sub install_acme_command_tools {
7080 0     0 1 0 my $dir=(grep -d$_, @_, '/usr/local/bin', '/usr/bin')[0];
7081 0         0 for( qw( conv due xcat freq finddup ccmd trunc wipe rttop z2z 2gz 2gzip 2bz2 2bzip2 2xz ) ){
7082 0         0 unlink("$dir/$_");
7083 0         0 writefile("$dir/$_", "#!$^X\nuse Acme::Tools;\nAcme::Tools::cmd_$_(\@ARGV);\n");
7084 0         0 sys("/bin/chmod +x $dir/$_"); #hm umask
7085 0         0 print "Wrote executable $dir/$_\n";
7086             }
7087             }
7088 0     0 0 0 sub cmd_conv { print conv(@ARGV)."\n" }
7089              
7090             sub cmd_due { #TODO: output from tar tvf and ls and find -ls
7091 0     0 0 0 my %o=_go("zkKmhciMPate:l");
7092 0         0 require File::Find;
7093 33     33   169 no warnings 'uninitialized';
  33         43  
  33         102984  
7094 0 0       0 die"$0: -l not implemented yet\n" if $o{l}; #man du: default is not to count hardlinks more than once, with -l it does
7095 0 0       0 die"$0: -h, -k or -m can not be used together\n" if $o{h}+$o{k}+$o{m}>1;
7096 0 0       0 die"$0: -c and -a can not be used together\n" if $o{a}+$o{c}>1;
7097 0 0       0 die"$0: -k and -m can not be used together\n" if $o{k}+$o{m}>1;
7098 0 0       0 my @q=@ARGV; @q=('.') if !@q;
  0         0  
7099 0         0 my(%c,%b,$cnt,$bts,%mtime);
7100 0 0       0 my $zext=$o{z}?'(\.(z|Z|gz|bz2|xz|rz|kr))?':'';
7101 0         0 my $r=qr/(\.[^\.\/]{1,10}$zext)$/;
7102 0 0       0 my $qrexcl=exists$o{e}?qr/$o{e}/:0;
7103             #TODO: ought to work: tar cf - .|tar tvf -|due
7104             #my $qrstdin=qr/(^| )\-[rwx\-sS]{9} +\d+ \w+ +\w+ +(\d+) [a-zA-Z]+\.? +\d+ +(?:\d\d:\d\d|\d{4}) (.*)$/;
7105 0         0 my $qrstdin=qr/(^| )\-[rwx\-sS]{9} +(\d+ )?\w+[ \/]+\w+ +(\d+) [a-zA-Z]+\.? +\d+ +(?:\d\d:\d\d|\d{4}) (.*)$/;
7106 0 0       0 if(-p STDIN){
7107 0         0 while(<>){
7108 0         0 chomp;
7109 0 0       0 my($sz,$f)=/$qrstdin/?($2,$3):-f$_?(-s$_,$_):next;
    0          
7110 0 0       0 my $ext=$f=~$r?$1:'';
7111 0 0       0 $ext=lc($ext) if $o{i};
7112 0         0 $cnt++; $c{$ext}++;
  0         0  
7113 0         0 $bts+=$sz; $b{$ext}+=$sz;
  0         0  
7114             #$mtime{$ext}.=",$mtime" if
7115 0 0 0     0 $o{M} || $o{P} and die"due: -M and -P not yet implemented for STDIN";
7116             }
7117             }
7118             else { #hm DRY
7119             File::Find::find({wanted =>
7120             sub {
7121 0 0   0   0 return if !-f$_;
7122 0 0 0     0 return if $qrexcl and defined $File::Find::name and $File::Find::name=~$qrexcl;
      0        
7123 0         0 my($sz,$mtime)=(stat($_))[7,9];
7124 0 0       0 my $ext=m/$r/?$1:'';
7125 0 0       0 $ext=lc($ext) if $o{i};
7126 0         0 $cnt++; $c{$ext}++;
  0         0  
7127 0         0 $bts+=$sz; $b{$ext}+=$sz;
  0         0  
7128 0 0 0     0 $mtime{$ext}.=",$mtime" if $o{M} || $o{P};
7129 0         0 1;
7130 0         0 } },@q);
7131             }
7132 0     0   0 my($f,$s)=$o{k}?("%14.2f kb",sub{$_[0]/1024})
7133 0     0   0 :$o{K}?("%14.2f Kb",sub{$_[0]/1000})
7134 0     0   0 :$o{m}?("%14.2f mb",sub{$_[0]/1024**2})
7135 0     0   0 :$o{h}?("%14s", sub{bytes_readable($_[0])})
7136 0 0   0   0 : ("%14d b", sub{$_[0]});
  0 0       0  
    0          
    0          
7137             my @e=$o{a}?(sort(keys%c))
7138 0 0       0 :$o{c}?(sort{$c{$a}<=>$c{$b} or $a cmp $b}keys%c)
7139 0 0       0 : (sort{$b{$a}<=>$b{$b} or $a cmp $b}keys%c);
  0 0       0  
    0          
7140 0     0   0 my $perc=!$o{M}&&!$o{P}?sub{""}:
7141             sub{
7142 0 0   0   0 my @p=$o{P}?(10,50,90):(50);
7143 0         0 my @m=@_>0 ? do {grep$_, split",", $mtime{$_[0]}}
7144 0 0       0 : do {grep$_, map {split","} values %mtime};
  0         0  
  0         0  
7145 0         0 my @r=percentile(\@p,@m);
7146 0 0       0 @r=(min(@m),@r,max(@m)) if $o{M};
7147 0         0 @r=map int($_), @r;
7148 0 0       0 my $fmt=$o{t}?'YYYY/MM/DD-MM:MI:SS':'YYYY/MM/DD';
7149 0         0 @r=map tms($_,$fmt), @r;
7150 0         0 " ".join(" ",@r);
7151 0 0 0     0 };
7152 0         0 printf("%-11s %8d $f %7.2f%%%s\n",$_,$c{$_},&$s($b{$_}),100*$b{$_}/$bts,&$perc($_)) for @e;
7153 0         0 printf("%-11s %8d $f %7.2f%%%s\n","Sum",$cnt,&$s($bts),100,&$perc());
7154             }
7155             sub cmd_xcat {
7156 0     0 0 0 for my $fn (@_){
7157 0         0 my $os=openstr($fn);
7158 0 0 0     0 open my $FH, $os or warn "xcat: cannot open $os ($!)\n" and next;
7159             #binmode($FH);#hm?
7160 0         0 print while <$FH>;
7161 0         0 close($FH);
7162             }
7163             }
7164             sub cmd_freq {
7165 0     0 0 0 my(@f,$i);
7166 0         0 map $f[$_]++, unpack("C*",$_) while <>;
7167 0         0 my $s=" " x 12;map{print"$_$s$_$s$_\n"}("BYTE CHAR COUNT","---- ----- -------");
  0         0  
  0         0  
7168 0         0 my %m=(145,"DOS-æ",155,"DOS-ø",134,"DOS-å",146,"DOS-Æ",157,"DOS-Ø",143,"DOS-Å",map{($_," ")}0..31);
  0         0  
7169 0 0 0     0 printf("%4d %5s%8d".(++$i%3?$s:"\n"),$_,$m{$_}||chr,$f[$_]) for grep$f[$_],0..255;print "\n";
  0         0  
7170 0         0 my @no=grep!$f[$_],0..255; print "No bytes for these ".@no.": ".join(" ",@no)."\n";
  0         0  
7171             }
7172             sub cmd_deldup {
7173 0     0 0 0 cmd_finddup(@_);
7174             }
7175             sub cmd_finddup {
7176             # ~/test/deldup.pl #find and optionally delete duplicate files effiencently
7177             #http://www.commandlinefu.com/commands/view/3555/find-duplicate-files-based-on-size-first-then-md5-hash
7178 0     0 0 0 die "todo: finddup not ready yet"
7179             }
7180             #http://stackoverflow.com/questions/11900239/can-i-cache-the-output-of-a-command-on-linux-from-cli
7181             our $Ccmd_cache_dir='/tmp/acme-tools-ccmd-cache';
7182             our $Ccmd_cache_expire=15*60; #default 15 minutes
7183             sub cmd_ccmd {
7184 0     0 0 0 require Digest::MD5;
7185 0         0 my $cmd=join" ",@_;
7186 0         0 my $d="$Ccmd_cache_dir/".username();
7187 0         0 makedir($d);
7188 0         0 my $md5=Digest::MD5::md5_hex($cmd);
7189 0         0 my($fno,$fne)=map"$d/cmd.$md5.std$_","out","err";
7190 0     0   0 my $too_old=sub{time()-(stat(shift))[9] >= $Ccmd_cache_expire};
  0         0  
7191 0         0 unlink grep &$too_old($_), <$d/*.std???>;
7192 0 0 0     0 sys("($cmd) > $fno 2> $fne") if !-e$fno or &$too_old($fno);
7193 0         0 print STDOUT "".readfile($fno);
7194 0         0 print STDERR "".readfile($fne);
7195             }
7196              
7197 0     0 0 0 sub cmd_trunc { die "todo: trunc not ready yet"} #truncate a file, size 0, keep all other attr
7198              
7199             sub cmd_wipe {
7200 0     0 0 0 my %o=_go("n:k");
7201 0         0 wipe($_,$o{n},$o{k}) for @_;
7202             }
7203              
7204 0     0 0 0 sub cmd_2gz {cmd_z2z("-t","gz", @_)}
7205 0     0 0 0 sub cmd_2gzip {cmd_z2z("-t","gz", @_)}
7206 0     0 0 0 sub cmd_2bz2 {cmd_z2z("-t","bz2",@_)}
7207 0     0 0 0 sub cmd_2bzip2 {cmd_z2z("-t","bz2",@_)}
7208 0     0 0 0 sub cmd_2xz {cmd_z2z("-t","xz", @_)}
7209             #todo?: sub cmd_7z
7210             sub cmd_z2z {
7211 8     8 0 18171 local @ARGV=@_;
7212 8         35 my %o=_go("pt:kvhon123456789");
7213 8         60 my $t=repl(lc$o{t},qw/gzip gz bzip2 bz2/);
7214 8 50       60 die "due: unknown compression type $o{t}, known are gz, bz2 and xz" if $t!~/^(gz|bz2|xz)$/;
7215 8         180 my $sum=sum(map -s$_,@ARGV);
7216 8 100 66     60 print "Converting ".@ARGV." files, total ".bytes_readable($sum)."\n" if $o{v} and @ARGV>1;
7217 8         17 my $cat='cat';
7218 8 0       20 if($o{p}){ if(qx(which pv)){ $cat='pv' } else { warn repl(<<"",qr/^\s+/) } }
  0 50       0  
  0         0  
  0         0  
7219             due: pv for -p not found, install with sudo yum install pv, sudo apt-get install pv or similar
7220              
7221 8         16 my $sumnew=0;
7222 8         34 my $start=time_fp();
7223 8         11 my($i,$bsf)=(0,0);#bytes so far
7224 8         24 $Eta{'z2z'}=[];eta('z2z',0,$sum);
  8         36  
7225 8         13 for(@ARGV){
7226 11 50       17 my $new=$_; $new=~s/(\.(gz|bz2|xz))?$/.$t/i or die;
  11         138  
7227 11 100       37 my $ext=defined($2)?lc($2):'';
7228 11 50 33     172 my $same=/^$new$/; $new.=".tmp" if $same; die if $o{k} and $same;
  11 50       18  
  11         69  
7229 11 50 33     141 next if !-e$_ and warn"$_ do not exists\n";
7230 11 50 33     84 next if !-r$_ and warn"$_ is not readable\n";
7231 11 0 33     149 next if -e$new and !$o{o} and warn"$new already exists, skipping (use -o to overwrite)\n";
      33        
7232 11   100     167 my $unz={qw/gz gunzip bz2 bunzip2 xz unxz/}->{$ext}||'';
7233             #todo: my $cntfile="/tmp/acme-tools-z2z-wc-c.$$";
7234             #todo: my $cnt="tee >(wc -c>$cntfile)" if $ENV{SHELL}=~/bash/ and $o{v}; #hm dash vs bash
7235 11         44 my $z= {qw/gz gzip bz2 bzip2 xz xz/}->{$t};
7236 11         68 $z.=" -$_" for grep$o{$_},1..9;
7237 11         38 my $cmd="$cat $_|$unz|$z>$new";
7238             #todo: "$cat $_|$unz|$cnt|$z>$new";
7239             #cat /tmp/kontroll-linux.xz|unxz|tee >(wc -c>/tmp/p)|gzip|wc -c;cat /tmp/p
7240 11         56 $cmd=~s,\|+,|,g; #print "cmd: $cmd\n";
7241 11         50 sys($cmd);
7242 11 50 50     118 chall($_,$new)||die if !$o{n};
7243 11         42 my($szold,$sznew)=map{-s$_}($_,$new);
  22         206  
7244 11         80 $bsf+=-s$_;
7245 11 50       692 unlink $_ if !$o{k};
7246 11 50 0     24 rename($new, replace($new,qr/.tmp$/)) or die if $same;
7247 11 50       25 if($o{v}){
7248 11         21 $sumnew+=$sznew;
7249 11         168 my $pr=sprintf"%0.1f%%",100*$sznew/$szold;
7250             #todo: my $szuncmp=-s$cntfile&&time()-(stat($cntfile))[9]<10 ? qx(cat $cntfile) : '';
7251             #todo: $o{h} ? printf("%6.1f%% %9s => %9s => %9s %s\n", $pr,(map bytes_readable($_),$szold,$szuncmp,$sznew),$_)
7252             #todo: : printf("%6.1f%% %11d b => %11d b => %11 b %s\n",$pr,$szold,$szuncmp,$sznew,$_)
7253             my $str= $o{h}
7254 11 100       87 ? sprintf("%-7s %9s => %9s", $pr,(map bytes_readable($_),$szold,$sznew))
7255             : sprintf("%-7s %11d b => %11d b", $pr,$szold,$sznew);
7256 11 100       30 if(@ARGV>1){
7257 4         10 $i++;
7258 4 0       12 $str=$i<@ARGV
    50          
7259             ? " ETA:".sec_readable(eta('z2z',$bsf,$sum)-time_fp())." $str"
7260             : " TA: 0s $str"
7261             if $sum>1e6;
7262 4         19 $str="$i/".@ARGV." $str";
7263             }
7264 11         341 print "$str $new\n";
7265             }
7266             }
7267 8 100 66     142 if($o{v} and @ARGV>1){
7268 1 50       17 my $bytes=$o{h}?'':'bytes ';
7269             my $str=
7270             sprintf "%d files compressed in %.3f seconds from %s to %s $bytes (%s bytes) %.1f%% of original\n",
7271             0+@ARGV,
7272             time_fp()-$start,
7273 1 50       11 (map{$o{h}?bytes_readable($_):$_}($sum,$sumnew,$sumnew-$sum)),
  3         10  
7274             100*$sumnew/$sum;
7275 1         8 $str=~s,\((\d),(+$1,;
7276 1         30 print $str;
7277             }
7278             }
7279              
7280 8     8   1802 sub _go { require Getopt::Std; my %o; Getopt::Std::getopts(shift() => \%o); %o }
  8         51  
  8         48  
  8         560  
7281              
7282 0     0 0 0 sub cmd_rttop { die "rttop: not implemented here yet.\n" }
7283 0     0 0 0 sub cmd_whichpm { die "whichpm: not implemented here yet.\n" } #-a (all, inkl VERSION og ls -l)
7284 0     0 0 0 sub cmd_catal { die "catal: not implemented here yet.\n" } #-a (all, inkl VERSION og ls -l)
7285             #todo: cmd_tabdiff (fra sonyk)
7286             #todo: cmd_catlog (ala catal med /etc/catlog.conf, default er access_log)
7287              
7288             =head1 DATABASE STUFF - NOT IMPLEMENTED YET
7289              
7290             Uses L. Comming soon...
7291              
7292             $Dbh
7293             dlogin
7294             dlogout
7295             drow
7296             drows
7297             drowc
7298             drowsc
7299             dcols
7300             dpk
7301             dsel
7302             ddo
7303             dins
7304             dupd
7305             ddel
7306             dcommit
7307             drollback
7308              
7309             =cut
7310              
7311             #my$dummy=<<'SOON';
7312             sub dtype {
7313 0     0 0 0 my $connstr=shift;
7314 0 0       0 return 'SQLite' if $connstr=~/(\.sqlite|sqlite:.*\.db)$/i;
7315 0 0       0 return 'Oracle' if $connstr=~/\@/;
7316 0         0 return 'Pg' if 1==2;
7317 0         0 die;
7318             }
7319              
7320             our($Dbh,@Dbh,%Sth);
7321             our %Dbattr=(RaiseError => 1, AutoCommit => 0); #defaults
7322             sub dlogin {
7323 0     0 0 0 my $connstr=shift();
7324 0         0 my %attr=(%Dbattr,@_);
7325 0         0 my $type=dtype($connstr);
7326 0         0 my($dsn,$u,$p)=('','','');
7327 0 0       0 if($type eq 'SQLite'){
    0          
    0          
7328 0         0 $dsn=$connstr;
7329             }
7330             elsif($type eq 'Oracle'){
7331 0         0 ($u,$p,$dsn)=($connstr=~m,(.+?)(/.+?)?\@(.+),);
7332             }
7333             elsif($type eq 'Pg'){
7334 0         0 croak "todo";
7335             }
7336             else{
7337 0         0 croak "dblogin: unknown database type for connection string $connstr\n";
7338             }
7339 0         0 $dsn="dbi:$type:$dsn";
7340 0 0       0 push @Dbh, $Dbh if $Dbh; #local is better?
7341 0         0 require DBI;
7342 0         0 $Dbh=DBI->connect($dsn,$u,$p,\%attr); #connect_cached?
7343             }
7344             sub dlogout {
7345 0     0 0 0 $Dbh->disconnect;
7346 0 0       0 $Dbh=pop@Dbh if @Dbh;
7347             }
7348             sub drow {
7349 0     0 0 0 my($q,@b)=_dattrarg(@_);
7350             #my $sth=do{$Sth{$Dbh,$q} ||= $Dbh->prepare_cached($q)};
7351 0         0 my $sth=$Dbh->prepare_cached($q);
7352 0         0 $sth->execute(@b);
7353 0         0 my @r=$sth->fetchrow_array;
7354 0 0       0 $sth->finish if $$Dbh{Driver}{Name} eq 'SQLite';
7355             #$dbh->selectrow_array($statement);
7356 0 0       0 return @r==1?$r[0]:@r;
7357             }
7358       0 0   sub drows {
7359             }
7360       0 0   sub drowc {
7361             }
7362       0 0   sub drowsc {
7363             }
7364       0 0   sub dcols {
7365             }
7366       0 0   sub dpk {
7367             }
7368       0 0   sub dsel {
7369             }
7370             sub ddo {
7371 0     0 0 0 my @arg=_dattrarg(@_);
7372             #warn serialize(\@arg,'arg','',1);
7373 0         0 $Dbh->do(@arg); #hm cache?
7374             }
7375       0 0   sub dins {
7376             }
7377       0 0   sub dupd {
7378             }
7379       0 0   sub ddel {
7380             }
7381 0     0 0 0 sub dcommit { $Dbh->commit }
7382 0     0 0 0 sub drollback { $Dbh->rollback }
7383              
7384             sub _dattrarg {
7385 0     0   0 my @arg=@_;
7386 0 0       0 splice @arg,1,0, ref($arg[-1]) eq 'HASH' ? pop(@arg) : {};
7387 0         0 @arg;
7388             }
7389              
7390             =head2 self_update
7391              
7392             Update Acme::Tools to newest version quick and dirty:
7393              
7394             function pmview(){ ls -ld `perl -M$1 -le'$m=shift;$mi=$m;$mi=~s,::,/,g;print $INC{"$mi.pm"};warn"Version ".${$m."::VERSION"}."\n"' $1`;}
7395              
7396             pmview Acme::Tools #view date and version before
7397             sudo perl -MAcme::Tools -e Acme::Tools::self_update #update to newest version
7398             pmview Acme::Tools #view date and version after
7399              
7400             Does C to where Acme/Tools.pm are and then wget -N https://raw.githubusercontent.com/kjetillll/Acme-Tools/master/Tools.pm
7401              
7402             TODO: cmd_acme_tools_self_update, accept --no-check-certificate to use on curl
7403              
7404             =cut
7405              
7406             our $Wget;
7407             our $Self_update_url='https://raw.githubusercontent.com/kjetillll/Acme-Tools/master/Tools.pm'; #todo: change site
7408             sub self_update {
7409             #in($^O,'linux','cygwin') or die"ERROR: self_update works on linux and cygwin only";
7410 0   0 0 1 0 $Wget||=(grep -x$_,map"$_/wget",'/usr/bin','/bin','/usr/local/bin','.')[0]; #hm --no-check-certificate
7411 0 0       0 -x$Wget or die"ERROR: wget ($Wget) executable not found\n";
7412 0         0 my $d=dirname(__FILE__);
7413 0         0 sys("cd $d; ls -l Tools.pm; md5sum Tools.pm");
7414 0   0     0 sys("cd $d; $Wget -N ".($ARGV[0]||$Self_update_url));
7415 0         0 sys("cd $d; ls -l Tools.pm; md5sum Tools.pm");
7416             }
7417              
7418             1;
7419              
7420             package Acme::Tools::BloomFilter;
7421 33     33   848 use 5.008; use strict; use warnings; use Carp;
  33     33   101  
  33     33   138  
  33     33   40  
  33         793  
  33         104  
  33         39  
  33         916  
  33         110  
  33         29  
  33         13885  
7422 2     2   185 sub new { my($class,@p)=@_; my $self=Acme::Tools::bfinit(@p); bless $self, $class }
  2         6  
  2         8  
7423 2     2   55 sub add { &Acme::Tools::bfadd }
7424 0     0   0 sub addbf { &Acme::Tools::bfaddbf }
7425 2     2   52 sub check { &Acme::Tools::bfcheck }
7426 1     1   3 sub grep { &Acme::Tools::bfgrep }
7427 0     0   0 sub grepnot { &Acme::Tools::bfgrepnot }
7428 0     0   0 sub delete { &Acme::Tools::bfdelete }
7429 0     0   0 sub store { &Acme::Tools::bfstore }
7430 0     0   0 sub retrieve { &Acme::Tools::bfretrieve }
7431 1     1   3 sub clone { &Acme::Tools::bfclone }
7432 0     0   0 sub sum { &Acme::Tools::bfsum }
7433             1;
7434              
7435             # Ny versjon:
7436             # + c-s todo
7437             # + endre $VERSION
7438             # + endre Release history under HISTORY
7439             # + endre årstall under COPYRIGHT AND LICENSE
7440             # + oppd default valutakurser inkl datoen
7441             # + emacs Changes
7442             # + emacs README + aarstall
7443             # + emacs MANIFEST legg til ev nye t/*.t
7444             # + perl Makefile.PL;make test
7445             # + /usr/bin/perl Makefile.PL;make test
7446             # + perlbrew exec "perl ~/Acme-Tools/Makefile.PL ; time make test"
7447             # + perlbrew use perl-5.10.1; perl Makefile.PL; make test; perlbrew off
7448             # + test evt i cygwin og mingw-perl
7449             # + pod2html Tools.pm > Tools.html ; firefox Tools.html
7450             # + https://metacpan.org/pod/Acme::Tools
7451             # + http://cpants.cpanauthors.org/dist/Acme-Tools #kvalitee
7452             # + perl Makefile.PL ; make test && make dist
7453             # + cp -p *tar.gz /htdocs/
7454             # + ci -l -mversjon -d `cat MANIFEST`
7455             # + git add `cat MANIFEST`
7456             # + git status
7457             # + git commit -am versjon
7458             # + git push #eller:
7459             # + git push origin master
7460             # + http://pause.perl.org/
7461             # + tegnsett/utf8-kroell
7462             # http://en.wikipedia.org/wiki/Birthday_problem#Approximations
7463              
7464             # memoize_expire() http://perldoc.perl.org/Memoize/Expire.html
7465             # memoize_file_expire()
7466             # memoize_limit_size() #lru
7467             # memoize_file_limit_size()
7468             # memoize_memcached http://search.cpan.org/~dtrischuk/Memoize-Memcached-0.03/lib/Memoize/Memcached.pm
7469             # hint on http://perl.jonallen.info/writing/articles/install-perl-modules-without-root
7470              
7471             # sub mycrc32 { #http://billauer.co.il/blog/2011/05/perl-crc32-crc-xs-module/ eller String::CRC32::crc32 som er 100 x raskere enn Digest::CRC::crc32
7472             # my ($input, $init_value, $polynomial) = @_;
7473             # $init_value = 0 unless (defined $init_value);
7474             # $polynomial = 0xedb88320 unless (defined $polynomial);
7475             # my @lookup_table;
7476             # for (my $i=0; $i<256; $i++) {
7477             # my $x = $i;
7478             # for (my $j=0; $j<8; $j++) {
7479             # if ($x & 1) {
7480             # $x = ($x >> 1) ^ $polynomial;
7481             # } else {
7482             # $x = $x >> 1;
7483             # }
7484             # }
7485             # push @lookup_table, $x;
7486             # }
7487             # my $crc = $init_value ^ 0xffffffff;
7488             # foreach my $x (unpack ('C*', $input)) {
7489             # $crc = (($crc >> 8) & 0xffffff) ^ $lookup_table[ ($crc ^ $x) & 0xff ];
7490             # }
7491             # $crc = $crc ^ 0xffffffff;
7492             # return $crc;
7493             # }
7494             #
7495              
7496              
7497              
7498             =head1 HISTORY
7499              
7500             Release history
7501              
7502             0.21 Mar 2017 Improved nicenum() and its tests
7503            
7504             0.20 Mar 2017 Subs: a2h cnttbl h2a log10 log2 nicenum rstddev sec_readable
7505             throttle timems refa refaa refah refh refha refhh refs
7506             eachr globr keysr popr pushr shiftr splicer unshiftr valuesr
7507             Commands: 2bz2 2gz 2xz z2z
7508            
7509             0.172 Dec 2015 Subs: curb openstr pwgen sleepms sleepnm srlz tms username
7510             self_update install_acme_command_tools
7511             Commands: conv due freq wipe xcat (see "Commands")
7512            
7513             0.16 Feb 2015 bigr curb cpad isnum parta parth read_conf resolve_equation
7514             roman2int trim. Improved: conv (numbers currency) range ("derivatives")
7515            
7516             0.15 Nov 2014 Improved doc
7517             0.14 Nov 2014 New subs, improved tests and doc
7518             0.13 Oct 2010 Non-linux test issue, resolve. improved: bloom filter, tests, doc
7519             0.12 Oct 2010 Improved tests, doc, bloom filter, random_gauss, bytes_readable
7520             0.11 Dec 2008 Improved doc
7521             0.10 Dec 2008
7522              
7523             =head1 SEE ALSO
7524              
7525             L
7526              
7527             =head1 AUTHOR
7528              
7529             Kjetil Skotheim, Ekjetil.skotheim@gmail.comE
7530              
7531             =head1 COPYRIGHT
7532              
7533             2008-2017, Kjetil Skotheim
7534              
7535             =head1 LICENSE
7536              
7537             This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
7538              
7539             =cut