| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Web::SIVA; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 1 |  |  | 1 |  | 15754 | use warnings; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 29 |  | 
| 4 | 1 |  |  | 1 |  | 4 | use strict; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 25 |  | 
| 5 | 1 |  |  | 1 |  | 6 | use Carp; | 
|  | 1 |  |  |  |  | 7 |  | 
|  | 1 |  |  |  |  | 74 |  | 
| 6 |  |  |  |  |  |  |  | 
| 7 | 1 |  |  | 1 |  | 396 | use version; our $VERSION = qv('0.0.6'); | 
|  | 1 |  |  |  |  | 1526 |  | 
|  | 1 |  |  |  |  | 6 |  | 
| 8 |  |  |  |  |  |  |  | 
| 9 | 1 |  |  | 1 |  | 560 | use Mojo::DOM; | 
|  | 1 |  |  |  |  | 96904 |  | 
|  | 1 |  |  |  |  | 33 |  | 
| 10 | 1 |  |  | 1 |  | 526 | use LWP::Simple; | 
|  | 1 |  |  |  |  | 40591 |  | 
|  | 1 |  |  |  |  | 7 |  | 
| 11 |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | # Two-letter province codes accepted by day(). | 
| 12 |  |  |  |  |  |  | our @provincias = qw(al ca ma gr se co hu ja); | 
| 13 |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | # Lookup set built from @provincias (code => 1), used for membership tests. | 
| 14 |  |  |  |  |  |  | our %provincias = map { $_ => 1 } @provincias; | 
| 15 |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | # Month abbreviations; day() indexes this list with the month number minus one. | 
| 16 |  |  |  |  |  |  | our @meses = qw(ene feb mar abr may jun jul ago sep oct nov dic); | 
| 17 |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | # Common URL prefix; day() appends the month/year directory and the report file name. | 
| 18 |  |  |  |  |  |  | our $base_url = "http://www.juntadeandalucia.es/medioambiente/atmosfera/informes_siva/"; | 
| 19 |  |  |  |  |  |  |  | 
| 20 |  |  |  |  |  |  | # Constructor: takes a province code (croaks when it is missing or false) and stores it, unvalidated, under _province; day() checks it against %provincias. | 
| 21 |  |  |  |  |  |  | sub new { | 
| 22 | 1 |  |  | 1 | 1 | 9 | my $class = shift; | 
| 23 | 1 |  | 33 |  |  | 3 | my $province = shift || croak "Necesito una provincia"; | 
| 24 |  |  |  |  |  |  |  | 
| 25 | 1 |  |  |  |  | 4 | return bless { _province => $province}, $class; | 
| 26 |  |  |  |  |  |  |  | 
| 27 |  |  |  |  |  |  | } | 
| 28 |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | # day( $dia, $mes, $year ): downloads the report of one day for this object's province | 
|  |  |  |  |  |  |  | # and returns a reference to an array of hashes, one hash per parsed data row. | 
|  |  |  |  |  |  |  | # Each hash carries date, provincia, estacion, municipio and direccion plus one key per measurement column. | 
|  |  |  |  |  |  |  | # Croaks when the stored province is not a key of %provincias. | 
|  |  |  |  |  |  |  | # The returned array is empty when the download yields nothing usable. | 
| 29 |  |  |  |  |  |  | sub day { | 
| 30 | 6 |  |  | 6 | 1 | 6034 | my $self = shift; | 
| 31 | 6 |  |  |  |  | 15 | my ($dia, $mes, $year ) = @_; | 
|  |  |  |  |  |  |  | # Two characters starting at offset 2 of $year: the last two digits when $year has four digits. | 
| 32 | 6 |  |  |  |  | 19 | my $year_digits = substr($year,2,2); | 
| 33 | 6 |  |  |  |  | 16 | my $provincia = $self->{'_province'}; | 
|  |  |  |  |  |  |  | # Reject objects that were built with a province code missing from %provincias. | 
| 34 | 6 | 50 |  |  |  | 26 | if ( ! $provincias{$provincia} ) { | 
| 35 | 0 |  |  |  |  | 0 | croak "$provincia is not one of the 8 provinces"; | 
| 36 |  |  |  |  |  |  | } | 
|  |  |  |  |  |  |  | # $date (yymmdd) goes into the report file name; $fecha (yyyy-mm-dd) goes into every returned row. | 
| 37 | 6 |  |  |  |  | 39 | my $date =  sprintf("%02d%02d%02d",$year_digits,$mes,$dia); | 
| 38 | 6 |  |  |  |  | 20 | my $fecha = sprintf("%04d-%02d-%02d", $year, $mes, $dia ); | 
| 39 | 6 |  |  |  |  | 8 | my @datos; | 
| 40 |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | # Dates before 2004-01-11 are fetched as a .txt report and parsed line by line; later dates as an .htm page. | 
| 41 | 6 | 100 | 66 |  |  | 50 | if ( ($year < 2004) || ( $year == 2004 && $mes == 1 && $dia < 11 ) ) { | 
|  |  |  | 100 |  |  |  |  | 
|  |  |  | 66 |  |  |  |  | 
| 42 | 4 |  |  |  |  | 21 | my $url = $base_url."$meses[$mes-1]$year_digits/n$provincia$date.txt"; | 
| 43 | 4 |  |  |  |  | 20 | my $content = get( $url ); | 
|  |  |  |  |  |  |  | # Nothing is parsed when get() returned a false value. | 
| 44 | 4 | 50 |  |  |  | 1001132 | if ( $content ) { | 
| 45 | 4 |  |  |  |  | 102 | my @tables; | 
|  |  |  |  |  |  |  | # Two text layouts: sections running from "Ambiental" to "Nota", otherwise chunks separated by runs of whitespace-only lines. | 
| 46 | 4 | 100 |  |  |  | 31 | if ( $content =~ /Ambiental/ ) { | 
| 47 | 2 |  |  |  |  | 1116 | @tables = ($content =~ /Ambiental\s+(.+?)\s+Nota/gs); | 
| 48 |  |  |  |  |  |  | } else { | 
| 49 | 2 |  |  |  |  | 2467 | @tables = split(/\s+\n\s+\n\s+\n/, $content); | 
| 50 |  |  |  |  |  |  | } | 
| 51 | 4 |  |  |  |  | 12 | shift @tables; # unneeded first row | 
| 52 | 4 |  |  |  |  | 14 | for my $t (@tables) { | 
| 53 | 16 |  |  |  |  | 1332 | my @lines = grep( /\S+/, split("\n", $t ) ); # Only non-empty | 
| 54 | 16 | 100 |  |  |  | 102 | next if $lines[$#lines] =~ /Fecha/; # No data | 
|  |  |  |  |  |  |  | # When the last line mentions "unidades", the last two lines are dropped. | 
| 55 | 12 | 100 |  |  |  | 69 | if ( $lines[$#lines] =~ /unidades/ ) { | 
| 56 | 8 |  |  |  |  | 10 | pop @lines; | 
| 57 | 8 |  |  |  |  | 11 | pop @lines; | 
| 58 |  |  |  |  |  |  | } | 
|  |  |  |  |  |  |  | # Per-table metadata copied into every row; the 00:00 part of date is replaced with each row's time. | 
| 59 | 12 |  |  |  |  | 49 | my $this_metadata = { date => $fecha."T00:00" }; | 
| 60 | 12 |  |  |  |  | 12 | my @metadatos; | 
|  |  |  |  |  |  |  | # NOTE(review): a failed match pushes nothing, so the keys filled below would receive shifted values - confirm the inputs always match. | 
| 61 | 12 |  |  |  |  | 88 | push @metadatos, ( $lines[0] =~ /Provincia\s*:\s+(\w+)\s+Estacion\s*:\s+(.+)/ ); | 
| 62 | 12 |  |  |  |  | 63 | push @metadatos, ( $lines[1] =~ /Municipio\s*:\s+(\w+)\s+Direccion\s*:\s+(.+)/ ); | 
| 63 | 12 |  |  |  |  | 20 | for my $k (qw(provincia estacion municipio direccion)) { | 
| 64 | 48 |  |  |  |  | 68 | $this_metadata->{$k} = shift @metadatos; | 
| 65 |  |  |  |  |  |  | } | 
|  |  |  |  |  |  |  | # The third line holds the column headers. | 
| 66 | 12 |  |  |  |  | 149 | my (@cabeceras) = split( /\s+/, $lines[2]); | 
| 67 | 12 |  |  |  |  | 13 | shift @cabeceras; #Date goes first | 
|  |  |  |  |  |  |  | # Every remaining line is a data row. | 
| 68 | 12 |  |  |  |  | 34 | for (my $l =  3; $l <= $#lines; $l++ ) { | 
| 69 | 1248 |  |  |  |  | 831 | my %these_medidas = %{$this_metadata}; | 
|  | 1248 |  |  |  |  | 4082 |  | 
| 70 | 1248 |  |  |  |  | 1183 | my @columnas; | 
|  |  |  |  |  |  |  | # Rows containing ":" are tab-separated and start with a field carrying hh:mm. | 
| 71 | 1248 | 100 |  |  |  | 2264 | if ( $lines[$l] =~ /:/ ) { | 
| 72 | 1152 |  |  |  |  | 3955 | @columnas = split( /\t/, $lines[$l]); | 
| 73 | 1152 |  |  |  |  | 1002 | my $fecha_hora = shift @columnas; | 
| 74 | 1152 |  |  |  |  | 3786 | my ($hora) = ($fecha_hora =~ /(\d+:\d+)/); | 
| 75 | 1152 | 50 |  |  |  | 1626 | if ( !$hora ) { | 
| 76 | 0 |  |  |  |  | 0 | carp "Problemas con el formato en $l $lines[$l] $fecha"; | 
| 77 | 0 |  |  |  |  | 0 | next; | 
| 78 |  |  |  |  |  |  | } | 
| 79 | 1152 |  |  |  |  | 2684 | $these_medidas{'date'} =~ s/00:00/$hora/; | 
| 80 |  |  |  |  |  |  | } else { #Different format | 
|  |  |  |  |  |  |  | # Expected here: a date, two spaces and an hour, then three whitespace characters and the values. | 
| 81 | 96 |  |  |  |  | 262 | my ($fecha_hora, $resto) = ($lines[$l] =~ /(\S+  \d+)\s{3}(.+)/); | 
|  |  |  |  |  |  |  | # NOTE(review): unlike the branch above there is no "next" after this warning, so the row is still processed with undefined values. | 
| 82 | 96 | 50 |  |  |  | 121 | if ( !$resto ) { | 
| 83 | 0 |  |  |  |  | 0 | carp "Problemas con formato en $l => $lines[$l]"; | 
| 84 |  |  |  |  |  |  | } | 
|  |  |  |  |  |  |  | # Values are separated by exactly seven whitespace characters. | 
| 85 | 96 |  |  |  |  | 390 | @columnas= split(/\s{7}/, $resto); | 
| 86 | 96 |  |  |  |  | 150 | my ($this_date, $hour) = split(/\s+/, $fecha_hora); | 
|  |  |  |  |  |  |  | # This inner $year shadows the sub argument. NOTE(review): adding 1900 presumably expects a two-digit year from the 1900s - confirm for 2000-2003 data. | 
| 87 | 96 |  |  |  |  | 122 | my ($this_day,$mon,$year) = split("/", $this_date); | 
| 88 | 96 |  |  |  |  | 277 | $these_medidas{'date'} = sprintf("%04d-%02d-%02dT%02d:00", $year+1900,$mon,$this_day,$hour); | 
| 89 |  |  |  |  |  |  | } | 
|  |  |  |  |  |  |  | # Pair each header with the next value. False values (empty, undef, 0) are stored untouched; | 
|  |  |  |  |  |  |  | # the others lose their first ".", get their first "," turned into "." and are made numeric. | 
| 90 | 1248 |  |  |  |  | 1325 | for my $c ( @cabeceras ) { | 
| 91 | 5904 |  |  |  |  | 6920 | $these_medidas{$c} = shift @columnas; | 
| 92 | 5904 | 100 |  |  |  | 7923 | next if !$these_medidas{$c}; | 
| 93 | 5885 |  |  |  |  | 5638 | $these_medidas{$c} =~ s/\.//; | 
| 94 | 5885 |  |  |  |  | 5212 | $these_medidas{$c} =~ s/,/./; | 
| 95 | 5885 |  |  |  |  | 10027 | $these_medidas{$c} = 0 + $these_medidas{$c}; | 
| 96 |  |  |  |  |  |  | } | 
| 97 | 1248 |  |  |  |  | 3341 | push @datos, \%these_medidas; | 
| 98 |  |  |  |  |  |  | } | 
| 99 |  |  |  |  |  |  | } | 
| 100 |  |  |  |  |  |  | } | 
| 101 |  |  |  |  |  |  | } else { | 
| 102 | 2 |  |  |  |  | 15 | my $url = $base_url."$meses[$mes-1]$year_digits/n$provincia$date.htm"; | 
| 103 |  |  |  |  |  |  |  | 
| 104 | 2 |  |  |  |  | 12 | my $content = get( $url ); | 
| 105 |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | # NOTE(review): the condition on the next line is cut off in this listing; the rest of the pattern is not visible here. | 
| 106 | 2 | 50 | 33 |  |  | 349078 | if  ( $content and $content =~ m{$year
| 107 | 2 |  |  |  |  | 25 | my $dom = Mojo::DOM->new( $content ); | 
| 108 |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | # All table elements found in the page. | 
| 109 | 2 |  |  |  |  | 464119 | my @tables = $dom->find('table')->each; | 
| 110 |  |  |  |  |  |  |  | 
| 111 | 2 |  |  |  |  | 99911 | shift @tables; # First table holds the legend | 
| 112 |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | # Tables are consumed in pairs: station metadata first, then its measurements. | 
| 113 | 2 |  |  |  |  | 16 | while ( @tables ) { | 
| 114 | 8 |  |  |  |  | 17 | my $metadatos = shift @tables; | 
|  |  |  |  |  |  |  | # A leftover metadata table without a measurements table ends the loop. | 
| 115 | 8 | 50 |  |  |  | 24 | next if !@tables; | 
| 116 | 8 |  |  |  |  | 14 | my $medidas = shift @tables; | 
| 117 |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | # Runs of text starting with a capital letter, taken from the metadata table used as a string (presumably its markup). | 
| 118 | 8 |  |  |  |  | 46 | my @metadatos = ( $metadatos =~ /.([A-Z][^<]+)/g); | 
|  |  |  |  |  |  |  | # NOTE(review): unlike the text branch, date has no "T00:00" suffix here, so the s/00:00/$hora/ further down has nothing to replace - confirm intended. | 
| 119 | 8 |  |  |  |  | 5230 | my $this_metadata = { date => $fecha }; | 
| 120 | 8 |  |  |  |  | 22 | for my $k (qw(provincia municipio estacion direccion)) { | 
| 121 | 32 |  |  |  |  | 51 | $this_metadata->{$k} = shift @metadatos; | 
| 122 |  |  |  |  |  |  | } | 
| 123 |  |  |  |  |  |  |  | 
| 124 | 8 |  |  |  |  | 29 | my @filas = $medidas->find('tr')->each; | 
| 125 |  |  |  |  |  |  |  | 
| 126 | 8 |  |  |  |  | 125856 | shift @filas; # Header row | 
|  |  |  |  |  |  |  | # The last row is discarded as well. | 
| 127 | 8 |  |  |  |  | 25 | pop @filas; | 
| 128 | 8 |  |  |  |  | 22 | for my $f (@filas) { | 
| 129 | 1152 |  |  |  |  | 2359 | my @columnas = $f->find('td')->map('text')->each; | 
| 130 | 1152 |  |  |  |  | 403675 | my %these_medidas = %{$this_metadata}; | 
|  | 1152 |  |  |  |  | 3955 |  | 
| 131 | 1152 |  |  |  |  | 1372 | my $fecha_hora = shift @columnas; | 
| 132 | 1152 |  |  |  |  | 5003 | my ($hora) = ($fecha_hora =~ /(\d+:\d+)/); | 
|  |  |  |  |  |  |  | # NOTE(review): a missing time only triggers a warning; the row is still stored and $hora is undefined in the substitution below. | 
| 133 | 1152 | 50 |  |  |  | 1740 | if ( !$hora ) { | 
| 134 | 0 |  |  |  |  | 0 | carp "Problemas con el formato en $f $fecha"; | 
| 135 |  |  |  |  |  |  | } | 
| 136 | 1152 |  |  |  |  | 1656 | $these_medidas{'date'} =~ s/00:00/$hora/; | 
|  |  |  |  |  |  |  | # The remaining cells are assigned, in order and as text, to these five fixed keys. | 
| 137 | 1152 |  |  |  |  | 1285 | for my $c (qw(SO2 PART NO2 CO O3)) { | 
| 138 | 5760 |  |  |  |  | 6813 | $these_medidas{$c} = shift @columnas; | 
| 139 |  |  |  |  |  |  | } | 
| 140 | 1152 |  |  |  |  | 7614 | push @datos, \%these_medidas; | 
| 141 |  |  |  |  |  |  | } | 
| 142 |  |  |  |  |  |  | } | 
| 143 |  |  |  |  |  |  | } | 
| 144 |  |  |  |  |  |  | } | 
| 145 | 6 |  |  |  |  | 52 | return \@datos; | 
| 146 |  |  |  |  |  |  | } | 
| 147 |  |  |  |  |  |  |  | 
| 148 |  |  |  |  |  |  | "We want air"; # Magic true value required at end of module | 
| 149 |  |  |  |  |  |  | __END__ |