line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Web::SIVA; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
15754
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
29
|
|
4
|
1
|
|
|
1
|
|
4
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
25
|
|
5
|
1
|
|
|
1
|
|
6
|
use Carp; |
|
1
|
|
|
|
|
7
|
|
|
1
|
|
|
|
|
74
|
|
6
|
|
|
|
|
|
|
|
7
|
1
|
|
|
1
|
|
396
|
use version; our $VERSION = qv('0.0.6'); |
|
1
|
|
|
|
|
1526
|
|
|
1
|
|
|
|
|
6
|
|
8
|
|
|
|
|
|
|
|
9
|
1
|
|
|
1
|
|
560
|
use Mojo::DOM; |
|
1
|
|
|
|
|
96904
|
|
|
1
|
|
|
|
|
33
|
|
10
|
1
|
|
|
1
|
|
526
|
use LWP::Simple; |
|
1
|
|
|
|
|
40591
|
|
|
1
|
|
|
|
|
7
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
our @provincias = qw(al ca ma gr se co hu ja); |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
our %provincias = map { $_ => 1 } @provincias; |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
our @meses = qw(ene feb mar abr may jun jul ago sep oct nov dic); |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
our $base_url = "http://www.juntadeandalucia.es/medioambiente/atmosfera/informes_siva/"; |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# Module implementation here |
21
|
|
|
|
|
|
|
sub new { |
22
|
1
|
|
|
1
|
1
|
9
|
my $class = shift; |
23
|
1
|
|
33
|
|
|
3
|
my $province = shift || croak "Necesito una provincia"; |
24
|
|
|
|
|
|
|
|
25
|
1
|
|
|
|
|
4
|
return bless { _province => $province}, $class; |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
} |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
sub day { |
30
|
6
|
|
|
6
|
1
|
6034
|
my $self = shift; |
31
|
6
|
|
|
|
|
15
|
my ($dia, $mes, $year ) = @_; |
32
|
6
|
|
|
|
|
19
|
my $year_digits = substr($year,2,2); |
33
|
6
|
|
|
|
|
16
|
my $provincia = $self->{'_province'}; |
34
|
6
|
50
|
|
|
|
26
|
if ( ! $provincias{$provincia} ) { |
35
|
0
|
|
|
|
|
0
|
croak "$provincia is not one of the 8 provinces"; |
36
|
|
|
|
|
|
|
} |
37
|
6
|
|
|
|
|
39
|
my $date = sprintf("%02d%02d%02d",$year_digits,$mes,$dia); |
38
|
6
|
|
|
|
|
20
|
my $fecha = sprintf("%04d-%02d-%02d", $year, $mes, $dia ); |
39
|
6
|
|
|
|
|
8
|
my @datos; |
40
|
|
|
|
|
|
|
|
41
|
6
|
100
|
66
|
|
|
50
|
if ( ($year < 2004) || ( $year == 2004 && $mes == 1 && $dia < 11 ) ) { |
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
42
|
4
|
|
|
|
|
21
|
my $url = $base_url."$meses[$mes-1]$year_digits/n$provincia$date.txt"; |
43
|
4
|
|
|
|
|
20
|
my $content = get( $url ); |
44
|
4
|
50
|
|
|
|
1001132
|
if ( $content ) { |
45
|
4
|
|
|
|
|
102
|
my @tables; |
46
|
4
|
100
|
|
|
|
31
|
if ( $content =~ /Ambiental/ ) { |
47
|
2
|
|
|
|
|
1116
|
@tables = ($content =~ /Ambiental\s+(.+?)\s+Nota/gs); |
48
|
|
|
|
|
|
|
} else { |
49
|
2
|
|
|
|
|
2467
|
@tables = split(/\s+\n\s+\n\s+\n/, $content); |
50
|
|
|
|
|
|
|
} |
51
|
4
|
|
|
|
|
12
|
shift @tables; # unneeded first row |
52
|
4
|
|
|
|
|
14
|
for my $t (@tables) { |
53
|
16
|
|
|
|
|
1332
|
my @lines = grep( /\S+/, split("\n", $t ) ); # Only non-empty |
54
|
16
|
100
|
|
|
|
102
|
next if $lines[$#lines] =~ /Fecha/; # No data |
55
|
12
|
100
|
|
|
|
69
|
if ( $lines[$#lines] =~ /unidades/ ) { |
56
|
8
|
|
|
|
|
10
|
pop @lines; |
57
|
8
|
|
|
|
|
11
|
pop @lines; |
58
|
|
|
|
|
|
|
} |
59
|
12
|
|
|
|
|
49
|
my $this_metadata = { date => $fecha."T00:00" }; |
60
|
12
|
|
|
|
|
12
|
my @metadatos; |
61
|
12
|
|
|
|
|
88
|
push @metadatos, ( $lines[0] =~ /Provincia\s*:\s+(\w+)\s+Estacion\s*:\s+(.+)/ ); |
62
|
12
|
|
|
|
|
63
|
push @metadatos, ( $lines[1] =~ /Municipio\s*:\s+(\w+)\s+Direccion\s*:\s+(.+)/ ); |
63
|
12
|
|
|
|
|
20
|
for my $k (qw(provincia estacion municipio direccion)) { |
64
|
48
|
|
|
|
|
68
|
$this_metadata->{$k} = shift @metadatos; |
65
|
|
|
|
|
|
|
} |
66
|
12
|
|
|
|
|
149
|
my (@cabeceras) = split( /\s+/, $lines[2]); |
67
|
12
|
|
|
|
|
13
|
shift @cabeceras; #Date goes first |
68
|
12
|
|
|
|
|
34
|
for (my $l = 3; $l <= $#lines; $l++ ) { |
69
|
1248
|
|
|
|
|
831
|
my %these_medidas = %{$this_metadata}; |
|
1248
|
|
|
|
|
4082
|
|
70
|
1248
|
|
|
|
|
1183
|
my @columnas; |
71
|
1248
|
100
|
|
|
|
2264
|
if ( $lines[$l] =~ /:/ ) { |
72
|
1152
|
|
|
|
|
3955
|
@columnas = split( /\t/, $lines[$l]); |
73
|
1152
|
|
|
|
|
1002
|
my $fecha_hora = shift @columnas; |
74
|
1152
|
|
|
|
|
3786
|
my ($hora) = ($fecha_hora =~ /(\d+:\d+)/); |
75
|
1152
|
50
|
|
|
|
1626
|
if ( !$hora ) { |
76
|
0
|
|
|
|
|
0
|
carp "Problemas con el formato en $l $lines[$l] $fecha"; |
77
|
0
|
|
|
|
|
0
|
next; |
78
|
|
|
|
|
|
|
} |
79
|
1152
|
|
|
|
|
2684
|
$these_medidas{'date'} =~ s/00:00/$hora/; |
80
|
|
|
|
|
|
|
} else { #Different format |
81
|
96
|
|
|
|
|
262
|
my ($fecha_hora, $resto) = ($lines[$l] =~ /(\S+ \d+)\s{3}(.+)/); |
82
|
96
|
50
|
|
|
|
121
|
if ( !$resto ) { |
83
|
0
|
|
|
|
|
0
|
carp "Problemas con formato en $l => $lines[$l]"; |
84
|
|
|
|
|
|
|
} |
85
|
96
|
|
|
|
|
390
|
@columnas= split(/\s{7}/, $resto); |
86
|
96
|
|
|
|
|
150
|
my ($this_date, $hour) = split(/\s+/, $fecha_hora); |
87
|
96
|
|
|
|
|
122
|
my ($this_day,$mon,$year) = split("/", $this_date); |
88
|
96
|
|
|
|
|
277
|
$these_medidas{'date'} = sprintf("%04d-%02d-%02dT%02d:00", $year+1900,$mon,$this_day,$hour); |
89
|
|
|
|
|
|
|
} |
90
|
1248
|
|
|
|
|
1325
|
for my $c ( @cabeceras ) { |
91
|
5904
|
|
|
|
|
6920
|
$these_medidas{$c} = shift @columnas; |
92
|
5904
|
100
|
|
|
|
7923
|
next if !$these_medidas{$c}; |
93
|
5885
|
|
|
|
|
5638
|
$these_medidas{$c} =~ s/\.//; |
94
|
5885
|
|
|
|
|
5212
|
$these_medidas{$c} =~ s/,/./; |
95
|
5885
|
|
|
|
|
10027
|
$these_medidas{$c} = 0 + $these_medidas{$c}; |
96
|
|
|
|
|
|
|
} |
97
|
1248
|
|
|
|
|
3341
|
push @datos, \%these_medidas; |
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
} |
100
|
|
|
|
|
|
|
} |
101
|
|
|
|
|
|
|
} else { |
102
|
2
|
|
|
|
|
15
|
my $url = $base_url."$meses[$mes-1]$year_digits/n$provincia$date.htm"; |
103
|
|
|
|
|
|
|
|
104
|
2
|
|
|
|
|
12
|
my $content = get( $url ); |
105
|
|
|
|
|
|
|
|
106
|
2
|
50
|
33
|
|
|
349078
|
if ( $content and $content =~ m{$year
|
107
|
2
|
|
|
|
|
25
|
my $dom = Mojo::DOM->new( $content ); |
108
|
|
|
|
|
|
|
|
109
|
2
|
|
|
|
|
464119
|
my @tables = $dom->find('table')->each; |
110
|
|
|
|
|
|
|
|
111
|
2
|
|
|
|
|
99911
|
shift @tables; #Primera tabla con leyenda |
112
|
|
|
|
|
|
|
|
113
|
2
|
|
|
|
|
16
|
while ( @tables ) { |
114
|
8
|
|
|
|
|
17
|
my $metadatos = shift @tables; |
115
|
8
|
50
|
|
|
|
24
|
next if !@tables; |
116
|
8
|
|
|
|
|
14
|
my $medidas = shift @tables; |
117
|
|
|
|
|
|
|
|
118
|
8
|
|
|
|
|
46
|
my @metadatos = ( $metadatos =~ /.([A-Z][^<]+)/g); |
119
|
8
|
|
|
|
|
5230
|
my $this_metadata = { date => $fecha }; |
120
|
8
|
|
|
|
|
22
|
for my $k (qw(provincia municipio estacion direccion)) { |
121
|
32
|
|
|
|
|
51
|
$this_metadata->{$k} = shift @metadatos; |
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
|
124
|
8
|
|
|
|
|
29
|
my @filas = $medidas->find('tr')->each; |
125
|
|
|
|
|
|
|
|
126
|
8
|
|
|
|
|
125856
|
shift @filas; #Cabecera |
127
|
8
|
|
|
|
|
25
|
pop @filas; |
128
|
8
|
|
|
|
|
22
|
for my $f (@filas) { |
129
|
1152
|
|
|
|
|
2359
|
my @columnas = $f->find('td')->map('text')->each; |
130
|
1152
|
|
|
|
|
403675
|
my %these_medidas = %{$this_metadata}; |
|
1152
|
|
|
|
|
3955
|
|
131
|
1152
|
|
|
|
|
1372
|
my $fecha_hora = shift @columnas; |
132
|
1152
|
|
|
|
|
5003
|
my ($hora) = ($fecha_hora =~ /(\d+:\d+)/); |
133
|
1152
|
50
|
|
|
|
1740
|
if ( !$hora ) { |
134
|
0
|
|
|
|
|
0
|
carp "Problemas con el formato en $f $fecha"; |
135
|
|
|
|
|
|
|
} |
136
|
1152
|
|
|
|
|
1656
|
$these_medidas{'date'} =~ s/00:00/$hora/; |
137
|
1152
|
|
|
|
|
1285
|
for my $c (qw(SO2 PART NO2 CO O3)) { |
138
|
5760
|
|
|
|
|
6813
|
$these_medidas{$c} = shift @columnas; |
139
|
|
|
|
|
|
|
} |
140
|
1152
|
|
|
|
|
7614
|
push @datos, \%these_medidas; |
141
|
|
|
|
|
|
|
} |
142
|
|
|
|
|
|
|
} |
143
|
|
|
|
|
|
|
} |
144
|
|
|
|
|
|
|
} |
145
|
6
|
|
|
|
|
52
|
return \@datos; |
146
|
|
|
|
|
|
|
} |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
"We want air"; # Magic true value required at end of module |
149
|
|
|
|
|
|
|
__END__ |