File Coverage

bufrextract.pl
Criterion Covered Total %
statement 66 75 88.0
branch 32 48 66.6
condition 24 43 55.8
subroutine 7 7 100.0
pod n/a
total 129 173 74.5


line stmt bran cond sub pod time code
1             #!/usr/bin/perl
2              
3             # (C) Copyright 2010-2025 MET Norway
4             #
5             # This program is free software; you can redistribute it and/or modify
6             # it under the terms of the GNU General Public License as published by
7             # the Free Software Foundation; either version 2 of the License, or
8             # (at your option) any later version.
9             #
10             # This program is distributed in the hope that it will be useful, but
11             # WITHOUT ANY WARRANTY; without even the implied warranty of
12             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13             # General Public License for more details.
14             #
15             # You should have received a copy of the GNU General Public License
16             # along with this program; if not, write to the Free Software
17             # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18             # 02110-1301, USA.
19              
20             # pod included at end of file
21              
22 7     7   33514 use strict;
  7         9  
  7         287  
23 7     7   31 use warnings;
  7         10  
  7         422  
24 7     7   4884 use Getopt::Long;
  7         106425  
  7         61  
25 7     7   4831 use Pod::Usage qw(pod2usage);
  7         496242  
  7         595  
26 7     7   9919 use Geo::BUFR;
  7         43  
  7         17845  
27              
28             # Parse command line options
29 7         1154372 my %option = ();
30 7 50       79 GetOptions(
31             \%option,
32             'ahl=s', # Extract BUFR messages with AHL matching only
33             'gts', # Include full gts message envelope if present
34             'help', # Print help information and exit
35             'only_ahl', # Extract AHLs only
36             'outfile=s', # Print to file instead of STDOUT
37             'verbose=i', # Set verbose level to n, 0<=n<=6 (default 0)
38             'without_ahl', # Print the BUFR messages only, skipping AHLs
39             ) or pod2usage(-verbose => 0);
40              
41             # User asked for help
42 7 50       8226 pod2usage(-verbose => 1) if $option{help};
43              
44             # only_ahl and without_ahl are mutually exclusive
45             pod2usage( -message => "Options only_ahl, without_ahl and gts are mutually exclusive",
46             -exitval => 2,
47             -verbose => 0)
48             if ( ($option{only_ahl} && ($option{without_ahl} || $option{gts}))
49             || ($option{without_ahl} && ($option{only_ahl} || $option{gts}))
50 7 50 33     121 || ($option{gts} && ($option{only_ahl} || $option{without_ahl})) );
      66        
      33        
      66        
      33        
      33        
      66        
      33        
51              
52             # Make sure there is at least one input file
53 7 50       26 pod2usage(-verbose => 0) unless @ARGV;
54              
55             # Set verbosity level
56 7 50       23 Geo::BUFR->set_verbose($option{verbose}) if $option{verbose};
57              
58 7         12 my $ahl_regexp;
59 7 100       23 if ($option{ahl}) {
60 3         6 eval { $ahl_regexp = qr/$option{ahl}/ };
  3         119  
61 3 50       15 die "Argument to --ahl is not a valid Perl regular expression: $@" if $@;
62             }
63              
64             # Where to direct output (including verbose output, but not output to STDERR)
65 7         12 my $OUT;
66 7 100       25 if ($option{outfile}) {
67             open($OUT, '>', $option{outfile})
68 5 50       1765 or die "Cannot open $option{outfile} for writing: $!";
69             } else {
70 2         6 $OUT = *STDOUT;
71             }
72 7         35 binmode($OUT);
73              
74             # No need to decode section 4 here
75 7         92 Geo::BUFR->set_nodata(1);
76              
77             # Loop for processing of BUFR input files
78 7         34 foreach my $inputfname ( @ARGV ) {
79 7         40 my $bufr = Geo::BUFR->new();
80 7 100       46 $bufr->set_filter_cb(\&filter_on_ahl,$ahl_regexp) if $option{ahl};
81              
82             # Open BUFR file
83 7         39 $bufr->fopen($inputfname);
84              
85             # Process input file
86 7         34 extract($bufr);
87 7         103 $bufr->fclose();
88             }
89              
90              
91             # Extract BUFR messages and/or AHLs from BUFR file
92             sub extract {
93 7     7   14 my $bufr = shift; # BUFR object
94              
95 7         17 my ($current_message_number, $current_ahl);
96             READLOOP:
97 7         36 while (not $bufr->eof()) {
98              
99             # Read next observation. If an error is encountered during
100             # decoding, skip this observation while printing the error
101             # message to STDERR, also displaying ahl of bulletin if found
102             # (but skip error message if the message should be skipped on
103             # --ahl anyway).
104 63         77 eval {
105 63         143 $bufr->next_observation();
106             };
107 63 50       111 if ($@) {
108 0   0     0 $current_ahl = $bufr->get_current_ahl() || '';
109 0 0 0     0 next READLOOP if $option{ahl} && $current_ahl !~ $ahl_regexp;
110              
111 0         0 warn $@;
112             # Try to extract message number and ahl of the bulletin
113             # where the error occurred
114 0         0 $current_message_number = $bufr->get_current_message_number();
115 0 0       0 if (defined $current_message_number) {
116 0         0 my $error_msg = "In message $current_message_number";
117 0 0       0 $error_msg .= " contained in bulletin with ahl $current_ahl\n"
118             if $current_ahl;
119 0 0       0 warn $error_msg if $error_msg;
120             }
121 0         0 next READLOOP;
122             }
123              
124 63 100 100     257 next READLOOP if $option{ahl} && $bufr->is_filtered();
125             # Skip messages where stated length of BUFR message is sure to
126             # be erroneous, unless we want ahls only (or should we skip
127             # message in this case also? Hard choice...)
128 51 100 100     164 next READLOOP if !$option{only_ahl} && $bufr->bad_bufrlength();
129              
130 40         83 my $current_subset_number = $bufr->get_current_subset_number();
131             # If next_observation() did find a BUFR message, subset number
132             # should have been set to at least 1 (even in a 0 subset message)
133 40 100       104 last READLOOP if $current_subset_number == 0;
134              
135 33         123 $current_message_number = $bufr->get_current_message_number();
136 33   100     57 $current_ahl = $bufr->get_current_ahl() || '';
137 33         43 my $gts_eom = '';
138              
139 33 100       53 if ($current_ahl) {
140 28 100       88 if ($option{only_ahl}) {
    100          
141 10         44 print $OUT $current_ahl, "\n";
142             } elsif (!$option{without_ahl}) {
143 14 100       29 if ($option{gts}) {
144 7   50     10 my $current_gts_starting_line = $bufr->get_current_gts_starting_line() || '';
145 7         30 print $OUT $current_gts_starting_line;
146 7   100     12 $gts_eom = $bufr->get_current_gts_eom() || '';
147             }
148             # Use \r\r\n after AHL, since this is the standard
149             # sequence used in GTS bulletins
150 14         106 print $OUT $current_ahl . "\r\r\n";
151             }
152             }
153 33 100       111 next READLOOP if $option{only_ahl};
154              
155 21         38 my $msg = $bufr->get_bufr_message();
156 21         83 print $OUT $msg, $gts_eom;
157             }
158             }
159              
160             # Filter routines
161              
162             sub filter_on_ahl {
163 24     24   37 my $bufr = shift;
164 24         36 my $ahl_regexp = shift;
165 24   100     60 my $ahl = $bufr->get_current_ahl() || '';
166 24 100       300 return $ahl =~ $ahl_regexp ? 0 : 1;
167             }
168              
169              
170             =pod
171              
172             =encoding utf8
173              
174             =head1 SYNOPSIS
175              
176             bufrextract.pl
177             [--ahl ]
178             [--only_ahl] | [--without_ahl] | [--gts]
179             [--outfile ]
180             [--help]
181             [--verbose n]
182              
183             =head1 DESCRIPTION
184              
185             Extract all BUFR messages and/or corresponding AHLs from BUFR file(s),
186             possibly filtering on AHL.
187              
188             The AHL (Abbreviated Header Line) is recognized as the TTAAii CCCC
189             YYGGgg [BBB] immediately preceding the BUFR message.
190              
191             Execute without arguments for Usage, with option C<--help> for some
192             additional info. See also L for
193             examples of use.
194              
195              
196             =head1 OPTIONS
197              
198             --ahl Extract BUFR messages and/or AHLs with AHL
199             matching only
200             --gts Include full gts message envelope if present
201             --only_ahl Extract AHLs only
202             --without_ahl Extract BUFR messages only
203             --outfile
204             Will print to instead of STDOUT
205             --help Display Usage and explain the options used. For even
206             more info you might prefer to consult perldoc bufrextract.pl
207             --verbose n Set verbose level to n, 0<=n<=6 (default 0)
208              
209             Options may be abbreviated, e.g. C<--h> or C<-h> for C<--help>.
210              
211             For option C<--ahl> the should be a Perl regular
212             expression. E.g. C<--ahl 'ISS... ENMI'> will decode only BUFR SHIP
213             (ISS) from CCCC=ENMI.
214              
215             Use option C<--gts> if you want the full GTS message envelope (if
216             present) to be included in output. There are 2 main variations on this
217             envelope (SOH/ETX and ZCZC notation), for details see the Manual on
218             the GTS: Attachment II-4. Format of Meteorological Messages.
219              
220             No bufrtables are needed for running bufrextract.pl, since section 4
221             in BUFR message will not be decoded (which also speeds up execution
222             quite a bit).
223              
224             =head1 HINTS
225              
226             With a little knowledge of Perl you could easily extend bufrextract.pl
227             to extract BUFR messages based on whatever information is available in
228             section 0-3, by making your own copy of bufrextract.pl and then
229             employing one of the many C subroutines in BUFR.pm. For example,
230             to extract only BUFR messages with data category 1, add the following
231             line just before calling C in code:
232              
233             next if $bufr->get_data_category() != 1;
234              
235             Or to extract BUFR messages with TM315009 only:
236              
237             next if $bufr->get_descriptors_unexpanded() ne '315009';
238              
239             =head1 CAVEAT
240              
241             Sometimes GTS bulletins are erroneously issued with extra characters
242             between the GTS AHL and the start of BUFR message (besides the
243             standard character sequence CRCRLF), likely leading bufrextract.pl to
244             miss the AHL.
245              
246             =head1 AUTHOR
247              
248             Pål Sannes Epal.sannes@met.noE
249              
250             =head1 COPYRIGHT
251              
252             Copyright (C) 2010-2025 MET Norway
253              
254             =cut