File Coverage

blib/lib/Parse/H.pm
Criterion Covered Total %
statement 609 609 100.0
branch 470 470 100.0
condition 240 240 100.0
subroutine 11 11 100.0
pod 3 3 100.0
total 1333 1333 100.0


line stmt bran cond sub pod time code
1             #!/usr/bin/perl -w
2             # Parse::H - A parser for C header files that calls the given
3             # subroutines when a symbol of a specified type is encountered.
4             #
5             # Copyright (C) 2022-2025 Bogdan 'bogdro' Drozdowski,
6             # bogdro (at) users . sourceforge . net
7             # bogdro /at\ cpan . org
8             #
9             # This program is free software; you can redistribute it and/or modify it
10             # under the same terms as Perl itself.
11             #
12              
13             package Parse::H;
14              
15 3     3   605569 use warnings;
  3         5  
  3         461  
16              
17             require Exporter;
18             @ISA = (Exporter);
19             @EXPORT = qw();
20             @EXPORT_OK = qw(parse_struct parse_union parse_file);
21              
22 3     3   18 use strict;
  3         11  
  3         61001  
23              
24             =head1 NAME
25              
26             Parse::H - A parser for C header files that calls the given subroutines when a symbol of a specified type is encountered.
27              
28             =head1 VERSION
29              
30             Version 0.30
31              
32             =cut
33              
34             our $VERSION = '0.30';
35              
36             =head1 DESCRIPTION
37              
38             This module provides subroutines for parsing C language header files
39             (*.h files) while calling user-provided callback subroutines on various
40             found elements.
41              
42             =head1 SYNOPSIS
43              
44             use Parse::H qw(parse_file);
45              
46             open (my $infile, '<', 'test.h') or die "Cannot open test.h: $!\n";
47              
48             my $extern_sub = sub { ... }
49             my $comment_sub = sub { ... }
50             my $preproc_sub = sub { ... }
51             my $typedef_sub = sub { ... }
52             my $struct_start_sub = sub { ... }
53             my $struct_entry_sub = sub { ... }
54             my $struct_end_sub = sub { ... }
55             my $enum_start_sub = sub { ... }
56             my $enum_entry_sub = sub { ... }
57             my $enum_end_sub = sub { ... }
58             my $union_start_sub = sub { ... }
59             my $union_entry_sub = sub { ... }
60             my $union_end_sub = sub { ... }
61             my $output_sub = sub { ... }
62              
63             my %params = (
64             'infile' => $infile,
65             'output_sub' => $output_sub,
66             'comment_sub' => $comment_sub,
67             'preproc_sub' => $preproc_sub,
68             'extern_sub' => $extern_sub,
69             'typedef_sub' => $typedef_sub,
70             'struct_start_sub' => $struct_start_sub,
71             'struct_entry_sub' => $struct_entry_sub,
72             'struct_end_sub' => $struct_end_sub,
73             'union_start_sub' => $union_start_sub,
74             'union_entry_sub' => $union_entry_sub,
75             'union_end_sub' => $union_end_sub,
76             'enum_start_sub' => $enum_start_sub,
77             'enum_entry_sub' => $enum_entry_sub,
78             'enum_end_sub' => $enum_end_sub,
79             'pointer_size' => 8,
80             );
81              
82             parse_file (%params);
83              
84             close $infile;
85              
86             =head1 EXPORT
87              
88             Nothing is exported by default.
89              
90             The following functions are exported on request:
91             parse_struct
92             parse_union
93             parse_file
94              
95             These parse a C "struct" type, a C "union" type or a whole C header
96             file, respectively.
97              
98             =head1 DATA
99              
100             =cut
101              
102             # =head2 _max
103             #
104             # PRIVATE SUBROUTINE.
105             # Returns the greater of 2 numbers.
106             #
107             # =cut
108             sub _max
109             {
110 2842     2842   4746 my ($a, $b) = @_;
111 2842 100       5626 return $a if $a > $b;
112 2407         6348 return $b;
113             }
114              
115             # =head2 _get_param
116             #
117             # PRIVATE SUBROUTINE.
118             # Returns the value specified by name (parameter 2) from the
119             # hashref specified in parameter 1, or undef.
120             #
121             # =cut
122             sub _get_param
123             {
124 17920     17920   26611 my ($hash, $name) = @_;
125 17920 100       38943 return defined($hash->{$name})? $hash->{$name} : undef;
126             }
127              
128             # =head2 _is_a_number
129             #
130             # PRIVATE SUBROUTINE.
131             # Returns 1 if the provided parameter string looks like a valid number, 0 otherwise.
132             #
133             # =cut
134             sub _is_a_number
135             {
136 2436     2436   5550 my $v = shift;
137 2436 100       13756 return ($v =~ /^((0?x[0-9a-f]+)|(0?b[01]+)|(0?o[0-7]+)|([0-9]+))$/oi)? 1 : 0;
138             }
139              
140             # =head2 _output_array_entry_size
141             #
142             # PRIVATE SUBROUTINE.
143             # Outputs an entry for the given array count and element size.
144             # Params: entry sub ref (struct or union), output sub ref,
145             # variable name, element count, element size.
146             # Returns the element count, converted to a number if possible.
147             #
148             # =cut
149             sub _output_array_entry_size
150             {
151 2378     2378   9398 my ($entry_sub, $output_sub, $var_name, $count, $size) = @_;
152 2378         3864 my $line = '';
153 2378 100       9189 if ( $count =~ /^(0?[xbo])[0-9a-f_]+$/oi )
    100          
154             {
155             # looks like a hex/bin/oct number - convert
156 58         147 $count = oct($count);
157 58 100       220 $line = &$entry_sub($var_name, $size * $count) if $entry_sub;
158             }
159             elsif ( $count =~ /^[0-9_]+$/o )
160             {
161             # looks like a dec number - convert
162 1218         2123 $count = int($count);
163 1218 100       3651 $line = &$entry_sub($var_name, $size * $count) if $entry_sub;
164             }
165             else
166             {
167             # not a number - emit a string
168 1102 100       4140 $line = &$entry_sub($var_name, "$size * $count") if $entry_sub;
169             }
170 2378 100 100     20631 &$output_sub($line) if $output_sub and $line;
171             # remove the parsed element
172 2378         16605 s/^[^;]*;//o;
173 2378         4945 return $count;
174             }
175              
176             # =head2 _split_decl
177             #
178             # PRIVATE SUBROUTINE.
179             # Splits a declaration line of multiple variables into separate declarations.
180             # Params: the input file handle.
181             #
182             # =cut
183             sub _split_decl
184             {
185 12615     12615   16963 my $infile = shift;
186              
187             # joining lines
188 12615   100     44605 while ( /\\$/o || (/,/o && ! /;/o) )
      100        
189             {
190 174         437 s/\\[\r\n]+//o;
191 174         585 $_ .= <$infile>;
192 174         2247 s/[\r\n]//go;
193             }
194 12615         21315 s#(.*)/\*(.*)\*/(.*)#$1 $3 ;$2#o;
195 12615         18339 s#(.*)//(.*)#$1 ;$2#o;
196             # many variables of the same type - we put each on a separate line together with its type
197 12615         25743 while ( /,/o )#&& !/\(/o )
198             {
199 1508 100       2875 if ( m#\[.*#o )
200             {
201 58         654 s/([\w*\s]+)\s+([()\w\s*]+)\s*(\[\w+\]),\s*(.*)/$1 $2$3;\n$1 $4/;
202             }
203             else
204             {
205 1450         14886 s/([\w*\s]+)\s+([()\w\s*]+)\s*,\s*(.*)/$1 $2;\n$1 $3/;
206             }
207             }
208             }
209              
210             # =head2 _remove_attrs
211             #
212             # PRIVATE SUBROUTINE.
213             # Removes attributes from the current line.
214             #
215             # =cut
216             sub _remove_attrs
217             {
218 19111     19111   28040 s/__attribute__\s*\(\(.*\)\)//go;
219 19111         27270 s/\[\[.*\]\]//go;
220             }
221              
222             sub parse_union(\%);
223             sub parse_struct(\%);
224              
225             =head2 parse_struct
226              
227             Parses a C "structure" type, calling the provided subroutines when
228             a symbol of a specified type is encountered.
229             Parameters: a hash containing the input file handle and references to
230             the subroutines. All subroutines should return a line of text (which
231             may later go to $output_sub) after their processing of the given parameter.
232             If a key is not present in the hash, its functionality is not used
233             (unless a default value is specified).
234             Hash keys:
235              
236             'infile' => input file handle (required),
237             'line' => the current line to process (default: empty line),
238             'output_sub' => a subroutine that processes the output.
239             Takes the line to output as its single parameter,
240             'comment_sub' => a subroutine that processes comments.
241             Takes the current line as its single parameter,
242             'preproc_sub' => a subroutine that processes preprocessor lines.
243             Takes the current line as its single parameter,
244             'struct_start_sub' => a subroutine that processes the beginning of a structure.
245             Takes the structure name as its single parameter,
246             'struct_entry_sub' => a subroutine that processes an entry of a structure.
247             Takes the symbol name as its first parameter, its size as the second and the structure name as the third,
248             'struct_end_sub' => a subroutine that processes the end of a structure.
249             Takes the structure name as its first parameter and its size as the second,
250             'union_start_sub' => a subroutine that processes the beginning of a union.
251             Takes the union name as its single parameter,
252             'union_entry_sub' => a subroutine that processes an entry of a union.
253             Takes the symbol name as its first parameter and its size as the second,
254             'union_end_sub' => a subroutine that processes the end of a union.
255             Takes the symbol name as its first parameter, its size as the second and the union name as the third,
256             'pointer_size' => the pointer size to use, in bytes (default: 8),
257              
258             =cut
259              
260             sub parse_struct(\%)
261             {
262 755     755 1 11055 my $params = shift;
263              
264 755         1453 my $infile = _get_param($params, 'infile'); # input file handle
265 755         1301 my $output_sub = _get_param($params, 'output_sub'); # output subroutine
266 755         1223 $_ = _get_param($params, 'line');
267 755 100       1535 $_ = '' unless defined($_);
268 755         1105 my $struct_start_sub = _get_param($params, 'struct_start_sub'); # subroutine that converts structures
269 755         1166 my $struct_entry_sub = _get_param($params, 'struct_entry_sub'); # subroutine that converts structures
270 755         1135 my $struct_end_sub = _get_param($params, 'struct_end_sub'); # subroutine that converts structures
271 755         1173 my $union_start_sub = _get_param($params, 'union_start_sub'); # subroutine that converts unions
272 755         1084 my $union_entry_sub = _get_param($params, 'union_entry_sub'); # subroutine that converts unions
273 755         1141 my $union_end_sub = _get_param($params, 'union_end_sub'); # subroutine that converts unions
274 755         1137 my $comment_sub = _get_param($params, 'comment_sub'); # subroutine that converts comments
275 755         1360 my $preproc_sub = _get_param($params, 'preproc_sub'); # subroutine that converts proceprocessor directives
276 755         1313 my $pointer_size = _get_param($params, 'pointer_size'); # pointer size in bytes
277 755 100       1411 $pointer_size = 8 unless defined($pointer_size);
278              
279 755 100       1366 return unless $infile;
280              
281 754         4920 my %sub_params = (
282             'infile' => $infile,
283             'output_sub' => $output_sub,
284             'comment_sub' => $comment_sub,
285             'preproc_sub' => $preproc_sub,
286             'extern_sub' => undef,
287             'typedef_sub' => undef,
288             'struct_start_sub' => undef,
289             'struct_entry_sub' => undef,
290             'struct_end_sub' => undef,
291             'union_start_sub' => undef,
292             'union_entry_sub' => undef,
293             'union_end_sub' => undef,
294             'enum_start_sub' => undef,
295             'enum_entry_sub' => undef,
296             'enum_end_sub' => undef,
297             'pointer_size' => $pointer_size,
298             );
299              
300 754         1515 &_remove_attrs;
301             # skip over "struct foo;"
302 754 100       4261 if ( /^\s*struct\s+[\w\s\$\*]+(\[[^\]]*\])?;/o )#&& ! /{/o )
303             {
304 87         406 s/^\s*struct\s+[\w\s\$\*]+(\[[^\]]*\])?;\s*//o;
305             # processing the comments
306 87 100 100     512 if ( $comment_sub && (m#/\*#o || m#//#o) )
      100        
307             {
308 56         153 $_ = &$comment_sub($_);
309 56 100 100     429 &$output_sub($_) if $output_sub and $_;
310             }
311 87         428 return (0, '');
312             }
313              
314             # skip over "struct {};" (syntax error, but causes an infinite loop)
315 667 100       2191 if ( /^\s*struct\s*\{\s*\}\s*;/o )
316             {
317 87         505 s/^\s*struct\s*\{\s*\}\s*;\s*//o;
318             # processing the comments
319 87 100 100     510 if ( $comment_sub and ( m#//#o or m#/\*#o ) )
      100        
320             {
321 56         138 $_ = &$comment_sub($_);
322 56 100 100     425 &$output_sub($_) if $output_sub and $_;
323             }
324 87         363 return (0, '');
325             }
326              
327             # the name of the structure
328 580         955 my $str_name = '';
329 580 100       1911 if ( /^\s*struct\s+(\w+)/o )
330             {
331 435         931 $str_name = $1;
332 435         2608 s/^\s*struct\s+\w+//o;
333             }
334             else
335             {
336             # remove 'struct' so that the start line is not interpreted
337             # as a structure inside a structure
338 145         536 s/^\s*struct\s*\{?//o;
339             }
340 580         904 my $size = 0;
341 580         1133 my ($memb_size, $name);
342 580         0 my $line;
343 580 100       1701 $line = &$struct_start_sub($str_name) if $struct_start_sub;
344 580 100 100     4348 &$output_sub($line) if $output_sub and $line;
345              
346             # a structure can end on the same line or contain many declaration per line
347             # - we simply put a newline after each semicolon and go on
348              
349 580         1215 s/;/;\n/go;
350             # processing the comments
351 580 100 100     2801 if ( $comment_sub and ( m#//#o or m#/\*#o ) )
      100        
352             {
353 84         216 $line = &$comment_sub($_);
354 84 100       527 $_ = $line if $line;
355             }
356              
357             do
358 580         814 {
359 6728         12950 s/^\s*{\s*$//go;
360              
361 6728         14526 &_remove_attrs;
362 6728         12955 &_split_decl($infile);
363              
364             # union/struct arrays must be processed first
365 6728         13128 while ( /.*union\s+(\w+)\s+(\w+)\s*\[(\w+)\]\s*;/o )
366             {
367 58 100       168 $line = &$struct_entry_sub($2, 0) if $struct_entry_sub;
368 58 100 100     592 &$output_sub($line) if $output_sub and $line;
369             # remove the parsed element
370 58         381 s/.*union\s+(\w+)\s+(\w+)\s*\[(\w+)\]\s*;//o;
371             }
372 6728         12446 while ( /.*union\s+(\w+)\s+(\w+)\s*;/o )
373             {
374 29 100       95 $line = &$struct_entry_sub($2, 0) if $struct_entry_sub;
375 29 100 100     312 &$output_sub($line) if $output_sub and $line;
376             # remove the parsed element
377 29         187 s/.*union\s+\w+\s+\w+\s*;//o;
378             }
379             # while ( /^\s*union\s+(\w+)/o )
380             # {
381             # $sub_params{'line'} = $_;
382             # ($memb_size, $name) = parse_union(%sub_params);
383             # $line = &$struct_entry_sub($name, $memb_size) if $struct_entry_sub;
384             # &$output_sub($line) if $output_sub and $line;
385             # $_ = '';
386             # $size += $memb_size;
387             # goto STR_END;
388             # }
389              
390 6728         12800 while ( /^\s*union/o )
391             {
392 145 100       451 if ( ! /^\s*union\s+(\w+)/o )
393             {
394             # no name on the first line - look for it
395 87         219 while ( ! /\{/o )
396             {
397 116         186 s/\\[\r\n]+//o;
398 116         480 $_ .= <$infile>;
399             }
400 87         276 &_remove_attrs;
401 87 100       401 if ( ! /^\s*union\s+(\w+)/o )
402             {
403             # no name at all - delete 'union' to
404             # avoid endless loop
405 58         283 s/^\s*union\s*//o;
406             }
407             }
408 145         409 $sub_params{'line'} = $_;
409 145         352 my ($memb_size, $name) = parse_union(%sub_params);
410 145 100       592 $line = &$struct_entry_sub($name, $memb_size) if $struct_entry_sub;
411 145 100 100     1239 &$output_sub($line) if $output_sub and $line;
412 145         294 $_ = '';
413 145         220 $size += $memb_size;
414 145         1610 goto STR_END;
415             }
416              
417             # first we remove the ":digit" from the structure fields
418 6583         9434 s/(.*):\s*\d+\s*/$1/g;
419              
420             # skip over 'volatile'
421 6583         32903 s/^[^;]*_*volatile_*//gio;
422              
423             # pointers to functions
424 6583         17738 while ( /^[^};]+\(\s*\*\s*(\w+)\s*\)\s*\([^)]*\)\s*;/o )
425             {
426 435 100       1075 $line = &$struct_entry_sub($1, $pointer_size) if $struct_entry_sub;
427 435 100 100     3622 &$output_sub($line) if $output_sub and $line;
428             # remove the parsed element
429 435         1677 s/^[^;]*;//o;
430 435         1184 $size += $pointer_size;
431             }
432             # pointer type
433 6583         13758 while ( /^[^};]+\*+\s*(\w+)\s*;/o )
434             {
435 1653 100       3978 $line = &$struct_entry_sub($1, $pointer_size) if $struct_entry_sub;
436 1653 100 100     13914 &$output_sub($line) if $output_sub and $line;
437             # remove the parsed element
438 1653         7131 s/^[^;]*;//o;
439 1653         4275 $size += $pointer_size;
440             }
441              
442             # arrays
443 6583         13287 while ( /.*struct\s+(\w+)\s+(\w+)\s*\[(\w+)\]\s*;/o )
444             {
445 58 100       216 $line = &$struct_entry_sub($2, 0) if $struct_entry_sub;
446 58 100 100     677 &$output_sub($line) if $output_sub and $line;
447             # remove the parsed element
448 58         416 s/.*struct\s+(\w+)\s+(\w+)\s*\[(\w+)\]\s*;//o;
449             }
450 6583         18791 while ( /.*(signed|unsigned)?\s+long\s+long(\s+int)?\s+(\w+)\s*\[(\w+)\]\s*;/o )
451             {
452 174         439 my $count = &_output_array_entry_size ($struct_entry_sub,
453             $output_sub, $3, $4, 8);
454 174 100       275 $size += 8 * $count if _is_a_number ($count);
455             }
456 6583         11468 while ( /.*long\s+double\s+(\w+)\s*\[(\w+)\]\s*;/o )
457             {
458 58         135 my $count = &_output_array_entry_size ($struct_entry_sub,
459             $output_sub, $1, $2, 10);
460 58 100       123 $size += 10 * $count if _is_a_number ($count);
461             }
462 6583         13408 while ( /.*(char|unsigned\s+char|signed\s+char)\s+(\w+)\s*\[(\w+)\]\s*;/o )
463             {
464 203         436 my $count = &_output_array_entry_size ($struct_entry_sub,
465             $output_sub, $2, $3, 1);
466 203 100       323 $size += 1 * $count if _is_a_number ($count);
467             }
468 6583         12470 while ( /.*float\s+(\w+)\s*\[(\w+)\]\s*;/o )
469             {
470 58         149 my $count = &_output_array_entry_size ($struct_entry_sub,
471             $output_sub, $1, $2, 4);
472 58 100       123 $size += 4 * $count if _is_a_number ($count);
473             }
474 6583         12413 while ( /.*double\s+(\w+)\s*\[(\w+)\]\s*;/o )
475             {
476 58         133 my $count = &_output_array_entry_size ($struct_entry_sub,
477             $output_sub, $1, $2, 8);
478 58 100       110 $size += 8 * $count if _is_a_number ($count);
479             }
480 6583         14003 while ( /.*(short|signed\s+short|unsigned\s+short)(\s+int)?\s+(\w+)\s*\[(\w+)\]\s*;/o )
481             {
482 174         408 my $count = &_output_array_entry_size ($struct_entry_sub,
483             $output_sub, $3, $4, 2);
484 174 100       268 $size += 2 * $count if _is_a_number ($count);
485             }
486 6583         12591 while ( /.*(long|signed\s+long|unsigned\s+long)(\s+int)?\s+(\w+)\s*\[(\w+)\]\s*;/o )
487             {
488             # NOTE: assuming 'long int' is the same size as a pointer (should be on 32- and 64-bit systems)
489 290         602 my $count = &_output_array_entry_size ($struct_entry_sub,
490             $output_sub, $3, $4, $pointer_size);
491 290 100       466 $size += $pointer_size * $count if _is_a_number ($count);
492             }
493 6583         16725 while ( /.*(signed\s+|unsigned\s+)?int\s+(\w+)\s*\[(\w+)\]\s*;/o )
494             {
495 174         335 my $count = &_output_array_entry_size ($struct_entry_sub,
496             $output_sub, $2, $3, 4);
497 174 100       272 $size += 4 * $count if _is_a_number ($count);
498             }
499 6583         14144 while ( /.*u?int(\d+)_t\s+(\w+)\s*\[(\w+)\]\s*;/o )
500             {
501 58         280 my $count = &_output_array_entry_size ($struct_entry_sub,
502             $output_sub, $2, $3, $1/8);
503 58 100       141 $size += $1/8 * $count if _is_a_number ($count);
504             }
505              
506             # variables' types
507 6583         11222 while ( /.*struct\s+(\w+)\s+(\w+)\s*;/o )
508             {
509 29 100       140 $line = &$struct_entry_sub($2, 0) if $struct_entry_sub;
510 29 100 100     332 &$output_sub($line) if $output_sub and $line;
511             # remove the parsed element
512 29         227 s/.*struct\s+\w+\s+\w+\s*;//o;
513             }
514 6583         11675 while ( /^\s*struct/o )
515             {
516 87 100       239 if ( ! /^\s*struct\s+(\w+)/o )
517             {
518             # no name on the first line - look for it
519 58         132 while ( ! /\{/o )
520             {
521 87         137 s/\\[\r\n]+//o;
522 87         372 $_ .= <$infile>;
523             }
524 58         157 &_remove_attrs;
525 58 100       276 if ( ! /^\s*struct\s+(\w+)/o )
526             {
527             # no name at all - delete 'struct' to
528             # avoid endless loop
529 29         153 s/^\s*struct\s*//o;
530             }
531             }
532 87         267 $sub_params{'line'} = $_;
533 87         215 my ($memb_size, $name) = parse_struct(%sub_params);
534 87 100       310 $line = &$struct_entry_sub($name, $memb_size) if $struct_entry_sub;
535 87 100 100     696 &$output_sub($line) if $output_sub and $line;
536 87         176 $_ = '';
537 87         107 $size += $memb_size;
538 87         847 goto STR_END;
539             }
540              
541             # all "\w+" stand for the variable name
542 6496         12984 while ( /.*(signed|unsigned)?\s+long\s+long(\s+int)?\s+(\w+)\s*;/o )
543             {
544 87 100       1015 $line = &$struct_entry_sub($3, 8) if $struct_entry_sub;
545 87 100 100     801 &$output_sub($line) if $output_sub and $line;
546             # remove the parsed element
547 87         389 s/^[^;]*;//o;
548 87         192 $size += 8;
549             }
550 6496         10642 while ( /.*long\s+double\s+(\w+)\s*;/o )
551             {
552 58 100       199 $line = &$struct_entry_sub($1, 10) if $struct_entry_sub;
553 58 100 100     1500 &$output_sub($line) if $output_sub and $line;
554             # remove the parsed element
555 58         299 s/^[^;]*;//o;
556 58         132 $size += 10;
557             }
558 6496         14269 while ( /.*(char|unsigned\s+char|signed\s+char)\s+(\w+)\s*;/o )
559             {
560 464 100       1267 $line = &$struct_entry_sub($2, 1) if $struct_entry_sub;
561 464 100 100     3470 &$output_sub($line) if $output_sub and $line;
562             # remove the parsed element
563 464         1782 s/^[^;]*;//o;
564 464         1174 $size += 1;
565             }
566 6496         10990 while ( /.*float\s+(\w+)\s*;/o )
567             {
568 116 100       355 $line = &$struct_entry_sub($1, 4) if $struct_entry_sub;
569 116 100 100     1033 &$output_sub($line) if $output_sub and $line;
570             # remove the parsed element
571 116         554 s/^[^;]*;//o;
572 116         470 $size += 4;
573             }
574 6496         11386 while ( /.*double\s+(\w+)\s*;/o )
575             {
576 29 100       91 $line = &$struct_entry_sub($1, 8) if $struct_entry_sub;
577 29 100 100     308 &$output_sub($line) if $output_sub and $line;
578             # remove the parsed element
579 29         129 s/^[^;]*;//o;
580 29         62 $size += 8;
581             }
582 6496         11658 while ( /.*(short|signed\s+short|unsigned\s+short)(\s+int)?\s+(\w+)\s*;/o )
583             {
584 174 100       473 $line = &$struct_entry_sub($3, 2) if $struct_entry_sub;
585 174 100 100     1619 &$output_sub($line) if $output_sub and $line;
586             # remove the parsed element
587 174         773 s/^[^;]*;//o;
588 174         490 $size += 2;
589             }
590 6496         12504 while ( /.*(long|signed\s+long|unsigned\s+long)(\s+int)?\s+(\w+)\s*;/o )
591             {
592             # NOTE: assuming 'long int' is the same size as a pointer (should be on 32- and 64-bit systems)
593 87 100       315 $line = &$struct_entry_sub($3, $pointer_size) if $struct_entry_sub;
594 87 100 100     818 &$output_sub($line) if $output_sub and $line;
595             # remove the parsed element
596 87         411 s/^[^;]*;//o;
597 87         173 $size += $pointer_size;
598             }
599 6496         12579 while ( /.*(unsigned\s+|signed\s+)?int\s+(\w+)\s*;/o )
600             {
601 261 100       756 $line = &$struct_entry_sub($2, 4) if $struct_entry_sub;
602 261 100 100     2324 &$output_sub($line) if $output_sub and $line;
603             # remove the parsed element
604 261         1034 s/^[^;]*;//o;
605 261         1339 $size += 4;
606             }
607 6496         11438 while ( /.*u?int(\d+)_t\s+(\w+)\s*;/o )
608             {
609 29 100       195 $line = &$struct_entry_sub($2, $1/8) if $struct_entry_sub;
610 29         275 $size += $1/8;
611 29 100 100     158 &$output_sub($line) if $output_sub and $line;
612             # remove the parsed element
613 29         200 s/^[^;]*;//o;
614             }
615              
616             # look for the end of the structure
617 6496 100       12828 if ( /}/o )
618             {
619             # add a structure size definition
620 580         1031 my $var_name = '';
621 580 100       1784 if ( /\}\s*(\*?)\s*(\w+)[^;]*;/o )
622             {
623 145         352 $var_name = $2;
624             }
625 580 100       1118 if ( /\}\s*\*/o )
626             {
627 29         79 $size = $pointer_size;
628             }
629 580 100       1601 $line = &$struct_end_sub($var_name, $size, $str_name) if $struct_end_sub;
630 580 100 100     4060 &$output_sub($line) if $output_sub and $line;
631 580         3090 s/.*;(.*)/$1/o;
632 580 100       1737 &$output_sub($_) if $output_sub;
633 580         1394 $_ = '';
634 580         3404 return ($size, $var_name);
635             }
636              
637             # processing of conditional compiling directives
638 5916 100 100     20629 if ( $preproc_sub && /^\s*#/o )
639             {
640 56         234 $_ = &$preproc_sub($_);
641             }
642 5916 100       10149 if ( $_ )
643             {
644 5392 100       14843 $_ .= "\n" unless /[\r\n]$/o;
645             }
646 5916 100 100     22351 &$output_sub($_) if $output_sub and $_;
647              
648 6148         37182 STR_END: } while ( <$infile> );
649             }
650              
651             =head2 parse_union
652              
653             Parses a C "union" type, calling the provided subroutines when
654             a symbol of a specified type is encountered.
655             Parameters: a hash containing the input file handle and references to
656             the subroutines. All subroutines should return a line of text (which
657             may later go to $output_sub) after their processing of the given parameter.
658             If a key is not present in the hash, its functionality is not used
659             (unless a default value is specified).
660             Hash keys:
661              
662             'infile' => input file handle (required),
663             'line' => the current line to process (default: empty line),
664             'output_sub' => a subroutine that processes the output.
665             Takes the line to output as its single parameter,
666             'comment_sub' => a subroutine that processes comments.
667             Takes the current line as its single parameter,
668             'preproc_sub' => a subroutine that processes preprocessor lines.
669             Takes the current line as its single parameter,
670             'struct_start_sub' => a subroutine that processes the beginning of a structure.
671             Takes the structure name as its single parameter,
672             'struct_entry_sub' => a subroutine that processes an entry of a structure.
673             Takes the symbol name as its first parameter, its size as the second and the structure name as the third,
674             'struct_end_sub' => a subroutine that processes the end of a structure.
675             Takes the structure name as its first parameter and its size as the second,
676             'union_start_sub' => a subroutine that processes the beginning of a union.
677             Takes the union name as its single parameter,
678             'union_entry_sub' => a subroutine that processes an entry of a union.
679             Takes the symbol name as its first parameter and its size as the second,
680             'union_end_sub' => a subroutine that processes the end of a union.
681             Takes the symbol name as its first parameter, its size as the second and the union name as the third,
682             'pointer_size' => the pointer size to use, in bytes (default: 8),
683              
684              
685             =cut
686              
687             sub parse_union(\%)
688             {
689 697     697 1 11094 my $params = shift;
690              
691 697         1455 my $infile = _get_param($params, 'infile'); # input file handle
692 697         1136 my $output_sub = _get_param($params, 'output_sub'); # output subroutine
693 697         1088 $_ = _get_param($params, 'line');
694 697 100       1419 $_ = '' unless defined($_);
695 697         1081 my $struct_start_sub = _get_param($params, 'struct_start_sub'); # subroutine that converts structures
696 697         1401 my $struct_entry_sub = _get_param($params, 'struct_entry_sub'); # subroutine that converts structures
697 697         1170 my $struct_end_sub = _get_param($params, 'struct_end_sub'); # subroutine that converts structures
698 697         1058 my $union_start_sub = _get_param($params, 'union_start_sub'); # subroutine that converts unions
699 697         1033 my $union_entry_sub = _get_param($params, 'union_entry_sub'); # subroutine that converts unions
700 697         1126 my $union_end_sub = _get_param($params, 'union_end_sub'); # subroutine that converts unions
701 697         1033 my $comment_sub = _get_param($params, 'comment_sub'); # subroutine that converts comments
702 697         1057 my $preproc_sub = _get_param($params, 'preproc_sub'); # subroutine that converts proceprocessor directives
703 697         1095 my $pointer_size = _get_param($params, 'pointer_size'); # pointer size in bytes
704 697 100       1404 $pointer_size = 8 unless defined($pointer_size);
705              
706 697 100       1240 return unless $infile;
707              
708 696         4250 my %sub_params = (
709             'infile' => $infile,
710             'output_sub' => $output_sub,
711             'comment_sub' => $comment_sub,
712             'preproc_sub' => $preproc_sub,
713             'extern_sub' => undef,
714             'typedef_sub' => undef,
715             'struct_start_sub' => undef,
716             'struct_entry_sub' => undef,
717             'struct_end_sub' => undef,
718             'union_start_sub' => undef,
719             'union_entry_sub' => undef,
720             'union_end_sub' => undef,
721             'enum_start_sub' => undef,
722             'enum_entry_sub' => undef,
723             'enum_end_sub' => undef,
724             'pointer_size' => $pointer_size,
725             );
726              
727 696         3056 &_remove_attrs;
728             # skip over "union foo;"
729 696 100       2791 if ( /^\s*union\s+[^;{}]*;/o )
730             {
731 87         376 s/^\s*union\s+[^;{}]*;\s*//o;
732             # processing the comments
733 87 100 100     560 if ( $comment_sub && (m#/\*#o || m#//#o) )
      100        
734             {
735 56         155 $_ = &$comment_sub($_);
736 56 100 100     472 &$output_sub($_) if $output_sub and $_;
737             }
738 87         359 return (0, '');
739             }
740              
741             # skip over "union {};" (syntax error, but causes an infinite loop)
742 609 100       1875 if ( /^\s*union\s*\{\s*\}\s*;/o )
743             {
744 87         397 s/^\s*union\s*\{\s*\}\s*;\s*//o;
745             # processing the comments
746 87 100 100     480 if ( $comment_sub && (m#/\*#o || m#//#o) )
      100        
747             {
748 56         130 $_ = &$comment_sub($_);
749 56 100 100     444 &$output_sub($_) if $output_sub and $_;
750             }
751 87         334 return (0, '');
752             }
753              
754             # the name of the union
755 522         867 my $union_name = '';
756              
757 522 100       1695 if ( /^\s*union\s+(\w+)/o )
758             {
759 348         723 $union_name = $1;
760 348         1345 s/^\s*union\s+\w+//o;
761             }
762             else
763             {
764             # remove 'union' so that the start line is not interpreted
765             # as a union inside a union
766 174         518 s/^\s*union\s*\{?//o;
767             }
768 522         835 my $size = 0;
769 522         995 my ($memb_size, $name);
770 522         0 my $line;
771 522 100       1445 $line = &$union_start_sub($union_name) if $union_start_sub;
772 522 100 100     4146 &$output_sub($line) if $output_sub and $line;
773              
774             # if there was a '{' in the first line, we put it in the second
775             # if ( /{/o )
776             # {
777             # s/\s*\{/\n\{\n/o;
778             # }
779              
780             # an union can end on the same line or contain many declaration per line
781             # - we simply put a newline after each semicolon and go on
782              
783 522         2542 s/;/;\n/go;
784              
785             do
786 522         633 {
787 5568         10508 s/^\s*{\s*$//go;
788              
789 5568         10704 &_remove_attrs;
790 5568         10856 &_split_decl($infile);
791              
792             # pointers to functions
793 5568         11172 while ( /^[^};]+\(\s*\*\s*(\w+)\s*\)\s*\([^)]*\)\s*;/o )
794             {
795 145 100       371 $line = &$union_entry_sub($1, $pointer_size) if $union_entry_sub;
796 145 100 100     1272 &$output_sub($line) if $output_sub and $line;
797             # remove the parsed element
798 145         598 s/^[^;]*;//o;
799 145         257 $size = _max($size, $pointer_size);
800             }
801             # pointer type
802 5568         11366 while ( /^[^};]+\*+\s*(\w+)\s*;/o )
803             {
804 580 100       1474 $line = &$union_entry_sub($1, $pointer_size) if $union_entry_sub;
805 580 100 100     4975 &$output_sub($line) if $output_sub and $line;
806             # remove the parsed element
807 580         2241 s/^[^;]*;//o;
808 580         1076 $size = _max($size, $pointer_size);
809             }
810              
811             # union/struct arrays must be processed first
812 5568         10797 while ( /.*union\s+(\w+)\s+(\w+)\s*\[(\w+)\]\s*;/o )
813             {
814 58 100       179 $line = &$union_entry_sub($2, 0) if $union_entry_sub;
815 58 100 100     655 &$output_sub($line) if $output_sub and $line;
816             # remove the parsed element
817 58         392 s/.*union\s+(\w+)\s+(\w+)\s*\[(\w+)\]\s*;//o;
818             }
819              
820 5568         10389 while ( /.*union\s+(\w+)\s+(\w+)\s*;/o )
821             {
822 29 100       114 $line = &$union_entry_sub($2, 0) if $union_entry_sub;
823 29 100 100     356 &$output_sub($line) if $output_sub and $line;
824             # remove the parsed element
825 29         214 s/.*union\s+\w+\s+\w+\s*;//o;
826             }
827              
828 5568         10452 while ( /.*struct\s+(\w+)\s+(\w+)\s*\[(\w+)\]\s*;/o )
829             {
830 58 100       197 $line = &$union_entry_sub($2, 0) if $union_entry_sub;
831 58 100 100     1450 &$output_sub($line) if $output_sub and $line;
832             # remove the parsed element
833 58         419 s/.*struct\s+(\w+)\s+(\w+)\s*\[(\w+)\]\s*;//o;
834             }
835              
836 5568         9722 while ( /.*struct\s+(\w+)\s+(\w+)\s*;/o )
837             {
838 29 100       113 $line = &$union_entry_sub($2, 0) if $union_entry_sub;
839 29 100 100     293 &$output_sub($line) if $output_sub and $line;
840             # remove the parsed element
841 29         209 s/.*struct\s+\w+\s+\w+\s*;//o;
842             }
843              
844 5568         10705 while ( /^\s*struct/o )
845             {
846 87 100       267 if ( ! /^\s*struct\s+(\w+)/o )
847             {
848             # no name on the first line - look for it
849 58         159 while ( ! /\{/o )
850             {
851 87         136 s/\\[\r\n]+//o;
852 87         335 $_ .= <$infile>;
853             }
854 58         143 &_remove_attrs;
855 58 100       291 if ( ! /^\s*struct\s+(\w+)/o )
856             {
857             # no name at all - delete 'struct' to
858             # avoid endless loop
859 29         133 s/^\s*struct\s*//o;
860             }
861             }
862 87         249 $sub_params{'line'} = $_;
863 87         236 my ($memb_size, $name) = parse_struct(%sub_params);
864 87 100       330 $line = &$union_entry_sub($name, $memb_size) if $union_entry_sub;
865 87 100 100     778 &$output_sub($line) if $output_sub and $line;
866 87         182 $size = _max($size, $memb_size);
867 87         135 $_ = '';
868 87         945 goto STR_END;
869             }
870              
871             # variables' types
872             # all "\w+" stand for the variable name
873 5481         16359 while ( /.*(signed|unsigned)?\s+long\s+long(\s+int)?\s+(\w+)\s*;/o )
874             {
875 87 100       249 $line = &$union_entry_sub($3, 8) if $union_entry_sub;
876 87 100 100     830 &$output_sub($line) if $output_sub and $line;
877             # remove the parsed element
878 87         359 s/^[^;]*;//o;
879 87         204 $size = _max($size, 8);
880             }
881              
882 5481         9621 while ( /.*long\s+double\s+(\w+)\s*;/o )
883             {
884 29 100       88 $line = &$union_entry_sub($1, 10) if $union_entry_sub;
885 29 100 100     296 &$output_sub($line) if $output_sub and $line;
886             # remove the parsed element
887 29         158 s/^[^;]*;//o;
888 29         73 $size = _max($size, 10);
889             }
890              
891 5481         15413 while ( /.*(char|unsigned\s+char|signed\s+char)\s+(\w+)\s*;/o )
892             {
893 609 100       1467 $line = &$union_entry_sub($2, 1) if $union_entry_sub;
894 609 100 100     3615 &$output_sub($line) if $output_sub and $line;
895             # remove the parsed element
896 609         2079 s/^[^;]*;//o;
897 609         1168 $size = _max($size, 1);
898             }
899              
900 5481         13612 while ( /.*float\s+(\w+)\s*;/o )
901             {
902 116 100       340 $line = &$union_entry_sub($1, 4) if $union_entry_sub;
903 116 100 100     1126 &$output_sub($line) if $output_sub and $line;
904             # remove the parsed element
905 116         490 s/^[^;]*;//o;
906 116         224 $size = _max($size, 4);
907             }
908              
909 5481         10915 while ( /.*double\s+(\w+)\s*;/o )
910             {
911 29 100       97 $line = &$union_entry_sub($1, 8) if $union_entry_sub;
912 29 100 100     288 &$output_sub($line) if $output_sub and $line;
913             # remove the parsed element
914 29         131 s/^[^;]*;//o;
915 29         64 $size = _max($size, 8);
916             }
917              
918 5481         14330 while ( /.*(short|signed\s+short|unsigned\s+short)(\s+int)?\s+(\w+)\s*;/o )
919             {
920 174 100       484 $line = &$union_entry_sub($3, 2) if $union_entry_sub;
921 174 100 100     1604 &$output_sub($line) if $output_sub and $line;
922             # remove the parsed element
923 174         751 s/^[^;]*;//o;
924 174         328 $size = _max($size, 2);
925             }
926              
927 5481         15839 while ( /.*(long|signed\s+long|unsigned\s+long)(\s+int)?\s+(\w+)\s*;/o )
928             {
929             # NOTE: assuming 'long int' is the same size as a pointer (should be on 32- and 64-bit systems)
930 87 100       272 $line = &$union_entry_sub($3, $pointer_size) if $union_entry_sub;
931 87 100 100     927 &$output_sub($line) if $output_sub and $line;
932             # remove the parsed element
933 87         444 s/^[^;]*;//o;
934 87         214 $size = _max($size, $pointer_size);
935             }
936              
937 5481         14762 while ( /.*(unsigned\s+|signed\s+)?int\s+(\w+)\s*;/o )
938             {
939 261 100       707 $line = &$union_entry_sub($2, 4) if $union_entry_sub;
940 261 100 100     2212 &$output_sub($line) if $output_sub and $line;
941             # remove the parsed element
942 261         1119 s/^[^;]*;//o;
943 261         498 $size = _max($size, 4);
944             }
945              
946 5481         13911 while ( /.*u?int(\d+)_t\s+(\w+)\s*;/o )
947             {
948 87 100       466 $line = &$union_entry_sub($2, $1/8) if $union_entry_sub;
949 87         678 $size += $1/8;
950 87 100 100     373 &$output_sub($line) if $output_sub and $line;
951             # remove the parsed element
952 87         702 s/^[^;]*;//o;
953             }
954              
955             # arrays
956              
957 5481         13672 while ( /.*(signed|unsigned)?\s+long\s+long(\s+int)?\s+(\w+)\s*\[(\w+)\]\s*;/o )
958             {
959 174         410 my $count = &_output_array_entry_size ($union_entry_sub,
960             $output_sub, $3, $4, 8);
961 174 100       337 $size = _max($size, 8 * $count) if _is_a_number ($count);
962             }
963              
964 5481         9523 while ( /.*long\s+double\s+(\w+)\s*\[(\w+)\]\s*;/o )
965             {
966 58         185 my $count = &_output_array_entry_size ($union_entry_sub,
967             $output_sub, $1, $2, 10);
968 58 100       143 $size = _max($size, 10 * $count) if _is_a_number ($count);
969             }
970              
971 5481         11486 while ( /.*(char|unsigned\s+char|signed\s+char)\s+(\w+)\s*\[(\w+)\]\s*;/o )
972             {
973 203         524 my $count = &_output_array_entry_size ($union_entry_sub,
974             $output_sub, $2, $3, 1);
975 203 100       369 $size = _max($size, 1 * $count) if _is_a_number ($count);
976             }
977              
978 5481         9843 while ( /.*float\s+(\w+)\s*\[(\w+)\]\s*;/o )
979             {
980 58         157 my $count = &_output_array_entry_size ($union_entry_sub,
981             $output_sub, $1, $2, 4);
982 58 100       122 $size = _max($size, 4 * $count) if _is_a_number ($count);
983             }
984              
985 5481         9350 while ( /.*double\s+(\w+)\s*\[(\w+)\]\s*;/o )
986             {
987 58         154 my $count = &_output_array_entry_size ($union_entry_sub,
988             $output_sub, $1, $2, 8);
989 58 100       116 $size = _max($size, 8 * $count) if _is_a_number ($count);
990             }
991              
992 5481         11612 while ( /.*(short|signed\s+short|unsigned\s+short)(\s+int)?\s+(\w+)\s*\[(\w+)\]\s*;/o )
993             {
994 174         394 my $count = &_output_array_entry_size ($union_entry_sub,
995             $output_sub, $3, $4, 2);
996 174 100       297 $size = _max($size, 2 * $count) if _is_a_number ($count);
997             }
998              
999 5481         10221 while ( /.*(long|signed\s+long|unsigned\s+long)(\s+int)?\s+(\w+)\s*\[(\w+)\]\s*;/o )
1000             {
1001             # NOTE: assuming 'long int' is the same size as a pointer (should be on 32- and 64-bit systems)
1002 174         395 my $count = &_output_array_entry_size ($union_entry_sub,
1003             $output_sub, $3, $4, $pointer_size);
1004 174 100       312 $size = _max($size, $pointer_size * $count) if _is_a_number ($count);
1005             }
1006              
1007 5481         12005 while ( /.*(signed\s+|unsigned\s+)?int\s+(\w+)\s*\[(\w+)\]\s*;/o )
1008             {
1009 174         414 my $count = &_output_array_entry_size ($union_entry_sub,
1010             $output_sub, $2, $3, 4);
1011 174 100       302 $size = _max($size, 4 * $count) if _is_a_number ($count);
1012             }
1013              
1014 5481         12132 while ( /.*u?int(\d+)_t\s+(\w+)\s*\[(\w+)\]\s*;/o )
1015             {
1016 58         287 my $count = &_output_array_entry_size ($union_entry_sub,
1017             $output_sub, $2, $3, $1/8);
1018 58 100       155 $size += $1/8 * $count if _is_a_number ($count);
1019             }
1020              
1021 5481         9953 while ( /^\s*union/o )
1022             {
1023 87 100       291 if ( ! /^\s*union\s+(\w+)/o )
1024             {
1025             # no name on the first line - look for it
1026 58         137 while ( ! /\{/o )
1027             {
1028 87         139 s/\\[\r\n]+//o;
1029 87         375 $_ .= <$infile>;
1030             }
1031 58         126 &_remove_attrs;
1032 58 100       245 if ( ! /^\s*union\s+(\w+)/o )
1033             {
1034             # no name at all - delete 'union' to
1035             # avoid endless loop
1036 29         130 s/^\s*union\s*//o;
1037             }
1038             }
1039 87         217 $sub_params{'line'} = $_;
1040 87         259 my ($memb_size, $name) = parse_union(%sub_params);
1041 87 100       315 $line = &$union_entry_sub($name, $memb_size) if $union_entry_sub;
1042 87 100 100     1343 &$output_sub($line) if $output_sub and $line;
1043 87         152 $_ = '';
1044 87         172 $size = _max($size, $memb_size);
1045             }
1046              
1047             # look for the end of the union
1048 5481 100       12037 if ( /\s*\}.*/o )
1049             {
1050 522         853 my $var_name = '';
1051 522 100       1681 if ( /\}\s*(\*?)\s*(\w+)[^;]*;/o )
1052             {
1053 174         423 $var_name = $2;
1054             }
1055 522 100       1181 if ( /\}\s*\*/o )
1056             {
1057 29         50 $size = $pointer_size;
1058             }
1059 522 100       1291 $line = &$union_end_sub($var_name, $size, $union_name) if $union_end_sub;
1060 522 100 100     3298 &$output_sub($line) if $output_sub and $line;
1061 522         2624 s/.*;(.*)/$1/o;
1062 522 100       1662 &$output_sub($_) if $output_sub;
1063 522         1246 $_ = '';
1064 522         3234 return ($size, $var_name);
1065             }
1066              
1067             # processing of conditional compiling directives
1068 4959 100 100     15181 if ( $preproc_sub && /^\s*#/o )
1069             {
1070 28         103 $_ = &$preproc_sub($_);
1071             }
1072 4959 100       10080 if ( $_ )
1073             {
1074 4407 100       11703 $_ .= "\n" unless /[\r\n]$/o;
1075             }
1076 4959 100 100     18360 &$output_sub($_) if $output_sub and $_;
1077              
1078 5046         28973 STR_END: } while ( <$infile> );
1079             }
1080              
1081             =head2 parse_file
1082              
1083             Parses a C header file, calling the provided subroutines when
1084             a symbol of a specified type is encountered.
1085             Parameters: a hash containing the input file handle and references to
1086             the subroutines. All subroutines should return a line of text (which
1087             may later go to $output_sub) after their processing of the given parameter.
1088             If a key is not present in the hash, its functionality is not used
1089             (unless a default value is specified).
1090             Hash keys:
1091              
1092             'infile' => input file handle (required),
1093             'output_sub' => a subroutine that processes the output.
1094             Takes the line to output as its single parameter,
1095             'comment_sub' => a subroutine that processes comments.
1096             Takes the current line as its single parameter,
1097             'preproc_sub' => a subroutine that processes preprocessor lines.
1098             Takes the current line as its single parameter,
1099             'extern_sub' => a subroutine that processes external symbol declarations.
1100             Takes the symbol name as its single parameter,
1101             'typedef_sub' => a subroutine that processes typedefs.
1102             Takes the old type's name as its first parameter and the new type's name as the second,
1103             'struct_start_sub' => a subroutine that processes the beginning of a structure.
1104             Takes the structure name as its single parameter,
1105             'struct_entry_sub' => a subroutine that processes an entry of a structure.
1106             Takes the symbol name as its first parameter, its size as the second and the structure name as the third,
1107             'struct_end_sub' => a subroutine that processes the end of a structure.
1108             Takes the structure name as its first parameter and its size as the second,
1109             'union_start_sub' => a subroutine that processes the beginning of a union.
1110             Takes the union name as its single parameter,
1111             'union_entry_sub' => a subroutine that processes an entry of a union.
1112             Takes the symbol name as its first parameter and its size as the second,
1113             'union_end_sub' => a subroutine that processes the end of a union.
1114             Takes the symbol name as its first parameter, its size as the second and the union name as the third,
1115             'enum_start_sub' => a subroutine that processes the beginning of an enumeration.
1116             Takes the enum's name as its single parameter,
1117             'enum_entry_sub' => a subroutine that processes an entry of an enumeration.
1118             Takes the symbol name as its first parameter and its value as the second,
1119             'enum_end_sub' => a subroutine that processes the end of an enumeration.
1120             Takes no parameters,
1121             'pointer_size' => the pointer size to use, in bytes (default: 8),
1122              
1123             =cut
1124              
1125             sub parse_file(\%)
1126             {
1127 31     31 1 483064 my $params = shift;
1128              
1129 31         122 my $infile = _get_param($params, 'infile'); # input file handle
1130 31         81 my $output_sub = _get_param($params, 'output_sub'); # output subroutine
1131 31         129 my $extern_sub = _get_param($params, 'extern_sub'); # subroutine that converts external declarations
1132 31         76 my $typedef_sub = _get_param($params, 'typedef_sub'); # subroutine that converts typedefs
1133 31         81 my $comment_sub = _get_param($params, 'comment_sub'); # subroutine that converts comments
1134 31         69 my $preproc_sub = _get_param($params, 'preproc_sub'); # subroutine that converts proceprocessor directives
1135 31         87 my $pointer_size = _get_param($params, 'pointer_size'); # pointer size in bytes
1136 31 100       223 $pointer_size = 8 unless defined($pointer_size);
1137 31         90 my $struct_start_sub = _get_param($params, 'struct_start_sub'); # subroutine that converts structures
1138 31         77 my $struct_entry_sub = _get_param($params, 'struct_entry_sub'); # subroutine that converts structures
1139 31         71 my $struct_end_sub = _get_param($params, 'struct_end_sub'); # subroutine that converts structures
1140 31         100 my $union_start_sub = _get_param($params, 'union_start_sub'); # subroutine that converts unions
1141 31         71 my $union_entry_sub = _get_param($params, 'union_entry_sub'); # subroutine that converts unions
1142 31         66 my $union_end_sub = _get_param($params, 'union_end_sub'); # subroutine that converts unions
1143 31         66 my $enum_start_sub = _get_param($params, 'enum_start_sub'); # subroutine that converts enumerations
1144 31         69 my $enum_entry_sub = _get_param($params, 'enum_entry_sub'); # subroutine that converts enumerations
1145 31         67 my $enum_end_sub = _get_param($params, 'enum_end_sub'); # subroutine that converts enumerations
1146              
1147 31 100       104 return unless $infile;
1148              
1149 29         221 my %sub_params = (
1150             'infile' => $infile,
1151             'output_sub' => $output_sub,
1152             'comment_sub' => $comment_sub,
1153             'preproc_sub' => $preproc_sub,
1154             'extern_sub' => $extern_sub,
1155             'typedef_sub' => $typedef_sub,
1156             'struct_start_sub' => $struct_start_sub,
1157             'struct_entry_sub' => $struct_entry_sub,
1158             'struct_end_sub' => $struct_end_sub,
1159             'union_start_sub' => $union_start_sub,
1160             'union_entry_sub' => $union_entry_sub,
1161             'union_end_sub' => $union_end_sub,
1162             'enum_start_sub' => $enum_start_sub,
1163             'enum_entry_sub' => $enum_entry_sub,
1164             'enum_end_sub' => $enum_end_sub,
1165             'pointer_size' => $pointer_size,
1166             );
1167              
1168 29         50 my $line;
1169 29         1154 READ: while ( <$infile> )
1170             {
1171             # empty lines go without change
1172 4640 100       12145 if ( /^\s*$/o )
1173             {
1174 1595 100       4062 &$output_sub("\n") if $output_sub;
1175 1595         6816 next;
1176             }
1177              
1178             # joining lines
1179 3045         7655 while ( /[\\,]$/o )
1180             {
1181 58         103 s/\\[\r\n]+//o;
1182 58         219 s/,[\r\n]+/,/o;
1183 58         231 $_ .= <$infile>;
1184             }
1185              
1186 3045         6634 &_remove_attrs;
1187             # check if a comment is the only thing on this line
1188 3045 100 100     11583 if ( m#^\s*/\*.*\*/\s*$#o || m#^\s*//#o )
1189             {
1190 232 100       428 if ( $comment_sub )
1191             {
1192 224         423 $line = &$comment_sub($_);
1193 224 100       1280 $_ = $line if $line;
1194             }
1195             else
1196             {
1197 8         21 $_ = '';
1198             }
1199 232 100       672 &$output_sub($_) if $output_sub;
1200              
1201 232         836 next;
1202             }
1203              
1204             # processing of preprocessor directives
1205 2813 100       6656 if ( /^\s*#/o )
1206             {
1207 87 100       227 if ( $comment_sub )
1208             {
1209 84         175 $line = &$comment_sub($_);
1210 84 100       480 $_ = $line if $line;
1211             }
1212 87 100       184 if ( $preproc_sub )
1213             {
1214 84         218 $_ = &$preproc_sub($_);
1215             }
1216             else
1217             {
1218 3         7 $_ = '';
1219             }
1220 87 100 100     730 &$output_sub($_) if $output_sub and $_;
1221              
1222 87         1531 next;
1223             }
1224              
1225             # externs
1226 2726 100       6631 if ( /^\s*extern/o )
1227             {
1228 899 100       1496 if ( $comment_sub )
1229             {
1230 868         1718 $line = &$comment_sub($_);
1231 868 100       5379 $_ = $line if $line;
1232             }
1233              
1234 899 100       2489 if ( ! /^\s*extern\s+"C/o )
1235             {
1236             # joining lines
1237 870         1867 while ( ! /;/o )
1238             {
1239 29         183 s/[\r\n]+//o;
1240 29         113 $_ .= <$infile>;
1241             }
1242             }
1243              
1244 899         1629 &_remove_attrs;
1245             # external functions
1246              
1247             # extern "C", extern "C++"
1248 899         1868 s/^\s*extern\s+"C"\s*{//o;
1249 899         1618 s/^\s*extern\s+"C"/extern/o;
1250 899         1643 s/^\s*extern\s+"C\+\+"\s*{//o;
1251 899         1719 s/^\s*extern\s+"C\+\+"/extern/o;
1252              
1253             # first remove: extern MACRO_NAME ( fcn name, args, ... )
1254 899         3118 s/^\s*\w*\s*extern\s+\w+\s*\([^*].*//o;
1255             # type ^^^
1256              
1257             # extern pointers to functions:
1258 899 100       3593 if ( /^\s*\w*\s*extern\s+[\w\*\s]+\(\s*\*\s*(\w+)[()\*\s\w]*\)\s*\(.*/o )
1259             {
1260 29 100       67 if ( $extern_sub )
1261             {
1262 28         58 $line = &$extern_sub($1);
1263 28 100       196 $_ = $line if $line;
1264             }
1265             else
1266             {
1267 1         5 $_ = '';
1268             }
1269 29 100       100 &$output_sub($_) if $output_sub;
1270             }
1271              
1272 899 100       9158 if ( /^\s*\w*\s*extern\s+[\w\*\s]+?(\w+)\s*\(.*/o )
1273             {
1274 262 100       567 if ( $extern_sub )
1275             {
1276 253         619 $line = &$extern_sub($1);
1277 253 100       1866 $_ = $line if $line;
1278             }
1279             else
1280             {
1281 9         20 $_ = '';
1282             }
1283 262 100       611 &$output_sub($_) if $output_sub;
1284             }
1285              
1286             # external variables
1287 899 100       3173 if ( /^\s*extern[\w\*\s]+\s+\**(\w+)\s*;/o )
1288             {
1289 580 100       868 if ( $extern_sub )
1290             {
1291 560         1241 $line = &$extern_sub($1);
1292 560 100       4683 $_ = $line if $line;
1293             }
1294             else
1295             {
1296 20         42 $_ = '';
1297             }
1298 580 100       1217 &$output_sub($_) if $output_sub;
1299             }
1300              
1301 899         4430 next;
1302             }
1303              
1304             # typedef
1305 1827 100       4510 if ( /^\s*typedef/o )
1306             {
1307 435 100       1599 if ( ! /\b(struct|union|enum)\b/o )
1308             {
1309             # joining lines
1310 290         630 while ( ! /;/o )
1311             {
1312 29         71 s/[\r\n]+//o;
1313 29         110 $_ .= <$infile>;
1314             }
1315             }
1316              
1317 435         786 &_remove_attrs;
1318             # split typedefs, but not within function parameters
1319 435 100 100     1878 &_split_decl($infile) unless /\([^)]*,/o or /enum/o;
1320              
1321 435 100       1968 if ( /\(/o )
    100          
    100          
1322             {
1323 145         398 s/^.*$/\n/o;
1324             }
1325             # "typedef struct ...." ----> "struct ....."
1326             elsif ( /(struct|union|enum)/o )
1327             {
1328 145         488 s/^\s*typedef\s+//o;
1329             }
1330             elsif ( ! /{/o ) #&& /;/o ) # lines already joined
1331             {
1332 116         529 while ( /\btypedef\s+[^;]+\s*;/o )
1333             {
1334             # cannot do function pointers, take
1335             # just simple types
1336 145 100       709 if ( /\btypedef([\w*\s]+)\b(\w+)\s*;/o )
1337             {
1338 116 100       259 if ( $typedef_sub )
1339             {
1340 112         265 my $old = $1;
1341 112         204 my $new = $2;
1342 112         303 $old =~ s/^\s+//o;
1343 112         247 $new =~ s/^\s+//o;
1344 112         367 $old =~ s/\s+$//o;
1345 112         1287 $new =~ s/\s+$//o;
1346 112         295 $line = &$typedef_sub($old, $new);
1347             }
1348             else
1349             {
1350 4         8 $line = '';
1351             }
1352 116 100 100     1112 &$output_sub($line) if $output_sub and $line;
1353             }
1354 145         908 s/^\s*typedef\s+[^;]+\s*;//o;
1355             }
1356              
1357 116         439 next;
1358             }
1359             # no NEXT here
1360             }
1361              
1362             # structures:
1363              
1364 1711 100       4364 if ( /^\s*struct/o )
1365             {
1366             # skip over expressions of the type:
1367             # struct xxx function(arg1, ...);
1368 609 100       1342 if ( /\(/o )
1369             {
1370 29         90 $_ = '';
1371             }
1372             else
1373             {
1374 580         1406 $sub_params{'line'} = $_;
1375 580         1426 parse_struct(%sub_params);
1376             }
1377 609         2978 next;
1378             }
1379              
1380             # enumerations
1381 1102 100       2485 if ( /^\s*enum/o )
1382             {
1383             # skip over expressions of the type:
1384             # enum xxx function(arg1, ...);
1385 319 100       676 if ( /\(/o )
1386             {
1387 29         57 $_ = '';
1388 29         82 next;
1389             }
1390             # remove the 'enum' and its name
1391 290 100       1040 if ( /^.*enum\s+(\w+)\s*\{?/o )
1392             {
1393 261 100       826 $line = &$enum_start_sub($1) if $enum_start_sub;
1394 261 100 100     2487 &$output_sub($line) if $output_sub and $line;
1395 261         1333 s/^.*enum\s+\w+\s*\{?//o;
1396             }
1397             else
1398             {
1399 29         155 s/^.*enum\s*\{?//o;
1400             }
1401 290         444 my $curr_value = 0;
1402              
1403             #&_split_decl($infile);
1404             # check if one-line enum
1405 290 100       569 if ( /}/o )
1406             {
1407             # there are no conditional compiling directives in one-line enums
1408             #if ( $preproc_sub )
1409             #{
1410             # $_ = &$preproc_sub($_);
1411             #}
1412             #else
1413             #{
1414             # $_ = '';
1415             #}
1416              
1417 116         357 while ( /,.*;/o )
1418             {
1419 116 100       577 if ( /([\w\s]*)\s+(\w+)\s*=\s*(\w+)\s*,/o )
1420             {
1421 29 100       110 $line = &$enum_entry_sub ($2, $3) if $enum_entry_sub;
1422 29 100 100     349 &$output_sub($line) if $output_sub and $line;
1423 29         98 $curr_value = $3+1;
1424 29         161 s/[\w\s]*\s+\w+\s*=\s*\w+\s*,//o
1425             }
1426 116 100       441 if ( /([\w\s]*)\s+(\w+)\s*,/o )
1427             {
1428 87 100       221 $line = &$enum_entry_sub ($2, $curr_value) if $enum_entry_sub;
1429 87 100 100     723 &$output_sub($line) if $output_sub and $line;
1430 87         191 $curr_value++;
1431 87         472 s/[\w\s]*\s+\w+\s*,//o
1432             }
1433             }
1434              
1435             # the last line has no comma
1436 116 100       303 if ( /^\s*(\w+)\s*=\s*(\w+)\s*\}\s*;/o )
1437             {
1438 29 100       97 $line = &$enum_entry_sub ($1, $2) if $enum_entry_sub;
1439 29 100 100     247 &$output_sub($line) if $output_sub and $line;
1440 29         128 s/^\s*\w+\s*=\s*\w+\s*\}\s*;//o
1441             }
1442 116 100       338 if ( /^\s*(\w+)\s*\}\s*;/o )
1443             {
1444 87 100       228 $line = &$enum_entry_sub ($1, $curr_value) if $enum_entry_sub;
1445 87 100 100     736 &$output_sub($line) if $output_sub and $line;
1446 87         408 s/^\s*\w+\s*\}\s*;//o
1447             }
1448              
1449 116 100       312 $line = &$enum_end_sub() if $enum_end_sub;
1450 116 100 100     773 &$output_sub($line) if $output_sub and $line;
1451             # processing the comments
1452 116 100 100     553 if ( $comment_sub and ( m#//# or m#/\*# ) )
      100        
1453             {
1454 56         120 $_ = &$comment_sub($_);
1455 56 100 100     422 &$output_sub($_) if $output_sub and $_;
1456             }
1457 116         404 next;
1458             }
1459             else
1460             {
1461 174         463 while ( <$infile> )
1462             {
1463             # processing of conditional compiling directives
1464 754 100       2108 if ( /^\s*#/o )
1465             {
1466 29 100       82 if ( $preproc_sub )
1467             {
1468 28         85 $_ = &$preproc_sub($_);
1469             }
1470             else
1471             {
1472 1         3 $_ = '';
1473             }
1474 29 100 100     272 &$output_sub($_) if $output_sub and $_;
1475              
1476 29         94 next;
1477             }
1478              
1479 725         1290 &_remove_attrs;
1480             # skip over the first '{' character
1481             #next if /^\s*\{\s*$/o;
1482 725         1202 s/^\s*{\s*$//go;
1483              
1484 725 100       1942 next if /^\s*$/o;
1485              
1486             # if the constant has a value, we don't touch it
1487 551 100       957 if ( /=/o )
1488             {
1489 87 100       369 if ( /^\s*(\w+)\s*=\s*([-*\/+\w]+)\s*,?/o )
1490             {
1491 58 100       183 $line = &$enum_entry_sub ($1, $2) if $enum_entry_sub;
1492 58 100 100     591 &$output_sub($line) if $output_sub and $line;
1493 58 100       176 $curr_value = $2 + 1 if _is_a_number ($2);
1494 58         241 s/^\s*\w+\s*=\s*\w+\s*,?//o;
1495             }
1496             }
1497             else
1498             {
1499             # assign a subsequent value
1500 464 100       1197 if ( /^\s*(\w+)\s*,?/o )
1501             {
1502 232 100       611 $line = &$enum_entry_sub ($1, $curr_value) if $enum_entry_sub;
1503 232 100 100     2064 &$output_sub($line) if $output_sub and $line;
1504 232         481 $curr_value++;
1505 232         695 s/^\s*\w+\s*,?//o;
1506             }
1507             }
1508              
1509             # processing the comments
1510 551 100 100     2298 if ( $comment_sub and ( m#//# or m#/\*# ) )
      100        
1511             {
1512 56         118 $line = &$comment_sub($_);
1513 56 100       343 $_ = $line if $line;
1514             }
1515              
1516             # look for the end of the enum
1517 551 100       2325 if ( /\s*\}.*/o )
1518             {
1519 174 100       489 $line = &$enum_end_sub() if $enum_end_sub;
1520 174 100 100     1160 &$output_sub($line) if $output_sub and $line;
1521 174         668 next READ;
1522             }
1523              
1524 377 100 100     1219 &$output_sub($_) if $output_sub and $_;
1525             }
1526             }
1527             }
1528              
1529 783 100       2202 if ( /^\s*union/o )
1530             {
1531             # skip over expressions of the type:
1532             # union xxx function(arg1, ...);
1533 493 100       1064 if ( /\(/o )
1534             {
1535 29         79 $_ = '';
1536             }
1537             else
1538             {
1539 464         940 $sub_params{'line'} = $_;
1540 464         1140 parse_union(%sub_params);
1541             }
1542 493         2201 next;
1543             }
1544              
1545 290         531 s/^\s*{\s*$//go;
1546             # remove any }'s left after , for example
1547 290         468 s/^\s*}\s*$//go;
1548 290 100 100     981 if ( $comment_sub and m#/\*# ) # single-line comments should be processed at the top
1549             {
1550 28         150 $line = &$comment_sub($_);
1551 28 100       264 $_ = $line if $line;
1552             }
1553 290 100       809 &$output_sub($_) if $output_sub; # and $_; # the line won't be empty here
1554             }
1555             }
1556              
1557              
1558             =head1 SUPPORT AND DOCUMENTATION
1559              
1560             After installing, you can find documentation for this module with the perldoc command.
1561              
1562             perldoc Parse::H
1563              
1564             You can also look for information at:
1565              
1566             Search CPAN
1567             https://metacpan.org/release/Parse-H
1568              
1569             CPAN Request Tracker:
1570             https://rt.cpan.org/Public/Dist/Display.html?Name=Parse-H
1571              
1572             =head1 AUTHOR
1573              
1574             Bogdan Drozdowski, C<< >>
1575              
1576             =head1 COPYRIGHT
1577              
1578             Copyright 2022-2025 Bogdan Drozdowski, all rights reserved.
1579              
1580             =head1 LICENSE
1581              
1582             This program is free software; you can redistribute it and/or modify it
1583             under the same terms as Perl itself.
1584              
1585             =cut
1586              
1587             1; # End of Parse::H