File Coverage

blib/lib/Parse/H.pm
Criterion Covered Total %
statement 587 587 100.0
branch 442 442 100.0
condition 204 204 100.0
subroutine 11 11 100.0
pod 3 3 100.0
total 1247 1247 100.0


line stmt bran cond sub pod time code
1             #!perl
2             # Parse::H - A parser for C header files that calls the given
3             # subroutines when a symbol of a specified type is encountered.
4             #
5             # Copyright (C) 2022-2023 Bogdan 'bogdro' Drozdowski,
6             # bogdro (at) users . sourceforge . net
7             # bogdro /at\ cpan . org
8             #
9             # This program is free software; you can redistribute it and/or modify it
10             # under the same terms as Perl itself.
11             #
12              
13             package Parse::H;
14              
15 2     2   136900 use warnings;
  2         13  
  2         146  
16              
17             require Exporter;
18             @ISA = (Exporter);
19             @EXPORT = qw();
20             @EXPORT_OK = qw(parse_struct parse_union parse_file);
21              
22 2     2   14 use strict;
  2         3  
  2         20075  
23              
24             =head1 NAME
25              
26             Parse::H - A parser for C header files that calls the given subroutines when a symbol of a specified type is encountered.
27              
28             =head1 VERSION
29              
30             Version 0.21
31              
32             =cut
33              
34             our $VERSION = '0.21';
35              
36             =head1 DESCRIPTION
37              
38             This module provides subroutines for parsing C language header files
39             (*.h files) while calling user-provided callback subroutines on various
40             found elements.
41              
42             =head1 SYNOPSIS
43              
44             use Parse::H qw(parse_file);
45              
46             open (my $infile, '<', 'test.h') or die "Cannot open test.h: $!\n";
47              
48             my $extern_sub = sub { ... }
49             my $comment_sub = sub { ... }
50             my $preproc_sub = sub { ... }
51             my $typedef_sub = sub { ... }
52             my $struct_start_sub = sub { ... }
53             my $struct_entry_sub = sub { ... }
54             my $struct_end_sub = sub { ... }
55             my $enum_start_sub = sub { ... }
56             my $enum_entry_sub = sub { ... }
57             my $enum_end_sub = sub { ... }
58             my $union_start_sub = sub { ... }
59             my $union_entry_sub = sub { ... }
60             my $union_end_sub = sub { ... }
61             my $output_sub = sub { ... }
62              
63             my %params = (
64             'infile' => $infile,
65             'output_sub' => $output_sub,
66             'comment_sub' => $comment_sub,
67             'preproc_sub' => $preproc_sub,
68             'extern_sub' => $extern_sub,
69             'typedef_sub' => $typedef_sub,
70             'struct_start_sub' => $struct_start_sub,
71             'struct_entry_sub' => $struct_entry_sub,
72             'struct_end_sub' => $struct_end_sub,
73             'union_start_sub' => $union_start_sub,
74             'union_entry_sub' => $union_entry_sub,
75             'union_end_sub' => $union_end_sub,
76             'enum_start_sub' => $enum_start_sub,
77             'enum_entry_sub' => $enum_entry_sub,
78             'enum_end_sub' => $enum_end_sub,
79             'pointer_size' => 8,
80             );
81              
82             parse_file (%params);
83              
84             close $infile;
85              
86             =head1 EXPORT
87              
88             Nothing is exported by default.
89              
90             The following functions are exported on request:
91             parse_struct
92             parse_union
93             parse_file
94              
95             These parse a C "struct" type, a C "union" type or a whole C header
96             file, respectively.
97              
98             =head1 DATA
99              
100             =cut
101              
102             # =head2 _max
103             #
104             # PRIVATE SUBROUTINE.
105             # Returns the greater of 2 numbers.
106             #
107             # =cut
108             sub _max
109             {
110 2610     2610   3618 my $a = shift, $b = shift;
111 2610 100       5216 return $a if $a > $b;
112 2291         5189 return $b;
113             }
114              
115             # =head2 _get_param
116             #
117             # PRIVATE SUBROUTINE.
118             # Returns the value specified by name (parameter 2) from the
119             # hashref specified in parameter 1, or undef.
120             #
121             # =cut
122             sub _get_param
123             {
124 15136     15136   17486 my $hash = shift;
125 15136         17144 my $name = shift;
126 15136 100       28094 return defined($hash->{$name})? $hash->{$name} : undef;
127             }
128              
129             # =head2 _is_a_number
130             #
131             # PRIVATE SUBROUTINE.
132             # Returns 1 if the provided parameter string looks like a valid number, 0 otherwise.
133             #
134             # =cut
135             sub _is_a_number
136             {
137 2320     2320   2939 my $v = shift;
138 2320 100       11391 return ($v =~ /^((0?x[0-9a-f]+)|(0?b[01]+)|(0?o[0-7]+)|([0-9]+))$/oi)? 1 : 0;
139             }
140              
141             # =head2 _output_array_entry_size
142             #
143             # PRIVATE SUBROUTINE.
144             # Outputs an entry for the given array count and element size.
145             # Params: entry sub ref (struct or union), output sub ref,
146             # variable name, element count, element size.
147             # Returns the element count, converted to a number if possible.
148             #
149             # =cut
150             sub _output_array_entry_size
151             {
152 2262     2262   2882 my $entry_sub = shift;
153 2262         2706 my $output_sub = shift;
154 2262         3967 my $var_name = shift;
155 2262         3363 my $count = shift;
156 2262         2827 my $size = shift;
157 2262         2992 my $line = '';
158 2262 100       6201 if ( $count =~ /^(0?[xbo])[0-9a-f_]+$/oi )
    100          
159             {
160             # looks like a hex/bin/oct number - convert
161 58         144 $count = oct($count);
162 58 100       243 $line = &$entry_sub($var_name, $size * $count) if $entry_sub;
163             }
164             elsif ( $count =~ /^[0-9_]+$/oi )
165             {
166             # looks like a dec number - convert
167 1160         1907 $count = int($count);
168 1160 100       3077 $line = &$entry_sub($var_name, $size * $count) if $entry_sub;
169             }
170             else
171             {
172             # not a number - emit a string
173 1044 100       3340 $line = &$entry_sub($var_name, "$size * $count") if $entry_sub;
174             }
175 2262 100 100     20160 &$output_sub($line) if $output_sub and $line;
176             # remove the parsed element
177 2262         9025 s/^[^;]*;//o;
178 2262         4556 return $count;
179             }
180              
181             # =head2 _split_decl
182             #
183             # PRIVATE SUBROUTINE.
184             # Splits a declaration line of multiple variables into separate declarations.
185             # Params: the input file handle.
186             #
187             # =cut
188             sub _split_decl
189             {
190 11774     11774   15436 my $infile = shift;
191             # many variables of the same type - we put each on a separate line together with its type
192 11774 100       20340 if ( m#/\*#o )
193             {
194 290         760 while ( /,\s*$/o )
195             {
196 29         124 s/[\r\n]+//o;
197 29         132 $_ .= <$infile>;
198             }
199 290         893 while ( m#,.*/\*#o )#&& !/\(/o )
200             {
201 145 100       354 if ( m#\[.*/\*#o )
202             {
203 29         317 s/([\w*\s]+)\s+([()\w\s*]+)\s*(\[\w+\]),\s*(.*)/$1 $2$3;\n$1 $4/;
204             }
205             else
206             {
207 116         1091 s/([\w*\s]+)\s+([()\w\s*]+)\s*,\s*(.*)/$1 $2;\n$1 $3/;
208             }
209             }
210             }
211             else
212             {
213 11484         21893 while ( /,\s*$/o )
214             {
215 29         127 s/[\r\n]+//o;
216 29         135 $_ .= <$infile>;
217             }
218 11484         22065 while ( /,.*/o )#&& !/\(/o )
219             {
220 1131 100       2215 if ( /\[/o )
221             {
222 29         320 s/([\w*\s]+)\s+([()\w\s*]+)\s*(\[\w+\]),\s*(.*)/$1 $2$3;\n$1 $4/;
223             }
224             else
225             {
226 1102         9637 s/([\w*\s]+)\s+([()\w\s*]+)\s*,\s*(.*)/$1 $2;\n$1 $3/;
227             }
228             }
229             }
230             }
231              
232             # =head2 _remove_attrs
233             #
234             # PRIVATE SUBROUTINE.
235             # Removes attributes from the current line.
236             #
237             # =cut
238             sub _remove_attrs
239             {
240 17342     17342   24037 s/__attribute__\s*\(\(.*\)\)//go;
241 17342         22152 s/\[\[.*\]\]//go;
242             }
243              
244             sub parse_union(\%);
245             sub parse_struct(\%);
246              
247             =head2 parse_struct
248              
249             Parses a C "structure" type, calling the provided subroutines when
250             a symbol of a specified type is encountered.
251             Parameters: a hash containing the input file handle and references to
252             the subroutines. All subroutines should return a line of text (which
253             may later go to $output_sub) after their processing of the given parameter.
254             If a key is not present in the hash, its functionality is not used
255             (unless a default value is specified).
256             Hash keys:
257              
258             'infile' => input file handle (required),
259             'line' => the current line to process (default: empty line),
260             'output_sub' => a subroutine that processes the output.
261             Takes the line to output as its single parameter,
262             'comment_sub' => a subroutine that processes comments.
263             Takes the current line as its single parameter,
264             'preproc_sub' => a subroutine that processes preprocessor lines.
265             Takes the current line as its single parameter,
266             'struct_start_sub' => a subroutine that processes the beginning of a structure.
267             Takes the structure name as its single parameter,
268             'struct_entry_sub' => a subroutine that processes an entry of a structure.
269             Takes the symbol name as its first parameter, its size as the second and the structure name as the third,
270             'struct_end_sub' => a subroutine that processes the end of a structure.
271             Takes the structure name as its first parameter and its size as the second,
272             'union_start_sub' => a subroutine that processes the beginning of a union.
273             Takes the union name as its single parameter,
274             'union_entry_sub' => a subroutine that processes an entry of a union.
275             Takes the symbol name as its first parameter and its size as the second,
276             'union_end_sub' => a subroutine that processes the end of a union.
277             Takes the symbol name as its first parameter, its size as the second and the union name as the third,
278             'pointer_size' => the pointer size to use, in bytes (default: 8),
279              
280             =cut
281              
282             sub parse_struct(\%)
283             {
284 639     639 1 7412 my $params = shift;
285              
286 639         1110 my $infile = _get_param($params, 'infile'); # input file handle
287 639         1086 my $output_sub = _get_param($params, 'output_sub'); # output subroutine
288 639         904 $_ = _get_param($params, 'line');
289 639 100       1178 $_ = '' unless defined($_);
290 639         1015 my $struct_start_sub = _get_param($params, 'struct_start_sub'); # subroutine that converts structures
291 639         1015 my $struct_entry_sub = _get_param($params, 'struct_entry_sub'); # subroutine that converts structures
292 639         903 my $struct_end_sub = _get_param($params, 'struct_end_sub'); # subroutine that converts structures
293 639         962 my $union_start_sub = _get_param($params, 'union_start_sub'); # subroutine that converts unions
294 639         895 my $union_entry_sub = _get_param($params, 'union_entry_sub'); # subroutine that converts unions
295 639         876 my $union_end_sub = _get_param($params, 'union_end_sub'); # subroutine that converts unions
296 639         929 my $comment_sub = _get_param($params, 'comment_sub'); # subroutine that converts comments
297 639         975 my $preproc_sub = _get_param($params, 'preproc_sub'); # subroutine that converts proceprocessor directives
298 639         1015 my $pointer_size = _get_param($params, 'pointer_size'); # pointer size in bytes
299 639 100       1078 $pointer_size = 8 unless defined($pointer_size);
300              
301 639 100       1075 return unless $infile;
302              
303 638         3480 my %sub_params = (
304             'infile' => $infile,
305             'output_sub' => $output_sub,
306             'comment_sub' => $comment_sub,
307             'preproc_sub' => $preproc_sub,
308             'extern_sub' => undef,
309             'typedef_sub' => undef,
310             'struct_start_sub' => undef,
311             'struct_entry_sub' => undef,
312             'struct_end_sub' => undef,
313             'union_start_sub' => undef,
314             'union_entry_sub' => undef,
315             'union_end_sub' => undef,
316             'enum_start_sub' => undef,
317             'enum_entry_sub' => undef,
318             'enum_end_sub' => undef,
319             'pointer_size' => $pointer_size,
320             );
321              
322 638         1306 &_remove_attrs;
323             # skip over "struct foo;"
324 638 100       3011 if ( /^\s*struct\s+[\w\s\$\*]+(\[[^\]]*\])?;/o )#&& ! /{/o )
325             {
326             # processing the comments
327             # if ( $comment_sub )
328             # {
329             # $_ = &$comment_sub($_);
330             # &$output_sub($_) if $output_sub and $_;
331             # }
332 29         115 return (0, '');
333             }
334              
335             # skip over "struct {};" (syntax error, but causes an infinite loop)
336 609 100       1515 if ( /^\s*struct\s*\{\s*\}\s*;/o )
337             {
338             # processing the comments
339             # if ( $comment_sub )
340             # {
341             # $_ = &$comment_sub($_);
342             # &$output_sub($_) if $output_sub and $_;
343             # }
344 29         94 return (0, '');
345             }
346              
347             # the name of the structure
348 580         919 my $str_name = '';
349 580 100       1574 if ( /^\s*struct\s+(\w+)/o )
350             {
351 435         1060 $str_name = $1;
352 435         1746 s/^\s*struct\s+\w+//o;
353             }
354             else
355             {
356             # remove 'struct' so that the start line is not interpreted
357             # as a structure inside a structure
358 145         451 s/^\s*struct\s*\{?//o;
359             }
360 580         978 my $size = 0;
361 580         1228 my ($memb_size, $name);
362 580         0 my $line;
363 580 100       1317 $line = &$struct_start_sub($str_name) if $struct_start_sub;
364 580 100 100     4027 &$output_sub($line) if $output_sub and $line;
365              
366             # a structure can end on the same line or contain many declaration per line
367             # - we simply put a newline after each semicolon and go on
368              
369 580         1177 s/;/;\n/go;
370             # processing the comments
371 580 100 100     2447 if ( $comment_sub and ( m#//# or m#/\*# ) )
      100        
372             {
373 56         137 $line = &$comment_sub($_);
374 56 100       318 $_ = $line if $line;
375             }
376              
377             do
378 580         814 {
379 6322         10844 s/^\s*{\s*$//go;
380             # joining lines
381 6322         14608 while ( /[\\,]$/o )
382             {
383 29         61 s/\\[\r\n]+//o;
384 29         135 $_ .= <$infile>;
385             }
386              
387 6322         12231 &_remove_attrs;
388 6322         11378 &_split_decl($infile);
389              
390             # processing the comments
391 6322 100 100     22450 if ( $comment_sub and ( m#//# or m#/\*# ) )
      100        
392             {
393 280         596 $line = &$comment_sub($_);
394 280 100       1474 $_ = $line if $line;
395             }
396              
397             # union/struct arrays must be processed first
398 6322         11584 while ( /.*union\s+(\w+)\s+(\w+)\s*\[(\w+)\]\s*;/o )
399             {
400 58 100       192 $line = &$struct_entry_sub($2, 0) if $struct_entry_sub;
401 58 100 100     607 &$output_sub($line) if $output_sub and $line;
402             # remove the parsed element
403 58         407 s/.*union\s+(\w+)\s+(\w+)\s*\[(\w+)\]\s*;//o;
404             }
405 6322         10339 while ( /.*union\s+(\w+)\s+(\w+)\s*;/o )
406             {
407 29 100       104 $line = &$struct_entry_sub($2, 0) if $struct_entry_sub;
408 29 100 100     333 &$output_sub($line) if $output_sub and $line;
409             # remove the parsed element
410 29         183 s/.*union\s+\w+\s+\w+\s*;//o;
411             }
412             # while ( /^\s*union\s+(\w+)/o )
413             # {
414             # $sub_params{'line'} = $_;
415             # ($memb_size, $name) = parse_union(%sub_params);
416             # $line = &$struct_entry_sub($name, $memb_size) if $struct_entry_sub;
417             # &$output_sub($line) if $output_sub and $line;
418             # $_ = '';
419             # $size += $memb_size;
420             # goto STR_END;
421             # }
422              
423 6322         10568 while ( /^\s*union/o )
424             {
425 145 100       390 if ( ! /^\s*union\s+(\w+)/o )
426             {
427             # no name on the first line - look for it
428 87         543 while ( ! /\{/o )
429             {
430 116         187 s/\\[\r\n]+//o;
431 116         430 $_ .= <$infile>;
432             }
433 87         184 &_remove_attrs;
434 87 100       334 if ( ! /^\s*union\s+(\w+)/o )
435             {
436             # no name at all - delete 'union' to
437             # avoid endless loop
438 58         233 s/^\s*union\s*//o;
439             }
440             }
441 145         391 $sub_params{'line'} = $_;
442 145         364 my ($memb_size, $name) = parse_union(%sub_params);
443 145 100       480 $line = &$struct_entry_sub($name, $memb_size) if $struct_entry_sub;
444 145 100 100     1303 &$output_sub($line) if $output_sub and $line;
445 145         269 $_ = '';
446 145         237 $size += $memb_size;
447 145         976 goto STR_END;
448             }
449              
450             # first we remove the ":digit" from the structure fields
451 6177         8564 s/(.*):\s*\d+\s*/$1/g;
452              
453             # skip over 'volatile'
454 6177         10554 s/_*volatile_*//gio;
455              
456             # pointers to functions
457 6177         11136 while ( /^[^};]+\(\s*\*\s*(\w+)\s*\)\s*\([^)]*\)\s*;/o )
458             {
459 435 100       1032 $line = &$struct_entry_sub($1, $pointer_size) if $struct_entry_sub;
460 435 100 100     3799 &$output_sub($line) if $output_sub and $line;
461             # remove the parsed element
462 435         1794 s/^[^;]*;//o;
463 435         1168 $size += $pointer_size;
464             }
465             # pointer type
466 6177         11599 while ( /^[^};]+\*\s*(\w+)\s*;/o )
467             {
468 1537 100       3523 $line = &$struct_entry_sub($1, $pointer_size) if $struct_entry_sub;
469 1537 100 100     13292 &$output_sub($line) if $output_sub and $line;
470             # remove the parsed element
471 1537         5792 s/^[^;]*;//o;
472 1537         3848 $size += $pointer_size;
473             }
474              
475             # arrays
476 6177         9913 while ( /.*struct\s+(\w+)\s+(\w+)\s*\[(\w+)\]\s*;/o )
477             {
478 58 100       182 $line = &$struct_entry_sub($2, 0) if $struct_entry_sub;
479 58 100 100     622 &$output_sub($line) if $output_sub and $line;
480             # remove the parsed element
481 58         447 s/.*struct\s+(\w+)\s+(\w+)\s*\[(\w+)\]\s*;//o;
482             }
483 6177         14756 while ( /.*(signed|unsigned)?\s+long\s+long(\s+int)?\s+(\w+)\s*\[(\w+)\]\s*;/o )
484             {
485 174         376 my $count = &_output_array_entry_size ($struct_entry_sub,
486             $output_sub, $3, $4, 8);
487 174 100       309 $size += 8 * $count if _is_a_number ($count);
488             }
489 6177         9943 while ( /.*long\s+double\s+(\w+)\s*\[(\w+)\]\s*;/o )
490             {
491 58         142 my $count = &_output_array_entry_size ($struct_entry_sub,
492             $output_sub, $1, $2, 10);
493 58 100       97 $size += 10 * $count if _is_a_number ($count);
494             }
495 6177         11403 while ( /.*(char|unsigned\s+char|signed\s+char)\s+(\w+)\s*\[(\w+)\]\s*;/o )
496             {
497 203         475 my $count = &_output_array_entry_size ($struct_entry_sub,
498             $output_sub, $2, $3, 1);
499 203 100       334 $size += 1 * $count if _is_a_number ($count);
500             }
501 6177         10886 while ( /.*float\s+(\w+)\s*\[(\w+)\]\s*;/o )
502             {
503 58         128 my $count = &_output_array_entry_size ($struct_entry_sub,
504             $output_sub, $1, $2, 4);
505 58 100       97 $size += 4 * $count if _is_a_number ($count);
506             }
507 6177         9883 while ( /.*double\s+(\w+)\s*\[(\w+)\]\s*;/o )
508             {
509 58         141 my $count = &_output_array_entry_size ($struct_entry_sub,
510             $output_sub, $1, $2, 8);
511 58 100       100 $size += 8 * $count if _is_a_number ($count);
512             }
513 6177         10978 while ( /.*(short|signed\s+short|unsigned\s+short)(\s+int)?\s+(\w+)\s*\[(\w+)\]\s*;/o )
514             {
515 174         391 my $count = &_output_array_entry_size ($struct_entry_sub,
516             $output_sub, $3, $4, 2);
517 174 100       283 $size += 2 * $count if _is_a_number ($count);
518             }
519 6177         10439 while ( /.*(long|signed\s+long|unsigned\s+long)(\s+int)?\s+(\w+)\s*\[(\w+)\]\s*;/o )
520             {
521             # NOTE: assuming 'long int' is the same size as a pointer (should be on 32- and 64-bit systems)
522 290         543 my $count = &_output_array_entry_size ($struct_entry_sub,
523             $output_sub, $3, $4, $pointer_size);
524 290 100       432 $size += $pointer_size * $count if _is_a_number ($count);
525             }
526 6177         12635 while ( /.*(signed\s+|unsigned\s+)?int\s+(\w+)\s*\[(\w+)\]\s*;/o )
527             {
528 174         384 my $count = &_output_array_entry_size ($struct_entry_sub,
529             $output_sub, $2, $3, 4);
530 174 100       272 $size += 4 * $count if _is_a_number ($count);
531             }
532              
533             # variables' types
534 6177         9851 while ( /.*struct\s+(\w+)\s+(\w+)\s*;/o )
535             {
536 29 100       95 $line = &$struct_entry_sub($2, 0) if $struct_entry_sub;
537 29 100 100     306 &$output_sub($line) if $output_sub and $line;
538             # remove the parsed element
539 29         180 s/.*struct\s+\w+\s+\w+\s*;//o;
540             }
541 6177         10416 while ( /^\s*struct/o )
542             {
543 87 100       247 if ( ! /^\s*struct\s+(\w+)/o )
544             {
545             # no name on the first line - look for it
546 58         131 while ( ! /\{/o )
547             {
548 87         132 s/\\[\r\n]+//o;
549 87         305 $_ .= <$infile>;
550             }
551 58         127 &_remove_attrs;
552 58 100       241 if ( ! /^\s*struct\s+(\w+)/o )
553             {
554             # no name at all - delete 'struct' to
555             # avoid endless loop
556 29         137 s/^\s*struct\s*//o;
557             }
558             }
559 87         231 $sub_params{'line'} = $_;
560 87         249 my ($memb_size, $name) = parse_struct(%sub_params);
561 87 100       279 $line = &$struct_entry_sub($name, $memb_size) if $struct_entry_sub;
562 87 100 100     791 &$output_sub($line) if $output_sub and $line;
563 87         149 $_ = '';
564 87         114 $size += $memb_size;
565 87         556 goto STR_END;
566             }
567              
568             # all "\w+" stand for the variable name
569 6090         11422 while ( /.*(signed|unsigned)?\s+long\s+long(\s+int)?\s+(\w+)\s*;/o )
570             {
571 87 100       234 $line = &$struct_entry_sub($3, 8) if $struct_entry_sub;
572 87 100 100     868 &$output_sub($line) if $output_sub and $line;
573             # remove the parsed element
574 87         373 s/^[^;]*;//o;
575 87         190 $size += 8;
576             }
577 6090         9328 while ( /.*long\s+double\s+(\w+)\s*;/o )
578             {
579 58 100       185 $line = &$struct_entry_sub($1, 10) if $struct_entry_sub;
580 58 100 100     586 &$output_sub($line) if $output_sub and $line;
581             # remove the parsed element
582 58         246 s/^[^;]*;//o;
583 58         131 $size += 10;
584             }
585 6090         11802 while ( /.*(char|unsigned\s+char|signed\s+char)\s+(\w+)\s*;/o )
586             {
587 464 100       1069 $line = &$struct_entry_sub($2, 1) if $struct_entry_sub;
588 464 100 100     3311 &$output_sub($line) if $output_sub and $line;
589             # remove the parsed element
590 464         1822 s/^[^;]*;//o;
591 464         1192 $size += 1;
592             }
593 6090         9997 while ( /.*float\s+(\w+)\s*;/o )
594             {
595 58 100       171 $line = &$struct_entry_sub($1, 4) if $struct_entry_sub;
596 58 100 100     593 &$output_sub($line) if $output_sub and $line;
597             # remove the parsed element
598 58         259 s/^[^;]*;//o;
599 58         224 $size += 4;
600             }
601 6090         9407 while ( /.*double\s+(\w+)\s*;/o )
602             {
603 29 100       117 $line = &$struct_entry_sub($1, 8) if $struct_entry_sub;
604 29 100 100     333 &$output_sub($line) if $output_sub and $line;
605             # remove the parsed element
606 29         139 s/^[^;]*;//o;
607 29         67 $size += 8;
608             }
609 6090         10415 while ( /.*(short|signed\s+short|unsigned\s+short)(\s+int)?\s+(\w+)\s*;/o )
610             {
611 174 100       460 $line = &$struct_entry_sub($3, 2) if $struct_entry_sub;
612 174 100 100     1609 &$output_sub($line) if $output_sub and $line;
613             # remove the parsed element
614 174         733 s/^[^;]*;//o;
615 174         535 $size += 2;
616             }
617 6090         9579 while ( /.*(long|signed\s+long|unsigned\s+long)(\s+int)?\s+(\w+)\s*;/o )
618             {
619             # NOTE: assuming 'long int' is the same size as a pointer (should be on 32- and 64-bit systems)
620 87 100       251 $line = &$struct_entry_sub($3, $pointer_size) if $struct_entry_sub;
621 87 100 100     905 &$output_sub($line) if $output_sub and $line;
622             # remove the parsed element
623 87         395 s/^[^;]*;//o;
624 87         196 $size += $pointer_size;
625             }
626 6090         10619 while ( /.*(unsigned\s+|signed\s+)?int\s+(\w+)\s*;/o )
627             {
628 174 100       458 $line = &$struct_entry_sub($2, 4) if $struct_entry_sub;
629 174 100 100     1646 &$output_sub($line) if $output_sub and $line;
630             # remove the parsed element
631 174         713 s/^[^;]*;//o;
632 174         801 $size += 4;
633             }
634              
635             # look for the end of the structure
636 6090 100       10860 if ( /}/o )
637             {
638             # add a structure size definition
639 580         993 my $var_name = '';
640 580 100       1474 if ( /\}\s*(\*?)\s*(\w+)[^;]*;/o )
641             {
642 145         396 $var_name = $2;
643             }
644 580 100       1073 if ( /\}\s*\*/o )
645             {
646 29         49 $size = $pointer_size;
647             }
648 580 100       1239 $line = &$struct_end_sub($var_name, $size, $str_name) if $struct_end_sub;
649 580 100 100     3597 &$output_sub($line) if $output_sub and $line;
650 580         1010 $_ = '';
651 580         2391 return ($size, $var_name);
652             }
653              
654             # processing of conditional compiling directives
655 5510 100 100     14379 if ( $preproc_sub && /^\s*#/o )
656             {
657 56         171 $_ = &$preproc_sub($_);
658             }
659 5510 100 100     18514 &$output_sub($_) if $output_sub and $_;
660              
661 5742         28479 STR_END: } while ( <$infile> );
662             }
663              
664             =head2 parse_union
665              
666             Parses a C "union" type, calling the provided subroutines when
667             a symbol of a specified type is encountered.
668             Parameters: a hash containing the input file handle and references to
669             the subroutines. All subroutines should return a line of text (which
670             may later go to $output_sub) after their processing of the given parameter.
671             If a key is not present in the hash, its functionality is not used
672             (unless a default value is specified).
673             Hash keys:
674              
675             'infile' => input file handle (required),
676             'line' => the current line to process (default: empty line),
677             'output_sub' => a subroutine that processes the output.
678             Takes the line to output as its single parameter,
679             'comment_sub' => a subroutine that processes comments.
680             Takes the current line as its single parameter,
681             'preproc_sub' => a subroutine that processes preprocessor lines.
682             Takes the current line as its single parameter,
683             'struct_start_sub' => a subroutine that processes the beginning of a structure.
684             Takes the structure name as its single parameter,
685             'struct_entry_sub' => a subroutine that processes an entry of a structure.
686             Takes the symbol name as its first parameter, its size as the second and the structure name as the third,
687             'struct_end_sub' => a subroutine that processes the end of a structure.
688             Takes the structure name as its first parameter and its size as the second,
689             'union_start_sub' => a subroutine that processes the beginning of a union.
690             Takes the union name as its single parameter,
691             'union_entry_sub' => a subroutine that processes an entry of a union.
692             Takes the symbol name as its first parameter and its size as the second,
693             'union_end_sub' => a subroutine that processes the end of a union.
694             Takes the symbol name as its first parameter, its size as the second and the union name as the third,
695             'pointer_size' => the pointer size to use, in bytes (default: 8),
696              
697              
698             =cut
699              
700             sub parse_union(\%)
701             {
702 581     581 1 7412 my $params = shift;
703              
704 581         976 my $infile = _get_param($params, 'infile'); # input file handle
705 581         1031 my $output_sub = _get_param($params, 'output_sub'); # output subroutine
706 581         857 $_ = _get_param($params, 'line');
707 581 100       1119 $_ = '' unless defined($_);
708 581         1003 my $struct_start_sub = _get_param($params, 'struct_start_sub'); # subroutine that converts structures
709 581         926 my $struct_entry_sub = _get_param($params, 'struct_entry_sub'); # subroutine that converts structures
710 581         833 my $struct_end_sub = _get_param($params, 'struct_end_sub'); # subroutine that converts structures
711 581         838 my $union_start_sub = _get_param($params, 'union_start_sub'); # subroutine that converts unions
712 581         896 my $union_entry_sub = _get_param($params, 'union_entry_sub'); # subroutine that converts unions
713 581         821 my $union_end_sub = _get_param($params, 'union_end_sub'); # subroutine that converts unions
714 581         884 my $comment_sub = _get_param($params, 'comment_sub'); # subroutine that converts comments
715 581         807 my $preproc_sub = _get_param($params, 'preproc_sub'); # subroutine that converts proceprocessor directives
716 581         853 my $pointer_size = _get_param($params, 'pointer_size'); # pointer size in bytes
717 581 100       985 $pointer_size = 8 unless defined($pointer_size);
718              
719 581 100       1021 return unless $infile;
720              
721 580         3005 my %sub_params = (
722             'infile' => $infile,
723             'output_sub' => $output_sub,
724             'comment_sub' => $comment_sub,
725             'preproc_sub' => $preproc_sub,
726             'extern_sub' => undef,
727             'typedef_sub' => undef,
728             'struct_start_sub' => undef,
729             'struct_entry_sub' => undef,
730             'struct_end_sub' => undef,
731             'union_start_sub' => undef,
732             'union_entry_sub' => undef,
733             'union_end_sub' => undef,
734             'enum_start_sub' => undef,
735             'enum_entry_sub' => undef,
736             'enum_end_sub' => undef,
737             'pointer_size' => $pointer_size,
738             );
739              
740 580         1158 &_remove_attrs;
741             # skip over "union foo;"
742 580 100       1870 if ( /^\s*union\s+[^;{}]*;/o )
743             {
744             # processing the comments
745             # if ( $comment_sub )
746             # {
747             # $_ = &$comment_sub($_);
748             # &$output_sub($_) if $output_sub and $_;
749             # }
750 29         114 return (0, '');
751             }
752              
753             # skip over "union {};" (syntax error, but causes an infinite loop)
754 551 100       1348 if ( /^\s*union\s*\{\s*\}\s*;/o )
755             {
756             # processing the comments
757             # if ( $comment_sub )
758             # {
759             # $_ = &$comment_sub($_);
760             # &$output_sub($_) if $output_sub and $_;
761             # }
762 29         84 return (0, '');
763             }
764              
765             # the name of the union
766 522         799 my $union_name = '';
767              
768 522 100       1373 if ( /^\s*union\s+(\w+)/o )
769             {
770 348         756 $union_name = $1;
771 348         1226 s/^\s*union\s+\w+//o;
772             }
773             else
774             {
775             # remove 'union' so that the start line is not interpreted
776             # as a union inside a union
777 174         455 s/^\s*union\s*\{?//o;
778             }
779 522         823 my $size = 0;
780 522         1073 my ($memb_size, $name);
781 522         0 my $line;
782 522 100       1230 $line = &$union_start_sub($union_name) if $union_start_sub;
783 522 100 100     3164 &$output_sub($line) if $output_sub and $line;
784              
785             # if there was a '{' in the first line, we put it in the second
786 522 100       1297 if ( /{/o )
787             {
788 203         649 s/\s*\{/\n\{\n/o;
789             }
790              
791             # an union can end on the same line or contain many declaration per line
792             # - we simply put a newline after each semicolon and go on
793              
794 522         2069 s/;/;\n/go;
795              
796             do
797 522         676 {
798 5133         8747 s/^\s*{\s*$//go;
799 5133         8998 &_remove_attrs;
800 5133         9205 &_split_decl($infile);
801              
802             # processing the comments
803 5133 100 100     17765 if ( $comment_sub and ( m#//# or m#/\*# ) )
      100        
804             {
805 84         212 $line = &$comment_sub($_);
806 84 100       525 $_ = $line if $line;
807             }
808              
809             # pointers to functions
810 5133         9445 while ( /^[^};]+\(\s*\*\s*(\w+)\s*\)\s*\([^)]*\)\s*;/o )
811             {
812 145 100       360 $line = &$union_entry_sub($1, $pointer_size) if $union_entry_sub;
813 145 100 100     1348 &$output_sub($line) if $output_sub and $line;
814             # remove the parsed element
815 145         583 s/^[^;]*;//o;
816 145         273 $size = _max($size, $pointer_size);
817             }
818             # pointer type
819 5133         9172 while ( /^[^};]+\*\s*(\w+)\s*;/o )
820             {
821 522 100       1292 $line = &$union_entry_sub($1, $pointer_size) if $union_entry_sub;
822 522 100 100     4696 &$output_sub($line) if $output_sub and $line;
823             # remove the parsed element
824 522         2165 s/^[^;]*;//o;
825 522         932 $size = _max($size, $pointer_size);
826             }
827              
828             # union/struct arrays must be processed first
829 5133         8722 while ( /.*union\s+(\w+)\s+(\w+)\s*\[(\w+)\]\s*;/o )
830             {
831 58 100       179 $line = &$union_entry_sub($2, 0) if $union_entry_sub;
832 58 100 100     924 &$output_sub($line) if $output_sub and $line;
833             # remove the parsed element
834 58         345 s/.*union\s+(\w+)\s+(\w+)\s*\[(\w+)\]\s*;//o;
835             }
836              
837 5133         9047 while ( /.*union\s+(\w+)\s+(\w+)\s*;/o )
838             {
839 29 100       100 $line = &$union_entry_sub($2, 0) if $union_entry_sub;
840 29 100 100     330 &$output_sub($line) if $output_sub and $line;
841             # remove the parsed element
842 29         178 s/.*union\s+\w+\s+\w+\s*;//o;
843             }
844              
845 5133         8250 while ( /.*struct\s+(\w+)\s+(\w+)\s*\[(\w+)\]\s*;/o )
846             {
847 58 100       174 $line = &$union_entry_sub($2, 0) if $union_entry_sub;
848 58 100 100     613 &$output_sub($line) if $output_sub and $line;
849             # remove the parsed element
850 58         354 s/.*struct\s+(\w+)\s+(\w+)\s*\[(\w+)\]\s*;//o;
851             }
852              
853 5133         8273 while ( /.*struct\s+(\w+)\s+(\w+)\s*;/o )
854             {
855 29 100       121 $line = &$union_entry_sub($2, 0) if $union_entry_sub;
856 29 100 100     330 &$output_sub($line) if $output_sub and $line;
857             # remove the parsed element
858 29         195 s/.*struct\s+\w+\s+\w+\s*;//o;
859             }
860              
861 5133         9097 while ( /^\s*struct/o )
862             {
863 87 100       250 if ( ! /^\s*struct\s+(\w+)/o )
864             {
865             # no name on the first line - look for it
866 58         152 while ( ! /\{/o )
867             {
868 87         132 s/\\[\r\n]+//o;
869 87         304 $_ .= <$infile>;
870             }
871 58         132 &_remove_attrs;
872 58 100       236 if ( ! /^\s*struct\s+(\w+)/o )
873             {
874             # no name at all - delete 'struct' to
875             # avoid endless loop
876 29         124 s/^\s*struct\s*//o;
877             }
878             }
879 87         211 $sub_params{'line'} = $_;
880 87         231 my ($memb_size, $name) = parse_struct(%sub_params);
881 87 100       275 $line = &$union_entry_sub($name, $memb_size) if $union_entry_sub;
882 87 100 100     767 &$output_sub($line) if $output_sub and $line;
883 87         190 $size = _max($size, $memb_size);
884 87         134 $_ = '';
885 87         560 goto STR_END;
886             }
887              
888             # variables' types
889             # all "\w+" stand for the variable name
890 5046         12531 while ( /.*(signed|unsigned)?\s+long\s+long(\s+int)?\s+(\w+)\s*;/o )
891             {
892 87 100       255 $line = &$union_entry_sub($3, 8) if $union_entry_sub;
893 87 100 100     834 &$output_sub($line) if $output_sub and $line;
894             # remove the parsed element
895 87         387 s/^[^;]*;//o;
896 87         178 $size = _max($size, 8);
897             }
898              
899 5046         8223 while ( /.*long\s+double\s+(\w+)\s*;/o )
900             {
901 29 100       99 $line = &$union_entry_sub($1, 10) if $union_entry_sub;
902 29 100 100     321 &$output_sub($line) if $output_sub and $line;
903             # remove the parsed element
904 29         136 s/^[^;]*;//o;
905 29         69 $size = _max($size, 10);
906             }
907              
908 5046         12444 while ( /.*(char|unsigned\s+char|signed\s+char)\s+(\w+)\s*;/o )
909             {
910 609 100       1373 $line = &$union_entry_sub($2, 1) if $union_entry_sub;
911 609 100 100     3382 &$output_sub($line) if $output_sub and $line;
912             # remove the parsed element
913 609         2180 s/^[^;]*;//o;
914 609         1139 $size = _max($size, 1);
915             }
916              
917 5046         8433 while ( /.*float\s+(\w+)\s*;/o )
918             {
919 29 100       104 $line = &$union_entry_sub($1, 4) if $union_entry_sub;
920 29 100 100     303 &$output_sub($line) if $output_sub and $line;
921             # remove the parsed element
922 29         195 s/^[^;]*;//o;
923 29         60 $size = _max($size, 4);
924             }
925              
926 5046         7997 while ( /.*double\s+(\w+)\s*;/o )
927             {
928 29 100       137 $line = &$union_entry_sub($1, 8) if $union_entry_sub;
929 29 100 100     358 &$output_sub($line) if $output_sub and $line;
930             # remove the parsed element
931 29         143 s/^[^;]*;//o;
932 29         63 $size = _max($size, 8);
933             }
934              
935 5046         10530 while ( /.*(short|signed\s+short|unsigned\s+short)(\s+int)?\s+(\w+)\s*;/o )
936             {
937 174 100       474 $line = &$union_entry_sub($3, 2) if $union_entry_sub;
938 174 100 100     1573 &$output_sub($line) if $output_sub and $line;
939             # remove the parsed element
940 174         708 s/^[^;]*;//o;
941 174         300 $size = _max($size, 2);
942             }
943              
944 5046         11184 while ( /.*(long|signed\s+long|unsigned\s+long)(\s+int)?\s+(\w+)\s*;/o )
945             {
946             # NOTE: assuming 'long int' is the same size as a pointer (should be on 32- and 64-bit systems)
947 87 100       238 $line = &$union_entry_sub($3, $pointer_size) if $union_entry_sub;
948 87 100 100     846 &$output_sub($line) if $output_sub and $line;
949             # remove the parsed element
950 87         393 s/^[^;]*;//o;
951 87         181 $size = _max($size, $pointer_size);
952             }
953              
954 5046         11023 while ( /.*(unsigned\s+|signed\s+)?int\s+(\w+)\s*;/o )
955             {
956 174 100       473 $line = &$union_entry_sub($2, 4) if $union_entry_sub;
957 174 100 100     1597 &$output_sub($line) if $output_sub and $line;
958             # remove the parsed element
959 174         737 s/^[^;]*;//o;
960 174         317 $size = _max($size, 4);
961             }
962              
963             # arrays
964              
965 5046         10276 while ( /.*(signed|unsigned)?\s+long\s+long(\s+int)?\s+(\w+)\s*\[(\w+)\]\s*;/o )
966             {
967 174         331 my $count = &_output_array_entry_size ($union_entry_sub,
968             $output_sub, $3, $4, 8);
969 174 100       259 $size = _max($size, 8 * $count) if _is_a_number ($count);
970             }
971              
972 5046         7799 while ( /.*long\s+double\s+(\w+)\s*\[(\w+)\]\s*;/o )
973             {
974 58         127 my $count = &_output_array_entry_size ($union_entry_sub,
975             $output_sub, $1, $2, 10);
976 58 100       97 $size = _max($size, 10 * $count) if _is_a_number ($count);
977             }
978              
979 5046         9315 while ( /.*(char|unsigned\s+char|signed\s+char)\s+(\w+)\s*\[(\w+)\]\s*;/o )
980             {
981 203         421 my $count = &_output_array_entry_size ($union_entry_sub,
982             $output_sub, $2, $3, 1);
983 203 100       344 $size = _max($size, 1 * $count) if _is_a_number ($count);
984             }
985              
986 5046         8312 while ( /.*float\s+(\w+)\s*\[(\w+)\]\s*;/o )
987             {
988 58         141 my $count = &_output_array_entry_size ($union_entry_sub,
989             $output_sub, $1, $2, 4);
990 58 100       108 $size = _max($size, 4 * $count) if _is_a_number ($count);
991             }
992              
993 5046         7551 while ( /.*double\s+(\w+)\s*\[(\w+)\]\s*;/o )
994             {
995 58         136 my $count = &_output_array_entry_size ($union_entry_sub,
996             $output_sub, $1, $2, 8);
997 58 100       121 $size = _max($size, 8 * $count) if _is_a_number ($count);
998             }
999              
1000 5046         8649 while ( /.*(short|signed\s+short|unsigned\s+short)(\s+int)?\s+(\w+)\s*\[(\w+)\]\s*;/o )
1001             {
1002 174         364 my $count = &_output_array_entry_size ($union_entry_sub,
1003             $output_sub, $3, $4, 2);
1004 174 100       318 $size = _max($size, 2 * $count) if _is_a_number ($count);
1005             }
1006              
1007 5046         8516 while ( /.*(long|signed\s+long|unsigned\s+long)(\s+int)?\s+(\w+)\s*\[(\w+)\]\s*;/o )
1008             {
1009             # NOTE: assuming 'long int' is the same size as a pointer (should be on 32- and 64-bit systems)
1010 174         351 my $count = &_output_array_entry_size ($union_entry_sub,
1011             $output_sub, $3, $4, $pointer_size);
1012 174 100       288 $size = _max($size, $pointer_size * $count) if _is_a_number ($count);
1013             }
1014              
1015 5046         8456 while ( /.*(signed\s+|unsigned\s+)?int\s+(\w+)\s*\[(\w+)\]\s*;/o )
1016             {
1017 174         356 my $count = &_output_array_entry_size ($union_entry_sub,
1018             $output_sub, $2, $3, 4);
1019 174 100       274 $size = _max($size, 4 * $count) if _is_a_number ($count);
1020             }
1021              
1022 5046         8086 while ( /^\s*union/o )
1023             {
1024 87 100       281 if ( ! /^\s*union\s+(\w+)/o )
1025             {
1026             # no name on the first line - look for it
1027 58         190 while ( ! /\{/o )
1028             {
1029 87         143 s/\\[\r\n]+//o;
1030 87         324 $_ .= <$infile>;
1031             }
1032 58         128 &_remove_attrs;
1033 58 100       239 if ( ! /^\s*union\s+(\w+)/o )
1034             {
1035             # no name at all - delete 'union' to
1036             # avoid endless loop
1037 29         136 s/^\s*union\s*//o;
1038             }
1039             }
1040 87         199 $sub_params{'line'} = $_;
1041 87         273 my ($memb_size, $name) = parse_union(%sub_params);
1042 87 100       302 $line = &$union_entry_sub($name, $memb_size) if $union_entry_sub;
1043 87 100 100     767 &$output_sub($line) if $output_sub and $line;
1044 87         150 $_ = '';
1045 87         131 $size = _max($size, $memb_size);
1046             }
1047              
1048             # look for the end of the union
1049 5046 100       9768 if ( /\s*\}.*/o )
1050             {
1051 522         862 my $var_name = '';
1052 522 100       1522 if ( /\}\s*(\*?)\s*(\w+)[^;]*;/o )
1053             {
1054 174         422 $var_name = $2;
1055             }
1056 522 100       1001 if ( /\}\s*\*/o )
1057             {
1058 29         52 $size = $pointer_size;
1059             }
1060 522 100       1181 $line = &$union_end_sub($var_name, $size, $union_name) if $union_end_sub;
1061 522 100 100     2985 &$output_sub($line) if $output_sub and $line;
1062 522         844 $_ = '';
1063 522         2248 return ($size, $var_name);
1064             }
1065              
1066             # processing of conditional compiling directives
1067 4524 100 100     12270 if ( $preproc_sub && /^\s*#/o )
1068             {
1069 28         97 $_ = &$preproc_sub($_);
1070             }
1071 4524 100 100     15016 &$output_sub($_) if $output_sub and $_;
1072              
1073 4611         20047 STR_END: } while ( <$infile> );
1074             }
1075              
1076             =head2 parse_file
1077              
1078             Parses a C header file, calling the provided subroutines when
1079             a symbol of a specified type is encountered.
1080             Parameters: a hash containing the input file handle and references to
1081             the subroutines. All subroutines should return a line of text (which
1082             may later go to $output_sub) after their processing of the given parameter.
1083             If a key is not present in the hash, its functionality is not used
1084             (unless a default value is specified).
1085             Hash keys:
1086              
1087             'infile' => input file handle (required),
1088             'output_sub' => a subroutine that processes the output.
1089             Takes the line to output as its single parameter,
1090             'comment_sub' => a subroutine that processes comments.
1091             Takes the current line as its single parameter,
1092             'preproc_sub' => a subroutine that processes preprocessor lines.
1093             Takes the current line as its single parameter,
1094             'extern_sub' => a subroutine that processes external symbol declarations.
1095             Takes the symbol name as its single parameter,
1096             'typedef_sub' => a subroutine that processes typedefs.
1097             Takes the old type's name as its first parameter and the new type's name as the second,
1098             'struct_start_sub' => a subroutine that processes the beginning of a structure.
1099             Takes the structure name as its single parameter,
1100             'struct_entry_sub' => a subroutine that processes an entry of a structure.
1101             Takes the symbol name as its first parameter, its size as the second and the structure name as the third,
1102             'struct_end_sub' => a subroutine that processes the end of a structure.
1103             Takes the structure name as its first parameter and its size as the second,
1104             'union_start_sub' => a subroutine that processes the beginning of a union.
1105             Takes the union name as its single parameter,
1106             'union_entry_sub' => a subroutine that processes an entry of a union.
1107             Takes the symbol name as its first parameter and its size as the second,
1108             'union_end_sub' => a subroutine that processes the end of a union.
1109             Takes the symbol name as its first parameter, its size as the second and the union name as the third,
1110             'enum_start_sub' => a subroutine that processes the beginning of an enumeration.
1111             Takes the enum's name as its single parameter,
1112             'enum_entry_sub' => a subroutine that processes an entry of an enumeration.
1113             Takes the symbol name as its first parameter and its value as the second,
1114             'enum_end_sub' => a subroutine that processes the end of an enumeration.
1115             Takes no parameters,
1116             'pointer_size' => the pointer size to use, in bytes (default: 8),
1117              
1118             =cut
1119              
1120             sub parse_file(\%)
1121             {
1122 31     31 1 211308 my $params = shift;
1123              
1124 31         94 my $infile = _get_param($params, 'infile'); # input file handle
1125 31         87 my $output_sub = _get_param($params, 'output_sub'); # output subroutine
1126 31         90 my $extern_sub = _get_param($params, 'extern_sub'); # subroutine that converts external declarations
1127 31         73 my $typedef_sub = _get_param($params, 'typedef_sub'); # subroutine that converts typedefs
1128 31         72 my $comment_sub = _get_param($params, 'comment_sub'); # subroutine that converts comments
1129 31         80 my $preproc_sub = _get_param($params, 'preproc_sub'); # subroutine that converts proceprocessor directives
1130 31         69 my $pointer_size = _get_param($params, 'pointer_size'); # pointer size in bytes
1131 31 100       104 $pointer_size = 8 unless defined($pointer_size);
1132 31         82 my $struct_start_sub = _get_param($params, 'struct_start_sub'); # subroutine that converts structures
1133 31         83 my $struct_entry_sub = _get_param($params, 'struct_entry_sub'); # subroutine that converts structures
1134 31         68 my $struct_end_sub = _get_param($params, 'struct_end_sub'); # subroutine that converts structures
1135 31         62 my $union_start_sub = _get_param($params, 'union_start_sub'); # subroutine that converts unions
1136 31         56 my $union_entry_sub = _get_param($params, 'union_entry_sub'); # subroutine that converts unions
1137 31         47 my $union_end_sub = _get_param($params, 'union_end_sub'); # subroutine that converts unions
1138 31         50 my $enum_start_sub = _get_param($params, 'enum_start_sub'); # subroutine that converts enumerations
1139 31         53 my $enum_entry_sub = _get_param($params, 'enum_entry_sub'); # subroutine that converts enumerations
1140 31         64 my $enum_end_sub = _get_param($params, 'enum_end_sub'); # subroutine that converts enumerations
1141              
1142 31 100       83 return unless $infile;
1143              
1144 29         158 my %sub_params = (
1145             'infile' => $infile,
1146             'output_sub' => $output_sub,
1147             'comment_sub' => $comment_sub,
1148             'preproc_sub' => $preproc_sub,
1149             'extern_sub' => $extern_sub,
1150             'typedef_sub' => $typedef_sub,
1151             'struct_start_sub' => $struct_start_sub,
1152             'struct_entry_sub' => $struct_entry_sub,
1153             'struct_end_sub' => $struct_end_sub,
1154             'union_start_sub' => $union_start_sub,
1155             'union_entry_sub' => $union_entry_sub,
1156             'union_end_sub' => $union_end_sub,
1157             'enum_start_sub' => $enum_start_sub,
1158             'enum_entry_sub' => $enum_entry_sub,
1159             'enum_end_sub' => $enum_end_sub,
1160             'pointer_size' => $pointer_size,
1161             );
1162              
1163 29         48 my $line;
1164 29         646 READ: while ( <$infile> )
1165             {
1166             # empty lines go without change
1167 4060 100       11087 if ( /^\s*$/o )
1168             {
1169 1508 100       4044 &$output_sub("\n") if $output_sub;
1170 1508         5451 next;
1171             }
1172              
1173             # joining lines
1174 2552         5477 while ( /[\\,]$/o )
1175             {
1176 58         110 s/\\[\r\n]+//o;
1177 58         202 s/,[\r\n]+/,/o;
1178 58         204 $_ .= <$infile>;
1179             }
1180              
1181 2552         4709 &_remove_attrs;
1182             # check if a comment is the only thing on this line
1183 2552 100 100     8065 if ( m#^\s*/\*.*\*/\s*$#o || m#^\s*//#o )
1184             {
1185 203 100       385 if ( $comment_sub )
1186             {
1187 196         368 $line = &$comment_sub($_);
1188 196 100       1156 $_ = $line if $line;
1189             }
1190             else
1191             {
1192 7         15 $_ = '';
1193             }
1194 203 100       565 &$output_sub($_) if $output_sub;
1195              
1196 203         737 next;
1197             }
1198              
1199             # processing of preprocessor directives
1200 2349 100       5620 if ( /^\s*#/o )
1201             {
1202 87 100       189 if ( $comment_sub )
1203             {
1204 84         176 $line = &$comment_sub($_);
1205 84 100       472 $_ = $line if $line;
1206             }
1207 87 100       185 if ( $preproc_sub )
1208             {
1209 84         150 $_ = &$preproc_sub($_);
1210             }
1211             else
1212             {
1213 3         5 $_ = '';
1214             }
1215 87 100 100     641 &$output_sub($_) if $output_sub and $_;
1216              
1217 87         834 next;
1218             }
1219              
1220             # externs
1221 2262 100       4596 if ( /^\s*extern/o )
1222             {
1223 696 100       1161 if ( $comment_sub )
1224             {
1225 672         1167 $line = &$comment_sub($_);
1226 672 100       3374 $_ = $line if $line;
1227             }
1228              
1229 696 100       1769 if ( ! /^\s*extern\s+"C/o )
1230             {
1231             # joining lines
1232 667         1426 while ( ! /;/o )
1233             {
1234 29         126 s/[\r\n]+//o;
1235 29         115 $_ .= <$infile>;
1236             }
1237             }
1238              
1239 696         1210 &_remove_attrs;
1240             # external functions
1241              
1242             # extern "C", extern "C++"
1243 696         1271 s/^\s*extern\s+"C"\s*{//o;
1244 696         1109 s/^\s*extern\s+"C"/extern/o;
1245 696         1101 s/^\s*extern\s+"C\+\+"\s*{//o;
1246 696         1132 s/^\s*extern\s+"C\+\+"/extern/o;
1247              
1248             # first remove: extern MACRO_NAME ( fcn name, args, ... )
1249 696         1688 s/^\s*\w*\s*extern\s+\w+\s*\([^*].*//o;
1250             # type ^^^
1251              
1252             # extern pointers to functions:
1253 696 100       2153 if ( /^\s*\w*\s*extern\s+[\w\*\s]+\(\s*\*\s*(\w+)[()\*\s\w]*\)\s*\(.*/o )
1254             {
1255 29 100       63 if ( $extern_sub )
1256             {
1257 28         65 $line = &$extern_sub($1);
1258 28 100       194 $_ = $line if $line;
1259             }
1260             else
1261             {
1262 1         2 $_ = '';
1263             }
1264 29 100       87 &$output_sub($_) if $output_sub;
1265             }
1266              
1267 696 100       4401 if ( /^\s*\w*\s*extern\s+[\w\*\s]+?(\w+)\s*\(.*/o )
1268             {
1269 175 100       292 if ( $extern_sub )
1270             {
1271 169         285 $line = &$extern_sub($1);
1272 169 100       1031 $_ = $line if $line;
1273             }
1274             else
1275             {
1276 6         10 $_ = '';
1277             }
1278 175 100       375 &$output_sub($_) if $output_sub;
1279             }
1280              
1281             # external variables
1282 696 100       2069 if ( /^\s*extern[\w\*\s]+\s+\**(\w+)\s*;/o )
1283             {
1284 464 100       769 if ( $extern_sub )
1285             {
1286 448         781 $line = &$extern_sub($1);
1287 448 100       2797 $_ = $line if $line;
1288             }
1289             else
1290             {
1291 16         23 $_ = '';
1292             }
1293 464 100       934 &$output_sub($_) if $output_sub;
1294             }
1295              
1296 696         2505 next;
1297             }
1298              
1299             # typedef
1300 1566 100       3211 if ( /^\s*typedef/o )
1301             {
1302 435 100       1488 if ( ! /\b(struct|union|enum)\b/o )
1303             {
1304             # joining lines
1305 290         690 while ( ! /;/o )
1306             {
1307 29         107 s/[\r\n]+//o;
1308 29         117 $_ .= <$infile>;
1309             }
1310             }
1311              
1312 435         805 &_remove_attrs;
1313             # split typedefs, but not within function parameters
1314 435 100 100     1723 &_split_decl($infile) unless /\([^)]*,/o or /enum/o;
1315              
1316 435 100       1784 if ( /\(/o )
    100          
    100          
1317             {
1318 145         444 s/^.*$/\n/o;
1319             }
1320             # "typedef struct ...." ----> "struct ....."
1321             elsif ( /(struct|union|enum)/o )
1322             {
1323 145         541 s/^\s*typedef\s+//o;
1324             }
1325             elsif ( ! /{/o ) #&& /;/o ) # lines already joined
1326             {
1327 116         546 while ( /\btypedef\s+[^;]+\s*;/o )
1328             {
1329             # cannot do function pointers, take
1330             # just simple types
1331 145 100       621 if ( /\btypedef([\w*\s]+)\b(\w+)\s*;/o )
1332             {
1333 116 100       223 if ( $typedef_sub )
1334             {
1335 112         237 my $old = $1;
1336 112         176 my $new = $2;
1337 112         329 $old =~ s/^\s+//o;
1338 112         212 $new =~ s/^\s+//o;
1339 112         381 $old =~ s/\s+$//o;
1340 112         194 $new =~ s/\s+$//o;
1341 112         269 $line = &$typedef_sub($old, $new);
1342             }
1343             else
1344             {
1345 4         9 $line = '';
1346             }
1347 116 100 100     1071 &$output_sub($line) if $output_sub and $line;
1348             }
1349 145         811 s/^\s*typedef\s+[^;]+\s*;//o;
1350             }
1351              
1352 116         965 next;
1353             }
1354             # no NEXT here
1355             }
1356              
1357             # structures:
1358              
1359 1450 100       3365 if ( /^\s*struct/o )
1360             {
1361             # skip over expressions of the type:
1362             # struct xxx function(arg1, ...);
1363 493 100       1019 if ( /\(/o )
1364             {
1365 29         58 $_ = '';
1366             }
1367             else
1368             {
1369 464         971 $sub_params{'line'} = $_;
1370 464         937 parse_struct(%sub_params);
1371             }
1372 493         2465 next;
1373             }
1374              
1375             # enumerations
1376 957 100       2053 if ( /^\s*enum/o )
1377             {
1378             # remove the 'enum' and its name
1379 290 100       920 if ( /^.*enum\s+(\w+)\s*\{?/o )
1380             {
1381 261 100       663 $line = &$enum_start_sub($1) if $enum_start_sub;
1382 261 100 100     2207 &$output_sub($line) if $output_sub and $line;
1383 261         1195 s/^.*enum\s+\w+\s*\{?//o;
1384             }
1385             else
1386             {
1387 29         115 s/^.*enum\s*\{?//o;
1388             }
1389 290         440 my $curr_value = 0;
1390              
1391             #&_split_decl($infile);
1392             # check if one-line enum
1393 290 100       595 if ( /}/o )
1394             {
1395             # there are no conditional compiling directives in one-line enums
1396             #if ( $preproc_sub )
1397             #{
1398             # $_ = &$preproc_sub($_);
1399             #}
1400             #else
1401             #{
1402             # $_ = '';
1403             #}
1404              
1405 116         372 while ( /,.*;/o )
1406             {
1407 116 100       550 if ( /([\w\s]*)\s+(\w+)\s*=\s*(\w+)\s*,/o )
1408             {
1409 29 100       94 $line = &$enum_entry_sub ($2, $3) if $enum_entry_sub;
1410 29 100 100     303 &$output_sub($line) if $output_sub and $line;
1411 29         91 $curr_value = $3+1;
1412 29         165 s/[\w\s]*\s+\w+\s*=\s*\w+\s*,//o
1413             }
1414 116 100       404 if ( /([\w\s]*)\s+(\w+)\s*,/o )
1415             {
1416 87 100       221 $line = &$enum_entry_sub ($2, $curr_value) if $enum_entry_sub;
1417 87 100 100     785 &$output_sub($line) if $output_sub and $line;
1418 87         183 $curr_value++;
1419 87         477 s/[\w\s]*\s+\w+\s*,//o
1420             }
1421             }
1422              
1423             # the last line has no comma
1424 116 100       305 if ( /^\s*(\w+)\s*=\s*(\w+)\s*\}\s*;/o )
1425             {
1426 29 100       93 $line = &$enum_entry_sub ($1, $2) if $enum_entry_sub;
1427 29 100 100     294 &$output_sub($line) if $output_sub and $line;
1428 29         127 s/^\s*\w+\s*=\s*\w+\s*\}\s*;//o
1429             }
1430 116 100       352 if ( /^\s*(\w+)\s*\}\s*;/o )
1431             {
1432 87 100       225 $line = &$enum_entry_sub ($1, $curr_value) if $enum_entry_sub;
1433 87 100 100     789 &$output_sub($line) if $output_sub and $line;
1434 87         352 s/^\s*\w+\s*\}\s*;//o
1435             }
1436              
1437 116 100       306 $line = &$enum_end_sub() if $enum_end_sub;
1438 116 100 100     673 &$output_sub($line) if $output_sub and $line;
1439             # processing the comments
1440 116 100 100     501 if ( $comment_sub and ( m#//# or m#/\*# ) )
      100        
1441             {
1442 56         133 $_ = &$comment_sub($_);
1443 56 100 100     406 &$output_sub($_) if $output_sub and $_;
1444             }
1445 116         401 next;
1446             }
1447             else
1448             {
1449 174         494 while ( <$infile> )
1450             {
1451             # processing of conditional compiling directives
1452 754 100       2029 if ( /^\s*#/o )
1453             {
1454 29 100       79 if ( $preproc_sub )
1455             {
1456 28         79 $_ = &$preproc_sub($_);
1457             }
1458             else
1459             {
1460 1         2 $_ = '';
1461             }
1462 29 100 100     286 &$output_sub($_) if $output_sub and $_;
1463              
1464 29         108 next;
1465             }
1466              
1467 725         1211 &_remove_attrs;
1468             # skip over the first '{' character
1469             #next if /^\s*\{\s*$/o;
1470 725         1140 s/^\s*{\s*$//go;
1471              
1472 725 100       1854 next if /^\s*$/o;
1473              
1474             # if the constant has a value, we don't touch it
1475 551 100       1004 if ( /=/o )
1476             {
1477 87 100       325 if ( /^\s*(\w+)\s*=\s*([-*\/+\w]+)\s*,?/o )
1478             {
1479 58 100       163 $line = &$enum_entry_sub ($1, $2) if $enum_entry_sub;
1480 58 100 100     646 &$output_sub($line) if $output_sub and $line;
1481 58 100       159 $curr_value = $2 + 1 if _is_a_number ($2);
1482 58         218 s/^\s*\w+\s*=\s*\w+\s*,?//o;
1483             }
1484             }
1485             else
1486             {
1487             # assign a subsequent value
1488 464 100       1101 if ( /^\s*(\w+)\s*,?/o )
1489             {
1490 232 100       568 $line = &$enum_entry_sub ($1, $curr_value) if $enum_entry_sub;
1491 232 100 100     2082 &$output_sub($line) if $output_sub and $line;
1492 232         459 $curr_value++;
1493 232         696 s/^\s*\w+\s*,?//o;
1494             }
1495             }
1496              
1497             # processing the comments
1498 551 100 100     2249 if ( $comment_sub and ( m#//# or m#/\*# ) )
      100        
1499             {
1500 56         124 $line = &$comment_sub($_);
1501 56 100       307 $_ = $line if $line;
1502             }
1503              
1504             # look for the end of the enum
1505 551 100       1187 if ( /\s*\}.*/o )
1506             {
1507 174 100       432 $line = &$enum_end_sub() if $enum_end_sub;
1508 174 100 100     1112 &$output_sub($line) if $output_sub and $line;
1509 174         581 next READ;
1510             }
1511              
1512 377 100 100     1289 &$output_sub($_) if $output_sub and $_;
1513             }
1514             }
1515             }
1516              
1517 667 100       1607 if ( /^\s*union/o )
1518             {
1519             # skip over expressions of the type:
1520             # union xxx function(arg1, ...);
1521 377 100       758 if ( /\(/o )
1522             {
1523 29         72 $_ = '';
1524             }
1525             else
1526             {
1527 348         716 $sub_params{'line'} = $_;
1528 348         713 parse_union(%sub_params);
1529             }
1530 377         1336 next;
1531             }
1532              
1533 290         475 s/^\s*{\s*$//go;
1534             # remove any }'s left after , for example
1535 290         753 s/^\s*}\s*$//go;
1536 290 100 100     960 if ( $comment_sub and m#/\*# ) # single-line comments should be processed at the top
1537             {
1538 28         98 $line = &$comment_sub($_);
1539 28 100       216 $_ = $line if $line;
1540             }
1541 290 100       663 &$output_sub($_) if $output_sub; # and $_; # the line won't be empty here
1542             }
1543             }
1544              
1545              
1546             =head1 SUPPORT AND DOCUMENTATION
1547              
1548             After installing, you can find documentation for this module with the perldoc command.
1549              
1550             perldoc Parse::H
1551              
1552             You can also look for information at:
1553              
1554             Search CPAN
1555             https://metacpan.org/release/Parse-H
1556              
1557             CPAN Request Tracker:
1558             https://rt.cpan.org/Public/Dist/Display.html?Name=Parse-H
1559              
1560             =head1 AUTHOR
1561              
1562             Bogdan Drozdowski, C<< >>
1563              
1564             =head1 COPYRIGHT
1565              
1566             Copyright 2022-2023 Bogdan Drozdowski, all rights reserved.
1567              
1568             =head1 LICENSE
1569              
1570             This program is free software; you can redistribute it and/or modify it
1571             under the same terms as Perl itself.
1572              
1573             =cut
1574              
1575             1; # End of Parse::H