File Coverage

blib/lib/Data/Edit/Xml/Lint.pm
Criterion Covered Total %
statement 32 263 12.1
branch 1 90 1.1
condition 0 30 0.0
subroutine 12 30 40.0
pod 13 15 86.6
total 58 428 13.5


line stmt bran cond sub pod time code
1             #!/usr/bin/perl
2             #-I/home/phil/z/perl/cpan/DataEditXml/lib -I/home/phil/z/perl/cpan/DataTableText/lib
3             #-------------------------------------------------------------------------------
4             # Lint xml files in parallel using xmllint and report the failure rate
5             # Philip R Brenan at gmail dot com, Appa Apps Ltd, 2016
6             #-------------------------------------------------------------------------------
7             # podDocumentation
8              
9             package Data::Edit::Xml::Lint;
10             require v5.16.0;
11 1     1   825 use warnings FATAL => qw(all);
  1         3  
  1         43  
12 1     1   7 use strict;
  1         3  
  1         24  
13 1     1   6 use Carp;
  1         3  
  1         94  
14 1     1   736 use Data::Table::Text qw(:all);
  1         29182  
  1         440  
15 1     1   778 use Digest::SHA qw(sha256_hex);
  1         3652  
  1         95  
16 1     1   737 use Encode;
  1         11935  
  1         3639  
17             our $VERSION = 20170726;
18              
19             #1 Constructor # Construct a new linter
20              
21             sub new # Create a new xml linter - call this method statically as in L
22              
23 0     0 1 0 {bless {} # Create xml linter
24             }
25              
26             #2 Attributes # Attributes describing a lint
27              
28             genLValueScalarMethods(qw(file)); # File that the xml will be written to and read from by L or L
29             genLValueScalarMethods(qw(catalog)); # Optional catalog file containing the locations of the DTDs used to validate the xml
30             genLValueScalarMethods(qw(docType)); # The second line: the document type extracted from the L
31             genLValueScalarMethods(qw(dtds)); # Optional directory containing the DTDs used to validate the xml
32             genLValueScalarMethods(qw(errors)); # Number of lint errors detected by xmllint
33             genLValueScalarMethods(qw(header)); # The first line: the xml header extracted from L
34             genLValueScalarMethods(qw(labels)); # Optional parse tree to supply L for the current L as the labels are present in the parse tree not in the string representing the parse tree
35             genLValueScalarMethods(qw(linted)); # Date the lint was performed by L
36             genLValueScalarMethods(qw(idDefs)); # {id} = count - the number of times this id is defined in the xml contained in this L
37             genLValueScalarMethods(qw(labelDefs)); # {label or id} = id - the id of the node containing a L defined on the xml
38             genLValueScalarMethods(qw(project)); # Optional L name to allow error counts to be aggregated by L and to allow L to be scoped to the L contained in each L
39             genLValueScalarMethods(qw(processes)); # Maximum number of xmllint processes to run in parallel - 8 by default
40             genLValueScalarMethods(qw(sha256)); # Sha256 hash of the string containing the xml processed by L or L
41             genLValueScalarMethods(qw(source)); # The source Xml to be linted
42              
43             #1 Lint # Lint xml L in parallel
44              
45             my @pids;
46              
47             sub lint($@) # Store some xml in a L and apply xmllint in parallel
48 0     0 1 0 {my ($lint, %attributes) = @_; # Linter, attributes to be recorded as xml comments
49              
50 0 0       0 $lint->source or confess "Use the source() method to provide the source xml"; # Check that we have some source
51              
52 0         0 if (1) # Maximum amount of parallelism
53 0   0     0 {my $processes = $lint->processes // 8; # Maximum number of processes
54 0         0 waitpid(pop @pids, 0) while @pids > $processes; # Wait until enough sub processes have completed
55             }
56              
57 0 0       0 if (my $pid = fork()) # Perform lints in parallel
58 0         0 {push @pids, $pid;
59 0         0 return;
60             }
61              
62 0         0 $lint->source = $lint->source =~ s/\s+\Z//gsr; # Xml text to be written minus trailing blanks
63 0         0 my $s = $lint->source; # Split source into lines
64 0         0 my @a = split /\n/, $lint->source; # Split source into lines
65 0         0 my $f = $lint->file; # File to be written to
66 0 0       0 $f or confess "Use the ->file method to provide the target file"; # Check that we have an output file
67              
68 0         0 my $C = $lint->catalog; # Catalog to be used to validate xml
69 0         0 my $d = $lint->dtds; # Folder containing dtds used to validate xml
70 0   0     0 my $P = $lint->project // 'unknown'; # L name
71              
72 0         0 $attributes{file} = $f; # Record attributes
73 0 0       0 $attributes{catalog} = $C if $C;
74 0 0       0 $attributes{dtds} = $d if $d;
75 0         0 $attributes{header} = $a[0];
76 0         0 $attributes{docType} = $a[1];
77 0 0       0 $attributes{project} = $P if $P;
78 0         0 $attributes{sha256} = sha256_hex(encode("ascii", $lint->source)); # Digest of source string
79              
80             my $a = sub # Attributes to be recorded with the xml
81 0     0   0 {my @s;
82 0         0 for(sort keys %attributes)
83 0         0 {my $v = $attributes{$_}; # Attribute value
84 0 0       0 defined($v) or confess "Attribute $_ has no value";
85 0         0 push @s, ""; # Place attribute inside a comment
86             }
87 0         0 join "\n", @s
88 0         0 }->();
89              
90 0         0 my $T = "\n"; # Time stamp marks the start of the added comments
91              
92             my $L = sub # Process any labels in the parse tree
93 0 0   0   0 {return '' unless $lint->labels;
94 0         0 my $s = '';
95             $lint->labels->by(sub # Search the supplied parse tree for any id or label definitions
96 0         0 {my ($o) = @_;
97              
98 0 0       0 if (my $i = $o->id) # Id for this node but no labels
99 0         0 {$s .= "\n"; # Id definition
100 0   0     0 my $d = $lint->idDefs //= {}; # Id definitions for this file
101 0         0 $d->{$i} = $i; # Record id definition
102             }
103              
104 0 0       0 if (my @labels = $o->getLabels) # Labels for this node
105 0         0 {my $i = $o->id; # Id for this node
106 0 0       0 $i or confess "No id for node with labels ".$o->prettyString;
107 0         0 $s .= "\n";
108 0   0     0 my $l = $lint->labelDefs //= {}; # Labels for this file
109 0         0 $l->{$_} = $i for @labels; # Link each label to its primary id
110             }
111 0         0 });
112 0         0 $s
113 0         0 }->();
114              
115 0         0 writeFile($f, my $source = $lint->source."\n$T\n$a\n$L"); # Write xml to file
116              
117 0 0       0 if (my $v = qx(xmllint --version 2>&1)) # Check xmllint is present
118 0 0       0 {unless ($v =~ m(\Axmllint)is)
119 0         0 {confess "xmllint missing, install with:\nsudo apt-get xmllint";
120             }
121             }
122              
123             my $c = sub # Lint command
124 0 0   0   0 {return "xmllint --path \"$d\" --noout --valid \"$f\" 2>&1" if $d; # Lint against DTDs
125 0 0       0 return qq(xmllint --noout - < '$f' 2>&1) unless $C; # Normal lint
126 0         0 qq(export XML_CATALOG_FILES='$C' && xmllint --noout --valid - < '$f' 2>&1) # Catalog lint
127 0         0 }->();
128              
129 0 0       0 if (my @errors = qx($c)) # Perform lint and add errors as comments
130 0         0 {my $s = readFile($f);
131 0         0 my $e = join '', map {chomp; "\n"} @errors;
  0         0  
  0         0  
132 0         0 my $n = $lint->errors = int @errors / 3; # Three lines per error message
133              
134 0         0 my $t = "";
135              
136 0         0 writeFile($f, "$source\n$T$e\n$t"); # Update xml file with errors
137             }
138             else # No errors detected
139 0         0 {$lint->errors = 0;
140             }
141 0         0 exit;
142             } # lint
143              
144             sub read($) # Reread a linted xml L and extract the L associated with the L
145 0     0 1 0 {my ($file) = @_; # File containing xml
146 0         0 my $s = readFile($file); # Read xml from file
147 0         0 my %a = $s =~ m//igs; # Get attributes
148 0         0 my @a = split m/\n/, $s; # Split into lines
149              
150 0         0 my $l = {}; # Reconstructed labels
151 0         0 for(@a) # Each source line
152 0 0       0 {if (//gs) # Labels line
153 0         0 {my @w = split /\s+/, $1; # id labels
154 0         0 $l->{$_} = $w[0] for @w; # Associate each id and label with the id
155             }
156             }
157              
158 0         0 my $d = {}; # Id definitions
159 0         0 for(@a) # Each source line
160 0 0       0 {if (//gs) # Definition
161 0         0 {$d->{$1}++; # Record definition
162 0         0 $l->{$1} = $1; # An id also defines a label
163             }
164             }
165              
166 0         0 my $S = $s =~ s/\s+