line
stmt
bran
cond
sub
pod
time
code
1
#!/usr/bin/perl
2
#-I/home/phil/z/perl/cpan/DataEditXml/lib -I/home/phil/z/perl/cpan/DataTableText/lib
3
#-------------------------------------------------------------------------------
4
# Lint xml files in parallel using xmllint and report the failure rate
5
# Philip R Brenan at gmail dot com, Appa Apps Ltd, 2016
6
#-------------------------------------------------------------------------------
7
# podDocumentation
8
9
package Data::Edit::Xml::Lint;
10
require v5.16.0;
11
1
1
825
use warnings FATAL => qw(all);
1
3
1
43
12
1
1
7
use strict;
1
3
1
24
13
1
1
6
use Carp;
1
3
1
94
14
1
1
736
use Data::Table::Text qw(:all);
1
29182
1
440
15
1
1
778
use Digest::SHA qw(sha256_hex);
1
3652
1
95
16
1
1
737
use Encode;
1
11935
1
3639
17
our $VERSION = 20170726;
18
19
#1 Constructor # Construct a new linter
20
21
sub new # Create a new xml linter - call this method statically as in L
22
23
0
0
1
0
{bless {} # Create xml linter
24
}
25
26
#2 Attributes # Attributes describing a lint
27
28
genLValueScalarMethods(qw(file)); # File that the xml will be written to and read from by L or L
29
genLValueScalarMethods(qw(catalog)); # Optional catalog file containing the locations of the DTDs used to validate the xml
30
genLValueScalarMethods(qw(docType)); # The second line: the document type extracted from the L
31
genLValueScalarMethods(qw(dtds)); # Optional directory containing the DTDs used to validate the xml
32
genLValueScalarMethods(qw(errors)); # Number of lint errors detected by xmllint
33
genLValueScalarMethods(qw(header)); # The first line: the xml header extracted from L
34
genLValueScalarMethods(qw(labels)); # Optional parse tree to supply L for the current L as the labels are present in the parse tree not in the string representing the parse tree
35
genLValueScalarMethods(qw(linted)); # Date the lint was performed by L
36
genLValueScalarMethods(qw(idDefs)); # {id} = count - the number of times this id is defined in the xml contained in this L
37
genLValueScalarMethods(qw(labelDefs)); # {label or id} = id - the id of the node containing a L defined on the xml
38
genLValueScalarMethods(qw(project)); # Optional L name to allow error counts to be aggregated by L and to allow L to be scoped to the L contained in each L
39
genLValueScalarMethods(qw(processes)); # Maximum number of xmllint processes to run in parallel - 8 by default
40
genLValueScalarMethods(qw(sha256)); # Sha256 hash of the string containing the xml processed by L or L
41
genLValueScalarMethods(qw(source)); # The source Xml to be linted
42
43
#1 Lint # Lint xml L in parallel
44
45
my @pids;
46
47
sub lint($@) # Store some xml in a L and apply xmllint in parallel
48
0
0
1
0
{my ($lint, %attributes) = @_; # Linter, attributes to be recorded as xml comments
49
50
0
0
0
$lint->source or confess "Use the source() method to provide the source xml"; # Check that we have some source
51
52
0
0
if (1) # Maximum amount of parallelism
53
0
0
0
{my $processes = $lint->processes // 8; # Maximum number of processes
54
0
0
waitpid(pop @pids, 0) while @pids > $processes; # Wait until enough sub processes have completed
55
}
56
57
0
0
0
if (my $pid = fork()) # Perform lints in parallel
58
0
0
{push @pids, $pid;
59
0
0
return;
60
}
61
62
0
0
$lint->source = $lint->source =~ s/\s+\Z//gsr; # Xml text to be written minus trailing blanks
63
0
0
my $s = $lint->source; # Split source into lines
64
0
0
my @a = split /\n/, $lint->source; # Split source into lines
65
0
0
my $f = $lint->file; # File to be written to
66
0
0
0
$f or confess "Use the ->file method to provide the target file"; # Check that we have an output file
67
68
0
0
my $C = $lint->catalog; # Catalog to be used to validate xml
69
0
0
my $d = $lint->dtds; # Folder containing dtds used to validate xml
70
0
0
0
my $P = $lint->project // 'unknown'; # L name
71
72
0
0
$attributes{file} = $f; # Record attributes
73
0
0
0
$attributes{catalog} = $C if $C;
74
0
0
0
$attributes{dtds} = $d if $d;
75
0
0
$attributes{header} = $a[0];
76
0
0
$attributes{docType} = $a[1];
77
0
0
0
$attributes{project} = $P if $P;
78
0
0
$attributes{sha256} = sha256_hex(encode("ascii", $lint->source)); # Digest of source string
79
80
my $a = sub # Attributes to be recorded with the xml
81
0
0
0
{my @s;
82
0
0
for(sort keys %attributes)
83
0
0
{my $v = $attributes{$_}; # Attribute value
84
0
0
0
defined($v) or confess "Attribute $_ has no value";
85
0
0
push @s, ""; # Place attribute inside a comment
86
}
87
0
0
join "\n", @s
88
0
0
}->();
89
90
0
0
my $T = "\n"; # Time stamp marks the start of the added comments
91
92
my $L = sub # Process any labels in the parse tree
93
0
0
0
0
{return '' unless $lint->labels;
94
0
0
my $s = '';
95
$lint->labels->by(sub # Search the supplied parse tree for any id or label definitions
96
0
0
{my ($o) = @_;
97
98
0
0
0
if (my $i = $o->id) # Id for this node but no labels
99
0
0
{$s .= "\n"; # Id definition
100
0
0
0
my $d = $lint->idDefs //= {}; # Id definitions for this file
101
0
0
$d->{$i} = $i; # Record id definition
102
}
103
104
0
0
0
if (my @labels = $o->getLabels) # Labels for this node
105
0
0
{my $i = $o->id; # Id for this node
106
0
0
0
$i or confess "No id for node with labels ".$o->prettyString;
107
0
0
$s .= "\n";
108
0
0
0
my $l = $lint->labelDefs //= {}; # Labels for this file
109
0
0
$l->{$_} = $i for @labels; # Link each label to its primary id
110
}
111
0
0
});
112
0
0
$s
113
0
0
}->();
114
115
0
0
writeFile($f, my $source = $lint->source."\n$T\n$a\n$L"); # Write xml to file
116
117
0
0
0
if (my $v = qx(xmllint --version 2>&1)) # Check xmllint is present
118
0
0
0
{unless ($v =~ m(\Axmllint)is)
119
0
0
{confess "xmllint missing, install with:\nsudo apt-get xmllint";
120
}
121
}
122
123
my $c = sub # Lint command
124
0
0
0
0
{return "xmllint --path \"$d\" --noout --valid \"$f\" 2>&1" if $d; # Lint against DTDs
125
0
0
0
return qq(xmllint --noout - < '$f' 2>&1) unless $C; # Normal lint
126
0
0
qq(export XML_CATALOG_FILES='$C' && xmllint --noout --valid - < '$f' 2>&1) # Catalog lint
127
0
0
}->();
128
129
0
0
0
if (my @errors = qx($c)) # Perform lint and add errors as comments
130
0
0
{my $s = readFile($f);
131
0
0
my $e = join '', map {chomp; "\n"} @errors;
0
0
0
0
132
0
0
my $n = $lint->errors = int @errors / 3; # Three lines per error message
133
134
0
0
my $t = "";
135
136
0
0
writeFile($f, "$source\n$T$e\n$t"); # Update xml file with errors
137
}
138
else # No errors detected
139
0
0
{$lint->errors = 0;
140
}
141
0
0
exit;
142
} # lint
143
144
sub read($) # Reread a linted xml L and extract the L associated with the L
145
0
0
1
0
{my ($file) = @_; # File containing xml
146
0
0
my $s = readFile($file); # Read xml from file
147
0
0
my %a = $s =~ m//igs; # Get attributes
148
0
0
my @a = split m/\n/, $s; # Split into lines
149
150
0
0
my $l = {}; # Reconstructed labels
151
0
0
for(@a) # Each source line
152
0
0
0
{if (//gs) # Labels line
153
0
0
{my @w = split /\s+/, $1; # id labels
154
0
0
$l->{$_} = $w[0] for @w; # Associate each id and label with the id
155
}
156
}
157
158
0
0
my $d = {}; # Id definitions
159
0
0
for(@a) # Each source line
160
0
0
0
{if (//gs) # Definition
161
0
0
{$d->{$1}++; # Record definition
162
0
0
$l->{$1} = $1; # An id also defines a label
163
}
164
}
165
166
0
0
my $S = $s =~ s/\s+