line
stmt
bran
cond
sub
pod
time
code
1
#!/usr/bin/perl
2
#-I/home/phil/z/perl/cpan/DataEditXml/lib -I/home/phil/z/perl/cpan/DataTableText/lib
3
#-------------------------------------------------------------------------------
4
# Lint xml files in parallel using xmllint and report the failure rate
5
# Philip R Brenan at gmail dot com, Appa Apps Ltd, 2016
6
#-------------------------------------------------------------------------------
7
# Report should allow the integration of other statistics into its summery besides the one it produces itself
8
# podDocumentation
9
10
package Data::Edit::Xml::Lint;
11
require v5.16.0;
12
1
1
694
use warnings FATAL => qw(all);
1
3
1
41
13
1
1
6
use strict;
1
3
1
22
14
1
1
6
use Carp;
1
2
1
90
15
1
1
497
use Data::Table::Text qw(:all);
1
25505
1
452
16
1
1
533
use Digest::SHA qw(sha256_hex);
1
3022
1
100
17
1
1
468
use Encode;
1
10488
1
3412
18
our $VERSION = 20170801;
19
20
#1 Constructor # Construct a new linter
21
22
sub new # Create a new xml linter - call this method statically as in L
23
0
0
1
0
{bless {} # Create xml linter
24
}
25
26
#2 Attributes # Attributes describing a lint
27
28
genLValueScalarMethods(qw(author)); # Optional author of the xml - only needed if you want to generate an SDL file map
29
genLValueScalarMethods(qw(catalog)); # Optional catalog file containing the locations of the DTDs used to validate the xml
30
genLValueScalarMethods(qw(ditaType)); # Optional Dita topic type(concept|task|troubleshooting|reference) of the xml - only needed if you want to generate an SDL file map
31
genLValueScalarMethods(qw(docType)); # The second line: the document type extracted from the L
32
genLValueScalarMethods(qw(dtds)); # Optional directory containing the DTDs used to validate the xml
33
genLValueScalarMethods(qw(errors)); # Number of lint errors detected by xmllint
34
genLValueScalarMethods(qw(file)); # File that the xml will be written to and read from by L or L
35
genLValueScalarMethods(qw(guid)); # Guid for outermost tag - only required if you want to generate an SD file map
36
genLValueScalarMethods(qw(header)); # The first line: the xml header extracted from L
37
genLValueScalarMethods(qw(idDefs)); # {id} = count - the number of times this id is defined in the xml contained in this L
38
genLValueScalarMethods(qw(labelDefs)); # {label or id} = id - the id of the node containing a L defined on the xml
39
genLValueScalarMethods(qw(labels)); # Optional parse tree to supply L for the current L as the labels are present in the parse tree not in the string representing the parse tree
40
genLValueScalarMethods(qw(linted)); # Date the lint was performed by L
41
genLValueScalarMethods(qw(processes)); # Maximum number of xmllint processes to run in parallel - 8 by default
42
genLValueScalarMethods(qw(project)); # Optional L name to allow error counts to be aggregated by L and to allow L to be scoped to the L contained in each L
43
genLValueScalarMethods(qw(sha256)); # Sha256 hash of the string containing the xml processed by L or L
44
genLValueScalarMethods(qw(source)); # The source Xml to be linted
45
genLValueScalarMethods(qw(title)); # Optional title of the xml - only needed if you want to generate an SDL file map
46
47
#1 Lint # Lint xml L in parallel
48
49
my @pids; # Lint pids
50
51
sub lint($@) # Store some xml in a L, apply xmllint in parallel and update the source file with the results
52
0
0
1
0
{my ($lint, %attributes) = @_; # Linter, attributes to be recorded as xml comments
53
0
0
&lintOP(1, @_);
54
}
55
56
sub lintNOP($@) # Store some xml in a L, apply xmllint in single and update the source file with the results
57
0
0
1
0
{my ($lint, %attributes) = @_; # Linter, attributes to be recorded as xml comments
58
0
0
&lintOP(0, @_);
59
}
60
61
sub lintOP($$@) ## Store some xml in a L, apply xmllint in parallel or single and update the source file with the results
62
0
0
0
0
{my ($inParallel, $lint, %attributes) = @_; # In parallel or not, Linter, attributes to be recorded as xml comments
63
64
0
0
0
$lint->source or confess "Use the source() method to provide the source xml"; # Check that we have some source
65
0
0
0
$lint->file or confess "Use the ->file method to provide the target file"; # Check that we have an output file
66
67
0
0
0
if ($inParallel) # Process in parallel if possible
68
0
0
0
{my $processes = $lint->processes // 8; # Maximum number of processes
69
0
0
&waitProcessing; # Wait until enough sub processes have completed
70
0
0
0
if (my $pid = fork()) # Perform lints in parallel
71
0
0
{push @pids, $pid;
72
0
0
return;
73
}
74
}
75
76
0
0
$lint->source = $lint->source =~ s/\s+\Z//gsr; # Xml text to be written minus trailing blanks
77
0
0
my @lines = split /\n/, $lint->source; # Split source into lines
78
79
0
0
my $file = $lint->file; # File to be written to
80
0
0
0
confess "File name contains a new line:\n$file\n" if $file =~ m/\n/s; # Complain if the source file contains a new line
81
82
0
0
for(qw(author catalog ditaType dtds file guid project title)) # Map parameters to attributes
83
0
0
{my $a = $lint->$_;
84
0
0
0
next unless $a;
85
0
0
0
confess "Attribute $_=>'$a' contains --" if $a =~ m/--/s; # Complain if the attribute value contains -- as it will cause problems when the attribute is stored as a label definition in an xml comment
86
0
0
$attributes{$_} = $a
87
}
88
89
0
0
$attributes{docType} = $lines[1]; # Source details
90
0
0
$attributes{header} = $lines[0];
91
0
0
$attributes{sha256} = sha256_hex(encode("ascii", $lint->source)); # Digest of source string
92
93
0
0
my $time = "\n"; # Time stamp marks the start of the added comments
94
0
0
my $attr = &formatAttributes({%attributes}); # Attributes to be recorded with the xml
95
my $labels = sub # Process any labels in the parse tree
96
0
0
0
0
{return '' unless $lint->labels;
97
0
0
my $s = '';
98
$lint->labels->by(sub # Search the supplied parse tree for any id or label definitions
99
0
0
{my ($o) = @_;
100
101
0
0
0
if (my $i = $o->id) # Id for this node but no labels
102
0
0
{$s .= "\n"; # Id definition
103
0
0
0
my $d = $lint->idDefs //= {}; # Id definitions for this file
104
0
0
$d->{$i} = $i; # Record id definition
105
}
106
107
0
0
0
if (my @labels = $o->getLabels) # Labels for this node
108
0
0
{my $i = $o->id; # Id for this node
109
0
0
0
$i or confess "No id for node with labels:\n".$o->prettyString;
110
0
0
$s .= "\n";
111
0
0
0
my $l = $lint->labelDefs //= {}; # Labels for this file
112
0
0
$l->{$_} = $i for @labels; # Link each label to its primary id
113
}
114
0
0
});
115
0
0
$s
116
0
0
}->();
117
118
0
0
writeFile($file, my $source = $lint->source."\n$time\n$attr\n$labels"); # Write xml to file
119
120
0
0
0
if (my $v = qx(xmllint --version 2>&1)) # Check xmllint is present
121
0
0
0
{unless ($v =~ m(\Axmllint)is)
122
0
0
{confess "xmllint missing, install with:\nsudo apt-get xmllint";
123
}
124
}
125
126
my $c = sub # Lint command
127
0
0
0
{my $d = $lint->dtds; # Optional dtd to use
128
0
0
my $f = $file; # File name
129
0
0
0
return "xmllint --path \"$d\" --noout --valid \"$f\" 2>&1" if $d; # Lint against DTDs
130
0
0
my $c = $lint->catalog; # Optional dtd catalog to use
131
0
0
0
return qq(xmllint --noout - < '$f' 2>&1) unless $c; # Normal lint
132
0
0
qq(export XML_CATALOG_FILES='$c' && xmllint --noout --valid - < '$f' 2>&1) # Catalog lint
133
0
0
}->();
134
135
0
0
0
if (my @errors = qx($c)) # Perform lint and add errors as comments
136
0
0
{my $s = readFile($file);
137
0
0
my $e = join '', map {chomp; "\n"} @errors;
0
0
0
0
138
0
0
my $n = $lint->errors = int @errors / 3; # Three lines per error message
139
140
0
0
my $t = "";
141
142
0
0
writeFile($file, "$source\n$time$e\n$t"); # Update xml file with errors
143
}
144
else # No errors detected
145
0
0
{$lint->errors = 0;
146
}
147
0
0
0
exit if $inParallel;
148
} # lint
149
150
sub nolint($@) # Store just the attributes in a file so that they can be retrieved later to process non xml objects referenced in the xml - like images
151
0
0
1
0
{my ($lint, %attributes) = @_; # Linter, attributes to be recorded as xml comments
152
0
0
0
!$lint->source or confess "Source specified for nolint(), use lint()"; # Source not permitted for nolint()
153
0
0
my $file = $lint->file; # File to be written to
154
0
0
0
$file or confess "Use the ->file method to provide the target file"; # Check that we have an output file
155
156
0
0
for(qw(author ditaType file guid project)) # Map parameters to attributes
157
0
0
{my $a = $lint->$_;
158
0
0
0
$attributes{$_} = $a if $a;
159
}
160
161
0
0
my $time = "\n"; # Time stamp marks the start of the added comments
162
0
0
my $attr = &formatAttributes({%attributes}); # Attributes to be recorded with the xml
163
164
0
0
writeFile($file, "\n$time\n$attr"); # Write attributes to file
165
} # nolint
166
167
sub formatAttributes(%) ## Format the attributes section of the output file
168
0
0
0
0
{my ($attributes) = @_; # Hash of attributes
169
0
0
my @s;
170
0
0
for(sort keys %$attributes)
171
0
0
{my $v = $attributes->{$_}; # Attribute value
172
0
0
0
defined($v) or confess "Attribute $_ has no value";
173
0
0
0
$v =~ s/--/__/gs if /title/; # Replace -- with __ as -- will upset the use of xml comments to hold the data in a greppable form - but only for title - for files we need to see an error message
174
0
0
0
$v =~ m/--/s and confess "-- in value of $_=>$v"; # Confess if -- present in attribute value as this will mess up the xml comments
175
0
0
push @s, ""; # Place attribute inside a comment
176
}
177
0
0
join "\n", @s
178
}
179
180
sub read($) # Reread a linted xml L and extract the L associated with the L
181
0
0
1
0
{my ($file) = @_; # File containing xml
182
0
0
my $s = readFile($file); # Read xml from file
183
0
0
my %a = $s =~ m//igs; # Get attributes
184
0
0
my @a = split m/\n/, $s; # Split into lines
185
186
0
0
my $l = {}; # Reconstructed labels
187
0
0
for(@a) # Each source line
188
0
0
0
{if (//gs) # Labels line
189
0
0
{my @w = split /\s+/, $1; # id labels
190
0
0
$l->{$_} = $w[0] for @w; # Associate each id and label with the id
191
}
192
}
193
194
0
0
my $d = {}; # Id definitions
195
0
0
for(@a) # Each source line
196
0
0
0
{if (//gs) # Definition
197
0
0
{$d->{$1}++; # Record definition
198
0
0
$l->{$1} = $1; # An id also defines a label
199
}
200
}
201
202
0
0
my $S = $s =~ s/\s+