File Coverage

blib/lib/Code/Quality.pm

Criterion	Covered	Total	%
statement	67	93	72.0
branch	15	34	44.1
condition	1	9	11.1
subroutine	20	21	95.2
pod	3	6	50.0
total	106	163	65.0

line	stmt	bran	cond	sub	pod	time	code
1							package Code::Quality;
2
3	2			2		70856	use 5.020000;
	2					17
4	2			2		11	use strict;
	2					3
	2					50
5	2			2		11	use warnings;
	2					20
	2					65
6	2			2		1193	use utf8;
	2					28
	2					11
7	2			2		93	use re '/s';
	2					5
	2					192
8	2			2		869	use parent qw/Exporter/;
	2					664
	2					10
9
10							=encoding utf-8
11
12							=head1 NAME
13
14							Code::Quality - use static analysis to compute a "code quality" metric for a program
15
16							=head1 SYNOPSIS
17
18							use v5.20;
19							use Code::Quality;
20							# code to test (required)
21							my $code = ...;
22							# reference code to compare against (optional)
23							my $reference = ...;
24
25							my $warnings =
26							analyse_code
27							code => $code,
28							reference => $reference,
29							language => 'C';
30							if (defined $warnings) {
31							my $stars = star_rating_of_warnings $warnings;
32							say "Program is rated $stars stars"; # 3 is best, 1 is worst
33							my @errors = grep { $_->[0] eq 'error' } @$warnings;
34							if (@errors > 0) {
35							say 'Found ', scalar @errors, ' errors';
36							say "First error: $errors[0][1]";
37							}
38							} else {
39							say 'Failed to analyse code';
40							}
41
42							=head1 DESCRIPTION
43
44							Code::Quality runs a series of tests on a piece of source code to
45							compute a code quality metric. Each test returns a possibly empty list
46							of warnings, that is potential issues present in the source code. This
47							list of warnings can then be turned into a star rating: 3 stars for
48							good code, 2 stars for acceptable code, and 1 stars for dubious code.
49
50							=head2 Warnings
51
52							A warning is an arrayref C<< [type, message, row, column] >>, where
53							the first two entries are mandatory and the last two can be either
54							both present or both absent.
55							The type is one of C<< qw/error warning info/ >>.
56
57							Four-element warnings correspond to ACE code editor annotations.
58							Two-element warnings apply to the entire document, not a specific
59							place in the code.
60
61							=head2 Tests
62
63							A test is a function that takes key-value arguments:
64
65							B(code => I<$code>, language => I<$language>, [reference => I<$reference>, formatted_code => I<$formatted>])
66
67							Here I<$code> is the code to be tested, I<$language> is the
68							programming language, I<$reference> is an optional reference source
69							code to compare I<$code> against, and I<$formatted_code> is the
70							optional result of running I<$code> through a source code formatter.
71
72							Each test returns undef if the test failed (for example, if the test
73							cannot be applied to this programming language), and an arrayref of
74							warnings otherwise.
75
76							Most tests have several configurable parameters, which come from
77							global variables. The documentation of each test mentions the global
78							variables that affect its operations. C can be used to run a
79							test with special configuration once, without affecting other code:
80
81							{
82							local $Code::Quality::bla_threshold = 5;
83							test_bla code => $code, language => 'C';
84							}
85
86							=cut
87
88							our $VERSION = '0.001003';
89							our @EXPORT = qw/analyse_code star_rating_of_warnings/;
90							our @EXPORT_OK = (@EXPORT, qw/test_lines test_clang_tidy/);
91							our %EXPORT_TAGS = (default => \@EXPORT, all => \@EXPORT_OK);
92
93							# set this to a "Test::More::diag"-like function to get debug output
94							our $DEBUG;
95
96	2			2		293	use Carp qw/carp croak/;
	2					4
	2					160
97	2			2		1893	use Cpanel::JSON::XS qw/encode_json/;
	2					11476
	2					115
98	2			2		1474	use File::Temp qw//;
	2					38919
	2					87
99	2			2		24	use List::Util qw/reduce any/;
	2					4
	2					2788
100
101	8	50		8	0	31	sub diag { $DEBUG->(@_) if defined $DEBUG }
102
103							sub remove_empty_lines {
104	16			16	0	28	my ($code) = @_;
105	16					250	$code =~ s/\n\s*/\n/g; # remove empty lines
106	16					57	$code =~ s/^\s*//g; # remove leading whitespace
107	16					33	return $code;
108							}
109
110							our $warn_code_is_long = [warning => 'A shorter solution is possible'];
111							our $warn_code_is_very_long = [error => 'A significantly shorter solution is possible'];
112
113							# a criterion is a pair [abs, rel]. a program matches a criterion if
114							# the absolute loc difference is at most abs AND the relative loc
115							# difference is at most rel. These criteria are used to categorise
116							# code as "short", "long", or "very long".
117
118							# code is considered short if one of these criteria match
119							our @short_code_criteria = (
120							[1e9, 0.3],
121							[20, 0.5],
122							[10, 1],
123							);
124
125							# code is considered long if one of these criteria match, and none of
126							# the above do
127							our @long_code_criteria = (
128							[1e9, 0.5],
129							[20, 1],
130							[10, 2],
131							);
132
133							# code is considered very long if none of the criteria above match
134
135							=head3 test_lines
136
137							This test counts non-empty lines in both the formatted code and the reference.
138							If no formatted code is available, the original code is used.
139							If the code is significantly longer than the reference, it returns a warning.
140							If the code is much longer, it returns an error.
141							Otherwise it returns an empty arrayref.
142
143							The thresholds for raising a warning/error are available in the source
144							code, see global variables C<@short_code_criteria> and
145							C<@long_code_criteria>.
146
147							This test fails if no reference is provided, but is language-agnostic
148
149							=cut
150
151							sub test_lines {
152	8			8	1	2649	my %args = @_;
153	8		33			39	my $user_solution = $args{formatted_code} // $args{code};
154	8					16	my $official_solution = $args{reference};
155	8	50				17	return unless defined $official_solution;
156
157	8					27	$user_solution = remove_empty_lines($user_solution . "\n");
158	8					22	$official_solution = remove_empty_lines($official_solution . "\n");
159
160							# Count number of lines of code from both solutions.
161	8					90	my $loc_user_solution = () = $user_solution =~ /\n/g;
162	8					56	my $loc_official_solution = () = $official_solution =~ /\n/g;
163	8	50				27	return if $loc_official_solution == 0;
164
165	8					12	my $loc_absolute_diff = $loc_user_solution - $loc_official_solution;
166	8					16	my $loc_relative_diff = $loc_absolute_diff / $loc_official_solution;
167	8					53	diag "abs diff: $loc_absolute_diff, rel diff: $loc_relative_diff";
168							my $predicate = sub {
169	29	100		29		122	$loc_absolute_diff <= $_->[0] && $loc_relative_diff <= $_->[1]
170	8					2588	};
171
172	8	100				30	return [] if any \&$predicate, @short_code_criteria;
173	4	100				11	return [$warn_code_is_long] if any \&$predicate, @long_code_criteria;
174	1					6	return [$warn_code_is_very_long]
175							}
176
177							=head3 test_clang_tidy
178
179							This test runs the
180							L static analyser
181							on the code and returns all warnings found.
182
183							The clang-tidy checks in use are determined by two global variables,
184							each of which is a list of globs such as C. The checks in
185							C<@clang_tidy_warnings> produce warnings, while the checks in
186							C<@clang_tidy_errors> produce errors. There is also a hash
187							C<%clang_tidy_check_options> which contains configuration for the
188							checks.
189
190							This test does not require a reference, but is limited to languages
191							that clang-tidy understands. This is controlled by the global variable
192							C<%extension_of_language>, which contains file extensions for the
193							supported languages.
194
195							=cut
196
197							our %extension_of_language = (
198							'C' => '.c',
199							'C++' => '.cpp',
200							);
201
202							our @clang_tidy_warnings =
203							qw/clang-analyzer-deadcode.DeadStores
204							clang-analyzer-unix.*
205							clang-analyzer-valist.*
206							misc-*
207							modernize-*
208							performance-*
209							readability-*
210							-readability-braces-around-statements/;
211
212							our @clang_tidy_errors =
213							qw/bugprone-*
214							clang-analyzer-core.*
215							clang-analyzer-cplusplus.*
216							clang-diagnostic-*/;
217
218							our %clang_tidy_check_options = (
219							'readability-implicit-bool-conversion.AllowIntegerConditions' => 1,
220							);
221
222							sub _clang_tidy_exists {
223							# does clang-tidy exist?
224							# run it with no arguments, see if exit code is 127
225	1			1		71476	system 'clang-tidy 2>/dev/null 1>/dev/null';
226	1					72	$? >> 8 != 127
227							}
228
229							sub test_clang_tidy {
230	3			3	1	11	my %args = @_;
231	3					9	my $extension = $extension_of_language{uc $args{language}};
232	3	50				13	return unless defined $extension;
233
234	0					0	my $fh = File::Temp->new(
235							TEMPLATE => 'code-qualityXXXXX',
236							TMPDIR => 1,
237							SUFFIX => $extension,
238							);
239	0	0				0	print $fh $args{code} or croak 'Failed to write code to temporary file';
240	0	0				0	close $fh or croak 'Failed to close temporary file';
241
242	0					0	my $checks = join ',', '-*', @clang_tidy_warnings, @clang_tidy_errors;
243	0					0	my $errors = join ',', '-*', @clang_tidy_errors;
244	0					0	my @check_options;
245	0					0	while (my ($key, $value) = each %clang_tidy_check_options) {
246	0					0	push @check_options, { key => $key, value => $value }
247							}
248	0					0	my $config = encode_json +{
249							CheckOptions => \@check_options,
250							Checks => $checks,
251							WarningsAsErrors => $errors,
252							};
253
254	0					0	my @output = qx,clang-tidy -config='$config' $fh 2>/dev/null,;
255	0					0	my $exit_code = $? >> 8; # this is usually the number of clang-tidy errors
256	0					0	my $signal = $? & 127;
257	0	0	0			0	if ($signal \|\| ($exit_code == 127 && !_clang_tidy_exists)) {
			0
258							# special case: exit code 127 could mean "127 errors found" or
259							# "clang-tidy not found"
260	0					0	carp "Failed to run clang-tidy, \$? is $?";
261							return
262	0					0	}
263
264	0					0	my @warnings;
265	0					0	for my $line (@output) {
266	0	0				0	my ($row, $col, $type, $msg) =
267							$line =~ /$fh:(\d+):(\d+): (\w+): (.*)$/
268							or next;
269	0					0	chomp $msg;
270	0					0	$msg =~ s/,-warnings-as-errors//;
271	0	0				0	$type = 'info' if $type eq 'note';
272	0					0	push @warnings, [$type, $msg, $row, $col]
273							}
274							\@warnings
275	0					0	}
276
277							=head3 analyse_code
278
279							B runs every test above on the code, producing a
280							combined list of warnings. It fails (returns undef) if all tests fail.
281							The tests run by B are those in the global variable
282							C<@all_tests>, which is a list of coderefs.
283
284							=cut
285
286							our @all_tests = (
287							\&test_lines,
288							\&test_clang_tidy,
289							);
290
291							sub analyse_code {
292							# arguments/return value are identical to those of individual tests
293	3			3	1	1280	my @test_args = @_;
294	3					9	my @test_results = map { $_->(@test_args) } @all_tests;
	6					17
295							reduce {
296							# $a accumulates warnings so far, $b are warnings from current test
297	0	0		0		0	return $b unless defined $a;
298	0	0				0	push @$a, @$b if defined $b;
299	0					0	$a
300	3					25	} @test_results;
301							}
302
303							=head2 Star rating
304
305							B(I<$warnings>) is a subroutine that takes
306							the output of a test and computes the star rating as an integer. The
307							rating is undef if the test failed, 1 if the test returned at least
308							one error, 2 if the test returned at least one warning but no errors,
309							and 3 otherwise. So a program gets 3 stars if it only raises
310							informational messages, or no messages at all.
311
312							=cut
313
314							sub star_rating_of_warnings {
315	8			8	0	27	my ($warnings) = @_;
316	8	50				17	return unless defined $warnings;
317	8	100		4		30	return 1 if any { $_->[0] eq 'error' } @$warnings;
	4					13
318	7	100		3		27	return 2 if any { $_->[0] eq 'warning' } @$warnings;
	3					12
319	4					41	return 3;
320							}
321
322							1;
323							__END__