File Coverage

lib/ThreatDetector/Classifier.pm

Criterion	Covered	Total	%
statement	27	27	100.0
branch	14	22	63.6
condition	3	6	50.0
subroutine	5	5	100.0
pod	2	2	100.0
total	51	62	82.2

line	stmt	bran	cond	sub	pod	time	code
1							package ThreatDetector::Classifier;
2
3	2			2		133806	use strict;
	2					3
	2					68
4	2			2		11	use warnings;
	2					10
	2					101
5	2			2		520	use URI::Escape;
	2					1796
	2					2420
6
# When true, any_match() emits a "[DEBUG]" warning for every pattern hit.
our $VERBOSE = 0;
our $VERSION = '0.04';

# ---------------------------------------------------------------------------
# Signature tables.  Each table is a list of precompiled regexes; classify()
# passes a whole table to any_match() together with one field of the entry.
# Alternations use a plain '|' (an escaped '\|' would match a literal pipe).
# ---------------------------------------------------------------------------

# SQL injection patterns (matched against the request URI).
my @sqli_patterns = (
    qr/\bUNION\s+ALL\s+SELECT\b/i,
    qr/\bUNION\s+SELECT\b/i,
    qr/\bSELECT\s+\*\s+FROM\b/i,
    qr/(?:'|")\s*or\s+\d+\s*=\s*\d+/i,    # quote followed by a tautology: ' or 1=1
    qr/(['"]).*?\1\s*--/,                 # quoted value followed by an SQL comment
    qr/or\s+1\s*=\s*1/i,                  # whitespace around '=' is optional
    qr/\bsleep\s*\(/i,
    qr/\bconcat\b/i,
    qr/benchmark\s*\(/i,
);

# Command injection (matched against the request URI).
my @cmd_patterns = (
    qr/;.*\b(?:ls|whoami|cat|curl|wget)\b/i,         # ';' then a common command
    qr/\|\s*(?:ls|cat|uname)/i,                      # literal pipe into a command
    qr/(?:[;&|]\s*)(?:nc|bash|sh|powershell)\b/i,    # separator then a shell
    qr/(?:http|ftp):\/\/[^ ]+/i,                     # URL embedded in the URI
);

# Directory traversal (matched against the request URI).
my @traversal_patterns = (
    qr/\.\.\/+/,
    qr/\%2e\%2e\//i,
    qr/\/etc\/passwd/,
);

# XSS patterns (matched against the request URI).
my @xss_patterns = (
    qr/<script\b/i,    # raw opening script tag
    qr/(?:\?|&)on(?:click|error|load|mouseover|focus|submit|keydown|keyup|blur|change)\s*=/i,
    qr/javascript:/i,
    qr/%3Cscript/i,    # URL-encoded "<script"
);

# Encoded payloads (matched against the request URI).
my @encoded_patterns = (
    qr/%[0-9a-fA-F]{2}/,    # general URL encoding
    qr/%2e/i,               # encoded .
    qr/%3c/i,               # encoded <
);

# Bad User-Agents / scanner signatures (matched against the user agent).
my @bad_agents = (
    qr/sqlmap/i,
    qr/nikto/i,
    qr/nmap/i,
    qr/dirbuster/i,
    qr/python-requests/i,
    qr/libwww/i,
);
62
# classify($entry)
#
# Examines one parsed log entry and returns the list of threat labels that
# apply to it.
#
#   $entry - hashref; the keys read here are uri, status, user_agent and
#            method.  Any of them may be missing.
#
# Returns a (possibly empty) list of label strings, in the order the checks
# are performed below.  Returns the empty list when $entry is not a plain
# hashref.
sub classify {
    my ($entry) = @_;
    return () unless $entry && ref($entry) eq 'HASH';

    # Normalise missing fields to '' so the direct regex tests on status and
    # method do not raise "uninitialized value" warnings for partial entries.
    # any_match() already treats empty text as "no match".
    my $uri    = defined $entry->{uri}        ? $entry->{uri}        : '';
    my $status = defined $entry->{status}     ? $entry->{status}     : '';
    my $agent  = defined $entry->{user_agent} ? $entry->{user_agent} : '';
    my $method = defined $entry->{method}     ? $entry->{method}     : '';

    my @threats;

    push @threats, 'sql_injection'       if any_match($uri, @sqli_patterns);
    push @threats, 'client_error'        if $status =~ /^4\d\d$/;
    push @threats, 'command_injection'   if any_match($uri, @cmd_patterns);
    push @threats, 'directory_traversal' if any_match($uri, @traversal_patterns);
    push @threats, 'xss_attempt'         if any_match($uri, @xss_patterns);
    push @threats, 'encoded_payload'     if any_match($uri, @encoded_patterns);
    push @threats, 'scanner_fingerprint' if any_match($agent, @bad_agents);

    # Plain '|' alternation: an escaped '\|' would only match the literal
    # string "PUT|DELETE|TRACE|CONNECT" and never a real method name.
    push @threats, 'http_method_abuse'   if $method =~ /^(?:PUT|DELETE|TRACE|CONNECT)$/;

    # TODO:
    # 9. Rate limiting / burst detection (requires time tracking outside of this module)
    # 10. Login brute-force (likely needs context or endpoint + rate info)
    # 11. Header abuse (referer, user-agent anomalies — could go here)

    return @threats;
}
85
# any_match($text, @patterns)
#
# Internal helper.  Tests $text against each regex in @patterns in order and
# stops at the first hit.
#
#   $text     - string to scan; undef or '' is treated as "no match"
#   @patterns - compiled regexes (qr//) to try
#
# Returns 1 on the first matching pattern, 0 when nothing matches.  When
# $VERBOSE is true, the matching pattern and text are reported via warn.
sub any_match {
    my ($text, @patterns) = @_;

    # Check once, up front.  Testing definedness and length (rather than
    # truthiness) means the string "0" is still scanned instead of being
    # silently skipped.
    return 0 unless defined $text && length $text;

    for my $re (@patterns) {
        next unless $text =~ $re;
        warn "[DEBUG] Matched pattern $re on: $text\n" if $VERBOSE;
        return 1;
    }

    return 0;
}
96
97							1;
98
99							=head1 NAME
100
101							ThreatDetector::Classifier - Threat classification engine for parsed Apache log entries
102
103							=head1 SYNOPSIS
104
105							use ThreatDetector::Classifier;
106
107							my @threats = ThreatDetector::Classifier::classify($entry);
108
109							=head1 DESCRIPTION
110
111							This module analyzes structured Apache log entries (as hashrefs) and classifies them into one or more known web threat categories. The output is a list of threat types for further processing by the dispatcher.
112
113							=head1 FUNCTIONS
114
115							=head2 classify($entry)
116
117							Takes a hashref representing a parsed log entry (from Parser.pm) and returns a list of matching threat types. Returns an empty list if no known threats are found.
118
119							=head2 any_match($text, @patterns)
120
121							Internal utility function. Returns true if any regex in @patterns matches $text.
122
123							=head1 THREAT TYPES RETURNED
124
125							=over 4
126
127							=item * sql_injection
128
129							=item * client_error
130
131							=item * command_injection
132
133							=item * directory_traversal
134
135							=item * xss_attempt
136
137							=item * encoded_payload
138
139							=item * scanner_fingerprint
140
141							=item * http_method_abuse
142
143							=back
144
145							Future versions may include:
146
147							=over 4
148
149							=item * rate_burst
150
151							=item * login_bruteforce
152
153							=item * header_abuse
154
155							=back
156
157							=head1 AUTHOR
158
159							Jason Hall
160
161							=cut