| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Apache::Logmonster; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 3 |  |  | 3 |  | 43572 | use strict; | 
|  | 3 |  |  |  |  | 6 |  | 
|  | 3 |  |  |  |  | 127 |  | 
| 4 | 3 |  |  | 3 |  | 16 | use warnings; | 
|  | 3 |  |  |  |  | 6 |  | 
|  | 3 |  |  |  |  | 175 |  | 
| 5 |  |  |  |  |  |  |  | 
| 6 |  |  |  |  |  |  | our $VERSION = '5.36'; | 
| 7 |  |  |  |  |  |  |  | 
| 8 | 3 |  |  | 3 |  | 15 | use Carp; | 
|  | 3 |  |  |  |  | 20 |  | 
|  | 3 |  |  |  |  | 296 |  | 
| 9 | 3 |  |  | 3 |  | 5413 | use Compress::Zlib; | 
|  | 3 |  |  |  |  | 269795 |  | 
|  | 3 |  |  |  |  | 1499 |  | 
| 10 | 3 |  |  | 3 |  | 27 | use Cwd; | 
|  | 3 |  |  |  |  | 6 |  | 
|  | 3 |  |  |  |  | 222 |  | 
| 11 |  |  |  |  |  |  | #use Data::Dumper; | 
| 12 | 3 |  |  | 3 |  | 3042 | use Date::Parse; | 
|  | 3 |  |  |  |  | 27163 |  | 
|  | 3 |  |  |  |  | 453 |  | 
| 13 | 3 |  |  | 3 |  | 2979 | use FileHandle; | 
|  | 3 |  |  |  |  | 3003 |  | 
|  | 3 |  |  |  |  | 19 |  | 
| 14 | 3 |  |  | 3 |  | 1556 | use File::Basename; | 
|  | 3 |  |  |  |  | 7 |  | 
|  | 3 |  |  |  |  | 283 |  | 
| 15 | 3 |  |  | 3 |  | 2998 | use File::Copy; | 
|  | 3 |  |  |  |  | 11252 |  | 
|  | 3 |  |  |  |  | 192 |  | 
| 16 | 3 |  |  | 3 |  | 2812 | use Regexp::Log; | 
|  | 3 |  |  |  |  | 5055 |  | 
|  | 3 |  |  |  |  | 119 |  | 
| 17 |  |  |  |  |  |  |  | 
| 18 | 3 |  |  | 3 |  | 22 | use lib 'lib'; | 
|  | 3 |  |  |  |  | 8 |  | 
|  | 3 |  |  |  |  | 28 |  | 
| 19 | 3 |  |  | 3 |  | 3587 | use Apache::Logmonster::Utility; | 
|  | 3 |  |  |  |  | 12 |  | 
|  | 3 |  |  |  |  | 185 |  | 
| 20 | 3 |  |  | 3 |  | 2198 | use Regexp::Log::Monster; | 
|  | 3 |  |  |  |  | 7 |  | 
|  | 3 |  |  |  |  | 11926 |  | 
| 21 |  |  |  |  |  |  | my ( $util, $err, %fhs, $debug ); | 
| 22 |  |  |  |  |  |  |  | 
| 23 |  |  |  |  |  |  | sub new { | 
| 24 | 3 |  |  | 3 | 1 | 3288 | my $class = shift; | 
| 25 | 3 |  | 50 |  |  | 28 | $debug = shift || 0; | 
| 26 |  |  |  |  |  |  |  | 
| 27 | 3 | 50 |  |  |  | 24 | my $self = { | 
| 28 |  |  |  |  |  |  | conf  => undef, | 
| 29 |  |  |  |  |  |  | debug => $debug ? 1 : 0, | 
| 30 |  |  |  |  |  |  | }; | 
| 31 |  |  |  |  |  |  |  | 
| 32 | 3 |  |  |  |  | 10 | bless( $self, $class ); | 
| 33 |  |  |  |  |  |  |  | 
| 34 | 3 |  |  |  |  | 16 | $self->get_util(); | 
| 35 |  |  |  |  |  |  |  | 
| 36 | 3 |  |  |  |  | 13 | return $self; | 
| 37 |  |  |  |  |  |  | }; | 
| 38 |  |  |  |  |  |  |  | 
| 39 |  |  |  |  |  |  | sub check_awstats_file { | 
| 40 | 0 |  |  | 0 | 1 | 0 | my $self = shift; | 
| 41 | 0 |  |  |  |  | 0 | my $domain = shift; | 
| 42 |  |  |  |  |  |  |  | 
| 43 | 0 |  |  |  |  | 0 | my $conf     = $self->{'conf'}; | 
| 44 | 0 |  | 0 |  |  | 0 | my $confdir  = $conf->{confdir}  || '/etc/awstats'; | 
| 45 | 0 |  | 0 |  |  | 0 | my $statsdir = $conf->{statsdir} || '/var/db/awstats'; | 
| 46 |  |  |  |  |  |  |  | 
| 47 | 0 |  |  |  |  | 0 | my $adc = "$confdir/awstats.$domain.conf"; | 
| 48 |  |  |  |  |  |  |  | 
| 49 | 0 | 0 |  |  |  | 0 | return if -f $adc; | 
| 50 |  |  |  |  |  |  |  | 
| 51 | 0 |  |  |  |  | 0 | $util->file_write( $adc, | 
| 52 |  |  |  |  |  |  | lines => [ | 
| 53 |  |  |  |  |  |  | <<"EO_AWSTATS_VHOST" | 
| 54 |  |  |  |  |  |  | Include "$confdir/awstats.model.conf" | 
| 55 |  |  |  |  |  |  | SiteDomain = $domain | 
| 56 |  |  |  |  |  |  | DirData = $statsdir/$domain | 
| 57 |  |  |  |  |  |  | HostAliases = $domain localhost 127.0.0.1 | 
| 58 |  |  |  |  |  |  | EO_AWSTATS_VHOST | 
| 59 |  |  |  |  |  |  | ], | 
| 60 |  |  |  |  |  |  | debug => 0, | 
| 61 |  |  |  |  |  |  | ); | 
| 62 |  |  |  |  |  |  | }; | 
| 63 |  |  |  |  |  |  |  | 
| 64 |  |  |  |  |  |  | sub check_config { | 
| 65 | 1 |  |  | 1 | 1 | 31080 | my $self  = shift; | 
| 66 | 1 |  |  |  |  | 20 | my $conf  = $self->{'conf'}; | 
| 67 |  |  |  |  |  |  |  | 
| 68 | 1 |  |  |  |  | 11 | $err = "performing sanity tests"; | 
| 69 | 1 | 50 |  |  |  | 16 | $self->_progress_begin($err) if $debug; | 
| 70 |  |  |  |  |  |  |  | 
| 71 | 1 | 50 |  |  |  | 32 | print "\n\t verbose mode $debug\n" if $debug > 1; | 
| 72 |  |  |  |  |  |  |  | 
| 73 | 1 | 50 |  |  |  | 11 | if ( $debug > 1 ) { | 
| 74 | 0 |  |  |  |  | 0 | print "\t clean mode "; | 
| 75 | 0 | 0 |  |  |  | 0 | print $conf->{'clean'} ? "enabled.\n" : "disabled.\n"; | 
| 76 |  |  |  |  |  |  | } | 
| 77 |  |  |  |  |  |  |  | 
| 78 | 1 |  |  |  |  | 12 | my $tmpdir = $conf->{tmpdir}; | 
| 79 | 1 | 50 |  |  |  | 15 | print "\t temporary working directory is $tmpdir.\n" if $debug > 1; | 
| 80 |  |  |  |  |  |  |  | 
| 81 | 1 | 50 |  |  |  | 66 | if ( ! -d $tmpdir ) { | 
| 82 | 0 | 0 |  |  |  | 0 | print "\t temp dir does not existing, creating..." if $debug > 1; | 
| 83 | 0 | 0 |  |  |  | 0 | if ( !mkdir $tmpdir, oct('0755') ) { | 
| 84 | 0 |  |  |  |  | 0 | die "FATAL: The directory $tmpdir does not exist and I could not " | 
| 85 |  |  |  |  |  |  | . "create it. Edit logmonster.conf or create it.\n"; | 
| 86 |  |  |  |  |  |  | } | 
| 87 | 0 | 0 |  |  |  | 0 | print "done.\n" if $debug > 1; | 
| 88 |  |  |  |  |  |  |  | 
| 89 |  |  |  |  |  |  | # this will fail unless we're root, but that should not matter much | 
| 90 | 0 | 0 |  |  |  | 0 | print "\t setting permissions on temp dir..." if $debug > 1; | 
| 91 | 0 | 0 | 0 |  |  | 0 | $util->chown( $tmpdir, | 
|  |  |  | 0 |  |  |  |  | 
| 92 |  |  |  |  |  |  | uid         => $conf->{'log_user'} || 'www', | 
| 93 |  |  |  |  |  |  | gid         => $conf->{'log_group'} || 'www', | 
| 94 |  |  |  |  |  |  | debug       => $debug > 1 ? 1 : 0, | 
| 95 |  |  |  |  |  |  | fatal       => 0, | 
| 96 |  |  |  |  |  |  | ); | 
| 97 | 0 | 0 |  |  |  | 0 | print "done.\n" if $debug > 1; | 
| 98 |  |  |  |  |  |  | } | 
| 99 |  |  |  |  |  |  |  | 
| 100 | 1 | 50 | 33 |  |  | 153 | if ( !-w $tmpdir || !-r $tmpdir ) { | 
| 101 | 0 |  |  |  |  | 0 | croak "FATAL: \$tmpdir ($tmpdir) must be read and writable!"; | 
| 102 |  |  |  |  |  |  | } | 
| 103 |  |  |  |  |  |  |  | 
| 104 | 1 | 50 |  |  |  | 9 | if ( $conf->{'clean'} ) { | 
| 105 | 1 | 50 |  |  |  | 32 | if ( !$util->clean_tmp_dir( $tmpdir, debug => 1, fatal=>0 ) ) { | 
| 106 | 0 |  |  |  |  | 0 | croak "\nfailed to clean out $tmpdir"; | 
| 107 |  |  |  |  |  |  | } | 
| 108 |  |  |  |  |  |  | } | 
| 109 |  |  |  |  |  |  |  | 
| 110 | 1 | 50 |  |  |  | 13 | die "\nFATAL: you must edit logmonster.conf and set default_vhost!\n" | 
| 111 |  |  |  |  |  |  | if ! defined $conf->{'default_vhost'}; | 
| 112 |  |  |  |  |  |  |  | 
| 113 | 1 | 50 |  |  |  | 11 | if ( $conf->{'time_offset'} ) { | 
| 114 | 0 |  |  |  |  | 0 | my ( $dd, $mm, $yy, $lm, $hh, $mn ) = $util->get_the_date( debug=>0 ); | 
| 115 |  |  |  |  |  |  |  | 
| 116 | 0 |  | 0 |  |  | 0 | my $interval = $self->{rotation_interval} || 'day'; | 
| 117 | 0 |  |  |  |  | 0 | my $bump     = $conf->{time_offset}; | 
| 118 | 0 |  |  |  |  | 0 | my $logbase  = $conf->{logbase}; | 
| 119 |  |  |  |  |  |  |  | 
| 120 | 0 | 0 |  |  |  | 0 | my $how_far_back = $interval eq "hour"  ? .04      # back 1 hour | 
|  |  | 0 |  |  |  |  |  | 
| 121 |  |  |  |  |  |  | : $interval eq "month" ? $dd + 1  # last month | 
| 122 |  |  |  |  |  |  | :                        1;       # 1 day | 
| 123 |  |  |  |  |  |  |  | 
| 124 | 0 |  |  |  |  | 0 | ( $dd, $mm, $yy, $lm, $hh, $mn ) | 
| 125 |  |  |  |  |  |  | = $util->get_the_date( bump => $bump + $how_far_back, debug => 0 ); | 
| 126 |  |  |  |  |  |  |  | 
| 127 | 0 | 0 |  |  |  | 0 | die "OK then, try again.\n" | 
| 128 |  |  |  |  |  |  | if ! $util->yes_or_no( | 
| 129 |  |  |  |  |  |  | "\nDoes the date $yy/$mm/$dd look correct? "); | 
| 130 |  |  |  |  |  |  | } | 
| 131 |  |  |  |  |  |  |  | 
| 132 | 1 | 50 |  |  |  | 7 | $self->_progress_end('passed') if $debug == 1; | 
| 133 |  |  |  |  |  |  |  | 
| 134 | 1 |  |  |  |  | 37 | return 1; | 
| 135 |  |  |  |  |  |  | }; | 
| 136 |  |  |  |  |  |  |  | 
| 137 |  |  |  |  |  |  | sub compress_log_file { | 
| 138 | 0 |  |  | 0 | 1 | 0 | my $self    = shift; | 
| 139 | 0 |  |  |  |  | 0 | my $host    = shift; | 
| 140 | 0 |  |  |  |  | 0 | my $logfile = shift; | 
| 141 |  |  |  |  |  |  |  | 
| 142 | 0 |  |  |  |  | 0 | my $debug = $self->{'debug'}; | 
| 143 |  |  |  |  |  |  |  | 
| 144 | 0 | 0 | 0 |  |  | 0 | unless ( $host && $logfile ) { | 
| 145 | 0 |  |  |  |  | 0 | croak "compress_log_file: called incorrectly!"; | 
| 146 |  |  |  |  |  |  | } | 
| 147 |  |  |  |  |  |  |  | 
| 148 | 0 |  |  |  |  | 0 | my $REPORT = $self->{'report'}; | 
| 149 |  |  |  |  |  |  |  | 
| 150 | 0 | 0 |  |  |  | 0 | if ( $host eq "localhost" ) { | 
| 151 | 0 |  |  |  |  | 0 | my $gzip = $util->find_bin( 'gzip', debug => 0 ); | 
| 152 |  |  |  |  |  |  |  | 
| 153 | 0 | 0 |  |  |  | 0 | if ( !-e $logfile ) { | 
| 154 | 0 |  |  |  |  | 0 | print $REPORT "compress_log_file: $logfile does not exist!\n"; | 
| 155 | 0 | 0 |  |  |  | 0 | if ( -e "$logfile.gz" ) { | 
| 156 | 0 |  |  |  |  | 0 | print $REPORT "     already compressed as $logfile.gz!\n"; | 
| 157 | 0 |  |  |  |  | 0 | return 1; | 
| 158 |  |  |  |  |  |  | } | 
| 159 | 0 |  |  |  |  | 0 | return; | 
| 160 |  |  |  |  |  |  | } | 
| 161 |  |  |  |  |  |  |  | 
| 162 | 0 |  |  |  |  | 0 | my $cmd = "$gzip $logfile"; | 
| 163 | 0 | 0 |  |  |  | 0 | $self->_progress("gzipping localhost:$logfile") if $debug; | 
| 164 | 0 |  |  |  |  | 0 | print $REPORT "syscmd: $cmd\n"; | 
| 165 | 0 |  |  |  |  | 0 | my $r = $util->syscmd( $cmd, debug => 0 ); | 
| 166 | 0 | 0 |  |  |  | 0 | print $REPORT "syscmd: error result: $r\n" if ( $r != 0 ); | 
| 167 |  |  |  |  |  |  |  | 
| 168 | 0 |  |  |  |  | 0 | return 1; | 
| 169 |  |  |  |  |  |  | } | 
| 170 |  |  |  |  |  |  |  | 
| 171 | 0 | 0 |  |  |  | 0 | $self->_progress_begin("checking $host for $logfile") if $debug; | 
| 172 |  |  |  |  |  |  |  | 
| 173 |  |  |  |  |  |  | # $host is remote, so we interact via SSH | 
| 174 | 0 |  |  |  |  | 0 | my $ssh = $util->find_bin( "ssh", debug => 0 ); | 
| 175 | 0 |  |  |  |  | 0 | my $cmd = "$ssh $host test -f $logfile"; | 
| 176 |  |  |  |  |  |  |  | 
| 177 | 0 |  |  |  |  | 0 | print $REPORT "compress_log_file: checking $host\n"; | 
| 178 | 0 |  |  |  |  | 0 | print $REPORT "\tsyscmd: $cmd\n"; | 
| 179 |  |  |  |  |  |  |  | 
| 180 | 0 | 0 |  |  |  | 0 | $self->_progress_continue() if $debug; | 
| 181 |  |  |  |  |  |  |  | 
| 182 |  |  |  |  |  |  | # does the file exist? | 
| 183 | 0 | 0 |  |  |  | 0 | if ( !$util->syscmd( $cmd, debug => 0, fatal => 0 ) ) { | 
| 184 | 0 | 0 |  |  |  | 0 | $self->_progress_continue() if $debug; | 
| 185 |  |  |  |  |  |  |  | 
| 186 |  |  |  |  |  |  | # does file.gz exist? | 
| 187 | 0 | 0 |  |  |  | 0 | if ( -f "$logfile.gz" ) { | 
| 188 | 0 |  |  |  |  | 0 | $err = "ALREADY COMPRESSED"; | 
| 189 | 0 |  |  |  |  | 0 | print $REPORT "\t$err\n"; | 
| 190 |  |  |  |  |  |  |  | 
| 191 | 0 | 0 |  |  |  | 0 | $self->_progress_end($err) if $debug; | 
| 192 |  |  |  |  |  |  |  | 
| 193 | 0 |  |  |  |  | 0 | return 1; | 
| 194 |  |  |  |  |  |  | } | 
| 195 | 0 | 0 |  |  |  | 0 | $self->_progress_end("no") if $debug; | 
| 196 |  |  |  |  |  |  |  | 
| 197 | 0 |  |  |  |  | 0 | print $REPORT "no\n"; | 
| 198 | 0 |  |  |  |  | 0 | return; | 
| 199 |  |  |  |  |  |  | } | 
| 200 |  |  |  |  |  |  |  | 
| 201 | 0 | 0 |  |  |  | 0 | $self->_progress_end("yes") if $debug; | 
| 202 |  |  |  |  |  |  |  | 
| 203 | 0 |  |  |  |  | 0 | print $REPORT "yes\n"; | 
| 204 |  |  |  |  |  |  |  | 
| 205 | 0 |  |  |  |  | 0 | $err = "compressing log file on $host"; | 
| 206 | 0 | 0 |  |  |  | 0 | $self->_progress_begin($err) if $debug; | 
| 207 |  |  |  |  |  |  |  | 
| 208 | 0 |  |  |  |  | 0 | $cmd = "$ssh $host gzip $logfile"; | 
| 209 | 0 |  |  |  |  | 0 | print $REPORT "\tcompressing\n\tsyscmd: $cmd \n"; | 
| 210 |  |  |  |  |  |  |  | 
| 211 | 0 | 0 |  |  |  | 0 | $self->_progress_continue() if !$debug; | 
| 212 |  |  |  |  |  |  |  | 
| 213 | 0 |  |  |  |  | 0 | my $r = $util->syscmd( $cmd, debug => 0, fatal => 0 ); | 
| 214 | 0 | 0 |  |  |  | 0 | if ( !$r ) { | 
| 215 | 0 |  |  |  |  | 0 | print $REPORT "\terror result: $r\n"; | 
| 216 | 0 |  |  |  |  | 0 | return; | 
| 217 |  |  |  |  |  |  | } | 
| 218 |  |  |  |  |  |  | $debug | 
| 219 | 0 | 0 |  |  |  | 0 | ? print "done\n" | 
| 220 |  |  |  |  |  |  | : $self->_progress_end(); | 
| 221 |  |  |  |  |  |  |  | 
| 222 | 0 |  |  |  |  | 0 | return 1; | 
| 223 |  |  |  |  |  |  | }; | 
| 224 |  |  |  |  |  |  |  | 
| 225 |  |  |  |  |  |  | sub consolidate_logfile { | 
| 226 |  |  |  |  |  |  |  | 
| 227 | 0 |  |  | 0 | 1 | 0 | my $self           = shift; | 
| 228 | 0 |  |  |  |  | 0 | my $host           = shift; | 
| 229 | 0 |  |  |  |  | 0 | my $remote_logfile = shift; | 
| 230 | 0 |  |  |  |  | 0 | my $local_logfile  = shift; | 
| 231 |  |  |  |  |  |  |  | 
| 232 | 0 |  |  |  |  | 0 | my $dry_run = $self->{'dry_run'}; | 
| 233 | 0 |  |  |  |  | 0 | my $debug   = $self->{'debug'}; | 
| 234 | 0 |  |  |  |  | 0 | my $REPORT  = $self->{'report'}; | 
| 235 |  |  |  |  |  |  |  | 
| 236 | 0 |  |  |  |  | 0 | my ( $r, $size ); | 
| 237 |  |  |  |  |  |  |  | 
| 238 |  |  |  |  |  |  | # retrieve yesterdays log files | 
| 239 | 0 | 0 |  |  |  | 0 | if ( $host eq "localhost" ) { | 
| 240 | 0 |  |  |  |  | 0 | $err | 
| 241 |  |  |  |  |  |  | = "consolidate_logfile: checking localhost for\n\t $remote_logfile..."; | 
| 242 | 0 | 0 |  |  |  | 0 | $self->_progress_begin($err) if $debug; | 
| 243 | 0 |  |  |  |  | 0 | print $REPORT $err; | 
| 244 |  |  |  |  |  |  |  | 
| 245 |  |  |  |  |  |  | # requires "use File::Copy" | 
| 246 | 0 |  |  |  |  | 0 | $r = copy $remote_logfile, $local_logfile; | 
| 247 | 0 | 0 |  |  |  | 0 | print $REPORT "FAILED: $!\n" unless ($r); | 
| 248 |  |  |  |  |  |  |  | 
| 249 | 0 |  |  |  |  | 0 | $size = ( stat $local_logfile )[7]; | 
| 250 |  |  |  |  |  |  |  | 
| 251 | 0 | 0 |  |  |  | 0 | if ( $size > 1000000 ) { $size = sprintf "%.2f MB", $size / 1000000; } | 
|  | 0 |  |  |  |  | 0 |  | 
| 252 | 0 |  |  |  |  | 0 | else                   { $size = sprintf "%.2f KB", $size / 1000; } | 
| 253 |  |  |  |  |  |  |  | 
| 254 | 0 |  |  |  |  | 0 | $err = "retrieved $size\n"; | 
| 255 | 0 | 0 |  |  |  | 0 | $self->_progress_end($err) if $debug; | 
| 256 | 0 |  |  |  |  | 0 | print $REPORT $err; | 
| 257 | 0 |  |  |  |  | 0 | return 1; | 
| 258 |  |  |  |  |  |  | } | 
| 259 |  |  |  |  |  |  |  | 
| 260 | 0 | 0 |  |  |  | 0 | return 1 if $dry_run; | 
| 261 |  |  |  |  |  |  |  | 
| 262 | 0 |  |  |  |  | 0 | my $scp = $util->find_bin( "scp", debug => 0 ); | 
| 263 | 0 |  |  |  |  | 0 | $scp .= " -q"; | 
| 264 |  |  |  |  |  |  |  | 
| 265 | 0 | 0 |  |  |  | 0 | $self->_progress_begin("\tconsolidate_logfile: fetching") if $debug; | 
| 266 |  |  |  |  |  |  |  | 
| 267 | 0 |  |  |  |  | 0 | print $REPORT | 
| 268 |  |  |  |  |  |  | "\tsyscmd: $scp \n\t\t$host:$remote_logfile \n\t\t$local_logfile\n"; | 
| 269 |  |  |  |  |  |  |  | 
| 270 | 0 |  |  |  |  | 0 | $r = $util->syscmd( "$scp $host:$remote_logfile $local_logfile", | 
| 271 |  |  |  |  |  |  | debug => 0 | 
| 272 |  |  |  |  |  |  | ); | 
| 273 | 0 | 0 |  |  |  | 0 | print $REPORT "syscmd: error result: $r\n" if !$r; | 
| 274 |  |  |  |  |  |  |  | 
| 275 | 0 |  |  |  |  | 0 | $size = ( stat $local_logfile )[7]; | 
| 276 | 0 | 0 |  |  |  | 0 | if ( !$size ) { | 
| 277 | 0 |  |  |  |  | 0 | $err = "FAILED. No logfiles retrieved!"; | 
| 278 | 0 | 0 |  |  |  | 0 | $self->_progress_end($err) if $debug; | 
| 279 | 0 |  |  |  |  | 0 | print $REPORT "\t $err \n"; | 
| 280 | 0 |  |  |  |  | 0 | return; | 
| 281 |  |  |  |  |  |  | } | 
| 282 |  |  |  |  |  |  |  | 
| 283 | 0 | 0 |  |  |  | 0 | if   ( $size > 1000000 ) { $size = sprintf "%.2f MB", $size / 1000000; } | 
|  | 0 |  |  |  |  | 0 |  | 
| 284 | 0 |  |  |  |  | 0 | else                     { $size = sprintf "%.2f KB", $size / 1000; } | 
| 285 |  |  |  |  |  |  |  | 
| 286 | 0 |  |  |  |  | 0 | $err = "retrieved $size"; | 
| 287 | 0 | 0 |  |  |  | 0 | $self->_progress_end($err) if $debug; | 
| 288 | 0 |  |  |  |  | 0 | print $REPORT "\t $err\n"; | 
| 289 |  |  |  |  |  |  |  | 
| 290 | 0 |  |  |  |  | 0 | return 1; | 
| 291 |  |  |  |  |  |  | }; | 
| 292 |  |  |  |  |  |  |  | 
| 293 |  |  |  |  |  |  | sub feed_the_machine { | 
| 294 | 0 |  |  | 0 | 1 | 0 | my $self        = shift; | 
| 295 | 0 |  |  |  |  | 0 | my $domains_ref = shift; | 
| 296 |  |  |  |  |  |  |  | 
| 297 | 0 | 0 | 0 |  |  | 0 | if ( !$domains_ref || ref $domains_ref ne 'HASH' ) { | 
| 298 | 0 |  |  |  |  | 0 | croak "feed_the_machine: invalid parameters passed."; | 
| 299 |  |  |  |  |  |  | } | 
| 300 |  |  |  |  |  |  |  | 
| 301 | 0 |  |  |  |  | 0 | my $debug    = $self->{'debug'}; | 
| 302 | 0 |  |  |  |  | 0 | my $conf     = $self->{'conf'}; | 
| 303 | 0 |  |  |  |  | 0 | my $REPORT   = $self->{'report'}; | 
| 304 | 0 |  |  |  |  | 0 | my $interval = $self->{'rotation_interval'}; | 
| 305 |  |  |  |  |  |  |  | 
| 306 | 0 |  |  |  |  | 0 | my ( $cmd, $r ); | 
| 307 |  |  |  |  |  |  |  | 
| 308 | 0 |  |  |  |  | 0 | my $tmpdir    = $conf->{'tmpdir'}; | 
| 309 | 0 |  |  |  |  | 0 | my $processor = $conf->{'processor'}; | 
| 310 |  |  |  |  |  |  |  | 
| 311 | 0 |  |  |  |  | 0 | foreach my $file ( $util->get_dir_files( "$tmpdir/doms" ) ) { | 
| 312 | 0 | 0 |  |  |  | 0 | next if ( $file =~ /\.bak$/ ); | 
| 313 |  |  |  |  |  |  |  | 
| 314 | 3 |  |  | 3 |  | 24 | use File::Basename; | 
|  | 3 |  |  |  |  | 7 |  | 
|  | 3 |  |  |  |  | 4141 |  | 
| 315 | 0 |  |  |  |  | 0 | my $domain   = fileparse($file); | 
| 316 | 0 |  |  |  |  | 0 | my $statsdir = "$conf->{'statsdir'}/$domain"; | 
| 317 |  |  |  |  |  |  |  | 
| 318 | 0 |  |  |  |  | 0 | $util->cwd_source_dir( $statsdir, debug => 0 ); | 
| 319 |  |  |  |  |  |  |  | 
| 320 | 0 | 0 |  |  |  | 0 | if ( ! -d $statsdir ) { | 
| 321 | 0 | 0 |  |  |  | 0 | print "skipping $file because $statsdir is not a directory.\n" | 
| 322 |  |  |  |  |  |  | if $debug; | 
| 323 | 0 |  |  |  |  | 0 | next; | 
| 324 |  |  |  |  |  |  | }; | 
| 325 |  |  |  |  |  |  |  | 
| 326 |  |  |  |  |  |  | # allow domain to select their stats processor | 
| 327 | 0 | 0 |  |  |  | 0 | if ( -f "$statsdir/.processor" ) { | 
| 328 | 0 |  |  |  |  | 0 | $processor = `head -n1 $statsdir/.processor`; | 
| 329 | 0 |  |  |  |  | 0 | chomp $processor; | 
| 330 |  |  |  |  |  |  | } | 
| 331 |  |  |  |  |  |  |  | 
| 332 | 0 | 0 |  |  |  | 0 | if ( $processor eq "webalizer" ) { | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 333 | 0 |  |  |  |  | 0 | my $webalizer | 
| 334 |  |  |  |  |  |  | = $util->find_bin( "webalizer", debug => 0 ); | 
| 335 | 0 | 0 |  |  |  | 0 | $webalizer .= " -q" if !$debug; | 
| 336 | 0 | 0 | 0 |  |  | 0 | $webalizer .= " -p" | 
| 337 |  |  |  |  |  |  | if ( $interval eq "hour" || $interval eq "day" ); | 
| 338 | 0 |  |  |  |  | 0 | $cmd = "$webalizer -n $domain -o $statsdir $file"; | 
| 339 | 0 | 0 |  |  |  | 0 | printf "$webalizer -n %-20s -o $statsdir\n", $domain if $debug; | 
| 340 | 0 |  |  |  |  | 0 | printf $REPORT "$webalizer -n %-20s -o $statsdir\n", $domain; | 
| 341 |  |  |  |  |  |  | } | 
| 342 |  |  |  |  |  |  | elsif ( $processor eq "http-analyze" ) { | 
| 343 | 0 |  |  |  |  | 0 | my $http_analyze | 
| 344 |  |  |  |  |  |  | = $util->find_bin( "http-analyze", debug => 0 ); | 
| 345 | 0 | 0 | 0 |  |  | 0 | $http_analyze .= " -d" | 
| 346 |  |  |  |  |  |  | if ( $interval eq "hour" || $interval eq "day" ); | 
| 347 | 0 | 0 |  |  |  | 0 | $http_analyze .= " -m" if ( $interval eq "month" ); | 
| 348 | 0 |  |  |  |  | 0 | $cmd = "$http_analyze -S $domain -o $statsdir $file"; | 
| 349 | 0 | 0 |  |  |  | 0 | printf "$http_analyze -S %-20s -o $statsdir\n", $domain if $debug; | 
| 350 | 0 |  |  |  |  | 0 | printf $REPORT "$http_analyze -S %-20s -o $statsdir\n", $domain; | 
| 351 |  |  |  |  |  |  | } | 
| 352 |  |  |  |  |  |  | elsif ( $processor eq "awstats" ) { | 
| 353 | 0 |  |  |  |  | 0 | $self->check_awstats_file( $domain ); | 
| 354 |  |  |  |  |  |  |  | 
| 355 | 0 |  |  |  |  | 0 | my $aws_cgi = "/usr/local/www/awstats/cgi-bin"; # freebsd port location | 
| 356 | 0 | 0 |  |  |  | 0 | $aws_cgi = "/usr/local/www/cgi-bin" unless -d $aws_cgi; | 
| 357 | 0 | 0 |  |  |  | 0 | $aws_cgi = "/var/www/cgi-bin"       unless -d $aws_cgi; | 
| 358 |  |  |  |  |  |  |  | 
| 359 | 0 |  |  |  |  | 0 | my $awstats = $util->find_bin( "awstats.pl", | 
| 360 |  |  |  |  |  |  | debug => 0, | 
| 361 |  |  |  |  |  |  | dir   => $aws_cgi, | 
| 362 |  |  |  |  |  |  | ); | 
| 363 | 0 |  |  |  |  | 0 | $cmd = "$awstats -config=$domain -logfile=$file"; | 
| 364 | 0 | 0 |  |  |  | 0 | printf "$awstats for \%-20s to $statsdir\n", $domain if $debug; | 
| 365 | 0 |  |  |  |  | 0 | printf $REPORT "$awstats for \%-20s to $statsdir\n", $domain; | 
| 366 |  |  |  |  |  |  | } | 
| 367 |  |  |  |  |  |  | else { | 
| 368 | 0 |  |  |  |  | 0 | $err = "Sorry, that is not supported! Valid options are: webalizer, http-analyze, and awstats.\n"; | 
| 369 | 0 |  |  |  |  | 0 | print $err; | 
| 370 | 0 |  |  |  |  | 0 | print $REPORT $err; | 
| 371 |  |  |  |  |  |  | } | 
| 372 |  |  |  |  |  |  |  | 
| 373 | 0 | 0 |  |  |  | 0 | unless ( $self->{'dry_run'} ) { | 
| 374 | 0 | 0 |  |  |  | 0 | print "running $processor!\n"  if $debug; | 
| 375 | 0 | 0 |  |  |  | 0 | print $REPORT "syscmd: $cmd\n" if $debug; | 
| 376 | 0 |  |  |  |  | 0 | $r = $util->syscmd( $cmd, debug => 0 ); | 
| 377 | 0 | 0 |  |  |  | 0 | print $REPORT "syscmd: error result: $r\n" if ( $r != 0 ); | 
| 378 |  |  |  |  |  |  | } | 
| 379 |  |  |  |  |  |  |  | 
| 380 |  |  |  |  |  |  |  | 
| 381 | 0 | 0 |  |  |  | 0 | if ( $conf->{'clean'} ) { | 
| 382 | 0 |  |  |  |  | 0 | $util->file_delete( $file, debug => 0 ); | 
| 383 | 0 |  |  |  |  | 0 | next; | 
| 384 |  |  |  |  |  |  | } | 
| 385 |  |  |  |  |  |  |  | 
| 386 | 0 |  |  |  |  | 0 | print "\nDon't forget about $file\n"; | 
| 387 | 0 |  |  |  |  | 0 | print $REPORT "\nDon't forget about $file\n"; | 
| 388 |  |  |  |  |  |  | } | 
| 389 |  |  |  |  |  |  | }; | 
| 390 |  |  |  |  |  |  |  | 
| 391 |  |  |  |  |  |  | sub fetch_log_files { | 
| 392 |  |  |  |  |  |  |  | 
| 393 | 0 |  |  | 0 | 1 | 0 | my $self    = shift; | 
| 394 | 0 |  |  |  |  | 0 | my $debug   = $self->{'debug'}; | 
| 395 | 0 |  |  |  |  | 0 | my $conf    = $self->{'conf'}; | 
| 396 | 0 |  |  |  |  | 0 | my $dry_run = $self->{'dry_run'}; | 
| 397 |  |  |  |  |  |  |  | 
| 398 | 0 |  |  |  |  | 0 | my $r; | 
| 399 |  |  |  |  |  |  |  | 
| 400 |  |  |  |  |  |  | # in a format like this: /var/log/apache/200?/09/25 | 
| 401 | 0 |  |  |  |  | 0 | my $logdir = $self->get_log_dir(); | 
| 402 | 0 |  |  |  |  | 0 | my $tmpdir = $conf->{'tmpdir'}; | 
| 403 |  |  |  |  |  |  |  | 
| 404 | 0 |  |  |  |  | 0 | my $access_log = "$logdir/" . $conf->{'access'}; | 
| 405 | 0 |  |  |  |  | 0 | my $error_log  = "$logdir/" . $conf->{'error'}; | 
| 406 |  |  |  |  |  |  |  | 
| 407 | 0 | 0 |  |  |  | 0 | print "fetch_log_files: warming up.\n" if $debug > 1; | 
| 408 |  |  |  |  |  |  |  | 
| 409 |  |  |  |  |  |  | WEBHOST: | 
| 410 | 0 |  |  |  |  | 0 | foreach my $webserver ( split( / /, $conf->{'hosts'} ) ) { | 
| 411 | 0 |  |  |  |  | 0 | my $compressed = 0; | 
| 412 |  |  |  |  |  |  |  | 
| 413 | 0 | 0 |  |  |  | 0 | if ( !$dry_run ) { | 
| 414 |  |  |  |  |  |  |  | 
| 415 |  |  |  |  |  |  | # compress yesterdays log files | 
| 416 | 0 |  |  |  |  | 0 | $self->compress_log_file( $webserver, $error_log ); | 
| 417 |  |  |  |  |  |  |  | 
| 418 | 0 | 0 |  |  |  | 0 | if ( !$self->compress_log_file( $webserver, $access_log ) ) { | 
| 419 |  |  |  |  |  |  |  | 
| 420 |  |  |  |  |  |  | # if there is no compressed logfile, there is no point in | 
| 421 |  |  |  |  |  |  | # trying to retrieve. | 
| 422 | 0 |  |  |  |  | 0 | next WEBHOST; | 
| 423 |  |  |  |  |  |  | } | 
| 424 |  |  |  |  |  |  | } | 
| 425 |  |  |  |  |  |  |  | 
| 426 | 0 |  |  |  |  | 0 | my $local_logfile = "$tmpdir/$webserver-" . $conf->{'access'} . ".gz"; | 
| 427 |  |  |  |  |  |  |  | 
| 428 | 0 |  |  |  |  | 0 | $self->consolidate_logfile( | 
| 429 |  |  |  |  |  |  | $webserver,          # hostname to retrieve from | 
| 430 |  |  |  |  |  |  | "$access_log.gz",    # the logfile to fetch | 
| 431 |  |  |  |  |  |  | $local_logfile,      # where to put it | 
| 432 |  |  |  |  |  |  | ); | 
| 433 |  |  |  |  |  |  | } | 
| 434 |  |  |  |  |  |  |  | 
| 435 | 0 |  |  |  |  | 0 | return 1; | 
| 436 |  |  |  |  |  |  | }; | 
| 437 |  |  |  |  |  |  |  | 
| 438 |  |  |  |  |  |  | sub get_log_dir { | 
| 439 |  |  |  |  |  |  |  | 
| 440 | 5 |  |  | 5 | 1 | 12 | my $self  = shift; | 
| 441 | 5 |  |  |  |  | 8 | my $debug = $self->{'debug'}; | 
| 442 | 5 |  |  |  |  | 9 | my $conf  = $self->{'conf'}; | 
| 443 |  |  |  |  |  |  |  | 
| 444 | 5 |  | 50 |  |  | 29 | my $interval = $self->{'rotation_interval'} || "day"; | 
| 445 |  |  |  |  |  |  |  | 
| 446 | 5 | 50 |  |  |  | 23 | unless ($conf) { | 
| 447 | 0 |  |  |  |  | 0 | croak "get_log_dir: \$conf is not set!\n"; | 
| 448 |  |  |  |  |  |  | } | 
| 449 |  |  |  |  |  |  |  | 
| 450 | 5 |  |  |  |  | 10 | my $bump    = $conf->{'time_offset'}; | 
| 451 | 5 |  |  |  |  | 8 | my $logbase = $conf->{'logbase'}; | 
| 452 |  |  |  |  |  |  |  | 
| 453 | 5 |  |  |  |  | 25 | my ( $dd, $mm, $yy, $lm, $hh, $mn ) = $util->get_the_date( debug => 0 ); | 
| 454 |  |  |  |  |  |  |  | 
| 455 | 5 | 50 |  |  |  | 24 | my $how_far_back = $interval eq "hour" | 
|  |  | 50 |  |  |  |  |  | 
| 456 |  |  |  |  |  |  | ? .04    # back 1 hour | 
| 457 |  |  |  |  |  |  | : $interval eq "month" ? $dd + 1    # last month | 
| 458 |  |  |  |  |  |  | :                        1;         # 1 day | 
| 459 |  |  |  |  |  |  |  | 
| 460 | 5 | 50 |  |  |  | 11 | if ($bump) { | 
| 461 | 0 | 0 |  |  |  | 0 | ( $dd, $mm, $yy, $lm, $hh, $mn ) = $util->get_the_date( | 
| 462 |  |  |  |  |  |  | bump  => $bump + $how_far_back, | 
| 463 |  |  |  |  |  |  | debug => $debug > 1 ? 1 : 0, | 
| 464 |  |  |  |  |  |  | ); | 
| 465 |  |  |  |  |  |  | } | 
| 466 |  |  |  |  |  |  | else { | 
| 467 | 5 | 50 |  |  |  | 23 | ( $dd, $mm, $yy, $lm, $hh, $mn ) = $util->get_the_date( | 
| 468 |  |  |  |  |  |  | bump  => $how_far_back, | 
| 469 |  |  |  |  |  |  | debug => $debug > 1 ? 1 : 0, | 
| 470 |  |  |  |  |  |  | ); | 
| 471 |  |  |  |  |  |  | } | 
| 472 |  |  |  |  |  |  |  | 
| 473 | 5 | 0 |  |  |  | 32 | my $logdir | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 474 |  |  |  |  |  |  | = $interval eq "hour"  ? "$logbase/$yy/$mm/$dd/$hh" | 
| 475 |  |  |  |  |  |  | : $interval eq "day"   ? "$logbase/$yy/$mm/$dd" | 
| 476 |  |  |  |  |  |  | : $interval eq "month" ? "$logbase/$yy/$mm" | 
| 477 |  |  |  |  |  |  | :                        "$logbase"; | 
| 478 |  |  |  |  |  |  |  | 
| 479 | 5 | 50 |  |  |  | 16 | print "get_log_dir: using $logdir\n" if $debug > 1; | 
| 480 | 5 |  |  |  |  | 27 | return $logdir; | 
| 481 |  |  |  |  |  |  | }; | 
| 482 |  |  |  |  |  |  |  | 
| 483 |  |  |  |  |  |  | sub get_log_files { | 
| 484 | 0 |  |  | 0 | 0 | 0 | my $self = shift; | 
| 485 | 0 | 0 |  |  |  | 0 | my $dir = shift or die "missing dir argument"; | 
| 486 |  |  |  |  |  |  |  | 
| 487 | 0 |  |  |  |  | 0 | my @logs = glob("$dir/*.gz"); | 
| 488 |  |  |  |  |  |  |  | 
| 489 | 0 |  |  |  |  | 0 | my $debug  = $self->{debug}; | 
| 490 | 0 |  |  |  |  | 0 | my $REPORT = $self->{report}; | 
| 491 |  |  |  |  |  |  |  | 
| 492 |  |  |  |  |  |  | # make sure we have logs to process | 
| 493 | 0 | 0 | 0 |  |  | 0 | if ( !$logs[0] or $logs[0] eq '' ) { | 
| 494 | 0 |  |  |  |  | 0 | $err = "WARNING: No web server log files found!\n"; | 
| 495 | 0 | 0 |  |  |  | 0 | print $err if $debug; | 
| 496 | 0 |  |  |  |  | 0 | print $REPORT $err; | 
| 497 | 0 |  |  |  |  | 0 | return; | 
| 498 |  |  |  |  |  |  | } | 
| 499 |  |  |  |  |  |  |  | 
| 500 | 0 | 0 |  |  |  | 0 | if ( $debug > 1 ) { | 
| 501 | 0 |  |  |  |  | 0 | print "found logfiles \n\t" . join( "\n\t", @logs ) . "\n"; | 
| 502 |  |  |  |  |  |  | }; | 
| 503 |  |  |  |  |  |  |  | 
| 504 | 0 |  |  |  |  | 0 | return @logs; | 
| 505 |  |  |  |  |  |  | }; | 
| 506 |  |  |  |  |  |  |  | 
| 507 |  |  |  |  |  |  | sub get_config { | 
| 508 | 1 |  |  | 1 | 0 | 6 | my ($self, $file, $config) = @_; | 
| 509 |  |  |  |  |  |  |  | 
| 510 | 1 | 50 | 33 |  |  | 8 | if ( $config && ref $config eq 'HASH' ) { | 
| 511 | 0 |  |  |  |  | 0 | $self->{conf} = $config; | 
| 512 | 0 |  |  |  |  | 0 | return $config; | 
| 513 |  |  |  |  |  |  | } | 
| 514 |  |  |  |  |  |  |  | 
| 515 | 1 | 50 | 33 |  |  | 5 | return $self->{conf} if (defined $self->{conf} && ref $self->{conf}); | 
| 516 |  |  |  |  |  |  |  | 
| 517 | 1 |  | 50 |  |  | 7 | $self->{conf} = $util->parse_config( $file || 'logmonster.conf' ); | 
| 518 | 1 |  |  |  |  | 4 | return $self->{conf}; | 
| 519 |  |  |  |  |  |  | }; | 
| 520 |  |  |  |  |  |  |  | 
| 521 |  |  |  |  |  |  | sub get_util { | 
| 522 | 4 |  |  | 4 | 0 | 344 | my $self = shift; | 
| 523 | 4 | 100 |  |  |  | 18 | return $util if ref $util; | 
| 524 | 3 |  |  | 3 |  | 25 | use lib 'lib'; | 
|  | 3 |  |  |  |  | 7 |  | 
|  | 3 |  |  |  |  | 32 |  | 
| 525 | 3 |  |  |  |  | 32 | require Apache::Logmonster::Utility; | 
| 526 | 3 |  |  |  |  | 43 | $self->{util} = $util = Apache::Logmonster::Utility->new( debug => $self->{debug} ); | 
| 527 | 3 |  |  |  |  | 7 | return $util; | 
| 528 |  |  |  |  |  |  | }; | 
| 529 |  |  |  |  |  |  |  | 
| 530 |  |  |  |  |  |  | sub report_hits { | 
| 531 |  |  |  |  |  |  |  | 
| 532 | 1 |  |  | 1 | 1 | 29 | my $self   = shift; | 
| 533 | 1 |  |  |  |  | 2 | my $logdir = shift; | 
| 534 | 1 |  |  |  |  | 4 | my $debug  = $self->{'debug'}; | 
| 535 |  |  |  |  |  |  |  | 
| 536 | 1 |  |  |  |  | 3 | $self->{'debug'} = 0;    # hush get_log_dir | 
| 537 | 1 |  | 33 |  |  | 5 | $logdir ||= $self->get_log_dir(); | 
| 538 |  |  |  |  |  |  |  | 
| 539 | 1 |  |  |  |  | 5 | my $vhost_count_summary = $logdir . "/HitsPerVhost.txt"; | 
| 540 |  |  |  |  |  |  |  | 
| 541 |  |  |  |  |  |  | # fail if $vhost_count_summary is not present | 
| 542 | 1 | 50 | 33 |  |  | 54 | unless ( $vhost_count_summary | 
|  |  |  | 33 |  |  |  |  | 
| 543 |  |  |  |  |  |  | && -e $vhost_count_summary | 
| 544 |  |  |  |  |  |  | && -f $vhost_count_summary ) | 
| 545 |  |  |  |  |  |  | { | 
| 546 | 0 |  |  |  |  | 0 | print | 
| 547 |  |  |  |  |  |  | "report_hits: ERROR: hit summary file is missing. It should have" | 
| 548 |  |  |  |  |  |  | . " been at: $vhost_count_summary. Report FAILURE.\n"; | 
| 549 | 0 |  |  |  |  | 0 | return; | 
| 550 |  |  |  |  |  |  | } | 
| 551 |  |  |  |  |  |  |  | 
| 552 | 1 | 50 |  |  |  | 8 | print "report_hits: reporting summary from file $vhost_count_summary\n" | 
| 553 |  |  |  |  |  |  | if $debug; | 
| 554 |  |  |  |  |  |  |  | 
| 555 | 1 |  |  |  |  | 11 | my @lines = $util->file_read( $vhost_count_summary, | 
| 556 |  |  |  |  |  |  | debug => $debug, | 
| 557 |  |  |  |  |  |  | fatal => 0, | 
| 558 |  |  |  |  |  |  | ); | 
| 559 |  |  |  |  |  |  |  | 
| 560 | 1 |  |  |  |  | 2 | my $lines_in_array = @lines; | 
| 561 |  |  |  |  |  |  |  | 
| 562 | 1 | 50 |  |  |  | 5 | if ( $lines_in_array > 0 ) { | 
| 563 | 1 |  |  |  |  | 143 | print join( ':', @lines ) . "\n"; | 
| 564 | 1 |  |  |  |  | 12 | return 1; | 
| 565 |  |  |  |  |  |  | } | 
| 566 |  |  |  |  |  |  |  | 
| 567 | 0 | 0 |  |  |  | 0 | print "report_hits: no entries found!\n" if $debug; | 
| 568 | 0 |  |  |  |  | 0 | return; | 
| 569 |  |  |  |  |  |  | }; | 
| 570 |  |  |  |  |  |  |  | 
| 571 |  |  |  |  |  |  | sub report_close { | 
| 572 | 2 |  |  | 2 | 0 | 485 | my $self = shift; | 
| 573 | 2 |  |  |  |  | 8 | my $fh   = shift; | 
| 574 |  |  |  |  |  |  |  | 
| 575 | 2 | 50 |  |  |  | 13 | if ($fh) { | 
| 576 | 2 |  |  |  |  | 101 | close($fh); | 
| 577 | 2 |  |  |  |  | 35 | return 1; | 
| 578 |  |  |  |  |  |  | } | 
| 579 |  |  |  |  |  |  |  | 
| 580 | 0 |  |  |  |  | 0 | carp "report_close: was not passed a valid filehandle!"; | 
| 581 | 0 |  |  |  |  | 0 | return; | 
| 582 |  |  |  |  |  |  | }; | 
| 583 |  |  |  |  |  |  |  | 
| 584 |  |  |  |  |  |  | sub report_open { | 
| 585 | 2 |  |  | 2 | 0 | 725 | my $self  = shift; | 
| 586 | 2 |  |  |  |  | 3 | my $vhost  = shift; | 
| 587 | 2 |  |  |  |  | 4 | my $debug = $self->{'debug'}; | 
| 588 |  |  |  |  |  |  |  | 
| 589 | 2 | 50 |  |  |  | 8 | $vhost || croak "report_open: no filename passed!"; | 
| 590 |  |  |  |  |  |  |  | 
| 591 | 2 |  |  |  |  | 6 | my $logdir = $self->get_log_dir(); | 
| 592 |  |  |  |  |  |  |  | 
| 593 | 2 | 50 | 33 |  |  | 64 | unless ( $logdir && -w $logdir ) { | 
| 594 | 2 |  |  |  |  | 488 | print "\tNOTICE!\nreport_open: logdir $logdir is not writable!\n"; | 
| 595 | 2 |  |  |  |  | 5 | $logdir = "/tmp"; | 
| 596 |  |  |  |  |  |  | } | 
| 597 |  |  |  |  |  |  |  | 
| 598 | 2 |  |  |  |  | 9 | my $report_file = "$logdir/$vhost.txt"; | 
| 599 | 2 |  |  |  |  | 4 | my $REPORT; | 
| 600 |  |  |  |  |  |  |  | 
| 601 | 2 | 50 |  |  |  | 304 | if ( !open $REPORT, ">", $report_file ) { | 
| 602 | 0 |  |  |  |  | 0 | carp "couldn't open $report_file for write: $!"; | 
| 603 | 0 |  |  |  |  | 0 | return; | 
| 604 |  |  |  |  |  |  | } | 
| 605 |  |  |  |  |  |  |  | 
| 606 | 2 | 50 |  |  |  | 6 | print "\n ***  this report is saved in $report_file *** \n" if $debug; | 
| 607 | 2 |  |  |  |  | 10 | return $REPORT; | 
| 608 |  |  |  |  |  |  | }; | 
| 609 |  |  |  |  |  |  |  | 
| 610 |  |  |  |  |  |  | sub sort_vhost_logs { | 
| 611 |  |  |  |  |  |  |  | 
| 612 |  |  |  |  |  |  | ############################################ | 
| 613 |  |  |  |  |  |  | # Usage      : see t/Logmonster.t for usage example | 
| 614 |  |  |  |  |  |  | # Purpose    : since the log entries for each host are concatenated, they are | 
| 615 |  |  |  |  |  |  | #              no longer in cronological order. Most stats post-processors | 
| 616 |  |  |  |  |  |  | #              require that log entries be in chrono order so this sorts them | 
| 617 |  |  |  |  |  |  | #              based on their log entry date, which also resolves any timezone | 
| 618 |  |  |  |  |  |  | #              differences. | 
| 619 |  |  |  |  |  |  | # Returns    : boolean, 1 for success | 
| 620 |  |  |  |  |  |  | # Parameters : conf - hashref of setting from logmonster.conf | 
| 621 |  |  |  |  |  |  | #              report | 
| 622 |  |  |  |  |  |  |  | 
| 623 | 0 |  |  | 0 | 1 |  | my $self   = shift; | 
| 624 | 0 |  |  |  |  |  | my $debug  = $self->{'debug'}; | 
| 625 | 0 |  |  |  |  |  | my $conf   = $self->{'conf'}; | 
| 626 | 0 |  |  |  |  |  | my $REPORT = $self->{'report'}; | 
| 627 |  |  |  |  |  |  |  | 
| 628 | 0 |  |  |  |  |  | my ( %beastie, %sortme ); | 
| 629 |  |  |  |  |  |  |  | 
| 630 | 0 |  | 0 |  |  |  | my $dir = $conf->{'tmpdir'} || croak "tmpdir not set in \$conf"; | 
| 631 |  |  |  |  |  |  |  | 
| 632 | 0 | 0 |  |  |  |  | if ( $self->{'host_count'} < 2 ) { | 
| 633 | 0 | 0 |  |  |  |  | print "sort_vhost_logs: only one log host, skipping sort.\n" | 
| 634 |  |  |  |  |  |  | if $debug; | 
| 635 | 0 |  |  |  |  |  | return 1;    # sort not needed with only one host | 
| 636 |  |  |  |  |  |  | } | 
| 637 |  |  |  |  |  |  |  | 
| 638 | 0 | 0 |  |  |  |  | $self->_progress_begin("sort_vhost_logs: sorting each vhost logfile...") | 
| 639 |  |  |  |  |  |  | if $debug == 1; | 
| 640 |  |  |  |  |  |  |  | 
| 641 | 0 |  |  |  |  |  | my $lines = 0; | 
| 642 | 0 |  |  |  |  |  | my ($SORTED, $UNSORTED); | 
| 643 |  |  |  |  |  |  |  | 
| 644 |  |  |  |  |  |  | VHOST_FILE: | 
| 645 | 0 |  |  |  |  |  | foreach | 
| 646 |  |  |  |  |  |  | my $file ( $util->get_dir_files( "$dir/doms", fatal => 0 ) ) | 
| 647 |  |  |  |  |  |  | { | 
| 648 | 0 |  |  |  |  |  | undef %beastie;    # clear the hash | 
| 649 | 0 |  |  |  |  |  | undef %sortme; | 
| 650 |  |  |  |  |  |  |  | 
| 651 | 0 | 0 |  |  |  |  | if ( -s $file > 10000000 ) { | 
| 652 | 0 | 0 |  |  |  |  | print "\nsort_vhost_logs: logfile $file is greater than 10MB\n" | 
| 653 |  |  |  |  |  |  | if $debug; | 
| 654 | 0 |  |  |  |  |  | print $REPORT | 
| 655 |  |  |  |  |  |  | "sort_vhost_logs: logfile $file is greater than 10MB\n"; | 
| 656 |  |  |  |  |  |  | } | 
| 657 |  |  |  |  |  |  |  | 
| 658 | 0 | 0 |  |  |  |  | unless ( open $UNSORTED, '<', $file ) { | 
| 659 | 0 |  |  |  |  |  | warn | 
| 660 |  |  |  |  |  |  | "\nsort_vhost_logs: WARN: could not open input file $file: $!"; | 
| 661 | 0 |  |  |  |  |  | next VHOST_FILE; | 
| 662 |  |  |  |  |  |  | } | 
| 663 |  |  |  |  |  |  |  | 
| 664 |  |  |  |  |  |  | # make sure we can write out the results before doing all the work | 
| 665 | 0 | 0 |  |  |  |  | unless ( open $SORTED, ">", "$file.sorted" ) { | 
| 666 | 0 | 0 |  |  |  |  | print | 
| 667 |  |  |  |  |  |  | "\n sort_vhost_logs: FAILED: could not open output file $file: $!\n" | 
| 668 |  |  |  |  |  |  | if $debug; | 
| 669 | 0 |  |  |  |  |  | next VHOST_FILE; | 
| 670 |  |  |  |  |  |  | } | 
| 671 |  |  |  |  |  |  |  | 
| 672 | 0 | 0 |  |  |  |  | $self->_progress_begin("    sorting $file...") if $debug > 1; | 
| 673 |  |  |  |  |  |  |  | 
| 674 | 0 |  |  |  |  |  | while (<$UNSORTED>) { | 
| 675 | 0 | 0 |  |  |  |  | $self->_progress_continue() if $debug > 1; | 
| 676 | 0 |  |  |  |  |  | chomp; | 
| 677 |  |  |  |  |  |  | ### | 
| 678 |  |  |  |  |  |  | # Per Earl Ruby, switched from / / to /\s+/ so that naughty modules like | 
| 679 |  |  |  |  |  |  | # Apache::Register that insert extra spaces in the Log output won't mess | 
| 680 |  |  |  |  |  |  | # up logmonsters parsing. | 
| 681 |  |  |  |  |  |  | #    @log_entry_fields = split(/ /, $_)  =>  @log.. = split(/\s+/, $_) | 
| 682 |  |  |  |  |  |  | ### | 
| 683 |  |  |  |  |  |  | #    sample log entry | 
| 684 |  |  |  |  |  |  | #216.220.22.182 - - [16/Jun/2004:09:37:51 -0400] "GET /images/google-g.jpg HTTP/1.1" 200 539 "http://www.tnpi.biz/internet/mail/toaster/" "Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.6) Gecko/20040113" www.thenetworkpeople.biz | 
| 685 |  |  |  |  |  |  |  | 
| 686 |  |  |  |  |  |  | # From an Apache log entry, we first split apart the line based on whitespace | 
| 687 |  |  |  |  |  |  |  | 
| 688 |  |  |  |  |  |  | my @log_entry_fields | 
| 689 | 0 |  |  |  |  |  | = split( /\s+/, $_ );    # split the log entry into fields | 
| 690 |  |  |  |  |  |  |  | 
| 691 |  |  |  |  |  |  | # Then we use substr to extract the middle 26 characters: | 
| 692 |  |  |  |  |  |  | #   16/Jun/2004:09:37:51 -0400 | 
| 693 |  |  |  |  |  |  | # | 
| 694 |  |  |  |  |  |  | # We could also use a regexp to do this but substr is more efficient and we | 
| 695 |  |  |  |  |  |  | # can safely expect the date format of ApacheLog to remain constant. | 
| 696 |  |  |  |  |  |  |  | 
| 697 | 0 |  |  |  |  |  | my $rawdate | 
| 698 |  |  |  |  |  |  | = substr( "$log_entry_fields[3] $log_entry_fields[4]", 1, | 
| 699 |  |  |  |  |  |  | 26 ); | 
| 700 |  |  |  |  |  |  |  | 
| 701 |  |  |  |  |  |  | # then we convert that date string to a numeric string that we can use for sorting. | 
| 702 |  |  |  |  |  |  |  | 
| 703 | 0 |  |  |  |  |  | my $date = str2time($rawdate); | 
| 704 |  |  |  |  |  |  |  | 
| 705 |  |  |  |  |  |  | # Finally, we put the entire line into the hash beastie (keyed with $lines, | 
| 706 |  |  |  |  |  |  | # an incrementing number) and create a second hash ($sortme) with the | 
| 707 |  |  |  |  |  |  | # same key but the value is the timestamp. | 
| 708 |  |  |  |  |  |  |  | 
| 709 | 0 |  |  |  |  |  | $beastie{$lines} = $_; | 
| 710 | 0 |  |  |  |  |  | $sortme{$lines}  = $date; | 
| 711 |  |  |  |  |  |  |  | 
| 712 | 0 |  |  |  |  |  | $lines++; | 
| 713 |  |  |  |  |  |  | } | 
| 714 | 0 | 0 |  |  |  |  | close($UNSORTED) | 
| 715 |  |  |  |  |  |  | || croak "sort_vhost_logs: Gack, could not close $file: $!\n"; | 
| 716 | 0 | 0 |  |  |  |  | $self->_progress_end() if $debug > 1; | 
| 717 |  |  |  |  |  |  |  | 
| 718 |  |  |  |  |  |  | # We create an array (because elements in arrays stay in order) of line | 
| 719 |  |  |  |  |  |  | # numbers based on the sortme hash, sorted based on date | 
| 720 |  |  |  |  |  |  |  | 
| 721 | 0 | 0 |  |  |  |  | my @sorted = sort { | 
| 722 | 0 |  |  |  |  |  | ( $sortme{$a} <=> $sortme{$b} ) | 
| 723 |  |  |  |  |  |  | || ( $sortme{$a} cmp $sortme{$b} ); | 
| 724 |  |  |  |  |  |  | } ( keys(%sortme) ); | 
| 725 |  |  |  |  |  |  |  | 
| 726 | 0 |  |  |  |  |  | foreach (@sorted) { | 
| 727 |  |  |  |  |  |  |  | 
| 728 |  |  |  |  |  |  | # iterate through @sorted, adding the corresponding lines from %beastie to the file | 
| 729 | 0 |  |  |  |  |  | print $SORTED "$beastie{$_}\n"; | 
| 730 |  |  |  |  |  |  | } | 
| 731 | 0 |  |  |  |  |  | close $SORTED; | 
| 732 |  |  |  |  |  |  |  | 
| 733 | 0 | 0 |  |  |  |  | move( "$file.sorted", $file ) | 
| 734 |  |  |  |  |  |  | or carp | 
| 735 |  |  |  |  |  |  | "sort_vhost_logs: could not replace $file with $file.sorted: $!\n"; | 
| 736 |  |  |  |  |  |  |  | 
| 737 | 0 | 0 |  |  |  |  | $self->_progress_continue() if $debug == 1; | 
| 738 |  |  |  |  |  |  | } | 
| 739 |  |  |  |  |  |  |  | 
| 740 | 0 | 0 |  |  |  |  | $self->_progress_end() if $debug == 1; | 
| 741 |  |  |  |  |  |  |  | 
| 742 | 0 |  |  |  |  |  | return 1; | 
| 743 |  |  |  |  |  |  | }; | 
| 744 |  |  |  |  |  |  |  | 
| 745 |  |  |  |  |  |  | sub split_logs_to_vhosts { | 
| 746 | 0 |  |  | 0 | 1 |  | my $self = shift; | 
| 747 |  |  |  |  |  |  |  | 
| 748 | 0 |  |  |  |  |  | my $debug  = $self->{'debug'}; | 
| 749 | 0 |  |  |  |  |  | my $conf   = $self->{'conf'}; | 
| 750 | 0 |  |  |  |  |  | my $REPORT = $self->{'report'}; | 
| 751 |  |  |  |  |  |  |  | 
| 752 | 0 |  |  |  |  |  | my ( %count, %orphans, $bad ); | 
| 753 |  |  |  |  |  |  |  | 
| 754 | 0 |  |  |  |  |  | my $dir = $conf->{'tmpdir'};      # normally /var/log/(apache|http)/tmp | 
| 755 | 0 |  | 0 |  |  |  | my $countlog = $conf->{'CountLog'} || 1; | 
| 756 |  |  |  |  |  |  |  | 
| 757 | 0 |  |  |  |  |  | my @webserver_logs = $self->get_log_files($dir); | 
| 758 |  |  |  |  |  |  |  | 
| 759 | 0 | 0 |  |  |  |  | if ( !-d "$dir/doms" ) { | 
| 760 | 0 | 0 |  |  |  |  | if ( !mkdir "$dir/doms", oct('0755') ) { | 
| 761 | 0 |  |  |  |  |  | $err = "FATAL: couldn't create $dir/doms: $!\n"; | 
| 762 | 0 |  |  |  |  |  | print $REPORT $err; | 
| 763 | 0 |  |  |  |  |  | die $err; | 
| 764 |  |  |  |  |  |  | } | 
| 765 |  |  |  |  |  |  | } | 
| 766 |  |  |  |  |  |  |  | 
| 767 | 0 | 0 |  |  |  |  | print "\t output working dirs is $dir/doms\n" if $debug > 1; | 
| 768 |  |  |  |  |  |  |  | 
| 769 |  |  |  |  |  |  | # use my Regexp::Log::Monster | 
| 770 | 0 |  |  |  |  |  | my $regexp_parser = Regexp::Log::Monster->new( | 
| 771 |  |  |  |  |  |  | format  => ':logmonster', | 
| 772 |  |  |  |  |  |  | capture => [qw( host vhost status bytes ref ua )], | 
| 773 |  |  |  |  |  |  | ); | 
| 774 |  |  |  |  |  |  |  | 
| 775 |  |  |  |  |  |  | # Apache fields | 
| 776 |  |  |  |  |  |  | # host, ident, auth, date, request, status, bytes, referer, agent, vhost | 
| 777 |  |  |  |  |  |  | # returned from parser (available for capture) as: | 
| 778 |  |  |  |  |  |  | # host, rfc, authuser, date, ts, request, req, status, bytes, referer, ref, useragent, ua, vhost | 
| 779 |  |  |  |  |  |  |  | 
| 780 | 0 |  |  |  |  |  | my @captured_fields = $regexp_parser->capture; | 
| 781 | 0 |  |  |  |  |  | my $re              = $regexp_parser->regexp; | 
| 782 |  |  |  |  |  |  |  | 
| 783 | 0 |  |  |  |  |  | foreach my $file (@webserver_logs) { | 
| 784 |  |  |  |  |  |  |  | 
| 785 | 0 | 0 |  |  |  |  | my $gz = gzopen( $file, 'rb' ) or do { | 
| 786 | 0 |  |  |  |  |  | warn "Couldn't open $file: $gzerrno"; | 
| 787 | 0 |  |  |  |  |  | next; | 
| 788 |  |  |  |  |  |  | }; | 
| 789 |  |  |  |  |  |  |  | 
| 790 | 0 |  |  |  |  |  | my $lines = 0; | 
| 791 | 0 | 0 |  |  |  |  | $self->_progress_begin("\t parsing entries from $file") if $debug; | 
| 792 |  |  |  |  |  |  |  | 
| 793 | 0 |  |  |  |  |  | while ( $gz->gzreadline($_) > 0 ) { | 
| 794 | 0 |  |  |  |  |  | chomp $_; | 
| 795 | 0 |  |  |  |  |  | $lines++; | 
| 796 | 0 | 0 | 0 |  |  |  | $self->_progress_continue() if ( $debug && $lines =~ /00$/ ); | 
| 797 |  |  |  |  |  |  |  | 
| 798 | 0 |  |  |  |  |  | my %data; | 
| 799 | 0 |  |  |  |  |  | @data{@captured_fields} = /$re/;  # no need for /o, a compiled regexp | 
| 800 |  |  |  |  |  |  |  | 
| 801 |  |  |  |  |  |  | # make sure the log format has the vhost tag appended | 
| 802 | 0 |  |  |  |  |  | my $vhost = $data{'vhost'}; | 
| 803 | 0 | 0 | 0 |  |  |  | if ( !$vhost || $vhost eq '-' ) { | 
| 804 |  |  |  |  |  |  | #print "Invalid log entries! Read the FAQ!\n" if $debug; | 
| 805 | 0 | 0 |  |  |  |  | print $_ . "\n" if $debug > 2; | 
| 806 | 0 |  |  |  |  |  | $vhost = $conf->{default_vhost}; | 
| 807 | 0 |  |  |  |  |  | $bad++; | 
| 808 |  |  |  |  |  |  | }; | 
| 809 |  |  |  |  |  |  |  | 
| 810 | 0 |  |  |  |  |  | $vhost = lc($vhost); | 
| 811 |  |  |  |  |  |  |  | 
| 812 | 0 |  |  |  |  |  | $self->spam_check(\%data, \%count); | 
| 813 |  |  |  |  |  |  |  | 
| 814 | 0 | 0 |  |  |  |  | if ( ! $fhs{$vhost} ) { | 
| 815 | 0 |  |  |  |  |  | $self->open_vhost_handle( $vhost ); | 
| 816 |  |  |  |  |  |  | }; | 
| 817 | 0 | 0 |  |  |  |  | if ( $fhs{$vhost} ) { | 
| 818 | 0 |  |  |  |  |  | my $fh = $fhs{$vhost}; | 
| 819 | 0 |  |  |  |  |  | print $fh "$_\n"; | 
| 820 | 0 |  |  |  |  |  | $count{$vhost}++; | 
| 821 | 0 |  |  |  |  |  | next; | 
| 822 |  |  |  |  |  |  | }; | 
| 823 | 0 | 0 |  |  |  |  | print "\nthe main domain for $vhost is missing!\n" if $debug > 1; | 
| 824 | 0 |  |  |  |  |  | $orphans{$vhost} = $vhost; | 
| 825 |  |  |  |  |  |  | }; | 
| 826 | 0 |  |  |  |  |  | $gz->gzclose(); | 
| 827 |  |  |  |  |  |  |  | 
| 828 | 0 | 0 |  |  |  |  | $self->_progress_end() if $debug; | 
| 829 |  |  |  |  |  |  | }; | 
| 830 |  |  |  |  |  |  |  | 
| 831 | 0 |  |  |  |  |  | $self->report_matches( \%count, \%orphans); | 
| 832 | 0 |  |  |  |  |  | $self->report_spam_hits( \%count ); | 
| 833 | 0 |  |  |  |  |  | $self->report_bad_hits( $bad ); | 
| 834 |  |  |  |  |  |  |  | 
| 835 | 0 |  |  |  |  |  | return \%fhs; | 
| 836 |  |  |  |  |  |  | }; | 
| 837 |  |  |  |  |  |  |  | 
| 838 |  |  |  |  |  |  | sub spam_check { | 
| 839 | 0 |  |  | 0 | 0 |  | my ($self, $data, $count) = @_; | 
| 840 | 0 |  |  |  |  |  | my $conf = $self->{conf}; | 
| 841 |  |  |  |  |  |  |  | 
| 842 | 0 | 0 |  |  |  |  | return if ! $conf->{spam_check}; | 
| 843 |  |  |  |  |  |  |  | 
| 844 | 0 |  |  |  |  |  | my $spam_score = 0; | 
| 845 |  |  |  |  |  |  |  | 
| 846 |  |  |  |  |  |  | # check for spam quotient | 
| 847 | 0 | 0 |  |  |  |  | if ( $data->{status} ) { | 
| 848 | 0 | 0 |  |  |  |  | if ( $data->{status} == 404 ) {    # check for 404 status | 
| 849 | 0 |  |  |  |  |  | $spam_score++; # a 404 alone is not a sign of naughtiness | 
| 850 |  |  |  |  |  |  | } | 
| 851 |  |  |  |  |  |  |  | 
| 852 | 0 | 0 |  |  |  |  | if ( $data->{status} == 412 ) { # httpd config slapping them | 
| 853 | 0 |  |  |  |  |  | $spam_score++; | 
| 854 |  |  |  |  |  |  | } | 
| 855 |  |  |  |  |  |  |  | 
| 856 | 0 | 0 |  |  |  |  | if ( $data->{status} == 403 ) { # httpd config slapping them | 
| 857 | 0 |  |  |  |  |  | $spam_score += 2; | 
| 858 |  |  |  |  |  |  | } | 
| 859 |  |  |  |  |  |  | } | 
| 860 |  |  |  |  |  |  |  | 
| 861 |  |  |  |  |  |  | # nearly all of my referer spam has a # ending the referer string | 
| 862 | 0 | 0 | 0 |  |  |  | if ( $data->{ref} && $data->{ref} =~ /#$/ ) { | 
| 863 | 0 |  |  |  |  |  | $spam_score += 2; | 
| 864 |  |  |  |  |  |  | } | 
| 865 |  |  |  |  |  |  |  | 
| 866 |  |  |  |  |  |  | # should check for invalid/suspect useragent strings here | 
| 867 | 0 | 0 |  |  |  |  | if ( $data->{ua} ) { | 
| 868 | 0 | 0 |  |  |  |  | $spam_score += | 
|  |  | 0 |  |  |  |  |  | 
| 869 |  |  |  |  |  |  | $data->{ua} =~ /crazy/ixms ? 1 | 
| 870 |  |  |  |  |  |  | : $data->{ua} =~ /email/i    ? 3 | 
| 871 |  |  |  |  |  |  | #           : $data->{ua} =~ /windows/   ? 1 | 
| 872 |  |  |  |  |  |  | : 0; | 
| 873 |  |  |  |  |  |  | } | 
| 874 |  |  |  |  |  |  |  | 
| 875 |  |  |  |  |  |  | # if we fail more than one spam test... | 
| 876 | 0 | 0 |  |  |  |  | if ( $spam_score > 2 ) { | 
| 877 | 0 |  |  |  |  |  | $count->{spam}++; | 
| 878 | 0 | 0 | 0 |  |  |  | if ( defined $data->{bytes} | 
| 879 |  |  |  |  |  |  | && $data->{bytes} =~ /[0-9]+/ ) | 
| 880 |  |  |  |  |  |  | { | 
| 881 | 0 |  |  |  |  |  | $count->{bytes} += $data->{bytes}; | 
| 882 |  |  |  |  |  |  | } | 
| 883 |  |  |  |  |  |  |  | 
| 884 | 0 |  |  |  |  |  | $count->{spam_agents}{ $data->{ua} }++; | 
| 885 | 0 |  |  |  |  |  | $count->{spam_referers}{ $data->{ref} }++; | 
| 886 |  |  |  |  |  |  |  | 
| 887 |  |  |  |  |  |  | #				printf "%3s - %30s - %30s \n", $data->{status}, | 
| 888 |  |  |  |  |  |  | #				$data->{ref}, $data->{ua}; | 
| 889 | 0 |  |  |  |  |  | next;    # skips processing the line | 
| 890 |  |  |  |  |  |  | } | 
| 891 |  |  |  |  |  |  |  | 
| 892 |  |  |  |  |  |  | # TODO: also keep track of ham referers, and print in referer spam reports, so | 
| 893 |  |  |  |  |  |  | # that I can see which UA are entirely spammers and block them in my Apache | 
| 894 |  |  |  |  |  |  | # config. | 
| 895 |  |  |  |  |  |  | #   else { | 
| 896 |  |  |  |  |  |  | #       $count->{ham_referers}{$data->{ref}}++; | 
| 897 |  |  |  |  |  |  | #   } | 
| 898 |  |  |  |  |  |  | }; | 
| 899 |  |  |  |  |  |  |  | 
| 900 |  |  |  |  |  |  | sub open_vhost_handle { | 
| 901 | 0 |  |  | 0 | 0 |  | my $self = shift; | 
| 902 | 0 |  |  |  |  |  | my $vhost = shift; | 
| 903 |  |  |  |  |  |  |  | 
| 904 | 0 |  |  |  |  |  | my $fh = new FileHandle;   # create a file handle for each ServerName | 
| 905 | 0 |  |  |  |  |  | $fhs{$vhost} = $fh;         # store in a hash keyed off the domain name | 
| 906 |  |  |  |  |  |  |  | 
| 907 | 0 |  |  |  |  |  | my $debug  = $self->{debug}; | 
| 908 |  |  |  |  |  |  |  | 
| 909 | 0 |  |  |  |  |  | my $dir = $self->{conf}{tmpdir};    # normally /var/log/(apache|http)/tmp | 
| 910 | 0 | 0 |  |  |  |  | open( $fh, '>', "$dir/doms/$vhost" ) and do { | 
| 911 | 0 | 0 |  |  |  |  | if ( $debug > 1 ) { | 
| 912 | 0 |  |  |  |  |  | print "            "; | 
| 913 | 0 |  |  |  |  |  | printf "opening file for %35s...ok\n", $vhost; | 
| 914 |  |  |  |  |  |  | } | 
| 915 | 0 |  |  |  |  |  | return $fh; | 
| 916 |  |  |  |  |  |  | }; | 
| 917 |  |  |  |  |  |  |  | 
| 918 | 0 |  |  |  |  |  | print "            "; | 
| 919 | 0 |  |  |  |  |  | printf "opening file for %35s...FAILED.\n", $vhost; | 
| 920 | 0 |  |  |  |  |  | return; | 
| 921 |  |  |  |  |  |  | }; | 
| 922 |  |  |  |  |  |  |  | 
| 923 |  |  |  |  |  |  | sub report_bad_hits { | 
| 924 | 0 |  |  | 0 | 0 |  | my ($self, $bad) = @_; | 
| 925 |  |  |  |  |  |  |  | 
| 926 | 0 | 0 |  |  |  |  | return if ! $bad; | 
| 927 |  |  |  |  |  |  |  | 
| 928 | 0 |  |  |  |  |  | my $conf     = $self->{conf}; | 
| 929 | 0 |  |  |  |  |  | my $debug    = $self->{debug}; | 
| 930 | 0 |  |  |  |  |  | my $REPORT   = $self->{report}; | 
| 931 |  |  |  |  |  |  |  | 
| 932 | 0 | 0 |  |  |  |  | printf "Default: %15.0f lines to $conf->{default_vhost}.\n", $bad if $debug; | 
| 933 | 0 |  |  |  |  |  | my $msg = "\nSee the FAQ (logging) to see why records get assigned to the default vhost.\n\n"; | 
| 934 | 0 | 0 |  |  |  |  | print $msg if $debug; | 
| 935 | 0 |  |  |  |  |  | print $REPORT $msg; | 
| 936 |  |  |  |  |  |  | }; | 
| 937 |  |  |  |  |  |  |  | 
| 938 |  |  |  |  |  |  | sub report_matches { | 
| 939 | 0 |  |  | 0 | 0 |  | my ($self, $count, $orphans ) = @_; | 
| 940 |  |  |  |  |  |  |  | 
| 941 | 0 |  |  |  |  |  | my $debug    = $self->{debug}; | 
| 942 | 0 |  |  |  |  |  | my $conf     = $self->{conf}; | 
| 943 | 0 |  |  |  |  |  | my $REPORT   = $self->{report}; | 
| 944 | 0 |  | 0 |  |  |  | my $countlog = $conf->{CountLog} || 1; | 
| 945 |  |  |  |  |  |  |  | 
| 946 | 0 | 0 |  |  |  |  | print "\n\t\t\t Matched Entries\n\n" if $debug; | 
| 947 | 0 |  |  |  |  |  | print $REPORT "\n\t\t Matched Entries\n\n"; | 
| 948 |  |  |  |  |  |  |  | 
| 949 | 0 |  |  |  |  |  | my $HitLog = ''; | 
| 950 | 0 | 0 |  |  |  |  | $HitLog = $self->report_open("HitsPerVhost") if $countlog; | 
| 951 |  |  |  |  |  |  |  | 
| 952 | 0 |  |  |  |  |  | foreach my $key ( keys %fhs ) { | 
| 953 | 0 |  |  |  |  |  | close $fhs{$key}; | 
| 954 |  |  |  |  |  |  |  | 
| 955 | 0 | 0 |  |  |  |  | if ( $count->{$key} ) { | 
| 956 | 0 | 0 |  |  |  |  | printf "         %15.0f lines to %s\n", $count->{$key}, $key if $debug; | 
| 957 | 0 |  |  |  |  |  | printf $REPORT "         %15.0f lines to %s\n", $count->{$key}, $key; | 
| 958 | 0 | 0 |  |  |  |  | print $HitLog "$key:$count->{$key}\n" if $countlog; | 
| 959 |  |  |  |  |  |  | } | 
| 960 |  |  |  |  |  |  | } | 
| 961 | 0 | 0 |  |  |  |  | $self->report_close( $HitLog, $debug ) if $countlog; | 
| 962 |  |  |  |  |  |  |  | 
| 963 | 0 | 0 |  |  |  |  | print "\n" if $debug; | 
| 964 | 0 |  |  |  |  |  | print $REPORT "\n"; | 
| 965 |  |  |  |  |  |  |  | 
| 966 | 0 |  |  |  |  |  | foreach my $key ( keys %$orphans ) { | 
| 967 | 0 | 0 |  |  |  |  | if ( $count->{$key} ) { | 
| 968 | 0 | 0 |  |  |  |  | printf "Orphans: %15.0f lines to %s\n", $count->{$key}, $key if $debug; | 
| 969 | 0 |  |  |  |  |  | printf $REPORT "Orphans: %15.0f lines to %s\n", $count->{$key}, $key; | 
| 970 |  |  |  |  |  |  | } | 
| 971 |  |  |  |  |  |  | } | 
| 972 |  |  |  |  |  |  | }; | 
| 973 |  |  |  |  |  |  |  | 
| 974 |  |  |  |  |  |  | sub report_spam_hits { | 
| 975 | 0 |  |  | 0 | 1 |  | my ($self, $count ) = @_; | 
| 976 |  |  |  |  |  |  |  | 
| 977 | 0 | 0 |  |  |  |  | return if ! $count->{spam}; | 
| 978 |  |  |  |  |  |  |  | 
| 979 | 0 |  |  |  |  |  | my $conf  = $self->{conf}; | 
| 980 | 0 |  |  |  |  |  | my $debug = $self->{debug}; | 
| 981 |  |  |  |  |  |  |  | 
| 982 | 0 | 0 |  |  |  |  | if ( $conf->{report_spam_user_agents} ) { | 
| 983 |  |  |  |  |  |  |  | 
| 984 | 0 | 0 |  |  |  |  | if ( $debug ) { | 
| 985 | 0 |  |  |  |  |  | printf "Referer spammers hit you $count->{spam} times"; | 
| 986 |  |  |  |  |  |  |  | 
| 987 | 0 |  |  |  |  |  | my $bytes = $count->{bytes}; | 
| 988 | 0 | 0 |  |  |  |  | if ( $bytes ) { | 
| 989 | 0 | 0 |  |  |  |  | if    ( $bytes > 1000000000 ) { | 
|  |  | 0 |  |  |  |  |  | 
| 990 | 0 |  |  |  |  |  | $bytes = sprintf "%.2f GB", $bytes / 1000000000; | 
| 991 |  |  |  |  |  |  | } | 
| 992 |  |  |  |  |  |  | elsif ( $bytes > 1000000 ) { | 
| 993 | 0 |  |  |  |  |  | $bytes = sprintf "%.2f MB", $bytes / 1000000; | 
| 994 |  |  |  |  |  |  | } | 
| 995 |  |  |  |  |  |  | else { | 
| 996 | 0 |  |  |  |  |  | $bytes = sprintf "%.2f KB", $bytes / 1000; | 
| 997 |  |  |  |  |  |  | } | 
| 998 |  |  |  |  |  |  |  | 
| 999 | 0 |  |  |  |  |  | print " and wasted $bytes of your bandwidth."; | 
| 1000 |  |  |  |  |  |  | } | 
| 1001 | 0 |  |  |  |  |  | print "\n\n"; | 
| 1002 |  |  |  |  |  |  | }; | 
| 1003 |  |  |  |  |  |  |  | 
| 1004 | 0 |  |  |  |  |  | my $REPORT = $self->{report}; | 
| 1005 | 0 |  |  |  |  |  | printf $REPORT "Referer Spam: %15.0f lines\n", $count->{spam}; | 
| 1006 |  |  |  |  |  |  |  | 
| 1007 | 0 |  |  |  |  |  | my $spamagents = $count->{spam_agents}; | 
| 1008 | 0 |  |  |  |  |  | foreach my $value ( sort { $spamagents->{$b} cmp $spamagents->{$a} } keys %$spamagents ) { | 
|  | 0 |  |  |  |  |  |  | 
| 1009 | 0 |  |  |  |  |  | print "\t $spamagents->{$value} \t $value\n"; | 
| 1010 |  |  |  |  |  |  | } | 
| 1011 |  |  |  |  |  |  | } | 
| 1012 |  |  |  |  |  |  |  | 
| 1013 | 0 | 0 |  |  |  |  | if ( $conf->{report_spam_referrers} ) { # This report can get very long | 
| 1014 | 0 |  |  |  |  |  | my $sr = $count->{spam_referers}; | 
| 1015 | 0 |  |  |  |  |  | foreach ( sort { $sr->{$b} <=> $sr->{$a} } keys %$sr ) { | 
|  | 0 |  |  |  |  |  |  | 
| 1016 | 0 |  |  |  |  |  | print "$sr->{$_} \t $_\n"; | 
| 1017 |  |  |  |  |  |  | } | 
| 1018 |  |  |  |  |  |  | } | 
| 1019 |  |  |  |  |  |  | } | 
| 1020 |  |  |  |  |  |  |  | 
| 1021 |  |  |  |  |  |  |  | 
| 1022 |  |  |  |  |  |  | sub _progress { | 
| 1023 | 0 |  |  | 0 |  |  | my ($self, $mess) = @_; | 
| 1024 | 0 |  |  |  |  |  | print {*STDERR} "$mess.\n"; | 
|  | 0 |  |  |  |  |  |  | 
| 1025 | 0 |  |  |  |  |  | return; | 
| 1026 |  |  |  |  |  |  | }; | 
| 1027 |  |  |  |  |  |  | sub _progress_begin { | 
| 1028 | 0 |  |  | 0 |  |  | my ($self, $phase) = @_; | 
| 1029 | 0 |  |  |  |  |  | print {*STDERR} "$phase..."; | 
|  | 0 |  |  |  |  |  |  | 
| 1030 | 0 |  |  |  |  |  | return; | 
| 1031 |  |  |  |  |  |  | }; | 
| 1032 |  |  |  |  |  |  | sub _progress_continue { | 
| 1033 | 0 |  |  | 0 |  |  | print {*STDERR} '.'; | 
|  | 0 |  |  |  |  |  |  | 
| 1034 | 0 |  |  |  |  |  | return; | 
| 1035 |  |  |  |  |  |  | }; | 
| 1036 |  |  |  |  |  |  | sub _progress_end { | 
| 1037 | 0 |  |  | 0 |  |  | my ($self,$mess) = @_; | 
| 1038 | 0 | 0 |  |  |  |  | if ( $mess ) { | 
| 1039 | 0 |  |  |  |  |  | print {*STDERR} "$mess\n"; | 
|  | 0 |  |  |  |  |  |  | 
| 1040 |  |  |  |  |  |  | } | 
| 1041 |  |  |  |  |  |  | else { | 
| 1042 | 0 |  |  |  |  |  | print {*STDERR} "done\n"; | 
|  | 0 |  |  |  |  |  |  | 
| 1043 |  |  |  |  |  |  | }; | 
| 1044 | 0 |  |  |  |  |  | return; | 
| 1045 |  |  |  |  |  |  | }; | 
| 1046 |  |  |  |  |  |  |  | 
| 1047 |  |  |  |  |  |  | 1; | 
| 1048 |  |  |  |  |  |  | __END__ |