# Source: http://paperlined.org/dev/src/pl/apache/parse_logs/walgreens.pl
#!/usr/bin/perl
# Guesstimate when coworkers dropped by
use strict;
use warnings;
use Data::Dumper;
use ParseLog;
use Socket;
#use Search::Binary;
# hashpopsort($hashref)
#   Returns the keys of the referenced hash, ordered by their values compared
#   numerically, largest value first.
sub hashpopsort {
    my ($count_of) = @_;
    return sort { $count_of->{$b} <=> $count_of->{$a} } keys %{$count_of};
}
# Main script: scan the tail of the Apache combined log and print one line per
# request that came from a single IP address of interest, after dropping
# records rejected by the ParseLog filters below.

my $LOG_PATH   = "/var/log/apache/interiot/paperlined.org-combined.log";
my $TAIL_BYTES = 9_000_000;        # how much of the end of the log to scan
my $TARGET_IP  = "63.73.199.69";   # only requests from this address are printed

open my $fh, '<', $LOG_PATH
    or die "Can't open $LOG_PATH: $!";

# like tail -f, but byte-oriented, not line-oriented (much cheaper).
# Only jump ahead when the log is larger than the window: seeking to before
# the start of the file fails and leaves the position at 0, in which case the
# "half-line" discarded below would really be a complete first record.
my $log_size = -s $fh;
if (defined $log_size and $log_size > $TAIL_BYTES) {
    seek($fh, -$TAIL_BYTES, 2)     # whence 2 = offset is relative to end of file
        or die "Can't seek in $LOG_PATH: $!";
    my $half_line = <$fh>;         # discard this half-line
}

while (my $rec = ParseLog::parseCombined($fh, "paperlined.org")) {
    next if (ParseLog::is_me($rec));
    next if (ParseLog::is_search_spider($rec));
    next if ($rec->{url} =~ m#reference/clearcase#si);   # seems like it's somebody else, but I could be wrong...
    next if (ParseLog::is_embedded_content($rec));
    next if (ParseLog::is_stupid_file($rec));

    # A userid of '-' is treated as "no login"; only those requests get the RSS-reader check.
    if ($rec->{userid} eq '-') {
        #next unless (ParseLog::is_friends_static_ip($rec));
        next if (ParseLog::is_individual_rss_reader($rec));
    }

    #next unless ($rec->{useragent} =~ m#^Mozilla\/4\.0 \(compatible;\)$#i);   # The firewall forces all useragents to this
    # these IP addresses were scraped ex-post-facto from the Apache logs (based on the telltale UserAgent,
    # above), not discovered while still employed (using insider knowledge)
    next unless ($rec->{ipaddr} eq $TARGET_IP);
    # 208.12.121.254 = Northrop Grumman

    next if ($rec->{useragent} =~ m#^Mozilla\/4\.0 \(compatible;\)$#i);   # this is actually the cache autonomously refreshing itself

    #print $rec->{line}, "\n";
    printf "%-20s %-10s %-10s %-70s %s\n",
        $rec->{ipaddr},
        $rec->{textdate},
        $rec->{userid},
        $rec->{url},
        $rec->{useragent};
}

close $fh
    or warn "Can't close $LOG_PATH: $!";
# Generated by GNU enscript 1.6.4.