http://paperlined.org/dev/src/pl/apache/parse_logs/walgreens.pl

#!/usr/bin/perl

# Guesstimate when coworkers dropped by, based on the Apache access log


    use strict;
    use warnings;

    use Data::Dumper;
    use ParseLog;
    use Socket;
    #use Search::Binary;

    # hashpopsort(\%h)
    #
    # Takes a reference to a hash whose values are numbers and returns the
    # hash's keys as a list, ordered from the largest value to the smallest
    # (i.e. "most popular first").  The relative order of keys with equal
    # values is not specified.
    sub hashpopsort {
        my ($tally) = @_;
        my @names = keys %$tally;
        return sort { $tally->{$b} <=> $tally->{$a} } @names;
    }

# Open the combined access log and position $fh near its end, so that the
# main loop only has to parse the most recent portion of the file.
my $log_path   = "/var/log/apache/interiot/paperlined.org-combined.log";
my $tail_bytes = 9_000_000;

my $fh;
# Three-argument open keeps the mode separate from the filename, and the
# error message says which file failed and why.
open $fh, '<', $log_path or die "Can't open $log_path: $!";

# Like "tail -c": byte-oriented, not line-oriented (much cheaper).
# Only jump when the file is actually longer than the tail we want.  Seeking
# to a negative absolute position fails and leaves the handle at offset 0, in
# which case discarding a "half-line" would throw away a complete first record.
my $log_size = (-s $fh) || 0;
if ($log_size > $tail_bytes) {
    # whence 2 == SEEK_END: the offset is relative to the end of the file
    seek($fh, -$tail_bytes, 2) or die "Can't seek in $log_path: $!";

    # We most likely landed in the middle of a record; discard this half-line.
    my $partial_line = <$fh>;
}


# Per-IP scratch tallies.  They are declared here but nothing below fills
# them in or reads them.
my (%ips, %ip_pages, %ip_uas);

# The single client address whose visits get reported.
# These IP addresses were scraped ex-post-facto from the Apache logs (based on
# the telltale UserAgent noted inside the loop), not discovered while still
# employed (using insider knowledge).
#     208.12.121.254 = Northrop Grumman
my $target_ip = "63.73.199.69";

# Walk the parsed records one at a time, skip everything that is filtered out
# below, and print one formatted line per remaining hit.
HIT:
while (my $hit = ParseLog::parseCombined($fh, "paperlined.org")) {

    # Records that are never of interest, tested in this order.
    next HIT
        if ParseLog::is_me($hit)
        || ParseLog::is_search_spider($hit)
        || $hit->{url} =~ m#reference/clearcase#si      # seems like it's somebody else, but I could be wrong...
        || ParseLog::is_embedded_content($hit)
        || ParseLog::is_stupid_file($hit);

    # Requests without an authenticated user id get one extra filter.
    #   (disabled alternative:)
    #   next unless (ParseLog::is_friends_static_ip($hit));
    next HIT
        if $hit->{userid} eq '-'
        && ParseLog::is_individual_rss_reader($hit);

    # Disabled alternative: select by user agent instead of by address.
    # The firewall forces all useragents to this:
    #next unless ($hit->{useragent} =~ m#^Mozilla\/4\.0 \(compatible;\)$#i);

    next HIT if $hit->{ipaddr} ne $target_ip;

    # A hit carrying exactly this user agent is actually the cache
    # autonomously refreshing itself, not a person browsing.
    next HIT if $hit->{useragent} =~ m#^Mozilla\/4\.0 \(compatible;\)$#i;

    #print $hit->{line}, "\n";
    my @columns = @{$hit}{qw(ipaddr textdate userid url useragent)};
    printf "%-20s %-10s %-10s %-70s %s\n", @columns;
}

Generated by GNU enscript 1.6.4.