# Source: http://paperlined.org/dev/src/pl/ahk/parse_post_chunks.pl

#!/usr/bin/perl

# Convert saved AutoHotkey forum post chunks (chunks/<post-id>) into plain-text
# digests, one output file per year of posting.

    use strict;
    use warnings;

    use Data::Dumper;

# Chunk files live in chunks/ and are named after the forum post id they hold.
# Anything whose name is not purely numeric is ignored, because both the
# numeric sort below and the post URL require a numeric id.
my @files;
foreach my $path (glob "chunks/*") {
    (my $name = $path) =~ s#^chunks/##;
    push @files, $name if $name =~ /\A\d+\z/;
}

#open FOUT, ">/home/interiot/public_html/apps/autohotkey/AutoHotkey_Chris_posts.txt" or die $!;  select FOUT;

# One output handle per year, opened (and truncated) exactly once per run.
# Keeping them all open means that a year which shows up again later in the
# id order is appended to instead of having its file truncated a second time.
my %fh_for_year;

# Walk the posts from highest id to lowest and append each one to the text
# file of the year it was posted in.
foreach my $id (sort {$b <=> $a} @files) {
    my $chunk_path = "chunks/$id";
    open my $in_fh, '<', $chunk_path or die "Cannot read $chunk_path: $!";
    my $chunk = do {local $/=undef; <$in_fh>};
    close $in_fh;
    $chunk = '' unless defined $chunk;

    my $url = "http://www.autohotkey.com/forum/post-$id.html#$id";

    # The date is the three whitespace-separated words that follow the first
    # word after "Posted:".
    my ($date) = ($chunk =~ /Posted: \S+ (\S+ \S+ \S+)/si);
    #my ($title) = ($chunk =~ m#class="postdetails">([^<]+)</a>#si);
    my ($title) = ($chunk =~ m#class="topictitle">([^<]+)</a>#si);
    # Greedy match: the body runs up to the LAST "</span></td>" in the chunk.
    my ($body) = ($chunk =~ m#<span class="postbody">(.*)</span></td>#si);
    my ($year) = defined $date ? ($date =~ /(\d\d\d\d)/) : ();

    # Without a year there is no output file to put the post in.
    unless (defined $year) {
        warn "Skipping $chunk_path: no 'Posted:' date with a four-digit year found\n";
        next;
    }

    my $out_fh = $fh_for_year{$year};
    unless ($out_fh) {
        my $out_path = "/home/interiot/public_html/apps/autohotkey/AutoHotkey_Chris_posts_$year.txt";
        open $out_fh, '>', $out_path or die "Cannot write $out_path: $!";
        $fh_for_year{$year} = $out_fh;
    }

    # A chunk without a title or body still gets an entry, with that part empty.
    $body = massage_body(defined $body ? $body : '');
    $title = massage_body(defined $title ? $title : '');

    print {$out_fh} "==================================================================================\n";
    print {$out_fh} "$title\t$date\t$url\n\n";
    print {$out_fh} "$body\n\n";
}

# Buffered write errors only surface at close, so check every output file.
foreach my $year (sort keys %fh_for_year) {
    close $fh_for_year{$year}
        or die "Error finishing the output file for $year: $!";
}

# massage_body($html)
#
# Convert a fragment of forum post HTML into plain text:
#   * raw CR/LF characters are removed (line breaks come from <br /> only),
#   * every "<br />" becomes a newline,
#   * the entities &quot; &gt; &lt; &amp; and numeric entities &#0; .. &#199;
#     are replaced by the character they stand for.
#
# All entities are decoded in ONE pass, so text produced by decoding is never
# decoded a second time (e.g. "&#38;amp;" yields "&amp;", not "&").
# Numeric entities of 200 and above, and any other markup, are left untouched.
#
# Returns the converted string; an undefined argument yields the empty string.
sub massage_body {
    my $text = shift;
    return '' unless defined $text;

    my %char_for = (
        quot => '"',
        gt   => '>',
        lt   => '<',
        amp  => '&',
    );

    $text =~ s/[\n\r]//g;
    $text =~ s#<br />#\n#g;

    # $1 is set for a named entity, $2 for a numeric one.
    $text =~ s/&(?:(quot|gt|lt|amp)|#(\d{1,2}|1\d\d));/defined $1 ? $char_for{$1} : chr($2)/ge;

    return $text;
}

# Generated by GNU enscript 1.6.4.