# Source: http://paperlined.org/dev/src/pl/ahk/parse_post_chunks.pl
#!/usr/bin/perl
# Parse saved AutoHotkey forum post chunks (chunks/<post-id>) and write them out
# as plain text, one output file per posting year.
use strict;
use warnings;
use Data::Dumper;
# Collect the chunk file names with the "chunks/" prefix stripped. Work on a
# copy of each glob result instead of mutating the aliased $_ in place.
# NOTE(review): the names are assumed to be numeric forum post ids (they are
# sorted numerically and spliced into the post URL) -- confirm against the
# contents of chunks/.
my @files = map { (my $name = $_) =~ s#^chunks/##; $name } glob "chunks/*";
#open FOUT, ">/home/interiot/public_html/apps/autohotkey/AutoHotkey_Chris_posts.txt" or die $!; select FOUT;
my $lyear = "";     # year of the output file that is currently open
my $out;            # handle of the current per-year output file (undef until first open)

# Walk the posts from highest id to lowest, starting a new output file
# every time the posting year changes.
foreach my $id (sort {$b <=> $a} @files) {
    # Slurp the whole chunk; three-arg open keeps odd file names from being
    # interpreted as an open mode.
    open my $in, '<', "chunks/$id" or die "Cannot read chunks/$id: $!";
    my $chunk = do { local $/ = undef; <$in> };
    close $in;

    my $url = "http://www.autohotkey.com/forum/post-$id.html#$id";

    # Pull the interesting pieces out of the HTML.
    my ($date)  = ($chunk =~ /Posted: \S+ (\S+ \S+ \S+)/si);
    #my ($title) = ($chunk =~ m#class="postdetails">([^<]+)</a>#si);
    my ($title) = ($chunk =~ m#class="topictitle">([^<]+)</a>#si);
    my ($body)  = ($chunk =~ m#<span class="postbody">(.*)</span></td>#si);

    # A pattern that fails to match leaves its variable undefined; fall back
    # to empty strings so the post is still written, without warnings.
    foreach my $field ($date, $title, $body) {
        $field = '' unless defined $field;
    }

    my ($year) = ($date =~ /(\d\d\d\d)/si);
    $year = '' unless defined $year;

    # Switch output files when the year changes, and also when no file has
    # been opened yet (otherwise a first post without a year would be
    # printed to an unopened handle and lost).
    if (!defined $out or $year ne $lyear) {
        if (defined $out) {
            # Buffered write errors only surface at close time.
            close $out or die "Cannot close output for year '$lyear': $!";
        }
        my $path = "/home/interiot/public_html/apps/autohotkey/AutoHotkey_Chris_posts_$year.txt";
        open my $fh, '>', $path or die "Cannot write $path: $!";
        $out = $fh;
    }
    $lyear = $year;

    $body  = massage_body($body);
    $title = massage_body($title);

    print {$out} "==================================================================================\n";
    print {$out} "$title\t$date\t$url\n\n";
    print {$out} "$body\n\n";
}

# Flush and check the last output file.
if (defined $out) {
    close $out or die "Cannot close output for year '$lyear': $!";
}
# massage_body($html)
#
# Turn a fragment of forum HTML into plain text:
#   * raw CR/LF characters are removed (line structure comes from <br /> only),
#   * each "<br />" becomes a newline,
#   * the entities &quot; &gt; &lt; and numeric &#N; (N = 0..199) are decoded,
#   * &amp; is decoded last so that an escaped entity such as "&amp;lt;"
#     yields the literal text "&lt;" rather than being decoded twice.
#
# Returns the converted string; an undefined argument yields ''.
sub massage_body {
    my ($text) = @_;
    return '' unless defined $text;

    $text =~ s/[\n\r]//g;
    $text =~ s#<br />#\n#g;
    $text =~ s/&quot;/"/g;
    $text =~ s/&gt;/>/g;
    $text =~ s/&lt;/</g;
    $text =~ s/&#(\d{1,2}|1\d\d);/chr($1)/ge;
    $text =~ s/&amp;/&/g;

    return $text;
}
# Generated by GNU enscript 1.6.4.