http://paperlined.org/rss/feeds/dailyrotten.gen.pl
#! /usr/bin/env perl
BEGIN{$^W=1} use strict;
use HTML::Entities;
use LWP::Simple 'get';
use POSIX;
use Storable;
use Time::Zone;
# Load the persistent data
my $persistent = PersistentHash::tie_storable("dailyrotten.db");
#############################################
# Load all stories on main page
#############################################
my $stories = {};
my $page = LWP::Simple::get("http://www.dailyrotten.com/")
or die "Unable to load dailyrotten.com: $!";
$page =~ s/^.*<!---+ BUZZ ---+?>.*?<table.*?>//si;
my $order = 0;
foreach my $story_text ($page =~ m#<td colspan="2">(.*?)</td>#gsi) {
print('-'x80, "\n", $story_text, "\n") if (-t STDOUT);
my $story = {};
if ($story_text !~ m#<a href="([^"]+)" target="?_blank"? class="newslink">(.*?)</a>#si) {
die "Unexpected text, dailyrotten.com likely changed HTML format.";
}
$story->{link} = $1;
$story->{title} = HTML::Entities::encode_entities($2);
#if ($story_text !~ m#<font[^>]*>([^|<>]+) | Submitted by:#i) {
# die "Unexpected text, dailyrotten.com likely changed HTML format.";
#}
#$story->{creator} = $1;
$story->{creator} = "dailyrotten.com";
my $descr = $story_text;
$descr =~ s/^.*<font.*?>\s*//si;
$descr =~ s/^.*?\n"//si;
$descr =~ s/\s*<\/font>\s*$//si;
$descr =~ s/"$//s;
$story->{description} = HTML::Entities::encode_entities($descr);
#if ($story_text !~ m|Submitted by:.*?<font face="Verdana,Arial,Helvetica,sans-serif" color="#000000">.*?"(.*?)"[\s\n]+</font>|si) {
#if ($story_text !~ m|Submitted by:.*?<font face="Verdana,Arial,Helvetica,sans-serif" color="#000000">.*?"(.*?)"[\s\n]+</font>|si) {
# die "Unexpected text, dailyrotten.com likely changed HTML format.";
#}
#$story->{description} = HTML::Entities::encode_entities($1);
$story->{order} = $order++;
$stories->{ $story->{link} } = $story;
last if ($order >= 15); # since the .rss is public, it can potentially add up to a lot of traffic.
# if you're hosting a private .rss, you'll probably want to remove this.
}
#############################################
# Attach date/times
#############################################
# Pull times from persistent data
my $num_unattached = 0;
foreach my $link (keys %$stories) {
if (exists $persistent->{$link}) {
$stories->{$link}{date} = $persistent->{$link};
} else {
$num_unattached++;
}
}
# If there are too many unattached, set the new time back a day
my $new_time = time();
if ($num_unattached > 6) {
$new_time -= 24*60*60;
}
my $new_rss_time = rsstime($new_time);
# Attach the new time to new stories
foreach my $link (keys %$stories) {
if (!exists $stories->{$link}{date}) {
$stories->{$link}{date} = $new_rss_time;
$persistent->{$link} = $new_rss_time;
}
}
# Clean out the persistent data once the story has moved off the main page
foreach my $link (keys %$persistent) {
if (! exists $stories->{$link} ) {
delete $persistent->{$link};
}
}
#############################################
# Output RSS file
#############################################
open RSS, ">/home/interiot/www/rss/feeds/dailyrotten.rss" or die "Unable to write to /home/interiot/www/rss/feeds/dailyrotten.rss: $!";
print RSS <<"EOF";
<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns="http://purl.org/rss/1.0/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
>
<channel rdf:about="http://paperlined.org/rss/feeds/dailyrotten.rss">
<title>Newcum's DailyRotten Feed</title>
<link>http://www.dailyrotten.com/</link>
<description>David Newcum's RSS Feed of dailyrotten.com. Contact rss_feeds\@paperlined.org for change requests.</description>
<language>en-us</language>
</channel>
EOF
foreach my $story (sort {$a->{order} <=> $b->{order}} values %$stories)
{
print RSS <<"EOF";
<item rdf:about="$story->{link}">
<title>$story->{title}</title>
<link>$story->{link}</link>
<description>$story->{description}</description>
<dc:creator>$story->{creator}</dc:creator>
<dc:date>$story->{date}</dc:date>
</item>
EOF
}
print RSS "</rdf:RDF>\n";
sub rsstime {
my $epoc_seconds = shift;
my $tz_offset = Time::Zone::tz_local_offset();
my $timezone = sprintf("%d:%02d", $tz_offset/3600, ($tz_offset/60)% 60);
$timezone = "+$timezone" if ($tz_offset >= 0);
return(POSIX::strftime("%Y-%m-%dT%H:%M", localtime($epoc_seconds)) . $timezone);
}
###################################################################################################################
###################################################################################################################
package PersistentHash;
use File::Basename;
use File::Spec;
use Storable;
## pass in a filename, get back the hashref from that file (or an empty hashref if the file doesn't exist yet)
## If it's a relative filename, make it relative to where the current script lives, not relative to the current directory.
## Each hashref is tied to its original filename, and will be written back out once the tied var goes out of scope.
sub tie_storable {
my $filename = File::Spec->rel2abs(shift, dirname $0);
my ($persist, %tied) = (-e $filename) ? retrieve $filename : {};
tie %tied, 'PersistentHash', $persist, $filename; # $self = [$persist, $filename]
return \%tied;
}
sub DESTROY { store @{$_[0]} } # see tie(...) above
sub TIEHASH { bless \@_, shift } # see tie(...) above
# From Tie::ExtraHash
sub STORE { $_[0][0]{$_[1]} = $_[2] }
sub FETCH { $_[0][0]{$_[1]} }
sub FIRSTKEY { my $a = scalar keys %{$_[0][0]}; each %{$_[0][0]} }
sub NEXTKEY { each %{$_[0][0]} }
sub EXISTS { exists $_[0][0]->{$_[1]} }
sub DELETE { delete $_[0][0]->{$_[1]} }
sub CLEAR { %{$_[0][0]} = () }
Generated by GNU enscript 1.6.4.