http://paperlined.org/rss/feeds/dailyrotten.gen.pl

#! /usr/bin/env perl
BEGIN{$^W=1}  use strict;

use HTML::Entities;
use LWP::Simple 'get';
use POSIX;
use Storable;
use Time::Zone;

# Load the persistent data
my $persistent = PersistentHash::tie_storable("dailyrotten.db");

#############################################
# Load all stories on main page
#############################################
my $stories = {};

my $page = LWP::Simple::get("http://www.dailyrotten.com/")
	or die "Unable to load dailyrotten.com: $!";

$page =~ s/^.*<!---+ BUZZ ---+?>.*?<table.*?>//si;

my $order = 0;
foreach my $story_text ($page =~ m#<td colspan="2">(.*?)</td>#gsi) {
	print('-'x80, "\n", $story_text, "\n") if (-t STDOUT);

	my $story = {};

	if ($story_text !~ m#<a href="([^"]+)" target="?_blank"? class="newslink">(.*?)</a>#si) {
		die "Unexpected text, dailyrotten.com likely changed HTML format.";
	}
	$story->{link} = $1;
	$story->{title} = HTML::Entities::encode_entities($2);

	#if ($story_text !~ m#<font[^>]*>([^|<>]+) | Submitted by:#i) {
	#	die "Unexpected text, dailyrotten.com likely changed HTML format.";
	#}
	#$story->{creator} = $1;
	$story->{creator} = "dailyrotten.com";

	my $descr = $story_text;
	$descr =~ s/^.*<font.*?>\s*//si;
	$descr =~ s/^.*?\n"//si;
	$descr =~ s/\s*<\/font>\s*$//si;
	$descr =~ s/"$//s;
	$story->{description} = HTML::Entities::encode_entities($descr);

	#if ($story_text !~ m|Submitted by:.*?<font face="Verdana,Arial,Helvetica,sans-serif" color="#000000">.*?"(.*?)"[\s\n]+</font>|si) {
	#if ($story_text !~ m|Submitted by:.*?<font face="Verdana,Arial,Helvetica,sans-serif" color="#000000">.*?"(.*?)"[\s\n]+</font>|si) {
	#	die "Unexpected text, dailyrotten.com likely changed HTML format.";
	#}
	#$story->{description} = HTML::Entities::encode_entities($1);

	$story->{order} = $order++;

	$stories->{ $story->{link} }  =  $story;

	last if ($order >= 15);		# since the .rss is public, it can potentially add up to a lot of traffic.
					# if you're hosting a private .rss, you'll probably want to remove this.
}


#############################################
# Attach date/times
#############################################
# Pull times from persistent data
my $num_unattached = 0;
foreach my $link (keys %$stories) {
	if (exists $persistent->{$link}) {
		$stories->{$link}{date} = $persistent->{$link};
	} else {
		$num_unattached++;
	}
}

# If there are too many unattached, set the new time back a day
my $new_time = time();
if ($num_unattached > 6) {
	$new_time -= 24*60*60;
}
my $new_rss_time = rsstime($new_time);

# Attach the new time to new stories
foreach my $link (keys %$stories) {
	if (!exists $stories->{$link}{date}) {
		$stories->{$link}{date} = $new_rss_time;
		$persistent->{$link}  	= $new_rss_time;
	}
}

# Clean out the persistent data once the story has moved off the main page
foreach my $link (keys %$persistent) {
	if (! exists $stories->{$link} ) {
		delete $persistent->{$link};
	}
}


#############################################
# Output RSS file
#############################################
open RSS, ">/home/interiot/www/rss/feeds/dailyrotten.rss"	or die "Unable to write to /home/interiot/www/rss/feeds/dailyrotten.rss: $!";
print RSS <<"EOF";
<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF
  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  xmlns="http://purl.org/rss/1.0/"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
>
        <channel rdf:about="http://paperlined.org/rss/feeds/dailyrotten.rss">
                <title>Newcum's DailyRotten Feed</title>
                <link>http://www.dailyrotten.com/</link>
                <description>David Newcum's RSS Feed of dailyrotten.com.  Contact rss_feeds\@paperlined.org for change requests.</description>
                <language>en-us</language>
        </channel>
EOF

foreach my $story (sort {$a->{order} <=> $b->{order}} values %$stories)
{
	print RSS <<"EOF";
        <item rdf:about="$story->{link}">
                <title>$story->{title}</title>
                <link>$story->{link}</link>
                <description>$story->{description}</description>
                <dc:creator>$story->{creator}</dc:creator>
                <dc:date>$story->{date}</dc:date>
        </item>
EOF
}
print RSS "</rdf:RDF>\n";



sub rsstime {
	my $epoc_seconds = shift;

	my $tz_offset = Time::Zone::tz_local_offset();
	my $timezone = sprintf("%d:%02d", $tz_offset/3600, ($tz_offset/60)% 60);
	$timezone = "+$timezone" if ($tz_offset >= 0);

	return(POSIX::strftime("%Y-%m-%dT%H:%M", localtime($epoc_seconds)) . $timezone);
}


###################################################################################################################
###################################################################################################################
package PersistentHash;

use File::Basename;
use File::Spec;
use Storable;

## pass in a filename, get back the hashref from that file (or an empty hashref if the file doesn't exist yet)
## If it's a relative filename, make it relative to where the current script lives, not relative to the current directory.
## Each hashref is tied to its original filename, and will be written back out once the tied var goes out of scope.
sub tie_storable {		
	my $filename = File::Spec->rel2abs(shift, dirname $0);
	my ($persist, %tied) = (-e $filename) ? retrieve $filename : {};
	tie %tied, 'PersistentHash',      $persist, $filename;		# $self = [$persist, $filename]
	return \%tied;
}
sub DESTROY  { store @{$_[0]} }			# see tie(...) above
sub TIEHASH  { bless \@_, shift }		# see tie(...) above
# From Tie::ExtraHash
sub STORE    { $_[0][0]{$_[1]} = $_[2] }
sub FETCH    { $_[0][0]{$_[1]} }
sub FIRSTKEY { my $a = scalar keys %{$_[0][0]}; each %{$_[0][0]} }
sub NEXTKEY  { each %{$_[0][0]} }
sub EXISTS   { exists $_[0][0]->{$_[1]} }
sub DELETE   { delete $_[0][0]->{$_[1]} }
sub CLEAR    { %{$_[0][0]} = () }

Generated by GNU enscript 1.6.4.