#!/usr/bin/perl -T
#$Id: sobt.pl,v 1.31 2005/06/21 06:09:07 andrewfresh Exp $
########################################################################
# sobt.pl *** Synchronizes current OpenBSD torrents from
#             http://OpenBSD.somedomain.net/
#
# Copyright (c) 2005
#      Andrew Fresh.  All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
########################################################################
use strict;
use warnings;

# ----------------------------------------------------------------------
# User settings.  Everything a user normally needs to touch is in this
# section; the rest of the script reads these variables.
# ----------------------------------------------------------------------

# $HOME can be overridden, but is normally gotten from the environment in a
# BEGIN block below.
my $HOME;

# RSS feeds to synchronize.  Each enabled entry is fetched in turn.
my @URLs = (
    # This one syncs with the latest release torrents
    'http://OpenBSD.somedomain.net/latest_release.rss',

    # Uncomment this one (2 lines) to sync snapshots for alpha, amd64 and i386
    #'http://OpenBSD.somedomain.net/rss.php?' .
    #    'arch=i386&arch=alpha&arch=amd64&version=snapshots',

    # Uncomment to sync with the latest 25 torrents
    #'http://OpenBSD.somedomain.net/index.rss',

    # Uncomment to sync with all available torrents
    #'http://OpenBSD.somedomain.net/all.rss',
);

# This is where the torrents go
my $Torrentdir = $HOME . '/torrents';

# This file keeps track of the torrents that have been downloaded
my $Datafile = $HOME . '/.sobtdata';

# Set this to 0 to re-download deleted torrents,
# set to 1 to trust the text list of archived files.
my $Trust_Datafile = 0;

=pod

=head1 NAME

sobt.pl - Synchronizes current OpenBSD torrents from
http://OpenBSD.somedomain.net/

=head1 DESCRIPTION

When new files are released by the OpenBSD team, new torrents are generated.
Keeping your local files synchronized with what is out there can be time
consuming. This script is here to help. It will read the RSS feeds available
from http://OpenBSD.somedomain.net, make sure you have the newest torrent for
each directory you want, and get rid of any old versions.

This script does not do anything with the torrents other than sync them. If
you want to actually download the files in the torrent you will need another
program in addition to this script. I have tested it with the official
BitTorrent client, but any BitTorrent client that can watch a directory for
torrents should work.

A limitation of this script is that it will not clean up torrents that you no
longer want. For example, when the latest version changes from 3.7 to 3.8, it
will download the new 3.8 torrents, but it will no longer update the 3.7
torrents or delete them. The same is true if you remove an RSS feed from the
list of URLs. If you want them deleted, you will have to do it manually.

=head1 REQUIREMENTS

This script should run on any version of perl that supports the required
modules. The only required modules that are not part of the base perl install
are XML::RAI and LWP::Simple.
It has been successfully tested on OpenBSD 3.7, OS X 10.2 and Windows 2000
with Activeperl 5.8.7 build 813.

=head1 SYNOPSIS

=head2 Installation

This script is designed to be run regularly from cron (or the Windows task
scheduler). It should not need to run more than every four hours because that
is as often as the RSS feeds are updated.

The file can be saved anywhere, for instance, $HOME/bin. It runs under taint
mode normally, so if it is running in Windows, it will need to be run with
perl -T sobt.pl, or remove the -T from the first line in the script.

It can be put in cron like this:

    MIN */4 * * * sobt.pl > /dev/null

Where MIN is a number between 6 and 54 to even the load on the server.

The script outputs newly downloaded and deleted files on STDERR so cron should
send you an e-mail whenever files are updated.

=head2 Changing Settings

Changing the settings that control the operation of this script can be done by
changing variables directly in the script. There are only a few settings
available, and most of them can be left at their default settings.

=over

=item URLS TO DOWNLOAD - @URLs

This is a list of the rss feeds to be synchronized. It is stored in the @URLs
array. It defaults to downloading all torrents for the latest release from
http://OpenBSD.somedomain.net/latest_release.rss

A valid url for the array can either be one of the special mod_rewrite *.rss
feeds, or one of the normal rss.php feeds.

=item TORRENT DIRECTORY - $Torrentdir

This should point to the location where the downloaded torrents should end up.
It is controlled by the $Torrentdir variable. The default is a directory named
'torrents' in $HOME.

=item DATA FILE - $Datafile

The default setting here should be OK. It is the location of the file where a
list of the latest torrents you have downloaded are stored. This setting is
controlled by the $Datafile variable.
It defaults to $HOME/.sobtdata

=item TRUST DATA FILE - $Trust_Datafile

Normally the script will check to see if a file it thinks it has downloaded
exists and if not it will redownload it. If you do not want this feature, for
example if your torrent client deletes the torrents after it is done with
them, you can set $Trust_Datafile to 1 to trust what is in the Datafile and
not redownload the torrents.

=back

=head1 LINKS

=over

=item OpenBSD

http://www.OpenBSD.org

=item BitTorrent

http://www.bittorrent.org

=item OpenBSD Torrents

http://OpenBSD.somedomain.net

=item Some OpenBSD Torrent RSS feeds

Use any rss feed from http://OpenBSD.somedomain.net

=over

=item All Torrents

http://OpenBSD.somedomain.net/all.rss

=item Latest Release

http://OpenBSD.somedomain.net/latest_release.rss

=item Recent Torrents

http://OpenBSD.somedomain.net/index.rss

=item All torrents for Zaurus

http://OpenBSD.somedomain.net/arch_zaurus.rss

Replace 'zaurus' in the url with a different architecture.

=item Latest release torrents for i386

http://OpenBSD.somedomain.net/latest+release_i386.rss

Replace 'i386' in the url with a different architecture and 'latest+release'
with a different version.

=item Snapshots for amd64

http://OpenBSD.somedomain.net/index.php?arch=amd64&version=snapshots

Replace 'amd64' in the url with a different architecture and 'snapshots' with
a different version.

=back

=back

=head1 AUTHOR

Andrew Fresh

=head1 COPYRIGHT

Copyright E<copy> 2005 Andrew Fresh. All rights reserved.

Licensed under a two clause BSD style license, the full text of which can be
found in the source of the main script.

C<$Id: sobt.pl,v 1.31 2005/06/21 06:09:07 andrewfresh Exp $>

=cut

use XML::RAI;
use LWP::Simple qw/ $ua get getstore is_success /;
$ua->agent('OpenBSDTorrentSyncer/0.1');

use Fcntl ':flock';
use File::Basename;
use Cwd 'abs_path';
use File::Temp qw/ tempdir tempfile /;
use File::Copy;
use Time::Local;

# Resolve $HOME at compile time.  The value comes from the environment, so
# it is only accepted (and untainted through the regex capture) when it is
# an absolute Unix or Windows path without a '..' component.  Anything else
# falls back to the current directory.
BEGIN {
    $HOME = abs_path($ENV{HOME} || $ENV{HOMEPATH} || '.');

    if (   defined $HOME
        && $HOME !~ m#[/\\]\.\.#
        && $HOME =~ m#^([/\\].*|\w:.*)$#)
    {
        $HOME = $1;
    }
    else {
        $HOME = '.';
    }
}

# ----------------------------------------------------------------------
# Main program: fetch every feed, download what is missing, then write
# the data file back out.
# ----------------------------------------------------------------------

unless (-d $Torrentdir) {
    # 'or', not '||': with '||' the die would attach to $Torrentdir and a
    # failed mkdir would go unnoticed.
    mkdir $Torrentdir
        or die "Couldn't mkdir '$Torrentdir': $!";
}

# Downloads land here first and are moved into $Torrentdir once complete.
my $Tempdir = tempdir('OpenBSDTorrents-XXXXXXXX', CLEANUP => 1, TMPDIR => 1);

foreach my $url (@URLs) {
    my $content = get($url) or die "Couldn't retrieve '$url'";
    my $rai = XML::RAI->parse($content);

    print scalar $rai->channel->title, "\n";

    # Workaround for broken XML::RAI 0.51
    # NOTE(review): this also skips a feed that has exactly one item;
    # confirm that is what the workaround needs.
    if (@{ $rai->{'__items'} || [] } <= 1) {
        print STDERR "No items in feed\n";
        next;
    }

    foreach my $item (@{ $rai->items }) {
        print "Checking: ", basename(scalar $item->link), "\n";
        Download($item) unless IsDownloaded($item);
    }
    print "\n";
}

SyncDatafile();

exit 0;

# ----------------------------------------------------------------------
# Subroutines.  The bare blocks below only exist to give the subs inside
# them a private, shared lexical.
# ----------------------------------------------------------------------

{
    # Downloaded torrents, keyed by the feed item's identifier.  Each value
    # is a hash with 'filename' and 'created', plus lazily filled 'name'
    # (see FixName) and 'epoch' (see ToEpoch).
    my %dl;

    # IsDownloaded($item)
    #   $item - a feed item providing link, identifier and created.
    # Returns 1 when the item does not need downloading: either this exact
    # identifier is recorded, or a recorded torrent with the same name is
    # at least as new.  Unless $Trust_Datafile is set, the recorded file
    # must also still exist in $Torrentdir.  Returns 0 otherwise.
    sub IsDownloaded {
        my $item = shift;

        # First call: load the data file.
        SyncDatafile() unless %dl;

        my $itemname = FixName(scalar $item->link);
        my $id       = $item->identifier;

        if (   exists $dl{$id}
            && ($Trust_Datafile || -e $Torrentdir . '/' . $dl{$id}{'filename'}))
        {
            print "\tSkipping, already downloaded '$itemname' with this hash\n";
            return 1;
        }

        foreach my $hash (keys %dl) {
            my $have = $dl{$hash};

            $have->{'name'} ||= FixName($have->{'filename'});
            next unless $itemname eq $have->{'name'};

            $have->{'epoch'} ||= ToEpoch($have->{'created'});
            next unless $have->{'epoch'} >= ToEpoch(scalar $item->created);

            if ($Trust_Datafile || -e $Torrentdir . '/' . $have->{'filename'}) {
                print "\tSkipping, already have the same or newer '$itemname'\n";
                return 1;
            }
        }

        return 0;
    }

    # Download($item)
    #   Fetches the item's torrent into the temp dir, removes older
    #   versions of it from $Torrentdir, moves the new file into place and
    #   records it in %dl.
    # Returns 1 on success and nothing when the HTTP fetch failed.  Dies if
    # the fetched file cannot be moved into $Torrentdir.
    sub Download {
        my $item = shift;

        # scalar() is forced on the accessor, as everywhere else in this
        # file, so it cannot add extra arguments to getstore().
        my $link     = scalar $item->link;
        my $basename = basename($link);
        my $fetched  = $Tempdir . '/' . $basename;

        print "\tDownloading '$basename'\n";

        my $status = getstore($link, $fetched);
        unless (is_success($status)) {
            warn "\tCouldn't download '$basename' (HTTP status $status)\n";
            return;
        }

        CleanOld($basename);
        move($fetched, $Torrentdir . '/' . $basename)
            or die "Couldn't move '$basename' from temp dir to '$Torrentdir': $!";

        $dl{ $item->identifier } = {
            filename => $basename,
            created  => scalar $item->created,
            file     => $link,
        };

        print STDERR "\tDownloaded '$basename'\n";
        return 1;
    }

    # SyncDatafile()
    #   Loads $Datafile into %dl when %dl is still empty, drops every entry
    #   that has a strictly newer entry with the same name, and rewrites
    #   $Datafile from what is left.  Each line of the file is
    #   "filename<TAB>created<TAB>hash".
    # Dies when the data file cannot be opened, written or replaced.
    sub SyncDatafile {
        my $fh;

        if (-e $Datafile) {
            open $fh, '<', $Datafile
                or die "Couldn't open FILE '$Datafile': $!";
            flock($fh, LOCK_EX);

            unless (%dl) {
                # A named variable keeps the caller's $_ intact.
                while (my $line = <$fh>) {
                    chomp $line;
                    next unless $line;

                    my ($file, $created, $hash) = split /\t/, $line;
                    next unless $file && $created && $hash;

                    $dl{$hash} = {
                        filename => $file,
                        created  => $created,
                    };
                }
            }
        }

        # Pass 1: find the newest epoch for every torrent name.
        my %newest;
        foreach my $hash (keys %dl) {
            next unless $hash;
            my $entry = $dl{$hash};

            $entry->{'name'}  ||= FixName($entry->{'filename'});
            $entry->{'epoch'} ||= ToEpoch($entry->{'created'});

            my $name = $entry->{'name'};
            if (!exists $newest{$name} || $entry->{'epoch'} > $newest{$name}) {
                $newest{$name} = $entry->{'epoch'};
            }
        }

        # Pass 2: drop everything strictly older than the newest of its
        # name.  Entries that tie are all kept.  Only existing keys are
        # looked at, so a deleted entry is never autovivified back into
        # %dl.
        foreach my $hash (keys %dl) {
            next unless $hash;
            my $entry = $dl{$hash};
            delete $dl{$hash} if $entry->{'epoch'} < $newest{ $entry->{'name'} };
        }

        my ($tfh, $tempfile) = tempfile($Tempdir . '/XXXXXX');
        foreach my $hash (sort keys %dl) {
            my $entry = $dl{$hash};
            next unless $hash && $entry->{'filename'} && $entry->{'created'};

            print {$tfh} join("\t", $entry->{'filename'}, $entry->{'created'}, $hash),
                "\n";
        }

        # Buffered write errors only show up here; never replace the data
        # file with a copy that was not written completely.
        close $tfh
            or die "Couldn't write temporary datafile '$tempfile': $!";

        if ($fh) {
            flock($fh, LOCK_UN);
            close $fh;
        }

        move($tempfile, $Datafile) or die "Couldn't sync datafile: $!";
        return 1;
    }
}

{
    # Current Files: the *.torrent files known to be in $Torrentdir, keyed
    # by file name, each with a lazily filled 'name' (see FixName).
    my %cf;

    # CleanOld($file)
    #   $file - name of the torrent that is about to be installed.
    # Unlinks every known torrent in $Torrentdir that has the same name as
    # $file, reporting each one on STDERR.  Dies if the directory cannot be
    # read or a file cannot be removed.
    sub CleanOld {
        my $file = shift;
        my $name = FixName($file);

        unless (%cf) {
            opendir(my $dir, $Torrentdir)
                or die "Couldn't opendir '$Torrentdir': $!";

            # The names are stored as hash keys on purpose: readdir()
            # results are tainted, hash keys never are (see perlsec), so
            # the unlink() below is accepted under -T.
            foreach my $entry (readdir $dir) {
                next if $entry =~ /^\./;
                next unless $entry =~ /\.torrent$/;
                $cf{$entry} = {};
            }
            closedir $dir;
        }

        foreach my $cur (keys %cf) {
            $cf{$cur}{'name'} ||= FixName($cur);
            next unless $name eq $cf{$cur}{'name'};

            print STDERR "\tUnlinking old '$cur'\n";
            unlink($Torrentdir . '/' . $cur)
                or die "Couldn't unlink '$cur': $!";
            delete $cf{$cur};
        }

        # The directory is only listed once per run, so remember the file
        # that is about to be installed.  Without this, a newer version
        # arriving later in the same run could not remove it.
        my $incoming = basename($file);
        if ($incoming !~ /^\./ && $incoming =~ /\.torrent$/) {
            $cf{$incoming} = { name => $name };
        }

        return;
    }
}

# FixName($path_or_url)
#   Returns the torrent's name: the basename with '.torrent' and anything
#   from the first '-' onwards removed.  Returns '' for a false argument.
sub FixName {
    my $name = shift || return '';

    my $basename = basename($name);
    $basename =~ s/(?:-.*)?\.torrent$//;

    return $basename;
}

# ToEpoch($timestamp)
#   $timestamp - 'YYYY-MM-DDThh:mm:ss' followed by a UTC offset written as
#                '+hhmm', '-hhmm', '+hh:mm', '-hh:mm' or 'Z'.
# Returns the matching epoch seconds with the offset applied, so that
# timestamps written with different offsets compare correctly.  Returns 0,
# after a warning, for anything that cannot be parsed, and 0 without a
# warning for a false argument.  Epochs are recomputed from the stored
# timestamp strings on every run, so nothing on disk depends on the value.
sub ToEpoch {
    my $time = shift || return 0;

    my ($year, $mon, $mday, $hour, $min, $sec, $sign, $zhour, $zmin) =
        $time =~ m{
            ^ (\d{4}) - (\d{2}) - (\d{2})
            T (\d{2}) : (\d{2}) : (\d{2})
            (?: Z | ([+-]) (\d{2}) :? (\d{2}) )
        }x;

    unless (defined $year) {
        warn "$time is not valid!\n";
        return 0;
    }

    # timegm() croaks on out-of-range fields (month 13, hour 25, ...);
    # treat those like any other invalid timestamp.
    my $epoch = do {
        local $@;
        eval { timegm($sec, $min, $hour, $mday, $mon - 1, $year) };
    };
    unless (defined $epoch) {
        warn "$time is not valid!\n";
        return 0;
    }

    # The fields are local to the given offset: a '-' zone is behind UTC,
    # so its offset is added to get UTC, and a '+' zone is subtracted.
    if (defined $sign) {
        my $offset = ($zhour * 60 + $zmin) * 60;
        $epoch += ($sign eq '-') ? $offset : -$offset;
    }

    return $epoch;
}