# ---------------------------------------------------------------------------
# i-spy: file-scope configuration, command-line parsing and entry point.
# ---------------------------------------------------------------------------
use strict;

# Extend the module search path.  This must happen inside BEGIN: the `use`
# statements below are resolved at compile time, so a plain run-time push
# onto @INC would come too late to help locate them.
# NOTE(review): 'pwd' is a literal directory name, kept from the original;
# it looks like the present working directory was intended -- confirm.
BEGIN
{
    push(@INC, 'pwd');
    push(@INC, $ENV{HOME} . "/bin");
}

use Getopt::Long;
use Log::File;
use Date::Format;

# Boolean and status constants shared by all subroutines.
my $true= (1==1);
my $false= (1==0);
my $noError= 0;

# Names of the files kept inside every site directory.
my $dataFileName= "data.txt";
my $addedFileName= "added.txt";
my $removedFileName= "removed.txt";
my $retainedFileName= "retained.txt";
my $snapshotFileName= "snapshot.txt";
my $oldSnapshotFileName= "old-snapshot.txt";
my $preFilterScriptName= "pre-filter";
my $postFilterScriptName= "post-filter";

# Web report: index link name and the placeholder tokens that get replaced
# in the generated page.
my $webIndexFile= "index.html";
my $logFileToken= "<!-- Log file path -->";
my $previousReportToken= "<!-- Previous report -->";
my $tocToken= "<!-- Table of Contents -->";

# Miscellaneous settings.
my $directoryDelimiter= "/";
my $specialDelimiter= ":\t";     # separates link text from address in item lines
my $myPage= "http://www.ayradyss.org/programs/current.html#i-spy";
my $prettyNameRegEx= qr/^[\+_]+/; # leading characters dropped from displayed site names

# Run-time state, mostly filled in from the command line.
my $dataDirectory= undef;
my $logDirectory= undef;
my $webDirectory= undef;
my $mailTo= undef;
my $html= undef;
my $myName= undef;
my $myAgentMask= undef;
my $logFile= undef;
my %sites= ();          # site name => contents of its data file
my %skippedSites= ();   # site name => reason the site was skipped

# The agent option takes a string value: it is handed to
# LWP::UserAgent->agent() as the User-Agent to present.  (Declared without
# "=s" it would be a boolean flag and the agent string would be "1".)
# The original "masquarade" spelling is kept; the correct spelling is
# accepted as an additional alias.
GetOptions("d|dir|directory=s" => \$dataDirectory,
           "l|log|logDirectory=s" => \$logDirectory,
           "w|web=s" => \$webDirectory,
           "m|mail=s" => \$mailTo,
           "h|html" => \$html,
           "n|name=s" => \$myName,
           "a|agent|masquarade|masquerade=s" => \$myAgentMask,
          );

Initialize();
Run();
Quit();
# Initialize
#
# Fills in defaults for the program name and the data, web and log
# directories, then opens the log file object used by the logging helpers.
sub Initialize
{
    # No name on the command line: take the last path component of $0
    # and drop a trailing ".pl".
    if (!$myName)
    {
        $myName= $0;
        if ($myName=~ /([^$directoryDelimiter]+)$/)
        {
            $myName= $1;
        }
        $myName=~ s/\.pl$//;
    }
    $myName||= "i-spy";

    # The data directory always ends with the directory delimiter.
    $dataDirectory||= "sites";
    if ($dataDirectory!~ /$directoryDelimiter$/)
    {
        $dataDirectory.= $directoryDelimiter;
    }

    # With a web directory, the log defaults to its "logs" subdirectory.
    if ($webDirectory)
    {
        if ($webDirectory!~ /$directoryDelimiter$/)
        {
            $webDirectory.= $directoryDelimiter;
        }
        $logDirectory||= $webDirectory . "logs";
    }
    $logDirectory||= "logs";

    $logFile= Log::File->new(directory => $logDirectory,
                             name => "$myName.log"
                            );
}
# Run
#
# Drives one complete pass: gather the site definitions, fetch fresh
# snapshots, compare them with the previous ones, and report.  Checking and
# comparing are skipped whenever no sites are left in %sites.
sub Run
{
    GatherSites();
    if (keys(%sites))
    {
        CheckSites();
    }
    if (keys(%sites))
    {
        CompareSnapshots();
    }
    ReportNew();
}
# Quit
#
# Closes the log (when one was opened) and terminates the program.
# The guard mirrors the logging helpers, which all tolerate a missing
# $logFile; without it a failed log setup would die here on an undefined
# object instead of exiting cleanly.
sub Quit
{
    $logFile->Close() if $logFile;
    exit;
}
# LogEntry
#
# Writes one entry to the log; does nothing when no log is open.
# A missing or false message is logged as an empty string.
sub LogEntry
{
    return unless $logFile;
    my $message= shift || "";
    return $logFile->MakeEntry($message);
}
# MajorLogEntry
#
# Starts a new section in the log: a separator, the given heading, and an
# empty entry after it.  Does nothing when no log is open.
sub MajorLogEntry
{
    return unless $logFile;
    my $heading= shift;
    $logFile->SeparateLogEntries();
    $logFile->MakeEntry($heading);
    return $logFile->MakeEntry();
}
# MinorLogEntry
#
# Writes a detail entry, passing a tab ahead of the message.
# Does nothing when no log is open.
sub MinorLogEntry
{
    return unless $logFile;
    my $detail= shift;
    return $logFile->MakeEntry("\t", $detail);
}
# BlankLogEntry
#
# Writes an entry without any text.  Does nothing when no log is open.
sub BlankLogEntry
{
    if ($logFile)
    {
        return $logFile->MakeEntry();
    }
    return;
}
# GatherSites
#
# Scans $dataDirectory for site definitions.  Every subdirectory whose name
# does not start with "." or "~" and which holds a readable text data file
# is registered in %sites, keyed by directory name, with the full contents
# of that data file as the value.  Everything skipped is logged.
#
# Uses lexical handles and three-argument open, and a named line variable
# so the caller's $_ is left alone.
sub GatherSites
{
    my @leaves= ();

    MajorLogEntry("Gathering sites within <$dataDirectory>...");
    if (-d $dataDirectory)
    {
        if (opendir(my $sitesHandle, $dataDirectory))
        {
            @leaves= readdir($sitesHandle);
            closedir($sitesHandle);
        }
        else
        {
            LogEntry("Failed to open <$dataDirectory>: $!");
        }
    }
    else
    {
        LogEntry("<$dataDirectory> is not a directory");
    }

    # Ignore hidden entries (including "." and "..") and "~" names;
    # sort so that the log reads the same from run to run.
    @leaves= grep { !/^[\.~]/ } @leaves;
    @leaves= sort @leaves;

    unless (@leaves)
    {
        LogEntry("Seems that we have no inputs");
        return;
    }

    foreach my $site (@leaves)
    {
        if (!-d $dataDirectory.$site)
        {
            LogEntry("Skipping <$site>: not a directory");
            next;
        }

        my $dataFilePath= "$dataDirectory$site$directoryDelimiter$dataFileName";
        unless (-T $dataFilePath)
        {
            LogEntry("Skipping <$site>: cannot access the data file "
                     . "<$dataFilePath>");
            next;
        }

        my $dataHandle;
        unless (open($dataHandle, '<', $dataFilePath))
        {
            LogEntry("Skipping <$site>: $!");
            next;
        }
        my $definition= "";
        while (my $line= <$dataHandle>)
        {
            $definition.= $line;
        }
        close($dataHandle);

        # An empty data file has no URL; registering nothing while logging
        # "Grabbed" (as before) would be misleading.
        if ($definition eq "")
        {
            LogEntry("Skipping <$site>: the data file <$dataFilePath> is empty");
            next;
        }

        $sites{$site}= $definition;
        LogEntry("Grabbed <$site>");
    }
}
# CheckSites
#
# Fetches a fresh snapshot for every site in %sites.  The first line of the
# site's data is its URL (optionally written as <URL:...>); the second line
# is either an alternate URL to fetch instead or a directive passed on to
# the FTP check.  Sites that fail are moved from %sites to %skippedSites
# together with the reason.
#
# An unparseable URL now skips only that site.  Previously it returned from
# the whole subroutine, leaving all remaining sites unchecked and the bad
# site still registered for comparison.
sub CheckSites
{
    MajorLogEntry("Checking sites...");
    foreach my $site (keys %sites)
    {
        my $siteDirectory= $dataDirectory.$site.$directoryDelimiter;
        my ($url, $directive)= split(/\n/, $sites{$site});
        $url= "" unless defined $url;
        $directive= "" unless defined $directive;

        # Trailing whitespace (for instance a carriage return from a
        # DOS-style file) would defeat both the ">" stripping and the
        # directive comparison.
        $url=~ s/\s+$//;
        $directive=~ s/\s+$//;
        $url=~ s/^<//;
        $url=~ s/>$//;
        $url=~ s/^URL://;

        LogEntry("$site:");

        # A second line that looks like a URL replaces the primary one.
        $url= $directive if $directive=~ /^\w+:/;

        my $error;
        my ($protocol, $host, $login, $password, $path)= ParseURL($url);
        if (!($protocol and $host))
        {
            MinorLogEntry("could not parse the URL:");
            MinorLogEntry("\tprotocol= $protocol");
            MinorLogEntry("\thost= $host");
            MinorLogEntry("\tlogin= $login");
            MinorLogEntry("\tpassword= *secret*") if $password;
            MinorLogEntry("\tpath= $path");
            $error= "Could not parse the URL";
        }
        elsif ($protocol eq "FTP")
        {
            $error= CheckViaFTP($host, $login, $password, $path,
                                $siteDirectory.$snapshotFileName, $directive);
        }
        elsif ($protocol eq "HTTP")
        {
            $error= CheckViaHTTP($url, $siteDirectory.$snapshotFileName);
        }
        else
        {
            $error= "Unsupported protocol ($protocol)";
        }

        if ($error)
        {
            $skippedSites{$site}= $error;
            delete($sites{$site});
            MinorLogEntry("skipping $site in future processing");
        }
        LogEntry();
    }
}
# CheckViaFTP
#
# Takes a snapshot of an FTP resource.
#
# Parameters: host, login, password, remote path, local target file, and
# the directive from the site's data file.
#   - An empty path or one ending in the directory delimiter is treated as
#     a directory: its listing is saved to the target file.  The directive
#     "dir" selects a verbose listing, anything else a terse one.
#   - Any other path is fetched as a file.  The path's suffix is appended
#     to the target file name, and ".Z"/".gz" files are decompressed with
#     gunzip afterwards.
#
# Returns 0 on success or an error description.
#
# Fixes over the previous version:
#   - the file-download failure path assigned MinorLogEntry's return value
#     to $error (stray "$error="), losing the server's message;
#   - the terse listing changed directory a second time; the path is
#     relative, so the second change was applied on top of the first and
#     broke the listing;
#   - gunzip is run without a shell and its exit status is reported ($! is
#     only meaningful when the command could not be started);
#   - compressed files are transferred in binary mode.
sub CheckViaFTP
{
    use Net::FTP;
    my ($site, $login, $password, $path, $targetFile, $directive)= @_;
    my $error= 0;

    $path= "" unless defined $path;
    $directive= "" unless defined $directive;

    # Flattens the server's last (possibly multi-line) reply to one line.
    my $serverMessage= sub
    {
        my $connection= shift;
        my $text= $connection->message();
        $text= "" unless defined $text;
        $text=~ s/\s+$//s;
        $text=~ s/\n/\t/sg;
        return $text;
    };

    my $ftp= Net::FTP->new($site);
    unless ($ftp)
    {
        MinorLogEntry("failed to contact <$site>: $@");
        return "Failed to contact <$site> ($@)";
    }

    MinorLogEntry("contacting <$site>");
    MinorLogEntry("with login name <$login>") if $login;
    MinorLogEntry("and password *secret*") if $login and $password;

    if (!$ftp->login($login ? $login : undef, $password ? $password : undef))
    {
        $error= $serverMessage->($ftp);
        MinorLogEntry("failed to log in: $error");
        $error= "Failed to log in ($error)";
    }
    elsif (($path eq "") or ($path=~ /$directoryDelimiter$/))
    {
        # Directory: save its listing.
        my @listing= ();
        if ($ftp->cwd($path))
        {
            if ($directive eq "dir")
            {
                MinorLogEntry("getting a verbose listing of <$path>");
                @listing= $ftp->dir();
            }
            else
            {
                MinorLogEntry("getting a terse listing of <$path>");
                @listing= $ftp->ls();
            }
        }

        if (!@listing)
        {
            $error= $serverMessage->($ftp);
            MinorLogEntry("failed to grab the listing: $error");
            $error= "Failed to grab the listing ($error)";
        }
        elsif (open(my $snapshotHandle, '>', $targetFile))
        {
            print $snapshotHandle join("\n", @listing), "\n";
            unless (close($snapshotHandle))
            {
                my $reason= $!;
                MinorLogEntry("failed to save the listing: $reason");
                $error= "Failed to save the listing ($reason)";
            }
        }
        else
        {
            my $reason= $!;
            MinorLogEntry("failed to save the listing: $reason");
            $error= "Failed to save the listing ($reason)";
        }
    }
    else
    {
        # Single file: download it, keeping the remote suffix.
        MinorLogEntry("getting the file <$path>");
        my $suffix= "";
        if ($path=~ /(\.\w+)$/)
        {
            $suffix= $1;
            $targetFile.= $suffix unless $targetFile=~ /\Q$suffix\E$/;
        }
        my $compressed= (($suffix eq ".Z") or ($suffix eq ".gz"));

        # Net::FTP starts out in ASCII mode, whose line-ending translation
        # would corrupt compressed data.
        $ftp->binary() if $compressed;

        my $localFile= $ftp->get($path, $targetFile);
        if (!defined($localFile) or ($localFile ne $targetFile))
        {
            $error= $serverMessage->($ftp);
            MinorLogEntry("failed to grab the file <$path>: $error");
            $error= "Failed to grab the file ($error)";
        }
        elsif ($compressed)
        {
            # List form: the file name never passes through a shell.
            if (system("gunzip", "--force", $localFile) != 0)
            {
                my $reason;
                if ($? == -1)
                {
                    $reason= $!;
                }
                else
                {
                    $reason= "exit status " . ($? >> 8);
                }
                MinorLogEntry("failed to decompress the file <$localFile>: $reason");
                $error= "Failed to decompress the file ($reason)";
            }
        }
    }

    $ftp->quit();
    return $error;
}
# CheckViaHTTP
#
# Fetches a web page straight into the target file.
#
# Parameters: the URL and the local target file.
# Returns $noError on success.  On failure the server's error page is
# written to the target file and a description of the error is returned.
#
# The returned description is never empty: an error page with an empty
# body used to yield a false value, which the caller reads as success.
# The target file is opened with a lexical handle and three-argument open,
# and a parser that cannot be created no longer causes a method call on an
# undefined value.
sub CheckViaHTTP
{
    use LWP::UserAgent;
    use HTML::TokeParser;
    my ($url, $targetFile)= @_;
    my $agent= LWP::UserAgent->new();

    MinorLogEntry("grabbing the web page <$url>");
    $agent->agent($myAgentMask) if $myAgentMask;

    # The second argument makes LWP store the content in that file.
    my $response= $agent->request(HTTP::Request->new(GET => $url), $targetFile);
    return $noError if $response->is_success();

    my $errorHandle;
    unless (open($errorHandle, '>', $targetFile))
    {
        my $reason= $!;
        MinorLogEntry("could not save the remote server error "
                      . "<$targetFile>: $reason");
        return "Could not save the remote server error ($reason)";
    }
    print $errorHandle $response->error_as_HTML();
    close($errorHandle);

    # Pull the human-readable text out of the saved error page.
    my $parser= HTML::TokeParser->new($targetFile);
    if ($parser and $parser->get_tag("body"))
    {
        my $error= $parser->get_trimmed_text("/body");
        if (defined($error) and $error ne "" and $error ne "0")
        {
            MinorLogEntry("remote server reported an error <$url>: $error");
            return $error;
        }
    }

    MinorLogEntry("remote server reported an unspecified error <$url>");
    return "The remote server returned an unspecified error";
}
# ParseURL
#
# Splits a URL of the form  protocol://[login[:password]@]host[/path]
# into its parts.
#
# Returns the list (protocol, host, login, password, path).  The protocol
# is upper-cased; parts that are absent come back as empty strings.  When
# the URL cannot be parsed at all, every element is an empty string.
#
# Notes:
#   - The match result is checked.  An unchecked failed match would pick up
#     whatever $1..$3 the caller's last successful match left behind.
#   - The path is optional, so "http://host" parses with an empty path.
#   - Credentials are split at the LAST "@" and the FIRST ":" so that
#     passwords may contain either character.
sub ParseURL
{
    my $url= shift;
    my ($protocol, $site, $login, $password, $path)= ("", "", "", "", "");

    return ($protocol, $site, $login, $password, $path)
        unless defined($url)
           and $url=~ /^(\w+):\/*([^\/]+)(?:\/(.*))?$/;

    $protocol= uc($1);
    $site= $2;
    $path= defined($3) ? $3 : "";

    if ($site=~ /^(.*)\@([^\@]*)$/)
    {
        ($login, $site)= ($1, $2);
        ($login, $password)= split(/:/, $login, 2);
        $login= "" unless defined $login;
        $password= "" unless defined $password;
    }

    return ($protocol, $site, $login, $password, $path);
}
# CompareSnapshots
#
# For every site in %sites, compares the current snapshot with the previous
# one line by line and writes the added, retained and removed lines to
# their files in the site directory.  An executable pre-filter is run on
# the snapshot first; an executable post-filter is run on the added-lines
# file afterwards.  A missing previous snapshot is treated as empty, so
# every line counts as added.
#
# Sites that fail (filter error, unreadable snapshot, unwritable added
# file) are moved from %sites to %skippedSites with the reason.
#
# The merge tests items with defined() rather than truth, so a line
# consisting of "0" without a newline (possible as the last line of a
# file) no longer ends the comparison early.  Files are opened with
# lexical handles and three-argument open.
sub CompareSnapshots
{
    # Writes the given lines to a file; returns true on success and leaves
    # the reason in $! otherwise.
    my $saveLines= sub
    {
        my ($path, $lines)= @_;
        open(my $handle, '>', $path) or return $false;
        print $handle @$lines;
        close($handle) or return $false;
        return $true;
    };

    MajorLogEntry("Comparing sites...");
    foreach my $site (keys %sites)
    {
        my @added= ();
        my @retained= ();
        my @removed= ();
        my @old= ();
        my @new= ();
        my $siteDirectory= $dataDirectory.$site.$directoryDelimiter;
        my $snapshotPath= $siteDirectory.$snapshotFileName;
        my $oldSnapshotPath= $siteDirectory.$oldSnapshotFileName;
        my $addedPath= $siteDirectory.$addedFileName;
        my $retainedPath= $siteDirectory.$retainedFileName;
        my $removedPath= $siteDirectory.$removedFileName;

        LogEntry("$site:");

        if (-x $siteDirectory.$preFilterScriptName)
        {
            MinorLogEntry("pre-filtering current snapshot");
            if (my $error= FilterResults($siteDirectory.$preFilterScriptName,
                                         $snapshotPath))
            {
                $skippedSites{$site}= $error;
                delete($sites{$site});
                MinorLogEntry("skipping <$site>");
                next;
            }
        }

        if (open(my $newHandle, '<', $snapshotPath))
        {
            @new= sort <$newHandle>;
            close($newHandle);
        }
        else
        {
            my $reason= $!;
            MinorLogEntry("could not read the new snapshot <"
                          . $snapshotPath . ">: $reason");
            MinorLogEntry("skipping to the next site...");
            $skippedSites{$site}= "Could not read the new snapshot ($reason)";
            delete($sites{$site});
            next;
        }

        if (open(my $oldHandle, '<', $oldSnapshotPath))
        {
            @old= sort <$oldHandle>;
            close($oldHandle);
        }
        else
        {
            MinorLogEntry("could not read a previous snapshot <"
                          . $oldSnapshotPath . ">: $!");
        }

        # Merge the two sorted lists.
        my $newItem= shift(@new);
        my $oldItem= shift(@old);
        while (defined($oldItem) and defined($newItem))
        {
            if ($newItem lt $oldItem)
            {
                push(@added, $newItem);
                $newItem= shift(@new);
            }
            elsif ($oldItem eq $newItem)
            {
                push(@retained, $oldItem);
                $newItem= shift(@new);
                $oldItem= shift(@old);
            }
            else
            {
                push(@removed, $oldItem);
                $oldItem= shift(@old);
            }
        }
        # Whatever is left on one side has no counterpart on the other.
        push(@removed, $oldItem) if defined $oldItem;
        push(@removed, @old);
        push(@added, $newItem) if defined $newItem;
        push(@added, @new);

        MinorLogEntry("saving results");

        # The report is built from the added file: failing to write it is
        # fatal for this site; the other two are informational.
        unless ($saveLines->($addedPath, \@added))
        {
            my $reason= $!;
            MinorLogEntry("could not save added lines to disk <"
                          . $addedPath . ">: $reason");
            MinorLogEntry("skipping to the next site...");
            $skippedSites{$site}= "Could not save added lines to disk ($reason)";
            delete($sites{$site});
            next;
        }
        unless ($saveLines->($retainedPath, \@retained))
        {
            MinorLogEntry("could not save retained lines to disk <"
                          . $retainedPath . ">: $!");
        }
        unless ($saveLines->($removedPath, \@removed))
        {
            MinorLogEntry("could not save removed lines to disk <"
                          . $removedPath . ">: $!");
        }

        if (-x $siteDirectory.$postFilterScriptName)
        {
            MinorLogEntry("filtering new items");
            if (my $error= FilterResults($siteDirectory.$postFilterScriptName,
                                         $addedPath))
            {
                $skippedSites{$site}= $error;
                delete($sites{$site});
                MinorLogEntry("skipping <$site>");
            }
        }
        LogEntry();
    }
}
# ReportNew
#
# Delivers the report by mail and/or as a web page, depending on which
# destinations were requested.  The web report is only attempted when the
# mail report (if any) succeeded.  Snapshots are preserved for the next run
# only when every requested delivery succeeded.
sub ReportNew
{
    my $success= ($mailTo or $webDirectory);

    unless ($success)
    {
        return MajorLogEntry("No report requested");
    }

    MajorLogEntry("Reporting results...");
    if ($mailTo)
    {
        $success= ReportNewViaMail($mailTo);
    }
    if ($webDirectory and $success)
    {
        $success= ReportNewViaWeb($webDirectory);
    }
    PreserveSnapshots() if $success;
}
# ReportNewViaMail
#
# Compiles the report and sends it over SMTP to the given address.  The
# mail server contacted is the domain part of the address; an address
# without a domain is delivered to "localhost".
#
# Parameter: the recipient address.
# Returns $true when the message was handed to the server, $false
# otherwise (the reason is logged).
#
# Changes over the previous version:
#   - the SMTP connection is closed on every failure path;
#   - the results of the DATA phase are checked, so a message rejected
#     there is no longer reported as sent;
#   - the Date header uses the RFC 5322 field order
#     (day-of-week, day month year time zone).
sub ReportNewViaMail
{
    use Net::SMTP;
    my $to= shift;

    LogEntry("Sending the report to <$to>");
    my ($user, $domain)= split("\@", $to);
    unless ($domain)
    {
        $domain= "localhost";
        $to= $user . "\@" . $domain;
    }

    my $mailer= Net::SMTP->new($domain);
    unless ($mailer)
    {
        MinorLogEntry("could not create a mail connection ($@)" .
                      "\n\tmake sure that you properly configured libnet");
        return $false;
    }

    # Sender: the current user, qualified with the domain the server
    # announced (when it announced one).
    my $from= $ENV{'USER'} || $ENV{'LOGNAME'} || getlogin() || (getpwuid($>))[0];
    unless ($from)
    {
        MinorLogEntry("could not learn my From: address");
        $mailer->quit();
        return $false;
    }
    $from.= "\@" . $mailer->domain() if $mailer->domain();

    my $message= "To: $to\n" . "From: $myName <$from>\n" .
                 "Date: " . time2str("%a, %d %b %Y %T %z", time()) . "\n" .
                 "Subject: $myName report\n";
    $message.= "Content-Type: text/html\n" if $html;
    $message.= "\n" . CompileNew($html);

    my $sent= ($mailer->mail($from)
               && $mailer->to($to)
               && $mailer->data()
               && $mailer->datasend($message)
               && $mailer->dataend());
    unless ($sent)
    {
        my $error= $mailer->message();
        $error= "" unless defined $error;
        $error=~ s/\s+$//s;
        $error=~ s/\n/ /sg;
        MinorLogEntry("could not send the report ($error)");
        $mailer->quit();
        return $false;
    }

    $mailer->quit();
    return $true;
}
# ReportNewViaWeb
#
# Saves the HTML report as "<name>.<timestamp>.html" in the given web
# directory and points the index symbolic link at it.  When the index
# previously pointed at another report in the same directory, the new page
# gets a link to that previous report.  The page also gets a link to the
# log.
#
# Parameter: the web directory (expected to end with the directory
# delimiter, as arranged by Initialize).
# Returns $true when the page was saved, $false otherwise.
#
# The page is written through a lexical handle opened with three-argument
# open, and a failing close (for instance a full disk) counts as a failure
# to save.
sub ReportNewViaWeb
{
    my $directory= shift;
    my $indexFile= $directory.$webIndexFile;
    my $pageName= $myName . "." . Now($true) . ".html";

    LogEntry("Saving the report as <$directory$pageName>");
    my $page= CompileNew($true, $true);
    my $log= LogReference();
    $page=~ s/\Q$logFileToken\E/$log/sg;

    if (-l $indexFile)
    {
        my $link= readlink($indexFile);
        if ($link=~ /$directoryDelimiter/)
        {
            # Only links to reports within this directory are followed up.
            MinorLogEntry("previous link invalid <$link>");
        }
        elsif (unlink($indexFile))
        {
            MinorLogEntry("could not create a new index link " . "<$indexFile>: $!")
                unless symlink($pageName, $indexFile);
            $link= PreviousReportReference($link);
            $page=~ s/\Q$previousReportToken\E/$link/sg;
        }
        else
        {
            MinorLogEntry("could not remove the old index link " .
                          "<$indexFile>: $!");
        }
    }
    elsif (-e $indexFile)
    {
        MinorLogEntry("current index is not a symbolic link -- ignoring");
    }
    else
    {
        MinorLogEntry("could not create a new index link ". "<$indexFile>: $!")
            unless symlink($pageName, $indexFile);
    }

    $pageName= $directory.$pageName;
    my $saved= $false;
    if (open(my $output, '>', $pageName))
    {
        print $output $page;
        $saved= $true if close($output);
    }
    unless ($saved)
    {
        MinorLogEntry("could not save the report: $!");
        return $false;
    }

    MinorLogEntry("could not change access for <$pageName>: $!")
        unless chmod(0644, $pageName);
    return $true;
}
# CompileNew
#
# Builds the report text from the added-lines file of every site in %sites,
# followed by the list of skipped sites and their errors.
#
# Parameters:
#   $html - true for an HTML report, false for plain text
#   $toc  - true to include a table of contents (HTML reports only)
# Returns the report as one string.  HTML reports carry the placeholder
# tokens for the previous-report and log links, to be filled in by the
# caller.
#
# Each added line is either plain link text or "text:<TAB>address"; an
# address that is not absolute is resolved against the site's URL.
#
# Fixes over the previous version:
#   - the header comment naming the program's home page was a garbled
#     string literal that did not compile; it is restored from the two
#     intact fragments of the URL;
#   - the plain-text error list used a site name left over from an earlier
#     loop; the name is now derived from the site being listed;
#   - the site URL is cleaned of the optional "<URL:...>" wrapping the same
#     way CheckSites does, and capture groups are only used after a
#     successful match;
#   - blank lines in the added file no longer produce empty report items.
sub CompileNew
{
    my $html= shift;
    my $toc= shift;
    my $report;
    my ($timeTag, $dateTag)= Now();

    if ($html)
    {
        MinorLogEntry("will format this report as HTML");
        $report= "<html>\n<head>\n"
            . "\t<title>New Items</title>\n\n"
            . "\t<!-- Report generated at $timeTag on $dateTag -->\n"
            . "\t<!-- by i-spy, a perl script written by Igor S. Livshits "
            . "<mailto:i-spy\@ayradyss.org> -->\n"
            . "\t<!-- <http://www.ayradyss.org/programs/"
            . "current.html#i-spy> -->\n"
            . "<style type=\"text/css\">"
            . "<!--"
            . "body {"
            . "color: #000000;"
            . "background-color: #d0d0d0;"
            . "} /* default */"
            . "a {"
            . "color: inherit;"
            . "background-color: inherit;"
            . "font: inherit;"
            . "text-decoration: inherit;"
            . "}"
            . "a:hover {"
            . "text-decoration: underline;"
            . "}"
            . "a:link { color: blue }"
            . "a:visited { color: purple }"
            . "a:active { color: red }"
            . "-->"
            . "</style>"
            . "</head>\n\n"
            . "<body>\n";
    }
    else
    {
        MinorLogEntry("will format this report as plain text");
        $report= "";
    }

    if (my @candidates= sort(keys(%sites)))
    {
        LogEntry("\nListing new items for each site:");
        if ($html)
        {
            $report.= "<center><h3>New Items</h3></center>\n";
            if ($toc)
            {
                # Reserve the spot; the entries are substituted at the end.
                $toc= "$tocToken\n";
                $report.= $toc;
            }
        }
        else
        {
            $report.= "New items\n---------\n";
        }

        foreach my $site (@candidates)
        {
            my $prettySiteName= $site;
            $prettySiteName=~ s/$prettyNameRegEx//;
            my $siteDirectory= $dataDirectory.$site.$directoryDelimiter;
            MinorLogEntry($site);

            # The site's primary URL is the first line of its data.
            my ($url)= split(/\n/, $sites{$site});
            $url= "" unless defined $url;
            $url=~ s/\s+$//;
            $url=~ s/^<//;
            $url=~ s/>$//;
            $url=~ s/^URL://;
            my $protocol= ($url=~ /^(\w+):/) ? uc($1) : "";
            my $urlHost= ($url=~ /^(\w+:\/*[^\/]+)/) ? $1 : "";

            my $addedHandle;
            unless (open($addedHandle, '<', $siteDirectory.$addedFileName))
            {
                my $reason= $!;
                MinorLogEntry("could not read added lines from disk <"
                              . $siteDirectory.$addedFileName . ">: $reason");
                MinorLogEntry("skipping to the next site...");
                $skippedSites{$site}= "Could not read added lines from disk ($reason)";
                delete($sites{$site});
                next;
            }
            my @new= <$addedHandle>;
            close($addedHandle);

            unless (@new)
            {
                if ($html)
                {
                    $report.= "<p><b>$myName</b> "
                        . "did not discover any new items at "
                        . "<a href=\"$url\">$prettySiteName</a>.</p>\n\n";
                }
                else
                {
                    $report.= "\n$myName did not discover any new items at $prettySiteName.\n";
                }
                next;
            }

            if ($html)
            {
                $report.= "<p><ul><h4><a name=\"$prettySiteName\">" .
                          "<a href=\"$url\">$prettySiteName</a></a></h4>\n";
                $toc.= "\t<li><a href=\"\#$prettySiteName\">$prettySiteName</a>\n"
                    if $toc;
            }
            else
            {
                $report.= "\n$prettySiteName:\n\n";
            }

            foreach my $item (@new)
            {
                $item=~ s/\s+$//;
                next if $item eq "";

                my ($link, $address)= split(/$specialDelimiter/, $item);
                $address= $link unless $address;

                if ($address=~ /^\//)
                {
                    # Host-relative: prefix protocol and host.
                    $address= $urlHost . $address;
                }
                elsif ($address=~ /^\w+:/)
                {
                    # Already absolute.
                }
                elsif (($address=~ /^\#/) and ($protocol eq "HTTP"))
                {
                    # Fragment: replace the fragment of the site URL.
                    if ($url=~ /^(.+)\#[^\#]*$/)
                    {
                        $address= $1 . $address;
                    }
                    else
                    {
                        $address= $url . $address;
                    }
                }
                elsif ($protocol eq "HTTP")
                {
                    # Document-relative: climb one level per leading "../".
                    my $urlRoot= $url;
                    $urlRoot=~ s/\/[^\/]*$//;
                    while ($address=~ s/^\.\.\///)
                    {
                        $urlRoot=~ s/\/[^\/]*$//;
                    }
                    $address= $urlRoot . "/" . $address;
                }
                else
                {
                    $address= $url . $address;
                }

                if ($html)
                {
                    $report.= "\t<li><a href=\"$address\">$link</a>\n";
                }
                else
                {
                    $report.= "$link\n\t<$address>\n";
                }
            }

            if ($html)
            {
                $report.= "</ul></p>\n<br>\n\n";
            }
            else
            {
                $report.= "\n";
            }
        }

        $toc= "<ol>\n$toc</ol>\n<hr width=\"50%\">\n"
            if $toc and $toc=~ /<li>/;
    }
    else
    {
        if ($html)
        {
            $report.= "<center><h3>No New Items</h3></center>\n";
        }
        else
        {
            $report.= "No new items\n";
        }
    }

    if (my @skipped= sort(keys(%skippedSites)))
    {
        LogEntry("\nListing skipped sites:");
        if ($html)
        {
            $report.= "\n<hr>\n<center><h3>Errors</h3></center>\n";
        }
        else
        {
            $report.= "\nErrors\n------\n";
        }

        foreach my $site (@skipped)
        {
            my $prettySiteName= $site;
            $prettySiteName=~ s/$prettyNameRegEx//;
            MinorLogEntry("$site: " . $skippedSites{$site});
            if ($html)
            {
                $report.= "<p>Skipped <b>$prettySiteName</b>: "
                    . $skippedSites{$site}
                    . "</p>\n";
            }
            else
            {
                $report.= "Skipped $prettySiteName: $skippedSites{$site}\n";
            }
        }
    }

    if ($html)
    {
        $report.= "<hr><hr>\n$previousReportToken\n" .
                  " $logFileToken\n" .
                  "<br><p>Generated at $timeTag on $dateTag by " .
                  "<a href=\"$myPage\">$myName</a></p>\n" .
                  "</body>\n</html>\n";
    }
    else
    {
        $report.= "\nGenerated at $timeTag on $dateTag by $myName.\n";
    }

    # Drop the collected table of contents into its reserved spot.
    $toc= "" unless defined $toc;
    $report=~ s/\Q$tocToken\E/$toc/sg;
    return $report;
}
# FilterResults
#
# Runs a site's filter program on a file.  The program receives the file
# path and the data directory as its two arguments and is started without
# a shell.
#
# Parameters: the filter program and the file to filter.
# Returns $noError when the program exits with status 0, otherwise a
# description of the failure.
#
# The description is derived from $?: $! only carries information when the
# program could not be started at all, so reporting it for a filter that
# ran and failed showed an unrelated message.
sub FilterResults
{
    my ($filterProg, $sourceFile)= @_;

    return $noError
        if system($filterProg, $sourceFile, $dataDirectory) == 0;

    my $reason;
    if ($? == -1)
    {
        $reason= "could not run the filter: $!";
    }
    elsif ($? & 127)
    {
        $reason= "terminated by signal " . ($? & 127);
    }
    else
    {
        $reason= "exit status " . ($? >> 8);
    }

    MinorLogEntry("filtering failed: $reason");
    return "Filtering failed ($reason)";
}
# PreserveSnapshots
#
# Renames the current snapshot of every site in %sites to the old-snapshot
# name, so the next run has something to compare against.  A site whose
# snapshot cannot be renamed is recorded in %skippedSites.
sub PreserveSnapshots
{
    my @names= keys(%sites);
    return unless @names;

    MajorLogEntry("Preserving site snapshots:");
    for my $name (@names)
    {
        my $base= join("", $dataDirectory, $name, $directoryDelimiter);
        unless (rename($base.$snapshotFileName, $base.$oldSnapshotFileName))
        {
            LogEntry("Could not preserve $name snapshot: $!");
            $skippedSites{$name}= "Could not preserve snapshot ($!)";
            next;
        }
        MinorLogEntry($name);
    }
}
# Now
#
# Formats a point in time, in local time.
#
# Parameters:
#   $terse - true to get a single compact stamp
#   $when  - optional epoch seconds; defaults to the current time
# Returns:
#   terse:     the string "YYYYMMDDhhmmss"
#   otherwise: the list ("hh:mm:ss", "MM/DD/YYYY")
#
# Every field, including the hour, is zero-padded.  The hour used to be
# left unpadded, so terse stamps taken before 10:00 were one character
# short and did not sort with the others.
sub Now
{
    my $terse= shift;
    my $when= shift;
    $when= time() unless defined $when;

    my ($second, $minute, $hour, $day, $month, $year)= localtime($when);
    $month++;          # localtime counts months from 0
    $year+= 1900;      # ... and years from 1900

    return sprintf("%d%02d%02d%02d%02d%02d",
                   $year, $month, $day, $hour, $minute, $second)
        if $terse;

    my $clock= sprintf("%02d:%02d:%02d", $hour, $minute, $second);
    my $date= sprintf("%02d/%02d/%d", $month, $day, $year);
    return ($clock, $date);
}
# PreviousReportReference
#
# Wraps the given address in the bracketed HTML anchor used to point at
# the previous report.
sub PreviousReportReference
{
    my ($target)= @_;
    return "[<a href=\"" . $target . "\">Previous report</a>]";
}
# LogReference
#
# Makes the log file readable (mode 0644) and returns a bracketed HTML
# anchor pointing at it.  The web directory prefix is removed from the log
# path so the link is relative to the report page.
#
# The prefix is matched literally: the directory is quoted before it goes
# into the pattern, so characters such as "." or "+" in the path are not
# treated as regular-expression operators.
sub LogReference
{
    my $log= $logFile->Path();

    MinorLogEntry("could not change access for <$log>: $!")
        unless chmod(0644, $log);

    if (defined($webDirectory) and $webDirectory ne "")
    {
        $log=~ s/^\Q$webDirectory\E//;
    }
    return "[<a href=\"$log\">Log</a>]";
}
__END__
=pod
=head1 NAME
i-spy -- scrape FTP and web sites for content changes
=head1 SYNOPSIS
B<i-spy> S<[B<-m> I<e-mail>]> S<[B<-d> I<directory>]>
B<i-spy> S<[B<-d> I<directory>]> S<[B<-w> I<directory>]>
(See the OPTIONS section for alternate option syntax with long option names.)
=head1 DESCRIPTION
B<i-spy> grabs and compares contents of FTP directories and web pages.
It then compiles a report and either sends it via e-mail or saves
it as a web page. You may also request both deliveries of the report.
For e-mail reports, you may request plain text or HTML.
B<i-spy> logs its activity as it chugs along. You may specify the log
directory, or B<i-spy> will try to find one automatically. For web page
reports, B<i-spy> will attempt to store the log in such a place where
it may be referenced by the report and served by the web server.
A site definition is a I<directory> which contains at least the F<data.txt>
file. This file must have the target URL as its first line. B<i-spy>
currently deals with FTP and HTTP URLs. An optional second line may
indicate an alternate data source such as a precompiled site index
or listing (see the CPAN example). For FTP sites only, a second line may
also indicate a directive (C<dir>) for verbose listings (see the NTP
example). The latter may be useful for sites where the operator
updates directories and links without changing their name.
A site definition may also contain a F<pre-filter> (see the Apple example)
and a F<post-filter> (see the NTP example). B<i-spy> invokes pre-filters
before comparing snapshots, and post-filters after comparing snapshots.
B<i-spy> generates all other files you may find within site directories.
=head1 OPTIONS
=over 4
=item B<-d> I<directory>, B<--dir> I<directory>, B<--directory> I<directory>
A I<directory> containing site definitions. If omitted, B<i-spy>
will try the F<sites> subdirectory of the present working directory.
=item B<-l> I<directory>, B<--log> I<directory>, B<--logDirectory> I<directory>
A I<directory> for the log. If omitted or improper, B<i-spy>
will try to figure out an appropriate place.
=item B<-w> I<directory>, B<--web> I<directory>
A I<directory> for generated HTML reports; this directory should
ideally be accessible by a local or a remote web browser.
=item B<-m> I<e-mail>, B<--mail> I<e-mail>
A destination I<e-mail> address for generated reports.
=item B<-h>, B<--html>
A flag indicating whether to send an HTML report (if set) or a plain
text report (if not set). Reports saved in a specified web directory
will be formatted as HTML regardless of this setting.
=item B<-n> I<text>, B<--name> I<text>
Specified text string overrides the name learned from the OS;
used for log names and such.
=back
=head1 EXAMPLES
For periodic automated runs, try something like this:
C<i-spy --web /var/www/i-spy --directory /var/sites>
The above may be set to run once a day via B<cron> and will generate
a report each time which one may later check from a client machine
with a web browser. All web reports provide a link to a report from
a previous run. Also, each report references its corresponding log file.
B<i-spy> will expect to find the B<site directories> within the
C</var/sites> directory specified above via the C<--directory>
directive.
B<i-spy> will save the report in the directory specified by the
C<--web /var/www/i-spy> directive and the log in the corresponding
C</var/www/i-spy/logs> directory. Both should be accessible via
a web server or a locally running web browser.
For interactive reports delivered via e-mail, consider the following:
C<i-spy --mail user@domain.tld>
B<i-spy> will check the C<sites> directory in your present working
directory for B<site directories>. It will generate a plain text report
and send it to the specified C<user@domain.tld> e-mail address.
B<i-spy> will also leave a log in the C<logs> subdirectory of the present
working directory.
For HTML-formatted reports over e-mail, try:
C<i-spy --mail user@domain.tld --html>
=head1 FILES
=over 4
=item data.txt
A text file within individual site directories containing the primary URL
on the first line and either an alternate URL on line two or an FTP listing
type directive on line two.
=item added.txt
A text file within individual site directories containing all items added
since the preceding run.
=item removed.txt
A text file within individual site directories containing all items removed
since the preceding run.
=item retained.txt
A text file within individual site directories containing all items which
have remained unchanged since the preceding run.
=item snapshot.txt
A text file within individual site directories containing the current
snapshot of the remote resource.
=item old-snapshot.txt
A text file within individual site directories containing the snapshot
of the remote resource saved during a preceding run.
=item pre-filter
An executable to process the scraped snapshot before comparison to
F<old-snapshot.txt>.
=item post-filter
An executable to process the list of added items (F<added.txt>) after
the scraped snapshot has been compared to F<old-snapshot.txt>.
=item index.html
A symbolic link to the latest report saved in the B<web> directory.
=back
=head1 REQUIRES
Perl 5.8, Getopt::Long, Log::File, Date::Format, Net::FTP, Net::SMTP,
LWP::UserAgent, HTML::TokeParser
=head1 SEE ALSO
perl(1)
=head1 BUGS
Send bug reports, questions, and requests to i-spy@ayradyss.org.
=head1 AUTHOR
Igor S. Livshits <mailto:i-spy@ayradyss.org>
=head1 COPYRIGHT
Copyright (C) 2006 Igor S. Livshits
Use and distribute this tool as per the Artistic License
=cut