#!/usr/bin/perl

# dsync
#   A tool for merging or synchronizing contents of two directories
#   inspired by Dan Kogai's psync.
#
#   06/28/2006 Version 1.0
#   Use and distribute this script as per the Artistic License
#   Copyright (C) 2006 Igor S. Livshits <mailto:dsync@ayradyss.org>


# Define libraries and modules
#
use strict;
use Getopt::Long;               # command line options processor
use Fcntl;
use Fcntl qq(:mode);
use POSIX qw(tmpnam);
use DB_File;
use File::Basename;
use MacOSX::File;
use MacOSX::File::Copy;
use MacOSX::File::Info;


# Define some constants
#
my $true= (1==1);               # convenient boolean values
my $false= !$true;
my $ofNone= 0;                  # status conditions
my $ofFirst= 1;
my $ofSecond= 2;
my $ofBoth= $ofFirst | $ofSecond;
my $mode= 2;                    # indices into the stat/lstat attribute array
my $uid= 4;
my $gid= 5;
my $size= 7;
my $atime= 8;
my $mtime= 9;
my @indices= ($mode, $uid, $gid, $size, $mtime);
my $indices= @indices;          # number of attributes that we check
my $sigMode= 0;                 # indices into unpacked attribute signature array
my $sigUid= 1;
my $sigGid= 2;
my $sigSize= 3;
my $sigMtime= 4;
my $attAtime= 3;
my $unknownGroup= 99;           # special group ID for AFP mounts
my $dSyncDBFileName= ".dsync.db";
my $dSyncConflictDirectoryName= "Conflicts.dsync/";
my $versionString= "dsync 0.9.4\n"
  . "<http://www.ayradyss.org/programs/current.html#dsync>";

# Define some global storage
#
my $me= $0 or "dsync";          # this program's path or name
my $quiet= $false;              # suppress output?
my $debug= $false;              # provide run-time diagnostics?
my $help= $false;               # help mode?
my $version= $false;            # versions mode?
my $simulation= $false;         # simulation mode?
my $remote= $false;             # remote volume mode?
my $master= "";                 # is there a master directory?
my $snapshot= "";               # simply update the snapshot?
my $lax= "";                    # trust the primary snapshot versus file state
my $count= 0;                   # count nodes scanned for progress displays
my %ignoreItems= ();            # names of indicated items to skip

my %ignoreSpecialItems= map { $_ => 1 }
(                               # names of special items to skip
 $dSyncDBFileName,
 '.DS_Store',
 '.FBCIndex',
 '.FBCLockFolder',
 '.Trashes',
 'AppleShare PDS',
 'Desktop DB',
 'Desktop DF',
 'TheFindByContentFolder',
 'TheVolumeSettingsFolder',
 'Temporary Items',
);

my %ignoreSpecialPaths= map { $_ => 1 }
(                               # names of special directories to skip
 '/tmp',
 '/dev',
 '/var/run',
 '/var/tmp',
 '/private/tmp',
 '/private/var/tmp',
 '/private/var/vm',
 '/private/var/run',
 '/Network',
 '/Volumes',
 '/automount',
 '/.vol',
);

sub reverse { my $b cmp my $a; }

Initialize();                   # set up
Run(@ARGV);                     # execute

exit;                           # a clean exit on completion


#
# Subroutines
#

# Initialize
#
# Check conmmand-line options and set up variables
#
sub Initialize
{
  my @ignore= ();               # a list of items to ignore

  if (@ARGV)
  {                             # we have command-line options
    GetOptions("n|simulation" => \$simulation,
               "quiet" => \$quiet,
               "debug" => \$debug,
               "master=s" => \$master,
               "s|snapshot=s" => \$snapshot,
               "x|ignore=s" => \@ignore,
               "remote" => \$remote,
               "lax=s" => \$lax,
               "V|version" => \$version,
               "H|help" => \$help,
              );
    
    if ($quiet and $debug)
    {                           # can't have it both ways
      print "Debugging mode trumps quiet mode...\n";
      $quiet= $false;
    }

    %ignoreItems= map { $_ => 1 } @ignore;
  }
  else
  {                             # no command-line arguments, display usage information
    Usage();
  }
}

# Run
#
# Trigger appropriate modes and execute
#
sub Run
{
  if ($help)
  {                             # help mode takes precedence
    print "Initialized: ready for help mode.\n" if $debug;
    Help();
  }
  elsif ($version)
  {                             # version mode takes next precedence
    print "Initialized: ready for version mode.\n" if $debug;
    Version();
  }
  else
  {                             # prepare for one of real operations
    my @directories= ();        # directories for processing

    select(STDOUT); $|= 1;      # do not buffer output, best for progress display

    if ($master)
    {                           # master directory replication mode
      my $target= shift or Usage();

      ConfirmDirectory(\$master, $false)
        or die "Master directory <$master> does not seem to exist... scuttling\n";
      ConfirmDirectory(\$target, !$simulation)
        or die "Cannot access target directory <$target>... scuttling\n";

      print("Initialized: ready to replicate master.\n") if $debug;
      Synchronize($master, $target);
    }
    elsif ($lax)
    {                           # lax replication mode
      my $target= shift or Usage();

      ConfirmDirectory(\$lax, $false)
        or die "Local directory <$lax> does not seem to exist... scuttling\n";
      ConfirmDirectory(\$target, !$simulation)
        or die "Cannot access target directory <$target>... scuttling\n";

      print("Initialized: ready for lax merging.\n") if $debug;
      Synchronize($lax, $target);
    }
    elsif ($snapshot)
    {                           # snapshot update mode
      my $database= "";         # name of our database file

      ConfirmDirectory(\$snapshot, $false)
        or die "Snapshot directory <$snapshot> does not seem to exist... scuttling\n";

      print("Initialized: ready to take a snapshot.\n") if $debug;
      if ($database= Snapshot($snapshot, "snapshot"))
      {
        unless ($simulation)
        {
          copy($database, $snapshot.$dSyncDBFileName)
            or die "Failed to copy snapshot to <$snapshot$dSyncDBFileName>: ",
            &MacOSX::File::strerr, "\n";
        
          print "\nSnapshot taken <$snapshot$dSyncDBFileName>!\n" unless $quiet;
        }
        print "\n" if $debug;   # pretty formatting
        ClearTmpStorage($database);
      }
    }
    else
    {                           # normal mode
      my $firstDirectory= shift or Usage();
      my $secondDirectory= shift or Usage();

      ConfirmDirectory(\$firstDirectory, $false)
        or die "Cannot access first directory <$firstDirectory>... scuttling\n";
      ConfirmDirectory(\$secondDirectory, !$simulation)
        or die "Cannot access second directory <$secondDirectory>... scuttling\n";

      print("Initialized: ready to merge!\n") if $debug;
      Synchronize($firstDirectory, $secondDirectory);
    }
  }
}


# ConfirmDirectory
#
# Confirm and normalize a directory path
#
sub ConfirmDirectory
{
  my $directory= shift;         # a directory to confirm
  my $create= shift;            # create if it does not exist?

  unless (-d $$directory)
  {                             # does not exist
    return $false               # and we should not attempt to create
      unless $create;

    print "Attempting to create <$$directory>\n" if $debug;
    if (mkdir($$directory, 0700))
    {                           # directory created
      $master= $true;           # force master -> secondary mode
    }                           # the new directory is the secondary
    else
    {                           # failed to create the directory
      warn "Failed to create directory <$$directory>: $!\n";
      return $false;
    }
  }

  $$directory= $$directory . "/" # append directory delimiters, if necessary
    unless $$directory=~  /\/$/;

  $$directory=~  s/\/+/\//g;    # clean those ugly doubled directory delimiters

  return $true;
}


# Synchronize
#
# Synchronize the directories
#
sub Synchronize
{
  my %attributes= ();           # a hash of file and directory attributes from previous runs
  my %copy= ();                 # a hash of files and directories to copy
  my %delete= ();               # a hash of files and directories to delete
  my %reset= ();                # a hash of directories to reset during clean-up
  my $fakeSnapshot= "";         # a fake snapshot name and location for simulations
  my %firstDirectoryAttributes= (); # a hash of items in the first directory
  my %secondDirectoryAttributes= (); # a hash of items in the second directory
  my ($primarySnapshot,         # authoritative snapshot
      $secondarySnapshot,       # a snapshot to update later
      $firstDirectory,          # primary file hiearchy
      $secondDirectory          # secondary file hierarchy
     )= ChooseSnapshot(shift, shift);
  
                                # read item listing snapshot from last run
  if ($simulation)
  {                             # work with a copy
    $fakeSnapshot= tmpnam();

    if (-e $primarySnapshot)
    {                           # if a snapshot exists, copy it
      if (copy($primarySnapshot, $fakeSnapshot))
      {                         # data copied, replicate attributes
        CopyAttributes($primarySnapshot, $fakeSnapshot, $false)
          or die "Could not replicate snapshot attributes;\n",
          "\tthe simulation would not be accurate.\n";
      }
      else
      {                         # could not replicate the snapshot
        die "Failed to replicate snapshot <$primarySnapshot>\n",
        "\tas <$fakeSnapshot>: ", &MacOSX::File::strerr, "\n";
      }
    }
    $primarySnapshot= $fakeSnapshot;
  }
  tie(%attributes, 'DB_File', $primarySnapshot, O_CREAT|O_RDWR, 0600, $DB_HASH)
    or die "Cound not access snapshot <$primarySnapshot>: $!\n";

                                # temporary database files
  my $firstDirectoryTmpStorage= Snapshot($firstDirectory, $master ? "master" : "primary");
  my $secondDirectoryTmpStorage= Snapshot($secondDirectory, "secondary");

                                # access current directory listings
  tie(%firstDirectoryAttributes, 'DB_File', $firstDirectoryTmpStorage,
      O_RDWR, 0600, $DB_HASH)
    or die "Could not access <$firstDirectory> listings: $!\n";
  tie(%secondDirectoryAttributes, 'DB_File', $secondDirectoryTmpStorage,
      O_RDWR, 0600, $DB_HASH)
    or die "Could not access <$secondDirectory> listings: $!\n";

  %delete= FindDeleted(\%attributes,
                       \%firstDirectoryAttributes, \%secondDirectoryAttributes);
  %copy= FindChanged(\%delete, \%attributes,
                     \%firstDirectoryAttributes, \%secondDirectoryAttributes);

  DeleteItems(\%attributes, \%delete, \%reset, $firstDirectory, $secondDirectory)
    if keys %delete;

  CopyItems(\%attributes, \%copy, \%reset,
            \%firstDirectoryAttributes, \%secondDirectoryAttributes,
            $firstDirectory, $secondDirectory)
    if keys %copy;

  untie(%firstDirectoryAttributes); # remove links to temporary storage
  untie(%secondDirectoryAttributes);

  print "\n" if $debug;         # pretty formatting
  ClearTmpStorage($firstDirectoryTmpStorage, $secondDirectoryTmpStorage);

  CleanUp(\%attributes, \%reset, $firstDirectory, $secondDirectory)
    if keys %reset;             # reset modification times on unpdated directories

  untie(%attributes);           # flush item snapshot to disk for future comparisons
  if ($simulation)
  {                             # clean up temporary snapshot copy
    ClearTmpStorage($fakeSnapshot);
  }
  else
  {                             # replicate item snapshot in the other directory
    if (copy($primarySnapshot, $secondarySnapshot))
    {                           # data copied, replicate attributes
      CopyAttributes($primarySnapshot, $secondarySnapshot, $false);
    }
    else
    {                           # could not replicate the sbapshot
      warn "Failed to replicate snapshot <$primarySnapshot>\n",
      "\tas <$secondarySnapshot>: ", &MacOSX::File::strerr, "\n";
    }
  }
}


# Snapshot
#
# Take a snapshot of a directory
#
sub Snapshot
{
  my $directory= shift;         # a file hierarchy
  my $designation= shift;       # directory status designation
  my %attributes= ();           # a hash of file and directory attributes
  my $directoryTmpStorage= tmpnam(); # disk store for directory listings

  print "Setting up temporary storage <$directoryTmpStorage> "
    . "for <$directory> listings...\n" if $debug;
  tie(%attributes, 'DB_File', $directoryTmpStorage,
     O_CREAT|O_RDWR|O_EXCL, 0600, $DB_HASH)
    or die "Could not create temporary storage for <$directory> listings: $!\n";

  $count= 0;                    # scan the hierarchy
  unless ($quiet)
  {
    print "\nScanning $designation directory <$directory>...";
  }
  foreach my $subNode (ListDirectory($directory))
  {                             # scan each file and sub-directory
    ScanNode($directory, "$subNode", \%attributes);
  }
  print "\n" unless $quiet;

  untie(%attributes);           # flush item snapshot to disk for future comparisons

  return $directoryTmpStorage;  # return the name of our database file
}


# ScanNode
#
# Scan a given node and compare to previous snapshot
#
sub ScanNode
{
  my $root= shift;              # the original directory tree root
  my $node= shift;              # the directory to scan
  my $changes= shift;           # a reference to a hash of changed items
  my @attributes= ();           # a list of a node's attributes

  return                        # skip special items
    if ($ignoreSpecialItems{basename($node)});
  return                        # skip indicated items
    if ($ignoreItems{$node} or $ignoreItems{$root.$node});
 
  printf("\n%10d:", $count)     # start a new progress line
    if ($count % 8192 == 0) and !$debug and !$quiet;
  print "."                     # mark a progress tick
    if ($count % 128 == 0) and !$debug and !$quiet;
  
  $count++;
  printf("\n%10d: Checking <$root$node>\n", $count) if $debug;

                                # check mode, uid, gid, size and mtime
  @attributes= Attributes($root.$node);
  printf("%12s[%08x %08x %08x %08x %08x]\n", "", @attributes) if $debug;
  
  if (S_ISDIR(@attributes[$sigMode]))
  {                             # special treatment for directories
    unless ($ignoreSpecialPaths{$root.$node})
    {                           # descend into viable directories
      foreach my $subNode (ListDirectory($root.$node))
      {                         # scan each file and sub-directory
        ScanNode($root, "$node/$subNode", $changes);
      }
    }
  }
  elsif (S_ISLNK(@attributes[$sigMode]))
  {                             # don't care about certain link attributes
    return if $lax;             # don't care about links in lax mode
    @attributes[$sigMode]&= 0xff00;
    @attributes[$sigUid]= 0;
    @attributes[$sigGid]= 0;
    printf("%12s[%08x %08x %08x %08x %08x]\n", "", @attributes) if $debug;
  }
                                # capture the item attribute signature
  ${$changes}{$node}= pack("N$indices", @attributes);
}


# Attributes
#
# Read an item's attributes from the file system
#
sub Attributes
{
  my $item= shift;              # the item in question
  my @attributes= ();           # a list of a node's attributes

  if (@attributes= (lstat($item))[@indices])
  {
    @attributes[$sigSize]= 0    # don't care about directory sizes
      if -d _;

    return @attributes;
  }
  else
  {
    die "Cannot get information about <$item>: $!\n";
  }
}


# ListDirectory
#
# Scan a given node and compare to previous snapshot
#
sub ListDirectory
{
  my $path= shift;              # path to the target directory
  my @nodes= ();                # items in the target directory

  if (opendir(DIR, $path))
  {                             # grab contents
    @nodes= grep(!/^\.(?:\.?$|_)/o, readdir(DIR));
    closedir(DIR);
  }
  else
  {                             # could not list this directory
    warn "Could not list contents of <$path>: $!\n";
  }

  return @nodes;
}


# ChooseSnapshot
#
# Select a snapshot from a previous run for comparisons
#
sub ChooseSnapshot
{
  my $firstDirectory= shift;    # first file hierarchy
  my $secondDirectory= shift;   # the other file hierarchy

  unless ($master or $lax)
  {                             # designate an authoritative snapshot
    my $firstSignature= 0;      # signature of the snapshot in the first directory
    my $firstMtime= 0;          # modification time of the first snapshot
    my $secondSignature= 0;     # signature of the snapshot in the second directory
    my $secondMtime= 0;         # modification time of the second snapshot
    my @attributes= ();         # a list of a snapshot's attributes

    print "\nGetting information about <$firstDirectory$dSyncDBFileName>\n" if $debug;
    if (@attributes= lstat($firstDirectory.$dSyncDBFileName))
    {                           # learn about the first snapshot
      @attributes[$sigGid]= 0;  # ignore these fields for our comparison
      @attributes[$sigUid]= 0;
      @attributes[$sigMode]= 0;
      
      $firstSignature= pack("N$indices", @attributes[@indices]);
      printf("%12s[%08x %08x %08x %08x %08x]\n\n", "", 
             unpack("N$indices", $firstSignature))
        if $debug;
      $firstMtime= @attributes[$mtime];
    }
    else
    {
      TrackingAdvice($firstDirectory);
    }
    
    print "Getting information about <$secondDirectory$dSyncDBFileName>\n" if $debug;
    if (@attributes= lstat($secondDirectory.$dSyncDBFileName))
    {                           # learn about the second snapshot
      @attributes[$sigGid]= 0;  # ignore these fields for our comparison
      @attributes[$sigUid]= 0;
      @attributes[$sigMode]= 0;

      $secondSignature= pack("N$indices", @attributes[@indices]);
      printf("%12s[%08x %08x %08x %08x %08x]\n\n", "",
             unpack("N$indices", $secondSignature)) if $debug;
      $secondMtime= @attributes[$mtime];
    }
    else
    {
      TrackingAdvice($secondDirectory);
    }
    
    unless ($firstSignature eq $secondSignature)
    {                           # choose the older snapshot if they differ
      print "Snapshot signatures do not match!\n" if $debug;
      if ($firstMtime > $secondMtime)
      {
        print "Will use <$secondDirectory$dSyncDBFileName>\n\n" if $debug;
        return ($secondDirectory.$dSyncDBFileName, $firstDirectory.$dSyncDBFileName,
                $firstDirectory, $secondDirectory);
      }
      else
      {
        print "Will use <$firstDirectory$dSyncDBFileName>\n\n" if $debug;
        return ($firstDirectory.$dSyncDBFileName, $secondDirectory.$dSyncDBFileName,
                $secondDirectory, $firstDirectory);
      }
    }
  }
                                # default behavior
  print "Will use <$firstDirectory$dSyncDBFileName>\n\n" if $debug;
  return ($firstDirectory.$dSyncDBFileName, $secondDirectory.$dSyncDBFileName,
          $firstDirectory, $secondDirectory);
}


# FindDeleted
#
# Identify files deleted from either directory tree
#
sub FindDeleted
{
  my $snapshot= shift;          # a pointer to a snapshot listing hash
  my $first= shift;             # a pointer to a first directory listing hash
  my $second= shift;            # a pointer to a second directory listing hash
  my %delete= ();               # a hash of items to remove
  my $deleteStatus;             # a given item's removal status

  foreach my $item (keys %$snapshot)
  {                             # find orphaned snapshot entries
    $deleteStatus= $ofNone;
    
    $deleteStatus|= $ofFirst    # seems to be gone from the first directory
      unless defined ${$first}{$item};
    
    $deleteStatus|= $ofSecond   # seems to be gone from the second directory
      unless defined ${$second}{$item};

                                # mark this item if it is gone from only one side
    if ($deleteStatus == $ofFirst)
    {                           # only gone from the primary/master, mark it
      $delete{$item}= $deleteStatus;
      print "Flagged for removal from secondary: <$item>\n" if $debug;
    }
    elsif ($deleteStatus == $ofSecond)
    {                           # only gone from the secondary, mark it
      $delete{$item}= $deleteStatus;
      if ($master)
      {                         # for restoration from the master
        print "Flagged for restoration to secondary: <$item>\n" if $debug;
      }
      else
      {                         # or for removal from the primary
        print "Flagged for removal from primary: <$item>\n" if $debug;
      }
    }
    elsif ($deleteStatus == $ofBoth)
    {                           # just clean up the snapshot
      print "Gone from both sides: <$item>\n" if $debug;
      delete(${$snapshot}{$item});
    }
  }

  print "\n" if $debug;         # pretty formatting
  return %delete;
}


# FindChanged
#
# Identify files changed or created under either directory tree
#
sub FindChanged
{
  my $delete= shift;            # a pointer to a list of items marked for removal
  my $snapshot= shift;          # a pointer to a snapshot listing hash
  my $first= shift;             # a pointer to a first directory listing hash
  my $second= shift;            # a pointer to a second directory listing hash
  my %copy= ();                 # a hash of items to remove
  my @differences= ();          # types of differences
  my $payload= $false;          # flag for payload changes

  foreach my $item (keys %$first)
  {                             # check the first directory for modified files
    if (defined ${$snapshot}{$item})
    {
      next                      # skip unchanged items
        if (${$snapshot}{$item} eq ${$first}{$item});

      @differences= Differences(${$snapshot}{$item}, ${$first}{$item}, $false);
      $payload= shift @differences;
    }
    else
    {                           # apparently, a new item
      $payload= $true;
      @differences= ("new item");
    }

    if (@differences)
    {                           # they are indeed different
      print $payload ? "Payload" : "Attributes",
      " (", join(", ", @differences), ")",
      " changed in the ", $master ? "master" : "primary",
      " directory: <$item>\n" if $debug;
      
      $copy{$item}=             # a new or a changed item
        [($ofFirst, $payload, @differences)];
    }
  }

  foreach my $item (keys %$second)
  {                             # check the second directory for modified files
    if (defined ${$snapshot}{$item})
    {
      next                      # skip unchanged items
        if (${$snapshot}{$item} eq ${$second}{$item});

      @differences= Differences(${$snapshot}{$item}, ${$second}{$item}, $lax);
      $payload= shift @differences;
    }
    else
    {                           # apparently, a new item
      $payload= $true;
      @differences= ("new item");
    }

    if (@differences)
    {                           # they are indeed different
       print $payload ? "Payload" : "Attributes",
       " (", join(", ", @differences), ")",
       " changed in the secondary directory: <$item>\n" if $debug;
   
       if (defined $copy{$item})
       {                        # item changed on both sides
         if (${$first}{$item} eq ${$second}{$item})
         {                      # but is the same between directories
           $copy{$item}= [($ofNone)];
         }
         else
         {                      # and is different between directories
           $copy{$item}= [($master ? $ofFirst: $ofBoth, $payload, @differences)];
         }
       }
       else
       {                        # item changed in the second directory only
         if ($master)
         {                      # force contents from the master to the slave
           if (defined ${$first}{$item})
           {                    # copy the master item
             $copy{$item}= [($ofFirst, $payload, @differences)]
           }
           elsif (defined ${$second}{$item})
           {                    # remove the slave item
             ${$delete}{$item}= $ofFirst;
           }
         }
         else
         {                      # unbiased operation
           $copy{$item}= [($ofSecond, $payload, @differences)];
         }
       }
    }
  }

  if ($master)                  
  {
    foreach my $item (keys %$delete)
    {                           # check for conflicts with items marked for removal
      if (${$delete}{$item} == $ofSecond)
      {                         # force-copy items from the master directory
                                # if they are missing from the slave
        delete(${$delete}{$item}); # clear the conflict
        $copy{$item}=           # and schedule a remedial copy
          [($ofFirst, $true, "restore missing")];
      }
    }
  }
  else
  {                             # normal synchronization mode
    foreach my $item (keys %$delete)
    {                           # check for conflicts with items marked for removal
      next unless defined $copy{$item};
      
      ${$delete}{$item}= $ofNone; # note the conflict
    }
  }
  
  print "\n" if $debug;         # pretty formatting
  return %copy;
}


# DeleteItems
#
# Remove items marked for deletion
#
sub DeleteItems
{
  my $snapshot= shift;          # a pointer to a snapshot listing hash
  my $items= shift;             # items marked for removal
  my $reset= shift;             # items to reset later
  my $firstDirectory= shift;    # path to the first directory
  my $secondDirectory= shift;   # path to the second directory
  my $status;                   # status of the item marked for removal

  unless ($quiet)
  {
    if ($simulation)
    {                           # Just say, don't do
      print "\nItems that would have been removed:\n";
    }
    else
    {                           # Say and do
      print "\nRemoving items:\n";
    }
  }

  foreach my $item (reverse sort (keys %$items))
  {                             # start from the depth of the tree and work up
    $status= ${$items}{$item};
    
    if ($status == $ofFirst)
    {                           # gone from first, remove from second
      print "\t[  rm] <$item>\n" unless $quiet;
      unless ($simulation)
      {                         # remove item
        print "\t\tRemoving <$secondDirectory$item>\n\n" if $debug;
        if (Delete($secondDirectory.$item))
        {                       # update snapshot and schedule a parent directory reset
          delete(${$snapshot}{$item});
          ${$reset}{dirname($item)}|= $ofSecond;
          delete(${$reset}{$item})
            if exists(${$reset}{$item});
        }
      }
    }
    elsif ($status == $ofSecond)
    {                           # gone from second, remove from first
      print "\t[rm  ] <$item>\n" unless $quiet;
      unless ($simulation)
      {                         # remove item and update snapshot
        print "\t\tRemoving <$firstDirectory$item>\n\n" if $debug;
        if (Delete($firstDirectory.$item))
        {                       # update snapshot and schedule a parent directory reset
          delete(${$snapshot}{$item});
          ${$reset}{dirname($item)}|= $ofFirst;
          delete(${$reset}{$item})
            if exists(${$reset}{$item});
        }
      }
    }
    elsif ($status == $ofNone)
    {                           # conflict with a future copy
      print "\t[keep] <$item>\n" unless $quiet;
      print "\t\tA scheduled copy trumps this deletion of <$item>\n\n"
        if $debug;
    }
  }
}


# CopyItems
#
# Copy items marked as changed
#
sub CopyItems
{
  my $snapshot= shift;          # a pointer to a snapshot listing hash
  my $items= shift;             # items marked for copying
  my $reset= shift;             # items to reset later
  my $first= shift;             # a pointer to a first directory listing hash
  my $second= shift;            # a pointer to a second directory listing hash
  my $firstDirectory= shift;    # path to the first directory
  my $secondDirectory= shift;   # path to the second directory
  my $status;                   # status of the item marked for copying
  my $payload;                  # degree of differences between changed items

  unless ($quiet)
  {
    if ($simulation)
    {                           # Just say, don't do
      print "\nItems that would have been updated:\n";
    }
    else
    {                           # Say and do
      print "\nUpdating items:\n";
    }
  }

  foreach my $item (sort (keys %$items))
  {                             # start from the depth of the tree and work up
    my @changes=                # a list of changes
      @{${$items}{$item}};

    $status= shift(@changes);
    $payload= shift(@changes);
    if ((-d $firstDirectory.$item) and (-d $secondDirectory.$item))
    {                           # special case for directories
      @changes= grep(!/time|size/, @changes);
      $payload= $false;
    }
    
    if ($status == $ofFirst)
    {                           # changed in first, copy to second
      unless ($quiet)
      {
        if (@changes)
        {                       # announce planned changes
          print "\t[ -> ] <$item> (", join(", ", @changes),
          $payload ? "; copy data" : "", ")\n";
        }
        else
        {                       # announce deferred
          print "\t[ -> ] <$item> (defer reset)\n";
        }
      }
      unless ($simulation)
      {                         # copy, update snapshot and
                                # schedule a parent directory reset
        unless (@changes)
        {                       # defer to later
          ${$reset}{$item}|= $ofSecond;
        }
        else
        {                       # do now
          ${$reset}{dirname($item)}|= $ofSecond
            if Copy($item, $firstDirectory, $secondDirectory,
                    ${$first}{$item}, ${$second}{$item}, $payload);
        }
        ${$snapshot}{$item}= ${$first}{$item};
      }
    }
    elsif ($status == $ofSecond)
    {                           # changed in the second, copy to the first
      unless ($quiet)
      {
        if (@changes)
        {                       # announce planned changes
          print "\t[ <- ] <$item> (", join(", ", @changes),
          $payload ? "; copy data" : "", ")\n";
        }
        else
        {                       # announce deferred
          print "\t[ <- ] <$item> (defer reset)\n";
        }
      }
      unless ($simulation)
      {                         # copy, update snapshot and
                                # schedule a parent directory reset
        unless (@changes)
        {                       # defer to later
          ${$reset}{$item}|= $ofFirst;
        }
        else
        {                       # do now
          ${$reset}{dirname($item)}|= $ofFirst
            if Copy($item, $secondDirectory, $firstDirectory,
                    ${$first}{$item}, ${$second}{$item}, $payload);
        }
        ${$snapshot}{$item}= ${$second}{$item};
      }
    }
    elsif ($status == $ofNone)
    {                           # changed, but already the same in both trees
      print "\t[ == ] <$item>\n" if $debug;
      unless ($simulation)
      {                         # update snapshot
        print "\t\tUpdating snapshot -- items are the same\n\n" if $debug;
        ${$snapshot}{$item}= ${$first}{$item};
      }
    }
    else
    {                           # conflict!
      unless ($quiet)
      {
        if (@changes)
        {                       # announce planned changes
          print "\t[-><-] <$item> (", join(", ", @changes),
          $payload ? "; copy data" : "", ")\n";
        }
        else
        {                       # announce deferred
          print "\t[ -> ] <$item> (defer reset)\n";
        }
      }
      unless ($simulation)
      {                         # resolve conflict, update snapshot
                                # schedule a parent directory reset
        unless (@changes)
        {                       # defer to later
          ${$reset}{$item}|= $ofSecond;
        }
        else
        {                       # do now
          ${$reset}{dirname($item)}|= $ofBoth
            if ResolveConflict($item, $firstDirectory, $secondDirectory,
                               ${$first}{$item}, ${$second}{$item}, $payload);
        }
        ${$snapshot}{$item}= pack("N$indices", Attributes($firstDirectory.$item));
      }
    }
  }
}


# ResolveConflict
#
# Deal with a conflict
#
sub ResolveConflict
{
  my $item= shift;              # relative path to the item in conflict
  my $sourceDirectory= shift;   # source file hierarchy
  my $destinationDirectory= shift; # destination file hierarchy
  my $sourceSignature= shift;   # source file signature
  my $destinationSignature= shift; # destination file signature
  my $payload= shift;           # degree of differences between changed items
  my $success= $true;           # outcome flag

  unless ((-d $sourceDirectory.$item) and (-d $destinationDirectory.$item))
  {                             # at least one is not a directory
    if ($payload)
    {                           # preserve one and copy the other over
      if ($success= MoveToConflictDirectory($destinationDirectory.$item))
      {                         # successfully preserved one
        print "\t\tCopying <$sourceDirectory$item>\n",
        "\t\t\tto <$destinationDirectory$item>\n" if $debug;

        $success= copy($sourceDirectory.$item, $destinationDirectory.$item)
          or warn "Could not copy <$sourceDirectory$item>",
          " to <$destinationDirectory$item>: ", &MacOSX::File::strerr, "\n";
      }
    }
  }

  if ($success)
  {                             # update attributes
    print "\t\tUpdating attributes on <$destinationDirectory$item>\n",
    "\t\t\tto match those of <$sourceDirectory$item>\n" if $debug;
      
    CopyAttributes($sourceDirectory.$item, $destinationDirectory.$item, !$remote);
  }

  print "\n" if $debug;         # pretty formatting
  return $success;
}


# MoveToConflictDirectory
#
# Move an item to a special directory to preserve it
#
sub MoveToConflictDirectory
{
  my $item= shift;              # relative path to the item
  my $directory=                # item's directory
     dirname($item) . "/";
  my $name=                     # item's name
     basename($item);
  my $success= $true;           # outcome flag

                                # create a conflicts directory, if necessary
  $success= mkdir($directory.$dSyncConflictDirectoryName, 0700)
    unless (-d $directory.$dSyncConflictDirectoryName);

  if ($success and -e $directory.$dSyncConflictDirectoryName.$name)
  {                             # collisions with a previous conflict, sigh
    $success=                   # move the previous one out of the way
      MoveToConflictDirectory($directory.$dSyncConflictDirectoryName.$name);
  }

  if ($success and !$simulation)
  {                             # attempt to move it
    print "\t\tPreserving <$item)>\n",
    "\t\t\tas <$directory.$dSyncConflictDirectoryName.$name>\n"
      if $debug;

    $success= move($item, $directory.$dSyncConflictDirectoryName.$name);
    warn "Could not move <$item> to",
    " <$directory$dSyncConflictDirectoryName$name>: $!\n"
      unless $success;
  }

  return $success;
}


# Copy
#
# Copy an item from one directory to another
#
sub Copy
{
  my $item= shift;              # relative path to the item
  my $sourceDirectory= shift;   # source file hierarchy
  my $destinationDirectory= shift; # destination file hierarchy
  my $sourceSignature= shift;   # source file signature
  my $destinationSignature= shift; # desitnation file signature
  my $payload= shift;           # degree of differences between changed items
  my $link= $false;             # is the item a link?
  my $success= $true;           # outcome flag

  if (-l $sourceDirectory.$item)
  {                             # it's a symbolic link, create a parallel one
    my $symLink= "";            # where the symbolic link points
    my $recreate= $false;       # should we recreate it?
    $link= $true;               # mark it...

    if ($symLink= readlink($sourceDirectory.$item))
    {      
      if (-e $destinationDirectory.$item)
      {
        if (-l $destinationDirectory.$item)
        {                       # compare links
          if ($symLink ne readlink($destinationDirectory.$item))
          {                     # failed to confirm
            $success= unlink($destinationDirectory.$item)
              or warn "Cound not delete old symbolic link ",
              "<$destinationDirectory.$item>: $!\n";

            $recreate= $success; 
          }
          else
          {                     # they point to the same thing
            $recreate= $false;  # leave the other one alone
          }
        }
        else
        {                       # old item is not a link!
          $success= MoveToConflictDirectory($destinationDirectory.$item);
          $recreate= $success; 
        }
      }
      else
      {
        $recreate = $true;
      }
      
      $success= symlink($symLink, $destinationDirectory.$item)
        or warn "Could not save a new symbolic link as ",
        "<$destinationDirectory$item>: $!\n"
        if $recreate;
    }
    else
    {                           # could not read the original
      $success= $false;
      warn "Cound not read source symbolic link <$sourceDirectory$item>: $!\n";
    }
  }
  elsif (-d $sourceDirectory.$item)
  {                             # it's a directory, create a parallel one
    unless (-d $destinationDirectory.$item)
    {                           # if the other one is not a directory or is missing
      $success= unlink($destinationDirectory.$item)
        if -l $destinationDirectory.$item; # old item is a link
      $success= MoveToConflictDirectory($destinationDirectory.$item)
        if -f $destinationDirectory.$item; # old item is a file

      $success= mkdir($destinationDirectory.$item) if $success;
      warn "Failed to create directory <$destinationDirectory$item>: $!\n"
        unless $success;
    }
  }
  elsif (-f $sourceDirectory.$item)
  {                             # it's a file
    if (-e $destinationDirectory.$item and ! -f $destinationDirectory.$item)
    {                           # old item is not a file!
      $success= MoveToConflictDirectory($destinationDirectory.$item);
      $payload= $true;
    }

    if ($payload and $success)
    {                           # payload differences -- copy the file
      print "\t\tCopying <$sourceDirectory$item>\n",
      "\t\t\tto <$destinationDirectory$item>\n" if $debug;

      $success= copy($sourceDirectory.$item, $destinationDirectory.$item)
        or warn "Could not copy <$sourceDirectory$item>",
        " to <$destinationDirectory$item>: ", &MacOSX::File::strerr, "\n";
    }
  }
  else
  {                             # what is it?
    print "\t\tFailed to deal with <$sourceDirectory$item>\n" if $debug;
    $success= $false;
  }

  if ($success)
  {                             # update attributes
    print "\t\tUpdating attributes on <$destinationDirectory$item>\n",
    "\t\t\tto match those of <$sourceDirectory$item>\n" if $debug;
      
    CopyAttributes($sourceDirectory.$item, $destinationDirectory.$item, !$remote)
      unless $link;
  }

  print "\n" if $debug;         # pretty formatting
  return $success;
}


# Differences
#
# Determine differences between items
#
sub Differences
{
                                # signature components
  my @firstAttributes= unpack("N$indices", shift);
  my @secondAttributes= unpack("N$indices", shift);
  my $lax= shift;               # should we be lax when noting differences?

  my @differences= ();          # a list of differences
  my $payload= $false;          # payload or attributes?

  if (@firstAttributes[$sigSize] != @secondAttributes[$sigSize])
  {                             # size mismatch
    push(@differences, "size");
    $payload= $true;
  }

  return $payload, @differences # don't care about the rest during lax checking
    if $lax;

  if (@firstAttributes[$sigMtime] != @secondAttributes[$sigMtime])
  {                             # time stamp mismatch
    push(@differences, "time");
    $payload= $true;
  }
  if (S_IFMT(@firstAttributes[$sigMode]) != S_IFMT(@secondAttributes[$sigMode]))
  {                             # type mismatch
    push(@differences, "type");
    $payload= $true;
  }
  if (@firstAttributes[$sigMode] & 07777 != @secondAttributes[$sigMode] & 07777)
  {                             # permissions mismatch
    push(@differences, "permissions")
      unless $remote;
  }
  if (@firstAttributes[$sigUid] != @secondAttributes[$sigUid])
  {                             # owner mismatch
    push(@differences, "owner");
  }
  if (@firstAttributes[$sigGid] != @secondAttributes[$sigGid])
  {                             # group mismatch
    my $falsePositive= ($remote and
                        ((@firstAttributes[$sigGid] == $unknownGroup) or
                        (@secondAttributes[$sigGid] == $unknownGroup)));

    push(@differences, "group") unless $falsePositive;

    print "Group mismatch: <", @firstAttributes[$sigGid], "> v. <",
    @secondAttributes[$sigGid], ">; lax=<$lax>\n" unless ($falsePositive or !$debug);
  }

  return $payload, @differences;
}


# CopyAttributes
#
# Copy an original item's attributes to its copy
#
sub CopyAttributes
{
  my $original= shift;          # the original item
  my $target= shift;            # the target item
  my $permissions= shift;       # should we replicate permissions?
  my $info;                     # FileInfo record
  my @attributes= ();           # a list of an item's attributes
  my $success= $true;           # outcome flag

  $success= setfinfo(getfinfo($original), $target)
    or warn "Could not replicate FileInfo attributes from <$original>",
    " to <$target>: ", &MacOSX::File::strerr, "\n";

  if (@attributes= (lstat($original))[@indices])
  {                             # replicate attributes
    if ($permissions)
    {
      unless (chmod(@attributes[$sigMode] & 07777, $target))
      {
        $success= $false;
        warn "Failed to replicate permissions from <$original> to <$target>: $!\n";
      }
      
      if (@attributes[$sigGid] == $unknownGroup)
      {                         # special work-around for group IDs clobbered by AFP
        @attributes[$sigGid]= (lstat($target))[$gid];
      }
      unless (chown(@attributes[$sigUid], @attributes[$sigGid], $target))
      {
        $success= $false;
        warn "Failed to replicate ownership from <$original> to <$target>: $!\n";
      }                 
    }
    
    unless (utime(@attributes[$attAtime], @attributes[$sigMtime], $target))
    {
      $success= $false;
      warn "Failed to replicate time stamps from <$original> to <$target>: $!\n";
    }
  }
  else
  {
    warn "Could not read attributes for <$original>: $!\n";
    $success= $false;
  }

  return $success;
}


# Delete
#
# Remove an item
#
sub Delete
{
  my $item= shift;              # path to the item
  my $success= $true;           # outcome flag
  
  if (-f $item)
  {                             # it's a file, remove it
    if ($success= unlink($item))
    {                           # clean up associated frass
      my $frassItem= dirname($item) . "._" . basename($item);

      if (-f "$frassItem")
      {
        unlink("$frassItem")
          or warn "Failed to remove file <$frassItem>: $!\n";
      }
    }
    else
    {                           # warn of failure
      warn "Failed to remove file <$item>: $!\n";
    }
  }
  elsif (-l $item)
  {                             # it's a symbolic link, remove it
                                # symbolic links to directories will return true
                                # for directory tests; thus, should test for links first (Sigh)
    $success= unlink($item);
    warn "Failed to remove symbolic link <$item>: $!\n"
      unless $success;
  }
  elsif (-d $item)
  {                             # it's a directory, remove it
    $success= DeleteDirectory($item);
  }
  else
  {                             # what is it?
    $success= $false;
  }

  return $success;
}


# DeleteDirectory
#
# Remove an item
#
sub DeleteDirectory
{
  my $directory= shift;         # the directory to remove
  my $success= $true;           # result condition

  foreach my $item (ListDirectory($directory))
  {                             # delete each item inside the directory first
    $success= Delete("$directory/$item")
      or last;
  }  

  $success= rmdir($directory)
    or warn "Failed to remove directory <$directory>: $!\n"
    if $success;

  return $success;
}


# CleanUp
#
# Reset attributes of artificially affected items
#
sub CleanUp
{
  my $snapshot= shift;          # a pointer to a snapshot listing hash
  my $reset= shift;             # items to reset during clean-up
  my $firstDirectory= shift;    # path to the first directory
  my $secondDirectory= shift;   # path to the second directory
  my $originalMtime= 0;         # preserved, original modification time
  my $now= time();              # note current time
  
  print "\nCleaning up:\n" unless $quiet;

  foreach my $item (reverse sort (keys %$reset))
  {                             # start from the depth of the tree and work up
    print "\t[rset] <$item>\n" unless $quiet;

    $originalMtime= (unpack("N$indices", ${$snapshot}{$item}))[$sigMtime];
    if (${$reset}{$item} && $ofFirst)
    {                           # update the first hierarchy item
      printf("\t\tUpdating modification time of <$firstDirectory$item>\n",
             "\t\t\tto [%08x]\n", $originalMtime) if $debug;
      utime($now, $originalMtime, $firstDirectory.$item)
        or warn "Failed to reset time stamps on <$firstDirectory$item>\n";
    }
    if (${$reset}{$item} && $ofSecond)
    {                           # update the second hierarchy item
      printf("\t\tUpdating modification time of <$secondDirectory$item>\n",
             "\t\t\tto [%08x]\n", $originalMtime) if $debug;
      utime($now, $originalMtime, $secondDirectory.$item)
        or warn "Failed to reset time stamps on <$secondDirectory$item>\n";
    }
    print "\n" if $debug;       # pretty formatting
  }
}


# ClearTmpStorage
#
# Remove temporary files
#
sub ClearTmpStorage
{
  for (@_)
  {                             # clear each file specified
    print "Removing temporary storage file <$_>\n" if $debug;
    unlink($_)
      or warn "Cound not remove temporary storage file <$_>: $!\n";
  }
}


# TrackingAdvice
#
# Dump tracking hints and scuttle
#
sub TrackingAdvice
{
  my $directory= shift;         # the directory without a snapshot

  die "\nCould not access a saved snapshot for <$directory>\n",
  "Changes may not be accurately tracked --\n",
  "Re-run $me in \"master\" or \"snapshot\" mode.\n",
  "\n",
  "Usage: $me --master <master_directory> <target_directory>\n",
  "Usage: $me --snapshot <snapshot_directory>\n",
  "\n\n",
  "Consult documentation or run $me in \"help\" mode for more information.\n",
  "\n",
  "Usage: $me --help\n",
  "\n";
}


# Usage
#
# Dump usage hints and scuttle
#
sub Usage
{
  die "\nYou did not invoke $me properly!\n",
  "\n",
  "Usage: $me <first_directory> <second_directory>\n",
  "Usage: $me --master <master_directory> <target_directory>\n",
  "Usage: $me --snapshot <snapshot_directory>\n",
  "Usage: $me --help\n",
  "\n";
}


# Help
#
# Display terse help information
#
sub Help
{
  print "\nWithin Help...\n" if $debug;

  system("perldoc $me")
    and die "Problems accessing perldoc... Try the following line at the prompt:\n",
    "perldoc $me";
}


# Version
#
# Display terse version information
#
sub Version
{
  print "\nWithin Version...\n" if $debug;

  print "\n$versionString\n\n";
}

#
# Terse help
#

__END__
=pod
=head1 NAME

dsync -- synchronize contents of two directories

=head1 SYNOPSIS

B<dsync> [B<-n>] [B<-q> | B<-d>] [B<-x> I<pathname>]... I<pathname> I<pathname>

B<dsync> [B<-n>] [B<-q> | B<-d>] [B<-x> I<pathname>]... [B<-s> I<pathname>]

B<dsync> [B<-n>] [B<-q> | B<-d>] [B<-x> I<pathname>]... [B<-m> I<pathname>] I<pathname>

B<dsync> B<-h>

B<dsync> B<-v>

(See the OPTIONS section for alternate option syntax with long option names.)

=head1 DESCRIPTION

B<dsync> synchronizes contents of two directories. B<dsync> remembers
state between invocations and tries to track and reconcile changes
between successive runs and between independent synchronizations
of multiple targets (local and backup, local and remote, etc.).

While B<dsync> will normally consider bidirectional changes,
it may also mirror one directory to another if a master directory
is specified. Such may be used for backups or to force conflict
resolution.

Alternatively, B<dsync> may be invoked to simply update state of a given
directory in order to reset state from a previous run or
to update a given directory to current.

Options

=over 4

=item B<-n>, B<--simulation>

Merely print what will be done, but don't actually do it; 
directories and state snapshots remain unaffected.

=item B<-q>, B<--quiet>

Suppress all but the most critical output.

=item B<-d>, B<--debug>

Describe state and actions at appropriate points during execution.

=item B<-x> I<pathname>, B<--ignore> I<pathname>

Ignore indicated item during processing. Repeat this option to list
multiple items.

=item B<-r>, B<--remote>

Hint that a directory being synchronized is on a remote volume
and that not all file attributes are accurate or accessible.
Forgo some attribute related operations but still reflect them
in the saved snapshot.

=item B<-l> I<pathname>, B<--lax> I<pathname>

Option to ignore any attribute changes on the secondary side.
This behavior may be useful when backing up to foreign file systems
as attrbiutes such as permissions, ownership, time, and type
may not be accurately preserved. Only changes in size of files
on the secondary side matter.
This option is more extreme than B<--remote>

=item B<-s> I<pathname>, B<--snapshot> I<pathname>

Take a snapshot of the indicated directory.

=item B<-m> I<pathname>, B<--master> I<pathname>

Treat the indicated directory as the master copy and mirror its
content in the secondary directory.

=item B<-v>, B<-V>, B<--version>

Print version information and exit.

=item B<-h>, B<-H>, B<--help>

Print this terse manual and exit.

=back

=head1 EXAMPLE

To synchronize a local Documents directory and a remotely mounted
analog, invoke via:

C<sudo dsync ~user/Documents /Volumes/user/Documents>

assuming that the user has identically named accounts on both machines
and that the user's remote home directory was mounted locally.

=head1 FILES

=over 4

=item .dsync.db

Berkeley DB Hash file used to store states of files between executions.

=back

=head1 REQUIRES

Perl 5.6, Getopt::Long, Fcntl, POSIX, DB_File, File::Basename, MacOSX::File,
MacOSX::File::Copy, MacOSX::File::Info

=head1 SEE ALSO

perl(1)

=head1 BUGS

=over 4

This is an early release; it has not been tested comprehensively.

Send bug reports, questions, and requests to dsync@ayradyss.org.

Mac OS X obfuscates some file attributes and prevents updates
to volumes mounted via AFP. Dsync requires the B<--remote> hint in order
to ignore fallacious attributes and to skip operations that would
otherwise fail. A future version may support a special "snapshot
restore" mode that will allow Dsync to properly reset attributes
when invoked locally after a remote synchronization run.

=back

=head1 AUTHOR

Igor S. Livshits <dsync@ayradyss.org>

=head1 COPYRIGHT

Copyright (C) 2006 Igor S. Livshits

Use and distribute this tool as per the Artistic License

=cut