verzia 1.1, 2005/04/17 16:31:07 |
verzia 1.2, 2005/05/24 11:55:46 |
|
|
#!/usr/bin/perl |
#!/usr/bin/perl -w |
|
|
# |
# |
# hardlink-files.pl - create hardlinks from duplicate files |
# hardlink-files.pl - create hardlinks from duplicate files |
|
|
# 2005-04-17 - created |
# 2005-04-17 - created |
# |
# |
|
|
# $Platon$ |
# $Platon: scripts/perl/filesystem/hardlink-files.pl,v 1.1 2005/04/17 16:31:07 rajo Exp $ |
|
|
use strict; |
use strict; |
#use Data::Dumper; |
use File::Find; |
|
use Cwd qw( abs_path ); |
|
use Digest::MD5; |
|
#use Digest::SHA1; |
|
use Data::Dumper; |
|
|
$| = 1; |
$| = 1; |
|
|
my $md5binary; |
use vars qw(*file *dir *prune); |
|
*file = *File::Find::name; |
|
*dir = *File::Find::dir; |
|
*prune = *File::Find::prune; |
|
|
|
my $md5binary; |
my $md5sums = { |
my $md5sums = { |
}; |
}; |
|
my $cache = { |
|
}; |
|
|
if ($^O eq 'linux') { |
sub wanted() |
$md5binary = 'md5sum'; |
{ # {{{ |
} |
my ($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size, $atime, $mtime, $ctime, $blksize, $blocks); |
elsif ($^O eq 'freebsd') { |
|
$md5binary = 'md5 -r'; |
if (-f $file) { |
} |
($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size, $atime, $mtime, $ctime, $blksize, $blocks) = lstat($file); |
else { |
|
die "Unsupported platform, please, specify path to md5sum binary manualy and send patch."; |
open(FILE, $file) or die "Can't open file '$file': $!"; |
} |
binmode(FILE); |
|
my $md5 = Digest::MD5->new->addfile(*FILE)->hexdigest; |
|
push @{$md5sums->{$md5}}, { |
|
filename => abs_path($file), |
|
dir => $dir, |
|
dev => $dev, |
|
ino => $ino, |
|
mode => $mode, |
|
nlink => $nlink, |
|
uid => $uid, |
|
gid => $gid, |
|
rdev => $rdev, |
|
size => $size, |
|
atime => $atime, |
|
mtime => $mtime, |
|
ctime => $ctime, |
|
blksize => $blksize, |
|
blocks => $blocks, |
|
}; |
|
close(FILE); |
|
} |
|
} # }}} |
|
|
|
# |
|
# Load cache |
|
# |
|
foreach my $xdir (@ARGV) { |
|
if (-f "$xdir/.hardlink-cache") { |
|
|
foreach my $path (@ARGV) { |
|
open(FIND, "find $path -type f -exec $md5binary {} \\; |"); |
|
while (my $line = <FIND>) { |
|
chomp $line; |
|
if ($line =~ m/^([^\s]+)\s+(.*)$/g) { # Linux format |
|
my ($md5, $filename) = ($1, $2); |
|
push @{$md5sums->{$md5}}, $filename; |
|
} |
|
} |
} |
close(FIND); |
|
} |
} |
|
|
|
find({ |
|
wanted => \&wanted, |
|
bydepth => 0, |
|
no_chdir => 1, |
|
}, @ARGV); |
|
|
|
#print Dumper($md5sums); |
|
|
print scalar(keys %$md5sums), " unique files found\n"; |
print scalar(keys %$md5sums), " unique files found\n"; |
|
|
foreach my $key (keys %$md5sums) { |
foreach my $key (keys %$md5sums) { |
my $arr = $md5sums->{$key}; |
my $arr = $md5sums->{$key}; |
if (scalar(@$arr) > 1) { |
if (scalar(@$arr) > 1) { |
my $source = $arr->[0]; |
my $source = $arr->[0]->{filename}; |
print "$source <--- "; |
my $inum = $arr->[0]->{ino}; |
|
print $source; |
for (my $i = 1; $i < scalar(@$arr); $i++) { |
for (my $i = 1; $i < scalar(@$arr); $i++) { |
print $arr->[$i]; |
print "\n\t<-- ", $arr->[$i]->{filename}; |
unlink $arr->[$i] or warn "\n\nunlink error: $!\n"; |
if ($arr->[$i]->{ino} == $inum) { |
link $source, $arr->[$i] or warn "\n\nlink error: $!\n"; |
print " [already linked]"; |
|
} |
|
else { |
|
unlink $arr->[$i]->{filename} or warn "\n\nunlink error: $!\n"; |
|
link $source, $arr->[$i]->{filename} or warn "\n\nlink error: $!\n"; |
|
} |
} |
} |
print "\n\n"; |
print "\n\n"; |
} |
} |