#!/usr/bin/perl -w
#
# hardlink-files.pl - create hardlinks from duplicate files
#
# Tested on Linux and FreeBSD.
#
# Developed by Lubomir Host 'rajo' <rajo AT platon.sk>
# Copyright (c) 2005 Platon SDG, http://platon.sk/
# Licensed under terms of GNU General Public License.
# All rights reserved.
#
# Changelog:
# 2005-04-17 - created
#
# $Platon: scripts/perl/filesystem/hardlink-files.pl,v 1.2 2005/05/24 11:55:46 rajo Exp $
use strict;
use File::Find;
use Cwd qw( abs_path );
use Digest::MD5;
#use Digest::SHA1;
use Data::Dumper;
$| = 1;
use vars qw(*file *dir *prune);
*file = *File::Find::name;
*dir = *File::Find::dir;
*prune = *File::Find::prune;
my $md5binary;
my $md5sums = {
};
my $cache = {
};
sub wanted()
{ # {{{
my ($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size, $atime, $mtime, $ctime, $blksize, $blocks);
if (-f $file) {
($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size, $atime, $mtime, $ctime, $blksize, $blocks) = lstat($file);
open(FILE, $file) or die "Can't open file '$file': $!";
binmode(FILE);
my $md5 = Digest::MD5->new->addfile(*FILE)->hexdigest;
push @{$md5sums->{$dev}->{$md5}}, {
filename => abs_path($file),
dir => $dir,
dev => $dev,
ino => $ino,
mode => $mode,
nlink => $nlink,
uid => $uid,
gid => $gid,
rdev => $rdev,
size => $size,
atime => $atime,
mtime => $mtime,
ctime => $ctime,
blksize => $blksize,
blocks => $blocks,
};
close(FILE);
}
} # }}}
#
# Load cache
#
foreach my $xdir (@ARGV) {
if (-f "$xdir/.hardlink-cache") {
}
}
find({
wanted => \&wanted,
bydepth => 0,
no_chdir => 1,
}, @ARGV);
#print Dumper($md5sums);
my $hashes;
foreach my $dev (keys %$md5sums) {
foreach my $key (keys %{$md5sums->{$dev}}) {
$hashes->{$key} = 1;
}
}
print scalar(keys %$hashes), " unique files found\n";
foreach my $dev (keys %$md5sums) {
foreach my $key (keys %{$md5sums->{$dev}}) {
my $arr = $md5sums->{$dev}->{$key};
if (scalar(@$arr) > 1) {
my $source = $arr->[0]->{filename};
my $inum = $arr->[0]->{ino};
print $source;
for (my $i = 1; $i < scalar(@$arr); $i++) {
print "\n\t<-- ", $arr->[$i]->{filename};
if ($arr->[$i]->{ino} == $inum) {
print " [already linked]";
}
else {
unlink $arr->[$i]->{filename} or warn "\n\nunlink error: $!\n";
link $source, $arr->[$i]->{filename} or warn "\n\nlink error: $!\n";
}
}
print "\n\n";
}
}
}
#print Dumper($md5sums);
# vim: ts=4
# vim600: fdm=marker fdl=0 fdc=3
Platon Group <platon@platon.sk> http://platon.sk/
|