# Delete duplicate files in a directory.

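# NOTE: stray promotional text from the web page this script was copied from (not part of the script): 清华大佬耗费三个月吐血整理的几百G的资源,免费分享!....>>>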

require 'find'
require 'digest/md5'

# Files with no earlier identical copy seen so far: path => size in bytes.
uniqueFileTable = {}
# Detected duplicates: duplicate path => path of the earlier identical file.
sameFileTable = {}

# Root directory to scan, taken from the first command-line argument.
directory = ARGV.first

# Looks for an already-recorded file whose content is identical to +file+.
#
# table - Hash mapping a file path to that file's size in bytes.
# file  - Path of the candidate file to compare against the recorded ones.
#
# Every recorded file with the same size is considered (not just the first
# one), and contents are compared by MD5 digest.
#
# Returns the path (a key of +table+) of the first recorded file with the
# same size and the same MD5 digest, or nil when there is none.
def have_same?(table, file)
	size = File.size(file)
	digest = nil

	table.each do |path, recorded_size|
		# Different sizes can never be identical; skip without hashing.
		next unless recorded_size == size

		# Hash the candidate lazily, and only once. Digest::MD5.file streams
		# the file and uses File.open, so odd file names (e.g. starting with
		# "|") are never interpreted as commands the way Kernel#open may do.
		digest ||= Digest::MD5.file(file).hexdigest
		return path if Digest::MD5.file(path).hexdigest == digest
	end

	nil
end

# Walk the directory tree and classify every regular file: a file matching
# an already-recorded one goes into sameFileTable (duplicate => original),
# anything else is recorded in uniqueFileTable (path => size).
Find.find(directory) do |path|
	next unless File.file?(path)

	twin = have_same?(uniqueFileTable, path)
	if twin.nil?
		uniqueFileTable[path] = File.size(path)
	else
		sameFileTable[path] = twin
	end
end

=begin
puts "\nuniqueFileTable:"
uniqueFileTable.each do |fileName, size|
  puts "%s : %d" % [fileName, size]
end
=end

# Report the duplicates that were found and ask the user what to delete.
#   y = delete every duplicate, c = confirm one by one, anything else = keep all.
if sameFileTable.length > 0
	puts "\nsameFileTable:"
	sameFileTable.each do |fileName, fileName2|
		puts "%s : %s" % [fileName, fileName2]
	end

	puts "Remove All #{sameFileTable.length} reduplicate Files?(y = yes to all, c = confirm, n = do nothing)"
	# STDIN.gets returns nil at end-of-input; to_s turns that into an empty
	# answer, which falls through to "do nothing" instead of crashing.
	userInput = STDIN.gets.to_s.chomp

	if userInput == "y"
		sameFileTable.each_key do |fileName|
			File.delete(fileName)
		end
	elsif userInput == "c"
		puts "For each confirm : y = yes, a = all, n = no."
		userInputInner = ""
		sameFileTable.each_key do |fileName|
			# A previous "a" answer means: delete the remaining files unasked.
			if userInputInner == "a"
				File.delete(fileName)
				next
			end

			puts "Delete? #{fileName}"

			# Keep asking until a valid answer arrives or input ends.
			loop do
				line = STDIN.gets
				if line.nil?
					userInputInner = nil
					break
				end
				userInputInner = line.chomp
				break if %w[y a n].include?(userInputInner)
			end

			# Input closed: nobody can confirm anymore, so keep the remaining files.
			break if userInputInner.nil?

			File.delete(fileName) if userInputInner != "n"
		end
	end

else
	puts "No Same File."
end