Getting the SHA-1 (or MD5) hash of a directory
Allows you to "fingerprint" a directory and tell if anything has changed in it
By
definition a cryptographic hash is, "a deterministic procedure that takes an arbitrary block of data and returns a fixed-size bit string, the (cryptographic) hash value, such that an accidental or intentional change to the data will change the hash value".
Usually these hashes are used on files to "fingerprint" them, but in order to do the same to a directory you have to do something like this:
# http://akiscode.com/articles/sha-1directoryhash.shtml
# Copyright (c) 2009 Stephen Akiki
# MIT License (Means you can do whatever you want with this)
# See http://www.opensource.org/licenses/mit-license.php
# Error Codes:
# -1 -> Directory does not exist
# -2 -> General error (see stack traceback)
def GetHashofDirs(directory, verbose=0):
import hashlib, os
SHAhash = hashlib.sha1()
if not os.path.exists (directory):
return -1
try:
for root, dirs, files in os.walk(directory):
for names in files:
if verbose == 1:
print 'Hashing', names
filepath = os.path.join(root,names)
try:
f1 = open(filepath, 'rb')
except:
# You can't open the file for some reason
f1.close()
continue
while 1:
# Read file in as little chunks
buf = f1.read(4096)
if not buf : break
SHAhash.update(hashlib.sha1(buf).hexdigest())
f1.close()
except:
import traceback
# Print the stack traceback
traceback.print_exc()
return -2
return SHAhash.hexdigest()
print GetHashofDirs('My Documents', 1)