#!/usr/bin/python
#
# qdiff - Perform "quick diff" between two directories
#
# Author: Tim Bird
#
# Copyright 2004,2007 Sony Electronics, Inc.
#
# This program is a wrapper for regular diff, that
# avoids calling diff for files that have the same size
# and datestamp in the two directories.  This
# speeds up diffs when the new directory was copied from
# the old (with timestamps preserved), and only has a
# few modifications in it.
#
# This program also has an option ("--synctimes") to
# make the timestamp on the files the same if the
# file contents are the same between the two directories.
#
# The code is written to run unchanged on Python 2.6+ and Python 3.
#
# KNOWN LIMITATIONS:
#  - diff options that take a separate argument, other than
#    -X and -x, are not recognized: their argument would be
#    mistaken for a directory name.

import sys
import os
import getopt
import re
import string
import fnmatch
import subprocess

try:
    # Python 2 only.  qdiff itself now uses subprocess; the import is kept
    # so that code importing this module and expecting the name still works.
    import commands
except ImportError:
    commands = None

# Module-wide verbosity flag.  main() sets it from the -v option; the
# default lets the helper functions be called without going through main().
verbose = 0


def usage():
    """Print the usage message on stdout and exit with status 1."""
    print("""Usage: qdiff [options] <old_dir> <new_dir>

Options are either processed locally by qdiff or passed directly
to diff.  Locally-processed options are:
  -v            Be verbose
  -h, --help    Show this usage help
  --synctimes   Synchronize the timestamp on files if they are
                identical between old_dir and new_dir
  -X <file>     Process exclude patterns in the specified file,
                the same way that diff would.
  -x <pattern>  Process exclude pattern the same way that diff would.

All other options are passed to diff without interpretation.
qdiff does not handle any diff options which take arguments,
besides -X and -x.

An example qdiff command line would look like this:
  qdiff -X ~/dontdiff -pruN linux-2.6.9.orig/ new-dir/
""")
    sys.exit(1)


def error(message):
    """Write message, followed by a newline, to stderr."""
    sys.stderr.write(message + "\n")


def strip_leading_dir(filename, leading_dir=None):
    """Return filename with its leading directory removed.

    With only filename given, the first path component (everything up to
    and including the first '/') is dropped; a ValueError is raised if
    filename contains no '/'.

    When leading_dir is given, the result is instead filename expressed
    relative to leading_dir.  This is correct for directories that are
    several levels deep, absolute, or written without a trailing slash.
    """
    if leading_dir is not None:
        return os.path.relpath(filename, leading_dir)
    (junk, filename) = filename.split('/', 1)
    return filename


def add_exclude(exclude_list, pat):
    """Compile the shell wildcard pattern pat and append it to exclude_list.

    Each entry appended is a (compiled_regex, regex_source) tuple.
    fnmatch.translate() performs the wildcard-to-regex conversion, so '?'
    matches exactly one character, '[...]' sets work, and every other
    regex metacharacter in pat is treated literally.
    """
    regex = fnmatch.translate(pat)
    exclude_list.append((re.compile(regex), regex))


def is_excluded(exclude_list, pathname):
    """Return 1 if pathname matches any pattern in exclude_list, else 0."""
    for (patc, pat) in exclude_list:
        if patc.match(pathname):
            return 1
    return 0


# this routine is used to avoid processing the
# exclude patterns for every single file
# most files are .c and .h files.
# (Can't use this since there are many .c and .h files in dontdiff)
def is_included(pathname):
    """Return 1 if pathname ends in '.c' or '.h', else 0."""
    if pathname.endswith((".c", ".h")):
        return 1
    return 0


def scan_dir(exclude_list, dir):
    """Recursively list the non-excluded files below dir.

    Exclusion is tested against each entry's base name, for files and
    directories alike; an excluded directory is not descended into.
    Returns a list of paths, each formed by joining dir with the path
    below it, with every directory's entries visited in sorted order.
    """
    result = []
    for item in sorted(os.listdir(dir)):
        if is_excluded(exclude_list, item):
            continue
        path_item = os.path.join(dir, item)
        if os.path.isdir(path_item):
            result.extend(scan_dir(exclude_list, path_item))
        else:
            result.append(path_item)
    return result


def get_file_list(exclude_pat_file, ex_pats, old_dir, new_dir):
    """Return the sorted relative paths of the files that need a real diff.

    exclude_pat_file -- name of a file holding one exclude pattern per
                        line, or an empty string for none
    ex_pats          -- list of individually specified exclude patterns
    old_dir, new_dir -- the two directory trees to compare

    A file is listed when it is missing from one of the trees, or when
    its size or modification time differs between them.  Exits with
    status 1 if exclude_pat_file cannot be read.
    """
    exclude_list = []
    if exclude_pat_file:
        try:
            with open(exclude_pat_file) as pat_file:
                ex_pat_lines = pat_file.readlines()
        except (IOError, OSError):
            error("Error opening exclude pattern file %s\n" % exclude_pat_file)
            sys.exit(1)
        for line in ex_pat_lines:
            # strip only the line terminator, so that a final line
            # without a newline keeps its last character
            pat = line.rstrip("\r\n")
            if pat:
                add_exclude(exclude_list, pat)

    # add individually specified patterns
    for pat in ex_pats:
        add_exclude(exclude_list, pat)

    if verbose:
        error("Scanning directory %s" % new_dir)
    new_list = scan_dir(exclude_list, new_dir)

    if verbose:
        error("Scanning directory %s" % old_dir)
    old_list = scan_dir(exclude_list, old_dir)

    if verbose:
        error("Building full file list...")
    # union of the relative names found in either tree
    names = set()
    for path in new_list:
        names.add(strip_leading_dir(path, new_dir))
    for path in old_list:
        names.add(strip_leading_dir(path, old_dir))
    full_list = sorted(names)

    if verbose:
        error("Excluding similar files...")
    short_list = []
    for filename in full_list:
        try:
            old_stat = os.stat(os.path.join(old_dir, filename))
            new_stat = os.stat(os.path.join(new_dir, filename))
        except OSError:
            # if file is missing in either newdir or olddir,
            # add it to the list (and let diff examine this case)
            short_list.append(filename)
            continue

        # omit the file only if both size and timestamp match
        if (old_stat.st_mtime != new_stat.st_mtime or
                old_stat.st_size != new_stat.st_size):
            short_list.append(filename)
    return short_list


def _run_diff(diff_args, old_file, new_file):
    """Run diff on two files and return (exit_status, output, argv).

    diff is started directly from an argument list, not through a shell,
    so file names containing spaces or shell metacharacters are passed
    through intact.  output is diff's stdout and stderr combined, with
    one trailing newline removed.  Exits with status 1 if diff cannot
    be started.
    """
    argv = ["diff"] + list(diff_args) + [old_file, new_file]
    try:
        proc = subprocess.Popen(argv, stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
    except OSError as exc:
        error("Error: unable to run diff: %s" % exc)
        sys.exit(1)
    output = proc.communicate()[0]
    if not isinstance(output, str):
        # Python 3 returns bytes; never fail on undecodable file content
        output = output.decode("utf-8", "replace")
    if output.endswith("\n"):
        output = output[:-1]
    return proc.returncode, output, argv


def main():
    """Parse the command line, then diff (or time-sync) the changed files."""
    global verbose

    # process command-line arguments.
    diff_args = []
    my_args = []
    save_next_X = 0
    save_next_x = 0
    ex_pat_file = ""
    ex_pats = []
    verbose = 0
    do_synctimes = 0

    # if arg starts with a dash, use it as arg with diff, unmodified
    # take special note of -X dontdiff argument
    for arg in sys.argv[1:]:
        if arg == "-v":
            verbose = 1
            continue
        if arg == "--synctimes":
            do_synctimes = 1
            continue
        if save_next_X:
            diff_args.append(arg)
            ex_pat_file = arg
            save_next_X = 0
            continue
        if save_next_x:
            diff_args.append(arg)
            ex_pats.append(arg)
            save_next_x = 0
            continue
        if arg in ("-h", "--help"):
            usage()
        if arg.startswith('-'):
            diff_args.append(arg)
            if arg == '-X':
                save_next_X = 1     # get exclude pattern file
            if arg == '-x':
                save_next_x = 1     # get exclude pattern
            # FIXTHIS - should add more options from diff that
            # take arguments here
            continue
        my_args.append(arg)

    if len(my_args) < 2:
        error("Error: directory argument missing on command line")
        usage()
    old_dir = my_args[0]
    new_dir = my_args[1]

    for directory in (old_dir, new_dir):
        if not os.path.isdir(directory):
            error("Error: %s is not a directory" % directory)
            sys.exit(1)

    # make sure dir paths end in '/'
    if not old_dir.endswith('/'):
        old_dir += '/'
    if not new_dir.endswith('/'):
        new_dir += '/'

    file_list = get_file_list(ex_pat_file, ex_pats, old_dir, new_dir)

    if verbose:
        error("=== List of files to diff: ===")
        for file in file_list:
            error("   " + file)
        error("======")

    if do_synctimes:
        synctimes(old_dir, new_dir, file_list)
        sys.exit(0)

    # call diff for each file in the file_list
    identicals = 0
    for file in file_list:
        (status, output, argv) = _run_diff(diff_args,
                                           os.path.join(old_dir, file),
                                           os.path.join(new_dir, file))
        if status == 0:
            # files were identical, skip this one
            identicals = identicals + 1
            continue

        # output the result from diff, just like a recursive diff would
        print(" ".join(argv))
        print(output)

    # report identical files (which may indicate a need for a
    # synctimes operation).
    if verbose and identicals:
        error("")
        error("Note: There are %d identical files with non-matching timestamps." % identicals)
        error("Use the --synctimes option to fix this.")


def sync_time(old_file, new_file):
    """Set new_file's access and modification times to those of old_file."""
    old_atime = os.path.getatime(old_file)
    old_mtime = os.path.getmtime(old_file)
    if verbose:
        error("Synchronizing time on file: %s" % new_file)
    os.utime(new_file, (old_atime, old_mtime))


def synctimes(old_dir, new_dir, file_list):
    """Give identical files in new_dir the timestamps of their old_dir twins.

    Each name in file_list is compared with diff; when diff reports the
    two copies identical (exit status 0), the new copy's access and
    modification times are set from the old copy.
    """
    for file in file_list:
        old_file = os.path.join(old_dir, file)
        new_file = os.path.join(new_dir, file)
        status = _run_diff([], old_file, new_file)[0]
        if status == 0:
            # files are identical
            # make the times identical (use the old time)
            sync_time(old_file, new_file)


if __name__ == "__main__":
    if len(sys.argv) == 1 or sys.argv[1] == "-h" or sys.argv[1] == "--help":
        usage()
    main()