#!/usr/bin/python -u
# diffinfo - replacement for diffstat which provides more detailed
# information about the patch.  Also, it can filter and re-write
# patches based on various criteria.
#
# pcomp - patch compare - program to compare two patches and report
# differences between them.
#
# The files and hunks in the patch can be filtered by:
#  - arbitrary regex match
#    (useful for C variables or structs, or for CONFIG_VARS)
#  - filename pattern match
#
# This is a kind of grep-like feature, which is useful
# for analyzing a large patch.
#
# This program only works with unified-style diffs.
# (patches made using diff with the -u option)
#
# This tool was originally written to help split large patches
# into several standalone fragments.
#
# Author: Tim Bird
# Copyright (C) 2003 Sony Electronics, Inc.
#
# TODO:
#  - ability to write non-matching parts to a file
#    - this can be done now, using inverse flags and 2 passes
#  - support multiple filenames (loop over args)
#  - check for intersection between two patches
#    - compare hunks for same file, and look for overlapping regions
#  - support context-style (-c) diffs
#  - handle "Only in" files better
#
# CHANGELOG
# version 1.02 - skip blank (empty) lines
# version 1.03 - skip some lines found in '--git' diffs
#   fix some bugs in pcomp - it kind-a works at the patch-stat level

major_version = 1
minor_version = 0
revision = 3

import os
import sys
import getopt
import string
import re
import copy

# try to compile with psyco, if present
try:
    import psyco
    #psyco.log("/tmp/psyco.log")
    psyco.full()
except Exception:
    # psyco is only an optional accelerator: carry on without it if it is
    # missing or fails to initialise.  Catching Exception (rather than
    # using a bare except) lets SystemExit/KeyboardInterrupt propagate.
    pass


def _lookup_flag(name):
    """Return the current value of the module-level flag *name*.

    The flag is read from this module's globals when it is defined there;
    otherwise it is read from the ``__main__`` module.  A flag that is
    defined in neither place is treated as off (0) instead of raising.
    """
    try:
        return globals()[name]
    except KeyError:
        import __main__
        return getattr(__main__, name, 0)


def vprint(message):
    """Print *message* to stdout, but only when ``verbose`` is set."""
    if _lookup_flag('verbose'):
        # single-argument print(...) is valid on both Python 2 and 3
        print(message)


def dprint(message):
    """Print *message* to stdout, but only when ``debug`` is set."""
    if _lookup_flag('debug'):
        print(message)


def perror(message):
    """Write *message*, followed by a newline, to stderr."""
    sys.stderr.write(message + "\n")


#
# Note that --debug is an undocumented option, which emits
# debug information during processing.
#
def diffinfo_usage(progname):
    """Print the diffinfo usage text and exit with status 1.

    progname -- path of the running program; only its basename is shown.
    This function never returns: it always calls sys.exit(1).
    """
    print("""Usage: %s [options] [<patchfile>]

Reads <patchfile>, and produces information about the number of lines
changed and number of hunks for each file referenced.  It can produce
a list of hunks matching a regular expression, and can optionally
write out only those hunks to a new patch file.  This allows you to
extract sub-patches from a single large patch.

If no file is specified on the command line, the program reads the
patch from stdin.

 -h, --help     Show this usage help.
 -v, --verbose  Show verbose output (including hunk details)
 -f, --force    Force continuing even if errors are encountered
 -d             Show info in 'diffstat' format (without histogram)
 -p <path>      Show info for files matching path
 -q             Specify NOT (inverse) operation of path match.
 -s <pattern>   Show info for pattern string
 -a             Specify AND operation with a second pattern match
 -o             Specify OR operation with a second pattern match
 -i             Specify NOT (inverse) operation of pattern match
 -n <file>      Write matching hunks to new patch <file>
                If <file> is "-", then output to stdout.
                Note: This writes the entire patch unless used with
                a matching option (-s or -p).
 -l             show a simple list of files
 -V, --version  Print the version number
""" % os.path.basename(progname))
    sys.exit(1)


# class for holding file header information, and a list of hunks
class fileinfo:
    def __init__(self, filename, header):
        """Record the file name and its header lines; hunks start empty.

        filename -- name of the file the patch section refers to
        header   -- list of header lines for this file
        """
        self.filename = filename
        self.header = header
        # hunks are appended by the caller as they are discovered
        self.hunks = []

    def write_header(self, file):
        """Write every stored header line to the file-like object *file*."""
        for line in self.header:
            file.write(line)


# class for holding information about a single hunk
class hunkinfo:
    def __init__(self, filename, range, id):
        """Create a hunk record from its range ("@@ ...") line.

        filename -- name of the file this hunk belongs to
        range    -- the hunk's range line; kept as the first stored line
        id       -- identifier assigned to this hunk by the caller

        (The parameter names shadow the builtins ``range`` and ``id``;
        they are kept because they are part of the existing interface.)
        """
        self.filename = filename
        self.id = id
        self.pluses = 0
        self.minuses = 0
        self.configvars = 0
        self.lines = [range]
        # separate out the range information: the 2nd and 3rd
        # whitespace-separated tokens of the range line
        rparts = range.split()
        self.range = rparts[1:3]

    def add_line(self, line):
        """Append one line to this hunk."""
        self.lines.append(line)

    def write_lines(self, file):
        """Write every line of the hunk to the file-like object *file*."""
        for line in self.lines:
            file.write(line)

    def line_match(self, pat):
        """Return 1 if any added/removed line matches *pat*, else 0.

        pat -- compiled regular expression.  Only lines starting with
        "-" or "+" are examined.
        """
        for line in self.lines:
            if (line.startswith("-") or line.startswith("+")) and \
                    pat.search(line):
                return 1
        return 0


# Find filename in header (either 3-line or 2-line)
# header looks like this:
#diff -u -ruN linux-2.4.20.orig/CREDITS celf-030822.orig/CREDITS
#--- linux-2.4.20.orig/CREDITS Thu Nov 28 15:53:08 2002
#+++ celf-030822.orig/CREDITS Fri Aug 22 16:20:11 2003
#
# or it may look like this, for one-file patches
#--- testfile Mon Oct 13 10:55:14 2003
#+++ testfile2 Mon Oct 13 10:55:04 2003
#
# or it may look like this, when ???
# flags are used with diff:
#Index: testfile
#============================================================================
#--- testfile Mon Oct 13 10:55:14 2003
#+++ testfile2 Mon Oct 13 10:55:04 2003
#
# or it may look like this, when produced with --git:
#diff --git a/ipc/msg.c b/ipc/msg.c
#index fbf7570..2cd9342 100644
#--- a/kernel/Makefile
#+++ b/kernel/Makefile
#
# or it may look like this, when produced with --git:
#diff --git a/include/asm-arm/ltt.h b/include/asm-arm/ltt.h
#new file mode 100644
#index 0000000..cab8582
#--- /dev/null
#+++ b/include/asm-arm26/ltt.h
def extract_filename(file_header):
    """Return the file name found on the last line of *file_header*.

    file_header -- list of header lines; the last one is expected to be
    the "+++ <path> ..." line.

    The path is the 2nd whitespace-separated token of that line.  When the
    path has more than one "/"-separated component, the first component is
    dropped (e.g. "b/kernel/Makefile" -> "kernel/Makefile").

    Raises IndexError if *file_header* is empty or its last line has
    fewer than two tokens.
    """
    # find file path on last line, in 2nd token position
    # (with tokens separated by whitespace)
    file_line = file_header[-1]
    file_path = file_line.split()[1]

    # remove first directory of file path
    # FIXTHIS - should remove all common prefixes, like diffstat
    file_path_parts = file_path.split("/")
    if len(file_path_parts) > 1:
        del file_path_parts[0]
    # str.join replaces string.join(), which is deprecated in Python 2
    # and does not exist in Python 3
    return "/".join(file_path_parts)


# function used to set correct ending for plural words
# (return an "s" if number is not 1)
def s(x):
    """Return "" when *x* equals 1, otherwise "s"."""
    if x == 1:
        return ""
    return "s"


# this routine is intended to output results in exactly the same format as
# diffstat, except for the histogram.
def show_diffstat_results(files, hunks): filenames = files.keys() filenames.sort() # find longest filename and use for format string width = 0 for filename in filenames: if width 1: # show info for each hunk i = 1 for hunk in file_info.hunks: print " hunk %5d: %5d+ %5d- @ %s" % \ (i, hunk.pluses, hunk.minuses, hunk.range[1]) i += 1 print " %d file%s changed, %d insertion%s(+), %d deletion%s(-), %d hunk%s" % \ (tf,s(tf),tp,s(tp),tm,s(tm),th,s(th)) def show_files_only(files, pat, hunks, hunk_matches, path_matches): if pat: # find the set of files with matching hunks fileset = {} for hunk_id in hunk_matches.keys(): hunk = hunk_matches[hunk_id] fileset[hunk.filename] = 1 filenames = fileset.keys() else: filenames = files.keys() filenames.sort() # find longest filename and use for format string for filename in filenames: # filter using path_matches if path_matches and not path_matches.has_key(filename): continue print filename def show_pat_results(files, hunks, hunk_matches, path_matches): # find the set of files with matching hunks fileset = {} for hunk_id in hunk_matches.keys(): hunk = hunk_matches[hunk_id] fileset[hunk.filename] = 1 filenames = fileset.keys() filenames.sort() # find longest filename and use for format string width = 0 for filename in filenames: # filter using path_matches if path_matches and not path_matches.has_key(filename): continue if width " % line) dprint("len(line)=%d" % len(line)) dprint("in_header=%s" % in_header) is_change = 0 if in_header: file_header.append(line) # ignore --git lines in the hunk header if line.startswith("new file mode"): continue if line.startswith("index "): continue # count down through header lines if in_header > 1: in_header -= 1 continue else: cur_filename = extract_filename(file_header) dprint("filename=%s" % cur_filename) cur_file = fileinfo(cur_filename, file_header) files[cur_filename] = cur_file # check filename for path match if is_path_match(cur_filename, path_pat_str, path_inverse): dprint("path_match") 
path_matches[cur_filename] = cur_file in_header = 0 continue if line.startswith("diff "): dprint("found new file header (3-line)") file_header = [line] # in_header has lines remaining in header in_header = 2 in_patch = 1 continue if line.startswith("Index: "): dprint("found new file header (4-line)") file_header = [line] # in_header has lines remaining in header in_header = 3 in_patch = 1 continue if line.startswith("--- "): dprint("found new file header (2-line)") file_header = [line] in_header = 1 in_patch = 1 continue # skip everything until the first header is found if not in_patch: continue if line.startswith("@@ "): dprint("found new hunk") # do pattern matching on old hunk, if it exists if cur_hunk and pat: if is_match(cur_hunk, pat, pat_operator, pat2, pat_inverse): hunk_matches[hunk_id] = cur_hunk hunk_id += 1 cur_hunk = hunkinfo(cur_filename, line, hunk_id) cur_file.hunks.append(cur_hunk) hunks[cur_filename+line] = cur_hunk continue # check for special diff strings # No newline at end of file if line.startswith("\ No newline"): continue # end of patch if line=="_\n": continue # FIXTHIS - doesn't handle "Only in" line, # like following: #Only in celf-030822.orig/net/ipv6: anycast.c if line.startswith("Only in"): if not force: perror('Error: Missing data:') perror(line[:-1]) perror("Please use diff '-N' option to create the patch with full file contents") perror("or use the --force option to force operation despite this error.") sys.exit(1) else: perror("Missing data:") perror(line[:-1]) perror("Please use diff '-N' option to create the patch with full file contents\n") continue # skip blank lines if len(line)==1: continue # # should look for source file or dir, do 'wc' and specially # denote it. 
if line.startswith("+"): cur_hunk.pluses += 1 is_change = 1 if line.startswith("-"): cur_hunk.minuses += 1 is_change = 1 cur_hunk.add_line(line) # FIXTHIS - scan lines for config_var matches # if we're writing a new patch, but # no pattern is specified, # treat every line in every hunk as a match if new_filename and not pat: hunk_matches[hunk_id] = cur_hunk # catch stuff we don't recognize if not is_change and not line.startswith(" ") and not \ (line.startswith("Binary files") and \ line.endswith("differ\n")): perror("Error: unrecognized patch element") perror("line=%s." % line) # don't quit on this error -- just continue # do pattern matching on last hunk, if it exists if cur_hunk and pat: if is_match(cur_hunk, pat, pat_operator, pat2, pat_inverse): hunk_matches[hunk_id] = cur_hunk # we're done with input patch_file.close() return (files, hunks, hunk_matches, path_matches) def diffinfo_main(): global verbose, debug # parse command line args try: opts, args = getopt.getopt(sys.argv[1:], "hvfdc:p:qs:aoin:lV",["help", "force", "verbose", "debug","version"]) except getopt.GetoptError: # print help information and exit diffinfo_usage(sys.argv[0]) force = 0 verbose = 0 debug = 0 diffstat_format = 0 files_only = 0 config_var = "" path_pat_str = "" path_inverse = 0 pat = None pat_str = "" pat2 = None pat_operator = "AND" pat_inverse = 0 new_filename = "" # handle options for o,a in opts: if o in ("-h", "--help"): diffinfo_usage(sys.argv[0]) if o in ("-v", "--verbose"): verbose = 1 if o == "--debug": debug = 1 if o == "-d": diffstat_format = 1 if o == "-c": config_var = a if o in ("-f", "--force"): force = 1 if o == "-p": path_pat_str = a if o == "-s": if not pat_str: pat_str = a pat = re.compile(pat_str) else: pat_str2 = a pat2 = re.compile(pat_str2) # reform pat_str for display pat_str += " "+ pat_operator+" "+pat_str2 if o == "-a": pat_operator = "AND" if o == "-o": pat_operator = "OR" if o == "-i": pat_inverse = 1 if o == "-q": path_inverse = 1 if o == "-n": 
new_filename = a if o == "-l": files_only = 1 if o in ["-V","--version"]: print "diffinfo version %d.%d.%d" % (major_version, minor_version, revision) sys.exit(1) # if no argument, then read patch from stdin if len(args)<1: patch_file = sys.stdin else: # otherwise, gather the data from the named file # FIXTHIS - should loop through input files, if more than # one is specified try: patch_file = open(args[0], "r") except IOError, arg: print "Error - can't open patch file: %s" % args[0] print arg.strerror sys.exit(1) (files, hunks, hunk_matches, path_matches) = \ read_patch(patch_file, path_pat_str, path_inverse, \ pat, pat_operator, pat2, pat_inverse, new_filename) # path_matches is used as a boolean to indicate that # we tried to match based on path_pat_str if path_pat_str: path_matches[1] = 1 if files_only: show_files_only(files, pat, hunks, hunk_matches, path_matches) sys.exit(0) # show results if pat: print "files matching pattern: '%s'" % pat_str show_pat_results(files, hunks, hunk_matches, path_matches) else: if diffstat_format: show_diffstat_results(files, hunks) else: show_results(files, hunks, path_matches) # try writing a new patch, if requested. 
if new_filename: write_new_patch(new_filename, files, hunk_matches, path_matches) def sum_info(hunks): # add up data for the hunks p = 0 m = 0 # assume file is not new newflag="|" for hunk in hunks: p += hunk.pluses m += hunk.minuses # see if file is new if hunk.range[0]=="-0,0": newflag="N" c = len(hunks) return (p,m,newflag,c) def show_hunk_intersection(filename, info1, info2, format): global verbose, debug #FIXTHIS - need to finish this # get data for each file (p1,m1,newflag1,c1) = sum_info(info1.hunks) (p2,m2,newflag2,c2) = sum_info(info2.hunks) # if all aggregates are the same, assume hunks are same if p1==p2 and m1==m2 and newflag1==newflag2 and c1==c2: # FIXTHIS - should really compare hunks line-by-line here return(0,0) print "-"+format % (filename, newflag1, p1, m1), print "(%d hunk%s)" % (c1, s(c1)) print "+"+format % (filename, newflag2, p2, m2), print "(%d hunk%s)" % (c2, s(c2)) return (1,1) # show the intersection of two patches def show_intersection(files1, hunks1, files2, hunks2): global verbose files = copy.deepcopy(files1) files.update(files2) filenames = files.keys() filenames.sort() # find longest filename and use for format string width = 0 for filename in filenames: if width Compares 2 patch files and shows the difference between them. -h, --help Show this usage help. 
-v, --verbose Show verbose output (including hunk details) -V, --version Print the version number """ % os.path.basename(progname) sys.exit(1) def pcomp_main(): global verbose, debug # parse command line args try: opts, args = getopt.getopt(sys.argv[1:], "hvV",["help", "verbose", "debug","version"]) except getopt.GetoptError: # print help information and exit diffinfo_usage(sys.argv[0]) verbose = 0 debug = 0 # handle options for o,a in opts: if o in ("-h", "--help"): pcomp_usage(sys.argv[0]) if o in ("-v", "--verbose"): verbose = 1 if o == "--debug": debug = 1 if o in ("-f", "--force"): force = 1 if o in ["-V","--version"]: print "pcomp version %d.%d" % (major_version, minor_version) sys.exit(1) # gather the data for the first patch if len(args)<2: print "Error - you must specify 2 files to compare" pcomp_usage(sys.argv[0]) filename1 = args[0] filename2 = args[1] try: patch_file1 = open(filename1, "r") except IOError, arg: print "Error - can't open patch file: %s" % filename1 print arg.strerror sys.exit(1) try: patch_file2 = open(filename2, "r") except IOError, arg: print "Error - can't open patch file: %s" % filename2 print arg.strerror sys.exit(1) (files1, hunks1, hunk_matches, path_matches) = \ read_patch(patch_file1, "", 0, None, "", None, 0, "") (files2, hunks2, hunk_matches, path_matches) = \ read_patch(patch_file2, "", 0, None, "", None, 0, "") # FIXTHIS - should write results to cache file here to save # time on subsequent invocations print "Intersection:" show_intersection(files1, hunks1, files2, hunks2) if __name__=="__main__": progname = os.path.basename(sys.argv[0]) if progname=="diffinfo": diffinfo_main() sys.exit(0) if progname=="pcomp": pcomp_main() sys.exit(0) print "diffinfo program: unknown argv0 command - %s" % progname sys.exit(1)