#!/usr/bin/env gawk -f
#
# indent-diff.gawk -- Filter indentation changes from context diffs.
#    Only diff line groups with indentation changes are printed.
#
#   Indentation is significant for control structures in Python code.
#   On matching change lines (those starting with "!" in context diffs)
#   whose indentation differs, the indentation width is written just
#   after the "! " to highlight the change, e.g.:
#
#      ***************
#      *** 33,35 ****
#                    draw_vane( bond, a1py, a2py, ord_pi_y, rad, col)
#      !  8      if ord_pi_z:
#                    draw_vane( bond, a1pz, a2pz, ord_pi_z, rad, col) 
#      --- 33,35 ----
#                    draw_vane( bond, a1py, a2py, ord_pi_y, rad, col)
#      ! 12          if ord_pi_z:
#                    draw_vane( bond, a1pz, a2pz, ord_pi_z, rad, col) 
#
#   Note: Tab characters go to 8-space tab stops, as Python assumes.

BEGIN {
    # Tolerance, in columns: a pair of change lines whose indentation
    # widths differ by no more than this is treated as unchanged.
    mindiff = 2;

    # Set to 1 to print a trace of the signature and indentation
    # comparisons on standard output.
    debug = 0;
}

# Capture the file header lines.
#
# A context diff names the old file on a "*** " line and the new file on a
# "--- " line.  Hunk range markers ("*** 33,35 ****", "--- 33,35 ----")
# start the same way, so they are excluded explicitly rather than by
# requiring the file name to start with a letter; that way names such as
# "./a.py", "/tmp/a.py" or "3rdparty/a.py" are recognised as headers too.
/^\*\*\* / && ! /^\*\*\* [0-9]+(,[0-9]+)? \*\*\*\*[ \t\r]*$/ {
    # Flush the last line group of the previous file before starting anew.
    if ( entry )
        doGroup();
    entry = 0;
    ofile = $0;
    # The header pair has not yet been printed for this file.
    didHdr = 0;
    if ( debug ) print "Starting", ofile;
    next;
}
/^--- / && ! /^--- [0-9]+(,[0-9]+)? ----[ \t\r]*$/ { nfile = $0; next; }

# Buffer each diff line group so that doGroup() can examine it as a whole.

# A row of asterisks opens a new line group: flush the group collected so
# far, then restart buffering in the old-line section.
/^\*\*\*\*\*\*/ {
    if ( entry )
        doGroup();
    nline = 0;
    oline = 1;
    entry = 1;
}

# The new-file range marker switches buffering to the new-line section.
/^--- [1-9]/ {
    nline = 1;
}

# Every line that reaches this rule (the markers above included) is
# appended to whichever section is currently active.
{
    if ( nline )
        nlines[nline++] = $0;
    else
        olines[oline++] = $0;
    next;
}

# Flush the final line group once the input is exhausted.
END {
    if ( entry )
        doGroup();
}

# Process a diff line group within a file diff.
#
# Works on the group buffered in the globals olines[1 .. oline-1] (old
# section) and nlines[1 .. nline-1] (new section).  Each old change line
# ("!" in column 1) is paired with the next not-yet-paired new change line
# that has the same signature (see sig()), and the two indentation widths
# (see indLen()) are compared.  A pair whose widths differ by more than
# mindiff columns is annotated with both widths.
#
# The group is printed -- preceded, once per file, by the saved ofile and
# nfile header lines -- when at least one examined old change line has no
# partner, or has a partner with different indentation.
#
# The names after the gap in the parameter list are locals, not arguments.
function doGroup(    o, n, k, ol, nl, osig, oind, nind, paired, nonmatches) {
    nonmatches = 0;
    n = 1;
    for ( o = 1; o < oline && n < nline; o++ ) {
        ol = olines[o];
        # Ignore old, non-change ("!") lines.
        if ( substr(ol, 1, 1) != "!" ) continue;
        osig = sig(ol);
        oind = indLen(ol);

        # Look ahead for a corresponding new change line.  The search uses
        # its own index so that an old line without a partner does not use
        # up the new lines that later old lines still need.
        paired = 0;
        for ( k = n; k < nline; k++ ) {
            nl = nlines[k];
            # Ignore new, non-change ("!") lines.
            if ( substr(nl, 1, 1) != "!" ) continue;
            if ( sig(nl) == osig ) { paired = 1; break; }
            if ( debug ) printf "different signatures\n  %s\n  %s\n", ol, nl;
        }
        if ( ! paired ) {
            # No partner: the indentation is not known to match.
            nonmatches++;
            continue;
        }

        # Paired: later old lines resume the search after this new line.
        n = k + 1;
        nind = indLen(nl);
        if ( nind <= (oind + mindiff) && nind >= (oind - mindiff) ) {
            if ( debug )
                printf "%s %d/%d\n  %s\n  %s\n",
                    "matched sigs & indentation", oind, nind, ol, nl;
            continue;
        }

        # Non-match: show the indentation widths to mark where.
        olines[o] = markIndent(ol, oind);
        nlines[k] = markIndent(nl, nind);
        if ( debug )
            printf "Different indentation %d/%d\n  %s\n  %s\n",
                oind, nind, olines[o], nlines[k];
        nonmatches++;
    }

    # Print line groups with indentation that isn't known to match.
    if ( nonmatches ) {
        if ( ! didHdr ) { print ofile; print nfile; didHdr = 1; }
        for ( o = 1; o < oline; o++ ) print olines[o];
        for ( n = 1; n < nline; n++ ) print nlines[n];
    }
}

# Return `line` with the indentation width `ind` shown directly after the
# two-character diff prefix.  When the columns that follow the prefix hold
# only spaces, the number overwrites them so the code keeps its column.
# Otherwise (tab indentation, or fewer than two columns of indentation) the
# number and a separating space are inserted, so that no character of the
# original line is lost.
function markIndent(line, ind,    tag, slot) {
    tag = sprintf("%2d", ind);
    slot = substr(line, 3, length(tag));
    if ( length(slot) == length(tag) && slot ~ /^ +$/ )
        return substr(line, 1, 2) tag substr(line, 3 + length(tag));
    return substr(line, 1, 2) tag " " substr(line, 3);
}

# Use the first two words on the line as a signature.
#
# A word is a run of letters, digits and underscores.  The line must begin
# with at least one non-word character (in a context diff this is the
# two-character prefix); any further punctuation or whitespace before and
# between the words is skipped.
#
#   line     a line taken from a context diff line group
#   returns  "word1 word2", or "word1" when the line has only one word,
#            or "" when it has none (or does not start with a non-word
#            character)
#
# `m` is a local that receives the capture groups; it is not an argument.
function sig(line,    m) {
    if ( match(line, /^[^a-zA-Z0-9_]+([a-zA-Z0-9_]+)[^a-zA-Z0-9_]+([a-zA-Z0-9_]+)/, m) )
        return m[1] " " m[2];

    # There may not be two words on the line, or any.
    if ( match(line, /^[^a-zA-Z0-9_]+([a-zA-Z0-9_]+)/, m) )
        return m[1];
    return "";
}
    
# The length of indentation on a line in a context diff entry.
#
# The first two characters of the line are the prefix added by diff and
# are not counted.  A line shorter than that has no indentation, so the
# result is 0.
#
#   line     a line taken from a context diff line group
#   returns  the indentation width in columns, with tabs expanded to
#            8-column tab stops, as Python assumes
#
# The names after the gap in the parameter list are locals, not arguments.
function indLen(line,    ws, spaces, i, len) {
    # Keep only the leading whitespace that follows the diff prefix.
    ws = substr(line, 3);
    sub(/[^ \t].*/, "", ws);

    # Convert tabs to spaces using 8-space tab stops, as Python assumes.
    spaces = 0;
    len = length(ws);
    for ( i = 1; i <= len; i++ ) {
        if ( substr(ws, i, 1) == " " ) spaces++;
        else spaces += 8 - spaces % 8;
    }

    return spaces;
}