summaryrefslogtreecommitdiff
path: root/cad/src/tools/Refactoring/indent-diff.gawk
blob: 1ce45f053895dce656de9358ba8e10c1f8231bf0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/usr/bin/env gawk -f
#
# indent-diff.gawk -- Filter indentation changes from context diffs.
#    Only diff line groups with indentation changes are printed.
#
#   Indentation is significant for control structures in Python code.
#   On matching change lines (starting with a "!" in context diffs),
#   if the indentation is different, the indentation count is inserted
#   after the "! " to highlight the change.  e.g:
#
#      ***************
#      *** 33,35 ****
#                    draw_vane( bond, a1py, a2py, ord_pi_y, rad, col)
#      !  8      if ord_pi_z:
#                    draw_vane( bond, a1pz, a2pz, ord_pi_z, rad, col) 
#      --- 33,35 ----
#                    draw_vane( bond, a1py, a2py, ord_pi_y, rad, col)
#      ! 12          if ord_pi_z:
#                    draw_vane( bond, a1pz, a2pz, ord_pi_z, rad, col) 
#
#   Note: Tab characters go to 8-space tab stops, as Python assumes.

# Defaults for the tunables.  Both may be overridden from the command line,
# e.g.  gawk -v debug=1 -v mindiff=4 -f indent-diff.gawk some.diff
# (-v assignments are made before BEGIN runs, so only fill in what is unset).
BEGIN {
    # Non-zero turns on tracing of the matching decisions.
    if ( debug == "" ) debug = 0;
    # Minimum indentation difference to report: pairs whose indentation
    # differs by no more than this many columns are treated as matching.
    if ( mindiff == "" ) mindiff = 2;
}

# Capture the file header lines ("*** old-path ..." and "--- new-path ...").
# A header is any "*** " / "--- " line that is not a hunk line range
# ("*** 33,35 ****" / "--- 33,35 ----"), so paths that begin with "/", ".",
# "_" or a digit are recognized as well as those beginning with a letter.
/^\*\*\* / && ! /^\*\*\* [0-9]+(,[0-9]+)? \*\*\*\*/ {
    # A new file starts here: flush the last line group of the previous file.
    if ( entry ) doGroup();
    entry = 0;
    ofile = $0;
    didHdr = 0;     # The header pair for this file has not been printed yet.
    if ( debug ) print "Starting", ofile;
    next;
}
/^--- / && ! /^--- [0-9]+(,[0-9]+)? ----/ { nfile = $0; next; }

# Collect and process diff line groups.
# Line group separator ("***************"): finish the previous group and
# start collecting a new one.  No "next" here, so the separator itself is
# stored as the first old line by the catch-all rule below.
/^\*\*\*\*\*\*/ {
    if ( entry ) doGroup();
    entry = 1; oline = 1; nline = 0;
}
# Separator between old and new line sections ("--- 33,35 ----", including
# the "--- 0 ----" form).  Also stored, as the first new line.
/^--- [0-9]+(,[0-9]+)? ----/ { nline = 1; }
# Store lines: into the new section once it has started, else the old one.
{ if ( nline ) nlines[nline++] = $0; else olines[oline++] = $0; next; }
# Flush the final line group at end of input.
END { if ( entry ) doGroup(); }

# Process a diff line group within a file diff.
#
# Reads the globals olines[1 .. oline-1] and nlines[1 .. nline-1] (the old
# and new sections of one line group).  Each old change line (starting with
# "!") is paired, in order, with the next new change line that has the same
# signature (see sig()).  A pair whose indentation differs by more than
# mindiff columns gets both indentation counts written into the stored
# lines.  The whole group is printed, preceded once per file by the ofile
# and nfile header lines, if any old change line examined was not known to
# match: it had no partner, or its partner's indentation differed.
#
# The names after the gap in the parameter list are locals, not arguments.
function doGroup(    o, n, start, ol, nl, osig, oind, nind, nonmatched, nonmatches) {
    # Compare the indentation on old and new lines in a line group.
    nonmatches = 0;
    n = 1;
    for ( o = 1; o < oline && n < nline; o++ ) {
        ol = olines[o];
        # Ignore old, non-change ("!") lines.
        if ( substr(ol, 1, 1) != "!" ) continue;
        osig = sig(ol);
        oind = indLen(ol);

        # Look for a corresponding new change line.
        nonmatched = 1;
        start = n;      # Where to resume if this old line has no partner.
        for ( ; n < nline; n++ ) {
            nl = nlines[n];
            # Ignore new, non-change ("!") lines.
            if ( substr(nl, 1, 1) != "!" ) continue;
            if ( sig(nl) != osig ) {
                if ( debug ) printf "different signatures\n  %s\n  %s\n", ol, nl;
                continue;
            }

            # Matching signatures, compare indentation.
            nind = indLen(nl);
            if ( nind <= (oind + mindiff) && nind >= (oind - mindiff) ) {
                if ( debug )
                    printf "%s %d/%d\n  %s\n  %s\n",
                           "matched sigs & indentation", oind, nind, ol, nl;
                nonmatched = 0;
            } else {
                # Non-match: Insert indentation lengths to show where.
                olines[o] = markIndent(ol, oind);
                nlines[n] = markIndent(nl, nind);
                if ( debug )
                    printf "Different indentation %d/%d\n  %s\n  %s\n",
                           oind, nind, olines[o], nlines[n];
            }
            break;      # Out of the new-line loop, n is the partner line.
        }

        if ( n < nline )
            n++;        # Partner found and consumed, go on to the next line.
        else
            n = start;  # No partner: leave the new lines for later old lines.
        nonmatches += nonmatched;
    }

    # Print line groups with indentation that isn't known to match.
    if ( nonmatches ) {
        if ( ! didHdr ) { print ofile; print nfile; didHdr = 1; }
        for ( o = 1; o < oline; o++ ) print olines[o];
        for ( n = 1; n < nline; n++ ) print nlines[n];
    }
}

# Return a change line with its indentation count placed after the
# two-character diff prefix, e.g. "!         if x:" becomes "!  8      if x:".
# The count takes the place of leading spaces when there are some, keeping
# the code in its column; it never replaces a tab or a code character.
function markIndent(line, ind,    tag, rest, eat) {
    tag = sprintf("%2d", ind);
    rest = substr(line, 3);
    # Give up at most as many leading spaces as the count is wide.
    eat = 0;
    while ( eat < length(tag) && substr(rest, eat + 1, 1) == " " ) eat++;
    rest = substr(rest, eat + 1);
    # Keep the count from running straight into the code.
    if ( rest != "" && rest !~ /^[ \t]/ ) tag = tag " ";
    return substr(line, 1, 2) tag rest;
}

# Use the first two words on the line as a signature.
#
#   line  One line of a context diff group.  It must start with at least
#         one non-word character (the diff prefix does) for any word to be
#         recognized.
#
# A word is a run of letters, digits and underscores.  Returns the first
# two words joined by a single space, the only word if there is just one,
# or "" if there is none.  "m" is a local, not an argument.
function sig(line,    m) {
    # One pass: the second word (m[3]) is optional; the longest match is
    # taken, so it is captured whenever it exists.
    if ( ! match(line, /^[^a-zA-Z0-9_]+([a-zA-Z0-9_]+)([^a-zA-Z0-9_]+([a-zA-Z0-9_]+))?/, m) )
        return "";
    if ( length(m[3]) > 0 )
        return m[1] " " m[3];
    return m[1];
}
    
# The length of indentation on a line in a context diff entry.
#
#   line  One line of a context diff group, including the two-character
#         prefix ("! ", "+ ", "- " or "  ") that diff puts in front.
#
# Returns the width in columns of the blanks and tabs that follow the
# prefix.  A line too short to have anything after the prefix has
# indentation 0.  "i", "c" and "spaces" are locals, not arguments.
function indLen(line,    i, c, spaces) {
    spaces = 0;
    # Skip the first two characters, which are prefixed by diff, then walk
    # the leading whitespace.  substr() past the end yields "", which stops
    # the scan just as the first code character does.
    for ( i = 3; (c = substr(line, i, 1)) == " " || c == "\t"; i++ ) {
        # Convert tabs to spaces using 8-space tab stops, as Python assumes.
        if ( c == " " ) spaces++;
        else spaces += 8 - spaces % 8;
    }
    return spaces;
}