1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
|
#!/usr/bin/env python
# Copyright 2007 Nanorex, Inc. See LICENSE file for details.
"""
simplify-reloads - executable python script to remove reload_once lines
$Id$
This is a standalone script (which depends on some code in cad/src)
to remove the first two lines from three-line groups that look like this:
import MODULE
reload_once(MODULE)
from MODULE import SYMBOLS
The MODULE might be a simple or dotted module name (e.g. Overlay or exprs.Overlay).
Usage: ./simplify-reloads file1.py file2.py ...
saves the modified file1.py into FIXED-file1.py, same for file2.py, etc,
but if there were no changes removes FIXED-file1.py instead,
with prints to stderr summarizing the changes or having other debug info.
(no prints to stdout)
"""
__author__ = "bruce"

# algorithm: match individual lines to these patterns (assume no continuation lines need to be handled).
# find groups of three lines which match the three patterns in order, using the same Names, and modify them.

# Behavior/debug flags -- edit in place; this is a one-use script, not a library.
DEBUG = True # various debug prints to stderr
DEBUG_REJECTS = False # debug print for each match failure
DEBUG_MATCHES = False # debug print for each match success
DEBUG_JUST_PRINT_THE_TOKENS = False # if True, just print the tokens nicely, to stdout, and do nothing else.
REALLY_REMOVE_FILES = True # call os.remove on output files we might write anew (whether or not we actually do so)
    # (only matters if not OVERWRITE_ORIGINAL_FILES)
OVERWRITE_ORIGINAL_FILES = True # remove original files and replace them with modified ones (when they would differ)
    # TODO: really this should be a command-line option, default false,
    # but this is a one-use script, so now that it's debugged I won't bother.

from tokenize import generate_tokens
from tokenize import tok_name # dictionary, token (int, used in generate_tokens retval) to its name (e.g. 'NEWLINE')
import sys, os

if __name__ == '__main__':
    # make cad/src importable so the parse_utils import below works
    # when this script is run from its own directory
    thisdir = os.path.dirname(__file__)
    ## if thisdir != '.':
    ##     print >> sys.stderr, "running in directory %r" % (thisdir,)
    cad_src_dir = os.path.join( thisdir, "../..") # location of cad/src directory
    sys.path.append( cad_src_dir )

# ==

# ISSUE: in parse_utils, IGNORED_TOKNAMES = ('NL', 'COMMENT') -- they might be bad here -- make them a parameter of TokenType? ###TODO

from parse_utils import parse_top, Op, TokenType, Seq, Name, Newline, Optional
def invert_dict(dict1): #bruce 050705 (modified from version in bond_constants.py, should refile)
    """
    Return a new dict mapping each value of dict1 back to its key.
    Asserts that dict1's values are unique, since otherwise the
    inversion would be ambiguous (and lossy).
    """
    inverted = dict([(value, key) for key, value in dict1.items()])
    assert len(inverted) == len(dict1), "dict passed to invert_dict had nonunique values"
    return inverted
_toktype_from_name = invert_dict(tok_name)
    # maps token-type name (e.g. 'NEWLINE') back to its integer token type

def token_with_given_name(name):
    """Return the integer token type whose tokenize name is the given string."""
    return _toktype_from_name[name]

NewlineToken = token_with_given_name('NEWLINE') # fyi: the lines of interest to us all end in this, not NL
def SpecificName( name_string):
    """
    Return a TokenType pattern matching a NAME token whose text
    equals name_string exactly.
    """
    def _text_matches(token, expected = name_string):
        # default-arg binding captures name_string at definition time
        return token == expected
    return TokenType('NAME', _text_matches)
def Keyword( keyword_string):
    """
    Return a pattern matching the given Python keyword.
    generate_tokens reports keywords as plain NAME tokens (it doesn't
    classify them specially), so this is just SpecificName by another name.
    """
    return SpecificName( keyword_string)
# Token-level patterns used to assemble the line patterns below.
IMPORT = Keyword('import')
FROM = Keyword('from')
RELOAD_ONCE = SpecificName('reload_once')
LeftParen = Op('(')
RightParen = Op(')')

ModuleName = Seq( Name, Optional( Seq( Op('.'), Name )))
    # note: this matches module names containing 0 or 1 dot;
    # if we need to match more dots
    # we could make it recursive using ForwardDef, or just lengthen it.
def describe_logical_line_tokens(line_tokens):
    """
    Return the logical source line shared by all tokens in line_tokens.
    Every token tuple carries its logical line as the last element;
    we sanity-check that the first and last token agree before returning it.
    """
    common_line = line_tokens[0][-1]
    assert common_line == line_tokens[-1][-1]
    return common_line
def tok_srow( token):
    """
    Return the starting physical-line number of a token tuple
    (toktype, tokstring, (srow, scol), (erow, ecol), line).
    """
    # WARNING: first line is srow 1 but is index 0!
    return token[2][0]
def prepend_to_basename( prefix, filename):
    """
    Return filename with prefix prepended to its basename,
    leaving the directory part unchanged.
    """
    dirpart, basename = os.path.split(filename)
    return os.path.join( dirpart, prefix + basename)
class _linepat:
    """
    A pattern for one logical line, containing a ModuleName which we can return if it matches.
    """
    def __init__(self, pattern, namepos = None, debugname = None, incomplete_ok = False):
        """
        pattern: a parse_utils pattern (e.g. a Seq) for one logical line.
        namepos: index within the pattern's match result where the ModuleName lands (required).
        debugname: short name used in debug prints and repr (required).
        incomplete_ok: if true, a match of only the start of the line counts.
        """
        assert namepos is not None
        assert debugname is not None
        self.pattern = pattern
        self.namepos = namepos
        self.debugname = debugname
        self.incomplete_ok = incomplete_ok # ok to match just the first part of the line?
        return
    def match_to_line_tokens(self, line_tokens):
        """
        line_tokens is a list of tokens (which are tuples as returned by generate_tokens),
        corresponding to a single logical line (i.e. all with the same last element);
        return None if we don't match it,
        or the matched ModuleName within it if we do (NOT as a single string,
        but as a nested list of strings that needs flattening -- should fix someday).
        """
        pat = self.pattern
        res, newrest = parse_top( pat, line_tokens)
        if newrest is None:
            # note: this is how parse_top returns a parse failure or syntax error
            # (with res being a string error message)
            if DEBUG_REJECTS:
                why = res
                print >> sys.stderr, "fyi: %r rejecting line %r because %s" % (self, describe_logical_line_tokens(line_tokens), why)
            return None
        if not self.incomplete_ok:
            if newrest:
                # not all of the logical line was matched
                if DEBUG_REJECTS:
                    print >> sys.stderr, "fyi: %r rejecting line %r due to incomplete match" % \
                          (self, describe_logical_line_tokens(line_tokens))
                return None
        if DEBUG_MATCHES:
            print >> sys.stderr, "fyi: %r matchline internal res before namepos: %r" % (self, res,)
        # pull out the ModuleName component of the match result
        res1 = res[self.namepos]
            ### TODO: join it into a proper string -- not needed yet, we only compare it for equality,
            # and it looks like ['exprs', ['.', 'Rect']] which will compare fine.
        if DEBUG_MATCHES:
            print >> sys.stderr, "fyi: %r match returning %r" % (self, res1)
        return res1
    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__, self.debugname, id(self))
    pass
# Line patterns -- one _linepat for each of the three lines in the
# "import MODULE / reload_once(MODULE) / from MODULE import ..." groups
# this script removes (see module docstring).
# note: whitespace is implicitly skipped by generate_tokens

# "import MODULE" -- ModuleName is element 1 of the Seq match
Line1 = _linepat(
    Seq( IMPORT, ModuleName, Newline ),
    namepos = 1,
    debugname = "Line1"
 )

# "reload_once(MODULE)" -- ModuleName is element 2 of the Seq match
Line2 = _linepat(
    Seq( RELOAD_ONCE, LeftParen, ModuleName, RightParen, Newline),
    namepos = 2,
    debugname = "Line2"
 )

# "from MODULE import ..." -- the rest of the line is intentionally ignored
Line3 = _linepat(
    Seq( FROM, ModuleName, IMPORT), # no Newline since we don't care what comes next on that line
    namepos = 1,
    debugname = "Line3",
    incomplete_ok = True
 )

# ==
def process_filename(filename):
    """
    Open the named file and run process_file on it: either overwrite it
    in place (if OVERWRITE_ORIGINAL_FILES) or write a FIXED- prefixed
    copy next to it. Status messages go to stderr.
    """
    print >> sys.stderr, "processing file %r" % filename
        # always print this, so user knows which file any error messages are about
    file = open(filename, 'rU')
    if OVERWRITE_ORIGINAL_FILES:
        res = process_file(file, filename, overwrite = True) # this closes file itself, before removing/rewriting it
    else:
        res = process_file(file, prepend_to_basename("FIXED-", filename))
    file.close()
        # note: harmless even in the overwrite case where process_file
        # already closed the file
    ### TODO:
    ## if res:
    ##     print >> sys.stderr, "moving fixed file over original one, backing up original"
    ##     # or just let svn diff/revert serve to "back up original"?
    return
def process_file(file, output_filename, overwrite = False):
    """
    Remove some lines from file and save the result into a new file of the given name,
    unless no lines would be removed,
    in which case just remove the output file if it was present (and if not overwrite).
    Overwrite option revises behavior for when output_filename refers to the same file as file:
    don't remove original file then, just close it and rewrite it if it changes.
    Return a boolean saying whether you removed any lines and left an output file.
    """
    # clean up any stale output file from a previous run (non-overwrite mode only)
    if not overwrite:
        if os.path.isfile(output_filename):
            if REALLY_REMOVE_FILES:
                print >> sys.stderr, "REMOVING", output_filename
                os.remove(output_filename) # if this fails, let the exception abort the script
            else:
                print >> sys.stderr, "SHOULD REMOVE (unless we remake it)", output_filename
    gentok = generate_tokens(file.readline)
        # a generator of all tokens in the file
    if DEBUG_JUST_PRINT_THE_TOKENS:
        # debug mode: dump all tokens to stdout, grouped by logical line, and stop
        last_logical_line = None
        for token in gentok:
            toktype, tokstring, (srow, scol), (erow, ecol), logical_line = token
            if logical_line != last_logical_line:
                print "\nlogical line: [%s]" % (logical_line,) # not to stderr
                # note: apparent bug in generate_tokens:
                # after a multiline string literal,
                # logical_line can be '"""' for the ending NEWLINE token.
                last_logical_line = logical_line
            print "\n%s %r (%d %d) (%d %d)" % (tok_name[toktype], tokstring, srow, scol, erow, ecol) # not to stderr
        return
    # now split them by logical lines (tok[-1] fields)
    # TODO: rewrite to use "groupby"
    donelines = [] # a growing list of logical lines, each being a list of token tuples
    currentline = [] # current logical line (a list of token tuples)
    last_logical_line = None
    for token in gentok:
        toktype, tokstring, (srow, scol), (erow, ecol), logical_line = token
        del toktype, tokstring, srow, scol, erow, ecol
        if logical_line != last_logical_line:
            # start a new logical line; flush the previous one (if nonempty)
            if currentline:
                donelines.append(currentline)
                currentline = []
            last_logical_line = logical_line
        currentline.append(token)
        continue
    # flush the final logical line
    if currentline:
        donelines.append(currentline)
        currentline = []
    del currentline
    lines = donelines
    del donelines
    if DEBUG:
        print >> sys.stderr, "got %d logical lines" % len(lines)
    # now do the matching alg on the lines:
    # scan for consecutive triples matching Line1, Line2, Line3 with the
    # same ModuleName, and record the index of the first line of each triple
    deletes = [] # list of indices of logical lines to delete (only the first of each pair)
    i = 0
    while i < len(lines) - 2:
        m1 = Line1.match_to_line_tokens( lines[i+0] )
        if m1:
            m2 = Line2.match_to_line_tokens( lines[i+1] )
            if m2:
                m3 = Line3.match_to_line_tokens( lines[i+2] )
                if m3:
                    if m1 == m2 == m3:
                        deletes.append(i)
                        i += 2 # avoid overlapping matches
                            # (combined with the i += 1 below, this skips
                            # past the whole 3-line group)
                    else:
                        print >> sys.stderr, "warning: rejected only due to names not the same:", i, m1,m2,m3
        i += 1
        continue
    if deletes:
        print >> sys.stderr, "found %d linepairs to delete" % len(deletes)
        file.seek(0) # rewind the file
        textlines = file.readlines()
            # WARNING: these are physical lines!
            # So their numbering doesn't match logical lines,
            # even if the specific lines to be deleted contain no continuations.
            # We have to use the srow/erow token fields instead, to know what physical textlines to delete.
        if DEBUG:
            print >> sys.stderr, "file has %d physical lines" % len(textlines)
        oldlen = len(textlines)
        for i in deletes[::-1]: # process in reverse order, to not mess up line numbering as we modify textlines
            logical1 = lines[i][0] # first token to delete
            logical2 = lines[i+1][-1] # last token to delete
            dline1 = tok_srow(logical1) # first physical line number to delete
            dline2 = tok_srow(logical2) # last physical line number to delete
            # turn line numbers (starting 1) into indices (starting 0)
            dline1 -= 1
            dline2 -= 1
            drange = range(dline1, dline2+1) # inclusive range of physical line indices to delete
            if DEBUG:
                if i == deletes[::-1][0]: # print it only for the first one
                    print >> sys.stderr, "\nwill delete these lines:\n"
                    for lineno in drange:
                        print >> sys.stderr, ">>>", textlines[lineno], ### sanity check
                    print >> sys.stderr
            for lineno in drange[::-1]:
                del textlines[lineno]
        newlen1 = len(textlines)
        # also delete any lines that exactly match "from exprs.reload import reload_once"
        count1 = count2 = 0
        for i in range(len(textlines))[::-1]:
            if textlines[i].strip() == "from exprs.reload import reload_once":
                del textlines[i]
                count1 += 1
                if 0 < i < len(textlines) - 1:
                    if not textlines[i-1].strip() and not textlines[i].strip():
                        # two blank lines are now adjacent; delete one of them
                        # (the one at index i, which a moment ago had index i+1)
                        del textlines[i]
                        count2 += 1
        if count1:
            print >> sys.stderr, "also deleted %d imports of reload_once and %d subsequent blank lines" % (count1, count2)
        else:
            assert not count2
        # now open the output file and print the lines we're not deleting to the output file
        # use output_filename as name of new file
        if overwrite:
            file.close()
            os.remove(output_filename) # the same file, in this case
        output = open(output_filename, "w")
        output.writelines(textlines)
        output.close()
        newlen = len(textlines)
        print >> sys.stderr, "wrote %d lines to %r" % (newlen, output_filename)
        # bookkeeping sanity checks on the line counts
        assert oldlen - 2 * len(deletes) == newlen1
        assert newlen1 - count1 - count2 == newlen
        pass
    else:
        print >> sys.stderr, "no linepairs to delete"
    print >> sys.stderr
    return not not deletes # from process_file
# ==

if __name__ == '__main__':
    # process every file named on the command line
    for filename in sys.argv[1:]:
        process_filename(filename)
    if DEBUG:
        print >> sys.stderr, "done"

# end
|