summaryrefslogtreecommitdiff
path: root/cad/src/tools/FindPythonGlobals.py
blob: 0e1a88c81d5734cd6e85bed055d7a2f2b8464b84 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
#!/usr/bin/env python

# Copyright 2007 Nanorex, Inc.  See LICENSE file for details.
"""
  FindPythonGlobals.py

  This is a standalone program for static analysis of python source
  files.  For each file given as an argument, it writes a list of all
  of the global symbols defined by the file, each accompanied by the
  name of the file.  The resulting list can be sorted and used to
  locate a source file given a symbol name.  Symbols which are defined
  in multiple files can be identified as well.

  The program relies on the internal python parser.  As such, it must
  be targeted to a specific version of python.  The initial code was
  written for the python2.5 parser.  Even a small change to the python
  grammar could change the way symbols in the parse tree need to be
  handled.  Turning on the verbose flag prints the complete parse tree
  of the input file, which can be used to determine how to search the
  tree for global definitions.

  To find global symbols, the search routine looks at top level
  statements, and inside of if, for, while, and try statements,
  recursively.  Within these contexts, it looks for def and class
  statements (which it does not look inside of).  It also looks for
  expressions of the form "NAME = ...".

  Note: this will not find globals defined in assignments with
  multiple =, like "a = b = False".  The global b would be missed
  in this case.

  @author: Eric Messick
  @version: $Id$
  @copyright: 2007 Nanorex, Inc.
  @license: GPL
"""

import sys
import string
import parser
import symbol
import token

verbose = False
#verbose = True
"""Set to true to print entire parse tree."""

s_fileName = ""
"""File currently being parsed."""

traverseTheseTokens = [
    symbol.encoding_decl,
    symbol.file_input,
    symbol.stmt,
    symbol.simple_stmt,
    symbol.small_stmt,
    symbol.compound_stmt,
    symbol.testlist,
    symbol.test,
    symbol.or_test,
    symbol.and_test,
    symbol.not_test,
    symbol.comparison,
    symbol.expr,
    symbol.xor_expr,
    symbol.and_expr,
    symbol.shift_expr,
    symbol.arith_expr,
    symbol.term,
    symbol.factor,
    symbol.power,
    ]
"""
   Parse tree nodes of these types should have their contents searched
   when they are encountered.  These are the intervening nodes that
   show up between the top level node in the parse tree (file_input)
   and the actual definition nodes that we're looking for.  They
   mostly describe components of expressions.
"""

suiteTokens = [
    symbol.if_stmt,
    symbol.for_stmt,
    symbol.while_stmt,
    symbol.try_stmt,
    ]
"""
   Parse tree nodes which contain suite nodes, each of which we wish
   to traverse.  These are generally the introducers of compound
   statements.
"""

def printToken(tok, level):
    """
       Print the symbolic name for a token or symbol from a python
       parse tree.

       @param tok: token to be printed.
       @type tok: integer, values defined in symbol or token modules

       @param level: recursion depth, specified as a string of spaces.
       @type level: string
    """
    if (token.ISTERMINAL(tok)):
        print level + token.tok_name[tok]
    else:
        print level + symbol.sym_name[tok]

def printParseTree(tree, level):
    """
       Print the whole parse tree beneath the current node.

       @param tree: the top level node being printed.
       @type tree: list, first elements of which are integer tokens

       @param level: recursion depth, specified as a string of spaces.
       @type level: string
    """
    printToken(tree[0], level)
    for element in tree[1:]:
        if (isinstance(element, list)):
            printParseTree(element, level + ".")
        else:
            print level + repr(element)
#    print level + "end"

def lookAt(tree):
    """
       Visit a node in the parse tree looking for a global symbol
       definition.

       @param tree: the parse node being visited.
       @type tree: list, first elements of which are integer tokens
    """
    nodeType = tree[0]
    if (verbose):
        printToken(nodeType, "looking at: ")

    # Look inside nodes of these types.
    if (nodeType in traverseTheseTokens):
        for element in tree[1:]:
            lookAt(element)

    # Look inside of assignment statements (name = ...).
    elif (nodeType == symbol.expr_stmt):
        if (len(tree) > 2):
            if (tree[2][0] == token.EQUAL):
                lookAt(tree[1])

    # Look inside suites of statements within block introducers like
    # if and while.
    elif (nodeType in suiteTokens):
        for outerElement in tree[1:]:
            if (outerElement[0] == symbol.suite):
                for suiteElement in outerElement[1:]:
                    if (suiteElement[0] == symbol.stmt):
                        lookAt(suiteElement)

    # What we're actually looking for: def, class, and atoms.
    elif (nodeType == symbol.funcdef):
        functionName = tree[2]
        print functionName[1] + ": " + s_fileName
    elif (nodeType == symbol.classdef):
        className = tree[2]
        print className[1] + ": " + s_fileName
    elif (nodeType == symbol.atom):
        atomName = tree[1]
        if (atomName[0] == token.NAME):
            print atomName[1] + ": " + s_fileName

    elif (verbose):
        printToken(nodeType, "ignoring: ")


def parsePythonFile(filename):
    """
       Read a python source file and print the global symbols defined
       in it.

       The named file is opened and read.  Newlines are canonicalized.
       The whole file is handed to the internal python parser, which
       generates an Abstract Syntax Tree.  This is converted into a
       list form which is then scanned by lookAt() for global symbol
       definitions.

       @param filename: name of a python source file.
       @type filename: string
    """
    file = open(filename)
    codeString = file.read()
    codeString = string.replace(codeString, "\r\n", "\n")
    codeString = string.replace(codeString, "\r", "\n")
    if codeString and codeString[-1] != '\n' :
        codeString = codeString + '\n'
#    print "file: %s" % codeString
    file.close()
    try:
        ast = parser.suite(codeString)
    except SyntaxError:
        return
    parseTree = parser.ast2list(ast)
    if (verbose):
        printParseTree(parseTree, "")
    lookAt(parseTree)

if (__name__ == '__main__'):
    for s_fileName in sys.argv[1:]:
#        print >>sys.stderr, "processing " + s_fileName
        parsePythonFile(s_fileName)