summaryrefslogtreecommitdiff
path: root/cad/src/utilities/Comparison.py
blob: b601e1432de16f8e6686a64d32e2e7c1da513ae6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
# Copyright 2005-2009 Nanorex, Inc.  See LICENSE file for details.
"""
utilities/Comparison.py - provides same_vals, for correct equality comparison.
See also state_utils.py, which contains the closely related copy_val.

@author: Bruce
@version: $Id$
@copyright: 2005-2009 Nanorex, Inc.  See LICENSE file for details.

History:

same_vals was written as part of state_utils.py [bruce]

moved same_vals into utilities/Comparison.py to break an import cycle
[ericm 071005]

moved SAMEVALS_SPEEDUP and "import samevals" along with it
(but left the associated files in cad/src, namely samevals.c [by wware],
setup2.py [now in outtakes], and part of Makefile) [bruce 071005]
"""

from types import InstanceType

# Numeric is an optional dependency of this module. _haveNumeric records
# whether it could be imported, so that the code further down which registers
# a comparison helper for Numeric arrays can be skipped when it is missing.
_haveNumeric = True # might be modified below

try:
    from Numeric import array, PyObject
except Exception:
    # Deliberately best-effort: any ordinary failure while importing Numeric
    # simply disables Numeric support here (this gets warned about in
    # state_utils). We catch Exception rather than using a bare 'except:'
    # so that, on Python 2.5 and later, KeyboardInterrupt and SystemExit
    # are not silently swallowed during the import.
    _haveNumeric = False

_haveNumpy = True # might be modified below

try:
    import numpy
    numpy.ndarray # make sure this exists
except:
    print "fyi: python same_vals can't import numpy.ndarray, won't handle it" ###
    _haveNumpy = False

import foundation.env as env

# When true, same_vals prints a debug message whenever its verdict disagrees
# with what Python's '!=' operator says about the same two values.
_debug_same_vals = False #bruce 060419; relates to bug 1869

SAMEVALS_SPEEDUP = True
    # If true, try to use the C extension version in samevals.c
    # [which is not yet fully correct, IIRC -- bruce 071005 comment];
    # will be set to False if "import samevals" fails below.
    # At the end of this file, a true value causes the Python same_vals
    # defined in this module to be replaced by the extension's same_vals.

    # Note: samevals.c [by wware] is still built and resides in cad/src,
    # not cad/src/utilities, as of 071005, but that should not
    # prevent import samevals from working here. If samevals.c is moved
    # here into utilities/, then setup2.py [now in outtakes, nevermind]
    # and part of Makefile need to be moved along with it. [bruce 071005 comment]
    #bruce 080403 update: samevals.c has been replaced by samevals.pyx
    # and samevalshelp.c, built by Makefile, all still at toplevel.

if SAMEVALS_SPEEDUP:
    try:
        # If we're using the samevals extension, we need to tell the
        # extension what a Numeric array looks like, since the symbol
        # PyArray_Type was not available at link time when we built
        # the extension. [wware]

        # Every statement that can raise ImportError is kept inside this
        # 'try', so a missing extension or a missing Numeric both end up in
        # the handler below.
        from samevals import setArrayType
        import Numeric
        setArrayType(type(Numeric.array((1,2,3))))
        print "SAMEVALS_SPEEDUP is True, and import samevals succeeded"
    except ImportError:
        # Note: this error could be from importing samevals
        # (an optional dll built from samevals.c) or Numeric.
        # If the latter, it was avoidable using _haveNumeric,
        # but I don't know whether samevals.c permits use when
        # setArrayType was never called, so I'll let this code
        # continue to disable SAMEVALS_SPEEDUP in either case.
        # [bruce 071005]
        print "samevals.so/dll or Numeric not available, not using SAMEVALS_SPEEDUP"
        SAMEVALS_SPEEDUP = False

# ==

def same_vals(v1, v2): #060303
    """
    (Note: there is a C version of this method which is normally used by NE1.
     It has the same name as this method and overwrites this one
     due to an assignment near the end of this method's source file.
     This method is the reference version, coded in Python.
     This version is used by some developers who don't build the C version
     for some reason.)

    Efficiently scan v1 and v2 in parallel to determine whether they're the
    same, for purposes of undoable state or saved state.

    Note: the only reason we really need this method (as opposed to just
    using Python '==' or '!=' and our own __eq__ methods)
    is because Numeric.array.__eq__ is erroneously defined, and if we were
    using '==' or '!=' on a Python tuple containing  a Numeric array,
    we'd have no way of preventing this issue from making '==' or '!='
    come out wrong on the tuple.

    (For details, see bruce email to 'all' of 060302, partially included below.)

    It turns out that you can safely naively use != on Numeric arrays,
    but not ==, since they both act elementwise, and this only
    does what you usually want with != . I knew this in the past
    (and fixed some weird bugs caused by it) but forgot it recently,
    so Undo was thinking that atom position arrays had not changed
    provided at least one coordinate of one atom had not changed.

    [But note that you can't use either '==' or '!=' on tuples that might
    contain Numeric arrays, since either way, Python uses '==' on the
    tuple elements.]

    In particular:

    a = Numeric.array((1, 2, 3))
    b = Numeric.array((1, 2, 3))
    assert a == b                  # result: [1 1 1], interpreted as True
    assert not a != b              # result: [0 0 0], interpreted as False
    b = Numeric.array((1, 4, 5))
    assert a != b                  # result: [1 0 0], interpreted as True
    assert not a == b              # result: [0 1 1], interpreted as True
    # the last assertion fails!

    Do the maintainers of Numeric consider this to be correct
    behavior?!?!?!?  Probably.

    What they should have done was define a new ufunc for equality
    testing, and made the semantics of __eq__ and __ne__ work as
    expected.  Probably too late to expect them to change this now.

    As long as we have it, we might as well make it a bit more stringent
    than Python '==' in other ways too, like not imitating the behaviors
    (which are good for '==') of 1.0 == 1, array([1]) == [1], etc.
    The only reason we'll count non-identical objects as equal is that
    we're not interested in their addresses or in whether someone
    will change one of them and not the other (for whole objects or for
    their parts).

    ###doc for InstanceType... note that we get what we want by using
    __eq__ for the most part...
    """

    if v1 is v2:
        # Optimization:
        # this will happen in practice when whole undoable attrvals are
        # immutable (so that we're comparing originals, not different copies),
        # therefore it's probably common enough to optimize for.
        # It's just as well we're not doing it in the recursive helper,
        # since it would probably slow us down when done at every level.
        # [060303 11pm]
        return True
    try:
        _same_vals_helper(v1, v2)
    except _NotTheSame:
        if _debug_same_vals and not (v1 != v2):
            print "debug_same_vals: " \
                  "same_vals says False but 'not !=' says True, for", v1, v2
                # happens for bug 1869 (even though it's fixed;
                # cause is understood)
        return False
    if _debug_same_vals and (v1 != v2):
        print "debug_same_vals: " \
              "same_vals says True but '!=' also says True, for", v1, v2
              ##@@ remove when pattern seen
    return True

class _NotTheSame(Exception):
    """
    Private exception raised by _same_vals_helper (and the per-type helper
    functions it dispatches to) as soon as two values are found to differ.
    same_vals catches it and returns False.
    """

def _same_list_helper(v1, v2):
    """
    [private helper for _same_vals_helper]

    Given two sequences of the same type, raise _NotTheSame if their
    lengths differ or if any pair of corresponding elements is not the
    same according to _same_vals_helper. Return None otherwise.
    """
    if len(v1) != len(v2):
        raise _NotTheSame
    for position, item1 in enumerate(v1):
        # recursive comparison; raises _NotTheSame at the first difference
        _same_vals_helper(item1, v2[position])
    return

# tuples are compared elementwise in exactly the same way as lists
_same_tuple_helper = _same_list_helper

def _same_dict_helper(v1, v2):
    """
    [private helper for _same_vals_helper]

    Given two dicts, raise _NotTheSame if they have different numbers of
    items, if some key of v1 is missing from v2, or if the values stored
    under some key are not the same according to _same_vals_helper.
    Return None otherwise.
    """
    if len(v1) != len(v2):
        raise _NotTheSame
    for k, left_val in v1.iteritems():
        if k not in v2:
            raise _NotTheSame
        _same_vals_helper(left_val, v2[k])
    # The sizes are equal and every key of v1 was found in v2,
    # so v2 cannot contain any extra keys; nothing more to check.
    return

# implem/design discussion
# [very old; slightly clarified, bruce 090205; see new summary below]:
#
# Choice 1:
# no need for _same_InstanceType_helper; we set up all (old-style) classes
# so that their __eq__ method is good enough; this only works if we assume
# that any container-like instances (which compare their parts) are ones we
# wrote, so they don't use == on Numeric arrays, and don't use == or != on
# general values.
#
# Choice 2:
# on naive objects, we just require id(v1) == id(v2).
# Downside: legitimate data-like classes by others, with proper __eq__
# methods, will compare different when they should be same.
# Upside: if those classes have Numeric parts and compare them with ==,
# that's a bug, which we'll avoid.
# Note that if it's only our own classes which run, and if they have no bugs,
# then it makes no difference which choice we use.
#
### UNDECIDED. For now, doing nothing is equivalent to Choice 1,
# but note that Choice 2 is probably safer.
# In fact, if I do that, I'd no longer need _eq_id_mixin just due to StateMixin
# (only when __getattr__ and someone calls '=='). [060303]
#
# Update 060306: some objects will need _s_same_as(self, other) different from
# __eq__, since __eq__ *might* want to compare some components with !=
# (like int and float) rather than be as strict as same_vals.
# Even __eq__ needs to try to avoid the "Numeric array in list" bug,
# which in some cases will force it to also call same_vals,
# but when types are known it's plausible that it won't have to,
# so the distinct methods might be needed.
# When we first need _s_same_as, that will force use of a new
# _same_InstanceType_helper func. Do we need it before then? Not sure.
# Maybe not; need to define __eq__ better in GAMESS Jig (bug 1616) but
# _s_same_as can probably be the same method. OTOH should we let DataMixin
# be the thing that makes _s_same_as default to __eq__?? ###
######@@@@@@
#
# update, bruce 060419, after thinking about bug 1869
# (complaint about different bonds with same key):
# - The Bond object needs to use id for sameness, in Undo diffs at least
#  (only caller of same_vals?) (but can't use id for __eq__ yet).
#   - Q: What is it about Bond that decides that -- Bond? StateMixin? not DataMixin?
#     A: The fact that scan_children treats it as a "child object",
#     not as a data object (see obj_is_data method).
#     That's what makes Undo change attrs in it, which only makes sense if
#     Undo treats refs to it (in values of other attrs, which it's diffing)
#     as the same iff their id is same.
# Conclusion: we need to use the same criterion in same_vals, via a new
# _same_InstanceType_helper -- *not* (only) a new method
# [later: I guess I meant the '_s_same_as' method -- nim, discussed only here]
# as suggested above and in a comment I added to bug 1869 report.
# For now, we don't need the new method at all.
#
# ==
#
# Update, bruce 090205: reviewing the above, there are only the following cases
# where same_vals needs to disagree with '==', given that we are free to define
# proper __eq__/__ne__ methods in our own code:
#
# * Numeric arrays (due to the design flaw in their __eq__ method semantics)
#
# * Python data objects which might contain Numeric arrays
#   (i.e. list, dict, tuple -- also 'set', if we start using that in model
#    state -- we don't do so yet, since it's not supported in our minimum
#    supported Python version)
#
# * supporting same_vals(1, 1.0) == False, for conservatism in Undo
#   (but note that most __eq__ methods don't bother to worry about that
#    when comparing components in data-like instances; often this is justified,
#    either since they treat those values equivalently or only store them with
#    one type, so it's reasonable to permit it even though it could in theory
#    lead to bugs)
#
# * any future similar cases of same_vals being more conservative than __eq__,
#   especially if they apply within instances, motivating us to define a new
#   "data API" method called '_s_same_as' (not needed for now); OTOH, any new
#   cases of that should be deprecated, as far as I know
#
# * for Bond, as long as it has an __eq__ method more liberal than id comparison
#   (used by same_vals), since as a State holder, id comparison is correct in
#   principle. It only needs the looser __eq__ due to old code in a bad style,
#   but it's hard to know whether that code is entirely gone (I think it's
#   not, and it would be hard to finally remove it).
#
# How does this affect the issue of changing Node to a new-style class?
# If we do this with no code changes, Nodes lose the services of
# _same_InstanceType_helper, but the above suggests they might never have
# needed it anyway -- of the above issues, only the ones concerned with
# '_s_same_as' and 'Bond' apply to instances of old or new classes.
#
# Conclusion: we can ignore extending _same_InstanceType_helper to new-style
# Nodes -- in fact, we could dispense with it entirely in current code except
# for Bond. (See also the 090205 comments in the docstring of state_utils.py.)
#
# (FYI: If we were to write a completely new framework, I think we'd use our
# own classes rather than Numeric, with proper semantics for ==/!=,
# and then dispense with same_vals entirely, relying on '==' even for Undo.)


def _same_InstanceType_helper(obj1, obj2):
    """
    [private helper for _same_vals_helper; registered for InstanceType]

    Raise _NotTheSame unless obj1 and obj2 count as the same:
    - the identical object is always the same as itself;
    - two distinct objects are the same only if obj1's class has an
      attribute named '_s_isPureData' and 'obj1 != obj2' is false
      (i.e. we rely on that class's own implementation of __eq__/__ne__).
    Return None when they are the same.
    """
    #bruce 060419, relates to bug 1869; see detailed comment above
    if obj1 is obj2:
        return # not just an optim -- remaining code assumes obj1 is not obj2

    # We might like to ask classify_instance(obj1).obj_is_data, but there is
    # no canonical object-classifier to ask, so for A7 (no time to clean this
    # up) its logic is duplicated (and optimized) here.
    ###k don't check copiers_for_InstanceType_class_names.has_key(
    # obj1.__class__.__name__), since that's always False for now.
    is_pure_data = hasattr(obj1.__class__, '_s_isPureData')
    if is_pure_data and not (obj1 != obj2):
        # data-like objects which their own comparison method calls equal
        return

    # Either a data-like object that compares unequal, or a non-data object;
    # for the latter only identity counts, and the 'is' test above
    # would have caught that.
    raise _NotTheSame


def _same_Numeric_array_helper(obj1, obj2):
    if obj1.typecode() != obj2.typecode():
        raise _NotTheSame
    if obj1.shape != obj2.shape:
        raise _NotTheSame
    if obj1.typecode() == PyObject:
        if env.debug():
            print "atom_debug: ran _same_Numeric_array_helper, PyObject case"
                # remove when works once ###@@@
        # assume not multi-dimensional (if we are, this should work [untested]
        # but it will be inefficient)
        for i in xrange(len(obj1)):
            # two PyObjects (if obj1 is 1-dim) or two lower-dim Numeric arrays
            _same_vals_helper(obj1[i], obj2[i])
    else:
        if obj1 != obj2:
            # take pointwise !=, then boolean value of that (correct, but is
            # there a more efficient Numeric function?)
            # note: using '==' here (and negating boolean value of result)
            # would NOT be correct
            raise _NotTheSame
    return


def _same_numpy_ndarray_helper(obj1, obj2): #bruce 081202
    """
    Given two objects of type numpy.ndarray,
    raise _NotTheSame if they are not equal.

    The arrays count as equal when they have the same shape, the same
    dtype, and the same element data. Returns None when they are equal.

    The element data is compared via the buffers of C-contiguous versions
    of both arrays, so the result does not depend on how each array happens
    to be laid out in memory (C order, Fortran order, or a strided view).
    """
    # For documentation, see http://www.scipy.org/Tentative_NumPy_Tutorial .
    # Note that we only need this function because:
    # - for some developers, some PyOpenGL functions can return objects of this
    #   type (e.g. glGetDoublev( GL_MODELVIEW_MATRIX));
    # - numpy has the same design flaw in ==/!= that Numeric has.
    # CAVEATS:
    # - this implementation might be wrong if the data buffer can contain
    #   padding, or if element types can be python object pointers,
    #   or if my guesses from the incomplete documentation I found (on ndarray
    #   and on buffer) are wrong.
    ### TODO:
    # - support this in the C version of same_vals
    # - support it in copy_val
    # - not sure if it needs support elsewhere in state_utils.py
    if obj1.shape != obj2.shape:
        raise _NotTheSame
    if obj1.dtype != obj2.dtype:
        raise _NotTheSame
    # Compare the data.
    # Comparing obj1.data with obj2.data directly would compare raw memory
    # in storage order, so two arrays holding equal elements but stored with
    # different layouts could be reported as different; and (presumably, on
    # Python 2) a non-contiguous view cannot provide a single-segment buffer
    # at all. numpy.ascontiguousarray returns its argument unchanged when it
    # is already C-contiguous and otherwise makes a C-ordered copy, so both
    # buffers below are in the same element order.
    # note: on Python 2, type(arr.data) is <type 'buffer'>;
    # python documentation only hints that this can be compared using == or !=;
    # doing so seems to work by tests, e.g. buffer("abc") != buffer("def") => True.
    # [bruce 081202]
    data1 = numpy.ascontiguousarray(obj1).data
    data2 = numpy.ascontiguousarray(obj2).data
    if data1 != data2:
        raise _NotTheSame
    return

# Dispatch table consulted by _same_vals_helper: maps the exact type of a
# value to the helper function that compares two values of that type.
# Types with no entry here are compared using '!='.
_known_type_same_helpers = {
    type([]): _same_list_helper,
    type({}): _same_dict_helper,
    type(()): _same_tuple_helper,
    InstanceType: _same_InstanceType_helper,
        # note: see the long comment above, which concludes "we can ignore
        # extending _same_InstanceType_helper to new-style Nodes"
        # (re changing class Node to be a new-style class), since we can
        # rely on our overrides of __eq__/__ne__. [bruce 090205 comment]
}

if _haveNumeric:
    # note: related code exists in state_utils.py.
    # The type is obtained from a sample array, because Numeric.array itself
    # is a built-in function, not a type (the type's __name__ is 'array').
    _sample_array_type = type(array(range(2)))
    assert _sample_array_type != InstanceType
    _known_type_same_helpers[_sample_array_type] = _same_Numeric_array_helper
    del _sample_array_type

if _haveNumpy:
    assert numpy.ndarray != InstanceType
    _known_type_same_helpers[numpy.ndarray] = _same_numpy_ndarray_helper

def _same_vals_helper(v1, v2): #060303
    """
    [private recursive helper for same_vals]

    Return None if v1 is the same as v2; raise _NotTheSame otherwise
    (i.e. if their types differ, if their structure differs,
     or if any corresponding parts are not the same).
    """
    kind = type(v1)
        # note: for an instance of an old-style class this is InstanceType;
        # for an instance of a new-style class it is the class itself.
        # (Either way, v1.__class__ is the class itself.)
    if kind is not type(v2):
        raise _NotTheSame

    compare = _known_type_same_helpers.get(kind) # module-level dispatch table
        # note: the table has an entry for InstanceType (necessary only for
        # Bond) but none for new-style classes. This is ok (as long as Bond
        # remains an old-style class); see comments dated 090205.
    if compare is None:
        # General case (as an optimization, no helper is stored for atomic
        # types, or a few others) -- depend on __eq__/__ne__. The things this
        # is wrong for are listed in comments dated 090205, and are all
        # covered by entries in _known_type_same_helpers.
        if v1 != v2:
            raise _NotTheSame
        return

    compare(v1, v2) # might raise _NotTheSame
    return

    # old comment, still true but might be redundant:
    # (If not for Numeric arrays of type PyObject, we could safely use '!='
    #  in the code above on a pair of Numeric arrays -- just not on things
    #  that might contain them, in case their type's '!=' method used '=='
    #  on the Numeric arrays, whose boolean value doesn't correctly say
    #  whether they're equal (instead it says whether one or more
    #  corresponding elements are equal).
    #  Another difference is that 1 == 1.0, but we'll say those are not the
    #  same; but that aspect of our specification doesn't matter much.)

# ==

if SAMEVALS_SPEEDUP:
    # The samevals extension was imported successfully earlier in this file,
    # so replace the Python same_vals defined above with the extension's
    # version. This rebinds the public global which other modules import.
    # (This is done for same_vals here in utilities/Comparison.py,
    #  and for copy_val in state_utils.py, which sets COPYVAL_SPEEDUP
    #  if it works, since it might fail even if SAMEVALS_SPEEDUP works.)
    from samevals import same_vals
    # Historical note [bruce 080922 comment]: the Python version is not saved
    # before this rebinding (e.g. for testing), on the grounds that it used
    # this global for its recursion and so would recurse into the C version
    # afterwards; fixing that was judged not worth the trouble.
    # NOTE(review): in this file the Python same_vals recurses through
    # _same_vals_helper rather than through the same_vals global, so that
    # concern may no longer apply -- confirm before relying on it.

# end