cad/src/operations/bonds_from_atoms.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487

# Copyright 2005-2007 Nanorex, Inc.  See LICENSE file for details. 
"""
bonds_from_atoms.py -- experimental code for inferring bonds from
atom positions and elements alone

@author: Dr. K. Eric Drexler
@version: $Id$
@copyright: 2005-2007 Nanorex, Inc.  See LICENSE file for details. 

History:

bruce 050906 translated into Python some Lisp code
contributed by Dr. K. Eric Drexler.

bruce 071030 moved inferBonds interface to that code (probably by Will) here,
from bonds.py.

TODO: make this directly accessible as one or more user operations.
(As of 071030 it's only used when importing some PDB files.)
"""

# Translation into Python of Lisp code contributed by Dr. K. Eric Drexler.
# Some comments are from contributed code, perhaps paraphrased.

#e Plans: for efficiency, we'll further translate this into Pyrex or C, and/or combine
# with atom-position hashtable rather than scanning all pairs of atoms.
# This implem is just for testing and experimentation.

# This code does not yet consider the possibility of non-sp3 atomtypes,
# and will need changes to properly handle those.
# For now it ignores existing atomtypes and creates new bonds appropriate for sp3
# perhaps plus some extra too-long bonds at the end, if permitted by valence.

import math

from geometry.VQT import vlen
from geometry.VQT import atom_angle_radians

import foundation.env as env

from model.bonds import bond_atoms_faster
from geometry.NeighborhoodGenerator import NeighborhoodGenerator

from model.bond_constants import atoms_are_bonded # was: from bonds import bonded
from model.bond_constants import V_SINGLE
from model.bond_constants import bond_params

# constants; angles are in radians

degrees = math.pi / 180

TARGET_ANGLE = 114 * degrees  #e this will probably need to be generalized for non-sp3 atoms
MIN_BOND_ANGLE = 30 * degrees # accepts moderately distorted three-membered rings
ANGLE_ACCEPT_DIST = 0.9       # ignore angle cutoff below this distance (in Angstroms)
MAX_DIST_RATIO_HUNGRY = 2.0   # prohibit bonds way longer than their proper length
MAX_DIST_RATIO_NON_HUNGRY = 1.4   # prohibit bonds a little longer than their proper length
DIST_COST_FACTOR = 5.0        # multiplier on square of distance-ratio beyond optimal

# (utilities used by contributed code, defined differently for nE-1)

def atm_distance(atom1, atom2):
    return vlen(atom1.posn() - atom2.posn())

atm_angle = atom_angle_radians # args are three atoms

# utility for creating mutable linked lists in Python
def linked_list( lis1, func = None ):
    """
    Given a list of 0 or more elements (e.g. [a,b,c,d]),
    return a "python mutable linked list" of the form [a, [b, [c, [d, None]]]].
       If func is supplied, apply it to each element of the original list
    (e.g. return [f(a),[f(b),[f(c),[f(d),None]]]] for f = func).
       Note that we terminate the resulting linked list with None, not [] or f(None).
    It might be easier for some applications (which want to append elements to the result,
    leaving it in linked list form) if we terminated with [], so this is a possible future change in API.
    """
    assert type(lis1) == type([])
    lis1.reverse()
    res = None # correct result for 0-length lis1
    for elt in lis1:
        if func is not None:
            elt = func(elt)
        res = [elt, res]
    return res

#e unlink_list?

def idealBondLength(atm1, atm2):
    """
    Return the ideal length of a single bond between atm1 and atm2,
    assuming they have their current elements but their default atomtypes
    (ignoring their current atomtypes).

    @see: similar function, ideal_bond_length in bond_constants.py
          (not directly useable by this function)
    """
    # don't use getEquilibriumDistanceForBond directly, in case pyrex sim (ND-1)
    # is not available [bruce 060620]
    r1, r2 = bond_params(atm1.element.atomtypes[0], atm2.element.atomtypes[0], V_SINGLE)
    return r1 + r2

def max_atom_bonds(atom, special_cases={'H':  1,
                                        'B':  4,
                                        'C':  4,
                                        'N':  4,
                                        'O':  2,
                                        'F':  1,
                                        'Si': 4,
                                        'P':  5,
                                        'S':  4,
                                        'Cl': 1}):   # coded differently for nE-1
    """
    Return max number of bonds permitted on this atom, based only on its element
    (for any atomtype, ignoring current atomtype of atom). (Returns 0 for noble gases.)
    """
    elt = atom.element
    sym = elt.symbol
    if special_cases.has_key(sym):
        return special_cases[sym]
    else:
        maxbonds = 0
        for atype in elt.atomtypes:
            if atype.numbonds > maxbonds:
                maxbonds = atype.numbonds
        return maxbonds

def min_atom_bonds(atom): # coded differently for nE-1
    """
    Return min number of bonds permitted on this atom, based only on its element
    (for any atomtype, ignoring current atomtype of atom). (Returns 0 for noble gases.)
    That is, find the atomtype with the smallest number of bonds (e.g. sp for carbon,
    which can have just two double bonds) and return that number of bonds. This is the
    smallest number of bonds that could possibly make this element happy.
    """
    minbonds = 10000
    for atype in atom.element.atomtypes:
        if atype.numbonds < minbonds:
            minbonds = atype.numbonds
    return minbonds

"""
Eric D writes: If the bond-number limit for each atom is based on its
atom-type, rather than on its element-type, there should be no
problem. Or, if atom-types are unknown, then we can use the maximum
valence for that atom that occurs in non-exotic chemistry. Damian
should cross-check this, but I'd use:

H  1
B  4
C  4
N  4
O  2
F  1
Si 4
P  5
S  4
Cl 1

Many elements can have any one of several atomtypes based on
hybridization. Our concepts of these appear in the _mendeleev table in
elements.py. So for each element there is a minimum number of possible
bonds and a maximum number of possible bonds; for carbon these are
respectively 2 (two double bonds for sp hybridization) and 4 (sp3).

There are two things that could be the independent variable. Either
you derive the atomtype from the number of bonds, or you hold the
atomtype fixed and permit only the number of bonds it allows.

Currently max_atom_bonds() is looking at
atom.element.atomtypes[0].numbonds to determine how many bonds are OK
for this atom. That presumes the first atomtype permits the most bonds,
which is presently [and still, 071101] true, but it would be better
to take an explicit maximum.

So we DO want the maximum number of bonds for ANY atomtype for this
element, with the presumption that somebody else will later
rehybridize the atom to get the right atomtype. We don't need to do
that here.

The other messy thing is this: If we know we don't have enough bonds
for the element (i.e. fewer than the smallest number of bonds for any
of its atomtypes) then we should use MAX_DIST_RATIO = 2.0 because we
are hungry for more bonds. When we get enough for the minimum, we
reduce MAX_DIST_RATIO to 1.4 because we're not so hungry any more.

MAX_DIST_RATIO is used in two places. One is in list_potential_bonds,
where we clearly want this policy. (Complication: there are two
atoms involved - we will use the smaller value only when BOTH are
non-hungry.) The other place is atm_distance_cost, another case
where there are two atoms involved. I think it applies there too.
"""

def max_dist_ratio(atm1, atm2):
    def is_hungry(atm):
        return len(atm.realNeighbors()) < min_atom_bonds(atm)
    if is_hungry(atm1) or is_hungry(atm2):
        return MAX_DIST_RATIO_HUNGRY
    else:
        return MAX_DIST_RATIO_NON_HUNGRY

def bondable_atm(atom): # coded differently for nE-1 due to open bonds
    """
    Could this atom accept any more bonds
    (assuming it could have any of its atomtypes,
     and ignoring positions and elements of atoms it's already bonded to,
     and ignoring open bonds,
     and treating all existing bonds as single bonds)?
    """
    #e len(atom.bonds) would be faster but would not ignore open bonds;
    # entire alg could be recoded to avoid ever letting open bonds exist,
    # and then this could be speeded up.
    return len(atom.realNeighbors()) < max_atom_bonds(atom)

def bond_angle_cost(angle, accept, bond_length_ratio):
    """
    Return the cost of the given angle, or None if that cost is infinite.
    Note that the return value can be 0.0, so callers should only
    test it for "is None", not for its boolean value.
       If accept is true, don't use the minimum-angle cutoff (i.e. no angle
    is too small to be accepted).
    """
    # if bond is too short, bond angle constraint changes
    if not (accept or angle > MIN_BOND_ANGLE * 1.0 + (2.0 * max(0.0, bond_length_ratio - 1.0)**2)):
        return None
    diff = min(0.0, angle - TARGET_ANGLE) # for heuristic cost, treat all angles as approximately tetrahedral
    square = diff * diff
    if 0.0 < diff:
        # wide angle
        return square
    else:
        # tight angle -- larger quadratic penalty
        return 2.0 * square

def atm_angle_cost(atm1, atm2, ratio):
    """
    Return total cost of all bond-angles which include the atm1-atm2 bond
    (where one bond angle is said to include the two bonds whose angle it describes);
    None means infinite cost.
    """
    accept = atm_distance(atm1, atm2) < ANGLE_ACCEPT_DIST
    sum = 0.0
    for atm in atm1.realNeighbors():
        cost = bond_angle_cost( atm_angle(atm, atm1, atm2), accept, ratio)
        if cost is None: # cost can be 0.0, so don't use a boolean test here [bruce 050906]
            return None
        sum += cost
    for atm in atm2.realNeighbors():
        # note different order of atm2, atm1
        cost = bond_angle_cost( atm_angle(atm, atm2, atm1), accept, ratio)
        if cost is None:
            return None
        sum += cost
    return sum

covrad_table = dict( [
    # from webelements.com (via contributed code)
    ('H',  0.37),
    ('C',  0.77), ('N', 0.75), ('O', 0.73), ('F',  0.72),
    ('Si', 1.11), ('P', 1.06), ('S', 1.02), ('Cl', 0.99),
 ])

def covalent_radius(atm):
    """
    Return atm's covalent radius (assuming default atomtype, not its current one), always as a float.
    """
    try:
        return float( covrad_table[atm.element.symbol] ) # use radius from contributed code, if defined
    except KeyError:
        print "fyi: covalent radius not in table:",atm.element.symbol # make sure I didn't misspell any symbol names
        return float( atm.element.atomtypes[0].rcovalent ) # otherwise use nE-1 radius
    pass

def atm_distance_cost(atm1, atm2, ratio):
    """
    Return cost (due to length alone) of a hypothetical bond between two atoms; None means infinite
    """
    if not (ratio < max_dist_ratio(atm1, atm2)):
        return None
    if ratio < 1.0:
        # short bond
        return ratio * 0.01 # weak preference for smallest of small distances
    else:
        # long bond -- note, long and short bond cost is the same, where they join at ratio == 1.0
        return 0.01 + DIST_COST_FACTOR * (ratio - 1.0) ** 2 # quadratic penalty for long bonds
    pass

_enegs = ['F', 'Cl', 'O', 'S', 'N', 'P']

def bond_element_cost(atm1, atm2, _enegs=_enegs):
    """
    Avoid bonding a pair of electronegative atoms
    """
    if atm1.element.symbol in _enegs and atm2.element.symbol in _enegs:
        return 1.0
    else:
        return 0.0

def bond_cost(atm1, atm2):
    """
    Return total cost of hypothetical new bond between two atoms, or None if bond is not permitted or already there
    """
    if not (bondable_atm(atm1) and bondable_atm(atm2)): # check valence of existing bonds
        return None
    if atoms_are_bonded(atm1, atm2): # already bonded? (redundant after list-potential-bonds) ###
        return None
    distance = atm_distance(atm1, atm2)
    # note the assumption that we are talking about SINGLE bonds, which runs throughout this code
    # some day we should consider the possibility of higher-order bonds; a stab in this direction
    # is the bondtyp argument in make_bonds(), but that's really a kludge
    best_dist = idealBondLength(atm1, atm2)
    # previously: best_dist = covalent_radius(atm1) + covalent_radius(atm2)
    if not best_dist:
        return None # avoid ZeroDivision exception from pondering a He-He bond
    ratio = distance / best_dist # best_dist is always a float, so this is never "integer division"
    dc = atm_distance_cost(atm1, atm2, ratio)
    if dc is None:
        return None
    ac = atm_angle_cost(atm1, atm2, ratio)
    if ac is None:
        return None
    ec = bond_element_cost(atm1, atm2)
    return ac + dc + ec

def list_potential_bonds(atmlist0):
    """
    Given a list of atoms, return a list of triples (cost, atm1, atm2) for all bondable pairs of atoms in the list.
    Each pair of atoms is considered separately, as if only it would be bonded, in addition to all existing bonds.
    In other words, the returned bonds can't necessarily all be made (due to atom valence), but any one alone can be made,
    in addition to whatever bonds the atoms currently have.
       Warning: the current implementation takes quadratic time in len(atmlist0). The return value will have reasonable
    size for physically realistic atmlists, but could be quadratic in size for unrealistic ones (e.g. if all atom
    positions were compressed into a small region of space).
    """
    atmlist = filter( bondable_atm, atmlist0 )
    lst = []
    maxBondLength = 2.0
    ngen = NeighborhoodGenerator(atmlist, maxBondLength)
    for atm1 in atmlist:
        key1 = atm1.key
        pos1 = atm1.posn()
        for atm2 in ngen.region(pos1):
            bondLen = vlen(pos1 - atm2.posn())
            idealBondLen = idealBondLength(atm1, atm2)
            if atm2.key < key1 and bondLen < max_dist_ratio(atm1, atm2) * idealBondLen:
                # i.e. for each pair (atm1, atm2) of bondable atoms
                cost = bond_cost(atm1, atm2)
                if cost is not None:
                    lst.append((cost, atm1, atm2))
    lst.sort() # least cost first
    return lst

def make_bonds(atmlist, bondtyp = V_SINGLE):
    """
    Make some bonds between the given atoms. At any moment make the cheapest permitted unmade bond;
    stop only when no more bonds are permitted (i.e. all potential bonds have infinite cost).
       Assume that newly made bonds can never decrease the cost of potential bonds.
    (This is needed to justify the algorithm, which moves potential bonds later in an ordered list
    when their cost has increased since last checked;
    it's true since the bond cost (as defined elsewhere in this module) is a sum of terms,
    and adding a bond can add new terms but doesn't change the value of any existing terms.)
       Return the number of bonds created.
    """
    # Implementation note: the only way I know of to do this efficiently is to use a linked list
    # (as the Lisp code did), even though this is less natural in Python.
    bondlst0 = list_potential_bonds(atmlist) # a list of triples (cost, atm1, atm2)
    bondlst = linked_list(bondlst0, list) # arg2 (list) is a function to turn the triple (cost, atm1, atm2) into a list.
        # value is a list [[cost, atm1, atm2], next]; needs to be mutable in next and cost elements
        # (Note: Lisp code used a Lisp linked list of triples, (cost atm1 atm2 . next), but all Lisp lists are mutable.)
    res = 0
    while bondlst:
        entry, bondlst = bondlst
            # original code assumed this was too early to change bondlst -- we might insert a new element right after entry;
            # but I think that's not possible, so we can move forward now [bruce 050906]
        oldcostjunk, atm1, atm2 = entry
        cost = bond_cost(atm1, atm2) # might be different than last recorded cost
            #e optim: could invalidate changed costs, avoid recomputing some of them, incrementally adjust others
        if cost is not None:
            if (bondlst is None) or bondlst[0][0] >= cost:
                # if there's no next-best bond, or its cost is no better than this one's, make this bond
                bond_atoms_faster(atm1, atm2, bondtyp) # optimized bond_atoms, and doesn't make any open bonds
                res += 1
            else:
                # cost has increased beyond next bond in list -- update entry and move it down list
                entry[0] = cost
                curr = bondlst # loop variable - next possible list element after which we might insert entry
                while 1:
                    # (at this point, we know curr is not None, and we already compared cost to curr[0][0])
                    junk, next = curr
                    if (next is None) or next[0][0] >= cost:
                        break # found insertion point: right after curr, before next (next might be None)
                    curr = next
                assert curr[1] is next #e remove when works
                # insert entry after curr, before next
                curr[1] = [entry, next]
            pass
        pass
    return res

# end of translation of contributed code

# ==

def inferBonds(mol): # [probably by Will; TODO: needs docstring]
    
    #bruce 071030 moved this from bonds.py to bonds_from_atoms.py
    
    # not sure how big a margin we should have for "coincident"
    maxBondLength = 2.0
    # first remove any coincident singlets
    singlets = filter(lambda a: a.is_singlet(), mol.atoms.values())
    removable = { }
    sngen = NeighborhoodGenerator(singlets, maxBondLength)
    for sing1 in singlets:
        key1 = sing1.key
        pos1 = sing1.posn()
        for sing2 in sngen.region(pos1):
            key2 = sing2.key
            dist = vlen(pos1 - sing2.posn())
            if key1 != key2:
                removable[key1] = sing1
                removable[key2] = sing2
    for badGuy in removable.values():
        badGuy.kill()
    from operations.bonds_from_atoms import make_bonds
    make_bonds(mol.atoms.values())
    return

# ==

from utilities.debug import register_debug_menu_command

def remake_bonds_in_selection( glpane ):
    """
    Remake all bonds between selected atoms (or between atoms in selected chunks),
    in the given Selection object (produced by e.g. selection_from_part),
    by destroying all old bonds between selected atoms and all open bonds on them,
    changing all selected atoms to their default atomtype,
    and creating new single bonds using Eric Drexler's greedy algorithm which considers
    bond lengths and angles for sp3 atoms.
       Note: bonds between selected and unselected atoms are not altered, but are noticed
    when deciding what new bonds to make.
       Note: the current algorithm might make additional stretched bonds, in cases when
    it ought to infer non-sp3 atomtypes and make fewer bonds.
    """
    #bruce 071030 fixed several bugs in this function I wrote long ago;
    # evidently it never worked -- was it finished?? Now it works, at least
    # for the trivial test case of 2 nearby C(sp3) atoms.
    atmlist = glpane.assy.getSelectedAtoms()
        # notes: this includes atoms inside selected chunks;
        # it also includes a selected jig's atoms, unlike most atom operations.
    atmdict = dict([(atm.key, atm) for atm in atmlist]) # for efficiency of membership test below
    n_bonds_destroyed = 0
    n_atomtypes_changed = 0
    n_atoms = len(atmlist)
    for atm in atmlist:
        if atm.atomtype is not atm.element.atomtypes[0]:
            n_atomtypes_changed += 1 # this assume all atoms will be changed to default atomtype, not an inferred one
            # count them all before changing them or destroying any bonds,
            # in case some atomtypes weren't initialized yet
            # (since their getattr method looks at number of bonds)
    for atm in atmlist:
        for b in atm.bonds[:]:
            atm2 = b.other(atm)
            if atm2.key in atmdict:
                ###e to also zap singlets we'd need "or atm2.element is Singlet" and to prevent b.bust from remaking them!
                # (singlets can't be selected)
                b.bust()
                n_bonds_destroyed += 1 # (count real bonds only)
        atm.set_atomtype(atm.element.atomtypes[0]) ###k this might remake singlets if it changes atomtype
        #e future optim: revise above to also destroy singlets and bonds to them
        # (btw I think make_bonds doesn't make any singlets as it runs)
    n_bonds_made = make_bonds(atmlist)
        #e it would be nice to figure out how many of these are the same as the ones we destroyed, etc
    for atm in atmlist:
        atm.remake_bondpoints()
    env.history.message(
        "on %d selected atoms, replaced %d old bond(s) with %d new (or same) bond(s); changed %d atomtype(s) to default" %
        (n_atoms, n_bonds_destroyed, n_bonds_made, n_atomtypes_changed)
     )
    #e note, present implem marks lots of atoms as changed (from destroying and remaking bonds) which did not change;
    # this only matters much for redrawing speed (of unchanged chunks) and since file is always marked as changed
    # even if nothing changed at all.
    return

register_debug_menu_command( "Remake Bonds", remake_bonds_in_selection )

#end