summaryrefslogtreecommitdiff
path: root/cad/src/dna/updater/dna_updater_ladders.py
blob: 752a0b87d5082e0afec71875a33cf83299e93651 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
# Copyright 2007-2008 Nanorex, Inc.  See LICENSE file for details.
"""
dna_updater_ladders.py - ladder-related helpers for dna_updater_chunks

@author: Bruce
@version: $Id$
@copyright: 2007-2008 Nanorex, Inc.  See LICENSE file for details.

See also: DnaLadder
"""

from utilities import debug_flags

from dna.updater.dna_updater_follow_strand import dna_updater_follow_strand

from dna.model.DnaLadder import DnaLadder, DnaSingleStrandDomain

from dna.updater.dna_updater_globals import _f_get_invalid_dna_ladders
from dna.updater.dna_updater_globals import DNALADDER_INVAL_IS_OK
from dna.updater.dna_updater_globals import temporarily_set_dnaladder_inval_policy
from dna.updater.dna_updater_globals import DNALADDER_INVAL_IS_ERROR

from dna.model.dna_model_constants import MAX_LADDER_LENGTH

from model.elements import Pl5

# ==

def dissolve_or_fragment_invalid_ladders( changed_atoms):
    """
    #doc... [some is in a caller comment]

    Also make sure that live atoms that are no longer in valid ladders
    (due to dissolved or fragmented ladders) are included in the caller's
    subsequent step of finding changed chains,
    or that the chains they are in are covered. This is necessary so that
    the found chains (by the caller, after this call)
    cover all atoms in every "base pair" (Ss-Ax-Ss) they cover any atom in.
    This might be done by adding some of their atoms into changed_atoms
    in this method (but only live atoms).
    """
    # assume ladder rails are DnaLadderRailChunk instances,
    # or were such until atoms got removed (calling their delatom methods).

    changed_chunks = {}

    for atom in changed_atoms.itervalues():
        chunk = atom.molecule
        ## print "DEBUG: changed atom %r -> chunk %r" % (atom, chunk)
        changed_chunks[id(chunk)] = chunk
        if atom.element is Pl5:
            #bruce 080529 fix bug 2887 (except for the lone Pl atom part,
            #  really a separate bug)
            # Only needed for Pl5 whose structure actually changed
            # (so no need to do it recursively for the ones we add to
            #  changed_atoms lower down). Needed because, without it,
            # breaking a Pl-Ss bond can leave a Pl whose only real bond
            # goes to a valid ladder, resulting in finding a strand chain
            # with only that Pl atom (thus no baseatoms), which assertfails,
            # and would probably cause other problems if it didn't.
            for Ss in atom.strand_neighbors():
                chunk = Ss.molecule
                changed_chunks[id(chunk)] = chunk
                # no need to put Ss into changed_atoms,
                # that will happen lower down when chunk ladder becomes invalid
                # (and if somehow it doesn't, probably not needed anyway,
                #  though that ought to be reviewed)
                continue
            pass
        continue

    for chunk in changed_chunks.itervalues():
        if debug_flags.DEBUG_DNA_UPDATER_VERBOSE: # was useful for bug 080120 9pm
            print "dna updater: fyi: tell changed chunk %r -> inval its ladder %r" % \
                  (chunk, getattr(chunk, 'ladder', "<has none>"))
            pass
        # todo: assert not killed, not nullMol, is a Chunk
        ## chunk.invalidate_ladder()
        chunk.invalidate_ladder_and_assert_permitted()
            # fyi: noop except in DnaLadderRailChunk
            # this just invals chunk.ladder (and below code will dissolve it);
            # a future optim could fragment it instead,
            # if we also recorded which basepair positions
            # were invalid, and made sure their atoms were covered
            # below so their chains will get scanned by caller,
            # e.g. due to changed_atoms.

    # Changed atoms that got removed from their ladders invalidated them
    # at the time (in DnaLadderRailChunk.delatom). Those that didn't get
    # removed from them are still in them, and invalidated them just above.
    # So the following will now give us a complete list of invalid ladders,
    # all of whose atoms we want to scan here.

    invalid_ladders = _f_get_invalid_dna_ladders()

    # now that we grabbed invalid ladders, and callers will make new valid
    # ones soon, any uncontrolled/unexpected inval of ladders is a bug --
    # so make it an error except for specific times when we temporarily
    # permit it and make it a noop (using DNALADDER_INVAL_IS_NOOP_BUT_OK).
    # [bruce 080413]
    _old = temporarily_set_dnaladder_inval_policy( DNALADDER_INVAL_IS_ERROR )
        # note: the restore happens elsewhere, which is why we assert what the
        # old policy was, rather than bothering to later pass it to the
        # restore function (not called before we return), restore_dnaladder_inval_policy.
    assert _old == DNALADDER_INVAL_IS_OK

    for ladder in invalid_ladders:
        # WARNING: the following is only reviewed for the case of the above code
        # dissolving (invalidating, not fragmenting) chunk's ladder.
        #e Could optim if we know some rails were untouched, by
        # "including them whole" rather than rescanning them, in caller.

        # Note: what is meant by "dissolving the ladder" (done above)
        # vs "fragmenting it" (not implemented) is the difference between
        # what happens to the atoms no longer in a valid ladder --
        # are they (until updater runs on them) in no valid ladder,
        # or in some newly made smaller one? The former, for now.
        # The comments and docstrings here are unclear about *when*
        # the dissolving or fragmenting is done. The dissolving can
        # be considered done right when the ladder is invalidated,
        # since we don't intend to make fragments from the untouched
        # parts of it. But this function name claims to do the dissolving
        # itself (even on already invalidated ladders). # todo: clarify.

        if debug_flags.DEBUG_DNA_UPDATER_VERBOSE: # was useful for bug 080120 9pm
            print "dna updater: fyi: adding all atoms from dissolved ladder %r" % ladder
        for rail in ladder.all_rails():
            for atom in rail.baseatoms: # probably overkill, maybe just one atom is enough -- not sure
                # note: atom is in a ladder that was valid a moment ago,
                # but even so, it might be killed now, e.g. if you select
                # and delete one strand atom in a duplex.
                if not atom.killed():
                    changed_atoms[atom.key] = atom
        continue

    return

# == helper for make_new_ladders

class chains_to_break:
    """
    Helper class for breaking chains all at once
    after incrementally recording all desired breakpoints.
    """
    def __init__( self, chains):
        self.chains = chains # a list
        self._set_of_breakpoints_for_chain = d = {}
        for chain in chains:
            d[chain] = {} # avoid later setdefault
        return
    def break_chain_later( self, chain, index, whichside):
        """
        Record info about where to later break chain --
        on whichside of index.

        @param chain: a chain that must be in self.chains
        @type chain: DnaChain

        @param index: an atom index in chain; WARNING: would become invalid
                      if we reversed the chain

        @param whichside: which side of the indexed atom the break should be on,
            represented as 1 or -1 -- the desired break is
            between index and index + whichsidetobreak
        """
        ## key = (chain, index, whichside)
        # no, we don't need to record the distinction between break point and direction,
        # just enough to know where the break is; and we might as well collect
        # it up per chain:
        if whichside == -1:
            # break_indices = index - 1, index
            break_before = index
        else:
            assert whichside == 1
            # break_indices = index, index + 1
            break_before = index + 1
        # not needed: assert chain in self.chains
        ## self._set_of_breakpoints_for_chain.setdefault(chain, {})[break_before] = None
        self._set_of_breakpoints_for_chain[chain][break_before] = None # arb value
        return
    def break_between_Q( self, chain, index1, index2):
        """
        Is there a break between the given two adjacent indices in chain?
        @note: adjacent indices can be passed in either order.
        """
        breaks = self._set_of_breakpoints_for_chain[chain]
        assert abs(index1 - index2) == 1
        break_before = max(index1, index2)
        return breaks.has_key(break_before)
    def breakit(self, chain):
        """
        Return a sequence of (start_index, length) pairs,
        suitable for passing to chain.virtual_fragment,
        which describes the fragments of chain into which our
        recorded breaks should break it.

        @param chain: a chain that must be in self.chains
        @type chain: DnaChain

        @return: sequence of (start_index, length) pairs
        """
        breaks = self._set_of_breakpoints_for_chain[chain].keys() # might be empty
        breaks.sort()
        start_index = chain.start_baseindex() # modified during loop
        limit_index = chain.baselength() + start_index
        breaks.append(limit_index)
        res = []
        num_bases = 0 # for asserts only
        for break_before in breaks:
            length = break_before - start_index
            num_bases += length # for asserts only
            # length can be 0 here, due to explicit breaks at the ends
            if length:
                res.append( (start_index, length) )
            start_index = break_before
            continue
        if debug_flags.DEBUG_DNA_UPDATER_VERBOSE: # made verbose on 080201
            print "will break %r -> %d pieces == %r" % (chain, len(res), res)
        assert num_bases == chain.baselength()
        return res
    pass

# ==

def make_new_ladders(axis_chains, strand_chains):
    """
    Make new DnaLadders and/or DnaSingleStrandDomains out of the given (partial) atom chains
    (which should contain only PAM atoms no longer in valid old ladders,
     and which are able to form complete new ladders, since they contain
     all or no PAM atoms from each "base pair" (Ss-Ax-Ss unit) or "single
     strand base" (either Ss-Ax- with no other Ss on that Ax, or Ss with
     no Ax (possibly with 'unpaired-base' atoms which we mostly ignore),
    fragmenting, reversing, and shifting the chains as needed.

    The newly made ladders might be more fragmented than required
    (i.e. new/new ladder merges might be possible), and they might be
    mergeable with old ladders. Their length can be as short as 1.
    They might be rings (or moebius rings), or some of their rails
    might be rings, but this is not detected or encoded explicitly.

    The new ladders will have rails in the same direction
    and with proper bond directions. (If necessary, we fix
    inconsistent bond directions.) [### partly nim; see ladder.error flag --
    also it would be better to not change them but to break bonds. @@@]

    @return: tuple of two lists: newly made DnaLadders, newly made DnaSingleStrandDomains
    """
    # Basic algorithm: scan each axis chain (or ring), and follow along its two
    # bound strands to see when one or both of them leave it or stop (i.e. when
    # the next atom along the strand is not bound to the next atom along the
    # axis). This should classify all strand atoms as well (since bare ones were
    # deleted earlier -- if they weren't, remaining strand fragments can also
    # be found).

    # possible simplification: should we just make lots of length==1 ladders
    # (one per axis atom), then merge them all? In theory, this should work,
    # and it might even be faster -- that depends on how often this step
    # manages to return ladders much longer than 1. It would eliminate
    # most code in this function. (Try it if this function is hard to debug? @@@)

    strand_axis_info = {}

    for axis in axis_chains:
        for atom, index in axis.baseatom_index_pairs():
            # assume no termination atoms here

            new_strand_atoms = atom.strand_neighbors()
                # todo: make an AxisAtom method from this new Atom method

            # For efficiency, at this stage we just store info about these strand atoms' axis locations;
            # later we scan strands all at once and look at that info. This avoids trying both pairings
            # of prior and current strand atoms, and checking for bonding (perhaps thru Pl) or adjacency
            # (worrying about ring index wraparound).

            for s_atom in new_strand_atoms:
                strand_axis_info[s_atom.key] = (axis, index)
                    # store axis object (not id) so it can help with ring indices

    # Make helper objects to record the strand and axis breaks we'll need to do
    # (in terms of putting their pieces into separate ladders/rails).
    #
    # Note: we don't actually do the breaks until we've scanned everything.
    # (We might even do some re-merging of unnecessary breaks before doing
    #  any real breaks of axes.)
    #
    # Note that matched axis and strand indices might be in reversed relative
    # order. We won't reverse any chains until the scan is done and the
    # ladders are made (or being made, since merging ladders requires it).


    axis_breaker = chains_to_break(axis_chains)
    strand_breaker = chains_to_break(strand_chains)

    # helper functions; args are chain, index, whichside:
    break_axis = axis_breaker.break_chain_later
    break_strand = strand_breaker.break_chain_later

    for strand in strand_chains:
        # Loop over strand's base atoms, watching the strand join and leave
        # axis chains and move along them; virtually break both axis and strand
        # whenever it joins, leaves, or moves discontinuously
        # along axis (treating lack of Ax as moving continuously on an
        # imaginary Ax chain different from all real ones)
        # (for details, see the helper function).
        # (The break_ functions store the virtual breaks for later use
        #  in fragmenting the chains.)
        dna_updater_follow_strand(1, strand, strand_axis_info, break_axis, break_strand)

    for strand in strand_chains:
        # Now copy any axis breaks that strand moves over
        # onto strand breaks, in case they originated from the
        # other strand at that point on the axis.
        dna_updater_follow_strand(2, strand, strand_axis_info, None, break_strand, axis_breaker.break_between_Q )

    # Now use the recorded axis breaks to decide what new ladder fragments
    # to make, and the recorded strand breaks to make strand fragments to
    # assign to ladder fragments. (Note that matching index directions may
    # be reversed, and current indices are offset from those of the
    # fragments.)
    #
    # All fragments in the same ladder will now have the same length
    # (which can be as low as 1). Every ladder will have one or two
    # strands, assuming prior updater stage rules were not violated.
    # (The maximum of two strands comes from a maximum of two strand atoms
    #  bonded to one axis atom. The minimum of 1 comes from deleting bare
    #  axis atoms. The coverage of all strand atoms in this matching
    #  process comes from the lack of bare strand atoms. All of this
    #  is only true since a change to any old ladder atom makes us
    #  process all its atoms in this updater cycle, or at least,
    #  either all or no atoms from the 3 atoms in each of its rungs.)
    #
    # Maybe: do everything virtually at first, in case we can merge some
    # ladders before actually breaking chains. (I'm not sure if this is
    # likely if there were no non-physical situations, though, nor if it's
    # worth the trouble then, since we need code to merge real ladder
    # fragments too, in case new ones should merge with old unchanged ones.)

    ladder_locator = {} # atom.key -> ladder, for end axis atoms of each ladder we're making

    def end_baseatoms(rail):
        return rail.end_baseatoms()

    ladders = []
    singlestrands = []

    for axis in axis_chains:
        frags = axis_breaker.breakit(axis)
        for frag in frags:
            start_index, length = frag
            axis_rail = axis.virtual_fragment(start_index, length)
            ladder = DnaLadder(axis_rail)
            for atom in axis_rail.end_baseatoms():
                ladder_locator[atom.key] = ladder
            ladders.append(ladder)

    for strand in strand_chains:
        frags = strand_breaker.breakit(strand)
        for frag in frags:
            start_index, length = frag
            strand_rail = strand.virtual_fragment(start_index, length)
            # find ladder to put it in (Ax attached to arbitrary strand atom)
            atom = strand_rail.end_baseatoms()[0].axis_neighbor()
            if atom is None:
                # single strand with no Ax (will be true of every Ss in chain)
##                print "dna updater: fyi: found single strand domain %r" % (strand_rail,)
                for atom2 in strand_rail.baseatoms:
                    assert atom2.axis_neighbor() is None, \
                           "%r.axis_neighbor() should be None, is %r; atom is %r, sr is %r" % \
                           (atom2, atom2.axis_neighbor(), atom, strand_rail)
                        # remove when works?? has failed once, 080325 for tom...
                        # and once for me, after exception in pam conversion, 080413
                singlestrand = DnaSingleStrandDomain(strand_rail)
                singlestrands.append(singlestrand)
            else:
                # Ax is present
                ### REVIEW or BUG: why can we assume this works, for an arb (not just end) strand atom? @@@@
                # but wait, it's not an arb atom, it's a strand_rail end atom. Should be ok -- FIX THE COMMENT ABOVE.
                # but there is a bug where dissolving a ladder failed to put axis atoms into changed_atoms... 080120 327p
                try:
                    ladder = ladder_locator[atom.key]
                except KeyError:
                    # this happens in a bug tom reported thismorning, so try to survive it and print debug info
                    # [bruce 080219]
                    print "\n***BUG: dna updater: ladder_locator lacks %r -- specialcase might cause bugs" % (atom,) # @@@@@
                    # specialcase is variant of single strand code above
                    for atom2 in strand_rail.baseatoms:
                        if not (atom2.axis_neighbor() is None):
                            print " sub-bug: %r has %r with an axis_neighbor, %r" % \
                                  (strand_rail, atom2, atom2.axis_neighbor())
                    singlestrand = DnaSingleStrandDomain(strand_rail)
                    singlestrands.append(singlestrand)
                else:
                    ladder.add_strand_rail(strand_rail)

    for ladder in ladders:
        ladder.finished()
            # this ensures ladder has one or two strands, antiparallel in standard bond directions, aligned with axis rungs
            # (it reverses chains as needed, for rung alignment and strand bond direction)
            # @@@ (does it rotate ring chains?)

    return ladders, singlestrands # from make_new_ladders

# ==

def merge_ladders(new_ladders):
    """
    Merge end-to-end-connected ladders (new/new or new/old) into larger
    ones, when that doesn't make the resulting ladders too long.

    @return: list of merged (or new and unable to be merged) ladders

    @note: each returned ladder is either entirely new (perhaps merged
           or perhaps one of the ones the caller passed in),
           or the result of merging (one or more) new and (presumably
           just one) old ladders.
    """
    # Initial implem - might be too slow (quadratic in atomcount) if
    # repeated merges from small to large chain sizes occur.
    # Note, these ladders' rails might be real chunks (merge is slower)
    # or some sort of real or virtual atom chains (merge is faster).
    # (As of 080114 I think they are real atom chains, not yet associated
    #  with chunks.)

    res = [] # collects ladders that can't be merged

    while new_ladders: # has ladders untested for can_merge

        # note about invalid ladders during these loops:
        # in the first iteration of this outer loop, the ladders in
        # new_ladders are newly made, all valid (though they might be invalid
        # by the time the inner loop reaches them, if they got merged into a
        # ladder encountered earlier in that loop over new_ladders). In all
        # other iterations they are newly merged ladders, but they might
        # *already* be invalid if they got merged again by a later ladder
        # in the inner loop that merged them. (Or they might become invalid
        # during the loop, just as in the first iteration.)
        # In all cases it means nothing if they are invalid now,
        # and they should just be skipped if they are invalid when encountered.
        # [comment and behavior revised to fix bug from incorrect assertion,
        #  bruce 080222; earlier partial version of skip, bruce 080122]

        next = [] # new_ladders for next iteration

        for ladder in new_ladders:
            if not ladder.valid:
                # just skip it (not an error, as explained above)
                pass
            else:
                can_merge_info = ladder.can_merge() # (at either end)
                if can_merge_info:
                    merged_ladder = ladder.do_merge(can_merge_info)
                        # note: this invals the old ladders, one of which is
                        # ladder; the other might be later in new_ladders, or
                        # already appended to next earlier during this loop,
                        # or not known to this function.
                        # Q: what if the rails (also merged here) are already
                        # contained in wholechains?
                        # A: they're not yet contained in those -- we find those
                        # later from the merged ladders.
                    assert not ladder.valid
                    assert merged_ladder.valid
                    next.append(merged_ladder)
                else:
                    res.append(ladder)
        new_ladders = next
    for ladder in res:
        assert ladder.valid # required by caller, trivially true here
    return res

def split_ladders(ladders): # STUB but should cause no harm @@@@
    res = []
    for ladder in ladders:
        # REVIEW: higher threshhold for split than merge, for "hysteresis"?? maybe not needed...
        if len(ladder) > MAX_LADDER_LENGTH:
            print "NIM: split long ladder %r" % ladder
            res0 = [ladder] # the pieces
                # (real split function should inval original, then validate pieces)
            res.extend(res0)
        else:
            res.append(ladder)
    return res

def merge_and_split_ladders(ladders, debug_msg = ""):
    """
    See docstrings of merge_ladders and split_ladders
    (which this does in succession).

    @param ladders: list of 0 or more new DnaLadders

    @param debug_msg: string for debug prints
    """
    len1 = len(ladders) # only for debug prints

    ladders = merge_ladders(ladders)

    len2 = len(ladders)

    ladders = split_ladders(ladders)

    len3 = len(ladders)

    assert len2 == len3 ### REMOVE WHEN WORKS (says split is a stub) @@@@

    if debug_flags.DEBUG_DNA_UPDATER:
        if debug_flags.DEBUG_DNA_UPDATER_VERBOSE or len2 != len1 or len3 != len1:
            # note: _DEBUG_FINISH_AND_MERGE(sp?) is in another file
            if debug_msg:
                debug_msg = " (%s)" % debug_msg
            print "dna updater: merge_and_split_ladders%s: %d ladders, merged to %d, split to %d" % \
                  (debug_msg, len1, len2, len3)
    return ladders

# end