cad/src/dna/model/WholeChain.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948

# Copyright 2008 Nanorex, Inc.  See LICENSE file for details.
"""
WholeChain.py - a complete chain or ring of PAM atoms, made of one or more
smaller chains (or 1 smaller ring) often known as rails (short for ladder rails,
referring to DnaLadder), with refs to markers and a strand or segment

@author: Bruce
@version: $Id$
@copyright: 2008 Nanorex, Inc.  See LICENSE file for details.
"""

from dna.model.dna_model_constants import LADDER_END0
from dna.model.dna_model_constants import LADDER_END1

from dna.model.DnaMarker import DnaMarker # for isinstance
from dna.model.DnaMarker import DnaSegmentMarker # constructor
from dna.model.DnaMarker import DnaStrandMarker # constructor

from utilities.debug import print_compact_traceback
from utilities.debug import print_compact_stack

class WholeChain(object):
    """
    A WholeChain knows a sequence of 1 or more rails, i.e. smaller chains
    (of DNA PAM atoms) which are linked into a single longer chain or ring
    (or which were so linked when it was constructed).

    (If it's not a ring, then it is required to be a maximal chain,
    i.e. to not be able to be extended with more atoms at the ends,
    at the time it's constructed.)

    It is immutable once constructed, as are its rails.
    Modifications to the underlying chaining from which it and its
    rails are derived result in its becoming invalid
    (in various ways needed by different users of it)
    and (for some of those users) replaced by one or more newly
    constructed wholechains.

    Its jobs and role in the DNA Data Model include:

    - Understand base indexing for any whole segment or strand.

    - Be able to find (and/or keep track of) all the DnaMarkers
    along its atoms.

    - Help a DnaMarker scan its atoms to decide where to move to,
    and help it actually move.

    - Decide which marker should determine strand or segment identity
    and base indexing scheme for the atoms in this wholechain
    (if it's valid). Markers might contain user-settable attributes
    (or inherit those from their owning strand or segment) to know
    how to do this, e.g. a higher or lower priority in controlling
    base indexing, policies for which direction to move in and how
    to adjust base index when moving, how to move when new bases
    are inserted (detectable if these go between the two marker
    atoms), etc. [### todo: say some of that on DnaMarker docstring]

    Specialized kinds of markers might serve as, for example,
    startpoints or endpoints of sequence constraints, or good places
    for a base index discontinuity in a ring (if the index origin
    should not be used).

    Invariants, and how it maintains them:

    - At any given time, just after any dna updater run, the valid WholeChains
    are in 1-1 correspondence with the set of DnaSegment and DnaStrand objects,
    via their controlling markers.

    - Just after any dna updater run, each WholeChain knows about the DnaMarkers
    on its atoms, of which there are one or more, with exactly one being a
    "controlling marker", which determines the identity of the
    DnaSegment and DnaStrand object which own the wholechain's atoms,
    and determines the base indexing scheme they use (origin and direction).

    - When things change (i.e. when user operations make wholechains invalid),
    and the dna updater runs again (always before the next undo checkpoint
    takes its snapshot of the model state), the dna updater
    (and methods in the markers and wholechains) decide how to maintain this
    association (wholechain -> controlling marker -> owning strand or segment)
    based on how the controlling marker moves and whether it stays / stops being
    / becomes controlling. (The wholechain chooses
    or makes a new controlling marker as needed for this.)

    - To do this, the wholechain uses the now-invalid wholechains to help it
    figure out when and how to modify existing DnaSegment or DnaStrand objects,
    and/or delete them or make new ones. (It does this mainly by moving
    DnaMarkers whose atoms got deleted to new locations (of non-deleted atoms)
    along their old wholechains, and then having new wholechains decide which
    markers should determine their strand's or segment's identity, by being
    the wholechain's one "controlling marker".)

    State:

    - A WholeChain contains no state for undo/copy/save; that's in the associated
    DnaSegment and DnaStrand objects and their DnaMarkers. (The set of DnaMarkers
    belonging to a DnaSegment and DnaStrand object is the same as the set of
    all the valid markers along its wholechain's atoms.)
    """

    # default values of subclass-specific constants
    _DnaMarker_class = None # class of DnaMarker to make

    # default values of instance variables
    _strand_or_segment = None
    _controlling_marker = None
    _num_bases = -1 # unknown, to start with (used in __repr__ and __len__)
    _all_markers = () # in instances, will be a mutable dict
        # (len is used in __repr__; could be needed before __init__ done)

    def __init__(self, dict_of_rails): # fyi: called by update_PAM_chunks
        """
        Construct self, own our chunks (and therefore our atoms).

        @note: this does not choose or make a marker or own our markers.
        For that see own_markers.

        @param dict_of_rails: maps id(rail) -> rail for all rails in wholechain

        @note: we can assume that rail._f_update_neighbor_baseatoms() has
               already been called (during this run of the dna updater)
               for every rail (preexisting or new) in dict_of_rails.
               Therefore it is ok for us to rely on rail.neighbor_baseatoms
               being set and correct for those rails, but conversely it is
               too late to use that info in any old wholechains of markers.
        """
        assert dict_of_rails, "a WholeChain can't be empty"
        self._dict_of_rails = dict_of_rails
        self.ringQ = (len(self.end_baseatoms()) == 0) # 080311

        # Scan all our atoms, for several reasons.
        # One is to find all the DnaMarkers on them.
        # These markers may have moved along their old wholechain,
        # and even if they didn't, they may no longer be on a pair of
        # adjacent atoms on the same wholechain, so don't assume they are.
        # (E.g. we might find one with one atom on self and the other on some
        #  other wholechain.) For each marker we find, figure out whether it's
        # still valid, record its position if so, kill it if not.

        markers_1 = {} # collects markers from all our atoms during loop, maps them to (rail, baseindex) for their marked_atom
        markers_2 = {} # same, but for their next_atom (helps check whether they're still valid, and compute new position)
        num_bases = 0
        end0_baseatoms = self._end0_baseatoms = {} # end0 atom key -> rail (aka chain)
        end1_baseatoms = self._end1_baseatoms = {}
        for rail in dict_of_rails.itervalues():
            baseatoms = rail.baseatoms
            assert baseatoms
            num_bases += len(baseatoms)
            end0_baseatoms[baseatoms[0].key] = rail
            end1_baseatoms[baseatoms[-1].key] = rail
            chunk = baseatoms[0].molecule
            chunk.set_wholechain(self)
            for baseindex in range(len(baseatoms)):
                atom = baseatoms[baseindex]
                for jig in atom.jigs:
                    # note: this is only correct because marker move step1 has
                    # found new atoms for them (or reconfirmed old ones), and
                    # has called marker.setAtoms to ensure they are recorded
                    # on those atoms.
                    if isinstance(jig, DnaMarker):
                        marker = jig
                        assert not marker.killed(), "marker %r is killed" % ( marker, )
                            # might fail if they're not yet all in the model @@@
                            # someday: possible optim: cache the set of markers on each rail
                            # (and maintain it as markers move)?
                            # might matter when lots of old rails.
                        if marker.marked_atom is atom:
                            markers_1[marker] = (rail, baseindex)
                        if marker.next_atom is atom: # not elif, both can be true
                            markers_2[marker] = (rail, baseindex)
            continue

        self._num_bases = num_bases

        # kill markers we only found on one of their atoms
        # or that are not on adjacent atoms in self,
        # and determine and record position for the others
        # [revised 080311]

        markers = {} # maps markers found on both atoms to (atom1info, atom2info),
            # but values are later replaced with PositionInWholeChains or discarded

        for marker in markers_1.iterkeys():
            if not marker in markers_2:
                marker._f_kill_during_move(self, "different wholechains")
            else:
                markers[marker] = (markers_1[marker], markers_2.pop(marker))
            continue
        for marker in markers_2.iterkeys(): # note the markers_2.pop above
            marker._f_kill_during_move(self, "different wholechains")
            continue

        del markers_1, markers_2

        for marker in markers.keys():
            # figure out new position and direction,
            # store it in same dict (or remove marker if invalid)
            # LOGIC BUG - des this need _all_markers stored, to use self.yield_... @@@@@
            pos0 = marker._f_new_position_for(self, markers[marker])
            if pos0 is not None:
                rail, baseindex, direction = pos0
                pos = PositionInWholeChain(self, rail, baseindex, direction)
                assert isinstance(pos, PositionInWholeChain) # sanity check, for now
                markers[marker] = pos
                marker._f_new_pos_ok_during_move(self) # for debug
            else:
                marker._f_kill_during_move(self, "not on adjacent atoms on wholechain")
                del markers[marker]

        self._all_markers = markers

        return # from __init__

    def __len__(self):
        if self._num_bases == -1:
            # We're being called too early during init to know the value.
            # If we just return -1, Python raises ValueError: __len__() should return >= 0.
            # That is not understandable, so raise a better exception.
            # Don't just work: it might hide bugs, and returning 0 might make
            # us seem "boolean false"! (We define __nonzero__ to try to avoid that,
            # but still it makes me nervous to return 0 here.)
            # Note that we need __repr__ to work now (e.g. in the following line),
            # so don't make it use len() unless it checks _num_bases first.
            # (As of now it uses _num_bases directly.)
            assert 0, "too early for len(%r)" % self
        assert self._num_bases > 0 # if this fails, we're being called too early (during __init__), or had some bug during __init__
        return self._num_bases

    def __nonzero__(self):
        return True # needed for safety & efficiency, now that we have __len__! @@@@  TODO: same in other things with __len__; __eq__ too?

    destroyed = False #bruce 080403

    def destroy(self): # 080120 7pm untested
        # note: this can be called from chunk._undo_update from one
        # of our chunks; it needs to be ok to call it multiple times.
        # I fixed a bug in that in two ways (either one is enough,
        # each is a precaution if the other is present):
        # a destroyed flag, and reset _all_markers to {} not ().
        # [bruce 080403]
        if self.destroyed:
            return
        self.destroyed = True # do this now, in case of exception or recursion
        for marker in self.all_markers():
            marker.forget_wholechain(self)
        self._all_markers = {}
        self._controlling_marker = None
        self._strand_or_segment = None # review: need to tell it to forget us, too? @@@
        for rail in self.rails():
            chunk = rail.baseatoms[0].molecule
##            rail.destroy() # IMPLEM @@@@ [also, goodness is partly a guess]
            if hasattr(chunk, 'forget_wholechain'): # KLUGE
                chunk.forget_wholechain(self)
            continue
        self._dict_of_rails = {}
        return

    def rails(self):
        """
        Return all our rails, IN ARBITRARY ORDER (that might be revised)
        """
        return self._dict_of_rails.values()

    def end_rails(self): # bruce 080212; rename?
        """
        Return a list of those of our rails which have end_atoms of self
        as a whole (which they determine by which neighbor_baseatoms they
        have).

        @note: if we are a ring, this list will have length 0.
               if we are a length-1 wholechain, it will have length 1.
               if we are a longer wholechain, it will have length 1 or 2,
               depending on whether we're made of one rail or more.
               (We assert that the length is 0 or 1 or 2.)
        """
        # note: if this shows up in any profiles, it can be easily
        # optimized by caching its value.
        res = [rail for rail in self.rails() if rail.at_wholechain_end()]
        assert len(res) <= 2
        return res

    def end_baseatoms(self):
        """
        Return a list of our end_baseatoms, based on rail.neighbor_baseatoms
        (used via rail.wholechain_end_baseatoms())
        for each of our rails (set by dna updater, not always valid during it).

        @note: result is asserted length 0 or 2; will be 0 if we are a ring
        or 2 if we are a chain.

        @note: Intended for use by user ops between dna updater runs,
               but legal to call during self.__init__ as soon as
               self._dict_of_rails has been stored.
        """
        res = []
        for rail in self.end_rails():
            res.extend(rail.wholechain_end_baseatoms())
        assert len(res) in (0, 2), "impossible set of %r.end_baseatoms(): %r" % \
               (self, res)
        return res

    def get_all_baseatoms(self):
        """
        @return: a list of all baseatoms within self, IN ARBITRARY ORDER
        @rtype: list

        @see: self.get_all_baseatoms_in_order() which returns the base atoms
              in a fixed order
        """
        baseAtomList = []
        for rail in self.rails():
            baseAtomList.extend(rail.baseatoms)
        return baseAtomList

    def get_all_baseatoms_in_order(self): #Ninad 2008-08-06
        """
        @return: a list of all baseatoms within self, in a fixed
                 order -- from first baseatom of first rail to
                 last base atom of last rail, in same order as
                 wholechain_baseindex indices.
        @rtype: list

        @see: self.get_rails_in_order()
        """
        rails = self.get_rails_in_order()
        atomList = []
        for rail in rails:
            baseatoms = list(rail.baseatoms)
            first_index = self.wholechain_baseindex(rail, 0)
            last_index = self.wholechain_baseindex(rail, len(baseatoms) - 1)
            if first_index > last_index:
                baseatoms.reverse()

            atomList.extend(baseatoms)

        return atomList

    def __repr__(self):
        classname = self.__class__.__name__.split('.')[-1]
        res = "<%s (%d bases, %d markers) at %#x>" % \
              (classname, self._num_bases, len(self._all_markers), id(self))
        return res

    def all_markers(self):
        """
        Assuming we still own all our atoms (not checked),
        return all the DnaMarkers on them.
        """
        return self._all_markers.keys()

    def own_markers(self):
        """
        Choose or make one of your markers to be controlling,
        then tell them all that you own them and whether
        they're controlling (which might kill some of them).
        (But don't move them in the model tree, not even the newly made ones.)

        Also cache whatever base-indexing info is needed
        (in self, our rails/chunks, and/or their atoms).
        [As of 080116 this part is not yet needed or done.]
        """
        self._controlling_marker = self._choose_or_make_controlling_marker()
        for (marker, position_holder) in self._all_markers.items(): # can't be iteritems!
##            print "debug loop: %r.own_marker %r" % (self, marker)
            controllingQ = (marker is self._controlling_marker)
            ## assert not marker.killed(), \
            # [precaution 080222 - change assert to debug print & bug mitigation:]
            if marker.killed():
                print "\n***BUG: " \
                   "marker %r (our controlling = %r) is killed" % \
                   ( marker, controllingQ )
                # this might fail if they're not yet all in the model,
                # but we put them there when we make them, and we don't kill them when they
                # lose atoms, so they ought to be there @@@
                # (it should not fail from them being killed between __init__ and now
                #  and unable to tell us, since nothing happens in caller to kill them)
                self._f_marker_killed(marker) #k guess 080222
                continue
            marker.set_wholechain(self, position_holder, controlling = controllingQ)
                # own it; tell it whether controlling (some might die then,
                # and if so they'll call self._f_marker_killed
                # which removes them from self._all_markers)
            continue
        for marker in self.all_markers():
            # [precaution 080222 - change assert to debug print & bug mitigation:]
            ## assert not marker.killed(), \
            if marker.killed():
                print "\n***BUG: " \
                    "marker %r died without telling %r" % (marker, self)
                self._f_marker_killed(marker)
            continue

        # todo: use controlling marker to work out base indexing per rail...

        return

    def _f_marker_killed(self, marker):
        """
        [friend method for DnaMarker]
        One of our markers is being killed
        (and calls this to advise us of that).
        [Also called by self if we realize that failed to happen.]
        """
        try:
            self._all_markers.pop(marker)
        except:
            msg = "bug: can't pop %r from %r._all_markers: " % (marker, self)
            print_compact_traceback( msg)
            pass
        return

    # ==

    def find_strand_or_segment(self):
        """
        Return our associated DnaStrandOrSegment, which is required
        to be already defined in self.
        """
        assert self._strand_or_segment
        return self._strand_or_segment

    def find_or_make_strand_or_segment(self):
        """
        Return our associated DnaStrandOrSegment. If we don't yet know it
        or don't have one, ask our controlling marker (which we must have
        before now) to find or make it.
        """
        if self._strand_or_segment:
            return self._strand_or_segment
        assert self._controlling_marker, "%r should have called _choose_or_make_controlling_marker before now" % self
        strand_or_segment = self._controlling_marker._f_get_owning_strand_or_segment(make = True)
        assert strand_or_segment, "%r._controlling_marker == %r has no " \
               "owning_strand_or_segment and can't make one" % \
               (self, self._controlling_marker)
        self._strand_or_segment = strand_or_segment
        return strand_or_segment

    # == ### review below

    def _choose_or_make_controlling_marker(self):
        """
        [private]
        Choose one of our markers to control this wholechain, or make a new one
        (at a good position on one of our atoms) to do that.
        Return it, but don't save it anywhere (caller must do that).
        """
        marker = self._choose_controlling_marker()
        if not marker:
            marker = self._make_new_controlling_marker()
        assert marker
        return marker

    def _choose_controlling_marker(self):
        """
        [private]
        Look at the existing markers on our atoms which are able to be
        controlling markers.
        If there are none, return None.
        Otherwise choose one of them to be our controlling marker,
        and return it (don't save it anywhere, or tell any markers
        whether they are controlling now; caller must do those things
        if/when desired; fyi, as of 080116 own_markers does this).

        If one or more are already controlling, it will be one of them. [### REVIEW -- not sure this point is right]
        If more than one are controlling, we have rules to let them compete.
        If none is, ditto.
        """
        all_markers = self.all_markers() # valid anytime after __init__
        candidates = [marker
                      for marker in all_markers
                      if marker.wants_to_be_controlling()]
        if not candidates:
            return None
        # choose one of the candidates
        list_of_preferred_markers_this_time = [] ### stub - in real life let external code add markers to this list, pop them all here
        items = []
        for marker in candidates:
            order_info = (marker in list_of_preferred_markers_this_time, # note: False < True in Python
                          marker.control_priority,
                          marker.get_oldness(), # assume oldness is unique -- inverse of something allocated by a counter
                          )
            items.append( (order_info, marker) )
        items.sort()
        return items[-1][1]

    def _make_new_controlling_marker(self):
        """
        [private]
        self has no marker which wants_to_be_controlling().
        Make one (on some good choice of atom on self)
        (and in the same group in the model tree as that atom)
        and return it. (Don't store it as our controlling marker,
        or tell it it's controlling -- caller must do that if desired.
        But *do* add it to our list of all markers on our atoms.)

        @note: we always make one, even if this is a 1-atom wholechain
        (which means (for now) that it's not possible to make a good
         or fully correct one).
        We do that because the callers really need every wholechain to have one.
        """
        # improvements, 080222: should pick end atom with lowest .key, but to be easier and work for rings,
        # might pick end atom of any rail with lowest key (over all rails). Also this is best for segments
        # and also ok for strands but maybe not best for strands (who should look at Ax atoms in same basepairs,
        # but note, these can be same for different strand atoms!).

        end_atoms = self.end_baseatoms() # should find 2 atoms unless we're a ring
            # review: if we're a chain of length 1, does it find our lone atom once, or twice?
            # I don't think it matters re following code, but review that too.
            # [bruce 080422 comment]
        if not end_atoms:
            # we're a ring - just consider all end atoms of all our rails
            # (don't bother not including some twice for length-1 rails,
            #  no need in following code)
            end_atoms = []
            for rail in self.rails():
                end_atoms.append( rail.baseatoms[0] )
                end_atoms.append( rail.baseatoms[-1] )
            pass
        assert end_atoms

        candidates = [(atom.key, atom) for atom in end_atoms]
            # todo: for strands, first sort by attached Ax key?
            # todo: include info to help find next_atom?
        candidates.sort() # lowest key means first-made basepair by Dna Generator
        atom = candidates[0][1]

        # now pick the best next_atom

        rail, index, direction_into_chain = self._find_end_atom_chain_and_index( atom)
            # Q. is index for end1 positive or -1?
            # A. positive (or 0 for length-1 rail), but following code doesn't care.
            # Q. does best next_atom come from same rail if possible?
            # A. (guess yes, doing that for now)

            # WARNING: if len(rail) == 1, then direction_into_chain is arbitrary.
            # the following code doesn't use it in that case.
            # [bruce 080422 comment]

        if len(rail.baseatoms) > 1:
            next_atom = rail.baseatoms[index + direction_into_chain]
            position = (rail, index, direction_into_chain)
        elif rail.neighbor_baseatoms[LADDER_END1]: # ignore index and direction_into_chain
            next_atom = rail.neighbor_baseatoms[LADDER_END1]
            # direction within rail is 1, not necessarily same as direction within next_atom's rail
            position = (rail, index, 1)
        elif rail.neighbor_baseatoms[LADDER_END0]:
            next_atom = rail.neighbor_baseatoms[LADDER_END0] # reverse direction!
            atom, next_atom = next_atom, atom
            # direction within rail is -1, but we need the direction within the next rail!
            rail2, index2, dir2junk = self._find_end_atom_chain_and_index(next_atom)
            # if rail2 is len 1, dir2 and index2 are useless for finding
            # direction2 (direction in next rail), so always do this:
            if atom is rail2.neighbor_baseatoms[LADDER_END1]:
                direction2 = 1
            else:
                assert atom is rail2.neighbor_baseatoms[LADDER_END0]
                direction2 = -1
            position = rail2, index2, direction2
        else:
            # a 1-atom wholechain, hmm ...
            # DnaMarker support for this added 080216, not fully tested
            # (note that a 1-atom wholechain *ring* is not possible,
            #  but if it was it'd be handled by the above if/else cases)
            next_atom = atom
            direction = 1 # arbitrary, but needs to be in (-1, 1)
            position = (rail, index, direction)

        # now make the marker on those atoms
        # (todo: in future, we might give it some user settings too)
        assert atom.molecule, "%r has no .molecule" % atom

        assy = atom.molecule.assy

        assert assy
        assert atom.molecule.part, "%r has no .part; .molecule is %r" % (atom, atom.molecule)
        assert next_atom.molecule, "%r has no .molecule" % next_atom
        assert next_atom.molecule.part, "%r has no .part; .molecule is %r" % (next_atom, next_atom.molecule)
        assert atom.molecule.part is next_atom.molecule.part, \
               "%r in part %r, %r in part %r, should be same" % \
               (atom, atom.molecule.part, next_atom, next_atom.molecule.part)

        marker_class = self._DnaMarker_class # subclass-specific constant
        marker = marker_class(assy, [atom, next_atom]) # doesn't give it a wholechain yet

        # give it a temporary home in the model (so it doesn't seem killed) --
        # in fact, it matters where we put it, since later code assumes it
        # *might* already be inside the right DnaStrandOrSegment (and checks
        # this). So we put it in the same group as the marked atom.

        part = atom.molecule.part
        part.place_new_jig(marker)
            # (overkill in runtime, but should be correct, since both marker's
            #  atoms should be in the same group)

        # and remember it's on our atoms, and where on them it is
        self._append_marker(marker, *position)

        return marker

    def _append_marker(self, marker, rail, baseindex, direction): # 080306
        assert not marker in self._all_markers
        self._all_markers[marker] = PositionInWholeChain(self, rail, baseindex, direction)
        return

    def _find_end_atom_chain_and_index(self, atom, next_atom = None):
        # REVIEW: rename chain -> rail, in this method name? (and all local vars in file)
        """
        Assume atom is an end_baseatom of one of our rails (aka chains).
        (If not true, raise KeyError.)
        Find that rail and atom's index in it, and return them as
        the tuple (rail, index_in_rail, direction_into_rail).

        The index is nonnegative, meaning that we use 0 for
        either end of a length-1 rail (there is no distinction
        between the ends in that case). But in case the direction_into_rail
        return value component matters then, use the optional next_atom
        argument to disambiguate it -- if given, it must be next to end_atom,
        and we'll return the direction pointing away from it.
        """
        key = atom.key
        try:
            rail = self._end0_baseatoms[key]
            index_in_rail = 0
            direction_into_rail = 1
        except KeyError:
            rail = self._end1_baseatoms[key]
                # raise KeyError if atom is not an end_atom of any of our rails
            index_in_rail = len(rail) - 1
            direction_into_rail = -1
        if next_atom is not None:
            #bruce 080422 bugfix and new assertions
            assert next_atom in rail.neighbor_baseatoms
            if next_atom is rail.neighbor_baseatoms[LADDER_END0]:
                direction_to_next_atom = -1
            else:
                assert next_atom is rail.neighbor_baseatoms[LADDER_END1]
                direction_to_next_atom = 1
            necessary_direction_into_rail = - direction_to_next_atom
            if len(rail) == 1:
                if necessary_direction_into_rail != direction_into_rail:
                    print "fyi: fixing direction_into_rail to ", necessary_direction_into_rail ### remove when works
                direction_into_rail = necessary_direction_into_rail
            else:
                ## assert necessary_direction_into_rail == direction_into_rail
                if necessary_direction_into_rail != direction_into_rail:
                    print "BUG: necessary_direction_into_rail %d != direction_into_rail %d, rail %r" % \
                          (necessary_direction_into_rail, direction_into_rail, rail)
                pass
            pass
        return rail, index_in_rail, direction_into_rail

    # todo: methods related to base indexing

    def yield_rail_index_direction_counter(self,  # in class WholeChain
                                           pos,
                                           counter = 0,
                                           countby = 1,
                                           relative_direction = 1):
        """
        #doc
        @note: the first position we yield is always the one passed,
               with counter at its initial value
        """
        # possible optim: option to skip (most) killed atoms, and optimize that
        # to notice entire dead rails (noticeable when their chunks get killed)
        # and pass them in a single step. We might still need to yield the
        # final repeat of starting pos.
        rail, index, direction = pos
        pos = rail, index, direction
            # make sure pos is a tuple (required by comparison code below)
        # assert one of our rails, valid index in it
        assert direction in (-1, 1)
        while 1:
            # yield, move, adjust, check, continue
            yield rail, index, direction, counter
            # move
            counter += countby
            index += direction * relative_direction
            # adjust
            def jump_off(rail, end):
                neighbor_atom = rail.neighbor_baseatoms[end]
                # neighbor_atom might be None, atom, or -1 if not yet set
                assert neighbor_atom != -1
                if neighbor_atom is None:
                    new_rail = None # outer code will return due to this, ending the generated sequence
                    index, direction = None, None # illegal values (to detect bugs in outer code)
                else:
                    this_atom = rail.end_baseatoms()[end] #bruce 080422 bugfix
                    new_rail, index, direction = self._find_end_atom_chain_and_index(neighbor_atom, this_atom)
                    direction *= relative_direction
                    assert new_rail
                    # can't assert new_rail is not rail -- might be a ring of one rail
                return new_rail, index, direction
            if index < 0:
                # jump off END0 of this rail
                rail, index, direction = jump_off(rail, LADDER_END0)
            elif index >= len(rail):
                # jump off END1 of this rail
                rail, index, direction = jump_off(rail, LADDER_END1)
            else:
                pass
            # check
            if not rail:
                return
            assert 0 <= index < len(rail)
            assert direction in (-1, 1)
            if (rail, index, direction) == pos:
                # we wrapped around a ring to our starting position.
                # (or, someday, to another limit pos passed by caller?)
                # yield it one more time, as a signal that we're a ring, and
                # to help with algorithms looking at every pair of successive
                # positions; then return.
                yield rail, index, direction, counter
                return
            elif (rail, index) == pos[0:2]:
                ## assert 0, \
                # this is failing, but might be harmless, so mitigate it by
                # just printing rather than assertfailing, and otherwise
                # treating this the same way as above. [bruce 080422 bug mitigation]
                # note: the underlying bug was probably then fixed by a change above,
                # in the same commit, passing this_atom to _find_end_atom_chain_and_index.
                print \
                       "bug: direction got flipped somehow in " \
                       "%r.yield_rail_index_direction_counter%r at %r" % \
                       ( self,
                         (pos, counter, countby, relative_direction),
                         (rail, index, direction, counter) )
                yield rail, index, direction, counter
                return
            continue
        assert 0 # not reached
        pass

    _rail_to_wholechain_baseindex_data = None
    _wholechain_baseindex_range = None

    def wholechain_baseindex(self, rail, baseindex): #bruce 080421 (not in rc2)
        """
        @param rail: one of our rails.

        @param baseindex: a baseindex within rail
        @type baseindex: int, from 0 to len(rail) - 1

        @return: the corresponding baseindex within self as a whole
        @rtype: int

        @note: this function is expensive on the first call for self,
               and cheap thereafter.

        @note: if self is a ring, the baseindex origin and direction
               is determined by self's controlling marker.
               (WARNING: in initial implem this origin might be arbitrary, instead)

        @see: len(self) gives the total number of bases in self, but their
              indices don't necessarily start at 0

        @see: self.wholechain_baseindex_range()
        """
        if not self._rail_to_wholechain_baseindex_data:
            self._compute_wholechain_baseindices()
        baseindex_0, increment = self._rail_to_wholechain_baseindex_data[rail]
        return baseindex_0 + increment * baseindex

    def wholechain_baseindex_range(self): #bruce 080421 (not in rc2)
        """
        Return the minimum and maximum wholechain_baseindex of all bases
        in all rails in self.

        @return: (min_baseindex, max_baseindex), where min_baseindex <= max_baseindex
        @rtype: a tuple of two ints

        @note: this function might be expensive on the first call for self,
               but is cheap thereafter.

        @see: self.wholechain_baseindex(rail, baseindex)
        """
        if not self._wholechain_baseindex_range:
            self._compute_wholechain_baseindices()
        min_baseindex, max_baseindex = self._wholechain_baseindex_range
        return min_baseindex, max_baseindex

    def wholechain_baseindex_range_for_rail(self, rail): #bruce 080421 (not in rc2)
        """
        @param rail: one of our rails.

        @return: the first and last wholechain_baseindex within rail
        @rtype: a tuple of two ints

        @see: self.wholechain_baseindex_range()
        """
        first = self.wholechain_baseindex(rail, 0)
        last  = self.wholechain_baseindex(rail, len(rail) - 1)
        return first, last

    def get_rails_in_order(self): #Ninad 2008-08-06
        """
        @return: a list containing self's rails, in increasing
                 order of the index of each rail's baseatoms,
                 according to wholechain_baseindex_range_for_rail.
        @rtype: list

        @see: self.get_all_baseatoms_in_order()
        @see: self.rails() (much faster when order doesn't matter)
        """
        rails = self.rails()
        lst = []

        for rail in rails:
            first_baseindex, last_baseindex = self.wholechain_baseindex_range_for_rail(rail)
            lst.append( (first_baseindex, rail) )

        # Sort the list so that the rails are arranged in increasing order
        # of the baseindex of the first (or any) baseatom of each rail.
        # (Which baseatom is used in each rail doesn't matter, since within
        #  any single rail the indices are contiguous.)
        lst.sort()

        return [rail for (baseindex, rail) in lst]

    def _compute_wholechain_baseindices(self): #bruce 080421 (not in rc2)
        """
        Compute and assign the correct values to
        self._rail_to_wholechain_baseindex_data
        and self._wholechain_baseindex_range.
        """
        self._rail_to_wholechain_baseindex_data = {} # modified herein
        marker = self._controlling_marker
            # for now, its marked_atom and next_atom will be treated as having
            # wholechain_baseindices of 0 and 1 respectively. In the future,
            # marker properties would specify this.
        min_so_far = 2 * len(self) # bigger than any possible wholechain_baseindex in self
        max_so_far = -2 * len(self) # smaller than any ...
        for direction_of_slide in (1, -1):
            if self.ringQ and direction_of_slide == -1:
                break
            pos_holder = marker._position_holder ### kluge
            assert pos_holder.wholechain is self
            pos_generator = pos_holder.yield_rail_index_direction_counter(
                                relative_direction = direction_of_slide,
                                counter = 0,
                                countby = direction_of_slide,
                             )
                # TODO: optimization: pass a new option to skip from each
                # position returned to a position in the next rail.
                # (implem note: be sure to still return the precise initial
                #  pos at the end, for a ring.)
            old_atom1, old_atom2 = marker.marked_atom, marker.next_atom
            if direction_of_slide == 1:
                _check_atoms = [old_atom1, old_atom2]
                    # for assertions only -- make sure we hit these atoms first,
                    # in this order
            else:
                _check_atoms = [old_atom1] # if we knew pos of atom2 we could
                    # start there, and we could save it from when we go to the
                    # right, but there's no need.
            last_rail = None
            for pos in pos_generator:
                rail, index, direction, counter = pos
                atom = rail.baseatoms[index]
                if _check_atoms:
                    popped = _check_atoms.pop(0)
                    if not (atom is popped):
                        # FIX: remove this code once it works, and certainly
                        # before implementing the optim in pos_generator
                        # to return each rail only once.
                        print "\n*** BUG: not (atom %r is _check_atoms.pop(0) %r), remaining _check_atoms %r, other data %r" % \
                              (atom, popped, _check_atoms, (marker, pos_holder))
                # define the wholechain_baseindex of pos to be counter;
                # from this and direction, infer the index range for rail
                # and record it, also tracking min and max wholechain indices.
                if rail is not last_rail: # optim, until pos_generator can return each rail only once
                    last_rail = rail
                    def rail_index_to_whole_index(baseindex):
                        return (baseindex - index) * direction + counter
                    self._rail_to_wholechain_baseindex_data[rail] = (
                        rail_index_to_whole_index(0),
                        direction
                    )
                    for index in (rail_index_to_whole_index(0),
                                  rail_index_to_whole_index(len(rail) - 1) ):
                        if index < min_so_far:
                            min_so_far = index
                        if index > max_so_far:
                            max_so_far = index
                        continue
                    pass # if rail was not yet seen
                continue # next pos from pos_generator
            continue # next direction_of_slide
        self._wholechain_baseindex_range = ( min_so_far, max_so_far )
        return # from _compute_wholechain_baseindices

    pass # end of class WholeChain

# ==

class PositionInWholeChain(object):
    """
    A mutable position (and direction) along a WholeChain.
    """
    def __init__(self, wholechain, rail, index, direction):
        self.wholechain = wholechain
        self.set_position(rail, index, direction)

    def set_position(self, rail, index, direction):
        self.rail = rail
        self.index = index # base index in current rail
        self.direction = direction # in current rail only
            # note: our direction in each rail is unrelated
        self.pos = (rail, index, direction)
        return

    def yield_rail_index_direction_counter(self, **options): # in class PositionInWholeChain
        return self.wholechain.yield_rail_index_direction_counter( self.pos, **options )

    # maybe: method to scan in both directions
    # (for now, our main caller does that itself)

    pass

# ==

class Axis_WholeChain(WholeChain):
    _DnaMarker_class = DnaSegmentMarker
    """
    A WholeChain for axis atoms.
    """
    pass

class Strand_WholeChain(WholeChain):
    _DnaMarker_class = DnaStrandMarker
    """
    A WholeChain for strand atoms.
    """
    pass

# ==

def new_Group_around_Node(node, group_class): #e refile, might use in other ways too [not used now, but might be correct]
    node.part.ensure_toplevel_group() # might not be needed
    name = "(internal Group)" # stub
    assy = node.assy
    dad = None #k legal?? arg needed?
    group = group_class(name, assy, dad) # same args as for Group.__init__(self, name, assy, dad) [review: reorder those anytime soon??]
    node.addsibling(group)
    group.addchild(node)
    return group

# end