summaryrefslogtreecommitdiff
path: root/cad/src/files/dpb_trajectory/moviefile.py
blob: 39e9e8fdc333066b146e50dbc47038b4d3d3fb74 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
# Copyright 2005-2007 Nanorex, Inc.  See LICENSE file for details. 
"""
moviefile.py -- classes and other code for interpreting movie files
(of various formats, once we have them)

@author: Bruce
@version: $Id$
@copyright: 2005-2007 Nanorex, Inc.  See LICENSE file for details.
"""

# note that in future there's more than one class, and a function to figure out the right one to use
# for an existing file, or to be told this (being told the format) for a new file we'll cause to be made...
#
# so the external code should rarely know the actual classnames in this file!

# these imports are anticipated, perhaps not all needed
import os, sys
from struct import unpack # fyi: used for old-format header, no longer for delta frames
## from VQT import A
from Numeric import array, Int8
from utilities import debug_flags
from utilities.debug import print_compact_stack, print_compact_traceback
import foundation.env as env

def MovieFile(filename): #bruce 050913 removed history arg, since all callers passed env.history
    """
    Given the name of an existing old-format movie file,
    return an object which can read frames from it
    (perhaps only after receiving further advice from its client code,
     like the absolute atom positions for one specific frame).
       The returned object should also know whatever can be known
    from the moviefile itself about the movie... like number of frames and atoms...
    for the old format, that's all there is.
       In case of a fatal error, print an appropriate message to env.history
    and return None. We might also print warnings to history (I don't know #k).
       In future, when new format is available, we'll detect the format in the file
    and open it in the proper way, perhaps also taking optional arguments for a trace filename,
    perhaps handling files that have not yet been finished or perhaps not yet even started, etc....
       See also the docstring of class OldFormatMovieFile.
    """
    # for now, assume old format, and assume file exists and has reached its final size.
    reader = OldFormatMovieFile_startup( filename)
    if reader.open_and_read_header_errQ():
        return None
    return OldFormatMovieFile( reader)

class OldFormatMovieFile_startup:
    #e maybe make these same obj, so easier to recheck header later, and big one needs invalid state anyway
    def __init__(self, filename): #bruce 050913 removed history arg
        self.filename = filename
        self.fileobj = None
        self.errcode = None
    def open_and_read_header_errQ(self):
        # because we assume file is fully written, we can do all this stuff immediately for now:
        #e try/except too?
        self.open_file()
        self.read_header()
        return self.errcode
    def open_file(self):
        assert not self.fileobj #e if we relax this, then worry about whether we should seek to start of file
        self.fileobj = open(self.filename,'rb') ###@@@ missing file is possible when we reopen after closing; this is caught below
    def read_header(self):
        # assume we're at start of file
        # Read header (4 bytes) from file containing the number of frames in the moviefile.
        self.totalFramesActual = unpack('i',self.fileobj.read(4))[0]
        # Compute natoms
        filesize = os.path.getsize(self.filename)
        self.natoms = (filesize - 4) / (self.totalFramesActual * 3)
        if self.natoms * (self.totalFramesActual * 3) != (filesize - 4):
            msg = "Movie file [%s] has invalid length -- might be truncated or still being written." % self.filename
                # (Without knowing correct natoms (as we don't in this case), we can't reliably say whether it has
                #  missing frames, partly written frames, etc.)
                # For new format files this would only be a warning; we could use the whole frames.
                # But for old-format files, it means our calc of natoms will be wrong for them! So it's fatal.
            self.error(msg)
            return # how far do we return, on error?? (I mean, should we raise an exception instead?) ###k
        return
    def error(self, msg):
        self.errcode = msg or "error" # public attr for callers to see...
        from utilities.Log import redmsg
        env.history.message( redmsg( msg))
        return
    def delta_frame_bytes(self, n):
        """
        return the bytes of the delta frame which has index n (assuming our file is open and n is within legal range)
        """
        # note: the first one has index 1 (since it gives delta from time 0 to time 1).
        assert n > 0
        nbytes = self.natoms * 3 # number of bytes in frame (if complete) -- no relation to frame index n
        filepos = ((n-1) * nbytes) + 4
        try:
            # several things here can fail if file is changing on disk in various ways
            if not self.fileobj:
                # this might not yet ever happen, not sure.
                self.open_file() #e check for error? check length still the same, etc? 
            self.fileobj.seek( filepos) ####@@@@ failure here might be possible if file got shorter after size measured -- not sure
            res = self.fileobj.read(nbytes)
            assert len(res) == nbytes # this can fail, if file got shorter after we measured its size...
        except:
            # might be good to detect, warn, set flag, return 0s.... ####@@@@ test this
            if debug_flags.atom_debug: # if this happens at all it might happen a lot...
                print_compact_traceback( "atom_debug: ignoring exception reading delta_frame %d, returning all 00s: " % n)
            res = "\x00" * nbytes
            assert len(res) == nbytes, "mistake in python zero-byte syntax" # but I checked it, should be ok
        return res
    def close(self):
        if self.fileobj:
            self.fileobj.close()
        self.fileobj = None
    close_file = close
    def destroy(self):
        self.close()
        return # no other large state in this object
    pass 

class OldFormatMovieFile: #bruce 050426 
    """
    Know the filename and format of an existing moviefile, and enough about it to read requested frames from it
    and report absolute atom positions (even if those frames, or all frames in the file, are differential frames).
       Provide methods for renaming it (actually moving or copying the file), when this is safe.
       Sometimes keep it open with a known file pointer, for speed.
       Sometimes keep cached arrays of absolute positions (like key frames but perhaps computed rather than taken from the file),
    either for speed or since it's the only way to derive absolute positions from delta frames in the file.
       [#e Someday we'll work even on files that are still growing, but this probably won't be tried for Alpha5.]
       What we *don't* know includes: anything about the actual atoms (if any) being repositioned as this file is read;
    anything about associated files (like a tracefile) or sim run parameters (if any), except whatever might be needed
    to do our job of interpreting the one actual moviefile we know about.
       Possible generalizations: really we're one kind of "atom-set trajectory", and in future there might be other kinds
    which don't get their data from files. (E.g. an object to morph atom-set-positions smoothly between two endpoints.)
    """
    ##e How to extend this class to the new movie file format:
    # split it into one superclass with the toplevel caching-logic,
    # and two subclasses (one per file format) with the lower-level skills
    # specific to how often those files contain key frames (never vs. regularly),
    # and their other differences. But note that this obj's job is not really to interpret
    # the general parts of the new-format file header, only the "trajectory part".
    # So probably some other object would parse the header and only then hand off the rest of the file
    # to one of these.
    def __init__(self, filereader):
        self.filereader = filereader # this has file pointer, knows filename & header, can read raw frames

        #e someday:
        # conceivably the file does not yet exist and this is not an error
        # (if we're being used on a still-growing file whose producer didn't quite start writing it yet),
        # so let's check these things as needed/requested rather than on init.
        # For now we'll just "know that we don't know them".
        # This might be best done using __getattr__... and perhaps some quantities
        # are different each time we look (like file length)... not yet decided.
        #
        # now: just assume the file is complete before we're called, and grab these things from filereader.

        # Q: But does the caller need to tell us the set of starting atom positions,
        # in case the file doesn't? What if it doesn't know (for new-format file being explored)?
        # A: the way it tells us is by calling donate_immutable_cached_frame if it needs to,
        # on any single frame it wants (often but not always frame 0).
        # Re bug 1297, it better give us sim-corrected positions rather than raw ones, for any open bonds!
        # (For more info on that see comment in get_sim_posns.) [bruce 060111]

        self.totalFramesActual = filereader.totalFramesActual
        self.natoms = filereader.natoms # maybe these should be the same object... not sure
        self.temp_mutable_frames = {}
        self.cached_immutable_frames = {} #e for some callers, store a cached frame 0 here

    def get_totalFramesActual(self):
        return self.totalFramesActual
    def matches_alist(self, alist):
        return self.natoms == len(alist) #e could someday check more...
    def recheck_matches_alist(self, alist):
        return self.matches_alist(alist) ###@@@ stub, fails to recheck the file! should verify same header and same or larger nframes.
    def destroy(self):
        self.cached_immutable_frames = self.temp_mutable_frames = None
        self.filereader.destroy()
        self.filereader = None

    def frame_index_in_range(self, n):
        assert type(n) == type(1)
        return 0 <= n <= self.totalFramesActual # I think inclusive at both ends is correct...

    def ref_to_transient_frame_n(self, n):
        """
        [This is meant to be the main external method for retrieving our atom positions,
         when the caller cares about speed but doesn't need to keep this array
         (e.g. when it's playing us as a movie).]
        
        Return a Numeric array containing the absolute atom positions for frame n.
        Caller promises not to modify this array, and to never use it again after
        the next time anything calls any(??) method of this object. (Since we might
        keep modifying and returning the same mutable array, each time this method
        is called for the next n during a scan.)
           [#e Someday we might need to document some methods that are safe to call even while
        the caller still wants to use this array, and/or provide a scheme by which the
        caller can ask whether it's holding of that array remains valid or not -- note that
        several callers might have "different holdings of the same physical array" for which
        that answer differs. Note that a "copy on write" scheme (an alternative to much of this)
        might be better in the long run, but I'm not sure it's practical for Numeric arrays,
        or for advising self on which frames to keep and which to discard.
        """
        res = self.copy_of_frame(n) # res is owned by this method-run...
        self.donate_mutable_known_frame(n, res) # ... but not anymore!
            # But since we're a private implem, we can depend on res still
            # being valid and constant right now, and until the next method
            # on self is called sometime after we return.
        return res
    
    def copy_of_frame(self, n):
        """
        Return the array of absolute atom positions corresponding to
        the specified frame-number (0 = array of initial positions).
        If necessary, scan through the file as needed (from a key frame, in future format,
        or from the position of a frame whose abs posns we have cached, in old format)
        in order to figure this out.
        """
        assert self.frame_index_in_range(n)
        n0 = self.nearest_knownposns_frame_index(n)
        frame0 = self.copy_of_known_frame_or_None(n0) # an array of absposns we're allowed to modify, valid for n0
        assert frame0 is not None # don't test it as a boolean -- it might be all 0.0 which in Numeric means it's false!
        while n0 < n:
            # move forwards using a delta frame (which is never cached, tho this code doesn't need to know that)
            # (##e btw it might be faster to read several at once and combine them into one, or add all at once, using Numeric ops!
            #  I'm not sure, since intermediate states would use 4x the memory, so we might do smaller pieces at a time...)
            n0 += 1
            df = self.delta_frame(n0) # array of differences to add to frame n0-1 to get frame n0
            try:
                frame0 += df # note: += modifies frame0 in place (if it's a Numeric array, as we hope); that's desired
            except ValueError: # frames are not aligned -- happens when slider reaches right end
                print "frames not aligned; shapes:",frame0.shape, df.shape
                raise
        while n0 > n:
            # (this never happens if we just moved forwards, but there's no need to "confirm" or "enforce" that fact)
            # move backwards using a delta frame
            # (btw it might be faster to read all dfs forwards to make one big one to subtract all at once...
            #  or perhaps even to read them all at once into a single Numeric 2d array, and turn them into
            #  one big one using Numeric ops! ###e)
            df = self.delta_frame(n0)
            n0 -= 1 # note: we did this after grabbing the frame, not beforehand as above
            frame0 -= df
        #e future:
        #e   If we'd especially like to keep a cached copy for future speed, make one now...
        #e   Or do this inside forward-going loop?
        #e   Or in caller, having it stop for breath every so many frames, perhaps also to process user events?
##        if 0: #bruce 060111 debug code (two places), safe but could be removed when not needed (tiny bit slow) [bruce 060111] ###@@@
##            # maybe print coords for one atom
##            import runSim
##            if runSim.debug_all_frames:
##                ii = runSim.debug_all_frames_atom_index
##                print "copy_of_frame %d[%d] is" % (n, ii), frame0[ii]
        return frame0

    def donate_mutable_known_frame(self, n, frame):
        """
        Caller has a frame of absolute atom positions it no longer needs --
        add this to our cache of known frames, marked as able to be modified further as needed
        (i.e. as its data not needing to be retained in self after it's next returned by copy_of_known_frame_or_None).
        This optimizes serial scans of the file, since the donated frame tends to be one frame away
        from the next desired frame.
        """
        self.temp_mutable_frames[n] = frame
            # it's probably ok if we already had one for the same n and this discards it --
            # we don't really need more than one per n
        ###e do something to affect the retval of nearest_knownposns_frame_index?? it should optimize for the last one of these being near...
        return

    def donate_immutable_cached_frame(self, n, frame): # (this is how client code can tell us abs posns to start with)
        """
        Caller gives us the frame of abs positions for frame-index n,
        which we can keep and will never modify (in case caller wants to keep using it too),
        and caller also promises to never modify it (so we can keep trusting and copying it).
        This is the only way for client code using us on an all-differential file
        can tell us a known absolute frame from which other absolute frames can be derived.
        Note that that known frame need not be frame 0, and perhaps will sometimes not be
        (I don't know, as of 050426 2pm). [As of shortly before 060111 it can indeed be any frame.]
           Note: these positions better be sim-corrected (as if X was H) rather than raw, for any open bonds!
        Otherwise we'll get bug 1297. In fact, they really need to be a copy of positions the sim had,
        not just newly corrected H positions (see get_sim_posns comment for more info). [bruce 060111]
        """
        self.cached_immutable_frames[n] = frame
            # note: we only need one per n! so don't worry if this replaces an older one.
        ###e do something to affect the retval of nearest_knownposns_frame_index??
        return
    
    def copy_of_known_frame_or_None(self, n):
        """
        If we have a mutable known frame at index n, return it
        (and forget it internally since caller is allowed to modify it).
        If not, we should have an immutable one, or the file should have one (i.e. a key frame).
        Make a copy and return it.
        If we can't, return None (for some callers this will be an error; detecting it is up to them).
        """
        try:
            return self.temp_mutable_frames.pop(n)
        except KeyError:
            try:
                #e we don't yet support files with key frames, so a cached one is our only chance.
                frame_notouch = self.cached_immutable_frames[n]
            except KeyError:
                return None
            else:
                return + frame_notouch # the unary "+" makes a copy (since it's a Numeric array)
        pass

    def nearest_knownposns_frame_index(self, n):
        """
        Figure out and return n0, the nearest frame index to n
        for which we already know the absolute positions, either since it's a key frame in the file
        or since we've kept a cached copy of the positions (or been given those positions by our client code) --
        either a mutable copy or an immutable one.
        (This index is suitable for passing to copy_of_known_frame_or_None, but that method might or might not
         have to actually copy anything in order to come up with a mutable frame to return.)
           By "nearest", we really mean "fastest to scan over the delta frames from n0 to n",
        so if scanning forwards is faster then we should be biased towards returning n0 < n,
        but this issue is ignored for now (and will become barely relevant once we use the new file format
        with frequent occurrence of key frames).
           It's not an error if n is out of range, but the returned n0 will always be in range.
           If we can't find *any* known frame, return None (but for most callers this will be an error). ###e better to asfail??
        """
        # It's common during sequential movie playing that the frame we want is 1 away from what we have...
        # so it's worth testing this quickly, at least for the mutable frames used during that process.
        # (In fact, this is probably more common than an exact match! So we test it first.)
        if n - 1 in self.temp_mutable_frames:
            return n - 1
        if n + 1 in self.temp_mutable_frames:
            return n + 1
        # It's also common than n is already known, so test that quickly too.
        if n in self.temp_mutable_frames or n in self.cached_immutable_frames:
            return n
        # No exact match. In present code, we won't have very many known frames,
        # so it's ok to just scan them all and find the one that's actually nearest.
        # (For future moviefile format with frequent key frames, we'll revise this quite a bit.)
        max_lower = too_low = -1
        min_higher = too_high = 100000000000000000000000 # higher than any actual frame number (I hope!)
        for n0 in self.temp_mutable_frames.keys() + self.cached_immutable_frames.keys():
            if n0 < n:
                max_lower = max( max_lower, n0)
            else:
                min_higher = min( min_higher, n0)
        if max_lower > too_low and min_higher != too_high:
            # which is best: scanning forwards by n - max_lower, or scanning backwards by min_higher - n ?
            if n - max_lower <= min_higher - n:
                return max_lower
            else:
                return min_higher
        elif max_lower > too_low:
            return max_lower
        elif min_higher != too_high:
            return min_higher
        else:
            assert 0, "no known frame!" # for now, since I think this should never happen (unless file of new format is very incomplete)
            return None
        pass

    def delta_frame(self, n):
        """
        return the delta frame with index n, as an appropriately-typed Numeric array
        """
        bytes = self.filereader.delta_frame_bytes(n)
        ## older code: delta = A(unpack('bbb',file.read(3)))*0.01
        # (with luck, reading the whole frame at once will be a nice speedup for fast-forwarding...)
        res = array(bytes,Int8)
        res.shape = (-1,3)
        res = res * 0.01
            #e it might be nice to move that multiply into caller (for speedup and error-reduction):
            #bruce 060110 comment: 0.01 is not represented exactly, so including it here might introduce cumulative
            # roundoff errors into callers who add up lots of delta frames! Let's estimate the size: assume 53 significant bits,
            # and typical values within 10^6 of 0, and need for 10^-3 precision, then we're using about 30 bits,
            # so an error in last bit, accumulating badly, still has say 1/4 * 2^23 = >> 10^6 steps to cause trouble...
            # and I think adding or subtracting this delta frame should reverse properly (same error gets added or removed),
            # except perhaps for special coord values that won't occur most times, so if true, what matters is movie length
            # rather than how often user plays it forwards and back. So I think we can tolerate this error for A7, at least,
            # and I think we can rule it out as a possible cause of bug 1297 (and an experiment also seems to rule that out).
            # [For the true cause, see 060111 comment in get_sim_posns.]
            # In the long run, we should fix this, though I never observed a problem. ####@@@@
##        if 0: #bruce 060111 debug code (two places), safe but could be removed when not needed (tiny bit slow) [bruce 060111] ###@@@
##            # maybe print deltas for one atom
##            import runSim
##            if runSim.debug_all_frames:
##                ii = runSim.debug_all_frames_atom_index
##                print "delta_frame %d[%d] is" % (n, ii), res[ii]
        return res

    def close_file(self):
        self.filereader.close_file() # but don't forget about it!
    
    pass # end of class MovieFile