1"""bundletrajectory - a module for I/O from large MD simulations.
3The BundleTrajectory class writes trajectory into a directory with the
4following structure, except we now use JSON instead of pickle,
5so this text needs updating::
7 filename.bundle (dir)
8 metadata.json Data about the file format, and about which
9 data is present.
10 frames The number of frames (ascii file)
11 F0 (dir) Frame number 0
12 smalldata.ulm Small data structures in a dictionary
13 (pbc, cell, ...)
14 numbers.ulm Atomic numbers
15 positions.ulm Positions
16 momenta.ulm Momenta
17 ...
18 F1 (dir)
20There is a folder for each frame, and the data is in the ASE Ulm format.
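
A minimal usage sketch (the name 'md.bundle' and the ``atoms`` object
are placeholders for illustration)::

    from ase.io.bundletrajectory import BundleTrajectory

    traj = BundleTrajectory('md.bundle', mode='w', atoms=atoms)
    traj.write()           # write one frame
    traj.close()

    traj = BundleTrajectory('md.bundle', mode='r')
    last_frame = traj[-1]  # read the last frame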
21"""

import os
import sys
import shutil
import time
from pathlib import Path

import numpy as np

from ase import Atoms
# The system json module causes memory leaks!  Use ase's own.
# import json
from ase.io import jsonio
from ase.io.ulm import open as ulmopen
from ase.parallel import paropen, world, barrier
from ase.calculators.singlepoint import (SinglePointCalculator,
                                         PropertyNotImplementedError)


class BundleTrajectory:
    """Reads and writes atoms into a .bundle directory.

    The BundleTrajectory is an alternative way of storing
    trajectories, intended for large-scale molecular dynamics
    simulations, where a single flat file becomes unwieldy.  Instead,
    the data is stored in a directory, a 'bundle' (the name is
    inspired by bundles in macOS, which are really just directories
    that the user is supposed to think of as a single file-like unit).

    Parameters:

    filename:
        The name of the directory.  Preferably ending in .bundle.

    mode (optional):
        The file opening mode.  'r' means open for reading, 'w' for
        writing and 'a' for appending.  Default: 'r'.  If opening in
        write mode, and the filename already exists, the old file is
        renamed to .bak (any old .bak file is deleted), except if the
        existing file is empty.

    atoms (optional):
        The atoms that will be written.  Can only be specified in
        write or append mode.  If not specified, the atoms must be
        given as an argument to the .write() method instead.

    backup=True:
        Use backup=False to disable renaming of an existing file.

    backend='ulm':
        Request a backend.  Currently only 'ulm' is supported.
        Only honored when writing.

    singleprecision=False:
        Store floating point data in single precision (ulm backend only).
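
    Example (a sketch; ``dyn`` is assumed to be an existing molecular
    dynamics object)::

        traj = BundleTrajectory('md.bundle', mode='w', atoms=atoms)
        dyn.attach(traj.write, interval=10)  # write every 10 steps
        dyn.run(1000)
        traj.close()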
77 """
    slavelog = True  # Log from all nodes

    def __init__(self, filename, mode='r', atoms=None, backup=True,
                 backend='ulm', singleprecision=False):
        self.state = 'constructing'
        self.filename = filename
        self.pre_observers = []   # callback functions before write is performed
        self.post_observers = []  # callback functions after write is performed
        self.master = world.rank == 0
        self.extra_data = []
        self.singleprecision = singleprecision
        self._set_defaults()
        if mode == 'r':
            if atoms is not None:
                raise ValueError('You cannot specify atoms in read mode.')
            self._open_read()
        elif mode == 'w':
            self._open_write(atoms, backup, backend)
        elif mode == 'a':
            # Pass the backend along, so _open_append can fall back to
            # _open_write if the bundle does not exist yet.
            self._open_append(atoms, backend)
        else:
            raise ValueError('Unknown mode: ' + str(mode))

    def _set_defaults(self):
        "Set default values for internal parameters."
        self.version = 1
        self.subtype = 'normal'
        self.datatypes = {'positions': True,
                          'numbers': 'once',
                          'tags': 'once',
                          'masses': 'once',
                          'momenta': True,
                          'forces': True,
                          'energy': True,
                          'energies': False,
                          'stress': False,
                          'magmoms': True}

    def _set_backend(self, backend):
        """Set the backend doing the actual I/O."""
        if backend is not None:
            self.backend_name = backend

        if self.backend_name == 'ulm':
            self.backend = UlmBundleBackend(self.master, self.singleprecision)
        else:
            raise NotImplementedError(
                'This version of ASE cannot use BundleTrajectory '
                'with backend "%s"' % self.backend_name)

    def write(self, atoms=None):
        """Write the atoms to the file.

        If the atoms argument is not given, the atoms object specified
        when creating the trajectory object is used.
        """
        # Check that we are in write mode
        if self.state == 'prewrite':
            self.state = 'write'
            assert self.nframes == 0
        elif self.state != 'write':
            raise RuntimeError('Cannot write in ' + self.state + ' mode.')

        if atoms is None:
            atoms = self.atoms

        for image in atoms.iterimages():
            self._write_atoms(image)

    def _write_atoms(self, atoms):
        # OK, it is a real atoms object.  Write it.
        self._call_observers(self.pre_observers)
        self.log('Beginning to write frame ' + str(self.nframes))
        framedir = self._make_framedir(self.nframes)

        # Check which data should be written the first time:
        # Modify datatypes so any element of type 'once' becomes true
        # for the first frame but false for subsequent frames.
        datatypes = {}
        for k, v in self.datatypes.items():
            if v == 'once':
                v = (self.nframes == 0)
            datatypes[k] = v

        # Write 'small' data structures.  They are written jointly.
        smalldata = {'pbc': atoms.get_pbc(),
                     'cell': atoms.get_cell(),
                     'natoms': atoms.get_global_number_of_atoms(),
                     'constraints': atoms.constraints}
        if datatypes.get('energy'):
            try:
                smalldata['energy'] = atoms.get_potential_energy()
            except (RuntimeError, PropertyNotImplementedError):
                self.datatypes['energy'] = False
        if datatypes.get('stress'):
            try:
                smalldata['stress'] = atoms.get_stress()
            except PropertyNotImplementedError:
                self.datatypes['stress'] = False
        self.backend.write_small(framedir, smalldata)

        # Write the large arrays.
        if datatypes.get('positions'):
            self.backend.write(framedir, 'positions', atoms.get_positions())
        if datatypes.get('numbers'):
            self.backend.write(framedir, 'numbers', atoms.get_atomic_numbers())
        if datatypes.get('tags'):
            if atoms.has('tags'):
                self.backend.write(framedir, 'tags', atoms.get_tags())
            else:
                self.datatypes['tags'] = False
        if datatypes.get('masses'):
            if atoms.has('masses'):
                self.backend.write(framedir, 'masses', atoms.get_masses())
            else:
                self.datatypes['masses'] = False
        if datatypes.get('momenta'):
            if atoms.has('momenta'):
                self.backend.write(framedir, 'momenta', atoms.get_momenta())
            else:
                self.datatypes['momenta'] = False
        if datatypes.get('magmoms'):
            if atoms.has('initial_magmoms'):
                self.backend.write(framedir, 'magmoms',
                                   atoms.get_initial_magnetic_moments())
            else:
                self.datatypes['magmoms'] = False
        if datatypes.get('forces'):
            try:
                x = atoms.get_forces()
            except (RuntimeError, PropertyNotImplementedError):
                self.datatypes['forces'] = False
            else:
                self.backend.write(framedir, 'forces', x)
                del x
        if datatypes.get('energies'):
            try:
                x = atoms.get_potential_energies()
            except (RuntimeError, PropertyNotImplementedError):
                self.datatypes['energies'] = False
            else:
                self.backend.write(framedir, 'energies', x)
                del x

        # Write any extra data
        for (label, source, once) in self.extra_data:
            if self.nframes == 0 or not once:
                if source is not None:
                    x = source()
                else:
                    x = atoms.get_array(label)
                self.backend.write(framedir, label, x)
                del x
                if once:
                    self.datatypes[label] = 'once'
                else:
                    self.datatypes[label] = True

        # Finally, write metadata if it is the first frame
        if self.nframes == 0:
            metadata = {'datatypes': self.datatypes}
            self._write_metadata(metadata)
        self._write_nframes(self.nframes + 1)
        self._call_observers(self.post_observers)
        self.log('Done writing frame ' + str(self.nframes))
        self.nframes += 1

    def select_data(self, data, value):
        """Selects if a given data type should be written.

        Data can be written in every frame (specify True), in the
        first frame only (specify 'once') or not at all (specify
        False).  Not all data types support the 'once' keyword; if
        not supported, it is interpreted as True.

        The following data types are supported, the letter in
        parenthesis indicates the default:

        positions (T), numbers (O), tags (O), masses (O), momenta (T),
        forces (T), energy (T), energies (F), stress (F), magmoms (T)

        If a given property is not present during the first write, it
        will not be saved at all.
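
        Example: skip the momenta, but store the stress in every
        frame::

            traj.select_data('momenta', False)
            traj.select_data('stress', True)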
259 """
260 if value not in (True, False, 'once'):
261 raise ValueError('Unknown write mode')
262 if data not in self.datatypes:
263 raise ValueError('Unsupported data type: ' + data)
264 self.datatypes[data] = value

    def set_extra_data(self, name, source=None, once=False):
        """Adds extra data to be written.

        Parameters:

        name: The name of the data.

        source (optional): If specified, a callable object returning
            the data to be written.  If not specified, it is instead
            assumed that the atoms object contains the data as an
            array of the same name.

        once (optional): If specified and True, the data will only be
            written to the first frame.
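
        Example (a sketch; ``get_local_temperatures`` is a hypothetical
        callable)::

            traj.set_extra_data('temperatures',
                                source=get_local_temperatures)
            traj.set_extra_data('my_array')  # taken from atoms arrays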
279 """
280 self.extra_data.append((name, source, once))

    def close(self):
        "Closes the trajectory."
        self.state = 'closed'
        lf = getattr(self, 'logfile', None)
        self.backend.close(log=lf)
        if lf is not None:
            lf.close()
            del self.logfile

    def log(self, text):
        """Write to the log file in the bundle.

        Logging is only possible in write/append mode.

        This function is mainly for internal use, but can also be called by
        the user.
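
        Example::

            traj.log('Started equilibration phase.')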
298 """
299 if not (self.master or self.slavelog):
300 return
301 text = time.asctime() + ': ' + text
302 if hasattr(self, 'logfile'):
303 # Logging enabled
304 if self.logfile is None:
305 # Logfile not yet open
306 try:
307 self.logdata.append(text)
308 except AttributeError:
309 self.logdata = [text]
310 else:
311 self.logfile.write(text + '\n')
312 self.logfile.flush()
313 else:
314 raise RuntimeError('Cannot write to log file in mode ' +
315 self.state)

    # __getitem__ is the main reading method.
    def __getitem__(self, n):
        return self._read(n)

    def _read(self, n):
        """Read an atoms object from the BundleTrajectory."""
        if self.state != 'read':
            raise IOError('Cannot read in %s mode' % (self.state,))
        if n < 0:
            n += self.nframes
        if n < 0 or n >= self.nframes:
            raise IndexError('Trajectory index %d out of range [0, %d['
                             % (n, self.nframes))

        framedir = os.path.join(self.filename, 'F' + str(n))
        framezero = os.path.join(self.filename, 'F0')
        smalldata = self.backend.read_small(framedir)
        data = {}
        data['pbc'] = smalldata['pbc']
        data['cell'] = smalldata['cell']
        data['constraint'] = smalldata['constraints']
        if self.subtype == 'split':
            self.backend.set_fragments(smalldata['fragments'])
            self.atom_id, dummy = self.backend.read_split(framedir, 'ID')
        else:
            self.atom_id = None
        atoms = Atoms(**data)
        natoms = smalldata['natoms']
        for name in ('positions', 'numbers', 'tags', 'masses',
                     'momenta'):
            if self.datatypes.get(name):
                atoms.arrays[name] = self._read_data(framezero, framedir,
                                                     name, self.atom_id)
                assert len(atoms.arrays[name]) == natoms

        # Attach a single-point calculator if an energy was stored.
        if self.datatypes.get('energy'):
            if self.datatypes.get('forces'):
                forces = self.backend.read(framedir, 'forces')
            else:
                forces = None
            if self.datatypes.get('magmoms'):
                magmoms = self.backend.read(framedir, 'magmoms')
            else:
                magmoms = None
            calc = SinglePointCalculator(atoms,
                                         energy=smalldata.get('energy'),
                                         forces=forces,
                                         stress=smalldata.get('stress'),
                                         magmoms=magmoms)
            atoms.calc = calc
        return atoms

    def read_extra_data(self, name, n=0):
        """Read extra data stored alongside the atoms.

        Currently only used to read data stored by an NPT dynamics object.
        The data is not associated with individual atoms.
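
        Example (a sketch; 'my_array' is a hypothetical label stored
        with set_extra_data)::

            data = traj.read_extra_data('my_array', n=3)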
375 """
376 if self.state != 'read':
377 raise IOError('Cannot read extra data in %s mode' % (self.state,))
378 # Handle negative n.
379 if n < 0:
380 n += self.nframes
381 if n < 0 or n >= self.nframes:
382 raise IndexError('Trajectory index %d out of range [0, %d['
383 % (n, self.nframes))
384 framedir = os.path.join(self.filename, 'F' + str(n))
385 framezero = os.path.join(self.filename, 'F0')
386 return self._read_data(framezero, framedir, name, self.atom_id)

    def _read_data(self, f0, f, name, atom_id):
        "Read single data item."

        if self.subtype == 'normal':
            if self.datatypes[name] == 'once':
                d = self.backend.read(f0, name)
            else:
                d = self.backend.read(f, name)
        elif self.subtype == 'split':
            if self.datatypes[name] == 'once':
                d, issplit = self.backend.read_split(f0, name)
                atom_id, dummy = self.backend.read_split(f0, 'ID')
            else:
                d, issplit = self.backend.read_split(f, name)
            if issplit:
                assert atom_id is not None
                assert len(d) == len(atom_id)
                d[atom_id] = np.array(d)
        return d

    def __len__(self):
        return self.nframes

    def _open_log(self):
        if not (self.master or self.slavelog):
            return
        if self.master:
            lfn = os.path.join(self.filename, 'log.txt')
        else:
            lfn = os.path.join(self.filename, ('log-node%d.txt' %
                                               (world.rank,)))
        self.logfile = open(lfn, 'a', 1)  # Append to log if it exists.
        if hasattr(self, 'logdata'):
            for text in self.logdata:
                self.logfile.write(text + '\n')
            self.logfile.flush()
            del self.logdata

    def _open_write(self, atoms, backup, backend):
        "Open a bundle trajectory for writing."
        self._set_backend(backend)
        self.logfile = None  # enable delayed logging
        self.atoms = atoms
        if os.path.exists(self.filename):
            # The output directory already exists.
            barrier()  # all must have time to see it exists
            if not self.is_bundle(self.filename, allowempty=True):
                raise IOError(
                    'Filename "' + self.filename +
                    '" already exists, but is not a BundleTrajectory. ' +
                    'Cowardly refusing to remove it.')
            if self.is_empty_bundle(self.filename):
                barrier()
                self.log('Deleting old "%s" as it is empty' % (self.filename,))
                self.delete_bundle(self.filename)
            elif not backup:
                barrier()
                self.log('Deleting old "%s" as backup is turned off.' %
                         (self.filename,))
                self.delete_bundle(self.filename)
            else:
                barrier()
                # Make a backup file
                bakname = self.filename + '.bak'
                if os.path.exists(bakname):
                    barrier()  # All must see it exists
                    self.log('Deleting old backup file "%s"' % (bakname,))
                    self.delete_bundle(bakname)
                self.log('Renaming "%s" to "%s"' % (self.filename, bakname))
                self._rename_bundle(self.filename, bakname)
        # Ready to create a new bundle.
        barrier()
        self.log('Creating new "%s"' % (self.filename,))
        self._make_bundledir(self.filename)
        self.state = 'prewrite'
        self._write_metadata({})
        self._write_nframes(0)  # Mark new bundle as empty
        self._open_log()
        self.nframes = 0

    def _open_read(self):
        "Open a bundle trajectory for reading."
        if not os.path.exists(self.filename):
            raise IOError('File not found: ' + self.filename)
        if not self.is_bundle(self.filename):
            raise IOError('Not a BundleTrajectory: ' + self.filename)
        self.state = 'read'
        # Read the metadata
        metadata = self._read_metadata()
        self.metadata = metadata
        if metadata['version'] > self.version:
            raise NotImplementedError(
                'This version of ASE cannot read a BundleTrajectory version '
                + str(metadata['version']))
        if metadata['subtype'] not in ('normal', 'split'):
            raise NotImplementedError(
                'This version of ASE cannot read BundleTrajectory subtype '
                + metadata['subtype'])
        self.subtype = metadata['subtype']
        if metadata['backend'] == 'ulm':
            self.singleprecision = metadata['ulm.singleprecision']
        self._set_backend(metadata['backend'])
        self.nframes = self._read_nframes()
        if self.nframes == 0:
            raise IOError('Empty BundleTrajectory')
        self.datatypes = metadata['datatypes']
        try:
            self.pythonmajor = metadata['python_ver'][0]
        except KeyError:
            self.pythonmajor = 2  # Assume written with Python 2.
        # We need to know if we are running Python 3.X and try to read
        # a bundle written with Python 2.X
        self.backend.readpy2 = (self.pythonmajor == 2)
        self.state = 'read'

    def _open_append(self, atoms, backend):
        "Open a bundle trajectory for appending."
        if not os.path.exists(self.filename):
            # OK, no old bundle.  Open as for write instead.
            barrier()
            self._open_write(atoms, False, backend)
            return
        if not self.is_bundle(self.filename):
            raise IOError('Not a BundleTrajectory: ' + self.filename)
        self.state = 'read'
        metadata = self._read_metadata()
        self.metadata = metadata
        if metadata['version'] != self.version:
            raise NotImplementedError(
                'Cannot append to a BundleTrajectory version '
                '%s (supported version is %s)' % (str(metadata['version']),
                                                  str(self.version)))
        if metadata['subtype'] not in ('normal', 'split'):
            raise NotImplementedError(
                'This version of ASE cannot append to BundleTrajectory '
                'subtype ' + metadata['subtype'])
        self.subtype = metadata['subtype']
        if metadata['backend'] == 'ulm':
            self.singleprecision = metadata['ulm.singleprecision']
        self._set_backend(metadata['backend'])
        self.nframes = self._read_nframes()
        self._open_log()
        self.log('Opening "%s" in append mode (nframes=%i)' % (self.filename,
                                                               self.nframes))
        self.state = 'write'
        self.atoms = atoms

    @property
    def path(self):
        return Path(self.filename)

    @property
    def metadata_path(self):
        return self.path / 'metadata.json'

    def _write_nframes(self, n):
        "Write the number of frames in the bundle."
        assert self.state == 'write' or self.state == 'prewrite'
        with paropen(self.path / 'frames', 'w') as fd:
            fd.write(str(n) + '\n')

    def _read_nframes(self):
        "Read the number of frames."
        return int((self.path / 'frames').read_text())

    def _write_metadata(self, metadata):
        """Write the metadata file of the bundle.

        Modifies the metadata dictionary!
        """
        # Add standard fields that must always be present.
        assert self.state == 'write' or self.state == 'prewrite'
        metadata['format'] = 'BundleTrajectory'
        metadata['version'] = self.version
        metadata['subtype'] = self.subtype
        metadata['backend'] = self.backend_name
        if self.backend_name == 'ulm':
            metadata['ulm.singleprecision'] = self.singleprecision
        metadata['python_ver'] = tuple(sys.version_info)
        encode = jsonio.MyEncoder(indent=4).encode
        fido = encode(metadata)
        with paropen(self.metadata_path, 'w') as fd:
            fd.write(fido)

    def _read_metadata(self):
        """Read the metadata."""
        assert self.state == 'read'
        return jsonio.decode(self.metadata_path.read_text())

    @staticmethod
    def is_bundle(filename, allowempty=False):
        """Check if a filename exists and is a BundleTrajectory.

        If allowempty=True, an empty folder is regarded as an
        empty BundleTrajectory."""
        filename = Path(filename)
        if not filename.is_dir():
            return False
        if allowempty and not os.listdir(filename):
            return True  # An empty BundleTrajectory
        metaname = filename / 'metadata.json'
        if metaname.is_file():
            mdata = jsonio.decode(metaname.read_text())
        else:
            return False

        try:
            return mdata['format'] == 'BundleTrajectory'
        except KeyError:
            return False

    @staticmethod
    def is_empty_bundle(filename):
        """Check if a filename is an empty bundle.

        Assumes that it is a bundle."""
        if not os.listdir(filename):
            return True  # Empty folders are empty bundles.
        with open(os.path.join(filename, 'frames'), 'rb') as fd:
            nframes = int(fd.read())

        # File may be removed by the master immediately after this.
        barrier()
        return nframes == 0

    @classmethod
    def delete_bundle(cls, filename):
        "Deletes a bundle."
        if world.rank == 0:
            # Only the master deletes
            if not cls.is_bundle(filename, allowempty=True):
                raise IOError(
                    'Cannot remove "%s" as it is not a bundle trajectory.'
                    % (filename,))
            if os.path.islink(filename):
                # A symbolic link to a bundle.  Only remove the link.
                os.remove(filename)
            else:
                # A real bundle
                shutil.rmtree(filename)
        else:
            # All other tasks wait for the directory to go away.
            while os.path.exists(filename):
                time.sleep(1)
        # The master may not proceed before all tasks have seen the
        # directory go away, as it might otherwise create a new bundle
        # with the same name, fooling the wait loop in _make_bundledir.
        barrier()

    def _rename_bundle(self, oldname, newname):
        "Rename a bundle.  Used to create the .bak"
        if self.master:
            os.rename(oldname, newname)
        else:
            while os.path.exists(oldname):
                time.sleep(1)
        # The master may not proceed before all tasks have seen the
        # directory go away.
        barrier()

    def _make_bundledir(self, filename):
        """Make the main bundle directory.

        Since all MPI tasks might write to it, all tasks must wait for
        the directory to appear.
        """
        self.log('Making directory ' + filename)
        assert not os.path.isdir(filename)
        barrier()
        if self.master:
            os.mkdir(filename)
        else:
            i = 0
            while not os.path.isdir(filename):
                time.sleep(1)
                i += 1
                if i > 10:
                    self.log('Waiting %d seconds for %s to appear!'
                             % (i, filename))

    def _make_framedir(self, frame):
        """Make subdirectory for the frame.

        As only the master writes to it, no synchronization between
        MPI tasks is necessary.
        """
        framedir = os.path.join(self.filename, 'F' + str(frame))
        if self.master:
            self.log('Making directory ' + framedir)
            os.mkdir(framedir)
        return framedir

    def pre_write_attach(self, function, interval=1, *args, **kwargs):
        """Attach a function to be called before writing begins.

        function: The function or callable object to be called.

        interval: How often the function is called.  Default: every time (1).

        All other arguments are stored, and passed to the function.
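
        Example (a sketch; ``checkpoint`` is a hypothetical function)::

            traj.pre_write_attach(checkpoint, interval=100)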
687 """
688 if not callable(function):
689 raise ValueError('Callback object must be callable.')
690 self.pre_observers.append((function, interval, args, kwargs))

    def post_write_attach(self, function, interval=1, *args, **kwargs):
        """Attach a function to be called after writing ends.

        function: The function or callable object to be called.

        interval: How often the function is called.  Default: every time (1).

        All other arguments are stored, and passed to the function.
        """
        if not callable(function):
            raise ValueError('Callback object must be callable.')
        self.post_observers.append((function, interval, args, kwargs))

    def _call_observers(self, obs):
        "Call pre/post write observers."
        for function, interval, args, kwargs in obs:
            if (self.nframes + 1) % interval == 0:
                function(*args, **kwargs)


class UlmBundleBackend:
    """Backend for BundleTrajectories stored as ASE Ulm files."""

    def __init__(self, master, singleprecision):
        # Store if this backend will actually write anything
        self.writesmall = master
        self.writelarge = master
        self.singleprecision = singleprecision

        # Integer data may be downconverted to the following types
        self.integral_dtypes = ['uint8', 'int8', 'uint16', 'int16',
                                'uint32', 'int32', 'uint64', 'int64']
        self.int_dtype = {k: getattr(np, k)
                          for k in self.integral_dtypes}
        self.int_minval = {k: np.iinfo(self.int_dtype[k]).min
                           for k in self.integral_dtypes}
        self.int_maxval = {k: np.iinfo(self.int_dtype[k]).max
                           for k in self.integral_dtypes}
        self.int_itemsize = {k: np.dtype(self.int_dtype[k]).itemsize
                             for k in self.integral_dtypes}

    def write_small(self, framedir, smalldata):
        "Write small data to be written jointly."
        if self.writesmall:
            with ulmopen(os.path.join(framedir, 'smalldata.ulm'), 'w') as fd:
                fd.write(**smalldata)

    def write(self, framedir, name, data):
        "Write data to separate file."
        if self.writelarge:
            shape = data.shape
            dtype = str(data.dtype)
            stored_as = dtype
            all_identical = False
            # Check if it is a type that can be stored with less space
            if np.issubdtype(data.dtype, np.integer):
                # An integer type, we may want to convert
                minval = data.min()
                maxval = data.max()

                # ulm cannot write np.bool_:
                all_identical = bool(minval == maxval)
                if all_identical:
                    data = int(data.flat[0])  # Convert to standard integer
                else:
                    for typ in self.integral_dtypes:
                        if (minval >= self.int_minval[typ] and
                                maxval <= self.int_maxval[typ] and
                                data.itemsize > self.int_itemsize[typ]):

                            # Convert to smaller type
                            stored_as = typ
                            data = data.astype(self.int_dtype[typ])
            elif data.dtype == np.float32 or data.dtype == np.float64:
                all_identical = bool(data.min() == data.max())
                if all_identical:
                    data = float(data.flat[0])  # Convert to standard float
                elif data.dtype == np.float64 and self.singleprecision:
                    # Downconvert double to single precision
                    stored_as = 'float32'
                    data = data.astype(np.float32)
            fn = os.path.join(framedir, name + '.ulm')
            with ulmopen(fn, 'w') as fd:
                fd.write(shape=shape,
                         dtype=dtype,
                         stored_as=stored_as,
                         all_identical=all_identical,
                         data=data)

    def read_small(self, framedir):
        "Read small data."
        with ulmopen(os.path.join(framedir, 'smalldata.ulm'), 'r') as fd:
            return fd.asdict()

    def read(self, framedir, name):
        "Read data from separate file."
        fn = os.path.join(framedir, name + '.ulm')
        with ulmopen(fn, 'r') as fd:
            if fd.all_identical:
                # Only a single data value
                data = np.zeros(fd.shape,
                                dtype=getattr(np, fd.dtype)) + fd.data
            elif fd.dtype == fd.stored_as:
                # Easy, the array can be returned as-is.
                data = fd.data
            else:
                # Cast the data back
                data = fd.data.astype(getattr(np, fd.dtype))
            return data

    def read_info(self, framedir, name, split=None):
        """Read information about file contents without reading the data.

        Information is a dictionary containing as a minimum the shape
        and type.
        """
        fn = os.path.join(framedir, name + '.ulm')
        if split is None or os.path.exists(fn):
            with ulmopen(fn, 'r') as fd:
                info = dict()
                info['shape'] = fd.shape
                info['type'] = fd.dtype
                info['stored_as'] = fd.stored_as
                info['identical'] = fd.all_identical
            return info
        else:
            info = dict()
            for i in range(split):
                fn = os.path.join(framedir, name + '_' + str(i) + '.ulm')
                with ulmopen(fn, 'r') as fd:
                    if i == 0:
                        info['shape'] = list(fd.shape)
                        info['type'] = fd.dtype
                        info['stored_as'] = fd.stored_as
                        info['identical'] = fd.all_identical
                    else:
                        info['shape'][0] += fd.shape[0]
                        assert info['type'] == fd.dtype
                        info['identical'] = (info['identical']
                                             and fd.all_identical)
            info['shape'] = tuple(info['shape'])
            return info

    def set_fragments(self, nfrag):
        self.nfrag = nfrag

    def read_split(self, framedir, name):
        """Read data from multiple files.

        Falls back to reading from a single file if that is how the
        data is stored.

        Returns the data and an object indicating if the data was
        really read from split files.  The latter object is False if
        not read from split files, but is an array of the segment
        lengths if split files were used.
        """
        data = []
        if os.path.exists(os.path.join(framedir, name + '.ulm')):
            # Not stored in split form!
            return (self.read(framedir, name), False)
        for i in range(self.nfrag):
            suf = '_%d' % (i,)
            data.append(self.read(framedir, name + suf))
        seglengths = [len(d) for d in data]
        return (np.concatenate(data), seglengths)

    def close(self, log=None):
        """Close anything that needs to be closed by the backend.

        The default backend does nothing here.
        """
        pass


def read_bundletrajectory(filename, index=-1):
    """Reads one or more atoms objects from a BundleTrajectory.

    Arguments:

    filename: str
        The name of the bundle (really a directory!)
    index: int or slice
        An integer specifying which frame to read, or a slice object
        for reading multiple frames.  Default: -1 (reads the last
        frame).
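
    Example (a sketch; 'md.bundle' is a placeholder name)::

        for atoms in read_bundletrajectory('md.bundle', slice(0, None)):
            print(len(atoms))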
877 """
878 traj = BundleTrajectory(filename, mode='r')
879 for i in range(*index.indices(len(traj))):
880 yield traj[i]


def write_bundletrajectory(filename, images, append=False):
    """Write image(s) to a BundleTrajectory.

    Write also energy, forces, and stress if they are already
    calculated.
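
    Example (a sketch; ``images`` is assumed to be a list of Atoms
    objects)::

        write_bundletrajectory('md.bundle', images)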
888 """
    if append:
        mode = 'a'
    else:
        mode = 'w'
    traj = BundleTrajectory(filename, mode=mode)

    if hasattr(images, 'get_positions'):
        images = [images]

    for atoms in images:
        # Avoid potentially expensive calculations:
        calc = atoms.calc
        if hasattr(calc, 'calculation_required'):
            for quantity in ('energy', 'forces', 'stress', 'magmoms'):
                traj.select_data(quantity,
                                 not calc.calculation_required(atoms,
                                                               [quantity]))
        traj.write(atoms)
    traj.close()


def print_bundletrajectory_info(filename):
    """Prints information about a BundleTrajectory.

    Mainly intended to be called from a command line tool.
    """
    if not BundleTrajectory.is_bundle(filename):
        raise ValueError('Not a BundleTrajectory!')
    if BundleTrajectory.is_empty_bundle(filename):
        print(filename, 'is an empty BundleTrajectory.')
        return
    # Read the metadata
    fn = os.path.join(filename, 'metadata.json')
    with open(fn, 'r') as fd:
        metadata = jsonio.decode(fd.read())

    print('Metadata information of BundleTrajectory "%s":' % (filename,))
    for k, v in metadata.items():
        if k != 'datatypes':
            print('  %s: %s' % (k, v))
    with open(os.path.join(filename, 'frames'), 'rb') as fd:
        nframes = int(fd.read())
    print('Number of frames: %i' % (nframes,))
    print('Data types:')
    for k, v in metadata['datatypes'].items():
        if v == 'once':
            print('  %s: First frame only.' % (k,))
        elif v:
            print('  %s: All frames.' % (k,))
    # Look at first frame
    if metadata['backend'] == 'ulm':
        backend = UlmBundleBackend(True, False)
    else:
        raise NotImplementedError('Backend %s not supported.'
                                  % (metadata['backend'],))
    frame = os.path.join(filename, 'F0')
    small = backend.read_small(frame)
    print('Contents of first frame:')
    for k, v in small.items():
        if k == 'constraints':
            if v:
                print('  {} constraints are present'.format(len(v)))
            else:
                print('  Constraints are absent.')
        elif k == 'pbc':
            print('  Periodic boundary conditions: %s' % (str(v),))
        elif k == 'natoms':
            print('  Number of atoms: %i' % (v,))
        elif hasattr(v, 'shape'):
            print('  %s: shape = %s, type = %s' %
                  (k, str(v.shape), str(v.dtype)))
            if k == 'cell':
                print('    [[%12.6f, %12.6f, %12.6f],' % tuple(v[0]))
                print('     [%12.6f, %12.6f, %12.6f],' % tuple(v[1]))
                print('     [%12.6f, %12.6f, %12.6f]]' % tuple(v[2]))
        else:
            print('  %s: %s' % (k, str(v)))
    # Read info from separate files.
    if metadata['subtype'] == 'split':
        nsplit = small['fragments']
    else:
        nsplit = False
    for k, v in metadata['datatypes'].items():
        if v and k not in small:
            info = backend.read_info(frame, k, nsplit)
            infoline = '  %s: ' % (k,)
            # Use separate loop variables so the outer k, v survive.
            for key, value in info.items():
                infoline += '%s = %s, ' % (key, str(value))
            infoline = infoline[:-2] + '.'  # Fix punctuation.
            print(infoline)


class PickleBundleBackend:
    # Leave placeholder class so importing asap3 won't crash.
    pass


def main():
    import argparse
    parser = argparse.ArgumentParser(
        prog='python -m ase.io.bundletrajectory',
        description='Print information about '
        'the contents of one or more bundletrajectories.')
    parser.add_argument('bundles', nargs='*', metavar='a.bundle',
                        help='one or more bundle trajectories')
    args = parser.parse_args()
    for name in args.bundles:
        print_bundletrajectory_info(name)


if __name__ == '__main__':
    main()