Coverage for /builds/debichem-team/python-ase/ase/io/bundletrajectory.py: 74.13%
576 statements
« prev ^ index » next coverage.py v7.5.3, created at 2025-03-06 04:00 +0000
1"""bundletrajectory - a module for I/O from large MD simulations.
The BundleTrajectory class writes a trajectory into a directory with the
following structure (metadata is stored as JSON, large arrays in the ASE
Ulm format)::
7 filename.bundle (dir)
8 metadata.json Data about the file format, and about which
9 data is present.
10 frames The number of frames (ascii file)
11 F0 (dir) Frame number 0
12 smalldata.ulm Small data structures in a dictionary
13 (pbc, cell, ...)
14 numbers.ulm Atomic numbers
15 positions.ulm Positions
16 momenta.ulm Momenta
17 ...
18 F1 (dir)
20There is a folder for each frame, and the data is in the ASE Ulm format.
21"""
23import os
24import shutil
25import sys
26import time
27from pathlib import Path
29import numpy as np
31from ase import Atoms
32from ase.calculators.singlepoint import (
33 PropertyNotImplementedError,
34 SinglePointCalculator,
35)
37# The system json module causes memory leaks! Use ase's own.
38# import json
39from ase.io import jsonio
40from ase.io.ulm import open as ulmopen
41from ase.parallel import barrier, paropen, world
class BundleTrajectory:
    """Reads and writes atoms into a .bundle directory.

    The BundleTrajectory is an alternative way of storing
    trajectories, intended for large-scale molecular dynamics
    simulations, where a single flat file becomes unwieldy.  Instead,
    the data is stored in directory, a 'bundle' (the name bundle is
    inspired from bundles in Mac OS, which are really just directories
    the user is supposed to think of as a single file-like unit).

    Parameters:

    filename:
        The name of the directory.  Preferably ending in .bundle.

    mode (optional):
        The file opening mode.  'r' means open for reading, 'w' for
        writing and 'a' for appending.  Default: 'r'.  If opening in
        write mode, and the filename already exists, the old file is
        renamed to .bak (any old .bak file is deleted), except if the
        existing file is empty.

    atoms (optional):
        The atoms that will be written.  Can only be specified in
        write or append mode.  If not specified, the atoms must be
        given as an argument to the .write() method instead.

    backup=True:
        Use backup=False to disable renaming of an existing file.

    backend='ulm':
        Request a backend.  Currently only 'ulm' is supported.
        Only honored when writing.

    singleprecision=False:
        Store floating point data in single precision (ulm backend only).
    """
    slavelog = True  # Log from all nodes

    def __init__(self, filename, mode='r', atoms=None, backup=True,
                 backend='ulm', singleprecision=False):
        self.state = 'constructing'
        self.filename = filename
        self.pre_observers = []  # callback functions before write is performed
        self.post_observers = []  # callback functions after write is performed
        self.master = world.rank == 0
        self.extra_data = []
        self.singleprecision = singleprecision
        self._set_defaults()
        if mode == 'r':
            if atoms is not None:
                raise ValueError('You cannot specify atoms in read mode.')
            self._open_read()
        elif mode == 'w':
            self._open_write(atoms, backup, backend)
        elif mode == 'a':
            self._open_append(atoms, backend)
        else:
            raise ValueError('Unknown mode: ' + str(mode))

    def _set_defaults(self):
        "Set default values for internal parameters."
        self.version = 1
        self.subtype = 'normal'
        # Which data is stored: True = every frame, 'once' = first frame
        # only, False = never.
        self.datatypes = {'positions': True,
                          'numbers': 'once',
                          'tags': 'once',
                          'masses': 'once',
                          'momenta': True,
                          'forces': True,
                          'energy': True,
                          'energies': False,
                          'stress': False,
                          'magmoms': True}

    def _set_backend(self, backend):
        """Set the backend doing the actual I/O."""
        if backend is not None:
            self.backend_name = backend

        if self.backend_name == 'ulm':
            self.backend = UlmBundleBackend(self.master, self.singleprecision)
        else:
            raise NotImplementedError(
                'This version of ASE cannot use BundleTrajectory '
                'with backend "%s"' % self.backend_name)

    def write(self, atoms=None):
        """Write the atoms to the file.

        If the atoms argument is not given, the atoms object specified
        when creating the trajectory object is used.
        """
        # Check that we are in write mode
        if self.state == 'prewrite':
            self.state = 'write'
            assert self.nframes == 0
        elif self.state != 'write':
            raise RuntimeError('Cannot write in ' + self.state + ' mode.')

        if atoms is None:
            atoms = self.atoms

        # Handle NEB etc.  If atoms is just a normal Atoms object, it is used
        # as-is.
        for image in atoms.iterimages():
            self._write_atoms(image)

    def _write_atoms(self, atoms):
        "Write a single atoms object to file."
        self._call_observers(self.pre_observers)
        self.log('Beginning to write frame ' + str(self.nframes))
        framedir = self._make_framedir(self.nframes)

        # Check which data should be written the first time:
        # Modify datatypes so any element of type 'once' becomes true
        # for the first frame but false for subsequent frames.
        datatypes = {}
        for k, v in self.datatypes.items():
            if v == 'once':
                v = (self.nframes == 0)
            datatypes[k] = v

        # Write 'small' data structures.  They are written jointly.
        smalldata = {'pbc': atoms.get_pbc(),
                     'cell': atoms.get_cell(),
                     'natoms': atoms.get_global_number_of_atoms(),
                     'constraints': atoms.constraints}
        if datatypes.get('energy'):
            try:
                smalldata['energy'] = atoms.get_potential_energy()
            except (RuntimeError, PropertyNotImplementedError):
                # No calculator (or it cannot do energies): stop trying.
                self.datatypes['energy'] = False
        if datatypes.get('stress'):
            try:
                smalldata['stress'] = atoms.get_stress()
            except PropertyNotImplementedError:
                self.datatypes['stress'] = False
        self.backend.write_small(framedir, smalldata)

        # Write the large arrays.
        if datatypes.get('positions'):
            self.backend.write(framedir, 'positions', atoms.get_positions())
        if datatypes.get('numbers'):
            self.backend.write(framedir, 'numbers', atoms.get_atomic_numbers())
        if datatypes.get('tags'):
            if atoms.has('tags'):
                self.backend.write(framedir, 'tags', atoms.get_tags())
            else:
                self.datatypes['tags'] = False
        if datatypes.get('masses'):
            if atoms.has('masses'):
                self.backend.write(framedir, 'masses', atoms.get_masses())
            else:
                self.datatypes['masses'] = False
        if datatypes.get('momenta'):
            if atoms.has('momenta'):
                self.backend.write(framedir, 'momenta', atoms.get_momenta())
            else:
                self.datatypes['momenta'] = False
        if datatypes.get('magmoms'):
            if atoms.has('initial_magmoms'):
                self.backend.write(framedir, 'magmoms',
                                   atoms.get_initial_magnetic_moments())
            else:
                self.datatypes['magmoms'] = False
        if datatypes.get('forces'):
            try:
                x = atoms.get_forces()
            except (RuntimeError, PropertyNotImplementedError):
                self.datatypes['forces'] = False
            else:
                self.backend.write(framedir, 'forces', x)
                del x
        if datatypes.get('energies'):
            try:
                x = atoms.get_potential_energies()
            except (RuntimeError, PropertyNotImplementedError):
                self.datatypes['energies'] = False
            else:
                self.backend.write(framedir, 'energies', x)
                del x
        # Write any extra data
        for (label, source, once) in self.extra_data:
            if self.nframes == 0 or not once:
                if source is not None:
                    x = source()
                else:
                    x = atoms.get_array(label)
                self.backend.write(framedir, label, x)
                del x
                if once:
                    self.datatypes[label] = 'once'
                else:
                    self.datatypes[label] = True
        # Finally, write metadata if it is the first frame
        if self.nframes == 0:
            metadata = {'datatypes': self.datatypes}
            self._write_metadata(metadata)
        self._write_nframes(self.nframes + 1)
        self._call_observers(self.post_observers)
        self.log('Done writing frame ' + str(self.nframes))
        self.nframes += 1

    def select_data(self, data, value):
        """Selects if a given data type should be written.

        Data can be written in every frame (specify True), in the
        first frame only (specify 'once') or not at all (specify
        False).  Not all data types support the 'once' keyword, if not
        supported it is interpreted as True.

        The following data types are supported, the letter in parenthesis
        indicates the default:

        positions (T), numbers (O), tags (O), masses (O), momenta (T),
        forces (T), energy (T), energies (F), stress (F), magmoms (T)

        If a given property is not present during the first write, it
        will not be saved at all.
        """
        if value not in (True, False, 'once'):
            raise ValueError('Unknown write mode')
        if data not in self.datatypes:
            raise ValueError('Unsupported data type: ' + data)
        self.datatypes[data] = value

    def set_extra_data(self, name, source=None, once=False):
        """Adds extra data to be written.

        Parameters:
        name: The name of the data.

        source (optional): If specified, a callable object returning
        the data to be written.  If not specified it is instead
        assumed that the atoms contains the data as an array of the
        same name.

        once (optional): If specified and True, the data will only be
        written to the first frame.
        """
        self.extra_data.append((name, source, once))

    def close(self):
        "Closes the trajectory."
        self.state = 'closed'
        lf = getattr(self, 'logfile', None)
        self.backend.close(log=lf)
        if lf is not None:
            lf.close()
            del self.logfile

    def log(self, text):
        """Write to the log file in the bundle.

        Logging is only possible in write/append mode.

        This function is mainly for internal use, but can also be called by
        the user.
        """
        if not (self.master or self.slavelog):
            return
        text = time.asctime() + ': ' + text
        if hasattr(self, 'logfile'):
            # Logging enabled
            if self.logfile is None:
                # Logfile not yet open: buffer messages until _open_log.
                try:
                    self.logdata.append(text)
                except AttributeError:
                    self.logdata = [text]
            else:
                self.logfile.write(text + '\n')
                self.logfile.flush()
        else:
            raise RuntimeError('Cannot write to log file in mode ' +
                               self.state)

    # __getitem__ is the main reading method.
    def __getitem__(self, n):
        return self._read(n)

    def _read(self, n):
        """Read an atoms object from the BundleTrajectory."""
        if self.state != 'read':
            raise OSError(f'Cannot read in {self.state} mode')
        if n < 0:
            n += self.nframes
        if n < 0 or n >= self.nframes:
            raise IndexError('Trajectory index %d out of range [0, %d['
                             % (n, self.nframes))

        framedir = os.path.join(self.filename, 'F' + str(n))
        framezero = os.path.join(self.filename, 'F0')
        smalldata = self.backend.read_small(framedir)
        data = {}
        data['pbc'] = smalldata['pbc']
        data['cell'] = smalldata['cell']
        data['constraint'] = smalldata['constraints']
        if self.subtype == 'split':
            self.backend.set_fragments(smalldata['fragments'])
            self.atom_id, _dummy = self.backend.read_split(framedir, 'ID')
        else:
            self.atom_id = None
        atoms = Atoms(**data)
        natoms = smalldata['natoms']
        for name in ('positions', 'numbers', 'tags', 'masses',
                     'momenta'):
            if self.datatypes.get(name):
                atoms.arrays[name] = self._read_data(framezero, framedir,
                                                     name, self.atom_id)
                assert len(atoms.arrays[name]) == natoms

        # Create the atoms object
        if self.datatypes.get('energy'):
            if self.datatypes.get('forces'):
                forces = self.backend.read(framedir, 'forces')
            else:
                forces = None
            if self.datatypes.get('magmoms'):
                magmoms = self.backend.read(framedir, 'magmoms')
            else:
                magmoms = None
            calc = SinglePointCalculator(atoms,
                                         energy=smalldata.get('energy'),
                                         forces=forces,
                                         stress=smalldata.get('stress'),
                                         magmoms=magmoms)
            atoms.calc = calc
        return atoms

    def read_extra_data(self, name, n=0):
        """Read extra data stored alongside the atoms.

        Currently only used to read data stored by an NPT dynamics object.
        The data is not associated with individual atoms.
        """
        if self.state != 'read':
            raise OSError(f'Cannot read extra data in {self.state} mode')
        # Handle negative n.
        if n < 0:
            n += self.nframes
        if n < 0 or n >= self.nframes:
            raise IndexError('Trajectory index %d out of range [0, %d['
                             % (n, self.nframes))
        framedir = os.path.join(self.filename, 'F' + str(n))
        framezero = os.path.join(self.filename, 'F0')
        # NOTE(review): self.atom_id is set by _read(); reading extra data
        # before any frame has been read would fail here - confirm callers.
        return self._read_data(framezero, framedir, name, self.atom_id)

    def _read_data(self, f0, f, name, atom_id):
        "Read single data item."

        if self.subtype == 'normal':
            if self.datatypes[name] == 'once':
                d = self.backend.read(f0, name)
            else:
                d = self.backend.read(f, name)
        elif self.subtype == 'split':
            if self.datatypes[name] == 'once':
                d, issplit = self.backend.read_split(f0, name)
                atom_id, _dummy = self.backend.read_split(f0, 'ID')
            else:
                d, issplit = self.backend.read_split(f, name)
            if issplit:
                assert atom_id is not None
                assert len(d) == len(atom_id)
                # Scatter data back into per-atom order using the ID array.
                d[atom_id] = np.array(d)
        return d

    def __len__(self):
        return self.nframes

    def _open_log(self):
        "Open the log file."
        if not (self.master or self.slavelog):
            return
        if self.master:
            lfn = os.path.join(self.filename, 'log.txt')
        else:
            lfn = os.path.join(self.filename, ('log-node%d.txt' %
                                               (world.rank,)))
        self.logfile = open(lfn, 'a', 1)  # Append to log if it exists.
        if hasattr(self, 'logdata'):
            # Flush messages buffered by log() before the file was opened.
            for text in self.logdata:
                self.logfile.write(text + '\n')
            self.logfile.flush()
            del self.logdata

    def _open_write(self, atoms, backup, backend):
        """Open a bundle trajectory for writing.

        Parameters:
        atoms: Object to be written.
        backup: (bool) Whether a backup is kept if file already exists.
        backend: Which type of backend to use.

        Note that the file name of the bundle is already stored as an
        attribute.
        """
        self._set_backend(backend)
        self.logfile = None  # enable delayed logging
        self.atoms = atoms
        if os.path.exists(self.filename):
            # The output directory already exists.
            barrier()  # all must have time to see it exists
            if not self.is_bundle(self.filename, allowempty=True):
                raise OSError(
                    'Filename "' + self.filename +
                    '" already exists, but is not a BundleTrajectory.' +
                    ' Cowardly refusing to remove it.')
            if self.is_empty_bundle(self.filename):
                barrier()
                self.log(f'Deleting old "{self.filename}" as it is empty')
                self.delete_bundle(self.filename)
            elif not backup:
                barrier()
                self.log(
                    f'Deleting old "{self.filename}" as backup is turned '
                    f'off.')
                self.delete_bundle(self.filename)
            else:
                barrier()
                # Make a backup file
                bakname = self.filename + '.bak'
                if os.path.exists(bakname):
                    barrier()  # All must see it exists
                    self.log(f'Deleting old backup file "{bakname}"')
                    self.delete_bundle(bakname)
                self.log(f'Renaming "{self.filename}" to "{bakname}"')
                self._rename_bundle(self.filename, bakname)
        # Ready to create a new bundle.
        barrier()
        self.log(f'Creating new "{self.filename}"')
        self._make_bundledir(self.filename)
        self.state = 'prewrite'
        self._write_metadata({})
        self._write_nframes(0)  # Mark new bundle as empty
        self._open_log()
        self.nframes = 0

    def _open_read(self):
        "Open a bundle trajectory for reading."
        if not os.path.exists(self.filename):
            raise OSError('File not found: ' + self.filename)
        if not self.is_bundle(self.filename):
            raise OSError('Not a BundleTrajectory: ' + self.filename)
        self.state = 'read'
        # Read the metadata
        metadata = self._read_metadata()
        self.metadata = metadata
        if metadata['version'] > self.version:
            raise NotImplementedError(
                'This version of ASE cannot read a BundleTrajectory version '
                + str(metadata['version']))
        if metadata['subtype'] not in ('normal', 'split'):
            raise NotImplementedError(
                'This version of ASE cannot read BundleTrajectory subtype ' +
                metadata['subtype'])
        self.subtype = metadata['subtype']
        if metadata['backend'] == 'ulm':
            self.singleprecision = metadata['ulm.singleprecision']
        self._set_backend(metadata['backend'])
        self.nframes = self._read_nframes()
        if self.nframes == 0:
            raise OSError('Empty BundleTrajectory')
        self.datatypes = metadata['datatypes']
        try:
            self.pythonmajor = metadata['python_ver'][0]
        except KeyError:
            self.pythonmajor = 2  # Assume written with Python 2.
        # We need to know if we are running Python 3.X and try to read
        # a bundle written with Python 2.X
        self.backend.readpy2 = (self.pythonmajor == 2)

    def _open_append(self, atoms, backend):
        """Open a trajectory for writing in append mode.

        If there is no pre-existing bundle, it is just opened in write mode
        instead.

        Parameters:
        atoms: Object to be written.
        backend: The backend to be used if a new bundle is opened.  Ignored
            if we append to existing bundle, as the backend cannot be
            changed.

        The filename is already stored as an attribute.
        """
        if not os.path.exists(self.filename):
            # OK, no old bundle.  Open as for write instead.
            barrier()
            self._open_write(atoms, False, backend)
            return
        if not self.is_bundle(self.filename):
            raise OSError('Not a BundleTrajectory: ' + self.filename)
        # Temporarily in read state while validating the existing bundle.
        self.state = 'read'
        metadata = self._read_metadata()
        self.metadata = metadata
        if metadata['version'] != self.version:
            raise NotImplementedError(
                'Cannot append to a BundleTrajectory version '
                '%s (supported version is %s)' % (str(metadata['version']),
                                                  str(self.version)))
        if metadata['subtype'] not in ('normal', 'split'):
            raise NotImplementedError(
                'This version of ASE cannot append to BundleTrajectory '
                'subtype ' + metadata['subtype'])
        self.subtype = metadata['subtype']
        if metadata['backend'] == 'ulm':
            self.singleprecision = metadata['ulm.singleprecision']
        self._set_backend(metadata['backend'])
        self.nframes = self._read_nframes()
        self._open_log()
        self.log('Opening "%s" in append mode (nframes=%i)' % (self.filename,
                                                               self.nframes))
        self.state = 'write'
        self.atoms = atoms

    @property
    def path(self):
        return Path(self.filename)

    @property
    def metadata_path(self):
        return self.path / 'metadata.json'

    def _write_nframes(self, n):
        "Write the number of frames in the bundle."
        assert self.state in ('write', 'prewrite')
        with paropen(self.path / 'frames', 'w') as fd:
            fd.write(str(n) + '\n')

    def _read_nframes(self):
        "Read the number of frames."
        return int((self.path / 'frames').read_text())

    def _write_metadata(self, metadata):
        """Write the metadata file of the bundle.

        Modifies the metadata dictionary!
        """
        # Add standard fields that must always be present.
        assert self.state in ('write', 'prewrite')
        metadata['format'] = 'BundleTrajectory'
        metadata['version'] = self.version
        metadata['subtype'] = self.subtype
        metadata['backend'] = self.backend_name
        if self.backend_name == 'ulm':
            metadata['ulm.singleprecision'] = self.singleprecision
        metadata['python_ver'] = tuple(sys.version_info)
        encode = jsonio.MyEncoder(indent=4).encode
        fido = encode(metadata)
        with paropen(self.metadata_path, 'w') as fd:
            fd.write(fido)

    def _read_metadata(self):
        """Read the metadata."""
        assert self.state == 'read'
        return jsonio.decode(self.metadata_path.read_text())

    @staticmethod
    def is_bundle(filename, allowempty=False):
        """Check if a filename exists and is a BundleTrajectory.

        If allowempty=True, an empty folder is regarded as an
        empty BundleTrajectory."""
        filename = Path(filename)
        if not filename.is_dir():
            return False
        if allowempty and not os.listdir(filename):
            return True  # An empty BundleTrajectory
        metaname = filename / 'metadata.json'
        if metaname.is_file():
            mdata = jsonio.decode(metaname.read_text())
        else:
            return False

        try:
            return mdata['format'] == 'BundleTrajectory'
        except KeyError:
            return False

    @staticmethod
    def is_empty_bundle(filename):
        """Check if a filename is an empty bundle.

        Assumes that it is a bundle."""
        if not os.listdir(filename):
            return True  # Empty folders are empty bundles.
        with open(os.path.join(filename, 'frames'), 'rb') as fd:
            nframes = int(fd.read())

        # File may be removed by the master immediately after this.
        barrier()
        return nframes == 0

    @classmethod
    def delete_bundle(cls, filename):
        "Deletes a bundle."
        if world.rank == 0:
            # Only the master deletes
            if not cls.is_bundle(filename, allowempty=True):
                # Fixed: the file name must be interpolated into the message.
                raise OSError(
                    f'Cannot remove "{filename}" as it is not a bundle '
                    'trajectory.'
                )
            if os.path.islink(filename):
                # A symbolic link to a bundle.  Only remove the link.
                os.remove(filename)
            else:
                # A real bundle
                shutil.rmtree(filename)
        else:
            # All other tasks wait for the directory to go away.
            while os.path.exists(filename):
                time.sleep(1)
        # The master may not proceed before all tasks have seen the
        # directory go away, as it might otherwise create a new bundle
        # with the same name, fooling the wait loop in _make_bundledir.
        barrier()

    def _rename_bundle(self, oldname, newname):
        "Rename a bundle.  Used to create the .bak"
        if self.master:
            os.rename(oldname, newname)
        else:
            while os.path.exists(oldname):
                time.sleep(1)
        # The master may not proceed before all tasks have seen the
        # directory go away.
        barrier()

    def _make_bundledir(self, filename):
        """Make the main bundle directory.

        Since all MPI tasks might write to it, all tasks must wait for
        the directory to appear.
        """
        self.log('Making directory ' + filename)
        assert not os.path.isdir(filename)
        barrier()
        if self.master:
            os.mkdir(filename)
        else:
            i = 0
            while not os.path.isdir(filename):
                time.sleep(1)
                i += 1
                if i > 10:
                    self.log('Waiting %d seconds for %s to appear!'
                             % (i, filename))

    def _make_framedir(self, frame):
        """Make subdirectory for the frame.

        As only the master writes to it, no synchronization between
        MPI tasks is necessary.
        """
        framedir = os.path.join(self.filename, 'F' + str(frame))
        if self.master:
            self.log('Making directory ' + framedir)
            os.mkdir(framedir)
        return framedir

    def pre_write_attach(self, function, interval=1, *args, **kwargs):
        """Attach a function to be called before writing begins.

        function: The function or callable object to be called.

        interval: How often the function is called.  Default: every time (1).

        All other arguments are stored, and passed to the function.
        """
        if not callable(function):
            raise ValueError('Callback object must be callable.')
        self.pre_observers.append((function, interval, args, kwargs))

    def post_write_attach(self, function, interval=1, *args, **kwargs):
        """Attach a function to be called after writing ends.

        function: The function or callable object to be called.

        interval: How often the function is called.  Default: every time (1).

        All other arguments are stored, and passed to the function.
        """
        if not callable(function):
            raise ValueError('Callback object must be callable.')
        self.post_observers.append((function, interval, args, kwargs))

    def _call_observers(self, obs):
        "Call pre/post write observers."
        for function, interval, args, kwargs in obs:
            if (self.nframes + 1) % interval == 0:
                function(*args, **kwargs)
class UlmBundleBackend:
    """Backend for BundleTrajectories stored as ASE Ulm files."""

    def __init__(self, master, singleprecision):
        # Store if this backend will actually write anything
        self.writesmall = master
        self.writelarge = master
        self.singleprecision = singleprecision

        # Integer data may be downconverted to the following types
        self.integral_dtypes = ['uint8', 'int8', 'uint16', 'int16',
                                'uint32', 'int32', 'uint64', 'int64']
        # Lookup tables used by write() when narrowing integer arrays:
        # the dtype object, its representable min/max, and its item size,
        # keyed by dtype name.
        self.int_dtype = {k: getattr(np, k)
                          for k in self.integral_dtypes}
        self.int_minval = {k: np.iinfo(self.int_dtype[k]).min
                           for k in self.integral_dtypes}
        self.int_maxval = {k: np.iinfo(self.int_dtype[k]).max
                           for k in self.integral_dtypes}
        self.int_itemsize = {k: np.dtype(self.int_dtype[k]).itemsize
                             for k in self.integral_dtypes}

    def write_small(self, framedir, smalldata):
        "Write small data to be written jointly."
        if self.writesmall:
            with ulmopen(os.path.join(framedir, 'smalldata.ulm'), 'w') as fd:
                fd.write(**smalldata)

    def write(self, framedir, name, data):
        "Write data to separate file."
        if self.writelarge:
            shape = data.shape
            dtype = str(data.dtype)
            stored_as = dtype
            all_identical = False
            # Check if it is a type that can be stored with less space
            if np.issubdtype(data.dtype, np.integer):
                # An integer type, we may want to convert
                minval = data.min()
                maxval = data.max()

                # ulm cannot write np.bool_:
                all_identical = bool(minval == maxval)
                if all_identical:
                    data = int(data.flat[0])  # Convert to standard integer
                else:
                    for typ in self.integral_dtypes:
                        if (minval >= self.int_minval[typ] and
                            maxval <= self.int_maxval[typ] and
                                data.itemsize > self.int_itemsize[typ]):
                            # Convert to smaller type.  No break needed:
                            # after conversion the itemsize test prevents
                            # any later (wider) type from matching.
                            stored_as = typ
                            data = data.astype(self.int_dtype[typ])
            elif data.dtype == np.float32 or data.dtype == np.float64:
                all_identical = bool(data.min() == data.max())
                if all_identical:
                    data = float(data.flat[0])  # Convert to standard float
                elif data.dtype == np.float64 and self.singleprecision:
                    # Downconvert double to single precision
                    stored_as = 'float32'
                    data = data.astype(np.float32)
            fn = os.path.join(framedir, name + '.ulm')
            with ulmopen(fn, 'w') as fd:
                fd.write(shape=shape,
                         dtype=dtype,
                         stored_as=stored_as,
                         all_identical=all_identical,
                         data=data)

    def read_small(self, framedir):
        "Read small data."
        with ulmopen(os.path.join(framedir, 'smalldata.ulm'), 'r') as fd:
            return fd.asdict()

    def read(self, framedir, name):
        "Read data from separate file."
        fn = os.path.join(framedir, name + '.ulm')
        with ulmopen(fn, 'r') as fd:
            if fd.all_identical:
                # Only a single data value was stored; rebuild the full
                # array by broadcasting it to the original shape.
                data = np.zeros(fd.shape,
                                dtype=getattr(np, fd.dtype)) + fd.data
            elif fd.dtype == fd.stored_as:
                # Easy, the array can be returned as-is.
                data = fd.data
            else:
                # Cast the data back to the original (wider) dtype.
                data = fd.data.astype(getattr(np, fd.dtype))
        return data

    def read_info(self, framedir, name, split=None):
        """Read information about file contents without reading the data.

        Information is a dictionary containing as a minimum the shape and
        type.
        """
        fn = os.path.join(framedir, name + '.ulm')
        if split is None or os.path.exists(fn):
            # Data stored in a single file.
            with ulmopen(fn, 'r') as fd:
                info = {}
                info['shape'] = fd.shape
                info['type'] = fd.dtype
                info['stored_as'] = fd.stored_as
                info['identical'] = fd.all_identical
            return info
        else:
            # Data stored in `split` fragment files; combine their info.
            info = {}
            for i in range(split):
                fn = os.path.join(framedir, name + '_' + str(i) + '.ulm')
                with ulmopen(fn, 'r') as fd:
                    if i == 0:
                        info['shape'] = list(fd.shape)
                        info['type'] = fd.dtype
                        info['stored_as'] = fd.stored_as
                        info['identical'] = fd.all_identical
                    else:
                        info['shape'][0] += fd.shape[0]
                        assert info['type'] == fd.dtype
                        info['identical'] = (info['identical']
                                             and fd.all_identical)
            info['shape'] = tuple(info['shape'])
            return info

    def set_fragments(self, nfrag):
        # Number of fragment files used by read_split().
        self.nfrag = nfrag

    def read_split(self, framedir, name):
        """Read data from multiple files.

        Falls back to reading from single file if that is how data is stored.

        Returns the data and an object indicating if the data was really
        read from split files.  The latter object is False if not
        read from split files, but is an array of the segment length if
        split files were used.
        """
        data = []
        if os.path.exists(os.path.join(framedir, name + '.ulm')):
            # Not stored in split form!
            return (self.read(framedir, name), False)
        for i in range(self.nfrag):
            suf = '_%d' % (i,)
            data.append(self.read(framedir, name + suf))
        seglengths = [len(d) for d in data]
        return (np.concatenate(data), seglengths)

    def close(self, log=None):
        """Close anything that needs to be closed by the backend.

        The default backend does nothing here.
        """
def read_bundletrajectory(filename, index=-1):
    """Reads one or more atoms objects from a BundleTrajectory.

    Arguments:

    filename: str
        The name of the bundle (really a directory!)
    index: int
        An integer specifying which frame to read, or an index object
        for reading multiple frames.  Default: -1 (reads the last
        frame).
    """
    traj = BundleTrajectory(filename, mode='r')
    if isinstance(index, int):
        yield traj[index]
        # An int selects exactly one frame.  Without this return the
        # code below would be executed on the next iteration and crash,
        # since ints have no .indices() method.
        return
    for i in range(*index.indices(len(traj))):
        yield traj[i]
def write_bundletrajectory(filename, images, append=False):
    """Write image(s) to a BundleTrajectory.

    Write also energy, forces, and stress if they are already
    calculated.
    """
    traj = BundleTrajectory(filename, mode='a' if append else 'w')

    # A single Atoms object is wrapped in a list.
    if hasattr(images, 'get_positions'):
        images = [images]

    for atoms in images:
        # Avoid potentially expensive calculations:
        calc = atoms.calc
        if hasattr(calc, 'calculation_required'):
            for quantity in ('energy', 'forces', 'stress', 'magmoms'):
                required = calc.calculation_required(atoms, [quantity])
                traj.select_data(quantity, not required)
        traj.write(atoms)
    traj.close()
def print_bundletrajectory_info(filename):
    """Prints information about a BundleTrajectory.

    Mainly intended to be called from a command line tool.
    """
    if not BundleTrajectory.is_bundle(filename):
        raise ValueError('Not a BundleTrajectory!')
    if BundleTrajectory.is_empty_bundle(filename):
        print(filename, 'is an empty BundleTrajectory.')
        return
    # Read the metadata
    fn = os.path.join(filename, 'metadata.json')
    with open(fn) as fd:
        metadata = jsonio.decode(fd.read())

    # Fixed: the bundle name must be interpolated into the heading.
    print(f'Metadata information of BundleTrajectory "{filename}":')
    for k, v in metadata.items():
        if k != 'datatypes':
            print(f"  {k}: {v}")
    with open(os.path.join(filename, 'frames'), 'rb') as fd:
        nframes = int(fd.read())
    print('Number of frames: %i' % (nframes,))
    print('Data types:')
    for k, v in metadata['datatypes'].items():
        if v == 'once':
            print(f'  {k}: First frame only.')
        elif v:
            print(f'  {k}: All frames.')
    # Look at first frame
    if metadata['backend'] == 'ulm':
        backend = UlmBundleBackend(True, False)
    else:
        raise NotImplementedError(
            f'Backend {metadata["backend"]} not supported.')
    frame = os.path.join(filename, 'F0')
    small = backend.read_small(frame)
    print('Contents of first frame:')
    for k, v in small.items():
        if k == 'constraints':
            if v:
                print(f'  {len(v)} constraints are present')
            else:
                print('  Constraints are absent.')
        elif k == 'pbc':
            print(f'  Periodic boundary conditions: {v!s}')
        elif k == 'natoms':
            print('  Number of atoms: %i' % (v,))
        elif hasattr(v, 'shape'):
            print(f'  {k}: shape = {v.shape!s}, type = {v.dtype!s}')
            if k == 'cell':
                print('    [[%12.6f, %12.6f, %12.6f],' % tuple(v[0]))
                print('     [%12.6f, %12.6f, %12.6f],' % tuple(v[1]))
                print('     [%12.6f, %12.6f, %12.6f]]' % tuple(v[2]))
        else:
            print(f'  {k}: {v!s}')
    # Read info from separate files.
    if metadata['subtype'] == 'split':
        nsplit = small['fragments']
    else:
        nsplit = False
    for k, v in metadata['datatypes'].items():
        if v and k not in small:
            info = backend.read_info(frame, k, nsplit)
            infoline = f'  {k}: '
            # Use distinct names; the original shadowed the outer k, v.
            for key, value in info.items():
                infoline += f'{key} = {value!s}, '
            infoline = infoline[:-2] + '.'  # Fix punctuation.
            print(infoline)
def main():
    """Command-line entry point: print info about one or more bundles."""
    # optparse has been deprecated since Python 3.2; use argparse.
    import argparse
    parser = argparse.ArgumentParser(
        prog='python -m ase.io.bundletrajectory',
        description='Print information about '
        'the contents of one or more bundletrajectories.')
    parser.add_argument('bundles', nargs='*', metavar='a.bundle',
                        help='name of a bundle trajectory directory')
    args = parser.parse_args()
    for name in args.bundles:
        print_bundletrajectory_info(name)
# Allow running as a script: python -m ase.io.bundletrajectory a.bundle ...
if __name__ == '__main__':
    main()