Coverage for /builds/debichem-team/python-ase/ase/io/bundletrajectory.py: 74.13%

576 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2025-03-06 04:00 +0000

1"""bundletrajectory - a module for I/O from large MD simulations. 

2 

3The BundleTrajectory class writes trajectory into a directory with the 

4following structure, except we now use JSON instead of pickle, 

5so this text needs updating:: 

6 

7 filename.bundle (dir) 

8 metadata.json Data about the file format, and about which 

9 data is present. 

10 frames The number of frames (ascii file) 

11 F0 (dir) Frame number 0 

12 smalldata.ulm Small data structures in a dictionary 

13 (pbc, cell, ...) 

14 numbers.ulm Atomic numbers 

15 positions.ulm Positions 

16 momenta.ulm Momenta 

17 ... 

18 F1 (dir) 

19 

20There is a folder for each frame, and the data is in the ASE Ulm format. 

21""" 

22 

23import os 

24import shutil 

25import sys 

26import time 

27from pathlib import Path 

28 

29import numpy as np 

30 

31from ase import Atoms 

32from ase.calculators.singlepoint import ( 

33 PropertyNotImplementedError, 

34 SinglePointCalculator, 

35) 

36 

37# The system json module causes memory leaks! Use ase's own. 

38# import json 

39from ase.io import jsonio 

40from ase.io.ulm import open as ulmopen 

41from ase.parallel import barrier, paropen, world 

42 

43 

class BundleTrajectory:
    """Reads and writes atoms into a .bundle directory.

    The BundleTrajectory is an alternative way of storing
    trajectories, intended for large-scale molecular dynamics
    simulations, where a single flat file becomes unwieldy.  Instead,
    the data is stored in directory, a 'bundle' (the name bundle is
    inspired from bundles in Mac OS, which are really just directories
    the user is supposed to think of as a single file-like unit).

    Parameters:

    filename:
        The name of the directory.  Preferably ending in .bundle.

    mode (optional):
        The file opening mode.  'r' means open for reading, 'w' for
        writing and 'a' for appending.  Default: 'r'.  If opening in
        write mode, and the filename already exists, the old file is
        renamed to .bak (any old .bak file is deleted), except if the
        existing file is empty.

    atoms (optional):
        The atoms that will be written.  Can only be specified in
        write or append mode.  If not specified, the atoms must be
        given as an argument to the .write() method instead.

    backup=True:
        Use backup=False to disable renaming of an existing file.

    backend='ulm':
        Request a backend.  Currently only 'ulm' is supported.
        Only honored when writing.

    singleprecision=False:
        Store floating point data in single precision (ulm backend only).
    """

    slavelog = True  # Log from all nodes

    def __init__(self, filename, mode='r', atoms=None, backup=True,
                 backend='ulm', singleprecision=False):
        self.state = 'constructing'
        self.filename = filename
        self.pre_observers = []  # callback functions before write is performed
        self.post_observers = []  # callback functions after write is performed
        self.master = world.rank == 0
        self.extra_data = []
        self.singleprecision = singleprecision
        self._set_defaults()
        if mode == 'r':
            if atoms is not None:
                raise ValueError('You cannot specify atoms in read mode.')
            self._open_read()
        elif mode == 'w':
            self._open_write(atoms, backup, backend)
        elif mode == 'a':
            self._open_append(atoms, backend)
        else:
            raise ValueError('Unknown mode: ' + str(mode))

    def _set_defaults(self):
        "Set default values for internal parameters."
        self.version = 1
        self.subtype = 'normal'
        # Which data is stored per frame: True = every frame,
        # 'once' = first frame only, False = never.
        self.datatypes = {'positions': True,
                          'numbers': 'once',
                          'tags': 'once',
                          'masses': 'once',
                          'momenta': True,
                          'forces': True,
                          'energy': True,
                          'energies': False,
                          'stress': False,
                          'magmoms': True}

    def _set_backend(self, backend):
        """Set the backed doing the actual I/O."""
        if backend is not None:
            self.backend_name = backend

        if self.backend_name == 'ulm':
            self.backend = UlmBundleBackend(self.master, self.singleprecision)
        else:
            raise NotImplementedError(
                'This version of ASE cannot use BundleTrajectory '
                'with backend "%s"' % self.backend_name)

    def write(self, atoms=None):
        """Write the atoms to the file.

        If the atoms argument is not given, the atoms object specified
        when creating the trajectory object is used.
        """
        # Check that we are in write mode
        if self.state == 'prewrite':
            self.state = 'write'
            assert self.nframes == 0
        elif self.state != 'write':
            raise RuntimeError('Cannot write in ' + self.state + ' mode.')

        if atoms is None:
            atoms = self.atoms

        # Handle NEB etc.  If atoms is just a normal Atoms object, it is used
        # as-is.
        for image in atoms.iterimages():
            self._write_atoms(image)

    def _write_atoms(self, atoms):
        "Write a single atoms object to file."
        self._call_observers(self.pre_observers)
        self.log('Beginning to write frame ' + str(self.nframes))
        framedir = self._make_framedir(self.nframes)

        # Check which data should be written the first time:
        # Modify datatypes so any element of type 'once' becomes true
        # for the first frame but false for subsequent frames.
        datatypes = {}
        for k, v in self.datatypes.items():
            if v == 'once':
                v = (self.nframes == 0)
            datatypes[k] = v

        # Write 'small' data structures.  They are written jointly.
        smalldata = {'pbc': atoms.get_pbc(),
                     'cell': atoms.get_cell(),
                     'natoms': atoms.get_global_number_of_atoms(),
                     'constraints': atoms.constraints}
        if datatypes.get('energy'):
            try:
                smalldata['energy'] = atoms.get_potential_energy()
            except (RuntimeError, PropertyNotImplementedError):
                # No calculator / no energy: stop trying in later frames.
                self.datatypes['energy'] = False
        if datatypes.get('stress'):
            try:
                smalldata['stress'] = atoms.get_stress()
            except PropertyNotImplementedError:
                self.datatypes['stress'] = False
        self.backend.write_small(framedir, smalldata)

        # Write the large arrays.
        if datatypes.get('positions'):
            self.backend.write(framedir, 'positions', atoms.get_positions())
        if datatypes.get('numbers'):
            self.backend.write(framedir, 'numbers', atoms.get_atomic_numbers())
        if datatypes.get('tags'):
            if atoms.has('tags'):
                self.backend.write(framedir, 'tags', atoms.get_tags())
            else:
                self.datatypes['tags'] = False
        if datatypes.get('masses'):
            if atoms.has('masses'):
                self.backend.write(framedir, 'masses', atoms.get_masses())
            else:
                self.datatypes['masses'] = False
        if datatypes.get('momenta'):
            if atoms.has('momenta'):
                self.backend.write(framedir, 'momenta', atoms.get_momenta())
            else:
                self.datatypes['momenta'] = False
        if datatypes.get('magmoms'):
            if atoms.has('initial_magmoms'):
                self.backend.write(framedir, 'magmoms',
                                   atoms.get_initial_magnetic_moments())
            else:
                self.datatypes['magmoms'] = False
        if datatypes.get('forces'):
            try:
                x = atoms.get_forces()
            except (RuntimeError, PropertyNotImplementedError):
                self.datatypes['forces'] = False
            else:
                self.backend.write(framedir, 'forces', x)
                del x
        if datatypes.get('energies'):
            try:
                x = atoms.get_potential_energies()
            except (RuntimeError, PropertyNotImplementedError):
                self.datatypes['energies'] = False
            else:
                self.backend.write(framedir, 'energies', x)
                del x
        # Write any extra data
        for (label, source, once) in self.extra_data:
            if self.nframes == 0 or not once:
                if source is not None:
                    x = source()
                else:
                    x = atoms.get_array(label)
                self.backend.write(framedir, label, x)
                del x
                if once:
                    self.datatypes[label] = 'once'
                else:
                    self.datatypes[label] = True
        # Finally, write metadata if it is the first frame
        if self.nframes == 0:
            metadata = {'datatypes': self.datatypes}
            self._write_metadata(metadata)
        self._write_nframes(self.nframes + 1)
        self._call_observers(self.post_observers)
        self.log('Done writing frame ' + str(self.nframes))
        self.nframes += 1

    def select_data(self, data, value):
        """Selects if a given data type should be written.

        Data can be written in every frame (specify True), in the
        first frame only (specify 'once') or not at all (specify
        False).  Not all data types support the 'once' keyword, if not
        supported it is interpreted as True.

        The following data types are supported, the letter in parenthesis
        indicates the default:

        positions (T), numbers (O), tags (O), masses (O), momenta (T),
        forces (T), energy (T), energies (F), stress (F), magmoms (T)

        If a given property is not present during the first write, it
        will be not be saved at all.
        """
        if value not in (True, False, 'once'):
            raise ValueError('Unknown write mode')
        if data not in self.datatypes:
            raise ValueError('Unsupported data type: ' + data)
        self.datatypes[data] = value

    def set_extra_data(self, name, source=None, once=False):
        """Adds extra data to be written.

        Parameters:
        name:  The name of the data.

        source (optional): If specified, a callable object returning
        the data to be written.  If not specified it is instead
        assumed that the atoms contains the data as an array of the
        same name.

        once (optional): If specified and True, the data will only be
        written to the first frame.
        """
        self.extra_data.append((name, source, once))

    def close(self):
        "Closes the trajectory."
        self.state = 'closed'
        lf = getattr(self, 'logfile', None)
        self.backend.close(log=lf)
        if lf is not None:
            lf.close()
            del self.logfile

    def log(self, text):
        """Write to the log file in the bundle.

        Logging is only possible in write/append mode.

        This function is mainly for internal use, but can also be called by
        the user.
        """
        if not (self.master or self.slavelog):
            return
        text = time.asctime() + ': ' + text
        if hasattr(self, 'logfile'):
            # Logging enabled
            if self.logfile is None:
                # Logfile not yet open; buffer messages until it is.
                try:
                    self.logdata.append(text)
                except AttributeError:
                    self.logdata = [text]
            else:
                self.logfile.write(text + '\n')
                self.logfile.flush()
        else:
            raise RuntimeError('Cannot write to log file in mode ' +
                               self.state)

    # __getitem__ is the main reading method.
    def __getitem__(self, n):
        return self._read(n)

    def _read(self, n):
        """Read an atoms object from the BundleTrajectory."""
        if self.state != 'read':
            raise OSError(f'Cannot read in {self.state} mode')
        if n < 0:
            n += self.nframes
        if n < 0 or n >= self.nframes:
            raise IndexError('Trajectory index %d out of range [0, %d['
                             % (n, self.nframes))

        framedir = os.path.join(self.filename, 'F' + str(n))
        framezero = os.path.join(self.filename, 'F0')
        smalldata = self.backend.read_small(framedir)
        data = {}
        data['pbc'] = smalldata['pbc']
        data['cell'] = smalldata['cell']
        data['constraint'] = smalldata['constraints']
        if self.subtype == 'split':
            self.backend.set_fragments(smalldata['fragments'])
            self.atom_id, _dummy = self.backend.read_split(framedir, 'ID')
        else:
            self.atom_id = None
        atoms = Atoms(**data)
        natoms = smalldata['natoms']
        for name in ('positions', 'numbers', 'tags', 'masses',
                     'momenta'):
            if self.datatypes.get(name):
                atoms.arrays[name] = self._read_data(framezero, framedir,
                                                     name, self.atom_id)
                assert len(atoms.arrays[name]) == natoms

        # Create the atoms object
        if self.datatypes.get('energy'):
            if self.datatypes.get('forces'):
                forces = self.backend.read(framedir, 'forces')
            else:
                forces = None
            if self.datatypes.get('magmoms'):
                magmoms = self.backend.read(framedir, 'magmoms')
            else:
                magmoms = None
            calc = SinglePointCalculator(atoms,
                                         energy=smalldata.get('energy'),
                                         forces=forces,
                                         stress=smalldata.get('stress'),
                                         magmoms=magmoms)
            atoms.calc = calc
        return atoms

    def read_extra_data(self, name, n=0):
        """Read extra data stored alongside the atoms.

        Currently only used to read data stored by an NPT dynamics object.
        The data is not associated with individual atoms.
        """
        if self.state != 'read':
            raise OSError(f'Cannot read extra data in {self.state} mode')
        # Handle negative n.
        if n < 0:
            n += self.nframes
        if n < 0 or n >= self.nframes:
            raise IndexError('Trajectory index %d out of range [0, %d['
                             % (n, self.nframes))
        framedir = os.path.join(self.filename, 'F' + str(n))
        framezero = os.path.join(self.filename, 'F0')
        return self._read_data(framezero, framedir, name, self.atom_id)

    def _read_data(self, f0, f, name, atom_id):
        "Read single data item."

        if self.subtype == 'normal':
            # 'once' data lives in the first frame directory (f0).
            if self.datatypes[name] == 'once':
                d = self.backend.read(f0, name)
            else:
                d = self.backend.read(f, name)
        elif self.subtype == 'split':
            if self.datatypes[name] == 'once':
                d, issplit = self.backend.read_split(f0, name)
                atom_id, _dummy = self.backend.read_split(f0, 'ID')
            else:
                d, issplit = self.backend.read_split(f, name)
            if issplit:
                assert atom_id is not None
                assert len(d) == len(atom_id)
                # Scatter fragment data back into original atom order.
                d[atom_id] = np.array(d)
        return d

    def __len__(self):
        return self.nframes

    def _open_log(self):
        "Open the log file."
        if not (self.master or self.slavelog):
            return
        if self.master:
            lfn = os.path.join(self.filename, 'log.txt')
        else:
            lfn = os.path.join(self.filename, ('log-node%d.txt' %
                                               (world.rank,)))
        self.logfile = open(lfn, 'a', 1)  # Append to log if it exists.
        if hasattr(self, 'logdata'):
            # Flush messages buffered before the file could be opened.
            for text in self.logdata:
                self.logfile.write(text + '\n')
            self.logfile.flush()
            del self.logdata

    def _open_write(self, atoms, backup, backend):
        """Open a bundle trajectory for writing.

        Parameters:
        atoms: Object to be written.
        backup: (bool) Whether a backup is kept if file already exists.
        backend: Which type of backend to use.

        Note that the file name of the bundle is already stored as an attribute.
        """
        self._set_backend(backend)
        self.logfile = None  # enable delayed logging
        self.atoms = atoms
        if os.path.exists(self.filename):
            # The output directory already exists.
            barrier()  # all must have time to see it exists
            if not self.is_bundle(self.filename, allowempty=True):
                raise OSError(
                    'Filename "' + self.filename +
                    '" already exists, but is not a BundleTrajectory.' +
                    ' Cowardly refusing to remove it.')
            if self.is_empty_bundle(self.filename):
                barrier()
                self.log(f'Deleting old "{self.filename}" as it is empty')
                self.delete_bundle(self.filename)
            elif not backup:
                barrier()
                self.log(
                    f'Deleting old "{self.filename}" as backup is turned off.')
                self.delete_bundle(self.filename)
            else:
                barrier()
                # Make a backup file
                bakname = self.filename + '.bak'
                if os.path.exists(bakname):
                    barrier()  # All must see it exists
                    self.log(f'Deleting old backup file "{bakname}"')
                    self.delete_bundle(bakname)
                self.log(f'Renaming "{self.filename}" to "{bakname}"')
                self._rename_bundle(self.filename, bakname)
        # Ready to create a new bundle.
        barrier()
        self.log(f'Creating new "{self.filename}"')
        self._make_bundledir(self.filename)
        self.state = 'prewrite'
        self._write_metadata({})
        self._write_nframes(0)  # Mark new bundle as empty
        self._open_log()
        self.nframes = 0

    def _open_read(self):
        "Open a bundle trajectory for reading."
        if not os.path.exists(self.filename):
            raise OSError('File not found: ' + self.filename)
        if not self.is_bundle(self.filename):
            raise OSError('Not a BundleTrajectory: ' + self.filename)
        self.state = 'read'
        # Read the metadata
        metadata = self._read_metadata()
        self.metadata = metadata
        if metadata['version'] > self.version:
            raise NotImplementedError(
                'This version of ASE cannot read a BundleTrajectory version ' +
                str(metadata['version']))
        if metadata['subtype'] not in ('normal', 'split'):
            raise NotImplementedError(
                'This version of ASE cannot read BundleTrajectory subtype ' +
                metadata['subtype'])
        self.subtype = metadata['subtype']
        if metadata['backend'] == 'ulm':
            self.singleprecision = metadata['ulm.singleprecision']
        self._set_backend(metadata['backend'])
        self.nframes = self._read_nframes()
        if self.nframes == 0:
            raise OSError('Empty BundleTrajectory')
        self.datatypes = metadata['datatypes']
        try:
            self.pythonmajor = metadata['python_ver'][0]
        except KeyError:
            self.pythonmajor = 2  # Assume written with Python 2.
        # We need to know if we are running Python 3.X and try to read
        # a bundle written with Python 2.X
        self.backend.readpy2 = (self.pythonmajor == 2)
        self.state = 'read'

    def _open_append(self, atoms, backend):
        """Open a trajectory for writing in append mode.

        If there is no pre-existing bundle, it is just opened in write mode
        instead.

        Parameters:
        atoms: Object to be written.
        backend: The backend to be used if a new bundle is opened.  Ignored
            if we append to existing bundle, as the backend cannot be
            changed.

        The filename is already stored as an attribute.
        """
        if not os.path.exists(self.filename):
            # OK, no old bundle.  Open as for write instead.
            barrier()
            self._open_write(atoms, False, backend)
            return
        if not self.is_bundle(self.filename):
            raise OSError('Not a BundleTrajectory: ' + self.filename)
        self.state = 'read'
        metadata = self._read_metadata()
        self.metadata = metadata
        if metadata['version'] != self.version:
            raise NotImplementedError(
                'Cannot append to a BundleTrajectory version '
                '%s (supported version is %s)' % (str(metadata['version']),
                                                  str(self.version)))
        if metadata['subtype'] not in ('normal', 'split'):
            raise NotImplementedError(
                'This version of ASE cannot append to BundleTrajectory '
                'subtype ' + metadata['subtype'])
        self.subtype = metadata['subtype']
        if metadata['backend'] == 'ulm':
            self.singleprecision = metadata['ulm.singleprecision']
        self._set_backend(metadata['backend'])
        self.nframes = self._read_nframes()
        self._open_log()
        self.log('Opening "%s" in append mode (nframes=%i)' % (self.filename,
                                                               self.nframes))
        self.state = 'write'
        self.atoms = atoms

    @property
    def path(self):
        return Path(self.filename)

    @property
    def metadata_path(self):
        return self.path / 'metadata.json'

    def _write_nframes(self, n):
        "Write the number of frames in the bundle."
        assert self.state == 'write' or self.state == 'prewrite'
        with paropen(self.path / 'frames', 'w') as fd:
            fd.write(str(n) + '\n')

    def _read_nframes(self):
        "Read the number of frames."
        return int((self.path / 'frames').read_text())

    def _write_metadata(self, metadata):
        """Write the metadata file of the bundle.

        Modifies the metadata dictionary!
        """
        # Add standard fields that must always be present.
        assert self.state == 'write' or self.state == 'prewrite'
        metadata['format'] = 'BundleTrajectory'
        metadata['version'] = self.version
        metadata['subtype'] = self.subtype
        metadata['backend'] = self.backend_name
        if self.backend_name == 'ulm':
            metadata['ulm.singleprecision'] = self.singleprecision
        metadata['python_ver'] = tuple(sys.version_info)
        encode = jsonio.MyEncoder(indent=4).encode
        fido = encode(metadata)
        with paropen(self.metadata_path, 'w') as fd:
            fd.write(fido)

    def _read_metadata(self):
        """Read the metadata."""
        assert self.state == 'read'
        return jsonio.decode(self.metadata_path.read_text())

    @staticmethod
    def is_bundle(filename, allowempty=False):
        """Check if a filename exists and is a BundleTrajectory.

        If allowempty=True, an empty folder is regarded as an
        empty BundleTrajectory."""
        filename = Path(filename)
        if not filename.is_dir():
            return False
        if allowempty and not os.listdir(filename):
            return True  # An empty BundleTrajectory
        metaname = filename / 'metadata.json'
        if metaname.is_file():
            mdata = jsonio.decode(metaname.read_text())
        else:
            return False

        try:
            return mdata['format'] == 'BundleTrajectory'
        except KeyError:
            return False

    @staticmethod
    def is_empty_bundle(filename):
        """Check if a filename is an empty bundle.

        Assumes that it is a bundle."""
        if not os.listdir(filename):
            return True  # Empty folders are empty bundles.
        with open(os.path.join(filename, 'frames'), 'rb') as fd:
            nframes = int(fd.read())

        # File may be removed by the master immediately after this.
        barrier()
        return nframes == 0

    @classmethod
    def delete_bundle(cls, filename):
        "Deletes a bundle."
        if world.rank == 0:
            # Only the master deletes
            if not cls.is_bundle(filename, allowempty=True):
                # Include the actual filename in the error message.
                raise OSError(
                    f'Cannot remove "{filename}" as it is not a bundle '
                    'trajectory.'
                )
            if os.path.islink(filename):
                # A symbolic link to a bundle.  Only remove the link.
                os.remove(filename)
            else:
                # A real bundle
                shutil.rmtree(filename)
        else:
            # All other tasks wait for the directory to go away.
            while os.path.exists(filename):
                time.sleep(1)
        # The master may not proceed before all tasks have seen the
        # directory go away, as it might otherwise create a new bundle
        # with the same name, fooling the wait loop in _make_bundledir.
        barrier()

    def _rename_bundle(self, oldname, newname):
        "Rename a bundle.  Used to create the .bak"
        if self.master:
            os.rename(oldname, newname)
        else:
            while os.path.exists(oldname):
                time.sleep(1)
        # The master may not proceed before all tasks have seen the
        # directory go away.
        barrier()

    def _make_bundledir(self, filename):
        """Make the main bundle directory.

        Since all MPI tasks might write to it, all tasks must wait for
        the directory to appear.
        """
        self.log('Making directory ' + filename)
        assert not os.path.isdir(filename)
        barrier()
        if self.master:
            os.mkdir(filename)
        else:
            i = 0
            while not os.path.isdir(filename):
                time.sleep(1)
                i += 1
                if i > 10:
                    self.log('Waiting %d seconds for %s to appear!'
                             % (i, filename))

    def _make_framedir(self, frame):
        """Make subdirectory for the frame.

        As only the master writes to it, no synchronization between
        MPI tasks is necessary.
        """
        framedir = os.path.join(self.filename, 'F' + str(frame))
        if self.master:
            self.log('Making directory ' + framedir)
            os.mkdir(framedir)
        return framedir

    def pre_write_attach(self, function, interval=1, *args, **kwargs):
        """Attach a function to be called before writing begins.

        function: The function or callable object to be called.

        interval: How often the function is called.  Default: every time (1).

        All other arguments are stored, and passed to the function.
        """
        if not callable(function):
            raise ValueError('Callback object must be callable.')
        self.pre_observers.append((function, interval, args, kwargs))

    def post_write_attach(self, function, interval=1, *args, **kwargs):
        """Attach a function to be called after writing ends.

        function: The function or callable object to be called.

        interval: How often the function is called.  Default: every time (1).

        All other arguments are stored, and passed to the function.
        """
        if not callable(function):
            raise ValueError('Callback object must be callable.')
        self.post_observers.append((function, interval, args, kwargs))

    def _call_observers(self, obs):
        "Call pre/post write observers."
        for function, interval, args, kwargs in obs:
            if (self.nframes + 1) % interval == 0:
                function(*args, **kwargs)

738 

739 

class UlmBundleBackend:
    """Backend for BundleTrajectories stored as ASE Ulm files."""

    def __init__(self, master, singleprecision):
        # Store if this backend will actually write anything
        self.writesmall = master
        self.writelarge = master
        # If True, float64 arrays are stored as float32 on write.
        self.singleprecision = singleprecision

        # Integer data may be downconverted to the following types
        self.integral_dtypes = ['uint8', 'int8', 'uint16', 'int16',
                                'uint32', 'int32', 'uint64', 'int64']
        # Dict comprehensions not supported in Python 2.6 :-(
        self.int_dtype = {k: getattr(np, k)
                          for k in self.integral_dtypes}
        self.int_minval = {k: np.iinfo(self.int_dtype[k]).min
                           for k in self.integral_dtypes}
        self.int_maxval = {k: np.iinfo(self.int_dtype[k]).max
                           for k in self.integral_dtypes}
        self.int_itemsize = {k: np.dtype(self.int_dtype[k]).itemsize
                             for k in self.integral_dtypes}

    def write_small(self, framedir, smalldata):
        "Write small data to be written jointly."
        if self.writesmall:
            with ulmopen(os.path.join(framedir, 'smalldata.ulm'), 'w') as fd:
                fd.write(**smalldata)

    def write(self, framedir, name, data):
        "Write data to separate file."
        if self.writelarge:
            shape = data.shape
            dtype = str(data.dtype)
            stored_as = dtype
            all_identical = False
            # Check if it a type that can be stored with less space
            if np.issubdtype(data.dtype, np.integer):
                # An integer type, we may want to convert
                minval = data.min()
                maxval = data.max()

                # ulm cannot write np.bool_:
                all_identical = bool(minval == maxval)
                if all_identical:
                    data = int(data.flat[0])  # Convert to standard integer
                else:
                    # Find a smaller integer dtype that can hold the full
                    # value range; after a conversion, data.itemsize shrinks
                    # so later (larger) candidates no longer match.
                    for typ in self.integral_dtypes:
                        if (minval >= self.int_minval[typ] and
                            maxval <= self.int_maxval[typ] and
                            data.itemsize > self.int_itemsize[typ]):

                            # Convert to smaller type
                            stored_as = typ
                            data = data.astype(self.int_dtype[typ])
            elif data.dtype == np.float32 or data.dtype == np.float64:
                all_identical = bool(data.min() == data.max())
                if all_identical:
                    data = float(data.flat[0])  # Convert to standard float
                elif data.dtype == np.float64 and self.singleprecision:
                    # Downconvert double to single precision
                    stored_as = 'float32'
                    data = data.astype(np.float32)
            fn = os.path.join(framedir, name + '.ulm')
            with ulmopen(fn, 'w') as fd:
                fd.write(shape=shape,
                         dtype=dtype,
                         stored_as=stored_as,
                         all_identical=all_identical,
                         data=data)

    def read_small(self, framedir):
        "Read small data."
        with ulmopen(os.path.join(framedir, 'smalldata.ulm'), 'r') as fd:
            return fd.asdict()

    def read(self, framedir, name):
        "Read data from separate file."
        fn = os.path.join(framedir, name + '.ulm')
        with ulmopen(fn, 'r') as fd:
            if fd.all_identical:
                # Only a single data value: expand it to the stored shape.
                data = np.zeros(fd.shape,
                                dtype=getattr(np, fd.dtype)) + fd.data
            elif fd.dtype == fd.stored_as:
                # Easy, the array can be returned as-is.
                data = fd.data
            else:
                # Cast the data back to the original dtype.
                data = fd.data.astype(getattr(np, fd.dtype))
        return data

    def read_info(self, framedir, name, split=None):
        """Read information about file contents without reading the data.

        Information is a dictionary containing as a minimum the shape and
        type.
        """
        fn = os.path.join(framedir, name + '.ulm')
        if split is None or os.path.exists(fn):
            # Data stored in a single file.
            with ulmopen(fn, 'r') as fd:
                info = {}
                info['shape'] = fd.shape
                info['type'] = fd.dtype
                info['stored_as'] = fd.stored_as
                info['identical'] = fd.all_identical
                return info
        else:
            # Data stored split across `split` fragment files; accumulate
            # the total shape along the first axis.
            info = {}
            for i in range(split):
                fn = os.path.join(framedir, name + '_' + str(i) + '.ulm')
                with ulmopen(fn, 'r') as fd:
                    if i == 0:
                        info['shape'] = list(fd.shape)
                        info['type'] = fd.dtype
                        info['stored_as'] = fd.stored_as
                        info['identical'] = fd.all_identical
                    else:
                        info['shape'][0] += fd.shape[0]
                        assert info['type'] == fd.dtype
                        info['identical'] = (info['identical']
                                             and fd.all_identical)
            info['shape'] = tuple(info['shape'])
            return info

    def set_fragments(self, nfrag):
        # Number of fragment files used by read_split().
        self.nfrag = nfrag

    def read_split(self, framedir, name):
        """Read data from multiple files.

        Falls back to reading from single file if that is how data is stored.

        Returns the data and an object indicating if the data was really
        read from split files.  The latter object is False if not
        read from split files, but is an array of the segment length if
        split files were used.
        """
        data = []
        if os.path.exists(os.path.join(framedir, name + '.ulm')):
            # Not stored in split form!
            return (self.read(framedir, name), False)
        for i in range(self.nfrag):
            suf = '_%d' % (i,)
            data.append(self.read(framedir, name + suf))
        seglengths = [len(d) for d in data]
        return (np.concatenate(data), seglengths)

    def close(self, log=None):
        """Close anything that needs to be closed by the backend.

        The default backend does nothing here.
        """

892 

893 

def read_bundletrajectory(filename, index=-1):
    """Reads one or more atoms objects from a BundleTrajectory.

    Arguments:

    filename: str
        The name of the bundle (really a directory!)
    index: int
        An integer specifying which frame to read, or an index object
        for reading multiple frames.  Default: -1 (reads the last
        frame).
    """
    traj = BundleTrajectory(filename, mode='r')
    if isinstance(index, int):
        # A single frame was requested; yield it and stop.  Without the
        # early return, resuming the generator would call
        # index.indices() on an int and raise AttributeError.
        yield traj[index]
        return

    # Otherwise index is a slice-like object.
    for i in range(*index.indices(len(traj))):
        yield traj[i]

912 

913 

def write_bundletrajectory(filename, images, append=False):
    """Write image(s) to a BundleTrajectory.

    Write also energy, forces, and stress if they are already
    calculated.
    """
    mode = 'a' if append else 'w'
    traj = BundleTrajectory(filename, mode=mode)

    # A single Atoms object is wrapped in a list.
    if hasattr(images, 'get_positions'):
        images = [images]

    for atoms in images:
        # Avoid potentially expensive calculations:
        calc = atoms.calc
        if hasattr(calc, 'calculation_required'):
            for quantity in ('energy', 'forces', 'stress', 'magmoms'):
                needs_calc = calc.calculation_required(atoms, [quantity])
                traj.select_data(quantity, not needs_calc)
        traj.write(atoms)
    traj.close()

940 

941 

def print_bundletrajectory_info(filename):
    """Prints information about a BundleTrajectory.

    Mainly intended to be called from a command line tool.
    """
    if not BundleTrajectory.is_bundle(filename):
        raise ValueError('Not a BundleTrajectory!')
    if BundleTrajectory.is_empty_bundle(filename):
        print(filename, 'is an empty BundleTrajectory.')
        return
    # Read the metadata
    fn = os.path.join(filename, 'metadata.json')
    with open(fn) as fd:
        metadata = jsonio.decode(fd.read())

    # Report the actual bundle name (the original f-string had no
    # placeholder and printed a constant).
    print(f'Metadata information of BundleTrajectory "{filename}":')
    for k, v in metadata.items():
        if k != 'datatypes':
            print(f'  {k}: {v}')
    with open(os.path.join(filename, 'frames'), 'rb') as fd:
        nframes = int(fd.read())
    print('Number of frames: %i' % (nframes,))
    print('Data types:')
    for k, v in metadata['datatypes'].items():
        if v == 'once':
            print(f'  {k}: First frame only.')
        elif v:
            print(f'  {k}: All frames.')
    # Look at first frame
    if metadata['backend'] == 'ulm':
        backend = UlmBundleBackend(True, False)
    else:
        raise NotImplementedError(
            f'Backend {metadata["backend"]} not supported.')
    frame = os.path.join(filename, 'F0')
    small = backend.read_small(frame)
    print('Contents of first frame:')
    for k, v in small.items():
        if k == 'constraints':
            if v:
                print(f'  {len(v)} constraints are present')
            else:
                print('  Constraints are absent.')
        elif k == 'pbc':
            print(f'  Periodic boundary conditions: {v!s}')
        elif k == 'natoms':
            print('  Number of atoms: %i' % (v,))
        elif hasattr(v, 'shape'):
            print(f'  {k}: shape = {v.shape!s}, type = {v.dtype!s}')
            if k == 'cell':
                print('    [[%12.6f, %12.6f, %12.6f],' % tuple(v[0]))
                print('     [%12.6f, %12.6f, %12.6f],' % tuple(v[1]))
                print('     [%12.6f, %12.6f, %12.6f]]' % tuple(v[2]))
        else:
            print(f'  {k}: {v!s}')
    # Read info from separate files.
    if metadata['subtype'] == 'split':
        nsplit = small['fragments']
    else:
        nsplit = False
    for k, v in metadata['datatypes'].items():
        if v and k not in small:
            info = backend.read_info(frame, k, nsplit)
            infoline = f'  {k}: '
            # Use distinct names so the outer loop's k/v are not clobbered.
            for infokey, infoval in info.items():
                infoline += f'{infokey} = {infoval!s}, '
            infoline = infoline[:-2] + '.'  # Fix punctuation.
            print(infoline)

1010 

1011 

def main():
    # optparse has been deprecated since Python 3.2; use argparse.
    import argparse
    parser = argparse.ArgumentParser(
        prog='python -m ase.io.bundletrajectory',
        description='Print information about '
        'the contents of one or more bundletrajectories.')
    # nargs='*' keeps the old behavior of accepting zero arguments.
    parser.add_argument('bundles', nargs='*', metavar='a.bundle',
                        help='bundle trajectory directory')
    args = parser.parse_args()
    for name in args.bundles:
        print_bundletrajectory_info(name)


if __name__ == '__main__':
    main()