Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1"""bundletrajectory - a module for I/O from large MD simulations. 

2 

3The BundleTrajectory class writes trajectory into a directory with the 

4following structure, except we now use JSON instead of pickle, 

5so this text needs updating:: 

6 

7 filename.bundle (dir) 

8 metadata.json Data about the file format, and about which 

9 data is present. 

10 frames The number of frames (ascii file) 

11 F0 (dir) Frame number 0 

12 smalldata.ulm Small data structures in a dictionary 

13 (pbc, cell, ...) 

14 numbers.ulm Atomic numbers 

15 positions.ulm Positions 

16 momenta.ulm Momenta 

17 ... 

18 F1 (dir) 

19 

20There is a folder for each frame, and the data is in the ASE Ulm format. 

21""" 

22 

23import os 

24import sys 

25import shutil 

26import time 

27from pathlib import Path 

28 

29import numpy as np 

30 

31from ase import Atoms 

32# The system json module causes memory leaks! Use ase's own. 

33# import json 

34from ase.io import jsonio 

35from ase.io.ulm import open as ulmopen 

36from ase.parallel import paropen, world, barrier 

37from ase.calculators.singlepoint import (SinglePointCalculator, 

38 PropertyNotImplementedError) 

39 

40 

41class BundleTrajectory: 

42 """Reads and writes atoms into a .bundle directory. 

43 

44 The BundleTrajectory is an alternative way of storing 

45 trajectories, intended for large-scale molecular dynamics 

46 simulations, where a single flat file becomes unwieldy. Instead, 

47 the data is stored in directory, a 'bundle' (the name bundle is 

48 inspired from bundles in Mac OS, which are really just directories 

49 the user is supposed to think of as a single file-like unit). 

50 

51 Parameters: 

52 

53 filename: 

54 The name of the directory. Preferably ending in .bundle. 

55 

56 mode (optional): 

57 The file opening mode. 'r' means open for reading, 'w' for 

58 writing and 'a' for appending. Default: 'r'. If opening in 

59 write mode, and the filename already exists, the old file is 

60 renamed to .bak (any old .bak file is deleted), except if the 

61 existing file is empty. 

62 

63 atoms (optional): 

64 The atoms that will be written. Can only be specified in 

65 write or append mode. If not specified, the atoms must be 

66 given as an argument to the .write() method instead. 

67 

68 backup=True: 

69 Use backup=False to disable renaming of an existing file. 

70 

71 backend='ulm': 

72 Request a backend. Currently only 'ulm' is supported. 

73 Only honored when writing. 

74 

75 singleprecision=False: 

76 Store floating point data in single precision (ulm backend only). 

77 """ 

78 slavelog = True # Log from all nodes 

79 

    def __init__(self, filename, mode='r', atoms=None, backup=True,
                 backend='ulm', singleprecision=False):
        """Open a bundle directory for reading ('r'), writing ('w')
        or appending ('a').  See the class docstring for parameters."""
        self.state = 'constructing'
        self.filename = filename
        self.pre_observers = []  # callback functions before write is performed
        self.post_observers = []  # callback functions after write is performed
        # Only the MPI master task (rank 0) writes shared data.
        self.master = world.rank == 0
        self.extra_data = []
        self.singleprecision = singleprecision
        self._set_defaults()
        if mode == 'r':
            if atoms is not None:
                raise ValueError('You cannot specify atoms in read mode.')
            self._open_read()
        elif mode == 'w':
            self._open_write(atoms, backup, backend)
        elif mode == 'a':
            self._open_append(atoms)
        else:
            raise ValueError('Unknown mode: ' + str(mode))

100 

101 def _set_defaults(self): 

102 "Set default values for internal parameters." 

103 self.version = 1 

104 self.subtype = 'normal' 

105 self.datatypes = {'positions': True, 

106 'numbers': 'once', 

107 'tags': 'once', 

108 'masses': 'once', 

109 'momenta': True, 

110 'forces': True, 

111 'energy': True, 

112 'energies': False, 

113 'stress': False, 

114 'magmoms': True} 

115 

    def _set_backend(self, backend):
        """Set the backend doing the actual I/O.

        If backend is None, the previously selected backend name is
        kept (used when reopening an existing bundle).
        """
        if backend is not None:
            self.backend_name = backend

        if self.backend_name == 'ulm':
            self.backend = UlmBundleBackend(self.master, self.singleprecision)
        else:
            raise NotImplementedError(
                'This version of ASE cannot use BundleTrajectory '
                'with backend "%s"' % self.backend_name)

127 

    def write(self, atoms=None):
        """Write the atoms to the file.

        If the atoms argument is not given, the atoms object specified
        when creating the trajectory object is used.
        """
        # Check that we are in write mode; the first write flips the
        # state from 'prewrite' (set by _open_write) to 'write'.
        if self.state == 'prewrite':
            self.state = 'write'
            assert self.nframes == 0
        elif self.state != 'write':
            raise RuntimeError('Cannot write in ' + self.state + ' mode.')

        if atoms is None:
            atoms = self.atoms

        # A plain Atoms object yields itself once; trajectory-like
        # objects yield each of their frames.
        for image in atoms.iterimages():
            self._write_atoms(image)

146 

    def _write_atoms(self, atoms):
        """Write a single frame to its own subdirectory (helper for write())."""
        # OK, it is a real atoms object.  Write it.
        self._call_observers(self.pre_observers)
        self.log('Beginning to write frame ' + str(self.nframes))
        framedir = self._make_framedir(self.nframes)

        # Check which data should be written the first time:
        # Modify datatypes so any element of type 'once' becomes true
        # for the first frame but false for subsequent frames.
        datatypes = {}
        for k, v in self.datatypes.items():
            if v == 'once':
                v = (self.nframes == 0)
            datatypes[k] = v

        # Write 'small' data structures.  They are written jointly.
        smalldata = {'pbc': atoms.get_pbc(),
                     'cell': atoms.get_cell(),
                     'natoms': atoms.get_global_number_of_atoms(),
                     'constraints': atoms.constraints}
        # If the calculator cannot provide energy/stress, permanently
        # disable that datatype instead of failing.
        if datatypes.get('energy'):
            try:
                smalldata['energy'] = atoms.get_potential_energy()
            except (RuntimeError, PropertyNotImplementedError):
                self.datatypes['energy'] = False
        if datatypes.get('stress'):
            try:
                smalldata['stress'] = atoms.get_stress()
            except PropertyNotImplementedError:
                self.datatypes['stress'] = False
        self.backend.write_small(framedir, smalldata)

        # Write the large arrays.  Per-atom data that the atoms object
        # does not carry is likewise disabled for subsequent frames.
        if datatypes.get('positions'):
            self.backend.write(framedir, 'positions', atoms.get_positions())
        if datatypes.get('numbers'):
            self.backend.write(framedir, 'numbers', atoms.get_atomic_numbers())
        if datatypes.get('tags'):
            if atoms.has('tags'):
                self.backend.write(framedir, 'tags', atoms.get_tags())
            else:
                self.datatypes['tags'] = False
        if datatypes.get('masses'):
            if atoms.has('masses'):
                self.backend.write(framedir, 'masses', atoms.get_masses())
            else:
                self.datatypes['masses'] = False
        if datatypes.get('momenta'):
            if atoms.has('momenta'):
                self.backend.write(framedir, 'momenta', atoms.get_momenta())
            else:
                self.datatypes['momenta'] = False
        if datatypes.get('magmoms'):
            if atoms.has('initial_magmoms'):
                self.backend.write(framedir, 'magmoms',
                                   atoms.get_initial_magnetic_moments())
            else:
                self.datatypes['magmoms'] = False
        if datatypes.get('forces'):
            try:
                x = atoms.get_forces()
            except (RuntimeError, PropertyNotImplementedError):
                self.datatypes['forces'] = False
            else:
                self.backend.write(framedir, 'forces', x)
                del x
        if datatypes.get('energies'):
            try:
                x = atoms.get_potential_energies()
            except (RuntimeError, PropertyNotImplementedError):
                self.datatypes['energies'] = False
            else:
                self.backend.write(framedir, 'energies', x)
                del x
        # Write any extra data registered with set_extra_data().
        for (label, source, once) in self.extra_data:
            if self.nframes == 0 or not once:
                if source is not None:
                    x = source()
                else:
                    x = atoms.get_array(label)
                self.backend.write(framedir, label, x)
                del x
                if once:
                    self.datatypes[label] = 'once'
                else:
                    self.datatypes[label] = True
        # Finally, write metadata if it is the first frame
        if self.nframes == 0:
            metadata = {'datatypes': self.datatypes}
            self._write_metadata(metadata)
        # The frame counter on disk is updated last, so readers never
        # see a frame count larger than what is actually written.
        self._write_nframes(self.nframes + 1)
        self._call_observers(self.post_observers)
        self.log('Done writing frame ' + str(self.nframes))
        self.nframes += 1

242 

    def select_data(self, data, value):
        """Selects if a given data type should be written.

        Data can be written in every frame (specify True), in the
        first frame only (specify 'once') or not at all (specify
        False).  Not all data types support the 'once' keyword, if not
        supported it is interpreted as True.

        The following data types are supported, the letter in parenthesis
        indicates the default:

        positions (T), numbers (O), tags (O), masses (O), momenta (T),
        forces (T), energy (T), energies (F), stress (F), magmoms (T)

        If a given property is not present during the first write, it
        will be not be saved at all.
        """
        if value not in (True, False, 'once'):
            raise ValueError('Unknown write mode')
        if data not in self.datatypes:
            raise ValueError('Unsupported data type: ' + data)
        self.datatypes[data] = value

265 

266 def set_extra_data(self, name, source=None, once=False): 

267 """Adds extra data to be written. 

268 

269 Parameters: 

270 name: The name of the data. 

271 

272 source (optional): If specified, a callable object returning 

273 the data to be written. If not specified it is instead 

274 assumed that the atoms contains the data as an array of the 

275 same name. 

276 

277 once (optional): If specified and True, the data will only be 

278 written to the first frame. 

279 """ 

280 self.extra_data.append((name, source, once)) 

281 

    def close(self):
        """Close the trajectory, the backend and the log file."""
        self.state = 'closed'
        # The log file may never have been opened (read mode).
        lf = getattr(self, 'logfile', None)
        self.backend.close(log=lf)
        if lf is not None:
            lf.close()
            del self.logfile

290 

    def log(self, text):
        """Write to the log file in the bundle.

        Logging is only possible in write/append mode.

        This function is mainly for internal use, but can also be called by
        the user.
        """
        if not (self.master or self.slavelog):
            return
        text = time.asctime() + ': ' + text
        if hasattr(self, 'logfile'):
            # Logging enabled
            if self.logfile is None:
                # Logfile not yet open: buffer messages in memory until
                # _open_log() flushes them to disk.
                try:
                    self.logdata.append(text)
                except AttributeError:
                    self.logdata = [text]
            else:
                self.logfile.write(text + '\n')
                self.logfile.flush()
        else:
            raise RuntimeError('Cannot write to log file in mode ' +
                               self.state)

316 

    # __getitem__ is the main reading method.
    def __getitem__(self, n):
        """Return frame number n as an Atoms object (negative n allowed)."""
        return self._read(n)

320 

    def _read(self, n):
        """Read an atoms object from the BundleTrajectory."""
        if self.state != 'read':
            raise IOError('Cannot read in %s mode' % (self.state,))
        # Normalize a negative index and range-check it.
        if n < 0:
            n += self.nframes
        if n < 0 or n >= self.nframes:
            raise IndexError('Trajectory index %d out of range [0, %d['
                             % (n, self.nframes))

        framedir = os.path.join(self.filename, 'F' + str(n))
        # Frame 0 also holds the data written 'once' (numbers, masses, ...).
        framezero = os.path.join(self.filename, 'F0')
        smalldata = self.backend.read_small(framedir)
        data = {}
        data['pbc'] = smalldata['pbc']
        data['cell'] = smalldata['cell']
        data['constraint'] = smalldata['constraints']
        if self.subtype == 'split':
            # Split bundles store per-fragment files plus an atom-ID
            # permutation used to reassemble global atom order.
            self.backend.set_fragments(smalldata['fragments'])
            self.atom_id, dummy = self.backend.read_split(framedir, 'ID')
        else:
            self.atom_id = None
        atoms = Atoms(**data)
        natoms = smalldata['natoms']
        for name in ('positions', 'numbers', 'tags', 'masses',
                     'momenta'):
            if self.datatypes.get(name):
                atoms.arrays[name] = self._read_data(framezero, framedir,
                                                     name, self.atom_id)
                assert len(atoms.arrays[name]) == natoms

        # Attach calculated properties through a single-point calculator.
        if self.datatypes.get('energy'):
            if self.datatypes.get('forces'):
                forces = self.backend.read(framedir, 'forces')
            else:
                forces = None
            if self.datatypes.get('magmoms'):
                magmoms = self.backend.read(framedir, 'magmoms')
            else:
                magmoms = None
            calc = SinglePointCalculator(atoms,
                                         energy=smalldata.get('energy'),
                                         forces=forces,
                                         stress=smalldata.get('stress'),
                                         magmoms=magmoms)
            atoms.calc = calc
        return atoms

369 

370 def read_extra_data(self, name, n=0): 

371 """Read extra data stored alongside the atoms. 

372 

373 Currently only used to read data stored by an NPT dynamics object. 

374 The data is not associated with individual atoms. 

375 """ 

376 if self.state != 'read': 

377 raise IOError('Cannot read extra data in %s mode' % (self.state,)) 

378 # Handle negative n. 

379 if n < 0: 

380 n += self.nframes 

381 if n < 0 or n >= self.nframes: 

382 raise IndexError('Trajectory index %d out of range [0, %d[' 

383 % (n, self.nframes)) 

384 framedir = os.path.join(self.filename, 'F' + str(n)) 

385 framezero = os.path.join(self.filename, 'F0') 

386 return self._read_data(framezero, framedir, name, self.atom_id) 

387 

    def _read_data(self, f0, f, name, atom_id):
        """Read a single data item.

        f0 is the directory of frame 0 (holding data written 'once'),
        f the directory of the requested frame.  atom_id is the atom
        permutation of 'split' bundles (None for 'normal' bundles).
        """
        if self.subtype == 'normal':
            if self.datatypes[name] == 'once':
                d = self.backend.read(f0, name)
            else:
                d = self.backend.read(f, name)
        elif self.subtype == 'split':
            if self.datatypes[name] == 'once':
                d, issplit = self.backend.read_split(f0, name)
                atom_id, dummy = self.backend.read_split(f0, 'ID')
            else:
                d, issplit = self.backend.read_split(f, name)
            if issplit:
                assert atom_id is not None
                assert len(d) == len(atom_id)
                # Scatter the concatenated fragments back into global
                # atom order; np.array(d) makes the copy needed because
                # d is modified in place by the fancy assignment.
                d[atom_id] = np.array(d)
        return d

407 

    def __len__(self):
        """Return the number of frames in the trajectory."""
        return self.nframes

410 

    def _open_log(self):
        """Open the log file and flush any messages buffered by log()."""
        if not (self.master or self.slavelog):
            return
        if self.master:
            lfn = os.path.join(self.filename, 'log.txt')
        else:
            # Each non-master task gets its own log file.
            lfn = os.path.join(self.filename, ('log-node%d.txt' %
                                               (world.rank,)))
        # Line-buffered; append to log if it exists.
        self.logfile = open(lfn, 'a', 1)
        if hasattr(self, 'logdata'):
            for text in self.logdata:
                self.logfile.write(text + '\n')
            self.logfile.flush()
            del self.logdata

425 

    def _open_write(self, atoms, backup, backend):
        """Open a bundle trajectory for writing.

        An existing empty bundle is deleted; a non-empty one is deleted
        or renamed to .bak depending on the backup flag.  The barrier()
        calls keep all MPI tasks in lockstep around directory changes.
        """
        self._set_backend(backend)
        self.logfile = None  # enable delayed logging
        self.atoms = atoms
        if os.path.exists(self.filename):
            # The output directory already exists.
            barrier()  # all must have time to see it exists
            if not self.is_bundle(self.filename, allowempty=True):
                raise IOError(
                    'Filename "' + self.filename +
                    '" already exists, but is not a BundleTrajectory.' +
                    ' Cowardly refusing to remove it.')
            if self.is_empty_bundle(self.filename):
                barrier()
                self.log('Deleting old "%s" as it is empty' % (self.filename,))
                self.delete_bundle(self.filename)
            elif not backup:
                barrier()
                self.log('Deleting old "%s" as backup is turned off.' %
                         (self.filename,))
                self.delete_bundle(self.filename)
            else:
                barrier()
                # Make a backup file
                bakname = self.filename + '.bak'
                if os.path.exists(bakname):
                    barrier()  # All must see it exists
                    self.log('Deleting old backup file "%s"' % (bakname,))
                    self.delete_bundle(bakname)
                self.log('Renaming "%s" to "%s"' % (self.filename, bakname))
                self._rename_bundle(self.filename, bakname)
        # Ready to create a new bundle.
        barrier()
        self.log('Creating new "%s"' % (self.filename,))
        self._make_bundledir(self.filename)
        self.state = 'prewrite'
        self._write_metadata({})
        self._write_nframes(0)  # Mark new bundle as empty
        self._open_log()
        self.nframes = 0

467 

    def _open_read(self):
        """Open a bundle trajectory for reading and load its metadata."""
        if not os.path.exists(self.filename):
            raise IOError('File not found: ' + self.filename)
        if not self.is_bundle(self.filename):
            raise IOError('Not a BundleTrajectory: ' + self.filename)
        self.state = 'read'
        # Read the metadata
        metadata = self._read_metadata()
        self.metadata = metadata
        if metadata['version'] > self.version:
            raise NotImplementedError(
                'This version of ASE cannot read a BundleTrajectory version ' +
                str(metadata['version']))
        if metadata['subtype'] not in ('normal', 'split'):
            raise NotImplementedError(
                'This version of ASE cannot read BundleTrajectory subtype ' +
                metadata['subtype'])
        self.subtype = metadata['subtype']
        if metadata['backend'] == 'ulm':
            self.singleprecision = metadata['ulm.singleprecision']
        self._set_backend(metadata['backend'])
        self.nframes = self._read_nframes()
        if self.nframes == 0:
            raise IOError('Empty BundleTrajectory')
        self.datatypes = metadata['datatypes']
        try:
            self.pythonmajor = metadata['python_ver'][0]
        except KeyError:
            self.pythonmajor = 2  # Assume written with Python 2.
        # We need to know if we are running Python 3.X and try to read
        # a bundle written with Python 2.X
        self.backend.readpy2 = (self.pythonmajor == 2)
        self.state = 'read'

502 

503 def _open_append(self, atoms): 

504 if not os.path.exists(self.filename): 

505 # OK, no old bundle. Open as for write instead. 

506 barrier() 

507 self._open_write(atoms, False) 

508 return 

509 if not self.is_bundle(self.filename): 

510 raise IOError('Not a BundleTrajectory: ' + self.filename) 

511 self.state = 'read' 

512 metadata = self._read_metadata() 

513 self.metadata = metadata 

514 if metadata['version'] != self.version: 

515 raise NotImplementedError( 

516 'Cannot append to a BundleTrajectory version ' 

517 '%s (supported version is %s)' % (str(metadata['version']), 

518 str(self.version))) 

519 if metadata['subtype'] not in ('normal', 'split'): 

520 raise NotImplementedError( 

521 'This version of ASE cannot append to BundleTrajectory ' 

522 'subtype ' + metadata['subtype']) 

523 self.subtype = metadata['subtype'] 

524 if metadata['backend'] == 'ulm': 

525 self.singleprecision = metadata['ulm.singleprecision'] 

526 self._set_backend(metadata['backend']) 

527 self.nframes = self._read_nframes() 

528 self._open_log() 

529 self.log('Opening "%s" in append mode (nframes=%i)' % (self.filename, 

530 self.nframes)) 

531 self.state = 'write' 

532 self.atoms = atoms 

533 

    @property
    def path(self):
        """The bundle directory as a pathlib.Path."""
        return Path(self.filename)

537 

    @property
    def metadata_path(self):
        """Path of the metadata.json file inside the bundle."""
        return self.path / 'metadata.json'

541 

    def _write_nframes(self, n):
        """Write the number of frames to the bundle's 'frames' file."""
        assert self.state == 'write' or self.state == 'prewrite'
        # paropen: only the master actually writes in parallel runs.
        with paropen(self.path / 'frames', 'w') as fd:
            fd.write(str(n) + '\n')

547 

    def _read_nframes(self):
        """Read the number of frames from the bundle's 'frames' file."""
        return int((self.path / 'frames').read_text())

551 

    def _write_metadata(self, metadata):
        """Write the metadata file of the bundle.

        Modifies the metadata dictionary!
        """
        # Add standard fields that must always be present.
        assert self.state == 'write' or self.state == 'prewrite'
        metadata['format'] = 'BundleTrajectory'
        metadata['version'] = self.version
        metadata['subtype'] = self.subtype
        metadata['backend'] = self.backend_name
        if self.backend_name == 'ulm':
            metadata['ulm.singleprecision'] = self.singleprecision
        # Recorded so readers can detect Python-2-era bundles.
        metadata['python_ver'] = tuple(sys.version_info)
        # ASE's own JSON encoder is used; see the note at the imports.
        encode = jsonio.MyEncoder(indent=4).encode
        fido = encode(metadata)
        with paropen(self.metadata_path, 'w') as fd:
            fd.write(fido)

570 

    def _read_metadata(self):
        """Read and decode the bundle's metadata.json file."""
        assert self.state == 'read'
        return jsonio.decode(self.metadata_path.read_text())

575 

576 @staticmethod 

577 def is_bundle(filename, allowempty=False): 

578 """Check if a filename exists and is a BundleTrajectory. 

579 

580 If allowempty=True, an empty folder is regarded as an 

581 empty BundleTrajectory.""" 

582 filename = Path(filename) 

583 if not filename.is_dir(): 

584 return False 

585 if allowempty and not os.listdir(filename): 

586 return True # An empty BundleTrajectory 

587 metaname = filename / 'metadata.json' 

588 if metaname.is_file(): 

589 mdata = jsonio.decode(metaname.read_text()) 

590 else: 

591 return False 

592 

593 try: 

594 return mdata['format'] == 'BundleTrajectory' 

595 except KeyError: 

596 return False 

597 

    @staticmethod
    def is_empty_bundle(filename):
        """Check if a filename is an empty bundle.

        Assumes that it is a bundle."""
        if not os.listdir(filename):
            return True  # Empty folders are empty bundles.
        with open(os.path.join(filename, 'frames'), 'rb') as fd:
            nframes = int(fd.read())

        # File may be removed by the master immediately after this,
        # so all tasks must be done reading before anyone proceeds.
        barrier()
        return nframes == 0

611 

    @classmethod
    def delete_bundle(cls, filename):
        """Delete a bundle (master deletes, other MPI tasks wait)."""
        if world.rank == 0:
            # Only the master deletes
            if not cls.is_bundle(filename, allowempty=True):
                raise IOError(
                    'Cannot remove "%s" as it is not a bundle trajectory.'
                    % (filename,))
            if os.path.islink(filename):
                # A symbolic link to a bundle.  Only remove the link.
                os.remove(filename)
            else:
                # A real bundle
                shutil.rmtree(filename)
        else:
            # All other tasks wait for the directory to go away.
            while os.path.exists(filename):
                time.sleep(1)
        # The master may not proceed before all tasks have seen the
        # directory go away, as it might otherwise create a new bundle
        # with the same name, fooling the wait loop in _make_bundledir.
        barrier()

635 

    def _rename_bundle(self, oldname, newname):
        """Rename a bundle.  Used to create the .bak backup."""
        if self.master:
            os.rename(oldname, newname)
        else:
            # Other tasks poll until the rename is visible to them.
            while os.path.exists(oldname):
                time.sleep(1)
        # The master may not proceed before all tasks have seen the
        # directory go away.
        barrier()

646 

    def _make_bundledir(self, filename):
        """Make the main bundle directory.

        Since all MPI tasks might write to it, all tasks must wait for
        the directory to appear.
        """
        self.log('Making directory ' + filename)
        assert not os.path.isdir(filename)
        barrier()
        if self.master:
            os.mkdir(filename)
        else:
            # Poll until the master's mkdir becomes visible; warn if it
            # takes suspiciously long (e.g. slow shared filesystems).
            i = 0
            while not os.path.isdir(filename):
                time.sleep(1)
                i += 1
                if i > 10:
                    self.log('Waiting %d seconds for %s to appear!'
                             % (i, filename))

666 

    def _make_framedir(self, frame):
        """Make subdirectory for the frame.

        As only the master writes to it, no synchronization between
        MPI tasks is necessary.
        """
        framedir = os.path.join(self.filename, 'F' + str(frame))
        if self.master:
            self.log('Making directory ' + framedir)
            os.mkdir(framedir)
        return framedir

678 

679 def pre_write_attach(self, function, interval=1, *args, **kwargs): 

680 """Attach a function to be called before writing begins. 

681 

682 function: The function or callable object to be called. 

683 

684 interval: How often the function is called. Default: every time (1). 

685 

686 All other arguments are stored, and passed to the function. 

687 """ 

688 if not callable(function): 

689 raise ValueError('Callback object must be callable.') 

690 self.pre_observers.append((function, interval, args, kwargs)) 

691 

692 def post_write_attach(self, function, interval=1, *args, **kwargs): 

693 """Attach a function to be called after writing ends. 

694 

695 function: The function or callable object to be called. 

696 

697 interval: How often the function is called. Default: every time (1). 

698 

699 All other arguments are stored, and passed to the function. 

700 """ 

701 if not callable(function): 

702 raise ValueError('Callback object must be callable.') 

703 self.post_observers.append((function, interval, args, kwargs)) 

704 

705 def _call_observers(self, obs): 

706 "Call pre/post write observers." 

707 for function, interval, args, kwargs in obs: 

708 if (self.nframes + 1) % interval == 0: 

709 function(*args, **kwargs) 

710 

711 

class UlmBundleBackend:
    """Backend for BundleTrajectories stored as ASE Ulm files."""

    def __init__(self, master, singleprecision):
        """master: True if this task should write data.

        singleprecision: downconvert float64 arrays to float32 on write.
        """
        # Store if this backend will actually write anything
        self.writesmall = master
        self.writelarge = master
        self.singleprecision = singleprecision

        # Integer data may be downconverted to the following types
        self.integral_dtypes = ['uint8', 'int8', 'uint16', 'int16',
                                'uint32', 'int32', 'uint64', 'int64']
        # Per-dtype numpy type, value range and item size; used by
        # write() to pick the smallest sufficient integer type.
        self.int_dtype = dict((k, getattr(np, k))
                              for k in self.integral_dtypes)
        self.int_minval = dict((k, np.iinfo(self.int_dtype[k]).min)
                               for k in self.integral_dtypes)
        self.int_maxval = dict((k, np.iinfo(self.int_dtype[k]).max)
                               for k in self.integral_dtypes)
        self.int_itemsize = dict((k, np.dtype(self.int_dtype[k]).itemsize)
                                 for k in self.integral_dtypes)

    def write_small(self, framedir, smalldata):
        "Write small data to be written jointly."
        if self.writesmall:
            with ulmopen(os.path.join(framedir, 'smalldata.ulm'), 'w') as fd:
                fd.write(**smalldata)

    def write(self, framedir, name, data):
        """Write one array to its own file, compacting the dtype.

        Constant arrays are stored as a single scalar; integer arrays
        are stored in the smallest sufficient integer type; float64
        arrays may be downconverted to float32.  The original dtype is
        recorded so read() can restore it.
        """
        if self.writelarge:
            shape = data.shape
            dtype = str(data.dtype)
            stored_as = dtype
            all_identical = False
            # Check if it a type that can be stored with less space
            if np.issubdtype(data.dtype, np.integer):
                # An integer type, we may want to convert
                minval = data.min()
                maxval = data.max()

                # ulm cannot write np.bool_:
                all_identical = bool(minval == maxval)
                if all_identical:
                    data = int(data.flat[0])  # Convert to standard integer
                else:
                    # Scan candidate types; only convert when it shrinks
                    # the item size, so repeated matches are harmless.
                    for typ in self.integral_dtypes:
                        if (minval >= self.int_minval[typ] and
                            maxval <= self.int_maxval[typ] and
                                data.itemsize > self.int_itemsize[typ]):

                            # Convert to smaller type
                            stored_as = typ
                            data = data.astype(self.int_dtype[typ])
            elif data.dtype == np.float32 or data.dtype == np.float64:
                all_identical = bool(data.min() == data.max())
                if all_identical:
                    data = float(data.flat[0])  # Convert to standard float
                elif data.dtype == np.float64 and self.singleprecision:
                    # Downconvert double to single precision
                    stored_as = 'float32'
                    data = data.astype(np.float32)
            fn = os.path.join(framedir, name + '.ulm')
            with ulmopen(fn, 'w') as fd:
                fd.write(shape=shape,
                         dtype=dtype,
                         stored_as=stored_as,
                         all_identical=all_identical,
                         data=data)

    def read_small(self, framedir):
        "Read small data."
        with ulmopen(os.path.join(framedir, 'smalldata.ulm'), 'r') as fd:
            return fd.asdict()

    def read(self, framedir, name):
        """Read one array, undoing the compaction done by write()."""
        fn = os.path.join(framedir, name + '.ulm')
        with ulmopen(fn, 'r') as fd:
            if fd.all_identical:
                # Only a single data value: broadcast it to full shape.
                data = np.zeros(fd.shape,
                                dtype=getattr(np, fd.dtype)) + fd.data
            elif fd.dtype == fd.stored_as:
                # Easy, the array can be returned as-is.
                data = fd.data
            else:
                # Cast the data back to the original dtype.
                data = fd.data.astype(getattr(np, fd.dtype))
            return data

    def read_info(self, framedir, name, split=None):
        """Read information about file contents without reading the data.

        Information is a dictionary containing as a minimum the shape and
        type.
        """
        fn = os.path.join(framedir, name + '.ulm')
        if split is None or os.path.exists(fn):
            # Single-file storage.
            with ulmopen(fn, 'r') as fd:
                info = dict()
                info['shape'] = fd.shape
                info['type'] = fd.dtype
                info['stored_as'] = fd.stored_as
                info['identical'] = fd.all_identical
                return info
        else:
            # Split storage: accumulate the shape over the fragments.
            info = dict()
            for i in range(split):
                fn = os.path.join(framedir, name + '_' + str(i) + '.ulm')
                with ulmopen(fn, 'r') as fd:
                    if i == 0:
                        info['shape'] = list(fd.shape)
                        info['type'] = fd.dtype
                        info['stored_as'] = fd.stored_as
                        info['identical'] = fd.all_identical
                    else:
                        info['shape'][0] += fd.shape[0]
                        assert info['type'] == fd.dtype
                        info['identical'] = (info['identical'] and
                                             fd.all_identical)
            info['shape'] = tuple(info['shape'])
            return info

    def set_fragments(self, nfrag):
        # Number of fragments; used by read_split().
        self.nfrag = nfrag

    def read_split(self, framedir, name):
        """Read data from multiple files.

        Falls back to reading from single file if that is how data is stored.

        Returns the data and an object indicating if the data was really
        read from split files.  The latter object is False if not
        read from split files, but is an array of the segment length if
        split files were used.
        """
        data = []
        if os.path.exists(os.path.join(framedir, name + '.ulm')):
            # Not stored in split form!
            return (self.read(framedir, name), False)
        for i in range(self.nfrag):
            suf = '_%d' % (i,)
            data.append(self.read(framedir, name + suf))
        seglengths = [len(d) for d in data]
        return (np.concatenate(data), seglengths)

    def close(self, log=None):
        """Close anything that needs to be closed by the backend.

        The default backend does nothing here.
        """
        pass

864 

865 

def read_bundletrajectory(filename, index=-1):
    """Reads one or more atoms objects from a BundleTrajectory.

    Arguments:

    filename: str
        The name of the bundle (really a directory!)
    index: int or slice
        An integer specifying which frame to read, or a slice object
        for reading multiple frames.  Default: -1 (reads the last
        frame).
    """
    traj = BundleTrajectory(filename, mode='r')
    # Bug fix: an integer index (including the default -1) has no
    # .indices() method - only slices do - so handle ints separately.
    if isinstance(index, int):
        yield traj[index]
    else:
        for i in range(*index.indices(len(traj))):
            yield traj[i]

881 

882 

def write_bundletrajectory(filename, images, append=False):
    """Write image(s) to a BundleTrajectory.

    Write also energy, forces, and stress if they are already
    calculated.
    """

    if append:
        mode = 'a'
    else:
        mode = 'w'
    traj = BundleTrajectory(filename, mode=mode)

    if hasattr(images, 'get_positions'):
        # A single Atoms object was passed; wrap it in a list.
        images = [images]

    for atoms in images:
        # Avoid potentially expensive calculations:
        calc = atoms.calc
        if hasattr(calc, 'calculation_required'):
            # Only store quantities the calculator already has.
            for quantity in ('energy', 'forces', 'stress', 'magmoms'):
                traj.select_data(quantity,
                                 not calc.calculation_required(atoms,
                                                               [quantity]))
        traj.write(atoms)
    traj.close()

909 

910 

def print_bundletrajectory_info(filename):
    """Prints information about a BundleTrajectory.

    Mainly intended to be called from a command line tool.
    """
    if not BundleTrajectory.is_bundle(filename):
        raise ValueError('Not a BundleTrajectory!')
    if BundleTrajectory.is_empty_bundle(filename):
        print(filename, 'is an empty BundleTrajectory.')
        return
    # Read the metadata
    fn = os.path.join(filename, 'metadata.json')
    with open(fn, 'r') as fd:
        metadata = jsonio.decode(fd.read())

    print('Metadata information of BundleTrajectory "%s":' % (filename,))
    for k, v in metadata.items():
        if k != 'datatypes':
            print("  %s: %s" % (k, v))
    with open(os.path.join(filename, 'frames'), 'rb') as fd:
        nframes = int(fd.read())
    print('Number of frames: %i' % (nframes,))
    print('Data types:')
    for k, v in metadata['datatypes'].items():
        if v == 'once':
            print('  %s: First frame only.' % (k,))
        elif v:
            print('  %s: All frames.' % (k,))
    # Look at first frame
    if metadata['backend'] == 'ulm':
        backend = UlmBundleBackend(True, False)
    else:
        raise NotImplementedError('Backend %s not supported.'
                                  % (metadata['backend'],))
    frame = os.path.join(filename, 'F0')
    small = backend.read_small(frame)
    print('Contents of first frame:')
    for k, v in small.items():
        if k == 'constraints':
            if v:
                print('  {} constraints are present'.format(len(v)))
            else:
                print('  Constraints are absent.')
        elif k == 'pbc':
            print('  Periodic boundary conditions: %s' % (str(v),))
        elif k == 'natoms':
            print('  Number of atoms: %i' % (v,))
        elif hasattr(v, 'shape'):
            print('  %s: shape = %s, type = %s' %
                  (k, str(v.shape), str(v.dtype)))
            if k == 'cell':
                print('        [[%12.6f, %12.6f, %12.6f],' % tuple(v[0]))
                print('         [%12.6f, %12.6f, %12.6f],' % tuple(v[1]))
                print('         [%12.6f, %12.6f, %12.6f]]' % tuple(v[2]))
        else:
            print('  %s: %s' % (k, str(v)))
    # Read info from separate files.
    if metadata['subtype'] == 'split':
        nsplit = small['fragments']
    else:
        nsplit = False
    for k, v in metadata['datatypes'].items():
        if v and k not in small:
            info = backend.read_info(frame, k, nsplit)
            infoline = '  %s: ' % (k,)
            # NOTE(review): this inner loop reuses k and v, shadowing
            # the outer loop variables.  Harmless - the outer for
            # statement rebinds them - but worth renaming.
            for k, v in info.items():
                infoline += '%s = %s, ' % (k, str(v))
            infoline = infoline[:-2] + '.'  # Fix punctuation.
            print(infoline)

980 

981 

class PickleBundleBackend:
    """Defunct backend for pickle-based bundles.

    Kept only as a placeholder so importing asap3 won't crash.
    """
    # Leave placeholder class so importing asap3 won't crash.
    pass

985 

986 

def main():
    """Command-line entry point: print information about the contents
    of one or more bundletrajectories."""
    # Modernized from the long-deprecated optparse to argparse.
    import argparse
    parser = argparse.ArgumentParser(
        prog='python -m ase.io.bundletrajectory',
        description='Print information about '
        'the contents of one or more bundletrajectories.')
    # nargs='*' keeps the old behavior of accepting zero arguments.
    parser.add_argument('bundles', nargs='*', metavar='a.bundle',
                        help='bundle trajectory directory')
    args = parser.parse_args()
    for name in args.bundles:
        print_bundletrajectory_info(name)

997 

998 

999if __name__ == '__main__': 

1000 main()