Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1"""File formats. 

2 

3This module implements the read(), iread() and write() functions in ase.io. 

4For each file format there is an IOFormat object. 

5 

6There is a dict, ioformats, which stores the objects. 

7 

8Example 

9======= 

10 

11The xyz format is implemented in the ase/io/xyz.py file which has a 

12read_xyz() generator and a write_xyz() function. This and other 

13information can be obtained from ioformats['xyz']. 

14""" 

15 

16import io 

17import re 

18import functools 

19import inspect 

20import os 

21import sys 

22import numbers 

23import warnings 

24from pathlib import Path, PurePath 

25from typing import ( 

26 IO, List, Any, Iterable, Tuple, Union, Sequence, Dict, Optional) 

27 

28from ase.atoms import Atoms 

29from importlib import import_module 

30from ase.parallel import parallel_function, parallel_generator 

31 

32 

# Maximum number of leading bytes inspected when guessing a file format
# from the file contents (used by filetype() and match_magic()).
PEEK_BYTES = 50000

34 

35 

class UnknownFileTypeError(Exception):
    """Raised when a file format is unknown or cannot be determined."""

38 

39 

class IOFormat:
    """Description of a single file format known to ase.io.

    An IOFormat knows the name of the module implementing the format and
    looks up the ``read_<name>`` / ``write_<name>`` functions in it on
    demand.  The two-character ``code`` describes the format:

    * first character: ``'1'`` (single Atoms object) or ``'+'`` (multiple
      Atoms objects);
    * second character: ``'F'`` (accepts a file descriptor), ``'S'`` (needs
      a file name) or ``'B'`` (like ``'F'`` but opened in binary mode).
    """

    def __init__(self, name: str, desc: str, code: str, module_name: str,
                 encoding: Optional[str] = None) -> None:
        """Store the static description of the format.

        Parameters
        ----------
        name: format name, e.g. ``'xyz'``.
        desc: human readable description.
        code: two-character code, see the class docstring.
        module_name: dotted name of the implementing module.
        encoding: text encoding used when opening files, or None.
        """
        self.name = name
        self.description = desc
        assert len(code) == 2
        assert code[0] in list('+1')
        assert code[1] in list('BFS')
        self.code = code
        self.module_name = module_name
        self.encoding = encoding

        # (To be set by define_io_format())
        self.extensions: List[str] = []
        self.globs: List[str] = []
        # Magic patterns are matched against bytes, see match_magic().
        self.magic: List[bytes] = []
        self.magic_regex: Optional[bytes] = None

    def open(self, fname, mode: str = 'r') -> IO:
        """Open *fname* in a way that suits this format.

        *mode* must be ``'r'``, ``'w'`` or ``'a'``; binary formats get a
        ``'b'`` appended automatically.

        Raises ValueError for any other mode and NotImplementedError if
        the format does not support the requested operation.
        """
        # We might want append mode, too
        # We can allow more flags as needed (buffering etc.)
        if mode not in list('rwa'):
            raise ValueError("Only modes allowed are 'r', 'w', and 'a'")
        if mode == 'r' and not self.can_read:
            raise NotImplementedError('No reader implemented for {} format'
                                      .format(self.name))
        if mode == 'w' and not self.can_write:
            raise NotImplementedError('No writer implemented for {} format'
                                      .format(self.name))
        if mode == 'a' and not self.can_append:
            raise NotImplementedError('Appending not supported by {} format'
                                      .format(self.name))

        path = Path(fname)
        if self.isbinary:
            # Binary streams reject an ``encoding`` argument (ValueError),
            # so it must only be passed along in text mode.
            return path.open(mode + 'b')
        return path.open(mode, encoding=self.encoding)

    def _buf_as_filelike(self, data: Union[str, bytes]) -> IO:
        """Wrap *data* in an in-memory file of the kind the format expects.

        str/bytes are converted into each other as needed, using the
        format's encoding or UTF-8 if none is set.
        """
        encoding = self.encoding
        if encoding is None:
            encoding = 'utf-8'  # Best hacky guess.

        if self.isbinary:
            if isinstance(data, str):
                data = data.encode(encoding)
        else:
            if isinstance(data, bytes):
                data = data.decode(encoding)

        return self._ioclass(data)

    @property
    def _ioclass(self):
        """In-memory file class: BytesIO for binary formats, else StringIO."""
        if self.isbinary:
            return io.BytesIO
        else:
            return io.StringIO

    def parse_images(self, data: Union[str, bytes],
                     **kwargs) -> Sequence['Atoms']:
        """Parse *data* (file contents) and return the images as a list."""
        with self._buf_as_filelike(data) as fd:
            # Call the reader exactly once.  The read wrapper hands back a
            # generator, which has to be consumed while fd is still open.
            outputs = self.read(fd, **kwargs)
            if isinstance(outputs, Atoms):
                return [outputs]
            return list(outputs)

    def parse_atoms(self, data: Union[str, bytes], **kwargs) -> 'Atoms':
        """Parse *data* and return the last image."""
        images = self.parse_images(data, **kwargs)
        return images[-1]

    @property
    def can_read(self) -> bool:
        """Whether the module defines a read function for this format."""
        return self._readfunc() is not None

    @property
    def can_write(self) -> bool:
        """Whether the module defines a write function for this format."""
        return self._writefunc() is not None

    @property
    def can_append(self) -> bool:
        """Whether the write function knows the name ``append``."""
        writefunc = self._writefunc()
        if writefunc is None:
            return False
        # NOTE(review): co_varnames also lists local variables, not only
        # parameters, so this is a heuristic.
        return 'append' in writefunc.__code__.co_varnames

    def __repr__(self) -> str:
        tokens = ['{}={}'.format(name, repr(value))
                  for name, value in vars(self).items()]
        return 'IOFormat({})'.format(', '.join(tokens))

    def __getitem__(self, i):
        # For compatibility.
        #
        # Historically, the ioformats were listed as tuples
        # with (description, code).  We look like such a tuple.
        return (self.description, self.code)[i]

    @property
    def single(self) -> bool:
        """Whether this format is for a single Atoms object."""
        return self.code[0] == '1'

    @property
    def _formatname(self) -> str:
        """Format name with dashes replaced by underscores."""
        return self.name.replace('-', '_')

    def _readfunc(self):
        """Return the module's ``read_<format>`` function, or None."""
        return getattr(self.module, 'read_' + self._formatname, None)

    def _writefunc(self):
        """Return the module's ``write_<format>`` function, or None."""
        return getattr(self.module, 'write_' + self._formatname, None)

    @property
    def read(self):
        """Callable reading this format, or None (with a FutureWarning)."""
        if not self.can_read:
            self._warn_none('read')
            return None

        return self._read_wrapper

    def _read_wrapper(self, *args, **kwargs):
        """Call the read function, always presenting it as a generator."""
        function = self._readfunc()
        if function is None:
            self._warn_none('read')
            return None
        if not inspect.isgeneratorfunction(function):
            function = functools.partial(wrap_read_function, function)
        return function(*args, **kwargs)

    def _warn_none(self, action):
        """Warn that IOFormat.<action> currently evaluates to None."""
        msg = ('Accessing the IOFormat.{action} property on a format '
               'without {action} support will change behaviour in the '
               'future and return a callable instead of None.  '
               'Use IOFormat.can_{action} to check whether {action} '
               'is supported.')
        warnings.warn(msg.format(action=action), FutureWarning)

    @property
    def write(self):
        """Callable writing this format, or None (with a FutureWarning)."""
        if not self.can_write:
            self._warn_none('write')
            return None

        return self._write_wrapper

    def _write_wrapper(self, *args, **kwargs):
        """Call the write function; ValueError if there is none."""
        function = self._writefunc()
        if function is None:
            raise ValueError(f'Cannot write to {self.name}-format')
        return function(*args, **kwargs)

    @property
    def modes(self) -> str:
        """Supported modes as a string: '', 'r', 'w' or 'rw'."""
        modes = ''
        if self.can_read:
            modes += 'r'
        if self.can_write:
            modes += 'w'
        return modes

    def full_description(self) -> str:
        """Return a multi-line, human readable summary of the format."""
        lines = [f'Name:        {self.name}',
                 f'Description: {self.description}',
                 f'Modes:       {self.modes}',
                 f'Encoding:    {self.encoding}',
                 f'Module:      {self.module_name}',
                 f'Code:        {self.code}',
                 f'Extensions:  {self.extensions}',
                 f'Globs:       {self.globs}',
                 f'Magic:       {self.magic}']
        return '\n'.join(lines)

    @property
    def acceptsfd(self) -> bool:
        """Whether the format works on open files (code 'F' or 'B')."""
        return self.code[1] != 'S'

    @property
    def isbinary(self) -> bool:
        """Whether files of this format are opened in binary mode."""
        return self.code[1] == 'B'

    @property
    def module(self):
        """Import and return the module implementing this format.

        Raises ValueError if the module is not inside ``ase.io`` and
        UnknownFileTypeError if it cannot be imported.
        """
        if not self.module_name.startswith('ase.io.'):
            raise ValueError('Will only import modules from ase.io, '
                             'not {}'.format(self.module_name))
        try:
            return import_module(self.module_name)
        except ImportError as err:
            # Chain explicitly so the original import failure stays visible.
            raise UnknownFileTypeError(
                f'File format not recognized: {self.name}.  Error: {err}'
            ) from err

    def match_name(self, basename: str) -> bool:
        """Whether *basename* matches any of the format's glob patterns."""
        from fnmatch import fnmatch
        return any(fnmatch(basename, pattern)
                   for pattern in self.globs)

    def match_magic(self, data: bytes) -> bool:
        """Whether the leading bytes *data* identify this format.

        Uses ``magic_regex`` if set, otherwise the ``magic`` glob patterns
        (each implicitly followed by ``*``).
        """
        if self.magic_regex:
            assert not self.magic, 'Define only one of magic and magic_regex'
            match = re.match(self.magic_regex, data, re.M | re.S)
            return match is not None

        from fnmatch import fnmatchcase
        return any(fnmatchcase(data, magic + b'*')  # type: ignore
                   for magic in self.magic)

247 

248 

# Registry of all defined formats, keyed by format name.
ioformats: Dict[str, IOFormat] = {}  # These will be filled at run-time.
# Maps a filename extension (as passed to define_io_format(ext=...)) to the
# IOFormat registered for it.
extension2format: Dict[str, IOFormat] = {}


all_formats = ioformats  # Aliased for compatibility only.  Please do not use.
# Maps format name to the module name used by define_io_format(), without
# the 'ase.io.' prefix.
format2modulename: Dict[str, str] = {}  # Left for compatibility only.

255 

256 

def define_io_format(name, desc, code, *, module=None, ext=None,
                     glob=None, magic=None, encoding=None,
                     magic_regex=None):
    """Create an IOFormat and add it to the module-level registries.

    Parameters
    ----------
    name: format name; also the default module name (dashes replaced by
        underscores) when *module* is not given.
    desc: human readable description.
    code: two-character format code, passed on to IOFormat.
    module: name of the implementing module inside ``ase.io``.
    ext: filename extension(s) mapped to this format (str or iterable).
    glob: filename glob pattern(s) identifying the format.
    magic: bytes pattern(s) matched against the start of the file.
    encoding: text encoding passed on to IOFormat.
    magic_regex: bytes regular expression used instead of *magic*.

    Returns the new IOFormat.

    Raises ValueError if one of the extensions is already registered; in
    that case none of the registries is modified.
    """
    if module is None:
        module = name.replace('-', '_')

    def normalize_patterns(strings):
        """Return *strings* as a list (None -> [], single item -> [item])."""
        if strings is None:
            return []
        if isinstance(strings, (str, bytes)):
            return [strings]
        return list(strings)

    fmt = IOFormat(name, desc, code, module_name='ase.io.' + module,
                   encoding=encoding)
    fmt.extensions = normalize_patterns(ext)
    fmt.globs = normalize_patterns(glob)
    fmt.magic = normalize_patterns(magic)

    if magic_regex is not None:
        fmt.magic_regex = magic_regex

    # Check all extensions before touching any registry, so that a failed
    # definition does not leave a half-registered format behind.
    claimed = set()
    for extension in fmt.extensions:
        if extension in extension2format or extension in claimed:
            raise ValueError(
                'extension "{}" already registered'.format(extension))
        claimed.add(extension)

    format2modulename[name] = module
    for extension in fmt.extensions:
        extension2format[extension] = fmt
    ioformats[name] = fmt
    return fmt

289 

290 

def get_ioformat(name: str) -> IOFormat:
    """Look up the IOFormat registered under *name*.

    Raises UnknownFileTypeError if no such format is registered.  The
    format's module is imported as part of the lookup, so import problems
    surface here as well.
    """
    ioformat = ioformats.get(name)
    if ioformat is None:
        raise UnknownFileTypeError(name)
    # Accessing the property triggers the import; the value is not needed.
    ioformat.module
    return ioformat

299 

300 

301# We define all the IO formats below. Each IO format has a code, 

302# such as '1F', which defines some of the format's properties: 

303# 

304# 1=single atoms object 

305# +=multiple atoms objects 

306# F=accepts a file-descriptor 

307# S=needs a file-name str 

308# B=like F, but opens in binary mode 

309 

# Short alias to keep the registration table below compact.  Each call
# registers one format: F(name, description, code, **options).
F = define_io_format
F('abinit-in', 'ABINIT input file', '1F',
  module='abinit', magic=b'*znucl *')
F('abinit-out', 'ABINIT output file', '1F',
  module='abinit', magic=b'*.Version * of ABINIT')
F('aims', 'FHI-aims geometry file', '1S', ext='in')
F('aims-output', 'FHI-aims output', '+S',
  module='aims', magic=b'*Invoking FHI-aims ...')
F('bundletrajectory', 'ASE bundle trajectory', '+S')
F('castep-castep', 'CASTEP output file', '+F',
  module='castep', ext='castep')
F('castep-cell', 'CASTEP geom file', '1F',
  module='castep', ext='cell')
F('castep-geom', 'CASTEP trajectory file', '+F',
  module='castep', ext='geom')
F('castep-md', 'CASTEP molecular dynamics file', '+F',
  module='castep', ext='md')
F('castep-phonon', 'CASTEP phonon file', '1F',
  module='castep', ext='phonon')
F('cfg', 'AtomEye configuration', '1F')
F('cif', 'CIF-file', '+B', ext='cif')
F('cmdft', 'CMDFT-file', '1F', glob='*I_info')
F('cml', 'Chemical json file', '1F', ext='cml')
F('cp2k-dcd', 'CP2K DCD file', '+B',
  module='cp2k', ext='dcd')
F('cp2k-restart', 'CP2K restart file', '1F',
  module='cp2k', ext='restart')
F('crystal', 'Crystal fort.34 format', '1F',
  ext=['f34', '34'], glob=['f34', '34'])
F('cube', 'CUBE file', '1F', ext='cube')
F('dacapo-text', 'Dacapo text output', '1F',
  module='dacapo', magic=b'*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n')
F('db', 'ASE SQLite database file', '+S')
F('dftb', 'DftbPlus input file', '1S', magic=b'Geometry')
F('dlp4', 'DL_POLY_4 CONFIG file', '1F',
  module='dlp4', ext='config', glob=['*CONFIG*'])
F('dlp-history', 'DL_POLY HISTORY file', '+F',
  module='dlp4', glob='HISTORY')
F('dmol-arc', 'DMol3 arc file', '+S',
  module='dmol')
F('dmol-car', 'DMol3 structure file', '1S',
  module='dmol', ext='car')
F('dmol-incoor', 'DMol3 structure file', '1S',
  module='dmol')
F('elk', 'ELK atoms definition from GEOMETRY.OUT', '1F',
  glob=['GEOMETRY.OUT'])
F('elk-in', 'ELK input file', '1F', module='elk')
F('eon', 'EON CON file', '+F',
  ext='con')
F('eps', 'Encapsulated Postscript', '1S')
F('espresso-in', 'Quantum espresso in file', '1F',
  module='espresso', ext='pwi', magic=[b'*\n&system', b'*\n&SYSTEM'])
F('espresso-out', 'Quantum espresso out file', '+F',
  module='espresso', ext=['out', 'pwo'], magic=b'*Program PWSCF')
F('exciting', 'exciting input', '1F', glob='input.xml')
F('extxyz', 'Extended XYZ file', '+F', ext='xyz')
F('findsym', 'FINDSYM-format', '+F')
F('gamess-us-out', 'GAMESS-US output file', '1F',
  module='gamess_us', magic=b'*GAMESS')
F('gamess-us-in', 'GAMESS-US input file', '1F',
  module='gamess_us')
F('gamess-us-punch', 'GAMESS-US punchcard file', '1F',
  module='gamess_us', magic=b' $DATA', ext='dat')
F('gaussian-in', 'Gaussian com (input) file', '1F',
  module='gaussian', ext=['com', 'gjf'])
F('gaussian-out', 'Gaussian output file', '+F',
  module='gaussian', ext='log', magic=b'*Entering Gaussian System')
F('acemolecule-out', 'ACE output file', '1S',
  module='acemolecule')
F('acemolecule-input', 'ACE input file', '1S',
  module='acemolecule')
F('gen', 'DFTBPlus GEN format', '1F')
F('gif', 'Graphics interchange format', '+S',
  module='animation')
F('gpaw-out', 'GPAW text output', '+F',
  magic=b'* ___ ___ ___ _ _ _')
F('gpumd', 'GPUMD input file', '1F', glob='xyz.in')
F('gpw', 'GPAW restart-file', '1S',
  magic=[b'- of UlmGPAW', b'AFFormatGPAW'])
F('gromacs', 'Gromacs coordinates', '1F',
  ext='gro')
F('gromos', 'Gromos96 geometry file', '1F', ext='g96')
F('html', 'X3DOM HTML', '1F', module='x3d')
F('json', 'ASE JSON database file', '+F', ext='json', module='db')
F('jsv', 'JSV file format', '1F')
F('lammps-dump-text', 'LAMMPS text dump file', '+F',
  module='lammpsrun', magic_regex=b'.*?^ITEM: TIMESTEP$')
F('lammps-dump-binary', 'LAMMPS binary dump file', '+B',
  module='lammpsrun')
F('lammps-data', 'LAMMPS data file', '1F', module='lammpsdata',
  encoding='ascii')
F('magres', 'MAGRES ab initio NMR data file', '1F')
F('mol', 'MDL Molfile', '1F')
F('mp4', 'MP4 animation', '+S',
  module='animation')
F('mustem', 'muSTEM xtl file', '1F',
  ext='xtl')
F('mysql', 'ASE MySQL database file', '+S',
  module='db')
F('netcdftrajectory', 'AMBER NetCDF trajectory file', '+S',
  magic=b'CDF')
F('nomad-json', 'JSON from Nomad archive', '+F',
  ext='nomad-json')
F('nwchem-in', 'NWChem input file', '1F',
  module='nwchem', ext='nwi')
F('nwchem-out', 'NWChem output file', '+F',
  module='nwchem', ext='nwo',
  magic=b'*Northwest Computational Chemistry Package')
F('octopus-in', 'Octopus input file', '1F',
  module='octopus', glob='inp')
F('proteindatabank', 'Protein Data Bank', '+F',
  ext='pdb')
F('png', 'Portable Network Graphics', '1B')
F('postgresql', 'ASE PostgreSQL database file', '+S', module='db')
F('pov', 'Persistance of Vision', '1S')
# prismatic: Should have ext='xyz' if/when multiple formats can have the same
# extension
F('prismatic', 'prismatic and computem XYZ-file', '1F')
F('py', 'Python file', '+F')
F('sys', 'qball sys file', '1F')
F('qbox', 'QBOX output file', '+F',
  magic=b'*:simulation xmlns:')
F('res', 'SHELX format', '1S', ext='shelx')
F('rmc6f', 'RMCProfile', '1S', ext='rmc6f')
F('sdf', 'SDF format', '1F')
F('siesta-xv', 'Siesta .XV file', '1F',
  glob='*.XV', module='siesta')
F('struct', 'WIEN2k structure file', '1S', module='wien2k')
F('struct_out', 'SIESTA STRUCT file', '1F', module='siesta')
F('traj', 'ASE trajectory', '+B', module='trajectory', ext='traj',
  magic=[b'- of UlmASE-Trajectory', b'AFFormatASE-Trajectory'])
F('turbomole', 'TURBOMOLE coord file', '1F', glob='coord',
  magic=b'$coord')
F('turbomole-gradient', 'TURBOMOLE gradient file', '+F',
  module='turbomole', glob='gradient', magic=b'$grad')
F('v-sim', 'V_Sim ascii file', '1F', ext='ascii')
F('vasp', 'VASP POSCAR/CONTCAR', '1F',
  ext='poscar', glob=['*POSCAR*', '*CONTCAR*'])
F('vasp-out', 'VASP OUTCAR file', '+F',
  module='vasp', glob='*OUTCAR*')
F('vasp-xdatcar', 'VASP XDATCAR file', '+F',
  module='vasp', glob='*XDATCAR*')
F('vasp-xml', 'VASP vasprun.xml file', '+F',
  module='vasp', glob='*vasp*.xml')
F('vti', 'VTK XML Image Data', '1F', module='vtkxml')
F('vtu', 'VTK XML Unstructured Grid', '1F', module='vtkxml', ext='vtu')
F('wout', 'Wannier90 output', '1F', module='wannier90')
F('x3d', 'X3D', '1S')
F('xsd', 'Materials Studio file', '1F')
F('xsf', 'XCrySDen Structure File', '+F',
  magic=[b'*\nANIMSTEPS', b'*\nCRYSTAL', b'*\nSLAB', b'*\nPOLYMER',
         b'*\nMOLECULE', b'*\nATOMS'])
F('xtd', 'Materials Studio file', '+F')
# xyz: No `ext='xyz'` in the definition below.
#      The .xyz files are handled by the extxyz module by default.
F('xyz', 'XYZ-file', '+F')

466 

467 

def get_compression(filename: str) -> Tuple[str, Optional[str]]:
    """
    Split a recognised compression suffix off a filename.

    The suffixes ``.gz``, ``.bz2`` and ``.xz`` are recognised.  Any other
    filename is returned unchanged together with ``None``.

    >>> get_compression('H2O.pdb.gz')
    ('H2O.pdb', 'gz')
    >>> get_compression('crystal.cif')
    ('crystal.cif', None)

    Parameters
    ==========
    filename: str
        Full filename including extension.

    Returns
    =======
    (root, extension): (str, str or None)
        The filename without the compression suffix and the suffix
        itself (without the dot), or the unmodified filename and None
        if no known compression suffix is present.
    """
    # Extend this tuple when support for another compression is added.
    known = ('gz', 'bz2', 'xz')

    # splitext keeps the leading '.' on the suffix, hence the strip.
    stem, suffix = os.path.splitext(filename)
    kind = suffix.strip('.')

    if kind not in known:
        return filename, None
    return stem, kind

502 

503 

def open_with_compression(filename: str, mode: str = 'r') -> IO:
    """
    Open a possibly compressed file as if it were a plain file.

    The compression is deduced from the filename suffix: ``gz`` (gzip),
    ``bz2`` (bzip2) and ``xz`` (lzma) are handled by the corresponding
    standard library module, everything else by the builtin ``open``.

    The bare modes 'r', 'w' and 'a' are turned into the text modes
    'rt', 'wt' and 'at'; any other mode (e.g. 'rb', 'wb') is passed on
    unchanged.

    Parameters
    ==========
    filename: str
        Path to the file to open, including any extensions that indicate
        the compression used.
    mode: str
        Mode to open the file, same as for builtin ``open``, e.g 'r', 'w'.

    Returns
    =======
    fd: file
        File-like object open with the specified mode.
    """
    # Compressed formats sometimes default to binary, so force text mode.
    if mode in ('r', 'w', 'a'):
        mode += 't'

    _, compression = get_compression(filename)

    if compression == 'gz':
        import gzip
        opener = gzip.open
    elif compression == 'bz2':
        import bz2
        opener = bz2.open
    elif compression == 'xz':
        import lzma
        opener = lzma.open
    else:
        # Either None or unknown string
        opener = open
    return opener(filename, mode)  # type: ignore

552 

553 

def wrap_read_function(read, filename, index=None, **kwargs):
    """Present a plain read function as a generator.

    Without *index* the single return value of ``read`` is yielded; with
    an index, ``read`` is expected to return an iterable whose items are
    yielded one by one.
    """
    if index is not None:
        yield from read(filename, index, **kwargs)
        return
    yield read(filename, **kwargs)

561 

562 

# Type of the ``filename`` argument of read(), iread(), write() and
# filetype(): a path given as str or pathlib path, or a file object.
NameOrFile = Union[str, PurePath, IO]

564 

565 

def write(
        filename: NameOrFile,
        images: Union[Atoms, Sequence[Atoms]],
        format: str = None,
        parallel: bool = True,
        append: bool = False,
        **kwargs: dict
) -> None:
    """Write Atoms object(s) to file.

    filename: str or file
        Name of the file to write to or a file descriptor.  The name '-'
        means standard output.
    images: Atoms object or list of Atoms objects
        A single Atoms object or a list of Atoms objects.
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be taken from suffix of the filename.
        When nothing can be deduced for standard output or a file
        object, 'json' is used.
    parallel: bool
        Default is to write on master only.  Use parallel=False to write
        from all processes.
    append: bool
        Default is to open files in 'w' or 'wb' mode, overwriting
        existing files.  In some cases opening the file in 'a' or 'ab'
        mode (appending) is useful,
        e.g. writing trajectories or saving multiple Atoms objects in one
        file.
        WARNING: If the file format does not support multiple entries
        without additional keywords/headers, files created using
        'append=True' might not be readable by any program!  They will
        nevertheless be written without error message.

    The use of additional keywords is format specific.  write() may
    return an object after writing certain formats, but this behaviour
    may change in the future.

    """
    if isinstance(filename, PurePath):
        filename = str(filename)

    given_as_name = isinstance(filename, str)
    if given_as_name and filename == '-':
        # '-' means standard output; there is no filename to deduce from.
        fd = sys.stdout
        filename = None  # type: ignore
    elif given_as_name:
        fd = None
        if format is None:
            format = filetype(filename, read=False)
            assert isinstance(format, str)
    else:
        # An open file object: try to deduce the format, but tolerate
        # failure since the json default applies below.
        fd = filename  # type: ignore
        filename = None  # type: ignore
        if format is None:
            try:
                format = filetype(fd, read=False)
                assert isinstance(format, str)
            except UnknownFileTypeError:
                format = None

    if not format:
        format = 'json'  # default is json

    ioformat = get_ioformat(format)

    return _write(filename, fd, format, ioformat, images,
                  parallel=parallel, append=append, **kwargs)

630 

631 

@parallel_function
def _write(filename, fd, format, io, images, parallel=None, append=False,
           **kwargs):
    """Write *images* using the IOFormat *io*.

    Exactly one of *filename* and *fd* is expected to be set by write().
    Raises ValueError when the format cannot do what is asked for.
    """
    if isinstance(images, Atoms):
        images = [images]

    if io.single:
        if len(images) > 1:
            raise ValueError('{}-format can only store 1 Atoms object.'
                             .format(format))
        images = images[0]

    if not io.can_write:
        raise ValueError("Can't write to {}-format".format(format))

    # Special case for json-format:
    if format == 'json' and (len(images) > 1 or append):
        if filename is None:
            raise ValueError("Can't write more than one image to "
                             'file-descriptor using json-format.')
        return io.write(filename, images, append=append, **kwargs)

    if not io.acceptsfd:
        # The writer wants a filename and does all file handling itself.
        if fd is not None:
            raise ValueError("Can't write {}-format to file-descriptor"
                             .format(format))
        if io.can_append:
            return io.write(filename, images, append=append, **kwargs)
        if append:
            raise ValueError("Cannot append to {}-format, write-function "
                             "does not support the append keyword."
                             .format(format))
        return io.write(filename, images, **kwargs)

    if fd is not None:
        # The caller's file object is used as is and left open.
        return io.write(fd, images, **kwargs)

    mode = 'a' if append else 'w'
    if io.isbinary:
        mode += 'b'
    fd = open_with_compression(filename, mode)
    # XXX remember to re-enable compressed open
    # fd = io.open(filename, mode)
    try:
        return io.write(fd, images, **kwargs)
    finally:
        fd.close()

680 

681 

def read(
        filename: NameOrFile,
        index: Any = None,
        format: str = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Union[Atoms, List[Atoms]]:
    """Read Atoms object(s) from file.

    filename: str or file
        Name of the file to read from or a file descriptor.
    index: int, slice or str
        The last configuration will be returned by default.  Examples:

            * ``index=0``: first configuration
            * ``index=-2``: second to last
            * ``index=':'`` or ``index=slice(None)``: all
            * ``index='-3:'`` or ``index=slice(-3, None)``: three last
            * ``index='::2'`` or ``index=slice(0, None, 2)``: even
            * ``index='1::2'`` or ``index=slice(1, None, 2)``: odd
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.
    parallel: bool
        Default is to read on master and broadcast to the other
        processes.  Use parallel=False to read on all processes.
    do_not_split_by_at_sign: bool
        If False (default), ``filename`` is split at the at sign ``@``.

    Many formats allow an open file-like object to be passed instead
    of ``filename``.  In this case the format cannot be auto-detected,
    so the ``format`` argument should be explicitly given."""

    if isinstance(filename, PurePath):
        filename = str(filename)
    if filename == '-':
        filename = sys.stdin
    if isinstance(index, str):
        # Strings that do not parse are handed on unchanged.
        try:
            index = string2index(index)
        except ValueError:
            pass

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)
    if index is None:
        index = -1
    if not format:
        format = filetype(filename, read=isinstance(filename, str))

    ioformat = get_ioformat(format)
    if isinstance(index, (slice, str)):
        images = _iread(filename, index, format, ioformat,
                        parallel=parallel, **kwargs)
        return list(images)

    # A single image is requested: take the first one from there onwards.
    images = _iread(filename, slice(index, None), format, ioformat,
                    parallel=parallel, **kwargs)
    return next(images)

738 

739 

def iread(
        filename: NameOrFile,
        index: Any = None,
        format: str = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Iterable[Atoms]:
    """Iterator for reading Atoms objects from file.

    Takes the same arguments as the `read` function, but yields one
    Atoms object at a time instead of returning them all at once.
    Without an index, all images are yielded."""

    if isinstance(filename, PurePath):
        filename = str(filename)

    if isinstance(index, str):
        index = string2index(index)

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)

    if index is None or index == ':':
        index = slice(None)
    elif not isinstance(index, (slice, str)):
        # An integer selects one image; -1 + 1 == 0 must become an open end.
        index = slice(index, (index + 1) or None)

    if not format:
        format = filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    yield from _iread(filename, index, format, ioformat, parallel=parallel,
                      **kwargs)

773 

774 

@parallel_generator
def _iread(filename, index, format, io, parallel=None, full_output=False,
           **kwargs):
    """Yield images read from *filename* with the IOFormat *io*.

    Items produced by the reader that are not dicts are treated as the
    atoms themselves.  With *full_output*, the dicts are yielded instead
    of only their 'atoms' entry.
    """
    if not io.can_read:
        raise ValueError("Can't read from {}-format".format(format))

    if io.single:
        # Readers of single-image formats take no index argument.
        first = index.start
        assert first is None or first == 0 or first == -1
        reader_args = ()
    else:
        reader_args = (index,)

    is_path = isinstance(filename, str)
    if not is_path:
        assert io.acceptsfd
    # Only a file opened here is closed here; a filename handed to a
    # reader that wants names, or a caller's file object, is passed on.
    opened_here = is_path and io.acceptsfd
    if opened_here:
        fd = open_with_compression(filename, 'rb' if io.isbinary else 'r')
    else:
        fd = filename

    # Make sure fd is closed in case loop doesn't finish:
    try:
        for item in io.read(fd, *reader_args, **kwargs):
            record = item if isinstance(item, dict) else {'atoms': item}
            if full_output:
                yield record
            else:
                yield record['atoms']
    finally:
        if opened_here:
            fd.close()

813 

814 

def parse_filename(filename, index=None, do_not_split_by_at_sign=False):
    """Split a trailing ``@index`` off *filename*.

    Returns ``(filename, index)``.  Nothing is split unless *filename* is
    a str whose basename contains ``@`` and *do_not_split_by_at_sign* is
    false.  An *index* given as a slice takes precedence over the one in
    the filename.  If the part after ``@`` cannot be parsed, a warning is
    issued and that part is returned as a string.
    """
    if not isinstance(filename, str):
        return filename, index

    if do_not_split_by_at_sign or '@' not in os.path.basename(filename):
        return filename, index

    stem, _, suffix = filename.rpartition('@')

    if isinstance(index, slice):
        return stem, index

    try:
        parsed = string2index(suffix)
    except ValueError:
        warnings.warn('Can not parse index for path \n'
                      ' "%s" \nConsider set '
                      'do_not_split_by_at_sign=True \nif '
                      'there is no index.' % filename)
        parsed = suffix
    return stem, parsed

836 

837 

def match_magic(data: bytes) -> IOFormat:
    """Return the first registered format whose magic matches *data*.

    Only the first PEEK_BYTES bytes are considered.  Raises
    UnknownFileTypeError if no format matches.
    """
    head = data[:PEEK_BYTES]
    for candidate in ioformats.values():
        if candidate.match_magic(head):
            break
    else:
        raise UnknownFileTypeError('Cannot guess file type from contents')
    return candidate

844 

845 

def string2index(string: str) -> Union[int, slice, str]:
    """Convert index string to either int or slice.

    A string without ``':'`` is converted to an int if possible and
    returned unchanged otherwise (it may be a database accessor).  A
    string with ``':'`` is read as ``start:stop[:step]`` where empty
    fields mean None.

    Raises ValueError if a field is not an integer or if there are more
    than three fields.
    """
    if ':' not in string:
        # may contain database accessor
        try:
            return int(string)
        except ValueError:
            return string

    fields = string.split(':')
    if len(fields) > 3:
        # slice() would fail with a TypeError here, which callers that
        # tolerate unparsable index strings (ValueError) do not catch.
        raise ValueError(f'Too many fields in index string: {string!r}')

    bounds: List[Optional[int]] = [int(field) if field else None
                                   for field in fields]
    return slice(*bounds)

862 

863 

def filetype(
        filename: NameOrFile,
        read: bool = True,
        guess: bool = True,
) -> str:
    """Try to guess the type of the file.

    First, special signatures in the filename will be checked for.  If that
    does not identify the file type, then the first PEEK_BYTES bytes of the
    file will be read and analysed.  Turn off this second part by using
    read=False.

    filename: str, path object or file
        File to identify.  For a file object with a string ``name``
        attribute, that name is used as the filename.  File objects
        without a usable name are always identified from their contents.
        Standard input is taken to be 'json'.
    read: bool
        Whether a file given by name may be opened and inspected.
    guess: bool
        Whether the bare filename extension may be returned as the format
        when nothing else identifies the file.

    A file object passed in is never closed; it is rewound to the start
    after its contents have been inspected.

    Raises UnknownFileTypeError if the type cannot be determined or the
    file is empty.

    Can be used from the command-line also::

        $ ase info filename ...
    """
    # Path objects have a ``name`` attribute (the basename only), so they
    # must be converted before the file-object handling below.
    if isinstance(filename, PurePath):
        filename = str(filename)

    # The real sys.stdin has a name ('<stdin>') and would otherwise be
    # treated like a named file.
    if filename is sys.stdin:
        return 'json'

    orig_filename = filename
    if hasattr(filename, 'name'):
        filename = filename.name  # type: ignore

    ext = None
    opened_here = False
    if isinstance(filename, str):
        if os.path.isdir(filename):
            if os.path.basename(os.path.normpath(filename)) == 'states':
                return 'eon'
            return 'bundletrajectory'

        if filename.startswith('postgres'):
            return 'postgresql'

        if filename.startswith('mysql') or filename.startswith('mariadb'):
            return 'mysql'

        # strip any compression extensions that can be read
        root, compression = get_compression(filename)
        basename = os.path.basename(root)

        if '.' in basename:
            ext = os.path.splitext(basename)[1].strip('.').lower()

        for fmt in ioformats.values():
            if fmt.match_name(basename):
                return fmt.name

        if not read:
            if ext is None:
                raise UnknownFileTypeError('Could not guess file type')
            ioformat = extension2format.get(ext)
            if ioformat:
                return ioformat.name

            # askhl: This is strange, we don't know if ext is a format:
            return ext

        if orig_filename == filename:
            fd = open_with_compression(filename, 'rb')
            opened_here = True
        else:
            fd = orig_filename  # type: ignore
    else:
        # No usable name (e.g. an integer file descriptor as ``name``):
        # work on the object that was passed in.
        fd = orig_filename  # type: ignore

    try:
        data = fd.read(PEEK_BYTES)
    finally:
        # Only close what was opened here, even if reading fails.
        if opened_here:
            fd.close()
    if not opened_here:
        # Leave the caller's file object open and rewound.
        fd.seek(0)

    if len(data) == 0:
        raise UnknownFileTypeError(f'Empty file: {filename}')

    # Magic patterns are bytes; a file object opened in text mode gives str.
    if isinstance(data, str):
        magic_data = data.encode('utf-8', 'replace')
    else:
        magic_data = data

    try:
        return match_magic(magic_data).name
    except UnknownFileTypeError:
        pass

    format = None
    if ext in extension2format:
        format = extension2format[ext].name

    if format is None and guess:
        format = ext
    if format is None:
        # Do quick xyz check:
        lines = data.splitlines()
        if lines and lines[0].strip().isdigit():
            return extension2format['xyz'].name

        raise UnknownFileTypeError('Could not guess file type')
    assert isinstance(format, str)
    return format

957 

958 

def index2range(index, length):
    """Translate an index into a range over a sequence of *length* items.

    A slice gives the range of the selected positions; an integer gives
    a range holding just that (non-negative) position."""
    selected = range(length)[index]
    if not isinstance(selected, numbers.Integral):
        return selected
    return range(selected, selected + 1)

966 return obj