Coverage for /builds/debichem-team/python-ase/ase/io/formats.py: 89.52%

544 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2025-03-06 04:00 +0000

1"""File formats. 

2 

3This module implements the read(), iread() and write() functions in ase.io. 

4For each file format there is an IOFormat object. 

5 

6There is a dict, ioformats, which stores the objects. 

7 

8Example 

9======= 

10 

11The xyz format is implemented in the ase/io/xyz.py file which has a 

12read_xyz() generator and a write_xyz() function. This and other 

13information can be obtained from ioformats['xyz']. 

14""" 

15 

16import functools 

17import inspect 

18import io 

19import numbers 

20import os 

21import re 

22import sys 

23import warnings 

24from importlib import import_module 

25from importlib.metadata import entry_points 

26from pathlib import Path, PurePath 

27from typing import ( 

28 IO, 

29 Any, 

30 Dict, 

31 Iterable, 

32 List, 

33 Optional, 

34 Sequence, 

35 Tuple, 

36 Union, 

37) 

38 

39from ase.atoms import Atoms 

40from ase.parallel import parallel_function, parallel_generator 

41from ase.utils import string2index 

42from ase.utils.plugins import ExternalIOFormat 

43 

# Number of leading bytes of a file that are read and inspected when the
# file format has to be guessed from the file contents.
PEEK_BYTES = 50000

45 

46 

class UnknownFileTypeError(Exception):
    """Raised when a file format is unknown or cannot be determined."""

49 

50 

class IOFormat:
    """Metadata and lazy read/write dispatch for a single file format.

    The two-character ``code`` describes the format: the first character
    is ``'1'`` (a single Atoms object) or ``'+'`` (multiple Atoms objects);
    the second is ``'F'`` (accepts a file object), ``'B'`` (like ``'F'``
    but opened in binary mode) or ``'S'`` (needs a filename string).

    The implementing module is imported on demand (see :attr:`module`) and
    the functions ``read_<name>`` / ``write_<name>`` are looked up in it,
    with ``-`` in the format name replaced by ``_``.
    """

    def __init__(self, name: str, desc: str, code: str, module_name: str,
                 encoding: Optional[str] = None) -> None:
        self.name = name
        self.description = desc
        assert len(code) == 2
        assert code[0] in list('+1')
        assert code[1] in list('BFS')
        self.code = code
        self.module_name = module_name
        self.encoding = encoding

        # (To be set by define_io_format())
        self.extensions: List[str] = []
        self.globs: List[str] = []
        self.magic: List[str] = []
        self.magic_regex: Optional[bytes] = None

    def open(self, fname, mode: str = 'r') -> IO:
        """Open ``fname`` in mode 'r', 'w' or 'a' as required by this format.

        Binary formats get ``'b'`` appended to the mode.  Raises ValueError
        for any other mode and NotImplementedError if the format has no
        reader/writer/append support for the requested mode.
        """
        # We might want append mode, too
        # We can allow more flags as needed (buffering etc.)
        if mode not in list('rwa'):
            raise ValueError("Only modes allowed are 'r', 'w', and 'a'")
        if mode == 'r' and not self.can_read:
            raise NotImplementedError('No reader implemented for {} format'
                                      .format(self.name))
        if mode == 'w' and not self.can_write:
            raise NotImplementedError('No writer implemented for {} format'
                                      .format(self.name))
        if mode == 'a' and not self.can_append:
            raise NotImplementedError('Appending not supported by {} format'
                                      .format(self.name))

        if self.isbinary:
            mode += 'b'

        path = Path(fname)
        return path.open(mode, encoding=self.encoding)

    def _buf_as_filelike(self, data: Union[str, bytes]) -> IO:
        """Wrap ``data`` in a BytesIO/StringIO matching the format's mode."""
        encoding = self.encoding
        if encoding is None:
            encoding = 'utf-8'  # Best hacky guess.

        # Convert between str and bytes so that the buffer type agrees with
        # what the reader expects.
        if self.isbinary:
            if isinstance(data, str):
                data = data.encode(encoding)
        else:
            if isinstance(data, bytes):
                data = data.decode(encoding)

        return self._ioclass(data)

    @property
    def _ioclass(self):
        """In-memory file class: BytesIO for binary formats, else StringIO."""
        if self.isbinary:
            return io.BytesIO
        else:
            return io.StringIO

    def parse_images(self, data: Union[str, bytes],
                     **kwargs) -> Sequence[Atoms]:
        """Parse in-memory ``data`` and return everything the reader yields.

        ``kwargs`` are forwarded to the format's read function.
        """
        with self._buf_as_filelike(data) as fd:
            # self.read() always hands back a lazy iterator, for
            # single-image formats as well, so it is consumed exactly once
            # here while the buffer is still open.
            return list(self.read(fd, **kwargs))

    def parse_atoms(self, data: Union[str, bytes], **kwargs) -> Atoms:
        """Parse in-memory ``data`` and return the last image."""
        images = self.parse_images(data, **kwargs)
        return images[-1]

    @property
    def can_read(self) -> bool:
        """Whether the module provides a ``read_<name>`` function."""
        return self._readfunc() is not None

    @property
    def can_write(self) -> bool:
        """Whether the module provides a ``write_<name>`` function."""
        return self._writefunc() is not None

    @property
    def can_append(self) -> bool:
        """Whether the write function has a variable named ``append``."""
        writefunc = self._writefunc()
        return (writefunc is not None
                and 'append' in writefunc.__code__.co_varnames)

    def __repr__(self) -> str:
        tokens = [f'{name}={value!r}'
                  for name, value in vars(self).items()]
        return 'IOFormat({})'.format(', '.join(tokens))

    def __getitem__(self, i):
        # For compatibility.
        #
        # Historically, the ioformats were listed as tuples
        # with (description, code).  We look like such a tuple.
        return (self.description, self.code)[i]

    @property
    def single(self) -> bool:
        """Whether this format is for a single Atoms object."""
        return self.code[0] == '1'

    @property
    def _formatname(self) -> str:
        """Format name with ``-`` replaced by ``_`` (identifier-safe)."""
        return self.name.replace('-', '_')

    def _readfunc(self):
        """Return the module's ``read_<name>`` function, or None."""
        return getattr(self.module, 'read_' + self._formatname, None)

    def _writefunc(self):
        """Return the module's ``write_<name>`` function, or None."""
        return getattr(self.module, 'write_' + self._formatname, None)

    @property
    def read(self):
        """Callable that reads this format, or None (with a FutureWarning)."""
        if not self.can_read:
            self._warn_none('read')
            return None

        return self._read_wrapper

    def _read_wrapper(self, *args, **kwargs):
        """Call the read function, always returning an iterator of results."""
        function = self._readfunc()
        if function is None:
            self._warn_none('read')
            return None
        if not inspect.isgeneratorfunction(function):
            # Plain functions are adapted so callers can always iterate.
            function = functools.partial(wrap_read_function, function)
        return function(*args, **kwargs)

    def _warn_none(self, action):
        """Emit the FutureWarning about properties returning None."""
        msg = ('Accessing the IOFormat.{action} property on a format '
               'without {action} support will change behaviour in the '
               'future and return a callable instead of None. '
               'Use IOFormat.can_{action} to check whether {action} '
               'is supported.')
        warnings.warn(msg.format(action=action), FutureWarning)

    @property
    def write(self):
        """Callable that writes this format, or None (with a FutureWarning)."""
        if not self.can_write:
            self._warn_none('write')
            return None

        return self._write_wrapper

    def _write_wrapper(self, *args, **kwargs):
        """Call the write function; raise ValueError if there is none."""
        function = self._writefunc()
        if function is None:
            raise ValueError(f'Cannot write to {self.name}-format')
        return function(*args, **kwargs)

    @property
    def modes(self) -> str:
        """String containing 'r' and/or 'w' according to what is supported."""
        modes = ''
        if self.can_read:
            modes += 'r'
        if self.can_write:
            modes += 'w'
        return modes

    def full_description(self) -> str:
        """Return a multi-line, human-readable summary of the format."""
        lines = [f'Name: {self.name}',
                 f'Description: {self.description}',
                 f'Modes: {self.modes}',
                 f'Encoding: {self.encoding}',
                 f'Module: {self.module_name}',
                 f'Code: {self.code}',
                 f'Extensions: {self.extensions}',
                 f'Globs: {self.globs}',
                 f'Magic: {self.magic}']
        return '\n'.join(lines)

    @property
    def acceptsfd(self) -> bool:
        """Whether the reader/writer takes a file object (code 'F' or 'B')."""
        return self.code[1] != 'S'

    @property
    def isbinary(self) -> bool:
        """Whether files of this format are opened in binary mode."""
        return self.code[1] == 'B'

    @property
    def module(self):
        """Import and return the implementing module.

        Raises UnknownFileTypeError (chained to the ImportError) if the
        module cannot be imported.
        """
        try:
            return import_module(self.module_name)
        except ImportError as err:
            raise UnknownFileTypeError(
                f'File format not recognized: {self.name}. Error: {err}'
            ) from err

    def match_name(self, basename: str) -> bool:
        """Whether ``basename`` matches any of the format's glob patterns."""
        from fnmatch import fnmatch
        return any(fnmatch(basename, pattern)
                   for pattern in self.globs)

    def match_magic(self, data: bytes) -> bool:
        """Whether ``data`` matches the format's magic regex or patterns."""
        if self.magic_regex:
            assert not self.magic, 'Define only one of magic and magic_regex'
            match = re.match(self.magic_regex, data, re.M | re.S)
            return match is not None

        from fnmatch import fnmatchcase
        # Each magic pattern only has to match the beginning of the data,
        # hence the trailing wildcard.
        return any(
            fnmatchcase(data, magic + b'*')  # type: ignore[operator, type-var]
            for magic in self.magic
        )

257 

258 

# Registry of IOFormat objects keyed by format name.
ioformats: Dict[str, IOFormat] = {}  # These will be filled at run-time.
# Maps a filename extension (stored without the leading dot) to the
# IOFormat that registered it; each extension can be registered only once.
extension2format: Dict[str, IOFormat] = {}


all_formats = ioformats  # Aliased for compatibility only. Please do not use.
# Maps format name to the module name as passed to define_io_format(),
# i.e. before any 'ase.io.' prefix is added.
format2modulename: Dict[str, str] = {}  # Left for compatibility only.

265 

266 

def define_io_format(name, desc, code, *, module=None, ext=None,
                     glob=None, magic=None, encoding=None,
                     magic_regex=None, external=False):
    """Create an IOFormat, register it globally and return it.

    ``module`` defaults to ``name`` with ``-`` replaced by ``_`` and is
    prefixed with ``ase.io.`` unless ``external`` is true.  ``ext``,
    ``glob`` and ``magic`` may each be None, a single str/bytes value or
    an iterable of values.  Raises ValueError if one of the extensions has
    already been registered by another format.
    """
    def as_list(patterns):
        # Accept None, a single pattern or any iterable of patterns.
        if patterns is None:
            return []
        if isinstance(patterns, (str, bytes)):
            return [patterns]
        return list(patterns)

    short_module = name.replace('-', '_') if module is None else module
    format2modulename[name] = short_module
    module_name = short_module if external else 'ase.io.' + short_module

    fmt = IOFormat(name, desc, code, module_name=module_name,
                   encoding=encoding)
    fmt.extensions = as_list(ext)
    fmt.globs = as_list(glob)
    fmt.magic = as_list(magic)
    if magic_regex is not None:
        fmt.magic_regex = magic_regex

    for extension in fmt.extensions:
        if extension in extension2format:
            raise ValueError(f'extension "{extension}" already registered')
        extension2format[extension] = fmt

    ioformats[name] = fmt
    return fmt

302 

303 

def get_ioformat(name: str) -> IOFormat:
    """Look up a registered format by name.

    Raises UnknownFileTypeError if the name is not registered or if the
    module implementing the format cannot be imported.
    """
    fmt = ioformats.get(name)
    if fmt is None:
        raise UnknownFileTypeError(name)
    # Accessing the property triggers the import, which may itself raise.
    _ = fmt.module
    return fmt

312 

313 

def register_external_io_formats(group):
    """Register every IO format advertised under entry-point ``group``.

    A format that fails to register is reported with a warning and does
    not prevent the remaining ones from being processed.
    """
    discovered = entry_points()
    if hasattr(discovered, 'select'):
        candidates = discovered.select(group=group)
    else:
        # entry_points() results without select(): dict-style lookup.
        candidates = discovered.get(group, ())

    for candidate in candidates:
        try:
            define_external_io_format(candidate)
        except Exception as exc:
            warnings.warn(
                'Failed to register external '
                f'IO format {candidate.name}: {exc}'
            )

328 

329 

def define_external_io_format(entry_point):
    """Load ``entry_point`` and register the format it describes.

    Raises ValueError if a format of that name already exists and
    TypeError if the loaded object is not an ExternalIOFormat.
    """
    loaded = entry_point.load()
    format_name = entry_point.name
    if format_name in ioformats:
        raise ValueError(f'Format {entry_point.name} already defined')
    if not isinstance(loaded, ExternalIOFormat):
        raise TypeError('Wrong type for registering external IO formats '
                        f'in format {entry_point.name}, expected '
                        'ExternalIOFormat')
    # The fields of the loaded object become keyword arguments of
    # define_io_format(), for which F is the module-level alias.
    F(format_name, **loaded._asdict(), external=True)

340 

341 

# We define all the IO formats below. Each IO format has a code,
# such as '1F', which defines some of the format's properties:
#
# 1=single atoms object
# +=multiple atoms objects
# F=accepts a file-descriptor
# S=needs a file-name str
# B=like F, but opens in binary mode

# Short alias used to keep the registration table below compact.
F = define_io_format
F('abinit-gsr', 'ABINIT GSR file', '1S',
  module='abinit', glob='*o_GSR.nc')
F('abinit-in', 'ABINIT input file', '1F',
  module='abinit', magic=b'*znucl *')
F('abinit-out', 'ABINIT output file', '1F',
  module='abinit', magic=b'*.Version * of ABINIT')
F('aims', 'FHI-aims geometry file', '1S', ext='in')
F('aims-output', 'FHI-aims output', '+S',
  module='aims', magic=b'*Invoking FHI-aims ...')
F('bundletrajectory', 'ASE bundle trajectory', '+S')
F('castep-castep', 'CASTEP output file', '+F',
  module='castep', ext='castep')
F('castep-cell', 'CASTEP geom file', '1F',
  module='castep', ext='cell')
F('castep-geom', 'CASTEP trajectory file', '+F',
  module='castep', ext='geom')
F('castep-md', 'CASTEP molecular dynamics file', '+F',
  module='castep', ext='md')
F('castep-phonon', 'CASTEP phonon file', '1F',
  module='castep', ext='phonon')
F('cfg', 'AtomEye configuration', '1F')
F('cif', 'CIF-file', '+B', ext='cif')
F('cmdft', 'CMDFT-file', '1F', glob='*I_info')
F('cjson', 'Chemical json file', '1F', ext='cjson')
F('cp2k-dcd', 'CP2K DCD file', '+B',
  module='cp2k', ext='dcd')
F('cp2k-restart', 'CP2K restart file', '1F',
  module='cp2k', ext='restart')
F('crystal', 'Crystal fort.34 format', '1F',
  ext=['f34', '34'], glob=['f34', '34'])
F('cube', 'CUBE file', '1F', ext='cube')
F('dacapo-text', 'Dacapo text output', '1F',
  module='dacapo', magic=b'*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n')
F('db', 'ASE SQLite database file', '+S')
F('dftb', 'DftbPlus input file', '1S', magic=b'Geometry')
F('dlp4', 'DL_POLY_4 CONFIG file', '1F',
  module='dlp4', ext='config', glob=['*CONFIG*'])
F('dlp-history', 'DL_POLY HISTORY file', '+F',
  module='dlp4', glob='HISTORY')
F('dmol-arc', 'DMol3 arc file', '+S',
  module='dmol', ext='arc')
F('dmol-car', 'DMol3 structure file', '1S',
  module='dmol', ext='car')
F('dmol-incoor', 'DMol3 structure file', '1S',
  module='dmol')
F('elk', 'ELK atoms definition from GEOMETRY.OUT', '1F',
  glob=['GEOMETRY.OUT'])
F('elk-in', 'ELK input file', '1F', module='elk')
F('eon', 'EON CON file', '+F',
  ext='con')
F('eps', 'Encapsulated Postscript', '1S')
F('espresso-in', 'Quantum espresso in file', '1F',
  module='espresso', ext='pwi', magic=[b'*\n&system', b'*\n&SYSTEM'])
F('espresso-out', 'Quantum espresso out file', '+F',
  module='espresso', ext=['pwo', 'out'], magic=b'*Program PWSCF')
# NOTE(review): 'exciting' is registered twice.  define_io_format() stores
# formats in ioformats by name, so the second call replaces the first entry
# and the 'input.xml' glob is no longer reachable through ioformats --
# confirm whether this is intended.
F('exciting', 'exciting input', '1F', module='exciting', glob='input.xml')
F('exciting', 'exciting output', '1F', module='exciting', glob='INFO.out')
F('extxyz', 'Extended XYZ file', '+F', ext='xyz')
F('findsym', 'FINDSYM-format', '+F')
F('gamess-us-out', 'GAMESS-US output file', '1F',
  module='gamess_us', magic=b'*GAMESS')
F('gamess-us-in', 'GAMESS-US input file', '1F',
  module='gamess_us')
F('gamess-us-punch', 'GAMESS-US punchcard file', '1F',
  module='gamess_us', magic=b' $DATA', ext='dat')
F('gaussian-in', 'Gaussian com (input) file', '1F',
  module='gaussian', ext=['com', 'gjf'])
F('gaussian-out', 'Gaussian output file', '+F',
  module='gaussian', ext='log', magic=b'*Entering Gaussian System')
F('acemolecule-out', 'ACE output file', '1S',
  module='acemolecule')
F('acemolecule-input', 'ACE input file', '1S',
  module='acemolecule')
F('gen', 'DFTBPlus GEN format', '1F')
F('gif', 'Graphics interchange format', '+S',
  module='animation')
F('gpaw-out', 'GPAW text output', '+F',
  magic=b'* ___ ___ ___ _ _ _')
F('gpumd', 'GPUMD input file', '1F', glob='xyz.in')
F('gpw', 'GPAW restart-file', '1S',
  magic=[b'- of UlmGPAW', b'AFFormatGPAW'])
F('gromacs', 'Gromacs coordinates', '1F',
  ext='gro')
F('gromos', 'Gromos96 geometry file', '1F', ext='g96')
F('html', 'X3DOM HTML', '1F', module='x3d')
F('json', 'ASE JSON database file', '+F', ext='json', module='db')
F('jsv', 'JSV file format', '1F')
F('lammps-dump-text', 'LAMMPS text dump file', '+F',
  module='lammpsrun', magic_regex=b'.*?^ITEM: TIMESTEP$')
F('lammps-dump-binary', 'LAMMPS binary dump file', '+B',
  module='lammpsrun')
F('lammps-data', 'LAMMPS data file', '1F', module='lammpsdata',
  encoding='ascii')
F('magres', 'MAGRES ab initio NMR data file', '1F')
F('mol', 'MDL Molfile', '1F')
F('mp4', 'MP4 animation', '+S',
  module='animation')
F('mustem', 'muSTEM xtl file', '1F',
  ext='xtl')
F('mysql', 'ASE MySQL database file', '+S',
  module='db')
F('netcdftrajectory', 'AMBER NetCDF trajectory file', '+S',
  magic=b'CDF')
F('nomad-json', 'JSON from Nomad archive', '+F',
  ext='nomad-json')
F('nwchem-in', 'NWChem input file', '1F',
  module='nwchem', ext='nwi')
F('nwchem-out', 'NWChem output file', '+F',
  module='nwchem', ext='nwo',
  magic=b'*Northwest Computational Chemistry Package')
F('octopus-in', 'Octopus input file', '1F',
  module='octopus', glob='inp')
F('onetep-out', 'ONETEP output file', '+F',
  module='onetep',
  magic=b'*Linear-Scaling Ab Initio Total Energy Program*')
F('onetep-in', 'ONETEP input file', '1F',
  module='onetep',
  magic=[b'*lock species ',
         b'*LOCK SPECIES ',
         b'*--- INPUT FILE ---*'])
F('proteindatabank', 'Protein Data Bank', '+F',
  ext='pdb')
F('png', 'Portable Network Graphics', '1B')
F('postgresql', 'ASE PostgreSQL database file', '+S', module='db')
F('pov', 'Persistance of Vision', '1S')
# prismatic: Should have ext='xyz' if/when multiple formats can have the same
# extension
F('prismatic', 'prismatic and computem XYZ-file', '1F')
F('py', 'Python file', '+F')
F('sys', 'qball sys file', '1F')
F('qbox', 'QBOX output file', '+F',
  magic=b'*:simulation xmlns:')
F('res', 'SHELX format', '1S', ext='shelx')
F('rmc6f', 'RMCProfile', '1S', ext='rmc6f')
F('sdf', 'SDF format', '1F')
F('siesta-xv', 'Siesta .XV file', '1F',
  glob='*.XV', module='siesta')
F('struct', 'WIEN2k structure file', '1S', module='wien2k')
F('struct_out', 'SIESTA STRUCT file', '1F', module='siesta')
F('traj', 'ASE trajectory', '+B', module='trajectory', ext='traj',
  magic=[b'- of UlmASE-Trajectory', b'AFFormatASE-Trajectory'])
F('turbomole', 'TURBOMOLE coord file', '1F', glob='coord',
  magic=b'$coord')
F('turbomole-gradient', 'TURBOMOLE gradient file', '+F',
  module='turbomole', glob='gradient', magic=b'$grad')
F('v-sim', 'V_Sim ascii file', '1F', ext='ascii')
F('vasp', 'VASP POSCAR/CONTCAR', '1F',
  ext='poscar', glob=['*POSCAR*', '*CONTCAR*', '*CENTCAR*'])
F('vasp-out', 'VASP OUTCAR file', '+F',
  module='vasp', glob='*OUTCAR*')
F('vasp-xdatcar', 'VASP XDATCAR file', '+F',
  module='vasp', glob='*XDATCAR*')
F('vasp-xml', 'VASP vasprun.xml file', '+F',
  module='vasp', glob='*vasp*.xml')
F('vti', 'VTK XML Image Data', '1F', module='vtkxml')
F('vtu', 'VTK XML Unstructured Grid', '1F', module='vtkxml', ext='vtu')
F('wout', 'Wannier90 output', '1F', module='wannier90')
F('x3d', 'X3D', '1S')
F('xsd', 'Materials Studio file', '1F')
F('xsf', 'XCrySDen Structure File', '+F',
  magic=[b'*\nANIMSTEPS', b'*\nCRYSTAL', b'*\nSLAB', b'*\nPOLYMER',
         b'*\nMOLECULE', b'*\nATOMS'])
F('xtd', 'Materials Studio file', '+F')
# xyz: No `ext='xyz'` in the definition below.
# The .xyz files are handled by the extxyz module by default.
F('xyz', 'XYZ-file', '+F')

# Register IO formats exposed through the ase.ioformats entry point
register_external_io_formats('ase.ioformats')

521 

522 

def get_compression(filename: str) -> Tuple[str, Optional[str]]:
    """
    Split a recognised compression suffix off a filename.

    The suffixes ``.gz``, ``.bz2`` and ``.xz`` are recognised.  Any other
    filename is returned unchanged together with ``None``.

    >>> get_compression('H2O.pdb.gz')
    ('H2O.pdb', 'gz')
    >>> get_compression('crystal.cif')
    ('crystal.cif', None)

    Parameters
    ==========
    filename: str
        Full filename including extension.

    Returns
    =======
    (root, extension): (str, str or None)
        The filename without the compression suffix, and the suffix
        (without the dot) naming the compression format.  If no known
        compression suffix is present, the full filename and ``None``.
    """
    # Update if anything is added
    known_suffixes = ('gz', 'bz2', 'xz')

    # Use stdlib as it handles most edge cases
    stem, dotted_suffix = os.path.splitext(filename)

    # splitext keeps the leading '.', so drop it before comparing
    suffix = dotted_suffix[1:]
    if suffix not in known_suffixes:
        return filename, None
    return stem, suffix

557 

558 

def open_with_compression(filename: str, mode: str = 'r') -> IO:
    """
    Open a file, transparently handling compression.

    The compression is guessed from the filename suffix and the file is
    opened for reading or writing as if it were a standard file.

    Implemented for ``gz``(gzip), ``bz2``(bzip2) and ``xz``(lzma).

    Supported modes are:
       * 'r', 'rt', 'w', 'wt' for text mode read and write.
       * 'rb', 'wb' for binary read and write.

    Parameters
    ==========
    filename: str
        Path to the file to open, including any extensions that indicate
        the compression used.
    mode: str
        Mode to open the file, same as for builtin ``open``, e.g 'r', 'w'.

    Returns
    =======
    fd: file
        File-like object open with the specified mode.
    """
    # Compressed formats sometimes default to binary, so force text mode.
    if mode in ('r', 'w', 'a'):
        mode = mode + 't'

    _, compression = get_compression(filename)

    # The compression modules are imported lazily, only when needed.
    if compression == 'gz':
        import gzip
        return gzip.open(filename, mode=mode)  # type: ignore[return-value]
    if compression == 'bz2':
        import bz2
        return bz2.open(filename, mode=mode)
    if compression == 'xz':
        import lzma
        return lzma.open(filename, mode)

    # Either None or unknown string
    return open(filename, mode)

607 

608 

def is_compressed(fd: io.BufferedIOBase) -> bool:
    """Return True if ``fd`` is a gzip, bz2 or lzma file object."""
    # We'd like to avoid triggering imports unless already imported.
    # Also, Python can be compiled without e.g. lzma so we need to
    # protect against that: a module that was never imported cannot
    # have produced the file object.
    file_classes = (
        ('gzip', 'GzipFile'),
        ('bz2', 'BZ2File'),
        ('lzma', 'LZMAFile'),
    )
    for module_name, class_name in file_classes:
        if module_name not in sys.modules:
            continue
        compression_module = import_module(module_name)
        if isinstance(fd, getattr(compression_module, class_name)):
            return True
    return False

626 

627 

def wrap_read_function(read, filename, index=None, **kwargs):
    """Adapt a plain read function so that it behaves like a generator.

    With an ``index``, ``read(filename, index, **kwargs)`` must return an
    iterable whose items are yielded one by one.  Without it, the single
    return value of ``read(filename, **kwargs)`` is yielded.
    """
    if index is not None:
        yield from read(filename, index, **kwargs)
        return
    yield read(filename, **kwargs)

634 

635 

# Accepted ways of referring to a file: a filename (str or path object)
# or an already opened file object.
NameOrFile = Union[str, PurePath, IO]

637 

638 

def write(
        filename: NameOrFile,
        images: Union[Atoms, Sequence[Atoms]],
        format: str = None,
        parallel: bool = True,
        append: bool = False,
        **kwargs: Any
) -> None:
    """Write Atoms object(s) to file.

    filename: str or file
        Name of the file to write to or a file descriptor.  The name '-'
        means standard output.
    images: Atoms object or list of Atoms objects
        A single Atoms object or a list of Atoms objects.
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be taken from suffix of the filename.
        When no format can be determined for standard output or a
        file object, json is used.
    parallel: bool
        Default is to write on master only.  Use parallel=False to write
        from all slaves.
    append: bool
        Default is to open files in 'w' or 'wb' mode, overwriting
        existing files.  In some cases opening the file in 'a' or 'ab'
        mode (appending) is useful,
        e.g. writing trajectories or saving multiple Atoms objects in one file.
        WARNING: If the file format does not support multiple entries without
        additional keywords/headers, files created using 'append=True'
        might not be readable by any program! They will nevertheless be
        written without error message.

    The use of additional keywords is format specific. write() may
    return an object after writing certain formats, but this behaviour
    may change in the future.

    """
    if isinstance(filename, PurePath):
        filename = str(filename)

    if not isinstance(filename, str):
        # An already opened file object: try to guess the format from it.
        fd = filename  # type: ignore[assignment]
        if format is None:
            try:
                format = filetype(filename, read=False)
                assert isinstance(format, str)
            except UnknownFileTypeError:
                format = None
        filename = None  # type: ignore[assignment]
    elif filename == '-':
        fd = sys.stdout
        filename = None  # type: ignore[assignment]
    else:
        fd = None
        if format is None:
            format = filetype(filename, read=False)
            assert isinstance(format, str)

    if not format:
        format = 'json'  # default is json

    ioformat = get_ioformat(format)

    return _write(filename, fd, format, ioformat, images,
                  parallel=parallel, append=append, **kwargs)

703 

704 

@parallel_function
def _write(filename, fd, format, io, images, parallel=None, append=False,
           **kwargs):
    """Write ``images`` using the format object ``io``.

    Exactly one of ``filename`` and ``fd`` is expected to be set.  Raises
    ValueError when the request cannot be served by the format (several
    images for a single-image format, no writer, file object given to a
    filename-only format, or append not supported).
    """
    if isinstance(images, Atoms):
        images = [images]

    if io.single:
        if len(images) > 1:
            raise ValueError('{}-format can only store 1 Atoms object.'
                             .format(format))
        images = images[0]

    if not io.can_write:
        raise ValueError(f"Can't write to {format}-format")

    # Special case for json-format:
    if format == 'json' and (len(images) > 1 or append):
        if filename is None:
            raise ValueError("Can't write more than one image to "
                             'file-descriptor using json-format.')
        return io.write(filename, images, append=append, **kwargs)

    if not io.acceptsfd:
        # The writer opens the file itself and needs a filename.
        if fd is not None:
            raise ValueError("Can't write {}-format to file-descriptor"
                             .format(format))
        if io.can_append:
            return io.write(filename, images, append=append, **kwargs)
        if append:
            raise ValueError("Cannot append to {}-format, write-function "
                             "does not support the append keyword."
                             .format(format))
        return io.write(filename, images, **kwargs)

    if fd is not None:
        # The caller owns the file object; it is left open.
        return io.write(fd, images, **kwargs)

    mode = ('a' if append else 'w') + ('b' if io.isbinary else '')
    fd = open_with_compression(filename, mode)
    try:
        return io.write(fd, images, **kwargs)
    finally:
        fd.close()

753 

754 

def read(
        filename: NameOrFile,
        index: Any = None,
        format: Optional[str] = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Union[Atoms, List[Atoms]]:
    """Read Atoms object(s) from file.

    filename: str or file
        Name of the file to read from or a file descriptor.
    index: int, slice or str
        The last configuration will be returned by default.  Examples:

            * ``index=0``: first configuration
            * ``index=-2``: second to last
            * ``index=':'`` or ``index=slice(None)``: all
            * ``index='-3:'`` or ``index=slice(-3, None)``: three last
            * ``index='::2'`` or ``index=slice(0, None, 2)``: even
            * ``index='1::2'`` or ``index=slice(1, None, 2)``: odd
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.
    parallel: bool
        Default is to read on master and broadcast to slaves.  Use
        parallel=False to read on all slaves.
    do_not_split_by_at_sign: bool
        If False (default) ``filename`` is split at the at sign ``@``.

    Many formats allow an open file-like object to be passed instead
    of ``filename``. In this case the format cannot be auto-detected,
    so the ``format`` argument should be explicitly given."""

    if isinstance(filename, PurePath):
        filename = str(filename)
    if filename == '-':
        filename = sys.stdin
    if isinstance(index, str):
        try:
            index = string2index(index)
        except ValueError:
            # Keep the string; it is passed on to the reader unchanged.
            pass

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)
    if index is None:
        index = -1
    if not format:
        format = filetype(filename, read=isinstance(filename, str))

    ioformat = get_ioformat(format)

    # A slice (or unparsed string) selects several images, an integer one.
    many = isinstance(index, (slice, str))
    selection = index if many else slice(index, None)
    images = _iread(filename, selection, format, ioformat,
                    parallel=parallel, **kwargs)
    if many:
        return list(images)
    return next(images)

811 

812 

def iread(
        filename: NameOrFile,
        index: Any = None,
        format: str = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Iterable[Atoms]:
    """Iterator for reading Atoms objects from file.

    Takes the same arguments as the `read` function, but yields the
    Atoms objects one at a time instead of returning them all at once.
    By default all images are yielded."""

    if isinstance(filename, PurePath):
        filename = str(filename)

    if isinstance(index, str):
        index = string2index(index)

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)

    if index is None or index == ':':
        index = slice(None)
    elif not isinstance(index, (slice, str)):
        # Single integer: select just that image.  For index == -1 the
        # stop value would be 0, so it is replaced by None.
        index = slice(index, (index + 1) or None)

    if not format:
        format = filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    yield from _iread(filename, index, format, ioformat, parallel=parallel,
                      **kwargs)

845 

846 

@parallel_generator
def _iread(filename, index, format, io, parallel=None, full_output=False,
           **kwargs):
    """Yield images read from ``filename`` with the format object ``io``.

    Each item produced by the reader is either a dict with an 'atoms' key
    or the atoms themselves.  With ``full_output`` the dict is yielded,
    otherwise only its 'atoms' entry.
    """
    if not io.can_read:
        raise ValueError(f"Can't read from {format}-format")

    if io.single:
        # Single-image readers take no index argument.
        assert index.start in (None, 0, -1)
        reader_args = ()
    else:
        reader_args = (index,)

    opened_here = isinstance(filename, str) and io.acceptsfd
    if opened_here:
        fd = open_with_compression(filename, 'rb' if io.isbinary else 'r')
    else:
        if not isinstance(filename, str):
            assert io.acceptsfd
        fd = filename

    # Make sure fd is closed in case loop doesn't finish:
    try:
        for item in io.read(fd, *reader_args, **kwargs):
            if not isinstance(item, dict):
                item = {'atoms': item}
            yield item if full_output else item['atoms']
    finally:
        if opened_here:
            fd.close()

885 

886 

def parse_filename(filename, index=None, do_not_split_by_at_sign=False):
    """Split an ``@index`` suffix off ``filename``.

    Returns ``(filename, index)``.  Non-string filenames, filenames whose
    basename has no ``@`` and calls with ``do_not_split_by_at_sign`` are
    returned unchanged.  Otherwise the part after the last ``@`` is
    removed from the filename; it replaces ``index`` unless ``index`` is a
    slice.  If that part cannot be parsed, a warning is issued and it is
    returned as a plain string.
    """
    if not isinstance(filename, str):
        return filename, index

    has_at_sign = '@' in os.path.basename(filename)
    if do_not_split_by_at_sign or not has_at_sign:
        return filename, index

    stripped_name, _, index_text = filename.rpartition('@')

    # An explicitly given slice wins over the index in the filename.
    if isinstance(index, slice):
        return stripped_name, index

    try:
        parsed_index = string2index(index_text)
    except ValueError:
        warnings.warn('Can not parse index for path \n'
                      ' "%s" \nConsider set '
                      'do_not_split_by_at_sign=True \nif '
                      'there is no index.' % filename)
        parsed_index = index_text
    return stripped_name, parsed_index

908 

909 

def match_magic(data: bytes) -> IOFormat:
    """Return the first registered format whose magic matches ``data``.

    Only the first PEEK_BYTES bytes are considered.  Raises
    UnknownFileTypeError if no format matches.
    """
    head = data[:PEEK_BYTES]
    matches = (fmt for fmt in ioformats.values() if fmt.match_magic(head))
    found = next(matches, None)
    if found is None:
        raise UnknownFileTypeError('Cannot guess file type from contents')
    return found

916 

917 

def filetype(
        filename: NameOrFile,
        read: bool = True,
        guess: bool = True,
) -> str:
    """Try to guess the type of the file.

    First, special signatures in the filename will be checked for.  If that
    does not identify the file type, then the first PEEK_BYTES bytes of the
    file will be read and analysed.  Turn off this second part by using
    read=False.

    A file object passed in by the caller is never closed; after peeking
    at its contents it is rewound to the beginning.

    Raises UnknownFileTypeError if the type cannot be determined or the
    file is empty.

    Can be used from the command-line also::

        $ ase info filename ...
    """
    if isinstance(filename, PurePath):
        # Path objects have a ``name`` attribute (the basename only) and
        # would otherwise be mistaken for file objects below.
        filename = str(filename)

    orig_filename = filename
    if hasattr(filename, 'name'):
        filename = filename.name

    ext = None
    opened_here = False
    if isinstance(filename, str):
        if os.path.isdir(filename):
            if os.path.basename(os.path.normpath(filename)) == 'states':
                return 'eon'
            return 'bundletrajectory'

        if filename.startswith('postgres'):
            return 'postgresql'

        if filename.startswith('mysql') or filename.startswith('mariadb'):
            return 'mysql'

        # strip any compression extensions that can be read
        root, _compression = get_compression(filename)
        basename = os.path.basename(root)

        if '.' in basename:
            ext = os.path.splitext(basename)[1].strip('.').lower()

        for fmt in ioformats.values():
            if fmt.match_name(basename):
                return fmt.name

        if not read:
            if ext is None:
                raise UnknownFileTypeError('Could not guess file type')
            ioformat = extension2format.get(ext)
            if ioformat:
                return ioformat.name

            # askhl: This is strange, we don't know if ext is a format:
            return ext

        if orig_filename == filename:
            fd = open_with_compression(filename, 'rb')
            opened_here = True
        else:
            # A file object with a string name: peek into the object itself.
            fd = orig_filename  # type: ignore[assignment]
    else:
        # Either a nameless file object, or one whose ``name`` is not a
        # string (e.g. an integer file descriptor).  Always use the object.
        fd = orig_filename

    if fd is sys.stdin:
        return 'json'

    try:
        data = fd.read(PEEK_BYTES)
    finally:
        # Only files opened here are closed, also if reading fails.
        if opened_here:
            fd.close()
    if not opened_here:
        fd.seek(0)

    if len(data) == 0:
        # Formatted rather than concatenated: filename need not be a str.
        raise UnknownFileTypeError(f'Empty file: {filename}')

    try:
        return match_magic(data).name
    except UnknownFileTypeError:
        pass

    format = None
    if ext in extension2format:
        format = extension2format[ext].name

    if format is None and guess:
        format = ext
    if format is None:
        # Do quick xyz check:
        lines = data.splitlines()
        if lines and lines[0].strip().isdigit():
            return extension2format['xyz'].name

        raise UnknownFileTypeError('Could not guess file type')
    assert isinstance(format, str)
    return format

1011 

1012 

def index2range(index, length):
    """Translate an index into a range over a sequence of ``length`` items.

    A slice gives the corresponding range; an integer gives a range
    containing only that (normalised, non-negative) integer."""
    selected = range(length)[index]
    if not isinstance(selected, numbers.Integral):
        return selected
    return range(selected, selected + 1)