Coverage for /builds/debichem-team/python-ase/ase/io/formats.py: 89.52%

1"""File formats.

3This module implements the read(), iread() and write() functions in ase.io.

4For each file format there is an IOFormat object.

6There is a dict, ioformats, which stores the objects.

8Example

9=======

11The xyz format is implemented in the ase/io/xyz.py file which has a

12read_xyz() generator and a write_xyz() function. This and other

13information can be obtained from ioformats['xyz'].

14"""

16import functools

17import inspect

18import io

19import numbers

20import os

21import re

22import sys

23import warnings

24from importlib import import_module

25from importlib.metadata import entry_points

26from pathlib import Path, PurePath

27from typing import (

28 IO,

29 Any,

30 Dict,

31 Iterable,

32 List,

33 Optional,

34 Sequence,

35 Tuple,

36 Union,

37)

39from ase.atoms import Atoms

40from ase.parallel import parallel_function, parallel_generator

41from ase.utils import string2index

42from ase.utils.plugins import ExternalIOFormat

44PEEK_BYTES = 50000

class UnknownFileTypeError(Exception):
    """Raised when a file format is unknown or cannot be determined."""

class IOFormat:
    """Description of a single file format.

    An instance stores the format name, a human-readable description, a
    two-character code and the name of the module implementing the format.
    The reader and writer are looked up lazily in that module under the
    names ``read_<name>`` and ``write_<name>``, where dashes in the format
    name are replaced by underscores.

    The code has two characters:

    * first: ``'1'`` (single Atoms object) or ``'+'`` (multiple images);
    * second: ``'F'`` (accepts a file object), ``'S'`` (needs a file name)
      or ``'B'`` (like ``'F'``, but the file is opened in binary mode).
    """

    def __init__(self, name: str, desc: str, code: str, module_name: str,
                 encoding: Optional[str] = None) -> None:
        self.name = name
        self.description = desc
        assert len(code) == 2
        assert code[0] in list('+1')
        assert code[1] in list('BFS')
        self.code = code
        self.module_name = module_name
        self.encoding = encoding

        # (To be set by define_io_format())
        self.extensions: List[str] = []
        self.globs: List[str] = []
        self.magic: List[str] = []
        self.magic_regex: Optional[bytes] = None

    def open(self, fname, mode: str = 'r') -> IO:
        """Open *fname* in a way suitable for this format.

        *mode* must be one of 'r', 'w' or 'a'; 'b' is appended
        automatically for binary formats.

        Raises ValueError for any other mode and NotImplementedError if
        the format lacks the reader/writer/append support that the mode
        requires.
        """
        # We might want append mode, too
        # We can allow more flags as needed (buffering etc.)
        if mode not in list('rwa'):
            raise ValueError("Only modes allowed are 'r', 'w', and 'a'")
        if mode == 'r' and not self.can_read:
            raise NotImplementedError('No reader implemented for {} format'
                                      .format(self.name))
        if mode == 'w' and not self.can_write:
            raise NotImplementedError('No writer implemented for {} format'
                                      .format(self.name))
        if mode == 'a' and not self.can_append:
            raise NotImplementedError('Appending not supported by {} format'
                                      .format(self.name))

        if self.isbinary:
            mode += 'b'

        path = Path(fname)
        return path.open(mode, encoding=self.encoding)

    def _buf_as_filelike(self, data: Union[str, bytes]) -> IO:
        """Wrap *data* in an in-memory file object.

        The data is converted to bytes (binary formats) or str (text
        formats) using the format's encoding, falling back to UTF-8.
        """
        encoding = self.encoding
        if encoding is None:
            encoding = 'utf-8'  # Best hacky guess.

        if self.isbinary:
            if isinstance(data, str):
                data = data.encode(encoding)
        else:
            if isinstance(data, bytes):
                data = data.decode(encoding)

        return self._ioclass(data)

    @property
    def _ioclass(self):
        """In-memory file class: BytesIO for binary formats, else StringIO."""
        if self.isbinary:
            return io.BytesIO
        else:
            return io.StringIO

    def parse_images(self, data: Union[str, bytes],
                     **kwargs) -> Sequence[Atoms]:
        """Parse *data* with this format's reader and return all images.

        Keyword arguments are forwarded to the reader.  The result is a
        list, also for single-image formats.
        """
        with self._buf_as_filelike(data) as fd:
            # self.read() always hands back a generator (plain reader
            # functions get wrapped by wrap_read_function), so the result
            # can never be a bare Atoms object.  Consume it exactly once,
            # while the buffer is still open.
            images = list(self.read(fd, **kwargs))
        return images

    def parse_atoms(self, data: Union[str, bytes], **kwargs) -> Atoms:
        """Parse *data* and return the last image."""
        images = self.parse_images(data, **kwargs)
        return images[-1]

    @property
    def can_read(self) -> bool:
        """Whether the implementing module provides a reader."""
        return self._readfunc() is not None

    @property
    def can_write(self) -> bool:
        """Whether the implementing module provides a writer."""
        return self._writefunc() is not None

    @property
    def can_append(self) -> bool:
        """Whether the writer's code object mentions a name 'append'."""
        writefunc = self._writefunc()
        return self.can_write and 'append' in writefunc.__code__.co_varnames

    def __repr__(self) -> str:
        tokens = [f'{name}={value!r}'
                  for name, value in vars(self).items()]
        return 'IOFormat({})'.format(', '.join(tokens))

    def __getitem__(self, i):
        # For compatibility.
        #
        # Historically, the ioformats were listed as tuples
        # with (description, code).  We look like such a tuple.
        return (self.description, self.code)[i]

    @property
    def single(self) -> bool:
        """Whether this format is for a single Atoms object."""
        return self.code[0] == '1'

    @property
    def _formatname(self) -> str:
        """Format name with dashes replaced by underscores."""
        return self.name.replace('-', '_')

    def _readfunc(self):
        """Return the module's ``read_<name>`` function, or None."""
        return getattr(self.module, 'read_' + self._formatname, None)

    def _writefunc(self):
        """Return the module's ``write_<name>`` function, or None."""
        return getattr(self.module, 'write_' + self._formatname, None)

    @property
    def read(self):
        """Callable reading this format, or None (with a FutureWarning)."""
        if not self.can_read:
            self._warn_none('read')
            return None

        return self._read_wrapper

    def _read_wrapper(self, *args, **kwargs):
        """Call the reader; the result is always a generator of images."""
        function = self._readfunc()
        if function is None:
            self._warn_none('read')
            return None
        if not inspect.isgeneratorfunction(function):
            function = functools.partial(wrap_read_function, function)
        return function(*args, **kwargs)

    def _warn_none(self, action):
        """Emit the FutureWarning about ``read``/``write`` returning None."""
        msg = ('Accessing the IOFormat.{action} property on a format '
               'without {action} support will change behaviour in the '
               'future and return a callable instead of None.  '
               'Use IOFormat.can_{action} to check whether {action} '
               'is supported.')
        warnings.warn(msg.format(action=action), FutureWarning)

    @property
    def write(self):
        """Callable writing this format, or None (with a FutureWarning)."""
        if not self.can_write:
            self._warn_none('write')
            return None

        return self._write_wrapper

    def _write_wrapper(self, *args, **kwargs):
        """Call the writer; raise ValueError if the format has none."""
        function = self._writefunc()
        if function is None:
            raise ValueError(f'Cannot write to {self.name}-format')
        return function(*args, **kwargs)

    @property
    def modes(self) -> str:
        """String made of 'r' and/or 'w' depending on what is supported."""
        modes = ''
        if self.can_read:
            modes += 'r'
        if self.can_write:
            modes += 'w'
        return modes

    def full_description(self) -> str:
        """Return a multi-line, human-readable summary of the format."""
        lines = [f'Name: {self.name}',
                 f'Description: {self.description}',
                 f'Modes: {self.modes}',
                 f'Encoding: {self.encoding}',
                 f'Module: {self.module_name}',
                 f'Code: {self.code}',
                 f'Extensions: {self.extensions}',
                 f'Globs: {self.globs}',
                 f'Magic: {self.magic}']
        return '\n'.join(lines)

    @property
    def acceptsfd(self) -> bool:
        """Whether reader/writer take a file object (code is not 'S')."""
        return self.code[1] != 'S'

    @property
    def isbinary(self) -> bool:
        """Whether files of this format are opened in binary mode."""
        return self.code[1] == 'B'

    @property
    def module(self):
        """Import and return the module implementing this format.

        Raises UnknownFileTypeError if the module cannot be imported.
        """
        try:
            return import_module(self.module_name)
        except ImportError as err:
            raise UnknownFileTypeError(
                f'File format not recognized: {self.name}.  Error: {err}'
            ) from err

    def match_name(self, basename: str) -> bool:
        """Whether *basename* matches any of the format's glob patterns."""
        from fnmatch import fnmatch
        return any(fnmatch(basename, pattern)
                   for pattern in self.globs)

    def match_magic(self, data: bytes) -> bool:
        """Whether *data* starts like a file of this format.

        Uses ``magic_regex`` if set; otherwise each entry of ``magic`` is
        treated as a glob pattern that must match the beginning of *data*.
        """
        if self.magic_regex:
            assert not self.magic, 'Define only one of magic and magic_regex'
            match = re.match(self.magic_regex, data, re.M | re.S)
            return match is not None

        from fnmatch import fnmatchcase
        return any(
            fnmatchcase(data, magic + b'*')  # type: ignore[operator, type-var]
            for magic in self.magic
        )

257

258

# Registry of all known formats, keyed by format name.  Entries are added
# by define_io_format().
ioformats: Dict[str, IOFormat] = {}  # These will be filled at run-time.
# Maps a filename extension (without the leading dot) to the IOFormat that
# registered it; each extension can belong to one format only.
extension2format = {}


all_formats = ioformats  # Aliased for compatibility only.  Please do not use.
format2modulename = {}  # Left for compatibility only.

265

266

def define_io_format(name, desc, code, *, module=None, ext=None,
                     glob=None, magic=None, encoding=None,
                     magic_regex=None, external=False):
    """Create an IOFormat and register it in the module-level tables.

    name: str
        Format name; key in ``ioformats``.
    desc: str
        Human-readable description.
    code: str
        Two-character format code, see IOFormat.
    module: str
        Name of the implementing module.  Defaults to *name* with dashes
        replaced by underscores.  Unless *external* is true, the name is
        taken relative to ``ase.io``.
    ext, glob, magic: str, bytes or sequence of those
        Filename extensions, filename glob patterns and magic patterns
        identifying the format.  A single value is wrapped in a list.
    encoding: str
        Text encoding passed on to the IOFormat.
    magic_regex: bytes
        Regular expression used instead of *magic*.
    external: bool
        If true, *module* is used as given, without the ``ase.io.`` prefix.

    Returns the new IOFormat.  Raises ValueError if one of the extensions
    is already registered; in that case no registry is modified.
    """
    if module is None:
        module = name.replace('-', '_')
    # format2modulename records the module name without the 'ase.io.' prefix.
    short_module = module

    if not external:
        module = 'ase.io.' + module

    def normalize_patterns(strings):
        if strings is None:
            strings = []
        elif isinstance(strings, (str, bytes)):
            strings = [strings]
        else:
            strings = list(strings)
        return strings

    fmt = IOFormat(name, desc, code, module_name=module,
                   encoding=encoding)
    fmt.extensions = normalize_patterns(ext)
    fmt.globs = normalize_patterns(glob)
    fmt.magic = normalize_patterns(magic)

    if magic_regex is not None:
        fmt.magic_regex = magic_regex

    # Validate all extensions before touching any registry, so that a
    # failed registration does not leave extensions pointing at a format
    # that never made it into ioformats.
    seen = set()
    for extension in fmt.extensions:
        if extension in extension2format or extension in seen:
            raise ValueError(f'extension "{extension}" already registered')
        seen.add(extension)

    format2modulename[name] = short_module
    for extension in fmt.extensions:
        extension2format[extension] = fmt
    ioformats[name] = fmt
    return fmt

302

303

def get_ioformat(name: str) -> IOFormat:
    """Look up the IOFormat called *name*.

    Raises UnknownFileTypeError if no such format is registered or if the
    module implementing it cannot be imported.
    """
    fmt = ioformats.get(name)
    if fmt is None:
        raise UnknownFileTypeError(name)
    # Accessing the property triggers the import, which may raise as well.
    fmt.module
    return fmt

312

313

def register_external_io_formats(group):
    """Register every IO format advertised in entry point group *group*.

    A format that fails to register is reported with a warning and
    skipped; it does not prevent the remaining ones from being registered.
    """
    discovered = entry_points()
    if hasattr(discovered, 'select'):
        candidates = discovered.select(group=group)
    else:
        # Older return type without select(): mapping from group to entries.
        candidates = discovered.get(group, ())

    for candidate in candidates:
        try:
            define_external_io_format(candidate)
        except Exception as exc:
            warnings.warn(
                'Failed to register external '
                f'IO format {candidate.name}: {exc}'
            )

328

329

def define_external_io_format(entry_point):
    """Load *entry_point* and register the format it describes.

    The entry point name becomes the format name.  Raises ValueError if a
    format of that name already exists and TypeError if the loaded object
    is not an ExternalIOFormat.
    """
    fmt = entry_point.load()
    format_name = entry_point.name

    if format_name in ioformats:
        raise ValueError(f'Format {format_name} already defined')

    if isinstance(fmt, ExternalIOFormat):
        F(format_name, **fmt._asdict(), external=True)
        return

    raise TypeError('Wrong type for registering external IO formats '
                    f'in format {format_name}, expected '
                    'ExternalIOFormat')

340

341

# We define all the IO formats below.  Each IO format has a code,
# such as '1F', which defines some of the format's properties.
#
# First character:
#
# 1=single atoms object
# +=multiple atoms objects
#
# Second character:
#
# F=accepts a file-descriptor
# S=needs a file-name str
# B=like F, but opens in binary mode

# Short alias that keeps the registrations below compact.
F = define_io_format

352F('abinit-gsr', 'ABINIT GSR file', '1S',

353 module='abinit', glob='*o_GSR.nc')

354F('abinit-in', 'ABINIT input file', '1F',

355 module='abinit', magic=b'*znucl *')

356F('abinit-out', 'ABINIT output file', '1F',

357 module='abinit', magic=b'*.Version * of ABINIT')

358F('aims', 'FHI-aims geometry file', '1S', ext='in')

359F('aims-output', 'FHI-aims output', '+S',

360 module='aims', magic=b'*Invoking FHI-aims ...')

361F('bundletrajectory', 'ASE bundle trajectory', '+S')

362F('castep-castep', 'CASTEP output file', '+F',

363 module='castep', ext='castep')

364F('castep-cell', 'CASTEP geom file', '1F',

365 module='castep', ext='cell')

366F('castep-geom', 'CASTEP trajectory file', '+F',

367 module='castep', ext='geom')

368F('castep-md', 'CASTEP molecular dynamics file', '+F',

369 module='castep', ext='md')

370F('castep-phonon', 'CASTEP phonon file', '1F',

371 module='castep', ext='phonon')

372F('cfg', 'AtomEye configuration', '1F')

373F('cif', 'CIF-file', '+B', ext='cif')

374F('cmdft', 'CMDFT-file', '1F', glob='*I_info')

375F('cjson', 'Chemical json file', '1F', ext='cjson')

376F('cp2k-dcd', 'CP2K DCD file', '+B',

377 module='cp2k', ext='dcd')

378F('cp2k-restart', 'CP2K restart file', '1F',

379 module='cp2k', ext='restart')

380F('crystal', 'Crystal fort.34 format', '1F',

381 ext=['f34', '34'], glob=['f34', '34'])

382F('cube', 'CUBE file', '1F', ext='cube')

383F('dacapo-text', 'Dacapo text output', '1F',

384 module='dacapo', magic=b'*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n')

385F('db', 'ASE SQLite database file', '+S')

386F('dftb', 'DftbPlus input file', '1S', magic=b'Geometry')

387F('dlp4', 'DL_POLY_4 CONFIG file', '1F',

388 module='dlp4', ext='config', glob=['*CONFIG*'])

389F('dlp-history', 'DL_POLY HISTORY file', '+F',

390 module='dlp4', glob='HISTORY')

391F('dmol-arc', 'DMol3 arc file', '+S',

392 module='dmol', ext='arc')

393F('dmol-car', 'DMol3 structure file', '1S',

394 module='dmol', ext='car')

395F('dmol-incoor', 'DMol3 structure file', '1S',

396 module='dmol')

397F('elk', 'ELK atoms definition from GEOMETRY.OUT', '1F',

398 glob=['GEOMETRY.OUT'])

399F('elk-in', 'ELK input file', '1F', module='elk')

400F('eon', 'EON CON file', '+F',

401 ext='con')

402F('eps', 'Encapsulated Postscript', '1S')

403F('espresso-in', 'Quantum espresso in file', '1F',

404 module='espresso', ext='pwi', magic=[b'*\n&system', b'*\n&SYSTEM'])

405F('espresso-out', 'Quantum espresso out file', '+F',

406 module='espresso', ext=['pwo', 'out'], magic=b'*Program PWSCF')

# NOTE(review): both calls register the name 'exciting'.  define_io_format()
# stores formats with ``ioformats[name] = fmt``, so the second call replaces
# the first and only the 'INFO.out' glob stays reachable through ioformats.
# Confirm whether the 'input.xml' registration is meant to survive.
F('exciting', 'exciting input', '1F', module='exciting', glob='input.xml')
F('exciting', 'exciting output', '1F', module='exciting', glob='INFO.out')

409F('extxyz', 'Extended XYZ file', '+F', ext='xyz')

410F('findsym', 'FINDSYM-format', '+F')

411F('gamess-us-out', 'GAMESS-US output file', '1F',

412 module='gamess_us', magic=b'*GAMESS')

413F('gamess-us-in', 'GAMESS-US input file', '1F',

414 module='gamess_us')

415F('gamess-us-punch', 'GAMESS-US punchcard file', '1F',

416 module='gamess_us', magic=b' $DATA', ext='dat')

417F('gaussian-in', 'Gaussian com (input) file', '1F',

418 module='gaussian', ext=['com', 'gjf'])

419F('gaussian-out', 'Gaussian output file', '+F',

420 module='gaussian', ext='log', magic=b'*Entering Gaussian System')

421F('acemolecule-out', 'ACE output file', '1S',

422 module='acemolecule')

423F('acemolecule-input', 'ACE input file', '1S',

424 module='acemolecule')

425F('gen', 'DFTBPlus GEN format', '1F')

426F('gif', 'Graphics interchange format', '+S',

427 module='animation')

428F('gpaw-out', 'GPAW text output', '+F',

429 magic=b'* ___ ___ ___ _ _ _')

430F('gpumd', 'GPUMD input file', '1F', glob='xyz.in')

431F('gpw', 'GPAW restart-file', '1S',

432 magic=[b'- of UlmGPAW', b'AFFormatGPAW'])

433F('gromacs', 'Gromacs coordinates', '1F',

434 ext='gro')

435F('gromos', 'Gromos96 geometry file', '1F', ext='g96')

436F('html', 'X3DOM HTML', '1F', module='x3d')

437F('json', 'ASE JSON database file', '+F', ext='json', module='db')

438F('jsv', 'JSV file format', '1F')

439F('lammps-dump-text', 'LAMMPS text dump file', '+F',

440 module='lammpsrun', magic_regex=b'.*?^ITEM: TIMESTEP$')

441F('lammps-dump-binary', 'LAMMPS binary dump file', '+B',

442 module='lammpsrun')

443F('lammps-data', 'LAMMPS data file', '1F', module='lammpsdata',

444 encoding='ascii')

445F('magres', 'MAGRES ab initio NMR data file', '1F')

446F('mol', 'MDL Molfile', '1F')

447F('mp4', 'MP4 animation', '+S',

448 module='animation')

449F('mustem', 'muSTEM xtl file', '1F',

450 ext='xtl')

451F('mysql', 'ASE MySQL database file', '+S',

452 module='db')

453F('netcdftrajectory', 'AMBER NetCDF trajectory file', '+S',

454 magic=b'CDF')

455F('nomad-json', 'JSON from Nomad archive', '+F',

456 ext='nomad-json')

457F('nwchem-in', 'NWChem input file', '1F',

458 module='nwchem', ext='nwi')

459F('nwchem-out', 'NWChem output file', '+F',

460 module='nwchem', ext='nwo',

461 magic=b'*Northwest Computational Chemistry Package')

462F('octopus-in', 'Octopus input file', '1F',

463 module='octopus', glob='inp')

464F('onetep-out', 'ONETEP output file', '+F',

465 module='onetep',

466 magic=b'*Linear-Scaling Ab Initio Total Energy Program*')

467F('onetep-in', 'ONETEP input file', '1F',

468 module='onetep',

469 magic=[b'*lock species ',

470 b'*LOCK SPECIES ',

471 b'*--- INPUT FILE ---*'])

472F('proteindatabank', 'Protein Data Bank', '+F',

473 ext='pdb')

474F('png', 'Portable Network Graphics', '1B')

475F('postgresql', 'ASE PostgreSQL database file', '+S', module='db')

476F('pov', 'Persistance of Vision', '1S')

477# prismatic: Should have ext='xyz' if/when multiple formats can have the same

478# extension

479F('prismatic', 'prismatic and computem XYZ-file', '1F')

480F('py', 'Python file', '+F')

481F('sys', 'qball sys file', '1F')

482F('qbox', 'QBOX output file', '+F',

483 magic=b'*:simulation xmlns:')

484F('res', 'SHELX format', '1S', ext='shelx')

485F('rmc6f', 'RMCProfile', '1S', ext='rmc6f')

486F('sdf', 'SDF format', '1F')

487F('siesta-xv', 'Siesta .XV file', '1F',

488 glob='*.XV', module='siesta')

489F('struct', 'WIEN2k structure file', '1S', module='wien2k')

490F('struct_out', 'SIESTA STRUCT file', '1F', module='siesta')

491F('traj', 'ASE trajectory', '+B', module='trajectory', ext='traj',

492 magic=[b'- of UlmASE-Trajectory', b'AFFormatASE-Trajectory'])

493F('turbomole', 'TURBOMOLE coord file', '1F', glob='coord',

494 magic=b'$coord')

495F('turbomole-gradient', 'TURBOMOLE gradient file', '+F',

496 module='turbomole', glob='gradient', magic=b'$grad')

497F('v-sim', 'V_Sim ascii file', '1F', ext='ascii')

498F('vasp', 'VASP POSCAR/CONTCAR', '1F',

499 ext='poscar', glob=['*POSCAR*', '*CONTCAR*', '*CENTCAR*'])

500F('vasp-out', 'VASP OUTCAR file', '+F',

501 module='vasp', glob='*OUTCAR*')

502F('vasp-xdatcar', 'VASP XDATCAR file', '+F',

503 module='vasp', glob='*XDATCAR*')

504F('vasp-xml', 'VASP vasprun.xml file', '+F',

505 module='vasp', glob='*vasp*.xml')

506F('vti', 'VTK XML Image Data', '1F', module='vtkxml')

507F('vtu', 'VTK XML Unstructured Grid', '1F', module='vtkxml', ext='vtu')

508F('wout', 'Wannier90 output', '1F', module='wannier90')

509F('x3d', 'X3D', '1S')

510F('xsd', 'Materials Studio file', '1F')

511F('xsf', 'XCrySDen Structure File', '+F',

512 magic=[b'*\nANIMSTEPS', b'*\nCRYSTAL', b'*\nSLAB', b'*\nPOLYMER',

513 b'*\nMOLECULE', b'*\nATOMS'])

514F('xtd', 'Materials Studio file', '+F')

515# xyz: No `ext='xyz'` in the definition below.

516# The .xyz files are handled by the extxyz module by default.

517F('xyz', 'XYZ-file', '+F')

518

519# Register IO formats exposed through the ase.ioformats entry point

520register_external_io_formats('ase.ioformats')

521

522

def get_compression(filename: str) -> Tuple[str, Optional[str]]:
    """
    Split a recognised compression suffix off a filename.

    The suffixes ``.gz``, ``.bz2`` and ``.xz`` are recognised.  Any other
    filename is returned unchanged together with ``None``.

    >>> get_compression('H2O.pdb.gz')
    ('H2O.pdb', 'gz')
    >>> get_compression('crystal.cif')
    ('crystal.cif', None)

    Parameters
    ==========
    filename: str
        Full filename including extension.

    Returns
    =======
    (root, extension): (str, str or None)
        The filename without the compression suffix, and the suffix
        without its dot.  If the suffix is not a known compression
        format, the full filename and None.
    """
    # Update if anything is added
    known_suffixes = ('gz', 'bz2', 'xz')

    # Let the stdlib do the splitting; it keeps the '.' on the suffix.
    stem, dotted_suffix = os.path.splitext(filename)
    suffix = dotted_suffix.strip('.')

    if suffix not in known_suffixes:
        return filename, None
    return stem, suffix

557

558

def open_with_compression(filename: str, mode: str = 'r') -> IO:
    """
    Open a file, transparently handling compression.

    The compression is guessed from the filename suffix; ``gz`` (gzip),
    ``bz2`` (bzip2) and ``xz`` (lzma) are supported.  Files with any other
    suffix are opened with the builtin `open`.

    Supported modes are:
       * 'r', 'rt', 'w', 'wt', 'a', 'at' for text mode.
       * 'rb', 'wb', 'ab' for binary mode.

    Parameters
    ==========
    filename: str
        Path to the file to open, including any extensions that indicate
        the compression used.
    mode: str
        Mode to open the file, same as for builtin ``open``, e.g 'r', 'w'.

    Returns
    =======
    fd: file
        File-like object open with the specified mode.
    """

    # Compressed formats sometimes default to binary, so force text mode.
    text_modes = {'r': 'rt', 'w': 'wt', 'a': 'at'}
    mode = text_modes.get(mode, mode)

    _root, compression = get_compression(filename)

    if compression == 'gz':
        import gzip
        return gzip.open(filename, mode=mode)  # type: ignore[return-value]
    if compression == 'bz2':
        import bz2
        return bz2.open(filename, mode=mode)
    if compression == 'xz':
        import lzma
        return lzma.open(filename, mode)

    # Either None or unknown string
    return open(filename, mode)

607

608

def is_compressed(fd: io.BufferedIOBase) -> bool:
    """Tell whether *fd* is a gzip, bz2 or lzma file object."""
    # Only modules that are already imported are consulted: this avoids
    # triggering imports, and Python may have been built without e.g. lzma.
    candidates = (
        ('gzip', 'GzipFile'),
        ('bz2', 'BZ2File'),
        ('lzma', 'LZMAFile'),
    )
    for module_name, class_name in candidates:
        module = sys.modules.get(module_name)
        if module is None:
            continue
        if isinstance(fd, getattr(module, class_name)):
            return True
    return False

626

627

def wrap_read_function(read, filename, index=None, **kwargs):
    """Turn a plain read function into a generator.

    Without *index*, the single return value of ``read`` is yielded.
    With *index*, ``read`` is expected to return an iterable whose items
    are yielded one by one.
    """
    if index is not None:
        yield from read(filename, index, **kwargs)
        return
    yield read(filename, **kwargs)

634

635

636NameOrFile = Union[str, PurePath, IO]

637

638

def write(
        filename: NameOrFile,
        images: Union[Atoms, Sequence[Atoms]],
        format: str = None,
        parallel: bool = True,
        append: bool = False,
        **kwargs: Any
) -> None:
    """Write Atoms object(s) to file.

    filename: str or file
        Name of the file to write to or a file descriptor.  The name '-'
        means standard output.
    images: Atoms object or list of Atoms objects
        A single Atoms object or a list of Atoms objects.
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be taken from suffix of the filename.
        If it cannot be determined for a file descriptor (or for
        standard output), 'json' is used.
    parallel: bool
        Default is to write on master only.  Use parallel=False to write
        from all slaves.
    append: bool
        Default is to open files in 'w' or 'wb' mode, overwriting
        existing files.  In some cases opening the file in 'a' or 'ab'
        mode (appending) is useful,
        e.g. writing trajectories or saving multiple Atoms objects in one
        file.
        WARNING: If the file format does not support multiple entries
        without additional keywords/headers, files created using
        'append=True' might not be readable by any program!  They will
        nevertheless be written without error message.

    The use of additional keywords is format specific.  write() may
    return an object after writing certain formats, but this behaviour
    may change in the future.

    """

    if isinstance(filename, PurePath):
        filename = str(filename)

    if isinstance(filename, str):
        if filename == '-':
            target_fd, target_name = sys.stdout, None
        else:
            target_fd, target_name = None, filename
            if format is None:
                format = filetype(filename, read=False)
                assert isinstance(format, str)
    else:
        # A file-like object was passed; guessing its format may fail.
        target_fd, target_name = filename, None
        if format is None:
            try:
                format = filetype(filename, read=False)
                assert isinstance(format, str)
            except UnknownFileTypeError:
                format = None

    if not format:
        format = 'json'  # default is json

    ioformat = get_ioformat(format)

    return _write(target_name, target_fd, format, ioformat, images,
                  parallel=parallel, append=append, **kwargs)

703

704

@parallel_function
def _write(filename, fd, format, io, images, parallel=None, append=False,
           **kwargs):
    """Write *images* through the IOFormat *io*.

    Exactly one of *filename* and *fd* is expected to be set by write().
    Formats accepting a file object get one (opened here if only a name
    was given, and closed again afterwards); other formats get the
    filename.  Returns whatever the format's write function returns.
    """
    if isinstance(images, Atoms):
        images = [images]

    if io.single:
        if len(images) > 1:
            raise ValueError(f'{format}-format can only store 1 Atoms object.')
        images = images[0]

    if not io.can_write:
        raise ValueError(f"Can't write to {format}-format")

    # Special case for json-format:
    if format == 'json' and (len(images) > 1 or append):
        if filename is None:
            raise ValueError("Can't write more than one image to "
                             'file-descriptor using json-format.')
        return io.write(filename, images, append=append, **kwargs)

    if not io.acceptsfd:
        # The writer wants a filename, so a file object is of no use.
        if fd is not None:
            raise ValueError(f"Can't write {format}-format to file-descriptor")
        if io.can_append:
            return io.write(filename, images, append=append, **kwargs)
        if append:
            raise ValueError(f"Cannot append to {format}-format, "
                             "write-function does not support the append "
                             "keyword.")
        return io.write(filename, images, **kwargs)

    opened_here = fd is None
    try:
        if opened_here:
            mode = ('a' if append else 'w') + ('b' if io.isbinary else '')
            fd = open_with_compression(filename, mode)
            # XXX remember to re-enable compressed open
            # fd = io.open(filename, mode)
        return io.write(fd, images, **kwargs)
    finally:
        if opened_here and fd is not None:
            fd.close()

753

754

def read(
        filename: NameOrFile,
        index: Any = None,
        format: Optional[str] = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Union[Atoms, List[Atoms]]:
    """Read Atoms object(s) from file.

    filename: str or file
        Name of the file to read from or a file descriptor.
    index: int, slice or str
        The last configuration will be returned by default.  Examples:

            * ``index=0``: first configuration
            * ``index=-2``: second to last
            * ``index=':'`` or ``index=slice(None)``: all
            * ``index='-3:'`` or ``index=slice(-3, None)``: three last
            * ``index='::2'`` or ``index=slice(0, None, 2)``: even
            * ``index='1::2'`` or ``index=slice(1, None, 2)``: odd
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.
    parallel: bool
        Default is to read on master and broadcast to slaves.  Use
        parallel=False to read on all slaves.
    do_not_split_by_at_sign: bool
        If False (default) ``filename`` is split at the at sign ``@``.

    Many formats allow an open file-like object to be passed instead
    of ``filename``.  In this case the format cannot be auto-detected,
    so the ``format`` argument should be explicitly given."""

    if isinstance(filename, PurePath):
        filename = str(filename)
    if filename == '-':
        filename = sys.stdin

    if isinstance(index, str):
        # Strings that cannot be converted are passed on unchanged.
        try:
            index = string2index(index)
        except ValueError:
            pass

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)
    if index is None:
        index = -1

    if not format:
        format = filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    wants_many = isinstance(index, (slice, str))
    if not wants_many:
        index = slice(index, None)
    images = _iread(filename, index, format, ioformat, parallel=parallel,
                    **kwargs)
    if wants_many:
        return list(images)
    return next(images)

811

812

def iread(
        filename: NameOrFile,
        index: Any = None,
        format: str = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Iterable[Atoms]:
    """Iterator for reading Atoms objects from file.

    Takes the same arguments as `read`, but yields the Atoms objects one
    at a time instead of returning them all at once.  Without an index,
    all images are yielded."""

    if isinstance(filename, PurePath):
        filename = str(filename)

    if isinstance(index, str):
        index = string2index(index)

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)

    if index is None or index == ':':
        index = slice(None, None, None)
    elif not isinstance(index, (slice, str)):
        # Single integer: select just that image.  A stop of 0 (from
        # index -1) would select nothing, hence the fallback to None.
        index = slice(index, (index + 1) or None)

    if not format:
        format = filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    yield from _iread(filename, index, format, ioformat, parallel=parallel,
                      **kwargs)

845

846

@parallel_generator
def _iread(filename, index, format, io, parallel=None, full_output=False,
           **kwargs):
    """Yield what the reader of IOFormat *io* produces for *filename*.

    Items that are not dicts are treated as the atoms themselves.  With
    *full_output* the complete dict is yielded (``{'atoms': item}`` for
    non-dict items), otherwise only its 'atoms' entry.  A file opened
    here is closed again, also if the generator is not exhausted.
    """

    if not io.can_read:
        raise ValueError(f"Can't read from {format}-format")

    if io.single:
        # Single-image readers take no index; only the first/last image
        # can be meaningfully requested.
        start = index.start
        assert start is None or start == 0 or start == -1
    args = () if io.single else (index,)

    given_name = isinstance(filename, str)
    opened_here = given_name and io.acceptsfd
    if opened_here:
        fd = open_with_compression(filename, 'rb' if io.isbinary else 'r')
    else:
        if not given_name:
            assert io.acceptsfd
        fd = filename

    # Make sure fd is closed in case loop doesn't finish:
    try:
        for item in io.read(fd, *args, **kwargs):
            record = item if isinstance(item, dict) else {'atoms': item}
            yield record if full_output else record['atoms']
    finally:
        if opened_here:
            fd.close()

885

886

def parse_filename(filename, index=None, do_not_split_by_at_sign=False):
    """Split a trailing ``@index`` off *filename*.

    Returns ``(filename, index)``.  Nothing is split if *filename* is not
    a string, if *do_not_split_by_at_sign* is true or if the basename
    contains no ``@``.  Otherwise the part after the last ``@`` is
    converted with string2index and returned as the index, unless *index*
    is a slice, which takes precedence.  If the conversion fails, a
    warning is issued and the unconverted string is returned as index.
    """
    if not isinstance(filename, str):
        return filename, index

    if do_not_split_by_at_sign or '@' not in os.path.basename(filename):
        return filename, index

    stem, _, suffix = filename.rpartition('@')

    if isinstance(index, slice):
        return stem, index

    try:
        parsed = string2index(suffix)
    except ValueError:
        warnings.warn('Can not parse index for path \n'
                      ' "%s" \nConsider set '
                      'do_not_split_by_at_sign=True \nif '
                      'there is no index.' % filename)
        parsed = suffix
    return stem, parsed

908

909

def match_magic(data: bytes) -> IOFormat:
    """Return the first registered format whose magic matches *data*.

    Only the first PEEK_BYTES bytes of *data* are examined.  Raises
    UnknownFileTypeError if no format matches.
    """
    head = data[:PEEK_BYTES]
    hit = next(
        (fmt for fmt in ioformats.values() if fmt.match_magic(head)),
        None,
    )
    if hit is None:
        raise UnknownFileTypeError('Cannot guess file type from contents')
    return hit

916

917

def filetype(
        filename: NameOrFile,
        read: bool = True,
        guess: bool = True,
) -> str:
    """Try to guess the type of the file.

    First, special signatures in the filename will be checked for.  If that
    does not identify the file type, then the first PEEK_BYTES (50000)
    bytes of the file will be read and analysed.  Turn off this second
    part by using read=False.

    filename: str or file
        File name, or an open file object.  For a file object with a
        string ``name`` attribute, that name is used for the filename
        checks.  A file object passed in is never closed; after peeking
        it is rewound with ``seek(0)``.
    read: bool
        Whether the file contents may be inspected.
    guess: bool
        If the contents are not recognised and the extension is not
        registered, return the bare extension as the format name.

    Returns the format name.  Standard input is always reported as
    'json'.  Raises UnknownFileTypeError if the type cannot be determined
    or the file is empty.

    Can be used from the command-line also::

        $ ase info filename ...
    """

    orig_filename = filename

    # sys.stdin has the string name '<stdin>', so it has to be recognised
    # before the name is inspected; peeking at it would consume the input.
    if orig_filename is sys.stdin:
        return 'json'

    if hasattr(filename, 'name'):
        filename = filename.name

    ext = None
    opened_here = False
    if isinstance(filename, str):
        if os.path.isdir(filename):
            if os.path.basename(os.path.normpath(filename)) == 'states':
                return 'eon'
            return 'bundletrajectory'

        if filename.startswith('postgres'):
            return 'postgresql'

        if filename.startswith('mysql') or filename.startswith('mariadb'):
            return 'mysql'

        # strip any compression extensions that can be read
        root, _compression = get_compression(filename)
        basename = os.path.basename(root)

        if '.' in basename:
            ext = os.path.splitext(basename)[1].strip('.').lower()

        for fmt in ioformats.values():
            if fmt.match_name(basename):
                return fmt.name

        if not read:
            if ext is None:
                raise UnknownFileTypeError('Could not guess file type')
            ioformat = extension2format.get(ext)
            if ioformat:
                return ioformat.name

            # askhl: This is strange, we don't know if ext is a format:
            return ext

        if orig_filename == filename:
            fd = open_with_compression(filename, 'rb')
            opened_here = True
        else:
            fd = orig_filename  # type: ignore[assignment]
    else:
        # No usable name (none at all, or e.g. an integer descriptor):
        # work with the object that was passed in, not with its name.
        fd = orig_filename

    if opened_here:
        with fd:
            data = fd.read(PEEK_BYTES)
    else:
        # The caller owns this file object: leave it open and rewind it
        # so that it can still be read from the start.
        data = fd.read(PEEK_BYTES)
        fd.seek(0)

    if len(data) == 0:
        # filename may be a file object here, so do not concatenate.
        raise UnknownFileTypeError(f'Empty file: {filename}')

    try:
        return match_magic(data).name
    except UnknownFileTypeError:
        pass

    format = None
    if ext in extension2format:
        format = extension2format[ext].name

    if format is None and guess:
        format = ext
    if format is None:
        # Do quick xyz check:
        lines = data.splitlines()
        if lines and lines[0].strip().isdigit():
            return extension2format['xyz'].name

        raise UnknownFileTypeError('Could not guess file type')
    assert isinstance(format, str)
    return format

1011

1012

def index2range(index, length):
    """Translate an integer or slice index into a range.

    The range holds the positions that *index* selects from a sequence of
    *length* items; for an integer index it has exactly one element."""
    selected = range(length)[index]
    if not isinstance(selected, numbers.Integral):
        return selected
    return range(selected, selected + 1)