# Coverage for /builds/debichem-team/python-ase/ase/io/formats.py: 89.52%
# 544 statements
# coverage.py v7.5.3, created at 2025-03-06 04:00 +0000
"""File formats.

This module implements the read(), iread() and write() functions in ase.io.
For each file format there is an IOFormat object.

There is a dict, ioformats, which stores the objects.

Example
=======

The xyz format is implemented in the ase/io/xyz.py file which has a
read_xyz() generator and a write_xyz() function. This and other
information can be obtained from ioformats['xyz'].
"""
import functools
import inspect
import io
import numbers
import os
import re
import sys
import warnings
from importlib import import_module
from importlib.metadata import entry_points
from pathlib import Path, PurePath
from typing import (
    IO,
    Any,
    Dict,
    Iterable,
    List,
    Optional,
    Sequence,
    Tuple,
    Union,
)

from ase.atoms import Atoms
from ase.parallel import parallel_function, parallel_generator
from ase.utils import string2index
from ase.utils.plugins import ExternalIOFormat
# Maximum number of leading bytes inspected when guessing a file type from
# its contents (see match_magic() and filetype()).
PEEK_BYTES = 50000
class UnknownFileTypeError(Exception):
    """Raised when a file format is unknown or cannot be determined."""
class IOFormat:
    """Description of one file format and access to its reader and writer.

    The reader and writer are looked up lazily in the module named
    ``module_name`` as ``read_<name>`` and ``write_<name>``, where dashes
    in the format name are replaced by underscores.

    Parameters
    ==========
    name: str
        Name of the format.
    desc: str
        Human-readable description.
    code: str
        Two-character code.  The first character is ``'1'`` (single
        Atoms object) or ``'+'`` (multiple), the second is ``'F'``
        (accepts a file descriptor), ``'S'`` (needs a file name) or
        ``'B'`` (like ``'F'`` but binary).
    module_name: str
        Importable name of the module implementing the format.
    encoding: str or None
        Text encoding used by the format, if any.
    """

    def __init__(self, name: str, desc: str, code: str, module_name: str,
                 encoding: Optional[str] = None) -> None:
        self.name = name
        self.description = desc
        assert len(code) == 2
        assert code[0] in list('+1')
        assert code[1] in list('BFS')
        self.code = code
        self.module_name = module_name
        self.encoding = encoding

        # (To be set by define_io_format())
        self.extensions: List[str] = []
        self.globs: List[str] = []
        self.magic: List[str] = []
        self.magic_regex: Optional[bytes] = None

    def open(self, fname, mode: str = 'r') -> IO:
        """Open ``fname`` for this format in mode ``'r'``, ``'w'`` or ``'a'``.

        Raises ValueError for any other mode and NotImplementedError if
        the format does not support the requested operation.
        """
        # We might want append mode, too
        # We can allow more flags as needed (buffering etc.)
        if mode not in list('rwa'):
            raise ValueError("Only modes allowed are 'r', 'w', and 'a'")
        if mode == 'r' and not self.can_read:
            raise NotImplementedError('No reader implemented for {} format'
                                      .format(self.name))
        if mode == 'w' and not self.can_write:
            raise NotImplementedError('No writer implemented for {} format'
                                      .format(self.name))
        if mode == 'a' and not self.can_append:
            raise NotImplementedError('Appending not supported by {} format'
                                      .format(self.name))

        encoding = self.encoding
        if self.isbinary:
            mode += 'b'
            # Binary streams do not take an encoding; passing one would
            # make open() raise ValueError.
            encoding = None

        path = Path(fname)
        return path.open(mode, encoding=encoding)

    def _buf_as_filelike(self, data: Union[str, bytes]) -> IO:
        """Wrap ``data`` in a BytesIO/StringIO matching this format."""
        encoding = self.encoding
        if encoding is None:
            encoding = 'utf-8'  # Best hacky guess.

        if self.isbinary:
            if isinstance(data, str):
                data = data.encode(encoding)
        else:
            if isinstance(data, bytes):
                data = data.decode(encoding)

        return self._ioclass(data)

    @property
    def _ioclass(self):
        if self.isbinary:
            return io.BytesIO
        else:
            return io.StringIO

    def parse_images(self, data: Union[str, bytes],
                     **kwargs) -> Sequence[Atoms]:
        """Read all images from an in-memory string or bytes buffer."""
        with self._buf_as_filelike(data) as fd:
            # self.read always hands back a generator (see _read_wrapper),
            # for single-image formats as well, so it must be consumed
            # exactly once while the buffer is still open.
            return list(self.read(fd, **kwargs))

    def parse_atoms(self, data: Union[str, bytes], **kwargs) -> Atoms:
        """Read the last image from an in-memory string or bytes buffer."""
        images = self.parse_images(data, **kwargs)
        return images[-1]

    @property
    def can_read(self) -> bool:
        return self._readfunc() is not None

    @property
    def can_write(self) -> bool:
        return self._writefunc() is not None

    @property
    def can_append(self) -> bool:
        """Whether the write function has a variable named ``append``."""
        writefunc = self._writefunc()
        if writefunc is None:
            return False
        # Callables without a code object cannot be inspected this way.
        code = getattr(writefunc, '__code__', None)
        return code is not None and 'append' in code.co_varnames

    def __repr__(self) -> str:
        tokens = [f'{name}={value!r}'
                  for name, value in vars(self).items()]
        return 'IOFormat({})'.format(', '.join(tokens))

    def __getitem__(self, i):
        # For compatibility.
        #
        # Historically, the ioformats were listed as tuples
        # with (description, code).  We look like such a tuple.
        return (self.description, self.code)[i]

    @property
    def single(self) -> bool:
        """Whether this format is for a single Atoms object."""
        return self.code[0] == '1'

    @property
    def _formatname(self) -> str:
        return self.name.replace('-', '_')

    def _readfunc(self):
        return getattr(self.module, 'read_' + self._formatname, None)

    def _writefunc(self):
        return getattr(self.module, 'write_' + self._formatname, None)

    @property
    def read(self):
        """Callable returning a generator of images, or None (with warning)."""
        if not self.can_read:
            self._warn_none('read')
            return None

        return self._read_wrapper

    def _read_wrapper(self, *args, **kwargs):
        function = self._readfunc()
        if function is None:
            self._warn_none('read')
            return None
        # Plain functions are wrapped so that callers always get a generator.
        if not inspect.isgeneratorfunction(function):
            function = functools.partial(wrap_read_function, function)
        return function(*args, **kwargs)

    def _warn_none(self, action):
        msg = ('Accessing the IOFormat.{action} property on a format '
               'without {action} support will change behaviour in the '
               'future and return a callable instead of None. '
               'Use IOFormat.can_{action} to check whether {action} '
               'is supported.')
        warnings.warn(msg.format(action=action), FutureWarning)

    @property
    def write(self):
        """Callable writing images, or None (with warning) if unsupported."""
        if not self.can_write:
            self._warn_none('write')
            return None

        return self._write_wrapper

    def _write_wrapper(self, *args, **kwargs):
        function = self._writefunc()
        if function is None:
            raise ValueError(f'Cannot write to {self.name}-format')
        return function(*args, **kwargs)

    @property
    def modes(self) -> str:
        """String containing 'r' and/or 'w' for the supported operations."""
        modes = ''
        if self.can_read:
            modes += 'r'
        if self.can_write:
            modes += 'w'
        return modes

    def full_description(self) -> str:
        lines = [f'Name:        {self.name}',
                 f'Description: {self.description}',
                 f'Modes:       {self.modes}',
                 f'Encoding:    {self.encoding}',
                 f'Module:      {self.module_name}',
                 f'Code:        {self.code}',
                 f'Extensions:  {self.extensions}',
                 f'Globs:       {self.globs}',
                 f'Magic:       {self.magic}']
        return '\n'.join(lines)

    @property
    def acceptsfd(self) -> bool:
        return self.code[1] != 'S'

    @property
    def isbinary(self) -> bool:
        return self.code[1] == 'B'

    @property
    def module(self):
        """The imported implementation module.

        Raises UnknownFileTypeError if the module cannot be imported.
        """
        try:
            return import_module(self.module_name)
        except ImportError as err:
            raise UnknownFileTypeError(
                f'File format not recognized: {self.name}.  Error: {err}'
            ) from err

    def match_name(self, basename: str) -> bool:
        """Whether ``basename`` matches any of the format's glob patterns."""
        from fnmatch import fnmatch
        return any(fnmatch(basename, pattern)
                   for pattern in self.globs)

    def match_magic(self, data: bytes) -> bool:
        """Whether ``data`` matches the format's magic regex or patterns."""
        if self.magic_regex:
            assert not self.magic, 'Define only one of magic and magic_regex'
            match = re.match(self.magic_regex, data, re.M | re.S)
            return match is not None

        from fnmatch import fnmatchcase
        return any(
            fnmatchcase(data, magic + b'*')  # type: ignore[operator, type-var]
            for magic in self.magic
        )
# Registry of all known formats, keyed by format name.
ioformats: Dict[str, IOFormat] = {}  # These will be filled at run-time.
# Lookup from file extension to the format registered for it.
extension2format: Dict[str, IOFormat] = {}


all_formats = ioformats  # Aliased for compatibility only.  Please do not use.
format2modulename: Dict[str, str] = {}  # Left for compatibility only.
def define_io_format(name, desc, code, *, module=None, ext=None,
                     glob=None, magic=None, encoding=None,
                     magic_regex=None, external=False):
    """Create an IOFormat and register it in the module-level registries.

    Parameters
    ==========
    name, desc, code:
        Passed on to IOFormat.
    module: str or None
        Module implementing the format.  Defaults to ``name`` with dashes
        replaced by underscores.  Unless ``external`` is true, the module
        is looked up inside ``ase.io``.
    ext, glob, magic:
        A single pattern or an iterable of patterns (or None) giving the
        file extensions, file name globs and magic byte patterns.
    encoding: str or None
        Passed on to IOFormat.
    magic_regex: bytes or None
        Regular expression used instead of ``magic``.
    external: bool
        If true, ``module`` is used as a fully qualified module name.

    Returns the new IOFormat.  Raises ValueError if one of the extensions
    is already registered; in that case no registry is modified.
    """
    if module is None:
        module = name.replace('-', '_')

    def normalize_patterns(strings):
        if strings is None:
            strings = []
        elif isinstance(strings, (str, bytes)):
            strings = [strings]
        else:
            strings = list(strings)
        return strings

    module_name = module if external else 'ase.io.' + module
    fmt = IOFormat(name, desc, code, module_name=module_name,
                   encoding=encoding)
    fmt.extensions = normalize_patterns(ext)
    fmt.globs = normalize_patterns(glob)
    fmt.magic = normalize_patterns(magic)

    if magic_regex is not None:
        fmt.magic_regex = magic_regex

    # Validate everything before touching any registry so that a failed
    # registration does not leave half-registered state behind.
    seen = set()
    for extension in fmt.extensions:
        if extension in extension2format or extension in seen:
            raise ValueError(f'extension "{extension}" already registered')
        seen.add(extension)

    # Kept for compatibility: maps to the module name without 'ase.io.'.
    format2modulename[name] = module
    for extension in fmt.extensions:
        extension2format[extension] = fmt

    ioformats[name] = fmt
    return fmt
def get_ioformat(name: str) -> IOFormat:
    """Return ioformat object or raise appropriate error.

    Raises UnknownFileTypeError if ``name`` is not a registered format.
    """
    if name not in ioformats:
        raise UnknownFileTypeError(name)
    fmt = ioformats[name]
    # Make sure module is importable, since this could also raise an error.
    fmt.module
    return fmt
def register_external_io_formats(group):
    """Register all IO formats exposed through the entry point ``group``.

    A failure to register one format is reported as a warning and does
    not prevent the remaining formats from being registered.
    """
    # Query the entry points only once; both API flavours are supported:
    # objects offering select() and the older dict-like mapping.
    all_entry_points = entry_points()
    if hasattr(all_entry_points, 'select'):
        fmt_entry_points = all_entry_points.select(group=group)
    else:
        fmt_entry_points = all_entry_points.get(group, ())

    for entry_point in fmt_entry_points:
        try:
            define_external_io_format(entry_point)
        except Exception as exc:
            warnings.warn(
                'Failed to register external '
                f'IO format {entry_point.name}: {exc}'
            )
def define_external_io_format(entry_point):
    """Load ``entry_point`` and register the format it describes.

    The format is registered under the name of the entry point.

    Raises ValueError if a format with that name already exists and
    TypeError if the loaded object is not an ExternalIOFormat.
    """
    fmt = entry_point.load()
    if entry_point.name in ioformats:
        raise ValueError(f'Format {entry_point.name} already defined')
    if not isinstance(fmt, ExternalIOFormat):
        raise TypeError('Wrong type for registering external IO formats '
                        f'in format {entry_point.name}, expected '
                        'ExternalIOFormat')
    F(entry_point.name, **fmt._asdict(), external=True)
# We define all the IO formats below.  Each IO format has a code,
# such as '1F', which defines some of the format's properties:
#
# 1=single atoms object
# +=multiple atoms objects
# F=accepts a file-descriptor
# S=needs a file-name str
# B=like F, but opens in binary mode

F = define_io_format
F('abinit-gsr', 'ABINIT GSR file', '1S',
  module='abinit', glob='*o_GSR.nc')
F('abinit-in', 'ABINIT input file', '1F',
  module='abinit', magic=b'*znucl *')
F('abinit-out', 'ABINIT output file', '1F',
  module='abinit', magic=b'*.Version * of ABINIT')
F('aims', 'FHI-aims geometry file', '1S', ext='in')
F('aims-output', 'FHI-aims output', '+S',
  module='aims', magic=b'*Invoking FHI-aims ...')
F('bundletrajectory', 'ASE bundle trajectory', '+S')
F('castep-castep', 'CASTEP output file', '+F',
  module='castep', ext='castep')
F('castep-cell', 'CASTEP geom file', '1F',
  module='castep', ext='cell')
F('castep-geom', 'CASTEP trajectory file', '+F',
  module='castep', ext='geom')
F('castep-md', 'CASTEP molecular dynamics file', '+F',
  module='castep', ext='md')
F('castep-phonon', 'CASTEP phonon file', '1F',
  module='castep', ext='phonon')
F('cfg', 'AtomEye configuration', '1F')
F('cif', 'CIF-file', '+B', ext='cif')
F('cmdft', 'CMDFT-file', '1F', glob='*I_info')
F('cjson', 'Chemical json file', '1F', ext='cjson')
F('cp2k-dcd', 'CP2K DCD file', '+B',
  module='cp2k', ext='dcd')
F('cp2k-restart', 'CP2K restart file', '1F',
  module='cp2k', ext='restart')
F('crystal', 'Crystal fort.34 format', '1F',
  ext=['f34', '34'], glob=['f34', '34'])
F('cube', 'CUBE file', '1F', ext='cube')
F('dacapo-text', 'Dacapo text output', '1F',
  module='dacapo', magic=b'*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n')
F('db', 'ASE SQLite database file', '+S')
F('dftb', 'DftbPlus input file', '1S', magic=b'Geometry')
F('dlp4', 'DL_POLY_4 CONFIG file', '1F',
  module='dlp4', ext='config', glob=['*CONFIG*'])
F('dlp-history', 'DL_POLY HISTORY file', '+F',
  module='dlp4', glob='HISTORY')
F('dmol-arc', 'DMol3 arc file', '+S',
  module='dmol', ext='arc')
F('dmol-car', 'DMol3 structure file', '1S',
  module='dmol', ext='car')
F('dmol-incoor', 'DMol3 structure file', '1S',
  module='dmol')
F('elk', 'ELK atoms definition from GEOMETRY.OUT', '1F',
  glob=['GEOMETRY.OUT'])
F('elk-in', 'ELK input file', '1F', module='elk')
F('eon', 'EON CON file', '+F',
  ext='con')
F('eps', 'Encapsulated Postscript', '1S')
F('espresso-in', 'Quantum espresso in file', '1F',
  module='espresso', ext='pwi', magic=[b'*\n&system', b'*\n&SYSTEM'])
F('espresso-out', 'Quantum espresso out file', '+F',
  module='espresso', ext=['pwo', 'out'], magic=b'*Program PWSCF')
# 'exciting' used to be registered twice (once for 'input.xml' and once for
# 'INFO.out').  The second registration silently replaced the first, so the
# 'input.xml' glob was never active.  Both globs now live in one definition.
# NOTE(review): confirm that the exciting module handles both file kinds.
F('exciting', 'exciting input/output', '1F',
  module='exciting', glob=['input.xml', 'INFO.out'])
F('extxyz', 'Extended XYZ file', '+F', ext='xyz')
F('findsym', 'FINDSYM-format', '+F')
F('gamess-us-out', 'GAMESS-US output file', '1F',
  module='gamess_us', magic=b'*GAMESS')
F('gamess-us-in', 'GAMESS-US input file', '1F',
  module='gamess_us')
F('gamess-us-punch', 'GAMESS-US punchcard file', '1F',
  module='gamess_us', magic=b' $DATA', ext='dat')
F('gaussian-in', 'Gaussian com (input) file', '1F',
  module='gaussian', ext=['com', 'gjf'])
F('gaussian-out', 'Gaussian output file', '+F',
  module='gaussian', ext='log', magic=b'*Entering Gaussian System')
F('acemolecule-out', 'ACE output file', '1S',
  module='acemolecule')
F('acemolecule-input', 'ACE input file', '1S',
  module='acemolecule')
F('gen', 'DFTBPlus GEN format', '1F')
F('gif', 'Graphics interchange format', '+S',
  module='animation')
# NOTE(review): runs of spaces inside this pattern may have been collapsed
# when this listing was extracted -- verify against the upstream file.
F('gpaw-out', 'GPAW text output', '+F',
  magic=b'* ___ ___ ___ _ _ _')
F('gpumd', 'GPUMD input file', '1F', glob='xyz.in')
F('gpw', 'GPAW restart-file', '1S',
  magic=[b'- of UlmGPAW', b'AFFormatGPAW'])
F('gromacs', 'Gromacs coordinates', '1F',
  ext='gro')
F('gromos', 'Gromos96 geometry file', '1F', ext='g96')
F('html', 'X3DOM HTML', '1F', module='x3d')
F('json', 'ASE JSON database file', '+F', ext='json', module='db')
F('jsv', 'JSV file format', '1F')
F('lammps-dump-text', 'LAMMPS text dump file', '+F',
  module='lammpsrun', magic_regex=b'.*?^ITEM: TIMESTEP$')
F('lammps-dump-binary', 'LAMMPS binary dump file', '+B',
  module='lammpsrun')
F('lammps-data', 'LAMMPS data file', '1F', module='lammpsdata',
  encoding='ascii')
F('magres', 'MAGRES ab initio NMR data file', '1F')
F('mol', 'MDL Molfile', '1F')
F('mp4', 'MP4 animation', '+S',
  module='animation')
F('mustem', 'muSTEM xtl file', '1F',
  ext='xtl')
F('mysql', 'ASE MySQL database file', '+S',
  module='db')
F('netcdftrajectory', 'AMBER NetCDF trajectory file', '+S',
  magic=b'CDF')
F('nomad-json', 'JSON from Nomad archive', '+F',
  ext='nomad-json')
F('nwchem-in', 'NWChem input file', '1F',
  module='nwchem', ext='nwi')
F('nwchem-out', 'NWChem output file', '+F',
  module='nwchem', ext='nwo',
  magic=b'*Northwest Computational Chemistry Package')
F('octopus-in', 'Octopus input file', '1F',
  module='octopus', glob='inp')
F('onetep-out', 'ONETEP output file', '+F',
  module='onetep',
  magic=b'*Linear-Scaling Ab Initio Total Energy Program*')
F('onetep-in', 'ONETEP input file', '1F',
  module='onetep',
  magic=[b'*lock species ',
         b'*LOCK SPECIES ',
         b'*--- INPUT FILE ---*'])
F('proteindatabank', 'Protein Data Bank', '+F',
  ext='pdb')
F('png', 'Portable Network Graphics', '1B')
F('postgresql', 'ASE PostgreSQL database file', '+S', module='db')
F('pov', 'Persistance of Vision', '1S')
# prismatic: Should have ext='xyz' if/when multiple formats can have the same
# extension
F('prismatic', 'prismatic and computem XYZ-file', '1F')
F('py', 'Python file', '+F')
F('sys', 'qball sys file', '1F')
F('qbox', 'QBOX output file', '+F',
  magic=b'*:simulation xmlns:')
F('res', 'SHELX format', '1S', ext='shelx')
F('rmc6f', 'RMCProfile', '1S', ext='rmc6f')
F('sdf', 'SDF format', '1F')
F('siesta-xv', 'Siesta .XV file', '1F',
  glob='*.XV', module='siesta')
F('struct', 'WIEN2k structure file', '1S', module='wien2k')
F('struct_out', 'SIESTA STRUCT file', '1F', module='siesta')
F('traj', 'ASE trajectory', '+B', module='trajectory', ext='traj',
  magic=[b'- of UlmASE-Trajectory', b'AFFormatASE-Trajectory'])
F('turbomole', 'TURBOMOLE coord file', '1F', glob='coord',
  magic=b'$coord')
F('turbomole-gradient', 'TURBOMOLE gradient file', '+F',
  module='turbomole', glob='gradient', magic=b'$grad')
F('v-sim', 'V_Sim ascii file', '1F', ext='ascii')
F('vasp', 'VASP POSCAR/CONTCAR', '1F',
  ext='poscar', glob=['*POSCAR*', '*CONTCAR*', '*CENTCAR*'])
F('vasp-out', 'VASP OUTCAR file', '+F',
  module='vasp', glob='*OUTCAR*')
F('vasp-xdatcar', 'VASP XDATCAR file', '+F',
  module='vasp', glob='*XDATCAR*')
F('vasp-xml', 'VASP vasprun.xml file', '+F',
  module='vasp', glob='*vasp*.xml')
F('vti', 'VTK XML Image Data', '1F', module='vtkxml')
F('vtu', 'VTK XML Unstructured Grid', '1F', module='vtkxml', ext='vtu')
F('wout', 'Wannier90 output', '1F', module='wannier90')
F('x3d', 'X3D', '1S')
F('xsd', 'Materials Studio file', '1F')
F('xsf', 'XCrySDen Structure File', '+F',
  magic=[b'*\nANIMSTEPS', b'*\nCRYSTAL', b'*\nSLAB', b'*\nPOLYMER',
         b'*\nMOLECULE', b'*\nATOMS'])
F('xtd', 'Materials Studio file', '+F')
# xyz: No `ext='xyz'` in the definition below.
#      The .xyz files are handled by the extxyz module by default.
F('xyz', 'XYZ-file', '+F')

# Register IO formats exposed through the ase.ioformats entry point
register_external_io_formats('ase.ioformats')
def get_compression(filename: str) -> Tuple[str, Optional[str]]:
    """
    Parse any expected file compression from the extension of a filename.
    Return the filename without the extension, and the extension. Recognises
    ``.gz``, ``.bz2``, ``.xz``.

    >>> get_compression('H2O.pdb.gz')
    ('H2O.pdb', 'gz')
    >>> get_compression('crystal.cif')
    ('crystal.cif', None)

    Parameters
    ==========
    filename: str
        Full filename including extension.

    Returns
    =======
    (root, extension): (str, str or None)
        Filename split into root without extension, and the extension
        indicating compression format. Will not split if compression
        is not recognised.
    """
    # Update if anything is added
    valid_compression = ('gz', 'bz2', 'xz')

    # Use stdlib as it handles most edge cases
    root, extension = os.path.splitext(filename)

    # The extension keeps the '.', so remove it before comparing
    compression = extension.strip('.')
    if compression in valid_compression:
        return root, compression
    return filename, None
def open_with_compression(filename: str, mode: str = 'r') -> IO:
    """
    Wrapper around builtin `open` that will guess compression of a file
    from the filename and open it for reading or writing as if it were
    a standard file.

    Implemented for ``gz``(gzip), ``bz2``(bzip2) and ``xz``(lzma).

    Supported modes are:
       * 'r', 'rt', 'w', 'wt' for text mode read and write.
       * 'rb', 'wb' for binary read and write.

    The modes 'r', 'w' and 'a' are converted to 'rt', 'wt' and 'at'; any
    other mode is passed on unchanged.

    Parameters
    ==========
    filename: str
        Path to the file to open, including any extensions that indicate
        the compression used.
    mode: str
        Mode to open the file, same as for builtin ``open``, e.g 'r', 'w'.

    Returns
    =======
    fd: file
        File-like object open with the specified mode.
    """

    # Compressed formats sometimes default to binary, so force text mode.
    text_modes = {'r': 'rt', 'w': 'wt', 'a': 'at'}
    mode = text_modes.get(mode, mode)

    _root, compression = get_compression(filename)

    if compression == 'gz':
        import gzip
        return gzip.open(filename, mode=mode)  # type: ignore[return-value]
    elif compression == 'bz2':
        import bz2
        return bz2.open(filename, mode=mode)
    elif compression == 'xz':
        import lzma
        return lzma.open(filename, mode)
    else:
        # Either None or unknown string
        return open(filename, mode)
def is_compressed(fd: io.BufferedIOBase) -> bool:
    """Check if the file object is in a compressed format.

    Only the gzip, bz2 and lzma file classes are recognised, and only if
    the corresponding module has already been imported.
    """
    # We'd like to avoid triggering imports unless already imported.
    # Also, Python can be compiled without e.g. lzma so we need to
    # protect against that:
    if 'gzip' in sys.modules:
        import gzip
        if isinstance(fd, gzip.GzipFile):
            return True
    if 'bz2' in sys.modules:
        import bz2
        if isinstance(fd, bz2.BZ2File):
            return True
    if 'lzma' in sys.modules:
        import lzma
        if isinstance(fd, lzma.LZMAFile):
            return True
    return False
def wrap_read_function(read, filename, index=None, **kwargs):
    """Convert read-function to generator.

    Without an index, the single result of ``read(filename, **kwargs)``
    is yielded.  With an index, ``read(filename, index, **kwargs)`` must
    return an iterable whose items are yielded one by one.
    """
    if index is not None:
        yield from read(filename, index, **kwargs)
        return
    yield read(filename, **kwargs)
# Type of the ``filename`` argument of read(), iread(), write() and
# filetype(): a path given as str or PurePath, or an open file object.
NameOrFile = Union[str, PurePath, IO]
def write(
        filename: NameOrFile,
        images: Union[Atoms, Sequence[Atoms]],
        format: Optional[str] = None,
        parallel: bool = True,
        append: bool = False,
        **kwargs: Any
) -> None:
    """Write Atoms object(s) to file.

    filename: str or file
        Name of the file to write to or a file descriptor.  The name '-'
        means standard output.
    images: Atoms object or list of Atoms objects
        A single Atoms object or a list of Atoms objects.
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be taken from suffix of the filename.
        If no format can be determined (standard output, or a file
        descriptor whose type cannot be guessed), json is used.
    parallel: bool
        Default is to write on master only.  Use parallel=False to write
        from all slaves.
    append: bool
        Default is to open files in 'w' or 'wb' mode, overwriting
        existing files.  In some cases opening the file in 'a' or 'ab'
        mode (appending) is useful,
        e.g. writing trajectories or saving multiple Atoms objects in one file.
        WARNING: If the file format does not support multiple entries without
        additional keywords/headers, files created using 'append=True'
        might not be readable by any program! They will nevertheless be
        written without error message.

    The use of additional keywords is format specific. write() may
    return an object after writing certain formats, but this behaviour
    may change in the future.

    """

    if isinstance(filename, PurePath):
        filename = str(filename)

    if isinstance(filename, str):
        fd = None
        if filename == '-':
            fd = sys.stdout
            filename = None  # type: ignore[assignment]
        elif format is None:
            format = filetype(filename, read=False)
            assert isinstance(format, str)
    else:
        # A file object was passed in; from here on only ``fd`` is used.
        fd = filename  # type: ignore[assignment]
        if format is None:
            try:
                format = filetype(filename, read=False)
                assert isinstance(format, str)
            except UnknownFileTypeError:
                format = None
        filename = None  # type: ignore[assignment]

    format = format or 'json'  # default is json

    io = get_ioformat(format)

    return _write(filename, fd, format, io, images,
                  parallel=parallel, append=append, **kwargs)
@parallel_function
def _write(filename, fd, format, io, images, parallel=None, append=False,
           **kwargs):
    """Write ``images`` using the IOFormat ``io``.

    Exactly one of ``filename`` and ``fd`` is expected to be set by
    write().  Raises ValueError if the format cannot write, cannot store
    the given number of images, cannot write to a file descriptor or
    cannot append as requested.
    """
    if isinstance(images, Atoms):
        images = [images]

    if io.single:
        if len(images) > 1:
            raise ValueError('{}-format can only store 1 Atoms object.'
                             .format(format))
        images = images[0]

    if not io.can_write:
        raise ValueError(f"Can't write to {format}-format")

    # Special case for json-format:
    if format == 'json' and (len(images) > 1 or append):
        if filename is not None:
            return io.write(filename, images, append=append, **kwargs)
        raise ValueError("Can't write more than one image to file-descriptor "
                         'using json-format.')

    if io.acceptsfd:
        # Only a file opened here is closed here; a caller-provided
        # descriptor is left open.
        open_new = (fd is None)
        try:
            if open_new:
                mode = 'wb' if io.isbinary else 'w'
                if append:
                    mode = mode.replace('w', 'a')
                fd = open_with_compression(filename, mode)
                # XXX remember to re-enable compressed open
                # fd = io.open(filename, mode)
            return io.write(fd, images, **kwargs)
        finally:
            if open_new and fd is not None:
                fd.close()
    else:
        if fd is not None:
            raise ValueError("Can't write {}-format to file-descriptor"
                             .format(format))
        if io.can_append:
            return io.write(filename, images, append=append, **kwargs)
        elif append:
            raise ValueError("Cannot append to {}-format, write-function "
                             "does not support the append keyword."
                             .format(format))
        else:
            return io.write(filename, images, **kwargs)
def read(
        filename: NameOrFile,
        index: Any = None,
        format: Optional[str] = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Union[Atoms, List[Atoms]]:
    """Read Atoms object(s) from file.

    filename: str or file
        Name of the file to read from or a file descriptor.
    index: int, slice or str
        The last configuration will be returned by default.  Examples:

            * ``index=0``: first configuration
            * ``index=-2``: second to last
            * ``index=':'`` or ``index=slice(None)``: all
            * ``index='-3:'`` or ``index=slice(-3, None)``: three last
            * ``index='::2'`` or ``index=slice(0, None, 2)``: even
            * ``index='1::2'`` or ``index=slice(1, None, 2)``: odd
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.
    parallel: bool
        Default is to read on master and broadcast to slaves.  Use
        parallel=False to read on all slaves.
    do_not_split_by_at_sign: bool
        If False (default), ``filename`` is split at the last at sign
        ``@`` and the part after it is interpreted as an index.

    Many formats allow an open file-like object to be passed instead
    of ``filename``. In this case the format cannot be auto-detected,
    so the ``format`` argument should be explicitly given."""

    if isinstance(filename, PurePath):
        filename = str(filename)
    if filename == '-':
        filename = sys.stdin
    if isinstance(index, str):
        # Strings that are not valid indices are passed on unchanged.
        try:
            index = string2index(index)
        except ValueError:
            pass

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)
    if index is None:
        index = -1
    format = format or filetype(filename, read=isinstance(filename, str))

    io = get_ioformat(format)
    if isinstance(index, (slice, str)):
        return list(_iread(filename, index, format, io, parallel=parallel,
                           **kwargs))
    else:
        return next(_iread(filename, slice(index, None), format, io,
                           parallel=parallel, **kwargs))
def iread(
        filename: NameOrFile,
        index: Any = None,
        format: Optional[str] = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Iterable[Atoms]:
    """Iterator for reading Atoms objects from file.

    Works as the `read` function, but yields one Atoms object at a time
    instead of all at once.  Without an index, all images are yielded."""

    if isinstance(filename, PurePath):
        filename = str(filename)

    if isinstance(index, str):
        index = string2index(index)

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)

    if index is None or index == ':':
        index = slice(None, None, None)

    if not isinstance(index, (slice, str)):
        # Single integer: select just that image.  For index == -1 the
        # stop value would be 0, so None is used instead.
        index = slice(index, (index + 1) or None)

    format = format or filetype(filename, read=isinstance(filename, str))
    io = get_ioformat(format)

    yield from _iread(filename, index, format, io, parallel=parallel,
                      **kwargs)
@parallel_generator
def _iread(filename, index, format, io, parallel=None, full_output=False,
           **kwargs):
    """Yield images read from ``filename`` using the IOFormat ``io``.

    ``index`` is passed on to the reader for multi-image formats only.
    With ``full_output`` the dictionaries produced by the reader are
    yielded; otherwise only their 'atoms' entry is.  Results that are
    not dictionaries are treated as the atoms themselves.

    Raises ValueError if the format cannot be read.
    """

    if not io.can_read:
        raise ValueError(f"Can't read from {format}-format")

    if io.single:
        start = index.start
        assert start is None or start == 0 or start == -1
        args = ()
    else:
        args = (index,)

    must_close_fd = False
    if isinstance(filename, str):
        if io.acceptsfd:
            mode = 'rb' if io.isbinary else 'r'
            fd = open_with_compression(filename, mode)
            must_close_fd = True
        else:
            # The reader needs the file name itself.
            fd = filename
    else:
        assert io.acceptsfd
        fd = filename

    # Make sure fd is closed in case loop doesn't finish:
    try:
        for dct in io.read(fd, *args, **kwargs):
            if not isinstance(dct, dict):
                dct = {'atoms': dct}
            if full_output:
                yield dct
            else:
                yield dct['atoms']
    finally:
        if must_close_fd:
            fd.close()
def parse_filename(filename, index=None, do_not_split_by_at_sign=False):
    """Split an index of the form ``name@index`` off a file name.

    Returns ``(filename, index)``.  The input is returned unchanged if
    ``filename`` is not a string, if ``do_not_split_by_at_sign`` is true
    or if the base name contains no ``@``.  Otherwise the name is split
    at the last ``@``.  A slice passed as ``index`` takes precedence over
    the index found in the name.  If the index in the name cannot be
    parsed, a warning is issued and it is returned as a string.
    """
    if not isinstance(filename, str):
        return filename, index

    basename = os.path.basename(filename)
    if do_not_split_by_at_sign or '@' not in basename:
        return filename, index

    newfilename, newindex = filename.rsplit('@', 1)

    if isinstance(index, slice):
        return newfilename, index
    try:
        newindex = string2index(newindex)
    except ValueError:
        warnings.warn('Can not parse index for path \n'
                      ' "%s" \nConsider set '
                      'do_not_split_by_at_sign=True \nif '
                      'there is no index.' % filename)
    return newfilename, newindex
def match_magic(data: bytes) -> IOFormat:
    """Return the first registered format whose magic matches ``data``.

    Only the first PEEK_BYTES bytes of ``data`` are considered.
    Raises UnknownFileTypeError if no format matches.
    """
    data = data[:PEEK_BYTES]
    for ioformat in ioformats.values():
        if ioformat.match_magic(data):
            return ioformat
    raise UnknownFileTypeError('Cannot guess file type from contents')
def filetype(
        filename: NameOrFile,
        read: bool = True,
        guess: bool = True,
) -> str:
    """Try to guess the type of the file.

    First, special signatures in the filename will be checked for.  If that
    does not identify the file type, then the first PEEK_BYTES bytes of the
    file will be read and analysed.  Turn off this second part by using
    read=False.

    If the contents are not recognised either, the file extension is used:
    the format registered for it or, if ``guess`` is true, the extension
    itself.

    Raises UnknownFileTypeError if the type cannot be determined.

    Can be used from the command-line also::

        $ ase info filename ...
    """

    orig_filename = filename
    if hasattr(filename, 'name'):
        filename = filename.name

    ext = None
    opened_here = False
    if isinstance(filename, str):
        if os.path.isdir(filename):
            if os.path.basename(os.path.normpath(filename)) == 'states':
                return 'eon'
            return 'bundletrajectory'

        if filename.startswith('postgres'):
            return 'postgresql'

        if filename.startswith('mysql') or filename.startswith('mariadb'):
            return 'mysql'

        # strip any compression extensions that can be read
        root, _compression = get_compression(filename)
        basename = os.path.basename(root)

        if '.' in basename:
            ext = os.path.splitext(basename)[1].strip('.').lower()

        for fmt in ioformats.values():
            if fmt.match_name(basename):
                return fmt.name

        if not read:
            if ext is None:
                raise UnknownFileTypeError('Could not guess file type')
            ioformat = extension2format.get(ext)
            if ioformat:
                return ioformat.name

            # askhl: This is strange, we don't know if ext is a format:
            return ext

        if orig_filename == filename:
            fd = open_with_compression(filename, 'rb')
            opened_here = True
        else:
            # A file object with a ``name`` attribute was passed in.
            fd = orig_filename  # type: ignore[assignment]
    else:
        fd = filename
        if fd is sys.stdin:
            return 'json'

    try:
        data = fd.read(PEEK_BYTES)
    finally:
        # Never leak a file that was opened here, even if reading fails.
        if opened_here:
            fd.close()

    if not opened_here:
        if fd is filename:
            # Unnamed file object from the caller: rewind it.
            fd.seek(0)
        else:
            # NOTE(review): this closes a file object owned by the caller
            # (one that has a ``name`` attribute).  Behaviour is kept as
            # it was; rewinding instead of closing may be what is wanted.
            fd.close()

    if len(data) == 0:
        # ``filename`` may be a file object, so do not concatenate strings.
        raise UnknownFileTypeError(f'Empty file: {filename}')

    try:
        return match_magic(data).name
    except UnknownFileTypeError:
        pass

    format = None
    if ext in extension2format:
        format = extension2format[ext].name

    if format is None and guess:
        format = ext
    if format is None:
        # Do quick xyz check:
        lines = data.splitlines()
        if lines and lines[0].strip().isdigit():
            return extension2format['xyz'].name

        raise UnknownFileTypeError('Could not guess file type')
    assert isinstance(format, str)
    return format
def index2range(index, length):
    """Convert slice or integer to range.

    The index is applied to ``range(length)``.  If index is an integer,
    range will contain only that integer; an integer outside the range
    raises IndexError."""
    selected = range(length)[index]
    if isinstance(selected, numbers.Integral):
        return range(selected, selected + 1)
    return selected