Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
"""File formats.

This module implements the read(), iread() and write() functions in ase.io.
For each file format there is an IOFormat object.

There is a dict, ioformats, which stores the objects.

Example
=======

The xyz format is implemented in the ase/io/xyz.py file which has a
read_xyz() generator and a write_xyz() function.  This and other
information can be obtained from ioformats['xyz'].
"""
16import io
17import re
18import functools
19import inspect
20import os
21import sys
22import numbers
23import warnings
24from pathlib import Path, PurePath
25from typing import (
26 IO, List, Any, Iterable, Tuple, Union, Sequence, Dict, Optional)
28from ase.atoms import Atoms
29from importlib import import_module
30from ase.parallel import parallel_function, parallel_generator
# Maximum number of leading bytes that are inspected when a file type is
# guessed from the file's contents (see filetype() and match_magic()).
PEEK_BYTES = 50000
class UnknownFileTypeError(Exception):
    """Raised when a file format is unknown or cannot be determined."""
class IOFormat:
    """Description of one file format plus access to its reader and writer.

    The two-character ``code`` encodes the properties of the format:

    * first character: ``'1'`` (a single Atoms object) or ``'+'``
      (multiple Atoms objects);
    * second character: ``'F'`` (accepts a file descriptor), ``'B'``
      (like ``'F'`` but the file is opened in binary mode) or ``'S'``
      (needs a filename string).

    The reader and writer are looked up lazily as ``read_<name>`` and
    ``write_<name>`` (dashes in the name replaced by underscores) in the
    module called ``module_name``.
    """

    def __init__(self, name: str, desc: str, code: str, module_name: str,
                 encoding: Optional[str] = None) -> None:
        self.name = name
        self.description = desc
        assert len(code) == 2
        assert code[0] in list('+1')
        assert code[1] in list('BFS')
        self.code = code
        self.module_name = module_name
        self.encoding = encoding

        # (To be set by define_io_format())
        self.extensions: List[str] = []
        self.globs: List[str] = []
        self.magic: List[bytes] = []
        self.magic_regex: Optional[bytes] = None

    def open(self, fname, mode: str = 'r') -> IO:
        """Open ``fname`` for this format in mode 'r', 'w' or 'a'.

        Binary formats are opened in the corresponding binary mode.

        Raises ValueError for any other mode and NotImplementedError if
        the format does not support the requested operation.
        """
        # We might want append mode, too
        # We can allow more flags as needed (buffering etc.)
        if mode not in list('rwa'):
            raise ValueError("Only modes allowed are 'r', 'w', and 'a'")
        if mode == 'r' and not self.can_read:
            raise NotImplementedError('No reader implemented for {} format'
                                      .format(self.name))
        if mode == 'w' and not self.can_write:
            raise NotImplementedError('No writer implemented for {} format'
                                      .format(self.name))
        if mode == 'a' and not self.can_append:
            raise NotImplementedError('Appending not supported by {} format'
                                      .format(self.name))

        encoding = self.encoding
        if self.isbinary:
            mode += 'b'
            # open() raises ValueError if an encoding is given together
            # with a binary mode, so it must not be forwarded here.
            encoding = None

        path = Path(fname)
        return path.open(mode, encoding=encoding)

    def _buf_as_filelike(self, data: Union[str, bytes]) -> IO:
        """Wrap ``data`` in a BytesIO/StringIO matching this format.

        str/bytes are converted as needed using the format's encoding,
        falling back to utf-8 when none is defined.
        """
        encoding = self.encoding
        if encoding is None:
            encoding = 'utf-8'  # Best hacky guess.

        if self.isbinary:
            if isinstance(data, str):
                data = data.encode(encoding)
        else:
            if isinstance(data, bytes):
                data = data.decode(encoding)

        return self._ioclass(data)

    @property
    def _ioclass(self):
        """In-memory file class to use: BytesIO if binary, else StringIO."""
        if self.isbinary:
            return io.BytesIO
        else:
            return io.StringIO

    def parse_images(self, data: Union[str, bytes],
                     **kwargs) -> 'Sequence[Atoms]':
        """Parse ``data`` (file contents) and return a list of images.

        Extra keyword arguments are forwarded to the reader.
        """
        with self._buf_as_filelike(data) as fd:
            # self.read() always hands back a lazy generator (non-generator
            # read functions are wrapped), also for single-image formats,
            # so it has to be consumed while the buffer is still open.
            return list(self.read(fd, **kwargs))

    def parse_atoms(self, data: Union[str, bytes], **kwargs) -> 'Atoms':
        """Parse ``data`` and return the last image found in it."""
        images = self.parse_images(data, **kwargs)
        return images[-1]

    @property
    def can_read(self) -> bool:
        """Whether the format's module provides a read function."""
        return self._readfunc() is not None

    @property
    def can_write(self) -> bool:
        """Whether the format's module provides a write function."""
        return self._writefunc() is not None

    @property
    def can_append(self) -> bool:
        """Whether the write function has a variable named ``append``."""
        writefunc = self._writefunc()
        return self.can_write and 'append' in writefunc.__code__.co_varnames

    def __repr__(self) -> str:
        tokens = ['{}={}'.format(name, repr(value))
                  for name, value in vars(self).items()]
        return 'IOFormat({})'.format(', '.join(tokens))

    def __getitem__(self, i):
        # For compatibility.
        #
        # Historically, the ioformats were listed as tuples
        # with (description, code).  We look like such a tuple.
        return (self.description, self.code)[i]

    @property
    def single(self) -> bool:
        """Whether this format is for a single Atoms object."""
        return self.code[0] == '1'

    @property
    def _formatname(self) -> str:
        """Format name with dashes replaced by underscores."""
        return self.name.replace('-', '_')

    def _readfunc(self):
        """Return the module's ``read_<name>`` function or None."""
        return getattr(self.module, 'read_' + self._formatname, None)

    def _writefunc(self):
        """Return the module's ``write_<name>`` function or None."""
        return getattr(self.module, 'write_' + self._formatname, None)

    @property
    def read(self):
        """Callable that reads this format, or None (with a warning)."""
        if not self.can_read:
            self._warn_none('read')
            return None

        return self._read_wrapper

    def _read_wrapper(self, *args, **kwargs):
        """Call the read function, always returning a generator.

        Read functions that are not generator functions are wrapped with
        wrap_read_function().
        """
        function = self._readfunc()
        if function is None:
            self._warn_none('read')
            return None
        if not inspect.isgeneratorfunction(function):
            function = functools.partial(wrap_read_function, function)
        return function(*args, **kwargs)

    def _warn_none(self, action):
        """Emit a FutureWarning about accessing an unsupported action."""
        msg = ('Accessing the IOFormat.{action} property on a format '
               'without {action} support will change behaviour in the '
               'future and return a callable instead of None.  '
               'Use IOFormat.can_{action} to check whether {action} '
               'is supported.')
        warnings.warn(msg.format(action=action), FutureWarning)

    @property
    def write(self):
        """Callable that writes this format, or None (with a warning)."""
        if not self.can_write:
            self._warn_none('write')
            return None

        return self._write_wrapper

    def _write_wrapper(self, *args, **kwargs):
        """Call the write function; ValueError if there is none."""
        function = self._writefunc()
        if function is None:
            raise ValueError(f'Cannot write to {self.name}-format')
        return function(*args, **kwargs)

    @property
    def modes(self) -> str:
        """Supported modes as a string made of 'r' and/or 'w'."""
        modes = ''
        if self.can_read:
            modes += 'r'
        if self.can_write:
            modes += 'w'
        return modes

    def full_description(self) -> str:
        """Return a multi-line, human-readable summary of the format."""
        lines = [f'Name:        {self.name}',
                 f'Description: {self.description}',
                 f'Modes:       {self.modes}',
                 f'Encoding:    {self.encoding}',
                 f'Module:      {self.module_name}',
                 f'Code:        {self.code}',
                 f'Extensions:  {self.extensions}',
                 f'Globs:       {self.globs}',
                 f'Magic:       {self.magic}']
        return '\n'.join(lines)

    @property
    def acceptsfd(self) -> bool:
        """Whether the reader/writer takes a file object (code F or B)."""
        return self.code[1] != 'S'

    @property
    def isbinary(self) -> bool:
        """Whether files of this format are opened in binary mode."""
        return self.code[1] == 'B'

    @property
    def module(self):
        """Import and return the module implementing this format.

        Raises ValueError if the module is not inside ase.io and
        UnknownFileTypeError if it cannot be imported.
        """
        if not self.module_name.startswith('ase.io.'):
            raise ValueError('Will only import modules from ase.io, '
                             'not {}'.format(self.module_name))
        try:
            return import_module(self.module_name)
        except ImportError as err:
            # Chain explicitly so the original import failure stays visible.
            raise UnknownFileTypeError(
                f'File format not recognized: {self.name}.  Error: {err}'
            ) from err

    def match_name(self, basename: str) -> bool:
        """Whether ``basename`` matches one of the format's glob patterns."""
        from fnmatch import fnmatch
        return any(fnmatch(basename, pattern)
                   for pattern in self.globs)

    def match_magic(self, data: bytes) -> bool:
        """Whether ``data`` starts like a file of this format.

        Uses ``magic_regex`` if it is set; otherwise each ``magic``
        pattern (with a trailing ``*`` appended) is matched against
        ``data`` with fnmatchcase().
        """
        if self.magic_regex:
            assert not self.magic, 'Define only one of magic and magic_regex'
            match = re.match(self.magic_regex, data, re.M | re.S)
            return match is not None

        from fnmatch import fnmatchcase
        return any(fnmatchcase(data, magic + b'*')  # type: ignore
                   for magic in self.magic)
# Registry of all defined formats, keyed by format name.
ioformats: Dict[str, IOFormat] = {}  # These will be filled at run-time.
# Maps a filename extension to the IOFormat registered for it by
# define_io_format().
extension2format = {}


all_formats = ioformats  # Aliased for compatibility only.  Please do not use.
format2modulename = {}  # Left for compatibility only.
def define_io_format(name, desc, code, *, module=None, ext=None,
                     glob=None, magic=None, encoding=None,
                     magic_regex=None):
    """Create an IOFormat, register it globally and return it.

    ``module`` is the name of the implementing module inside ase.io and
    defaults to ``name`` with dashes replaced by underscores.  ``ext``,
    ``glob`` and ``magic`` may each be None, a single str/bytes or an
    iterable of them.

    Raises ValueError if one of the extensions is already registered.
    """
    if module is None:
        module = name.replace('-', '_')
    format2modulename[name] = module

    def as_list(patterns):
        # Accept None, a single pattern or any iterable of patterns.
        if patterns is None:
            return []
        if isinstance(patterns, (str, bytes)):
            return [patterns]
        return list(patterns)

    fmt = IOFormat(name, desc, code, module_name='ase.io.' + module,
                   encoding=encoding)
    fmt.extensions = as_list(ext)
    fmt.globs = as_list(glob)
    fmt.magic = as_list(magic)

    if magic_regex is not None:
        fmt.magic_regex = magic_regex

    for extension in fmt.extensions:
        if extension in extension2format:
            raise ValueError(
                'extension "{}" already registered'.format(extension))
        extension2format[extension] = fmt

    ioformats[name] = fmt
    return fmt
def get_ioformat(name: str) -> IOFormat:
    """Return ioformat object or raise appropriate error."""
    if name not in ioformats:
        raise UnknownFileTypeError(name)
    # Touch the module property so that a format whose module cannot be
    # imported raises here rather than later.
    ioformats[name].module
    return ioformats[name]
# We define all the IO formats below.  Each IO format has a code,
# such as '1F', which defines some of the format's properties:
#
# 1=single atoms object
# +=multiple atoms objects
# F=accepts a file-descriptor
# S=needs a file-name str
# B=like F, but opens in binary mode
#
# Keyword arguments of each definition:
#   module: name of the implementing module inside ase.io
#           (defaults to the format name with '-' replaced by '_')
#   ext:    filename extension(s) registered for the format
#   glob:   filename glob pattern(s) identifying the format
#   magic:  bytes pattern(s) matched against the start of the file

F = define_io_format
F('abinit-in', 'ABINIT input file', '1F',
  module='abinit', magic=b'*znucl *')
F('abinit-out', 'ABINIT output file', '1F',
  module='abinit', magic=b'*.Version * of ABINIT')
F('aims', 'FHI-aims geometry file', '1S', ext='in')
F('aims-output', 'FHI-aims output', '+S',
  module='aims', magic=b'*Invoking FHI-aims ...')
F('bundletrajectory', 'ASE bundle trajectory', '+S')
F('castep-castep', 'CASTEP output file', '+F',
  module='castep', ext='castep')
F('castep-cell', 'CASTEP geom file', '1F',
  module='castep', ext='cell')
F('castep-geom', 'CASTEP trajectory file', '+F',
  module='castep', ext='geom')
F('castep-md', 'CASTEP molecular dynamics file', '+F',
  module='castep', ext='md')
F('castep-phonon', 'CASTEP phonon file', '1F',
  module='castep', ext='phonon')
F('cfg', 'AtomEye configuration', '1F')
F('cif', 'CIF-file', '+B', ext='cif')
F('cmdft', 'CMDFT-file', '1F', glob='*I_info')
F('cml', 'Chemical json file', '1F', ext='cml')
F('cp2k-dcd', 'CP2K DCD file', '+B',
  module='cp2k', ext='dcd')
F('cp2k-restart', 'CP2K restart file', '1F',
  module='cp2k', ext='restart')
F('crystal', 'Crystal fort.34 format', '1F',
  ext=['f34', '34'], glob=['f34', '34'])
F('cube', 'CUBE file', '1F', ext='cube')
F('dacapo-text', 'Dacapo text output', '1F',
  module='dacapo', magic=b'*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n')
F('db', 'ASE SQLite database file', '+S')
F('dftb', 'DftbPlus input file', '1S', magic=b'Geometry')
F('dlp4', 'DL_POLY_4 CONFIG file', '1F',
  module='dlp4', ext='config', glob=['*CONFIG*'])
F('dlp-history', 'DL_POLY HISTORY file', '+F',
  module='dlp4', glob='HISTORY')
F('dmol-arc', 'DMol3 arc file', '+S',
  module='dmol')
F('dmol-car', 'DMol3 structure file', '1S',
  module='dmol', ext='car')
F('dmol-incoor', 'DMol3 structure file', '1S',
  module='dmol')
F('elk', 'ELK atoms definition from GEOMETRY.OUT', '1F',
  glob=['GEOMETRY.OUT'])
F('elk-in', 'ELK input file', '1F', module='elk')
F('eon', 'EON CON file', '+F',
  ext='con')
F('eps', 'Encapsulated Postscript', '1S')
F('espresso-in', 'Quantum espresso in file', '1F',
  module='espresso', ext='pwi', magic=[b'*\n&system', b'*\n&SYSTEM'])
F('espresso-out', 'Quantum espresso out file', '+F',
  module='espresso', ext=['out', 'pwo'], magic=b'*Program PWSCF')
F('exciting', 'exciting input', '1F', glob='input.xml')
F('extxyz', 'Extended XYZ file', '+F', ext='xyz')
F('findsym', 'FINDSYM-format', '+F')
F('gamess-us-out', 'GAMESS-US output file', '1F',
  module='gamess_us', magic=b'*GAMESS')
F('gamess-us-in', 'GAMESS-US input file', '1F',
  module='gamess_us')
F('gamess-us-punch', 'GAMESS-US punchcard file', '1F',
  module='gamess_us', magic=b' $DATA', ext='dat')
F('gaussian-in', 'Gaussian com (input) file', '1F',
  module='gaussian', ext=['com', 'gjf'])
F('gaussian-out', 'Gaussian output file', '+F',
  module='gaussian', ext='log', magic=b'*Entering Gaussian System')
F('acemolecule-out', 'ACE output file', '1S',
  module='acemolecule')
F('acemolecule-input', 'ACE input file', '1S',
  module='acemolecule')
F('gen', 'DFTBPlus GEN format', '1F')
F('gif', 'Graphics interchange format', '+S',
  module='animation')
F('gpaw-out', 'GPAW text output', '+F',
  magic=b'* ___ ___ ___ _ _ _')
F('gpumd', 'GPUMD input file', '1F', glob='xyz.in')
F('gpw', 'GPAW restart-file', '1S',
  magic=[b'- of UlmGPAW', b'AFFormatGPAW'])
F('gromacs', 'Gromacs coordinates', '1F',
  ext='gro')
F('gromos', 'Gromos96 geometry file', '1F', ext='g96')
F('html', 'X3DOM HTML', '1F', module='x3d')
F('json', 'ASE JSON database file', '+F', ext='json', module='db')
F('jsv', 'JSV file format', '1F')
F('lammps-dump-text', 'LAMMPS text dump file', '+F',
  module='lammpsrun', magic_regex=b'.*?^ITEM: TIMESTEP$')
F('lammps-dump-binary', 'LAMMPS binary dump file', '+B',
  module='lammpsrun')
F('lammps-data', 'LAMMPS data file', '1F', module='lammpsdata',
  encoding='ascii')
F('magres', 'MAGRES ab initio NMR data file', '1F')
F('mol', 'MDL Molfile', '1F')
F('mp4', 'MP4 animation', '+S',
  module='animation')
F('mustem', 'muSTEM xtl file', '1F',
  ext='xtl')
F('mysql', 'ASE MySQL database file', '+S',
  module='db')
F('netcdftrajectory', 'AMBER NetCDF trajectory file', '+S',
  magic=b'CDF')
F('nomad-json', 'JSON from Nomad archive', '+F',
  ext='nomad-json')
F('nwchem-in', 'NWChem input file', '1F',
  module='nwchem', ext='nwi')
F('nwchem-out', 'NWChem output file', '+F',
  module='nwchem', ext='nwo',
  magic=b'*Northwest Computational Chemistry Package')
F('octopus-in', 'Octopus input file', '1F',
  module='octopus', glob='inp')
F('proteindatabank', 'Protein Data Bank', '+F',
  ext='pdb')
F('png', 'Portable Network Graphics', '1B')
F('postgresql', 'ASE PostgreSQL database file', '+S', module='db')
F('pov', 'Persistance of Vision', '1S')
# prismatic: Should have ext='xyz' if/when multiple formats can have the same
# extension
F('prismatic', 'prismatic and computem XYZ-file', '1F')
F('py', 'Python file', '+F')
F('sys', 'qball sys file', '1F')
F('qbox', 'QBOX output file', '+F',
  magic=b'*:simulation xmlns:')
F('res', 'SHELX format', '1S', ext='shelx')
F('rmc6f', 'RMCProfile', '1S', ext='rmc6f')
F('sdf', 'SDF format', '1F')
F('siesta-xv', 'Siesta .XV file', '1F',
  glob='*.XV', module='siesta')
F('struct', 'WIEN2k structure file', '1S', module='wien2k')
F('struct_out', 'SIESTA STRUCT file', '1F', module='siesta')
F('traj', 'ASE trajectory', '+B', module='trajectory', ext='traj',
  magic=[b'- of UlmASE-Trajectory', b'AFFormatASE-Trajectory'])
F('turbomole', 'TURBOMOLE coord file', '1F', glob='coord',
  magic=b'$coord')
F('turbomole-gradient', 'TURBOMOLE gradient file', '+F',
  module='turbomole', glob='gradient', magic=b'$grad')
F('v-sim', 'V_Sim ascii file', '1F', ext='ascii')
F('vasp', 'VASP POSCAR/CONTCAR', '1F',
  ext='poscar', glob=['*POSCAR*', '*CONTCAR*'])
F('vasp-out', 'VASP OUTCAR file', '+F',
  module='vasp', glob='*OUTCAR*')
F('vasp-xdatcar', 'VASP XDATCAR file', '+F',
  module='vasp', glob='*XDATCAR*')
F('vasp-xml', 'VASP vasprun.xml file', '+F',
  module='vasp', glob='*vasp*.xml')
F('vti', 'VTK XML Image Data', '1F', module='vtkxml')
F('vtu', 'VTK XML Unstructured Grid', '1F', module='vtkxml', ext='vtu')
F('wout', 'Wannier90 output', '1F', module='wannier90')
F('x3d', 'X3D', '1S')
F('xsd', 'Materials Studio file', '1F')
F('xsf', 'XCrySDen Structure File', '+F',
  magic=[b'*\nANIMSTEPS', b'*\nCRYSTAL', b'*\nSLAB', b'*\nPOLYMER',
         b'*\nMOLECULE', b'*\nATOMS'])
F('xtd', 'Materials Studio file', '+F')
# xyz: No `ext='xyz'` in the definition below.
#      The .xyz files are handled by the extxyz module by default.
F('xyz', 'XYZ-file', '+F')
def get_compression(filename: str) -> Tuple[str, Optional[str]]:
    """
    Split a recognised compression suffix off a filename.

    The suffixes ``.gz``, ``.bz2`` and ``.xz`` are recognised.

    >>> get_compression('H2O.pdb.gz')
    ('H2O.pdb', 'gz')
    >>> get_compression('crystal.cif')
    ('crystal.cif', None)

    Parameters
    ==========
    filename: str
        Full filename including extension.

    Returns
    =======
    (root, extension): (str, str or None)
        The filename without the compression suffix, and the suffix
        (without the dot).  If the last suffix is not a recognised
        compression format, the unchanged filename and None.
    """
    # Let the stdlib do the splitting as it handles most edge cases.
    stem, suffix = os.path.splitext(filename)

    # splitext() keeps the '.' on the suffix, so remove it before comparing.
    kind = suffix.strip('.')

    # Update this tuple if support for more formats is added.
    if kind not in ('gz', 'bz2', 'xz'):
        return filename, None
    return stem, kind
def open_with_compression(filename: str, mode: str = 'r') -> IO:
    """
    Open a possibly compressed file as if it were a standard file.

    The compression is guessed from the filename and the matching
    stdlib module is used in place of the builtin `open`.

    Implemented for ``gz``(gzip), ``bz2``(bzip2) and ``xz``(lzma).

    Supported modes are:
       * 'r', 'rt', 'w', 'wt' for text mode read and write.
       * 'rb', 'wb' for binary read and write.

    Parameters
    ==========
    filename: str
        Path to the file to open, including any extensions that indicate
        the compression used.
    mode: str
        Mode to open the file, same as for builtin ``open``, e.g 'r', 'w'.

    Returns
    =======
    fd: file
        File-like object open with the specified mode.
    """

    # Compressed formats sometimes default to binary, so force text mode
    # for the bare 'r', 'w' and 'a' modes; other modes pass unchanged.
    mode = {'r': 'rt', 'w': 'wt', 'a': 'at'}.get(mode, mode)

    _, compression = get_compression(filename)

    # The compression modules are imported only when they are needed.
    if compression == 'gz':
        import gzip
        return gzip.open(filename, mode=mode)  # type: ignore
    if compression == 'bz2':
        import bz2
        return bz2.open(filename, mode=mode)
    if compression == 'xz':
        import lzma
        return lzma.open(filename, mode)

    # No (recognised) compression suffix.
    return open(filename, mode)
def wrap_read_function(read, filename, index=None, **kwargs):
    """Turn a plain read function into a generator.

    Without an index, the single return value of ``read`` is yielded.
    With an index, ``read`` is called with it and every item of the
    returned iterable is yielded.
    """
    if index is not None:
        yield from read(filename, index, **kwargs)
        return
    yield read(filename, **kwargs)
# What read(), iread(), write() and filetype() accept as ``filename``:
# a path given as str or PurePath, or an open file object.
NameOrFile = Union[str, PurePath, IO]
def write(
        filename: NameOrFile,
        images: Union[Atoms, Sequence[Atoms]],
        format: str = None,
        parallel: bool = True,
        append: bool = False,
        **kwargs: dict
) -> None:
    """Write Atoms object(s) to file.

    filename: str or file
        Name of the file to write to or a file descriptor.  The name '-'
        means standard output.
    images: Atoms object or list of Atoms objects
        A single Atoms object or a list of Atoms objects.
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be taken from suffix of the filename.
        If no format can be determined for a file descriptor or for
        standard output, json is used.
    parallel: bool
        Default is to write on master only.  Use parallel=False to write
        from all slaves.
    append: bool
        Default is to open files in 'w' or 'wb' mode, overwriting
        existing files.  In some cases opening the file in 'a' or 'ab'
        mode (appending) is useful,
        e.g. writing trajectories or saving multiple Atoms objects in one file.
        WARNING: If the file format does not support multiple entries without
        additional keywords/headers, files created using 'append=True'
        might not be readable by any program! They will nevertheless be
        written without error message.

    The use of additional keywords is format specific. write() may
    return an object after writing certain formats, but this behaviour
    may change in the future.

    """

    if isinstance(filename, PurePath):
        filename = str(filename)

    fd = None
    if not isinstance(filename, str):
        # An open file object was given; a failed format guess is not an
        # error here, the default format below is used instead.
        fd = filename  # type: ignore
        if format is None:
            try:
                format = filetype(filename, read=False)
                assert isinstance(format, str)
            except UnknownFileTypeError:
                format = None
        filename = None  # type: ignore
    elif filename == '-':
        fd = sys.stdout
        filename = None  # type: ignore
    elif format is None:
        format = filetype(filename, read=False)
        assert isinstance(format, str)

    if not format:
        format = 'json'  # default is json

    ioformat = get_ioformat(format)

    return _write(filename, fd, format, ioformat, images,
                  parallel=parallel, append=append, **kwargs)
@parallel_function
def _write(filename, fd, format, io, images, parallel=None, append=False,
           **kwargs):
    """Write ``images`` with the IOFormat ``io``; helper for write().

    Exactly one of ``filename`` and ``fd`` is expected to be set.  A
    file opened here is closed again before returning.  Returns
    whatever the format's write function returns.

    Raises ValueError if the format cannot write, cannot hold the given
    number of images, or cannot handle the file descriptor/append mode.
    """
    if isinstance(images, Atoms):
        images = [images]

    if io.single:
        if len(images) > 1:
            raise ValueError('{}-format can only store 1 Atoms object.'
                             .format(format))
        images = images[0]

    if not io.can_write:
        raise ValueError("Can't write to {}-format".format(format))

    # Special case for json-format:
    if format == 'json' and (len(images) > 1 or append):
        if filename is None:
            raise ValueError("Can't write more than one image to "
                             'file-descriptor using json-format.')
        return io.write(filename, images, append=append, **kwargs)

    if not io.acceptsfd:
        # The write function wants a filename.
        if fd is not None:
            raise ValueError("Can't write {}-format to file-descriptor"
                             .format(format))
        if io.can_append:
            return io.write(filename, images, append=append, **kwargs)
        if append:
            raise ValueError("Cannot append to {}-format, write-function "
                             "does not support the append keyword."
                             .format(format))
        return io.write(filename, images, **kwargs)

    if fd is not None:
        # The caller owns the file object, so it is left open.
        return io.write(fd, images, **kwargs)

    mode = ('a' if append else 'w') + ('b' if io.isbinary else '')
    fd = open_with_compression(filename, mode)
    # XXX remember to re-enable compressed open
    # fd = io.open(filename, mode)
    try:
        return io.write(fd, images, **kwargs)
    finally:
        fd.close()
def read(
        filename: NameOrFile,
        index: Any = None,
        format: str = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Union[Atoms, List[Atoms]]:
    """Read Atoms object(s) from file.

    filename: str or file
        Name of the file to read from or a file descriptor.  The name
        '-' means standard input.
    index: int, slice or str
        The last configuration will be returned by default.  Examples:

            * ``index=0``: first configuration
            * ``index=-2``: second to last
            * ``index=':'`` or ``index=slice(None)``: all
            * ``index='-3:'`` or ``index=slice(-3, None)``: three last
            * ``index='::2'`` or ``index=slice(0, None, 2)``: even
            * ``index='1::2'`` or ``index=slice(1, None, 2)``: odd
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.
    parallel: bool
        Default is to read on master and broadcast to slaves.  Use
        parallel=False to read on all slaves.
    do_not_split_by_at_sign: bool
        If False (default) ``filename`` is split at the last at sign
        ``@`` and the part after it is used as the index.

    Many formats allow an open file-like object to be passed instead
    of ``filename``.  In this case the format cannot be auto-detected,
    so the ``format`` argument should be explicitly given."""

    if isinstance(filename, PurePath):
        filename = str(filename)
    if filename == '-':
        filename = sys.stdin
    if isinstance(index, str):
        # A string that cannot be parsed is passed on unchanged.
        try:
            index = string2index(index)
        except ValueError:
            pass

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)
    if index is None:
        index = -1
    if not format:
        format = filetype(filename, read=isinstance(filename, str))

    ioformat = get_ioformat(format)

    # A slice or string index selects a list of images; anything else
    # selects a single image, which is the first one of slice(index, None).
    want_list = isinstance(index, (slice, str))
    selection = index if want_list else slice(index, None)
    images = _iread(filename, selection, format, ioformat,
                    parallel=parallel, **kwargs)
    if want_list:
        return list(images)
    return next(images)
def iread(
        filename: NameOrFile,
        index: Any = None,
        format: str = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Iterable[Atoms]:
    """Iterator for reading Atoms objects from file.

    Takes the same arguments as the `read` function, but yields one
    Atoms object at a time instead of returning them all at once.  If
    no index is given, all images are yielded."""

    if isinstance(filename, PurePath):
        filename = str(filename)

    if isinstance(index, str):
        index = string2index(index)

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)

    if index is None or index == ':':
        index = slice(None, None, None)

    if not isinstance(index, (slice, str)):
        # Single image: ``or None`` makes index=-1 select up to the end
        # instead of producing the empty slice(-1, 0).
        index = slice(index, (index + 1) or None)

    if not format:
        format = filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    yield from _iread(filename, index, format, ioformat, parallel=parallel,
                      **kwargs)
@parallel_generator
def _iread(filename, index, format, io, parallel=None, full_output=False,
           **kwargs):
    """Yield images read with the IOFormat ``io``; helper for read/iread.

    ``index`` is passed on to the reader except for single-image
    formats.  With ``full_output=True`` the yielded items are dicts
    (results that are not dicts are wrapped as ``{'atoms': result}``);
    otherwise only the ``'atoms'`` entry is yielded.

    Raises ValueError if the format has no reader.
    """

    if not io.can_read:
        raise ValueError("Can't read from {}-format".format(format))

    if io.single:
        # A single-image reader takes no index; only the first/last
        # image can be requested.
        start = index.start
        assert start is None or start == 0 or start == -1
        args = ()
    else:
        args = (index,)

    # Only a filename for a reader that wants a file object is opened
    # (and later closed) here.
    opened_here = isinstance(filename, str) and io.acceptsfd
    if opened_here:
        fd = open_with_compression(filename, 'rb' if io.isbinary else 'r')
    else:
        assert isinstance(filename, str) or io.acceptsfd
        fd = filename

    # Make sure fd is closed in case loop doesn't finish:
    try:
        for result in io.read(fd, *args, **kwargs):
            record = result if isinstance(result, dict) else {'atoms': result}
            yield record if full_output else record['atoms']
    finally:
        if opened_here:
            fd.close()
def parse_filename(filename, index=None, do_not_split_by_at_sign=False):
    """Split a trailing ``@index`` off a filename.

    Returns ``(filename, index)``.  The input is returned unchanged if
    ``filename`` is not a string, if splitting is disabled or if the
    basename contains no ``@``.  Otherwise the filename is split at the
    last ``@``; an ``index`` given as a slice takes precedence over the
    part after the ``@``.  If that part cannot be parsed, a warning is
    issued and it is returned as a string.
    """
    if not isinstance(filename, str):
        return filename, index

    if do_not_split_by_at_sign or '@' not in os.path.basename(filename):
        return filename, index

    path, _, accessor = filename.rpartition('@')

    if isinstance(index, slice):
        return path, index

    try:
        accessor = string2index(accessor)
    except ValueError:
        warnings.warn('Can not parse index for path \n'
                      ' "%s" \nConsider set '
                      'do_not_split_by_at_sign=True \nif '
                      'there is no index.' % filename)
    return path, accessor
def match_magic(data: bytes) -> IOFormat:
    """Return the first registered format whose magic matches ``data``.

    Only the first PEEK_BYTES bytes of ``data`` are considered.
    Raises UnknownFileTypeError if no format matches.
    """
    head = data[:PEEK_BYTES]
    found = next((fmt for fmt in ioformats.values()
                  if fmt.match_magic(head)), None)
    if found is None:
        raise UnknownFileTypeError('Cannot guess file type from contents')
    return found
def string2index(string: str) -> Union[int, slice, str]:
    """Convert index string to int, slice or (unparsed) string.

    A string without ``':'`` is converted to an int if possible and
    returned unchanged otherwise (it may contain a database accessor).
    A string with ``':'`` is converted to a slice; empty fields become
    None.

    Raises ValueError if a slice field is not an integer or if there
    are more than three fields.
    """
    if ':' not in string:
        # may contain database accessor
        try:
            return int(string)
        except ValueError:
            return string

    fields = string.split(':')
    if len(fields) > 3:
        # slice() would raise a TypeError here, which callers that only
        # handle ValueError for unparsable indices would not catch.
        raise ValueError(f'Too many ":" in index string: {string!r}')

    bounds = [int(field) if field else None for field in fields]
    bounds += (3 - len(bounds)) * [None]
    return slice(*bounds)
def filetype(
        filename: NameOrFile,
        read: bool = True,
        guess: bool = True,
) -> str:
    """Try to guess the type of the file.

    First, special signatures in the filename will be checked for.  If that
    does not identify the file type, then the first PEEK_BYTES bytes of
    the file will be read and analysed.  Turn off this second part by using
    read=False (this only has an effect if a filename is available).

    If ``guess`` is true (default) and the contents do not identify the
    format, an unregistered filename extension is returned as the
    format name.

    A file object passed by the caller is never closed; after peeking
    into it, it is rewound to the start.

    Raises UnknownFileTypeError if the type cannot be determined.

    Can be used from the command-line also::

        $ ase info filename ...
    """

    orig_filename = filename
    # sys.stdin has to be recognised before its ``name`` is inspected:
    # that name is normally a string, which would send stdin down the
    # filename branch below.
    if orig_filename is sys.stdin:
        return 'json'

    if hasattr(filename, 'name'):
        filename = filename.name  # type: ignore

    ext = None
    opened_here = False
    if isinstance(filename, str):
        if os.path.isdir(filename):
            if os.path.basename(os.path.normpath(filename)) == 'states':
                return 'eon'
            return 'bundletrajectory'

        if filename.startswith('postgres'):
            return 'postgresql'

        if filename.startswith('mysql') or filename.startswith('mariadb'):
            return 'mysql'

        # strip any compression extensions that can be read
        root, compression = get_compression(filename)
        basename = os.path.basename(root)

        if '.' in basename:
            ext = os.path.splitext(basename)[1].strip('.').lower()

        for fmt in ioformats.values():
            if fmt.match_name(basename):
                return fmt.name

        if not read:
            if ext is None:
                raise UnknownFileTypeError('Could not guess file type')
            ioformat = extension2format.get(ext)
            if ioformat:
                return ioformat.name

            # askhl: This is strange, we don't know if ext is a format:
            return ext

        if orig_filename == filename:
            fd = open_with_compression(filename, 'rb')
            opened_here = True
        else:
            fd = orig_filename  # type: ignore
    else:
        # No usable name: peek into the object that was passed in.
        fd = orig_filename  # type: ignore
        if fd is sys.stdin:
            return 'json'

    try:
        data = fd.read(PEEK_BYTES)
    finally:
        # Only close what was opened here.
        if opened_here:
            fd.close()
    if not opened_here:
        # Leave the caller's file object positioned at the start.
        fd.seek(0)

    if len(data) == 0:
        raise UnknownFileTypeError(f'Empty file: {filename}')

    try:
        return match_magic(data).name
    except UnknownFileTypeError:
        pass

    format = None
    if ext in extension2format:
        format = extension2format[ext].name

    if format is None and guess:
        format = ext
    if format is None:
        # Do quick xyz check:
        lines = data.splitlines()
        if lines and lines[0].strip().isdigit():
            return extension2format['xyz'].name

        raise UnknownFileTypeError('Could not guess file type')
    assert isinstance(format, str)
    return format
def index2range(index, length):
    """Convert slice or integer to range.

    The index is applied to ``range(length)``.  If index is an integer,
    range will contain only that integer (negative indices count from
    the end)."""
    selected = range(length)[index]
    if not isinstance(selected, numbers.Integral):
        return selected
    return range(selected, selected + 1)