Coverage for /builds/debichem-team/python-ase/ase/io/formats.py: 89.67%

Hot-keys on this page

r m x p toggle line displays

j k next/prev highlighted chunk

0 (zero) top of page

1 (one) first highlighted chunk

1"""File formats.

3This module implements the read(), iread() and write() functions in ase.io.

4For each file format there is an IOFormat object.

6There is a dict, ioformats, which stores the objects.

8Example

9=======

11The xyz format is implemented in the ase/io/xyz.py file which has a

12read_xyz() generator and a write_xyz() function. This and other

13information can be obtained from ioformats['xyz'].

14"""

16import io

17import re

18import functools

19import inspect

20import os

21import sys

22import numbers

23import warnings

24from pathlib import Path, PurePath

25from typing import (

26 IO, List, Any, Iterable, Tuple, Union, Sequence, Dict, Optional)

28from ase.atoms import Atoms

29from importlib import import_module

30from ase.parallel import parallel_function, parallel_generator

# Maximum number of bytes read from the start of a file when guessing its
# format from the contents (see filetype() and match_magic()).
PEEK_BYTES = 50000

class UnknownFileTypeError(Exception):
    """Raised when a file format is not registered or cannot be guessed."""

class IOFormat:
    """Description of, and access to, one file format known to ase.io.

    An instance stores the format's name, its description, a
    two-character code and the name of the ``ase.io`` submodule that
    implements it.  The reader and writer are looked up lazily in that
    module as ``read_<name>`` and ``write_<name>`` (dashes in the format
    name are replaced by underscores), so the module is only imported
    when it is actually needed.

    The two-character code is interpreted as follows:

    * first character: ``'1'`` means a single Atoms object, ``'+'``
      means multiple Atoms objects;
    * second character: ``'F'`` accepts a file object, ``'B'`` is like
      ``'F'`` but the file is opened in binary mode, ``'S'`` needs a
      file name.
    """

    def __init__(self, name: str, desc: str, code: str, module_name: str,
                 encoding: Optional[str] = None) -> None:
        self.name = name
        self.description = desc
        assert len(code) == 2
        assert code[0] in list('+1')
        assert code[1] in list('BFS')
        self.code = code
        self.module_name = module_name
        self.encoding = encoding

        # (To be set by define_io_format())
        self.extensions: List[str] = []
        self.globs: List[str] = []
        self.magic: List[str] = []
        self.magic_regex: Optional[bytes] = None

    def open(self, fname, mode: str = 'r') -> IO:
        """Open ``fname`` in mode 'r', 'w' or 'a' as required by the format.

        Binary formats get a ``'b'`` appended to the mode; text formats
        are opened with the format's encoding.

        Raises ValueError for any other mode and NotImplementedError if
        the format has no reader/writer/append support for that mode.
        """
        # We might want append mode, too
        # We can allow more flags as needed (buffering etc.)
        if mode not in list('rwa'):
            raise ValueError("Only modes allowed are 'r', 'w', and 'a'")
        if mode == 'r' and not self.can_read:
            raise NotImplementedError('No reader implemented for {} format'
                                      .format(self.name))
        if mode == 'w' and not self.can_write:
            raise NotImplementedError('No writer implemented for {} format'
                                      .format(self.name))
        if mode == 'a' and not self.can_append:
            raise NotImplementedError('Appending not supported by {} format'
                                      .format(self.name))

        if self.isbinary:
            mode += 'b'
            # Binary streams take no encoding; passing one would make
            # Path.open() raise ValueError.
            encoding = None
        else:
            encoding = self.encoding

        path = Path(fname)
        return path.open(mode, encoding=encoding)

    def _buf_as_filelike(self, data: Union[str, bytes]) -> IO:
        """Wrap ``data`` in a BytesIO/StringIO matching the format's mode.

        ``data`` is encoded or decoded as needed, using the format's
        encoding or UTF-8 when none is set.
        """
        encoding = self.encoding
        if encoding is None:
            encoding = 'utf-8'  # Best hacky guess.

        if self.isbinary:
            if isinstance(data, str):
                data = data.encode(encoding)
        else:
            if isinstance(data, bytes):
                data = data.decode(encoding)

        return self._ioclass(data)

    @property
    def _ioclass(self):
        """In-memory stream class for this format (BytesIO or StringIO)."""
        if self.isbinary:
            return io.BytesIO
        else:
            return io.StringIO

    def parse_images(self, data: Union[str, bytes],
                     **kwargs) -> 'Sequence[Atoms]':
        """Parse ``data`` with this format's reader and return all images.

        The read wrapper always returns a generator (plain reader
        functions are wrapped by wrap_read_function()), so the result is
        collected into a list for single- and multi-image formats alike.
        The reader is invoked exactly once.
        """
        with self._buf_as_filelike(data) as fd:
            return list(self.read(fd, **kwargs))

    def parse_atoms(self, data: Union[str, bytes], **kwargs) -> 'Atoms':
        """Parse ``data`` and return the last image."""
        images = self.parse_images(data, **kwargs)
        return images[-1]

    @property
    def can_read(self) -> bool:
        """Whether the implementing module provides a read function."""
        return self._readfunc() is not None

    @property
    def can_write(self) -> bool:
        """Whether the implementing module provides a write function."""
        return self._writefunc() is not None

    @property
    def can_append(self) -> bool:
        """Whether the write function knows a name called ``append``.

        The check looks at ``__code__.co_varnames`` of the write
        function, i.e. at its argument and local variable names.
        """
        writefunc = self._writefunc()
        return self.can_write and 'append' in writefunc.__code__.co_varnames

    def __repr__(self) -> str:
        tokens = ['{}={}'.format(name, repr(value))
                  for name, value in vars(self).items()]
        return 'IOFormat({})'.format(', '.join(tokens))

    def __getitem__(self, i):
        # For compatibility.
        #
        # Historically, the ioformats were listed as tuples
        # with (description, code).  We look like such a tuple.
        return (self.description, self.code)[i]

    @property
    def single(self) -> bool:
        """Whether this format is for a single Atoms object."""
        return self.code[0] == '1'

    @property
    def _formatname(self) -> str:
        """Format name with dashes replaced by underscores."""
        return self.name.replace('-', '_')

    def _readfunc(self):
        """Return ``read_<formatname>`` from the module, or None."""
        return getattr(self.module, 'read_' + self._formatname, None)

    def _writefunc(self):
        """Return ``write_<formatname>`` from the module, or None."""
        return getattr(self.module, 'write_' + self._formatname, None)

    @property
    def read(self):
        """Callable that reads images, or None (with a FutureWarning)."""
        if not self.can_read:
            self._warn_none('read')
            return None

        return self._read_wrapper

    def _read_wrapper(self, *args, **kwargs):
        """Call the reader and always hand back a generator.

        Readers that are not generator functions are wrapped with
        wrap_read_function().
        """
        function = self._readfunc()
        if function is None:
            self._warn_none('read')
            return None
        if not inspect.isgeneratorfunction(function):
            function = functools.partial(wrap_read_function, function)
        return function(*args, **kwargs)

    def _warn_none(self, action):
        """Emit the FutureWarning about ``read``/``write`` returning None."""
        msg = ('Accessing the IOFormat.{action} property on a format '
               'without {action} support will change behaviour in the '
               'future and return a callable instead of None.  '
               'Use IOFormat.can_{action} to check whether {action} '
               'is supported.')
        warnings.warn(msg.format(action=action), FutureWarning)

    @property
    def write(self):
        """Callable that writes images, or None (with a FutureWarning)."""
        if not self.can_write:
            self._warn_none('write')
            return None

        return self._write_wrapper

    def _write_wrapper(self, *args, **kwargs):
        """Call the writer; raise ValueError if the format has none."""
        function = self._writefunc()
        if function is None:
            raise ValueError(f'Cannot write to {self.name}-format')
        return function(*args, **kwargs)

    @property
    def modes(self) -> str:
        """String containing 'r' and/or 'w' for the supported operations."""
        modes = ''
        if self.can_read:
            modes += 'r'
        if self.can_write:
            modes += 'w'
        return modes

    def full_description(self) -> str:
        """Return a multi-line, human-readable summary of the format."""
        lines = [f'Name:        {self.name}',
                 f'Description: {self.description}',
                 f'Modes:       {self.modes}',
                 f'Encoding:    {self.encoding}',
                 f'Module:      {self.module_name}',
                 f'Code:        {self.code}',
                 f'Extensions:  {self.extensions}',
                 f'Globs:       {self.globs}',
                 f'Magic:       {self.magic}']
        return '\n'.join(lines)

    @property
    def acceptsfd(self) -> bool:
        """Whether reader/writer take a file object (code 'F' or 'B')."""
        return self.code[1] != 'S'

    @property
    def isbinary(self) -> bool:
        """Whether files of this format are opened in binary mode."""
        return self.code[1] == 'B'

    @property
    def module(self):
        """Import and return the implementing module.

        Raises ValueError if the module is not below ``ase.io`` and
        UnknownFileTypeError if importing it fails.
        """
        if not self.module_name.startswith('ase.io.'):
            raise ValueError('Will only import modules from ase.io, '
                             'not {}'.format(self.module_name))
        try:
            return import_module(self.module_name)
        except ImportError as err:
            # Chain the original error so the import failure stays visible.
            raise UnknownFileTypeError(
                f'File format not recognized: {self.name}.  Error: {err}'
            ) from err

    def match_name(self, basename: str) -> bool:
        """Whether ``basename`` matches one of the format's glob patterns."""
        from fnmatch import fnmatch
        return any(fnmatch(basename, pattern)
                   for pattern in self.globs)

    def match_magic(self, data: bytes) -> bool:
        """Whether ``data`` starts like a file of this format.

        Uses ``magic_regex`` when set; otherwise each magic pattern is
        matched as a case-sensitive glob with a trailing ``*`` appended.
        """
        if self.magic_regex:
            assert not self.magic, 'Define only one of magic and magic_regex'
            match = re.match(self.magic_regex, data, re.M | re.S)
            return match is not None

        from fnmatch import fnmatchcase
        return any(fnmatchcase(data, magic + b'*')  # type: ignore
                   for magic in self.magic)

247

248

# Registry of all known formats, keyed by format name.  Filled by
# define_io_format(); iteration order is registration order.
ioformats: Dict[str, IOFormat] = {}  # These will be filled at run-time.
# Maps a file extension (without the dot) to the format registered for it.
extension2format: Dict[str, IOFormat] = {}


all_formats = ioformats  # Aliased for compatibility only.  Please do not use.
# Maps a format name to the module name given to define_io_format().
format2modulename: Dict[str, str] = {}  # Left for compatibility only.

255

256

def define_io_format(name, desc, code, *, module=None, ext=None,
                     glob=None, magic=None, encoding=None,
                     magic_regex=None):
    """Create an IOFormat, register it and return it.

    ``module`` is the name of the implementing module below ``ase.io``;
    it defaults to ``name`` with dashes replaced by underscores.
    ``ext``, ``glob`` and ``magic`` may each be None, a single
    str/bytes or an iterable of patterns.

    The format is stored in ``ioformats`` under ``name`` and in
    ``extension2format`` under each of its extensions.  Raises
    ValueError if an extension is already registered.
    """
    def as_list(patterns):
        # None -> [], single pattern -> [pattern], iterable -> list copy.
        if patterns is None:
            return []
        if isinstance(patterns, (str, bytes)):
            return [patterns]
        return list(patterns)

    module_basename = name.replace('-', '_') if module is None else module
    format2modulename[name] = module_basename

    fmt = IOFormat(name, desc, code,
                   module_name='ase.io.' + module_basename,
                   encoding=encoding)
    fmt.extensions = as_list(ext)
    fmt.globs = as_list(glob)
    fmt.magic = as_list(magic)
    if magic_regex is not None:
        fmt.magic_regex = magic_regex

    for extension in fmt.extensions:
        if extension in extension2format:
            raise ValueError(
                'extension "{}" already registered'.format(extension))
        extension2format[extension] = fmt

    ioformats[name] = fmt
    return fmt

289

290

def get_ioformat(name: str) -> IOFormat:
    """Look up a registered format by name.

    Raises UnknownFileTypeError if ``name`` is not registered.  The
    format's module is imported as part of the lookup, so errors from
    that import surface here as well.
    """
    fmt = ioformats.get(name)
    if fmt is None:
        raise UnknownFileTypeError(name)
    # Evaluate the lazy property purely for its side effect: it raises
    # if the implementing module cannot be imported.
    fmt.module
    return fmt

299

300

# We define all the IO formats below.  Each IO format has a
# two-character code, such as '1F', which defines some of the
# format's properties.
#
# First character:
#   1 = single atoms object
#   + = multiple atoms objects
# Second character:
#   F = accepts a file-descriptor
#   S = needs a file-name str
#   B = like F, but opens in binary mode
#
# The order of the definitions matters: filetype() and match_magic()
# iterate over the registry and return the first format that matches.
# Whitespace inside the magic byte strings is part of the pattern.

# Short alias to keep the table below compact.
F = define_io_format
F('abinit-in', 'ABINIT input file', '1F',
  module='abinit', magic=b'*znucl *')
F('abinit-out', 'ABINIT output file', '1F',
  module='abinit', magic=b'*.Version * of ABINIT')
F('aims', 'FHI-aims geometry file', '1S', ext='in')
F('aims-output', 'FHI-aims output', '+S',
  module='aims', magic=b'*Invoking FHI-aims ...')
F('bundletrajectory', 'ASE bundle trajectory', '+S')
F('castep-castep', 'CASTEP output file', '+F',
  module='castep', ext='castep')
F('castep-cell', 'CASTEP geom file', '1F',
  module='castep', ext='cell')
F('castep-geom', 'CASTEP trajectory file', '+F',
  module='castep', ext='geom')
F('castep-md', 'CASTEP molecular dynamics file', '+F',
  module='castep', ext='md')
F('castep-phonon', 'CASTEP phonon file', '1F',
  module='castep', ext='phonon')
F('cfg', 'AtomEye configuration', '1F')
F('cif', 'CIF-file', '+B', ext='cif')
F('cmdft', 'CMDFT-file', '1F', glob='*I_info')
# NOTE(review): the description below says "json" for the cml format;
# looks like a leftover wording -- confirm before changing the string.
F('cml', 'Chemical json file', '1F', ext='cml')
F('cp2k-dcd', 'CP2K DCD file', '+B',
  module='cp2k', ext='dcd')
F('cp2k-restart', 'CP2K restart file', '1F',
  module='cp2k', ext='restart')
F('crystal', 'Crystal fort.34 format', '1F',
  ext=['f34', '34'], glob=['f34', '34'])
F('cube', 'CUBE file', '1F', ext='cube')
F('dacapo-text', 'Dacapo text output', '1F',
  module='dacapo', magic=b'*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n')
F('db', 'ASE SQLite database file', '+S')
F('dftb', 'DftbPlus input file', '1S', magic=b'Geometry')
F('dlp4', 'DL_POLY_4 CONFIG file', '1F',
  module='dlp4', ext='config', glob=['*CONFIG*'])
F('dlp-history', 'DL_POLY HISTORY file', '+F',
  module='dlp4', glob='HISTORY')
F('dmol-arc', 'DMol3 arc file', '+S',
  module='dmol')
F('dmol-car', 'DMol3 structure file', '1S',
  module='dmol', ext='car')
F('dmol-incoor', 'DMol3 structure file', '1S',
  module='dmol')
F('elk', 'ELK atoms definition from GEOMETRY.OUT', '1F',
  glob=['GEOMETRY.OUT'])
F('elk-in', 'ELK input file', '1F', module='elk')
F('eon', 'EON CON file', '+F',
  ext='con')
F('eps', 'Encapsulated Postscript', '1S')
F('espresso-in', 'Quantum espresso in file', '1F',
  module='espresso', ext='pwi', magic=[b'*\n&system', b'*\n&SYSTEM'])
F('espresso-out', 'Quantum espresso out file', '+F',
  module='espresso', ext=['out', 'pwo'], magic=b'*Program PWSCF')
F('exciting', 'exciting input', '1F', glob='input.xml')
# extxyz owns the 'xyz' extension; see the note at the 'xyz' entry.
F('extxyz', 'Extended XYZ file', '+F', ext='xyz')
F('findsym', 'FINDSYM-format', '+F')
F('gamess-us-out', 'GAMESS-US output file', '1F',
  module='gamess_us', magic=b'*GAMESS')
F('gamess-us-in', 'GAMESS-US input file', '1F',
  module='gamess_us')
F('gamess-us-punch', 'GAMESS-US punchcard file', '1F',
  module='gamess_us', magic=b' $DATA', ext='dat')
F('gaussian-in', 'Gaussian com (input) file', '1F',
  module='gaussian', ext=['com', 'gjf'])
F('gaussian-out', 'Gaussian output file', '+F',
  module='gaussian', ext='log', magic=b'*Entering Gaussian System')
F('acemolecule-out', 'ACE output file', '1S',
  module='acemolecule')
F('acemolecule-input', 'ACE input file', '1S',
  module='acemolecule')
F('gen', 'DFTBPlus GEN format', '1F')
F('gif', 'Graphics interchange format', '+S',
  module='animation')
F('gpaw-out', 'GPAW text output', '+F',
  magic=b'* ___ ___ ___ _ _ _')
F('gpumd', 'GPUMD input file', '1F', glob='xyz.in')
F('gpw', 'GPAW restart-file', '1S',
  magic=[b'- of UlmGPAW', b'AFFormatGPAW'])
F('gromacs', 'Gromacs coordinates', '1F',
  ext='gro')
F('gromos', 'Gromos96 geometry file', '1F', ext='g96')
F('html', 'X3DOM HTML', '1F', module='x3d')
F('json', 'ASE JSON database file', '+F', ext='json', module='db')
F('jsv', 'JSV file format', '1F')
F('lammps-dump-text', 'LAMMPS text dump file', '+F',
  module='lammpsrun', magic_regex=b'.*?^ITEM: TIMESTEP$')
F('lammps-dump-binary', 'LAMMPS binary dump file', '+B',
  module='lammpsrun')
F('lammps-data', 'LAMMPS data file', '1F', module='lammpsdata',
  encoding='ascii')
F('magres', 'MAGRES ab initio NMR data file', '1F')
F('mol', 'MDL Molfile', '1F')
F('mp4', 'MP4 animation', '+S',
  module='animation')
F('mustem', 'muSTEM xtl file', '1F',
  ext='xtl')
F('mysql', 'ASE MySQL database file', '+S',
  module='db')
F('netcdftrajectory', 'AMBER NetCDF trajectory file', '+S',
  magic=b'CDF')
F('nomad-json', 'JSON from Nomad archive', '+F',
  ext='nomad-json')
F('nwchem-in', 'NWChem input file', '1F',
  module='nwchem', ext='nwi')
F('nwchem-out', 'NWChem output file', '+F',
  module='nwchem', ext='nwo',
  magic=b'*Northwest Computational Chemistry Package')
F('octopus-in', 'Octopus input file', '1F',
  module='octopus', glob='inp')
F('proteindatabank', 'Protein Data Bank', '+F',
  ext='pdb')
F('png', 'Portable Network Graphics', '1B')
F('postgresql', 'ASE PostgreSQL database file', '+S', module='db')
F('pov', 'Persistance of Vision', '1S')
# prismatic: Should have ext='xyz' if/when multiple formats can have the same
# extension
F('prismatic', 'prismatic and computem XYZ-file', '1F')
F('py', 'Python file', '+F')
F('sys', 'qball sys file', '1F')
F('qbox', 'QBOX output file', '+F',
  magic=b'*:simulation xmlns:')
F('res', 'SHELX format', '1S', ext='shelx')
F('rmc6f', 'RMCProfile', '1S', ext='rmc6f')
F('sdf', 'SDF format', '1F')
F('siesta-xv', 'Siesta .XV file', '1F',
  glob='*.XV', module='siesta')
F('struct', 'WIEN2k structure file', '1S', module='wien2k')
F('struct_out', 'SIESTA STRUCT file', '1F', module='siesta')
F('traj', 'ASE trajectory', '+B', module='trajectory', ext='traj',
  magic=[b'- of UlmASE-Trajectory', b'AFFormatASE-Trajectory'])
F('turbomole', 'TURBOMOLE coord file', '1F', glob='coord',
  magic=b'$coord')
F('turbomole-gradient', 'TURBOMOLE gradient file', '+F',
  module='turbomole', glob='gradient', magic=b'$grad')
F('v-sim', 'V_Sim ascii file', '1F', ext='ascii')
F('vasp', 'VASP POSCAR/CONTCAR', '1F',
  ext='poscar', glob=['*POSCAR*', '*CONTCAR*'])
F('vasp-out', 'VASP OUTCAR file', '+F',
  module='vasp', glob='*OUTCAR*')
F('vasp-xdatcar', 'VASP XDATCAR file', '+F',
  module='vasp', glob='*XDATCAR*')
F('vasp-xml', 'VASP vasprun.xml file', '+F',
  module='vasp', glob='*vasp*.xml')
F('vti', 'VTK XML Image Data', '1F', module='vtkxml')
F('vtu', 'VTK XML Unstructured Grid', '1F', module='vtkxml', ext='vtu')
F('wout', 'Wannier90 output', '1F', module='wannier90')
F('x3d', 'X3D', '1S')
F('xsd', 'Materials Studio file', '1F')
F('xsf', 'XCrySDen Structure File', '+F',
  magic=[b'*\nANIMSTEPS', b'*\nCRYSTAL', b'*\nSLAB', b'*\nPOLYMER',
         b'*\nMOLECULE', b'*\nATOMS'])
F('xtd', 'Materials Studio file', '+F')
# xyz: No `ext='xyz'` in the definition below.
#      The .xyz files are handled by the extxyz module by default
#      (an extension can only be registered for one format).
F('xyz', 'XYZ-file', '+F')

466

467

def get_compression(filename: str) -> Tuple[str, Optional[str]]:
    """
    Split a recognised compression suffix off a filename.

    Recognises ``.gz``, ``.bz2`` and ``.xz``.  Any other (or no)
    extension leaves the filename untouched.

    >>> get_compression('H2O.pdb.gz')
    ('H2O.pdb', 'gz')
    >>> get_compression('crystal.cif')
    ('crystal.cif', None)

    Parameters
    ==========
    filename: str
        Full filename including extension.

    Returns
    =======
    (root, extension): (str, str or None)
        The filename without the compression suffix together with the
        suffix (no leading dot), or the unchanged filename and None if
        the last extension is not a known compression format.
    """
    # Extend this tuple when support for another format is added.
    known_suffixes = ('gz', 'bz2', 'xz')

    # splitext keeps the leading '.' on the suffix, so drop it.
    stem, suffix = os.path.splitext(filename)
    bare_suffix = suffix.strip('.')

    if bare_suffix not in known_suffixes:
        return filename, None
    return stem, bare_suffix

502

503

def open_with_compression(filename: str, mode: str = 'r') -> IO:
    """
    Open a file, transparently handling compression by file extension.

    The compression is guessed from the filename; ``gz`` (gzip),
    ``bz2`` (bzip2) and ``xz`` (lzma) are supported.  Anything else is
    opened with the builtin ``open``.

    Supported modes are:
       * 'r', 'rt', 'w', 'wt' for text mode read and write.
       * 'rb', 'wb' for binary read and write.

    Parameters
    ==========
    filename: str
        Path to the file to open, including any extensions that indicate
        the compression used.
    mode: str
        Mode to open the file, same as for builtin ``open``, e.g 'r', 'w'.

    Returns
    =======
    fd: file
        File-like object open with the specified mode.
    """
    # Compressed formats sometimes default to binary, so the bare modes
    # 'r', 'w' and 'a' are turned into explicit text modes.
    forced_text = {'r': 'rt', 'w': 'wt', 'a': 'at'}
    mode = forced_text.get(mode, mode)

    _, compression = get_compression(filename)

    if compression == 'gz':
        import gzip
        opened = gzip.open(filename, mode=mode)  # type: ignore
    elif compression == 'bz2':
        import bz2
        opened = bz2.open(filename, mode=mode)
    elif compression == 'xz':
        import lzma
        opened = lzma.open(filename, mode)
    else:
        # No (recognised) compression suffix.
        opened = open(filename, mode)
    return opened

552

553

def wrap_read_function(read, filename, index=None, **kwargs):
    """Turn a plain read function into a generator.

    Without ``index`` the single return value of ``read`` is yielded;
    with ``index`` the items of the iterable returned by ``read`` are
    yielded one by one.
    """
    if index is not None:
        yield from read(filename, index, **kwargs)
        return
    yield read(filename, **kwargs)

561

562

# Type of the ``filename`` argument of read(), iread(), write() and
# filetype(): a path given as str or PurePath, or an open file object.
NameOrFile = Union[str, PurePath, IO]

564

565

def write(
        filename: NameOrFile,
        images: Union[Atoms, Sequence[Atoms]],
        format: str = None,
        parallel: bool = True,
        append: bool = False,
        **kwargs: dict
) -> None:
    """Write Atoms object(s) to file.

    filename: str or file
        Name of the file to write to or a file descriptor.  The name '-'
        means standard output.
    images: Atoms object or list of Atoms objects
        A single Atoms object or a list of Atoms objects.
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be taken from suffix of the filename.
        When nothing can be determined for a file object (or for
        standard output), 'json' is used.
    parallel: bool
        Default is to write on master only.  Use parallel=False to write
        from all slaves.
    append: bool
        Default is to open files in 'w' or 'wb' mode, overwriting
        existing files.  In some cases opening the file in 'a' or 'ab'
        mode (appending) is useful,
        e.g. writing trajectories or saving multiple Atoms objects in one file.
        WARNING: If the file format does not support multiple entries without
        additional keywords/headers, files created using 'append=True'
        might not be readable by any program! They will nevertheless be
        written without error message.

    The use of additional keywords is format specific. write() may
    return an object after writing certain formats, but this behaviour
    may change in the future.

    """

    if isinstance(filename, PurePath):
        filename = str(filename)

    if not isinstance(filename, str):
        # An open file object: try to use its name, otherwise fall back
        # to the default format further down.
        fd = filename  # type: ignore
        if format is None:
            try:
                format = filetype(filename, read=False)
                assert isinstance(format, str)
            except UnknownFileTypeError:
                format = None
        filename = None  # type: ignore
    elif filename == '-':
        fd = sys.stdout
        filename = None  # type: ignore
    else:
        fd = None
        if format is None:
            format = filetype(filename, read=False)
            assert isinstance(format, str)

    format = format or 'json'  # default is json

    ioformat = get_ioformat(format)

    return _write(filename, fd, format, ioformat, images,
                  parallel=parallel, append=append, **kwargs)

630

631

@parallel_function
def _write(filename, fd, format, io, images, parallel=None, append=False,
           **kwargs):
    """Write ``images`` with the writer of ``io`` (an IOFormat).

    Exactly one of ``filename`` and ``fd`` is expected to be set by
    write().  Formats that accept file objects get ``fd`` (opened here
    from ``filename`` if necessary and closed again afterwards); other
    formats get the file name.  Raises ValueError when the request
    cannot be fulfilled by the format.
    """
    if isinstance(images, Atoms):
        images = [images]

    if io.single:
        if len(images) > 1:
            raise ValueError('{}-format can only store 1 Atoms object.'
                             .format(format))
        images = images[0]

    if not io.can_write:
        raise ValueError("Can't write to {}-format".format(format))

    # Special case for json-format: several images, or appending, only
    # works through the file name.
    if format == 'json' and (len(images) > 1 or append):
        if filename is None:
            raise ValueError("Can't write more than one image to "
                             'file-descriptor using json-format.')
        return io.write(filename, images, append=append, **kwargs)

    if not io.acceptsfd:
        # The writer needs a file name.
        if fd is not None:
            raise ValueError("Can't write {}-format to file-descriptor"
                             .format(format))
        if io.can_append:
            return io.write(filename, images, append=append, **kwargs)
        if append:
            raise ValueError("Cannot append to {}-format, write-function "
                             "does not support the append keyword."
                             .format(format))
        return io.write(filename, images, **kwargs)

    if fd is not None:
        # Caller-supplied file object: use it and leave it open.
        return io.write(fd, images, **kwargs)

    mode = ('a' if append else 'w') + ('b' if io.isbinary else '')
    fd = open_with_compression(filename, mode)
    # XXX remember to re-enable compressed open
    # fd = io.open(filename, mode)
    try:
        return io.write(fd, images, **kwargs)
    finally:
        fd.close()

680

681

def read(
        filename: NameOrFile,
        index: Any = None,
        format: str = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Union[Atoms, List[Atoms]]:
    """Read Atoms object(s) from file.

    filename: str or file
        Name of the file to read from or a file descriptor.  The name
        '-' means standard input.
    index: int, slice or str
        The last configuration will be returned by default.  Examples:

            * ``index=0``: first configuration
            * ``index=-2``: second to last
            * ``index=':'`` or ``index=slice(None)``: all
            * ``index='-3:'`` or ``index=slice(-3, None)``: three last
            * ``index='::2'`` or ``index=slice(0, None, 2)``: even
            * ``index='1::2'`` or ``index=slice(1, None, 2)``: odd
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.
    parallel: bool
        Default is to read on master and broadcast to slaves.  Use
        parallel=False to read on all slaves.
    do_not_split_by_at_sign: bool
        If False (default), ``filename`` is split at the last at sign
        ``@`` and the part after it is used as the index.

    Many formats allow an open file-like object to be passed instead
    of ``filename``.  In this case the format cannot be auto-detected
    from the file name, so the ``format`` argument should be given
    explicitly.

    A list is returned when the index is a slice or a string, otherwise
    a single Atoms object."""

    if isinstance(filename, PurePath):
        filename = str(filename)
    if filename == '-':
        filename = sys.stdin

    if isinstance(index, str):
        # Index strings that cannot be converted are passed on as they are.
        try:
            index = string2index(index)
        except ValueError:
            pass

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)
    if index is None:
        index = -1
    format = format or filetype(filename, read=isinstance(filename, str))

    ioformat = get_ioformat(format)
    wants_many = isinstance(index, (slice, str))
    selection = index if wants_many else slice(index, None)
    images = _iread(filename, selection, format, ioformat,
                    parallel=parallel, **kwargs)
    if wants_many:
        return list(images)
    return next(images)

738

739

def iread(
        filename: NameOrFile,
        index: Any = None,
        format: str = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Iterable[Atoms]:
    """Iterator for reading Atoms objects from file.

    Takes the same arguments as the `read` function, but yields one
    Atoms object at a time instead of returning them all at once.
    Without an index, all images are yielded."""

    if isinstance(filename, PurePath):
        filename = str(filename)

    if isinstance(index, str):
        index = string2index(index)

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)

    if index is None or index == ':':
        index = slice(None, None, None)
    elif not isinstance(index, (slice, str)):
        # Single integer -> one-element slice; for -1 the stop must be
        # None rather than 0.
        index = slice(index, (index + 1) or None)

    format = format or filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    yield from _iread(filename, index, format, ioformat, parallel=parallel,
                      **kwargs)

773

774

@parallel_generator
def _iread(filename, index, format, io, parallel=None, full_output=False,
           **kwargs):
    """Yield images read from ``filename`` with the reader of ``io``.

    ``filename`` may be a file name or a file object.  For formats that
    accept file objects a file name is opened here and closed again when
    the generator finishes or is discarded.  With ``full_output`` the
    dictionaries produced by the reader are yielded; otherwise only
    their ``'atoms'`` entry.
    """
    if not io.can_read:
        raise ValueError("Can't read from {}-format".format(format))

    if io.single:
        # Single-image readers take no index argument.
        first = index.start
        assert first is None or first == 0 or first == -1
        reader_args = ()
    else:
        reader_args = (index,)

    if not isinstance(filename, str):
        assert io.acceptsfd
        fd, owns_fd = filename, False
    elif io.acceptsfd:
        fd = open_with_compression(filename, 'rb' if io.isbinary else 'r')
        owns_fd = True
    else:
        fd, owns_fd = filename, False

    # Make sure fd is closed in case loop doesn't finish:
    try:
        for item in io.read(fd, *reader_args, **kwargs):
            record = item if isinstance(item, dict) else {'atoms': item}
            if full_output:
                yield record
            else:
                yield record['atoms']
    finally:
        if owns_fd:
            fd.close()

813

814

def parse_filename(filename, index=None, do_not_split_by_at_sign=False):
    """Split a trailing ``@index`` off ``filename``.

    Returns ``(filename, index)``.  Nothing is split if ``filename`` is
    not a str, if ``do_not_split_by_at_sign`` is set or if the basename
    contains no ``@``.  Otherwise the name is split at the last ``@``;
    an ``index`` given as a slice takes precedence over the part after
    the ``@``.  If that part cannot be converted, a warning is issued
    and it is returned as a string.
    """
    splittable = (isinstance(filename, str)
                  and not do_not_split_by_at_sign
                  and '@' in os.path.basename(filename))
    if not splittable:
        return filename, index

    path, _, index_text = filename.rpartition('@')

    if isinstance(index, slice):
        return path, index

    try:
        parsed = string2index(index_text)
    except ValueError:
        warnings.warn('Can not parse index for path \n'
                      ' "%s" \nConsider set '
                      'do_not_split_by_at_sign=True \nif '
                      'there is no index.' % filename)
        parsed = index_text
    return path, parsed

836

837

def match_magic(data: bytes) -> IOFormat:
    """Return the first registered format whose magic matches ``data``.

    Only the first PEEK_BYTES bytes are considered.  Raises
    UnknownFileTypeError if no format matches.
    """
    head = data[:PEEK_BYTES]
    found = next((fmt for fmt in ioformats.values()
                  if fmt.match_magic(head)), None)
    if found is None:
        raise UnknownFileTypeError('Cannot guess file type from contents')
    return found

844

845

def string2index(string: str) -> Union[int, slice, str]:
    """Convert index string to either int or slice.

    A string without ``':'`` is converted to int if possible and
    returned unchanged otherwise (it may contain a database accessor).
    A string with ``':'`` is read as ``start:stop:step``, where empty
    fields mean None.

    Raises ValueError if a field is not an integer or if there are more
    than three fields.
    """
    if ':' not in string:
        # may contain database accessor
        try:
            return int(string)
        except ValueError:
            return string

    fields = string.split(':')
    if len(fields) > 3:
        # slice() would raise TypeError here, which callers that handle
        # ValueError (read(), parse_filename()) would not catch.
        raise ValueError(f'Too many fields in slice string: {string!r}')

    parts: List[Optional[int]] = [None if field == '' else int(field)
                                  for field in fields]
    parts += (3 - len(parts)) * [None]
    return slice(*parts)

862

863

def filetype(
        filename: NameOrFile,
        read: bool = True,
        guess: bool = True,
) -> str:
    """Try to guess the type of the file.

    First, special signatures in the filename will be checked for.  If that
    does not identify the file type, then the first PEEK_BYTES bytes of
    the file will be read and analysed.  For file names, turn off this
    second part by using read=False.

    ``filename`` may be a str, a PurePath or a file object.  For a file
    object its ``name`` attribute is used as the file name if it is a
    string; otherwise only the contents can be inspected.

    With ``guess`` (default), the bare file extension is returned as
    the format name when nothing else matches.

    Raises UnknownFileTypeError if the type cannot be determined or if
    the file is empty.

    Can be used from the command-line also::

        $ ase info filename ...
    """

    # Accept path objects like read() and write() do.
    if isinstance(filename, PurePath):
        filename = str(filename)

    orig_filename = filename
    # Only a string name is usable as a file name; file objects can also
    # carry e.g. an integer file descriptor as their name.
    name = getattr(filename, 'name', None)
    if isinstance(name, str):
        filename = name

    ext = None
    if isinstance(filename, str):
        if os.path.isdir(filename):
            if os.path.basename(os.path.normpath(filename)) == 'states':
                return 'eon'
            return 'bundletrajectory'

        if filename.startswith('postgres'):
            return 'postgresql'

        if filename.startswith('mysql') or filename.startswith('mariadb'):
            return 'mysql'

        # strip any compression extensions that can be read
        root, compression = get_compression(filename)
        basename = os.path.basename(root)

        if '.' in basename:
            ext = os.path.splitext(basename)[1].strip('.').lower()

        for fmt in ioformats.values():
            if fmt.match_name(basename):
                return fmt.name

        if not read:
            if ext is None:
                raise UnknownFileTypeError('Could not guess file type')
            ioformat = extension2format.get(ext)
            if ioformat:
                return ioformat.name

            # askhl: This is strange, we don't know if ext is a format:
            return ext

        if orig_filename == filename:
            fd = open_with_compression(filename, 'rb')
        else:
            fd = orig_filename  # type: ignore
    else:
        fd = filename  # type: ignore
        if fd is sys.stdin:
            return 'json'

    # Objects that did not come in as ``filename`` itself are closed
    # after peeking (also if reading fails); a file object passed
    # without a usable name is rewound instead.
    try:
        data = fd.read(PEEK_BYTES)
    finally:
        if fd is not filename:
            fd.close()
    if fd is filename:
        fd.seek(0)

    if len(data) == 0:
        # filename may be a file object here, so do not concatenate.
        raise UnknownFileTypeError(f'Empty file: {filename}')

    try:
        return match_magic(data).name
    except UnknownFileTypeError:
        pass

    format = None
    if ext in extension2format:
        format = extension2format[ext].name

    if format is None and guess:
        format = ext
    if format is None:
        # Do quick xyz check:
        lines = data.splitlines()
        if lines and lines[0].strip().isdigit():
            return extension2format['xyz'].name

        raise UnknownFileTypeError('Could not guess file type')
    assert isinstance(format, str)
    return format

957

958

def index2range(index, length):
    """Return the range of positions selected by ``index``.

    ``index`` is applied to ``range(length)``.  A slice gives the
    corresponding sub-range; an integer gives a range that contains
    only that (non-negative) position."""
    selected = range(length)[index]
    if not isinstance(selected, numbers.Integral):
        return selected
    return range(selected, selected + 1)

Coverage for /builds/debichem-team/python-ase/ase/io/formats.py : 89.67%

523 statements

Coverage for /builds/debichem-team/python-ase/ase/io/formats.py : 89.67%

523 statements 469 run 54 missing 0 excluded

523 statements