Coverage for /builds/debichem-team/python-ase/ase/io/extxyz.py: 81.90%

1"""

2Extended XYZ support

4Read/write files in "extended" XYZ format, storing additional

5per-configuration information as key-value pairs on the XYZ

6comment line, and additional per-atom properties as extra columns.

8Contributed by James Kermode <james.kermode@gmail.com>

9"""

10import json

11import numbers

12import re

13import warnings

14from io import StringIO, UnsupportedOperation

16import numpy as np

18from ase.atoms import Atoms

19from ase.calculators.singlepoint import SinglePointCalculator

20from ase.constraints import FixAtoms, FixCartesian

21from ase.io.formats import index2range

22from ase.io.utils import ImageIterator

23from ase.outputs import ArrayProperty, all_outputs

24from ase.spacegroup.spacegroup import Spacegroup

25from ase.stress import voigt_6_to_full_3x3_stress

26from ase.utils import reader, writer

28__all__ = ['read_xyz', 'write_xyz', 'iread_xyz']

30PROPERTY_NAME_MAP = {'positions': 'pos',

31 'numbers': 'Z',

32 'charges': 'charge',

33 'symbols': 'species'}

35REV_PROPERTY_NAME_MAP = dict(zip(PROPERTY_NAME_MAP.values(),

36 PROPERTY_NAME_MAP.keys()))

38KEY_QUOTED_VALUE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_-]*)'

39 + r'\s*=\s*["\{\}]([^"\{\}]+)["\{\}]\s*')

40KEY_VALUE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_]*)\s*='

41 + r'\s*([^\s]+)\s*')

42KEY_RE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_-]*)\s*')

44UNPROCESSED_KEYS = {'uid'}

46SPECIAL_3_3_KEYS = {'Lattice', 'virial', 'stress'}

48# 'per-atom' and 'per-config'

49per_atom_properties = []

50per_config_properties = []

51for key, val in all_outputs.items():

52 if isinstance(val, ArrayProperty) and val.shapespec[0] == 'natoms':

53 per_atom_properties.append(key)

54 else:

55 per_config_properties.append(key)

58def key_val_str_to_dict(string, sep=None):

59 """

60 Parse an xyz properties string in a key=value and return a dict with

61 various values parsed to native types.

63 Accepts brackets or quotes to delimit values. Parses integers, floats

64 booleans and arrays thereof. Arrays with 9 values whose name is listed

65 in SPECIAL_3_3_KEYS are converted to 3x3 arrays with Fortran ordering.

67 If sep is None, string will split on whitespace, otherwise will split

68 key value pairs with the given separator.

70 """

71 # store the closing delimiters to match opening ones

72 delimiters = {

73 "'": "'",

74 '"': '"',

75 '{': '}',

76 '[': ']'

77 }

79 # Make pairs and process afterwards

80 kv_pairs = [

81 [[]]] # List of characters for each entry, add a new list for new value

82 cur_delimiter = None # push and pop closing delimiters

83 escaped = False # add escaped sequences verbatim

85 # parse character-by-character unless someone can do nested brackets

86 # and escape sequences in a regex

87 for char in string.strip():

88 if escaped: # bypass everything if escaped

89 kv_pairs[-1][-1].append(char)

90 escaped = False

91 elif char == '\\': # escape the next thing

92 escaped = True

93 elif cur_delimiter: # inside brackets

94 if char == cur_delimiter: # found matching delimiter

95 cur_delimiter = None

96 else:

97 kv_pairs[-1][-1].append(char) # inside quotes, add verbatim

98 elif char in delimiters:

99 cur_delimiter = delimiters[char] # brackets or quotes

100 elif (sep is None and char.isspace()) or char == sep:

101 if kv_pairs == [[[]]]: # empty, beginning of string

102 continue

103 elif kv_pairs[-1][-1] == []:

104 continue

105 else:

106 kv_pairs.append([[]])

107 elif char == '=':

108 if kv_pairs[-1] == [[]]:

109 del kv_pairs[-1]

110 kv_pairs[-1].append([]) # value

111 else:

112 kv_pairs[-1][-1].append(char)

113

114 kv_dict = {}

115

116 for kv_pair in kv_pairs:

117 if len(kv_pair) == 0: # empty line

118 continue

119 elif len(kv_pair) == 1: # default to True

120 key, value = ''.join(kv_pair[0]), 'T'

121 else: # Smush anything else with kv-splitter '=' between them

122 key, value = ''.join(kv_pair[0]), '='.join(

123 ''.join(x) for x in kv_pair[1:])

124

125 if key.lower() not in UNPROCESSED_KEYS:

126 # Try to convert to (arrays of) floats, ints

127 split_value = re.findall(r'[^\s,]+', value)

128 try:

129 try:

130 numvalue = np.array(split_value, dtype=int)

131 except (ValueError, OverflowError):

132 # don't catch errors here so it falls through to bool

133 numvalue = np.array(split_value, dtype=float)

134 if len(numvalue) == 1:

135 numvalue = numvalue[0] # Only one number

136 value = numvalue

137 except (ValueError, OverflowError):

138 pass # value is unchanged

139

140 # convert special 3x3 matrices

141 if key in SPECIAL_3_3_KEYS:

142 if not isinstance(value, np.ndarray) or value.shape != (9,):

143 raise ValueError("Got info item {}, expecting special 3x3 "

144 "matrix, but value is not in the form of "

145 "a 9-long numerical vector".format(key))

146 value = np.array(value).reshape((3, 3), order='F')

147

148 # parse special strings as boolean or JSON

149 if isinstance(value, str):

150 # Parse boolean values:

151 # T or [tT]rue or TRUE -> True

152 # F or [fF]alse or FALSE -> False

153 # For list: 'T T F' -> [True, True, False]

154 # Cannot use `.lower()` to reduce `str_to_bool` mapping because

155 # 't'/'f' not accepted

156 str_to_bool = {

157 'T': True, 'F': False, 'true': True, 'false': False,

158 'True': True, 'False': False, 'TRUE': True, 'FALSE': False

159 }

160 try:

161 boolvalue = [str_to_bool[vpart] for vpart in

162 re.findall(r'[^\s,]+', value)]

163

164 if len(boolvalue) == 1:

165 value = boolvalue[0]

166 else:

167 value = boolvalue

168 except KeyError:

169 # Try to parse JSON

170 if value.startswith("_JSON "):

171 d = json.loads(value.replace("_JSON ", "", 1))

172 value = np.array(d)

173 if value.dtype.kind not in ['i', 'f', 'b']:

174 value = d

175

176 kv_dict[key] = value

177

178 return kv_dict

179

180

181def key_val_str_to_dict_regex(s):

182 """

183 Parse strings in the form 'key1=value1 key2="quoted value"'

184 """

185 d = {}

186 s = s.strip()

187 while True:

188 # Match quoted string first, then fall through to plain key=value

189 m = KEY_QUOTED_VALUE.match(s)

190 if m is None:

191 m = KEY_VALUE.match(s)

192 if m is not None:

193 s = KEY_VALUE.sub('', s, 1)

194 else:

195 # Just a key with no value

196 m = KEY_RE.match(s)

197 if m is not None:

198 s = KEY_RE.sub('', s, 1)

199 else:

200 s = KEY_QUOTED_VALUE.sub('', s, 1)

201

202 if m is None:

203 break # No more matches

204

205 key = m.group(1)

206 try:

207 value = m.group(2)

208 except IndexError:

209 # default value is 'T' (True)

210 value = 'T'

211

212 if key.lower() not in UNPROCESSED_KEYS:

213 # Try to convert to (arrays of) floats, ints

214 try:

215 numvalue = []

216 for x in value.split():

217 if x.find('.') == -1:

218 numvalue.append(int(float(x)))

219 else:

220 numvalue.append(float(x))

221 if len(numvalue) == 1:

222 numvalue = numvalue[0] # Only one number

223 elif len(numvalue) == 9:

224 # special case: 3x3 matrix, fortran ordering

225 numvalue = np.array(numvalue).reshape((3, 3), order='F')

226 else:

227 numvalue = np.array(numvalue) # vector

228 value = numvalue

229 except (ValueError, OverflowError):

230 pass

231

232 # Parse boolean values: 'T' -> True, 'F' -> False,

233 # 'T T F' -> [True, True, False]

234 if isinstance(value, str):

235 str_to_bool = {'T': True, 'F': False}

236

237 if len(value.split()) > 1:

238 if all(x in str_to_bool for x in value.split()):

239 value = [str_to_bool[x] for x in value.split()]

240 elif value in str_to_bool:

241 value = str_to_bool[value]

243 d[key] = value

245 return d

248def escape(string):

249 if (' ' in string or

250 '"' in string or "'" in string or

251 '{' in string or '}' in string or

252 '[' in string or ']' in string):

253 string = string.replace('"', '\\"')

254 string = f'"{string}"'

255 return string

256

257

258def key_val_dict_to_str(dct, sep=' '):

259 """

260 Convert atoms.info dictionary to extended XYZ string representation

261 """

262

263 def array_to_string(key, val):

264 # some ndarrays are special (special 3x3 keys, and scalars/vectors of

265 # numbers or bools), handle them here

266 if key in SPECIAL_3_3_KEYS:

267 # special 3x3 matrix, flatten in Fortran order

268 val = val.reshape(val.size, order='F')

269 if val.dtype.kind in ['i', 'f', 'b']:

270 # numerical or bool scalars/vectors are special, for backwards

271 # compat.

272 if len(val.shape) == 0:

273 # scalar

274 val = str(known_types_to_str(val))

275 elif len(val.shape) == 1:

276 # vector

277 val = ' '.join(str(known_types_to_str(v)) for v in val)

278 return val

279

280 def known_types_to_str(val):

281 if isinstance(val, (bool, np.bool_)):

282 return 'T' if val else 'F'

283 elif isinstance(val, numbers.Real):

284 return f'{val}'

285 elif isinstance(val, Spacegroup):

286 return val.symbol

287 else:

288 return val

289

290 if len(dct) == 0:

291 return ''

292

293 string = ''

294 for key in dct:

295 val = dct[key]

296

297 if isinstance(val, np.ndarray):

298 val = array_to_string(key, val)

299 else:

300 # convert any known types to string

301 val = known_types_to_str(val)

302

303 if val is not None and not isinstance(val, str):

304 # what's left is an object, try using JSON

305 if isinstance(val, np.ndarray):

306 val = val.tolist()

307 try:

308 val = '_JSON ' + json.dumps(val)

309 # if this fails, let give up

310 except TypeError:

311 warnings.warn('Skipping unhashable information '

312 '{}'.format(key))

313 continue

314

315 key = escape(key) # escape and quote key

316 eq = "="

317 # Should this really be setting empty value that's going to be

318 # interpreted as bool True?

319 if val is None:

320 val = ""

321 eq = ""

322 val = escape(val) # escape and quote val

323

324 string += f'{key}{eq}{val}{sep}'

325

326 return string.strip()

327

328

329def parse_properties(prop_str):

330 """

331 Parse extended XYZ properties format string

332

333 Format is "[NAME:TYPE:NCOLS]...]", e.g. "species:S:1:pos:R:3".

334 NAME is the name of the property.

335 TYPE is one of R, I, S, L for real, integer, string and logical.

336 NCOLS is number of columns for that property.

337 """

338

339 properties = {}

340 properties_list = []

341 dtypes = []

342 converters = []

343

344 fields = prop_str.split(':')

345

346 def parse_bool(x):

347 """

348 Parse bool to string

349 """

350 return {'T': True, 'F': False,

351 'True': True, 'False': False}.get(x)

352

353 fmt_map = {'R': ('d', float),

354 'I': ('i', int),

355 'S': (object, str),

356 'L': ('bool', parse_bool)}

357

358 for name, ptype, cols in zip(fields[::3],

359 fields[1::3],

360 [int(x) for x in fields[2::3]]):

361 if ptype not in ('R', 'I', 'S', 'L'):

362 raise ValueError('Unknown property type: ' + ptype)

363 ase_name = REV_PROPERTY_NAME_MAP.get(name, name)

364

365 dtype, converter = fmt_map[ptype]

366 if cols == 1:

367 dtypes.append((name, dtype))

368 converters.append(converter)

369 else:

370 for c in range(cols):

371 dtypes.append((name + str(c), dtype))

372 converters.append(converter)

373

374 properties[name] = (ase_name, cols)

375 properties_list.append(name)

376

377 dtype = np.dtype(dtypes)

378 return properties, properties_list, dtype, converters

379

380

381def _read_xyz_frame(lines, natoms, properties_parser=key_val_str_to_dict,

382 nvec=0):

383 # comment line

384 line = next(lines).strip()

385 if nvec > 0:

386 info = {'comment': line}

387 else:

388 info = properties_parser(line) if line else {}

389

390 pbc = None

391 if 'pbc' in info:

392 pbc = info.pop('pbc')

393 elif 'Lattice' in info:

394 # default pbc for extxyz file containing Lattice

395 # is True in all directions

396 pbc = [True, True, True]

397 elif nvec > 0:

398 # cell information given as pseudo-Atoms

399 pbc = [False, False, False]

400

401 cell = None

402 if 'Lattice' in info:

403 # NB: ASE cell is transpose of extended XYZ lattice

404 cell = info['Lattice'].T

405 del info['Lattice']

406 elif nvec > 0:

407 # cell information given as pseudo-Atoms

408 cell = np.zeros((3, 3))

409

410 if 'Properties' not in info:

411 # Default set of properties is atomic symbols and positions only

412 info['Properties'] = 'species:S:1:pos:R:3'

413 properties, names, dtype, convs = parse_properties(info['Properties'])

414 del info['Properties']

415

416 data = []

417 for _ in range(natoms):

418 try:

419 line = next(lines)

420 except StopIteration:

421 raise XYZError('ase.io.extxyz: Frame has {} atoms, expected {}'

422 .format(len(data), natoms))

423 vals = line.split()

424 row = tuple(conv(val) for conv, val in zip(convs, vals))

425 data.append(row)

426

427 try:

428 data = np.array(data, dtype)

429 except TypeError:

430 raise XYZError('Badly formatted data '

431 'or end of file reached before end of frame')

432

433 # Read VEC entries if present

434 if nvec > 0:

435 for ln in range(nvec):

436 try:

437 line = next(lines)

438 except StopIteration:

439 raise XYZError('ase.io.adfxyz: Frame has {} cell vectors, '

440 'expected {}'.format(len(cell), nvec))

441 entry = line.split()

442

443 if not entry[0].startswith('VEC'):

444 raise XYZError(f'Expected cell vector, got {entry[0]}')

445

446 try:

447 n = int(entry[0][3:])

448 except ValueError as e:

449 raise XYZError('Expected VEC{}, got VEC{}'

450 .format(ln + 1, entry[0][3:])) from e

451 if n != ln + 1:

452 raise XYZError('Expected VEC{}, got VEC{}'

453 .format(ln + 1, n))

454

455 cell[ln] = np.array([float(x) for x in entry[1:]])

456 pbc[ln] = True

457 if nvec != pbc.count(True):

458 raise XYZError('Problem with number of cell vectors')

459 pbc = tuple(pbc)

460

461 arrays = {}

462 for name in names:

463 ase_name, cols = properties[name]

464 if cols == 1:

465 value = data[name]

466 else:

467 value = np.vstack([data[name + str(c)]

468 for c in range(cols)]).T

469 arrays[ase_name] = value

470

471 numbers = arrays.pop('numbers', None)

472 symbols = arrays.pop('symbols', None)

473

474 if symbols is not None:

475 symbols = [s.capitalize() for s in symbols]

476

477 atoms = Atoms(numbers if numbers is not None else symbols,

478 positions=arrays.pop('positions', None),

479 charges=arrays.pop('initial_charges', None),

480 cell=cell,

481 pbc=pbc,

482 info=info)

483

484 # Read and set constraints

485 if 'move_mask' in arrays:

486 move_mask = arrays.pop('move_mask').astype(bool)

487 if properties['move_mask'][1] == 3:

488 cons = []

489 for a in range(natoms):

490 cons.append(FixCartesian(a, mask=~move_mask[a, :]))

491 atoms.set_constraint(cons)

492 elif properties['move_mask'][1] == 1:

493 atoms.set_constraint(FixAtoms(mask=~move_mask))

494 else:

495 raise XYZError('Not implemented constraint')

496

497 set_calc_and_arrays(atoms, arrays)

498 return atoms

499

500

501def set_calc_and_arrays(atoms, arrays):

502 results = {}

503

504 for name, array in arrays.items():

505 if name in all_outputs:

506 results[name] = array

507 else:

508 atoms.new_array(name, array)

509

510 for key in list(atoms.info):

511 if key in per_config_properties:

512 results[key] = atoms.info.pop(key)

513 # special case for stress- convert to Voigt 6-element form

514 if key == 'stress' and results[key].shape == (3, 3):

515 stress = results[key]

516 stress = np.array([stress[0, 0],

517 stress[1, 1],

518 stress[2, 2],

519 stress[1, 2],

520 stress[0, 2],

521 stress[0, 1]])

522 results[key] = stress

523

524 if results:

525 atoms.calc = SinglePointCalculator(atoms, **results)

526

527

528class XYZError(IOError):

529 pass

530

531

532class XYZChunk:

533 def __init__(self, lines, natoms):

534 self.lines = lines

535 self.natoms = natoms

536

537 def build(self):

538 """Convert unprocessed chunk into Atoms."""

539 return _read_xyz_frame(iter(self.lines), self.natoms)

540

541

542def ixyzchunks(fd):

543 """Yield unprocessed chunks (header, lines) for each xyz image."""

544 while True:

545 line = next(fd).strip() # Raises StopIteration on empty file

546 try:

547 natoms = int(line)

548 except ValueError:

549 raise XYZError(f'Expected integer, found "{line}"')

550 try:

551 lines = [next(fd) for _ in range(1 + natoms)]

552 except StopIteration:

553 raise XYZError('Incomplete XYZ chunk')

554 yield XYZChunk(lines, natoms)

555

556

557iread_xyz = ImageIterator(ixyzchunks)

558

559

560@reader

561def read_xyz(fileobj, index=-1, properties_parser=key_val_str_to_dict):

562 r"""

563 Read from a file in Extended XYZ format

564

565 index is the frame to read, default is last frame (index=-1).

566 properties_parser is the parse to use when converting the properties line

567 to a dictionary, ``extxyz.key_val_str_to_dict`` is the default and can

568 deal with most use cases, ``extxyz.key_val_str_to_dict_regex`` is slightly

569 faster but has fewer features.

570

571 Extended XYZ format is an enhanced version of the `basic XYZ format

572 <http://en.wikipedia.org/wiki/XYZ_file_format>`_ that allows extra

573 columns to be present in the file for additonal per-atom properties as

574 well as standardising the format of the comment line to include the

575 cell lattice and other per-frame parameters.

576

577 It's easiest to describe the format with an example. Here is a

578 standard XYZ file containing a bulk cubic 8 atom silicon cell ::

579

580 8

581 Cubic bulk silicon cell

582 Si 0.00000000 0.00000000 0.00000000

583 Si 1.36000000 1.36000000 1.36000000

584 Si 2.72000000 2.72000000 0.00000000

585 Si 4.08000000 4.08000000 1.36000000

586 Si 2.72000000 0.00000000 2.72000000

587 Si 4.08000000 1.36000000 4.08000000

588 Si 0.00000000 2.72000000 2.72000000

589 Si 1.36000000 4.08000000 4.08000000

590

591 The first line is the number of atoms, followed by a comment and

592 then one line per atom, giving the element symbol and cartesian

593 x y, and z coordinates in Angstroms.

594

595 Here's the same configuration in extended XYZ format ::

596

597 8

598 Lattice="5.44 0.0 0.0 0.0 5.44 0.0 0.0 0.0 5.44" Properties=species:S:1:pos:R:3 Time=0.0

599 Si 0.00000000 0.00000000 0.00000000

600 Si 1.36000000 1.36000000 1.36000000

601 Si 2.72000000 2.72000000 0.00000000

602 Si 4.08000000 4.08000000 1.36000000

603 Si 2.72000000 0.00000000 2.72000000

604 Si 4.08000000 1.36000000 4.08000000

605 Si 0.00000000 2.72000000 2.72000000

606 Si 1.36000000 4.08000000 4.08000000

607

608 In extended XYZ format, the comment line is replaced by a series of

609 key/value pairs. The keys should be strings and values can be

610 integers, reals, logicals (denoted by `T` and `F` for true and false)

611 or strings. Quotes are required if a value contains any spaces (like

612 `Lattice` above). There are two mandatory parameters that any

613 extended XYZ: `Lattice` and `Properties`. Other parameters --

614 e.g. `Time` in the example above --- can be added to the parameter line

615 as needed.

616

617 `Lattice` is a Cartesian 3x3 matrix representation of the cell

618 vectors, with each vector stored as a column and the 9 values listed in

619 Fortran column-major order, i.e. in the form ::

620

621 Lattice="R1x R1y R1z R2x R2y R2z R3x R3y R3z"

622

623 where `R1x R1y R1z` are the Cartesian x-, y- and z-components of the

624 first lattice vector (:math:`\mathbf{a}`), `R2x R2y R2z` those of the second

625 lattice vector (:math:`\mathbf{b}`) and `R3x R3y R3z` those of the

626 third lattice vector (:math:`\mathbf{c}`).

627

628 The list of properties in the file is described by the `Properties`

629 parameter, which should take the form of a series of colon separated

630 triplets giving the name, format (`R` for real, `I` for integer) and

631 number of columns of each property. For example::

632

633 Properties="species:S:1:pos:R:3:vel:R:3:select:I:1"

634

635 indicates the first column represents atomic species, the next three

636 columns represent atomic positions, the next three velcoities, and the

637 last is an single integer called `select`. With this property

638 definition, the line ::

639

640 Si 4.08000000 4.08000000 1.36000000 0.00000000 0.00000000 0.00000000 1

641

642 would describe a silicon atom at position (4.08,4.08,1.36) with zero

643 velocity and the `select` property set to 1.

644

645 The property names `pos`, `Z`, `mass`, and `charge` map to ASE

646 :attr:`ase.atoms.Atoms.arrays` entries named

647 `positions`, `numbers`, `masses` and `charges` respectively.

648

649 Additional key-value pairs in the comment line are parsed into the

650 :attr:`ase.Atoms.atoms.info` dictionary, with the following conventions

651

652 - Values can be quoted with `""`, `''`, `[]` or `{}` (the latter are

653 included to ease command-line usage as the `{}` are not treated

654 specially by the shell)

655 - Quotes within keys or values can be escaped with `\"`.

656 - Keys with special names `stress` or `virial` are treated as 3x3 matrices

657 in Fortran order, as for `Lattice` above.

658 - Otherwise, values with multiple elements are treated as 1D arrays, first

659 assuming integer format and falling back to float if conversion is

660 unsuccessful.

661 - A missing value defaults to `True`, e.g. the comment line

662 `"cutoff=3.4 have_energy"` leads to

663 `{'cutoff': 3.4, 'have_energy': True}` in `atoms.info`.

664 - Value strings starting with `"_JSON"` are interpreted as JSON content;

665 similarly, when writing, anything which does not match the criteria above

666 is serialised as JSON.

667

668 The extended XYZ format is also supported by the

669 the `Ovito <https://www.ovito.org>`_ visualisation tool.

670 """ # noqa: E501

671

672 if not isinstance(index, int) and not isinstance(index, slice):

673 raise TypeError('Index argument is neither slice nor integer!')

674

675 # If possible, build a partial index up to the last frame required

676 last_frame = None

677 if isinstance(index, int) and index >= 0:

678 last_frame = index

679 elif isinstance(index, slice):

680 if index.stop is not None and index.stop >= 0:

681 last_frame = index.stop

682

683 # scan through file to find where the frames start

684 try:

685 fileobj.seek(0)

686 except UnsupportedOperation:

687 fileobj = StringIO(fileobj.read())

688 fileobj.seek(0)

689 frames = []

690 while True:

691 frame_pos = fileobj.tell()

692 line = fileobj.readline()

693 if line.strip() == '':

694 break

695 try:

696 natoms = int(line)

697 except ValueError as err:

698 raise XYZError('ase.io.extxyz: Expected xyz header but got: {}'

699 .format(err))

700 fileobj.readline() # read comment line

701 for _ in range(natoms):

702 fileobj.readline()

703 # check for VEC

704 nvec = 0

705 while True:

706 lastPos = fileobj.tell()

707 line = fileobj.readline()

708 if line.lstrip().startswith('VEC'):

709 nvec += 1

710 if nvec > 3:

711 raise XYZError('ase.io.extxyz: More than 3 VECX entries')

712 else:

713 fileobj.seek(lastPos)

714 break

715 frames.append((frame_pos, natoms, nvec))

716 if last_frame is not None and len(frames) > last_frame:

717 break

718

719 trbl = index2range(index, len(frames))

720

721 for index in trbl:

722 frame_pos, natoms, nvec = frames[index]

723 fileobj.seek(frame_pos)

724 # check for consistency with frame index table

725 assert int(fileobj.readline()) == natoms

726 yield _read_xyz_frame(fileobj, natoms, properties_parser, nvec)

727

728

729def output_column_format(atoms, columns, arrays, write_info=True):

730 """

731 Helper function to build extended XYZ comment line

732 """

733 fmt_map = {'d': ('R', '%16.8f'),

734 'f': ('R', '%16.8f'),

735 'i': ('I', '%8d'),

736 'O': ('S', '%s'),

737 'S': ('S', '%s'),

738 'U': ('S', '%-2s'),

739 'b': ('L', ' %.1s')}

740

741 # NB: Lattice is stored as tranpose of ASE cell,

742 # with Fortran array ordering

743 lattice_str = ('Lattice="'

744 + ' '.join([str(x) for x in np.reshape(atoms.cell.T,

745 9, order='F')]) +

746 '"')

747

748 property_names = []

749 property_types = []

750 property_ncols = []

751 dtypes = []

752 formats = []

753

754 for column in columns:

755 array = arrays[column]

756 dtype = array.dtype

757

758 property_name = PROPERTY_NAME_MAP.get(column, column)

759 property_type, fmt = fmt_map[dtype.kind]

760 property_names.append(property_name)

761 property_types.append(property_type)

762

763 if (len(array.shape) == 1

764 or (len(array.shape) == 2 and array.shape[1] == 1)):

765 ncol = 1

766 dtypes.append((column, dtype))

767 else:

768 ncol = array.shape[1]

769 for c in range(ncol):

770 dtypes.append((column + str(c), dtype))

771

772 formats.extend([fmt] * ncol)

773 property_ncols.append(ncol)

774

775 props_str = ':'.join([':'.join(x) for x in

776 zip(property_names,

777 property_types,

778 [str(nc) for nc in property_ncols])])

779

780 comment_str = ''

781 if atoms.cell.any():

782 comment_str += lattice_str + ' '

783 comment_str += f'Properties={props_str}'

784

785 info = {}

786 if write_info:

787 info.update(atoms.info)

788 info['pbc'] = atoms.get_pbc() # always save periodic boundary conditions

789 comment_str += ' ' + key_val_dict_to_str(info)

790

791 dtype = np.dtype(dtypes)

792 fmt = ' '.join(formats) + '\n'

793

794 return comment_str, property_ncols, dtype, fmt

795

796

797@writer

798def write_xyz(fileobj, images, comment='', columns=None,

799 write_info=True,

800 write_results=True, plain=False, vec_cell=False):

801 """

802 Write output in extended XYZ format

803

804 Optionally, specify which columns (arrays) to include in output,

805 whether to write the contents of the `atoms.info` dict to the

806 XYZ comment line (default is True), the results of any

807 calculator attached to this Atoms. The `plain` argument

808 can be used to write a simple XYZ file with no additional information.

809 `vec_cell` can be used to write the cell vectors as additional

810 pseudo-atoms.

811

812 See documentation for :func:`read_xyz()` for further details of the extended

813 XYZ file format.

814 """

815

816 if hasattr(images, 'get_positions'):

817 images = [images]

818

819 for atoms in images:

820 natoms = len(atoms)

821

822 if write_results:

823 calculator = atoms.calc

824 atoms = atoms.copy()

825

826 save_calc_results(atoms, calculator, calc_prefix="")

827

828 if atoms.info.get('stress', np.array([])).shape == (6,):

829 atoms.info['stress'] = \

830 voigt_6_to_full_3x3_stress(atoms.info['stress'])

831

832 if columns is None:

833 fr_cols = (['symbols', 'positions']

834 + [key for key in atoms.arrays if

835 key not in ['symbols', 'positions', 'numbers',

836 'species', 'pos']])

837 else:

838 fr_cols = columns[:]

839

840 if vec_cell:

841 plain = True

842

843 if plain:

844 fr_cols = ['symbols', 'positions']

845 write_info = False

846 write_results = False

847

848 # Move symbols and positions to first two properties

849 if 'symbols' in fr_cols:

850 i = fr_cols.index('symbols')

851 fr_cols[0], fr_cols[i] = fr_cols[i], fr_cols[0]

852

853 if 'positions' in fr_cols:

854 i = fr_cols.index('positions')

855 fr_cols[1], fr_cols[i] = fr_cols[i], fr_cols[1]

856

857 # Check first column "looks like" atomic symbols

858 if fr_cols[0] in atoms.arrays:

859 symbols = atoms.arrays[fr_cols[0]]

860 else:

861 symbols = [*atoms.symbols]

862

863 if natoms > 0 and not isinstance(symbols[0], str):

864 raise ValueError('First column must be symbols-like')

865

866 # Check second column "looks like" atomic positions

867 pos = atoms.arrays[fr_cols[1]]

868 if pos.shape != (natoms, 3) or pos.dtype.kind != 'f':

869 raise ValueError('Second column must be position-like')

870

871 # if vec_cell add cell information as pseudo-atoms

872 if vec_cell:

873 nPBC = 0

874 for i, b in enumerate(atoms.pbc):

875 if not b:

876 continue

877 nPBC += 1

878 symbols.append('VEC' + str(nPBC))

879 pos = np.vstack((pos, atoms.cell[i]))

880 # add to natoms

881 natoms += nPBC

882 if pos.shape != (natoms, 3) or pos.dtype.kind != 'f':

883 raise ValueError(

884 'Pseudo Atoms containing cell have bad coords')

885

886 # Move mask

887 if 'move_mask' in fr_cols:

888 cnstr = images[0]._get_constraints()

889 if len(cnstr) > 0:

890 c0 = cnstr[0]

891 if isinstance(c0, FixAtoms):

892 cnstr = np.ones((natoms,), dtype=bool)

893 for idx in c0.index:

894 cnstr[idx] = False # cnstr: atoms that can be moved

895 elif isinstance(c0, FixCartesian):

896 masks = np.ones((natoms, 3), dtype=bool)

897 for i in range(len(cnstr)):

898 idx = cnstr[i].index

899 masks[idx] = cnstr[i].mask

900 cnstr = ~masks # cnstr: coordinates that can be moved

901 else:

902 fr_cols.remove('move_mask')

903

904 # Collect data to be written out

905 arrays = {}

906 for column in fr_cols:

907 if column == 'positions':

908 arrays[column] = pos

909 elif column in atoms.arrays:

910 arrays[column] = atoms.arrays[column]

911 elif column == 'symbols':

912 arrays[column] = np.array(symbols)

913 elif column == 'move_mask':

914 arrays[column] = cnstr

915 else:

916 raise ValueError(f'Missing array "{column}"')

917

918 comm, ncols, dtype, fmt = output_column_format(atoms,

919 fr_cols,

920 arrays,

921 write_info)

922

923 if plain or comment != '':

924 # override key/value pairs with user-speficied comment string

925 comm = comment.rstrip()

926 if '\n' in comm:

927 raise ValueError('Comment line should not have line breaks.')

928

929 # Pack fr_cols into record array

930 data = np.zeros(natoms, dtype)

931 for column, ncol in zip(fr_cols, ncols):

932 value = arrays[column]

933 if ncol == 1:

934 data[column] = np.squeeze(value)

935 else:

936 for c in range(ncol):

937 data[column + str(c)] = value[:, c]

938

939 nat = natoms

940 if vec_cell:

941 nat -= nPBC

942 # Write the output

943 fileobj.write('%d\n' % nat)

944 fileobj.write(f'{comm}\n')

945 for i in range(natoms):

946 fileobj.write(fmt % tuple(data[i]))

947

948

949def save_calc_results(atoms, calc=None, calc_prefix=None,

950 remove_atoms_calc=False, force=False):

951 """Update information in atoms from results in a calculator

952

953 Args:

954 atoms (ase.atoms.Atoms): Atoms object, modified in place

955 calc (ase.calculators.Calculator, optional): calculator to take results

956 from. Defaults to :attr:`atoms.calc`

957 calc_prefix (str, optional): String to prefix to results names

958 in :attr:`atoms.arrays` and :attr:`atoms.info`. Defaults to

959 calculator class name

960 remove_atoms_calc (bool): remove the calculator from the `atoms`

961 object after saving its results. Defaults to `False`, ignored if

962 `calc` is passed in

963 force (bool, optional): overwrite existing fields with same name,

964 default False

965 """

966 if calc is None:

967 calc_use = atoms.calc

968 else:

969 calc_use = calc

970

971 if calc_use is None:

972 return None, None

973

974 if calc_prefix is None:

975 calc_prefix = calc_use.__class__.__name__ + '_'

976

977 per_config_results = {}

978 per_atom_results = {}

979 for prop, value in calc_use.results.items():

980 if prop in per_config_properties:

981 per_config_results[calc_prefix + prop] = value

982 elif prop in per_atom_properties:

983 per_atom_results[calc_prefix + prop] = value

984

985 if not force:

986 if any(key in atoms.info for key in per_config_results):

987 raise KeyError("key from calculator already exists in atoms.info")

988 if any(key in atoms.arrays for key in per_atom_results):

989 raise KeyError("key from calculator already exists in atoms.arrays")

990

991 atoms.info.update(per_config_results)

992 atoms.arrays.update(per_atom_results)

993

994 if remove_atoms_calc and calc is None:

995 atoms.calc = None

996

997

998# create aliases for read/write functions

999read_extxyz = read_xyz

1000write_extxyz = write_xyz