Coverage for /builds/debichem-team/python-ase/ase/io/extxyz.py: 81.90%

525 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2025-03-06 04:00 +0000

1""" 

2Extended XYZ support 

3 

4Read/write files in "extended" XYZ format, storing additional 

5per-configuration information as key-value pairs on the XYZ 

6comment line, and additional per-atom properties as extra columns. 

7 

8Contributed by James Kermode <james.kermode@gmail.com> 

9""" 

10import json 

11import numbers 

12import re 

13import warnings 

14from io import StringIO, UnsupportedOperation 

15 

16import numpy as np 

17 

18from ase.atoms import Atoms 

19from ase.calculators.singlepoint import SinglePointCalculator 

20from ase.constraints import FixAtoms, FixCartesian 

21from ase.io.formats import index2range 

22from ase.io.utils import ImageIterator 

23from ase.outputs import ArrayProperty, all_outputs 

24from ase.spacegroup.spacegroup import Spacegroup 

25from ase.stress import voigt_6_to_full_3x3_stress 

26from ase.utils import reader, writer 

27 

28__all__ = ['read_xyz', 'write_xyz', 'iread_xyz'] 

29 

30PROPERTY_NAME_MAP = {'positions': 'pos', 

31 'numbers': 'Z', 

32 'charges': 'charge', 

33 'symbols': 'species'} 

34 

35REV_PROPERTY_NAME_MAP = dict(zip(PROPERTY_NAME_MAP.values(), 

36 PROPERTY_NAME_MAP.keys())) 

37 

38KEY_QUOTED_VALUE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_-]*)' 

39 + r'\s*=\s*["\{\}]([^"\{\}]+)["\{\}]\s*') 

40KEY_VALUE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_]*)\s*=' 

41 + r'\s*([^\s]+)\s*') 

42KEY_RE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_-]*)\s*') 

43 

44UNPROCESSED_KEYS = {'uid'} 

45 

46SPECIAL_3_3_KEYS = {'Lattice', 'virial', 'stress'} 

47 

48# 'per-atom' and 'per-config' 

49per_atom_properties = [] 

50per_config_properties = [] 

51for key, val in all_outputs.items(): 

52 if isinstance(val, ArrayProperty) and val.shapespec[0] == 'natoms': 

53 per_atom_properties.append(key) 

54 else: 

55 per_config_properties.append(key) 

56 

57 

58def key_val_str_to_dict(string, sep=None): 

59 """ 

60 Parse an xyz properties string in a key=value and return a dict with 

61 various values parsed to native types. 

62 

63 Accepts brackets or quotes to delimit values. Parses integers, floats 

64 booleans and arrays thereof. Arrays with 9 values whose name is listed 

65 in SPECIAL_3_3_KEYS are converted to 3x3 arrays with Fortran ordering. 

66 

67 If sep is None, string will split on whitespace, otherwise will split 

68 key value pairs with the given separator. 

69 

70 """ 

71 # store the closing delimiters to match opening ones 

72 delimiters = { 

73 "'": "'", 

74 '"': '"', 

75 '{': '}', 

76 '[': ']' 

77 } 

78 

79 # Make pairs and process afterwards 

80 kv_pairs = [ 

81 [[]]] # List of characters for each entry, add a new list for new value 

82 cur_delimiter = None # push and pop closing delimiters 

83 escaped = False # add escaped sequences verbatim 

84 

85 # parse character-by-character unless someone can do nested brackets 

86 # and escape sequences in a regex 

87 for char in string.strip(): 

88 if escaped: # bypass everything if escaped 

89 kv_pairs[-1][-1].append(char) 

90 escaped = False 

91 elif char == '\\': # escape the next thing 

92 escaped = True 

93 elif cur_delimiter: # inside brackets 

94 if char == cur_delimiter: # found matching delimiter 

95 cur_delimiter = None 

96 else: 

97 kv_pairs[-1][-1].append(char) # inside quotes, add verbatim 

98 elif char in delimiters: 

99 cur_delimiter = delimiters[char] # brackets or quotes 

100 elif (sep is None and char.isspace()) or char == sep: 

101 if kv_pairs == [[[]]]: # empty, beginning of string 

102 continue 

103 elif kv_pairs[-1][-1] == []: 

104 continue 

105 else: 

106 kv_pairs.append([[]]) 

107 elif char == '=': 

108 if kv_pairs[-1] == [[]]: 

109 del kv_pairs[-1] 

110 kv_pairs[-1].append([]) # value 

111 else: 

112 kv_pairs[-1][-1].append(char) 

113 

114 kv_dict = {} 

115 

116 for kv_pair in kv_pairs: 

117 if len(kv_pair) == 0: # empty line 

118 continue 

119 elif len(kv_pair) == 1: # default to True 

120 key, value = ''.join(kv_pair[0]), 'T' 

121 else: # Smush anything else with kv-splitter '=' between them 

122 key, value = ''.join(kv_pair[0]), '='.join( 

123 ''.join(x) for x in kv_pair[1:]) 

124 

125 if key.lower() not in UNPROCESSED_KEYS: 

126 # Try to convert to (arrays of) floats, ints 

127 split_value = re.findall(r'[^\s,]+', value) 

128 try: 

129 try: 

130 numvalue = np.array(split_value, dtype=int) 

131 except (ValueError, OverflowError): 

132 # don't catch errors here so it falls through to bool 

133 numvalue = np.array(split_value, dtype=float) 

134 if len(numvalue) == 1: 

135 numvalue = numvalue[0] # Only one number 

136 value = numvalue 

137 except (ValueError, OverflowError): 

138 pass # value is unchanged 

139 

140 # convert special 3x3 matrices 

141 if key in SPECIAL_3_3_KEYS: 

142 if not isinstance(value, np.ndarray) or value.shape != (9,): 

143 raise ValueError("Got info item {}, expecting special 3x3 " 

144 "matrix, but value is not in the form of " 

145 "a 9-long numerical vector".format(key)) 

146 value = np.array(value).reshape((3, 3), order='F') 

147 

148 # parse special strings as boolean or JSON 

149 if isinstance(value, str): 

150 # Parse boolean values: 

151 # T or [tT]rue or TRUE -> True 

152 # F or [fF]alse or FALSE -> False 

153 # For list: 'T T F' -> [True, True, False] 

154 # Cannot use `.lower()` to reduce `str_to_bool` mapping because 

155 # 't'/'f' not accepted 

156 str_to_bool = { 

157 'T': True, 'F': False, 'true': True, 'false': False, 

158 'True': True, 'False': False, 'TRUE': True, 'FALSE': False 

159 } 

160 try: 

161 boolvalue = [str_to_bool[vpart] for vpart in 

162 re.findall(r'[^\s,]+', value)] 

163 

164 if len(boolvalue) == 1: 

165 value = boolvalue[0] 

166 else: 

167 value = boolvalue 

168 except KeyError: 

169 # Try to parse JSON 

170 if value.startswith("_JSON "): 

171 d = json.loads(value.replace("_JSON ", "", 1)) 

172 value = np.array(d) 

173 if value.dtype.kind not in ['i', 'f', 'b']: 

174 value = d 

175 

176 kv_dict[key] = value 

177 

178 return kv_dict 

179 

180 

181def key_val_str_to_dict_regex(s): 

182 """ 

183 Parse strings in the form 'key1=value1 key2="quoted value"' 

184 """ 

185 d = {} 

186 s = s.strip() 

187 while True: 

188 # Match quoted string first, then fall through to plain key=value 

189 m = KEY_QUOTED_VALUE.match(s) 

190 if m is None: 

191 m = KEY_VALUE.match(s) 

192 if m is not None: 

193 s = KEY_VALUE.sub('', s, 1) 

194 else: 

195 # Just a key with no value 

196 m = KEY_RE.match(s) 

197 if m is not None: 

198 s = KEY_RE.sub('', s, 1) 

199 else: 

200 s = KEY_QUOTED_VALUE.sub('', s, 1) 

201 

202 if m is None: 

203 break # No more matches 

204 

205 key = m.group(1) 

206 try: 

207 value = m.group(2) 

208 except IndexError: 

209 # default value is 'T' (True) 

210 value = 'T' 

211 

212 if key.lower() not in UNPROCESSED_KEYS: 

213 # Try to convert to (arrays of) floats, ints 

214 try: 

215 numvalue = [] 

216 for x in value.split(): 

217 if x.find('.') == -1: 

218 numvalue.append(int(float(x))) 

219 else: 

220 numvalue.append(float(x)) 

221 if len(numvalue) == 1: 

222 numvalue = numvalue[0] # Only one number 

223 elif len(numvalue) == 9: 

224 # special case: 3x3 matrix, fortran ordering 

225 numvalue = np.array(numvalue).reshape((3, 3), order='F') 

226 else: 

227 numvalue = np.array(numvalue) # vector 

228 value = numvalue 

229 except (ValueError, OverflowError): 

230 pass 

231 

232 # Parse boolean values: 'T' -> True, 'F' -> False, 

233 # 'T T F' -> [True, True, False] 

234 if isinstance(value, str): 

235 str_to_bool = {'T': True, 'F': False} 

236 

237 if len(value.split()) > 1: 

238 if all(x in str_to_bool for x in value.split()): 

239 value = [str_to_bool[x] for x in value.split()] 

240 elif value in str_to_bool: 

241 value = str_to_bool[value] 

242 

243 d[key] = value 

244 

245 return d 

246 

247 

248def escape(string): 

249 if (' ' in string or 

250 '"' in string or "'" in string or 

251 '{' in string or '}' in string or 

252 '[' in string or ']' in string): 

253 string = string.replace('"', '\\"') 

254 string = f'"{string}"' 

255 return string 

256 

257 

258def key_val_dict_to_str(dct, sep=' '): 

259 """ 

260 Convert atoms.info dictionary to extended XYZ string representation 

261 """ 

262 

263 def array_to_string(key, val): 

264 # some ndarrays are special (special 3x3 keys, and scalars/vectors of 

265 # numbers or bools), handle them here 

266 if key in SPECIAL_3_3_KEYS: 

267 # special 3x3 matrix, flatten in Fortran order 

268 val = val.reshape(val.size, order='F') 

269 if val.dtype.kind in ['i', 'f', 'b']: 

270 # numerical or bool scalars/vectors are special, for backwards 

271 # compat. 

272 if len(val.shape) == 0: 

273 # scalar 

274 val = str(known_types_to_str(val)) 

275 elif len(val.shape) == 1: 

276 # vector 

277 val = ' '.join(str(known_types_to_str(v)) for v in val) 

278 return val 

279 

280 def known_types_to_str(val): 

281 if isinstance(val, (bool, np.bool_)): 

282 return 'T' if val else 'F' 

283 elif isinstance(val, numbers.Real): 

284 return f'{val}' 

285 elif isinstance(val, Spacegroup): 

286 return val.symbol 

287 else: 

288 return val 

289 

290 if len(dct) == 0: 

291 return '' 

292 

293 string = '' 

294 for key in dct: 

295 val = dct[key] 

296 

297 if isinstance(val, np.ndarray): 

298 val = array_to_string(key, val) 

299 else: 

300 # convert any known types to string 

301 val = known_types_to_str(val) 

302 

303 if val is not None and not isinstance(val, str): 

304 # what's left is an object, try using JSON 

305 if isinstance(val, np.ndarray): 

306 val = val.tolist() 

307 try: 

308 val = '_JSON ' + json.dumps(val) 

309 # if this fails, let give up 

310 except TypeError: 

311 warnings.warn('Skipping unhashable information ' 

312 '{}'.format(key)) 

313 continue 

314 

315 key = escape(key) # escape and quote key 

316 eq = "=" 

317 # Should this really be setting empty value that's going to be 

318 # interpreted as bool True? 

319 if val is None: 

320 val = "" 

321 eq = "" 

322 val = escape(val) # escape and quote val 

323 

324 string += f'{key}{eq}{val}{sep}' 

325 

326 return string.strip() 

327 

328 

329def parse_properties(prop_str): 

330 """ 

331 Parse extended XYZ properties format string 

332 

333 Format is "[NAME:TYPE:NCOLS]...]", e.g. "species:S:1:pos:R:3". 

334 NAME is the name of the property. 

335 TYPE is one of R, I, S, L for real, integer, string and logical. 

336 NCOLS is number of columns for that property. 

337 """ 

338 

339 properties = {} 

340 properties_list = [] 

341 dtypes = [] 

342 converters = [] 

343 

344 fields = prop_str.split(':') 

345 

346 def parse_bool(x): 

347 """ 

348 Parse bool to string 

349 """ 

350 return {'T': True, 'F': False, 

351 'True': True, 'False': False}.get(x) 

352 

353 fmt_map = {'R': ('d', float), 

354 'I': ('i', int), 

355 'S': (object, str), 

356 'L': ('bool', parse_bool)} 

357 

358 for name, ptype, cols in zip(fields[::3], 

359 fields[1::3], 

360 [int(x) for x in fields[2::3]]): 

361 if ptype not in ('R', 'I', 'S', 'L'): 

362 raise ValueError('Unknown property type: ' + ptype) 

363 ase_name = REV_PROPERTY_NAME_MAP.get(name, name) 

364 

365 dtype, converter = fmt_map[ptype] 

366 if cols == 1: 

367 dtypes.append((name, dtype)) 

368 converters.append(converter) 

369 else: 

370 for c in range(cols): 

371 dtypes.append((name + str(c), dtype)) 

372 converters.append(converter) 

373 

374 properties[name] = (ase_name, cols) 

375 properties_list.append(name) 

376 

377 dtype = np.dtype(dtypes) 

378 return properties, properties_list, dtype, converters 

379 

380 

381def _read_xyz_frame(lines, natoms, properties_parser=key_val_str_to_dict, 

382 nvec=0): 

383 # comment line 

384 line = next(lines).strip() 

385 if nvec > 0: 

386 info = {'comment': line} 

387 else: 

388 info = properties_parser(line) if line else {} 

389 

390 pbc = None 

391 if 'pbc' in info: 

392 pbc = info.pop('pbc') 

393 elif 'Lattice' in info: 

394 # default pbc for extxyz file containing Lattice 

395 # is True in all directions 

396 pbc = [True, True, True] 

397 elif nvec > 0: 

398 # cell information given as pseudo-Atoms 

399 pbc = [False, False, False] 

400 

401 cell = None 

402 if 'Lattice' in info: 

403 # NB: ASE cell is transpose of extended XYZ lattice 

404 cell = info['Lattice'].T 

405 del info['Lattice'] 

406 elif nvec > 0: 

407 # cell information given as pseudo-Atoms 

408 cell = np.zeros((3, 3)) 

409 

410 if 'Properties' not in info: 

411 # Default set of properties is atomic symbols and positions only 

412 info['Properties'] = 'species:S:1:pos:R:3' 

413 properties, names, dtype, convs = parse_properties(info['Properties']) 

414 del info['Properties'] 

415 

416 data = [] 

417 for _ in range(natoms): 

418 try: 

419 line = next(lines) 

420 except StopIteration: 

421 raise XYZError('ase.io.extxyz: Frame has {} atoms, expected {}' 

422 .format(len(data), natoms)) 

423 vals = line.split() 

424 row = tuple(conv(val) for conv, val in zip(convs, vals)) 

425 data.append(row) 

426 

427 try: 

428 data = np.array(data, dtype) 

429 except TypeError: 

430 raise XYZError('Badly formatted data ' 

431 'or end of file reached before end of frame') 

432 

433 # Read VEC entries if present 

434 if nvec > 0: 

435 for ln in range(nvec): 

436 try: 

437 line = next(lines) 

438 except StopIteration: 

439 raise XYZError('ase.io.adfxyz: Frame has {} cell vectors, ' 

440 'expected {}'.format(len(cell), nvec)) 

441 entry = line.split() 

442 

443 if not entry[0].startswith('VEC'): 

444 raise XYZError(f'Expected cell vector, got {entry[0]}') 

445 

446 try: 

447 n = int(entry[0][3:]) 

448 except ValueError as e: 

449 raise XYZError('Expected VEC{}, got VEC{}' 

450 .format(ln + 1, entry[0][3:])) from e 

451 if n != ln + 1: 

452 raise XYZError('Expected VEC{}, got VEC{}' 

453 .format(ln + 1, n)) 

454 

455 cell[ln] = np.array([float(x) for x in entry[1:]]) 

456 pbc[ln] = True 

457 if nvec != pbc.count(True): 

458 raise XYZError('Problem with number of cell vectors') 

459 pbc = tuple(pbc) 

460 

461 arrays = {} 

462 for name in names: 

463 ase_name, cols = properties[name] 

464 if cols == 1: 

465 value = data[name] 

466 else: 

467 value = np.vstack([data[name + str(c)] 

468 for c in range(cols)]).T 

469 arrays[ase_name] = value 

470 

471 numbers = arrays.pop('numbers', None) 

472 symbols = arrays.pop('symbols', None) 

473 

474 if symbols is not None: 

475 symbols = [s.capitalize() for s in symbols] 

476 

477 atoms = Atoms(numbers if numbers is not None else symbols, 

478 positions=arrays.pop('positions', None), 

479 charges=arrays.pop('initial_charges', None), 

480 cell=cell, 

481 pbc=pbc, 

482 info=info) 

483 

484 # Read and set constraints 

485 if 'move_mask' in arrays: 

486 move_mask = arrays.pop('move_mask').astype(bool) 

487 if properties['move_mask'][1] == 3: 

488 cons = [] 

489 for a in range(natoms): 

490 cons.append(FixCartesian(a, mask=~move_mask[a, :])) 

491 atoms.set_constraint(cons) 

492 elif properties['move_mask'][1] == 1: 

493 atoms.set_constraint(FixAtoms(mask=~move_mask)) 

494 else: 

495 raise XYZError('Not implemented constraint') 

496 

497 set_calc_and_arrays(atoms, arrays) 

498 return atoms 

499 

500 

501def set_calc_and_arrays(atoms, arrays): 

502 results = {} 

503 

504 for name, array in arrays.items(): 

505 if name in all_outputs: 

506 results[name] = array 

507 else: 

508 atoms.new_array(name, array) 

509 

510 for key in list(atoms.info): 

511 if key in per_config_properties: 

512 results[key] = atoms.info.pop(key) 

513 # special case for stress- convert to Voigt 6-element form 

514 if key == 'stress' and results[key].shape == (3, 3): 

515 stress = results[key] 

516 stress = np.array([stress[0, 0], 

517 stress[1, 1], 

518 stress[2, 2], 

519 stress[1, 2], 

520 stress[0, 2], 

521 stress[0, 1]]) 

522 results[key] = stress 

523 

524 if results: 

525 atoms.calc = SinglePointCalculator(atoms, **results) 

526 

527 

528class XYZError(IOError): 

529 pass 

530 

531 

532class XYZChunk: 

533 def __init__(self, lines, natoms): 

534 self.lines = lines 

535 self.natoms = natoms 

536 

537 def build(self): 

538 """Convert unprocessed chunk into Atoms.""" 

539 return _read_xyz_frame(iter(self.lines), self.natoms) 

540 

541 

542def ixyzchunks(fd): 

543 """Yield unprocessed chunks (header, lines) for each xyz image.""" 

544 while True: 

545 line = next(fd).strip() # Raises StopIteration on empty file 

546 try: 

547 natoms = int(line) 

548 except ValueError: 

549 raise XYZError(f'Expected integer, found "{line}"') 

550 try: 

551 lines = [next(fd) for _ in range(1 + natoms)] 

552 except StopIteration: 

553 raise XYZError('Incomplete XYZ chunk') 

554 yield XYZChunk(lines, natoms) 

555 

556 

557iread_xyz = ImageIterator(ixyzchunks) 

558 

559 

560@reader 

561def read_xyz(fileobj, index=-1, properties_parser=key_val_str_to_dict): 

562 r""" 

563 Read from a file in Extended XYZ format 

564 

565 index is the frame to read, default is last frame (index=-1). 

566 properties_parser is the parse to use when converting the properties line 

567 to a dictionary, ``extxyz.key_val_str_to_dict`` is the default and can 

568 deal with most use cases, ``extxyz.key_val_str_to_dict_regex`` is slightly 

569 faster but has fewer features. 

570 

571 Extended XYZ format is an enhanced version of the `basic XYZ format 

572 <http://en.wikipedia.org/wiki/XYZ_file_format>`_ that allows extra 

573 columns to be present in the file for additonal per-atom properties as 

574 well as standardising the format of the comment line to include the 

575 cell lattice and other per-frame parameters. 

576 

577 It's easiest to describe the format with an example. Here is a 

578 standard XYZ file containing a bulk cubic 8 atom silicon cell :: 

579 

580 8 

581 Cubic bulk silicon cell 

582 Si 0.00000000 0.00000000 0.00000000 

583 Si 1.36000000 1.36000000 1.36000000 

584 Si 2.72000000 2.72000000 0.00000000 

585 Si 4.08000000 4.08000000 1.36000000 

586 Si 2.72000000 0.00000000 2.72000000 

587 Si 4.08000000 1.36000000 4.08000000 

588 Si 0.00000000 2.72000000 2.72000000 

589 Si 1.36000000 4.08000000 4.08000000 

590 

591 The first line is the number of atoms, followed by a comment and 

592 then one line per atom, giving the element symbol and cartesian 

593 x y, and z coordinates in Angstroms. 

594 

595 Here's the same configuration in extended XYZ format :: 

596 

597 8 

598 Lattice="5.44 0.0 0.0 0.0 5.44 0.0 0.0 0.0 5.44" Properties=species:S:1:pos:R:3 Time=0.0 

599 Si 0.00000000 0.00000000 0.00000000 

600 Si 1.36000000 1.36000000 1.36000000 

601 Si 2.72000000 2.72000000 0.00000000 

602 Si 4.08000000 4.08000000 1.36000000 

603 Si 2.72000000 0.00000000 2.72000000 

604 Si 4.08000000 1.36000000 4.08000000 

605 Si 0.00000000 2.72000000 2.72000000 

606 Si 1.36000000 4.08000000 4.08000000 

607 

608 In extended XYZ format, the comment line is replaced by a series of 

609 key/value pairs. The keys should be strings and values can be 

610 integers, reals, logicals (denoted by `T` and `F` for true and false) 

611 or strings. Quotes are required if a value contains any spaces (like 

612 `Lattice` above). There are two mandatory parameters that any 

613 extended XYZ: `Lattice` and `Properties`. Other parameters -- 

614 e.g. `Time` in the example above --- can be added to the parameter line 

615 as needed. 

616 

617 `Lattice` is a Cartesian 3x3 matrix representation of the cell 

618 vectors, with each vector stored as a column and the 9 values listed in 

619 Fortran column-major order, i.e. in the form :: 

620 

621 Lattice="R1x R1y R1z R2x R2y R2z R3x R3y R3z" 

622 

623 where `R1x R1y R1z` are the Cartesian x-, y- and z-components of the 

624 first lattice vector (:math:`\mathbf{a}`), `R2x R2y R2z` those of the second 

625 lattice vector (:math:`\mathbf{b}`) and `R3x R3y R3z` those of the 

626 third lattice vector (:math:`\mathbf{c}`). 

627 

628 The list of properties in the file is described by the `Properties` 

629 parameter, which should take the form of a series of colon separated 

630 triplets giving the name, format (`R` for real, `I` for integer) and 

631 number of columns of each property. For example:: 

632 

633 Properties="species:S:1:pos:R:3:vel:R:3:select:I:1" 

634 

635 indicates the first column represents atomic species, the next three 

636 columns represent atomic positions, the next three velcoities, and the 

637 last is an single integer called `select`. With this property 

638 definition, the line :: 

639 

640 Si 4.08000000 4.08000000 1.36000000 0.00000000 0.00000000 0.00000000 1 

641 

642 would describe a silicon atom at position (4.08,4.08,1.36) with zero 

643 velocity and the `select` property set to 1. 

644 

645 The property names `pos`, `Z`, `mass`, and `charge` map to ASE 

646 :attr:`ase.atoms.Atoms.arrays` entries named 

647 `positions`, `numbers`, `masses` and `charges` respectively. 

648 

649 Additional key-value pairs in the comment line are parsed into the 

650 :attr:`ase.Atoms.atoms.info` dictionary, with the following conventions 

651 

652 - Values can be quoted with `""`, `''`, `[]` or `{}` (the latter are 

653 included to ease command-line usage as the `{}` are not treated 

654 specially by the shell) 

655 - Quotes within keys or values can be escaped with `\"`. 

656 - Keys with special names `stress` or `virial` are treated as 3x3 matrices 

657 in Fortran order, as for `Lattice` above. 

658 - Otherwise, values with multiple elements are treated as 1D arrays, first 

659 assuming integer format and falling back to float if conversion is 

660 unsuccessful. 

661 - A missing value defaults to `True`, e.g. the comment line 

662 `"cutoff=3.4 have_energy"` leads to 

663 `{'cutoff': 3.4, 'have_energy': True}` in `atoms.info`. 

664 - Value strings starting with `"_JSON"` are interpreted as JSON content; 

665 similarly, when writing, anything which does not match the criteria above 

666 is serialised as JSON. 

667 

668 The extended XYZ format is also supported by the 

669 the `Ovito <https://www.ovito.org>`_ visualisation tool. 

670 """ # noqa: E501 

671 

672 if not isinstance(index, int) and not isinstance(index, slice): 

673 raise TypeError('Index argument is neither slice nor integer!') 

674 

675 # If possible, build a partial index up to the last frame required 

676 last_frame = None 

677 if isinstance(index, int) and index >= 0: 

678 last_frame = index 

679 elif isinstance(index, slice): 

680 if index.stop is not None and index.stop >= 0: 

681 last_frame = index.stop 

682 

683 # scan through file to find where the frames start 

684 try: 

685 fileobj.seek(0) 

686 except UnsupportedOperation: 

687 fileobj = StringIO(fileobj.read()) 

688 fileobj.seek(0) 

689 frames = [] 

690 while True: 

691 frame_pos = fileobj.tell() 

692 line = fileobj.readline() 

693 if line.strip() == '': 

694 break 

695 try: 

696 natoms = int(line) 

697 except ValueError as err: 

698 raise XYZError('ase.io.extxyz: Expected xyz header but got: {}' 

699 .format(err)) 

700 fileobj.readline() # read comment line 

701 for _ in range(natoms): 

702 fileobj.readline() 

703 # check for VEC 

704 nvec = 0 

705 while True: 

706 lastPos = fileobj.tell() 

707 line = fileobj.readline() 

708 if line.lstrip().startswith('VEC'): 

709 nvec += 1 

710 if nvec > 3: 

711 raise XYZError('ase.io.extxyz: More than 3 VECX entries') 

712 else: 

713 fileobj.seek(lastPos) 

714 break 

715 frames.append((frame_pos, natoms, nvec)) 

716 if last_frame is not None and len(frames) > last_frame: 

717 break 

718 

719 trbl = index2range(index, len(frames)) 

720 

721 for index in trbl: 

722 frame_pos, natoms, nvec = frames[index] 

723 fileobj.seek(frame_pos) 

724 # check for consistency with frame index table 

725 assert int(fileobj.readline()) == natoms 

726 yield _read_xyz_frame(fileobj, natoms, properties_parser, nvec) 

727 

728 

729def output_column_format(atoms, columns, arrays, write_info=True): 

730 """ 

731 Helper function to build extended XYZ comment line 

732 """ 

733 fmt_map = {'d': ('R', '%16.8f'), 

734 'f': ('R', '%16.8f'), 

735 'i': ('I', '%8d'), 

736 'O': ('S', '%s'), 

737 'S': ('S', '%s'), 

738 'U': ('S', '%-2s'), 

739 'b': ('L', ' %.1s')} 

740 

741 # NB: Lattice is stored as tranpose of ASE cell, 

742 # with Fortran array ordering 

743 lattice_str = ('Lattice="' 

744 + ' '.join([str(x) for x in np.reshape(atoms.cell.T, 

745 9, order='F')]) + 

746 '"') 

747 

748 property_names = [] 

749 property_types = [] 

750 property_ncols = [] 

751 dtypes = [] 

752 formats = [] 

753 

754 for column in columns: 

755 array = arrays[column] 

756 dtype = array.dtype 

757 

758 property_name = PROPERTY_NAME_MAP.get(column, column) 

759 property_type, fmt = fmt_map[dtype.kind] 

760 property_names.append(property_name) 

761 property_types.append(property_type) 

762 

763 if (len(array.shape) == 1 

764 or (len(array.shape) == 2 and array.shape[1] == 1)): 

765 ncol = 1 

766 dtypes.append((column, dtype)) 

767 else: 

768 ncol = array.shape[1] 

769 for c in range(ncol): 

770 dtypes.append((column + str(c), dtype)) 

771 

772 formats.extend([fmt] * ncol) 

773 property_ncols.append(ncol) 

774 

775 props_str = ':'.join([':'.join(x) for x in 

776 zip(property_names, 

777 property_types, 

778 [str(nc) for nc in property_ncols])]) 

779 

780 comment_str = '' 

781 if atoms.cell.any(): 

782 comment_str += lattice_str + ' ' 

783 comment_str += f'Properties={props_str}' 

784 

785 info = {} 

786 if write_info: 

787 info.update(atoms.info) 

788 info['pbc'] = atoms.get_pbc() # always save periodic boundary conditions 

789 comment_str += ' ' + key_val_dict_to_str(info) 

790 

791 dtype = np.dtype(dtypes) 

792 fmt = ' '.join(formats) + '\n' 

793 

794 return comment_str, property_ncols, dtype, fmt 

795 

796 

797@writer 

798def write_xyz(fileobj, images, comment='', columns=None, 

799 write_info=True, 

800 write_results=True, plain=False, vec_cell=False): 

801 """ 

802 Write output in extended XYZ format 

803 

804 Optionally, specify which columns (arrays) to include in output, 

805 whether to write the contents of the `atoms.info` dict to the 

806 XYZ comment line (default is True), the results of any 

807 calculator attached to this Atoms. The `plain` argument 

808 can be used to write a simple XYZ file with no additional information. 

809 `vec_cell` can be used to write the cell vectors as additional 

810 pseudo-atoms. 

811 

812 See documentation for :func:`read_xyz()` for further details of the extended 

813 XYZ file format. 

814 """ 

815 

816 if hasattr(images, 'get_positions'): 

817 images = [images] 

818 

819 for atoms in images: 

820 natoms = len(atoms) 

821 

822 if write_results: 

823 calculator = atoms.calc 

824 atoms = atoms.copy() 

825 

826 save_calc_results(atoms, calculator, calc_prefix="") 

827 

828 if atoms.info.get('stress', np.array([])).shape == (6,): 

829 atoms.info['stress'] = \ 

830 voigt_6_to_full_3x3_stress(atoms.info['stress']) 

831 

832 if columns is None: 

833 fr_cols = (['symbols', 'positions'] 

834 + [key for key in atoms.arrays if 

835 key not in ['symbols', 'positions', 'numbers', 

836 'species', 'pos']]) 

837 else: 

838 fr_cols = columns[:] 

839 

840 if vec_cell: 

841 plain = True 

842 

843 if plain: 

844 fr_cols = ['symbols', 'positions'] 

845 write_info = False 

846 write_results = False 

847 

848 # Move symbols and positions to first two properties 

849 if 'symbols' in fr_cols: 

850 i = fr_cols.index('symbols') 

851 fr_cols[0], fr_cols[i] = fr_cols[i], fr_cols[0] 

852 

853 if 'positions' in fr_cols: 

854 i = fr_cols.index('positions') 

855 fr_cols[1], fr_cols[i] = fr_cols[i], fr_cols[1] 

856 

857 # Check first column "looks like" atomic symbols 

858 if fr_cols[0] in atoms.arrays: 

859 symbols = atoms.arrays[fr_cols[0]] 

860 else: 

861 symbols = [*atoms.symbols] 

862 

863 if natoms > 0 and not isinstance(symbols[0], str): 

864 raise ValueError('First column must be symbols-like') 

865 

866 # Check second column "looks like" atomic positions 

867 pos = atoms.arrays[fr_cols[1]] 

868 if pos.shape != (natoms, 3) or pos.dtype.kind != 'f': 

869 raise ValueError('Second column must be position-like') 

870 

871 # if vec_cell add cell information as pseudo-atoms 

872 if vec_cell: 

873 nPBC = 0 

874 for i, b in enumerate(atoms.pbc): 

875 if not b: 

876 continue 

877 nPBC += 1 

878 symbols.append('VEC' + str(nPBC)) 

879 pos = np.vstack((pos, atoms.cell[i])) 

880 # add to natoms 

881 natoms += nPBC 

882 if pos.shape != (natoms, 3) or pos.dtype.kind != 'f': 

883 raise ValueError( 

884 'Pseudo Atoms containing cell have bad coords') 

885 

886 # Move mask 

887 if 'move_mask' in fr_cols: 

888 cnstr = images[0]._get_constraints() 

889 if len(cnstr) > 0: 

890 c0 = cnstr[0] 

891 if isinstance(c0, FixAtoms): 

892 cnstr = np.ones((natoms,), dtype=bool) 

893 for idx in c0.index: 

894 cnstr[idx] = False # cnstr: atoms that can be moved 

895 elif isinstance(c0, FixCartesian): 

896 masks = np.ones((natoms, 3), dtype=bool) 

897 for i in range(len(cnstr)): 

898 idx = cnstr[i].index 

899 masks[idx] = cnstr[i].mask 

900 cnstr = ~masks # cnstr: coordinates that can be moved 

901 else: 

902 fr_cols.remove('move_mask') 

903 

904 # Collect data to be written out 

905 arrays = {} 

906 for column in fr_cols: 

907 if column == 'positions': 

908 arrays[column] = pos 

909 elif column in atoms.arrays: 

910 arrays[column] = atoms.arrays[column] 

911 elif column == 'symbols': 

912 arrays[column] = np.array(symbols) 

913 elif column == 'move_mask': 

914 arrays[column] = cnstr 

915 else: 

916 raise ValueError(f'Missing array "{column}"') 

917 

918 comm, ncols, dtype, fmt = output_column_format(atoms, 

919 fr_cols, 

920 arrays, 

921 write_info) 

922 

923 if plain or comment != '': 

924 # override key/value pairs with user-speficied comment string 

925 comm = comment.rstrip() 

926 if '\n' in comm: 

927 raise ValueError('Comment line should not have line breaks.') 

928 

929 # Pack fr_cols into record array 

930 data = np.zeros(natoms, dtype) 

931 for column, ncol in zip(fr_cols, ncols): 

932 value = arrays[column] 

933 if ncol == 1: 

934 data[column] = np.squeeze(value) 

935 else: 

936 for c in range(ncol): 

937 data[column + str(c)] = value[:, c] 

938 

939 nat = natoms 

940 if vec_cell: 

941 nat -= nPBC 

942 # Write the output 

943 fileobj.write('%d\n' % nat) 

944 fileobj.write(f'{comm}\n') 

945 for i in range(natoms): 

946 fileobj.write(fmt % tuple(data[i])) 

947 

948 

949def save_calc_results(atoms, calc=None, calc_prefix=None, 

950 remove_atoms_calc=False, force=False): 

951 """Update information in atoms from results in a calculator 

952 

953 Args: 

954 atoms (ase.atoms.Atoms): Atoms object, modified in place 

955 calc (ase.calculators.Calculator, optional): calculator to take results 

956 from. Defaults to :attr:`atoms.calc` 

957 calc_prefix (str, optional): String to prefix to results names 

958 in :attr:`atoms.arrays` and :attr:`atoms.info`. Defaults to 

959 calculator class name 

960 remove_atoms_calc (bool): remove the calculator from the `atoms` 

961 object after saving its results. Defaults to `False`, ignored if 

962 `calc` is passed in 

963 force (bool, optional): overwrite existing fields with same name, 

964 default False 

965 """ 

966 if calc is None: 

967 calc_use = atoms.calc 

968 else: 

969 calc_use = calc 

970 

971 if calc_use is None: 

972 return None, None 

973 

974 if calc_prefix is None: 

975 calc_prefix = calc_use.__class__.__name__ + '_' 

976 

977 per_config_results = {} 

978 per_atom_results = {} 

979 for prop, value in calc_use.results.items(): 

980 if prop in per_config_properties: 

981 per_config_results[calc_prefix + prop] = value 

982 elif prop in per_atom_properties: 

983 per_atom_results[calc_prefix + prop] = value 

984 

985 if not force: 

986 if any(key in atoms.info for key in per_config_results): 

987 raise KeyError("key from calculator already exists in atoms.info") 

988 if any(key in atoms.arrays for key in per_atom_results): 

989 raise KeyError("key from calculator already exists in atoms.arrays") 

990 

991 atoms.info.update(per_config_results) 

992 atoms.arrays.update(per_atom_results) 

993 

994 if remove_atoms_calc and calc is None: 

995 atoms.calc = None 

996 

997 

998# create aliases for read/write functions 

999read_extxyz = read_xyz 

1000write_extxyz = write_xyz