Coverage for /builds/debichem-team/python-ase/ase/io/extxyz.py: 81.90%
525 statements
« prev ^ index » next coverage.py v7.5.3, created at 2025-03-06 04:00 +0000
« prev ^ index » next coverage.py v7.5.3, created at 2025-03-06 04:00 +0000
1"""
2Extended XYZ support
4Read/write files in "extended" XYZ format, storing additional
5per-configuration information as key-value pairs on the XYZ
6comment line, and additional per-atom properties as extra columns.
8Contributed by James Kermode <james.kermode@gmail.com>
9"""
10import json
11import numbers
12import re
13import warnings
14from io import StringIO, UnsupportedOperation
16import numpy as np
18from ase.atoms import Atoms
19from ase.calculators.singlepoint import SinglePointCalculator
20from ase.constraints import FixAtoms, FixCartesian
21from ase.io.formats import index2range
22from ase.io.utils import ImageIterator
23from ase.outputs import ArrayProperty, all_outputs
24from ase.spacegroup.spacegroup import Spacegroup
25from ase.stress import voigt_6_to_full_3x3_stress
26from ase.utils import reader, writer
28__all__ = ['read_xyz', 'write_xyz', 'iread_xyz']
30PROPERTY_NAME_MAP = {'positions': 'pos',
31 'numbers': 'Z',
32 'charges': 'charge',
33 'symbols': 'species'}
35REV_PROPERTY_NAME_MAP = dict(zip(PROPERTY_NAME_MAP.values(),
36 PROPERTY_NAME_MAP.keys()))
38KEY_QUOTED_VALUE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_-]*)'
39 + r'\s*=\s*["\{\}]([^"\{\}]+)["\{\}]\s*')
40KEY_VALUE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_]*)\s*='
41 + r'\s*([^\s]+)\s*')
42KEY_RE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_-]*)\s*')
44UNPROCESSED_KEYS = {'uid'}
46SPECIAL_3_3_KEYS = {'Lattice', 'virial', 'stress'}
48# 'per-atom' and 'per-config'
49per_atom_properties = []
50per_config_properties = []
51for key, val in all_outputs.items():
52 if isinstance(val, ArrayProperty) and val.shapespec[0] == 'natoms':
53 per_atom_properties.append(key)
54 else:
55 per_config_properties.append(key)
58def key_val_str_to_dict(string, sep=None):
59 """
60 Parse an xyz properties string in a key=value and return a dict with
61 various values parsed to native types.
63 Accepts brackets or quotes to delimit values. Parses integers, floats
64 booleans and arrays thereof. Arrays with 9 values whose name is listed
65 in SPECIAL_3_3_KEYS are converted to 3x3 arrays with Fortran ordering.
67 If sep is None, string will split on whitespace, otherwise will split
68 key value pairs with the given separator.
70 """
71 # store the closing delimiters to match opening ones
72 delimiters = {
73 "'": "'",
74 '"': '"',
75 '{': '}',
76 '[': ']'
77 }
79 # Make pairs and process afterwards
80 kv_pairs = [
81 [[]]] # List of characters for each entry, add a new list for new value
82 cur_delimiter = None # push and pop closing delimiters
83 escaped = False # add escaped sequences verbatim
85 # parse character-by-character unless someone can do nested brackets
86 # and escape sequences in a regex
87 for char in string.strip():
88 if escaped: # bypass everything if escaped
89 kv_pairs[-1][-1].append(char)
90 escaped = False
91 elif char == '\\': # escape the next thing
92 escaped = True
93 elif cur_delimiter: # inside brackets
94 if char == cur_delimiter: # found matching delimiter
95 cur_delimiter = None
96 else:
97 kv_pairs[-1][-1].append(char) # inside quotes, add verbatim
98 elif char in delimiters:
99 cur_delimiter = delimiters[char] # brackets or quotes
100 elif (sep is None and char.isspace()) or char == sep:
101 if kv_pairs == [[[]]]: # empty, beginning of string
102 continue
103 elif kv_pairs[-1][-1] == []:
104 continue
105 else:
106 kv_pairs.append([[]])
107 elif char == '=':
108 if kv_pairs[-1] == [[]]:
109 del kv_pairs[-1]
110 kv_pairs[-1].append([]) # value
111 else:
112 kv_pairs[-1][-1].append(char)
114 kv_dict = {}
116 for kv_pair in kv_pairs:
117 if len(kv_pair) == 0: # empty line
118 continue
119 elif len(kv_pair) == 1: # default to True
120 key, value = ''.join(kv_pair[0]), 'T'
121 else: # Smush anything else with kv-splitter '=' between them
122 key, value = ''.join(kv_pair[0]), '='.join(
123 ''.join(x) for x in kv_pair[1:])
125 if key.lower() not in UNPROCESSED_KEYS:
126 # Try to convert to (arrays of) floats, ints
127 split_value = re.findall(r'[^\s,]+', value)
128 try:
129 try:
130 numvalue = np.array(split_value, dtype=int)
131 except (ValueError, OverflowError):
132 # don't catch errors here so it falls through to bool
133 numvalue = np.array(split_value, dtype=float)
134 if len(numvalue) == 1:
135 numvalue = numvalue[0] # Only one number
136 value = numvalue
137 except (ValueError, OverflowError):
138 pass # value is unchanged
140 # convert special 3x3 matrices
141 if key in SPECIAL_3_3_KEYS:
142 if not isinstance(value, np.ndarray) or value.shape != (9,):
143 raise ValueError("Got info item {}, expecting special 3x3 "
144 "matrix, but value is not in the form of "
145 "a 9-long numerical vector".format(key))
146 value = np.array(value).reshape((3, 3), order='F')
148 # parse special strings as boolean or JSON
149 if isinstance(value, str):
150 # Parse boolean values:
151 # T or [tT]rue or TRUE -> True
152 # F or [fF]alse or FALSE -> False
153 # For list: 'T T F' -> [True, True, False]
154 # Cannot use `.lower()` to reduce `str_to_bool` mapping because
155 # 't'/'f' not accepted
156 str_to_bool = {
157 'T': True, 'F': False, 'true': True, 'false': False,
158 'True': True, 'False': False, 'TRUE': True, 'FALSE': False
159 }
160 try:
161 boolvalue = [str_to_bool[vpart] for vpart in
162 re.findall(r'[^\s,]+', value)]
164 if len(boolvalue) == 1:
165 value = boolvalue[0]
166 else:
167 value = boolvalue
168 except KeyError:
169 # Try to parse JSON
170 if value.startswith("_JSON "):
171 d = json.loads(value.replace("_JSON ", "", 1))
172 value = np.array(d)
173 if value.dtype.kind not in ['i', 'f', 'b']:
174 value = d
176 kv_dict[key] = value
178 return kv_dict
181def key_val_str_to_dict_regex(s):
182 """
183 Parse strings in the form 'key1=value1 key2="quoted value"'
184 """
185 d = {}
186 s = s.strip()
187 while True:
188 # Match quoted string first, then fall through to plain key=value
189 m = KEY_QUOTED_VALUE.match(s)
190 if m is None:
191 m = KEY_VALUE.match(s)
192 if m is not None:
193 s = KEY_VALUE.sub('', s, 1)
194 else:
195 # Just a key with no value
196 m = KEY_RE.match(s)
197 if m is not None:
198 s = KEY_RE.sub('', s, 1)
199 else:
200 s = KEY_QUOTED_VALUE.sub('', s, 1)
202 if m is None:
203 break # No more matches
205 key = m.group(1)
206 try:
207 value = m.group(2)
208 except IndexError:
209 # default value is 'T' (True)
210 value = 'T'
212 if key.lower() not in UNPROCESSED_KEYS:
213 # Try to convert to (arrays of) floats, ints
214 try:
215 numvalue = []
216 for x in value.split():
217 if x.find('.') == -1:
218 numvalue.append(int(float(x)))
219 else:
220 numvalue.append(float(x))
221 if len(numvalue) == 1:
222 numvalue = numvalue[0] # Only one number
223 elif len(numvalue) == 9:
224 # special case: 3x3 matrix, fortran ordering
225 numvalue = np.array(numvalue).reshape((3, 3), order='F')
226 else:
227 numvalue = np.array(numvalue) # vector
228 value = numvalue
229 except (ValueError, OverflowError):
230 pass
232 # Parse boolean values: 'T' -> True, 'F' -> False,
233 # 'T T F' -> [True, True, False]
234 if isinstance(value, str):
235 str_to_bool = {'T': True, 'F': False}
237 if len(value.split()) > 1:
238 if all(x in str_to_bool for x in value.split()):
239 value = [str_to_bool[x] for x in value.split()]
240 elif value in str_to_bool:
241 value = str_to_bool[value]
243 d[key] = value
245 return d
248def escape(string):
249 if (' ' in string or
250 '"' in string or "'" in string or
251 '{' in string or '}' in string or
252 '[' in string or ']' in string):
253 string = string.replace('"', '\\"')
254 string = f'"{string}"'
255 return string
258def key_val_dict_to_str(dct, sep=' '):
259 """
260 Convert atoms.info dictionary to extended XYZ string representation
261 """
263 def array_to_string(key, val):
264 # some ndarrays are special (special 3x3 keys, and scalars/vectors of
265 # numbers or bools), handle them here
266 if key in SPECIAL_3_3_KEYS:
267 # special 3x3 matrix, flatten in Fortran order
268 val = val.reshape(val.size, order='F')
269 if val.dtype.kind in ['i', 'f', 'b']:
270 # numerical or bool scalars/vectors are special, for backwards
271 # compat.
272 if len(val.shape) == 0:
273 # scalar
274 val = str(known_types_to_str(val))
275 elif len(val.shape) == 1:
276 # vector
277 val = ' '.join(str(known_types_to_str(v)) for v in val)
278 return val
280 def known_types_to_str(val):
281 if isinstance(val, (bool, np.bool_)):
282 return 'T' if val else 'F'
283 elif isinstance(val, numbers.Real):
284 return f'{val}'
285 elif isinstance(val, Spacegroup):
286 return val.symbol
287 else:
288 return val
290 if len(dct) == 0:
291 return ''
293 string = ''
294 for key in dct:
295 val = dct[key]
297 if isinstance(val, np.ndarray):
298 val = array_to_string(key, val)
299 else:
300 # convert any known types to string
301 val = known_types_to_str(val)
303 if val is not None and not isinstance(val, str):
304 # what's left is an object, try using JSON
305 if isinstance(val, np.ndarray):
306 val = val.tolist()
307 try:
308 val = '_JSON ' + json.dumps(val)
309 # if this fails, let give up
310 except TypeError:
311 warnings.warn('Skipping unhashable information '
312 '{}'.format(key))
313 continue
315 key = escape(key) # escape and quote key
316 eq = "="
317 # Should this really be setting empty value that's going to be
318 # interpreted as bool True?
319 if val is None:
320 val = ""
321 eq = ""
322 val = escape(val) # escape and quote val
324 string += f'{key}{eq}{val}{sep}'
326 return string.strip()
329def parse_properties(prop_str):
330 """
331 Parse extended XYZ properties format string
333 Format is "[NAME:TYPE:NCOLS]...]", e.g. "species:S:1:pos:R:3".
334 NAME is the name of the property.
335 TYPE is one of R, I, S, L for real, integer, string and logical.
336 NCOLS is number of columns for that property.
337 """
339 properties = {}
340 properties_list = []
341 dtypes = []
342 converters = []
344 fields = prop_str.split(':')
346 def parse_bool(x):
347 """
348 Parse bool to string
349 """
350 return {'T': True, 'F': False,
351 'True': True, 'False': False}.get(x)
353 fmt_map = {'R': ('d', float),
354 'I': ('i', int),
355 'S': (object, str),
356 'L': ('bool', parse_bool)}
358 for name, ptype, cols in zip(fields[::3],
359 fields[1::3],
360 [int(x) for x in fields[2::3]]):
361 if ptype not in ('R', 'I', 'S', 'L'):
362 raise ValueError('Unknown property type: ' + ptype)
363 ase_name = REV_PROPERTY_NAME_MAP.get(name, name)
365 dtype, converter = fmt_map[ptype]
366 if cols == 1:
367 dtypes.append((name, dtype))
368 converters.append(converter)
369 else:
370 for c in range(cols):
371 dtypes.append((name + str(c), dtype))
372 converters.append(converter)
374 properties[name] = (ase_name, cols)
375 properties_list.append(name)
377 dtype = np.dtype(dtypes)
378 return properties, properties_list, dtype, converters
381def _read_xyz_frame(lines, natoms, properties_parser=key_val_str_to_dict,
382 nvec=0):
383 # comment line
384 line = next(lines).strip()
385 if nvec > 0:
386 info = {'comment': line}
387 else:
388 info = properties_parser(line) if line else {}
390 pbc = None
391 if 'pbc' in info:
392 pbc = info.pop('pbc')
393 elif 'Lattice' in info:
394 # default pbc for extxyz file containing Lattice
395 # is True in all directions
396 pbc = [True, True, True]
397 elif nvec > 0:
398 # cell information given as pseudo-Atoms
399 pbc = [False, False, False]
401 cell = None
402 if 'Lattice' in info:
403 # NB: ASE cell is transpose of extended XYZ lattice
404 cell = info['Lattice'].T
405 del info['Lattice']
406 elif nvec > 0:
407 # cell information given as pseudo-Atoms
408 cell = np.zeros((3, 3))
410 if 'Properties' not in info:
411 # Default set of properties is atomic symbols and positions only
412 info['Properties'] = 'species:S:1:pos:R:3'
413 properties, names, dtype, convs = parse_properties(info['Properties'])
414 del info['Properties']
416 data = []
417 for _ in range(natoms):
418 try:
419 line = next(lines)
420 except StopIteration:
421 raise XYZError('ase.io.extxyz: Frame has {} atoms, expected {}'
422 .format(len(data), natoms))
423 vals = line.split()
424 row = tuple(conv(val) for conv, val in zip(convs, vals))
425 data.append(row)
427 try:
428 data = np.array(data, dtype)
429 except TypeError:
430 raise XYZError('Badly formatted data '
431 'or end of file reached before end of frame')
433 # Read VEC entries if present
434 if nvec > 0:
435 for ln in range(nvec):
436 try:
437 line = next(lines)
438 except StopIteration:
439 raise XYZError('ase.io.adfxyz: Frame has {} cell vectors, '
440 'expected {}'.format(len(cell), nvec))
441 entry = line.split()
443 if not entry[0].startswith('VEC'):
444 raise XYZError(f'Expected cell vector, got {entry[0]}')
446 try:
447 n = int(entry[0][3:])
448 except ValueError as e:
449 raise XYZError('Expected VEC{}, got VEC{}'
450 .format(ln + 1, entry[0][3:])) from e
451 if n != ln + 1:
452 raise XYZError('Expected VEC{}, got VEC{}'
453 .format(ln + 1, n))
455 cell[ln] = np.array([float(x) for x in entry[1:]])
456 pbc[ln] = True
457 if nvec != pbc.count(True):
458 raise XYZError('Problem with number of cell vectors')
459 pbc = tuple(pbc)
461 arrays = {}
462 for name in names:
463 ase_name, cols = properties[name]
464 if cols == 1:
465 value = data[name]
466 else:
467 value = np.vstack([data[name + str(c)]
468 for c in range(cols)]).T
469 arrays[ase_name] = value
471 numbers = arrays.pop('numbers', None)
472 symbols = arrays.pop('symbols', None)
474 if symbols is not None:
475 symbols = [s.capitalize() for s in symbols]
477 atoms = Atoms(numbers if numbers is not None else symbols,
478 positions=arrays.pop('positions', None),
479 charges=arrays.pop('initial_charges', None),
480 cell=cell,
481 pbc=pbc,
482 info=info)
484 # Read and set constraints
485 if 'move_mask' in arrays:
486 move_mask = arrays.pop('move_mask').astype(bool)
487 if properties['move_mask'][1] == 3:
488 cons = []
489 for a in range(natoms):
490 cons.append(FixCartesian(a, mask=~move_mask[a, :]))
491 atoms.set_constraint(cons)
492 elif properties['move_mask'][1] == 1:
493 atoms.set_constraint(FixAtoms(mask=~move_mask))
494 else:
495 raise XYZError('Not implemented constraint')
497 set_calc_and_arrays(atoms, arrays)
498 return atoms
501def set_calc_and_arrays(atoms, arrays):
502 results = {}
504 for name, array in arrays.items():
505 if name in all_outputs:
506 results[name] = array
507 else:
508 atoms.new_array(name, array)
510 for key in list(atoms.info):
511 if key in per_config_properties:
512 results[key] = atoms.info.pop(key)
513 # special case for stress- convert to Voigt 6-element form
514 if key == 'stress' and results[key].shape == (3, 3):
515 stress = results[key]
516 stress = np.array([stress[0, 0],
517 stress[1, 1],
518 stress[2, 2],
519 stress[1, 2],
520 stress[0, 2],
521 stress[0, 1]])
522 results[key] = stress
524 if results:
525 atoms.calc = SinglePointCalculator(atoms, **results)
528class XYZError(IOError):
529 pass
532class XYZChunk:
533 def __init__(self, lines, natoms):
534 self.lines = lines
535 self.natoms = natoms
537 def build(self):
538 """Convert unprocessed chunk into Atoms."""
539 return _read_xyz_frame(iter(self.lines), self.natoms)
542def ixyzchunks(fd):
543 """Yield unprocessed chunks (header, lines) for each xyz image."""
544 while True:
545 line = next(fd).strip() # Raises StopIteration on empty file
546 try:
547 natoms = int(line)
548 except ValueError:
549 raise XYZError(f'Expected integer, found "{line}"')
550 try:
551 lines = [next(fd) for _ in range(1 + natoms)]
552 except StopIteration:
553 raise XYZError('Incomplete XYZ chunk')
554 yield XYZChunk(lines, natoms)
557iread_xyz = ImageIterator(ixyzchunks)
560@reader
561def read_xyz(fileobj, index=-1, properties_parser=key_val_str_to_dict):
562 r"""
563 Read from a file in Extended XYZ format
565 index is the frame to read, default is last frame (index=-1).
566 properties_parser is the parse to use when converting the properties line
567 to a dictionary, ``extxyz.key_val_str_to_dict`` is the default and can
568 deal with most use cases, ``extxyz.key_val_str_to_dict_regex`` is slightly
569 faster but has fewer features.
571 Extended XYZ format is an enhanced version of the `basic XYZ format
572 <http://en.wikipedia.org/wiki/XYZ_file_format>`_ that allows extra
573 columns to be present in the file for additonal per-atom properties as
574 well as standardising the format of the comment line to include the
575 cell lattice and other per-frame parameters.
577 It's easiest to describe the format with an example. Here is a
578 standard XYZ file containing a bulk cubic 8 atom silicon cell ::
580 8
581 Cubic bulk silicon cell
582 Si 0.00000000 0.00000000 0.00000000
583 Si 1.36000000 1.36000000 1.36000000
584 Si 2.72000000 2.72000000 0.00000000
585 Si 4.08000000 4.08000000 1.36000000
586 Si 2.72000000 0.00000000 2.72000000
587 Si 4.08000000 1.36000000 4.08000000
588 Si 0.00000000 2.72000000 2.72000000
589 Si 1.36000000 4.08000000 4.08000000
591 The first line is the number of atoms, followed by a comment and
592 then one line per atom, giving the element symbol and cartesian
593 x y, and z coordinates in Angstroms.
595 Here's the same configuration in extended XYZ format ::
597 8
598 Lattice="5.44 0.0 0.0 0.0 5.44 0.0 0.0 0.0 5.44" Properties=species:S:1:pos:R:3 Time=0.0
599 Si 0.00000000 0.00000000 0.00000000
600 Si 1.36000000 1.36000000 1.36000000
601 Si 2.72000000 2.72000000 0.00000000
602 Si 4.08000000 4.08000000 1.36000000
603 Si 2.72000000 0.00000000 2.72000000
604 Si 4.08000000 1.36000000 4.08000000
605 Si 0.00000000 2.72000000 2.72000000
606 Si 1.36000000 4.08000000 4.08000000
608 In extended XYZ format, the comment line is replaced by a series of
609 key/value pairs. The keys should be strings and values can be
610 integers, reals, logicals (denoted by `T` and `F` for true and false)
611 or strings. Quotes are required if a value contains any spaces (like
612 `Lattice` above). There are two mandatory parameters that any
613 extended XYZ: `Lattice` and `Properties`. Other parameters --
614 e.g. `Time` in the example above --- can be added to the parameter line
615 as needed.
617 `Lattice` is a Cartesian 3x3 matrix representation of the cell
618 vectors, with each vector stored as a column and the 9 values listed in
619 Fortran column-major order, i.e. in the form ::
621 Lattice="R1x R1y R1z R2x R2y R2z R3x R3y R3z"
623 where `R1x R1y R1z` are the Cartesian x-, y- and z-components of the
624 first lattice vector (:math:`\mathbf{a}`), `R2x R2y R2z` those of the second
625 lattice vector (:math:`\mathbf{b}`) and `R3x R3y R3z` those of the
626 third lattice vector (:math:`\mathbf{c}`).
628 The list of properties in the file is described by the `Properties`
629 parameter, which should take the form of a series of colon separated
630 triplets giving the name, format (`R` for real, `I` for integer) and
631 number of columns of each property. For example::
633 Properties="species:S:1:pos:R:3:vel:R:3:select:I:1"
635 indicates the first column represents atomic species, the next three
636 columns represent atomic positions, the next three velcoities, and the
637 last is an single integer called `select`. With this property
638 definition, the line ::
640 Si 4.08000000 4.08000000 1.36000000 0.00000000 0.00000000 0.00000000 1
642 would describe a silicon atom at position (4.08,4.08,1.36) with zero
643 velocity and the `select` property set to 1.
645 The property names `pos`, `Z`, `mass`, and `charge` map to ASE
646 :attr:`ase.atoms.Atoms.arrays` entries named
647 `positions`, `numbers`, `masses` and `charges` respectively.
649 Additional key-value pairs in the comment line are parsed into the
650 :attr:`ase.Atoms.atoms.info` dictionary, with the following conventions
652 - Values can be quoted with `""`, `''`, `[]` or `{}` (the latter are
653 included to ease command-line usage as the `{}` are not treated
654 specially by the shell)
655 - Quotes within keys or values can be escaped with `\"`.
656 - Keys with special names `stress` or `virial` are treated as 3x3 matrices
657 in Fortran order, as for `Lattice` above.
658 - Otherwise, values with multiple elements are treated as 1D arrays, first
659 assuming integer format and falling back to float if conversion is
660 unsuccessful.
661 - A missing value defaults to `True`, e.g. the comment line
662 `"cutoff=3.4 have_energy"` leads to
663 `{'cutoff': 3.4, 'have_energy': True}` in `atoms.info`.
664 - Value strings starting with `"_JSON"` are interpreted as JSON content;
665 similarly, when writing, anything which does not match the criteria above
666 is serialised as JSON.
668 The extended XYZ format is also supported by the
669 the `Ovito <https://www.ovito.org>`_ visualisation tool.
670 """ # noqa: E501
672 if not isinstance(index, int) and not isinstance(index, slice):
673 raise TypeError('Index argument is neither slice nor integer!')
675 # If possible, build a partial index up to the last frame required
676 last_frame = None
677 if isinstance(index, int) and index >= 0:
678 last_frame = index
679 elif isinstance(index, slice):
680 if index.stop is not None and index.stop >= 0:
681 last_frame = index.stop
683 # scan through file to find where the frames start
684 try:
685 fileobj.seek(0)
686 except UnsupportedOperation:
687 fileobj = StringIO(fileobj.read())
688 fileobj.seek(0)
689 frames = []
690 while True:
691 frame_pos = fileobj.tell()
692 line = fileobj.readline()
693 if line.strip() == '':
694 break
695 try:
696 natoms = int(line)
697 except ValueError as err:
698 raise XYZError('ase.io.extxyz: Expected xyz header but got: {}'
699 .format(err))
700 fileobj.readline() # read comment line
701 for _ in range(natoms):
702 fileobj.readline()
703 # check for VEC
704 nvec = 0
705 while True:
706 lastPos = fileobj.tell()
707 line = fileobj.readline()
708 if line.lstrip().startswith('VEC'):
709 nvec += 1
710 if nvec > 3:
711 raise XYZError('ase.io.extxyz: More than 3 VECX entries')
712 else:
713 fileobj.seek(lastPos)
714 break
715 frames.append((frame_pos, natoms, nvec))
716 if last_frame is not None and len(frames) > last_frame:
717 break
719 trbl = index2range(index, len(frames))
721 for index in trbl:
722 frame_pos, natoms, nvec = frames[index]
723 fileobj.seek(frame_pos)
724 # check for consistency with frame index table
725 assert int(fileobj.readline()) == natoms
726 yield _read_xyz_frame(fileobj, natoms, properties_parser, nvec)
729def output_column_format(atoms, columns, arrays, write_info=True):
730 """
731 Helper function to build extended XYZ comment line
732 """
733 fmt_map = {'d': ('R', '%16.8f'),
734 'f': ('R', '%16.8f'),
735 'i': ('I', '%8d'),
736 'O': ('S', '%s'),
737 'S': ('S', '%s'),
738 'U': ('S', '%-2s'),
739 'b': ('L', ' %.1s')}
741 # NB: Lattice is stored as tranpose of ASE cell,
742 # with Fortran array ordering
743 lattice_str = ('Lattice="'
744 + ' '.join([str(x) for x in np.reshape(atoms.cell.T,
745 9, order='F')]) +
746 '"')
748 property_names = []
749 property_types = []
750 property_ncols = []
751 dtypes = []
752 formats = []
754 for column in columns:
755 array = arrays[column]
756 dtype = array.dtype
758 property_name = PROPERTY_NAME_MAP.get(column, column)
759 property_type, fmt = fmt_map[dtype.kind]
760 property_names.append(property_name)
761 property_types.append(property_type)
763 if (len(array.shape) == 1
764 or (len(array.shape) == 2 and array.shape[1] == 1)):
765 ncol = 1
766 dtypes.append((column, dtype))
767 else:
768 ncol = array.shape[1]
769 for c in range(ncol):
770 dtypes.append((column + str(c), dtype))
772 formats.extend([fmt] * ncol)
773 property_ncols.append(ncol)
775 props_str = ':'.join([':'.join(x) for x in
776 zip(property_names,
777 property_types,
778 [str(nc) for nc in property_ncols])])
780 comment_str = ''
781 if atoms.cell.any():
782 comment_str += lattice_str + ' '
783 comment_str += f'Properties={props_str}'
785 info = {}
786 if write_info:
787 info.update(atoms.info)
788 info['pbc'] = atoms.get_pbc() # always save periodic boundary conditions
789 comment_str += ' ' + key_val_dict_to_str(info)
791 dtype = np.dtype(dtypes)
792 fmt = ' '.join(formats) + '\n'
794 return comment_str, property_ncols, dtype, fmt
797@writer
798def write_xyz(fileobj, images, comment='', columns=None,
799 write_info=True,
800 write_results=True, plain=False, vec_cell=False):
801 """
802 Write output in extended XYZ format
804 Optionally, specify which columns (arrays) to include in output,
805 whether to write the contents of the `atoms.info` dict to the
806 XYZ comment line (default is True), the results of any
807 calculator attached to this Atoms. The `plain` argument
808 can be used to write a simple XYZ file with no additional information.
809 `vec_cell` can be used to write the cell vectors as additional
810 pseudo-atoms.
812 See documentation for :func:`read_xyz()` for further details of the extended
813 XYZ file format.
814 """
816 if hasattr(images, 'get_positions'):
817 images = [images]
819 for atoms in images:
820 natoms = len(atoms)
822 if write_results:
823 calculator = atoms.calc
824 atoms = atoms.copy()
826 save_calc_results(atoms, calculator, calc_prefix="")
828 if atoms.info.get('stress', np.array([])).shape == (6,):
829 atoms.info['stress'] = \
830 voigt_6_to_full_3x3_stress(atoms.info['stress'])
832 if columns is None:
833 fr_cols = (['symbols', 'positions']
834 + [key for key in atoms.arrays if
835 key not in ['symbols', 'positions', 'numbers',
836 'species', 'pos']])
837 else:
838 fr_cols = columns[:]
840 if vec_cell:
841 plain = True
843 if plain:
844 fr_cols = ['symbols', 'positions']
845 write_info = False
846 write_results = False
848 # Move symbols and positions to first two properties
849 if 'symbols' in fr_cols:
850 i = fr_cols.index('symbols')
851 fr_cols[0], fr_cols[i] = fr_cols[i], fr_cols[0]
853 if 'positions' in fr_cols:
854 i = fr_cols.index('positions')
855 fr_cols[1], fr_cols[i] = fr_cols[i], fr_cols[1]
857 # Check first column "looks like" atomic symbols
858 if fr_cols[0] in atoms.arrays:
859 symbols = atoms.arrays[fr_cols[0]]
860 else:
861 symbols = [*atoms.symbols]
863 if natoms > 0 and not isinstance(symbols[0], str):
864 raise ValueError('First column must be symbols-like')
866 # Check second column "looks like" atomic positions
867 pos = atoms.arrays[fr_cols[1]]
868 if pos.shape != (natoms, 3) or pos.dtype.kind != 'f':
869 raise ValueError('Second column must be position-like')
871 # if vec_cell add cell information as pseudo-atoms
872 if vec_cell:
873 nPBC = 0
874 for i, b in enumerate(atoms.pbc):
875 if not b:
876 continue
877 nPBC += 1
878 symbols.append('VEC' + str(nPBC))
879 pos = np.vstack((pos, atoms.cell[i]))
880 # add to natoms
881 natoms += nPBC
882 if pos.shape != (natoms, 3) or pos.dtype.kind != 'f':
883 raise ValueError(
884 'Pseudo Atoms containing cell have bad coords')
886 # Move mask
887 if 'move_mask' in fr_cols:
888 cnstr = images[0]._get_constraints()
889 if len(cnstr) > 0:
890 c0 = cnstr[0]
891 if isinstance(c0, FixAtoms):
892 cnstr = np.ones((natoms,), dtype=bool)
893 for idx in c0.index:
894 cnstr[idx] = False # cnstr: atoms that can be moved
895 elif isinstance(c0, FixCartesian):
896 masks = np.ones((natoms, 3), dtype=bool)
897 for i in range(len(cnstr)):
898 idx = cnstr[i].index
899 masks[idx] = cnstr[i].mask
900 cnstr = ~masks # cnstr: coordinates that can be moved
901 else:
902 fr_cols.remove('move_mask')
904 # Collect data to be written out
905 arrays = {}
906 for column in fr_cols:
907 if column == 'positions':
908 arrays[column] = pos
909 elif column in atoms.arrays:
910 arrays[column] = atoms.arrays[column]
911 elif column == 'symbols':
912 arrays[column] = np.array(symbols)
913 elif column == 'move_mask':
914 arrays[column] = cnstr
915 else:
916 raise ValueError(f'Missing array "{column}"')
918 comm, ncols, dtype, fmt = output_column_format(atoms,
919 fr_cols,
920 arrays,
921 write_info)
923 if plain or comment != '':
924 # override key/value pairs with user-speficied comment string
925 comm = comment.rstrip()
926 if '\n' in comm:
927 raise ValueError('Comment line should not have line breaks.')
929 # Pack fr_cols into record array
930 data = np.zeros(natoms, dtype)
931 for column, ncol in zip(fr_cols, ncols):
932 value = arrays[column]
933 if ncol == 1:
934 data[column] = np.squeeze(value)
935 else:
936 for c in range(ncol):
937 data[column + str(c)] = value[:, c]
939 nat = natoms
940 if vec_cell:
941 nat -= nPBC
942 # Write the output
943 fileobj.write('%d\n' % nat)
944 fileobj.write(f'{comm}\n')
945 for i in range(natoms):
946 fileobj.write(fmt % tuple(data[i]))
949def save_calc_results(atoms, calc=None, calc_prefix=None,
950 remove_atoms_calc=False, force=False):
951 """Update information in atoms from results in a calculator
953 Args:
954 atoms (ase.atoms.Atoms): Atoms object, modified in place
955 calc (ase.calculators.Calculator, optional): calculator to take results
956 from. Defaults to :attr:`atoms.calc`
957 calc_prefix (str, optional): String to prefix to results names
958 in :attr:`atoms.arrays` and :attr:`atoms.info`. Defaults to
959 calculator class name
960 remove_atoms_calc (bool): remove the calculator from the `atoms`
961 object after saving its results. Defaults to `False`, ignored if
962 `calc` is passed in
963 force (bool, optional): overwrite existing fields with same name,
964 default False
965 """
966 if calc is None:
967 calc_use = atoms.calc
968 else:
969 calc_use = calc
971 if calc_use is None:
972 return None, None
974 if calc_prefix is None:
975 calc_prefix = calc_use.__class__.__name__ + '_'
977 per_config_results = {}
978 per_atom_results = {}
979 for prop, value in calc_use.results.items():
980 if prop in per_config_properties:
981 per_config_results[calc_prefix + prop] = value
982 elif prop in per_atom_properties:
983 per_atom_results[calc_prefix + prop] = value
985 if not force:
986 if any(key in atoms.info for key in per_config_results):
987 raise KeyError("key from calculator already exists in atoms.info")
988 if any(key in atoms.arrays for key in per_atom_results):
989 raise KeyError("key from calculator already exists in atoms.arrays")
991 atoms.info.update(per_config_results)
992 atoms.arrays.update(per_atom_results)
994 if remove_atoms_calc and calc is None:
995 atoms.calc = None
998# create aliases for read/write functions
999read_extxyz = read_xyz
1000write_extxyz = write_xyz