Coverage for /builds/debichem-team/python-ase/ase/io/extxyz.py: 78.35%

Hot-keys on this page

r m x p toggle line displays

j k next/prev highlighted chunk

0 (zero) top of page

1 (one) first highlighted chunk

1"""

2Extended XYZ support

4Read/write files in "extended" XYZ format, storing additional

5per-configuration information as key-value pairs on the XYZ

6comment line, and additional per-atom properties as extra columns.

8Contributed by James Kermode <james.kermode@gmail.com>

9"""

12from itertools import islice

13import re

14import warnings

15from io import StringIO, UnsupportedOperation

16import json

18import numpy as np

19import numbers

21from ase.atoms import Atoms

22from ase.calculators.calculator import all_properties, Calculator

23from ase.calculators.singlepoint import SinglePointCalculator

24from ase.spacegroup.spacegroup import Spacegroup

25from ase.parallel import paropen

26from ase.constraints import FixAtoms, FixCartesian

27from ase.io.formats import index2range

28from ase.utils import reader

30__all__ = ['read_xyz', 'write_xyz', 'iread_xyz']

32PROPERTY_NAME_MAP = {'positions': 'pos',

33 'numbers': 'Z',

34 'charges': 'charge',

35 'symbols': 'species'}

37REV_PROPERTY_NAME_MAP = dict(zip(PROPERTY_NAME_MAP.values(),

38 PROPERTY_NAME_MAP.keys()))

40KEY_QUOTED_VALUE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_-]*)'

41 + r'\s*=\s*["\{\}]([^"\{\}]+)["\{\}]\s*')

42KEY_VALUE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_]*)\s*='

43 + r'\s*([^\s]+)\s*')

44KEY_RE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_-]*)\s*')

46UNPROCESSED_KEYS = ['uid']

48SPECIAL_3_3_KEYS = ['Lattice', 'virial', 'stress']

50# partition ase.calculators.calculator.all_properties into two lists:

51# 'per-atom' and 'per-config'

52per_atom_properties = ['forces', 'stresses', 'charges', 'magmoms', 'energies']

53per_config_properties = ['energy', 'stress', 'dipole', 'magmom', 'free_energy']

56def key_val_str_to_dict(string, sep=None):

57 """

58 Parse an xyz properties string in a key=value and return a dict with

59 various values parsed to native types.

61 Accepts brackets or quotes to delimit values. Parses integers, floats

62 booleans and arrays thereof. Arrays with 9 values whose name is listed

63 in SPECIAL_3_3_KEYS are converted to 3x3 arrays with Fortran ordering.

65 If sep is None, string will split on whitespace, otherwise will split

66 key value pairs with the given separator.

68 """

69 # store the closing delimiters to match opening ones

70 delimiters = {

71 "'": "'",

72 '"': '"',

73 '{': '}',

74 '[': ']'

75 }

77 # Make pairs and process afterwards

78 kv_pairs = [

79 [[]]] # List of characters for each entry, add a new list for new value

80 cur_delimiter = None # push and pop closing delimiters

81 escaped = False # add escaped sequences verbatim

83 # parse character-by-character unless someone can do nested brackets

84 # and escape sequences in a regex

85 for char in string.strip():

86 if escaped: # bypass everything if escaped

87 kv_pairs[-1][-1].append(char)

88 escaped = False

89 elif char == '\\': # escape the next thing

90 escaped = True

91 elif cur_delimiter: # inside brackets

92 if char == cur_delimiter: # found matching delimiter

93 cur_delimiter = None

94 else:

95 kv_pairs[-1][-1].append(char) # inside quotes, add verbatim

96 elif char in delimiters:

97 cur_delimiter = delimiters[char] # brackets or quotes

98 elif (sep is None and char.isspace()) or char == sep:

99 if kv_pairs == [[[]]]: # empty, beginning of string

100 continue

101 elif kv_pairs[-1][-1] == []:

102 continue

103 else:

104 kv_pairs.append([[]])

105 elif char == '=':

106 if kv_pairs[-1] == [[]]:

107 del kv_pairs[-1]

108 kv_pairs[-1].append([]) # value

109 else:

110 kv_pairs[-1][-1].append(char)

111

112 kv_dict = {}

113

114 for kv_pair in kv_pairs:

115 if len(kv_pair) == 0: # empty line

116 continue

117 elif len(kv_pair) == 1: # default to True

118 key, value = ''.join(kv_pair[0]), 'T'

119 else: # Smush anything else with kv-splitter '=' between them

120 key, value = ''.join(kv_pair[0]), '='.join(

121 ''.join(x) for x in kv_pair[1:])

122

123 if key.lower() not in UNPROCESSED_KEYS:

124 # Try to convert to (arrays of) floats, ints

125 split_value = re.findall(r'[^\s,]+', value)

126 try:

127 try:

128 numvalue = np.array(split_value, dtype=int)

129 except (ValueError, OverflowError):

130 # don't catch errors here so it falls through to bool

131 numvalue = np.array(split_value, dtype=float)

132 if len(numvalue) == 1:

133 numvalue = numvalue[0] # Only one number

134 value = numvalue

135 except (ValueError, OverflowError):

136 pass # value is unchanged

137

138 # convert special 3x3 matrices

139 if key in SPECIAL_3_3_KEYS:

140 if not isinstance(value, np.ndarray) or value.shape != (9,):

141 raise ValueError("Got info item {}, expecting special 3x3 "

142 "matrix, but value is not in the form of "

143 "a 9-long numerical vector".format(key))

144 value = np.array(value).reshape((3, 3), order='F')

145

146 # parse special strings as boolean or JSON

147 if isinstance(value, str):

148 # Parse boolean values: 'T' -> True, 'F' -> False,

149 # 'T T F' -> [True, True, False]

150 str_to_bool = {'T': True, 'F': False}

151

152 try:

153 boolvalue = [str_to_bool[vpart] for vpart in

154 re.findall(r'[^\s,]+', value)]

155 if len(boolvalue) == 1:

156 value = boolvalue[0]

157 else:

158 value = boolvalue

159 except KeyError:

160 # parse JSON

161 if value.startswith("_JSON "):

162 d = json.loads(value.replace("_JSON ", "", 1))

163 value = np.array(d)

164 if value.dtype.kind not in ['i', 'f', 'b']:

165 value = d

166

167 kv_dict[key] = value

168

169 return kv_dict

170

171

172def key_val_str_to_dict_regex(s):

173 """

174 Parse strings in the form 'key1=value1 key2="quoted value"'

175 """

176 d = {}

177 s = s.strip()

178 while True:

179 # Match quoted string first, then fall through to plain key=value

180 m = KEY_QUOTED_VALUE.match(s)

181 if m is None:

182 m = KEY_VALUE.match(s)

183 if m is not None:

184 s = KEY_VALUE.sub('', s, 1)

185 else:

186 # Just a key with no value

187 m = KEY_RE.match(s)

188 if m is not None:

189 s = KEY_RE.sub('', s, 1)

190 else:

191 s = KEY_QUOTED_VALUE.sub('', s, 1)

192

193 if m is None:

194 break # No more matches

195

196 key = m.group(1)

197 try:

198 value = m.group(2)

199 except IndexError:

200 # default value is 'T' (True)

201 value = 'T'

202

203 if key.lower() not in UNPROCESSED_KEYS:

204 # Try to convert to (arrays of) floats, ints

205 try:

206 numvalue = []

207 for x in value.split():

208 if x.find('.') == -1:

209 numvalue.append(int(float(x)))

210 else:

211 numvalue.append(float(x))

212 if len(numvalue) == 1:

213 numvalue = numvalue[0] # Only one number

214 elif len(numvalue) == 9:

215 # special case: 3x3 matrix, fortran ordering

216 numvalue = np.array(numvalue).reshape((3, 3), order='F')

217 else:

218 numvalue = np.array(numvalue) # vector

219 value = numvalue

220 except (ValueError, OverflowError):

221 pass

222

223 # Parse boolean values: 'T' -> True, 'F' -> False,

224 # 'T T F' -> [True, True, False]

225 if isinstance(value, str):

226 str_to_bool = {'T': True, 'F': False}

227

228 if len(value.split()) > 1:

229 if all([x in str_to_bool.keys() for x in value.split()]):

230 value = [str_to_bool[x] for x in value.split()]

231 elif value in str_to_bool:

232 value = str_to_bool[value]

234 d[key] = value

236 return d

239def escape(string):

240 if (' ' in string or

241 '"' in string or "'" in string or

242 '{' in string or '}' in string or

243 '[' in string or ']' in string):

244 string = string.replace('"', '\\"')

245 string = '"%s"' % string

246 return string

247

248

249def key_val_dict_to_str(dct, sep=' '):

250 """

251 Convert atoms.info dictionary to extended XYZ string representation

252 """

253

254 def array_to_string(key, val):

255 # some ndarrays are special (special 3x3 keys, and scalars/vectors of

256 # numbers or bools), handle them here

257 if key in SPECIAL_3_3_KEYS:

258 # special 3x3 matrix, flatten in Fortran order

259 val = val.reshape(val.size, order='F')

260 if val.dtype.kind in ['i', 'f', 'b']:

261 # numerical or bool scalars/vectors are special, for backwards

262 # compat.

263 if len(val.shape) == 0:

264 # scalar

265 val = str(known_types_to_str(val))

266 elif len(val.shape) == 1:

267 # vector

268 val = ' '.join(str(known_types_to_str(v)) for v in val)

269 return val

270

271 def known_types_to_str(val):

272 if isinstance(val, bool) or isinstance(val, np.bool_):

273 return 'T' if val else 'F'

274 elif isinstance(val, numbers.Real):

275 return '{}'.format(val)

276 elif isinstance(val, Spacegroup):

277 return val.symbol

278 else:

279 return val

280

281 if len(dct) == 0:

282 return ''

283

284 string = ''

285 for key in dct:

286 val = dct[key]

287

288 if isinstance(val, np.ndarray):

289 val = array_to_string(key, val)

290 else:

291 # convert any known types to string

292 val = known_types_to_str(val)

293

294 if val is not None and not isinstance(val, str):

295 # what's left is an object, try using JSON

296 if isinstance(val, np.ndarray):

297 val = val.tolist()

298 try:

299 val = '_JSON ' + json.dumps(val)

300 # if this fails, let give up

301 except TypeError:

302 warnings.warn('Skipping unhashable information '

303 '{0}'.format(key))

304 continue

305

306 key = escape(key) # escape and quote key

307 eq = "="

308 # Should this really be setting empty value that's going to be

309 # interpreted as bool True?

310 if val is None:

311 val = ""

312 eq = ""

313 val = escape(val) # escape and quote val

314

315 string += '%s%s%s%s' % (key, eq, val, sep)

316

317 return string.strip()

318

319

320def parse_properties(prop_str):

321 """

322 Parse extended XYZ properties format string

323

324 Format is "[NAME:TYPE:NCOLS]...]", e.g. "species:S:1:pos:R:3".

325 NAME is the name of the property.

326 TYPE is one of R, I, S, L for real, integer, string and logical.

327 NCOLS is number of columns for that property.

328 """

329

330 properties = {}

331 properties_list = []

332 dtypes = []

333 converters = []

334

335 fields = prop_str.split(':')

336

337 def parse_bool(x):

338 """

339 Parse bool to string

340 """

341 return {'T': True, 'F': False,

342 'True': True, 'False': False}.get(x)

343

344 fmt_map = {'R': ('d', float),

345 'I': ('i', int),

346 'S': (object, str),

347 'L': ('bool', parse_bool)}

348

349 for name, ptype, cols in zip(fields[::3],

350 fields[1::3],

351 [int(x) for x in fields[2::3]]):

352 if ptype not in ('R', 'I', 'S', 'L'):

353 raise ValueError('Unknown property type: ' + ptype)

354 ase_name = REV_PROPERTY_NAME_MAP.get(name, name)

355

356 dtype, converter = fmt_map[ptype]

357 if cols == 1:

358 dtypes.append((name, dtype))

359 converters.append(converter)

360 else:

361 for c in range(cols):

362 dtypes.append((name + str(c), dtype))

363 converters.append(converter)

364

365 properties[name] = (ase_name, cols)

366 properties_list.append(name)

367

368 dtype = np.dtype(dtypes)

369 return properties, properties_list, dtype, converters

370

371

372def _read_xyz_frame(lines, natoms, properties_parser=key_val_str_to_dict,

373 nvec=0):

374 # comment line

375 line = next(lines).strip()

376 if nvec > 0:

377 info = {'comment': line}

378 else:

379 info = properties_parser(line) if line else {}

380

381 pbc = None

382 if 'pbc' in info:

383 pbc = info['pbc']

384 del info['pbc']

385 elif 'Lattice' in info:

386 # default pbc for extxyz file containing Lattice

387 # is True in all directions

388 pbc = [True, True, True]

389 elif nvec > 0:

390 # cell information given as pseudo-Atoms

391 pbc = [False, False, False]

392

393 cell = None

394 if 'Lattice' in info:

395 # NB: ASE cell is transpose of extended XYZ lattice

396 cell = info['Lattice'].T

397 del info['Lattice']

398 elif nvec > 0:

399 # cell information given as pseudo-Atoms

400 cell = np.zeros((3, 3))

401

402 if 'Properties' not in info:

403 # Default set of properties is atomic symbols and positions only

404 info['Properties'] = 'species:S:1:pos:R:3'

405 properties, names, dtype, convs = parse_properties(info['Properties'])

406 del info['Properties']

407

408 data = []

409 for ln in range(natoms):

410 try:

411 line = next(lines)

412 except StopIteration:

413 raise XYZError('ase.io.extxyz: Frame has {} atoms, expected {}'

414 .format(len(data), natoms))

415 vals = line.split()

416 row = tuple([conv(val) for conv, val in zip(convs, vals)])

417 data.append(row)

418

419 try:

420 data = np.array(data, dtype)

421 except TypeError:

422 raise XYZError('Badly formatted data '

423 'or end of file reached before end of frame')

424

425 # Read VEC entries if present

426 if nvec > 0:

427 for ln in range(nvec):

428 try:

429 line = next(lines)

430 except StopIteration:

431 raise XYZError('ase.io.adfxyz: Frame has {} cell vectors, '

432 'expected {}'.format(len(cell), nvec))

433 entry = line.split()

434

435 if not entry[0].startswith('VEC'):

436 raise XYZError('Expected cell vector, got {}'.format(entry[0]))

437

438 try:

439 n = int(entry[0][3:])

440 except ValueError as e:

441 raise XYZError('Expected VEC{}, got VEC{}'

442 .format(ln + 1, entry[0][3:])) from e

443 if n != ln + 1:

444 raise XYZError('Expected VEC{}, got VEC{}'

445 .format(ln + 1, n))

446

447 cell[ln] = np.array([float(x) for x in entry[1:]])

448 pbc[ln] = True

449 if nvec != pbc.count(True):

450 raise XYZError('Problem with number of cell vectors')

451 pbc = tuple(pbc)

452

453 arrays = {}

454 for name in names:

455 ase_name, cols = properties[name]

456 if cols == 1:

457 value = data[name]

458 else:

459 value = np.vstack([data[name + str(c)]

460 for c in range(cols)]).T

461 arrays[ase_name] = value

462

463 symbols = None

464 if 'symbols' in arrays:

465 symbols = [s.capitalize() for s in arrays['symbols']]

466 del arrays['symbols']

467

468 numbers = None

469 duplicate_numbers = None

470 if 'numbers' in arrays:

471 if symbols is None:

472 numbers = arrays['numbers']

473 else:

474 duplicate_numbers = arrays['numbers']

475 del arrays['numbers']

476

477 charges = None

478 if 'charges' in arrays:

479 charges = arrays['charges']

480 del arrays['charges']

481

482 positions = None

483 if 'positions' in arrays:

484 positions = arrays['positions']

485 del arrays['positions']

486

487 atoms = Atoms(symbols=symbols,

488 positions=positions,

489 numbers=numbers,

490 charges=charges,

491 cell=cell,

492 pbc=pbc,

493 info=info)

494

495 # Read and set constraints

496 if 'move_mask' in arrays:

497 if properties['move_mask'][1] == 3:

498 cons = []

499 for a in range(natoms):

500 cons.append(FixCartesian(a, mask=~arrays['move_mask'][a, :]))

501 atoms.set_constraint(cons)

502 elif properties['move_mask'][1] == 1:

503 atoms.set_constraint(FixAtoms(mask=~arrays['move_mask']))

504 else:

505 raise XYZError('Not implemented constraint')

506 del arrays['move_mask']

507

508 for name, array in arrays.items():

509 atoms.new_array(name, array)

510

511 if duplicate_numbers is not None:

512 atoms.set_atomic_numbers(duplicate_numbers)

513

514 # Load results of previous calculations into SinglePointCalculator

515 results = {}

516 for key in list(atoms.info.keys()):

517 if key in per_config_properties:

518 results[key] = atoms.info[key]

519 # special case for stress- convert to Voigt 6-element form

520 if key == 'stress' and results[key].shape == (3, 3):

521 stress = results[key]

522 stress = np.array([stress[0, 0],

523 stress[1, 1],

524 stress[2, 2],

525 stress[1, 2],

526 stress[0, 2],

527 stress[0, 1]])

528 results[key] = stress

529 for key in list(atoms.arrays.keys()):

530 if (key in per_atom_properties and len(value.shape) >= 1

531 and value.shape[0] == len(atoms)):

532 results[key] = atoms.arrays[key]

533 if results != {}:

534 calculator = SinglePointCalculator(atoms, **results)

535 atoms.calc = calculator

536 return atoms

537

538

539class XYZError(IOError):

540 pass

541

542

543class XYZChunk:

544 def __init__(self, lines, natoms):

545 self.lines = lines

546 self.natoms = natoms

547

548 def build(self):

549 """Convert unprocessed chunk into Atoms."""

550 return _read_xyz_frame(iter(self.lines), self.natoms)

551

552

553def ixyzchunks(fd):

554 """Yield unprocessed chunks (header, lines) for each xyz image."""

555 while True:

556 line = next(fd).strip() # Raises StopIteration on empty file

557 try:

558 natoms = int(line)

559 except ValueError:

560 raise XYZError('Expected integer, found "{0}"'.format(line))

561 try:

562 lines = [next(fd) for _ in range(1 + natoms)]

563 except StopIteration:

564 raise XYZError('Incomplete XYZ chunk')

565 yield XYZChunk(lines, natoms)

566

567

568class ImageIterator:

569 """"""

570

571 def __init__(self, ichunks):

572 self.ichunks = ichunks

573

574 def __call__(self, fd, indices=-1):

575 if not hasattr(indices, 'start'):

576 if indices < 0:

577 indices = slice(indices - 1, indices)

578 else:

579 indices = slice(indices, indices + 1)

580

581 for chunk in self._getslice(fd, indices):

582 yield chunk.build()

583

584 def _getslice(self, fd, indices):

585 try:

586 iterator = islice(self.ichunks(fd), indices.start, indices.stop,

587 indices.step)

588 except ValueError:

589 # Negative indices. Go through the whole thing to get the length,

590 # which allows us to evaluate the slice, and then read it again

591 startpos = fd.tell()

592 nchunks = 0

593 for chunk in self.ichunks(fd):

594 nchunks += 1

595 fd.seek(startpos)

596 indices_tuple = indices.indices(nchunks)

597 iterator = islice(self.ichunks(fd), *indices_tuple)

598 return iterator

599

600

601iread_xyz = ImageIterator(ixyzchunks)

602

603

604@reader

605def read_xyz(fileobj, index=-1, properties_parser=key_val_str_to_dict):

606 r"""

607 Read from a file in Extended XYZ format

608

609 index is the frame to read, default is last frame (index=-1).

610 properties_parser is the parse to use when converting the properties line

611 to a dictionary, ``extxyz.key_val_str_to_dict`` is the default and can

612 deal with most use cases, ``extxyz.key_val_str_to_dict_regex`` is slightly

613 faster but has fewer features.

614

615 Extended XYZ format is an enhanced version of the `basic XYZ format

616 <http://en.wikipedia.org/wiki/XYZ_file_format>`_ that allows extra

617 columns to be present in the file for additonal per-atom properties as

618 well as standardising the format of the comment line to include the

619 cell lattice and other per-frame parameters.

620

621 It's easiest to describe the format with an example. Here is a

622 standard XYZ file containing a bulk cubic 8 atom silicon cell ::

623

624 8

625 Cubic bulk silicon cell

626 Si 0.00000000 0.00000000 0.00000000

627 Si 1.36000000 1.36000000 1.36000000

628 Si 2.72000000 2.72000000 0.00000000

629 Si 4.08000000 4.08000000 1.36000000

630 Si 2.72000000 0.00000000 2.72000000

631 Si 4.08000000 1.36000000 4.08000000

632 Si 0.00000000 2.72000000 2.72000000

633 Si 1.36000000 4.08000000 4.08000000

634

635 The first line is the number of atoms, followed by a comment and

636 then one line per atom, giving the element symbol and cartesian

637 x y, and z coordinates in Angstroms.

638

639 Here's the same configuration in extended XYZ format ::

640

641 8

642 Lattice="5.44 0.0 0.0 0.0 5.44 0.0 0.0 0.0 5.44" Properties=species:S:1:pos:R:3 Time=0.0

643 Si 0.00000000 0.00000000 0.00000000

644 Si 1.36000000 1.36000000 1.36000000

645 Si 2.72000000 2.72000000 0.00000000

646 Si 4.08000000 4.08000000 1.36000000

647 Si 2.72000000 0.00000000 2.72000000

648 Si 4.08000000 1.36000000 4.08000000

649 Si 0.00000000 2.72000000 2.72000000

650 Si 1.36000000 4.08000000 4.08000000

651

652 In extended XYZ format, the comment line is replaced by a series of

653 key/value pairs. The keys should be strings and values can be

654 integers, reals, logicals (denoted by `T` and `F` for true and false)

655 or strings. Quotes are required if a value contains any spaces (like

656 `Lattice` above). There are two mandatory parameters that any

657 extended XYZ: `Lattice` and `Properties`. Other parameters --

658 e.g. `Time` in the example above --- can be added to the parameter line

659 as needed.

660

661 `Lattice` is a Cartesian 3x3 matrix representation of the cell

662 vectors, with each vector stored as a column and the 9 values listed in

663 Fortran column-major order, i.e. in the form ::

664

665 Lattice="R1x R1y R1z R2x R2y R2z R3x R3y R3z"

666

667 where `R1x R1y R1z` are the Cartesian x-, y- and z-components of the

668 first lattice vector (:math:`\mathbf{a}`), `R2x R2y R2z` those of the second

669 lattice vector (:math:`\mathbf{b}`) and `R3x R3y R3z` those of the

670 third lattice vector (:math:`\mathbf{c}`).

671

672 The list of properties in the file is described by the `Properties`

673 parameter, which should take the form of a series of colon separated

674 triplets giving the name, format (`R` for real, `I` for integer) and

675 number of columns of each property. For example::

676

677 Properties="species:S:1:pos:R:3:vel:R:3:select:I:1"

678

679 indicates the first column represents atomic species, the next three

680 columns represent atomic positions, the next three velcoities, and the

681 last is an single integer called `select`. With this property

682 definition, the line ::

683

684 Si 4.08000000 4.08000000 1.36000000 0.00000000 0.00000000 0.00000000 1

685

686 would describe a silicon atom at position (4.08,4.08,1.36) with zero

687 velocity and the `select` property set to 1.

688

689 The property names `pos`, `Z`, `mass`, and `charge` map to ASE

690 :attr:`ase.atoms.Atoms.arrays` entries named

691 `positions`, `numbers`, `masses` and `charges` respectively.

692

693 Additional key-value pairs in the comment line are parsed into the

694 :attr:`ase.Atoms.atoms.info` dictionary, with the following conventions

695

696 - Values can be quoted with `""`, `''`, `[]` or `{}` (the latter are

697 included to ease command-line usage as the `{}` are not treated

698 specially by the shell)

699 - Quotes within keys or values can be escaped with `\"`.

700 - Keys with special names `stress` or `virial` are treated as 3x3 matrices

701 in Fortran order, as for `Lattice` above.

702 - Otherwise, values with multiple elements are treated as 1D arrays, first

703 assuming integer format and falling back to float if conversion is

704 unsuccessful.

705 - A missing value defaults to `True`, e.g. the comment line

706 `"cutoff=3.4 have_energy"` leads to

707 `{'cutoff': 3.4, 'have_energy': True}` in `atoms.info`.

708 - Value strings starting with `"_JSON"` are interpreted as JSON content;

709 similarly, when writing, anything which does not match the criteria above

710 is serialised as JSON.

711

712 The extended XYZ format is also supported by the

713 the `Ovito <http://www.ovito.org>`_ visualisation tool

714 (from `v2.4 beta

715 <http://www.ovito.org/index.php/component/content/article?id=25>`_

716 onwards).

717 """ # noqa: E501

718

719 if not isinstance(index, int) and not isinstance(index, slice):

720 raise TypeError('Index argument is neither slice nor integer!')

721

722 # If possible, build a partial index up to the last frame required

723 last_frame = None

724 if isinstance(index, int) and index >= 0:

725 last_frame = index

726 elif isinstance(index, slice):

727 if index.stop is not None and index.stop >= 0:

728 last_frame = index.stop

729

730 # scan through file to find where the frames start

731 try:

732 fileobj.seek(0)

733 except UnsupportedOperation:

734 fileobj = StringIO(fileobj.read())

735 fileobj.seek(0)

736 frames = []

737 while True:

738 frame_pos = fileobj.tell()

739 line = fileobj.readline()

740 if line.strip() == '':

741 break

742 try:

743 natoms = int(line)

744 except ValueError as err:

745 raise XYZError('ase.io.extxyz: Expected xyz header but got: {}'

746 .format(err))

747 fileobj.readline() # read comment line

748 for i in range(natoms):

749 fileobj.readline()

750 # check for VEC

751 nvec = 0

752 while True:

753 lastPos = fileobj.tell()

754 line = fileobj.readline()

755 if line.lstrip().startswith('VEC'):

756 nvec += 1

757 if nvec > 3:

758 raise XYZError('ase.io.extxyz: More than 3 VECX entries')

759 else:

760 fileobj.seek(lastPos)

761 break

762 frames.append((frame_pos, natoms, nvec))

763 if last_frame is not None and len(frames) > last_frame:

764 break

765

766 trbl = index2range(index, len(frames))

767

768 for index in trbl:

769 frame_pos, natoms, nvec = frames[index]

770 fileobj.seek(frame_pos)

771 # check for consistency with frame index table

772 assert int(fileobj.readline()) == natoms

773 yield _read_xyz_frame(fileobj, natoms, properties_parser, nvec)

774

775

776def output_column_format(atoms, columns, arrays,

777 write_info=True, results=None):

778 """

779 Helper function to build extended XYZ comment line

780 """

781 fmt_map = {'d': ('R', '%16.8f'),

782 'f': ('R', '%16.8f'),

783 'i': ('I', '%8d'),

784 'O': ('S', '%s'),

785 'S': ('S', '%s'),

786 'U': ('S', '%-2s'),

787 'b': ('L', ' %.1s')}

788

789 # NB: Lattice is stored as tranpose of ASE cell,

790 # with Fortran array ordering

791 lattice_str = ('Lattice="'

792 + ' '.join([str(x) for x in np.reshape(atoms.cell.T,

793 9, order='F')]) +

794 '"')

795

796 property_names = []

797 property_types = []

798 property_ncols = []

799 dtypes = []

800 formats = []

801

802 for column in columns:

803 array = arrays[column]

804 dtype = array.dtype

805

806 property_name = PROPERTY_NAME_MAP.get(column, column)

807 property_type, fmt = fmt_map[dtype.kind]

808 property_names.append(property_name)

809 property_types.append(property_type)

810

811 if (len(array.shape) == 1

812 or (len(array.shape) == 2 and array.shape[1] == 1)):

813 ncol = 1

814 dtypes.append((column, dtype))

815 else:

816 ncol = array.shape[1]

817 for c in range(ncol):

818 dtypes.append((column + str(c), dtype))

819

820 formats.extend([fmt] * ncol)

821 property_ncols.append(ncol)

822

823 props_str = ':'.join([':'.join(x) for x in

824 zip(property_names,

825 property_types,

826 [str(nc) for nc in property_ncols])])

827

828 comment_str = ''

829 if atoms.cell.any():

830 comment_str += lattice_str + ' '

831 comment_str += 'Properties={}'.format(props_str)

832

833 info = {}

834 if write_info:

835 info.update(atoms.info)

836 if results is not None:

837 info.update(results)

838 info['pbc'] = atoms.get_pbc() # always save periodic boundary conditions

839 comment_str += ' ' + key_val_dict_to_str(info)

840

841 dtype = np.dtype(dtypes)

842 fmt = ' '.join(formats) + '\n'

843

844 return comment_str, property_ncols, dtype, fmt

845

846

847def write_xyz(fileobj, images, comment='', columns=None,

848 write_info=True,

849 write_results=True, plain=False, vec_cell=False,

850 append=False):

851 """

852 Write output in extended XYZ format

853

854 Optionally, specify which columns (arrays) to include in output,

855 whether to write the contents of the `atoms.info` dict to the

856 XYZ comment line (default is True), the results of any

857 calculator attached to this Atoms. The `plain` argument

858 can be used to write a simple XYZ file with no additional information.

859 `vec_cell` can be used to write the cell vectors as additional

860 pseudo-atoms. If `append` is set to True, the file is for append (mode `a`),

861 otherwise it is overwritten (mode `w`).

862

863 See documentation for :func:`read_xyz()` for further details of the extended

864 XYZ file format.

865 """

866 if isinstance(fileobj, str):

867 mode = 'w'

868 if append:

869 mode = 'a'

870 fileobj = paropen(fileobj, mode)

871

872 if hasattr(images, 'get_positions'):

873 images = [images]

874

875 for atoms in images:

876 natoms = len(atoms)

877

878 if columns is None:

879 fr_cols = None

880 else:

881 fr_cols = columns[:]

882

883 if fr_cols is None:

884 fr_cols = (['symbols', 'positions']

885 + [key for key in atoms.arrays.keys() if

886 key not in ['symbols', 'positions', 'numbers',

887 'species', 'pos']])

888

889 if vec_cell:

890 plain = True

891

892 if plain:

893 fr_cols = ['symbols', 'positions']

894 write_info = False

895 write_results = False

896

897 per_frame_results = {}

898 per_atom_results = {}

899 if write_results:

900 calculator = atoms.calc

901 if (calculator is not None

902 and isinstance(calculator, Calculator)):

903 for key in all_properties:

904 value = calculator.results.get(key, None)

905 if value is None:

906 # skip missing calculator results

907 continue

908 if (key in per_atom_properties and len(value.shape) >= 1

909 and value.shape[0] == len(atoms)):

910 # per-atom quantities (forces, energies, stresses)

911 per_atom_results[key] = value

912 elif key in per_config_properties:

913 # per-frame quantities (energy, stress)

914 # special case for stress, which should be converted

915 # to 3x3 matrices before writing

916 if key == 'stress':

917 xx, yy, zz, yz, xz, xy = value

918 value = np.array(

919 [(xx, xy, xz), (xy, yy, yz), (xz, yz, zz)])

920 per_frame_results[key] = value

921

922 # Move symbols and positions to first two properties

923 if 'symbols' in fr_cols:

924 i = fr_cols.index('symbols')

925 fr_cols[0], fr_cols[i] = fr_cols[i], fr_cols[0]

926

927 if 'positions' in fr_cols:

928 i = fr_cols.index('positions')

929 fr_cols[1], fr_cols[i] = fr_cols[i], fr_cols[1]

930

931 # Check first column "looks like" atomic symbols

932 if fr_cols[0] in atoms.arrays:

933 symbols = atoms.arrays[fr_cols[0]]

934 else:

935 symbols = atoms.get_chemical_symbols()

936

937 if natoms > 0 and not isinstance(symbols[0], str):

938 raise ValueError('First column must be symbols-like')

939

940 # Check second column "looks like" atomic positions

941 pos = atoms.arrays[fr_cols[1]]

942 if pos.shape != (natoms, 3) or pos.dtype.kind != 'f':

943 raise ValueError('Second column must be position-like')

944

945 # if vec_cell add cell information as pseudo-atoms

946 if vec_cell:

947 pbc = list(atoms.get_pbc())

948 cell = atoms.get_cell()

949

950 if True in pbc:

951 nPBC = 0

952 for i, b in enumerate(pbc):

953 if b:

954 nPBC += 1

955 symbols.append('VEC' + str(nPBC))

956 pos = np.vstack((pos, cell[i]))

957 # add to natoms

958 natoms += nPBC

959 if pos.shape != (natoms, 3) or pos.dtype.kind != 'f':

960 raise ValueError(

961 'Pseudo Atoms containing cell have bad coords')

962

963 # Move mask

964 if 'move_mask' in fr_cols:

965 cnstr = images[0]._get_constraints()

966 if len(cnstr) > 0:

967 c0 = cnstr[0]

968 if isinstance(c0, FixAtoms):

969 cnstr = np.ones((natoms,), dtype=bool)

970 for idx in c0.index:

971 cnstr[idx] = False

972 elif isinstance(c0, FixCartesian):

973 masks = np.ones((natoms, 3), dtype=bool)

974 for i in range(len(cnstr)):

975 idx = cnstr[i].a

976 masks[idx] = cnstr[i].mask

977 cnstr = masks

978 else:

979 fr_cols.remove('move_mask')

980

981 # Collect data to be written out

982 arrays = {}

983 for column in fr_cols:

984 if column == 'positions':

985 arrays[column] = pos

986 elif column in atoms.arrays:

987 arrays[column] = atoms.arrays[column]

988 elif column == 'symbols':

989 arrays[column] = np.array(symbols)

990 elif column == 'move_mask':

991 arrays[column] = cnstr

992 else:

993 raise ValueError('Missing array "%s"' % column)

994

995 if write_results:

996 for key in per_atom_results:

997 if key not in fr_cols:

998 fr_cols += [key]

999 else:

1000 warnings.warn('write_xyz() overwriting array "{0}" present '

1001 'in atoms.arrays with stored results '

1002 'from calculator'.format(key))

1003 arrays.update(per_atom_results)

1004

1005 comm, ncols, dtype, fmt = output_column_format(atoms,

1006 fr_cols,

1007 arrays,

1008 write_info,

1009 per_frame_results)

1010

1011 if plain or comment != '':

1012 # override key/value pairs with user-speficied comment string

1013 comm = comment.rstrip()

1014 if '\n' in comm:

1015 raise ValueError('Comment line should not have line breaks.')

1016

1017 # Pack fr_cols into record array

1018 data = np.zeros(natoms, dtype)

1019 for column, ncol in zip(fr_cols, ncols):

1020 value = arrays[column]

1021 if ncol == 1:

1022 data[column] = np.squeeze(value)

1023 else:

1024 for c in range(ncol):

1025 data[column + str(c)] = value[:, c]

1026

1027 nat = natoms

1028 if vec_cell:

1029 nat -= nPBC

1030 # Write the output

1031 fileobj.write('%d\n' % nat)

1032 fileobj.write('%s\n' % comm)

1033 for i in range(natoms):

1034 fileobj.write(fmt % tuple(data[i]))

1035

1036

1037# create aliases for read/write functions

1038read_extxyz = read_xyz

1039write_extxyz = write_xyz

Coverage for /builds/debichem-team/python-ase/ase/io/extxyz.py : 78.35%

559 statements

Coverage for /builds/debichem-team/python-ase/ase/io/extxyz.py : 78.35%

559 statements 438 run 121 missing 0 excluded

559 statements