Coverage for /builds/debichem-team/python-ase/ase/io/cif.py: 86.54%

Hot-keys on this page

r m x p toggle line displays

j k next/prev highlighted chunk

0 (zero) top of page

1 (one) first highlighted chunk

1"""Module to read and write atoms in cif file format.

3See http://www.iucr.org/resources/cif/spec/version1.1/cifsyntax for a

4description of the file format. STAR extensions as save frames,

5global blocks, nested loops and multi-data values are not supported.

6The "latin-1" encoding is required by the IUCR specification.

7"""

9import io

10import re

11import shlex

12import warnings

13from typing import Dict, List, Tuple, Optional, Union, Iterator, Any, Sequence

14import collections.abc

16import numpy as np

18from ase import Atoms

19from ase.cell import Cell

20from ase.spacegroup import crystal

21from ase.spacegroup.spacegroup import spacegroup_from_data, Spacegroup

22from ase.io.cif_unicode import format_unicode, handle_subscripts

23from ase.utils import iofunction

26rhombohedral_spacegroups = {146, 148, 155, 160, 161, 166, 167}

29old_spacegroup_names = {'Abm2': 'Aem2',

30 'Aba2': 'Aea2',

31 'Cmca': 'Cmce',

32 'Cmma': 'Cmme',

33 'Ccca': 'Ccc1'}

35# CIF maps names to either single values or to multiple values via loops.

36CIFDataValue = Union[str, int, float]

37CIFData = Union[CIFDataValue, List[CIFDataValue]]

40def convert_value(value: str) -> CIFDataValue:

41 """Convert CIF value string to corresponding python type."""

42 value = value.strip()

43 if re.match('(".*")|(\'.*\')$', value):

44 return handle_subscripts(value[1:-1])

45 elif re.match(r'[+-]?\d+$', value):

46 return int(value)

47 elif re.match(r'[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?$', value):

48 return float(value)

49 elif re.match(r'[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?$\d+$$',

50 value):

51 return float(value[:value.index('(')]) # strip off uncertainties

52 elif re.match(r'[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?\(\d+$',

53 value):

54 warnings.warn('Badly formed number: "{0}"'.format(value))

55 return float(value[:value.index('(')]) # strip off uncertainties

56 else:

57 return handle_subscripts(value)

60def parse_multiline_string(lines: List[str], line: str) -> str:

61 """Parse semicolon-enclosed multiline string and return it."""

62 assert line[0] == ';'

63 strings = [line[1:].lstrip()]

64 while True:

65 line = lines.pop().strip()

66 if line[:1] == ';':

67 break

68 strings.append(line)

69 return '\n'.join(strings).strip()

72def parse_singletag(lines: List[str], line: str) -> Tuple[str, CIFDataValue]:

73 """Parse a CIF tag (entries starting with underscore). Returns

74 a key-value pair."""

75 kv = line.split(None, 1)

76 if len(kv) == 1:

77 key = line

78 line = lines.pop().strip()

79 while not line or line[0] == '#':

80 line = lines.pop().strip()

81 if line[0] == ';':

82 value = parse_multiline_string(lines, line)

83 else:

84 value = line

85 else:

86 key, value = kv

87 return key, convert_value(value)

90def parse_cif_loop_headers(lines: List[str]) -> Iterator[str]:

91 header_pattern = r'\s*(_\S*)'

93 while lines:

94 line = lines.pop()

95 match = re.match(header_pattern, line)

97 if match:

98 header = match.group(1).lower()

99 yield header

100 elif re.match(r'\s*#', line):

101 # XXX we should filter comments out already.

102 continue

103 else:

104 lines.append(line) # 'undo' pop

105 return

106

107

108def parse_cif_loop_data(lines: List[str],

109 ncolumns: int) -> List[List[CIFDataValue]]:

110 columns: List[List[CIFDataValue]] = [[] for _ in range(ncolumns)]

111

112 tokens = []

113 while lines:

114 line = lines.pop().strip()

115 lowerline = line.lower()

116 if (not line or

117 line.startswith('_') or

118 lowerline.startswith('data_') or

119 lowerline.startswith('loop_')):

120 lines.append(line)

121 break

122

123 if line.startswith('#'):

124 continue

125

126 if line.startswith(';'):

127 moretokens = [parse_multiline_string(lines, line)]

128 else:

129 if ncolumns == 1:

130 moretokens = [line]

131 else:

132 moretokens = shlex.split(line, posix=False)

133

134 tokens += moretokens

135 if len(tokens) < ncolumns:

136 continue

137 if len(tokens) == ncolumns:

138 for i, token in enumerate(tokens):

139 columns[i].append(convert_value(token))

140 else:

141 warnings.warn('Wrong number {} of tokens, expected {}: {}'

142 .format(len(tokens), ncolumns, tokens))

143

144 # (Due to continue statements we cannot move this to start of loop)

145 tokens = []

146

147 if tokens:

148 assert len(tokens) < ncolumns

149 raise RuntimeError('CIF loop ended unexpectedly with incomplete row')

150

151 return columns

152

153

154def parse_loop(lines: List[str]) -> Dict[str, List[CIFDataValue]]:

155 """Parse a CIF loop. Returns a dict with column tag names as keys

156 and a lists of the column content as values."""

157

158 headers = list(parse_cif_loop_headers(lines))

159 # Dict would be better. But there can be repeated headers.

160

161 columns = parse_cif_loop_data(lines, len(headers))

162

163 columns_dict = {}

164 for i, header in enumerate(headers):

165 if header in columns_dict:

166 warnings.warn('Duplicated loop tags: {0}'.format(header))

167 else:

168 columns_dict[header] = columns[i]

169 return columns_dict

170

171

172def parse_items(lines: List[str], line: str) -> Dict[str, CIFData]:

173 """Parse a CIF data items and return a dict with all tags."""

174 tags: Dict[str, CIFData] = {}

175

176 while True:

177 if not lines:

178 break

179 line = lines.pop().strip()

180 if not line:

181 continue

182 lowerline = line.lower()

183 if not line or line.startswith('#'):

184 continue

185 elif line.startswith('_'):

186 key, value = parse_singletag(lines, line)

187 tags[key.lower()] = value

188 elif lowerline.startswith('loop_'):

189 tags.update(parse_loop(lines))

190 elif lowerline.startswith('data_'):

191 if line:

192 lines.append(line)

193 break

194 elif line.startswith(';'):

195 parse_multiline_string(lines, line)

196 else:

197 raise ValueError('Unexpected CIF file entry: "{0}"'.format(line))

198 return tags

199

200

201class NoStructureData(RuntimeError):

202 pass

203

204

205class CIFBlock(collections.abc.Mapping):

206 """A block (i.e., a single system) in a crystallographic information file.

207

208 Use this object to query CIF tags or import information as ASE objects."""

209

210 cell_tags = ['_cell_length_a', '_cell_length_b', '_cell_length_c',

211 '_cell_angle_alpha', '_cell_angle_beta', '_cell_angle_gamma']

212

213 def __init__(self, name: str, tags: Dict[str, CIFData]):

214 self.name = name

215 self._tags = tags

216

217 def __repr__(self) -> str:

218 tags = set(self._tags)

219 return f'CIFBlock({self.name}, tags={tags})'

220

221 def __getitem__(self, key: str) -> CIFData:

222 return self._tags[key]

223

224 def __iter__(self) -> Iterator[str]:

225 return iter(self._tags)

226

227 def __len__(self) -> int:

228 return len(self._tags)

229

230 def get(self, key, default=None):

231 return self._tags.get(key, default)

232

233 def get_cellpar(self) -> Optional[List]:

234 try:

235 return [self[tag] for tag in self.cell_tags]

236 except KeyError:

237 return None

238

239 def get_cell(self) -> Cell:

240 cellpar = self.get_cellpar()

241 if cellpar is None:

242 return Cell.new([0, 0, 0])

243 return Cell.new(cellpar)

244

245 def _raw_scaled_positions(self) -> Optional[np.ndarray]:

246 coords = [self.get(name) for name in ['_atom_site_fract_x',

247 '_atom_site_fract_y',

248 '_atom_site_fract_z']]

249 # XXX Shall we try to handle mixed coordinates?

250 # (Some scaled vs others fractional)

251 if None in coords:

252 return None

253 return np.array(coords).T

254

255 def _raw_positions(self) -> Optional[np.ndarray]:

256 coords = [self.get('_atom_site_cartn_x'),

257 self.get('_atom_site_cartn_y'),

258 self.get('_atom_site_cartn_z')]

259 if None in coords:

260 return None

261 return np.array(coords).T

262

263 def _get_site_coordinates(self):

264 scaled = self._raw_scaled_positions()

265

266 if scaled is not None:

267 return 'scaled', scaled

268

269 cartesian = self._raw_positions()

270

271 if cartesian is None:

272 raise NoStructureData('No positions found in structure')

273

274 return 'cartesian', cartesian

275

276 def _get_symbols_with_deuterium(self):

277 labels = self._get_any(['_atom_site_type_symbol',

278 '_atom_site_label'])

279 if labels is None:

280 raise NoStructureData('No symbols')

281

282 symbols = []

283 for label in labels:

284 if label == '.' or label == '?':

285 raise NoStructureData('Symbols are undetermined')

286 # Strip off additional labeling on chemical symbols

287 match = re.search(r'([A-Z][a-z]?)', label)

288 symbol = match.group(0)

289 symbols.append(symbol)

290 return symbols

291

292 def get_symbols(self) -> List[str]:

293 symbols = self._get_symbols_with_deuterium()

294 return [symbol if symbol != 'D' else 'H' for symbol in symbols]

295

296 def _where_deuterium(self):

297 return np.array([symbol == 'D' for symbol

298 in self._get_symbols_with_deuterium()], bool)

299

300 def _get_masses(self) -> Optional[np.ndarray]:

301 mask = self._where_deuterium()

302 if not any(mask):

303 return None

304

305 symbols = self.get_symbols()

306 masses = Atoms(symbols).get_masses()

307 masses[mask] = 2.01355

308 return masses

309

310 def _get_any(self, names):

311 for name in names:

312 if name in self:

313 return self[name]

314 return None

315

316 def _get_spacegroup_number(self):

317 # Symmetry specification, see

318 # http://www.iucr.org/resources/cif/dictionaries/cif_sym for a

319 # complete list of official keys. In addition we also try to

320 # support some commonly used depricated notations

321 return self._get_any(['_space_group.it_number',

322 '_space_group_it_number',

323 '_symmetry_int_tables_number'])

324

325 def _get_spacegroup_name(self):

326 hm_symbol = self._get_any(['_space_group_name_h-m_alt',

327 '_symmetry_space_group_name_h-m',

328 '_space_group.Patterson_name_h-m',

329 '_space_group.patterson_name_h-m'])

330

331 hm_symbol = old_spacegroup_names.get(hm_symbol, hm_symbol)

332 return hm_symbol

333

334 def _get_sitesym(self):

335 sitesym = self._get_any(['_space_group_symop_operation_xyz',

336 '_space_group_symop.operation_xyz',

337 '_symmetry_equiv_pos_as_xyz'])

338 if isinstance(sitesym, str):

339 sitesym = [sitesym]

340 return sitesym

341

342 def _get_fractional_occupancies(self):

343 return self.get('_atom_site_occupancy')

344

345 def _get_setting(self) -> Optional[int]:

346 setting_str = self.get('_symmetry_space_group_setting')

347 if setting_str is None:

348 return None

349

350 setting = int(setting_str)

351 if setting not in [1, 2]:

352 raise ValueError(

353 f'Spacegroup setting must be 1 or 2, not {setting}')

354 return setting

355

356 def get_spacegroup(self, subtrans_included) -> Spacegroup:

357 # XXX The logic in this method needs serious cleaning up!

358 # The setting needs to be passed as either 1 or two, not None (default)

359 no = self._get_spacegroup_number()

360 hm_symbol = self._get_spacegroup_name()

361 sitesym = self._get_sitesym()

362

363 setting = 1

364 spacegroup = 1

365 if sitesym is not None:

366 subtrans = [(0.0, 0.0, 0.0)] if subtrans_included else None

367 spacegroup = spacegroup_from_data(

368 no=no, symbol=hm_symbol, sitesym=sitesym, subtrans=subtrans,

369 setting=setting)

370 elif no is not None:

371 spacegroup = no

372 elif hm_symbol is not None:

373 spacegroup = hm_symbol

374 else:

375 spacegroup = 1

376

377 setting_std = self._get_setting()

378

379 setting_name = None

380 if '_symmetry_space_group_setting' in self:

381 assert setting_std is not None

382 setting = setting_std

383 elif '_space_group_crystal_system' in self:

384 setting_name = self['_space_group_crystal_system']

385 elif '_symmetry_cell_setting' in self:

386 setting_name = self['_symmetry_cell_setting']

387

388 if setting_name:

389 no = Spacegroup(spacegroup).no

390 if no in rhombohedral_spacegroups:

391 if setting_name == 'hexagonal':

392 setting = 1

393 elif setting_name in ('trigonal', 'rhombohedral'):

394 setting = 2

395 else:

396 warnings.warn(

397 'unexpected crystal system %r for space group %r' % (

398 setting_name, spacegroup))

399 # FIXME - check for more crystal systems...

400 else:

401 warnings.warn(

402 'crystal system %r is not interpreted for space group %r. '

403 'This may result in wrong setting!' % (

404 setting_name, spacegroup))

405

406 spg = Spacegroup(spacegroup, setting)

407 if no is not None:

408 assert int(spg) == no, (int(spg), no)

409 return spg

410

411 def get_unsymmetrized_structure(self) -> Atoms:

412 """Return Atoms without symmetrizing coordinates.

413

414 This returns a (normally) unphysical Atoms object

415 corresponding only to those coordinates included

416 in the CIF file, useful for e.g. debugging.

417

418 This method may change behaviour in the future."""

419 symbols = self.get_symbols()

420 coordtype, coords = self._get_site_coordinates()

421

422 atoms = Atoms(symbols=symbols,

423 cell=self.get_cell(),

424 masses=self._get_masses())

425

426 if coordtype == 'scaled':

427 atoms.set_scaled_positions(coords)

428 else:

429 assert coordtype == 'cartesian'

430 atoms.positions[:] = coords

431

432 return atoms

433

434 def has_structure(self):

435 """Whether this CIF block has an atomic configuration."""

436 try:

437 self.get_symbols()

438 self._get_site_coordinates()

439 except NoStructureData:

440 return False

441 else:

442 return True

443

444 def get_atoms(self, store_tags=False, primitive_cell=False,

445 subtrans_included=True, fractional_occupancies=True) -> Atoms:

446 """Returns an Atoms object from a cif tags dictionary. See read_cif()

447 for a description of the arguments."""

448 if primitive_cell and subtrans_included:

449 raise RuntimeError(

450 'Primitive cell cannot be determined when sublattice '

451 'translations are included in the symmetry operations listed '

452 'in the CIF file, i.e. when `subtrans_included` is True.')

453

454 cell = self.get_cell()

455 assert cell.rank in [0, 3]

456

457 kwargs: Dict[str, Any] = {}

458 if store_tags:

459 kwargs['info'] = self._tags.copy()

460

461 if fractional_occupancies:

462 occupancies = self._get_fractional_occupancies()

463 else:

464 occupancies = None

465

466 if occupancies is not None:

467 # no warnings in this case

468 kwargs['onduplicates'] = 'keep'

469

470 # The unsymmetrized_structure is not the asymmetric unit

471 # because the asymmetric unit should have (in general) a smaller cell,

472 # whereas we have the full cell.

473 unsymmetrized_structure = self.get_unsymmetrized_structure()

474

475 if cell.rank == 3:

476 spacegroup = self.get_spacegroup(subtrans_included)

477 atoms = crystal(unsymmetrized_structure,

478 spacegroup=spacegroup,

479 setting=spacegroup.setting,

480 occupancies=occupancies,

481 primitive_cell=primitive_cell,

482 **kwargs)

483 else:

484 atoms = unsymmetrized_structure

485 if kwargs.get('info') is not None:

486 atoms.info.update(kwargs['info'])

487 if occupancies is not None:

488 # Compile an occupancies dictionary

489 occ_dict = {}

490 for i, sym in enumerate(atoms.symbols):

491 occ_dict[str(i)] = {sym: occupancies[i]}

492 atoms.info['occupancy'] = occ_dict

493

494 return atoms

495

496

497def parse_block(lines: List[str], line: str) -> CIFBlock:

498 assert line.lower().startswith('data_')

499 blockname = line.split('_', 1)[1].rstrip()

500 tags = parse_items(lines, line)

501 return CIFBlock(blockname, tags)

502

503

504def parse_cif(fileobj, reader='ase') -> Iterator[CIFBlock]:

505 if reader == 'ase':

506 return parse_cif_ase(fileobj)

507 elif reader == 'pycodcif':

508 return parse_cif_pycodcif(fileobj)

509 else:

510 raise ValueError(f'No such reader: {reader}')

511

512

513def parse_cif_ase(fileobj) -> Iterator[CIFBlock]:

514 """Parse a CIF file using ase CIF parser."""

515

516 if isinstance(fileobj, str):

517 with open(fileobj, 'rb') as fileobj:

518 data = fileobj.read()

519 else:

520 data = fileobj.read()

521

522 if isinstance(data, bytes):

523 data = data.decode('latin1')

524 data = format_unicode(data)

525 lines = [e for e in data.split('\n') if len(e) > 0]

526 if len(lines) > 0 and lines[0].rstrip() == '#\\#CIF_2.0':

527 warnings.warn('CIF v2.0 file format detected; `ase` CIF reader might '

528 'incorrectly interpret some syntax constructions, use '

529 '`pycodcif` reader instead')

530 lines = [''] + lines[::-1] # all lines (reversed)

531

532 while lines:

533 line = lines.pop().strip()

534 if not line or line.startswith('#'):

535 continue

536

537 yield parse_block(lines, line)

538

539

540def parse_cif_pycodcif(fileobj) -> Iterator[CIFBlock]:

541 """Parse a CIF file using pycodcif CIF parser."""

542 if not isinstance(fileobj, str):

543 fileobj = fileobj.name

544

545 try:

546 from pycodcif import parse

547 except ImportError:

548 raise ImportError(

549 'parse_cif_pycodcif requires pycodcif ' +

550 '(http://wiki.crystallography.net/cod-tools/pycodcif/)')

551

552 data, _, _ = parse(fileobj)

553

554 for datablock in data:

555 tags = datablock['values']

556 for tag in tags.keys():

557 values = [convert_value(x) for x in tags[tag]]

558 if len(values) == 1:

559 tags[tag] = values[0]

560 else:

561 tags[tag] = values

562 yield CIFBlock(datablock['name'], tags)

563

564

565def read_cif(fileobj, index, store_tags=False, primitive_cell=False,

566 subtrans_included=True, fractional_occupancies=True,

567 reader='ase') -> Iterator[Atoms]:

568 """Read Atoms object from CIF file. *index* specifies the data

569 block number or name (if string) to return.

570

571 If *index* is None or a slice object, a list of atoms objects will

572 be returned. In the case of *index* is *None* or *slice(None)*,

573 only blocks with valid crystal data will be included.

574

575 If *store_tags* is true, the *info* attribute of the returned

576 Atoms object will be populated with all tags in the corresponding

577 cif data block.

578

579 If *primitive_cell* is true, the primitive cell will be built instead

580 of the conventional cell.

581

582 If *subtrans_included* is true, sublattice translations are

583 assumed to be included among the symmetry operations listed in the

584 CIF file (seems to be the common behaviour of CIF files).

585 Otherwise the sublattice translations are determined from setting

586 1 of the extracted space group. A result of setting this flag to

587 true, is that it will not be possible to determine the primitive

588 cell.

589

590 If *fractional_occupancies* is true, the resulting atoms object will be

591 tagged equipped with a dictionary `occupancy`. The keys of this dictionary

592 will be integers converted to strings. The conversion to string is done

593 in order to avoid troubles with JSON encoding/decoding of the dictionaries

594 with non-string keys.

595 Also, in case of mixed occupancies, the atom's chemical symbol will be

596 that of the most dominant species.

597

598 String *reader* is used to select CIF reader. Value `ase` selects

599 built-in CIF reader (default), while `pycodcif` selects CIF reader based

600 on `pycodcif` package.

601 """

602 # Find all CIF blocks with valid crystal data

603 images = []

604 for block in parse_cif(fileobj, reader):

605 if not block.has_structure():

606 continue

607

608 atoms = block.get_atoms(

609 store_tags, primitive_cell,

610 subtrans_included,

611 fractional_occupancies=fractional_occupancies)

612 images.append(atoms)

613

614 for atoms in images[index]:

615 yield atoms

616

617

618def format_cell(cell: Cell) -> str:

619 assert cell.rank == 3

620 lines = []

621 for name, value in zip(CIFBlock.cell_tags, cell.cellpar()):

622 line = '{:20} {:g}\n'.format(name, value)

623 lines.append(line)

624 assert len(lines) == 6

625 return ''.join(lines)

626

627

628def format_generic_spacegroup_info() -> str:

629 # We assume no symmetry whatsoever

630 return '\n'.join([

631 '_space_group_name_H-M_alt "P 1"',

632 '_space_group_IT_number 1',

633 '',

634 'loop_',

635 ' _space_group_symop_operation_xyz',

636 " 'x, y, z'",

637 '',

638 ])

639

640

641class CIFLoop:

642 def __init__(self):

643 self.names = []

644 self.formats = []

645 self.arrays = []

646

647 def add(self, name, array, fmt):

648 assert name.startswith('_')

649 self.names.append(name)

650 self.formats.append(fmt)

651 self.arrays.append(array)

652 if len(self.arrays[0]) != len(self.arrays[-1]):

653 raise ValueError(f'Loop data "{name}" has {len(array)} '

654 'elements, expected {len(self.arrays[0])}')

655

656 def tostring(self):

657 lines = []

658 append = lines.append

659 append('loop_')

660 for name in self.names:

661 append(f' {name}')

662

663 template = ' ' + ' '.join(self.formats)

664

665 ncolumns = len(self.arrays)

666 nrows = len(self.arrays[0]) if ncolumns > 0 else 0

667 for row in range(nrows):

668 arraydata = [array[row] for array in self.arrays]

669 line = template.format(*arraydata)

670 append(line)

671 append('')

672 return '\n'.join(lines)

673

674

675@iofunction('wb')

676def write_cif(fd, images, cif_format=None,

677 wrap=True, labels=None, loop_keys=None) -> None:

678 """Write *images* to CIF file.

679

680 wrap: bool

681 Wrap atoms into unit cell.

682

683 labels: list

684 Use this list (shaped list[i_frame][i_atom] = string) for the

685 '_atom_site_label' section instead of automatically generating

686 it from the element symbol.

687

688 loop_keys: dict

689 Add the information from this dictionary to the `loop_`

690 section. Keys are printed to the `loop_` section preceeded by

691 ' _'. dict[key] should contain the data printed for each atom,

692 so it needs to have the setup `dict[key][i_frame][i_atom] =

693 string`. The strings are printed as they are, so take care of

694 formating. Information can be re-read using the `store_tags`

695 option of the cif reader.

696

697 """

698

699 if cif_format is not None:

700 warnings.warn('The cif_format argument is deprecated and may be '

701 'removed in the future. Use loop_keys to customize '

702 'data written in loop.', FutureWarning)

703

704 if loop_keys is None:

705 loop_keys = {}

706

707 if hasattr(images, 'get_positions'):

708 images = [images]

709

710 fd = io.TextIOWrapper(fd, encoding='latin-1')

711 try:

712 for i, atoms in enumerate(images):

713 blockname = f'data_image{i}\n'

714 image_loop_keys = {key: loop_keys[key][i] for key in loop_keys}

715

716 write_cif_image(blockname, atoms, fd,

717 wrap=wrap,

718 labels=None if labels is None else labels[i],

719 loop_keys=image_loop_keys)

720

721 finally:

722 # Using the TextIOWrapper somehow causes the file to close

723 # when this function returns.

724 # Detach in order to circumvent this highly illogical problem:

725 fd.detach()

726

727

728def autolabel(symbols: Sequence[str]) -> List[str]:

729 no: Dict[str, int] = {}

730 labels = []

731 for symbol in symbols:

732 if symbol in no:

733 no[symbol] += 1

734 else:

735 no[symbol] = 1

736 labels.append('%s%d' % (symbol, no[symbol]))

737 return labels

738

739

740def chemical_formula_header(atoms):

741 counts = atoms.symbols.formula.count()

742 formula_sum = ' '.join(f'{sym}{count}' for sym, count

743 in counts.items())

744 return (f'_chemical_formula_structural {atoms.symbols}\n'

745 f'_chemical_formula_sum "{formula_sum}"\n')

746

747

748class BadOccupancies(ValueError):

749 pass

750

751

752def expand_kinds(atoms, coords):

753 # try to fetch occupancies // spacegroup_kinds - occupancy mapping

754 symbols = list(atoms.symbols)

755 coords = list(coords)

756 occupancies = [1] * len(symbols)

757 occ_info = atoms.info.get('occupancy')

758 kinds = atoms.arrays.get('spacegroup_kinds')

759 if occ_info is not None and kinds is not None:

760 for i, kind in enumerate(kinds):

761 occ_info_kind = occ_info[str(kind)]

762 symbol = symbols[i]

763 if symbol not in occ_info_kind:

764 raise BadOccupancies('Occupancies present but no occupancy '

765 'info for "{symbol}"')

766 occupancies[i] = occ_info_kind[symbol]

767 # extend the positions array in case of mixed occupancy

768 for sym, occ in occ_info[str(kind)].items():

769 if sym != symbols[i]:

770 symbols.append(sym)

771 coords.append(coords[i])

772 occupancies.append(occ)

773 return symbols, coords, occupancies

774

775

776def atoms_to_loop_data(atoms, wrap, labels, loop_keys):

777 if atoms.cell.rank == 3:

778 coord_type = 'fract'

779 coords = atoms.get_scaled_positions(wrap).tolist()

780 else:

781 coord_type = 'Cartn'

782 coords = atoms.get_positions(wrap).tolist()

783

784 try:

785 symbols, coords, occupancies = expand_kinds(atoms, coords)

786 except BadOccupancies as err:

787 warnings.warn(str(err))

788 occupancies = [1] * len(atoms)

789 symbols = list(atoms.symbols)

790

791 if labels is None:

792 labels = autolabel(symbols)

793

794 coord_headers = [f'_atom_site_{coord_type}_{axisname}'

795 for axisname in 'xyz']

796

797 loopdata = {}

798 loopdata['_atom_site_label'] = (labels, '{:<8s}')

799 loopdata['_atom_site_occupancy'] = (occupancies, '{:6.4f}')

800

801 _coords = np.array(coords)

802 for i, key in enumerate(coord_headers):

803 loopdata[key] = (_coords[:, i], '{:7.5f}')

804

805 loopdata['_atom_site_type_symbol'] = (symbols, '{:<2s}')

806 loopdata['_atom_site_symmetry_multiplicity'] = (

807 [1.0] * len(symbols), '{}')

808

809 for key in loop_keys:

810 # Should expand the loop_keys like we expand the occupancy stuff.

811 # Otherwise user will never figure out how to do this.

812 values = [loop_keys[key][i] for i in range(len(symbols))]

813 loopdata['_' + key] = (values, '{}')

814

815 return loopdata, coord_headers

816

817

818def write_cif_image(blockname, atoms, fd, *, wrap,

819 labels, loop_keys):

820 fd.write(blockname)

821 fd.write(chemical_formula_header(atoms))

822

823 rank = atoms.cell.rank

824 if rank == 3:

825 fd.write(format_cell(atoms.cell))

826 fd.write('\n')

827 fd.write(format_generic_spacegroup_info())

828 fd.write('\n')

829 elif rank != 0:

830 raise ValueError('CIF format can only represent systems with '

831 f'0 or 3 lattice vectors. Got {rank}.')

832

833 loopdata, coord_headers = atoms_to_loop_data(atoms, wrap, labels,

834 loop_keys)

835

836 headers = [

837 '_atom_site_type_symbol',

838 '_atom_site_label',

839 '_atom_site_symmetry_multiplicity',

840 *coord_headers,

841 '_atom_site_occupancy',

842 ]

843

844 headers += ['_' + key for key in loop_keys]

845

846 loop = CIFLoop()

847 for header in headers:

848 array, fmt = loopdata[header]

849 loop.add(header, array, fmt)

850

851 fd.write(loop.tostring())

Coverage for /builds/debichem-team/python-ase/ase/io/cif.py : 86.54%

483 statements

Coverage for /builds/debichem-team/python-ase/ase/io/cif.py : 86.54%

483 statements 418 run 65 missing 0 excluded

483 statements