Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1"""Module to read and write atoms in cif file format. 

2 

3See http://www.iucr.org/resources/cif/spec/version1.1/cifsyntax for a 

4description of the file format. STAR extensions as save frames, 

5global blocks, nested loops and multi-data values are not supported. 

6The "latin-1" encoding is required by the IUCR specification. 

7""" 

8 

9import io 

10import re 

11import shlex 

12import warnings 

13from typing import Dict, List, Tuple, Optional, Union, Iterator, Any, Sequence 

14import collections.abc 

15 

16import numpy as np 

17 

18from ase import Atoms 

19from ase.cell import Cell 

20from ase.spacegroup import crystal 

21from ase.spacegroup.spacegroup import spacegroup_from_data, Spacegroup 

22from ase.io.cif_unicode import format_unicode, handle_subscripts 

23from ase.utils import iofunction 

24 

25 

26rhombohedral_spacegroups = {146, 148, 155, 160, 161, 166, 167} 

27 

28 

29old_spacegroup_names = {'Abm2': 'Aem2', 

30 'Aba2': 'Aea2', 

31 'Cmca': 'Cmce', 

32 'Cmma': 'Cmme', 

33 'Ccca': 'Ccc1'} 

34 

35# CIF maps names to either single values or to multiple values via loops. 

36CIFDataValue = Union[str, int, float] 

37CIFData = Union[CIFDataValue, List[CIFDataValue]] 

38 

39 

40def convert_value(value: str) -> CIFDataValue: 

41 """Convert CIF value string to corresponding python type.""" 

42 value = value.strip() 

43 if re.match('(".*")|(\'.*\')$', value): 

44 return handle_subscripts(value[1:-1]) 

45 elif re.match(r'[+-]?\d+$', value): 

46 return int(value) 

47 elif re.match(r'[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?$', value): 

48 return float(value) 

49 elif re.match(r'[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?\(\d+\)$', 

50 value): 

51 return float(value[:value.index('(')]) # strip off uncertainties 

52 elif re.match(r'[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?\(\d+$', 

53 value): 

54 warnings.warn('Badly formed number: "{0}"'.format(value)) 

55 return float(value[:value.index('(')]) # strip off uncertainties 

56 else: 

57 return handle_subscripts(value) 

58 

59 

60def parse_multiline_string(lines: List[str], line: str) -> str: 

61 """Parse semicolon-enclosed multiline string and return it.""" 

62 assert line[0] == ';' 

63 strings = [line[1:].lstrip()] 

64 while True: 

65 line = lines.pop().strip() 

66 if line[:1] == ';': 

67 break 

68 strings.append(line) 

69 return '\n'.join(strings).strip() 

70 

71 

72def parse_singletag(lines: List[str], line: str) -> Tuple[str, CIFDataValue]: 

73 """Parse a CIF tag (entries starting with underscore). Returns 

74 a key-value pair.""" 

75 kv = line.split(None, 1) 

76 if len(kv) == 1: 

77 key = line 

78 line = lines.pop().strip() 

79 while not line or line[0] == '#': 

80 line = lines.pop().strip() 

81 if line[0] == ';': 

82 value = parse_multiline_string(lines, line) 

83 else: 

84 value = line 

85 else: 

86 key, value = kv 

87 return key, convert_value(value) 

88 

89 

90def parse_cif_loop_headers(lines: List[str]) -> Iterator[str]: 

91 header_pattern = r'\s*(_\S*)' 

92 

93 while lines: 

94 line = lines.pop() 

95 match = re.match(header_pattern, line) 

96 

97 if match: 

98 header = match.group(1).lower() 

99 yield header 

100 elif re.match(r'\s*#', line): 

101 # XXX we should filter comments out already. 

102 continue 

103 else: 

104 lines.append(line) # 'undo' pop 

105 return 

106 

107 

108def parse_cif_loop_data(lines: List[str], 

109 ncolumns: int) -> List[List[CIFDataValue]]: 

110 columns: List[List[CIFDataValue]] = [[] for _ in range(ncolumns)] 

111 

112 tokens = [] 

113 while lines: 

114 line = lines.pop().strip() 

115 lowerline = line.lower() 

116 if (not line or 

117 line.startswith('_') or 

118 lowerline.startswith('data_') or 

119 lowerline.startswith('loop_')): 

120 lines.append(line) 

121 break 

122 

123 if line.startswith('#'): 

124 continue 

125 

126 if line.startswith(';'): 

127 moretokens = [parse_multiline_string(lines, line)] 

128 else: 

129 if ncolumns == 1: 

130 moretokens = [line] 

131 else: 

132 moretokens = shlex.split(line, posix=False) 

133 

134 tokens += moretokens 

135 if len(tokens) < ncolumns: 

136 continue 

137 if len(tokens) == ncolumns: 

138 for i, token in enumerate(tokens): 

139 columns[i].append(convert_value(token)) 

140 else: 

141 warnings.warn('Wrong number {} of tokens, expected {}: {}' 

142 .format(len(tokens), ncolumns, tokens)) 

143 

144 # (Due to continue statements we cannot move this to start of loop) 

145 tokens = [] 

146 

147 if tokens: 

148 assert len(tokens) < ncolumns 

149 raise RuntimeError('CIF loop ended unexpectedly with incomplete row') 

150 

151 return columns 

152 

153 

154def parse_loop(lines: List[str]) -> Dict[str, List[CIFDataValue]]: 

155 """Parse a CIF loop. Returns a dict with column tag names as keys 

156 and a lists of the column content as values.""" 

157 

158 headers = list(parse_cif_loop_headers(lines)) 

159 # Dict would be better. But there can be repeated headers. 

160 

161 columns = parse_cif_loop_data(lines, len(headers)) 

162 

163 columns_dict = {} 

164 for i, header in enumerate(headers): 

165 if header in columns_dict: 

166 warnings.warn('Duplicated loop tags: {0}'.format(header)) 

167 else: 

168 columns_dict[header] = columns[i] 

169 return columns_dict 

170 

171 

172def parse_items(lines: List[str], line: str) -> Dict[str, CIFData]: 

173 """Parse a CIF data items and return a dict with all tags.""" 

174 tags: Dict[str, CIFData] = {} 

175 

176 while True: 

177 if not lines: 

178 break 

179 line = lines.pop().strip() 

180 if not line: 

181 continue 

182 lowerline = line.lower() 

183 if not line or line.startswith('#'): 

184 continue 

185 elif line.startswith('_'): 

186 key, value = parse_singletag(lines, line) 

187 tags[key.lower()] = value 

188 elif lowerline.startswith('loop_'): 

189 tags.update(parse_loop(lines)) 

190 elif lowerline.startswith('data_'): 

191 if line: 

192 lines.append(line) 

193 break 

194 elif line.startswith(';'): 

195 parse_multiline_string(lines, line) 

196 else: 

197 raise ValueError('Unexpected CIF file entry: "{0}"'.format(line)) 

198 return tags 

199 

200 

201class NoStructureData(RuntimeError): 

202 pass 

203 

204 

205class CIFBlock(collections.abc.Mapping): 

206 """A block (i.e., a single system) in a crystallographic information file. 

207 

208 Use this object to query CIF tags or import information as ASE objects.""" 

209 

210 cell_tags = ['_cell_length_a', '_cell_length_b', '_cell_length_c', 

211 '_cell_angle_alpha', '_cell_angle_beta', '_cell_angle_gamma'] 

212 

213 def __init__(self, name: str, tags: Dict[str, CIFData]): 

214 self.name = name 

215 self._tags = tags 

216 

217 def __repr__(self) -> str: 

218 tags = set(self._tags) 

219 return f'CIFBlock({self.name}, tags={tags})' 

220 

221 def __getitem__(self, key: str) -> CIFData: 

222 return self._tags[key] 

223 

224 def __iter__(self) -> Iterator[str]: 

225 return iter(self._tags) 

226 

227 def __len__(self) -> int: 

228 return len(self._tags) 

229 

230 def get(self, key, default=None): 

231 return self._tags.get(key, default) 

232 

233 def get_cellpar(self) -> Optional[List]: 

234 try: 

235 return [self[tag] for tag in self.cell_tags] 

236 except KeyError: 

237 return None 

238 

239 def get_cell(self) -> Cell: 

240 cellpar = self.get_cellpar() 

241 if cellpar is None: 

242 return Cell.new([0, 0, 0]) 

243 return Cell.new(cellpar) 

244 

245 def _raw_scaled_positions(self) -> Optional[np.ndarray]: 

246 coords = [self.get(name) for name in ['_atom_site_fract_x', 

247 '_atom_site_fract_y', 

248 '_atom_site_fract_z']] 

249 # XXX Shall we try to handle mixed coordinates? 

250 # (Some scaled vs others fractional) 

251 if None in coords: 

252 return None 

253 return np.array(coords).T 

254 

255 def _raw_positions(self) -> Optional[np.ndarray]: 

256 coords = [self.get('_atom_site_cartn_x'), 

257 self.get('_atom_site_cartn_y'), 

258 self.get('_atom_site_cartn_z')] 

259 if None in coords: 

260 return None 

261 return np.array(coords).T 

262 

263 def _get_site_coordinates(self): 

264 scaled = self._raw_scaled_positions() 

265 

266 if scaled is not None: 

267 return 'scaled', scaled 

268 

269 cartesian = self._raw_positions() 

270 

271 if cartesian is None: 

272 raise NoStructureData('No positions found in structure') 

273 

274 return 'cartesian', cartesian 

275 

276 def _get_symbols_with_deuterium(self): 

277 labels = self._get_any(['_atom_site_type_symbol', 

278 '_atom_site_label']) 

279 if labels is None: 

280 raise NoStructureData('No symbols') 

281 

282 symbols = [] 

283 for label in labels: 

284 if label == '.' or label == '?': 

285 raise NoStructureData('Symbols are undetermined') 

286 # Strip off additional labeling on chemical symbols 

287 match = re.search(r'([A-Z][a-z]?)', label) 

288 symbol = match.group(0) 

289 symbols.append(symbol) 

290 return symbols 

291 

292 def get_symbols(self) -> List[str]: 

293 symbols = self._get_symbols_with_deuterium() 

294 return [symbol if symbol != 'D' else 'H' for symbol in symbols] 

295 

296 def _where_deuterium(self): 

297 return np.array([symbol == 'D' for symbol 

298 in self._get_symbols_with_deuterium()], bool) 

299 

300 def _get_masses(self) -> Optional[np.ndarray]: 

301 mask = self._where_deuterium() 

302 if not any(mask): 

303 return None 

304 

305 symbols = self.get_symbols() 

306 masses = Atoms(symbols).get_masses() 

307 masses[mask] = 2.01355 

308 return masses 

309 

310 def _get_any(self, names): 

311 for name in names: 

312 if name in self: 

313 return self[name] 

314 return None 

315 

316 def _get_spacegroup_number(self): 

317 # Symmetry specification, see 

318 # http://www.iucr.org/resources/cif/dictionaries/cif_sym for a 

319 # complete list of official keys. In addition we also try to 

320 # support some commonly used depricated notations 

321 return self._get_any(['_space_group.it_number', 

322 '_space_group_it_number', 

323 '_symmetry_int_tables_number']) 

324 

325 def _get_spacegroup_name(self): 

326 hm_symbol = self._get_any(['_space_group_name_h-m_alt', 

327 '_symmetry_space_group_name_h-m', 

328 '_space_group.Patterson_name_h-m', 

329 '_space_group.patterson_name_h-m']) 

330 

331 hm_symbol = old_spacegroup_names.get(hm_symbol, hm_symbol) 

332 return hm_symbol 

333 

334 def _get_sitesym(self): 

335 sitesym = self._get_any(['_space_group_symop_operation_xyz', 

336 '_space_group_symop.operation_xyz', 

337 '_symmetry_equiv_pos_as_xyz']) 

338 if isinstance(sitesym, str): 

339 sitesym = [sitesym] 

340 return sitesym 

341 

342 def _get_fractional_occupancies(self): 

343 return self.get('_atom_site_occupancy') 

344 

345 def _get_setting(self) -> Optional[int]: 

346 setting_str = self.get('_symmetry_space_group_setting') 

347 if setting_str is None: 

348 return None 

349 

350 setting = int(setting_str) 

351 if setting not in [1, 2]: 

352 raise ValueError( 

353 f'Spacegroup setting must be 1 or 2, not {setting}') 

354 return setting 

355 

356 def get_spacegroup(self, subtrans_included) -> Spacegroup: 

357 # XXX The logic in this method needs serious cleaning up! 

358 # The setting needs to be passed as either 1 or two, not None (default) 

359 no = self._get_spacegroup_number() 

360 hm_symbol = self._get_spacegroup_name() 

361 sitesym = self._get_sitesym() 

362 

363 setting = 1 

364 spacegroup = 1 

365 if sitesym is not None: 

366 subtrans = [(0.0, 0.0, 0.0)] if subtrans_included else None 

367 spacegroup = spacegroup_from_data( 

368 no=no, symbol=hm_symbol, sitesym=sitesym, subtrans=subtrans, 

369 setting=setting) 

370 elif no is not None: 

371 spacegroup = no 

372 elif hm_symbol is not None: 

373 spacegroup = hm_symbol 

374 else: 

375 spacegroup = 1 

376 

377 setting_std = self._get_setting() 

378 

379 setting_name = None 

380 if '_symmetry_space_group_setting' in self: 

381 assert setting_std is not None 

382 setting = setting_std 

383 elif '_space_group_crystal_system' in self: 

384 setting_name = self['_space_group_crystal_system'] 

385 elif '_symmetry_cell_setting' in self: 

386 setting_name = self['_symmetry_cell_setting'] 

387 

388 if setting_name: 

389 no = Spacegroup(spacegroup).no 

390 if no in rhombohedral_spacegroups: 

391 if setting_name == 'hexagonal': 

392 setting = 1 

393 elif setting_name in ('trigonal', 'rhombohedral'): 

394 setting = 2 

395 else: 

396 warnings.warn( 

397 'unexpected crystal system %r for space group %r' % ( 

398 setting_name, spacegroup)) 

399 # FIXME - check for more crystal systems... 

400 else: 

401 warnings.warn( 

402 'crystal system %r is not interpreted for space group %r. ' 

403 'This may result in wrong setting!' % ( 

404 setting_name, spacegroup)) 

405 

406 spg = Spacegroup(spacegroup, setting) 

407 if no is not None: 

408 assert int(spg) == no, (int(spg), no) 

409 return spg 

410 

411 def get_unsymmetrized_structure(self) -> Atoms: 

412 """Return Atoms without symmetrizing coordinates. 

413 

414 This returns a (normally) unphysical Atoms object 

415 corresponding only to those coordinates included 

416 in the CIF file, useful for e.g. debugging. 

417 

418 This method may change behaviour in the future.""" 

419 symbols = self.get_symbols() 

420 coordtype, coords = self._get_site_coordinates() 

421 

422 atoms = Atoms(symbols=symbols, 

423 cell=self.get_cell(), 

424 masses=self._get_masses()) 

425 

426 if coordtype == 'scaled': 

427 atoms.set_scaled_positions(coords) 

428 else: 

429 assert coordtype == 'cartesian' 

430 atoms.positions[:] = coords 

431 

432 return atoms 

433 

434 def has_structure(self): 

435 """Whether this CIF block has an atomic configuration.""" 

436 try: 

437 self.get_symbols() 

438 self._get_site_coordinates() 

439 except NoStructureData: 

440 return False 

441 else: 

442 return True 

443 

444 def get_atoms(self, store_tags=False, primitive_cell=False, 

445 subtrans_included=True, fractional_occupancies=True) -> Atoms: 

446 """Returns an Atoms object from a cif tags dictionary. See read_cif() 

447 for a description of the arguments.""" 

448 if primitive_cell and subtrans_included: 

449 raise RuntimeError( 

450 'Primitive cell cannot be determined when sublattice ' 

451 'translations are included in the symmetry operations listed ' 

452 'in the CIF file, i.e. when `subtrans_included` is True.') 

453 

454 cell = self.get_cell() 

455 assert cell.rank in [0, 3] 

456 

457 kwargs: Dict[str, Any] = {} 

458 if store_tags: 

459 kwargs['info'] = self._tags.copy() 

460 

461 if fractional_occupancies: 

462 occupancies = self._get_fractional_occupancies() 

463 else: 

464 occupancies = None 

465 

466 if occupancies is not None: 

467 # no warnings in this case 

468 kwargs['onduplicates'] = 'keep' 

469 

470 # The unsymmetrized_structure is not the asymmetric unit 

471 # because the asymmetric unit should have (in general) a smaller cell, 

472 # whereas we have the full cell. 

473 unsymmetrized_structure = self.get_unsymmetrized_structure() 

474 

475 if cell.rank == 3: 

476 spacegroup = self.get_spacegroup(subtrans_included) 

477 atoms = crystal(unsymmetrized_structure, 

478 spacegroup=spacegroup, 

479 setting=spacegroup.setting, 

480 occupancies=occupancies, 

481 primitive_cell=primitive_cell, 

482 **kwargs) 

483 else: 

484 atoms = unsymmetrized_structure 

485 if kwargs.get('info') is not None: 

486 atoms.info.update(kwargs['info']) 

487 if occupancies is not None: 

488 # Compile an occupancies dictionary 

489 occ_dict = {} 

490 for i, sym in enumerate(atoms.symbols): 

491 occ_dict[str(i)] = {sym: occupancies[i]} 

492 atoms.info['occupancy'] = occ_dict 

493 

494 return atoms 

495 

496 

497def parse_block(lines: List[str], line: str) -> CIFBlock: 

498 assert line.lower().startswith('data_') 

499 blockname = line.split('_', 1)[1].rstrip() 

500 tags = parse_items(lines, line) 

501 return CIFBlock(blockname, tags) 

502 

503 

504def parse_cif(fileobj, reader='ase') -> Iterator[CIFBlock]: 

505 if reader == 'ase': 

506 return parse_cif_ase(fileobj) 

507 elif reader == 'pycodcif': 

508 return parse_cif_pycodcif(fileobj) 

509 else: 

510 raise ValueError(f'No such reader: {reader}') 

511 

512 

513def parse_cif_ase(fileobj) -> Iterator[CIFBlock]: 

514 """Parse a CIF file using ase CIF parser.""" 

515 

516 if isinstance(fileobj, str): 

517 with open(fileobj, 'rb') as fileobj: 

518 data = fileobj.read() 

519 else: 

520 data = fileobj.read() 

521 

522 if isinstance(data, bytes): 

523 data = data.decode('latin1') 

524 data = format_unicode(data) 

525 lines = [e for e in data.split('\n') if len(e) > 0] 

526 if len(lines) > 0 and lines[0].rstrip() == '#\\#CIF_2.0': 

527 warnings.warn('CIF v2.0 file format detected; `ase` CIF reader might ' 

528 'incorrectly interpret some syntax constructions, use ' 

529 '`pycodcif` reader instead') 

530 lines = [''] + lines[::-1] # all lines (reversed) 

531 

532 while lines: 

533 line = lines.pop().strip() 

534 if not line or line.startswith('#'): 

535 continue 

536 

537 yield parse_block(lines, line) 

538 

539 

540def parse_cif_pycodcif(fileobj) -> Iterator[CIFBlock]: 

541 """Parse a CIF file using pycodcif CIF parser.""" 

542 if not isinstance(fileobj, str): 

543 fileobj = fileobj.name 

544 

545 try: 

546 from pycodcif import parse 

547 except ImportError: 

548 raise ImportError( 

549 'parse_cif_pycodcif requires pycodcif ' + 

550 '(http://wiki.crystallography.net/cod-tools/pycodcif/)') 

551 

552 data, _, _ = parse(fileobj) 

553 

554 for datablock in data: 

555 tags = datablock['values'] 

556 for tag in tags.keys(): 

557 values = [convert_value(x) for x in tags[tag]] 

558 if len(values) == 1: 

559 tags[tag] = values[0] 

560 else: 

561 tags[tag] = values 

562 yield CIFBlock(datablock['name'], tags) 

563 

564 

565def read_cif(fileobj, index, store_tags=False, primitive_cell=False, 

566 subtrans_included=True, fractional_occupancies=True, 

567 reader='ase') -> Iterator[Atoms]: 

568 """Read Atoms object from CIF file. *index* specifies the data 

569 block number or name (if string) to return. 

570 

571 If *index* is None or a slice object, a list of atoms objects will 

572 be returned. In the case of *index* is *None* or *slice(None)*, 

573 only blocks with valid crystal data will be included. 

574 

575 If *store_tags* is true, the *info* attribute of the returned 

576 Atoms object will be populated with all tags in the corresponding 

577 cif data block. 

578 

579 If *primitive_cell* is true, the primitive cell will be built instead 

580 of the conventional cell. 

581 

582 If *subtrans_included* is true, sublattice translations are 

583 assumed to be included among the symmetry operations listed in the 

584 CIF file (seems to be the common behaviour of CIF files). 

585 Otherwise the sublattice translations are determined from setting 

586 1 of the extracted space group. A result of setting this flag to 

587 true, is that it will not be possible to determine the primitive 

588 cell. 

589 

590 If *fractional_occupancies* is true, the resulting atoms object will be 

591 tagged equipped with a dictionary `occupancy`. The keys of this dictionary 

592 will be integers converted to strings. The conversion to string is done 

593 in order to avoid troubles with JSON encoding/decoding of the dictionaries 

594 with non-string keys. 

595 Also, in case of mixed occupancies, the atom's chemical symbol will be 

596 that of the most dominant species. 

597 

598 String *reader* is used to select CIF reader. Value `ase` selects 

599 built-in CIF reader (default), while `pycodcif` selects CIF reader based 

600 on `pycodcif` package. 

601 """ 

602 # Find all CIF blocks with valid crystal data 

603 images = [] 

604 for block in parse_cif(fileobj, reader): 

605 if not block.has_structure(): 

606 continue 

607 

608 atoms = block.get_atoms( 

609 store_tags, primitive_cell, 

610 subtrans_included, 

611 fractional_occupancies=fractional_occupancies) 

612 images.append(atoms) 

613 

614 for atoms in images[index]: 

615 yield atoms 

616 

617 

618def format_cell(cell: Cell) -> str: 

619 assert cell.rank == 3 

620 lines = [] 

621 for name, value in zip(CIFBlock.cell_tags, cell.cellpar()): 

622 line = '{:20} {:g}\n'.format(name, value) 

623 lines.append(line) 

624 assert len(lines) == 6 

625 return ''.join(lines) 

626 

627 

628def format_generic_spacegroup_info() -> str: 

629 # We assume no symmetry whatsoever 

630 return '\n'.join([ 

631 '_space_group_name_H-M_alt "P 1"', 

632 '_space_group_IT_number 1', 

633 '', 

634 'loop_', 

635 ' _space_group_symop_operation_xyz', 

636 " 'x, y, z'", 

637 '', 

638 ]) 

639 

640 

641class CIFLoop: 

642 def __init__(self): 

643 self.names = [] 

644 self.formats = [] 

645 self.arrays = [] 

646 

647 def add(self, name, array, fmt): 

648 assert name.startswith('_') 

649 self.names.append(name) 

650 self.formats.append(fmt) 

651 self.arrays.append(array) 

652 if len(self.arrays[0]) != len(self.arrays[-1]): 

653 raise ValueError(f'Loop data "{name}" has {len(array)} ' 

654 'elements, expected {len(self.arrays[0])}') 

655 

656 def tostring(self): 

657 lines = [] 

658 append = lines.append 

659 append('loop_') 

660 for name in self.names: 

661 append(f' {name}') 

662 

663 template = ' ' + ' '.join(self.formats) 

664 

665 ncolumns = len(self.arrays) 

666 nrows = len(self.arrays[0]) if ncolumns > 0 else 0 

667 for row in range(nrows): 

668 arraydata = [array[row] for array in self.arrays] 

669 line = template.format(*arraydata) 

670 append(line) 

671 append('') 

672 return '\n'.join(lines) 

673 

674 

675@iofunction('wb') 

676def write_cif(fd, images, cif_format=None, 

677 wrap=True, labels=None, loop_keys=None) -> None: 

678 """Write *images* to CIF file. 

679 

680 wrap: bool 

681 Wrap atoms into unit cell. 

682 

683 labels: list 

684 Use this list (shaped list[i_frame][i_atom] = string) for the 

685 '_atom_site_label' section instead of automatically generating 

686 it from the element symbol. 

687 

688 loop_keys: dict 

689 Add the information from this dictionary to the `loop_` 

690 section. Keys are printed to the `loop_` section preceeded by 

691 ' _'. dict[key] should contain the data printed for each atom, 

692 so it needs to have the setup `dict[key][i_frame][i_atom] = 

693 string`. The strings are printed as they are, so take care of 

694 formating. Information can be re-read using the `store_tags` 

695 option of the cif reader. 

696 

697 """ 

698 

699 if cif_format is not None: 

700 warnings.warn('The cif_format argument is deprecated and may be ' 

701 'removed in the future. Use loop_keys to customize ' 

702 'data written in loop.', FutureWarning) 

703 

704 if loop_keys is None: 

705 loop_keys = {} 

706 

707 if hasattr(images, 'get_positions'): 

708 images = [images] 

709 

710 fd = io.TextIOWrapper(fd, encoding='latin-1') 

711 try: 

712 for i, atoms in enumerate(images): 

713 blockname = f'data_image{i}\n' 

714 image_loop_keys = {key: loop_keys[key][i] for key in loop_keys} 

715 

716 write_cif_image(blockname, atoms, fd, 

717 wrap=wrap, 

718 labels=None if labels is None else labels[i], 

719 loop_keys=image_loop_keys) 

720 

721 finally: 

722 # Using the TextIOWrapper somehow causes the file to close 

723 # when this function returns. 

724 # Detach in order to circumvent this highly illogical problem: 

725 fd.detach() 

726 

727 

728def autolabel(symbols: Sequence[str]) -> List[str]: 

729 no: Dict[str, int] = {} 

730 labels = [] 

731 for symbol in symbols: 

732 if symbol in no: 

733 no[symbol] += 1 

734 else: 

735 no[symbol] = 1 

736 labels.append('%s%d' % (symbol, no[symbol])) 

737 return labels 

738 

739 

740def chemical_formula_header(atoms): 

741 counts = atoms.symbols.formula.count() 

742 formula_sum = ' '.join(f'{sym}{count}' for sym, count 

743 in counts.items()) 

744 return (f'_chemical_formula_structural {atoms.symbols}\n' 

745 f'_chemical_formula_sum "{formula_sum}"\n') 

746 

747 

748class BadOccupancies(ValueError): 

749 pass 

750 

751 

752def expand_kinds(atoms, coords): 

753 # try to fetch occupancies // spacegroup_kinds - occupancy mapping 

754 symbols = list(atoms.symbols) 

755 coords = list(coords) 

756 occupancies = [1] * len(symbols) 

757 occ_info = atoms.info.get('occupancy') 

758 kinds = atoms.arrays.get('spacegroup_kinds') 

759 if occ_info is not None and kinds is not None: 

760 for i, kind in enumerate(kinds): 

761 occ_info_kind = occ_info[str(kind)] 

762 symbol = symbols[i] 

763 if symbol not in occ_info_kind: 

764 raise BadOccupancies('Occupancies present but no occupancy ' 

765 'info for "{symbol}"') 

766 occupancies[i] = occ_info_kind[symbol] 

767 # extend the positions array in case of mixed occupancy 

768 for sym, occ in occ_info[str(kind)].items(): 

769 if sym != symbols[i]: 

770 symbols.append(sym) 

771 coords.append(coords[i]) 

772 occupancies.append(occ) 

773 return symbols, coords, occupancies 

774 

775 

776def atoms_to_loop_data(atoms, wrap, labels, loop_keys): 

777 if atoms.cell.rank == 3: 

778 coord_type = 'fract' 

779 coords = atoms.get_scaled_positions(wrap).tolist() 

780 else: 

781 coord_type = 'Cartn' 

782 coords = atoms.get_positions(wrap).tolist() 

783 

784 try: 

785 symbols, coords, occupancies = expand_kinds(atoms, coords) 

786 except BadOccupancies as err: 

787 warnings.warn(str(err)) 

788 occupancies = [1] * len(atoms) 

789 symbols = list(atoms.symbols) 

790 

791 if labels is None: 

792 labels = autolabel(symbols) 

793 

794 coord_headers = [f'_atom_site_{coord_type}_{axisname}' 

795 for axisname in 'xyz'] 

796 

797 loopdata = {} 

798 loopdata['_atom_site_label'] = (labels, '{:<8s}') 

799 loopdata['_atom_site_occupancy'] = (occupancies, '{:6.4f}') 

800 

801 _coords = np.array(coords) 

802 for i, key in enumerate(coord_headers): 

803 loopdata[key] = (_coords[:, i], '{:7.5f}') 

804 

805 loopdata['_atom_site_type_symbol'] = (symbols, '{:<2s}') 

806 loopdata['_atom_site_symmetry_multiplicity'] = ( 

807 [1.0] * len(symbols), '{}') 

808 

809 for key in loop_keys: 

810 # Should expand the loop_keys like we expand the occupancy stuff. 

811 # Otherwise user will never figure out how to do this. 

812 values = [loop_keys[key][i] for i in range(len(symbols))] 

813 loopdata['_' + key] = (values, '{}') 

814 

815 return loopdata, coord_headers 

816 

817 

818def write_cif_image(blockname, atoms, fd, *, wrap, 

819 labels, loop_keys): 

820 fd.write(blockname) 

821 fd.write(chemical_formula_header(atoms)) 

822 

823 rank = atoms.cell.rank 

824 if rank == 3: 

825 fd.write(format_cell(atoms.cell)) 

826 fd.write('\n') 

827 fd.write(format_generic_spacegroup_info()) 

828 fd.write('\n') 

829 elif rank != 0: 

830 raise ValueError('CIF format can only represent systems with ' 

831 f'0 or 3 lattice vectors. Got {rank}.') 

832 

833 loopdata, coord_headers = atoms_to_loop_data(atoms, wrap, labels, 

834 loop_keys) 

835 

836 headers = [ 

837 '_atom_site_type_symbol', 

838 '_atom_site_label', 

839 '_atom_site_symmetry_multiplicity', 

840 *coord_headers, 

841 '_atom_site_occupancy', 

842 ] 

843 

844 headers += ['_' + key for key in loop_keys] 

845 

846 loop = CIFLoop() 

847 for header in headers: 

848 array, fmt = loopdata[header] 

849 loop.add(header, array, fmt) 

850 

851 fd.write(loop.tostring())