Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""Module to read and write atoms in cif file format.
3See http://www.iucr.org/resources/cif/spec/version1.1/cifsyntax for a
4description of the file format. STAR extensions as save frames,
5global blocks, nested loops and multi-data values are not supported.
6The "latin-1" encoding is required by the IUCR specification.
7"""
9import io
10import re
11import shlex
12import warnings
13from typing import Dict, List, Tuple, Optional, Union, Iterator, Any, Sequence
14import collections.abc
16import numpy as np
18from ase import Atoms
19from ase.cell import Cell
20from ase.spacegroup import crystal
21from ase.spacegroup.spacegroup import spacegroup_from_data, Spacegroup
22from ase.io.cif_unicode import format_unicode, handle_subscripts
23from ase.utils import iofunction
26rhombohedral_spacegroups = {146, 148, 155, 160, 161, 166, 167}
29old_spacegroup_names = {'Abm2': 'Aem2',
30 'Aba2': 'Aea2',
31 'Cmca': 'Cmce',
32 'Cmma': 'Cmme',
33 'Ccca': 'Ccc1'}
35# CIF maps names to either single values or to multiple values via loops.
36CIFDataValue = Union[str, int, float]
37CIFData = Union[CIFDataValue, List[CIFDataValue]]
40def convert_value(value: str) -> CIFDataValue:
41 """Convert CIF value string to corresponding python type."""
42 value = value.strip()
43 if re.match('(".*")|(\'.*\')$', value):
44 return handle_subscripts(value[1:-1])
45 elif re.match(r'[+-]?\d+$', value):
46 return int(value)
47 elif re.match(r'[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?$', value):
48 return float(value)
49 elif re.match(r'[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?\(\d+\)$',
50 value):
51 return float(value[:value.index('(')]) # strip off uncertainties
52 elif re.match(r'[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?\(\d+$',
53 value):
54 warnings.warn('Badly formed number: "{0}"'.format(value))
55 return float(value[:value.index('(')]) # strip off uncertainties
56 else:
57 return handle_subscripts(value)
60def parse_multiline_string(lines: List[str], line: str) -> str:
61 """Parse semicolon-enclosed multiline string and return it."""
62 assert line[0] == ';'
63 strings = [line[1:].lstrip()]
64 while True:
65 line = lines.pop().strip()
66 if line[:1] == ';':
67 break
68 strings.append(line)
69 return '\n'.join(strings).strip()
72def parse_singletag(lines: List[str], line: str) -> Tuple[str, CIFDataValue]:
73 """Parse a CIF tag (entries starting with underscore). Returns
74 a key-value pair."""
75 kv = line.split(None, 1)
76 if len(kv) == 1:
77 key = line
78 line = lines.pop().strip()
79 while not line or line[0] == '#':
80 line = lines.pop().strip()
81 if line[0] == ';':
82 value = parse_multiline_string(lines, line)
83 else:
84 value = line
85 else:
86 key, value = kv
87 return key, convert_value(value)
90def parse_cif_loop_headers(lines: List[str]) -> Iterator[str]:
91 header_pattern = r'\s*(_\S*)'
93 while lines:
94 line = lines.pop()
95 match = re.match(header_pattern, line)
97 if match:
98 header = match.group(1).lower()
99 yield header
100 elif re.match(r'\s*#', line):
101 # XXX we should filter comments out already.
102 continue
103 else:
104 lines.append(line) # 'undo' pop
105 return
108def parse_cif_loop_data(lines: List[str],
109 ncolumns: int) -> List[List[CIFDataValue]]:
110 columns: List[List[CIFDataValue]] = [[] for _ in range(ncolumns)]
112 tokens = []
113 while lines:
114 line = lines.pop().strip()
115 lowerline = line.lower()
116 if (not line or
117 line.startswith('_') or
118 lowerline.startswith('data_') or
119 lowerline.startswith('loop_')):
120 lines.append(line)
121 break
123 if line.startswith('#'):
124 continue
126 if line.startswith(';'):
127 moretokens = [parse_multiline_string(lines, line)]
128 else:
129 if ncolumns == 1:
130 moretokens = [line]
131 else:
132 moretokens = shlex.split(line, posix=False)
134 tokens += moretokens
135 if len(tokens) < ncolumns:
136 continue
137 if len(tokens) == ncolumns:
138 for i, token in enumerate(tokens):
139 columns[i].append(convert_value(token))
140 else:
141 warnings.warn('Wrong number {} of tokens, expected {}: {}'
142 .format(len(tokens), ncolumns, tokens))
144 # (Due to continue statements we cannot move this to start of loop)
145 tokens = []
147 if tokens:
148 assert len(tokens) < ncolumns
149 raise RuntimeError('CIF loop ended unexpectedly with incomplete row')
151 return columns
154def parse_loop(lines: List[str]) -> Dict[str, List[CIFDataValue]]:
155 """Parse a CIF loop. Returns a dict with column tag names as keys
156 and a lists of the column content as values."""
158 headers = list(parse_cif_loop_headers(lines))
159 # Dict would be better. But there can be repeated headers.
161 columns = parse_cif_loop_data(lines, len(headers))
163 columns_dict = {}
164 for i, header in enumerate(headers):
165 if header in columns_dict:
166 warnings.warn('Duplicated loop tags: {0}'.format(header))
167 else:
168 columns_dict[header] = columns[i]
169 return columns_dict
172def parse_items(lines: List[str], line: str) -> Dict[str, CIFData]:
173 """Parse a CIF data items and return a dict with all tags."""
174 tags: Dict[str, CIFData] = {}
176 while True:
177 if not lines:
178 break
179 line = lines.pop().strip()
180 if not line:
181 continue
182 lowerline = line.lower()
183 if not line or line.startswith('#'):
184 continue
185 elif line.startswith('_'):
186 key, value = parse_singletag(lines, line)
187 tags[key.lower()] = value
188 elif lowerline.startswith('loop_'):
189 tags.update(parse_loop(lines))
190 elif lowerline.startswith('data_'):
191 if line:
192 lines.append(line)
193 break
194 elif line.startswith(';'):
195 parse_multiline_string(lines, line)
196 else:
197 raise ValueError('Unexpected CIF file entry: "{0}"'.format(line))
198 return tags
201class NoStructureData(RuntimeError):
202 pass
205class CIFBlock(collections.abc.Mapping):
206 """A block (i.e., a single system) in a crystallographic information file.
208 Use this object to query CIF tags or import information as ASE objects."""
210 cell_tags = ['_cell_length_a', '_cell_length_b', '_cell_length_c',
211 '_cell_angle_alpha', '_cell_angle_beta', '_cell_angle_gamma']
213 def __init__(self, name: str, tags: Dict[str, CIFData]):
214 self.name = name
215 self._tags = tags
217 def __repr__(self) -> str:
218 tags = set(self._tags)
219 return f'CIFBlock({self.name}, tags={tags})'
221 def __getitem__(self, key: str) -> CIFData:
222 return self._tags[key]
224 def __iter__(self) -> Iterator[str]:
225 return iter(self._tags)
227 def __len__(self) -> int:
228 return len(self._tags)
230 def get(self, key, default=None):
231 return self._tags.get(key, default)
233 def get_cellpar(self) -> Optional[List]:
234 try:
235 return [self[tag] for tag in self.cell_tags]
236 except KeyError:
237 return None
239 def get_cell(self) -> Cell:
240 cellpar = self.get_cellpar()
241 if cellpar is None:
242 return Cell.new([0, 0, 0])
243 return Cell.new(cellpar)
245 def _raw_scaled_positions(self) -> Optional[np.ndarray]:
246 coords = [self.get(name) for name in ['_atom_site_fract_x',
247 '_atom_site_fract_y',
248 '_atom_site_fract_z']]
249 # XXX Shall we try to handle mixed coordinates?
250 # (Some scaled vs others fractional)
251 if None in coords:
252 return None
253 return np.array(coords).T
255 def _raw_positions(self) -> Optional[np.ndarray]:
256 coords = [self.get('_atom_site_cartn_x'),
257 self.get('_atom_site_cartn_y'),
258 self.get('_atom_site_cartn_z')]
259 if None in coords:
260 return None
261 return np.array(coords).T
263 def _get_site_coordinates(self):
264 scaled = self._raw_scaled_positions()
266 if scaled is not None:
267 return 'scaled', scaled
269 cartesian = self._raw_positions()
271 if cartesian is None:
272 raise NoStructureData('No positions found in structure')
274 return 'cartesian', cartesian
276 def _get_symbols_with_deuterium(self):
277 labels = self._get_any(['_atom_site_type_symbol',
278 '_atom_site_label'])
279 if labels is None:
280 raise NoStructureData('No symbols')
282 symbols = []
283 for label in labels:
284 if label == '.' or label == '?':
285 raise NoStructureData('Symbols are undetermined')
286 # Strip off additional labeling on chemical symbols
287 match = re.search(r'([A-Z][a-z]?)', label)
288 symbol = match.group(0)
289 symbols.append(symbol)
290 return symbols
292 def get_symbols(self) -> List[str]:
293 symbols = self._get_symbols_with_deuterium()
294 return [symbol if symbol != 'D' else 'H' for symbol in symbols]
296 def _where_deuterium(self):
297 return np.array([symbol == 'D' for symbol
298 in self._get_symbols_with_deuterium()], bool)
300 def _get_masses(self) -> Optional[np.ndarray]:
301 mask = self._where_deuterium()
302 if not any(mask):
303 return None
305 symbols = self.get_symbols()
306 masses = Atoms(symbols).get_masses()
307 masses[mask] = 2.01355
308 return masses
310 def _get_any(self, names):
311 for name in names:
312 if name in self:
313 return self[name]
314 return None
316 def _get_spacegroup_number(self):
317 # Symmetry specification, see
318 # http://www.iucr.org/resources/cif/dictionaries/cif_sym for a
319 # complete list of official keys. In addition we also try to
320 # support some commonly used depricated notations
321 return self._get_any(['_space_group.it_number',
322 '_space_group_it_number',
323 '_symmetry_int_tables_number'])
325 def _get_spacegroup_name(self):
326 hm_symbol = self._get_any(['_space_group_name_h-m_alt',
327 '_symmetry_space_group_name_h-m',
328 '_space_group.Patterson_name_h-m',
329 '_space_group.patterson_name_h-m'])
331 hm_symbol = old_spacegroup_names.get(hm_symbol, hm_symbol)
332 return hm_symbol
334 def _get_sitesym(self):
335 sitesym = self._get_any(['_space_group_symop_operation_xyz',
336 '_space_group_symop.operation_xyz',
337 '_symmetry_equiv_pos_as_xyz'])
338 if isinstance(sitesym, str):
339 sitesym = [sitesym]
340 return sitesym
342 def _get_fractional_occupancies(self):
343 return self.get('_atom_site_occupancy')
345 def _get_setting(self) -> Optional[int]:
346 setting_str = self.get('_symmetry_space_group_setting')
347 if setting_str is None:
348 return None
350 setting = int(setting_str)
351 if setting not in [1, 2]:
352 raise ValueError(
353 f'Spacegroup setting must be 1 or 2, not {setting}')
354 return setting
356 def get_spacegroup(self, subtrans_included) -> Spacegroup:
357 # XXX The logic in this method needs serious cleaning up!
358 # The setting needs to be passed as either 1 or two, not None (default)
359 no = self._get_spacegroup_number()
360 hm_symbol = self._get_spacegroup_name()
361 sitesym = self._get_sitesym()
363 setting = 1
364 spacegroup = 1
365 if sitesym is not None:
366 subtrans = [(0.0, 0.0, 0.0)] if subtrans_included else None
367 spacegroup = spacegroup_from_data(
368 no=no, symbol=hm_symbol, sitesym=sitesym, subtrans=subtrans,
369 setting=setting)
370 elif no is not None:
371 spacegroup = no
372 elif hm_symbol is not None:
373 spacegroup = hm_symbol
374 else:
375 spacegroup = 1
377 setting_std = self._get_setting()
379 setting_name = None
380 if '_symmetry_space_group_setting' in self:
381 assert setting_std is not None
382 setting = setting_std
383 elif '_space_group_crystal_system' in self:
384 setting_name = self['_space_group_crystal_system']
385 elif '_symmetry_cell_setting' in self:
386 setting_name = self['_symmetry_cell_setting']
388 if setting_name:
389 no = Spacegroup(spacegroup).no
390 if no in rhombohedral_spacegroups:
391 if setting_name == 'hexagonal':
392 setting = 1
393 elif setting_name in ('trigonal', 'rhombohedral'):
394 setting = 2
395 else:
396 warnings.warn(
397 'unexpected crystal system %r for space group %r' % (
398 setting_name, spacegroup))
399 # FIXME - check for more crystal systems...
400 else:
401 warnings.warn(
402 'crystal system %r is not interpreted for space group %r. '
403 'This may result in wrong setting!' % (
404 setting_name, spacegroup))
406 spg = Spacegroup(spacegroup, setting)
407 if no is not None:
408 assert int(spg) == no, (int(spg), no)
409 return spg
411 def get_unsymmetrized_structure(self) -> Atoms:
412 """Return Atoms without symmetrizing coordinates.
414 This returns a (normally) unphysical Atoms object
415 corresponding only to those coordinates included
416 in the CIF file, useful for e.g. debugging.
418 This method may change behaviour in the future."""
419 symbols = self.get_symbols()
420 coordtype, coords = self._get_site_coordinates()
422 atoms = Atoms(symbols=symbols,
423 cell=self.get_cell(),
424 masses=self._get_masses())
426 if coordtype == 'scaled':
427 atoms.set_scaled_positions(coords)
428 else:
429 assert coordtype == 'cartesian'
430 atoms.positions[:] = coords
432 return atoms
434 def has_structure(self):
435 """Whether this CIF block has an atomic configuration."""
436 try:
437 self.get_symbols()
438 self._get_site_coordinates()
439 except NoStructureData:
440 return False
441 else:
442 return True
444 def get_atoms(self, store_tags=False, primitive_cell=False,
445 subtrans_included=True, fractional_occupancies=True) -> Atoms:
446 """Returns an Atoms object from a cif tags dictionary. See read_cif()
447 for a description of the arguments."""
448 if primitive_cell and subtrans_included:
449 raise RuntimeError(
450 'Primitive cell cannot be determined when sublattice '
451 'translations are included in the symmetry operations listed '
452 'in the CIF file, i.e. when `subtrans_included` is True.')
454 cell = self.get_cell()
455 assert cell.rank in [0, 3]
457 kwargs: Dict[str, Any] = {}
458 if store_tags:
459 kwargs['info'] = self._tags.copy()
461 if fractional_occupancies:
462 occupancies = self._get_fractional_occupancies()
463 else:
464 occupancies = None
466 if occupancies is not None:
467 # no warnings in this case
468 kwargs['onduplicates'] = 'keep'
470 # The unsymmetrized_structure is not the asymmetric unit
471 # because the asymmetric unit should have (in general) a smaller cell,
472 # whereas we have the full cell.
473 unsymmetrized_structure = self.get_unsymmetrized_structure()
475 if cell.rank == 3:
476 spacegroup = self.get_spacegroup(subtrans_included)
477 atoms = crystal(unsymmetrized_structure,
478 spacegroup=spacegroup,
479 setting=spacegroup.setting,
480 occupancies=occupancies,
481 primitive_cell=primitive_cell,
482 **kwargs)
483 else:
484 atoms = unsymmetrized_structure
485 if kwargs.get('info') is not None:
486 atoms.info.update(kwargs['info'])
487 if occupancies is not None:
488 # Compile an occupancies dictionary
489 occ_dict = {}
490 for i, sym in enumerate(atoms.symbols):
491 occ_dict[str(i)] = {sym: occupancies[i]}
492 atoms.info['occupancy'] = occ_dict
494 return atoms
497def parse_block(lines: List[str], line: str) -> CIFBlock:
498 assert line.lower().startswith('data_')
499 blockname = line.split('_', 1)[1].rstrip()
500 tags = parse_items(lines, line)
501 return CIFBlock(blockname, tags)
504def parse_cif(fileobj, reader='ase') -> Iterator[CIFBlock]:
505 if reader == 'ase':
506 return parse_cif_ase(fileobj)
507 elif reader == 'pycodcif':
508 return parse_cif_pycodcif(fileobj)
509 else:
510 raise ValueError(f'No such reader: {reader}')
513def parse_cif_ase(fileobj) -> Iterator[CIFBlock]:
514 """Parse a CIF file using ase CIF parser."""
516 if isinstance(fileobj, str):
517 with open(fileobj, 'rb') as fileobj:
518 data = fileobj.read()
519 else:
520 data = fileobj.read()
522 if isinstance(data, bytes):
523 data = data.decode('latin1')
524 data = format_unicode(data)
525 lines = [e for e in data.split('\n') if len(e) > 0]
526 if len(lines) > 0 and lines[0].rstrip() == '#\\#CIF_2.0':
527 warnings.warn('CIF v2.0 file format detected; `ase` CIF reader might '
528 'incorrectly interpret some syntax constructions, use '
529 '`pycodcif` reader instead')
530 lines = [''] + lines[::-1] # all lines (reversed)
532 while lines:
533 line = lines.pop().strip()
534 if not line or line.startswith('#'):
535 continue
537 yield parse_block(lines, line)
540def parse_cif_pycodcif(fileobj) -> Iterator[CIFBlock]:
541 """Parse a CIF file using pycodcif CIF parser."""
542 if not isinstance(fileobj, str):
543 fileobj = fileobj.name
545 try:
546 from pycodcif import parse
547 except ImportError:
548 raise ImportError(
549 'parse_cif_pycodcif requires pycodcif ' +
550 '(http://wiki.crystallography.net/cod-tools/pycodcif/)')
552 data, _, _ = parse(fileobj)
554 for datablock in data:
555 tags = datablock['values']
556 for tag in tags.keys():
557 values = [convert_value(x) for x in tags[tag]]
558 if len(values) == 1:
559 tags[tag] = values[0]
560 else:
561 tags[tag] = values
562 yield CIFBlock(datablock['name'], tags)
565def read_cif(fileobj, index, store_tags=False, primitive_cell=False,
566 subtrans_included=True, fractional_occupancies=True,
567 reader='ase') -> Iterator[Atoms]:
568 """Read Atoms object from CIF file. *index* specifies the data
569 block number or name (if string) to return.
571 If *index* is None or a slice object, a list of atoms objects will
572 be returned. In the case of *index* is *None* or *slice(None)*,
573 only blocks with valid crystal data will be included.
575 If *store_tags* is true, the *info* attribute of the returned
576 Atoms object will be populated with all tags in the corresponding
577 cif data block.
579 If *primitive_cell* is true, the primitive cell will be built instead
580 of the conventional cell.
582 If *subtrans_included* is true, sublattice translations are
583 assumed to be included among the symmetry operations listed in the
584 CIF file (seems to be the common behaviour of CIF files).
585 Otherwise the sublattice translations are determined from setting
586 1 of the extracted space group. A result of setting this flag to
587 true, is that it will not be possible to determine the primitive
588 cell.
590 If *fractional_occupancies* is true, the resulting atoms object will be
591 tagged equipped with a dictionary `occupancy`. The keys of this dictionary
592 will be integers converted to strings. The conversion to string is done
593 in order to avoid troubles with JSON encoding/decoding of the dictionaries
594 with non-string keys.
595 Also, in case of mixed occupancies, the atom's chemical symbol will be
596 that of the most dominant species.
598 String *reader* is used to select CIF reader. Value `ase` selects
599 built-in CIF reader (default), while `pycodcif` selects CIF reader based
600 on `pycodcif` package.
601 """
602 # Find all CIF blocks with valid crystal data
603 images = []
604 for block in parse_cif(fileobj, reader):
605 if not block.has_structure():
606 continue
608 atoms = block.get_atoms(
609 store_tags, primitive_cell,
610 subtrans_included,
611 fractional_occupancies=fractional_occupancies)
612 images.append(atoms)
614 for atoms in images[index]:
615 yield atoms
618def format_cell(cell: Cell) -> str:
619 assert cell.rank == 3
620 lines = []
621 for name, value in zip(CIFBlock.cell_tags, cell.cellpar()):
622 line = '{:20} {:g}\n'.format(name, value)
623 lines.append(line)
624 assert len(lines) == 6
625 return ''.join(lines)
628def format_generic_spacegroup_info() -> str:
629 # We assume no symmetry whatsoever
630 return '\n'.join([
631 '_space_group_name_H-M_alt "P 1"',
632 '_space_group_IT_number 1',
633 '',
634 'loop_',
635 ' _space_group_symop_operation_xyz',
636 " 'x, y, z'",
637 '',
638 ])
641class CIFLoop:
642 def __init__(self):
643 self.names = []
644 self.formats = []
645 self.arrays = []
647 def add(self, name, array, fmt):
648 assert name.startswith('_')
649 self.names.append(name)
650 self.formats.append(fmt)
651 self.arrays.append(array)
652 if len(self.arrays[0]) != len(self.arrays[-1]):
653 raise ValueError(f'Loop data "{name}" has {len(array)} '
654 'elements, expected {len(self.arrays[0])}')
656 def tostring(self):
657 lines = []
658 append = lines.append
659 append('loop_')
660 for name in self.names:
661 append(f' {name}')
663 template = ' ' + ' '.join(self.formats)
665 ncolumns = len(self.arrays)
666 nrows = len(self.arrays[0]) if ncolumns > 0 else 0
667 for row in range(nrows):
668 arraydata = [array[row] for array in self.arrays]
669 line = template.format(*arraydata)
670 append(line)
671 append('')
672 return '\n'.join(lines)
675@iofunction('wb')
676def write_cif(fd, images, cif_format=None,
677 wrap=True, labels=None, loop_keys=None) -> None:
678 """Write *images* to CIF file.
680 wrap: bool
681 Wrap atoms into unit cell.
683 labels: list
684 Use this list (shaped list[i_frame][i_atom] = string) for the
685 '_atom_site_label' section instead of automatically generating
686 it from the element symbol.
688 loop_keys: dict
689 Add the information from this dictionary to the `loop_`
690 section. Keys are printed to the `loop_` section preceeded by
691 ' _'. dict[key] should contain the data printed for each atom,
692 so it needs to have the setup `dict[key][i_frame][i_atom] =
693 string`. The strings are printed as they are, so take care of
694 formating. Information can be re-read using the `store_tags`
695 option of the cif reader.
697 """
699 if cif_format is not None:
700 warnings.warn('The cif_format argument is deprecated and may be '
701 'removed in the future. Use loop_keys to customize '
702 'data written in loop.', FutureWarning)
704 if loop_keys is None:
705 loop_keys = {}
707 if hasattr(images, 'get_positions'):
708 images = [images]
710 fd = io.TextIOWrapper(fd, encoding='latin-1')
711 try:
712 for i, atoms in enumerate(images):
713 blockname = f'data_image{i}\n'
714 image_loop_keys = {key: loop_keys[key][i] for key in loop_keys}
716 write_cif_image(blockname, atoms, fd,
717 wrap=wrap,
718 labels=None if labels is None else labels[i],
719 loop_keys=image_loop_keys)
721 finally:
722 # Using the TextIOWrapper somehow causes the file to close
723 # when this function returns.
724 # Detach in order to circumvent this highly illogical problem:
725 fd.detach()
728def autolabel(symbols: Sequence[str]) -> List[str]:
729 no: Dict[str, int] = {}
730 labels = []
731 for symbol in symbols:
732 if symbol in no:
733 no[symbol] += 1
734 else:
735 no[symbol] = 1
736 labels.append('%s%d' % (symbol, no[symbol]))
737 return labels
740def chemical_formula_header(atoms):
741 counts = atoms.symbols.formula.count()
742 formula_sum = ' '.join(f'{sym}{count}' for sym, count
743 in counts.items())
744 return (f'_chemical_formula_structural {atoms.symbols}\n'
745 f'_chemical_formula_sum "{formula_sum}"\n')
748class BadOccupancies(ValueError):
749 pass
752def expand_kinds(atoms, coords):
753 # try to fetch occupancies // spacegroup_kinds - occupancy mapping
754 symbols = list(atoms.symbols)
755 coords = list(coords)
756 occupancies = [1] * len(symbols)
757 occ_info = atoms.info.get('occupancy')
758 kinds = atoms.arrays.get('spacegroup_kinds')
759 if occ_info is not None and kinds is not None:
760 for i, kind in enumerate(kinds):
761 occ_info_kind = occ_info[str(kind)]
762 symbol = symbols[i]
763 if symbol not in occ_info_kind:
764 raise BadOccupancies('Occupancies present but no occupancy '
765 'info for "{symbol}"')
766 occupancies[i] = occ_info_kind[symbol]
767 # extend the positions array in case of mixed occupancy
768 for sym, occ in occ_info[str(kind)].items():
769 if sym != symbols[i]:
770 symbols.append(sym)
771 coords.append(coords[i])
772 occupancies.append(occ)
773 return symbols, coords, occupancies
776def atoms_to_loop_data(atoms, wrap, labels, loop_keys):
777 if atoms.cell.rank == 3:
778 coord_type = 'fract'
779 coords = atoms.get_scaled_positions(wrap).tolist()
780 else:
781 coord_type = 'Cartn'
782 coords = atoms.get_positions(wrap).tolist()
784 try:
785 symbols, coords, occupancies = expand_kinds(atoms, coords)
786 except BadOccupancies as err:
787 warnings.warn(str(err))
788 occupancies = [1] * len(atoms)
789 symbols = list(atoms.symbols)
791 if labels is None:
792 labels = autolabel(symbols)
794 coord_headers = [f'_atom_site_{coord_type}_{axisname}'
795 for axisname in 'xyz']
797 loopdata = {}
798 loopdata['_atom_site_label'] = (labels, '{:<8s}')
799 loopdata['_atom_site_occupancy'] = (occupancies, '{:6.4f}')
801 _coords = np.array(coords)
802 for i, key in enumerate(coord_headers):
803 loopdata[key] = (_coords[:, i], '{:7.5f}')
805 loopdata['_atom_site_type_symbol'] = (symbols, '{:<2s}')
806 loopdata['_atom_site_symmetry_multiplicity'] = (
807 [1.0] * len(symbols), '{}')
809 for key in loop_keys:
810 # Should expand the loop_keys like we expand the occupancy stuff.
811 # Otherwise user will never figure out how to do this.
812 values = [loop_keys[key][i] for i in range(len(symbols))]
813 loopdata['_' + key] = (values, '{}')
815 return loopdata, coord_headers
818def write_cif_image(blockname, atoms, fd, *, wrap,
819 labels, loop_keys):
820 fd.write(blockname)
821 fd.write(chemical_formula_header(atoms))
823 rank = atoms.cell.rank
824 if rank == 3:
825 fd.write(format_cell(atoms.cell))
826 fd.write('\n')
827 fd.write(format_generic_spacegroup_info())
828 fd.write('\n')
829 elif rank != 0:
830 raise ValueError('CIF format can only represent systems with '
831 f'0 or 3 lattice vectors. Got {rank}.')
833 loopdata, coord_headers = atoms_to_loop_data(atoms, wrap, labels,
834 loop_keys)
836 headers = [
837 '_atom_site_type_symbol',
838 '_atom_site_label',
839 '_atom_site_symmetry_multiplicity',
840 *coord_headers,
841 '_atom_site_occupancy',
842 ]
844 headers += ['_' + key for key in loop_keys]
846 loop = CIFLoop()
847 for header in headers:
848 array, fmt = loopdata[header]
849 loop.add(header, array, fmt)
851 fd.write(loop.tostring())