Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from math import gcd
2import re
3from typing import Dict, Tuple, List, Sequence, Union
5from ase.data import chemical_symbols, atomic_numbers
# Recursive type used in hints for a parsed formula (A, A2, A+B):
#
# * ``str``              -- a single chemical symbol, e.g. ``'H'``
# * ``(subtree, int)``   -- a subtree repeated a number of times, e.g. ``('H', 2)``
# * ``[subtree, ...]``   -- a sequence of subtrees, e.g. ``[('H', 2), 'O']``
Tree = Union[str, Tuple['Tree', int], List['Tree']]  # type: ignore
class Formula:
    def __init__(self,
                 formula: str = '',
                 *,
                 strict: bool = False,
                 format: str = '',
                 _tree: 'Tree' = None,
                 _count: Dict[str, int] = None):
        """Chemical formula object.

        Parameters
        ----------
        formula: str
            Text string representation of formula.  Examples: ``'6CO2'``,
            ``'30Cu+2CO'``, ``'Pt(CO)6'``.
        strict: bool
            Only allow real chemical symbols.
        format: str
            Reorder according to *format*.  Must be one of hill, metal,
            abc or reduce.

        Examples
        --------
        >>> from ase.formula import Formula
        >>> w = Formula('H2O')
        >>> w.count()
        {'H': 2, 'O': 1}
        >>> 'H' in w
        True
        >>> w == 'HOH'
        True
        >>> f'{w:latex}'
        'H$_{2}$O'
        >>> w.format('latex')
        'H$_{2}$O'
        >>> divmod(6 * w + 'Cu', w)
        (6, Formula('Cu'))

        Raises
        ------
        ValueError
            on malformed formula
        """
        if format:
            # The private shortcuts make no sense together with re-formatting.
            assert _tree is None and _count is None
            if format not in {'hill', 'metal', 'abc', 'reduce'}:
                raise ValueError(f'Illegal format: {format}')
            formula = Formula(formula).format(format)
        self._formula = formula
        # An empty tree/count is falsy and is simply recomputed.
        self._tree = _tree or parse(formula)
        self._count = _count or count_tree(self._tree)
        if strict:
            for symbol in self._count:
                if symbol not in atomic_numbers:
                    raise ValueError('Unknown chemical symbol: ' + symbol)

    def convert(self, fmt: str) -> 'Formula':
        """Reformat this formula as a new Formula.

        Same formatting rules as Formula(format=...) keyword.
        """
        return Formula(self._formula, format=fmt)

    def count(self) -> Dict[str, int]:
        """Return dictionary mapping chemical symbol to number of atoms.

        The returned dictionary is a copy and may be modified freely.

        Example
        -------
        >>> Formula('H2O').count()
        {'H': 2, 'O': 1}
        """
        return self._count.copy()

    def reduce(self) -> Tuple['Formula', int]:
        """Reduce formula.

        Returns
        -------
        formula: Formula
            Reduced formula.
        n: int
            Number of reduced formula units.

        Example
        -------
        >>> Formula('2H2O').reduce()
        (Formula('H2O'), 2)
        """
        dct, N = self._reduce()
        return self.from_dict(dct), N

    def stoichiometry(self) -> Tuple['Formula', 'Formula', int]:
        """Reduce to unique stoichiometry using "chemical symbols" A, B, C, ...

        Returns the abstract formula, the reduced formula with the symbols
        in the matching order (sorted by count, then by symbol) and the
        number of reduced formula units.

        Examples
        --------
        >>> Formula('CO2').stoichiometry()
        (Formula('AB2'), Formula('CO2'), 1)
        >>> Formula('(H2O)4').stoichiometry()
        (Formula('AB2'), Formula('OH2'), 4)
        """
        count1, N = self._reduce()
        c = ord('A')
        count2 = {}
        count3 = {}
        for n, symb in sorted((n, symb)
                              for symb, n in count1.items()):
            count2[chr(c)] = n
            count3[symb] = n
            c += 1
        return self.from_dict(count2), self.from_dict(count3), N

    def format(self, fmt: str = '') -> str:
        """Format formula as string.

        Formats:

        * ``'hill'``: alphabetically ordered with C and H first
        * ``'metal'``: alphabetically ordered with metals first
        * ``'abc'``: count ordered first then alphabetically ordered
        * ``'reduce'``: Reduce and keep order (ABBBC -> AB3C)
        * ``'latex'``: LaTeX representation
        * ``'html'``: HTML representation
        * ``'rest'``: reStructuredText representation

        Example
        -------
        >>> Formula('H2O').format('html')
        'H<sub>2</sub>O'
        """
        return format(self, fmt)

    def __format__(self, fmt: str) -> str:
        """Format Formula as str.

        Possible formats: ``'hill'``, ``'metal'``, ``'abc'``, ``'reduce'``,
        ``'latex'``, ``'html'``, ``'rest'``.

        Raises ValueError for any other non-empty format specifier.

        Example
        -------
        >>> f = Formula('OH2')
        >>> '{f}, {f:hill}, {f:latex}'.format(f=f)
        'OH2, H2O, OH$_{2}$'
        """

        if fmt == 'hill':
            count = self.count()
            count2 = {}
            for symb in 'CH':
                if symb in count:
                    count2[symb] = count.pop(symb)
            for symb, n in sorted(count.items()):
                count2[symb] = n
            return dict2str(count2)

        if fmt == 'metal':
            count = self.count()
            # Pull the non-metals out; what is left in *count* are metals.
            result2 = [(s, count.pop(s)) for s in non_metals if s in count]
            result = [(s, count[s]) for s in sorted(count)]
            result += sorted(result2)
            return dict2str(dict(result))

        if fmt == 'abc':
            _, f, N = self.stoichiometry()
            return dict2str({symb: n * N for symb, n in f._count.items()})

        if fmt == 'reduce':
            # Collapse runs of identical consecutive symbols (ABBBC -> AB3C).
            symbols = list(self)
            nsymb = len(symbols)
            parts = []
            i1 = 0
            for i2, symbol in enumerate(symbols):
                if i2 == nsymb - 1 or symbol != symbols[i2 + 1]:
                    parts.append(symbol)
                    m = i2 + 1 - i1
                    if m > 1:
                        parts.append(str(m))
                    i1 = i2 + 1
            return ''.join(parts)

        if fmt == 'latex':
            return self._tostr('$_{', '}$')

        if fmt == 'html':
            return self._tostr('<sub>', '</sub>')

        if fmt == 'rest':
            # reST role syntax is ``:sub:`text```; the colon after the
            # role name is required for the subscript to be recognised.
            return self._tostr(r'\ :sub:`', r'`\ ')

        if fmt == '':
            return self._formula

        raise ValueError('Invalid format specifier')

    @staticmethod
    def from_dict(dct: Dict[str, int]) -> 'Formula':
        """Convert dict to Formula.

        Symbols with a count of zero are dropped.  Raises ValueError if a
        key is not a str or a value is not a non-negative int.

        >>> Formula.from_dict({'H': 2})
        Formula('H2')
        """
        dct2 = {}
        for symb, n in dct.items():
            if not (isinstance(symb, str) and isinstance(n, int) and n >= 0):
                raise ValueError('Bad dictionary: {dct}'.format(dct=dct))
            if n > 0:  # filter out n=0 symbols
                dct2[symb] = n
        return Formula(dict2str(dct2),
                       _tree=[([(symb, n) for symb, n in dct2.items()], 1)],
                       _count=dct2)

    @staticmethod
    def from_list(symbols: Sequence[str]) -> 'Formula':
        """Convert list of chemical symbols to Formula."""
        # The copy must be a real list: a tuple inside the tree would be
        # interpreted as a (subtree, multiplier) node.
        return Formula(''.join(symbols),
                       _tree=[(list(symbols), 1)])

    def __len__(self) -> int:
        """Number of atoms."""
        return sum(self._count.values())

    def __getitem__(self, symb: str) -> int:
        """Number of atoms with chemical symbol *symb*."""
        return self._count.get(symb, 0)

    def __contains__(self, f: Union[str, 'Formula']) -> bool:
        """Check if formula contains chemical symbols in *f*.

        Type of *f* must be str or Formula.

        Examples
        --------
        >>> 'OH' in Formula('H2O')
        True
        >>> 'O2' in Formula('H2O')
        False
        """
        if isinstance(f, str):
            f = Formula(f)
        for symb, n in f._count.items():
            if self[symb] < n:
                return False
        return True

    def __eq__(self, other) -> bool:
        """Equality check.

        Note that order is not important.  Strings are parsed as formulas;
        any other type compares unequal.

        Example
        -------
        >>> Formula('CO') == Formula('OC')
        True
        """
        if isinstance(other, str):
            other = Formula(other)
        elif not isinstance(other, Formula):
            return False
        return self._count == other._count

    def __add__(self, other: Union[str, 'Formula']) -> 'Formula':
        """Add two formulas."""
        if not isinstance(other, str):
            other = other._formula
        return Formula(self._formula + '+' + other)

    def __radd__(self, other: str):  # -> Formula
        """Add this formula to the formula string *other*."""
        return Formula(other) + self

    def __mul__(self, N: int) -> 'Formula':
        """Repeat formula `N` times."""
        if N == 0:
            return Formula('')
        return self.from_dict({symb: n * N
                               for symb, n in self._count.items()})

    def __rmul__(self, N: int):  # -> Formula
        """Repeat formula `N` times (``N * formula``)."""
        return self * N

    def __divmod__(self,
                   other: Union['Formula', str]) -> Tuple[int, 'Formula']:
        """Return the tuple (self // other, self % other).

        Invariant::

            div, mod = divmod(self, other)
            div * other + mod == self

        Raises ValueError if *other* is an empty formula.

        Example
        -------
        >>> divmod(Formula('H2O'), 'H')
        (2, Formula('O'))
        """
        if isinstance(other, str):
            other = Formula(other)
        if not other._count:
            # min() over no symbols would fail with an unrelated message.
            raise ValueError('Cannot divide by an empty formula')
        N = min(self[symb] // n for symb, n in other._count.items())
        dct = self.count()
        if N:
            for symb, n in other._count.items():
                dct[symb] -= n * N
                if dct[symb] == 0:
                    del dct[symb]
        return N, self.from_dict(dct)

    def __rdivmod__(self, other):
        """Return ``divmod(Formula(other), self)``."""
        return divmod(Formula(other), self)

    def __mod__(self, other):
        """Return what is left after removing *other* as often as possible."""
        return divmod(self, other)[1]

    def __rmod__(self, other):
        """Return ``Formula(other) % self``."""
        return Formula(other) % self

    def __floordiv__(self, other):
        """Return how many times *other* fits into this formula."""
        return divmod(self, other)[0]

    def __rfloordiv__(self, other):
        """Return ``Formula(other) // self``."""
        return Formula(other) // self

    def __iter__(self, tree=None):
        """Yield the chemical symbols one atom at a time, in formula order."""
        if tree is None:
            tree = self._tree
        if isinstance(tree, str):
            yield tree
        elif isinstance(tree, tuple):
            tree, N = tree
            for _ in range(N):
                yield from self.__iter__(tree)
        else:
            for tree in tree:
                yield from self.__iter__(tree)

    def __str__(self):
        return self._formula

    def __repr__(self):
        return 'Formula({!r})'.format(self._formula)

    def _reduce(self):
        """Divide all counts by their greatest common divisor.

        Returns the reduced count dictionary and the divisor.  For an empty
        formula, or one where every count is zero, the divisor is 0 and the
        counts are returned as zeros instead of dividing by zero.
        """
        N = 0
        for n in self._count.values():
            N = gcd(n, N)  # gcd(n, 0) == n, so the first count seeds N
        if N == 0:
            return {symb: 0 for symb in self._count}, 0
        dct = {symb: n // N for symb, n in self._count.items()}
        return dct, N

    def _tostr(self, sub1, sub2):
        """Render the formula with counts wrapped in *sub1* ... *sub2*.

        The ``+``-separated parts are rendered separately; a leading
        multiplier larger than one is written in front of its part.
        """
        parts = []
        for tree, n in self._tree:
            s = tree2str(tree, sub1, sub2)
            # startswith/endswith are safe for an empty string ('()').
            if s.startswith('(') and s.endswith(')'):
                s = s[1:-1]
            if n > 1:
                s = str(n) + s
            parts.append(s)
        return '+'.join(parts)
def dict2str(dct):
    """Join symbol/count pairs into a formula string.

    Counts are written after their symbol; a count that is not larger
    than one is left out.  The dictionary order is kept.
    """
    pieces = []
    for symbol, amount in dct.items():
        pieces.append(symbol)
        if amount > 1:
            pieces.append(str(amount))
    return ''.join(pieces)
def parse(f: str):  # -> Tree
    """Parse a formula string into a list of ``(subtree, multiplier)`` terms.

    The string is split on ``'+'``; each term may start with a number
    which becomes the multiplier of that term.  An empty string gives an
    empty list.
    """
    if not f:
        return []
    # map() is lazy, so each term is stripped and parsed before the next
    # one is looked at.
    return [(parse2(body), multiplier)
            for multiplier, body in map(strip_number, f.split('+'))]
def parse2(f: str) -> 'Tree':
    """Parse one ``+``-free formula term into a tree.

    The term is consumed left to right as a sequence of units.  A unit is
    either a parenthesised group followed by an optional count, or a
    chemical symbol (one upper-case letter optionally followed by one
    lower-case letter) followed by an optional count.

    Returns the single unit if there is exactly one, otherwise the list of
    units (an empty list for an empty string).

    Raises
    ------
    ValueError
        If a parenthesis is never closed or the text does not start with
        a chemical symbol.
    """
    units = []
    while f:
        unit: Union[str, Tuple[str, int], 'Tree']
        if f[0] == '(':
            # Find the ')' that closes this group, skipping nested groups.
            level = 0
            for i, c in enumerate(f[1:], 1):
                if c == '(':
                    level += 1
                elif c == ')':
                    if level == 0:
                        break
                    level -= 1
            else:
                raise ValueError(
                    'Unbalanced parenthesis in formula: {!r}'.format(f))
            f2 = f[1:i]
            n, f = strip_number(f[i + 1:])
            unit = (parse2(f2), n)
        else:
            m = re.match(r'([A-Z][a-z]?)([0-9]*)', f)
            if m is None:
                raise ValueError(
                    'Expected a chemical symbol at: {!r}'.format(f))
            symb = m.group(1)
            number = m.group(2)
            if number:
                unit = (symb, int(number))
            else:
                unit = symb
            f = f[m.end():]
        units.append(unit)
    if len(units) == 1:
        return units[0]
    return units
def strip_number(s: str) -> Tuple[int, str]:
    """Split a leading run of digits 0-9 off *s*.

    Returns the number (1 when there are no leading digits) and the
    remaining text.
    """
    end = 0
    while end < len(s) and s[end] in '0123456789':
        end += 1
    digits, rest = s[:end], s[end:]
    number = int(digits) if digits else 1
    return number, rest
def tree2str(tree: 'Tree',
             sub1: str, sub2: str) -> str:
    """Render a formula tree as text.

    A symbol is returned as is.  A ``(subtree, N)`` node is rendered as the
    subtree followed by ``sub1 + str(N) + sub2``; for ``N == 1`` the count
    is left out and enclosing parentheses around the subtree are removed.
    A list of subtrees is concatenated and wrapped in parentheses.
    """
    if isinstance(tree, str):
        return tree
    if isinstance(tree, tuple):
        subtree, N = tree
        s = tree2str(subtree, sub1, sub2)
        if N == 1:
            # startswith/endswith instead of s[0]/s[-1]: an empty nested
            # group renders to '' and must not raise IndexError.
            if s.startswith('(') and s.endswith(')'):
                return s[1:-1]
            return s
        return s + sub1 + str(N) + sub2
    return '(' + ''.join(tree2str(child, sub1, sub2) for child in tree) + ')'
def count_tree(tree: 'Tree') -> Dict[str, int]:
    """Count the atoms of each chemical symbol in a formula tree.

    A symbol counts once, a ``(subtree, N)`` node multiplies the counts of
    its subtree by ``N`` and a list adds up the counts of its members.
    """
    if isinstance(tree, str):
        return {tree: 1}

    if isinstance(tree, tuple):
        subtree, factor = tree
        inner = count_tree(subtree)
        return {symbol: amount * factor for symbol, amount in inner.items()}

    totals = {}  # type: Dict[str, int]
    for child in tree:
        for symbol, amount in count_tree(child).items():
            totals[symbol] = totals.get(symbol, 0) + amount
    return totals
# Symbols treated as "not a metal" by the 'metal' format:
# non metals, half-metals/metalloid, halogen, noble gas.
non_metals = [
    'H', 'He',
    'B', 'C', 'N', 'O', 'F', 'Ne',
    'Si', 'P', 'S', 'Cl', 'Ar',
    'Ge', 'As', 'Se', 'Br', 'Kr',
    'Sb', 'Te', 'I', 'Xe',
    'Po', 'At', 'Rn',
]
# Backwards compatibility:
def formula_hill(numbers, empirical=False):
    """Convert list of atomic numbers to a chemical formula as a string.

    Elements are alphabetically ordered with C and H first.

    If argument `empirical`, element counts will be divided by greatest common
    divisor to yield an empirical formula"""
    # Build the tree directly from the symbol list; no string is parsed.
    formula = Formula('', _tree=[([chemical_symbols[Z] for Z in numbers], 1)])
    if empirical:
        formula = formula.reduce()[0]
    return formula.format('hill')
# Backwards compatibility:
def formula_metal(numbers, empirical=False):
    """Convert list of atomic numbers to a chemical formula as a string.

    Elements are alphabetically ordered with metals first.

    If argument `empirical`, element counts will be divided by greatest common
    divisor to yield an empirical formula"""
    # Build the tree directly from the symbol list; no string is parsed.
    formula = Formula('', _tree=[([chemical_symbols[Z] for Z in numbers], 1)])
    if empirical:
        formula = formula.reduce()[0]
    return formula.format('metal')