Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1from math import gcd 

2import re 

3from typing import Dict, Tuple, List, Sequence, Union 

4 

5from ase.data import chemical_symbols, atomic_numbers 

6 

7 

# Recursive type used in type hints for a parsed formula.  A node is one of:
#   * str          -- a single chemical symbol               (A)
#   * (Tree, int)  -- a sub-tree together with a multiplier  (A2)
#   * [Tree, ...]  -- a sequence of sub-trees                (A+B)
Tree = Union[str, Tuple['Tree', int], List['Tree']]  # type: ignore

10 

11 

class Formula:
    """Chemical formula with counting, arithmetic and formatting helpers."""

    def __init__(self,
                 formula: str = '',
                 *,
                 strict: bool = False,
                 format: str = '',
                 _tree: Tree = None,
                 _count: Dict[str, int] = None):
        """Chemical formula object.

        Parameters
        ----------
        formula: str
            Text string representation of formula.  Examples: ``'6CO2'``,
            ``'30Cu+2CO'``, ``'Pt(CO)6'``.
        strict: bool
            Only allow real chemical symbols.
        format: str
            Reorder according to *format*.  Must be one of hill, metal,
            abc or reduce.
        _tree, _count:
            Private: already-parsed tree and symbol counts.  When given
            (and non-empty) they are used instead of parsing *formula*.
            They must not be combined with *format*.

        Examples
        --------
        >>> from ase.formula import Formula
        >>> w = Formula('H2O')
        >>> w.count()
        {'H': 2, 'O': 1}
        >>> 'H' in w
        True
        >>> w == 'HOH'
        True
        >>> f'{w:latex}'
        'H$_{2}$O'
        >>> w.format('latex')
        'H$_{2}$O'
        >>> divmod(6 * w + 'Cu', w)
        (6, Formula('Cu'))

        Raises
        ------
        ValueError
            on malformed formula
        """
        if format:
            assert _tree is None and _count is None
            if format not in {'hill', 'metal', 'abc', 'reduce'}:
                raise ValueError(f'Illegal format: {format}')
            # Parse once, re-order, and continue with the re-ordered string.
            formula = Formula(formula).format(format)
        self._formula = formula
        self._tree = _tree or parse(formula)
        self._count = _count or count_tree(self._tree)
        if strict:
            for symbol in self._count:
                if symbol not in atomic_numbers:
                    raise ValueError('Unknown chemical symbol: ' + symbol)

    def convert(self, fmt: str) -> 'Formula':
        """Reformat this formula as a new Formula.

        Same formatting rules as Formula(format=...) keyword.
        """
        return Formula(self._formula, format=fmt)

    def count(self) -> Dict[str, int]:
        """Return dictionary mapping chemical symbol to number of atoms.

        The returned dictionary is a copy; modifying it does not affect
        this formula.

        Example
        -------
        >>> Formula('H2O').count()
        {'H': 2, 'O': 1}
        """
        return self._count.copy()

    def reduce(self) -> Tuple['Formula', int]:
        """Reduce formula.

        Returns
        -------
        formula: Formula
            Reduced formula.
        n: int
            Number of reduced formula units.

        Example
        -------
        >>> Formula('2H2O').reduce()
        (Formula('H2O'), 2)
        """
        dct, N = self._reduce()
        return self.from_dict(dct), N

    def stoichiometry(self) -> Tuple['Formula', 'Formula', int]:
        """Reduce to unique stoichiometry using "chemical symbols" A, B, C, ...

        Returns
        -------
        abstract: Formula
            Reduced formula with symbols replaced by A, B, C, ...
        reduced: Formula
            Reduced formula using the real symbols, in the same order.
        n: int
            Number of reduced formula units.

        Examples
        --------
        >>> Formula('CO2').stoichiometry()
        (Formula('AB2'), Formula('CO2'), 1)
        >>> Formula('(H2O)4').stoichiometry()
        (Formula('AB2'), Formula('OH2'), 4)
        """
        count1, N = self._reduce()
        c = ord('A')
        count2 = {}
        count3 = {}
        # Sort by count first and by symbol second.
        for n, symb in sorted((n, symb)
                              for symb, n in count1.items()):
            count2[chr(c)] = n
            count3[symb] = n
            c += 1
        return self.from_dict(count2), self.from_dict(count3), N

    def format(self, fmt: str = '') -> str:
        """Format formula as string.

        Formats:

        * ``'hill'``: alphabetically ordered with C and H first
        * ``'metal'``: alphabetically ordered with metals first
        * ``'abc'``: count ordered first then alphabetically ordered
        * ``'reduce'``: Reduce and keep order (ABBBC -> AB3C)
        * ``'latex'``: LaTeX representation
        * ``'html'``: HTML representation
        * ``'rest'``: reStructuredText representation

        Example
        -------
        >>> Formula('H2O').format('html')
        'H<sub>2</sub>O'
        """
        return format(self, fmt)

    def __format__(self, fmt: str) -> str:
        """Format Formula as str.

        Possible formats: ``'hill'``, ``'metal'``, ``'abc'``, ``'reduce'``,
        ``'latex'``, ``'html'``, ``'rest'``.  The empty format returns the
        formula string unchanged.

        Raises
        ------
        ValueError
            if *fmt* is not one of the formats above.

        Example
        -------
        >>> f = Formula('OH2')
        >>> '{f}, {f:hill}, {f:latex}'.format(f=f)
        'OH2, H2O, OH$_{2}$'
        """

        if fmt == 'hill':
            count = self.count()
            count2 = {}
            # C and H (in that order) go first, the rest alphabetically.
            for symb in 'CH':
                if symb in count:
                    count2[symb] = count.pop(symb)
            for symb, n in sorted(count.items()):
                count2[symb] = n
            return dict2str(count2)

        if fmt == 'metal':
            count = self.count()
            # Pull the non-metals out; what is left in count are the metals.
            result2 = [(s, count.pop(s)) for s in non_metals if s in count]
            result = [(s, count[s]) for s in sorted(count)]
            result += sorted(result2)
            return dict2str(dict(result))

        if fmt == 'abc':
            _, f, N = self.stoichiometry()
            return dict2str({symb: n * N for symb, n in f._count.items()})

        if fmt == 'reduce':
            # Collapse runs of identical consecutive symbols: ABBBC -> AB3C.
            symbols = list(self)
            nsymb = len(symbols)
            parts = []
            i1 = 0  # index where the current run starts
            for i2, symbol in enumerate(symbols):
                if i2 == nsymb - 1 or symbol != symbols[i2 + 1]:
                    parts.append(symbol)
                    m = i2 + 1 - i1
                    if m > 1:
                        parts.append(str(m))
                    i1 = i2 + 1
            return ''.join(parts)

        if fmt == 'latex':
            return self._tostr('$_{', '}$')

        if fmt == 'html':
            return self._tostr('<sub>', '</sub>')

        if fmt == 'rest':
            # The reST subscript role is spelled :sub:`...` (with a colon on
            # both sides of the role name).
            return self._tostr(r'\ :sub:`', r'`\ ')

        if fmt == '':
            return self._formula

        raise ValueError('Invalid format specifier')

    @staticmethod
    def from_dict(dct: Dict[str, int]) -> 'Formula':
        """Convert dict to Formula.

        Symbols with a count of zero are left out.

        Raises
        ------
        ValueError
            if a key is not a str or a value is not a non-negative int.

        >>> Formula.from_dict({'H': 2})
        Formula('H2')
        """
        dct2 = {}
        for symb, n in dct.items():
            if not (isinstance(symb, str) and isinstance(n, int) and n >= 0):
                raise ValueError(f'Bad dictionary: {dct}')
            if n > 0:  # filter out n=0 symbols
                dct2[symb] = n
        return Formula(dict2str(dct2),
                       _tree=[([(symb, n) for symb, n in dct2.items()], 1)],
                       _count=dct2)

    @staticmethod
    def from_list(symbols: Sequence[str]) -> 'Formula':
        """Convert list of chemical symbols to Formula.

        Any sequence of symbols is accepted; a bare string is split into
        its characters.
        """
        # Store a real list: a tuple inside the tree would be interpreted
        # as a (subtree, multiplier) node instead of a sequence of symbols.
        return Formula(''.join(symbols),
                       _tree=[(list(symbols), 1)])

    def __len__(self) -> int:
        """Number of atoms."""
        return sum(self._count.values())

    def __getitem__(self, symb: str) -> int:
        """Number of atoms with chemical symbol *symb*."""
        return self._count.get(symb, 0)

    def __contains__(self, f: Union[str, 'Formula']) -> bool:
        """Check if formula contains chemical symbols in *f*.

        Type of *f* must be str or Formula.

        Examples
        --------
        >>> 'OH' in Formula('H2O')
        True
        >>> 'O2' in Formula('H2O')
        False
        """
        if isinstance(f, str):
            f = Formula(f)
        for symb, n in f._count.items():
            if self[symb] < n:
                return False
        return True

    def __eq__(self, other) -> bool:
        """Equality check.

        Note that order is not important.  A str is parsed as a formula
        first; any other non-Formula object compares unequal.

        Example
        -------
        >>> Formula('CO') == Formula('OC')
        True
        """
        if isinstance(other, str):
            other = Formula(other)
        elif not isinstance(other, Formula):
            return False
        return self._count == other._count

    def __add__(self, other: Union[str, 'Formula']) -> 'Formula':
        """Add two formulas."""
        if not isinstance(other, str):
            other = other._formula
        return Formula(self._formula + '+' + other)

    def __radd__(self, other: str):  # -> Formula
        """Add this formula to the formula string *other*."""
        return Formula(other) + self

    def __mul__(self, N: int) -> 'Formula':
        """Repeat formula `N` times."""
        if N == 0:
            return Formula('')
        return self.from_dict({symb: n * N
                               for symb, n in self._count.items()})

    def __rmul__(self, N: int):  # -> Formula
        """Repeat formula `N` times (``N * formula``)."""
        return self * N

    def __divmod__(self,
                   other: Union['Formula', str]) -> Tuple[int, 'Formula']:
        """Return the tuple (self // other, self % other).

        Invariant::

            div, mod = divmod(self, other)
            div * other + mod == self

        Raises
        ------
        ValueError
            if *other* is an empty formula.

        Example
        -------
        >>> divmod(Formula('H2O'), 'H')
        (2, Formula('O'))
        """
        if isinstance(other, str):
            other = Formula(other)
        if not other._count:
            # Without this guard min() below fails with an unrelated message.
            raise ValueError('Cannot divide by an empty formula')
        N = min(self[symb] // n for symb, n in other._count.items())
        dct = self.count()
        if N:
            # N > 0 guarantees that every symbol of other is present in dct.
            for symb, n in other._count.items():
                dct[symb] -= n * N
        # from_dict() drops the symbols whose count became zero.
        return N, self.from_dict(dct)

    def __rdivmod__(self, other):
        """Return ``divmod(Formula(other), self)``."""
        return divmod(Formula(other), self)

    def __mod__(self, other):
        """Return what is left after removing all whole units of *other*."""
        return divmod(self, other)[1]

    def __rmod__(self, other):
        """Return ``Formula(other) % self``."""
        return Formula(other) % self

    def __floordiv__(self, other):
        """Return the number of whole units of *other* in this formula."""
        return divmod(self, other)[0]

    def __rfloordiv__(self, other):
        """Return ``Formula(other) // self``."""
        return Formula(other) // self

    def __iter__(self, tree=None):
        """Yield the chemical symbols one atom at a time, in tree order.

        *tree* is only used for the recursion; callers iterate without it.
        """
        if tree is None:
            tree = self._tree
        if isinstance(tree, str):
            yield tree
        elif isinstance(tree, tuple):
            tree, N = tree
            for _ in range(N):
                yield from self.__iter__(tree)
        else:
            for tree in tree:
                yield from self.__iter__(tree)

    def __str__(self):
        """Return the formula string."""
        return self._formula

    def __repr__(self):
        """Return ``Formula('...')``."""
        return 'Formula({!r})'.format(self._formula)

    def _reduce(self):
        """Return (counts divided by their gcd, the gcd).

        For an empty formula the result is ``({}, 0)``.
        """
        N = 0
        for n in self._count.values():
            N = gcd(n, N)  # gcd(n, 0) == n, so the first count seeds N
        dct = {symb: n // N for symb, n in self._count.items()}
        return dct, N

    def _tostr(self, sub1, sub2):
        """Render the formula with each number wrapped in *sub1*/*sub2*.

        The '+'-separated terms are rendered one by one; the multiplier in
        front of a term is written as a plain number.
        """
        parts = []
        for tree, n in self._tree:
            s = tree2str(tree, sub1, sub2)
            # Drop the parentheses that enclose a whole term.
            if s[0] == '(' and s[-1] == ')':
                s = s[1:-1]
            if n > 1:
                s = str(n) + s
            parts.append(s)
        return '+'.join(parts)

370 

371 

def dict2str(dct):
    """Join a symbol -> count mapping into a formula string.

    Symbols are written in the iteration order of *dct*; a count is only
    written when it is larger than 1.
    """
    pieces = []
    for symbol, amount in dct.items():
        suffix = str(amount) if amount > 1 else ''
        pieces.append(symbol + suffix)
    return ''.join(pieces)

375 

376 

def parse(f: str):  # -> Tree
    """Parse a formula string into a list of (subtree, multiplier) terms.

    The string is split at '+'; for each term the leading number is
    taken as the multiplier and the remainder is parsed by parse2().
    An empty string gives an empty list.
    """
    if not f:
        return []
    terms = []
    for term in f.split('+'):
        multiplier, body = strip_number(term)
        terms.append((parse2(body), multiplier))
    return terms

386 

387 

def parse2(f: str) -> Tree:
    """Parse one '+'-free formula term into a tree.

    A term is a sequence of units.  A unit is either a chemical symbol
    (one upper-case letter and an optional lower-case letter) with an
    optional number, or a parenthesized sub-term with an optional number.

    Returns
    -------
    The unit itself when the term consists of exactly one unit, otherwise
    a list of units (empty for an empty string).  A symbol without a
    number is a plain str, a symbol with a number is ``(symbol, number)``
    and a parenthesized group is ``(subtree, number)``.

    Raises
    ------
    ValueError
        if a parenthesis is never closed or if the text does not start
        with a chemical symbol where one is expected.
    """
    units = []
    while f:
        unit: Union[str, Tuple[str, int], Tree]
        if f[0] == '(':
            # Find the ')' that matches the '(' at index 0.
            level = 0
            for i, c in enumerate(f[1:], 1):
                if c == '(':
                    level += 1
                elif c == ')':
                    if level == 0:
                        break
                    level -= 1
            else:
                raise ValueError(f'Unbalanced parentheses in formula: {f!r}')
            f2 = f[1:i]
            n, f = strip_number(f[i + 1:])
            unit = (parse2(f2), n)
        else:
            m = re.match('([A-Z][a-z]?)([0-9]*)', f)
            if m is None:
                raise ValueError(
                    f'Expected a chemical symbol in formula at: {f!r}')
            symb = m.group(1)
            number = m.group(2)
            if number:
                unit = (symb, int(number))
            else:
                unit = symb
            f = f[m.end():]
        units.append(unit)
    if len(units) == 1:
        return units[0]
    return units

421 

422 

def strip_number(s: str) -> Tuple[int, str]:
    """Split the leading digits off *s*.

    Returns the leading number (1 when *s* does not start with a digit)
    together with the rest of the string.
    """
    match = re.match('[0-9]*', s)
    assert match is not None  # the pattern also matches the empty string
    digits = match.group()
    remainder = s[match.end():]
    if digits:
        return int(digits), remainder
    return 1, remainder

427 

428 

def tree2str(tree: Tree,
             sub1: str, sub2: str) -> str:
    """Render *tree* as text with numbers wrapped in *sub1* and *sub2*.

    * A symbol (str) is returned as it is.
    * A sequence of sub-trees is concatenated and put in parentheses.
    * A (subtree, N) pair is the rendered sub-tree followed by
      ``sub1 + str(N) + sub2``; for N equal to 1 nothing is appended and
      enclosing parentheses are removed instead.
    """
    if isinstance(tree, str):
        return tree
    if not isinstance(tree, tuple):
        inner = ''.join(tree2str(child, sub1, sub2) for child in tree)
        return '(' + inner + ')'
    subtree, multiplier = tree
    text = tree2str(subtree, sub1, sub2)
    if multiplier != 1:
        return text + sub1 + str(multiplier) + sub2
    if text[0] == '(' and text[-1] == ')':
        return text[1:-1]
    return text

442 

443 

def count_tree(tree: Tree) -> Dict[str, int]:
    """Count the atoms of each chemical symbol in *tree*.

    A symbol counts once, a (subtree, N) pair counts its sub-tree N times
    and a sequence of sub-trees is the sum of its members.
    """
    if isinstance(tree, str):
        return {tree: 1}
    if isinstance(tree, tuple):
        subtree, multiplier = tree
        inner = count_tree(subtree)
        return {symbol: amount * multiplier
                for symbol, amount in inner.items()}
    totals = {}  # type: Dict[str, int]
    for child in tree:
        for symbol, amount in count_tree(child).items():
            totals[symbol] = totals.get(symbol, 0) + amount
    return totals

456 

457 

# Non-metals, half-metals/metalloids, halogens and noble gases, one row of
# the periodic table per line:
non_metals = ('H He B C N O F Ne '
              'Si P S Cl Ar '
              'Ge As Se Br Kr '
              'Sb Te I Xe '
              'Po At Rn').split()

464 

465 

# Backwards compatibility:
def formula_hill(numbers, empirical=False):
    """Return the formula string for a list of atomic numbers, Hill order.

    Elements are alphabetically ordered with C and H first.

    If `empirical` is true, the element counts are divided by their
    greatest common divisor to yield an empirical formula.
    """
    tree = [([chemical_symbols[number] for number in numbers], 1)]
    formula = Formula('', _tree=tree)
    if empirical:
        formula = formula.reduce()[0]
    return format(formula, 'hill')

479 

480 

# Backwards compatibility:
def formula_metal(numbers, empirical=False):
    """Return the formula string for a list of atomic numbers, metals first.

    Elements are alphabetically ordered with metals first.

    If `empirical` is true, the element counts are divided by their
    greatest common divisor to yield an empirical formula.
    """
    tree = [([chemical_symbols[number] for number in numbers], 1)]
    formula = Formula('', _tree=tree)
    if empirical:
        formula = formula.reduce()[0]
    return format(formula, 'metal')