Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import numpy as np 

2from itertools import combinations_with_replacement 

3from math import erf 

4from scipy.spatial.distance import cdist 

5from ase.neighborlist import NeighborList 

6from ase.utils import pbc2pbc 

7 

8 

9class OFPComparator: 

10 """Implementation of comparison using Oganov's fingerprint (OFP) 

11 functions, based on: 

12 

13 * `Oganov, Valle, J. Chem. Phys. 130, 104504 (2009)`__ 

14 

15 __ https://doi.org/10.1063/1.3079326 

16 

17 * `Lyakhov, Oganov, Valle, Comp. Phys. Comm. 181 (2010) 1623-1632`__ 

18 

19 __ https://doi.org/10.1016/j.cpc.2010.06.007 

20 

21 Parameters: 

22 

23 n_top: int or None 

24 The number of atoms to optimize (None = include all). 

25 

26 dE: float 

27 Energy difference above which two structures are 

28 automatically considered to be different. (Default 1 eV) 

29 

30 cos_dist_max: float 

31 Maximal cosine distance between two structures in 

32 order to be still considered the same structure. Default 5e-3 

33 

34 rcut: float 

35 Cutoff radius in Angstrom for the fingerprints. 

36 (Default 20 Angstrom) 

37 

38 binwidth: float 

39 Width in Angstrom of the bins over which the fingerprints 

40 are discretized. (Default 0.05 Angstrom) 

41 

42 pbc: list of three booleans or None 

43 Specifies whether to apply periodic boundary conditions 

44 along each of the three unit cell vectors when calculating 

45 the fingerprint. The default (None) is to apply PBCs in all 

46 3 directions. 

47 

48 Note: for isolated systems (pbc = [False, False, False]), 

49 the pair correlation function itself is always short-ranged 

50 (decays to zero beyond a certain radius), so unity is not 

51 subtracted for calculating the fingerprint. Also the 

52 volume normalization disappears. 

53 

54 maxdims: list of three floats or None 

55 If PBCs in only 1 or 2 dimensions are specified, the 

56 maximal thicknesses along the non-periodic directions can 

57 be specified here (the values given for the periodic 

58 directions will not be used). If set to None (the 

59 default), the length of the cell vector along the 

60 non-periodic direction is used. 

61 

62 Note: in this implementation, the cell vectors are 

63 assumed to be orthogonal. 

64 

65 sigma: float 

66 Standard deviation of the gaussian smearing to be applied 

67 in the calculation of the fingerprints (in 

68 Angstrom). Default 0.02 Angstrom. 

69 

70 nsigma: int 

71 Distance (as the number of standard deviations sigma) at 

72 which the gaussian smearing is cut off (i.e. no smearing 

73 beyond that distance). (Default 4) 

74 

75 recalculate: boolean 

76 If True, ignores the fingerprints stored in 

77 atoms.info and recalculates them. (Default False) 

78 

79 """ 

80 

81 def __init__(self, n_top=None, dE=1.0, cos_dist_max=5e-3, rcut=20., 

82 binwidth=0.05, sigma=0.02, nsigma=4, pbc=True, 

83 maxdims=None, recalculate=False): 

84 self.n_top = n_top or 0 

85 self.dE = dE 

86 self.cos_dist_max = cos_dist_max 

87 self.rcut = rcut 

88 self.binwidth = binwidth 

89 self.pbc = pbc2pbc(pbc) 

90 

91 if maxdims is None: 

92 self.maxdims = [None] * 3 

93 else: 

94 self.maxdims = maxdims 

95 

96 self.sigma = sigma 

97 self.nsigma = nsigma 

98 self.recalculate = recalculate 

99 self.dimensions = self.pbc.sum() 

100 

101 if self.dimensions == 1 or self.dimensions == 2: 

102 for direction in range(3): 

103 if not self.pbc[direction]: 

104 if self.maxdims[direction] is not None: 

105 if self.maxdims[direction] <= 0: 

106 e = '''If a max thickness is specificed in maxdims 

107 for a non-periodic direction, it has to be 

108 strictly positive.''' 

109 raise ValueError(e) 

110 

111 def looks_like(self, a1, a2): 

112 """ Return if structure a1 or a2 are similar or not. """ 

113 if len(a1) != len(a2): 

114 raise Exception('The two configurations are not the same size.') 

115 

116 # first we check the energy criteria 

117 if a1.calc is not None and a2.calc is not None: 

118 dE = abs(a1.get_potential_energy() - a2.get_potential_energy()) 

119 if dE >= self.dE: 

120 return False 

121 

122 # then we check the structure 

123 cos_dist = self._compare_structure(a1, a2) 

124 verdict = cos_dist < self.cos_dist_max 

125 return verdict 

126 

127 def _json_encode(self, fingerprints, typedic): 

128 """ json does not accept tuples nor integers as dict keys, 

129 so in order to write the fingerprints to atoms.info, we need 

130 to convert them to strings """ 

131 fingerprints_encoded = {} 

132 for key, val in fingerprints.items(): 

133 try: 

134 newkey = "_".join(map(str, list(key))) 

135 except TypeError: 

136 newkey = str(key) 

137 if isinstance(val, dict): 

138 fingerprints_encoded[newkey] = {} 

139 for key2, val2 in val.items(): 

140 fingerprints_encoded[newkey][str(key2)] = val2 

141 else: 

142 fingerprints_encoded[newkey] = val 

143 typedic_encoded = {} 

144 for key, val in typedic.items(): 

145 newkey = str(key) 

146 typedic_encoded[newkey] = val 

147 return [fingerprints_encoded, typedic_encoded] 

148 

149 def _json_decode(self, fingerprints, typedic): 

150 """ This is the reverse operation of _json_encode """ 

151 fingerprints_decoded = {} 

152 for key, val in fingerprints.items(): 

153 newkey = list(map(int, key.split("_"))) 

154 if len(newkey) > 1: 

155 newkey = tuple(newkey) 

156 else: 

157 newkey = newkey[0] 

158 

159 if isinstance(val, dict): 

160 fingerprints_decoded[newkey] = {} 

161 for key2, val2 in val.items(): 

162 fingerprints_decoded[newkey][int(key2)] = np.array(val2) 

163 else: 

164 fingerprints_decoded[newkey] = np.array(val) 

165 typedic_decoded = {} 

166 for key, val in typedic.items(): 

167 newkey = int(key) 

168 typedic_decoded[newkey] = val 

169 return [fingerprints_decoded, typedic_decoded] 

170 

171 def _compare_structure(self, a1, a2): 

172 """ Returns the cosine distance between the two structures, 

173 using their fingerprints. """ 

174 

175 if len(a1) != len(a2): 

176 raise Exception('The two configurations are not the same size.') 

177 

178 a1top = a1[-self.n_top:] 

179 a2top = a2[-self.n_top:] 

180 

181 if 'fingerprints' in a1.info and not self.recalculate: 

182 fp1, typedic1 = a1.info['fingerprints'] 

183 fp1, typedic1 = self._json_decode(fp1, typedic1) 

184 else: 

185 fp1, typedic1 = self._take_fingerprints(a1top) 

186 a1.info['fingerprints'] = self._json_encode(fp1, typedic1) 

187 

188 if 'fingerprints' in a2.info and not self.recalculate: 

189 fp2, typedic2 = a2.info['fingerprints'] 

190 fp2, typedic2 = self._json_decode(fp2, typedic2) 

191 else: 

192 fp2, typedic2 = self._take_fingerprints(a2top) 

193 a2.info['fingerprints'] = self._json_encode(fp2, typedic2) 

194 

195 if sorted(fp1) != sorted(fp2): 

196 raise AssertionError('The two structures have fingerprints ' 

197 'with different compounds.') 

198 for key in typedic1: 

199 if not np.array_equal(typedic1[key], typedic2[key]): 

200 raise AssertionError('The two structures have a different ' 

201 'stoichiometry or ordering!') 

202 

203 cos_dist = self._cosine_distance(fp1, fp2, typedic1) 

204 return cos_dist 

205 

206 def _get_volume(self, a): 

207 ''' Calculates the normalizing value, and other parameters 

208 (pmin,pmax,qmin,qmax) that are used for surface area calculation 

209 in the case of 1 or 2-D periodicity.''' 

210 

211 cell = a.get_cell() 

212 scalpos = a.get_scaled_positions() 

213 

214 # defaults: 

215 volume = 1. 

216 pmin, pmax, qmin, qmax = [0.] * 4 

217 

218 if self.dimensions == 1 or self.dimensions == 2: 

219 for direction in range(3): 

220 if not self.pbc[direction]: 

221 if self.maxdims[direction] is None: 

222 maxdim = np.linalg.norm(cell[direction, :]) 

223 self.maxdims[direction] = maxdim 

224 

225 pbc_dirs = [i for i in range(3) if self.pbc[i]] 

226 non_pbc_dirs = [i for i in range(3) if not self.pbc[i]] 

227 

228 if self.dimensions == 3: 

229 volume = abs(np.dot(np.cross(cell[0, :], cell[1, :]), cell[2, :])) 

230 

231 elif self.dimensions == 2: 

232 non_pbc_dir = non_pbc_dirs[0] 

233 

234 a = np.cross(cell[pbc_dirs[0], :], cell[pbc_dirs[1], :]) 

235 b = self.maxdims[non_pbc_dir] 

236 b /= np.linalg.norm(cell[non_pbc_dir, :]) 

237 

238 volume = np.abs(np.dot(a, b * cell[non_pbc_dir, :])) 

239 

240 maxpos = np.max(scalpos[:, non_pbc_dir]) 

241 minpos = np.min(scalpos[:, non_pbc_dir]) 

242 pwidth = maxpos - minpos 

243 pmargin = 0.5 * (b - pwidth) 

244 # note: here is a place where we assume that the 

245 # non-periodic direction is orthogonal to the periodic ones: 

246 pmin = np.min(scalpos[:, non_pbc_dir]) - pmargin 

247 pmin *= np.linalg.norm(cell[non_pbc_dir, :]) 

248 pmax = np.max(scalpos[:, non_pbc_dir]) + pmargin 

249 pmax *= np.linalg.norm(cell[non_pbc_dir, :]) 

250 

251 elif self.dimensions == 1: 

252 pbc_dir = pbc_dirs[0] 

253 

254 v0 = cell[non_pbc_dirs[0], :] 

255 b0 = self.maxdims[non_pbc_dirs[0]] 

256 b0 /= np.linalg.norm(cell[non_pbc_dirs[0], :]) 

257 v1 = cell[non_pbc_dirs[1], :] 

258 b1 = self.maxdims[non_pbc_dirs[1]] 

259 b1 /= np.linalg.norm(cell[non_pbc_dirs[1], :]) 

260 

261 volume = np.abs(np.dot(np.cross(b0 * v0, b1 * v1), 

262 cell[pbc_dir, :])) 

263 

264 # note: here is a place where we assume that the 

265 # non-periodic direction is orthogonal to the periodic ones: 

266 maxpos = np.max(scalpos[:, non_pbc_dirs[0]]) 

267 minpos = np.min(scalpos[:, non_pbc_dirs[0]]) 

268 pwidth = maxpos - minpos 

269 pmargin = 0.5 * (b0 - pwidth) 

270 

271 pmin = np.min(scalpos[:, non_pbc_dirs[0]]) - pmargin 

272 pmin *= np.linalg.norm(cell[non_pbc_dirs[0], :]) 

273 pmax = np.max(scalpos[:, non_pbc_dirs[0]]) + pmargin 

274 pmax *= np.linalg.norm(cell[non_pbc_dirs[0], :]) 

275 

276 maxpos = np.max(scalpos[:, non_pbc_dirs[1]]) 

277 minpos = np.min(scalpos[:, non_pbc_dirs[1]]) 

278 qwidth = maxpos - minpos 

279 qmargin = 0.5 * (b1 - qwidth) 

280 

281 qmin = np.min(scalpos[:, non_pbc_dirs[1]]) - qmargin 

282 qmin *= np.linalg.norm(cell[non_pbc_dirs[1], :]) 

283 qmax = np.max(scalpos[:, non_pbc_dirs[1]]) + qmargin 

284 qmax *= np.linalg.norm(cell[non_pbc_dirs[1], :]) 

285 

286 elif self.dimensions == 0: 

287 volume = 1. 

288 

289 return [volume, pmin, pmax, qmin, qmax] 

290 

291 def _take_fingerprints(self, atoms, individual=False): 

292 """ Returns a [fingerprints,typedic] list, where fingerprints 

293 is a dictionary with the fingerprints, and typedic is a 

294 dictionary with the list of atom indices for each element 

295 (or "type") in the atoms object. 

296 The keys in the fingerprints dictionary are the (A,B) tuples, 

297 which are the different element-element combinations in the 

298 atoms object (A and B are the atomic numbers). 

299 When A != B, the (A,B) tuple is sorted (A < B). 

300 

301 If individual=True, a dict is returned, where each atom index 

302 has an {atomic_number:fingerprint} dict as value. 

303 If individual=False, the fingerprints from atoms of the same 

304 atomic number are added together.""" 

305 

306 pos = atoms.get_positions() 

307 num = atoms.get_atomic_numbers() 

308 cell = atoms.get_cell() 

309 

310 unique_types = np.unique(num) 

311 posdic = {} 

312 typedic = {} 

313 for t in unique_types: 

314 tlist = [i for i, atom in enumerate(atoms) if atom.number == t] 

315 typedic[t] = tlist 

316 posdic[t] = pos[tlist] 

317 

318 # determining the volume normalization and other parameters 

319 volume, pmin, pmax, qmin, qmax = self._get_volume(atoms) 

320 

321 # functions for calculating the surface area 

322 non_pbc_dirs = [i for i in range(3) if not self.pbc[i]] 

323 

324 def surface_area_0d(r): 

325 return 4 * np.pi * (r**2) 

326 

327 def surface_area_1d(r, pos): 

328 q0 = pos[non_pbc_dirs[1]] 

329 phi1 = np.lib.scimath.arccos((qmax - q0) / r).real 

330 phi2 = np.pi - np.lib.scimath.arccos((qmin - q0) / r).real 

331 factor = 1 - (phi1 + phi2) / np.pi 

332 return surface_area_2d(r, pos) * factor 

333 

334 def surface_area_2d(r, pos): 

335 p0 = pos[non_pbc_dirs[0]] 

336 area = np.minimum(pmax - p0, r) + np.minimum(p0 - pmin, r) 

337 area *= 2 * np.pi * r 

338 return area 

339 

340 def surface_area_3d(r): 

341 return 4 * np.pi * (r**2) 

342 

343 # build neighborlist 

344 # this is computationally the most intensive part 

345 a = atoms.copy() 

346 a.set_pbc(self.pbc) 

347 nl = NeighborList([self.rcut / 2.] * len(a), skin=0., 

348 self_interaction=False, bothways=True) 

349 nl.update(a) 

350 

351 # parameters for the binning: 

352 m = int(np.ceil(self.nsigma * self.sigma / self.binwidth)) 

353 x = 0.25 * np.sqrt(2) * self.binwidth * (2 * m + 1) * 1. / self.sigma 

354 smearing_norm = erf(x) 

355 nbins = int(np.ceil(self.rcut * 1. / self.binwidth)) 

356 bindist = self.binwidth * np.arange(1, nbins + 1) 

357 

358 def take_individual_rdf(index, unique_type): 

359 # Computes the radial distribution function of atoms 

360 # of type unique_type around the atom with index "index". 

361 rdf = np.zeros(nbins) 

362 

363 if self.dimensions == 3: 

364 weights = 1. / surface_area_3d(bindist) 

365 elif self.dimensions == 2: 

366 weights = 1. / surface_area_2d(bindist, pos[index]) 

367 elif self.dimensions == 1: 

368 weights = 1. / surface_area_1d(bindist, pos[index]) 

369 elif self.dimensions == 0: 

370 weights = 1. / surface_area_0d(bindist) 

371 weights /= self.binwidth 

372 

373 indices, offsets = nl.get_neighbors(index) 

374 valid = np.where(num[indices] == unique_type) 

375 p = pos[indices[valid]] + np.dot(offsets[valid], cell) 

376 r = cdist(p, [pos[index]]) 

377 bins = np.floor(r / self.binwidth) 

378 

379 for i in range(-m, m + 1): 

380 newbins = bins + i 

381 valid = np.where((newbins >= 0) & (newbins < nbins)) 

382 valid_bins = newbins[valid].astype(int) 

383 values = weights[valid_bins] 

384 

385 c = 0.25 * np.sqrt(2) * self.binwidth * 1. / self.sigma 

386 values *= 0.5 * erf(c * (2 * i + 1)) - \ 

387 0.5 * erf(c * (2 * i - 1)) 

388 values /= smearing_norm 

389 

390 for j, valid_bin in enumerate(valid_bins): 

391 rdf[valid_bin] += values[j] 

392 

393 rdf /= len(typedic[unique_type]) * 1. / volume 

394 return rdf 

395 

396 fingerprints = {} 

397 if individual: 

398 for i in range(len(atoms)): 

399 fingerprints[i] = {} 

400 for unique_type in unique_types: 

401 fingerprint = take_individual_rdf(i, unique_type) 

402 if self.dimensions > 0: 

403 fingerprint -= 1 

404 fingerprints[i][unique_type] = fingerprint 

405 else: 

406 for t1, t2 in combinations_with_replacement(unique_types, r=2): 

407 key = (t1, t2) 

408 fingerprint = np.zeros(nbins) 

409 for i in typedic[t1]: 

410 fingerprint += take_individual_rdf(i, t2) 

411 fingerprint /= len(typedic[t1]) 

412 if self.dimensions > 0: 

413 fingerprint -= 1 

414 fingerprints[key] = fingerprint 

415 

416 return [fingerprints, typedic] 

417 

418 def _calculate_local_orders(self, individual_fingerprints, typedic, 

419 volume): 

420 """ Returns a list with the local order for every atom, 

421 using the definition of local order from 

422 Lyakhov, Oganov, Valle, Comp. Phys. Comm. 181 (2010) 1623-1632 

423 https://doi.org/10.1016/j.cpc.2010.06.007""" 

424 

425 # total number of atoms: 

426 n_tot = sum([len(typedic[key]) for key in typedic]) 

427 

428 local_orders = [] 

429 for index, fingerprints in individual_fingerprints.items(): 

430 local_order = 0 

431 for unique_type, fingerprint in fingerprints.items(): 

432 term = np.linalg.norm(fingerprint)**2 

433 term *= self.binwidth 

434 term *= (volume * 1. / n_tot)**3 

435 term *= len(typedic[unique_type]) * 1. / n_tot 

436 local_order += term 

437 local_orders.append(np.sqrt(local_order)) 

438 

439 return local_orders 

440 

441 def get_local_orders(self, a): 

442 """ Returns the local orders of all the atoms.""" 

443 

444 a_top = a[-self.n_top:] 

445 key = 'individual_fingerprints' 

446 

447 if key in a.info and not self.recalculate: 

448 fp, typedic = self._json_decode(*a.info[key]) 

449 else: 

450 fp, typedic = self._take_fingerprints(a_top, individual=True) 

451 a.info[key] = self._json_encode(fp, typedic) 

452 

453 volume, pmin, pmax, qmin, qmax = self._get_volume(a_top) 

454 return self._calculate_local_orders(fp, typedic, volume) 

455 

456 def _cosine_distance(self, fp1, fp2, typedic): 

457 """ Returns the cosine distance from two fingerprints. 

458 It also needs information about the number of atoms from 

459 each element, which is included in "typedic".""" 

460 

461 keys = sorted(fp1) 

462 

463 # calculating the weights: 

464 w = {} 

465 wtot = 0 

466 for key in keys: 

467 weight = len(typedic[key[0]]) * len(typedic[key[1]]) 

468 wtot += weight 

469 w[key] = weight 

470 for key in keys: 

471 w[key] *= 1. / wtot 

472 

473 # calculating the fingerprint norms: 

474 norm1 = 0 

475 norm2 = 0 

476 for key in keys: 

477 norm1 += (np.linalg.norm(fp1[key])**2) * w[key] 

478 norm2 += (np.linalg.norm(fp2[key])**2) * w[key] 

479 norm1 = np.sqrt(norm1) 

480 norm2 = np.sqrt(norm2) 

481 

482 # calculating the distance: 

483 distance = 0 

484 for key in keys: 

485 distance += np.sum(fp1[key] * fp2[key]) * w[key] / (norm1 * norm2) 

486 

487 distance = 0.5 * (1 - distance) 

488 return distance 

489 

490 def plot_fingerprints(self, a, prefix=''): 

491 """ Function for quickly plotting all the fingerprints. 

492 Prefix = a prefix you want to give to the resulting PNG file.""" 

493 try: 

494 import matplotlib.pyplot as plt 

495 except ImportError: 

496 Warning("Matplotlib could not be loaded - plotting won't work") 

497 raise 

498 

499 if 'fingerprints' in a.info and not self.recalculate: 

500 fp, typedic = a.info['fingerprints'] 

501 fp, typedic = self._json_decode(fp, typedic) 

502 else: 

503 a_top = a[-self.n_top:] 

504 fp, typedic = self._take_fingerprints(a_top) 

505 a.info['fingerprints'] = self._json_encode(fp, typedic) 

506 

507 npts = int(np.ceil(self.rcut * 1. / self.binwidth)) 

508 x = np.linspace(0, self.rcut, npts, endpoint=False) 

509 

510 for key, val in fp.items(): 

511 plt.plot(x, val) 

512 suffix = "_fp_{0}_{1}.png".format(key[0], key[1]) 

513 plt.savefig(prefix + suffix) 

514 plt.clf() 

515 

516 def plot_individual_fingerprints(self, a, prefix=''): 

517 """ Function for plotting all the individual fingerprints. 

518 Prefix = a prefix for the resulting PNG file.""" 

519 try: 

520 import matplotlib.pyplot as plt 

521 except ImportError: 

522 Warning("Matplotlib could not be loaded - plotting won't work") 

523 raise 

524 

525 if 'individual_fingerprints' in a.info and not self.recalculate: 

526 fp, typedic = a.info['individual_fingerprints'] 

527 else: 

528 a_top = a[-self.n_top:] 

529 fp, typedic = self._take_fingerprints(a_top, individual=True) 

530 a.info['individual_fingerprints'] = [fp, typedic] 

531 

532 npts = int(np.ceil(self.rcut * 1. / self.binwidth)) 

533 x = np.linspace(0, self.rcut, npts, endpoint=False) 

534 

535 for key, val in fp.items(): 

536 for key2, val2 in val.items(): 

537 plt.plot(x, val2) 

538 plt.ylim([-1, 10]) 

539 suffix = "_individual_fp_{0}_{1}.png".format(key, key2) 

540 plt.savefig(prefix + suffix) 

541 plt.clf()