Coverage for /builds/debichem-team/python-ase/ase/ga/ofp_comparator.py: 50.33%
306 statements
« prev ^ index » next coverage.py v7.5.3, created at 2025-03-06 04:00 +0000
« prev ^ index » next coverage.py v7.5.3, created at 2025-03-06 04:00 +0000
1from itertools import combinations_with_replacement
2from math import erf
4import matplotlib.pyplot as plt
5import numpy as np
6from scipy.spatial.distance import cdist
8from ase.neighborlist import NeighborList
9from ase.utils import pbc2pbc
12class OFPComparator:
13 """Implementation of comparison using Oganov's fingerprint (OFP)
14 functions, based on:
16 * :doi:`Oganov, Valle, J. Chem. Phys. 130, 104504 (2009)
17 <10.1063/1.3079326>`
19 * :doi:`Lyakhov, Oganov, Valle, Comp. Phys. Comm. 181 (2010) 1623-1632
20 <10.1016/j.cpc.2010.06.007>`
22 Parameters:
24 n_top: int or None
25 The number of atoms to optimize (None = include all).
27 dE: float
28 Energy difference above which two structures are
29 automatically considered to be different. (Default 1 eV)
31 cos_dist_max: float
32 Maximal cosine distance between two structures in
33 order to be still considered the same structure. Default 5e-3
35 rcut: float
36 Cutoff radius in Angstrom for the fingerprints.
37 (Default 20 Angstrom)
39 binwidth: float
40 Width in Angstrom of the bins over which the fingerprints
41 are discretized. (Default 0.05 Angstrom)
43 pbc: list of three booleans or None
44 Specifies whether to apply periodic boundary conditions
45 along each of the three unit cell vectors when calculating
46 the fingerprint. The default (None) is to apply PBCs in all
47 3 directions.
49 Note: for isolated systems (pbc = [False, False, False]),
50 the pair correlation function itself is always short-ranged
51 (decays to zero beyond a certain radius), so unity is not
52 subtracted for calculating the fingerprint. Also the
53 volume normalization disappears.
55 maxdims: list of three floats or None
56 If PBCs in only 1 or 2 dimensions are specified, the
57 maximal thicknesses along the non-periodic directions can
58 be specified here (the values given for the periodic
59 directions will not be used). If set to None (the
60 default), the length of the cell vector along the
61 non-periodic direction is used.
63 Note: in this implementation, the cell vectors are
64 assumed to be orthogonal.
66 sigma: float
67 Standard deviation of the gaussian smearing to be applied
68 in the calculation of the fingerprints (in
69 Angstrom). Default 0.02 Angstrom.
71 nsigma: int
72 Distance (as the number of standard deviations sigma) at
73 which the gaussian smearing is cut off (i.e. no smearing
74 beyond that distance). (Default 4)
76 recalculate: boolean
77 If True, ignores the fingerprints stored in
78 atoms.info and recalculates them. (Default False)
80 """
82 def __init__(self, n_top=None, dE=1.0, cos_dist_max=5e-3, rcut=20.,
83 binwidth=0.05, sigma=0.02, nsigma=4, pbc=True,
84 maxdims=None, recalculate=False):
85 self.n_top = n_top or 0
86 self.dE = dE
87 self.cos_dist_max = cos_dist_max
88 self.rcut = rcut
89 self.binwidth = binwidth
90 self.pbc = pbc2pbc(pbc)
92 if maxdims is None:
93 self.maxdims = [None] * 3
94 else:
95 self.maxdims = maxdims
97 self.sigma = sigma
98 self.nsigma = nsigma
99 self.recalculate = recalculate
100 self.dimensions = self.pbc.sum()
102 if self.dimensions == 1 or self.dimensions == 2:
103 for direction in range(3):
104 if not self.pbc[direction]:
105 if self.maxdims[direction] is not None:
106 if self.maxdims[direction] <= 0:
107 e = '''If a max thickness is specificed in maxdims
108 for a non-periodic direction, it has to be
109 strictly positive.'''
110 raise ValueError(e)
112 def looks_like(self, a1, a2):
113 """ Return if structure a1 or a2 are similar or not. """
114 if len(a1) != len(a2):
115 raise Exception('The two configurations are not the same size.')
117 # first we check the energy criteria
118 if a1.calc is not None and a2.calc is not None:
119 dE = abs(a1.get_potential_energy() - a2.get_potential_energy())
120 if dE >= self.dE:
121 return False
123 # then we check the structure
124 cos_dist = self._compare_structure(a1, a2)
125 verdict = cos_dist < self.cos_dist_max
126 return verdict
128 def _json_encode(self, fingerprints, typedic):
129 """ json does not accept tuples nor integers as dict keys,
130 so in order to write the fingerprints to atoms.info, we need
131 to convert them to strings """
132 fingerprints_encoded = {}
133 for key, val in fingerprints.items():
134 try:
135 newkey = "_".join(map(str, list(key)))
136 except TypeError:
137 newkey = str(key)
138 if isinstance(val, dict):
139 fingerprints_encoded[newkey] = {
140 str(key2): val2 for key2, val2 in val.items()}
141 else:
142 fingerprints_encoded[newkey] = val
143 typedic_encoded = {}
144 for key, val in typedic.items():
145 newkey = str(key)
146 typedic_encoded[newkey] = val
147 return [fingerprints_encoded, typedic_encoded]
149 def _json_decode(self, fingerprints, typedic):
150 """ This is the reverse operation of _json_encode """
151 fingerprints_decoded = {}
152 for key, val in fingerprints.items():
153 newkey = list(map(int, key.split("_")))
154 if len(newkey) > 1:
155 newkey = tuple(newkey)
156 else:
157 newkey = newkey[0]
159 if isinstance(val, dict):
160 fingerprints_decoded[newkey] = {
161 int(key2): np.array(val2) for key2, val2 in val.items()
162 }
163 else:
164 fingerprints_decoded[newkey] = np.array(val)
165 typedic_decoded = {}
166 for key, val in typedic.items():
167 newkey = int(key)
168 typedic_decoded[newkey] = val
169 return [fingerprints_decoded, typedic_decoded]
171 def _compare_structure(self, a1, a2):
172 """ Returns the cosine distance between the two structures,
173 using their fingerprints. """
175 if len(a1) != len(a2):
176 raise Exception('The two configurations are not the same size.')
178 a1top = a1[-self.n_top:]
179 a2top = a2[-self.n_top:]
181 if 'fingerprints' in a1.info and not self.recalculate:
182 fp1, typedic1 = a1.info['fingerprints']
183 fp1, typedic1 = self._json_decode(fp1, typedic1)
184 else:
185 fp1, typedic1 = self._take_fingerprints(a1top)
186 a1.info['fingerprints'] = self._json_encode(fp1, typedic1)
188 if 'fingerprints' in a2.info and not self.recalculate:
189 fp2, typedic2 = a2.info['fingerprints']
190 fp2, typedic2 = self._json_decode(fp2, typedic2)
191 else:
192 fp2, typedic2 = self._take_fingerprints(a2top)
193 a2.info['fingerprints'] = self._json_encode(fp2, typedic2)
195 if sorted(fp1) != sorted(fp2):
196 raise AssertionError('The two structures have fingerprints '
197 'with different compounds.')
198 for key in typedic1:
199 if not np.array_equal(typedic1[key], typedic2[key]):
200 raise AssertionError('The two structures have a different '
201 'stoichiometry or ordering!')
203 cos_dist = self._cosine_distance(fp1, fp2, typedic1)
204 return cos_dist
206 def _get_volume(self, a):
207 ''' Calculates the normalizing value, and other parameters
208 (pmin,pmax,qmin,qmax) that are used for surface area calculation
209 in the case of 1 or 2-D periodicity.'''
211 cell = a.get_cell()
212 scalpos = a.get_scaled_positions()
214 # defaults:
215 volume = 1.
216 pmin, pmax, qmin, qmax = [0.] * 4
218 if self.dimensions == 1 or self.dimensions == 2:
219 for direction in range(3):
220 if not self.pbc[direction]:
221 if self.maxdims[direction] is None:
222 maxdim = np.linalg.norm(cell[direction, :])
223 self.maxdims[direction] = maxdim
225 pbc_dirs = [i for i in range(3) if self.pbc[i]]
226 non_pbc_dirs = [i for i in range(3) if not self.pbc[i]]
228 if self.dimensions == 3:
229 volume = abs(np.dot(np.cross(cell[0, :], cell[1, :]), cell[2, :]))
231 elif self.dimensions == 2:
232 non_pbc_dir = non_pbc_dirs[0]
234 a = np.cross(cell[pbc_dirs[0], :], cell[pbc_dirs[1], :])
235 b = self.maxdims[non_pbc_dir]
236 b /= np.linalg.norm(cell[non_pbc_dir, :])
238 volume = np.abs(np.dot(a, b * cell[non_pbc_dir, :]))
240 maxpos = np.max(scalpos[:, non_pbc_dir])
241 minpos = np.min(scalpos[:, non_pbc_dir])
242 pwidth = maxpos - minpos
243 pmargin = 0.5 * (b - pwidth)
244 # note: here is a place where we assume that the
245 # non-periodic direction is orthogonal to the periodic ones:
246 pmin = np.min(scalpos[:, non_pbc_dir]) - pmargin
247 pmin *= np.linalg.norm(cell[non_pbc_dir, :])
248 pmax = np.max(scalpos[:, non_pbc_dir]) + pmargin
249 pmax *= np.linalg.norm(cell[non_pbc_dir, :])
251 elif self.dimensions == 1:
252 pbc_dir = pbc_dirs[0]
254 v0 = cell[non_pbc_dirs[0], :]
255 b0 = self.maxdims[non_pbc_dirs[0]]
256 b0 /= np.linalg.norm(cell[non_pbc_dirs[0], :])
257 v1 = cell[non_pbc_dirs[1], :]
258 b1 = self.maxdims[non_pbc_dirs[1]]
259 b1 /= np.linalg.norm(cell[non_pbc_dirs[1], :])
261 volume = np.abs(np.dot(np.cross(b0 * v0, b1 * v1),
262 cell[pbc_dir, :]))
264 # note: here is a place where we assume that the
265 # non-periodic direction is orthogonal to the periodic ones:
266 maxpos = np.max(scalpos[:, non_pbc_dirs[0]])
267 minpos = np.min(scalpos[:, non_pbc_dirs[0]])
268 pwidth = maxpos - minpos
269 pmargin = 0.5 * (b0 - pwidth)
271 pmin = np.min(scalpos[:, non_pbc_dirs[0]]) - pmargin
272 pmin *= np.linalg.norm(cell[non_pbc_dirs[0], :])
273 pmax = np.max(scalpos[:, non_pbc_dirs[0]]) + pmargin
274 pmax *= np.linalg.norm(cell[non_pbc_dirs[0], :])
276 maxpos = np.max(scalpos[:, non_pbc_dirs[1]])
277 minpos = np.min(scalpos[:, non_pbc_dirs[1]])
278 qwidth = maxpos - minpos
279 qmargin = 0.5 * (b1 - qwidth)
281 qmin = np.min(scalpos[:, non_pbc_dirs[1]]) - qmargin
282 qmin *= np.linalg.norm(cell[non_pbc_dirs[1], :])
283 qmax = np.max(scalpos[:, non_pbc_dirs[1]]) + qmargin
284 qmax *= np.linalg.norm(cell[non_pbc_dirs[1], :])
286 elif self.dimensions == 0:
287 volume = 1.
289 return [volume, pmin, pmax, qmin, qmax]
291 def _take_fingerprints(self, atoms, individual=False):
292 """ Returns a [fingerprints,typedic] list, where fingerprints
293 is a dictionary with the fingerprints, and typedic is a
294 dictionary with the list of atom indices for each element
295 (or "type") in the atoms object.
296 The keys in the fingerprints dictionary are the (A,B) tuples,
297 which are the different element-element combinations in the
298 atoms object (A and B are the atomic numbers).
299 When A != B, the (A,B) tuple is sorted (A < B).
301 If individual=True, a dict is returned, where each atom index
302 has an {atomic_number:fingerprint} dict as value.
303 If individual=False, the fingerprints from atoms of the same
304 atomic number are added together."""
306 pos = atoms.get_positions()
307 num = atoms.get_atomic_numbers()
308 cell = atoms.get_cell()
310 unique_types = np.unique(num)
311 posdic = {}
312 typedic = {}
313 for t in unique_types:
314 tlist = [i for i, atom in enumerate(atoms) if atom.number == t]
315 typedic[t] = tlist
316 posdic[t] = pos[tlist]
318 # determining the volume normalization and other parameters
319 volume, pmin, pmax, qmin, qmax = self._get_volume(atoms)
321 # functions for calculating the surface area
322 non_pbc_dirs = [i for i in range(3) if not self.pbc[i]]
324 def surface_area_0d(r):
325 return 4 * np.pi * (r**2)
327 def surface_area_1d(r, pos):
328 q0 = pos[non_pbc_dirs[1]]
329 phi1 = np.lib.scimath.arccos((qmax - q0) / r).real
330 phi2 = np.pi - np.lib.scimath.arccos((qmin - q0) / r).real
331 factor = 1 - (phi1 + phi2) / np.pi
332 return surface_area_2d(r, pos) * factor
334 def surface_area_2d(r, pos):
335 p0 = pos[non_pbc_dirs[0]]
336 area = np.minimum(pmax - p0, r) + np.minimum(p0 - pmin, r)
337 area *= 2 * np.pi * r
338 return area
340 def surface_area_3d(r):
341 return 4 * np.pi * (r**2)
343 # build neighborlist
344 # this is computationally the most intensive part
345 a = atoms.copy()
346 a.set_pbc(self.pbc)
347 nl = NeighborList([self.rcut / 2.] * len(a), skin=0.,
348 self_interaction=False, bothways=True)
349 nl.update(a)
351 # parameters for the binning:
352 m = int(np.ceil(self.nsigma * self.sigma / self.binwidth))
353 x = 0.25 * np.sqrt(2) * self.binwidth * (2 * m + 1) * 1. / self.sigma
354 smearing_norm = erf(x)
355 nbins = int(np.ceil(self.rcut * 1. / self.binwidth))
356 bindist = self.binwidth * np.arange(1, nbins + 1)
358 def take_individual_rdf(index, unique_type):
359 # Computes the radial distribution function of atoms
360 # of type unique_type around the atom with index "index".
361 rdf = np.zeros(nbins)
363 if self.dimensions == 3:
364 weights = 1. / surface_area_3d(bindist)
365 elif self.dimensions == 2:
366 weights = 1. / surface_area_2d(bindist, pos[index])
367 elif self.dimensions == 1:
368 weights = 1. / surface_area_1d(bindist, pos[index])
369 elif self.dimensions == 0:
370 weights = 1. / surface_area_0d(bindist)
371 weights /= self.binwidth
373 indices, offsets = nl.get_neighbors(index)
374 valid = np.where(num[indices] == unique_type)
375 p = pos[indices[valid]] + np.dot(offsets[valid], cell)
376 r = cdist(p, [pos[index]])
377 bins = np.floor(r / self.binwidth)
379 for i in range(-m, m + 1):
380 newbins = bins + i
381 valid = np.where((newbins >= 0) & (newbins < nbins))
382 valid_bins = newbins[valid].astype(int)
383 values = weights[valid_bins]
385 c = 0.25 * np.sqrt(2) * self.binwidth * 1. / self.sigma
386 values *= 0.5 * erf(c * (2 * i + 1)) - \
387 0.5 * erf(c * (2 * i - 1))
388 values /= smearing_norm
390 for j, valid_bin in enumerate(valid_bins):
391 rdf[valid_bin] += values[j]
393 rdf /= len(typedic[unique_type]) * 1. / volume
394 return rdf
396 fingerprints = {}
397 if individual:
398 for i in range(len(atoms)):
399 fingerprints[i] = {}
400 for unique_type in unique_types:
401 fingerprint = take_individual_rdf(i, unique_type)
402 if self.dimensions > 0:
403 fingerprint -= 1
404 fingerprints[i][unique_type] = fingerprint
405 else:
406 for t1, t2 in combinations_with_replacement(unique_types, r=2):
407 key = (t1, t2)
408 fingerprint = np.zeros(nbins)
409 for i in typedic[t1]:
410 fingerprint += take_individual_rdf(i, t2)
411 fingerprint /= len(typedic[t1])
412 if self.dimensions > 0:
413 fingerprint -= 1
414 fingerprints[key] = fingerprint
416 return [fingerprints, typedic]
418 def _calculate_local_orders(self, individual_fingerprints, typedic,
419 volume):
420 """ Returns a list with the local order for every atom,
421 using the definition of local order from
422 Lyakhov, Oganov, Valle, Comp. Phys. Comm. 181 (2010) 1623-1632
423 :doi:`10.1016/j.cpc.2010.06.007`"""
425 # total number of atoms:
426 n_tot = sum(len(typedic[key]) for key in typedic)
427 inv_n_tot = 1. / n_tot
429 local_orders = []
430 for fingerprints in individual_fingerprints.values():
431 local_order = 0
432 for unique_type, fingerprint in fingerprints.items():
433 term = np.linalg.norm(fingerprint)**2
434 term *= self.binwidth
435 term *= (volume * inv_n_tot)**(-1 / 3)
436 term *= len(typedic[unique_type]) * inv_n_tot
437 local_order += term
438 local_orders.append(np.sqrt(local_order))
440 return local_orders
442 def get_local_orders(self, a):
443 """ Returns the local orders of all the atoms."""
445 a_top = a[-self.n_top:]
446 key = 'individual_fingerprints'
448 if key in a.info and not self.recalculate:
449 fp, typedic = self._json_decode(*a.info[key])
450 else:
451 fp, typedic = self._take_fingerprints(a_top, individual=True)
452 a.info[key] = self._json_encode(fp, typedic)
454 volume, _pmin, _pmax, _qmin, _qmax = self._get_volume(a_top)
455 return self._calculate_local_orders(fp, typedic, volume)
457 def _cosine_distance(self, fp1, fp2, typedic):
458 """ Returns the cosine distance from two fingerprints.
459 It also needs information about the number of atoms from
460 each element, which is included in "typedic"."""
462 keys = sorted(fp1)
464 # calculating the weights:
465 w = {}
466 wtot = 0
467 for key in keys:
468 weight = len(typedic[key[0]]) * len(typedic[key[1]])
469 wtot += weight
470 w[key] = weight
471 for key in keys:
472 w[key] *= 1. / wtot
474 # calculating the fingerprint norms:
475 norm1 = 0
476 norm2 = 0
477 for key in keys:
478 norm1 += (np.linalg.norm(fp1[key])**2) * w[key]
479 norm2 += (np.linalg.norm(fp2[key])**2) * w[key]
480 norm1 = np.sqrt(norm1)
481 norm2 = np.sqrt(norm2)
483 # calculating the distance:
484 distance = 0
485 for key in keys:
486 distance += np.sum(fp1[key] * fp2[key]) * w[key] / (norm1 * norm2)
488 distance = 0.5 * (1 - distance)
489 return distance
491 def plot_fingerprints(self, a, prefix=''):
492 """ Function for quickly plotting all the fingerprints.
493 Prefix = a prefix you want to give to the resulting PNG file."""
495 if 'fingerprints' in a.info and not self.recalculate:
496 fp, typedic = a.info['fingerprints']
497 fp, typedic = self._json_decode(fp, typedic)
498 else:
499 a_top = a[-self.n_top:]
500 fp, typedic = self._take_fingerprints(a_top)
501 a.info['fingerprints'] = self._json_encode(fp, typedic)
503 npts = int(np.ceil(self.rcut * 1. / self.binwidth))
504 x = np.linspace(0, self.rcut, npts, endpoint=False)
506 for key, val in fp.items():
507 plt.plot(x, val)
508 suffix = f"_fp_{key[0]}_{key[1]}.png"
509 plt.savefig(prefix + suffix)
510 plt.clf()
512 def plot_individual_fingerprints(self, a, prefix=''):
513 """ Function for plotting all the individual fingerprints.
514 Prefix = a prefix for the resulting PNG file."""
515 if 'individual_fingerprints' in a.info and not self.recalculate:
516 fp, typedic = a.info['individual_fingerprints']
517 else:
518 a_top = a[-self.n_top:]
519 fp, typedic = self._take_fingerprints(a_top, individual=True)
520 a.info['individual_fingerprints'] = [fp, typedic]
522 npts = int(np.ceil(self.rcut * 1. / self.binwidth))
523 x = np.linspace(0, self.rcut, npts, endpoint=False)
525 for key, val in fp.items():
526 for key2, val2 in val.items():
527 plt.plot(x, val2)
528 plt.ylim([-1, 10])
529 suffix = f"_individual_fp_{key}_{key2}.png"
530 plt.savefig(prefix + suffix)
531 plt.clf()