Coverage for /builds/debichem-team/python-ase/ase/ga/cutandsplicepairing.py: 90.19%
214 statements
« prev ^ index » next coverage.py v7.5.3, created at 2025-03-06 04:00 +0000
« prev ^ index » next coverage.py v7.5.3, created at 2025-03-06 04:00 +0000
1"""Implementation of the cut-and-splice paring operator."""
2import numpy as np
4from ase import Atoms
5from ase.ga.offspring_creator import OffspringCreator
6from ase.ga.utilities import (
7 atoms_too_close,
8 atoms_too_close_two_sets,
9 gather_atoms_by_tag,
10)
11from ase.geometry import find_mic
14class Positions:
15 """Helper object to simplify the pairing process.
17 Parameters:
19 scaled_positions: (Nx3) array
20 Positions in scaled coordinates
21 cop: (1x3) array
22 Center-of-positions (also in scaled coordinates)
23 symbols: str
24 String with the atomic symbols
25 distance: float
26 Signed distance to the cutting plane
27 origin: int (0 or 1)
28 Determines at which side of the plane the position should be.
29 """
31 def __init__(self, scaled_positions, cop, symbols, distance, origin):
32 self.scaled_positions = scaled_positions
33 self.cop = cop
34 self.symbols = symbols
35 self.distance = distance
36 self.origin = origin
38 def to_use(self):
39 """Tells whether this position is at the right side."""
40 if self.distance > 0. and self.origin == 0:
41 return True
42 elif self.distance < 0. and self.origin == 1:
43 return True
44 else:
45 return False
48class CutAndSplicePairing(OffspringCreator):
49 """The Cut and Splice operator by Deaven and Ho.
51 Creates offspring from two parent structures using
52 a randomly generated cutting plane.
54 The parents may have different unit cells, in which
55 case the offspring unit cell will be a random combination
56 of the parent cells.
58 The basic implementation (for fixed unit cells) is
59 described in:
61 :doi:`L.B. Vilhelmsen and B. Hammer, PRL, 108, 126101 (2012)
62 <10.1103/PhysRevLett.108.126101`>
64 The extension to variable unit cells is similar to:
66 * :doi:`Glass, Oganov, Hansen, Comp. Phys. Comm. 175 (2006) 713-720
67 <10.1016/j.cpc.2006.07.020>`
69 * :doi:`Lonie, Zurek, Comp. Phys. Comm. 182 (2011) 372-387
70 <10.1016/j.cpc.2010.07.048>`
72 The operator can furthermore preserve molecular identity
73 if desired (see the *use_tags* kwarg). Atoms with the same
74 tag will then be considered as belonging to the same molecule,
75 and their internal geometry will not be changed by the operator.
77 If use_tags is enabled, the operator will also conserve the
78 number of molecules of each kind (in addition to conserving
79 the overall stoichiometry). Currently, molecules are considered
80 to be of the same kind if their chemical symbol strings are
81 identical. In rare cases where this may not be sufficient
82 (i.e. when desiring to keep the same ratio of isomers), the
83 different isomers can be differentiated by giving them different
84 elemental orderings (e.g. 'XY2' and 'Y2X').
86 Parameters:
88 slab: Atoms object
89 Specifies the cell vectors and periodic boundary conditions
90 to be applied to the randomly generated structures.
91 Any included atoms (e.g. representing an underlying slab)
92 are copied to these new structures.
94 n_top: int
95 The number of atoms to optimize
97 blmin: dict
98 Dictionary with minimal interatomic distances.
99 Note: when preserving molecular identity (see use_tags),
100 the blmin dict will (naturally) only be applied
101 to intermolecular distances (not the intramolecular
102 ones).
104 number_of_variable_cell_vectors: int (default 0)
105 The number of variable cell vectors (0, 1, 2 or 3).
106 To keep things simple, it is the 'first' vectors which
107 will be treated as variable, i.e. the 'a' vector in the
108 univariate case, the 'a' and 'b' vectors in the bivariate
109 case, etc.
111 p1: float or int between 0 and 1
112 Probability that a parent is shifted over a random
113 distance along the normal of the cutting plane
114 (only operative if number_of_variable_cell_vectors > 0).
116 p2: float or int between 0 and 1
117 Same as p1, but for shifting along the directions
118 in the cutting plane (only operative if
119 number_of_variable_cell_vectors > 0).
121 minfrac: float between 0 and 1, or None (default)
122 Minimal fraction of atoms a parent must contribute
123 to the child. If None, each parent must contribute
124 at least one atom.
126 cellbounds: ase.ga.utilities.CellBounds instance
127 Describing limits on the cell shape, see
128 :class:`~ase.ga.utilities.CellBounds`.
129 Note that it only make sense to impose conditions
130 regarding cell vectors which have been marked as
131 variable (see number_of_variable_cell_vectors).
133 use_tags: bool
134 Whether to use the atomic tags to preserve
135 molecular identity.
137 test_dist_to_slab: bool (default True)
138 Whether to make sure that the distances between
139 the atoms and the slab satisfy the blmin.
141 rng: Random number generator
142 By default numpy.random.
143 """
145 def __init__(self, slab, n_top, blmin, number_of_variable_cell_vectors=0,
146 p1=1, p2=0.05, minfrac=None, cellbounds=None,
147 test_dist_to_slab=True, use_tags=False, rng=np.random,
148 verbose=False):
150 OffspringCreator.__init__(self, verbose, rng=rng)
151 self.slab = slab
152 self.n_top = n_top
153 self.blmin = blmin
154 assert number_of_variable_cell_vectors in range(4)
155 self.number_of_variable_cell_vectors = number_of_variable_cell_vectors
156 self.p1 = p1
157 self.p2 = p2
158 self.minfrac = minfrac
159 self.cellbounds = cellbounds
160 self.test_dist_to_slab = test_dist_to_slab
161 self.use_tags = use_tags
163 self.scaling_volume = None
164 self.descriptor = 'CutAndSplicePairing'
165 self.min_inputs = 2
167 def update_scaling_volume(self, population, w_adapt=0.5, n_adapt=0):
168 """Updates the scaling volume that is used in the pairing
170 w_adapt: weight of the new vs the old scaling volume
171 n_adapt: number of best candidates in the population that
172 are used to calculate the new scaling volume
173 """
174 if not n_adapt:
175 # take best 20% of the population
176 n_adapt = int(np.ceil(0.2 * len(population)))
177 v_new = np.mean([a.get_volume() for a in population[:n_adapt]])
179 if not self.scaling_volume:
180 self.scaling_volume = v_new
181 else:
182 volumes = [self.scaling_volume, v_new]
183 weights = [1 - w_adapt, w_adapt]
184 self.scaling_volume = np.average(volumes, weights=weights)
186 def get_new_individual(self, parents):
187 """The method called by the user that
188 returns the paired structure."""
189 f, m = parents
191 indi = self.cross(f, m)
192 desc = 'pairing: {} {}'.format(f.info['confid'],
193 m.info['confid'])
194 # It is ok for an operator to return None
195 # It means that it could not make a legal offspring
196 # within a reasonable amount of time
197 if indi is None:
198 return indi, desc
199 indi = self.initialize_individual(f, indi)
200 indi.info['data']['parents'] = [f.info['confid'],
201 m.info['confid']]
203 return self.finalize_individual(indi), desc
205 def cross(self, a1, a2):
206 """Crosses the two atoms objects and returns one"""
208 if len(a1) != len(self.slab) + self.n_top:
209 raise ValueError('Wrong size of structure to optimize')
210 if len(a1) != len(a2):
211 raise ValueError('The two structures do not have the same length')
213 N = self.n_top
215 # Only consider the atoms to optimize
216 a1 = a1[len(a1) - N: len(a1)]
217 a2 = a2[len(a2) - N: len(a2)]
219 if not np.array_equal(a1.numbers, a2.numbers):
220 err = 'Trying to pair two structures with different stoichiometry'
221 raise ValueError(err)
223 if self.use_tags and not np.array_equal(a1.get_tags(), a2.get_tags()):
224 err = 'Trying to pair two structures with different tags'
225 raise ValueError(err)
227 cell1 = a1.get_cell()
228 cell2 = a2.get_cell()
229 for i in range(self.number_of_variable_cell_vectors, 3):
230 err = 'Unit cells are supposed to be identical in direction %d'
231 assert np.allclose(cell1[i], cell2[i]), (err % i, cell1, cell2)
233 invalid = True
234 counter = 0
235 maxcount = 1000
236 a1_copy = a1.copy()
237 a2_copy = a2.copy()
239 # Run until a valid pairing is made or maxcount pairings are tested.
240 while invalid and counter < maxcount:
241 counter += 1
243 newcell = self.generate_unit_cell(cell1, cell2)
244 if newcell is None:
245 # No valid unit cell could be generated.
246 # This strongly suggests that it is near-impossible
247 # to generate one from these parent cells and it is
248 # better to abort now.
249 break
251 # Choose direction of cutting plane normal
252 if self.number_of_variable_cell_vectors == 0:
253 # Will be generated entirely at random
254 theta = np.pi * self.rng.random()
255 phi = 2. * np.pi * self.rng.random()
256 cut_n = np.array([np.cos(phi) * np.sin(theta),
257 np.sin(phi) * np.sin(theta), np.cos(theta)])
258 else:
259 # Pick one of the 'variable' cell vectors
260 cut_n = self.rng.choice(self.number_of_variable_cell_vectors)
262 # Randomly translate parent structures
263 for a_copy, a in zip([a1_copy, a2_copy], [a1, a2]):
264 a_copy.set_positions(a.get_positions())
266 cell = a_copy.get_cell()
267 for i in range(self.number_of_variable_cell_vectors):
268 r = self.rng.random()
269 cond1 = i == cut_n and r < self.p1
270 cond2 = i != cut_n and r < self.p2
271 if cond1 or cond2:
272 a_copy.positions += self.rng.random() * cell[i]
274 if self.use_tags:
275 # For correct determination of the center-
276 # of-position of the multi-atom blocks,
277 # we need to group their constituent atoms
278 # together
279 gather_atoms_by_tag(a_copy)
280 else:
281 a_copy.wrap()
283 # Generate the cutting point in scaled coordinates
284 cosp1 = np.average(a1_copy.get_scaled_positions(), axis=0)
285 cosp2 = np.average(a2_copy.get_scaled_positions(), axis=0)
286 cut_p = np.zeros((1, 3))
287 for i in range(3):
288 if i < self.number_of_variable_cell_vectors:
289 cut_p[0, i] = self.rng.random()
290 else:
291 cut_p[0, i] = 0.5 * (cosp1[i] + cosp2[i])
293 # Perform the pairing:
294 child = self._get_pairing(a1_copy, a2_copy, cut_p, cut_n, newcell)
295 if child is None:
296 continue
298 # Verify whether the atoms are too close or not:
299 if atoms_too_close(child, self.blmin, use_tags=self.use_tags):
300 continue
302 if self.test_dist_to_slab and len(self.slab) > 0:
303 if atoms_too_close_two_sets(self.slab, child, self.blmin):
304 continue
306 # Passed all the tests
307 child = self.slab + child
308 child.set_cell(newcell, scale_atoms=False)
309 child.wrap()
310 return child
312 return None
314 def generate_unit_cell(self, cell1, cell2, maxcount=10000):
315 """Generates a new unit cell by a random linear combination
316 of the parent cells. The new cell must satisfy the
317 self.cellbounds constraints. Returns None if no such cell
318 was generated within a given number of attempts.
320 Parameters:
322 maxcount: int
323 The maximal number of attempts.
324 """
325 # First calculate the scaling volume
326 if not self.scaling_volume:
327 v1 = np.abs(np.linalg.det(cell1))
328 v2 = np.abs(np.linalg.det(cell2))
329 r = self.rng.random()
330 v_ref = r * v1 + (1 - r) * v2
331 else:
332 v_ref = self.scaling_volume
334 # Now the cell vectors
335 if self.number_of_variable_cell_vectors == 0:
336 assert np.allclose(cell1, cell2), 'Parent cells are not the same'
337 newcell = np.copy(cell1)
338 else:
339 count = 0
340 while count < maxcount:
341 r = self.rng.random()
342 newcell = r * cell1 + (1 - r) * cell2
344 vol = abs(np.linalg.det(newcell))
345 scaling = v_ref / vol
346 scaling **= 1. / self.number_of_variable_cell_vectors
347 newcell[:self.number_of_variable_cell_vectors] *= scaling
349 found = True
350 if self.cellbounds is not None:
351 found = self.cellbounds.is_within_bounds(newcell)
352 if found:
353 break
355 count += 1
356 else:
357 # Did not find acceptable cell
358 newcell = None
360 return newcell
362 def _get_pairing(self, a1, a2, cutting_point, cutting_normal, cell):
363 """Creates a child from two parents using the given cut.
365 Returns None if the generated structure does not contain
366 a large enough fraction of each parent (see self.minfrac).
368 Does not check whether atoms are too close.
370 Assumes the 'slab' parts have been removed from the parent
371 structures and that these have been checked for equal
372 lengths, stoichiometries, and tags (if self.use_tags).
374 Parameters:
376 cutting_normal: int or (1x3) array
378 cutting_point: (1x3) array
379 In fractional coordinates
381 cell: (3x3) array
382 The unit cell for the child structure
383 """
384 symbols = a1.get_chemical_symbols()
385 tags = a1.get_tags() if self.use_tags else np.arange(len(a1))
387 # Generate list of all atoms / atom groups:
388 p1, p2, sym = [], [], []
389 for i in np.unique(tags):
390 indices = np.where(tags == i)[0]
391 s = ''.join([symbols[j] for j in indices])
392 sym.append(s)
394 for i, (a, p) in enumerate(zip([a1, a2], [p1, p2])):
395 c = a.get_cell()
396 cop = np.mean(a.positions[indices], axis=0)
397 cut_p = np.dot(cutting_point, c)
398 if isinstance(cutting_normal, int):
399 vecs = [c[j] for j in range(3) if j != cutting_normal]
400 cut_n = np.cross(vecs[0], vecs[1])
401 else:
402 cut_n = np.dot(cutting_normal, c)
403 d = np.dot(cop - cut_p, cut_n)
404 spos = a.get_scaled_positions()[indices]
405 scop = np.mean(spos, axis=0)
406 p.append(Positions(spos, scop, s, d, i))
408 all_points = p1 + p2
409 unique_sym = np.unique(sym)
410 types = {s: sym.count(s) for s in unique_sym}
412 # Sort these by chemical symbols:
413 all_points.sort(key=lambda x: x.symbols, reverse=True)
415 # For each atom type make the pairing
416 unique_sym.sort()
417 use_total = {}
418 for s in unique_sym:
419 used = []
420 not_used = []
421 # The list is looked trough in
422 # reverse order so atoms can be removed
423 # from the list along the way.
424 for i in reversed(range(len(all_points))):
425 # If there are no more atoms of this type
426 if all_points[i].symbols != s:
427 break
428 # Check if the atom should be included
429 if all_points[i].to_use():
430 used.append(all_points.pop(i))
431 else:
432 not_used.append(all_points.pop(i))
434 assert len(used) + len(not_used) == types[s] * 2
436 # While we have too few of the given atom type
437 while len(used) < types[s]:
438 index = self.rng.randint(len(not_used))
439 used.append(not_used.pop(index))
441 # While we have too many of the given atom type
442 while len(used) > types[s]:
443 # remove randomly:
444 index = self.rng.randint(len(used))
445 not_used.append(used.pop(index))
447 use_total[s] = used
449 n_tot = sum(len(ll) for ll in use_total.values())
450 assert n_tot == len(sym)
452 # check if the generated structure contains
453 # atoms from both parents:
454 count1, count2, N = 0, 0, len(a1)
455 for x in use_total.values():
456 count1 += sum(y.origin == 0 for y in x)
457 count2 += sum(y.origin == 1 for y in x)
459 nmin = 1 if self.minfrac is None else int(round(self.minfrac * N))
460 if count1 < nmin or count2 < nmin:
461 return None
463 # Construct the cartesian positions and reorder the atoms
464 # to follow the original order
465 newpos = []
466 pbc = a1.get_pbc()
467 for s in sym:
468 p = use_total[s].pop()
469 c = a1.get_cell() if p.origin == 0 else a2.get_cell()
470 pos = np.dot(p.scaled_positions, c)
471 cop = np.dot(p.cop, c)
472 vectors, _lengths = find_mic(pos - cop, c, pbc)
473 newcop = np.dot(p.cop, cell)
474 pos = newcop + vectors
475 for row in pos:
476 newpos.append(row)
478 newpos = np.reshape(newpos, (N, 3))
479 num = a1.get_atomic_numbers()
480 child = Atoms(numbers=num, positions=newpos, pbc=pbc, cell=cell,
481 tags=tags)
482 child.wrap()
483 return child