Coverage for /builds/debichem-team/python-ase/ase/ga/startgenerator.py: 95.17%
207 statements
« prev ^ index » next coverage.py v7.5.3, created at 2025-03-06 04:00 +0000
« prev ^ index » next coverage.py v7.5.3, created at 2025-03-06 04:00 +0000
1"""Tools for generating new random starting candidates."""
2import numpy as np
4from ase import Atoms
5from ase.build import molecule
6from ase.data import atomic_numbers
7from ase.ga.utilities import (
8 atoms_too_close,
9 atoms_too_close_two_sets,
10 closest_distances_generator,
11)
14class StartGenerator:
15 """Class for generating random starting candidates.
17 Its basic task consists of randomly placing atoms or
18 molecules within a predescribed box, while respecting
19 certain minimal interatomic distances.
21 Depending on the problem at hand, certain box vectors
22 may not be known or chosen beforehand, and hence also
23 need to be generated at random. Common cases include
24 bulk crystals, films and chains, with respectively
25 3, 2 and 1 unknown cell vectors.
27 Parameters:
29 slab: Atoms object
30 Specifies the cell vectors and periodic boundary conditions
31 to be applied to the randomly generated structures.
32 Any included atoms (e.g. representing an underlying slab)
33 are copied to these new structures.
34 Variable cell vectors (see number_of_variable_cell_vectors)
35 will be ignored because these will be generated at random.
37 blocks: list
38 List of building units for the structure. Each item can be:
40 * an integer: representing a single atom by its atomic number,
41 * a string: for a single atom (a chemical symbol) or a
42 molecule (name recognized by ase.build.molecule),
43 * an Atoms object,
44 * an (A, B) tuple or list where A is any of the above
45 and B is the number of A units to include.
47 A few examples:
49 >>> blocks = ['Ti'] * 4 + ['O'] * 8
50 >>> blocks = [('Ti', 4), ('O', 8)]
51 >>> blocks = [('CO2', 3)] # 3 CO2 molecules
52 >>> co = Atoms('CO', positions=[[0, 0, 0], [1.4, 0, 0]])
53 >>> blocks = [(co, 3)]
55 Each individual block (single atom or molecule) in the
56 randomly generated candidates is given a unique integer
57 tag. These can be used to preserve the molecular identity
58 of these subunits.
60 blmin: dict or float
61 Dictionary with minimal interatomic distances.
62 If a number is provided instead, the dictionary will
63 be generated with this ratio of covalent bond radii.
64 Note: when preserving molecular identity (see use_tags),
65 the blmin dict will (naturally) only be applied
66 to intermolecular distances (not the intramolecular
67 ones).
69 number_of_variable_cell_vectors: int (default 0)
70 The number of variable cell vectors (0, 1, 2 or 3).
71 To keep things simple, it is the 'first' vectors which
72 will be treated as variable, i.e. the 'a' vector in the
73 univariate case, the 'a' and 'b' vectors in the bivariate
74 case, etc.
76 box_to_place_in: [list, list of lists] (default None)
77 The box in which the atoms can be placed.
78 The default (None) means the box is equal to the
79 entire unit cell of the 'slab' object.
80 In many cases, however, smaller boxes are desired
81 (e.g. for adsorbates on a slab surface or for isolated
82 clusters). Then, box_to_place_in can be set as
83 [p0, [v1, v2, v3]] with positions being generated as
84 p0 + r1 * v1 + r2 * v2 + r3 + v3.
85 In case of one or more variable cell vectors,
86 the corresponding items in p0/v1/v2/v3 will be ignored.
88 box_volume: int or float or None (default)
89 Initial guess for the box volume in cubic Angstrom
90 (used in generating the variable cell vectors).
91 Typical values in the solid state are 8-12 A^3 per atom.
92 If there are no variable cell vectors, the default None
93 is required (box volume equal to the box_to_place_in
94 volume).
96 splits: dict or None
97 Splitting scheme for increasing the translational symmetry
98 in the random candidates, based on:
100 * `Lyakhov, Oganov, Valle, Comp. Phys. Comm. 181 (2010) 1623-32`__
102 __ http://dx.doi.org/10.1016/j.cpc.2010.06.007
104 This should be a dict specifying the relative probabilities
105 for each split, written as tuples. For example,
107 >>> splits = {(2,): 3, (1,): 1}
109 This means that, for each structure, either a splitting
110 factor of 2 is applied to one randomly chosen axis,
111 or a splitting factor of 1 is applied (i.e., no splitting).
112 The probability ratio of the two scenararios will be 3:1,
113 i.e. 75% chance for the former and 25% chance for the latter
114 splitting scheme. Only the directions in which the 'slab'
115 object is periodic are eligible for splitting.
117 To e.g. always apply splitting factors of 2 and 3 along two
118 randomly chosen axes:
120 >>> splits = {(2, 3): 1}
122 By default, no splitting is applied (splits = None = {(1,): 1}).
124 cellbounds: ase.ga.utilities.CellBounds instance
125 Describing limits on the cell shape, see
126 :class:`~ase.ga.utilities.CellBounds`.
127 Note that it only make sense to impose conditions
128 regarding cell vectors which have been marked as
129 variable (see number_of_variable_cell_vectors).
131 test_dist_to_slab: bool (default True)
132 Whether to make sure that the distances between
133 the atoms and the slab satisfy the blmin.
135 test_too_far: bool (default True)
136 Whether to also make sure that there are no isolated
137 atoms or molecules with nearest-neighbour bond lengths
138 larger than 2x the value in the blmin dict.
140 rng: Random number generator
141 By default numpy.random.
142 """
144 def __init__(self, slab, blocks, blmin, number_of_variable_cell_vectors=0,
145 box_to_place_in=None, box_volume=None, splits=None,
146 cellbounds=None, test_dist_to_slab=True, test_too_far=True,
147 rng=np.random):
149 self.slab = slab
151 self.blocks = []
152 for item in blocks:
153 if isinstance(item, (tuple, list)):
154 assert len(item) == 2, 'Item length %d != 2' % len(item)
155 block, count = item
156 else:
157 block, count = item, 1
159 # Convert block into Atoms object
160 if isinstance(block, Atoms):
161 pass
162 elif block in atomic_numbers:
163 block = Atoms(block)
164 elif isinstance(block, str):
165 block = molecule(block)
166 elif block in atomic_numbers.values():
167 block = Atoms(numbers=[block])
168 else:
169 raise ValueError('Cannot parse this block:', block)
171 # Add to self.blocks, taking into account that
172 # we want to group the same blocks together.
173 # This is important for the cell splitting.
174 for i, (b, c) in enumerate(self.blocks):
175 if block == b:
176 self.blocks[i][1] += count
177 break
178 else:
179 self.blocks.append([block, count])
181 if isinstance(blmin, dict):
182 self.blmin = blmin
183 else:
184 numbers = np.unique([b.get_atomic_numbers() for b in self.blocks])
185 self.blmin = closest_distances_generator(
186 numbers,
187 ratio_of_covalent_radii=blmin)
189 self.number_of_variable_cell_vectors = number_of_variable_cell_vectors
190 assert self.number_of_variable_cell_vectors in range(4)
191 if len(self.slab) > 0:
192 msg = 'Including atoms in the slab only makes sense'
193 msg += ' if there are no variable unit cell vectors'
194 assert self.number_of_variable_cell_vectors == 0, msg
195 for i in range(self.number_of_variable_cell_vectors):
196 msg = f'Unit cell {("abc"[i])}-vector is marked as variable '
197 msg += 'and slab must then also be periodic in this direction'
198 assert self.slab.pbc[i], msg
200 if box_to_place_in is None:
201 p0 = np.array([0., 0., 0.])
202 cell = self.slab.get_cell()
203 self.box_to_place_in = [p0, [cell[0, :], cell[1, :], cell[2, :]]]
204 else:
205 self.box_to_place_in = box_to_place_in
207 if box_volume is None:
208 assert self.number_of_variable_cell_vectors == 0
209 box_volume = abs(np.linalg.det(self.box_to_place_in[1]))
210 else:
211 assert self.number_of_variable_cell_vectors > 0
212 self.box_volume = box_volume
213 assert self.box_volume > 0
215 if splits is None:
216 splits = {(1,): 1}
217 tot = sum(v for v in splits.values())
218 self.splits = {k: v * 1. / tot for k, v in splits.items()}
220 self.cellbounds = cellbounds
221 self.test_too_far = test_too_far
222 self.test_dist_to_slab = test_dist_to_slab
223 self.rng = rng
225 def get_new_candidate(self, maxiter=None):
226 """Returns a new candidate.
228 maxiter: upper bound on the total number of times
229 the random position generator is called
230 when generating the new candidate.
232 By default (maxiter=None) no such bound
233 is imposed. If the generator takes too
234 long time to create a new candidate, it
235 may be suitable to specify a finite value.
236 When the bound is exceeded, None is returned.
237 """
238 pbc = self.slab.get_pbc()
240 # Choose cell splitting
241 r = self.rng.random()
242 cumprob = 0
243 for split, prob in self.splits.items():
244 cumprob += prob
245 if cumprob > r:
246 break
248 # Choose direction(s) along which to split
249 # and by how much
250 directions = [i for i in range(3) if pbc[i]]
251 repeat = [1, 1, 1]
252 if len(directions) > 0:
253 for number in split:
254 d = self.rng.choice(directions)
255 repeat[d] = number
256 repeat = tuple(repeat)
258 # Generate the 'full' unit cell
259 # for the eventual candidates
260 cell = self.generate_unit_cell(repeat)
261 if self.number_of_variable_cell_vectors == 0:
262 assert np.allclose(cell, self.slab.get_cell())
264 # Make the smaller 'box' in which we are
265 # allowed to place the atoms and which will
266 # then be repeated to fill the 'full' unit cell
267 box = np.copy(cell)
268 for i in range(self.number_of_variable_cell_vectors, 3):
269 box[i] = np.array(self.box_to_place_in[1][i])
270 box /= np.array([repeat]).T
272 # Here we gather the (reduced) number of blocks
273 # to put in the smaller box, and the 'surplus'
274 # occurring when the block count is not divisible
275 # by the number of repetitions.
276 # E.g. if we have a ('Ti', 4) block and do a
277 # [2, 3, 1] repetition, we employ a ('Ti', 1)
278 # block in the smaller box and delete 2 out 6
279 # Ti atoms afterwards
280 nrep = int(np.prod(repeat))
281 blocks, ids, surplus = [], [], []
282 for i, (block, count) in enumerate(self.blocks):
283 count_part = int(np.ceil(count * 1. / nrep))
284 blocks.extend([block] * count_part)
285 surplus.append(nrep * count_part - count)
286 ids.extend([i] * count_part)
288 N_blocks = len(blocks)
290 # Shuffle the ordering so different blocks
291 # are added in random order
292 order = np.arange(N_blocks)
293 self.rng.shuffle(order)
294 blocks = [blocks[i] for i in order]
295 ids = np.array(ids)[order]
297 # Add blocks one by one until we have found
298 # a valid candidate
299 blmin = self.blmin
300 blmin_too_far = {key: 2 * val for key, val in blmin.items()}
302 niter = 0
303 while maxiter is None or niter < maxiter:
304 cand = Atoms('', cell=box, pbc=pbc)
306 for i in range(N_blocks):
307 atoms = blocks[i].copy()
308 atoms.set_tags(i)
309 atoms.set_pbc(pbc)
310 atoms.set_cell(box, scale_atoms=False)
312 while maxiter is None or niter < maxiter:
313 niter += 1
314 cop = atoms.get_positions().mean(axis=0)
315 pos = np.dot(self.rng.random((1, 3)), box)
316 atoms.translate(pos - cop)
318 if len(atoms) > 1:
319 # Apply a random rotation to multi-atom blocks
320 phi, theta, psi = 360 * self.rng.random(3)
321 atoms.euler_rotate(phi=phi, theta=0.5 * theta, psi=psi,
322 center=pos)
324 if not atoms_too_close_two_sets(cand, atoms, blmin):
325 cand += atoms
326 break
327 else:
328 # Reached maximum iteration number
329 # Break out of the for loop above
330 cand = None
331 break
333 if cand is None:
334 # Exit the main while loop
335 break
337 # Rebuild the candidate after repeating,
338 # randomly deleting surplus blocks and
339 # sorting back to the original order
340 cand_full = cand.repeat(repeat)
342 tags_full = cand_full.get_tags()
343 for i in range(nrep):
344 tags_full[len(cand) * i:len(cand) * (i + 1)] += i * N_blocks
345 cand_full.set_tags(tags_full)
347 cand = Atoms('', cell=cell, pbc=pbc)
348 ids_full = np.tile(ids, nrep)
350 tag_counter = 0
351 if len(self.slab) > 0:
352 tag_counter = int(max(self.slab.get_tags())) + 1
354 for i, (block, count) in enumerate(self.blocks):
355 tags = np.where(ids_full == i)[0]
356 bad = self.rng.choice(tags, size=surplus[i], replace=False)
357 for tag in tags:
358 if tag not in bad:
359 select = [a.index for a in cand_full if a.tag == tag]
360 atoms = cand_full[select] # is indeed a copy!
361 atoms.set_tags(tag_counter)
362 assert len(atoms) == len(block)
363 cand += atoms
364 tag_counter += 1
366 for i in range(self.number_of_variable_cell_vectors, 3):
367 cand.positions[:, i] += self.box_to_place_in[0][i]
369 # By construction, the minimal interatomic distances
370 # within the structure should already be respected
371 assert not atoms_too_close(cand, blmin, use_tags=True), \
372 'This is not supposed to happen; please report this bug'
374 if self.test_dist_to_slab and len(self.slab) > 0:
375 if atoms_too_close_two_sets(self.slab, cand, blmin):
376 continue
378 if self.test_too_far:
379 tags = cand.get_tags()
381 for tag in np.unique(tags):
382 too_far = True
383 indices_i = np.where(tags == tag)[0]
384 indices_j = np.where(tags != tag)[0]
385 too_far = not atoms_too_close_two_sets(cand[indices_i],
386 cand[indices_j],
387 blmin_too_far)
389 if too_far and len(self.slab) > 0:
390 # the block is too far from the rest
391 # but might still be sufficiently
392 # close to the slab
393 too_far = not atoms_too_close_two_sets(cand[indices_i],
394 self.slab,
395 blmin_too_far)
396 if too_far:
397 break
398 else:
399 too_far = False
401 if too_far:
402 continue
404 # Passed all the tests
405 cand = self.slab + cand
406 cand.set_cell(cell, scale_atoms=False)
407 break
408 else:
409 # Reached max iteration count in the while loop
410 return None
412 return cand
414 def generate_unit_cell(self, repeat):
415 """Generates a random unit cell.
417 For this, we use the vectors in self.slab.cell
418 in the fixed directions and randomly generate
419 the variable ones. For such a cell to be valid,
420 it has to satisfy the self.cellbounds constraints.
422 The cell will also be such that the volume of the
423 box in which the atoms can be placed (box limits
424 described by self.box_to_place_in) is equal to
425 self.box_volume.
427 Parameters:
429 repeat: tuple of 3 integers
430 Indicates by how much each cell vector
431 will later be reduced by cell splitting.
433 This is used to ensure that the original
434 cell is large enough so that the cell lengths
435 of the smaller cell exceed the largest
436 (X,X)-minimal-interatomic-distance in self.blmin.
437 """
438 # Find the minimal cell length 'Lmin'
439 # that we need in order to ensure that
440 # an added atom or molecule will never
441 # be 'too close to itself'
442 Lmin = 0.
443 for atoms, count in self.blocks:
444 dist = atoms.get_all_distances(mic=False, vector=False)
445 num = atoms.get_atomic_numbers()
447 for i in range(len(atoms)):
448 dist[i, i] += self.blmin[(num[i], num[i])]
449 for j in range(i):
450 bl = self.blmin[(num[i], num[j])]
451 dist[i, j] += bl
452 dist[j, i] += bl
454 L = np.max(dist)
455 if L > Lmin:
456 Lmin = L
458 # Generate a suitable unit cell
459 valid = False
460 while not valid:
461 cell = np.zeros((3, 3))
463 for i in range(self.number_of_variable_cell_vectors):
464 # on-diagonal values
465 cell[i, i] = self.rng.random() * np.cbrt(self.box_volume)
466 cell[i, i] *= repeat[i]
467 for j in range(i):
468 # off-diagonal values
469 cell[i, j] = (self.rng.random() - 0.5) * cell[i - 1, i - 1]
471 # volume scaling
472 for i in range(self.number_of_variable_cell_vectors, 3):
473 cell[i] = self.box_to_place_in[1][i]
475 if self.number_of_variable_cell_vectors > 0:
476 volume = abs(np.linalg.det(cell))
477 scaling = self.box_volume / volume
478 scaling **= 1. / self.number_of_variable_cell_vectors
479 cell[:self.number_of_variable_cell_vectors] *= scaling
481 for i in range(self.number_of_variable_cell_vectors, 3):
482 cell[i] = self.slab.get_cell()[i]
484 # bounds checking
485 valid = True
487 if self.cellbounds is not None:
488 if not self.cellbounds.is_within_bounds(cell):
489 valid = False
491 if valid:
492 for i in range(3):
493 if np.linalg.norm(cell[i]) < repeat[i] * Lmin:
494 assert self.number_of_variable_cell_vectors > 0
495 valid = False
497 return cell