Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""Tools for generating new random starting candidates."""
2import numpy as np
3from ase import Atoms
4from ase.data import atomic_numbers
5from ase.build import molecule
6from ase.ga.utilities import (closest_distances_generator, atoms_too_close,
7 atoms_too_close_two_sets)
10class StartGenerator:
11 """Class for generating random starting candidates.
13 Its basic task consists of randomly placing atoms or
14 molecules within a predescribed box, while respecting
15 certain minimal interatomic distances.
17 Depending on the problem at hand, certain box vectors
18 may not be known or chosen beforehand, and hence also
19 need to be generated at random. Common cases include
20 bulk crystals, films and chains, with respectively
21 3, 2 and 1 unknown cell vectors.
23 Parameters:
25 slab: Atoms object
26 Specifies the cell vectors and periodic boundary conditions
27 to be applied to the randomly generated structures.
28 Any included atoms (e.g. representing an underlying slab)
29 are copied to these new structures.
30 Variable cell vectors (see number_of_variable_cell_vectors)
31 will be ignored because these will be generated at random.
33 blocks: list
34 List of building units for the structure. Each item can be:
36 * an integer: representing a single atom by its atomic number,
37 * a string: for a single atom (a chemical symbol) or a
38 molecule (name recognized by ase.build.molecule),
39 * an Atoms object,
40 * an (A, B) tuple or list where A is any of the above
41 and B is the number of A units to include.
43 A few examples:
45 >>> blocks = ['Ti'] * 4 + ['O'] * 8
46 >>> blocks = [('Ti', 4), ('O', 8)]
47 >>> blocks = [('CO2', 3)] # 3 CO2 molecules
48 >>> co = Atoms('CO', positions=[[0, 0, 0], [1.4, 0, 0]])
49 >>> blocks = [(co, 3)]
51 Each individual block (single atom or molecule) in the
52 randomly generated candidates is given a unique integer
53 tag. These can be used to preserve the molecular identity
54 of these subunits.
56 blmin: dict or float
57 Dictionary with minimal interatomic distances.
58 If a number is provided instead, the dictionary will
59 be generated with this ratio of covalent bond radii.
60 Note: when preserving molecular identity (see use_tags),
61 the blmin dict will (naturally) only be applied
62 to intermolecular distances (not the intramolecular
63 ones).
65 number_of_variable_cell_vectors: int (default 0)
66 The number of variable cell vectors (0, 1, 2 or 3).
67 To keep things simple, it is the 'first' vectors which
68 will be treated as variable, i.e. the 'a' vector in the
69 univariate case, the 'a' and 'b' vectors in the bivariate
70 case, etc.
72 box_to_place_in: [list, list of lists] (default None)
73 The box in which the atoms can be placed.
74 The default (None) means the box is equal to the
75 entire unit cell of the 'slab' object.
76 In many cases, however, smaller boxes are desired
77 (e.g. for adsorbates on a slab surface or for isolated
78 clusters). Then, box_to_place_in can be set as
79 [p0, [v1, v2, v3]] with positions being generated as
80 p0 + r1 * v1 + r2 * v2 + r3 + v3.
81 In case of one or more variable cell vectors,
82 the corresponding items in p0/v1/v2/v3 will be ignored.
84 box_volume: int or float or None (default)
85 Initial guess for the box volume in cubic Angstrom
86 (used in generating the variable cell vectors).
87 Typical values in the solid state are 8-12 A^3 per atom.
88 If there are no variable cell vectors, the default None
89 is required (box volume equal to the box_to_place_in
90 volume).
92 splits: dict or None
93 Splitting scheme for increasing the translational symmetry
94 in the random candidates, based on:
96 * `Lyakhov, Oganov, Valle, Comp. Phys. Comm. 181 (2010) 1623-32`__
98 __ http://dx.doi.org/10.1016/j.cpc.2010.06.007
100 This should be a dict specifying the relative probabilities
101 for each split, written as tuples. For example,
103 >>> splits = {(2,): 3, (1,): 1}
105 This means that, for each structure, either a splitting
106 factor of 2 is applied to one randomly chosen axis,
107 or a splitting factor of 1 is applied (i.e., no splitting).
108 The probability ratio of the two scenararios will be 3:1,
109 i.e. 75% chance for the former and 25% chance for the latter
110 splitting scheme. Only the directions in which the 'slab'
111 object is periodic are eligible for splitting.
113 To e.g. always apply splitting factors of 2 and 3 along two
114 randomly chosen axes:
116 >>> splits = {(2, 3): 1}
118 By default, no splitting is applied (splits = None = {(1,): 1}).
120 cellbounds: ase.ga.utilities.CellBounds instance
121 Describing limits on the cell shape, see
122 :class:`~ase.ga.utilities.CellBounds`.
123 Note that it only make sense to impose conditions
124 regarding cell vectors which have been marked as
125 variable (see number_of_variable_cell_vectors).
127 test_dist_to_slab: bool (default True)
128 Whether to make sure that the distances between
129 the atoms and the slab satisfy the blmin.
131 test_too_far: bool (default True)
132 Whether to also make sure that there are no isolated
133 atoms or molecules with nearest-neighbour bond lengths
134 larger than 2x the value in the blmin dict.
136 rng: Random number generator
137 By default numpy.random.
138 """
139 def __init__(self, slab, blocks, blmin, number_of_variable_cell_vectors=0,
140 box_to_place_in=None, box_volume=None, splits=None,
141 cellbounds=None, test_dist_to_slab=True, test_too_far=True,
142 rng=np.random):
144 self.slab = slab
146 self.blocks = []
147 for item in blocks:
148 if isinstance(item, tuple) or isinstance(item, list):
149 assert len(item) == 2, 'Item length %d != 2' % len(item)
150 block, count = item
151 else:
152 block, count = item, 1
154 # Convert block into Atoms object
155 if isinstance(block, Atoms):
156 pass
157 elif block in atomic_numbers:
158 block = Atoms(block)
159 elif isinstance(block, str):
160 block = molecule(block)
161 elif block in atomic_numbers.values():
162 block = Atoms(numbers=[block])
163 else:
164 raise ValueError('Cannot parse this block:', block)
166 # Add to self.blocks, taking into account that
167 # we want to group the same blocks together.
168 # This is important for the cell splitting.
169 for i, (b, c) in enumerate(self.blocks):
170 if block == b:
171 self.blocks[i][1] += count
172 break
173 else:
174 self.blocks.append([block, count])
176 if isinstance(blmin, dict):
177 self.blmin = blmin
178 else:
179 numbers = np.unique([b.get_atomic_numbers() for b in self.blocks])
180 self.blmin = closest_distances_generator(numbers,
181 ratio_of_covalent_radii=blmin)
183 self.number_of_variable_cell_vectors = number_of_variable_cell_vectors
184 assert self.number_of_variable_cell_vectors in range(4)
185 if len(self.slab) > 0:
186 msg = 'Including atoms in the slab only makes sense'
187 msg += ' if there are no variable unit cell vectors'
188 assert self.number_of_variable_cell_vectors == 0, msg
189 for i in range(self.number_of_variable_cell_vectors):
190 msg = 'Unit cell %s-vector is marked as variable ' % ('abc'[i])
191 msg += 'and slab must then also be periodic in this direction'
192 assert self.slab.pbc[i], msg
194 if box_to_place_in is None:
195 p0 = np.array([0., 0., 0.])
196 cell = self.slab.get_cell()
197 self.box_to_place_in = [p0, [cell[0, :], cell[1, :], cell[2, :]]]
198 else:
199 self.box_to_place_in = box_to_place_in
201 if box_volume is None:
202 assert self.number_of_variable_cell_vectors == 0
203 box_volume = abs(np.linalg.det(self.box_to_place_in[1]))
204 else:
205 assert self.number_of_variable_cell_vectors > 0
206 self.box_volume = box_volume
207 assert self.box_volume > 0
209 if splits is None:
210 splits = {(1,): 1}
211 tot = sum([v for v in splits.values()]) # normalization
212 self.splits = {k: v * 1. / tot for k, v in splits.items()}
214 self.cellbounds = cellbounds
215 self.test_too_far = test_too_far
216 self.test_dist_to_slab = test_dist_to_slab
217 self.rng = rng
219 def get_new_candidate(self, maxiter=None):
220 """Returns a new candidate.
222 maxiter: upper bound on the total number of times
223 the random position generator is called
224 when generating the new candidate.
226 By default (maxiter=None) no such bound
227 is imposed. If the generator takes too
228 long time to create a new candidate, it
229 may be suitable to specify a finite value.
230 When the bound is exceeded, None is returned.
231 """
232 pbc = self.slab.get_pbc()
234 # Choose cell splitting
235 r = self.rng.rand()
236 cumprob = 0
237 for split, prob in self.splits.items():
238 cumprob += prob
239 if cumprob > r:
240 break
242 # Choose direction(s) along which to split
243 # and by how much
244 directions = [i for i in range(3) if pbc[i]]
245 repeat = [1, 1, 1]
246 if len(directions) > 0:
247 for number in split:
248 d = self.rng.choice(directions)
249 repeat[d] = number
250 repeat = tuple(repeat)
252 # Generate the 'full' unit cell
253 # for the eventual candidates
254 cell = self.generate_unit_cell(repeat)
255 if self.number_of_variable_cell_vectors == 0:
256 assert np.allclose(cell, self.slab.get_cell())
258 # Make the smaller 'box' in which we are
259 # allowed to place the atoms and which will
260 # then be repeated to fill the 'full' unit cell
261 box = np.copy(cell)
262 for i in range(self.number_of_variable_cell_vectors, 3):
263 box[i] = np.array(self.box_to_place_in[1][i])
264 box /= np.array([repeat]).T
266 # Here we gather the (reduced) number of blocks
267 # to put in the smaller box, and the 'surplus'
268 # occurring when the block count is not divisible
269 # by the number of repetitions.
270 # E.g. if we have a ('Ti', 4) block and do a
271 # [2, 3, 1] repetition, we employ a ('Ti', 1)
272 # block in the smaller box and delete 2 out 6
273 # Ti atoms afterwards
274 nrep = int(np.prod(repeat))
275 blocks, ids, surplus = [], [], []
276 for i, (block, count) in enumerate(self.blocks):
277 count_part = int(np.ceil(count * 1. / nrep))
278 blocks.extend([block] * count_part)
279 surplus.append(nrep * count_part - count)
280 ids.extend([i] * count_part)
282 N_blocks = len(blocks)
284 # Shuffle the ordering so different blocks
285 # are added in random order
286 order = np.arange(N_blocks)
287 self.rng.shuffle(order)
288 blocks = [blocks[i] for i in order]
289 ids = np.array(ids)[order]
291 # Add blocks one by one until we have found
292 # a valid candidate
293 blmin = self.blmin
294 blmin_too_far = {key: 2 * val for key, val in blmin.items()}
296 niter = 0
297 while maxiter is None or niter < maxiter:
298 cand = Atoms('', cell=box, pbc=pbc)
300 for i in range(N_blocks):
301 atoms = blocks[i].copy()
302 atoms.set_tags(i)
303 atoms.set_pbc(pbc)
304 atoms.set_cell(box, scale_atoms=False)
306 while maxiter is None or niter < maxiter:
307 niter += 1
308 cop = atoms.get_positions().mean(axis=0)
309 pos = np.dot(self.rng.rand(1, 3), box)
310 atoms.translate(pos - cop)
312 if len(atoms) > 1:
313 # Apply a random rotation to multi-atom blocks
314 phi, theta, psi = 360 * self.rng.rand(3)
315 atoms.euler_rotate(phi=phi, theta=0.5 * theta, psi=psi,
316 center=pos)
318 if not atoms_too_close_two_sets(cand, atoms, blmin):
319 cand += atoms
320 break
321 else:
322 # Reached maximum iteration number
323 # Break out of the for loop above
324 cand = None
325 break
327 if cand is None:
328 # Exit the main while loop
329 break
331 # Rebuild the candidate after repeating,
332 # randomly deleting surplus blocks and
333 # sorting back to the original order
334 cand_full = cand.repeat(repeat)
336 tags_full = cand_full.get_tags()
337 for i in range(nrep):
338 tags_full[len(cand) * i:len(cand) * (i + 1)] += i * N_blocks
339 cand_full.set_tags(tags_full)
341 cand = Atoms('', cell=cell, pbc=pbc)
342 ids_full = np.tile(ids, nrep)
344 tag_counter = 0
345 if len(self.slab) > 0:
346 tag_counter = int(max(self.slab.get_tags())) + 1
348 for i, (block, count) in enumerate(self.blocks):
349 tags = np.where(ids_full == i)[0]
350 bad = self.rng.choice(tags, size=surplus[i], replace=False)
351 for tag in tags:
352 if tag not in bad:
353 select = [a.index for a in cand_full if a.tag == tag]
354 atoms = cand_full[select] # is indeed a copy!
355 atoms.set_tags(tag_counter)
356 assert len(atoms) == len(block)
357 cand += atoms
358 tag_counter += 1
360 for i in range(self.number_of_variable_cell_vectors, 3):
361 cand.positions[:, i] += self.box_to_place_in[0][i]
363 # By construction, the minimal interatomic distances
364 # within the structure should already be respected
365 assert not atoms_too_close(cand, blmin, use_tags=True), \
366 'This is not supposed to happen; please report this bug'
368 if self.test_dist_to_slab and len(self.slab) > 0:
369 if atoms_too_close_two_sets(self.slab, cand, blmin):
370 continue
372 if self.test_too_far:
373 tags = cand.get_tags()
375 for tag in np.unique(tags):
376 too_far = True
377 indices_i = np.where(tags == tag)[0]
378 indices_j = np.where(tags != tag)[0]
379 too_far = not atoms_too_close_two_sets(cand[indices_i],
380 cand[indices_j],
381 blmin_too_far)
383 if too_far and len(self.slab) > 0:
384 # the block is too far from the rest
385 # but might still be sufficiently
386 # close to the slab
387 too_far = not atoms_too_close_two_sets(cand[indices_i],
388 self.slab,
389 blmin_too_far)
390 if too_far:
391 break
392 else:
393 too_far = False
395 if too_far:
396 continue
398 # Passed all the tests
399 cand = self.slab + cand
400 cand.set_cell(cell, scale_atoms=False)
401 break
402 else:
403 # Reached max iteration count in the while loop
404 return None
406 return cand
408 def generate_unit_cell(self, repeat):
409 """Generates a random unit cell.
411 For this, we use the vectors in self.slab.cell
412 in the fixed directions and randomly generate
413 the variable ones. For such a cell to be valid,
414 it has to satisfy the self.cellbounds constraints.
416 The cell will also be such that the volume of the
417 box in which the atoms can be placed (box limits
418 described by self.box_to_place_in) is equal to
419 self.box_volume.
421 Parameters:
423 repeat: tuple of 3 integers
424 Indicates by how much each cell vector
425 will later be reduced by cell splitting.
427 This is used to ensure that the original
428 cell is large enough so that the cell lengths
429 of the smaller cell exceed the largest
430 (X,X)-minimal-interatomic-distance in self.blmin.
431 """
432 # Find the minimal cell length 'Lmin'
433 # that we need in order to ensure that
434 # an added atom or molecule will never
435 # be 'too close to itself'
436 Lmin = 0.
437 for atoms, count in self.blocks:
438 dist = atoms.get_all_distances(mic=False, vector=False)
439 num = atoms.get_atomic_numbers()
441 for i in range(len(atoms)):
442 dist[i, i] += self.blmin[(num[i], num[i])]
443 for j in range(i):
444 bl = self.blmin[(num[i], num[j])]
445 dist[i, j] += bl
446 dist[j, i] += bl
448 L = np.max(dist)
449 if L > Lmin:
450 Lmin = L
452 # Generate a suitable unit cell
453 valid = False
454 while not valid:
455 cell = np.zeros((3, 3))
457 for i in range(self.number_of_variable_cell_vectors):
458 # on-diagonal values
459 cell[i, i] = self.rng.rand() * np.cbrt(self.box_volume)
460 cell[i, i] *= repeat[i]
461 for j in range(i):
462 # off-diagonal values
463 cell[i, j] = (self.rng.rand() - 0.5) * cell[i - 1, i - 1]
465 # volume scaling
466 for i in range(self.number_of_variable_cell_vectors, 3):
467 cell[i] = self.box_to_place_in[1][i]
469 if self.number_of_variable_cell_vectors > 0:
470 volume = abs(np.linalg.det(cell))
471 scaling = self.box_volume / volume
472 scaling **= 1. / self.number_of_variable_cell_vectors
473 cell[:self.number_of_variable_cell_vectors] *= scaling
475 for i in range(self.number_of_variable_cell_vectors, 3):
476 cell[i] = self.slab.get_cell()[i]
478 # bounds checking
479 valid = True
481 if self.cellbounds is not None:
482 if not self.cellbounds.is_within_bounds(cell):
483 valid = False
485 if valid:
486 for i in range(3):
487 if np.linalg.norm(cell[i]) < repeat[i] * Lmin:
488 assert self.number_of_variable_cell_vectors > 0
489 valid = False
491 return cell