Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# flake8: noqa
2import logging
3import math
4import numpy as np
6###CO <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
7try:
8 import scipy
9 import scipy.linalg
10 have_scipy = True
11except ImportError:
12 have_scipy = False
13#>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
15from ase.utils import longsum
17logger = logging.getLogger(__name__)
19###CO <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
20class LinearPath:
21 """Describes a linear search path of the form t -> t g
22 """
24 def __init__(self, dirn):
25 """Initialise LinearPath object
27 Args:
28 dirn : search direction
29 """
30 self.dirn = dirn
32 def step(self, alpha):
33 return alpha * self.dirn
37def nullspace(A, myeps=1e-10):
38 """The RumPath class needs the ability to compute the null-space of
39 a small matrix. This is provided here. But we now also need scipy!
41 This routine was copy-pasted from
42 http://stackoverflow.com/questions/5889142/python-numpy-scipy-finding-the-null-space-of-a-matrix
43 How the h*** does numpy/scipy not have a null-space implemented?
44 """
45 u, s, vh = scipy.linalg.svd(A)
46 padding = max(0, np.shape(A)[1] - np.shape(s)[0])
47 null_mask = np.concatenate(((s <= myeps),
48 np.ones((padding,),dtype=bool)),
49 axis=0)
50 null_space = scipy.compress(null_mask, vh, axis=0)
51 return scipy.transpose(null_space)
55class RumPath:
56 """Describes a curved search path, taking into account information
57 about (near-) rigid unit motions (RUMs).
59 One can tag sub-molecules of the system, which are collections of
60 particles that form a (near-)rigid unit. Let x1, ... xn be the positions
61 of one such molecule, then we construct a path of the form
62 xi(t) = xi(0) + (exp(K t) - I) yi + t wi + t c
63 where yi = xi - <x>, c = <g> is a rigid translation, K is anti-symmetric
64 so that exp(tK) yi denotes a rotation about the centre of mass, and wi
65 is the remainind stretch of the molecule.
67 The following variables are stored:
68 * rotation_factors : array of acceleration factors
69 * rigid_units : array of molecule indices
70 * stretch : w
71 * K : list of K matrices
72 * y : list of y-vectors
73 """
75 def __init__(self, x_start, dirn, rigid_units, rotation_factors):
76 """Initialise a `RumPath`
78 Args:
79 x_start : vector containing the positions in d x nAt shape
80 dirn : search direction, same shape as x_start vector
81 rigid_units : array of arrays of molecule indices
82 rotation_factors : factor by which the rotation of each molecular
83 is accelerated; array of scalars, same length as
84 rigid_units
85 """
87 if not have_scipy:
88 raise RuntimeError("RumPath depends on scipy, which could not be imported")
90 # keep some stuff stored
91 self.rotation_factors = rotation_factors
92 self.rigid_units = rigid_units
93 # create storage for more stuff
94 self.K = []
95 self.y = []
96 # We need to reshape x_start and dirn since we want to apply
97 # rotations to individual position vectors!
98 # we will eventually store the stretch in w, X is just a reference
99 # to x_start with different shape
100 w = dirn.copy().reshape( [3, len(dirn)/3] )
101 X = x_start.reshape( [3, len(dirn)/3] )
103 for I in rigid_units: # I is a list of indices for one molecule
104 # get the positions of the i-th molecule, subtract mean
105 x = X[:, I]
106 y = x - x.mean(0).T # PBC?
107 # same for forces >>> translation component
108 g = w[:, I]
109 f = g - g.mean(0).T
110 # compute the system to solve for K (see accompanying note!)
111 # A = \sum_j Yj Yj'
112 # b = \sum_j Yj' fj
113 A = np.zeros((3,3))
114 b = np.zeros(3)
115 for j in range(len(I)):
116 Yj = np.array( [ [ y[1,j], 0.0, -y[2,j] ],
117 [ -y[0,j], y[2,j], 0.0 ],
118 [ 0.0, -y[1,j], y[0,j] ] ] )
119 A += np.dot(Yj.T, Yj)
120 b += np.dot(Yj.T, f[:, j])
121 # If the directions y[:,j] span all of R^3 (canonically this is true
122 # when there are at least three atoms in the molecule) but if
123 # not, then A is singular so we cannot solve A k = b. In this case
124 # we solve Ak = b in the space orthogonal to the null-space of A.
125 # TODO:
126 # this can get unstable if A is "near-singular"! We may
127 # need to revisit this idea at some point to get something
128 # more robust
129 N = nullspace(A)
130 b -= np.dot(np.dot(N, N.T), b)
131 A += np.dot(N, N.T)
132 k = scipy.linalg.solve(A, b, sym_pos=True)
133 K = np.array( [ [ 0.0, k[0], -k[2] ],
134 [ -k[0], 0.0, k[1] ],
135 [ k[2], -k[1], 0.0 ] ] )
136 # now remove the rotational component from the search direction
137 # ( we actually keep the translational component as part of w,
138 # but this could be changed as well! )
139 w[:, I] -= np.dot(K, y)
140 # store K and y
141 self.K.append(K)
142 self.y.append(y)
144 # store the stretch (no need to copy here, since w is already a copy)
145 self.stretch = w
148 def step(self, alpha):
149 """perform a step in the line-search, given a step-length alpha
151 Args:
152 alpha : step-length
154 Returns:
155 s : update for positions
156 """
157 # translation and stretch
158 s = alpha * self.stretch
159 # loop through rigid_units
160 for (I, K, y, rf) in zip(self.rigid_units, self.K, self.y,
161 self.rotation_factors):
162 # with matrix exponentials:
163 # s[:, I] += expm(K * alpha * rf) * p.y - p.y
164 # third-order taylor approximation:
165 # I + t K + 1/2 t^2 K^2 + 1/6 t^3 K^3 - I
166 # = t K (I + 1/2 t K (I + 1/3 t K))
167 aK = alpha * rf * K
168 s[:, I] += np.dot(aK, y + 0.5 * np.dot(aK, y + 1/3. * np.dot( aK, y )) )
170 return s.ravel()
171#>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
175class LineSearchArmijo:
177 def __init__(self, func, c1=0.1, tol=1e-14):
178 """Initialise the linesearch with set parameters and functions.
180 Args:
181 func: the function we are trying to minimise (energy), which should
182 take an array of positions for its argument
183 c1: parameter for the sufficient decrease condition in (0.0 0.5)
184 tol: tolerance for evaluating equality
186 """
188 self.tol = tol
189 self.func = func
191 if not (0 < c1 < 0.5):
192 logger.error("c1 outside of allowed interval (0, 0.5). Replacing with "
193 "default value.")
194 print("Warning: C1 outside of allowed interval. Replacing with "
195 "default value.")
196 c1 = 0.1
198 self.c1 = c1
201 ###CO : added rigid_units and rotation_factors
202 def run(self, x_start, dirn, a_max=None, a_min=None, a1=None,
203 func_start=None, func_old=None, func_prime_start=None,
204 rigid_units=None, rotation_factors=None, maxstep=None):
206 """Perform a backtracking / quadratic-interpolation linesearch
207 to find an appropriate step length with Armijo condition.
208 NOTE THIS LINESEARCH DOES NOT IMPOSE WOLFE CONDITIONS!
210 The idea is to do backtracking via quadratic interpolation, stabilised
211 by putting a lower bound on the decrease at each linesearch step.
212 To ensure BFGS-behaviour, whenever "reasonable" we take 1.0 as the
213 starting step.
215 Since Armijo does not guarantee convergence of BFGS, the outer
216 BFGS algorithm must restart when the current search direction
217 ceases to be a descent direction.
219 Args:
220 x_start: vector containing the position to begin the linesearch
221 from (ie the current location of the optimisation)
222 dirn: vector pointing in the direction to search in (pk in [NW]).
223 Note that this does not have to be a unit vector, but the
224 function will return a value scaled with respect to dirn.
225 a_max: an upper bound on the maximum step length allowed. Default is 2.0.
226 a_min: a lower bound on the minimum step length allowed. Default is 1e-10.
227 A RuntimeError is raised if this bound is violated
228 during the line search.
229 a1: the initial guess for an acceptable step length. If no value is
230 given, this will be set automatically, using quadratic
231 interpolation using func_old, or "rounded" to 1.0 if the
232 initial guess lies near 1.0. (specifically for LBFGS)
233 func_start: the value of func at the start of the linesearch, ie
234 phi(0). Passing this information avoids potentially expensive
235 re-calculations
236 func_prime_start: the value of func_prime at the start of the
237 linesearch (this will be dotted with dirn to find phi_prime(0))
238 func_old: the value of func_start at the previous step taken in
239 the optimisation (this will be used to calculate the initial
240 guess for the step length if it is not provided)
241 rigid_units, rotationfactors : see documentation of RumPath, if it is
242 unclear what these parameters are, then leave them at None
243 maxstep: maximum allowed displacement in Angstrom. Default is 0.2.
245 Returns:
246 A tuple: (step, func_val, no_update)
248 step: the final chosen step length, representing the number of
249 multiples of the direction vector to move
250 func_val: the value of func after taking this step, ie phi(step)
251 no_update: true if the linesearch has not performed any updates of
252 phi or alpha, due to errors or immediate convergence
254 Raises:
255 ValueError for problems with arguments
256 RuntimeError for problems encountered during iteration
257 """
259 a1 = self.handle_args(x_start, dirn, a_max, a_min, a1, func_start,
260 func_old, func_prime_start, maxstep)
262 # DEBUG
263 logger.debug("a1(auto) = ", a1)
265 if abs(a1 - 1.0) <= 0.5:
266 a1 = 1.0
268 logger.debug("-----------NEW LINESEARCH STARTED---------")
270 a_final = None
271 phi_a_final = None
272 num_iter = 0
274 ###CO <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
275 # create a search-path
276 if rigid_units is None:
277 # standard linear search-path
278 logger.debug("-----using LinearPath-----")
279 path = LinearPath(dirn)
280 else:
281 logger.debug("-----using RumPath------")
282 # if rigid_units != None, but rotation_factors == None, then
283 # raise an error.
284 if rotation_factors == None:
285 raise RuntimeError('RumPath cannot be created since rotation_factors == None')
286 path = RumPath(x_start, dirn, rigid_units, rotation_factors)
287 #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
290 while(True):
292 logger.debug("-----------NEW ITERATION OF LINESEARCH----------")
293 logger.debug("Number of linesearch iterations: %d", num_iter)
294 logger.debug("a1 = %e", a1)
296 ###CO replaced: func_a1 = self.func(x_start + a1 * self.dirn)
297 func_a1 = self.func(x_start + path.step(a1))
298 phi_a1 = func_a1
299 # compute sufficient decrease (Armijo) condition
300 suff_dec = (phi_a1 <= self.func_start+self.c1*a1*self.phi_prime_start)
302 # DEBUG
303 # print("c1*a1*phi_prime_start = ", self.c1*a1*self.phi_prime_start,
304 # " | phi_a1 - phi_0 = ", phi_a1 - self.func_start)
305 logger.info("a1 = %.3f, suff_dec = %r", a1, suff_dec)
306 if a1 < self.a_min:
307 raise RuntimeError('a1 < a_min, giving up')
308 if self.phi_prime_start > 0.0:
309 raise RuntimeError("self.phi_prime_start > 0.0")
311 # check sufficient decrease (Armijo condition)
312 if suff_dec:
313 a_final = a1
314 phi_a_final = phi_a1
315 logger.debug("Linesearch returned a = %e, phi_a = %e",
316 a_final, phi_a_final)
317 logger.debug("-----------LINESEARCH COMPLETE-----------")
318 return a_final, phi_a_final, num_iter==0
320 # we don't have sufficient decrease, so we need to compute a
321 # new trial step-length
322 at = - ((self.phi_prime_start * a1) /
323 (2*((phi_a1 - self.func_start)/a1 - self.phi_prime_start)))
324 logger.debug("quadratic_min: initial at = %e", at)
326 # because a1 does not satisfy Armijo it follows that at must
327 # lie between 0 and a1. In fact, more strongly,
328 # at \leq (2 (1-c1))^{-1} a1, which is a back-tracking condition
329 # therefore, we should now only check that at has not become too small,
330 # in which case it is likely that nonlinearity has played a big role
331 # here, so we take an ultra-conservative backtracking step
332 a1 = max( at, a1 / 10.0 )
333 if a1 > at:
334 logger.debug("at (%e) < a1/10: revert to backtracking a1/10", at)
336 # (end of while(True) line-search loop)
337 # (end of run())
341 def handle_args(self, x_start, dirn, a_max, a_min, a1, func_start, func_old,
342 func_prime_start, maxstep):
344 """Verify passed parameters and set appropriate attributes accordingly.
346 A suitable value for the initial step-length guess will be either
347 verified or calculated, stored in the attribute self.a_start, and
348 returned.
350 Args:
351 The args should be identical to those of self.run().
353 Returns:
354 The suitable initial step-length guess a_start
356 Raises:
357 ValueError for problems with arguments
359 """
361 self.a_max = a_max
362 self.a_min = a_min
363 self.x_start = x_start
364 self.dirn = dirn
365 self.func_old = func_old
366 self.func_start = func_start
367 self.func_prime_start = func_prime_start
369 if a_max is None:
370 a_max = 2.0
372 if a_max < self.tol:
373 logger.warning("a_max too small relative to tol. Reverting to "
374 "default value a_max = 2.0 (twice the <ideal> step).")
375 a_max = 2.0 # THIS ASSUMES NEWTON/BFGS TYPE BEHAVIOUR!
377 if self.a_min is None:
378 self.a_min = 1e-10
380 if func_start is None:
381 logger.debug("Setting func_start")
382 self.func_start = self.func(x_start)
384 self.phi_prime_start = longsum(self.func_prime_start * self.dirn)
385 if self.phi_prime_start >= 0:
386 logger.error("Passed direction which is not downhill. Aborting...")
387 raise ValueError("Direction is not downhill.")
388 elif math.isinf(self.phi_prime_start):
389 logger.error("Passed func_prime_start and dirn which are too big. "
390 "Aborting...")
391 raise ValueError("func_prime_start and dirn are too big.")
393 if a1 is None:
394 if func_old is not None:
395 # Interpolating a quadratic to func and func_old - see NW
396 # equation 3.60
397 a1 = 2*(self.func_start - self.func_old)/self.phi_prime_start
398 logger.debug("Interpolated quadratic, obtained a1 = %e", a1)
399 if a1 is None or a1 > a_max:
400 logger.debug("a1 greater than a_max. Reverting to default value "
401 "a1 = 1.0")
402 a1 = 1.0
403 if a1 is None or a1 < self.tol:
404 logger.debug("a1 is None or a1 < self.tol. Reverting to default value "
405 "a1 = 1.0")
406 a1 = 1.0
407 if a1 is None or a1 < self.a_min:
408 logger.debug("a1 is None or a1 < a_min. Reverting to default value "
409 "a1 = 1.0")
410 a1 = 1.0
412 if maxstep is None:
413 maxstep = 0.2
414 logger.debug("maxstep = %e", maxstep)
416 r = np.reshape(dirn, (-1, 3))
417 steplengths = ((a1*r)**2).sum(1)**0.5
418 maxsteplength = np.max(steplengths)
419 if maxsteplength >= maxstep:
420 a1 *= maxstep / maxsteplength
421 logger.debug("Rescaled a1 to fulfill maxstep criterion")
423 self.a_start = a1
425 logger.debug("phi_start = %e, phi_prime_start = %e", self.func_start,
426 self.phi_prime_start)
427 logger.debug("func_start = %s, self.func_old = %s", self.func_start,
428 self.func_old)
429 logger.debug("a1 = %e, a_max = %e, a_min = %e", a1, a_max, self.a_min)
431 return a1