Coverage for /builds/debichem-team/python-ase/ase/ga/population.py: 26.38%

1""" Implementation of a population for maintaining a GA population and

2proposing structures to pair. """

3from math import exp, sqrt, tanh

4from operator import itemgetter

6import numpy as np

8from ase.db.core import now

9from ase.ga import get_raw_score

12def count_looks_like(a, all_cand, comp):

13 """Utility method for counting occurrences."""

14 n = 0

15 for b in all_cand:

16 if a.info['confid'] == b.info['confid']:

17 continue

18 if comp.looks_like(a, b):

19 n += 1

20 return n

23class Population:

24 """Population class which maintains the current population

25 and proposes which candidates to pair together.

27 Parameters:

29 data_connection: DataConnection object

30 Bla bla bla.

32 population_size: int

33 The number of candidates in the population.

35 comparator: Comparator object

36 this will tell if two configurations are equal.

37 Default compare atoms objects directly.

39 logfile: str

40 Text file that contains information about the population

41 The format is::

43 timestamp: generation(if available): id1,id2,id3...

45 Using this file greatly speeds up convergence checks.

46 Default None meaning that no file is written.

48 use_extinct: boolean

49 Set this to True if mass extinction and the extinct key

50 are going to be used. Default is False.

52 rng: Random number generator

53 By default numpy.random.

54 """

56 def __init__(self, data_connection, population_size,

57 comparator=None, logfile=None, use_extinct=False,

58 rng=np.random):

59 self.dc = data_connection

60 self.pop_size = population_size

61 if comparator is None:

62 from ase.ga.standard_comparators import AtomsComparator

63 comparator = AtomsComparator()

64 self.comparator = comparator

65 self.logfile = logfile

66 self.use_extinct = use_extinct

67 self.rng = rng

68 self.pop = []

69 self.pairs = None

70 self.all_cand = None

71 self.__initialize_pop__()

73 def __initialize_pop__(self):

74 """ Private method that initializes the population when

75 the population is created. """

77 # Get all relaxed candidates from the database

78 ue = self.use_extinct

79 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)

80 all_cand.sort(key=lambda x: x.info['key_value_pairs']['raw_score'],

81 reverse=True)

82 # all_cand.sort(key=lambda x: x.get_potential_energy())

84 # Fill up the population with the self.pop_size most stable

85 # unique candidates.

86 i = 0

87 while i < len(all_cand) and len(self.pop) < self.pop_size:

88 c = all_cand[i]

89 i += 1

90 eq = False

91 for a in self.pop:

92 if self.comparator.looks_like(a, c):

93 eq = True

94 break

95 if not eq:

96 self.pop.append(c)

98 for a in self.pop:

99 a.info['looks_like'] = count_looks_like(a, all_cand,

100 self.comparator)

101

102 self.all_cand = all_cand

103 self.__calc_participation__()

104

105 def __calc_participation__(self):

106 """ Determines, from the database, how many times each

107 candidate has been used to generate new candidates. """

108 (participation, pairs) = self.dc.get_participation_in_pairing()

109 for a in self.pop:

110 if a.info['confid'] in participation.keys():

111 a.info['n_paired'] = participation[a.info['confid']]

112 else:

113 a.info['n_paired'] = 0

114 self.pairs = pairs

115

116 def update(self, new_cand=None):

117 """ New candidates can be added to the database

118 after the population object has been created.

119 This method extracts these new candidates from the

120 database and includes them in the population. """

121

122 if len(self.pop) == 0:

123 self.__initialize_pop__()

124

125 if new_cand is None:

126 ue = self.use_extinct

127 new_cand = self.dc.get_all_relaxed_candidates(only_new=True,

128 use_extinct=ue)

129

130 for a in new_cand:

131 self.__add_candidate__(a)

132 self.all_cand.append(a)

133 self.__calc_participation__()

134 self._write_log()

135

136 def get_current_population(self):

137 """ Returns a copy of the current population. """

138 self.update()

139 return [a.copy() for a in self.pop]

140

141 def get_population_after_generation(self, gen):

142 """ Returns a copy of the population as it where

143 after generation gen"""

144 if self.logfile is not None:

145 with open(self.logfile) as fd:

146 gens = {}

147 for line in fd:

148 _, no, popul = line.split(':')

149 gens[int(no)] = [int(i) for i in popul.split(',')]

150 return [c.copy() for c in self.all_cand[::-1]

151 if c.info['relax_id'] in gens[gen]]

152

153 all_candidates = [c for c in self.all_cand

154 if c.info['key_value_pairs']['generation'] <= gen]

155 cands = [all_candidates[0]]

156 for b in all_candidates:

157 if b not in cands:

158 for a in cands:

159 if self.comparator.looks_like(a, b):

160 break

161 else:

162 cands.append(b)

163 pop = cands[:self.pop_size]

164 return [a.copy() for a in pop]

165

166 def __add_candidate__(self, a):

167 """ Adds a single candidate to the population. """

168

169 # check if the structure is too low in raw score

170 raw_score_a = get_raw_score(a)

171 raw_score_worst = get_raw_score(self.pop[-1])

172 if raw_score_a < raw_score_worst \

173 and len(self.pop) == self.pop_size:

174 return

175

176 # check if the new candidate should

177 # replace a similar structure in the population

178 for (i, b) in enumerate(self.pop):

179 if self.comparator.looks_like(a, b):

180 if get_raw_score(b) < raw_score_a:

181 del self.pop[i]

182 a.info['looks_like'] = count_looks_like(a,

183 self.all_cand,

184 self.comparator)

185 self.pop.append(a)

186 self.pop.sort(key=get_raw_score,

187 reverse=True)

188 return

189

190 # the new candidate needs to be added, so remove the highest

191 # energy one

192 if len(self.pop) == self.pop_size:

193 del self.pop[-1]

194

195 # add the new candidate

196 a.info['looks_like'] = count_looks_like(a,

197 self.all_cand,

198 self.comparator)

199 self.pop.append(a)

200 self.pop.sort(key=get_raw_score, reverse=True)

201

202 def __get_fitness__(self, indecies, with_history=True):

203 """Calculates the fitness using the formula from

204 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816

205

206 Sign change on the fitness compared to the formulation in the

207 abovementioned paper due to maximizing raw_score instead of

208 minimizing energy. (Set raw_score=-energy to optimize the energy)

209 """

210

211 scores = [get_raw_score(x) for x in self.pop]

212 min_s = min(scores)

213 max_s = max(scores)

214 T = min_s - max_s

215 if isinstance(indecies, int):

216 indecies = [indecies]

217

218 f = [0.5 * (1. - tanh(2. * (scores[i] - max_s) / T - 1.))

219 for i in indecies]

220 if with_history:

221 M = [float(self.pop[i].info['n_paired']) for i in indecies]

222 L = [float(self.pop[i].info['looks_like']) for i in indecies]

223 f = [f[i] * 1. / sqrt(1. + M[i]) * 1. / sqrt(1. + L[i])

224 for i in range(len(f))]

225 return f

226

227 def get_two_candidates(self, with_history=True):

228 """ Returns two candidates for pairing employing the

229 fitness criteria from

230 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816

231 and the roulete wheel selection scheme described in

232 R.L. Johnston Dalton Transactions,

233 Vol. 22, No. 22. (2003), pp. 4193-4207

234 """

235

236 if len(self.pop) < 2:

237 self.update()

238

239 if len(self.pop) < 2:

240 return None

241

242 fit = self.__get_fitness__(range(len(self.pop)), with_history)

243 fmax = max(fit)

244 c1 = self.pop[0]

245 c2 = self.pop[0]

246 used_before = False

247 while c1.info['confid'] == c2.info['confid'] and not used_before:

248 nnf = True

249 while nnf:

250 t = self.rng.randint(len(self.pop))

251 if fit[t] > self.rng.random() * fmax:

252 c1 = self.pop[t]

253 nnf = False

254 nnf = True

255 while nnf:

256 t = self.rng.randint(len(self.pop))

257 if fit[t] > self.rng.random() * fmax:

258 c2 = self.pop[t]

259 nnf = False

260

261 c1id = c1.info['confid']

262 c2id = c2.info['confid']

263 used_before = (min([c1id, c2id]), max([c1id, c2id])) in self.pairs

264 return (c1.copy(), c2.copy())

265

266 def get_one_candidate(self, with_history=True):

267 """Returns one candidate for mutation employing the

268 fitness criteria from

269 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816

270 and the roulete wheel selection scheme described in

271 R.L. Johnston Dalton Transactions,

272 Vol. 22, No. 22. (2003), pp. 4193-4207

273 """

274 if len(self.pop) < 1:

275 self.update()

276

277 if len(self.pop) < 1:

278 return None

279

280 fit = self.__get_fitness__(range(len(self.pop)), with_history)

281 fmax = max(fit)

282 nnf = True

283 while nnf:

284 t = self.rng.randint(len(self.pop))

285 if fit[t] > self.rng.random() * fmax:

286 c1 = self.pop[t]

287 nnf = False

288

289 return c1.copy()

290

291 def _write_log(self):

292 """Writes the population to a logfile.

293

294 The format is::

295

296 timestamp: generation(if available): id1,id2,id3..."""

297 if self.logfile is not None:

298 ids = [str(a.info['relax_id']) for a in self.pop]

299 if ids != []:

300 try:

301 gen_nums = [c.info['key_value_pairs']['generation']

302 for c in self.all_cand]

303 max_gen = max(gen_nums)

304 except KeyError:

305 max_gen = ' '

306 with open(self.logfile, 'a') as fd:

307 fd.write('{time}: {gen}: {pop}\n'.format(time=now(),

308 pop=','.join(ids),

309 gen=max_gen))

310

311 def is_uniform(self, func, min_std, pop=None):

312 """Tests whether the current population is uniform or diverse.

313 Returns True if uniform, False otherwise.

314

315 Parameters:

316

317 func: function

318 that takes one argument an atoms object and returns a value that

319 will be used for testing against the rest of the population.

320

321 min_std: int or float

322 The minimum standard deviation, if the population has a lower

323 std dev it is uniform.

324

325 pop: list, optional

326 use this list of Atoms objects instead of the current population.

327 """

328 if pop is None:

329 pop = self.pop

330 vals = [func(a) for a in pop]

331 stddev = np.std(vals)

332 if stddev < min_std:

333 return True

334 return False

335

336 def mass_extinction(self, ids):

337 """Kills every candidate in the database with gaid in the

338 supplied list of ids. Typically used on the main part of the current

339 population if the diversity is to small.

340

341 Parameters:

342

343 ids: list

344 list of ids of candidates to be killed.

345

346 """

347 for confid in ids:

348 self.dc.kill_candidate(confid)

349 self.pop = []

350

351

352class RandomPopulation(Population):

353 def __init__(self, data_connection, population_size,

354 comparator=None, logfile=None, exclude_used_pairs=False,

355 bad_candidates=0, use_extinct=False):

356 self.exclude_used_pairs = exclude_used_pairs

357 self.bad_candidates = bad_candidates

358 Population.__init__(self, data_connection, population_size,

359 comparator, logfile, use_extinct)

360

361 def __initialize_pop__(self):

362 """ Private method that initializes the population when

363 the population is created. """

364

365 # Get all relaxed candidates from the database

366 ue = self.use_extinct

367 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)

368 all_cand.sort(key=get_raw_score, reverse=True)

369 # all_cand.sort(key=lambda x: x.get_potential_energy())

370

371 if len(all_cand) > 0:

372 # Fill up the population with the self.pop_size most stable

373 # unique candidates.

374 ratings = []

375 best_raw = get_raw_score(all_cand[0])

376 i = 0

377 while i < len(all_cand):

378 c = all_cand[i]

379 i += 1

380 eq = False

381 for a in self.pop:

382 if self.comparator.looks_like(a, c):

383 eq = True

384 break

385 if not eq:

386 if len(self.pop) < self.pop_size - self.bad_candidates:

387 self.pop.append(c)

388 else:

389 exp_fact = exp(get_raw_score(c) / best_raw)

390 ratings.append([c, (exp_fact - 1) * self.rng.random()])

391 ratings.sort(key=itemgetter(1), reverse=True)

392

393 for i in range(self.bad_candidates):

394 self.pop.append(ratings[i][0])

395

396 for a in self.pop:

397 a.info['looks_like'] = count_looks_like(a, all_cand,

398 self.comparator)

399

400 self.all_cand = all_cand

401 self.__calc_participation__()

402

403 def update(self):

404 """ The update method in Population will add to the end of

405 the population, that can't be used here since we might have

406 bad candidates that need to stay in the population, therefore

407 just recalc the population every time. """

408

409 self.pop = []

410 self.__initialize_pop__()

411

412 self._write_log()

413

414 def get_one_candidate(self):

415 """Returns one candidates at random."""

416 if len(self.pop) < 1:

417 self.update()

418

419 if len(self.pop) < 1:

420 return None

421

422 t = self.rng.randint(len(self.pop))

423 c = self.pop[t]

424

425 return c.copy()

426

427 def get_two_candidates(self):

428 """Returns two candidates at random."""

429 if len(self.pop) < 2:

430 self.update()

431

432 if len(self.pop) < 2:

433 return None

434

435 c1 = self.pop[0]

436 c2 = self.pop[0]

437 used_before = False

438 while c1.info['confid'] == c2.info['confid'] and not used_before:

439 t = self.rng.randint(len(self.pop))

440 c1 = self.pop[t]

441 t = self.rng.randint(len(self.pop))

442 c2 = self.pop[t]

443

444 c1id = c1.info['confid']

445 c2id = c2.info['confid']

446 used_before = (tuple(sorted([c1id, c2id])) in self.pairs and

447 self.exclude_used_pairs)

448 return (c1.copy(), c2.copy())

449

450

451class FitnessSharingPopulation(Population):

452 """ Fitness sharing population that penalizes structures if they are

453 too similar. This is determined by a distance measure

454

455 Parameters:

456

457 comp_key: string

458 Key where the distance measure can be found in the

459 atoms.info['key_value_pairs'] dictionary.

460

461 threshold: float or int

462 Value above which no penalization of the fitness takes place

463

464 alpha_sh: float or int

465 Determines the shape of the sharing function.

466 Default is 1, which gives a linear sharing function.

467

468 """

469

470 def __init__(self, data_connection, population_size,

471 comp_key, threshold, alpha_sh=1.,

472 comparator=None, logfile=None, use_extinct=False):

473 self.comp_key = comp_key

474 self.dt = threshold # dissimilarity threshold

475 self.alpha_sh = alpha_sh

476 self.fit_scaling = 1.

477

478 self.sh_cache = {}

479

480 Population.__init__(self, data_connection, population_size,

481 comparator, logfile, use_extinct)

482

483 def __get_fitness__(self, candidates):

484 """Input should be sorted according to raw_score."""

485 max_s = get_raw_score(candidates[0])

486 min_s = get_raw_score(candidates[-1])

487 T = min_s - max_s

488

489 shared_fit = []

490 for c in candidates:

491 sc = get_raw_score(c)

492 obj_fit = 0.5 * (1. - tanh(2. * (sc - max_s) / T - 1.))

493 m = 1.

494 ck = c.info['key_value_pairs'][self.comp_key]

495 for other in candidates:

496 if other != c:

497 name = tuple(sorted([c.info['confid'],

498 other.info['confid']]))

499 if name not in self.sh_cache:

500 ok = other.info['key_value_pairs'][self.comp_key]

501 d = abs(ck - ok)

502 if d < self.dt:

503 v = 1 - (d / self.dt)**self.alpha_sh

504 self.sh_cache[name] = v

505 else:

506 self.sh_cache[name] = 0

507 m += self.sh_cache[name]

508

509 shf = (obj_fit ** self.fit_scaling) / m

510 shared_fit.append(shf)

511 return shared_fit

512

513 def update(self):

514 """ The update method in Population will add to the end of

515 the population, that can't be used here since the shared fitness

516 will change for all candidates when new are added, therefore

517 just recalc the population every time. """

518

519 self.pop = []

520 self.__initialize_pop__()

521

522 self._write_log()

523

524 def __initialize_pop__(self):

525 # Get all relaxed candidates from the database

526 ue = self.use_extinct

527 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)

528 all_cand.sort(key=get_raw_score, reverse=True)

529

530 if len(all_cand) > 0:

531 shared_fit = self.__get_fitness__(all_cand)

532 all_sorted = list(zip(*sorted(zip(shared_fit, all_cand),

533 reverse=True)))[1]

534

535 # Fill up the population with the self.pop_size most stable

536 # unique candidates.

537 i = 0

538 while i < len(all_sorted) and len(self.pop) < self.pop_size:

539 c = all_sorted[i]

540 i += 1

541 eq = False

542 for a in self.pop:

543 if self.comparator.looks_like(a, c):

544 eq = True

545 break

546 if not eq:

547 self.pop.append(c)

548

549 for a in self.pop:

550 a.info['looks_like'] = count_looks_like(a, all_cand,

551 self.comparator)

552 self.all_cand = all_cand

553

554 def get_two_candidates(self):

555 """ Returns two candidates for pairing employing the

556 fitness criteria from

557 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816

558 and the roulete wheel selection scheme described in

559 R.L. Johnston Dalton Transactions,

560 Vol. 22, No. 22. (2003), pp. 4193-4207

561 """

562

563 if len(self.pop) < 2:

564 self.update()

565

566 if len(self.pop) < 2:

567 return None

568

569 fit = self.__get_fitness__(self.pop)

570 fmax = max(fit)

571 c1 = self.pop[0]

572 c2 = self.pop[0]

573 while c1.info['confid'] == c2.info['confid']:

574 nnf = True

575 while nnf:

576 t = self.rng.randint(len(self.pop))

577 if fit[t] > self.rng.random() * fmax:

578 c1 = self.pop[t]

579 nnf = False

580 nnf = True

581 while nnf:

582 t = self.rng.randint(len(self.pop))

583 if fit[t] > self.rng.random() * fmax:

584 c2 = self.pop[t]

585 nnf = False

586

587 return (c1.copy(), c2.copy())

588

589

590class RankFitnessPopulation(Population):

591 """ Ranks the fitness relative to set variable to flatten the surface

592 in a certain direction such that mating across variable is equally

593 likely irrespective of raw_score.

594

595 Parameters:

596

597 variable_function: function

598 A function that takes as input an Atoms object and returns

599 the variable that differentiates the ranks.

600

601 exp_function: boolean

602 If True use an exponential function for ranking the fitness.

603 If False use the same as in Population. Default True.

604

605 exp_prefactor: float

606 The prefactor used in the exponential fitness scaling function.

607 Default 0.5

608 """

609

610 def __init__(self, data_connection, population_size, variable_function,

611 comparator=None, logfile=None, use_extinct=False,

612 exp_function=True, exp_prefactor=0.5):

613 self.exp_function = exp_function

614 self.exp_prefactor = exp_prefactor

615 self.vf = variable_function

616 # The current fitness is set at each update of the population

617 self.current_fitness = None

618

619 Population.__init__(self, data_connection, population_size,

620 comparator, logfile, use_extinct)

621

622 def get_rank(self, rcand, key=None):

623 # Set the initial order of the candidates, will need to

624 # be returned in this order at the end of ranking.

625 ordered = list(zip(range(len(rcand)), rcand))

626

627 # Niche and rank candidates.

628 rec_nic = []

629 rank_fit = []

630 for o, c in ordered:

631 if o not in rec_nic:

632 ntr = []

633 ce1 = self.vf(c)

634 rec_nic.append(o)

635 ntr.append([o, c])

636 for oother, cother in ordered:

637 if oother not in rec_nic:

638 ce2 = self.vf(cother)

639 if ce1 == ce2:

640 # put the now processed in oother

641 # in rec_nic as well

642 rec_nic.append(oother)

643 ntr.append([oother, cother])

644 # Each niche is sorted according to raw_score and

645 # assigned a fitness according to the ranking of

646 # the candidates

647 ntr.sort(key=lambda x: x[1].info['key_value_pairs'][key],

648 reverse=True)

649 start_rank = -1

650 for cor, (on, cn) in enumerate(ntr):

651 rank = start_rank - cor

652 rank_fit.append([on, cn, rank])

653 # The original order is reformed

654 rank_fit.sort(key=itemgetter(0), reverse=False)

655 return np.array(list(zip(*rank_fit))[2])

656

657 def __get_fitness__(self, candidates):

658 expf = self.exp_function

659 rfit = self.get_rank(candidates, key='raw_score')

660

661 if not expf:

662 rmax = max(rfit)

663 rmin = min(rfit)

664 T = rmin - rmax

665 # If using obj_rank probability, must have non-zero T val.

666 # pop_size must be greater than number of permutations.

667 # We test for this here

668 msg = "Equal fitness for best and worst candidate in the "

669 msg += "population! Fitness scaling is impossible! "

670 msg += "Try with a larger population."

671 assert T != 0., msg

672 return 0.5 * (1. - np.tanh(2. * (rfit - rmax) / T - 1.))

673 else:

674 return self.exp_prefactor ** (-rfit - 1)

675

676 def update(self):

677 """ The update method in Population will add to the end of

678 the population, that can't be used here since the fitness

679 will potentially change for all candidates when new are added,

680 therefore just recalc the population every time. """

681

682 self.pop = []

683 self.__initialize_pop__()

684 self.current_fitness = self.__get_fitness__(self.pop)

685

686 self._write_log()

687

688 def __initialize_pop__(self):

689 # Get all relaxed candidates from the database

690 ue = self.use_extinct

691 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)

692 all_cand.sort(key=get_raw_score, reverse=True)

693

694 if len(all_cand) > 0:

695 fitf = self.__get_fitness__(all_cand)

696 all_sorted = list(zip(fitf, all_cand))

697 all_sorted.sort(key=itemgetter(0), reverse=True)

698 sort_cand = []

699 for _, t2 in all_sorted:

700 sort_cand.append(t2)

701 all_sorted = sort_cand

702

703 # Fill up the population with the self.pop_size most stable

704 # unique candidates.

705 i = 0

706 while i < len(all_sorted) and len(self.pop) < self.pop_size:

707 c = all_sorted[i]

708 c_vf = self.vf(c)

709 i += 1

710 eq = False

711 for a in self.pop:

712 a_vf = self.vf(a)

713 # Only run comparator if the variable_function (self.vf)

714 # returns the same. If it returns something different the

715 # candidates are inherently different.

716 # This is done to speed up.

717 if a_vf == c_vf:

718 if self.comparator.looks_like(a, c):

719 eq = True

720 break

721 if not eq:

722 self.pop.append(c)

723 self.all_cand = all_cand

724

725 def get_two_candidates(self):

726 """ Returns two candidates for pairing employing the

727 roulete wheel selection scheme described in

728 R.L. Johnston Dalton Transactions,

729 Vol. 22, No. 22. (2003), pp. 4193-4207

730 """

731

732 if len(self.pop) < 2:

733 self.update()

734

735 if len(self.pop) < 2:

736 return None

737

738 # Use saved fitness

739 fit = self.current_fitness

740 fmax = max(fit)

741 c1 = self.pop[0]

742 c2 = self.pop[0]

743 while c1.info['confid'] == c2.info['confid']:

744 nnf = True

745 while nnf:

746 t = self.rng.randint(len(self.pop))

747 if fit[t] > self.rng.random() * fmax:

748 c1 = self.pop[t]

749 nnf = False

750 nnf = True

751 while nnf:

752 t = self.rng.randint(len(self.pop))

753 if fit[t] > self.rng.random() * fmax:

754 c2 = self.pop[t]

755 nnf = False

756

757 return (c1.copy(), c2.copy())

758

759

760class MultiObjectivePopulation(RankFitnessPopulation):

761 """ Allows for assignment of fitness based on a set of two variables

762 such that fitness is ranked according to a Pareto-front of

763 non-dominated candidates.

764

765 Parameters

766 ----------

767 abs_data: list

768 Set of key_value_pairs in atoms object for which fitness should

769 be assigned based on absolute value.

770

771 rank_data: list

772 Set of key_value_pairs in atoms object for which data should

773 be ranked in order to ascribe fitness.

774

775 variable_function: function

776 A function that takes as input an Atoms object and returns

777 the variable that differentiates the ranks. Only use if

778 data is ranked.

779

780 exp_function: boolean

781 If True use an exponential function for ranking the fitness.

782 If False use the same as in Population. Default True.

783

784 exp_prefactor: float

785 The prefactor used in the exponential fitness scaling function.

786 Default 0.5

787

788 """

789

790 def __init__(self, data_connection, population_size,

791 variable_function=None, comparator=None, logfile=None,

792 use_extinct=False, abs_data=None, rank_data=None,

793 exp_function=True, exp_prefactor=0.5):

794 # The current fitness is set at each update of the population

795 self.current_fitness = None

796

797 if rank_data is None:

798 rank_data = []

799 self.rank_data = rank_data

800

801 if abs_data is None:

802 abs_data = []

803 self.abs_data = abs_data

804

805 RankFitnessPopulation.__init__(self, data_connection, population_size,

806 variable_function, comparator, logfile,

807 use_extinct, exp_function,

808 exp_prefactor)

809

810 def get_nonrank(self, nrcand, key=None):

811 """"Returns a list of fitness values."""

812 nrc_list = []

813 for nrc in nrcand:

814 nrc_list.append(nrc.info['key_value_pairs'][key])

815 return nrc_list

816

817 def __get_fitness__(self, candidates):

818 # There are no defaults set for the datasets to be

819 # used in this function, as such we test that the

820 # user has specified at least two here.

821 msg = "This is a multi-objective fitness function"

822 msg += " so there must be at least two datasets"

823 msg += " stated in the rank_data and abs_data variables"

824 assert len(self.rank_data) + len(self.abs_data) >= 2, msg

825

826 expf = self.exp_function

827

828 all_fitnesses = []

829 used = set()

830 for rd in self.rank_data:

831 used.add(rd)

832 # Build ranked fitness based on rd

833 all_fitnesses.append(self.get_rank(candidates, key=rd))

834

835 for d in self.abs_data:

836 if d not in used:

837 used.add(d)

838 # Build fitness based on d

839 all_fitnesses.append(self.get_nonrank(candidates, key=d))

840

841 # Set the initial order of the ranks, will need to

842 # be returned in this order at the end.

843 fordered = list(zip(range(len(all_fitnesses[0])), *all_fitnesses))

844 mvf_rank = -1 # Start multi variable rank at -1.

845 rec_vrc = [] # A record of already ranked candidates.

846 mvf_list = [] # A list for all candidate ranks.

847 # Sort by raw_score_1 in case this is different from

848 # the stored raw_score() variable that all_cands are

849 # sorted by.

850 fordered.sort(key=itemgetter(1), reverse=True)

851 # Niche candidates with equal or better raw_score to

852 # current candidate.

853 for a in fordered:

854 order, rest = a[0], a[1:]

855 if order not in rec_vrc:

856 pff = []

857 pff2 = []

858 rec_vrc.append(order)

859 pff.append((order, rest))

860 for b in fordered:

861 border, brest = b[0], b[1:]

862 if border not in rec_vrc:

863 if np.any(np.array(brest) >= np.array(rest)):

864 pff.append((border, brest))

865 # Remove any candidate from pff list that is dominated

866 # by another in the list.

867 for na in pff:

868 norder, nrest = na[0], na[1:]

869 dom = False

870 for nb in pff:

871 nborder, nbrest = nb[0], nb[1:]

872 if norder != nborder:

873 if np.all(np.array(nbrest) > np.array(nrest)):

874 dom = True

875 if not dom:

876 pff2.append((norder, nrest))

877 # Assign pareto rank from -1 to -N niches.

878 for ffa in pff2:

879 fforder, ffrest = ffa[0], ffa[1:]

880 rec_vrc.append(fforder)

881 mvf_list.append((fforder, mvf_rank, ffrest))

882 mvf_rank = mvf_rank - 1

883 # The original order is reformed

884 mvf_list.sort(key=itemgetter(0), reverse=False)

885 rfro = np.array(list(zip(*mvf_list))[1])

886

887 if not expf:

888 rmax = max(rfro)

889 rmin = min(rfro)

890 T = rmin - rmax

891 # If using obj_rank probability, must have non-zero T val.

892 # pop_size must be greater than number of permutations.

893 # We test for this here

894 msg = "Equal fitness for best and worst candidate in the "

895 msg += "population! Fitness scaling is impossible! "

896 msg += "Try with a larger population."

897 assert T != 0., msg

898 return 0.5 * (1. - np.tanh(2. * (rfro - rmax) / T - 1.))

899 else:

900 return self.exp_prefactor ** (-rfro - 1)

901

902 def __initialize_pop__(self):

903 # Get all relaxed candidates from the database

904 ue = self.use_extinct

905 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)

906 all_cand.sort(key=get_raw_score, reverse=True)

907

908 if len(all_cand) > 0:

909 fitf = self.__get_fitness__(all_cand)

910 all_sorted = list(zip(fitf, all_cand))

911 all_sorted.sort(key=itemgetter(0), reverse=True)

912 sort_cand = []

913 for _, t2 in all_sorted:

914 sort_cand.append(t2)

915 all_sorted = sort_cand

916

917 # Fill up the population with the self.pop_size most stable

918 # unique candidates.

919 i = 0

920 while i < len(all_sorted) and len(self.pop) < self.pop_size:

921 c = all_sorted[i]

922 # Use variable_function to decide whether to run comparator

923 # if the function has been defined by the user. This does not

924 # need to be dependent on using the rank_data function.

925 if self.vf is not None:

926 c_vf = self.vf(c)

927 i += 1

928 eq = False

929 for a in self.pop:

930 if self.vf is not None:

931 a_vf = self.vf(a)

932 # Only run comparator if the variable_function

933 # (self.vf) returns the same. If it returns something

934 # different the candidates are inherently different.

935 # This is done to speed up.

936 if a_vf == c_vf:

937 if self.comparator.looks_like(a, c):

938 eq = True

939 break

940 else:

941 if self.comparator.looks_like(a, c):

942 eq = True

943 break

944 if not eq:

945 self.pop.append(c)

946 self.all_cand = all_cand