gofee.py 20.3 KB
Newer Older
1
2
3
""" Definition of GOFEE class.
"""

Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
4
import numpy as np
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
5
6
import pickle
from os.path import isfile
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
7

8
9
10
11
12
13
from surrogate.gpr import GPR
from population import population

from ase import Atoms
from ase.io import read, write, Trajectory
from ase.calculators.singlepoint import SinglePointCalculator
14
from ase.calculators.dftb import Dftb
15

Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
16
from utils import array_to_string
17
from parallel_utils import split, parallel_function_eval
18

Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
19
20
from bfgslinesearch_constrained import relax

21
from candidate_operations.candidate_generation import CandidateGenerator
22
23
from candidate_operations.basic_mutations import RattleMutation

24
25
26
27
28
29
from mpi4py import MPI
world = MPI.COMM_WORLD

import traceback
import sys

Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
30
31
from time import time

32
class GOFEE():
33
34
    """
    GOFEE global structure search method.
35
        
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
    Parameters:

    structures: Atoms-object, list of Atoms-objects or None
        In initial structures from which to start the sesarch.
        If None, the startgenerator must be supplied.
        If less than Ninit structures is supplied, the remaining
        ones are generated using the startgenerator or by rattling
        the supplied structures, depending on wether the
        startgenerator is supplied.

    calc: ASE calculator
        Specifies the energy-expression
        with respect to which the atomic coordinates are
        globally optimized.

    gpr: GPR object
        The Gaussian Process Regression model used as the
        surrogate model for the Potential energy surface.
54
    
55
56
57
58
    startgenerator: Startgenerator object
        Used to generate initial random
        structures. Must be supplied if structures if structues=None.
        (This is the recommended way to initialize the search.)
59

60
    candidate_generator: OperationSelector object
61
        Object used to generate new candidates.
62

63
64
65
    trajectory: str
        Name of trajectory to which all structures,
        evaluated during the search, is saved.
66

Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
67
68
69
70
    logfile: file object or str
        If *logfile* is a string, a file with that name will be opened.
        Use '-' for stdout.

71
    kappa: float
72
73
        How much to weigh predicted uncertainty in the acquisition
        function.
74

75
    max_steps: int
76
        Number of search steps.
77

78
    Ninit: int
79
80
81
82
        Number of initial structures. If len(structures) <
        Ninit, the remaining structures are generated using the
        startgenerator (if supplied) or by rattling the supplied
        'structures'.
83

84
    max_relax_dist: float
85
86
        Max distance (in Angstrom) that an atom is allowed to
        move during surrogate relaxation.
87

88
    Ncandidates: int
89
90
        Number of new cancidate structures generated and
        surrogate-relaxed in each search iteration.
91

92
    population_size: int
93
        Maximum number of structures in the population.
94

95
    dualpoint: boolean
96
        Whether to use dualpoint evaluation or not.
97
98

    min_certainty: float
99
100
101
        Max predicted uncertainty allowed for structures to be
        considdered for evaluation. (in units of the maximum possible
        uncertainty.)
102

103
104
105
106
107
    position_constraint: OperationConstraint object
        Enforces constraints on the positions of the "free" atoms
        in the search. The constraint is enforces both during
        mutation/crossover operations and during surrogate-relaxation.

108
    restart: str
109
        Filename for restart file.
110
    """
111
112
113
114
115
116
    def __init__(self, structures=None,
                 calc=None,
                 gpr=None,
                 startgenerator=None,
                 candidate_generator=None,
                 kappa=2,
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
117
                 max_steps=200,
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
118
                 Ninit=10,
119
                 max_relax_dist=4,
120
                 Ncandidates=30,
121
                 population_size=10,
122
                 dualpoint=True,
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
123
                 min_certainty=0.7,
124
125
126
                 position_constraint=None,
                 trajectory='structures.traj',
                 logfile='search.log',
127
                 restart='restart.pickl'):
128
        
129
130
131
132
        if structures is None:
            assert startgenerator is not None
            self.structures = None
        else:
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
            if isinstance(structures, Atoms):
                self.structures = [structures]
            elif isinstance(structures, list):
                assert isinstance(structures[0], Atoms)
                self.structures = structures
            elif isinstance(structures, str):
                self.structures = read(structures, index=':')
        
        if calc is not None:
            self.calc = calc
        else:
            assert structures is not None
            calc = structures[0].get_calculator()
            assert calc is not None and not isinstance(calc, SinglePointCalculator)
            self.calc = calc

149
        if startgenerator is None:
150
151
            assert structures is not None
            self.startgenerator = None
152
153
        else:
            self.startgenerator = startgenerator
154

155
        # Determine atoms to optimize
156
157
158
159
160
161
162
163
164
        if startgenerator is not None:
            self.n_to_optimize = len(self.startgenerator.stoichiometry)
        else:
            self.n_to_optimize = len(self.structures[0])
            for constraint in self.structures[0].constraints:
                if isinstance(constraint, FixAtoms):
                    indices_fixed = constraint.get_indices()
                    self.n_to_optimize -= len(indices_fixed)
                    break
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
165
        
166
        # Set up candidate-generator if not supplied
167
168
169
        if candidate_generator is not None:
            self.candidate_generator = candidate_generator
        else:
170
171
172
            rattle = RattleMutation(self.n_to_optimize,
                                    Nrattle=3,
                                    rattle_range=4)
173
            self.candidate_generator = CandidateGenerator([1.0],[rattle])
174
175
176
177
178
179

        # Define parallel communication
        self.comm = world.Dup()  # Important to avoid mpi-problems from call to ase.parallel in BFGS
        self.master = self.comm.rank == 0

        self.kappa = kappa
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
180
        self.max_steps = max_steps
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
181
        self.Ninit = Ninit
182
        self.max_relax_dist = max_relax_dist
183
184
185
        self.Ncandidates = Ncandidates
        self.dualpoint = dualpoint
        self.min_certainty = min_certainty
186
        self.position_constraint = position_constraint
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
187
188
        self.restart = restart
        
189
190
191
        # Add position-constraint to candidate-generator
        self.candidate_generator.set_constraints(position_constraint)

Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
192
193
194
195
        if isinstance(trajectory, str):
            self.trajectory = Trajectory(filename=trajectory, mode='a', master=self.master)
            if self.restart:
                self.traj_name = trajectory
196

Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
197
198
199
200
201
202
203
204
205
206
        if not self.master:
            logfile = None
        elif isinstance(logfile, str):
            if logfile == "-":
                logfile = sys.stdout
            else:
                logfile = open(logfile, "a")
        self.logfile = logfile
        self.log_msg = ''

Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
207
208
        if restart is None or not isfile(restart):
            self.initialize()
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
209
210
211
212
213
214
215
216

            if gpr is not None:
                self.gpr = gpr
            else:
                self.gpr = GPR(template_structure=self.structures[0])
            
            # Initialize population
            self.population = population(population_size=population_size, gpr=self.gpr, similarity2equal=0.9999)
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
217
218
219
220
        else:
            self.read()
            self.comm.barrier()

221

Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
222
    def initialize(self):
223
        self.get_initial_structures()
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
224
        self.steps = 0
225
226

    def get_initial_structures(self):
227
228
229
230
231
232
233
234
235
        """Method to prepare the initial structures for the search.
        
        The method makes sure that there are atleast self.Ninit
        initial structures.
        These structures are first of all the potentially supplied
        structures. If more structures are required, these are
        generated using self.startgenerator (if supplied), otherwise
        they are generated by heavily rattling the supplied structures.
        """
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
236
        
237
        # Collect potentially supplied structures.
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
238
        if self.structures is not None:
239
            for a in self.structures:
240
                a.info = {'origin': 'PreSupplied'}
241
242
        else:
            self.structures = []
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
243
        
244
        Nremaining = self.Ninit - len(self.structures)
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
245
        
246
247
248
249
250
251
252
253
254
255
256
257
        if Nremaining > 0 and self.startgenerator is None:
            # Initialize rattle-mutation for all atoms.
            rattle = RattleMutation(self.n_to_optimize,
                                    Nrattle=self.n_to_optimize,
                                    rattle_range=2)

        # Generation of remaining initial-structures (up to self.Ninit).
        for i in range(Nremaining):
            if self.startgenerator is not None:
                a = self.startgenerator.get_new_candidate()
            else:
                # Perform two times rattle of all atoms.
258
                a0 = self.structures[i % len(self.structures)]
259
260
                a = rattle.get_new_candidate([a])
                a = rattle.get_new_candidate([a])
261
262
263
            self.structures.append(a)
                
    def evaluate_initial_structures(self):
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
264
265
266
        """ Evaluate energies and forces of all initial structures
        (self.structures) that have not yet been evaluated.
        """
267
268
269
270
271
272
273
274
        structures_init = []
        for a in self.structures:
            calc = a.get_calculator()
            if isinstance(calc, SinglePointCalculator):
                if 'energy' in calc.results and 'forces' in calc.results:
                    # Write without evaluating.
                    structures_init.append(a)
                    continue
275
276
            a = self.evaluate(a)
            structures_init.append(a)
277

278
279
        self.gpr.memory.save_data(structures_init)
        self.population.add(structures_init)
280

281
    def run(self):
282
283
        """ Method to run the search.
        """
284
285
        if self.steps == 0:
            self.evaluate_initial_structures()
286

Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
287
        while self.steps < self.max_steps:
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
288
289
            self.log_msg += (f"\n##### STEPS: {self.steps} #####\n\n")
            t0 = time()
290
            self.train_surrogate()
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
291
            t1 = time()
292
            self.update_population()
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
293
            t2 = time()
294
            relaxed_candidates = self.get_surrogate_relaxed_candidates()
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
295
            t3 = time()
296
            kappa = self.kappa
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
            a_add = []
            for _ in range(5):
                try:
                    anew = self.select_with_acquisition(relaxed_candidates, kappa)
                    anew = self.evaluate(anew)
                    a_add.append(anew)
                    if self.dualpoint:
                        adp = self.get_dualpoint(anew)
                        adp = self.evaluate(adp)
                        a_add.append(adp)
                    break
                except Exception as err:
                    kappa /=2
                    if self.master:
                        traceback.print_exc(file=sys.stderr)
            self.gpr.memory.save_data(a_add)
313

Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
314
315
316
317
318
            # log timing
            self.log_msg += "Timing:\n"
            self.log_msg += f"{'Training':12}{'Relax pop.':12}{'Relax candidates':18}{'Evaluate':12}\n"
            self.log_msg += f"{t1-t0:12.2e}\t{t2-t1:12.2e}\t{t3-t2:16.2e}\t{time()-t3:12.2e}\n\n"

319
            # Add structure to population
320
321
            index_lowest = np.argmin([a.get_potential_energy() for a in a_add])
            self.population.add([a_add[index_lowest]])
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
322
323

            # Save search state
324
            self.save_state()
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
325
            
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
326
327
328
329
330
331
332
            self.log_msg += (f"Prediction:\nenergy = {anew.info['key_value_pairs']['Epred']:.5f}eV,  energy_std = {anew.info['key_value_pairs']['Epred_std']:.5f}eV\n")
            self.log_msg += (f"E_true:\n{array_to_string([a.get_potential_energy() for a in a_add], unit='eV')}\n\n")
            #self.log_msg += (f"E_true: {[a.get_potential_energy() for a in a_add]}\n")
            self.log_msg += (f"Energy of population:\n{array_to_string([a.get_potential_energy() for a in self.population.pop], unit='eV')}\n")
            self.log_msg += (f"Max force of ML-relaxed population:\n{array_to_string([(a.get_forces()**2).sum(axis=1).max()**0.5 for a in self.population.pop_MLrelaxed], unit='eV/A')}\n")
            
            self.log()
333
            self.steps += 1
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
            
    def get_dualpoint(self, a, lmax=0.10, Fmax_flat=5):
        """Returns dual-point structure, i.e. the original structure
        perturbed slightly along the forces.
        
        lmax: The atom with the largest force will be displaced by
        this distance
        
        Fmax_flat: maximum atomic displacement. is increased linearely
        with force until Fmax = Fmax_flat, after which it remains
        constant as lmax.
        """
        F = a.get_forces()
        a_dp = a.copy()

        # Calculate and set new positions
        Fmax = np.sqrt((F**2).sum(axis=1).max())
        pos_displace = lmax * F*min(1/Fmax_flat, 1/Fmax)
        pos_dp = a.positions + pos_displace
        a_dp.set_positions(pos_dp)
        return a_dp

    def get_surrogate_relaxed_candidates(self):
357
358
359
360
361
        """ Method supplying a number of surrogate-relaxed new
        candidates. The method combines the generation of new
        candidates with subsequent surrogate relaxation.
        The tasks are parrlelized over all avaliable cores.
        """
362
363
364
365
366
367
368
369
370
371
        Njobs = self.Ncandidates
        task_split = split(Njobs, self.comm.size)
        def func1():
            return [self.generate_candidate() for i in task_split[self.comm.rank]]
        candidates = parallel_function_eval(self.comm, func1)
        def func2():
            return [self.surrogate_relaxation(candidates[i], Fmax=0.1, steps=200, kappa=self.kappa)
                    for i in task_split[self.comm.rank]]
        relaxed_candidates = parallel_function_eval(self.comm, func2)
        relaxed_candidates = self.certainty_filter(relaxed_candidates)
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
372
        relaxed_candidates = self.population.pop_MLrelaxed + relaxed_candidates
373
374
        
        return relaxed_candidates
375
376
377
378
379
380
381
382
383
384
385
386
387

    def generate_candidate(self):
        """ Method to generate new candidate.
        """
        parents = self.population.get_structure_pair()
        a_mutated = self.candidate_generator.get_new_candidate(parents)
        return a_mutated

    def surrogate_relaxation(self, a, Fmax=0.1, steps=200, kappa=None):
        """ Method to carry out relaxations of new candidates in the
        surrogate potential.
        """
        calc = self.gpr.get_calculator(kappa)
388
389
390
        a_relaxed = relax(a, calc, Fmax=Fmax, steps_max=steps,
                          max_relax_dist=self.max_relax_dist,
                          position_constraint=self.position_constraint)
391
392
393
394
395
396
397
398
399
        # Evaluate uncertainty
        E, Estd = self.gpr.predict_energy(a_relaxed, eval_std=True)

        # Save prediction in info-dict
        a_relaxed.info['key_value_pairs']['Epred'] = E
        a_relaxed.info['key_value_pairs']['Epred_std'] = Estd
        a_relaxed.info['key_value_pairs']['kappa'] = self.kappa
        
        return a_relaxed
400
401
        
    def certainty_filter(self, structures):
402
403
404
405
        """ Method to filter away the most uncertain surrogate-relaxed
        candidates, which might otherewise get picked for first-principles
        evaluation, based on the very high uncertainty alone.
        """
406
407
408
409
410
411
412
413
414
415
416
417
418
        certainty = np.array([a.info['key_value_pairs']['Epred_std']
                              for a in structures]) / np.sqrt(self.gpr.K0)
        min_certainty = self.min_certainty
        for _ in range(5):
            filt = certainty < min_certainty
            if np.sum(filt.astype(int)) > 0:
                structures = [structures[i] for i in range(len(filt)) if filt[i]]
                break
            else:
                min_certainty = min_certainty + (1-min_certainty)/2
        return structures

    def update_population(self):
419
420
421
        """ Method to update the population with the new pirst-principles
        evaluated structures.
        """
422
423
424
425
426
427
        Njobs = len(self.population.pop)
        task_split = split(Njobs, self.comm.size)
        func = lambda: [self.surrogate_relaxation(self.population.pop[i],
                                                  Fmax=0.001, steps=200, kappa=None)
                        for i in task_split[self.comm.rank]]
        self.population.pop_MLrelaxed = parallel_function_eval(self.comm, func)
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
428
429
        Fmax_pop_relaxed = [(a.get_forces()**2).sum(axis=1).max()**0.5
                                  for a in self.population.pop_MLrelaxed]
430
431

    def train_surrogate(self):
432
433
434
435
436
437
        """ Method to train the surrogate model.
        The method only performs hyperparameter optimization every 
        ten training instance, as carrying out the hyperparameter
        optimization is significantly more expensive than the basic
        training.
        """
438
        # Train
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
439
        if self.steps < 50 or (self.steps % 10) == 0:
440
            self.gpr.optimize_hyperparameters(comm=self.comm)
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
441
442
            self.log_msg += (f"lml: {self.gpr.lml}\n")
            self.log_msg += (f"kernel optimized:\nTheta = {[f'{x:.2e}' for x in np.exp(self.gpr.kernel.theta)]}\n\n")
443
444
        else:
            self.gpr.train()
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
445
            self.log_msg += (f"kernel fixed:\nTheta = {[f'{x:.2e}' for x in np.exp(self.gpr.kernel.theta)]}\n\n")
446
447

    def select_with_acquisition(self, structures, kappa):
448
449
450
451
        """ Method to select single most "promizing" candidate 
        for first-principles evaluation according to the acquisition
        function min(E-kappa*std(E)).
        """
452
453
454
455
456
457
458
459
460
        Epred = np.array([a.info['key_value_pairs']['Epred']
                          for a in structures])
        Epred_std = np.array([a.info['key_value_pairs']['Epred_std']
                              for a in structures])
        acquisition = Epred - kappa*Epred_std
        index_select = np.argmin(acquisition)
        return structures[index_select]

    def evaluate(self, a):
461
462
463
        """ Method to evaluate the energy and forces of the selacted
        candidate.
        """
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
464
        a = self.comm.bcast(a, root=0)
465
466
467
        a.wrap()

        if isinstance(self.calc, Dftb):
468
            if self.master:
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
                try:
                    a.set_calculator(self.calc)
                    E = a.get_potential_energy()
                    F = a.get_forces()
                    results = {'energy': E, 'forces': F}
                    calc_sp = SinglePointCalculator(a, **results)
                    a.set_calculator(calc_sp)
                    success = True
                except:
                    success = False
            else:
                success = None
            success = self.comm.bcast(success, root=0)
            if success == False:
                raise RuntimeError('DFTB evaluation failed')
484
485
486
487
488
489
490
491
            a = self.comm.bcast(a, root=0)
        else:
            a.set_calculator(self.calc)
            E = a.get_potential_energy()
            F = a.get_forces()
            results = {'energy': E, 'forces': F}
            calc_sp = SinglePointCalculator(a, **results)
            a.set_calculator(calc_sp)
492
493
494
495
496
497

        self.write(a)

        return a

    def write(self, a):
498
499
        """ Method for writing new evaluated structures to file.
        """
500
501
502
        if self.trajectory is not None:
            self.trajectory.write(a)

Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
503
    def dump(self, data):
504
505
506
        """ Method to save restart-file used if the search is
        restarted from some point in the search. 
        """
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
507
508
        if self.comm.rank == 0 and self.restart is not None:
            pickle.dump(data, open(self.restart, "wb"), protocol=2)
509

510
511
512
513
514
515
516
517
518
    def save_state(self):
        """ Saves the current state of the search, so the search can
        be continued after having finished or stoped prematurely.
        """
        self.dump((self.steps,
                   self.population,
                   self.gpr.kernel.theta,
                   np.random.get_state()))

Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
519
    def read(self):
520
521
522
        """ Method to restart a search from the restart-file and the
        trajectory-file containing all structures evaluated so far.
        """
523
        self.steps, self.population, theta, random_state = pickle.load(open(self.restart, "rb"))
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
524
525
        np.random.set_state(random_state)
        training_structures = read(self.traj_name, index=':')
526
527
528

        # Salvage GPR model
        self.gpr = GPR(template_structure=training_structures[0])
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
529
        self.gpr.memory.save_data(training_structures)
530
        self.gpr.kernel.theta = theta
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
531
532
533
534
535
536
537
538
539
540
541
542
543

    def log(self):
        if self.logfile is not None:
            if self.steps == 0:
                msg = "GOFEE"
                self.logfile.write(msg)

            self.logfile.write(self.log_msg)
            self.logfile.flush()
        self.log_msg = ''