gpr.py 10.5 KB
Newer Older
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
1
import numpy as np
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
2
3
from scipy.linalg import cholesky, cho_solve, solve_triangular
from scipy.optimize import minimize
4
from scipy.optimize import fmin_l_bfgs_b
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
5

6
7
8
9
from surrogate.kernel import gauss_kernel, double_gauss_kernel
from surrogate.descriptor.fingerprint import Fingerprint
from surrogate.prior.prior import repulsive_prior
from surrogate.gpr_calculator import gpr_calculator
10

Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
11
import warnings
12
13

class gpr_memory():
14
15
16
    """ Class for saving "expensive to calculate" data for
    the Gaussian Process Regression model.
    """
17
18
19
20
    def __init__(self, descriptor, prior):
        self.descriptor = descriptor
        self.prior = prior

Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
21
        self.initialize_data()
22

Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
    def initialize_data(self):
        self.energies = None
        self.features = None
        self.prior_values = None
        
    def get_data(self):
        return self.energies, self.features, self.prior_values
        
    def save_data(self, atoms_list, add_data=True):
        if not add_data:
            self.initialize_data()
        
        self.save_energies(atoms_list)
        self.save_features(atoms_list)
        self.save_prior_values(atoms_list)
38

Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
39
40
41
    def save_energies(self, atoms_list):
        energies_save = np.array([a.get_potential_energy() for a in atoms_list])
        if self.energies is None:
42
43
44
45
            self.energies = energies_save
        else:
            self.energies = np.r_[self.energies, energies_save]
    
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
46
47
48
    def save_features(self, atoms_list):
        features_save = self.descriptor.get_featureMat(atoms_list)
        if self.features is None:
49
            self.features = features_save
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
50
        else:
51
52
            self.features = np.r_[self.features, features_save]

Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
53
54
55
56
57
58
59
60
61
62
    def save_prior_values(self, atoms_list):
        if self.prior is not None:
            prior_values_save = np.array([self.prior.energy(a) for a in atoms_list])
            if self.prior_values is None:
                self.prior_values = prior_values_save
            else:
                self.prior_values = np.r_[self.prior_values, prior_values_save]
        else:
            self.prior_values = 0

63
class GPR():
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
64
65
66
67
68
69
70
71
72
73
    """Gaussian Process Regression
    
    Parameters:
    
    descriptor:
    
    kernel:
    
    prior:
    """
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
74
    def __init__(self, descriptor=None, kernel='double', prior=None, n_restarts_optimizer=1, template_structure=None):
75
76
77
78
        if descriptor is None:
            self.descriptor = Fingerprint()
        else:
            self.descriptor = descriptor
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
79
80
81
82
83
84
        Nsplit_eta = None
        if template_structure is not None:
            self.descriptor.initialize_from_atoms(template_structure)
            if hasattr(self.descriptor, 'use_angular'):
                if self.descriptor.use_angular:
                    Nsplit_eta = self.descriptor.Nelements_2body
85
86

        if kernel is 'single':
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
87
            self.kernel = gauss_kernel(Nsplit_eta=Nsplit_eta)
88
        elif kernel is 'double':
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
89
            self.kernel = double_gauss_kernel(Nsplit_eta=Nsplit_eta)
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
90
91
        else:
            self.kernel = kernel
92
93
94
95
96
97

        if prior is None:
            self.prior = repulsive_prior()
        else:
            self.prior = prior

Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
98
99
        self.n_restarts_optimizer = n_restarts_optimizer

100
        self.memory = gpr_memory(self.descriptor, self.prior)
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
101

Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
102
    def predict_energy(self, a, eval_std=False):
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
103
104
105
        x = self.descriptor.get_feature(a)
        k = self.kernel.kernel_vector(x, self.X)

Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
106
107
108
109
110
        E = np.dot(k,self.alpha) + self.bias + self.prior.energy(a)

        if eval_std:
            # Lines 5 and 6 in GPML
            vk = np.dot(self.K_inv, k)
111
            E_std = np.sqrt(self.K0 - np.dot(k, vk))
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
            return E, E_std
        else:
            return E

    def predict_forces(self, a, eval_std=False):

        # Calculate descriptor and its gradient
        x = self.descriptor.get_feature(a)
        x_ddr = self.descriptor.get_featureGradient(a).T

        # Calculate kernel and its derivative
        k_ddx = self.kernel.kernel_jacobian(x, self.X)
        k_ddr = np.dot(k_ddx, x_ddr)

        F = -np.dot(k_ddr.T, self.alpha) + self.prior.forces(a)

        if eval_std:
            k = self.kernel.kernel_vector(x, self.X)
            vk = np.dot(self.K_inv, k)
131
            g = self.K0 - np.dot(k.T, vk)
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
132
            assert g >= 0
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
133
134
            F_std = 1/np.sqrt(g) * np.dot(k_ddr.T, vk)
            return F.reshape((-1,3)), F_std.reshape(-1,3)
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
135
        else:
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
136
            return F.reshape(-1,3)
137

138
    def update_bias(self):
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
139
140
        self.bias = np.mean(self.memory.energies - self.memory.prior_values)

Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
141
    def train(self, atoms_list=None, add_data=True):
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
142
        if atoms_list is not None:
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
143
            self.memory.save_data(atoms_list, add_data)
144
145

        self.update_bias()
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
146
147
        self.E, self.X, self.prior_values = self.memory.get_data()
        self.Y = self.E - self.prior_values - self.bias
148
        
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
149
150
        K = self.kernel(self.X)
        L = cholesky(K, lower=True)
151
        
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
152
153
        self.alpha = cho_solve((L, True), self.Y)
        self.K_inv = cho_solve((L, True), np.eye(K.shape[0]))
154
        self.K0 = self.kernel.kernel_value(self.X[0], self.X[0])
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
155
    
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
156
    def optimize_hyperparameters(self, atoms_list=None, add_data=True, comm=None):
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
157
158
        if self.n_restarts_optimizer == 0:
            self.train(atoms_list)
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
159
            return
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
160

Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
161
        if atoms_list is not None:
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
162
            self.memory.save_data(atoms_list, add_data)
163
164

        self.update_bias()
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
165
166
        self.E, self.X, self.prior_values = self.memory.get_data()
        self.Y = self.E - self.prior_values - self.bias
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
167

Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
168
169
170
171
        results = []
        for i in range(self.n_restarts_optimizer):
            theta_initial = np.random.uniform(self.kernel.theta_bounds[:, 0],
                                              self.kernel.theta_bounds[:, 1])
172
173
174
175
176
177
178
179
180
181
            if i == 0:
                # Make sure that the previously currently choosen
                # hyperparameters are always tried as initial values.
                if comm is not None:
                    # But only on a single communicator, if multiple are present.
                    if comm.rank == 0:
                        theta_initial = self.kernel.theta
                else:
                    theta_initial = self.kernel.theta
                        
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
182
183
184
            res = self.constrained_optimization(theta_initial)
            results.append(res)
        index_min = np.argmin(np.array([r[1] for r in results]))
185
186
        result_min = results[index_min]
        
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
187
        if comm is not None:
188
189
190
191
192
193
194
195
196
197
198
        # Find best hyperparameters among all communicators and broadcast.
            results_all = comm.gather(result_min, root=0)
            if comm.rank == 0:
                index_all_min = np.argmin(np.array([r[1] for r in results_all]))
                result_min = results_all[index_all_min]
            else:
                result_min = None
            result_min = comm.bcast(result_min, root=0)
                
        self.kernel.theta = result_min[0]
        self.lml = -result_min[1]
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
199

Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
200
        self.train()
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
201
    
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
202
203
204
    def neg_log_marginal_likelihood(self, theta=None, eval_gradient=True):
        if theta is not None:
            self.kernel.theta = theta
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
205

Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
206
207
208
209
        if eval_gradient:
            K, K_gradient = self.kernel(self.X, eval_gradient)
        else:
            K = self.kernel(self.X)
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
210

Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
211
212
213
214
215
216
        L = cholesky(K, lower=True)
        alpha = cho_solve((L, True), self.Y)

        lml = -0.5 * np.dot(self.Y, alpha)
        lml -= np.sum(np.log(np.diag(L)))
        lml -= K.shape[0]/2 * np.log(2*np.pi)
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
217
        
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
218
219
220
221
222
223
224
225
226
227
228
        if eval_gradient:
            # Equation (5.9) in GPML
            K_inv = cho_solve((L, True), np.eye(K.shape[0]))
            tmp = np.einsum("i,j->ij", alpha, alpha) - K_inv

            lml_gradient = 0.5*np.einsum("ij,kij->k", tmp, K_gradient)
            return -lml, -lml_gradient
        else:
            return -lml

    def constrained_optimization(self, theta_initial):
229
        """
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
230
231
232
233
        result = minimize(self.neg_log_marginal_likelihood,
                          x0=theta_initial,
                          bounds=self.kernel.theta_bounds,
                          method='L-BFGS-B',
234
                          jac=True)
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
235
236
237
        if not result.success:
            warnings.warn(f"L_BFGS_B terminated with state: {result.message}")
        return result.x, result.fun
238
239
240
241
242
243
244
245
246
        """
        theta_opt, func_min, convergence_dict = \
            fmin_l_bfgs_b(self.neg_log_marginal_likelihood,
                          theta_initial,
                          bounds=self.kernel.theta_bounds)
        if convergence_dict["warnflag"] != 0:
            warnings.warn("fmin_l_bfgs_b terminated abnormally with the "
                          " state: %s" % convergence_dict)
        return theta_opt, func_min
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
247
248
249
250
251
252
253
254
255
256
257
258
259
260

    def numerical_neg_lml(self, dx=1e-4):
        N_data = self.X.shape[0]
        theta = np.copy(self.kernel.theta)
        N_hyper = len(theta)
        lml_ddTheta = np.zeros((N_hyper))
        for i in range(N_hyper):
            theta_up = np.copy(theta)
            theta_down = np.copy(theta)
            theta_up[i] += 0.5*dx
            theta_down[i] -= 0.5*dx

            lml_up = self.neg_log_marginal_likelihood(theta_up, eval_gradient=False)
            lml_down = self.neg_log_marginal_likelihood(theta_down, eval_gradient=False)
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
261
            lml_ddTheta[i] = (lml_up - lml_down)/dx
Malthe Kjær Bisbo's avatar
update    
Malthe Kjær Bisbo committed
262
        return lml_ddTheta
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291

    def numerical_forces(self, a, dx=1e-4, eval_std=False):
        Na, Nd = a.positions.shape
        if not eval_std:
            F = np.zeros((Na,Nd))
            for ia in range(Na):
                for idim in range(Nd):
                    a_up = a.copy()
                    a_down = a.copy()
                    a_up.positions[ia,idim] += 0.5*dx
                    a_down.positions[ia,idim] -= 0.5*dx
                    
                    E_up = self.predict_energy(a_up)
                    E_down = self.predict_energy(a_down)
                    F[ia,idim] = -(E_up - E_down)/dx
            return F
        else:
            F = np.zeros((Na,Nd))
            Fstd = np.zeros((Na,Nd))
            for ia in range(Na):
                for idim in range(Nd):
                    a_up = a.copy()
                    a_down = a.copy()
                    a_up.positions[ia,idim] += 0.5*dx
                    a_down.positions[ia,idim] -= 0.5*dx
                    
                    E_up, Estd_up = self.predict_energy(a_up, eval_std=True)
                    E_down, Estd_down = self.predict_energy(a_down, eval_std=True)
                    F[ia,idim] = -(E_up - E_down)/dx
292
                    Fstd[ia,idim] = -(Estd_up - Estd_down)/dx
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
293
            return F, Fstd
294
295
296

    def get_calculator(self, kappa):
        return gpr_calculator(self, kappa)
Malthe Kjær Bisbo's avatar
Malthe Kjær Bisbo committed
297