Commit e9f52417 authored by Malthe Kjær Bisbo

Cleaned up cand_opp and missing wrap() calls

parent 531b2f5e
import numpy as np
from scipy.spatial.distance import euclidean
import matplotlib.pyplot as plt
from ase.io import read, write
import os
def collect_events(a_gm, runs_path, featureCalculator, dmax=0.1, dE_max=0.2, N=100, Ninit=0, traj_name=None, save_dir='stats', check_finalPop=False):
    f_gm = featureCalculator.get_feature(a_gm)
    E_gm = a_gm.get_potential_energy()
    times = []
    events = []
    structures = []
    iter_max = 0
    for n in range(N):
        found = False
        try:
            traj = read(runs_path + 'run{}/structures.traj'.format(n), index=':')
            run_length = len(traj)
            if run_length > iter_max:
                iter_max = run_length
            E_all = np.array([a.get_potential_energy() for a in traj])
            for i, (E, a) in enumerate(zip(E_all, traj)):
                if E < E_gm + dE_max:
                    f = featureCalculator.get_feature(a)
                    d = euclidean(f, f_gm)
                    if d < dmax:
                        number_SP = i + Ninit
                        print('run:', n, '\t iter_found:', number_SP, '\t dist:', d, '\t energy:', E)
                        times.append(i)
                        events.append(1)
                        structures.append(a)
                        found = True
                        break
            if not found:
                times.append(run_length)
                events.append(0)
        except Exception as err:
            print('Exception caught:', err)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    if traj_name is not None:
        write(save_dir + '/' + traj_name, structures)
    return times, events
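# A minimal usage sketch (hypothetical paths; `featureCalculator` is any object
# exposing get_feature(), as assumed above):
#   a_gm = read('gm_structure.traj')
#   times, events = collect_events(a_gm, 'runs/', featureCalculator, dmax=0.1, N=100)
#   np.save('stats/events.npy', np.array([times, events]))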
def collect_events_energy(a_gm, runs_path, dE_max=0.3, N=100, Ninit=0, traj_name=None, save_dir='stats', check_finalPop=False):
    E_gm = a_gm.get_potential_energy()
    times = []
    events = []
    structures = []
    iter_max = 0
    for n in range(N):
        found = False
        try:
            traj = read(runs_path + 'run{}/structures.traj'.format(n), index=':')
            run_length = len(traj)
            if run_length > iter_max:
                iter_max = run_length
            E_all = np.array([a.get_potential_energy() for a in traj])
            for i, (E, a) in enumerate(zip(E_all, traj)):
                if E < E_gm + dE_max:
                    number_SP = i + Ninit
                    print('run:', n, '\t iter_found:', number_SP, '\t Energy:', E)
                    times.append(number_SP)
                    events.append(1)  # event indicator must be binary (0/1), matching the documented format
                    structures.append(a)
                    found = True
                    break
            if not found:
                times.append(run_length)
                events.append(0)
        except Exception as err:
            print('Exception caught:', err)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    if traj_name is not None:
        write(save_dir + '/' + traj_name, structures)
    return times, events
def collect_events_old(a_gm, runs_path, featureCalculator, dmax=0.1, N=100, Ninit=0, traj_name=None, save_dir='stats', check_finalPop=False):
    f_gm = featureCalculator.get_feature(a_gm)
    times = []
    events = []
    structures = []
    iter_max = 0
    for n in range(N):
        found = False
        try:
            traj = read(runs_path + 'run{}/structures.traj'.format(n), index=':')
            run_length = len(traj)
            if run_length > iter_max:
                iter_max = run_length
            fMat = featureCalculator.get_featureMat(traj)
            for i, (f, a) in enumerate(zip(fMat, traj)):
                d = euclidean(f, f_gm)
                if d < dmax:
                    number_SP = i + Ninit
                    E = a.get_potential_energy()
                    print('run:', n, '\t iter_found:', number_SP, '\t dist:', d, '\t energy:', E)
                    times.append(i)
                    events.append(1)
                    structures.append(a)
                    found = True
                    break
            if not found:
                times.append(run_length)
                events.append(0)
        except Exception:
            # Silently skip runs that cannot be read.
            pass
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    if traj_name is not None:
        write(save_dir + '/' + traj_name, structures)
    return times, events
########################################################################################
#
# This program is designed to make it easy to produce proper statistics for
# evolutionary algorithms. It can be used in two different ways:
#
# 1: Call the program from the terminal, followed by the files you want statistics for.
#    If you have two data sets you want to compare, it could look like this:
#    $ python survival_stats.py event_file1.npy event_file2.npy
#    If you only want statistics for one data set, simply give one file.
#    The results will be saved in a folder called stats/ so you can reproduce all
#    the displayed plots yourself. Each file has the format (x, y, y_LCB, y_UCB, censorings);
#    Hazard only holds (x, y).
#    A last input can be given to set labels. It should be of this form:
#    labels=[label1,label2,label3]
#
# 2: Import the function survival_stats() from this script and give it two lists
#    of inputs. The first should be a list of all the times at which an event or
#    censoring occurred. The second should be a list of binaries indicating whether
#    an event occurred at the corresponding time, i.e. 0 for censorings and 1 for events.
#    Lists of lists can also be used as input, which will additionally result in
#    log-rank tests being performed between the inputs.
#
# Event files should be .npy files holding a 2 x n array, e.g. [[5,4,10],[1,1,0]].
# The first vector holds the times for the EA runs: either the time when the best
# structure was found or when the run ended. The second vector should hold a list
# of zeros and ones, where a 1 indicates that the best structure was found and a 0
# indicates that it was not.
#
########################################################################################
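# A minimal sketch of creating an event file by hand (hypothetical values):
#   import numpy as np
#   times = [5, 4, 10]   # iteration where the best structure was found, or the run length
#   events = [1, 1, 0]   # 1 = event (structure found), 0 = censoring (run ended without it)
#   np.save('event_file1.npy', np.array([times, events]))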
import os
import sys
import numpy as np
import scipy.stats as st
from scipy.special import erfinv
from copy import copy
from matplotlib.pyplot import *
# Define a class to be used for KRR
class Comp(object):
    def get_features(self, x):
        return x

    def get_similarity(self, f1, f2):
        return abs(f1 - f2)
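# A minimal sketch of how this comparator could be used (hypothetical values):
#   comp = Comp()
#   d = comp.get_similarity(comp.get_features(1.0), comp.get_features(3.0))  # -> 2.0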
# Function for the log-rank test
def logrank(n1, d1, t1, n2, d2, t2):
    """This function returns the p-value for a log-rank test
    Inputs:
        n1: Number at risk in population 1 at times indicated by t1
        d1: Number of events in population 1 at times indicated by t1
        t1: times used with the two above inputs
        n2: Number at risk in population 2 at times indicated by t2
        d2: Number of events in population 2 at times indicated by t2
        t2: times used with the two above inputs
    Output:
        p-value
    """
    # The first part here just collects and orders the inputs for the
    # calculations. Note that the merge assumes t1 and t2 share their first
    # time point (survival_stats always starts both populations at time 0).
    n1 = copy(n1)
    d1 = copy(d1)
    t1 = copy(t1)
    n2 = copy(n2)
    d2 = copy(d2)
    t2 = copy(t2)
    n = []
    n_1 = []
    n_2 = []
    d = []
    d_1 = []
    d_2 = []
    while t1 or t2:
        if t1 and t2:
            if t1[0] < t2[0]:
                n_1.append(n1.pop(0))
                n_2.append(n_2[-1])
                n.append(n_1[-1] + n_2[-1])
                d_1.append(d1.pop(0))
                d_2.append(0)
                d.append(d_1[-1] + d_2[-1])
                t1.pop(0)
            elif t1[0] > t2[0]:
                n_1.append(n_1[-1])
                n_2.append(n2.pop(0))
                n.append(n_1[-1] + n_2[-1])
                d_1.append(0)
                d_2.append(d2.pop(0))
                d.append(d_1[-1] + d_2[-1])
                t2.pop(0)
            else:  # t1[0] == t2[0]
                n_1.append(n1.pop(0))
                n_2.append(n2.pop(0))
                n.append(n_1[-1] + n_2[-1])
                d_1.append(d1.pop(0))
                d_2.append(d2.pop(0))
                d.append(d_1[-1] + d_2[-1])
                t1.pop(0)
                t2.pop(0)
        elif t1:
            n_1.append(n1.pop(0))
            n_2.append(n_2[-1])
            n.append(n_1[-1] + n_2[-1])
            d_1.append(d1.pop(0))
            d_2.append(0)
            d.append(d_1[-1] + d_2[-1])
            t1.pop(0)
        elif t2:
            n_1.append(n_1[-1])
            n_2.append(n2.pop(0))
            n.append(n_1[-1] + n_2[-1])
            d_1.append(0)
            d_2.append(d2.pop(0))
            d.append(d_1[-1] + d_2[-1])
            t2.pop(0)
    # This is where the actual test is performed
    e_1 = []
    v = []
    for i in range(len(n)):
        e1 = n_1[i]*d[i]/float(n[i])
        e_1.append(e1)
        v1 = (d[i] * n_1[i]/float(n[i]) * (1 - n_1[i]/float(n[i])) * (n[i] - d[i])) / float(n[i] - 1)
        v.append(v1)
    Z = np.sum(np.array(d_1) - np.array(e_1)) / np.sqrt(np.sum(v))
    return st.norm.sf(abs(Z))*2
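# A minimal usage sketch with hypothetical numbers (both time lists share the
# first time point, as produced by survival_stats):
#   p = logrank([10, 9, 7], [0, 1, 2], [0, 1, 2],
#               [10, 8, 6], [0, 2, 1], [0, 1, 2])
# A small p-value (e.g. below 0.05) indicates the two survival curves differ.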
# This is the real function of interest
def survival_stats(times, events, alpha=0.95, sigma=5000, show_plot=True, save=True, get_hazard=True, labels=[], save_dir='stats'):
    """This function calculates a number of statistics that may be of interest
    Inputs:
        times: Either a single list of times or a list of lists of times
        events: Either a single list of events or a list of lists of events.
                0 indicates a censoring and 1 indicates an event.
        alpha: The size of the confidence bound given for the functions.
               Default is 0.95
        sigma: The kernel size for the kernel smoothing used to create the
               hazard curve. Lower the number if the curve seems too flat and
               raise it if the curve is too spiky. Default is 5000
        show_plot: Default is True. Change to False if you don't want to see
                   the plots
        save: Default is True. Change to False if you don't want the statistics saved
    Output:
        CDF: Cumulative distribution function. List of tuples containing:
             time, value of CDF, LCB of CDF, UCB of CDF.
             The Kaplan-Meier estimate, its confidence bounds and the censoring
             indicators are computed internally; only the CDF is returned.
        The outer list separates multiple inputs.
    """
    # Arrange the input into a standard format
    if hasattr(times[0], '__len__'):
        n_inputs = len(times)
    else:
        n_inputs = 1
        times = [times]
        events = [events]
    # Calculate a z value from the given alpha
    # (simplifies to sqrt(2)*erfinv(alpha), the two-sided normal quantile)
    z = np.sqrt(2)*erfinv(2*(alpha + (1-alpha)/2.) - 1)
    # Change the input to a convenient format
    time = []
    censoring = []
    n = []
    d = []
    for i in range(n_inputs):
        time.append([0])
        censoring.append([False])
        n.append([len(times[i])])
        d.append([0])
        ds = 0  # dead or censored at this timestep
        sort_index = np.argsort(times[i])
        for j in sort_index:
            if times[i][j] == time[i][-1]:
                ds += 1
                if events[i][j]:
                    d[i][-1] += 1
                else:
                    censoring[i][-1] = True
            else:
                time[i].append(times[i][j])
                n[i].append(n[i][-1] - ds)
                ds = 1
                if events[i][j]:
                    d[i].append(1)
                    censoring[i].append(False)
                else:
                    d[i].append(0)
                    censoring[i].append(True)
        censoring[i][-1] = False
        censoring[i] = np.array(censoring[i])
    # Make Kaplan-Meier (product-limit estimator)
    KM = []
    for i in range(n_inputs):
        S = [1]
        for j in range(1, len(time[i])):
            S.append(S[-1]*(n[i][j] - d[i][j])/float(n[i][j]))
        KM.append((np.array(time[i]), np.array(S)))
    # Make confidence bounds for Kaplan-Meier, using the log(-log S)
    # transform with Greenwood's variance estimate
    KM_CB = []
    for i in range(n_inputs):
        S_LCB = [1]
        S_UCB = [1]
        temp = 0
        for j in range(1, len(time[i])):
            if KM[i][1][j] == 1:
                # log(-log S) is undefined for S = 1; the bounds are simply 1.
                S_LCB.append(1)
                S_UCB.append(1)
                continue
            if n[i][j] != d[i][j]:
                temp += d[i][j]/float(n[i][j]*(n[i][j] - d[i][j]))
                V = temp/float(np.log(KM[i][1][j])**2)
                c_L = np.log(-np.log(KM[i][1][j])) + z*np.sqrt(V)
                c_U = np.log(-np.log(KM[i][1][j])) - z*np.sqrt(V)
            else:
                V = temp/float(np.log(KM[i][1][j-1])**2)
                c_L = np.log(-np.log(KM[i][1][j-1])) + z*np.sqrt(V)
                c_U = np.log(-np.log(KM[i][1][j-1])) - z*np.sqrt(V)
            S_LCB.append(np.exp(-np.exp(c_L)))
            S_UCB.append(np.exp(-np.exp(c_U)))
        KM_CB.append((np.array(time[i]), np.array(S_LCB), np.array(S_UCB)))
    # Gather all KM stuff
    for i in range(n_inputs):
        KM[i] = (KM[i][0], KM[i][1], KM_CB[i][1], KM_CB[i][2])
    # Make cumulative distribution function
    CDF = []
    CDF_CB = []
    for i in range(n_inputs):
        CDF.append((KM[i][0], 1 - KM[i][1]))
        CDF_CB.append((KM_CB[i][0], 1 - KM_CB[i][1], 1 - KM_CB[i][2]))
    # Gather all CDF stuff
    for i in range(n_inputs):
        CDF[i] = (CDF[i][0], CDF[i][1], CDF_CB[i][1], CDF_CB[i][2])
    if show_plot:  # make plots
        print('')
        # Build a new list so the mutable default argument is not modified.
        labels = labels + [str(j) for j in range(len(labels), n_inputs)]
        f, ax = subplots(1, 1)
        colors = ['b', 'r', 'g', 'y', 'c', 'm']
        max_time = 0
        for i in range(n_inputs):
            try:
                ax.fill_between(CDF[i][0], CDF[i][2], CDF[i][3], step='post', facecolor=colors[i % len(colors)], alpha=0.1)
            except Exception:
                # Older matplotlib versions do not support step= in fill_between.
                ax.fill_between(CDF[i][0], CDF[i][2], CDF[i][3], facecolor=colors[i % len(colors)], alpha=0.1)
            ax.step(CDF[i][0], CDF[i][1], where='post', c=colors[i % len(colors)], label=labels[i])
            #ax.plot(CDF[i][0][censoring[i]],CDF[i][1][censoring[i]],marker='+',c='k')
            if CDF[i][0][-1] > max_time:
                max_time = KM[i][0][-1]
        ax.set_ylabel('Success Rate')
        ax.set_xlabel('Singlepoint calculations')
        ax.set_xlim([0, max_time])
        ax.set_ylim([0, 1])
        ax.set_yticks(np.linspace(0, 1, 6))
        ax.set_yticklabels(['{} %'.format(int(i)) for i in np.linspace(0, 100, 6)])
        ax.set_title('CDF')
        ax.legend(loc=2)
    if save and show_plot:  # Save files
        cwd = os.getcwd()
        if not os.path.exists(os.path.join(save_dir)):
            os.makedirs(os.path.join(save_dir))
        savefig(save_dir + '/survival_stats.pdf')
        savefig(save_dir + '/survival_stats.png')
        for i in range(n_inputs):
            name = str(i)
            name_CDF = name + '_CDF'
            if os.path.isfile(os.path.join(cwd, save_dir, name_CDF + '.npy')):
                n_name = 1
                while os.path.isfile(os.path.join(cwd, save_dir, name_CDF + '({})'.format(n_name) + '.npy')):
                    n_name += 1
                name_CDF += '({})'.format(n_name)
            name_CDF += '.npy'
            np.save(os.path.join(cwd, save_dir, name_CDF), (CDF[i][0], CDF[i][1], CDF[i][2], CDF[i][3], censoring[i]))
    if show_plot:
        show()
    return CDF
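# A minimal programmatic usage sketch with hypothetical numbers (two data sets
# of three runs each; 1 = success, 0 = censored at the run length):
#   times = [[120, 300, 500], [200, 500, 500]]
#   events = [[1, 1, 0], [1, 0, 0]]
#   CDF = survival_stats(times, events, labels=['setA', 'setB'])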
if __name__ == '__main__':
    cwd = os.getcwd()
    # Check how many inputs are given
    try:
        n_inputs = len(sys.argv) - 1
    except Exception:
        print('At least one input must be given for this program.')
        raise
    try:
        if 'label' in sys.argv[-1]:
            n_inputs -= 1
            label_str = sys.argv[-1]
            index1 = label_str.find('[') + 1
            label_str = label_str[index1:]
            labels = []
            while label_str.find(',') != -1:
                index2 = label_str.find(',')
                label = label_str[:index2]
                labels.append(label)
                label_str = label_str[index2+1:]
            index2 = label_str.find(']')
            label = label_str[:index2]
            labels.append(label)
        else:
            labels = []
    except Exception:
        print('labels should be given as the last input with a format like:\n'
              + 'labels=[label1,label2,label3]')
        raise
    # Prepare the files for input into the function
    times = [None]*n_inputs
    events = [None]*n_inputs
    for i in range(n_inputs):
        times[i], events[i] = np.load(os.path.join(cwd, sys.argv[i+1]))
    # Run function
    survival_stats(times, events, labels=labels)
import numpy as np
from time import time
from ase.io import read
from prior_old import repulsive_prior as repulsive_prior_old
from prior import repulsive_prior

a = read('/home/mkb/DFTB/TiO_2layer/ref/Ti13O26_GM_done.traj', index='0')

Nrep = 2

prior_old = repulsive_prior_old()
E_old = prior_old.energy(a)
F_old = prior_old.forces(a)
t0 = time()
for i in range(Nrep):
    prior_old.energy(a)
dt_old = (time() - t0)/Nrep

prior = repulsive_prior()
E = prior.energy(a)
F = prior.forces(a)
t0 = time()
for i in range(Nrep):
    prior.energy(a)
dt = (time() - t0)/Nrep

print('dF =\n', F_old - F)
print('dE =', E_old - E)
print('E_old =', E_old)
print('E =', E)
print(f'runtime (Nrep={Nrep}):')
print(dt_old)
print(dt)
@@ -11,6 +11,9 @@ from surrogate.gpr_calculator import gpr_calculator
import warnings

class gpr_memory():
    """ Class for saving "expensive to calculate" data for
    the Gaussian Process Regression model.
    """

    def __init__(self, descriptor, prior):
        self.descriptor = descriptor
        self.prior = prior
@@ -176,9 +179,6 @@ class GPR():
            results.append(res)
        index_min = np.argmin(np.array([r[1] for r in results]))
        result_min = results[index_min]
        #print('rank:', comm.rank, 'theta_init:', theta_initial)
        #comm.barrier()
        #print('rank:', comm.rank, 'lml:', -result_min[1], 'kernel:', result_min[0])
        if comm is not None:
            # Find best hyperparameters among all communicators and broadcast.
@@ -204,14 +204,6 @@ class GPR():
        else:
            K = self.kernel(self.X)
        """
        try:
            L = cholesky(K, lower=True)
        except Exception as err:
            print('gpr: err:\n', err)
            print('gpr: K:\n', K)
            print(np.any(np.isinf(K)))
        """
        L = cholesky(K, lower=True)
        alpha = cho_solve((L, True), self.Y)
@@ -66,12 +66,6 @@ class kernel(ABC):
            theta_down = np.copy(theta)
            theta_up[i] += 0.5*dx
            theta_down[i] -= 0.5*dx
            #theta_up[i] = np.log(np.exp(theta_up[i]) + 0.5*dx)
            #theta_down[i] = np.log(np.exp(theta_down[i]) - 0.5*dx)
            #dTheta = theta_up[i] - theta_down[i]
            #dTheta = np.log(np.exp(theta_up[i]) + 0.5*dx) - np.log(np.exp(theta_down[i]) - 0.5*dx)
            #print('dTheta:', dTheta)
            self.theta = theta_up
            K_up = self(X, eval_gradient=False)
@@ -69,3 +69,11 @@ def get_covalent_distance_from_atom_numbers(a, indices=None):
    r_cov_sub = r_cov[indices]
    cd_mat = r_cov_sub.reshape(-1, 1) + r_cov.reshape(1, -1)
    return cd_mat

def get_min_distances_as_fraction_of_covalent(a, indices=None, covalent_distances=None):
    bl = get_distances_as_fraction_of_covalent(a, indices)
    # Filter away self-interactions.
    bl = bl[bl > 1e-6].reshape(bl.shape[0], bl.shape[1]-1)
    return np.min(bl), bl.min(axis=1).argmin()
\ No newline at end of file