Commit e42bf350 authored by Jonathan Juhl

removed old directory

parent 80132a00
from appJar import gui
from os.path import isfile,join
from glob import glob
import os
import numpy as np
app = gui("SortEM", "800x800")
app.setLogLevel("ERROR")
def update_gui():
    entry_dics = app.getAllEntries()
    f = join(join(entry_dics['output_dir_entry'], 'gui_display'), 'parameters.npy')
    if isfile(f):
        try:
            params = np.load(f, allow_pickle=True)
            if params[0] is not None:
                try:
                    app.setMeter("sinogram process meter", float(params[0]))
                except: pass
            if params[1] is not None and params[2] is not None:
                try:
                    app.setLabel("iteration", "Current iteration: %s (%s)" % (params[1], params[2]))
                except: pass
            if params[3] is not None:
                try:
                    app.setMeter("training progress meter", float(params[3]))
                except: pass
            if params[4] is not None:
                try:
                    app.setMeter("inference progress meter", float(params[4]))
                except: pass
            if params[5] is not None:
                try:
                    app.setMeter("clustering progress meter", float(params[5]))
                except: pass
            if params[6] is not None:
                try:
                    s = join(join(entry_dics['output_dir_entry'], 'gui_display'), 'component_plot.png')
                    app.setImage("placeholder", s)
                except: pass
        except: pass
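# A minimal sketch of the producer side of the parameters.npy protocol that
# update_gui() polls. The backend is assumed to overwrite this file as it runs;
# the index layout is inferred from the reads above and from the initial array
# written by control_flow.__init__. The helper name write_gui_parameters is
# hypothetical and not part of the original code.
def write_gui_parameters(output_dir, sinogram=0.0, iteration=0, stage="preprocessing",
                         train=0.0, infer=0.0, cluster=0.0, plot_ready=0):
    # mixed types (floats, strings) force an object array, hence allow_pickle on load
    params = np.asarray([sinogram, iteration, stage, train, infer, cluster, plot_ready],
                        dtype=object)
    np.save(join(join(output_dir, 'gui_display'), 'parameters.npy'), params)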
def run_program_locally(inputs):
    entry_dics, scale_bar_dics, properties = inputs
    num_gpus = scale_bar_dics['number of gpus']
    num_cpus = scale_bar_dics['Number of CPUs']
    num_it = scale_bar_dics['num_it']
    silhouette_score = scale_bar_dics['Silhouette_Score']/100
    top_k = scale_bar_dics['Top K']
    auto_batch = scale_bar_dics["Auto encoder batch size"]
    auto_predict_batch = scale_bar_dics['Auto encoder predict size']
    matrix_window_size = scale_bar_dics['Matrix Window Size']
    steps_pr_epoch = scale_bar_dics['pr_epoch']
    epochs = scale_bar_dics['epochs']
    pca_train_set = scale_bar_dics['PCA_train_set']
    fraction = scale_bar_dics["fraction to train"]
    float_16 = properties['Additional options']['Half Precision']
    star_file = entry_dics['star_file_entry']
    output_dir = entry_dics['output_dir_entry']
    slurm_path = entry_dics['slurm_path']
    tuples = (star_file, num_gpus, num_cpus, float_16, matrix_window_size, auto_batch,
              auto_predict_batch, top_k, num_it, fraction, output_dir, steps_pr_epoch,
              pca_train_set, epochs)
    tuples = tuple(str(k) for k in tuples)  # stringify every argument for the shell command
    os.system('python3 main.py --star %s --num_gpus %s --num_cpus %s --f16 %s --batch %s --ab %s --pb %s --top_k %s --rounds %s --f %s --o %s --mp %s --pca %s --epochs %s' % tuples)
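# Example of the command the line above produces, using the GUI's default scale
# values and a hypothetical star file and output directory:
#   python3 main.py --star particles.star --num_gpus 1 --num_cpus 1 --f16 False \
#       --batch 4000 --ab 50 --pb 100 --top_k 1000 --rounds 5 --f 100 \
#       --o ./out --mp 50000 --pca 50000 --epochs 10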
def make_slurm(slurm_path, additional_arguments, inputs):
    def check_if_in_list(slurm_list, word):
        return any(word in x for x in slurm_list)
    entry_dics, scale_bar_dics, properties = inputs
    num_gpus = scale_bar_dics['number of gpus']
    num_cpus = scale_bar_dics['Number of CPUs']
    num_it = scale_bar_dics['num_it']
    silhouette_score = scale_bar_dics['Silhouette_Score']/100
    top_k = scale_bar_dics['Top K']
    auto_batch = scale_bar_dics["Auto encoder batch size"]
    auto_predict_batch = scale_bar_dics['Auto encoder predict size']
    matrix_window_size = scale_bar_dics['Matrix Window Size']
    steps_pr_epoch = scale_bar_dics['pr_epoch']
    epochs = scale_bar_dics['epochs']
    pca_train_set = scale_bar_dics['PCA_train_set']
    fraction = scale_bar_dics["fraction to train"]
    float_16 = properties['Additional options']['Half Precision']
    star_file = entry_dics['star_file_entry']
    output_dir = entry_dics['output_dir_entry']
    slurm_path = entry_dics['slurm_path']
    tuples = (star_file, num_gpus, num_cpus, float_16, matrix_window_size, auto_batch,
              auto_predict_batch, top_k, num_it, fraction, output_dir, steps_pr_epoch,
              pca_train_set, epochs)
    tuples = tuple(str(k) for k in tuples)
    job_file = os.path.join(output_dir, "run.job")
    sbatch_arguments = []
    with open(slurm_path, 'r') as osp:
        for i in osp:
            if "#!/bin/bash\n" not in i:
                sbatch_arguments.append(i)
    extra_arguments = additional_arguments.split(' ')
    with open(job_file, 'w') as fh:
        fh.writelines("#!/bin/bash\n")
        # Inject the required directives only when the user's slurm file lacks
        # them; directives already present are written back out unchanged below.
        if not check_if_in_list(sbatch_arguments, '-p gpu'):
            fh.writelines("#SBATCH -p gpu\n")
        if not check_if_in_list(sbatch_arguments, '--gpus'):
            fh.writelines("#SBATCH --gpus=%s\n" % num_gpus)
        if not check_if_in_list(sbatch_arguments, '-o'):
            fh.writelines("#SBATCH -o run.log\n")
        for i in sbatch_arguments:
            fh.writelines(i)
        for i in extra_arguments:
            if i and not check_if_in_list(sbatch_arguments, i):
                fh.writelines("#SBATCH " + i + "\n")
        fh.writelines('python3 main.py --star %s --num_gpus %s --num_cpus %s --f16 %s --batch %s --ab %s --pb %s --top_k %s --rounds %s --f %s --o %s --mp %s --pca %s --epochs %s\n' % tuples)
    os.system("sbatch %s" % job_file)
def infobox(btnName):
    if btnName == "help1":
        app.infoBox('help1', 'The k best comparisons the source image has with the target image', parent=None)
    if btnName == "help2":
        app.infoBox('help2', 'The batch size to train the neural network per GPU; generally the larger the better, but limited by memory.', parent=None)
    if btnName == "help3":
        app.infoBox('help3', 'The batch size to run inference with the neural network per GPU; generally the larger the better, but limited by memory.', parent=None)
    if btnName == "help4":
        app.infoBox('help4', 'How big the top-k graph window should be. In general, within each window a significant amount of the source and target values must come from the same class.', parent=None)
    if btnName == "help5":
        app.infoBox('help5', 'The number of training particles used per epoch; the neural network will see number of particles * epochs examples.', parent=None)
    if btnName == "help6":
        app.infoBox('help6', 'Number of epochs to run the neural network.', parent=None)
    if btnName == "help7":
        app.infoBox('help7', 'How much data is used to train the PCA matrix that reduces the data to two dimensions.')
    if btnName == "help8":
        app.infoBox('help8', 'Number of graphical processing units to use.')
    if btnName == "help9":
        app.infoBox('help9', 'Max number of CPU jobs. This is mainly for PCA, KMeans and image preprocessing/queuing of images.')
    if btnName == "help10":
        app.infoBox('help10', 'The number of iterations before the data is split.')
    if btnName == "help11":
        app.infoBox('help11', 'The cluster score. Terminates if the clustering falls below a certain cluster distinctness score. The clusters in this program are generally very well separated; partially merged clusters indicate the data cannot be split any further.')
    if btnName == "help12":
        app.infoBox('help12', 'The number of JBKN iterations to perform; the higher the power, the better the model. A power beyond 4 generally does not improve results.')
    if btnName == 'helpslurm':
        app.infoBox('helpslurm', 'If submitting to a queue you must supply a premade slurm file. The program will add the necessary modifications (number of GPUs).')
    if btnName == 'helpstar':
        app.infoBox('helpstar', 'The path to the particle stack star file.')
    if btnName == 'helpfrac':
        app.infoBox('helpfrac', 'The fraction of the data to use for training.')
    if btnName == 'precision_help':
        app.infoBox('precision_help', 'If the user has a Volta-series or newer GPU architecture, the problem can be computed in half precision without loss of accuracy. The algorithm will generally run 3-5 times faster.')
    if btnName == 'additionalslurm':
        app.infoBox('additionalslurm', 'Add additional arguments separated by spaces, without the #SBATCH prefix.')
def run_program(name):
    entry_dics = app.getAllEntries()
    scale_bar_dics = app.getAllScales()
    properties = app.getAllProperties()
    slurm_queue = properties['Additional options']['submit to slurm']
    tmp = 0
    if name == 'Run':
        check = False
        all_wild_card_files = glob(entry_dics['star_file_entry'])
        for i in all_wild_card_files:
            with open(i) as opstar:
                for row in opstar:
                    if '_rlnImageName' in row:
                        tmp += 1
                        break
        # the wildcard check only passes when every matched file is a star file
        if len(all_wild_card_files) > 0 and tmp == len(all_wild_card_files):
            check = True
        if (entry_dics['star_file_entry'] == '' or '.star' not in entry_dics['star_file_entry'] or not isfile(entry_dics['star_file_entry'])) and not check:
            app.errorBox('input_error', "You must give a path to a valid star file.", parent=None)
        elif entry_dics['output_dir_entry'] == '':
            app.errorBox('out_error', "You must give a path to an output directory.", parent=None)
        elif slurm_queue == True and entry_dics['slurm_path'] == '':
            app.errorBox('slurm_error', "You must give a path to the slurm file so that the program can modify the necessary parameters.", parent=None)
        elif slurm_queue:
            make_slurm(entry_dics["slurm_path"], entry_dics["additional_slurm_arguments"], [entry_dics, scale_bar_dics, properties])
            app.infoBox('slurm_info', "The job has been submitted to the slurm queue; to follow the progress, go to the running tab.")
        else:
            app.infoBox('local_info', "The job is now running locally; to follow the progress, go to the running tab.")
            app.thread(run_program_locally, [entry_dics, scale_bar_dics, properties])
def reset_all(name):
    if name == 'Reset':
        app.setScale("number of gpus", 1, callFunction=False)
        app.setScale("Number of CPUs", 1, callFunction=False)
        app.setScale("num_it", 5, callFunction=False)
        app.setScale("Silhouette_Score", 90, callFunction=False)
        app.setScale("fraction to train", 100, callFunction=False)
        app.setScale("Auto encoder batch size", 50, callFunction=False)
        app.setScale("Auto encoder predict size", 100, callFunction=False)
        app.setScale("Matrix Window Size", 4000, callFunction=False)
        app.setScale("Top K", 1000, callFunction=False)
        app.setScale("pr_epoch", 50000)
        app.setScale("epochs", 10)
        app.setScale("PCA_train_set", 50000)
app.setBg("white")
app.startTabbedFrame("TabbedFrame")
text = "welcome to sortEM a particle stack sorting algorithm to sort hetreogenous classes and proteins by the method of deep learning."
app.startTab("introduction")
app.addLabel("l0", "Welcome to SortEM")
app.addTextArea('welcome_text', text=None)
app.setTextArea('welcome_text', text, end=True, callFunction=False)
app.stopTab()
app.startTab("Submission")
app.addLabel("star_files", "path to star file",0,0)
app.addFileEntry("star_file_entry",0,1)
app.addIconButton('helpstar',infobox, 'help',0,2)
app.addLabel("data_output", "Output Path.",1,0)
app.addDirectoryEntry("output_dir_entry",1,1)
app.addIconButton('output_path',infobox, 'help',1,2)
app.addLabel("l1", "Number of GPUs",2,0)
app.addScale("number of gpus",2,1)
app.setScaleHorizontal("number of gpus")
app.setScaleRange("number of gpus",1,8,1)
app.setScaleIncrement("number of gpus",1)
app.showScaleIntervals('number of gpus',1)
app.addIconButton('help8',infobox, 'help',2,2)
app.addLabel("l2", "Number of CPUs",3,0)
app.addScale("Number of CPUs",3,1)
app.setScaleHorizontal("Number of CPUs")
app.setScaleRange("Number of CPUs",1,12)
app.setScaleIncrement("Number of CPUs",1)
app.showScaleIntervals("Number of CPUs",4)
app.addIconButton('help9',infobox, 'help',3,2)
app.addLabel("l22", "Number of iterations before data split.",4,0)
app.addScale("num_it",4,1)
app.setScaleHorizontal("num_it")
app.setScaleRange("num_it",4,12,5)
app.setScaleIncrement("num_it",1)
app.showScaleIntervals("num_it",1)
app.addIconButton('help10',infobox, 'help',4,2)
app.addLabel("l23", "Terminate below Silhouette Score ",5,0)
app.addScale("Silhouette_Score",5,1)
app.setScaleHorizontal("Silhouette_Score")
app.setScaleRange("Silhouette_Score",0,100,90)
app.setScaleIncrement("Silhouette_Score",10)
app.showScaleIntervals("Silhouette_Score",50)
app.addIconButton('help11',infobox, 'help',5,2)
app.addLabel("l24", "training fraction ",6,0)
app.addScale("fraction to train",6,1)
app.setScaleHorizontal("fraction to train")
app.setScaleRange("fraction to train",0,100,100)
app.setScaleIncrement("fraction to train",10)
app.showScaleIntervals("fraction to train",20)
app.addIconButton('helpfrac',infobox, 'help',6,2)
precision_options={"Half Precision":False, "submit to slurm":True}
app.addProperties("Additional options", precision_options,7,0,2)
app.addIconButton('precision_help',infobox, 'help',7,2,1)
app.addLabel("slurm_lab", "path to slurm file",9,0)
app.addFileEntry("slurm_path",9,1)
app.addIconButton('helpslurm',infobox, 'help',9,2)
app.addLabel("additional_slurm", "additional slurm arguments",10,0,1)
app.addEntry("additional_slurm_arguments",10,1,1)
app.addIconButton('additionalslurm',infobox, 'help',10,2,1)
app.button('Run',run_program,11,2,2)
app.button('Reset',reset_all,11,0,0)
app.stopTab()
app.startTab("advanced options")
app.addLabel("l3", "Top-K",0,0,1)
app.addScale("Top K",0,1,2)
app.setScaleHorizontal("Top K")
app.setScaleRange("Top K",500,2000,1000)
app.setScaleIncrement("Top K",250)
app.showScaleIntervals('Top K',500)
app.addIconButton('help1',infobox, 'help',0,4,1)
app.addLabel("l4", "Train Batch Size",1,0,1)
app.addScale("Auto encoder batch size",1,1,2)
app.setScaleHorizontal("Auto encoder batch size")
app.setScaleRange("Auto encoder batch size",50,500,50)
app.setScaleIncrement("Auto encoder batch size",10)
app.showScaleIntervals('Auto encoder batch size',100)
app.addIconButton('help2',infobox, 'help',1,4,1)
app.addLabel("l5", "Infer Batch Size",2,0,1)
app.addScale("Auto encoder predict size",2,1,2)
app.setScaleHorizontal("Auto encoder predict size")
app.setScaleRange("Auto encoder predict size",50,1000,100)
app.setScaleIncrement("Auto encoder predict size",10)
app.showScaleIntervals('Auto encoder predict size',200)
app.addIconButton('help3',infobox, 'help',2,4,1)
app.addLabel("l6", "Graph Matrix Size",3,0,1)
app.addScale("Matrix Window Size",3,1,2)
app.setScaleHorizontal("Matrix Window Size")
app.setScaleRange("Matrix Window Size",2000,8000,4000)
app.setScaleIncrement("Matrix Window Size",500)
app.showScaleIntervals("Matrix Window Size",2000)
app.addIconButton('help4',infobox, 'help',3,4,1)
app.addLabel("l7", "Training Particles pr. Epoch",4,0,1)
app.addScale("pr_epoch",4,1,2)
app.setScaleHorizontal("pr_epoch")
app.setScaleRange("pr_epoch",50000,200000,50000)
app.setScaleIncrement("pr_epoch",10000)
app.showScaleIntervals("pr_epoch",50000)
app.addIconButton('help5',infobox, 'help',4,4,1)
app.addLabel("l8", "Nr. Epochs",5,0,1)
app.addScale("epochs",5,1,2)
app.setScaleHorizontal("epochs")
app.setScaleRange("epochs",10,30,10)
app.setScaleIncrement("epochs",1)
app.showScaleIntervals("epochs",5)
app.addIconButton('help6',infobox, 'help',5,4,1)
app.addLabel("l9", "PCA train size",6,0,1)
app.addScale("PCA_train_set",6,1,2)
app.setScaleHorizontal("PCA_train_set")
app.setScaleRange("PCA_train_set",50000,200000,50000)
app.setScaleIncrement("PCA_train_set",50000)
app.showScaleIntervals("PCA_train_set",50000)
app.addIconButton('help7',infobox, 'help',6,4,1)
"""
app.addLabel("l10", "Refinement_power",6,0,1)
app.addScale("ref_power",6,1,2)
app.setScaleHorizontal("ref_power")
app.setScaleRange("ref_power",4,10,4)
app.setScaleIncrement("ref_power",1)
app.showScaleIntervals("ref_power",1)
app.addIconButton('help12',infobox, 'help',6,4,1)
"""
app.stopTab()
app.startTab("running")
app.addLabel("sinogram_process","processing sinograms")
app.addMeter("sinogram process meter")
app.setMeterFill("sinogram process meter","Blue")
app.addLabel("iteration","Current iteration: 0 (Preprocessing)")
app.addLabel("training_progress","Training Progress")
app.addMeter("training progress meter")
app.setMeterFill("training progress meter","Blue")
app.addLabel("inference_progress","Inference Progress")
app.addMeter("inference progress meter")
app.setMeterFill("inference progress meter","Blue")
app.addLabel("pca_progress","Clustering progress")
app.addMeter("clustering progress meter")
app.setMeterFill("clustering progress meter","Blue")
app.addImage("placeholder",'poul_nissen.jpg')
app.registerEvent(update_gui)
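# appJar's registerEvent() repeatedly calls update_gui on the GUI thread (by
# default roughly once a second; adjustable with setPollTime), so the running
# tab refreshes as the backend rewrites parameters.npy.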
app.stopTab()
app.stopTabbedFrame()
app.go()
#!/usr/bin/env python3
from .main_sortem import main
#from .Gui_sortem import run
import numpy as np
from super_class import super_class
from os.path import join, getsize, isdir, isfile, dirname, basename
from os import listdir, rename, mkdir
from fac import DynAE
import glob
class control_flow:
    def __init__(self, num_gpus, gpu_list, num_cpus, num_clusters, star,
                 deep_NN_batch_size, deep_NN_prediction_size, workdir,
                 half_precision, max_particles, epochs, pca,
                 transfer_learning, verbose):
        self.workdir = workdir
        if not isdir(workdir):  # make sure all working directories are there
            mkdir(workdir)
        if not isdir(join(workdir, 'gui_display')):
            mkdir(join(workdir, 'gui_display'))
        if not isdir(join(workdir, 'particle_stack_dir')):
            mkdir(join(workdir, 'particle_stack_dir'))
        # initial GUI state read by the front end: [sinogram progress, iteration,
        # stage label, training, inference, clustering, plot flag, display image]
        np.save(join(join(workdir, 'gui_display'), 'parameters.npy'),
                np.asarray([0.0, 0.0, "preprocessing", 0.0, 0.0, 0, 0, 'poul_nissen.jpg']))
        self.epochs = epochs
        if not isinstance(star, list) and isdir(star):
            # glob the directory for star files; glob.glob(star) alone would only
            # match the directory itself
            star_files = glob.glob(join(star, '*.star'))
            if star_files == []:
                print("no star files in directory. You must point to at least one star file to run Sortinator.")
                exit()
        else:
            star_files = star
        def get_star_file_parameters(star_files, path):
            nonlocal verbose  # reassigned below; without nonlocal the read would raise UnboundLocalError
            counter = []
            count = 0
            names = []
            star_files = np.unique(star_files)
            labels_list = []
            for z in star_files:
                c = dirname(z)  # directory containing the star file
                with open(z, newline='') as csvfile:
                    reader = list(csvfile)
                    header = list(filter(lambda x: '_rln' in x, reader))
                    header = [i.split()[0] for i in header]
                    name = header.index('_rlnImageName')
                    if bool(verbose):
                        try:
                            class_num = header.index('_rlnClassNumber')
                        except ValueError:
                            verbose = False
                            print("the --log true cannot be run")
                    for row in reader:
                        if len(header) == len(row.split()):
                            current_name = row.split()[name].split('@')[1]
                            current_id = row.split()[name].split('@')[0]
                            if len(names) > 0 and names[-1] != join(c, current_name):
                                # a new particle stack begins: store the previous
                                # stack's count and start counting from this row
                                counter.append(count)
                                names.append(join(c, current_name))
                                count = 1
                            elif len(names) == 0:
                                names.append(join(c, current_name))
                                count += 1
                            else:
                                count += 1
                            if bool(verbose):
                                labels_list.append(int(row.split()[class_num]))
            counter.append(count)
            np.save(join(join(self.workdir, 'particle_stack_dir'), 'labels'), np.asarray(labels_list))
            tmp_length = np.asarray(counter).sum()
            return tmp_length, names
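        # RELION _rlnImageName entries have the form "000001@stack.mrcs": the
        # particle's index inside the stack, an '@', then the stack file. The
        # parser above counts consecutive rows per stack to recover how many
        # particles each .mrcs file holds.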
        depth, mrc_paths = get_star_file_parameters(star_files, star)
        length, bytes_pr_record = self.get_parameters(mrc_paths)
        DynAE(mrc_paths,
              workdir,
              length,
              bytes_pr_record,
              num_cpus,
              num_gpus,
              depth,
              num_clusters,
              gpu_list,
              half_precision,
              deep_NN_batch_size,
              deep_NN_prediction_size,
              transfer_learning,
              max_particles,
              epochs,
              verbose,
              pca)
        # assumption: the per-particle labels saved above under particle_stack_dir
        # are what write_star_file expects; `labels` was otherwise undefined here
        labels = np.load(join(join(self.workdir, 'particle_stack_dir'), 'labels.npy'))
        self.write_star_file(star_files, labels)
    def write_star_file(self, star_files, labels):
        names = []
        for z in star_files:
            with open(z, newline='') as csvfile:
                reader = list(csvfile)
                header = list(filter(lambda x: '_rln' in x, reader))
                for row in reader:
                    if len(row.split()) == len(header):
                        names.append(row.split())
        # write one output star file per unique cluster label
        for index, i in enumerate(np.unique(labels)):
            f = open(join(join(self.workdir, 'output_star_files'), 'cluster_%s.star' % index), 'w')
            with f:
                f.write('\n')
                f.write('data_images\n')
                f.write('\n')
                f.write('loop_\n')
                for z in header:
                    f.write(z)  # header lines retain their trailing newlines
                for lab, row in zip(labels.tolist(), names):