Commit 611cdee2 authored by Christian Marius Lillelund's avatar Christian Marius Lillelund
Browse files

updated final model training scripts

parent 3839a8c1
Pipeline #100347 passed with stage
in 4 minutes and 18 seconds
"""
evaluate_baseline.py
evaluate_classification_cases.py
====================================
Script to make a baseline evaluation of cases
Script to make a baseline evaluation of classification cases
using CV with different datasets and classifiers.
"""
......
"""
evaluate_survival_case.py
====================================
Script to make a baseline evaluation of the survival case using CV.
"""
import numpy as np
import paths as pt
from typing import List
from tools import file_writer, preprocessor, data_loader
from pathlib import Path
import csv
from utility.settings import load_settings
from utility.metrics import compute_mean, compute_std
from io import BytesIO
import shutil
def main():
    """Entry point stub: survival-case evaluation is not yet implemented."""


if __name__ == '__main__':
    main()
......@@ -6,12 +6,11 @@ The main FastAPI module class.
import uvicorn
import pydantic
import shap
import os
import csv
import joblib
import pandas as pd
import numpy as np
import xgboost as xgb
import io
from typing import List, Optional
from fastapi import Depends, FastAPI, HTTPException, Request
......@@ -195,6 +194,8 @@ def predict_alarm(incoming_data: InputData):
index_at_one_year = event_times.index(365)
drop_after_one_year = 1 - float(df_surv.iloc[index_at_one_year][0])
alarm_arguments = generate_alarm_arguments(df, ats_resolution, drop_after_one_year)
# TODO: Use predict() to predict time to event
return {
'EventTimes': event_times,
......@@ -209,17 +210,17 @@ def predict_training(incoming_data: InputData):
data = validate_data(incoming_data)
df = prepare_data(data, ats_resolution)
complete_model = read_joblib(f'complete_xgb.joblib')
compliance_model = read_joblib(f'compliance_xgb.joblib')
fall_model = read_joblib(f'fall_xgb.joblib')
complete_model = xgb.XGBClassifier()
complete_model.load_model(read_joblib(f'complete_xgb.joblib'))
compliance_model = xgb.XGBClassifier()
compliance_model.load_model(read_joblib(f'compliance_xgb.joblib'))
df_for_complete = add_embedding(df.copy(), 'complete', ats_resolution)
df_for_compliance = add_embedding(df.copy(), 'compliance', ats_resolution)
df_for_fall = add_embedding(df.copy(), 'fall', ats_resolution)
complete_prob = complete_model.predict_proba(df_for_complete).flatten()[1]
fall_prob = fall_model.predict_proba(df_for_fall).flatten()[1]
if complete_prob > 0.5:
compliance_prob = compliance_model.predict_proba(df_for_compliance).flatten()[1]
else:
......@@ -229,7 +230,6 @@ def predict_training(incoming_data: InputData):
return {
'CompleteProb': float(complete_prob),
'FallProb': float(fall_prob),
'Compliance': int(compliance),
'CompleteArguments': complete_arguments
}
......
#!/usr/bin/env python
"""
train_alarm_model.py
====================================
Script to train the model for the Alarm case.
"""
import paths as pt
from tools import data_loader, file_writer
from utility.settings import load_settings
from sksurv.ensemble import RandomSurvivalForest
import pandas as pd
import numpy as np
from sksurv.ensemble import GradientBoostingSurvivalAnalysis
from pathlib import Path
from io import BytesIO
import shutil
......@@ -16,12 +19,18 @@ def main():
target_settings).load_data()
X, y = dl.get_data()
model = RandomSurvivalForest(n_estimators=200, max_depth=3,
n_jobs=-1, random_state=0)
model = GradientBoostingSurvivalAnalysis(n_estimators=200,
learning_rate=0.1,
max_depth=14,
loss='coxph',
min_samples_split=4,
max_features='sqrt',
dropout_rate=0.5,
random_state=0)
model.fit(X, y)
file_path = pt.MODELS_DIR
file_name = f'alarm.joblib'
file_name = f'alarm_gradboost.joblib'
with open(Path.joinpath(file_path, file_name), 'wb') as fd:
outfile = BytesIO()
file_writer.write_joblib(model, outfile)
......@@ -29,4 +38,4 @@ def main():
shutil.copyfileobj(outfile, fd)
if __name__ == '__main__':
main()
\ No newline at end of file
main()
"""
train_complete_model.py
====================================
Script to train the model for the Complete case.
"""
import paths as pt
from tools import file_writer, data_loader
from utility.settings import load_settings
import xgboost as xgb
from pathlib import Path
from io import BytesIO
import shutil
def main():
    """Train the XGBoost classifier for the Complete case and persist it.

    Loads the embedded Complete dataset, fits an XGBClassifier with the
    tuned hyperparameters below, and writes the fitted model to
    MODELS_DIR/complete_xgboost.joblib via file_writer.
    """
    target_settings = load_settings(pt.CONFIGS_DIR, "complete.yaml")
    dl = data_loader.CompleteDataLoader(pt.PROCESSED_DATA_DIR,
                                        "complete_emb.csv",
                                        target_settings).load_data()
    X, y = dl.get_data()
    # Hyperparameters chosen during model selection; random_state pins the run.
    params = {"n_estimators": 100,
              "booster": "gbtree",
              "max_depth": 7,
              "gamma": 1,
              "colsample_bytree": 0.6,
              "min_child_weight": 7,
              "reg_alpha": 128,
              "reg_lambda": 0.9,
              "learning_rate": 0.1,
              "subsample": 1,
              "use_label_encoder": False,
              "eval_metric": "logloss",
              "objective": "binary:logistic",
              "random_state": 0}
    model = xgb.XGBClassifier(**params)
    model.fit(X, y)
    # Plain string literal: the original f-string had no placeholders.
    file_name = 'complete_xgboost.joblib'
    with open(Path.joinpath(pt.MODELS_DIR, file_name), 'wb') as fd:
        # Serialize to an in-memory buffer first, then stream to disk.
        outfile = BytesIO()
        file_writer.write_joblib(model, outfile)
        outfile.seek(0)
        shutil.copyfileobj(outfile, fd)


if __name__ == '__main__':
    main()
"""
train_compliance_model.py
====================================
Script to train the model for the Compliance case.
"""
import paths as pt
from tools import file_writer, data_loader
from utility.settings import load_settings
import xgboost as xgb
from pathlib import Path
from io import BytesIO
import shutil
def main():
    """Train the XGBoost classifier for the Compliance case and persist it.

    Loads the embedded Compliance dataset, fits an XGBClassifier with the
    tuned hyperparameters below, and writes the fitted model to
    MODELS_DIR/compliance_xgboost.joblib via file_writer.
    """
    target_settings = load_settings(pt.CONFIGS_DIR, "compliance.yaml")
    dl = data_loader.ComplianceDataLoader(pt.PROCESSED_DATA_DIR,
                                          "compliance_emb.csv",
                                          target_settings).load_data()
    X, y = dl.get_data()
    # Hyperparameters chosen during model selection; random_state pins the run.
    params = {"n_estimators": 100,
              "booster": "gbtree",
              "max_depth": 3,
              "gamma": 5,
              "colsample_bytree": 0.6,
              "min_child_weight": 4,
              "reg_alpha": 40,
              "reg_lambda": 0.6,
              "learning_rate": 0.2,
              "subsample": 1,
              "use_label_encoder": False,
              "eval_metric": "logloss",
              "objective": "binary:logistic",
              "random_state": 0}
    model = xgb.XGBClassifier(**params)
    model.fit(X, y)
    # Bug fix: this previously wrote 'complete_xgboost.joblib', clobbering the
    # Complete case's artifact; use the compliance-specific file name.
    file_name = 'compliance_xgboost.joblib'
    with open(Path.joinpath(pt.MODELS_DIR, file_name), 'wb') as fd:
        # Serialize to an in-memory buffer first, then stream to disk.
        outfile = BytesIO()
        file_writer.write_joblib(model, outfile)
        outfile.seek(0)
        shutil.copyfileobj(outfile, fd)


if __name__ == '__main__':
    main()
#!/usr/bin/env python
import paths as pt
from tools import file_writer, data_loader
from utility.settings import load_settings
from sklearn.ensemble import RandomForestClassifier
from pathlib import Path
import shutil
from io import BytesIO
CASES = ["Complete", "Compliance", "Fall", "Risk"]
DATASET_VERSION = 'emb'


def main():
    """Train a RandomForest baseline for every case and persist each model.

    For each case the matching settings file, dataset file and loader class
    are selected, the model is fitted, and the result is written to
    MODELS_DIR/<case>_rf.joblib.
    """
    # case -> (settings yaml, dataset file prefix, loader class)
    case_setup = {
        "Complete": ("complete.yaml", "complete", data_loader.CompleteDataLoader),
        "Compliance": ("compliance.yaml", "compliance", data_loader.ComplianceDataLoader),
        "Fall": ("fall.yaml", "fall", data_loader.FallDataLoader),
        # Bug fix: the Risk branch previously loaded "fall.yaml"; use the risk
        # settings for consistency with the XGBoost training script.
        "Risk": ("risk.yaml", "risk", data_loader.RiskDataLoader),
    }
    for case in CASES:
        config_file, prefix, loader_cls = case_setup[case]
        settings = load_settings(pt.CONFIGS_DIR, config_file)
        data_file = f'{prefix}_{DATASET_VERSION}.csv'
        dl = loader_cls(pt.PROCESSED_DATA_DIR, data_file, settings).load_data()
        X, y = dl.prepare_data()
        # Hyperparameters from model selection; random_state pins the run.
        model = RandomForestClassifier(n_estimators=1,
                                       bootstrap=False,
                                       min_samples_leaf=0.1,
                                       min_samples_split=0.54,
                                       max_depth=29,
                                       random_state=0)
        model.fit(X, y)
        out_name = f'{case.lower()}_rf.joblib'
        with open(Path.joinpath(pt.MODELS_DIR, out_name), 'wb') as fd:
            # Serialize to an in-memory buffer first, then stream to disk.
            outfile = BytesIO()
            file_writer.write_joblib(model, outfile)
            outfile.seek(0)
            shutil.copyfileobj(outfile, fd)


if __name__ == '__main__':
    main()
#!/usr/bin/env python
import numpy as np
import paths as pt
from tools import file_writer, data_loader
from utility.settings import load_settings
import xgboost as xgb
from pathlib import Path
from io import BytesIO
import shutil
CASES = ["Complete", "Compliance", "Fall", "Risk"]
DATASET_VERSION = 'emb'


def main():
    """Fit one class-weighted XGBoost classifier per case and persist each.

    Every fitted model is written to MODELS_DIR/<case>_xgb.joblib through
    an in-memory buffer via file_writer.
    """
    # case -> (settings yaml, loader class)
    case_setup = {
        "Complete": ("complete.yaml", data_loader.CompleteDataLoader),
        "Compliance": ("compliance.yaml", data_loader.ComplianceDataLoader),
        "Fall": ("fall.yaml", data_loader.FallDataLoader),
        "Risk": ("risk.yaml", data_loader.RiskDataLoader),
    }
    for case in CASES:
        config_file, loader_cls = case_setup[case]
        settings = load_settings(pt.CONFIGS_DIR, config_file)
        data_file = f'{case.lower()}_{DATASET_VERSION}.csv'
        dl = loader_cls(pt.PROCESSED_DATA_DIR, data_file, settings).load_data()
        X, y = dl.prepare_data()
        # Weight the positive class by the negative/positive ratio to
        # counter class imbalance in the labels.
        neg, pos = np.bincount(y)
        params = {"n_estimators": 400,
                  "learning_rate": 0.1,
                  "scale_pos_weight": neg / pos,
                  "objective": "binary:logistic",
                  "use_label_encoder": False,
                  "eval_metric": "logloss",
                  "random_state": 0}
        clf = xgb.XGBClassifier(**params)
        clf.fit(X, y)
        out_name = f'{case.lower()}_xgb.joblib'
        with open(Path.joinpath(pt.MODELS_DIR, out_name), 'wb') as fd:
            buffer = BytesIO()
            file_writer.write_joblib(clf, buffer)
            buffer.seek(0)
            shutil.copyfileobj(buffer, fd)


if __name__ == '__main__':
    main()
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment