train_xgboost_model.py 3.11 KB
Newer Older
1
2
3
#!/usr/bin/env python
import numpy as np
import pandas as pd
4
import paths as pt
5
from tools import file_reader, file_writer, data_loader
6
from utility import metrics
7
from sklearn.metrics import accuracy_score, precision_score
8
from sklearn.metrics import recall_score, roc_auc_score
9
10
11
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
import xgboost as xgb
12
13
from pathlib import Path
import yaml
14

15
# Names of the prediction cases; one XGBoost model is trained per entry.
CASES = ["Complete", "Compliance", "Alarm", "Fall"]
# Dataset variant tag, embedded in the CSV file names that get loaded.
DATASET_VERSION = 'emb'
17

18
def _load_case_data(case):
    """Load the features and labels for a single case.

    Reads the case's YAML settings from ``pt.CONFIGS_DIR``, builds the
    matching data loader from ``data_loader`` and returns whatever its
    ``prepare_data()`` yields (unpacked by the caller as ``X, y``).

    Args:
        case: One of "Complete", "Compliance", "Alarm" or "Fall".

    Raises:
        ValueError: If ``case`` is not one of the known case names.
    """
    loader_classes = {
        "Complete": data_loader.CompleteDataLoader,
        "Compliance": data_loader.ComplianceDataLoader,
        "Alarm": data_loader.AlarmDataLoader,
        "Fall": data_loader.FallDataLoader,
    }
    try:
        loader_cls = loader_classes[case]
    except KeyError:
        # Fail loudly instead of silently treating an unknown case as "Fall".
        raise ValueError(f"Unknown case: {case!r}") from None

    prefix = case.lower()
    # The config file names carry a literal "emb" suffix; it is kept
    # independent of DATASET_VERSION on purpose.
    config_path = Path(pt.CONFIGS_DIR, f"{prefix}_emb.yaml")
    with open(config_path, 'r', encoding="utf-8") as stream:
        settings = yaml.safe_load(stream)

    file_name = f'{prefix}_{DATASET_VERSION}.csv'
    dl = loader_cls(file_name, settings).load_data()
    return dl.prepare_data()


def main():
    """Train, save and evaluate one XGBoost classifier per case in CASES.

    For every case the data is loaded, split 70/30 (stratified on the
    labels, fixed seed), an ``XGBClassifier`` is fitted on the training
    part, written to ``pt.MODELS_DIR`` as ``<case>_xgboost.joblib`` and
    scored on the held-out part. Accuracy, precision, recall and ROC AUC
    are printed to stdout.
    """
    for case in CASES:
        X, y = _load_case_data(case)

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.3, stratify=y, random_state=0)

        # Unpacking into exactly two counts assumes y holds binary 0/1
        # integer labels with at least one positive sample.
        # NOTE(review): the ratio is computed over all labels (train and
        # test), not only y_train - confirm this is intended.
        neg, pos = np.bincount(y)
        scale_pos_weight = neg / pos
        params = {
            "n_estimators": 400,
            "learning_rate": 0.1,
            "scale_pos_weight": scale_pos_weight,
            "objective": "binary:logistic",
            "random_state": 0,
            "use_label_encoder": False,
            "eval_metric": 'logloss',
        }

        model = xgb.XGBClassifier(**params)
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)
        # Second column of predict_proba is used as the score for ROC AUC.
        y_proba = model.predict_proba(X_test)[:, 1]
        file_writer.write_joblib(model, pt.MODELS_DIR, f'{case.lower()}_xgboost.joblib')

        print(f"Scores for {case} XGBoost model:")
        print(f"Accuracy: {np.around(accuracy_score(y_test, y_pred), decimals=3)}")
        print(f"Precision: {np.around(precision_score(y_test, y_pred), decimals=3)}")
        print(f"Recall: {np.around(recall_score(y_test, y_pred), decimals=3)}")
        print(f"ROC AUC: {np.around(roc_auc_score(y_test, y_proba), decimals=3)}\n")
    
# Run the training pipeline only when executed as a script, not on import.
if __name__ == '__main__':
    main()