test_model_balance.py
#!/usr/bin/env python
import csv
from pathlib import Path

import numpy as np
import tensorflow as tf
from imblearn.over_sampling import ADASYN
from imblearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedKFold, cross_validate
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

import paths as pt
from tools import data_loader, preprocessor
from utility.metrics import compute_mean, compute_std
from utility.settings import load_settings

def make_model(input_dim, class_weight=None):
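    """Build a scikit-learn compatible classifier around a small Keras MLP.

    The wrapped network has three hidden layers with dropout and a sigmoid
    output, compiled with binary cross-entropy and the Adam optimizer.
    An optional class_weight dict is forwarded to fit() by the wrapper.
    """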
    def make_keras_model():
        model = tf.keras.models.Sequential()
        model.add(tf.keras.layers.Dense(80,
                                        input_dim=input_dim,
                                        activation='relu'))
        model.add(tf.keras.layers.Dropout(0.35))
        model.add(tf.keras.layers.Dense(20, activation='relu'))
        model.add(tf.keras.layers.Dropout(0.15))
        model.add(tf.keras.layers.Dense(10, activation='relu'))
        model.add(tf.keras.layers.Dropout(0.15))
        model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
        model.compile(loss='binary_crossentropy',
                      optimizer='Adam')
        return model
    
    return KerasClassifier(make_keras_model, epochs=20, batch_size=32,
                           class_weight=class_weight, verbose=False)

def main():
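    """Evaluate class-imbalance handling strategies for each use case.

    For every case, the data is loaded and prepared, and the MLP is scored
    with 5-fold stratified cross-validation under three versions: no class
    weights (NoCW), class weights (CW), and ADASYN oversampling. Mean and
    standard deviation of every metric are written to a per-case CSV report.
    """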
    cases = ["Complete", "Compliance", "Fall", "Risk"]
    for case in cases:
        output_filename = f"{case} model balance.csv"
        header = ['clf', 'version', 'accuracy_mean', 'accuracy_std',
                  'precision_mean', 'precision_std', 'recall_mean',
                  'recall_std', 'roc_auc_mean', 'roc_auc_std',
                  'pr_auc_mean', 'pr_auc_std', 'f1_mean', 'f1_std']
        with open(Path.joinpath(pt.REPORTS_DIR, output_filename), 'w',
                  encoding='UTF8', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(header)
            
        if case == "Complete":
            settings = load_settings("complete_emb.yaml")
            dl = data_loader.CompleteDataLoader("complete_emb.csv", settings).load_data()
            X, y = dl.get_data()
        elif case == "Compliance":
            settings = load_settings("compliance_emb.yaml")
            dl = data_loader.ComplianceDataLoader("compliance_emb.csv", settings).load_data()
            X, y = dl.get_data()
        elif case == "Fall":
            settings = load_settings("fall_emb.yaml")
            dl = data_loader.AlarmDataLoader("fall_emb.csv", settings).load_data()
            X, y = dl.get_data()
        else:
            settings = load_settings("risk_emb.yaml")
            dl = data_loader.FallDataLoader("risk_emb.csv", settings).load_data()
            X, y = dl.get_data()
        
        X, y = dl.prepare_data()
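        # Compare three ways of handling class imbalance: the raw model
        # (NoCW), class weights (CW), and ADASYN oversampling.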
        versions = ['NoCW', 'CW', 'Oversampling']
        metrics = ['accuracy', 'precision', 'recall', 'roc_auc', 'average_precision', 'f1']
        for version in versions:
            if version == "NoCW":
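                # Baseline: fit the MLP with no imbalance correction.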
                model = make_model(input_dim=X.shape[1])
                kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
                results = cross_validate(model, X, y, cv=kfold, scoring=metrics)
            elif version == "CW":
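                # Weight the loss by inverse class frequency computed from y.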
                neg, pos = np.bincount(y)
                class_weight = preprocessor.get_class_weight(neg, pos)
                model = make_model(input_dim=X.shape[1],
                                   class_weight=class_weight)
                kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
                results = cross_validate(model, X, y, cv=kfold, scoring=metrics)
            else:
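                # Oversample the minority class with ADASYN inside an
                # imblearn Pipeline so resampling is applied only to the
                # training folds during cross-validation.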
                adasyn = ADASYN(random_state=0)
                model = make_model(input_dim=X.shape[1])
                pipeline = Pipeline([('sampling', adasyn), ('model', model)])
                kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
                results = cross_validate(pipeline, X, y, cv=kfold, scoring=metrics)

            with open(Path.joinpath(pt.REPORTS_DIR, output_filename), 'a',
                      encoding='UTF8', newline='') as f:
                writer = csv.writer(f)
                data = ["MLP", version]
                for metric in metrics:
                    mean = compute_mean(results[f'test_{metric}'])
                    std = compute_std(results[f'test_{metric}'])
                    data.extend((mean, std))
                writer.writerow(data)

if __name__ == '__main__':
    main()