Commit 4d4a3ca1 authored by thecml

changed risk case to fall, now have alarm and fall

parent d0a50b80
Pipeline #89918 failed in 2 minutes and 28 seconds
@@ -10,7 +10,7 @@ ats_resolution: 10
 train_ratio: 0.8
 batch_size: 32
-num_epochs: 10
+num_epochs: 5
 verbose: True
 network_layers: [128]
 optimizer: "Adam"
......
%% Cell type:code id: tags:
```
import tensorflow as tf
import numpy as np
import pandas as pd
from pathlib import Path
import paths as pt
import yaml
from tools import data_loader, preprocessor
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

def make_model(input_dim):
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(80,
                                    input_dim=input_dim,
                                    activation='relu'))
    model.add(tf.keras.layers.Dropout(0.35))
    model.add(tf.keras.layers.Dense(20, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.15))
    model.add(tf.keras.layers.Dense(10, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.15))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
    metrics = [
        tf.keras.metrics.BinaryAccuracy(name='accuracy'),
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall'),
        tf.keras.metrics.AUC(name='auc'),
    ]
    model.compile(loss='binary_crossentropy',
                  optimizer="Adam",
                  metrics=metrics)
    return model

# Load settings
-with open(Path.joinpath(pt.CONFIGS_DIR, "complete_emb.yaml"), 'r') as stream:
+with open(Path.joinpath(pt.CONFIGS_DIR, "fall_emb.yaml"), 'r') as stream:
    settings = yaml.safe_load(stream)

# Load the data
-file_name = "complete_emb.csv"
-dl = data_loader.CompleteDataLoader(file_name, settings).load_data()
+file_name = "fall_emb.csv"
+dl = data_loader.FallDataLoader(file_name, settings).load_data()
features = dl.get_features()
X, y = dl.get_data()

# Calculate class weight
neg, pos = np.bincount(y)
class_weight = preprocessor.get_class_weight(neg, pos)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7,
                                                    stratify=y, random_state=0)

# Scale the data
X_train = X_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)
numeric_feats = ['BirthYear', 'Cluster', 'LoanPeriod', 'NumberAts']
scaler = StandardScaler()
X_train_enc = pd.DataFrame(scaler.fit_transform(X_train[numeric_feats]),
                           columns=numeric_feats)
X_test_enc = pd.DataFrame(scaler.transform(X_test[numeric_feats]),
                          columns=numeric_feats)
num_X_train = X_train.drop(numeric_feats, axis=1)
num_X_test = X_test.drop(numeric_feats, axis=1)
X_train_sc = pd.concat([num_X_train[['Gender_Male', 'Gender_Female']], X_train_enc,
                        num_X_train[['Rand', '1Ats', '2Ats', '3Ats', '4Ats', '5Ats',
                                     '6Ats', '7Ats', '8Ats', '9Ats', '10Ats']]], axis=1)
X_test_sc = pd.concat([num_X_test[['Gender_Male', 'Gender_Female']], X_test_enc,
                       num_X_test[['Rand', '1Ats', '2Ats', '3Ats', '4Ats', '5Ats',
                                   '6Ats', '7Ats', '8Ats', '9Ats', '10Ats']]], axis=1)
```
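`preprocessor.get_class_weight` is project code that is not shown in this diff; as a rough sketch of what such a helper typically does for an imbalanced binary target (an assumption mirroring the common Keras recipe, not necessarily this repo's implementation):

```
# Hypothetical balanced class-weight helper (assumption: the project's
# tools.preprocessor.get_class_weight may compute something different).
def get_class_weight(neg, pos):
    total = neg + pos
    weight_for_0 = (1 / neg) * (total / 2.0)  # down-weight the majority class
    weight_for_1 = (1 / pos) * (total / 2.0)  # up-weight the minority class
    return {0: weight_for_0, 1: weight_for_1}
```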
%% Cell type:code id: tags:
```
X_train = np.array(X_train)
X_train_sc = np.array(X_train_sc)
X_test = np.array(X_test)
X_test_sc = np.array(X_test_sc)
y_train = np.array(y_train)
y_test = np.array(y_test)

# Train the model
model = make_model(input_dim=X_train_sc.shape[1])
history = model.fit(X_train_sc, y_train, epochs=10,
                    class_weight=class_weight,
                    batch_size=32, verbose=True)
```
%% Output
WARNING:tensorflow:From C:\Users\cml\miniconda3\envs\py38-air\lib\site-packages\tensorflow\python\ops\array_ops.py:5043: calling gather (from tensorflow.python.ops.array_ops) with validate_indices is deprecated and will be removed in a future version.
Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.
Epoch 1/10
-47/47 [==============================] - 1s 7ms/step - loss: 0.6353 - accuracy: 0.6820 - precision: 0.7183 - recall: 0.9185 - auc: 0.5047
+47/47 [==============================] - 1s 7ms/step - loss: 0.6959 - accuracy: 0.7780 - precision: 0.1690 - recall: 0.1875 - auc: 0.5319
Epoch 2/10
-47/47 [==============================] - 0s 7ms/step - loss: 0.6051 - accuracy: 0.7153 - precision: 0.7196 - recall: 0.9907 - auc: 0.5196
+47/47 [==============================] - 0s 6ms/step - loss: 0.6823 - accuracy: 0.6913 - precision: 0.1587 - recall: 0.3281 - auc: 0.5959
Epoch 3/10
-47/47 [==============================] - 0s 6ms/step - loss: 0.5968 - accuracy: 0.7187 - precision: 0.7196 - recall: 0.9981 - auc: 0.5452
+47/47 [==============================] - 0s 7ms/step - loss: 0.6688 - accuracy: 0.6547 - precision: 0.2047 - recall: 0.5885 - auc: 0.6584
Epoch 4/10
-47/47 [==============================] - 0s 6ms/step - loss: 0.5980 - accuracy: 0.7207 - precision: 0.7208 - recall: 0.9991 - auc: 0.5367
+47/47 [==============================] - 0s 7ms/step - loss: 0.6550 - accuracy: 0.6440 - precision: 0.2031 - recall: 0.6094 - auc: 0.6871
Epoch 5/10
-47/47 [==============================] - 0s 6ms/step - loss: 0.5860 - accuracy: 0.7180 - precision: 0.7194 - recall: 0.9972 - auc: 0.5813
+47/47 [==============================] - 0s 7ms/step - loss: 0.6476 - accuracy: 0.6753 - precision: 0.2263 - recall: 0.6354 - auc: 0.6982
Epoch 6/10
-47/47 [==============================] - 0s 6ms/step - loss: 0.5718 - accuracy: 0.7200 - precision: 0.7203 - recall: 0.9991 - auc: 0.6321
+47/47 [==============================] - 0s 7ms/step - loss: 0.6057 - accuracy: 0.7240 - precision: 0.2707 - recall: 0.6823 - auc: 0.7592
Epoch 7/10
-47/47 [==============================] - 0s 7ms/step - loss: 0.5773 - accuracy: 0.7193 - precision: 0.7198 - recall: 0.9991 - auc: 0.6152
+47/47 [==============================] - 0s 8ms/step - loss: 0.5649 - accuracy: 0.7293 - precision: 0.2798 - recall: 0.7083 - auc: 0.7973
Epoch 8/10
-47/47 [==============================] - 0s 7ms/step - loss: 0.5717 - accuracy: 0.7213 - precision: 0.7213 - recall: 0.9991 - auc: 0.6401
+47/47 [==============================] - 0s 8ms/step - loss: 0.5424 - accuracy: 0.7487 - precision: 0.2985 - recall: 0.7135 - auc: 0.8084
Epoch 9/10
-47/47 [==============================] - 0s 7ms/step - loss: 0.5524 - accuracy: 0.7193 - precision: 0.7216 - recall: 0.9935 - auc: 0.6758
+47/47 [==============================] - 0s 7ms/step - loss: 0.5215 - accuracy: 0.7433 - precision: 0.2942 - recall: 0.7188 - auc: 0.8180
Epoch 10/10
-47/47 [==============================] - 0s 7ms/step - loss: 0.5559 - accuracy: 0.7287 - precision: 0.7278 - recall: 0.9954 - auc: 0.6710
+47/47 [==============================] - 0s 6ms/step - loss: 0.4941 - accuracy: 0.7520 - precision: 0.3125 - recall: 0.7812 - auc: 0.8420
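The cell above only trains; a minimal follow-up for checking the held-out split could look like this (uses only names already defined in the notebook; not part of this commit):

```
# Evaluate the trained model on the scaled test split
results = model.evaluate(X_test_sc, y_test, batch_size=32, verbose=0)
for name, value in zip(model.metrics_names, results):
    print(f"{name}: {value:.4f}")
```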
%% Cell type:code id: tags:
```
def predict(data):
    global model, scaler
    # Scale the numeric feature columns (positions 2:6) the same way as during training
    data[:, 2:6] = scaler.transform(data[:, 2:6])
    y_pred = model.predict(data).reshape(-1, 1)
    y_pred = (y_pred > 0.5)
    print(np.array(list(zip(1 - y_pred.reshape(data.shape[0]),
                            y_pred.reshape(data.shape[0])))))
    # LIME expects an (n_samples, n_classes) array; hard 0/1 labels are returned here
    return np.hstack((1 - y_pred, y_pred))

import lime
import lime.lime_tabular
explainer = lime.lime_tabular.LimeTabularExplainer(X_train, mode='classification',
                                                   class_names=['No complete', 'Complete'],
                                                   feature_names=features)
exp = explainer.explain_instance(X_test[27], predict, num_features=X_train.shape[1])
```
%% Output
[[0 1]
[1 0]
[0 1]
[1 0]
...
[0 1]
[1 0]
[0 1]]
[1 0]
[1 0]]
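Besides exp.as_list() in the next cell, the fitted Explanation object can also be rendered graphically with lime's standard plotting helpers (a usage sketch, not part of this commit):

```
# Bar chart of per-feature contributions for the explained instance
fig = exp.as_pyplot_figure()
fig.tight_layout()
plt.show()

# Or the interactive HTML view when running inside Jupyter
exp.show_in_notebook(show_table=True)
```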
%% Cell type:code id: tags:
```
exp.as_list()
```
%% Output
[('10Ats > 0.03', 0.20651574242348936),
('3Ats <= -0.05', 0.192865743355257),
('1Ats > 0.07', 0.18353986581301904),
('2Ats > -0.03', -0.18296575285910985),
('7Ats <= -0.02', -0.16874453649744492),
('9Ats <= -0.01', -0.13263677767888812),
('8Ats <= 0.06', -0.1273679310001168),
('6Ats > 0.08', 0.11597565308592392),
('5Ats <= -0.04', 0.11306311651048453),
('Gender_Female <= 0.00', -0.08515210039953246),
('8.00 < NumberAts <= 14.00', 0.06138616356339817),
('LoanPeriod > 1627.50', -0.05305334898011001),
('0.00 < Gender_Male <= 1.00', 0.05105843326784843),
('29.00 < BirthYear <= 35.00', 0.04509384864824174),
('Rand <= 0.25', 0.03759473681614202),
('0.03 < 4Ats <= 0.08', 0.0045078832741941325),
('Cluster > 11.00', 2.0051051251007468e-05)]
[('9Ats <= -0.05', 0.17654813091829888),
('7Ats <= -0.00', -0.15885528136175853),
('4Ats <= -0.10', 0.1563175889395826),
('1Ats <= -0.05', -0.14003525792270777),
('Gender_Female <= 0.00', 0.13946579801005823),
('8Ats <= -0.09', 0.12071895033166836),
('BirthYear <= 30.00', 0.10502370554320418),
('0.00 < Gender_Male <= 1.00', -0.09129729546037109),
('NumberAts <= 4.00', -0.07464773070373201),
('0.12 < 10Ats <= 0.14', 0.06538127129769919),
('-0.12 < 2Ats <= -0.04', -0.04133967778174314),
('Cluster > 11.00', 0.032454681755121915),
('-0.08 < 5Ats <= -0.07', -0.025478822881879495),
('LoanPeriod <= 391.75', 0.021822568160415383),
('-0.01 < 6Ats <= 0.05', -0.02117723724760794),
('-0.20 < 3Ats <= -0.12', 0.020074664152922403),
('Rand > 0.73', -0.013190059911986492)]
%% Cell type:code id: tags:
```
model.predict(np.array([X_test_sc[27],]))
```
%% Output
-array([[0.59483784]], dtype=float32)
+array([[0.66663617]], dtype=float32)
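For reference, applying the same 0.5 cut-off used in predict() above turns this raw sigmoid output into a hard label (a trivial sketch, not in the commit):

```
# Map the predicted probability for test instance 27 to a class label
prob = float(model.predict(np.array([X_test_sc[27],]))[0][0])
label = int(prob > 0.5)  # 1 = positive class, 0 = negative class
print(f"p = {prob:.3f} -> class {label}")
```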
......
This diff is collapsed.
@@ -78,7 +78,7 @@ def main():
     num_clfs = len(clf_names)
     metrics = ['accuracy', 'precision', 'recall', 'roc_auc', 'average_precision', 'f1']
     #cases = ["Complete", "Compliance", "Fall", "Risk"]
-    cases = ['Alarm']
+    cases = ['Fall']
     for case in cases:
         settings = load_settings(f'{case.lower()}_emb.yaml')
         output_filename = f"{case} model baseline.csv"
@@ -93,7 +93,7 @@ def main():
         versions = ['NoAts', 'Embedded', 'Counts', 'OneHot']
         for version in versions:
             if version == 'NoAts':
-                ats_cols = [f"{i}Ats" for i in range(1, 11)]
+                ats_cols = [f"{i}Ats" for i in range(1, settings['ats_resolution']+1)]
                 X, y = load_data_embedded(case, settings)
                 X = X.drop(ats_cols, axis=1)
             elif version == "Embedded":
......
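The NoAts change above derives the dropped columns from the config instead of a hardcoded 10; a small illustration of what the comprehension yields (ats_resolution: 10 is the value visible in the yaml hunk at the top of this diff):

```
# Stand-in for load_settings('fall_emb.yaml'); only the key used here is shown
settings = {'ats_resolution': 10}
ats_cols = [f"{i}Ats" for i in range(1, settings['ats_resolution'] + 1)]
print(ats_cols)  # ['1Ats', '2Ats', ..., '10Ats']
```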
@@ -29,5 +29,4 @@ def main():
file_writer.write_pickle(ic, pt.INTERIM_DATA_DIR, 'ic.pkl')