Commit adf81c60 authored by Christian Marius Lillelund's avatar Christian Marius Lillelund

changed alarm surv prob to the inverse

parent f35b3046
Pipeline #103352 passed with stage in 4 minutes and 41 seconds
@@ -105,7 +105,7 @@ def main():
     clf_names = ['KNN', 'SVM', 'LR', 'XGB', 'RF', 'MLP']
     num_clfs = len(clf_names)
     metrics = ['accuracy', 'precision', 'recall', 'roc_auc', 'average_precision', 'f1']
-    cases = ["Compliance"]
+    cases = ["Complete"]
     for case in cases:
         target_settings = load_config(pt.CONFIGS_DIR, f'{case.lower()}.yaml')
         output_filename = f"{case} model baseline.csv"
...
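For orientation: the hunk above renames the evaluated case from "Compliance" to "Complete" and scores each listed classifier on six metrics. The loop body is collapsed in this diff, so the following is only a hypothetical sketch of how one classifier could be scored on those metrics with scikit-learn's cross_validate, using synthetic stand-in data rather than the project's:

import pandas as pd
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_validate

metrics = ['accuracy', 'precision', 'recall', 'roc_auc', 'average_precision', 'f1']
X, y = make_classification(n_samples=500, random_state=0)  # stand-in data, not the project's

# Each metric name above is a built-in scikit-learn scorer string.
scores = cross_validate(RandomForestClassifier(random_state=0), X, y, cv=5, scoring=metrics)
print(pd.DataFrame([{m: scores[f'test_{m}'].mean() for m in metrics}], index=['RF']).round(3))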
""" """
evaluate_gender_bias.py evaluate_gender_bias.py
==================================== ====================================
Script to evaluate gender bias in the Fall case. Script to evaluate gender bias in the Complete case.
""" """
from pathlib import Path from pathlib import Path
...@@ -21,28 +21,37 @@ import matplotlib.pyplot as plt ...@@ -21,28 +21,37 @@ import matplotlib.pyplot as plt
from utility.config import load_config from utility.config import load_config
def main(): def main():
settings = load_config(pt.CONFIGS_DIR, "fall.yaml") target_name = "Compliance"
settings = load_config(pt.CONFIGS_DIR, f"{target_name.lower()}.yaml")
protected_col_name = "Gender" protected_col_name = "Gender"
y_col_name="Fall" y_col_name=target_name
# Load the data # Load the data
file_name = "fall_emb.csv" dl = data_loader.ComplianceDataLoader(pt.PROCESSED_DATA_DIR,
dl = data_loader.FallDataLoader(pt.PROCESSED_DATA_DIR, file_name, settings).load_data() f"{target_name.lower()}_emb.csv",
settings).load_data()
X, y = dl.get_data() X, y = dl.get_data()
X = pd.DataFrame(X)
y = pd.Series(y, name=target_name)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
stratify=y, random_state=0) stratify=y, random_state=0)
neg, pos = np.bincount(y)
scale_pos_weight = neg / pos
params = {"n_estimators": 400, params = {"n_estimators": 400,
"objective": "binary:logistic", "booster": "gbtree",
"scale_pos_weight": scale_pos_weight, "max_depth": 5,
"gamma": 5,
"colsample_bytree": 1,
"min_child_weight": 8,
"reg_alpha": 10,
"reg_lambda": 0.9,
"learning_rate": 0.05,
"subsample": 0.8,
"use_label_encoder": False, "use_label_encoder": False,
"learning_rate": 0.1,
"eval_metric": "logloss", "eval_metric": "logloss",
"random_state": 0 "objective": "binary:logistic",
} "random_state": 0}
model = xgb.XGBClassifier(**params) model = xgb.XGBClassifier(**params)
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0) skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
...@@ -51,10 +60,12 @@ def main(): ...@@ -51,10 +60,12 @@ def main():
i=0 i=0
y_valid_pred = 0*y y_valid_pred = 0*y
valid_acc, valid_pre, valid_recall, valid_roc_auc = list(), list(), list(), list() valid_acc, valid_pre, valid_recall, valid_roc_auc = list(), list(), list(), list()
for train_index, valid_index in skf.split(X_train, y_train): for train_index, valid_index in skf.split(X_train, y_train):
X_train_split, X_valid_split = X_train.iloc[train_index,:], X_train.iloc[valid_index,:] X_train_split, X_valid_split = X_train.iloc[train_index,:], X_train.iloc[valid_index,:]
y_train_split, y_valid_split = y_train.iloc[train_index], y_train.iloc[valid_index] y_train_split, y_valid_split = y_train.iloc[train_index], y_train.iloc[valid_index]
optimize_rounds = True optimize_rounds = True
early_stopping_rounds = 50 early_stopping_rounds = 50
......
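The added neg, pos = np.bincount(y) and scale_pos_weight = neg / pos lines compute XGBoost's conventional class-imbalance weight: the ratio of negative to positive samples, which up-weights errors on the minority class when passed as XGBClassifier(scale_pos_weight=...). A minimal self-contained illustration with synthetic labels:

import numpy as np

y = np.array([0] * 900 + [1] * 100)   # synthetic labels: 9:1 class imbalance
neg, pos = np.bincount(y)             # counts of the 0 and 1 labels
print(neg / pos)                      # 9.0 — positive-class errors weigh 9x in the loss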
@@ -144,7 +144,7 @@ def predict_alarm(incoming_data: InputData):
     surv_probs = np.array([float(x) for x in surv_func[0].y][:360])
     surv_probs = np.mean(surv_probs.reshape(-1, 30), axis=1)
     surv_probs = list(map((lambda x: float(round(100*Decimal(x), 1))), surv_probs))
-    prob_after_one_year = surv_probs[-1]
+    prob_after_one_year = 100 - surv_probs[-1]
     alarm_arguments = generate_alarm_arguments(df, ats_resolution, prob_after_one_year)
...
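This hunk is the change named in the commit message: the alarm endpoint previously reported the one-year survival probability itself and now reports its complement, the probability that the event occurs within the year. A worked illustration with synthetic survival probabilities in place of the model's surv_func output (the Decimal-based rounding is simplified here):

import numpy as np

surv_probs = np.linspace(0.99, 0.62, 360)                    # daily S(t) for days 1..360
surv_probs = np.mean(surv_probs.reshape(-1, 30), axis=1)     # collapse to 12 monthly means
surv_probs = [round(100 * float(p), 1) for p in surv_probs]  # express as percentages

prob_after_one_year = 100 - surv_probs[-1]  # event probability = 100% minus survival
print(prob_after_one_year)                  # 36.5 for these synthetic numbers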
@@ -73,11 +73,19 @@ class XgbClassifier(BaseClassifer):
     """XGBoost classifier."""
     def make_model(self):
         params = {"n_estimators": 400,
-                  "learning_rate": 0.1,
-                  "objective": "binary:logistic",
-                  "random_state": 0,
-                  "use_label_encoder": False,
-                  "eval_metric": 'logloss'}
+                  "booster": "gbtree",
+                  "max_depth": 9,
+                  "gamma": 1,
+                  "colsample_bytree": 0.5,
+                  "min_child_weight": 1,
+                  "reg_alpha": 10,
+                  "reg_lambda": 0.9,
+                  "learning_rate": 0.2,
+                  "subsample": 0.8,
+                  "use_label_encoder": False,
+                  "eval_metric": "logloss",
+                  "objective": "binary:logistic",
+                  "random_state": 0}
         return xgb.XGBClassifier(**params)

 class RfClassifier(BaseClassifer):
...
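The shared XgbClassifier wrapper gets the same style of retuning as the gender-bias script, though with different values (deeper trees via max_depth 9, heavier column subsampling via colsample_bytree 0.5). Since the BaseClassifer plumbing is not shown in this diff, the following is a hypothetical stand-alone use of the tuned configuration on synthetic data:

import xgboost as xgb
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

params = {"n_estimators": 400, "booster": "gbtree", "max_depth": 9, "gamma": 1,
          "colsample_bytree": 0.5, "min_child_weight": 1, "reg_alpha": 10,
          "reg_lambda": 0.9, "learning_rate": 0.2, "subsample": 0.8,
          "use_label_encoder": False, "eval_metric": "logloss",
          "objective": "binary:logistic", "random_state": 0}

X, y = make_classification(n_samples=1000, weights=[0.8], random_state=0)  # synthetic data
X_tr, X_te, y_tr, y_te = train_test_split(X, y, stratify=y, random_state=0)
model = xgb.XGBClassifier(**params).fit(X_tr, y_tr)
print(model.score(X_te, y_te))  # holdout accuracy of the tuned configuration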