Commit adf81c60 authored by Christian Marius Lillelund's avatar Christian Marius Lillelund
Browse files

changed alarm surv prob to the inverse

parent f35b3046
Pipeline #103352 passed with stage
in 4 minutes and 41 seconds
......@@ -105,7 +105,7 @@ def main():
clf_names = ['KNN', 'SVM', 'LR', 'XGB', 'RF', 'MLP']
num_clfs = len(clf_names)
metrics = ['accuracy', 'precision', 'recall', 'roc_auc', 'average_precision', 'f1']
cases = ["Compliance"]
cases = ["Complete"]
for case in cases:
target_settings = load_config(pt.CONFIGS_DIR, f'{case.lower()}.yaml')
output_filename = f"{case} model baseline.csv"
......
"""
evaluate_gender_bias.py
====================================
Script to evaluate gender bias in the Complete case.
"""
from pathlib import Path
......@@ -21,28 +21,37 @@ import matplotlib.pyplot as plt
from utility.config import load_config
def main():
settings = load_config(pt.CONFIGS_DIR, "fall.yaml")
target_name = "Compliance"
settings = load_config(pt.CONFIGS_DIR, f"{target_name.lower()}.yaml")
protected_col_name = "Gender"
y_col_name="Fall"
y_col_name=target_name
# Load the data
file_name = "fall_emb.csv"
dl = data_loader.FallDataLoader(pt.PROCESSED_DATA_DIR, file_name, settings).load_data()
dl = data_loader.ComplianceDataLoader(pt.PROCESSED_DATA_DIR,
f"{target_name.lower()}_emb.csv",
settings).load_data()
X, y = dl.get_data()
X = pd.DataFrame(X)
y = pd.Series(y, name=target_name)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
stratify=y, random_state=0)
neg, pos = np.bincount(y)
scale_pos_weight = neg / pos
params = {"n_estimators": 400,
"objective": "binary:logistic",
"scale_pos_weight": scale_pos_weight,
"booster": "gbtree",
"max_depth": 5,
"gamma": 5,
"colsample_bytree": 1,
"min_child_weight": 8,
"reg_alpha": 10,
"reg_lambda": 0.9,
"learning_rate": 0.05,
"subsample": 0.8,
"use_label_encoder": False,
"learning_rate": 0.1,
"eval_metric": "logloss",
"random_state": 0
}
"objective": "binary:logistic",
"random_state": 0}
model = xgb.XGBClassifier(**params)
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
......@@ -51,10 +60,12 @@ def main():
i=0
y_valid_pred = 0*y
valid_acc, valid_pre, valid_recall, valid_roc_auc = list(), list(), list(), list()
for train_index, valid_index in skf.split(X_train, y_train):
X_train_split, X_valid_split = X_train.iloc[train_index,:], X_train.iloc[valid_index,:]
y_train_split, y_valid_split = y_train.iloc[train_index], y_train.iloc[valid_index]
optimize_rounds = True
early_stopping_rounds = 50
......
......@@ -144,7 +144,7 @@ def predict_alarm(incoming_data: InputData):
surv_probs = np.array([float(x) for x in surv_func[0].y][:360])
surv_probs = np.mean(surv_probs.reshape(-1, 30), axis=1)
surv_probs = list(map((lambda x: float(round(100*Decimal(x), 1))), surv_probs))
prob_after_one_year = 100 - surv_probs[-1]
alarm_arguments = generate_alarm_arguments(df, ats_resolution, prob_after_one_year)
......
class XgbClassifier(BaseClassifer):
    """XGBoost classifier.

    Wraps a tuned ``xgb.XGBClassifier`` behind the project's
    ``BaseClassifer`` interface.
    """

    def make_model(self):
        """Build and return the configured XGBoost classifier.

        Returns:
            xgb.XGBClassifier: an unfitted binary classifier with
            hand-tuned hyperparameters.

        NOTE(review): the pasted diff interleaved the old and new
        parameter dicts (duplicate keys, broken braces); this is the
        reconstructed post-commit version with only the added lines kept.
        """
        params = {"n_estimators": 400,
                  "booster": "gbtree",
                  "max_depth": 9,
                  "gamma": 1,
                  "colsample_bytree": 0.5,
                  "min_child_weight": 1,
                  "reg_alpha": 10,
                  "reg_lambda": 0.9,
                  "learning_rate": 0.2,
                  "subsample": 0.8,
                  # use_label_encoder is deprecated in newer xgboost releases;
                  # kept here to match the committed configuration.
                  "use_label_encoder": False,
                  "eval_metric": "logloss",
                  "objective": "binary:logistic",
                  "random_state": 0}
        return xgb.XGBClassifier(**params)
class RfClassifier(BaseClassifer):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment