# Source code for utility.metrics
"""
metrics.py
====================================
Utility metrics functions.
"""
import numpy as np
import pandas as pd
import xgboost
from typing import List, Tuple
from sklearn.metrics import confusion_matrix
def compute_mean(values: List) -> float:
    """
    Computes the mean of a list of values, scaled by 100 and rounded
    :param values: values to average
    :return: the mean multiplied by 100, rounded to three decimals
    """
    scaled_mean = np.mean(values) * 100
    return round(scaled_mean, 3)
def compute_std(values: List) -> float:
    """
    Computes the standard deviation of a list of values, rounded
    :param values: values to measure the spread of
    :return: the standard deviation (NumPy default, i.e. ddof=0), rounded
        to three decimals
    """
    spread = np.std(values)
    return round(spread, 3)
def gini_xgb(preds: np.array, dtrain: xgboost.core.DMatrix) -> List[Tuple]:
    """
    Computes the negated gini score in the (name, value) list form
    :param preds: predictions to score
    :param dtrain: a DMatrix whose labels are read with ``get_label()``
    :return: a single-element list holding ``('gini', -score)``
    """
    # NOTE(review): the sign flip presumably lets a minimising evaluation
    # loop favour a higher gini - confirm against the training call site.
    true_labels = dtrain.get_label()
    return [('gini', -eval_gini(true_labels, preds))]
def eval_gini(y_true: np.array, y_prob: np.array) -> float:
    """
    Computes the gini score of binary labels ranked by predicted score

    The labels are ordered by ``y_prob``; for every positive label the number
    of negative labels ranked above it is counted, and the total is
    normalised by ``positives * negatives``:
    ``1 - 2 * misordered_pairs / (positives * negatives)``.
    A perfect ranking gives 1.0 and a fully reversed ranking gives -1.0.

    :param y_true: the true labels (0/1, booleans are accepted)
    :param y_prob: the predicted scores used to rank the labels
    :return: the gini score, or NaN when it is undefined (empty input, or
        labels that are all positive or all negative)
    """
    labels = np.asarray(y_true, dtype=float)
    # Rank the labels from the highest predicted score down to the lowest.
    ranked = labels[np.argsort(y_prob)][::-1]
    n = len(ranked)
    ntrue = ranked.sum()
    denominator = ntrue * (n - ntrue)
    if denominator == 0:
        # No (positive, negative) pair exists, so the score is undefined;
        # return NaN explicitly instead of dividing by zero.
        return float("nan")
    negatives = 1 - ranked
    # Exclusive running total: negatives ranked strictly above each position.
    negatives_above = np.cumsum(negatives) - negatives
    misordered = np.dot(ranked, negatives_above)
    return float(1 - 2 * misordered / denominator)
def _rates_from_counts(TN, FP, FN, TP) -> dict:
    """Derive the rate metrics of one confusion matrix from its four counts."""
    # A group may lack positives, negatives or predictions of one class, which
    # makes some denominators zero. The NumPy results (inf / nan) are kept as
    # they are; only the accompanying RuntimeWarnings are silenced.
    with np.errstate(divide="ignore", invalid="ignore"):
        TPR = TP / (TP + FN)
        TNR = TN / (TN + FP)
        PPV = TP / (TP + FP)
        NPV = TN / (TN + FN)
        FPR = FP / (FP + TN)
        FNR = FN / (TP + FN)
        FDR = FP / (TP + FP)
        ACC = (TP + TN) / (TP + FP + FN + TN)
        LRplus = TPR / FPR
        LRminus = FNR / TNR
        F1 = 2 * (PPV * TPR) / (PPV + TPR)
    return {"TPR": TPR, "TNR": TNR, "FPR": FPR, "FNR": FNR, "PPV": PPV,
            "NPV": NPV, "FDR": FDR, "ACC": ACC, "F1": F1, "LRplus": LRplus,
            "LRminus": LRminus, "TN": TN, "FP": FP, "FN": FN, "TP": TP}


def get_cm_by_protected_variable(df: pd.DataFrame,
                                 protected_col_name: str,
                                 y_target_name: str,
                                 y_pred_name: str) -> pd.DataFrame:
    """
    Makes a confusion matrix for each value of a protected variable

    For every distinct value of ``protected_col_name`` the binary (labels 0
    and 1) confusion matrix of target versus prediction is computed, together
    with the rates derived from it.

    :param df: dataframe with the data
    :param protected_col_name: name of protected variable
    :param y_target_name: name of the target
    :param y_pred_name: name of the output
    :return: dataframe with one row per protected value and the columns
        protected variable, FPR, FNR, TPR, TNR, PPV, NPV, FDR, ACC, F1,
        LRplus, LRminus, TN, FP, FN, TP. Rates with a zero denominator are
        inf or NaN. An empty input yields an empty frame with these columns.
    """
    columns = [protected_col_name, "FPR", "FNR", "TPR", "TNR", "PPV", "NPV",
               "FDR", "ACC", "F1", "LRplus", "LRminus", "TN", "FP", "FN", "TP"]
    # Rows are collected first and the frame is built once: DataFrame.append
    # was removed in pandas 2.0 and copied the whole frame on every call.
    rows = []
    for name in df[protected_col_name].unique():
        group = df[df[protected_col_name] == name]
        TN, FP, FN, TP = confusion_matrix(group[y_target_name].tolist(),
                                          group[y_pred_name].tolist(),
                                          labels=[0, 1]).ravel()
        row = {protected_col_name: name}
        row.update(_rates_from_counts(TN, FP, FN, TP))
        rows.append(row)
    return pd.DataFrame(rows, columns=columns)