# Source code for utility.metrics

"""
metrics.py
====================================
Utility metrics functions.
"""

import numpy as np
import pandas as pd
import xgboost
from typing import List, Tuple
from sklearn.metrics import confusion_matrix

def compute_mean(values: List) -> float:
    """
    Computes the mean of a list of values, expressed as a percentage

    :param values: values to average
    :return: the mean multiplied by 100 and rounded to 3 decimals
    """
    mean_as_percent = np.mean(values) * 100
    return round(mean_as_percent, 3)
def compute_std(values: List) -> float:
    """
    Computes the standard deviation of a list of values

    :param values: values to measure the spread of
    :return: the standard deviation (numpy default, i.e. ddof=0) rounded to 3 decimals
    """
    spread = np.std(values)
    return round(spread, 3)
def gini_xgb(preds: np.array, dtrain: xgboost.core.DMatrix) -> List[Tuple]:
    """
    Computes the negated gini score of predictions against a DMatrix's labels

    :param preds: predictions to score
    :param dtrain: a DMatrix holding the true labels
    :return: a single-element list with the tuple ('gini', negated gini score)
    """
    true_labels = dtrain.get_label()
    negated_gini = -eval_gini(true_labels, preds)
    return [('gini', negated_gini)]
def eval_gini(y_true: np.array, y_prob: np.array) -> float:
    """
    Computes the gini score

    The score is ``1 - 2 * D / (P * (n - P))`` where ``n`` is the number of
    rows, ``P`` is the sum of the labels and ``D`` is, summed over all rows,
    the row's label times the amount of "negative mass" (``1 - label``) ranked
    strictly above it by ``y_prob``. For 0/1 labels without tied predictions
    this equals ``2 * AUC - 1``.

    :param y_true: the true labels (0/1); any array-like is accepted
    :param y_prob: the predicted scores, same length as ``y_true``
    :return: the gini score, or ``nan`` when it is undefined (empty input or
        labels that are all 0 / all 1)
    """
    # Force float64: labels coming from xgboost are float32, and accumulating
    # pair counts in float32 stops being exact above 2**24.
    labels = np.asarray(y_true, dtype=np.float64)

    # Walk the rows from the highest prediction to the lowest. Ties are
    # resolved by whatever order np.argsort yields.
    descending = labels[np.argsort(y_prob)][::-1]

    negatives = 1.0 - descending
    # Exclusive running sum: negative mass ranked strictly above each row.
    negatives_above = np.cumsum(negatives) - negatives
    discordant = float(np.dot(descending, negatives_above))

    ntrue = float(descending.sum())
    n = descending.size
    denominator = ntrue * (n - ntrue)
    if denominator == 0:
        # No positive/negative pair exists, so the score is undefined.
        return float("nan")
    return 1.0 - 2.0 * discordant / denominator
def get_cm_by_protected_variable(df: pd.DataFrame,
                                 protected_col_name: str,
                                 y_target_name: str,
                                 y_pred_name: str) -> pd.DataFrame:
    """
    Makes a confusion matrix for each value of a protected variable

    For every distinct value of ``protected_col_name`` the rows with that value
    are scored with a binary confusion matrix (``labels=[0, 1]``) and a set of
    derived rates is computed from its four cells.

    :param df: dataframe with the data
    :param protected_col_name: name of protected variable
    :param y_target_name: name of the target column
    :param y_pred_name: name of the predicted-output column
    :return: dataframe with one row per protected value and the columns
        ``protected_col_name``, FPR, FNR, TPR, TNR, PPV, NPV, FDR, ACC, F1,
        LRplus, LRminus, TN, FP, FN, TP. Rates whose denominator is zero come
        out as ``nan``/``inf``. With no groups, an empty frame with these same
        columns is returned.
    """
    # Fixed column order, matching what the incremental construction produced.
    columns = [protected_col_name, "FPR", "FNR", "TPR", "TNR", "PPV", "NPV",
               "FDR", "ACC", "F1", "LRplus", "LRminus", "TN", "FP", "FN", "TP"]

    # Collect plain dicts and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and was quadratic when called in a loop.
    rows = []
    for name in df[protected_col_name].unique():
        group = df[df[protected_col_name] == name]
        tn, fp, fn, tp = confusion_matrix(list(group[y_target_name]),
                                          list(group[y_pred_name]),
                                          labels=[0, 1]).ravel()

        # A group may lack positives, negatives or predictions of one class;
        # the resulting nan/inf rates are expected, so silence numpy's
        # divide-by-zero warnings for them.
        with np.errstate(divide="ignore", invalid="ignore"):
            tpr = tp / (tp + fn)
            tnr = tn / (tn + fp)
            ppv = tp / (tp + fp)
            npv = tn / (tn + fn)
            fpr = fp / (fp + tn)
            fnr = fn / (tp + fn)
            fdr = fp / (tp + fp)
            acc = (tp + tn) / (tp + fp + fn + tn)
            lr_plus = tpr / fpr
            lr_minus = fnr / tnr
            f1 = 2 * (ppv * tpr) / (ppv + tpr)

        rows.append({protected_col_name: name,
                     "TPR": tpr, "TNR": tnr,
                     "FPR": fpr, "FNR": fnr,
                     "PPV": ppv, "NPV": npv,
                     "FDR": fdr, "ACC": acc, "F1": f1,
                     "LRplus": lr_plus, "LRminus": lr_minus,
                     "TN": tn, "FP": fp, "FN": fn, "TP": tp})

    return pd.DataFrame(rows, columns=columns)