# Source code for tools.classifiers

"""
classifiers.py
====================================
Module to store classifiers used for CV.
"""

from abc import ABC, abstractmethod
from typing import Tuple, List
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_validate, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from keras.wrappers.scikit_learn import KerasClassifier
import numpy as np
import tensorflow as tf
import xgboost as xgb

class BaseClassifer(ABC):
    """
    Base class for classifiers.

    Stores the dataset (``X``, ``y``) and provides stratified K-fold
    cross-validated evaluation for any concrete subclass implementing
    :meth:`make_model`.
    """

    def __init__(self, X, y):
        """Initializes inputs and targets variables.

        :param X: feature matrix (samples x features)
        :param y: target vector
        """
        self.X = X
        self.y = y

    @abstractmethod
    def make_model(self):
        """
        This method is an abstract method to be implemented by a concrete
        classifier. Must return a sklearn-compatible estimator object
        implementing 'fit'.
        """

    def evaluate(self, metrics: List, k: int, n_splits: int = 5) -> dict:
        """
        This method performs cross validation for a given seed on the
        dataset X and y and outputs the results of the splits given a
        list of scoring metrics.

        :param metrics: scoring metric names understood by sklearn
        :param k: the seed to use for shuffling the folds
        :param n_splits: number of stratified folds (default 5, matching
            the original behavior)
        :return: dict mapping each metric name to its per-fold scores

        .. note::
            The original annotated the return as ``Tuple[dict, np.ndarray]``
            but only the dict was ever returned; the annotation is fixed here.
        """
        skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=k)
        model = self.make_model()
        res_validate = cross_validate(model, self.X, self.y, cv=skf,
                                      scoring=metrics)
        # cross_validate prefixes each requested metric with 'test_'.
        return {metric: res_validate[f'test_{metric}'] for metric in metrics}
class KnnClassifier(BaseClassifer):
    """KNN classifier."""

    def make_model(self):
        """Return a 10-neighbor KNN estimator using distance weighting
        and the Manhattan metric (p=1)."""
        estimator = KNeighborsClassifier(
            n_neighbors=10,
            weights='distance',
            p=1,
        )
        return estimator
class SvmClassifier(BaseClassifer):
    """Support vector machine classifier."""

    def make_model(self):
        """Return an SVC with probability estimates enabled and a fixed
        random state for reproducibility."""
        return SVC(probability=True, random_state=0)
class LrClassifier(BaseClassifer):
    """Logistic regression classifier."""

    def make_model(self):
        """Return a logistic-regression estimator; max_iter is raised to
        1000 so the solver converges on harder datasets."""
        return LogisticRegression(random_state=0, max_iter=1000)
class XgbClassifier(BaseClassifer):
    """XGBoost classifier."""

    def make_model(self):
        """Return a gradient-boosted tree classifier for binary targets
        with a fixed, pre-tuned set of hyperparameters."""
        return xgb.XGBClassifier(
            n_estimators=400,
            booster="gbtree",
            max_depth=9,
            gamma=1,
            colsample_bytree=0.5,
            min_child_weight=1,
            reg_alpha=10,
            reg_lambda=0.9,
            learning_rate=0.2,
            subsample=0.8,
            use_label_encoder=False,
            eval_metric="logloss",
            objective="binary:logistic",
            random_state=0,
        )
class RfClassifier(BaseClassifer):
    """Random Forest classifier."""

    def make_model(self):
        """Return an 800-tree random forest with a fixed random state."""
        forest = RandomForestClassifier(random_state=0, n_estimators=800)
        return forest
class MlpClassifier(BaseClassifer):
    """Multi-layer Perceptron classifier."""

    def make_model(self):
        """Return a sklearn-compatible wrapper around a small Keras MLP.

        Architecture: three ReLU hidden layers (80 / 20 / 10 units),
        each followed by dropout, and a single sigmoid output unit for
        binary classification. Trained for 20 epochs with batch size 64.
        """
        # Input width is fixed by the stored feature matrix.
        n_features = self.X.shape[1]

        def build_network():
            eval_metrics = [
                tf.keras.metrics.BinaryAccuracy(name='accuracy'),
                tf.keras.metrics.Precision(name='precision'),
                tf.keras.metrics.Recall(name='recall'),
                tf.keras.metrics.AUC(name='roc_auc'),
                tf.keras.metrics.AUC(name='pr_auc', curve='PR'),
            ]
            net = tf.keras.models.Sequential([
                tf.keras.layers.Dense(80, input_dim=n_features,
                                      activation='relu'),
                tf.keras.layers.Dropout(0.35),
                tf.keras.layers.Dense(20, activation='relu'),
                tf.keras.layers.Dropout(0.15),
                tf.keras.layers.Dense(10, activation='relu'),
                tf.keras.layers.Dropout(0.15),
                tf.keras.layers.Dense(1, activation='sigmoid'),
            ])
            net.compile(loss='binary_crossentropy', optimizer="Adam",
                        metrics=eval_metrics)
            return net

        return KerasClassifier(build_network, epochs=20, batch_size=64,
                               verbose=False)