"""
classifiers.py
====================================
Module to store classifers used for CV.
"""
from abc import ABC, abstractmethod
from typing import Tuple, List
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_validate, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from keras.wrappers.scikit_learn import KerasClassifier
import numpy as np
import tensorflow as tf
import xgboost as xgb
class BaseClassifer(ABC):
    """
    Base class for classifiers.

    Concrete subclasses implement :meth:`make_model` to supply a
    sklearn-compatible estimator; :meth:`evaluate` then runs a
    stratified 5-fold cross-validation over the stored dataset.
    """

    def __init__(self, X, y):
        """Initializes inputs and targets variables.

        :param X: feature matrix, passed straight through to sklearn's CV utilities
        :param y: target labels aligned with ``X``
        """
        self.X = X
        self.y = y

    @abstractmethod
    def make_model(self):
        """
        This method is an abstract method to be implemented
        by a concrete classifier. Must return a sklearn-compatible
        estimator object implementing 'fit'.
        """

    def evaluate(self, metrics: List, k: int) -> dict:
        """
        This method performs a stratified 5-fold cross-validation on the
        stored dataset X and y, shuffling folds with seed ``k``, and
        collects the per-split scores for each requested metric.

        :param metrics: list of sklearn scoring-metric names
        :param k: the random seed used to shuffle the folds
        :return: mapping of each metric name to its array of per-split test scores
        """
        skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=k)
        model = self.make_model()
        res_validate = cross_validate(model, self.X, self.y, cv=skf, scoring=metrics)
        # cross_validate prefixes every test score with 'test_'; strip that
        # back to the caller-supplied metric names.
        return {metric: res_validate[f'test_{metric}'] for metric in metrics}
class KnnClassifier(BaseClassifer):
    """KNN classifier."""

    def make_model(self):
        """Build a distance-weighted 10-NN estimator using the Manhattan metric (p=1)."""
        hyperparams = {
            'n_neighbors': 10,
            'weights': 'distance',
            'p': 1,
        }
        return KNeighborsClassifier(**hyperparams)
class SvmClassifier(BaseClassifer):
    """Support vector machine classifier."""

    def make_model(self):
        """Build an SVC with probability estimates enabled and a fixed random seed."""
        return SVC(probability=True, random_state=0)
class LrClassifier(BaseClassifer):
    """Logistic regression classifier."""

    def make_model(self):
        """Build a logistic-regression estimator with an extended iteration budget."""
        return LogisticRegression(random_state=0, max_iter=1000)
class XgbClassifier(BaseClassifer):
    """XGBoost classifier."""

    def make_model(self):
        """Build a gradient-boosted-tree binary classifier with tuned hyperparameters."""
        return xgb.XGBClassifier(
            n_estimators=400,
            booster="gbtree",
            max_depth=9,
            gamma=1,
            colsample_bytree=0.5,
            min_child_weight=1,
            reg_alpha=10,
            reg_lambda=0.9,
            learning_rate=0.2,
            subsample=0.8,
            use_label_encoder=False,
            eval_metric="logloss",
            objective="binary:logistic",
            random_state=0,
        )
class RfClassifier(BaseClassifer):
    """Random Forest classifier."""

    def make_model(self):
        """Build an 800-tree random forest with a fixed random seed."""
        forest_size = 800
        return RandomForestClassifier(random_state=0, n_estimators=forest_size)
class MlpClassifier(BaseClassifer):
    """Multi-layer Perceptron classifier."""

    def make_model(self):
        """Wrap a small Keras MLP in a sklearn-compatible KerasClassifier."""

        def build_network():
            # Funnel architecture 80 -> 20 -> 10 -> 1: ReLU hidden units,
            # dropout between layers, sigmoid output for binary classification.
            # Input width is taken from the stored feature matrix.
            layers = tf.keras.layers
            net = tf.keras.models.Sequential()
            net.add(layers.Dense(80,
                                 input_dim=self.X.shape[1],
                                 activation='relu'))
            net.add(layers.Dropout(0.35))
            net.add(layers.Dense(20, activation='relu'))
            net.add(layers.Dropout(0.15))
            net.add(layers.Dense(10, activation='relu'))
            net.add(layers.Dropout(0.15))
            net.add(layers.Dense(1, activation='sigmoid'))
            tracked_metrics = [
                tf.keras.metrics.BinaryAccuracy(name='accuracy'),
                tf.keras.metrics.Precision(name='precision'),
                tf.keras.metrics.Recall(name='recall'),
                tf.keras.metrics.AUC(name='roc_auc'),
                tf.keras.metrics.AUC(name='pr_auc', curve='PR'),
            ]
            net.compile(loss='binary_crossentropy',
                        optimizer="Adam",
                        metrics=tracked_metrics)
            return net

        return KerasClassifier(build_network, epochs=20,
                               batch_size=64, verbose=False)