"""
file_writer.py
====================================
File writer module to write files.
"""
import os
import pandas as pd
from typing import List
import matplotlib.pyplot as plt
import joblib
import numpy as np
import csv
import pickle
from io import StringIO, BytesIO
def write_csv(df: pd.DataFrame,
              outfile: StringIO,
              date_format: str='%d-%m-%Y',
              index: bool=False) -> None:
    """
    Serialize a dataframe as CSV into an already-open text stream.

    The work is delegated to the dataframe's own to_csv() method.

    :param df: dataframe to serialize
    :param outfile: text output stream that receives the CSV data
    :param date_format: date format handed to to_csv() for datetime values
    :param index: whether the row names (index) are written as well
    :return: None
    """
    csv_options = {'date_format': date_format, 'index': index}
    df.to_csv(outfile, **csv_options)
def write_embedding(mapping: dict, outfile: StringIO) -> None:
    """
    Dump an embedding mapping to a text stream in CSV form.

    A fixed header row ('Ats', 'Embedding') is written first, followed by
    one row per mapping entry holding the key and its value, in the
    mapping's iteration order.

    :param mapping: mapping dict whose items become the CSV rows
    :param outfile: text output stream that receives the CSV data
    :return: None
    """
    header = ['Ats', 'Embedding']
    table = csv.writer(outfile)
    table.writerow(header)
    # Each (key, value) pair is already a two-column row.
    table.writerows(mapping.items())
def write_array(data: np.ndarray, outfile: BytesIO) -> None:
    """
    Store a NumPy array in a binary stream via np.save().

    :param data: array to store
    :param outfile: binary output stream that receives the saved array
    :return: None
    """
    np.save(file=outfile, arr=data)
def write_pickle(data: object, outfile: BytesIO) -> None:
    """
    Pickle an object into a binary stream.

    The object is serialized with pickle.dump() using the module's default
    protocol.

    :param data: object to pickle; annotated as ``object`` because any
        value may be passed (the builtin function ``any`` is not a type)
    :param outfile: binary output stream that receives the pickled bytes
    :return: None
    """
    pickle.dump(data, outfile)
def write_joblib(data: object, outfile: BytesIO) -> None:
    """
    Persist an object into a binary stream with joblib.

    The object is serialized with joblib.dump().

    :param data: object to persist; annotated as ``object`` because any
        value may be passed (the builtin function ``any`` is not a type)
    :param outfile: binary output stream that receives the joblib data
    :return: None
    """
    joblib.dump(data, outfile)
def write_shap_importance_plot(features: List[str],
                               importances: List[float],
                               title: str,
                               outfile: BytesIO) -> None:
    """
    Write a horizontal bar chart of SHAP feature importances as a PDF.

    The chart is drawn on a figure created for this call only. That figure
    is closed once it has been saved, so no figure is left open and any
    figure the caller already had open is left alone.

    :param features: feature names, one per bar
    :param importances: feature importances, in the same order as features
    :param title: plot title
    :param outfile: binary output stream that receives the PDF
    :return: None
    """
    fig, ax = plt.subplots()
    try:
        ax.set_title(title)
        ax.set_xlabel('SHAP values')
        ax.set_ylabel('Feature')
        # barh stacks bars from the bottom up, so both sequences are
        # reversed to put the first feature at the top of the chart.
        ax.barh(features[::-1], importances[::-1])
        fig.savefig(outfile, format='pdf', bbox_inches="tight")
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)
def write_cv_plot(means: List, stds: List, metric: str,
                  num_iter: int, clf_names: List, title: str,
                  subtitle: str, outfile: BytesIO) -> None:
    """
    Write a plot of the results from a CV process as a PDF.

    One error-bar panel is drawn per classifier on a fixed 2 x 3 grid with
    shared axes, so at most six classifiers are shown. When fewer are
    given, the remaining panels are left empty. The figure is created for
    this call only and closed once it has been saved.

    :param means: the mean values obtained; means[i] belongs to clf_names[i]
    :param stds: the standard deviations obtained; stds[i] belongs to
        clf_names[i]
    :param metric: the metric used, shown as the y-axis label
    :param num_iter: the number of iterations; the x values are
        range(num_iter)
    :param clf_names: names of classifiers used, one panel title each
    :param title: plot title; anything after its last dot is dropped as a
        file extension
    :param subtitle: plot subtitle, appended to the title
    :param outfile: binary output stream that receives the PDF
    :return: None
    """
    x = range(num_iter)
    fig, axs = plt.subplots(nrows=2, ncols=3, sharex=True, sharey=True)
    try:
        # zip stops at the shorter of panels and names, so a short
        # clf_names no longer raises IndexError.
        for i, (ax, clf_name) in enumerate(zip(axs.flat, clf_names)):
            ax.set_title(clf_name)
            ax.errorbar(x, means[i], yerr=stds[i],
                        fmt='o', color='black', ecolor='lightgray',
                        elinewidth=3, capsize=0)
        fig.suptitle(f"{os.path.splitext(title)[0]} {subtitle}")
        # Label only the outer edge of the grid: bottom row and left column.
        plt.setp(axs[-1, :], xlabel='Seed')
        plt.setp(axs[:, 0], ylabel=metric)
        fig.savefig(outfile, format='pdf', bbox_inches="tight")
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)