Commit f41eccb3 authored by thecml

improved some docs

parent 608c8f93
Pipeline #100537 passed in 4 minutes and 28 seconds
.PHONY: clean data test lint requirements sync_data_to_s3 sync_data_from_s3
#################################################################################
# GLOBALS #
#################################################################################
PROJECT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
BUCKET = [OPTIONAL] your-bucket-for-syncing-data (do not include 's3://')
PROFILE = default
PROJECT_NAME = air
PYTHON_INTERPRETER = python3
ifeq (,$(shell which conda))
HAS_CONDA=False
else
HAS_CONDA=True
endif
#################################################################################
# COMMANDS #
#################################################################################
## Install Python Dependencies
requirements: test_environment
	$(PYTHON_INTERPRETER) -m pip install -U pip setuptools wheel
	$(PYTHON_INTERPRETER) -m pip install -r requirements.txt

## Make Dataset
data: requirements
	$(PYTHON_INTERPRETER) src/data/make_dataset.py data/raw data/processed

## Delete all compiled Python files
clean:
	find . -type f -name "*.py[co]" -delete
	find . -type d -name "__pycache__" -delete
## Lint using flake8
lint:
	flake8 src

## Run tests using pytest
test:
	PYTHONPATH=. pytest
## Upload Data to S3
sync_data_to_s3:
ifeq (default,$(PROFILE))
	aws s3 sync data/ s3://$(BUCKET)/data/
else
	aws s3 sync data/ s3://$(BUCKET)/data/ --profile $(PROFILE)
endif

## Download Data from S3
sync_data_from_s3:
ifeq (default,$(PROFILE))
	aws s3 sync s3://$(BUCKET)/data/ data/
else
	aws s3 sync s3://$(BUCKET)/data/ data/ --profile $(PROFILE)
endif
## Set up python interpreter environment
create_environment:
ifeq (True,$(HAS_CONDA))
	@echo ">>> Detected conda, creating conda environment."
ifeq (3,$(findstring 3,$(PYTHON_INTERPRETER)))
	conda create --name $(PROJECT_NAME) python=3
else
	conda create --name $(PROJECT_NAME) python=2.7
endif
	@echo ">>> New conda env created. Activate with:\nsource activate $(PROJECT_NAME)"
else
	$(PYTHON_INTERPRETER) -m pip install -q virtualenv virtualenvwrapper
	@echo ">>> Installing virtualenvwrapper if not already installed.\nMake sure the following lines are in shell startup file\n\
	export WORKON_HOME=$$HOME/.virtualenvs\nexport PROJECT_HOME=$$HOME/Devel\nsource /usr/local/bin/virtualenvwrapper.sh\n"
	@bash -c "source `which virtualenvwrapper.sh`;mkvirtualenv $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER)"
	@echo ">>> New virtualenv created. Activate with:\nworkon $(PROJECT_NAME)"
endif
## Test that the Python environment is set up correctly
test_environment:
	$(PYTHON_INTERPRETER) test_environment.py
#################################################################################
# PROJECT RULES #
#################################################################################
#################################################################################
# Self Documenting Commands #
#################################################################################
.DEFAULT_GOAL := help
# Inspired by <http://marmelab.com/blog/2016/02/29/auto-documented-makefile.html>
# sed script explained:
# /^##/:
# * save line in hold space
# * purge line
# * Loop:
# * append newline + line to hold space
# * go to next line
# * if line starts with doc comment, strip comment character off and loop
# * remove target prerequisites
# * append hold space (+ newline) to line
# * replace newline plus comments by `---`
# * print line
# Separate expressions are necessary because labels cannot be delimited by
# semicolon; see <http://stackoverflow.com/a/11799865/1968>
.PHONY: help
help:
	@echo "$$(tput bold)Available rules:$$(tput sgr0)"
	@echo
	@sed -n -e "/^## / { \
		h; \
		s/.*//; \
		:doc" \
		-e "H; \
		n; \
		s/^## //; \
		t doc" \
		-e "s/:.*//; \
		G; \
		s/\\n## /---/; \
		s/\\n/ /g; \
		p; \
	}" ${MAKEFILE_LIST} \
	| LC_ALL='C' sort --ignore-case \
	| awk -F '---' \
		-v ncol=$$(tput cols) \
		-v indent=19 \
		-v col_on="$$(tput setaf 6)" \
		-v col_off="$$(tput sgr0)" \
	'{ \
		printf "%s%*s%s ", col_on, -indent, $$1, col_off; \
		n = split($$2, words, " "); \
		line_length = ncol - indent; \
		for (i = 1; i <= n; i++) { \
			line_length -= length(words[i]) + 1; \
			if (line_length <= 0) { \
				line_length = ncol - indent - length(words[i]) - 1; \
				printf "\n%*s ", -indent, " "; \
			} \
			printf "%s ", words[i]; \
		} \
		printf "\n"; \
	}' \
	| more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars')
@@ -26,7 +26,7 @@ Project Organization
├── references <- Data dictionaries, manuals, and all other explanatory materials.
├── reports <- Generated analysis as HTML, PDF, LaTeX, etc.
├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g.
│ generated with `pip freeze > requirements.txt`
......
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))
# -- Project information -----------------------------------------------------
project = 'air'
copyright = '2021, Christian Marius Lillelund'
author = 'Christian Marius Lillelund, Christian Fischer Pedersen'
# The full version, including alpha/beta/rc tags
release = '0.0.1'
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
]
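# If API documentation for the src package is added later, autodoc-style
# extensions could be listed here. A minimal sketch (not part of this commit;
# these are standard Sphinx extensions, not project-specific ones):
#
# extensions = [
#     'sphinx.ext.autodoc',    # pull docstrings from the source code
#     'sphinx.ext.napoleon',   # understand Google/NumPy style docstrings
#     'sphinx.ext.viewcode',   # link rendered docs back to the source
# ]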
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'alabaster'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
\ No newline at end of file
@@ -6,7 +6,7 @@ setup(
packages=find_packages(where='src'),
setup_requires=["pytest-runner"],
tests_require=["pytest"],
-version='0.0.1',
+version='1.0.0',
description='AIR is an open-source machine learning project to improve rehabilitation.',
author='Christian Marius Lillelund',
author_email='cl@ece.au.dk',
......
"""
make_survival_data.py
====================================
-Script to make dataset for Alarm case
+Script to make dataset for Alarm case.
"""
from tools import preprocessor
......
@@ -2,7 +2,7 @@
make_dataset_full.py
====================================
Script to make a dataset for Complete, Compliance, Fall and Risk
-case based on the screenings
+case based on the screenings.
"""
import paths as pt
......
@@ -2,7 +2,7 @@
make_dataset_ohe.py
====================================
Script to make a dataset for Complete, Compliance, Fall and Risk
-case using one hot encoding of categorical features
+case using one hot encoding of categorical features.
"""
import paths as pt
......
@@ -2,7 +2,7 @@
make_dataset_ordinal.py
====================================
Script to make a dataset for Complete, Compliance, Fall and Risk
-using ordinal encoding of categorical features
+using ordinal encoding of categorical features.
"""
import paths as pt
......
"""
make_screening_data.py
====================================
-Script to make dataset of screenings to be used for cases
+Script to make dataset of screenings to be used for cases.
"""
import numpy as np
......
@@ -56,4 +56,4 @@ class Data:
    ss: pd.DataFrame
    td: pd.DataFrame
    tc: pd.DataFrame
-    ats: pd.DataFrame
\ No newline at end of file
+    ats: pd.DataFrame
@@ -253,4 +253,4 @@ def check_epochs(epochs: int) -> None:
def check_batch_size(batch_size: int) -> None:
    if batch_size <= 0:
-        raise ValueError("You should provide a batch size greater than zero")
\ No newline at end of file
+        raise ValueError("You should provide a batch size greater than zero")
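As a quick illustration of how these validators are meant to fail fast, here is a minimal pytest sketch; the import path is an assumption for illustration and is not taken from this commit:

import pytest
from src.tools.data_validator import check_batch_size  # assumed module path

def test_check_batch_size_rejects_non_positive():
    # Zero or negative batch sizes should raise; positive ones pass silently.
    with pytest.raises(ValueError):
        check_batch_size(0)
    check_batch_size(32)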
@@ -9,7 +9,6 @@ import pandas as pd
import xgboost
from typing import List, Tuple
from sklearn.metrics import confusion_matrix
-import matplotlib

def compute_mean(values: List):
    return round(np.mean(values)*100, 3)
@@ -39,15 +38,15 @@ def eval_gini(y_true: np.array, y_prob: np.array) -> float:
def get_cm_by_protected_variable(df, protected_col_name, y_target_name, y_pred_name):
    confusion_df = pd.DataFrame(columns=[protected_col_name, "FPR", "FNR"])
    for name in list(df[protected_col_name].unique()):
        a=df[df[protected_col_name]==name][y_target_name]
        b=df[df[protected_col_name]==name][y_pred_name]
        TN, FP, FN, TP = confusion_matrix(list(a), list(b),labels=[0, 1]).ravel()
        TPR = TP/(TP+FN)
-        TNR = TN/(TN+FP)
+        TNR = TN/(TN+FP)
        PPV = TP/(TP+FP)
        NPV = TN/(TN+FN)
        FPR = FP/(FP+TN)
@@ -59,8 +58,8 @@ def get_cm_by_protected_variable(df, protected_col_name, y_target_name, y_pred_name):
        F1=2*(PPV*TPR)/(PPV+TPR)
        confusion_df = confusion_df.append({protected_col_name:name, "TPR":TPR, "TNR":TNR, "FPR":FPR,
-                                            "FNR":FNR, "PPV":PPV, "NPV":NPV, "FDR":FDR, "ACC":ACC,
-                                            "F1":F1, "LRplus":LRplus, "LRminus":LRminus, "TN":TN,
-                                            "FP":FP, "FN":FN, "TP":TP}, ignore_index=True)
-    return confusion_df
\ No newline at end of file
+                                            "FNR":FNR, "PPV":PPV, "NPV":NPV, "FDR":FDR, "ACC":ACC,
+                                            "F1":F1, "LRplus":LRplus, "LRminus":LRminus, "TN":TN,
+                                            "FP":FP, "FN":FN, "TP":TP}, ignore_index=True)
+    return confusion_df
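For context, a minimal sketch of how this helper could be called; the module path, column names and data are illustrative assumptions, and because the function relies on DataFrame.append it assumes a pandas version earlier than 2.0:

import pandas as pd
from src.tools.metrics import get_cm_by_protected_variable  # assumed module path

# Toy data: each group needs at least one TP, FP, FN and TN,
# otherwise the rate calculations above divide by zero.
df = pd.DataFrame({
    "gender": ["F"] * 4 + ["M"] * 4,
    "y_true": [1, 1, 0, 0, 1, 1, 0, 0],
    "y_pred": [1, 0, 1, 0, 1, 0, 1, 0],
})
cm = get_cm_by_protected_variable(df, "gender", "y_true", "y_pred")
print(cm[["gender", "TPR", "FPR", "FNR"]])  # one row of rates per group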
@@ -5,10 +5,9 @@ Utility settings functions.
"""
from pathlib import Path
-import paths as pt
import yaml

def load_settings(file_path, file_name):
    with open(Path.joinpath(file_path, file_name), 'r') as stream:
        settings = yaml.safe_load(stream)
-    return settings
\ No newline at end of file
+    return settings
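A small usage sketch follows; the module path, directory and key are illustrative assumptions, not names taken from the repository:

from pathlib import Path
from src.tools.settings import load_settings   # assumed module path

config_dir = Path("configs")                   # assumed directory holding YAML configs
settings = load_settings(config_dir, "settings.yaml")
print(settings["batch_size"])                  # hypothetical key stored in the YAML file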
@@ -13,4 +13,4 @@ def assert_datetime(entry: str) -> bool:
    :param entry: entry to check
    :return: boolean value whether conversion successful
    """
-    return pd.to_datetime(entry, format='%d-%m-%Y', errors='coerce') is not pd.NaT
\ No newline at end of file
+    return pd.to_datetime(entry, format='%d-%m-%Y', errors='coerce') is not pd.NaT
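For reference, a short sketch of what this check returns: pd.to_datetime with errors='coerce' yields NaT for anything that does not match the day-month-year format, so the function evaluates as below (the import path is assumed, the values are illustrative):

from src.tools.file_converter import assert_datetime  # assumed module path

print(assert_datetime("24-12-2021"))   # True: parses as 24 December 2021
print(assert_datetime("2021-12-24"))   # False: does not match the %d-%m-%Y format
print(assert_datetime("not a date"))   # False: coerced to NaT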