Christian Fischer Pedersen / AIR / Commits

Commit f41eccb3, authored Dec 21, 2021 by thecml

improved some docs

parent 608c8f93
Pipeline #100537 passed in 4 minutes and 28 seconds
Changes: 14 files
ml/Makefile deleted (100644 → 0)
```makefile
.PHONY: clean data test lint requirements sync_data_to_s3 sync_data_from_s3

#################################################################################
# GLOBALS                                                                       #
#################################################################################

PROJECT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
BUCKET = [OPTIONAL] your-bucket-for-syncing-data (do not include 's3://')
PROFILE = default
PROJECT_NAME = air
PYTHON_INTERPRETER = python3

ifeq (,$(shell which conda))
HAS_CONDA = False
else
HAS_CONDA = True
endif

#################################################################################
# COMMANDS                                                                      #
#################################################################################

## Install Python Dependencies
requirements: test_environment
	$(PYTHON_INTERPRETER) -m pip install -U pip setuptools wheel
	$(PYTHON_INTERPRETER) -m pip install -r requirements.txt

## Make Dataset
data: requirements
	$(PYTHON_INTERPRETER) src/data/make_dataset.py data/raw data/processed

## Delete all compiled Python files
clean:
	find . -type f -name "*.py[co]" -delete
	find . -type d -name "__pycache__" -delete

## Lint using flake8
lint:
	flake8 src

test:
	PYTHONPATH=. pytest

## Upload Data to S3
sync_data_to_s3:
ifeq (default,$(PROFILE))
	aws s3 sync data/ s3://$(BUCKET)/data/
else
	aws s3 sync data/ s3://$(BUCKET)/data/ --profile $(PROFILE)
endif

## Download Data from S3
sync_data_from_s3:
ifeq (default,$(PROFILE))
	aws s3 sync s3://$(BUCKET)/data/ data/
else
	aws s3 sync s3://$(BUCKET)/data/ data/ --profile $(PROFILE)
endif

## Set up python interpreter environment
create_environment:
ifeq (True,$(HAS_CONDA))
	@echo ">>> Detected conda, creating conda environment."
ifeq (3,$(findstring 3,$(PYTHON_INTERPRETER)))
	conda create --name $(PROJECT_NAME) python=3
else
	conda create --name $(PROJECT_NAME) python=2.7
endif
	@echo ">>> New conda env created. Activate with:\nsource activate $(PROJECT_NAME)"
else
	$(PYTHON_INTERPRETER) -m pip install -q virtualenv virtualenvwrapper
	@echo ">>> Installing virtualenvwrapper if not already installed.\nMake sure the following lines are in shell startup file\n\
export WORKON_HOME=$$HOME/.virtualenvs\nexport PROJECT_HOME=$$HOME/Devel\nsource /usr/local/bin/virtualenvwrapper.sh\n"
	@bash -c "source `which virtualenvwrapper.sh`;mkvirtualenv $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER)"
	@echo ">>> New virtualenv created. Activate with:\nworkon $(PROJECT_NAME)"
endif

## Test python environment is setup correctly
test_environment:
	$(PYTHON_INTERPRETER) test_environment.py

#################################################################################
# PROJECT RULES                                                                 #
#################################################################################

#################################################################################
# Self Documenting Commands                                                     #
#################################################################################

.DEFAULT_GOAL := help

# Inspired by <http://marmelab.com/blog/2016/02/29/auto-documented-makefile.html>
# sed script explained:
# /^##/:
# 	* save line in hold space
# 	* purge line
# 	* Loop:
# 		* append newline + line to hold space
# 		* go to next line
# 		* if line starts with doc comment, strip comment character off and loop
# 	* remove target prerequisites
# 	* append hold space (+ newline) to line
# 	* replace newline plus comments by `---`
# 	* print line
# Separate expressions are necessary because labels cannot be delimited by
# semicolon; see <http://stackoverflow.com/a/11799865/1968>
.PHONY: help
help:
	@echo "$$(tput bold)Available rules:$$(tput sgr0)"
	@echo
	@sed -n -e "/^## / { \
		h; \
		s/.*//; \
		:doc" \
		-e "H; \
		n; \
		s/^## //; \
		t doc" \
		-e "s/:.*//; \
		G; \
		s/\\n## /---/; \
		s/\\n/ /g; \
		p; \
	}" ${MAKEFILE_LIST} \
	| LC_ALL='C' sort --ignore-case \
	| awk -F '---' \
		-v ncol=$$(tput cols) \
		-v indent=19 \
		-v col_on="$$(tput setaf 6)" \
		-v col_off="$$(tput sgr0)" \
	'{ \
		printf "%s%*s%s ", col_on, -indent, $$1, col_off; \
		n = split($$2, words, " "); \
		line_length = ncol - indent; \
		for (i = 1; i <= n; i++) { \
			line_length -= length(words[i]) + 1; \
			if (line_length <= 0) { \
				line_length = ncol - indent - length(words[i]) - 1; \
				printf "\n%*s ", -indent, " "; \
			} \
			printf "%s ", words[i]; \
		} \
		printf "\n"; \
	}' \
	| more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars')
```
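The `test_environment` target invokes `test_environment.py`, which is not part of this diff. In the cookiecutter-data-science template this Makefile derives from, that script only verifies the interpreter's major version; a minimal sketch of such a check (illustrative, not the repository's actual file) could look like:

```python
# test_environment.py -- illustrative sketch, not the repository's actual file.
# Fail fast if make targets are run with the wrong interpreter.
import sys

REQUIRED_MAJOR = 3  # assumption: the project targets Python 3 (PYTHON_INTERPRETER = python3)

def main():
    if sys.version_info.major != REQUIRED_MAJOR:
        raise TypeError(
            "This project requires Python {}. Found: Python {}".format(
                REQUIRED_MAJOR, sys.version.split()[0]))
    print(">>> Development environment passes all tests!")

if __name__ == '__main__':
    main()
```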
ml/README.md
...
```diff
@@ -26,7 +26,7 @@ Project Organization
 ├── references         <- Data dictionaries, manuals, and all other explanatory materials.
 │
 ├── reports            <- Generated analysis as HTML, PDF, LaTeX, etc.
 │
 ├── requirements.txt   <- The requirements file for reproducing the analysis environment, e.g.
 │                         generated with `pip freeze > requirements.txt`
 │
```
...
ml/conf.py deleted (100644 → 0)
```python
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))

# -- Project information -----------------------------------------------------

project = 'air'
copyright = '2021, Christian Marius Lillelund'
author = 'Christian Marius Lillelund, Christian Fischer Pedersen'

# The full version, including alpha/beta/rc tags
release = '0.0.1'

# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = []

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'alabaster'

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
```
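The deleted conf.py shipped with an empty `extensions` list, so the commented path-setup block was inert. Had API docs via autodoc been wanted, the wiring would look roughly like this; the `'src'` path and the chosen extensions are assumptions based on the repository layout in this commit, not something the deleted file did:

```python
# Illustrative only: the deleted conf.py kept extensions = [] and the path
# setup commented out. Sphinx can only autodoc modules it can import.
import os
import sys
sys.path.insert(0, os.path.abspath('src'))  # assumed location of the package code

extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon']
```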
ml/setup.py
...
```diff
@@ -6,7 +6,7 @@ setup(
     packages=find_packages(where='src'),
     setup_requires=["pytest-runner"],
     tests_require=["pytest"],
-    version='0.0.1',
+    version='1.0.0',
     description='AIR is an open-source machine learning project to improve rehabilitation.',
     author='Christian Marius Lillelund',
     author_email='cl@ece.au.dk',
```
...
ml/src/data/make_alarm_data.py
"""
make_survival_data.py
====================================
Script to make dataset for Alarm case
Script to make dataset for Alarm case
.
"""
from
tools
import
preprocessor
...
...
ml/src/data/make_dataset_full.py
...
```diff
@@ -2,7 +2,7 @@
 make_dataset_full.py
 ====================================
 Script to make a dataset for Complete, Compliance, Fall and Risk
-case based on the screenings
+case based on the screenings.
 """
 import paths as pt
```
...
ml/src/data/make_dataset_ohe.py
...
```diff
@@ -2,7 +2,7 @@
 make_dataset_ohe.py
 ====================================
 Script to make a dataset for Complete, Compliance, Fall and Risk
-case using one hot encoding of categorial features
+case using one hot encoding of categorial features.
 """
 import paths as pt
```
...
ml/src/data/make_dataset_ordinal.py
...
```diff
@@ -2,7 +2,7 @@
 make_dataset_ordinal.py
 ====================================
 Script to make a dataset for Complete, Compliance, Fall and Risk
-using ordinal encoding of categorial features
+using ordinal encoding of categorial features.
 """
 import paths as pt
```
...
ml/src/data/make_screening_data.py
"""
make_screening_data.py
====================================
Script to make dataset of screenings to be used for cases
Script to make dataset of screenings to be used for cases
.
"""
import
numpy
as
np
...
...
ml/src/utility/data.py
...
```diff
@@ -56,4 +56,4 @@ class Data:
     ss: pd.DataFrame
     td: pd.DataFrame
     tc: pd.DataFrame
-    ats: pd.DataFrame
\ No newline at end of file
+    ats: pd.DataFrame
```
ml/src/utility/embedder.py
...
```diff
@@ -253,4 +253,4 @@ def check_epochs(epochs: int) -> None:
 def check_batch_size(batch_size: int) -> None:
     if batch_size <= 0:
-        raise ValueError("You should provide a batch size greater than zero")
\ No newline at end of file
+        raise ValueError("You should provide a batch size greater than zero")
```
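For context on the hunk above (the change itself only adds the missing trailing newline): `check_batch_size` is a plain guard clause. A minimal usage sketch, with the import path assumed from the file layout and `PYTHONPATH=.` as in the Makefile's test target:

```python
from src.utility.embedder import check_batch_size  # import path assumed

check_batch_size(32)     # valid size: returns None silently
try:
    check_batch_size(0)  # violates batch_size > 0
except ValueError as err:
    print(err)           # "You should provide a batch size greater than zero"
```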
ml/src/utility/metrics.py
...
```diff
@@ -9,7 +9,6 @@ import pandas as pd
 import xgboost
 from typing import List, Tuple
 from sklearn.metrics import confusion_matrix
-import matplotlib

 def compute_mean(values: List):
     return round(np.mean(values)*100, 3)
```
...
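`compute_mean`, unchanged in the hunk above, scales a list of fractions to a percentage and rounds to three decimals. A quick self-contained illustration of that behavior:

```python
import numpy as np

def compute_mean(values):
    # As in metrics.py: mean of the inputs, expressed as a percentage, 3 decimals.
    return round(np.mean(values) * 100, 3)

print(compute_mean([0.5, 0.75]))    # 62.5
print(compute_mean([1/3, 1/3]))     # 33.333
```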
```diff
@@ -39,15 +38,15 @@ def eval_gini(y_true: np.array, y_prob: np.array) -> float:
 def get_cm_by_protected_variable(df, protected_col_name, y_target_name, y_pred_name):
     confusion_df = pd.DataFrame(columns=[protected_col_name, "FPR", "FNR"])
     for name in list(df[protected_col_name].unique()):
         a = df[df[protected_col_name] == name][y_target_name]
         b = df[df[protected_col_name] == name][y_pred_name]
         TN, FP, FN, TP = confusion_matrix(list(a), list(b), labels=[0, 1]).ravel()
         TPR = TP/(TP+FN)
         TNR = TN/(TN+FP)
         PPV = TP/(TP+FP)
         NPV = TN/(TN+FN)
         FPR = FP/(FP+TN)
```
...
```diff
@@ -59,8 +58,8 @@ def get_cm_by_protected_variable(df, protected_col_name, y_target_name, y_pred_name):
         F1 = 2 * (PPV*TPR) / (PPV+TPR)
         confusion_df = confusion_df.append({protected_col_name: name,
             "TPR": TPR, "TNR": TNR, "FPR": FPR, "FNR": FNR,
             "PPV": PPV, "NPV": NPV, "FDR": FDR, "ACC": ACC, "F1": F1,
             "LRplus": LRplus, "LRminus": LRminus,
             "TN": TN, "FP": FP, "FN": FN, "TP": TP}, ignore_index=True)
-    return confusion_df
\ No newline at end of file
+    return confusion_df
```
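A hedged usage sketch for `get_cm_by_protected_variable`: the function slices `df` by each unique value of the protected column, computes a binary confusion matrix (`labels=[0, 1]`), and appends one row of rates per group. The column names and data below are made up for illustration; note also that `DataFrame.append` was deprecated in pandas 1.4 and removed in 2.0, so this code assumes an older pandas:

```python
import pandas as pd
from src.utility.metrics import get_cm_by_protected_variable  # import path assumed

# Hypothetical evaluation frame: binary ground truth and predictions, plus a
# protected attribute to slice by. Each group contains all four confusion-matrix
# outcomes, so none of the rate denominators is zero.
df = pd.DataFrame({
    "Gender": [0, 0, 0, 0, 1, 1, 1, 1],
    "y_true": [1, 0, 1, 0, 1, 0, 1, 0],
    "y_pred": [1, 0, 0, 1, 1, 0, 0, 1],
})

cm = get_cm_by_protected_variable(df, "Gender", "y_true", "y_pred")
print(cm[["Gender", "TPR", "FPR"]])  # one row of rates per gender value
```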
ml/src/utility/settings.py
...
```diff
@@ -5,10 +5,9 @@ Utility settings functions.
 """
 from pathlib import Path
-import paths as pt
 import yaml

 def load_settings(file_path, file_name):
     with open(Path.joinpath(file_path, file_name), 'r') as stream:
         settings = yaml.safe_load(stream)
-        return settings
\ No newline at end of file
+        return settings
```
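A small usage sketch for `load_settings`; the directory, file name, and YAML keys below are placeholders, and the import path is assumed from the file layout:

```python
from pathlib import Path
from src.utility.settings import load_settings  # import path assumed

# Assuming a YAML file such as configs/settings.yaml with content like:
#   dataset: complete
#   seed: 42
settings = load_settings(Path("configs"), "settings.yaml")
print(settings["dataset"], settings["seed"])  # complete 42
```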
ml/src/utility/time.py
...
```diff
@@ -13,4 +13,4 @@ def assert_datetime(entry: str) -> bool:
     :param entry: entry to check
     :return: boolean value whether conversion successful
     """
-    return pd.to_datetime(entry, format='%d-%m-%Y', errors='coerce') is not pd.NaT
\ No newline at end of file
+    return pd.to_datetime(entry, format='%d-%m-%Y', errors='coerce') is not pd.NaT
```
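`assert_datetime` returns True exactly when the entry parses as a dd-mm-YYYY date: `errors='coerce'` turns parse failures into `pd.NaT` instead of raising, and the identity check against the `NaT` singleton converts that into a boolean. A self-contained illustration:

```python
import pandas as pd

def assert_datetime(entry: str) -> bool:
    # As in time.py: parse failures become pd.NaT, so the comparison yields False.
    return pd.to_datetime(entry, format='%d-%m-%Y', errors='coerce') is not pd.NaT

print(assert_datetime('21-12-2021'))  # True
print(assert_datetime('2021-12-21'))  # False: wrong field order for %d-%m-%Y
print(assert_datetime('not a date'))  # False
```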