Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Christian Fischer Pedersen
AIR
Commits
9fcead93
Commit
9fcead93
authored
Nov 22, 2021
by
thecml
Browse files
updated scripts for new loader
parent
785379cb
Pipeline
#94221
failed with stage
in 4 minutes and 17 seconds
Changes
6
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
ml/src/data/load_and_clean_data.py
View file @
9fcead93
#!/usr/bin/env python
from
tools
import
file_writer
,
raw_loader
,
cleaner
import
paths
as
pt
from
io
import
BytesIO
from
pathlib
import
Path
import
shutil
def
main
():
loader
=
raw_loader
.
RawLoader
()
...
...
@@ -20,12 +23,41 @@ def main():
training_cancelled
=
cleaner2020
.
clean_training_cancelled
(
tc
,
patient_data
)
assistive_aids
=
cleaner2020
.
clean_assistive_aids
(
ats
,
ic
)
file_writer
.
write_pickle
(
screening_content
,
pt
.
INTERIM_DATA_DIR
,
'sc.pkl'
)
file_writer
.
write_pickle
(
status_set
,
pt
.
INTERIM_DATA_DIR
,
'ss.pkl'
)
file_writer
.
write_pickle
(
training_done
,
pt
.
INTERIM_DATA_DIR
,
'td.pkl'
)
file_writer
.
write_pickle
(
training_cancelled
,
pt
.
INTERIM_DATA_DIR
,
'tc.pkl'
)
file_writer
.
write_pickle
(
assistive_aids
,
pt
.
INTERIM_DATA_DIR
,
'ats.pkl'
)
file_writer
.
write_pickle
(
ic
,
pt
.
INTERIM_DATA_DIR
,
'ic.pkl'
)
with
open
(
Path
.
joinpath
(
pt
.
INTERIM_DATA_DIR
,
'sc.pkl'
),
'wb'
)
as
fd
:
outfile
=
BytesIO
()
file_writer
.
write_pickle
(
screening_content
,
outfile
)
outfile
.
seek
(
0
)
shutil
.
copyfileobj
(
outfile
,
fd
)
with
open
(
Path
.
joinpath
(
pt
.
INTERIM_DATA_DIR
,
'ss.pkl'
),
'wb'
)
as
fd
:
outfile
=
BytesIO
()
file_writer
.
write_pickle
(
status_set
,
outfile
)
outfile
.
seek
(
0
)
shutil
.
copyfileobj
(
outfile
,
fd
)
with
open
(
Path
.
joinpath
(
pt
.
INTERIM_DATA_DIR
,
'td.pkl'
),
'wb'
)
as
fd
:
outfile
=
BytesIO
()
file_writer
.
write_pickle
(
training_done
,
outfile
)
outfile
.
seek
(
0
)
shutil
.
copyfileobj
(
outfile
,
fd
)
with
open
(
Path
.
joinpath
(
pt
.
INTERIM_DATA_DIR
,
'tc.pkl'
),
'wb'
)
as
fd
:
outfile
=
BytesIO
()
file_writer
.
write_pickle
(
training_cancelled
,
outfile
)
outfile
.
seek
(
0
)
shutil
.
copyfileobj
(
outfile
,
fd
)
with
open
(
Path
.
joinpath
(
pt
.
INTERIM_DATA_DIR
,
'ats.pkl'
),
'wb'
)
as
fd
:
outfile
=
BytesIO
()
file_writer
.
write_pickle
(
assistive_aids
,
outfile
)
outfile
.
seek
(
0
)
shutil
.
copyfileobj
(
outfile
,
fd
)
with
open
(
Path
.
joinpath
(
pt
.
INTERIM_DATA_DIR
,
'ic.pkl'
),
'wb'
)
as
fd
:
outfile
=
BytesIO
()
file_writer
.
write_pickle
(
ic
,
outfile
)
outfile
.
seek
(
0
)
shutil
.
copyfileobj
(
outfile
,
fd
)
if
__name__
==
"__main__"
:
main
()
\ No newline at end of file
ml/src/data/make_dataset_full.py
View file @
9fcead93
...
...
@@ -9,9 +9,14 @@ import shutil
from
pathlib
import
Path
def
main
(
ats_resolution
:
int
=
None
):
screenings
=
file_reader
.
read_csv
(
pt
.
INTERIM_DATA_DIR
,
'screenings.csv'
,
converters
=
{
'CitizenId'
:
str
})
infile
=
StringIO
()
file_path
=
pt
.
INTERIM_DATA_DIR
file_name
=
'screenings.csv'
with
open
(
Path
.
joinpath
(
file_path
,
file_name
),
'r'
)
as
fd
:
shutil
.
copyfileobj
(
fd
,
infile
)
infile
.
seek
(
0
)
screenings
=
file_reader
.
read_csv
(
infile
,
converters
=
{
'CitizenId'
:
str
})
data_settings
=
load_settings
(
pt
.
CONFIGS_DIR
,
'data.yaml'
)
if
ats_resolution
==
None
:
ats_resolution
=
data_settings
[
'ats_resolution'
]
...
...
ml/src/data/make_screening_data.py
View file @
9fcead93
...
...
@@ -8,19 +8,49 @@ from tools import file_reader, file_writer, inputter
from
utility.settings
import
load_settings
from
utility
import
data_dto
,
dataset
from
pandas.tseries.offsets
import
DateOffset
from
io
import
StringIO
,
BytesIO
import
shutil
def main():
    """Build the screenings dataset from the cleaned interim pickles.

    Loads the cleaned screening-content (sc), status-set (ss),
    training-done (td), training-cancelled (tc) and assistive-aids (ats)
    frames from the interim data directory, assembles them into a Data
    DTO, derives the screenings table and writes it to
    'screenings.csv' in the interim data directory.
    """
    # The five interim pickles are produced upstream (load_and_clean_data.py).
    # Reading each through an in-memory buffer keeps file_reader working
    # purely on file-like objects rather than on paths.
    sc = _read_interim_pickle('sc.pkl')
    ss = _read_interim_pickle('ss.pkl')
    td = _read_interim_pickle('td.pkl')
    tc = _read_interim_pickle('tc.pkl')
    ats = _read_interim_pickle('ats.pkl')

    settings = load_settings(pt.CONFIGS_DIR, "data.yaml")
    data = data_dto.Data(sc, ss, td, tc, ats)
    screenings = get_screenings(data, settings)

    # Write through a StringIO buffer for the same reason as the reads:
    # file_writer only ever sees a file-like object. newline='' prevents
    # the csv layer from doubling line endings on Windows.
    with open(Path.joinpath(pt.INTERIM_DATA_DIR, 'screenings.csv'),
              'w', newline='') as fd:
        outfile = StringIO()
        file_writer.write_csv(screenings, outfile)
        outfile.seek(0)
        shutil.copyfileobj(outfile, fd)


def _read_interim_pickle(file_name):
    """Load one pickle from the interim data dir via an in-memory buffer.

    :param file_name: name of the pickle file inside pt.INTERIM_DATA_DIR.
    :return: whatever file_reader.read_pickle yields for that file
             (presumably a pandas DataFrame — confirm against file_reader).
    """
    with open(Path.joinpath(pt.INTERIM_DATA_DIR, file_name), 'rb') as fd:
        infile = BytesIO()
        shutil.copyfileobj(fd, infile)
        infile.seek(0)
        return file_reader.read_pickle(infile)
def
get_screenings
(
data
,
settings
):
ids
=
dataset
.
create_union_of_ids
(
data
.
sc
,
data
.
ss
,
data
.
td
,
data
.
tc
)
...
...
ml/src/tools/raw_loader.py
View file @
9fcead93
...
...
@@ -9,7 +9,7 @@ import abc
from
io
import
BytesIO
,
StringIO
import
shutil
class
BaseRawLo
e
ader
(
metaclass
=
abc
.
ABCMeta
):
class
BaseRawLoader
(
metaclass
=
abc
.
ABCMeta
):
@
abc
.
abstractmethod
def
load_status_set
(
self
,
file_name
,
path
):
"""load the DiGiRehab StatusSet data set"""
...
...
@@ -34,11 +34,11 @@ class BaseRawLoeader(metaclass=abc.ABCMeta):
def
load_clusters
(
self
,
file_name
,
path
):
"""load the generated clusters data set"""
@
abc
.
abstractmethod
@
abc
.
abstractmethod
def
load_iso_classes
(
self
,
file_name
,
path
):
"""load the ISO classes data set"""
class
RawLoader
(
BaseRawLo
e
ader
):
class
RawLoader
(
BaseRawLoader
):
def
assert_datetime
(
self
,
entry
:
str
)
->
bool
:
"""
This method checks whether an entry can
...
...
ml/src/utility/data_dto.py
View file @
9fcead93
...
...
@@ -3,7 +3,7 @@ import pandas as pd
@
dataclass
class
Data
:
'''
Class
for keeping track of feature data'''
'''
Dto
for keeping track of feature data'''
sc
:
pd
.
DataFrame
ss
:
pd
.
DataFrame
td
:
pd
.
DataFrame
...
...
ml/tests/test_neural_embedder.py
View file @
9fcead93
...
...
@@ -23,10 +23,15 @@ def test_fit():
n_numerical_cols
=
df
.
shape
[
1
]
-
emb_cols
.
shape
[
1
]
-
1
df_to_enc
=
df
.
iloc
[:,
n_numerical_cols
:]
target_name
=
"Complete"
train_ratio
=
0.7
X_train
,
X_val
,
y_train
,
y_val
,
_
=
preprocessor
.
prepare_data_for_emb
(
df_to_enc
,
"Complete"
,
0.7
)
network
=
neural_embedder
.
NeuralEmbedder
(
df
=
df_to_enc
,
target_name
=
"Complete"
)
target_name
,
train_ratio
)
num_epochs
=
10
network
=
neural_embedder
.
NeuralEmbedder
(
df
=
df_to_enc
,
target_name
=
target_name
,
epochs
=
num_epochs
)
history
=
network
.
fit
(
X_train
,
y_train
,
X_val
,
y_val
)
assert
len
(
history
.
history
[
'accuracy'
])
==
10
\ No newline at end of file
assert
len
(
history
.
history
[
'accuracy'
])
==
num_epochs
\ No newline at end of file
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment