Commit 9fcead93 authored by thecml
Browse files

updated scripts for new loader

parent 785379cb
Pipeline #94221 failed in 4 minutes and 17 seconds
#!/usr/bin/env python
from tools import file_writer, raw_loader, cleaner
import paths as pt
from io import BytesIO
from pathlib import Path
import shutil
def main():
loader = raw_loader.RawLoader()
......@@ -20,12 +23,41 @@ def main():
training_cancelled = cleaner2020.clean_training_cancelled(tc, patient_data)
assistive_aids = cleaner2020.clean_assistive_aids(ats, ic)
file_writer.write_pickle(screening_content, pt.INTERIM_DATA_DIR, 'sc.pkl')
file_writer.write_pickle(status_set, pt.INTERIM_DATA_DIR, 'ss.pkl')
file_writer.write_pickle(training_done, pt.INTERIM_DATA_DIR, 'td.pkl')
file_writer.write_pickle(training_cancelled, pt.INTERIM_DATA_DIR, 'tc.pkl')
file_writer.write_pickle(assistive_aids, pt.INTERIM_DATA_DIR, 'ats.pkl')
file_writer.write_pickle(ic, pt.INTERIM_DATA_DIR, 'ic.pkl')
with open(Path.joinpath(pt.INTERIM_DATA_DIR, 'sc.pkl'), 'wb') as fd:
outfile = BytesIO()
file_writer.write_pickle(screening_content, outfile)
outfile.seek(0)
shutil.copyfileobj(outfile, fd)
with open(Path.joinpath(pt.INTERIM_DATA_DIR, 'ss.pkl'), 'wb') as fd:
outfile = BytesIO()
file_writer.write_pickle(status_set, outfile)
outfile.seek(0)
shutil.copyfileobj(outfile, fd)
with open(Path.joinpath(pt.INTERIM_DATA_DIR, 'td.pkl'), 'wb') as fd:
outfile = BytesIO()
file_writer.write_pickle(training_done, outfile)
outfile.seek(0)
shutil.copyfileobj(outfile, fd)
with open(Path.joinpath(pt.INTERIM_DATA_DIR, 'tc.pkl'), 'wb') as fd:
outfile = BytesIO()
file_writer.write_pickle(training_cancelled, outfile)
outfile.seek(0)
shutil.copyfileobj(outfile, fd)
with open(Path.joinpath(pt.INTERIM_DATA_DIR, 'ats.pkl'), 'wb') as fd:
outfile = BytesIO()
file_writer.write_pickle(assistive_aids, outfile)
outfile.seek(0)
shutil.copyfileobj(outfile, fd)
with open(Path.joinpath(pt.INTERIM_DATA_DIR, 'ic.pkl'), 'wb') as fd:
outfile = BytesIO()
file_writer.write_pickle(ic, outfile)
outfile.seek(0)
shutil.copyfileobj(outfile, fd)
if __name__ == "__main__":
main()
\ No newline at end of file
......@@ -9,8 +9,13 @@ import shutil
from pathlib import Path
def main(ats_resolution: int = None):
screenings = file_reader.read_csv(pt.INTERIM_DATA_DIR, 'screenings.csv',
converters={'CitizenId': str})
infile = StringIO()
file_path = pt.INTERIM_DATA_DIR
file_name = 'screenings.csv'
with open(Path.joinpath(file_path, file_name), 'r') as fd:
shutil.copyfileobj(fd, infile)
infile.seek(0)
screenings = file_reader.read_csv(infile, converters={'CitizenId': str})
data_settings = load_settings(pt.CONFIGS_DIR, 'data.yaml')
if ats_resolution == None:
......
......@@ -8,19 +8,49 @@ from tools import file_reader, file_writer, inputter
from utility.settings import load_settings
from utility import data_dto, dataset
from pandas.tseries.offsets import DateOffset
from io import StringIO, BytesIO
import shutil
def main():
sc = file_reader.read_pickle(pt.INTERIM_DATA_DIR, 'sc.pkl')
ss = file_reader.read_pickle(pt.INTERIM_DATA_DIR, 'ss.pkl')
td = file_reader.read_pickle(pt.INTERIM_DATA_DIR, 'td.pkl')
tc = file_reader.read_pickle(pt.INTERIM_DATA_DIR, 'tc.pkl')
ats = file_reader.read_pickle(pt.INTERIM_DATA_DIR, 'ats.pkl')
settings = load_settings("data.yaml")
with open(Path.joinpath(pt.INTERIM_DATA_DIR, 'sc.pkl'), 'rb') as fd:
infile = BytesIO()
shutil.copyfileobj(fd, infile)
infile.seek(0)
sc = file_reader.read_pickle(infile)
with open(Path.joinpath(pt.INTERIM_DATA_DIR, 'ss.pkl'), 'rb') as fd:
infile = BytesIO()
shutil.copyfileobj(fd, infile)
infile.seek(0)
ss = file_reader.read_pickle(infile)
with open(Path.joinpath(pt.INTERIM_DATA_DIR, 'td.pkl'), 'rb') as fd:
infile = BytesIO()
shutil.copyfileobj(fd, infile)
infile.seek(0)
td = file_reader.read_pickle(infile)
with open(Path.joinpath(pt.INTERIM_DATA_DIR, 'tc.pkl'), 'rb') as fd:
infile = BytesIO()
shutil.copyfileobj(fd, infile)
infile.seek(0)
tc = file_reader.read_pickle(infile)
with open(Path.joinpath(pt.INTERIM_DATA_DIR, 'ats.pkl'), 'rb') as fd:
infile = BytesIO()
shutil.copyfileobj(fd, infile)
infile.seek(0)
ats = file_reader.read_pickle(infile)
settings = load_settings(pt.CONFIGS_DIR, "data.yaml")
data = data_dto.Data(sc, ss, td, tc, ats)
screenings = get_screenings(data, settings)
file_writer.write_csv(screenings, pt.INTERIM_DATA_DIR, 'screenings.csv')
with open(Path.joinpath(pt.INTERIM_DATA_DIR, 'screenings.csv'), 'w', newline='') as fd:
outfile = StringIO()
file_writer.write_csv(screenings, outfile)
outfile.seek(0)
shutil.copyfileobj(outfile, fd)
def get_screenings(data, settings):
ids = dataset.create_union_of_ids(data.sc, data.ss, data.td, data.tc)
......
......@@ -9,7 +9,7 @@ import abc
from io import BytesIO, StringIO
import shutil
class BaseRawLoeader(metaclass=abc.ABCMeta):
class BaseRawLoader(metaclass=abc.ABCMeta):
@abc.abstractmethod
def load_status_set(self, file_name, path):
"""load the DiGiRehab StatusSet data set"""
......@@ -38,7 +38,7 @@ class BaseRawLoeader(metaclass=abc.ABCMeta):
def load_iso_classes(self, file_name, path):
"""load the ISO classes data set"""
class RawLoader(BaseRawLoeader):
class RawLoader(BaseRawLoader):
def assert_datetime(self, entry: str) -> bool:
"""
This method checks whether an entry can
......
......@@ -3,7 +3,7 @@ import pandas as pd
@dataclass
class Data:
'''Class for keeping track of feature data'''
'''Dto for keeping track of feature data'''
sc: pd.DataFrame
ss: pd.DataFrame
td: pd.DataFrame
......
......@@ -23,10 +23,15 @@ def test_fit():
n_numerical_cols = df.shape[1] - emb_cols.shape[1] - 1
df_to_enc = df.iloc[:,n_numerical_cols:]
target_name = "Complete"
train_ratio = 0.7
X_train, X_val, y_train, y_val, _ = preprocessor.prepare_data_for_emb(df_to_enc,
"Complete",
0.7)
network = neural_embedder.NeuralEmbedder(df=df_to_enc, target_name="Complete")
target_name,
train_ratio)
num_epochs = 10
network = neural_embedder.NeuralEmbedder(df=df_to_enc,
target_name=target_name,
epochs=num_epochs)
history = network.fit(X_train, y_train, X_val, y_val)
assert len(history.history['accuracy']) == 10
\ No newline at end of file
assert len(history.history['accuracy']) == num_epochs
\ No newline at end of file
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment