Commit 1e88c6d7 authored by Christian Marius Lillelund
Browse files

added ats mapping notebook, corrected ats cleaner

parent 5b6db2b2
Pipeline #53778 passed with stage
in 3 minutes and 17 seconds
%% Cell type:code id: tags:
```
import pandas as pd
import config as cfg
from tools import file_reader
from pathlib import Path
# Load the interim ATS records and the reference table used to translate
# ATS ids. 'ats_id' is forced to str so the ids are not parsed as numbers.
df = file_reader.read_pickle(cfg.INTERIM_DATA_DIR, 'ats.pkl').reset_index()
mapping = file_reader.read_csv(cfg.REFERENCES_DIR, 'ats.csv',
                               converters={'ats_id': str})

# Build a {key: replacement} lookup from the rows of the reference table.
# NOTE(review): dict() over mapping.values requires the CSV to have exactly
# two columns (presumably ats_id and its readable name) - confirm.
mapping_dict = dict(mapping.values)

# Truncate the ISO class to its first six characters and translate it through
# the lookup. Two deliberate choices here:
#   * .str[:6] passes missing values through as NaN, whereas slicing inside
#     apply() raises TypeError on a NaN (float) entry.
#   * replace() is applied to this column only; a frame-wide replace would
#     also rewrite any cell in another column that happens to equal an id,
#     and needlessly scans every column.
# Values with no entry in the lookup are left unchanged.
df['DevISOClass'] = df['DevISOClass'].str[:6].replace(mapping_dict)

# Discard rows that carry no citizen id.
df = df.dropna(subset=['CitizenId'])

# Bare expression so the notebook renders the resulting frame.
df
```
%% Output
index CitizenId Gender BirthYear DevHMINumber \\n0 19735 1002012383 MALE 0 800026 \n1 19736 1002012383 MALE 0 800027 \n2 19737 1002012383 MALE 0 800278 \n3 19739 1002012383 MALE 0 800174 \n4 19175 1002012383 MALE 0 800027 \n... ... ... ... ... ... \n311708 9106 825965067 MALE 98 42273 \n311709 9107 825965067 MALE 98 42273 \n311710 60024 825965067 MALE 98 101101 \n311711 60026 825965067 MALE 98 89463 \n311712 37578 825965067 MALE 98 31353 \n\n DevHMIName \\n0 5501 Hjørnestol 1-3 år \n1 5502 Bord til hjørnestol 1-3 år \n2 Hynder/puder til hjørnestole \n3 Nakkestøtte m. pude til hjørnestol. \n4 5502 Bord til hjørnestol 1-3 år \n... ... \n311708 Albuestok med blødt standard håndtag, med clips \n311709 Albuestok med blødt standard håndtag, med clips \n311710 HAWK, SB 40 cm \n311711 Wing Viscoflex Plus, SB 40 x SD 40 cm, SH 8 cm \n311712 AD Stimulite Classic siddepude, 41x41x7 cm \n\n DevISOClass DevSerial LawParagraph LendDate \\n0 SpecielleSiddemøbler 800026-000017 97 2000-12-19 \n1 SpecielleSiddemøbler 800027-000003 0 2000-12-19 \n2 SpecielleSiddemøbler 800278-000011 0 2000-12-19 \n3 SpecielleSiddemøbler 800174-000005 0 2000-12-19 \n4 SpecielleSiddemøbler 800027-000005 0 2001-01-11 \n... ... ... ... ... \n311708 Albuestokke 042273-000612 112 2019-11-13 \n311709 Albuestokke 042273-000613 112 2019-11-13 \n311710 KørestoleManuelleDrivringe 101101-000003 112 2019-12-09 \n311711 TryksårsforebyggendeSidde 089463-000011 112 2019-12-09 \n311712 TryksårsforebyggendeSidde 031353-000002 112 2020-05-05 \n\n ReturnDate Price \n0 2001-11-26 0.0 \n1 2001-11-26 0.0 \n2 2001-11-26 0.0 \n3 2001-11-26 0.0 \n4 2001-11-26 0.0 \n... ... ... \n311708 NaT 0.0 \n311709 NaT 0.0 \n311710 NaT 0.0 \n311711 2020-08-17 0.0 \n311712 NaT 0.0 \n\n[311713 rows x 12 columns]
......@@ -148,8 +148,8 @@ def remove_citizens_without_valid_id(df: pd.DataFrame) -> pd.DataFrame:
df = df[df[cfg.CITIZEN_ID] != "0000000000"]
df = df[df[cfg.CITIZEN_ID] != '0']
df = df[df[cfg.CITIZEN_ID] != "#VALUE!"]
df = df[df[cfg.CITIZEN_ID] != 'nan']
df = df[df[cfg.CITIZEN_ID] != '681']
df = df.dropna(subset=['CitizenId'])
return df
def merge_train_and_patient_data(train_data: pd.DataFrame,
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment