Commit 007bc756 authored by thecml
Browse files

added in code for viborg and aalborg data

parent cd30cd59
%% Cell type:code id: tags:
``` python
import pandas as pd

# Absolute path to the Aalborg export workbook on the author's machine.
file = 'C:\\Users\\cml\\Downloads\\AIR export\\Aalborg\\Hjælpemidler minus 50_Rasmus_Details-CPR.xlsx'

# Load the workbook, forcing 'ID' and 'Kategori ISO nummer' through str so
# they are kept as text, then order the rows by 'ID' and 'Kørselsdato'.
df = (
    pd.read_excel(
        file,
        engine='openpyxl',
        converters={'ID': str, 'Kategori ISO nummer': str},
    )
    .sort_values(by=['ID', 'Kørselsdato'])
)
```
%% Cell type:code id: tags:
``` python
# Number of rows for every ('ID', 'Kategori ISO nummer') combination in df.
pair_counts = df.groupby(['ID', 'Kategori ISO nummer']).size()

# Largest counts first; the trailing expression renders the Series as the
# cell's output.
res = pair_counts.sort_values(ascending=False)
res
```
%% Output
ID Kategori ISO nummer
62510422746 12310321 29
72093654450 12310321 28
45802099746 12310321 28
43477700202 12310321 27
14480436738 12310321 26
..
40930362042 24210301 1
18150302 1
18121030 1
18031502 1
9911573490 99999999 1
Length: 39101, dtype: int64
%% Cell type:code id: tags:
``` python
# Render the DataFrame `df` as this cell's output for inspection.
df
```
%% Output
ID Birth Year Gender HMI nr \
9610 9911573490 98 MALE 108993
9612 9911573490 98 MALE 42273
9613 9911573490 98 MALE 42273
9614 9911573490 98 MALE 62516
9615 9911573490 98 MALE 801199
... ... ... ... ...
9653 12031223466 30 FEMALE 800015
9655 12031223466 30 FEMALE 800015
9660 12031223466 30 FEMALE 82107
9662 12031223466 30 FEMALE 83587
9659 12031223466 30 FEMALE 82107
Produktnavn Kategori ISO nummer \
9610 Batec Mini 12240901
9612 Albuestok med blødt standard håndtag, med clips 12030604
9613 Albuestok med blødt standard håndtag, med clips 12030604
9614 Nielsen Line teleskopbadebænk 09330702
9615 Ryg til Nielsen Line Badebænk 99999999
... ... ...
9653 Overtræk til sengeheste, til metal (L170 x H41) 99999999
9655 Overtræk til sengeheste, til metal (L170 x H41) 99999999
9660 Emineo 08, OneTool, velcroryglæn, SB 39 cm 12221804
9662 Basic sejl, netmateriale str. S, med fastsyede... 12362124
9659 Emineo 08, OneTool, velcroryglæn, SB 39 cm 12221804
Løbenummer Indsats navn Paragraf \
9610 4.000 SEL §112 Hjælpemiddel (Genbrug) § 112 Serviceloven
9612 612.000 SEL §112 Hjælpemiddel (Genbrug) § 112 Serviceloven
9613 613.000 SEL §112 Hjælpemiddel (Genbrug) § 112 Serviceloven
9614 760.000 SEL §112 Hjælpemiddel (Genbrug) § 112 Serviceloven
9615 222.000 SEL §112 Hjælpemiddel (Genbrug) § 112 Serviceloven
... ... ... ...
9653 2.969 APV Hjælpemiddel § 15 Arbejdsmiljøloven
9655 2.970 APV Hjælpemiddel § 15 Arbejdsmiljøloven
9660 9.000 SEL §112 Hjælpemiddel (Genbrug) § 112 Serviceloven
9662 114.000 APV Hjælpemiddel § 15 Arbejdsmiljøloven
9659 9.000 SEL §112 Hjælpemiddel (Genbrug) § 112 Serviceloven
Kørselsdato Dødsdato
9610 31/08/21 31/12/99
9612 14/11/19 31/12/99
9613 14/11/19 31/12/99
9614 14/11/19 31/12/99
9615 14/11/19 31/12/99
... ... ...
9653 03/05/21 05/04/21
9655 03/05/21 05/04/21
9660 03/05/21 05/04/21
9662 03/05/21 05/04/21
9659 02/02/21 31/12/99
[72053 rows x 11 columns]
......@@ -10,18 +10,18 @@ import shutil
def main():
with open(Path.joinpath(pt.INTERIM_DATA_DIR, 'ats.pkl'), 'rb') as fd:
infile = BytesIO()
infile = BytesIO()
shutil.copyfileobj(fd, infile)
infile.seek(0)
df = file_reader.read_pickle(infile)
settings = load_settings(pt.CONFIGS_DIR, "data.yaml")
ats_iso_length = settings['ats_iso_length']
df['DevISOClass'] = df['DevISOClass'].apply(lambda x: x[:ats_iso_length]) # limit ats iso length
df = df[['CitizenId', 'BirthYear', 'Gender', 'LendDate', 'ReturnDate', 'DevISOClass']]
df = df.fillna(df.LendDate.max()) # replace invalid return dates with latest obs lend date
df = df.loc[df['ReturnDate'] >= df['LendDate']] # return date must be same or later than lend date
# Merge loans based on ats, lend date and return date
df = df.reset_index(drop=True).sort_values(by=['CitizenId', 'LendDate'])
subset_cols = ['CitizenId', 'DevISOClass']
......@@ -34,12 +34,12 @@ def main():
'Gender', 'DevISOClass',
'LendDate', 'ReturnDate']]
df = merged.reset_index().sort_values(['CitizenId', 'LendDate']).drop(['index'], axis=1)
# Calculate time diff between lends
df['DeltaLends'] = df.sort_values(['CitizenId', 'LendDate'])\
.groupby(['CitizenId'])['LendDate']\
.diff().dt.days.fillna(0).astype(int)
# Tag alarm lends, save alarm citizens and filter subsequent lends
alarm_ats = settings['alarm_ats']
df['IsAlarmLend'] = df.apply(lambda x: 1 if alarm_ats in x['DevISOClass'] else 0, axis=1)
......@@ -113,23 +113,23 @@ def main():
# Drop citizen id
x_df = x_df.drop('CitizenId', axis=1)
y_df = y_df.drop('CitizenId', axis=1)
# Encode gender
x_df['Gender'] = x_df['Gender'].apply(lambda x: 0 if x == "FEMALE" else 1)
# Prepare data y and x
ats_resolution = settings['ats_resolution']
data_y = np.array(list(tuple(x) for x in y_df.to_numpy()),
dtype=[('Status', 'bool'), ('Days_to_alarm', '>i4')])
data_x = preprocessor.split_cat_columns(x_df, col_to_split='Ats', tag='Ats',
resolution=ats_resolution)
data_dict = {'x': data_x, 'y': data_y}
with open(Path.joinpath(pt.PROCESSED_DATA_DIR, "alarm_data.pkl"), 'wb') as fd:
outfile = BytesIO()
file_writer.write_pickle(data_dict, outfile)
outfile.seek(0)
shutil.copyfileobj(outfile, fd)
if __name__ == "__main__":
main()
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment