Data2021.ipynb 8.35 KB
Newer Older
1
2
3
4
5
6
7
8
9
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
10
    "import numpy as np\n",
11
12
13
14
    "\n",
    "\n",
    "file = 'C:\\\\Users\\\\cml\\\\Downloads\\\\AIR export\\\\Aalborg\\\\Hjælpemidler minus 50_Rasmus_Details-CPR.xlsx'\n",
    "df = pd.read_excel(file, engine='openpyxl', converters={'ID': str, 'Kategori ISO nummer': str})\n",
15
16
17
18
19
20
21
22
23
24
25
26
27
    "\n",
    "df['Seq'] = df.groupby(['ID', 'Kategori ISO nummer']).cumcount()\n",
    "df = df[['ID', 'Birth Year', 'Gender', 'Kategori ISO nummer', 'Kørselsdato', 'Seq']]\n",
    "\n",
    "df['LendDate'] = df.apply(lambda x: x['Kørselsdato'] if x['Seq'] % 2 == 0 else pd.NaT, axis=1)\n",
    "df['ReturnDate'] = df.apply(lambda x: x['Kørselsdato'] if x['Seq'] % 2 == 1 else pd.NaT, axis=1)\n",
    "\n",
    "df['ReturnDate'] = df.groupby(['ID', 'Kategori ISO nummer'])['ReturnDate'].shift(-1)\n",
    "df = df.dropna(subset=['LendDate', 'ReturnDate'], thresh=1)\n",
    "\n",
    "#df['ReturnDate'] = df['ReturnDate'].shift(-1)\n",
    "#df = df.dropna(subset=['LendDate', 'ReturnDate'], thresh=1)\n",
    "#df = df.drop(['Kørselsdato', 'Seq'], axis=1)"
28
29
30
31
   ]
  },
  {
   "cell_type": "code",
32
   "execution_count": 10,
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>Birth Year</th>\n",
       "      <th>Gender</th>\n",
       "      <th>Kategori ISO nummer</th>\n",
       "      <th>Kørselsdato</th>\n",
61
62
63
       "      <th>Seq</th>\n",
       "      <th>LendDate</th>\n",
       "      <th>ReturnDate</th>\n",
64
65
66
67
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
68
69
70
71
72
73
74
75
76
       "      <th>0</th>\n",
       "      <td>2429541786</td>\n",
       "      <td>23</td>\n",
       "      <td>FEMALE</td>\n",
       "      <td>22271812</td>\n",
       "      <td>06/08/19</td>\n",
       "      <td>0</td>\n",
       "      <td>06/08/19</td>\n",
       "      <td>NaN</td>\n",
77
78
       "    </tr>\n",
       "    <tr>\n",
79
80
81
82
83
84
85
86
87
       "      <th>1</th>\n",
       "      <td>2429541786</td>\n",
       "      <td>23</td>\n",
       "      <td>FEMALE</td>\n",
       "      <td>12060611</td>\n",
       "      <td>19/02/18</td>\n",
       "      <td>0</td>\n",
       "      <td>19/02/18</td>\n",
       "      <td>NaN</td>\n",
88
89
       "    </tr>\n",
       "    <tr>\n",
90
91
92
93
94
95
96
97
98
       "      <th>2</th>\n",
       "      <td>2429541786</td>\n",
       "      <td>23</td>\n",
       "      <td>FEMALE</td>\n",
       "      <td>12072401</td>\n",
       "      <td>19/02/18</td>\n",
       "      <td>0</td>\n",
       "      <td>19/02/18</td>\n",
       "      <td>NaN</td>\n",
99
100
       "    </tr>\n",
       "    <tr>\n",
101
102
103
104
105
106
107
108
109
       "      <th>3</th>\n",
       "      <td>2430269034</td>\n",
       "      <td>26</td>\n",
       "      <td>FEMALE</td>\n",
       "      <td>22271812</td>\n",
       "      <td>09/03/20</td>\n",
       "      <td>0</td>\n",
       "      <td>09/03/20</td>\n",
       "      <td>09/11/20</td>\n",
110
111
       "    </tr>\n",
       "    <tr>\n",
112
113
114
115
116
117
118
119
120
       "      <th>5</th>\n",
       "      <td>2430269034</td>\n",
       "      <td>26</td>\n",
       "      <td>FEMALE</td>\n",
       "      <td>12362124</td>\n",
       "      <td>14/10/19</td>\n",
       "      <td>0</td>\n",
       "      <td>14/10/19</td>\n",
       "      <td>29/07/21</td>\n",
121
122
123
124
125
126
127
128
129
130
131
132
133
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
134
135
136
       "      <th>72044</th>\n",
       "      <td>74711770410</td>\n",
       "      <td>99</td>\n",
137
       "      <td>FEMALE</td>\n",
138
139
140
141
142
       "      <td>18301509</td>\n",
       "      <td>13/06/19</td>\n",
       "      <td>0</td>\n",
       "      <td>13/06/19</td>\n",
       "      <td>07/09/21</td>\n",
143
144
       "    </tr>\n",
       "    <tr>\n",
145
146
147
       "      <th>72046</th>\n",
       "      <td>74711770410</td>\n",
       "      <td>99</td>\n",
148
       "      <td>FEMALE</td>\n",
149
150
151
152
153
       "      <td>12220308</td>\n",
       "      <td>28/09/21</td>\n",
       "      <td>0</td>\n",
       "      <td>28/09/21</td>\n",
       "      <td>07/09/21</td>\n",
154
155
       "    </tr>\n",
       "    <tr>\n",
156
157
158
       "      <th>72048</th>\n",
       "      <td>74711770410</td>\n",
       "      <td>99</td>\n",
159
       "      <td>FEMALE</td>\n",
160
161
162
163
164
       "      <td>12220308</td>\n",
       "      <td>25/08/16</td>\n",
       "      <td>2</td>\n",
       "      <td>25/08/16</td>\n",
       "      <td>29/09/21</td>\n",
165
166
       "    </tr>\n",
       "    <tr>\n",
167
168
169
       "      <th>72050</th>\n",
       "      <td>74711770410</td>\n",
       "      <td>99</td>\n",
170
       "      <td>FEMALE</td>\n",
171
172
173
174
175
       "      <td>04330301</td>\n",
       "      <td>07/09/21</td>\n",
       "      <td>0</td>\n",
       "      <td>07/09/21</td>\n",
       "      <td>28/09/21</td>\n",
176
177
       "    </tr>\n",
       "    <tr>\n",
178
179
180
       "      <th>72051</th>\n",
       "      <td>74711770410</td>\n",
       "      <td>99</td>\n",
181
       "      <td>FEMALE</td>\n",
182
183
184
185
186
       "      <td>99999999</td>\n",
       "      <td>28/09/21</td>\n",
       "      <td>0</td>\n",
       "      <td>28/09/21</td>\n",
       "      <td>NaN</td>\n",
187
188
189
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
190
       "<p>48292 rows × 8 columns</p>\n",
191
192
193
       "</div>"
      ],
      "text/plain": [
194
195
196
197
198
199
200
201
202
203
204
205
       "                ID  Birth Year  Gender Kategori ISO nummer Kørselsdato  Seq  \\\n",
       "0       2429541786          23  FEMALE            22271812    06/08/19    0   \n",
       "1       2429541786          23  FEMALE            12060611    19/02/18    0   \n",
       "2       2429541786          23  FEMALE            12072401    19/02/18    0   \n",
       "3       2430269034          26  FEMALE            22271812    09/03/20    0   \n",
       "5       2430269034          26  FEMALE            12362124    14/10/19    0   \n",
       "...            ...         ...     ...                 ...         ...  ...   \n",
       "72044  74711770410          99  FEMALE            18301509    13/06/19    0   \n",
       "72046  74711770410          99  FEMALE            12220308    28/09/21    0   \n",
       "72048  74711770410          99  FEMALE            12220308    25/08/16    2   \n",
       "72050  74711770410          99  FEMALE            04330301    07/09/21    0   \n",
       "72051  74711770410          99  FEMALE            99999999    28/09/21    0   \n",
206
       "\n",
207
208
209
210
211
212
213
214
215
216
217
218
       "       LendDate ReturnDate  \n",
       "0      06/08/19        NaN  \n",
       "1      19/02/18        NaN  \n",
       "2      19/02/18        NaN  \n",
       "3      09/03/20   09/11/20  \n",
       "5      14/10/19   29/07/21  \n",
       "...         ...        ...  \n",
       "72044  13/06/19   07/09/21  \n",
       "72046  28/09/21   07/09/21  \n",
       "72048  25/08/16   29/09/21  \n",
       "72050  07/09/21   28/09/21  \n",
       "72051  28/09/21        NaN  \n",
219
       "\n",
220
       "[48292 rows x 8 columns]"
221
222
      ]
     },
223
     "execution_count": 10,
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "59ff6fbb0321898508cf6243593820bf2585fcfb6693fd00e85ec94ed8847fd0"
  },
  "kernelspec": {
   "display_name": "Python 3.8.8 64-bit ('py38-air': conda)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}