Commit b9d3a855 authored by Carsten Eie Frigaard's avatar Carsten Eie Frigaard
Browse files

small_pre_l04_update

parent 4a1efdd3
......@@ -563,7 +563,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
......@@ -577,7 +577,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
"version": "3.9.7"
},
"nav_menu": {},
"toc": {
......
%% Cell type:markdown id: tags:
# Preprocessing of Data
## Standardization
### First standardization...
%% Cell type:code id: tags:
``` python
import numpy as np
from sklearn.preprocessing import scale, StandardScaler
from libitmal import utils as libmalutils
# Toy 3x3 feature matrix plus a matching label vector (dummy data only).
X_train = np.array([
    [1., -1.,  2.],
    [2.,  0.,  0.],
    [0.,  1., -1.],
])
y_train = np.array([1, 0, 1])

# Standardize the features with the function-style helper and show the result.
X_scaled = scale(X_train)
libmalutils.PrintMatrix(X_scaled, 'X_scaled=')
print()

# Mean and standard deviation taken over the whole matrix.
overall_mean = X_scaled.mean()
overall_std = X_scaled.std()
print(f'X_scaled.mean()={overall_mean:0.3f}')
print(f'X_scaled.std() ={overall_std:0.3f}')

# The same statistics, but per column (axis=0).
print('\nColumn-wise mean and std:\n')
column_means = X_scaled.mean(axis=0)
column_stds = X_scaled.std(axis=0)
print(f'X_scaled.mean()={column_means}')
print(f'X_scaled.std() ={column_stds}')

print("\nOK")
```
%% Output
X_scaled=[[ 0. -1.22 1.34]
[ 1.22 0. -0.27]
[-1.22 1.22 -1.07]]
X_scaled.mean()=0.000
X_scaled.std() =1.000
Column-wise mean and std:
X_scaled.mean()=[0. 0. 0.]
X_scaled.std() =[1. 1. 1.]
OK
%% Cell type:markdown id: tags:
### Now via a preprocessor (fit-transform interface), `StandardScaler`...
%% Cell type:code id: tags:
``` python
# Fit a StandardScaler on the training data; the fitted scaler is kept so its
# learned statistics can be inspected and reused for transforming.
scaler = StandardScaler().fit(X_train)
print(scaler)
print()

# Per-feature statistics learned during fit().
print(f'scaler.mean_ ={scaler.mean_}')
print(f'scaler.scale_={scaler.scale_}')
print(f'scaler.var_ ={scaler.var_}')
print()

# Transform once and reuse the result, so the matrix is printed a single time
# instead of recomputing the transform for every print.
X_train_scaled = scaler.transform(X_train)
libmalutils.PrintMatrix(X_train_scaled, 'scaler.transform(X_train)=')

# NOTE: this line prints the column-wise std (axis=0) of the transformed data;
# the label text is kept unchanged so it matches the recorded cell output.
libmalutils.PrintMatrix(X_train_scaled.std(axis=0), 'scaler.transform(X_train)=')
print("\nOK")
```
%% Output
StandardScaler()
scaler.mean_ =[1. 0. 0.33333333]
scaler.scale_=[0.81649658 0.81649658 1.24721913]
scaler.var_ =[0.66666667 0.66666667 1.55555556]
scaler.transform(X_train)=[[ 0. -1.22 1.34]
[ 1.22 0. -0.27]
[-1.22 1.22 -1.07]]
scaler.transform(X_train)=[1. 1. 1.]
OK
%% Cell type:markdown id: tags:
### And finally via a Pipeline
%% Cell type:code id: tags:
``` python
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import make_pipeline
# Build a two-step pipeline: a StandardScaler followed by a GaussianNB estimator.
scaling_step = StandardScaler()
classifier_step = GaussianNB(priors=None)
mypipeline = make_pipeline(scaling_step, classifier_step)

# Fit both steps on the toy training data.
mypipeline.fit(X_train, y_train)

# Dummy demo only: predictions are made on the same data used for fitting.
train_predictions = mypipeline.predict(X_train)
print(train_predictions)
print("\nOK")
```
%% Output
[1 0 1]
OK
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment