Commit b9d3a855 authored by Carsten Eie Frigaard's avatar Carsten Eie Frigaard
Browse files

small_pre_l04_update

parent 4a1efdd3
...@@ -563,7 +563,7 @@ ...@@ -563,7 +563,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "Python 3 (ipykernel)",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },
...@@ -577,7 +577,7 @@ ...@@ -577,7 +577,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.5" "version": "3.9.7"
}, },
"nav_menu": {}, "nav_menu": {},
"toc": { "toc": {
......
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
# Preprocessing of Data # Preprocessing of Data
## Standardization ## Standardization
### First standardization... ### First standardization...
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
import numpy as np import numpy as np
from sklearn.preprocessing import scale, StandardScaler from sklearn.preprocessing import scale, StandardScaler
from libitmal import utils as libmalutils from libitmal import utils as libmalutils
# Some dummy data... # Some dummy data...
X_train = np.array([[ 1., -1., 2.], X_train = np.array([[ 1., -1., 2.],
[ 2., 0., 0.], [ 2., 0., 0.],
[ 0., 1., -1.]]) [ 0., 1., -1.]])
y_train = np.array([1, 0, 1]) y_train = np.array([1, 0, 1])
X_scaled = scale(X_train) X_scaled = scale(X_train)
libmalutils.PrintMatrix(X_scaled,'X_scaled=') libmalutils.PrintMatrix(X_scaled,'X_scaled=')
print() print()
print(f'X_scaled.mean()={X_scaled.mean():0.3f}') print(f'X_scaled.mean()={X_scaled.mean():0.3f}')
print(f'X_scaled.std() ={X_scaled.std():0.3f}') print(f'X_scaled.std() ={X_scaled.std():0.3f}')
print('\nColumn-wise mean and std:\n') print('\nColumn-wise mean and std:\n')
print(f'X_scaled.mean()={X_scaled.mean(axis=0)}') print(f'X_scaled.mean()={X_scaled.mean(axis=0)}')
print(f'X_scaled.std() ={X_scaled.std (axis=0)}') print(f'X_scaled.std() ={X_scaled.std (axis=0)}')
print("\nOK")
``` ```
%% Output %% Output
X_scaled=[[ 0. -1.22 1.34] X_scaled=[[ 0. -1.22 1.34]
[ 1.22 0. -0.27] [ 1.22 0. -0.27]
[-1.22 1.22 -1.07]] [-1.22 1.22 -1.07]]
X_scaled.mean()=0.000 X_scaled.mean()=0.000
X_scaled.std() =1.000 X_scaled.std() =1.000
Column-wise mean and std: Column-wise mean and std:
X_scaled.mean()=[0. 0. 0.] X_scaled.mean()=[0. 0. 0.]
X_scaled.std() =[1. 1. 1.] X_scaled.std() =[1. 1. 1.]
OK
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### Now via a preprocessing transformer (fit-transform interface): `StandardScaler`... ### Now via a preprocessing transformer (fit-transform interface): `StandardScaler`...
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
scaler = StandardScaler().fit(X_train) scaler = StandardScaler().fit(X_train)
print(scaler) print(scaler)
print() print()
print(f'scaler.mean_ ={scaler.mean_}') print(f'scaler.mean_ ={scaler.mean_}')
print(f'scaler.scale_={scaler.scale_}') print(f'scaler.scale_={scaler.scale_}')
print(f'scaler.var_ ={scaler.var_}') print(f'scaler.var_ ={scaler.var_}')
print() print()
libmalutils.PrintMatrix(scaler.transform(X_train),'scaler.transform(X_train)=') libmalutils.PrintMatrix(scaler.transform(X_train), 'scaler.transform(X_train)=')
libmalutils.PrintMatrix(scaler.transform(X_train).std(axis=0),'scaler.transform(X_train)=') libmalutils.PrintMatrix(scaler.transform(X_train).std(axis=0),'scaler.transform(X_train)=')
print("\nOK")
``` ```
%% Output %% Output
StandardScaler() StandardScaler()
scaler.mean_ =[1. 0. 0.33333333] scaler.mean_ =[1. 0. 0.33333333]
scaler.scale_=[0.81649658 0.81649658 1.24721913] scaler.scale_=[0.81649658 0.81649658 1.24721913]
scaler.var_ =[0.66666667 0.66666667 1.55555556] scaler.var_ =[0.66666667 0.66666667 1.55555556]
scaler.transform(X_train)=[[ 0. -1.22 1.34] scaler.transform(X_train)=[[ 0. -1.22 1.34]
[ 1.22 0. -0.27] [ 1.22 0. -0.27]
[-1.22 1.22 -1.07]] [-1.22 1.22 -1.07]]
scaler.transform(X_train)=[1. 1. 1.] scaler.transform(X_train)=[1. 1. 1.]
OK
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
### And finally via a Pipeline ### And finally via a Pipeline
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
from sklearn.naive_bayes import GaussianNB from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import make_pipeline from sklearn.pipeline import make_pipeline
mypipeline = make_pipeline( mypipeline = make_pipeline(
StandardScaler(), StandardScaler(),
GaussianNB(priors=None) GaussianNB(priors=None)
) )
mypipeline.fit(X_train, y_train) mypipeline.fit(X_train, y_train)
print(mypipeline.predict(X_train)) # just a dummy demo, predict on train data print(mypipeline.predict(X_train)) # just a dummy demo, predict on train data
print("\nOK")
``` ```
%% Output %% Output
[1 0 1] [1 0 1]
OK
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment