特征处理
Python 版本 3.7
scikit-learn 版本 1.0.2
1. 标准化（StandardScaler：将每个特征缩放为均值 0、方差 1）
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from matplotlib import gridspec
import numpy as np
import matplotlib.pyplot as plt
# Demo: standardize random 2-D integer points and plot them before and after.
# 100 samples with 2 features each, integers drawn from the inclusive range
# [0, 100]. np.random.random_integers is deprecated in favour of randint,
# whose upper bound is exclusive -- hence 101.
cps = np.random.randint(0, 101, (100, 2))

# Fit the scaler on the samples and transform them in a single step.
ss = StandardScaler()
std_cps = ss.fit_transform(cps)

# 5x5 grid: rows 0-1 hold the raw scatter and rows 3-4 the standardized one;
# row 2 stays empty as a spacer, and both axes span columns 1-3.
gs = gridspec.GridSpec(5, 5)
fig = plt.figure()
ax1 = fig.add_subplot(gs[0:2, 1:4])
ax2 = fig.add_subplot(gs[3:5, 1:4])
ax1.scatter(cps[:, 0], cps[:, 1])
ax2.scatter(std_cps[:, 0], std_cps[:, 1])
plt.show()
2. 归一化（MinMaxScaler：将每个特征线性缩放到 [0, 1] 区间）
from sklearn.preprocessing import MinMaxScaler
import numpy as np
# Demo: min-max scale one random feature, then undo the scaling.
# Ten uniform draws between 0 and 100, reshaped into a (10, 1) column.
data = np.random.uniform(0, 100, 10).reshape(-1, 1)

mm = MinMaxScaler()
mm_data = mm.fit(data).transform(data)
# inverse_transform maps the scaled values back onto the original scale.
origin_data = mm.inverse_transform(mm_data)

for label, values in (
    ('data is ', data),
    ('after Min Max ', mm_data),
    ('origin data is ', origin_data),
):
    print(label, values)
3. 正则化（Normalizer：将每个样本缩放到单位范数；注意这不同于模型训练中的正则化 regularization）
# Demo: rescale each sample (row) to unit norm, using both sklearn interfaces.
from sklearn.preprocessing import Normalizer, normalize

X = [
    [1, -1, 2],
    [2, 0, 0],
    [0, 1, -1],
]

# L2 normalization through the function interface.
l2 = normalize(X, norm='l2')
print('l2:', l2)

# L1 normalization through the transformer class.
normalizerl1 = Normalizer(norm='l1')
l1 = normalizerl1.fit_transform(X)
print('l1:', l1)