代码较为简单。
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
# Load the battery dataset from the current working directory.
df = pd.read_csv("Battery_RUL.csv")

# Preview the first rows, then list each column's dtype and non-null count.
df.head()
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15064 entries, 0 to 15063
Data columns (total 9 columns):
 #   Column                     Non-Null Count  Dtype
---  ------                     --------------  -----
 0   Cycle_Index                15064 non-null  float64
 1   Discharge Time (s)         15064 non-null  float64
 2   Decrement 3.6-3.4V (s)     15064 non-null  float64
 3   Max. Voltage Dischar. (V)  15064 non-null  float64
 4   Min. Voltage Charg. (V)    15064 non-null  float64
 5   Time at 4.15V (s)          15064 non-null  float64
 6   Time constant current (s)  15064 non-null  float64
 7   Charging time (s)          15064 non-null  float64
 8   RUL                        15064 non-null  int64
dtypes: float64(8), int64(1)
memory usage: 1.0 MB
# Summary statistics for every column.
df.describe()

# Draw one histogram per column on a 3x3 grid (the frame has 9 columns,
# so every subplot slot is used).
numlist = df.columns
fig = plt.figure(figsize=(20, 20))
for i, column in enumerate(numlist):
    # Subplot indices are 1-based.
    plt.subplot(3, 3, i + 1)
    plt.title(column, fontsize=12)
    plt.hist(df[column])
# Adjust subplot spacing once, after all axes have been created.
plt.tight_layout()
# Pairwise scatter plots across all columns.
sns.pairplot(df)

# Correlation heatmap, with each cell annotated to one decimal place.
plt.figure(figsize=(15, 8))
sns.heatmap(
    df.corr(),
    annot=True,
    cbar=False,
    cmap='Blues',
    fmt='.1f',
)

# Drop five columns; the modelling frame keeps the two voltage columns,
# 'Time at 4.15V (s)' and the 'RUL' target.
df1 = df.drop(
    columns=[
        'Cycle_Index',
        'Discharge Time (s)',
        'Decrement 3.6-3.4V (s)',
        'Time constant current (s)',
        'Charging time (s)',
    ],
)
# The star import brings setup() and the other PyCaret regression helpers
# called later in this script into scope.
from pycaret.regression import *

# Initialise the regression experiment on the reduced frame with 'RUL' as
# the target; outlier removal is enabled and the session id is fixed at 1.
exp_reg = setup(
    df1,
    target='RUL',
    remove_outliers=True,
    session_id=1,
)
| | Description | Value |
---|---|---|
0 | Session id | 1 |
1 | Target | RUL |
2 | Target type | Regression |
3 | Original data shape | (15064, 4) |
4 | Transformed data shape | (14536, 4) |
5 | Transformed train set shape | (10016, 4) |
6 | Transformed test set shape | (4520, 4) |
7 | Numeric features | 3 |
8 | Preprocess | True |
9 | Imputation type | simple |
10 | Numeric imputation | mean |
11 | Categorical imputation | mode |
12 | Remove outliers | True |
13 | Outliers threshold | 0.050000 |
14 | Fold Generator | KFold |
15 | Fold Number | 10 |
16 | CPU Jobs | -1 |
17 | Use GPU | False |
18 | Log Experiment | False |
19 | Experiment Name | reg-default-name |
20 | USI | dc44 |
# Produce the model comparison leaderboard shown below. The call's return
# value is not stored; the chosen model is re-created explicitly later.
compare_models()
| | Model | MAE | MSE | RMSE | R2 | RMSLE | MAPE | TT (Sec) |
---|---|---|---|---|---|---|---|---|
et | Extra Trees Regressor | 21.7428 | 5805.6217 | 75.5050 | 0.9441 | 0.3375 | 0.2090 | 0.1840 |
rf | Random Forest Regressor | 23.3808 | 6218.2462 | 78.2757 | 0.9402 | 0.3703 | 0.2286 | 0.2520 |
xgboost | Extreme Gradient Boosting | 28.1191 | 6649.0490 | 80.7887 | 0.9360 | 0.3634 | 0.2583 | 0.1810 |
lightgbm | Light Gradient Boosting Machine | 33.3121 | 6980.3989 | 82.9109 | 0.9328 | 0.3918 | 0.3013 | 0.1180 |
dt | Decision Tree Regressor | 24.7941 | 7224.9013 | 84.4653 | 0.9305 | 0.4137 | 0.2263 | 0.0450 |
gbr | Gradient Boosting Regressor | 41.4292 | 7696.6475 | 87.1895 | 0.9259 | 0.4326 | 0.3767 | 0.1220 |
ada | AdaBoost Regressor | 53.8692 | 8601.3932 | 92.3123 | 0.9172 | 0.5116 | 0.5761 | 0.0830 |
knn | K Neighbors Regressor | 47.6369 | 8847.0397 | 93.7257 | 0.9149 | 0.5202 | 0.3359 | 0.0500 |
dummy | Dummy Regressor | 278.8523 | 104050.8450 | 322.5486 | -0.0008 | 1.0563 | 3.3094 | 0.0420 |
lr | Linear Regression | 215.7580 | 3866029.7278 | 1866.2254 | -36.1340 | 0.6161 | 0.7615 | 0.2940 |
lar | Least Angle Regression | 215.7595 | 3866065.5042 | 1866.2348 | -36.1343 | 0.6161 | 0.7614 | 0.0440 |
ridge | Ridge Regression | 220.8640 | 4141461.0688 | 1930.9796 | -38.7797 | 0.6138 | 0.7712 | 0.0450 |
par | Passive Aggressive Regressor | 268.5288 | 4193895.0303 | 1907.7742 | -39.3176 | 0.6897 | 0.9640 | 0.0470 |
br | Bayesian Ridge | 232.6850 | 4716618.3836 | 2054.9328 | -44.3122 | 0.6308 | 0.8136 | 0.0430 |
lasso | Lasso Regression | 233.8883 | 4790127.0360 | 2073.9986 | -45.0074 | 0.6325 | 0.8140 | 0.2320 |
llar | Lasso Least Angle Regression | 233.8883 | 4790127.6230 | 2073.9987 | -45.0074 | 0.6325 | 0.8140 | 0.0460 |
omp | Orthogonal Matching Pursuit | 233.8889 | 4790152.8777 | 2074.0042 | -45.0076 | 0.6324 | 0.8140 | 0.0340 |
en | Elastic Net | 233.8890 | 4790175.9367 | 2074.0099 | -45.0078 | 0.6325 | 0.8140 | 0.0420 |
huber | Huber Regressor | 235.3448 | 4915415.1237 | 2086.1153 | -46.1760 | 0.6331 | 0.8195 | 0.0460 |
ExtraTreesRegressor(n_jobs=-1, random_state=1)
# Fit the Extra Trees regressor ('et'), the top entry of the leaderboard.
reg_model = create_model('et')

# Diagnostic plots for the fitted model, drawn in this order:
# prediction error, residuals, feature importance.
for plot_kind in ('error', 'residuals', 'feature'):
    plot_model(reg_model, plot=plot_kind)

# Two further base learners for the ensemble.
# NOTE(review): verbose=False presumably suppresses the per-fold score
# output -- confirm against the PyCaret docs.
xgb = create_model('xgboost', verbose=False)
rf = create_model('rf', verbose=False)

# Combine the three fitted models into a single blended estimator.
blend_specific = blend_models(estimator_list=[rf, xgb, reg_model])
工学博士，担任《Mechanical Systems and Signal Processing》《中国电机工程学报》《控制与决策》等期刊审稿专家，擅长领域：现代信号处理、机器学习、深度学习、数字孪生、时间序列分析、设备缺陷检测、设备异常检测、设备智能故障诊断与健康管理（PHM）等。