1,导包
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import keras
2,读取数据
# Load the quote history from 000300.SH.csv.
data = pd.read_csv('000300.SH.csv')
# Give the date and closing-price columns the English names used further down.
shsz = data.rename(
    columns={
        "日期": "Date",
        '收盘价(元)': 'Close',
    }
)
3,均值预测法
# Moving-average baseline: every forecast is the mean of the most recent
# `window` values, where forecasts already made are fed back into the window.
split_p = int(len(shsz) * 0.7)
train = shsz[:split_p]
# Copy so that adding the 'Predictions' column does not write into a slice
# of shsz.
valid = shsz[split_p:].copy()

# One forecast per validation row; the averaging window has the same length,
# so the training tail plus the forecasts so far always hold `window` values.
window = len(valid)
preds = []
for i in range(window):
    # The close column is named 'Close' after the rename applied to shsz.
    recent_sum = train['Close'][len(train) - window + i:].sum() + sum(preds)
    preds.append(recent_sum / window)

valid['Predictions'] = preds
plt.plot(train['Close'])
plt.plot(valid[['Close', 'Predictions']])
4,线性回归预测法
# linear
# Split the feature frame: first 70% of the rows for training, the rest for
# validation. The split point is taken from shsz_robot itself so it matches
# the frame that is actually sliced.
# NOTE(review): shsz_robot is defined outside this section — presumably shsz
# with extra date features and a 'closing' column; confirm.
split_p = int(len(shsz_robot) * 0.7)
train = shsz_robot[:split_p]
valid = shsz_robot[split_p:]

# The target column plus the columns that are not used as regressors.
non_feature_cols = ['closing', '代码', '成交量(股)', '简称']
x_train = train.drop(non_feature_cols, axis=1)
y_train = train['closing']
x_valid = valid.drop(non_feature_cols, axis=1)
y_valid = valid['closing']

# Fit the linear regression
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(x_train, y_train)

# Predict on the validation rows and compute the RMSE
preds = model.predict(x_valid)
rms = np.sqrt(np.mean((np.asarray(y_valid) - preds) ** 2))

# Chart: drop() returns a new frame, so adding the column below does not
# touch shsz_robot, and the row index is carried over unchanged.
valid = valid.drop(['代码', '成交量(股)', '简称'], axis=1)
valid['Predictions'] = preds
plt.plot(train['closing'])
plt.plot(valid[['closing', 'Predictions']])
5,k近邻预测法
# KNN
from sklearn import neighbors
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler

# Scale every feature into [0, 1]. The scaler is fitted on the training
# features only and then reused for the validation features, so both sets
# go through the same mapping.
scaler = MinMaxScaler(feature_range=(0, 1))
x_train_scaled = scaler.fit_transform(x_train)
x_train = pd.DataFrame(x_train_scaled)
x_valid_scaled = scaler.transform(x_valid)
x_valid = pd.DataFrame(x_valid_scaled)

# Use a grid search with 5-fold cross-validation to pick n_neighbors
params = {'n_neighbors': [2, 3, 4, 5, 6, 7, 8, 9]}
knn = neighbors.KNeighborsRegressor()
model = GridSearchCV(knn, params, cv=5)

# Fit the model and predict on the validation rows
model.fit(x_train, y_train)
preds = model.predict(x_valid)

# RMSE of the validation predictions
rms = np.sqrt(np.mean(np.power((np.array(y_valid) - np.array(preds)), 2)))
print(rms)

valid['Predictions'] = preds
plt.plot(train['closing'])
plt.plot(valid[['closing', 'Predictions']])
6,LSTM 预测法
# Import the required libraries
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM

# Number of leading rows used for training, and number of previous closes
# fed to the network for each prediction.
train_size = 3170
lookback = 60

# Build a frame that holds only the closing price, indexed by date
data = shsz.sort_index(ascending=True, axis=0)
new_data = data[['Date', 'Close']].set_index('Date')

# Split the raw values into training and validation parts
dataset = new_data.values
train = dataset[:train_size, :]
valid = dataset[train_size:, :]

# Scale to [0, 1] and turn the training part into (window, next value) pairs
# NOTE(review): the scaler is fitted on the whole series, validation rows
# included; fit on `train` only if look-ahead must be avoided.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(dataset)
x_train, y_train = [], []
for i in range(lookback, len(train)):
    x_train.append(scaled_data[i - lookback:i, 0])
    y_train.append(scaled_data[i, 0])
x_train, y_train = np.array(x_train), np.array(y_train)
# Add a trailing axis of size 1 to match the input_shape given to the LSTM
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

# Create and fit the LSTM network
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)))
model.add(LSTM(units=50))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x_train, y_train, epochs=1, batch_size=1, verbose=2)

# Predict every validation row from the `lookback` closes that precede it
inputs = new_data[len(new_data) - len(valid) - lookback:].values
inputs = inputs.reshape(-1, 1)
inputs = scaler.transform(inputs)
X_test = []
for i in range(lookback, inputs.shape[0]):
    X_test.append(inputs[i - lookback:i, 0])
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
closing_price = model.predict(X_test)
# Map the predictions back from the scaled range to price units
closing_price = scaler.inverse_transform(closing_price)

# Plot actual against predicted closes; this has to run after the prediction
# step because it needs closing_price.
train = new_data[:train_size]
# Copy so that adding the column does not write into a slice of new_data.
valid = new_data[train_size:].copy()
valid['Predictions'] = closing_price[:, 0]
# Uncomment to draw the training closes as well:
# plt.plot(train['Close'])
plt.plot(list(valid['Close']))
plt.plot(list(valid['Predictions']))
总结:
以上预测方法中,比较准确的是 LSTM 和 K 近邻方法;其他方法比较粗糙,不过也算是预测数据的一种思路。需要注意的是,上述预测方法仅能反映正常情况下的波动,而期货、股票市场的波动比较大,因此结果仅供参考。
Python实现:使用机器学习和深度学习预测股票价格 - 知乎