博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
吴裕雄--天生自然 PYTHON数据分析:所有美国股票和etf的历史日价格和成交量分析...
阅读量:5302 次
发布时间:2019-06-14

本文共 5526 字,大约阅读时间需要 18 分钟。

# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in

import datetime as dt
import random
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory
from subprocess import check_output

import matplotlib.pyplot as plt
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import plotly
import statsmodels.tsa.seasonal as smt
# import the relevant Keras modules
from keras.layers import LSTM
from keras.layers import Activation, Dense
from keras.layers import Dropout
from keras.models import Sequential
from sklearn import linear_model
from sklearn.metrics import mean_absolute_error
import os

# NOTE: hard-coded local directory holding the per-ticker price/volume files;
# every relative filename below is resolved against it.
os.chdir('F:\\kaggleDataSet\\price-volume\\Stocks')

# read data
# kernels let us navigate through the zipfile as if it were a directory
# trying to read a file of size zero will throw an error, so skip them
# filenames = [x for x in os.listdir() if x.endswith('.txt') and os.path.getsize(x) > 0]
# filenames = random.sample(filenames,1)
filenames = ['prk.us.txt', 'bgr.us.txt', 'jci.us.txt', 'aa.us.txt', 'fr.us.txt',
             'star.us.txt', 'sons.us.txt', 'ipl_d.us.txt', 'sna.us.txt', 'utg.us.txt']
# Only the second file of the list is actually loaded.
filenames = [filenames[1]]
print(filenames)

# One DataFrame per file, tagged with the file it came from and with the
# 'Date' column converted to datetime.
data = []
for filename in filenames:
    df = pd.read_csv(filename, sep=',')
    # The former three-way unpack of filename.split('.') produced a value
    # that was never used and raised on names without exactly two dots.
    df['Label'] = filename
    df['Date'] = pd.to_datetime(df['Date'])
    data.append(df)

# Plot the closing price over time for every loaded DataFrame.
# The original built an unused colour string by calling an undefined name
# `r`, which raised NameError before anything was plotted; it is removed.
traces = []
for frame in data:
    frame = frame.sort_values('Date')
    traces.append(plotly.graph_objs.Scattergl(x=frame['Date'], y=frame['Close']))

# Built once, outside the loop, so it exists even when `data` is empty.
layout = plotly.graph_objs.Layout(title='Plot')
fig = plotly.graph_objs.Figure(data=traces, layout=layout)
plotly.offline.init_notebook_mode(connected=True)
plotly.offline.iplot(fig, filename='dataplot')

def _make_windows(frame, window_len):
    """Cut *frame* into overlapping windows normalised to their first row.

    For every start offset ``i`` in ``range(len(frame) - window_len)`` a
    window of ``window_len`` consecutive rows is taken, and each column is
    rescaled as ``value / first_value_in_window - 1``.

    Returns a tuple ``(inputs, targets)``:
      * ``inputs``  -- ``np.array`` stacking all windows.
      * ``targets`` -- for each window, the 'Close' value ``window_len`` rows
        after the window start, relative to the 'Close' at the window start,
        minus 1.
    """
    windows = []
    for start in range(len(frame) - window_len):
        window = frame[start:start + window_len].copy()
        for col in list(window):
            window[col] = window[col] / window[col].iloc[0] - 1
        windows.append(np.array(window))
    close = frame['Close'].values
    targets = close[window_len:] / close[:-window_len] - 1
    return np.array(windows), targets


df = data[0]
window_len = 10

# Create a data point (i.e. a date) which splits the training and testing set:
# the date found 2*window_len+1 rows before the end of the frame.
split_date = list(data[0]["Date"][-(2 * window_len + 1):])[0]

# Split the training and test set
training_set, test_set = df[df['Date'] < split_date], df[df['Date'] >= split_date]
# `axis` must be passed by keyword: pandas 2.0 no longer accepts it positionally.
training_set = training_set.drop(['Date', 'Label', 'OpenInt'], axis=1)
test_set = test_set.drop(['Date', 'Label', 'OpenInt'], axis=1)

# Create windows for training
LSTM_training_inputs, LSTM_training_outputs = _make_windows(training_set, window_len)

# Create windows for testing
LSTM_test_inputs, LSTM_test_outputs = _make_windows(test_set, window_len)
def build_model(inputs, output_size, neurons, activ_func="linear",
                dropout=0.10, loss="mae", optimizer="adam"):
    """Assemble and compile a Sequential LSTM -> Dropout -> Dense -> Activation model.

    Args:
        inputs: array whose ``shape[1]`` and ``shape[2]`` give the LSTM
            ``input_shape``.
        output_size: number of units of the Dense layer.
        neurons: first argument passed to the LSTM layer.
        activ_func: activation applied after the Dense layer.
        dropout: rate passed to the Dropout layer.
        loss: loss passed to ``compile``.
        optimizer: optimizer passed to ``compile``.

    Returns:
        The compiled model.
    """
    timesteps, n_features = inputs.shape[1], inputs.shape[2]
    layers = (
        LSTM(neurons, input_shape=(timesteps, n_features)),
        Dropout(dropout),
        Dense(units=output_size),
        Activation(activ_func),
    )
    model = Sequential()
    for layer in layers:
        model.add(layer)
    model.compile(loss=loss, optimizer=optimizer)
    return model
# initialise model architecture
nn_model = build_model(
    LSTM_training_inputs,
    output_size=1,
    neurons=32,
)
# The training target is the closing price window_len rows after the start of
# each window, expressed relative to the closing price at the window start.
# train model on data
# note: nn_history holds the object returned by fit()
nn_history = nn_model.fit(
    LSTM_training_inputs,
    LSTM_training_outputs,
    epochs=5,
    batch_size=1,
    verbose=2,
    shuffle=True,
)

# Compare the model output on the test windows with the actual targets.
# Inference is run once and reused for both the plot and the error metric
# (the original called predict() twice on the same input).
test_predictions = nn_model.predict(LSTM_test_inputs)

plt.plot(LSTM_test_outputs, label="actual")
plt.plot(test_predictions, label="predicted")
plt.legend()
plt.show()

MAE = mean_absolute_error(LSTM_test_outputs, test_predictions)
print('The Mean Absolute Error is: {}'.format(MAE))

# https://github.com/llSourcell/How-to-Predict-Stock-Prices-Easily-Demo/blob/master/lstm.py
def predict_sequence_full(model, data, window_size):
    """Forecast ``len(data)`` steps by feeding each prediction back into the window.

    Starts from ``data[0]``. At every step the model predicts from the
    current window, the oldest row is dropped, and the prediction is inserted
    as a new row at index ``window_size - 1`` (the scalar is broadcast across
    the whole row). Only ``data[0]`` and the length of ``data`` are used.

    Returns:
        A list with one predicted value per element of ``data``.
    """
    frame = data[0]
    forecast = []
    for _ in data:
        next_value = model.predict(frame[np.newaxis, :, :])[0, 0]
        forecast.append(next_value)
        # Shift the window by one row and insert the new prediction.
        shifted = frame[1:]
        frame = np.insert(shifted, [window_size - 1], next_value, axis=0)
    return forecast


# Multi-step forecast over the test windows, compared against the targets.
predictions = predict_sequence_full(nn_model, LSTM_test_inputs, 10)

plt.plot(LSTM_test_outputs, label="actual")
plt.plot(predictions, label="predicted")
plt.legend()
plt.show()

MAE = mean_absolute_error(LSTM_test_outputs, predictions)
print('The Mean Absolute Error is: {}'.format(MAE))

结论:LSTM 并不能解决时间序列预测问题。对单个时间步的预测并不比滞后模型好多少。当增加预测的时间步数时,LSTM 的性能下降速度不像其他更传统的方法那么快;然而在本例中,误差仍然增加了大约 4.5 倍,并且随着所预测的时间步数呈超线性增长。

 

转载于:https://www.cnblogs.com/tszr/p/11235914.html

你可能感兴趣的文章
tcp滑动窗口和读写缓冲区
查看>>
GO 使用静态链接库编译 生成可执行文件 使用第三方 .a 文件,无源码构造
查看>>
ssh 使用指定网卡 连接特定网络
查看>>
鸿蒙操作系统发布会 分析 记录
查看>>
浅谈python 中正则的一些函数
查看>>
app生命周期之即将关闭
查看>>
MPU6050
查看>>
Asp.Net 加载不同项目程序集
查看>>
Jenkins插件--通知Notification
查看>>
思1-基本三观
查看>>
angularJS--apply() 和digest()方法
查看>>
Alpha 冲刺 (5/10)
查看>>
PHP函数之$_SERVER
查看>>
利用安装光盘创建本地yum源补装 RPM 软件包-通过命令行模式
查看>>
XML通過XSD產生CLASS
查看>>
跨线程调用窗体控件
查看>>
linq to sql 扩展方法
查看>>
241. Different Ways to Add Parentheses
查看>>
实验10 编写子程序 1.显示字符串
查看>>
Effect-Compiler Tool(fxc.exe)
查看>>