import csv

import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score


def api_dataset():
    """Load the API access counts from CSV and patch low values at the start.

    Reads the third column of ``api_access_fix.csv`` (opened as utf-8-sig).
    Rows whose third column is missing or not numeric (for example a header
    row) are skipped.

    For each of the first 1440 samples whose value is <= 500, the value is
    replaced by the truncated mean of the samples located 1440, 2*1440, ...,
    6*1440 positions later.  If the file is too short to provide all six
    reference samples, the mean is taken over the ones that exist, and the
    value is left untouched when none exist.

    Returns:
        numpy.ndarray: integer array with one column, one row per sample.
    """
    dataset = []
    with open('api_access_fix.csv', encoding='utf-8-sig') as f:
        for item in csv.reader(f):
            try:
                dataset.append([int(float(item[2]))])
            except (IndexError, ValueError, OverflowError):
                # Short row, non-numeric text, NaN or infinity: skip the row.
                continue

    period = 1440        # presumably one day of per-minute samples - confirm
    threshold = 500      # values at or below this are treated as bad samples
    reference_count = 6  # number of later periods used to rebuild a sample
    total = len(dataset)

    # Only the first period is ever patched, so do not scan the whole series.
    for i in range(min(period, total)):
        if dataset[i][0] > threshold:
            continue
        references = [
            dataset[i + k * period][0]
            for k in range(1, reference_count + 1)
            if i + k * period < total
        ]
        if references:
            dataset[i][0] = int(sum(references) / len(references))

    return np.array(dataset)


# Normalisation helper.
def sc_fit_transform(nDlist):
    """Fit a MinMaxScaler on ``nDlist`` and return it with the scaled data.

    Args:
        nDlist: 2-D array-like accepted by ``MinMaxScaler.fit_transform``.

    Returns:
        tuple: ``(scaler, scaled)`` where ``scaler`` is the fitted
        ``MinMaxScaler`` (feature range 0-1) and ``scaled`` is a NumPy array
        holding the transformed data.
    """
    sc = MinMaxScaler(feature_range=(0, 1))
    dataset_transform = sc.fit_transform(X=nDlist)
    return sc, np.array(dataset_transform)


###############################################################################
# Number of previous samples used as the input window for one prediction.
timestep = 60
# Number of samples that make up the training set.
training_num = 8640
# Number of training epochs.
epoch = 10
# Mini-batch size used during training.
batch_size = 100
###############################################################################
listDataset = api_dataset()
# print(listDataset.shape)

# Training inputs: the first `training_num` samples.
xTrainDataset = listDataset[0:training_num]
# Training targets: the same series shifted forward by one sample.
yTrainDataset = listDataset[1:training_num + 1]

# Scale inputs and targets to the 0-1 range; the fitted scalers are kept so
# the same scaling can be applied or inverted later.
scTrainDataseX, xTrainDataset = sc_fit_transform(xTrainDataset)
scTrainDataseY, yTrainDataset = sc_fit_transform(yTrainDataset)
###############################################################################
# Build the windowed training set expected by the LSTM: one window of
# `timestep` consecutive samples per training example.
# NOTE(review): window i covers samples i-timestep .. i-1 while its target is
# yTrainDataset[i] == listDataset[i + 1], so sample i itself is skipped.
# Confirm that a two-step-ahead target is intended.
xTrain = np.array([
    xTrainDataset[i - timestep:i] for i in range(timestep, training_num)
])
# print(xTrain.shape)

yTrain = np.array(yTrainDataset[timestep:training_num])
# print(yTrain.shape)
###############################################################################
# Build the network as a Keras Sequential model.
model = Sequential()
# return_sequences=True would make the LSTM return its full output sequence;
# when an LSTM is the first layer, its input shape must be specified.
model.add(LSTM(units=128, input_shape=[xTrain.shape[1], 1]))
model.add(Dense(1))

# Configure training.  The loss is a regression loss, so the reported metric
# is the mean absolute error; classification accuracy is not meaningful for
# continuous targets.
model.compile(optimizer='adam', loss='mean_squared_error',
              metrics=['mean_absolute_error'])
model.fit(x=xTrain, y=yTrain, epochs=epoch, batch_size=batch_size)
model.save('my_model.h5')
###############################################################################
# Upper bound used for the evaluation slices below.
total_num = 10080

# Evaluation inputs are scaled with the scaler fitted on the TRAINING inputs.
# Fitting a new scaler on the test split would feed the model values on a
# different scale from the one it was trained on and would leak test-set
# statistics into the evaluation.
xTestDataset = scTrainDataseX.transform(listDataset[training_num:total_num - 2])
# Evaluation targets stay in original units: the series shifted by one sample.
yTestDataset = listDataset[training_num + 1:total_num - 1]

# Build the windowed evaluation set, using the same windowing as for training.
xTest = np.array([
    xTestDataset[i - timestep:i] for i in range(timestep, len(xTestDataset))
])
print(xTest.shape)

# Ground truth aligned with the windows above, kept in original units.
yTest = np.array(yTestDataset[timestep:len(xTestDataset)], dtype=float)
print(yTest.shape)
print(yTest)
###############################################################################
# Run the prediction.
yPredictes = model.predict(x=xTest)
# Map the predictions back to original units with the scaler that was fitted
# on the training targets, i.e. the scale the model learned to output.
yPredictes = scTrainDataseY.inverse_transform(X=yPredictes)
print(yPredictes.shape)
print(yPredictes)
###############################################################################
# Compare the results: red is the real data, blue is the prediction.
plt.plot(yTest, color='red', label='Real')
plt.plot(yPredictes, color='blue', label='Predict')
plt.title(label='Prediction')
plt.xlabel(xlabel='Time')
plt.ylabel(ylabel='Api_access_num')
plt.legend()
plt.show()

# Evaluation metrics: mae, rmse, r2_score.
mae = mean_absolute_error(yTest, yPredictes)
# Take the square root explicitly rather than passing squared=False, which
# newer scikit-learn releases no longer accept.
rmse = np.sqrt(mean_squared_error(yTest, yPredictes))
r2 = r2_score(yTest, yPredictes)
print(mae, rmse, r2)
# Values left in the file by the original author (order: mae, rmse, r2),
# presumably from earlier runs made before the test set was scaled with the
# training scalers:
# 72.02636248234026 98.38626354602893 0.9791679689516253
# 45.70792188492153 74.77525176850149 0.9880226807229917