init: add the Python code
commit 68d7c5d2c9

184  data.py  Normal file
@@ -0,0 +1,184 @@
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding
from keras.layers import InputLayer
from keras.layers import LSTM
from keras import backend

import pymysql
import pickle
import os
import numpy


def get_collect():
    collect = {}
    loadfile = "./collect.pickle"

    try:
        collect = pickle.load(open(loadfile, 'rb'))
    except Exception as e:
        print(e)
        # Open the database connection
        db = pymysql.connect(host="sg-board1.livenono.com", port=3306, user="root", passwd="Nono-databoard", db="databoard", charset="utf8")

        # Create a cursor object with cursor()
        cursor = db.cursor()

        # Run the SQL query with execute()
        print(cursor.execute('''SELECT coin, extra_coins, pay_users, create_at from pay_items_hour pih where region = "all" and platform="all"'''))
        collect_pay = {}
        for row in cursor.fetchall():
            # print(row)
            coin, extra_coins, pay_users, create_at = row
            d = str(create_at.date())
            if d in collect_pay:
                collect_pay[d].append(row)
            else:
                collect_pay[d] = [row]
            # print(dir(create_at), create_at.timestamp(), create_at.date())
        print('Found', cursor.rowcount, 'rows in total')

        # Drop days that do not have all 24 hourly rows
        deletelist = []
        for k in collect_pay:
            if len(collect_pay[k]) != 24:
                deletelist.append(k)

        for k in deletelist:
            del collect_pay[k]

        querydate = []
        for k in collect_pay:
            querydate.append(k)

        querydate.sort()
        cursor.execute(
            '''SELECT coin, users, create_at from gift_items_hour pih where region = "all" and create_at >= %s and create_at <= %s''',
            (querydate[0], querydate[-1]),
        )

        collect_gift = {}
        for row in cursor.fetchall():
            coin, users, create_at = row
            d = str(create_at.date())
            if d in collect_gift:
                collect_gift[d].append(row)
            else:
                collect_gift[d] = [row]

        # Sort each day's rows by create_at
        for k in collect_pay:
            l = collect_pay[k]
            l.sort(key=lambda x: x[3])

        for k in collect_gift:
            l = collect_gift[k]
            l.sort(key=lambda x: x[2])

        collect["pay"] = collect_pay
        collect["gift"] = collect_gift

        pickle.dump(collect, open(loadfile, 'wb+'))
    finally:
        return collect


def load_pay_data(textNum=80):

    collect = get_collect()

    # TODO: model the relationship between gift and pay fluctuations

    x_train = []
    y_train = []

    collect_pay = []
    for k in collect["pay"]:
        collect_pay.append(collect["pay"][k])

    # Sort the days by the create_at of their first row
    collect_pay.sort(key=lambda x: x[0][3])
    lastday_v = collect_pay[0]
    for cur_v in collect_pay[1:]:

        total_coin = 0
        last_total_coin = 0

        count = 0
        for v1, v2 in zip(cur_v, lastday_v):
            total_coin += v1[0] + v1[1]
            # print(v1[3])

            last_total_coin += v2[0] + v2[1]
            # print(v2[3])
            compare = float(total_coin - last_total_coin) / float(last_total_coin)
            # print(compare)

            x_train.append([count, total_coin, (total_coin - last_total_coin), v1[2], v2[2]])
            count += 1

        for i in range(count):
            y_train.append(total_coin)

        lastday_v = cur_v

    x_train = numpy.reshape(x_train, (len(x_train), 5, 1))
    y_train = numpy.reshape(y_train, (len(y_train)))
    # max_features = 1024

    # Hold out the last textNum samples as a test set
    tx_train = x_train[len(x_train) - textNum:]
    ty_train = y_train[len(y_train) - textNum:]

    x_train = x_train[:len(x_train) - textNum]
    y_train = y_train[:len(y_train) - textNum]

    return x_train, y_train, tx_train, ty_train


def load_gift_data(textNum=80):

    collect = get_collect()

    x_train = []
    y_train = []

    collect_gift = []
    for k in collect["gift"]:
        collect_gift.append(collect["gift"][k])

    collect_gift.sort(key=lambda x: x[0][2])
    lastday_v = collect_gift[0]
    for cur_v in collect_gift[1:]:

        total_coin = 0
        last_total_coin = 0
        users = 0

        f = 20000000.0
        count = 0
        for v1, v2 in zip(cur_v, lastday_v):
            total_coin += v1[0]
            # print(v1[3])

            last_total_coin += v2[0]
            users += v1[1]

            # print(v2[3])
            compare = float(total_coin - last_total_coin) / float(last_total_coin)
            # print(compare)

            x_train.append([count, total_coin, compare, users])
            count += 1

        for i in range(count):
            y_train.append(total_coin)

        lastday_v = cur_v

    x_train = numpy.reshape(x_train, (len(x_train), 4, 1))
    y_train = numpy.reshape(y_train, (len(y_train)))
    # max_features = 1024

    tx_train = x_train[len(x_train) - textNum:]
    ty_train = y_train[len(y_train) - textNum:]

    x_train = x_train[:len(x_train) - textNum]
    y_train = y_train[:len(y_train) - textNum]

    return x_train, y_train, tx_train, ty_train
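A minimal usage sketch for the loaders above (assumes either a reachable MySQL server or an existing ./collect.pickle; the shapes follow the numpy.reshape calls in data.py, variable names are illustrative):

    from data import load_pay_data, load_gift_data

    # pay samples: [count, total_coin, coin delta, pay_users, previous-day pay_users]
    x_train, y_train, tx_train, ty_train = load_pay_data(textNum=80)
    print(x_train.shape, y_train.shape)    # roughly (N, 5, 1) and (N,)

    # gift samples: [count, total_coin, compare, users]
    gx_train, gy_train, gtx_train, gty_train = load_gift_data(textNum=80)
    print(gx_train.shape, gy_train.shape)  # roughly (M, 4, 1) and (M,)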
1  example.py  Normal file
@@ -0,0 +1 @@
31  predict.py  Normal file
@@ -0,0 +1,31 @@
import numpy
from keras.models import load_model
from data import load_pay_data, load_gift_data

# x_train, y_train, tx_train, ty_train = load_pay_data(160)
# model = load_model("./predict_pay")

# p_data = model.predict(tx_train)
# for i in range(len(p_data)):
#     comp = (p_data[i][0] - ty_train[i]) / ty_train[i]
#     print(comp, p_data[i][0], ty_train[i])
#     if abs(comp) >= 1:
#         print("Predicted:", p_data[i][0], "Input:", tx_train[i], "Actual:", ty_train[i])


x_train, y_train, tx_train, ty_train = load_gift_data(160)
model = load_model("./predict_gift")
p_data = model.predict(tx_train)
for i in range(len(p_data)):
    comp = (p_data[i][0] - ty_train[i]) / ty_train[i]
    print(comp, p_data[i][0], ty_train[i])
    if abs(comp) >= 0.1:
        print("Predicted:", p_data[i][0], "Input:", tx_train[i], "Actual:", ty_train[i])

# data = numpy.reshape([[15, 2359688 / 10000000, 255968 / 1000000, 10 / 10000]], (1, 4, 1))
# print(model.predict(data))
121  testcase1.py  Normal file
@@ -0,0 +1,121 @@
import csv
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score


def api_dataset():
    with open('api_access_fix.csv', encoding='utf-8-sig') as f:
        reader = csv.reader(f)
        dataset = []
        for item in reader:
            try:
                dataset.append([int(float(item[2]))])
            except:
                pass
        # Patch abnormally low values in the first day with the average of the same slot on the next six days
        for i in range(len(dataset)):
            if dataset[i][0] <= 500 and i < 1440:
                dataset[i][0] = int(sum([dataset[i + x * 1440][0] for x in range(1, 7)]) / 6)
        return np.array(dataset)


# Normalization helper
def sc_fit_transform(nDlist):
    # Scale all data into the 0-1 range
    sc = MinMaxScaler(feature_range=(0, 1))
    dataset_transform = sc.fit_transform(X=nDlist)
    # Return the scaler and the normalized data
    return sc, np.array(dataset_transform)


###############################################################################
# Use the previous 60 access counts to predict the next one
timestep = 60
# Size of the training set
training_num = 8640
# Train for 10 epochs
epoch = 10
# Number of samples per batch
batch_size = 100
###############################################################################
listDataset = api_dataset()
# print(listDataset.shape)
# Build the training access dataset
xTrainDataset = listDataset[0:training_num]
# The next access count is the training target
yTrainDataset = listDataset[1:training_num+1]

# Normalize the raw data
scTrainDataseX, xTrainDataset = sc_fit_transform(xTrainDataset)
scTrainDataseY, yTrainDataset = sc_fit_transform(yTrainDataset)

###############################################################################
# Build the training sequences the LSTM model needs
xTrain = []
for i in range(timestep, training_num):
    xTrain.append(xTrainDataset[i-timestep : i])
xTrain = np.array(xTrain)
# print(xTrain.shape)

yTrain = []
for i in range(timestep, training_num):
    yTrain.append(yTrainDataset[i])
yTrain = np.array(yTrain)
# print(yTrain.shape)
###############################################################################
# Build the network with the Sequential model
model = Sequential()
# return_sequences=True would return the full sequence; as the first layer, the LSTM needs an explicit input shape
model.add(LSTM(units=128, input_shape=[xTrain.shape[1], 1]))
model.add(Dense(1))
# Configure the model
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['accuracy'])
model.fit(x=xTrain, y=yTrain, epochs=epoch, batch_size=batch_size)
model.save('my_model.h5')
###############################################################################
xTestDataset = listDataset[training_num:10080-2]
scTesDatasetX, xTestDataset = sc_fit_transform(xTestDataset)

yTestDataset = listDataset[training_num+1:10080-1]
scTestDataseY, yTestDataset = sc_fit_transform(yTestDataset)
# Build the test sequences the LSTM model needs
xTest = []
for i in range(timestep, len(xTestDataset)):
    xTest.append(xTestDataset[i-timestep : i])
xTest = np.array(xTest)
print(xTest.shape)
yTest = []
for i in range(timestep, len(xTestDataset)):
    yTest.append(yTestDataset[i])
# Invert the normalization
yTest = scTestDataseY.inverse_transform(X=yTest)
print(yTest.shape)
print(yTest)
###############################################################################
# Run the prediction
yPredictes = model.predict(x=xTest)
# Invert the normalization
yPredictes = scTestDataseY.inverse_transform(X=yPredictes)
print(yPredictes.shape)
print(yPredictes)
###############################################################################
# Compare the results: plot the data, red is the real data, blue is the prediction
plt.plot(yTest, color='red', label='Real')
plt.plot(yPredictes, color='blue', label='Predict')
plt.title(label='Prediction')
plt.xlabel(xlabel='Time')
plt.ylabel(ylabel='Api_access_num')
plt.legend()
plt.show()

# Evaluation metrics: mae, rmse, r2_score
mae = mean_absolute_error(yTest, yPredictes)
rmse = mean_squared_error(yTest, yPredictes, squared=False)
r2 = r2_score(yTest, yPredictes)
print(mae, rmse, r2)
# 72.02636248234026 98.38626354602893 0.9791679689516253
# 45.70792188492153 74.77525176850149 0.9880226807229917
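For reference, a small self-contained sketch of the sliding-window step used above (toy data and variable names are illustrative only; same timestep logic as testcase1.py):

    import numpy as np

    # toy normalized series of 70 values, window of 60 as in testcase1.py
    series = np.arange(70, dtype=float).reshape(-1, 1) / 70.0
    timestep = 60
    xToy, yToy = [], []
    for i in range(timestep, len(series)):
        xToy.append(series[i - timestep:i])   # previous 60 values
        yToy.append(series[i])                # the value to predict
    xToy, yToy = np.array(xToy), np.array(yToy)
    print(xToy.shape, yToy.shape)             # (10, 60, 1) (10, 1)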
21  testcase2.py  Normal file
@@ -0,0 +1,21 @@
# load and plot dataset
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
from datetime import datetime
from matplotlib import pyplot
import pandas as pd
from math import sqrt
import sklearn
from sklearn.metrics import mean_squared_error

# load dataset
def parser(x):
    return datetime.strptime('190'+x, '%Y-%m')

series = read_csv('case2.csv', header=0, parse_dates=[0], index_col=0, squeeze=True, date_parser=parser)
# summarize first few rows
print(series.head())
# line plot
series.plot()
pyplot.show()
41  train_gift.py  Normal file
@@ -0,0 +1,41 @@
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding
from keras.layers import InputLayer, Activation
from keras.layers import LSTM
from keras import backend

import pymysql
import pickle
import os
import numpy

from data import load_gift_data


if __name__ == "__main__":

    x_train, y_train, tx_train, ty_train = load_gift_data()

    model = Sequential()
    units = 256

    model.add(LSTM(units, activation='relu', input_shape=(4, 1)))
    model.add(Dropout(0.3))
    model.add(Dense(1))
    model.summary()

    model.compile(loss='mse', optimizer='adam')

    model.fit(x_train, y_train, batch_size=32, epochs=1000)
    model.save("./predict_gift")

    p_data = model.predict(tx_train)
    for i in range(len(p_data)):
        comp = (p_data[i][0] - ty_train[i]) / ty_train[i]
        print(comp, p_data[i][0], ty_train[i])
        if abs(comp) >= 0.2:
            print("Predicted:", p_data[i][0], "Input:", tx_train[i], "Actual:", ty_train[i])
39  train_pay.py  Normal file
@@ -0,0 +1,39 @@
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding
from keras.layers import InputLayer
from keras.layers import LSTM
from keras import backend

import pymysql
import pickle
import os
import numpy

from data import load_pay_data


if __name__ == "__main__":

    x_train, y_train, tx_train, ty_train = load_pay_data()

    model = Sequential()
    units = 500
    model.add(LSTM(units, activation='relu', input_shape=(5, 1)))
    model.add(Dropout(0.1))
    model.add(Dense(1))
    model.summary()
    model.compile(loss='mse', optimizer='adam')

    model.fit(x_train, y_train, batch_size=128, epochs=1500)
    model.save("./predict_pay")

    p_data = model.predict(tx_train)
    for i in range(len(p_data)):
        print((p_data[i][0] - ty_train[i]) / ty_train[i], p_data[i][0], ty_train[i])
        # print("Predicted:", p_data[i][0], "Input:", tx_train[i], "Actual:", ty_train[i])