init: add the Python code

eson 2021-03-19 17:14:35 +08:00
commit 68d7c5d2c9
7 changed files with 438 additions and 0 deletions

184  data.py  Normal file
@@ -0,0 +1,184 @@
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding
from keras.layers import InputLayer
from keras.layers import LSTM
from keras import backend
import pymysql
import pickle
import os
import numpy


def get_collect():
    collect = {}
    loadfile = "./collect.pickle"
    try:
        collect = pickle.load(open(loadfile, 'rb'))
    except Exception as e:
        # Cache miss: rebuild the collection from the database
        print(e)
        # Open the database connection
        db = pymysql.connect(host="sg-board1.livenono.com", port=3306, user="root",
                             passwd="Nono-databoard", db="databoard", charset="utf8")
        # Use the cursor() method to create a cursor object
        cursor = db.cursor()
        # Use the execute() method to run the SQL query
        print(cursor.execute('''SELECT coin, extra_coins, pay_users, create_at from pay_items_hour pih where region = "all" and platform="all"'''))
        # Group the hourly pay rows by day
        collect_pay = {}
        for row in cursor.fetchall():
            coin, extra_coins, pay_users, create_at = row
            d = str(create_at.date())
            if d in collect_pay:
                collect_pay[d].append(row)
            else:
                collect_pay[d] = [row]
        print('Fetched', cursor.rowcount, 'rows')
        # Drop days that do not have a full 24 hourly rows
        deletelist = []
        for k in collect_pay:
            if len(collect_pay[k]) != 24:
                deletelist.append(k)
        for k in deletelist:
            del collect_pay[k]
        querydate = []
        for k in collect_pay:
            querydate.append(k)
        querydate.sort()
        cursor.execute(
            '''SELECT coin, users, create_at from gift_items_hour pih where region = "all" and create_at >= %s and create_at <= %s''',
            (querydate[0], querydate[-1]),
        )
        # Group the hourly gift rows by day
        collect_gift = {}
        for row in cursor.fetchall():
            coin, users, create_at = row
            d = str(create_at.date())
            if d in collect_gift:
                collect_gift[d].append(row)
            else:
                collect_gift[d] = [row]
        # Sort each day's rows chronologically
        # (create_at is column 3 in pay rows, column 2 in gift rows)
        for k in collect_pay:
            collect_pay[k].sort(key=lambda x: x[3])
        for k in collect_gift:
            collect_gift[k].sort(key=lambda x: x[2])
        collect["pay"] = collect_pay
        collect["gift"] = collect_gift
        pickle.dump(collect, open(loadfile, 'wb+'))
    finally:
        # Note: returning from finally suppresses any exception raised above
        return collect


def load_pay_data(textNum=80):
    # textNum is the number of samples held out at the end as a test split
    collect = get_collect()
    # TODO: model the relationship between gift and pay fluctuations
    x_train = []
    y_train = []
    collect_pay = []
    for k in collect["pay"]:
        collect_pay.append(collect["pay"][k])
    collect_pay.sort(key=lambda x: x[0][3])
    lastday_v = collect_pay[0]
    for cur_v in collect_pay[1:]:
        total_coin = 0
        last_total_coin = 0
        count = 0
        for v1, v2 in zip(cur_v, lastday_v):
            # Running totals of coin + extra_coins for the current and previous day
            total_coin += v1[0] + v1[1]
            last_total_coin += v2[0] + v2[1]
            compare = float(total_coin - last_total_coin) / float(last_total_coin)
            x_train.append([count, total_coin, (total_coin - last_total_coin), v1[2], v2[2]])
            count += 1
        # Every hourly sample of the day is labelled with the day's final total
        for i in range(count):
            y_train.append(total_coin)
        lastday_v = cur_v
    x_train = numpy.reshape(x_train, (len(x_train), 5, 1))
    y_train = numpy.reshape(y_train, (len(y_train)))
    tx_train = x_train[len(x_train) - textNum:]
    ty_train = y_train[len(y_train) - textNum:]
    x_train = x_train[:len(x_train) - textNum]
    y_train = y_train[:len(y_train) - textNum]
    return x_train, y_train, tx_train, ty_train


def load_gift_data(textNum=80):
    collect = get_collect()
    x_train = []
    y_train = []
    collect_gift = []
    for k in collect["gift"]:
        collect_gift.append(collect["gift"][k])
    collect_gift.sort(key=lambda x: x[0][2])
    lastday_v = collect_gift[0]
    for cur_v in collect_gift[1:]:
        total_coin = 0
        last_total_coin = 0
        users = 0
        f = 20000000.0  # scaling constant (unused in this function)
        count = 0
        for v1, v2 in zip(cur_v, lastday_v):
            total_coin += v1[0]
            last_total_coin += v2[0]
            users += v1[1]
            compare = float(total_coin - last_total_coin) / float(last_total_coin)
            x_train.append([count, total_coin, compare, users])
            count += 1
        for i in range(count):
            y_train.append(total_coin)
        lastday_v = cur_v
    x_train = numpy.reshape(x_train, (len(x_train), 4, 1))
    y_train = numpy.reshape(y_train, (len(y_train)))
    tx_train = x_train[len(x_train) - textNum:]
    ty_train = y_train[len(y_train) - textNum:]
    x_train = x_train[:len(x_train) - textNum]
    y_train = y_train[:len(y_train) - textNum]
    return x_train, y_train, tx_train, ty_train
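
A quick sanity check on what these loaders return (a minimal sketch, not part of the commit; it assumes collect.pickle or the database is reachable, and uses the default split size of 80):

from data import load_pay_data, load_gift_data

# Pay samples carry 5 features, gift samples 4, each reshaped to
# (samples, features, 1) so they can feed the LSTMs in train_*.py directly.
x_train, y_train, tx_train, ty_train = load_pay_data(80)
print(x_train.shape, y_train.shape)    # expected: (N - 80, 5, 1) and (N - 80,)
print(tx_train.shape, ty_train.shape)  # expected: (80, 5, 1) and (80,)

gx_train, gy_train, gtx_train, gty_train = load_gift_data(80)
print(gx_train.shape)                  # expected: (M - 80, 4, 1)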

1  example.py  Normal file
@@ -0,0 +1 @@

31  predict.py  Normal file
@@ -0,0 +1,31 @@
import numpy
from keras.models import load_model
from data import load_pay_data, load_gift_data

# Evaluate the pay model the same way (disabled):
# x_train, y_train, tx_train, ty_train = load_pay_data(160)
# model = load_model("./predict_pay")
# p_data = model.predict(tx_train)
# for i in range(len(p_data)):
#     comp = (p_data[i][0] - ty_train[i]) / ty_train[i]
#     print(comp, p_data[i][0], ty_train[i])
#     if abs(comp) >= 1:
#         print("predicted:", p_data[i][0], "input:", tx_train[i], "actual:", ty_train[i])

# Evaluate the gift model on the held-out samples
x_train, y_train, tx_train, ty_train = load_gift_data(160)
model = load_model("./predict_gift")
p_data = model.predict(tx_train)
for i in range(len(p_data)):
    # Relative error between prediction and ground truth
    comp = (p_data[i][0] - ty_train[i]) / ty_train[i]
    print(comp, p_data[i][0], ty_train[i])
    if abs(comp) >= 0.1:
        print("predicted:", p_data[i][0], "input:", tx_train[i], "actual:", ty_train[i])

# Single-sample prediction with hand-scaled features (disabled):
# data = numpy.reshape([[15, 2359688 / 10000000, 255968 / 1000000, 10 / 10000]], (1, 4, 1))
# print(model.predict(data))

121  testcase1.py  Normal file
@@ -0,0 +1,121 @@
import csv
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score


def api_dataset():
    with open('api_access_fix.csv', encoding='utf-8-sig') as f:
        reader = csv.reader(f)
        dataset = []
        for item in reader:
            try:
                dataset.append([int(float(item[2]))])
            except:
                pass
    # Patch abnormally low first-day values (<= 500) with the average of the
    # same minute across the following six days (1440 minutes per day)
    for i in range(len(dataset)):
        if dataset[i][0] <= 500 and i < 1440:
            dataset[i][0] = int(sum([dataset[i + x * 1440][0] for x in range(1, 7)]) / 6)
    return np.array(dataset)


# Normalization helper
def sc_fit_transform(nDlist):
    # Scale all values into the 0-1 range
    sc = MinMaxScaler(feature_range=(0, 1))
    dataset_transform = sc.fit_transform(X=nDlist)
    # Return the fitted scaler together with the normalized data
    return sc, np.array(dataset_transform)


###############################################################################
# Use the previous 60 observations to predict the next one
timestep = 60
# Size of the training set
training_num = 8640
# Number of training epochs
epoch = 10
# Samples per batch
batch_size = 100

###############################################################################
listDataset = api_dataset()
# Training inputs: the access counts
xTrainDataset = listDataset[0:training_num]
# The target is the next access count after each input
yTrainDataset = listDataset[1:training_num+1]
# Normalize the raw data
scTrainDataseX, xTrainDataset = sc_fit_transform(xTrainDataset)
scTrainDataseY, yTrainDataset = sc_fit_transform(yTrainDataset)

###############################################################################
# Build the sliding-window training set the LSTM expects
xTrain = []
for i in range(timestep, training_num):
    xTrain.append(xTrainDataset[i-timestep : i])
xTrain = np.array(xTrain)
yTrain = []
for i in range(timestep, training_num):
    yTrain.append(yTrainDataset[i])
yTrain = np.array(yTrain)

###############################################################################
# Build the network with the Sequential API
model = Sequential()
# As the first layer, the LSTM needs an explicit input shape;
# return_sequences=True would return the full output sequence instead
model.add(LSTM(units=128, input_shape=[xTrain.shape[1], 1]))
model.add(Dense(1))
# Configure training
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['accuracy'])
model.fit(x=xTrain, y=yTrain, epochs=epoch, batch_size=batch_size)
model.save('my_model.h5')

###############################################################################
# One week of minutely data is 10080 points; the remainder is the test set
xTestDataset = listDataset[training_num:10080-2]
scTesDatasetX, xTestDataset = sc_fit_transform(xTestDataset)
yTestDataset = listDataset[training_num+1:10080-1]
scTestDataseY, yTestDataset = sc_fit_transform(yTestDataset)
# Build the test windows the same way
xTest = []
for i in range(timestep, len(xTestDataset)):
    xTest.append(xTestDataset[i-timestep : i])
xTest = np.array(xTest)
print(xTest.shape)
yTest = []
for i in range(timestep, len(xTestDataset)):
    yTest.append(yTestDataset[i])
# Undo the normalization
yTest = scTestDataseY.inverse_transform(X=yTest)
print(yTest.shape)
print(yTest)

###############################################################################
# Predict
yPredictes = model.predict(x=xTest)
# Undo the normalization
yPredictes = scTestDataseY.inverse_transform(X=yPredictes)
print(yPredictes.shape)
print(yPredictes)

###############################################################################
# Compare the results in a chart: red is the real data, blue is the prediction
plt.plot(yTest, color='red', label='Real')
plt.plot(yPredictes, color='blue', label='Predict')
plt.title(label='Prediction')
plt.xlabel(xlabel='Time')
plt.ylabel(ylabel='Api_access_num')
plt.legend()
plt.show()

# Evaluation metrics: mae, rmse, r2_score
mae = mean_absolute_error(yTest, yPredictes)
rmse = mean_squared_error(yTest, yPredictes, squared=False)
r2 = r2_score(yTest, yPredictes)
print(mae, rmse, r2)
# 72.02636248234026 98.38626354602893 0.9791679689516253
# 45.70792188492153 74.77525176850149 0.9880226807229917
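
The window construction above is easier to see on a toy series (an illustrative sketch with timestep=3 instead of 60; in the script the target series is additionally shifted forward by one step):

import numpy as np

data = np.array([[10], [11], [12], [13], [14], [15]])
timestep = 3
windows, targets = [], []
for i in range(timestep, len(data)):
    windows.append(data[i - timestep:i])  # the previous 3 observations
    targets.append(data[i][0])            # the observation that follows
print(np.array(windows).shape)  # (3, 3, 1)
print(targets)                  # [13, 14, 15]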

21  testcase2.py  Normal file
@@ -0,0 +1,21 @@
# load and plot dataset
from pandas import read_csv
from pandas import datetime
from pandas import DataFrame
from pandas import concat
import pandas as pd
from math import sqrt
from matplotlib import pyplot
import sklearn
from sklearn.metrics import mean_squared_error

# load dataset
def parser(x):
    # the CSV stores dates like "1-01"; prepend "190" to get "1901-01"
    return datetime.strptime('190' + x, '%Y-%m')

series = read_csv('case2.csv', header=0, parse_dates=[0], index_col=0, squeeze=True, date_parser=parser)
# summarize first few rows
print(series.head())
# line plot
series.plot()
pyplot.show()
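
Note that from pandas import datetime was removed in pandas 1.0, and newer pandas drops read_csv's squeeze= argument and deprecates date_parser=; a rough equivalent on current pandas (an untested sketch, assuming the same case2.csv layout) is:

from datetime import datetime  # stdlib datetime replaces pandas.datetime
import pandas as pd
from matplotlib import pyplot

series = pd.read_csv('case2.csv', header=0, index_col=0)
# same trick as above: "1-01" -> "1901-01"
series.index = [datetime.strptime('190' + x, '%Y-%m') for x in series.index]
series = series.squeeze("columns")  # replaces squeeze=True
print(series.head())
series.plot()
pyplot.show()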

41  train_gift.py  Normal file
@@ -0,0 +1,41 @@
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding
from keras.layers import InputLayer, Activation
from keras.layers import LSTM
from keras import backend
import pymysql
import pickle
import os
import numpy
from data import load_gift_data

if __name__ == "__main__":
    x_train, y_train, tx_train, ty_train = load_gift_data()
    model = Sequential()
    units = 256
    model.add(LSTM(units, activation='relu', input_shape=(4, 1)))
    model.add(Dropout(0.3))
    model.add(Dense(1))
    model.summary()
    model.compile(loss='mse', optimizer='adam')
    model.fit(x_train, y_train, batch_size=32, epochs=1000)
    model.save("./predict_gift")
    # Report relative error on the held-out samples
    p_data = model.predict(tx_train)
    for i in range(len(p_data)):
        comp = (p_data[i][0] - ty_train[i]) / ty_train[i]
        print(comp, p_data[i][0], ty_train[i])
        if abs(comp) >= 0.2:
            print("predicted:", p_data[i][0], "input:", tx_train[i], "actual:", ty_train[i])

39  train_pay.py  Normal file
@@ -0,0 +1,39 @@
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding
from keras.layers import InputLayer
from keras.layers import LSTM
from keras import backend
import pymysql
import pickle
import os
import numpy
from data import load_pay_data

if __name__ == "__main__":
    x_train, y_train, tx_train, ty_train = load_pay_data()
    model = Sequential()
    units = 500
    model.add(LSTM(units, activation='relu', input_shape=(5, 1)))
    model.add(Dropout(0.1))
    model.add(Dense(1))
    model.summary()
    model.compile(loss='mse', optimizer='adam')
    model.fit(x_train, y_train, batch_size=128, epochs=1500)
    model.save("./predict_pay")
    # Report relative error on the held-out samples
    p_data = model.predict(tx_train)
    for i in range(len(p_data)):
        print((p_data[i][0] - ty_train[i]) / ty_train[i], p_data[i][0], ty_train[i])
        # print("predicted:", p_data[i][0], "input:", tx_train[i], "actual:", ty_train[i])