From 68d7c5d2c9a2f197a4240370297107635f2be201 Mon Sep 17 00:00:00 2001
From: eson <474420502@qq.com>
Date: Fri, 19 Mar 2021 17:14:35 +0800
Subject: [PATCH] init: add the Python code

---
 data.py       | 184 ++++++++++++++++++++++++++++++++++++++++++++++++++
 example.py    |   1 +
 predict.py    |  31 +++++++++
 testcase1.py  | 121 +++++++++++++++++++++++++++++++++
 testcase2.py  |  21 ++++++
 train_gift.py |  41 +++++++++++
 train_pay.py  |  39 +++++++++++
 7 files changed, 438 insertions(+)
 create mode 100644 data.py
 create mode 100644 example.py
 create mode 100644 predict.py
 create mode 100644 testcase1.py
 create mode 100644 testcase2.py
 create mode 100644 train_gift.py
 create mode 100644 train_pay.py

diff --git a/data.py b/data.py
new file mode 100644
index 0000000..8585862
--- /dev/null
+++ b/data.py
@@ -0,0 +1,184 @@
+import pymysql
+import pickle
+import numpy
+
+
+def get_collect():
+    collect = {}
+    loadfile = "./collect.pickle"
+
+    try:
+        collect = pickle.load(open(loadfile, 'rb'))
+    except Exception as e:
+        print(e)
+        # Cache miss: fall back to the database connection
+        db = pymysql.connect(host="sg-board1.livenono.com", port=3306, user="root",
+                             passwd="Nono-databoard", db="databoard", charset="utf8")
+
+        # Create a cursor object
+        cursor = db.cursor()
+
+        # Execute the SQL query
+        print(cursor.execute('''SELECT coin, extra_coins, pay_users, create_at from pay_items_hour pih where region = "all" and platform="all"'''))
+        collect_pay = {}
+        for row in cursor.fetchall():
+            coin, extra_coins, pay_users, create_at = row
+            d = str(create_at.date())
+            if d in collect_pay:
+                collect_pay[d].append(row)
+            else:
+                collect_pay[d] = [row]
+        print(cursor.rowcount, 'rows fetched')
+
+        # Drop days that do not have a full 24 hours of data
+        deletelist = []
+        for k in collect_pay:
+            if len(collect_pay[k]) != 24:
+                deletelist.append(k)
+        for k in deletelist:
+            del collect_pay[k]
+
+        querydate = sorted(collect_pay)
+        cursor.execute(
+            '''SELECT coin, users, create_at from gift_items_hour pih where region = "all" and create_at >= %s and create_at <= %s''',
+            (querydate[0], querydate[-1]),
+        )
+
+        collect_gift = {}
+        for row in cursor.fetchall():
+            coin, users, create_at = row
+            d = str(create_at.date())
+            if d in collect_gift:
+                collect_gift[d].append(row)
+            else:
+                collect_gift[d] = [row]
+
+        # Sort each day's rows by their timestamp column
+        for k in collect_pay:
+            collect_pay[k].sort(key=lambda x: x[3])
+        for k in collect_gift:
+            collect_gift[k].sort(key=lambda x: x[2])
+
+        collect["pay"] = collect_pay
+        collect["gift"] = collect_gift
+
+        pickle.dump(collect, open(loadfile, 'wb+'))
+        db.close()
+    finally:
+        return collect
+
+
+def load_pay_data(testNum=80):
+
+    collect = get_collect()
+
+    # TODO: model the relationship between gift and pay fluctuations
+
+    x_train = []
+    y_train = []
+
+    collect_pay = []
+    for k in collect["pay"]:
+        collect_pay.append(collect["pay"][k])
+
+    # Sort days by the timestamp of their first row
+    collect_pay.sort(key=lambda x: x[0][3])
+    lastday_v = collect_pay[0]
+    for cur_v in collect_pay[1:]:
+
+        total_coin = 0
+        last_total_coin = 0
+
+        count = 0
+        for v1, v2 in zip(cur_v, lastday_v):
+            total_coin += v1[0] + v1[1]
+            last_total_coin += v2[0] + v2[1]
+
+            # Hourly features: [hour index, running coin total, delta vs. yesterday,
+            # paying users this hour, paying users same hour yesterday]
+            x_train.append([count, total_coin, (total_coin - last_total_coin), v1[2], v2[2]])
+            count += 1
+
+        # Every hour of the day gets the day's final total as its label
+        for i in range(count):
+            y_train.append(total_coin)
+
+        lastday_v = cur_v
+
+    x_train = numpy.reshape(x_train, (len(x_train), 5, 1))
+    y_train = numpy.reshape(y_train, (len(y_train)))
+
+    # Hold out the last testNum samples as the test set
+    tx_train = x_train[len(x_train) - testNum:]
+    ty_train = y_train[len(y_train) - testNum:]
+
+    x_train = x_train[:len(x_train) - testNum]
+    y_train = y_train[:len(y_train) - testNum]
+
+    return x_train, y_train, tx_train, ty_train
+
+
+def load_gift_data(testNum=80):
+
+    collect = get_collect()
+
+    x_train = []
+    y_train = []
+
+    collect_gift = []
+    for k in collect["gift"]:
+        collect_gift.append(collect["gift"][k])
+
+    collect_gift.sort(key=lambda x: x[0][2])
+    lastday_v = collect_gift[0]
+    for cur_v in collect_gift[1:]:
+
+        total_coin = 0
+        last_total_coin = 0
+        users = 0
+
+        count = 0
+        for v1, v2 in zip(cur_v, lastday_v):
+            total_coin += v1[0]
+            last_total_coin += v2[0]
+            users += v1[1]
+
+            # Relative change against the same hour yesterday
+            compare = float(total_coin - last_total_coin) / float(last_total_coin)
+
+            # Hourly features: [hour index, running coin total, relative change, users]
+            x_train.append([count, total_coin, compare, users])
+            count += 1
+
+        for i in range(count):
+            y_train.append(total_coin)
+
+        lastday_v = cur_v
+
+    x_train = numpy.reshape(x_train, (len(x_train), 4, 1))
+    y_train = numpy.reshape(y_train, (len(y_train)))
+
+    tx_train = x_train[len(x_train) - testNum:]
+    ty_train = y_train[len(y_train) - testNum:]
+
+    x_train = x_train[:len(x_train) - testNum]
+    y_train = y_train[:len(y_train) - testNum]
+
+    return x_train, y_train, tx_train, ty_train
\ No newline at end of file
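
A note on the reshape in data.py: numpy.reshape(x_train, (len(x_train), 5, 1)) hands the LSTM the five engineered features as a sequence of five one-dimensional timesteps, not as one timestep with five features. A minimal sketch with made-up toy values (only numpy assumed) contrasting the two layouts:

    import numpy

    # Hypothetical rows mirroring load_pay_data's feature layout:
    # [hour index, running coin total, delta vs. yesterday, users today, users yesterday]
    rows = [[0, 1200, 150, 40, 35],
            [1, 2500, 300, 42, 36]]

    as_timesteps = numpy.reshape(rows, (len(rows), 5, 1))  # layout used here: 5 timesteps x 1 feature
    as_features = numpy.reshape(rows, (len(rows), 1, 5))   # alternative: 1 timestep x 5 features
    print(as_timesteps.shape, as_features.shape)           # (2, 5, 1) (2, 1, 5)

Either layout trains, but it changes what the LSTM's recurrence runs over, so the choice is worth making deliberately.
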
diff --git a/example.py b/example.py
new file mode 100644
index 0000000..0519ecb
--- /dev/null
+++ b/example.py
@@ -0,0 +1 @@
+ 
\ No newline at end of file
diff --git a/predict.py b/predict.py
new file mode 100644
index 0000000..0c18b6c
--- /dev/null
+++ b/predict.py
@@ -0,0 +1,31 @@
+import numpy
+from keras.models import load_model
+from data import load_pay_data, load_gift_data
+
+# x_train, y_train, tx_train, ty_train = load_pay_data(160)
+# model = load_model("./predict_pay")
+
+# p_data = model.predict(tx_train)
+# for i in range(len(p_data)):
+#     comp = (p_data[i][0] - ty_train[i]) / ty_train[i]
+#     print(comp, p_data[i][0], ty_train[i])
+#     if abs(comp) >= 1:
+#         print("predicted:", p_data[i][0], "input:", tx_train[i], "actual:", ty_train[i])
+
+
+x_train, y_train, tx_train, ty_train = load_gift_data(160)
+model = load_model("./predict_gift")
+p_data = model.predict(tx_train)
+for i in range(len(p_data)):
+    # Relative error against the ground truth
+    comp = (p_data[i][0] - ty_train[i]) / ty_train[i]
+    print(comp, p_data[i][0], ty_train[i])
+    if abs(comp) >= 0.1:
+        print("predicted:", p_data[i][0], "input:", tx_train[i], "actual:", ty_train[i])
+
+# data = numpy.reshape([[15, 2359688 / 10000000, 255968 / 1000000, 10 / 10000]], (1, 4, 1))
+# print(model.predict(data))
\ No newline at end of file
diff --git a/testcase1.py b/testcase1.py
new file mode 100644
index 0000000..9117401
--- /dev/null
+++ b/testcase1.py
@@ -0,0 +1,121 @@
+import csv
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.preprocessing import MinMaxScaler
+from keras.models import Sequential
+from keras.layers import Dense, LSTM
+from sklearn.metrics import mean_absolute_error
+from sklearn.metrics import mean_squared_error
+from sklearn.metrics import r2_score
+
+def api_dataset():
+    with open('api_access_fix.csv', encoding='utf-8-sig') as f:
+        reader = csv.reader(f)
+        dataset = []
+        for item in reader:
+            try:
+                dataset.append([int(float(item[2]))])
+            except (ValueError, IndexError):
+                pass  # skip malformed rows
+        # Patch abnormally low first-day values (<= 500) with the average of
+        # the same minute over the following six days (1440 minutes per day)
+        for i in range(len(dataset)):
+            if dataset[i][0] <= 500 and i < 1440:
+                dataset[i][0] = int(sum([dataset[i + x * 1440][0] for x in range(1, 7)]) / 6)
+        return np.array(dataset)
+
+# Normalization helper
+def sc_fit_transform(nDlist):
+    # Scale all data into the 0-1 range
+    sc = MinMaxScaler(feature_range=(0, 1))
+    dataset_transform = sc.fit_transform(X=nDlist)
+    # Return the fitted scaler together with the normalized data
+    return sc, np.array(dataset_transform)
+
+###############################################################################
+# Use the previous 60 observations to predict the next one
+timestep = 60
+# Size of the training set
+training_num = 8640
+# Number of training epochs
+epoch = 10
+# Batch size
+batch_size = 100
+###############################################################################
+listDataset = api_dataset()
+# print(listDataset.shape)
+# Training inputs
+xTrainDataset = listDataset[0:training_num]
+# The target is the next access count
+yTrainDataset = listDataset[1:training_num+1]
+
+# Normalize the raw data
+scTrainDatasetX, xTrainDataset = sc_fit_transform(xTrainDataset)
+scTrainDatasetY, yTrainDataset = sc_fit_transform(yTrainDataset)
+
+###############################################################################
+# Build the sliding windows the LSTM expects
+xTrain = []
+for i in range(timestep, training_num):
+    xTrain.append(xTrainDataset[i-timestep : i])
+xTrain = np.array(xTrain)
+# print(xTrain.shape)
+
+yTrain = []
+for i in range(timestep, training_num):
+    yTrain.append(yTrainDataset[i])
+yTrain = np.array(yTrain)
+# print(yTrain.shape)
+###############################################################################
+# Build the network with the Sequential model
+model = Sequential()
+# As the first layer, the LSTM needs an explicit input shape;
+# return_sequences=True would return the output at every timestep
+model.add(LSTM(units=128, input_shape=[xTrain.shape[1], 1]))
+model.add(Dense(1))
+# Compile; accuracy is not meaningful for regression, so only the loss is tracked
+model.compile(optimizer='adam',
+              loss='mean_squared_error')
+model.fit(x=xTrain, y=yTrain, epochs=epoch, batch_size=batch_size)
+model.save('my_model.h5')
+###############################################################################
+xTestDataset = listDataset[training_num:10080-2]
+scTestDatasetX, xTestDataset = sc_fit_transform(xTestDataset)
+
+yTestDataset = listDataset[training_num+1:10080-1]
+scTestDatasetY, yTestDataset = sc_fit_transform(yTestDataset)
+# Build the sliding windows for the test set
+xTest = []
+for i in range(timestep, len(xTestDataset)):
+    xTest.append(xTestDataset[i-timestep : i])
+xTest = np.array(xTest)
+print(xTest.shape)
+yTest = []
+for i in range(timestep, len(xTestDataset)):
+    yTest.append(yTestDataset[i])
+# Inverse-transform back to the original scale
+yTest = scTestDatasetY.inverse_transform(X=yTest)
+print(yTest.shape)
+print(yTest)
+###############################################################################
+# Predict
+yPredictes = model.predict(x=xTest)
+# Inverse-transform back to the original scale
+yPredictes = scTestDatasetY.inverse_transform(X=yPredictes)
+print(yPredictes.shape)
+print(yPredictes)
+###############################################################################
+# Plot the comparison: red is the real data, blue is the prediction
+plt.plot(yTest, color='red', label='Real')
+plt.plot(yPredictes, color='blue', label='Predict')
+plt.title(label='Prediction')
+plt.xlabel(xlabel='Time')
+plt.ylabel(ylabel='Api_access_num')
+plt.legend()
+plt.show()
+
+# Metrics: MAE, RMSE, R2
+mae = mean_absolute_error(yTest, yPredictes)
+rmse = mean_squared_error(yTest, yPredictes, squared=False)
+r2 = r2_score(yTest, yPredictes)
+print(mae, rmse, r2)
+# 72.02636248234026 98.38626354602893 0.9791679689516253
+# 45.70792188492153 74.77525176850149 0.9880226807229917
\ No newline at end of file
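
One caveat in testcase1.py: the test set is normalized with scalers fitted on the test data itself (sc_fit_transform is called again on xTestDataset and yTestDataset), so train and test values end up on different scales. The usual pattern is to fit once on the training data and reuse that scaler. A minimal sketch with made-up values (only scikit-learn and numpy assumed):

    import numpy as np
    from sklearn.preprocessing import MinMaxScaler

    train = np.array([[100.0], [200.0], [300.0]])  # hypothetical training values
    test = np.array([[250.0], [400.0]])            # hypothetical test values

    sc = MinMaxScaler(feature_range=(0, 1))
    train_scaled = sc.fit_transform(train)  # fit min/max on the training data only
    test_scaled = sc.transform(test)        # reuse the same scaling on the test set
    restored = sc.inverse_transform(test_scaled)  # round-trips to the original scale
    print(test_scaled.ravel(), restored.ravel())

Test values outside the training range simply scale beyond [0, 1], which is fine for inference.
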
diff --git a/testcase2.py b/testcase2.py
new file mode 100644
index 0000000..754ad3e
--- /dev/null
+++ b/testcase2.py
@@ -0,0 +1,21 @@
+# load and plot dataset
+from datetime import datetime
+from pandas import read_csv
+from matplotlib import pyplot
+
+# load dataset
+def parser(x):
+    return datetime.strptime('190' + x, '%Y-%m')
+
+series = read_csv('case2.csv', header=0, parse_dates=[0], index_col=0, squeeze=True, date_parser=parser)
+# summarize first few rows
+print(series.head())
+# line plot
+series.plot()
+pyplot.show()
\ No newline at end of file
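
The '190' + x trick in parser assumes case2.csv stores dates like "1-01" for January 1901 (the layout of the classic shampoo-sales dataset); prepending "190" yields a full "%Y-%m" string. A quick check of that assumption in isolation:

    from datetime import datetime

    # "1-01" (hypothetical first row of case2.csv) becomes 1901-01-01
    print(datetime.strptime('190' + '1-01', '%Y-%m'))  # 1901-01-01 00:00:00

If case2.csv uses a different date layout, the format string needs to change accordingly.
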
diff --git a/train_gift.py b/train_gift.py
new file mode 100644
index 0000000..16cdeb5
--- /dev/null
+++ b/train_gift.py
@@ -0,0 +1,41 @@
+from keras.models import Sequential
+from keras.layers import Dense, Dropout
+from keras.layers import LSTM
+
+from data import load_gift_data
+
+if __name__ == "__main__":
+
+    x_train, y_train, tx_train, ty_train = load_gift_data()
+
+    model = Sequential()
+    units = 256
+
+    model.add(LSTM(units, activation='relu', input_shape=(4, 1)))
+    model.add(Dropout(0.3))
+    model.add(Dense(1))
+    model.summary()
+
+    model.compile(loss='mse', optimizer='adam')
+
+    model.fit(x_train, y_train, batch_size=32, epochs=1000)
+    model.save("./predict_gift")
+
+    p_data = model.predict(tx_train)
+    for i in range(len(p_data)):
+        # Relative error against the ground truth
+        comp = (p_data[i][0] - ty_train[i]) / ty_train[i]
+        print(comp, p_data[i][0], ty_train[i])
+        if abs(comp) >= 0.2:
+            print("predicted:", p_data[i][0], "input:", tx_train[i], "actual:", ty_train[i])
\ No newline at end of file
diff --git a/train_pay.py b/train_pay.py
new file mode 100644
index 0000000..20d3a78
--- /dev/null
+++ b/train_pay.py
@@ -0,0 +1,39 @@
+from keras.models import Sequential
+from keras.layers import Dense, Dropout
+from keras.layers import LSTM
+
+from data import load_pay_data
+
+if __name__ == "__main__":
+
+    x_train, y_train, tx_train, ty_train = load_pay_data()
+
+    model = Sequential()
+    units = 500
+    model.add(LSTM(units, activation='relu', input_shape=(5, 1)))
+    model.add(Dropout(0.1))
+    model.add(Dense(1))
+    model.summary()
+    model.compile(loss='mse', optimizer='adam')
+
+    model.fit(x_train, y_train, batch_size=128, epochs=1500)
+    model.save("./predict_pay")
+
+    p_data = model.predict(tx_train)
+    for i in range(len(p_data)):
+        # Relative error against the ground truth
+        print((p_data[i][0] - ty_train[i]) / ty_train[i], p_data[i][0], ty_train[i])
+        # print("predicted:", p_data[i][0], "input:", tx_train[i], "actual:", ty_train[i])
\ No newline at end of file
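
train_gift.py, train_pay.py, and predict.py all end with the same ad-hoc evaluation loop, differing only in the error threshold (0.1, 0.2, or none). If this grows, one option is to extract it into a shared helper, e.g. in data.py; a sketch, where the name report_errors and its threshold default are choices made here and not part of the patch:

    def report_errors(model, tx, ty, threshold=0.2):
        # Print per-sample relative error and flag predictions off by more than threshold
        p_data = model.predict(tx)
        for pred, x, actual in zip(p_data, tx, ty):
            comp = (pred[0] - actual) / actual
            print(comp, pred[0], actual)
            if abs(comp) >= threshold:
                print("predicted:", pred[0], "input:", x, "actual:", actual)

    # usage: report_errors(model, tx_train, ty_train, threshold=0.1)

This keeps each script's threshold visible at the call site instead of duplicating the loop three times.
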