predict-coin/data.py

192 lines
5.3 KiB
Python
Raw Normal View History

2021-03-19 09:14:35 +00:00
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding
from keras.layers import InputLayer
from keras.layers import LSTM
from keras import backend
import pymysql
import pickle
import os
import numpy
2021-03-23 10:12:32 +00:00
import time, datetime
2021-03-19 09:14:35 +00:00
def _group_rows_by_day(rows, time_index):
    """Group rows by the calendar date of the datetime at ``row[time_index]``.

    Returns a dict mapping ``str(date)`` to the list of rows for that date,
    each list sorted by the datetime column.
    """
    grouped = {}
    for row in rows:
        grouped.setdefault(str(row[time_index].date()), []).append(row)
    for day_rows in grouped.values():
        day_rows.sort(key=lambda r: r[time_index])
    return grouped


def _query_collect():
    """Query pay and gift rows from the database.

    Returns ``{"pay": ..., "gift": ...}`` (both grouped by day), or ``None``
    when no day has exactly 24 pay rows.
    """
    # Open the database connection.
    # NOTE(review): host and credentials are hard-coded in source; consider
    # moving them to configuration or environment variables.
    db = pymysql.connect(host="sg-board1.livenono.com", port=3306,user="root",passwd="Nono-databoard",db="databoard",charset="utf8")
    try:
        # Create a cursor object for running the queries.
        cursor = db.cursor()
        try:
            today = time.strftime("%Y-%m-%d", time.localtime())
            # Execute the SQL query for pay rows up to today.
            cursor.execute(
                '''SELECT coin, extra_coins, pay_users, create_at from pay_items_hour pih where region = "all" and platform="all" and create_at <= %s''',
                (today,),
            )
            pay_rows = cursor.fetchall()
            print('共查找出', cursor.rowcount, '条数据')

            # Keep only the days that have exactly 24 rows.
            collect_pay = {
                day: rows
                for day, rows in _group_rows_by_day(pay_rows, 3).items()
                if len(rows) == 24
            }
            if not collect_pay:
                # No complete day: there is no date range for the gift query.
                return None

            # NOTE(review): the upper bound is a bare date string, so the last
            # day presumably only matches rows at midnight -- confirm intent.
            cursor.execute(
                '''SELECT coin, users, create_at from gift_items_hour pih where region = "all" and create_at >= %s and create_at <= %s''',
                (min(collect_pay), max(collect_pay)),
            )
            # Gift days are not filtered for completeness, unlike pay days.
            collect_gift = _group_rows_by_day(cursor.fetchall(), 2)
        finally:
            cursor.close()
    finally:
        db.close()
    return {"pay": collect_pay, "gift": collect_gift}


def get_collect():
    """Return the pay/gift data set, loading it from a local pickle cache.

    The cache file ``./collect.pickle`` is tried first. If it cannot be
    loaded, the data is queried from the database and written back to the
    cache file.

    Returns:
        A dict with keys ``"pay"`` and ``"gift"``, each mapping a date string
        to that day's rows sorted by ``create_at``. An empty dict is returned
        when the cache is unavailable and the database path fails or yields
        no complete day; the error is printed rather than raised.
    """
    collect = {}
    loadfile = "./collect.pickle"
    try:
        # NOTE(review): pickle.load can execute arbitrary code; this is only
        # safe while the cache file is written exclusively by this function.
        with open(loadfile, 'rb') as cache_file:
            return pickle.load(cache_file)
    except Exception as e:
        # Missing or unreadable cache: fall through to the database.
        print(e)

    try:
        fetched = _query_collect()
        if fetched is None:
            return collect
        collect.update(fetched)
        with open(loadfile, 'wb+') as cache_file:
            pickle.dump(collect, cache_file)
    except Exception as e:
        # Best effort: report the failure and return whatever was collected.
        print(e)
    return collect
def load_pay_data(textNum = 80):
    """Build training samples from the pay rows returned by ``get_collect()``.

    Each day is paired with the day before it. For every row of the current
    day (paired positionally with the previous day's rows) one sample
    ``[index, running_total, running_total / previous_day_total]`` is
    produced, where a row contributes ``row[0] + row[1]`` to the totals
    and ``index`` is the 0-based position of the row within the day. The
    label of every sample of a day is that day's final running total.

    Args:
        textNum: Number of trailing samples returned as the test split.

    Returns:
        ``(x_train, y_train, tx_train, ty_train)``. ``x_train`` has shape
        ``(n, 3, 1)`` and ``y_train`` shape ``(n,)``; the ``t*`` arrays are
        the last ``textNum`` samples, or all samples when fewer exist.

    Raises:
        KeyError: If ``get_collect()`` returned no ``"pay"`` entry.
        ZeroDivisionError: If a previous day's total is zero.
    """
    collect = get_collect()

    # TODO: handle the relationship between gift and pay fluctuations
    days = sorted(collect["pay"].values(), key=lambda rows: rows[0][3])

    x_train = []
    y_train = []
    for prev_day, cur_day in zip(days, days[1:]):
        prev_total = sum(row[0] + row[1] for row in prev_day)
        running_total = 0
        day_features = []
        # zip() keeps the pairing with the previous day's rows, so a shorter
        # previous day limits how many samples the current day yields.
        for index, (row, _prev_row) in enumerate(zip(cur_day, prev_day)):
            running_total += row[0] + row[1]
            day_features.append(
                [index, running_total, running_total / prev_total]
            )
        x_train.extend(day_features)
        # Every sample of the day is labelled with the day's final total.
        y_train.extend([running_total] * len(day_features))

    x_train = numpy.reshape(x_train, (len(x_train), 3, 1))
    y_train = numpy.reshape(y_train, (len(y_train)))

    # Clamp at 0: a negative start index would wrap around and silently
    # return the wrong tail when fewer than textNum samples exist.
    split_at = max(len(x_train) - textNum, 0)
    tx_train = x_train[split_at:]
    ty_train = y_train[split_at:]
    # NOTE: the test split is not removed from x_train / y_train.
    return x_train, y_train, tx_train, ty_train
def load_gift_data(textNum = 80):
    """Build training samples from the gift rows returned by ``get_collect()``.

    Each day is paired with the day before it. For every row of the current
    day (paired positionally with the previous day's rows) one sample
    ``[index, running_total, running_total / previous_day_total,
    running_users]`` is produced, where ``row[0]`` feeds the totals,
    ``row[1]`` feeds ``running_users`` and ``index`` is the 1-based position
    of the row within the day. The label of every sample of a day is that
    day's final running total.

    Args:
        textNum: Number of trailing samples returned as the test split.

    Returns:
        ``(x_train, y_train, tx_train, ty_train)``. ``x_train`` has shape
        ``(n, 4, 1)`` and ``y_train`` shape ``(n,)``; the ``t*`` arrays are
        the last ``textNum`` samples, or all samples when fewer exist.

    Raises:
        KeyError: If ``get_collect()`` returned no ``"gift"`` entry.
        ZeroDivisionError: If a previous day's total is zero.
    """
    collect = get_collect()
    days = sorted(collect["gift"].values(), key=lambda rows: rows[0][2])

    x_train = []
    y_train = []
    for prev_day, cur_day in zip(days, days[1:]):
        prev_total = sum(row[0] for row in prev_day)
        running_total = 0
        running_users = 0
        day_features = []
        # Gift days may differ in length; zip() stops at the shorter of the
        # current and the previous day.
        for index, (row, _prev_row) in enumerate(zip(cur_day, prev_day), start=1):
            running_total += row[0]
            running_users += row[1]
            day_features.append(
                [index, running_total, running_total / prev_total, running_users]
            )
        x_train.extend(day_features)
        # Every sample of the day is labelled with the day's final total.
        y_train.extend([running_total] * len(day_features))

    x_train = numpy.reshape(x_train, (len(x_train), 4, 1))
    y_train = numpy.reshape(y_train, (len(y_train)))

    # Clamp at 0: a negative start index would wrap around and silently
    # return the wrong tail when fewer than textNum samples exist.
    split_at = max(len(x_train) - textNum, 0)
    tx_train = x_train[split_at:]
    ty_train = y_train[split_at:]
    # NOTE: the test split is not removed from x_train / y_train.
    return x_train, y_train, tx_train, ty_train