predict-coin/data.py

198 lines
5.7 KiB
Python

from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding
from keras.layers import InputLayer
from keras.layers import LSTM
from keras import backend
import pymysql
import pickle
import os
import numpy
import time, datetime
def _group_rows_by_day(rows, ts_index):
    """Group rows by calendar day and sort each day's rows by timestamp.

    Args:
        rows: iterable of DB row tuples.
        ts_index: position of the datetime column inside each row.

    Returns:
        dict mapping ``str(date)`` to the list of rows of that day, ordered
        by the timestamp column. Days keep the order of first appearance.
    """
    by_day = {}
    for row in rows:
        by_day.setdefault(str(row[ts_index].date()), []).append(row)
    for day_rows in by_day.values():
        day_rows.sort(key=lambda r: r[ts_index])
    return by_day


def _fetch_collect_from_db():
    """Query the hourly pay and gift tables and return them grouped by day.

    Returns:
        ``{"pay": {day: rows}, "gift": {day: rows}}`` where pay rows are
        ``(coin, extra_coins, pay_users, create_at)`` and gift rows are
        ``(coin, users, create_at)``.

    Raises:
        ValueError: if no day has a complete set of 24 hourly pay rows.
    """
    # NOTE(security): the credentials are hard-coded in source; they should
    # be moved to configuration / environment variables.
    db = pymysql.connect(host="sg-board1.livenono.com", port=3306,user="root",passwd="Nono-databoard",db="databoard",charset="utf8")
    try:
        cursor = db.cursor()
        today = time.strftime("%Y-%m-%d", time.localtime())
        cursor.execute('''SELECT coin, extra_coins, pay_users, create_at from pay_items_hour pih where region = "all" and platform="all" and create_at <= %s''', (today,))
        collect_pay = _group_rows_by_day(cursor.fetchall(), 3)
        print('共查找出', cursor.rowcount, '条数据')

        # Keep only days that have all 24 hourly rows.
        collect_pay = {
            day: rows for day, rows in collect_pay.items() if len(rows) == 24
        }
        if not collect_pay:
            raise ValueError("no day with a complete set of 24 hourly pay rows")

        # Restrict the gift query to the date range covered by the pay data.
        # NOTE(review): the upper bound is a bare date string, so the last
        # day is presumably cut off at midnight -- confirm this is intended.
        cursor.execute(
            '''SELECT coin, users, create_at from gift_items_hour pih where region = "all" and create_at >= %s and create_at <= %s''',
            (min(collect_pay), max(collect_pay)),
        )
        collect_gift = _group_rows_by_day(cursor.fetchall(), 2)
    finally:
        # Always release the connection, even when a query fails.
        db.close()
    return {"pay": collect_pay, "gift": collect_gift}


def get_collect():
    """Return the hourly pay/gift data, using ``./collect.pickle`` as a cache.

    The cache file is tried first. If it cannot be loaded, the data is
    fetched from the database and written back to the cache file.

    This is best-effort: any ``Exception`` while loading the cache or
    fetching from the database is printed, not raised. When the fetch fails
    an empty dict is returned.

    Returns:
        dict with the keys ``"pay"`` and ``"gift"`` (each mapping a date
        string to that day's rows), or ``{}`` if no data could be obtained.
    """
    loadfile = "./collect.pickle"
    try:
        # NOTE(security): pickle must only be used on trusted files.
        with open(loadfile, 'rb') as cache_file:
            return pickle.load(cache_file)
    except Exception as e:
        print(e)

    collect = {}
    try:
        collect = _fetch_collect_from_db()
        with open(loadfile, 'wb+') as cache_file:
            pickle.dump(collect, cache_file)
    except Exception as e:
        # Report the failure explicitly instead of swallowing it silently.
        print(e)
    return collect
def load_pay_data(textNum = 80):
    """Build hourly samples and targets from the collected pay data.

    Days are ordered by the timestamp of their first row. For every day
    after the first, one sample is produced per hour:
    ``[hour_index, running_total / previous_day_total, running_total]``,
    where a row's amount is ``coin + extra_coins``. The target of each
    sample is the final running total of that day.

    Args:
        textNum: number of trailing samples returned as the evaluation
            slice. If it exceeds the number of samples, all samples are
            returned.

    Returns:
        Tuple ``(x_train, y_train, tx_train, ty_train, input_shape)``.
        ``x_train`` has shape ``(samples, 3, 1)``, ``y_train`` has shape
        ``(samples,)``, and ``input_shape`` is ``(3, 1)``. The evaluation
        slice is NOT removed from the training arrays.

    Raises:
        KeyError: if ``get_collect()`` returned no ``"pay"`` entry.
        IndexError: if fewer than two days of data are available.
        ZeroDivisionError: if a previous day's total is zero.
    """
    collect = get_collect()
    # TODO: model the relationship between gift and pay fluctuations
    days = sorted(collect["pay"].values(), key=lambda rows: rows[0][3])

    x_train = []
    y_train = []
    lastday_v = days[0]
    for cur_v in days[1:]:
        last_total_coin = sum(row[0] + row[1] for row in lastday_v)
        # Only as many hours as both days have are used.
        hours = min(len(cur_v), len(lastday_v))
        total_coin = 0
        for hour, row in enumerate(cur_v[:hours]):
            total_coin += row[0] + row[1]
            # Features: hour index, running total relative to the previous
            # day's total, running total.
            x_train.append([hour, total_coin / last_total_coin, total_coin])
        # Every hourly sample of the day predicts the day's final total.
        y_train.extend([total_coin] * hours)
        lastday_v = cur_v

    input_shape = (len(x_train[0]), 1)
    x_train = numpy.reshape(x_train, (len(x_train), input_shape[0], input_shape[1]))
    y_train = numpy.reshape(y_train, (len(y_train)))

    # Clamp at 0: a negative start would silently select the wrong slice
    # when textNum is larger than the number of samples.
    test_start = max(len(x_train) - textNum, 0)
    tx_train = x_train[test_start:]
    ty_train = y_train[test_start:]
    return x_train, y_train, tx_train, ty_train, input_shape
def load_gift_data(textNum = 80):
    """Build hourly samples and targets from the collected gift data.

    Days are ordered by the timestamp of their first row. For every day
    after the first, one sample is produced per hour:
    ``[hour_index, running_total / previous_day_total, running_total]``,
    where a row's amount is its ``coin`` column. The target of each sample
    is the final running total of that day.

    Args:
        textNum: number of trailing samples returned as the evaluation
            slice. If it exceeds the number of samples, all samples are
            returned.

    Returns:
        Tuple ``(x_train, y_train, tx_train, ty_train, input_shape)``.
        ``x_train`` has shape ``(samples, 3, 1)``, ``y_train`` has shape
        ``(samples,)``, and ``input_shape`` is ``(3, 1)``. The evaluation
        slice is NOT removed from the training arrays.

    Raises:
        KeyError: if ``get_collect()`` returned no ``"gift"`` entry.
        IndexError: if fewer than two days of data are available.
        ZeroDivisionError: if a previous day's total is zero.
    """
    collect = get_collect()
    days = sorted(collect["gift"].values(), key=lambda rows: rows[0][2])

    x_train = []
    y_train = []
    lastday_v = days[0]
    for cur_v in days[1:]:
        last_total_coin = sum(row[0] for row in lastday_v)
        # Gift days may hold different numbers of rows; only as many hours
        # as both days have are used.
        hours = min(len(cur_v), len(lastday_v))
        total_coin = 0
        for hour, row in enumerate(cur_v[:hours]):
            total_coin += row[0]
            # Features: hour index, running total relative to the previous
            # day's total, running total.
            x_train.append([hour, total_coin / last_total_coin, total_coin])
        # Every hourly sample of the day predicts the day's final total.
        y_train.extend([total_coin] * hours)
        lastday_v = cur_v

    input_shape = (len(x_train[0]), 1)
    x_train = numpy.reshape(x_train, (len(x_train), input_shape[0], input_shape[1]))
    y_train = numpy.reshape(y_train, (len(y_train)))

    # Clamp at 0: a negative start would silently select the wrong slice
    # when textNum is larger than the number of samples.
    test_start = max(len(x_train) - textNum, 0)
    tx_train = x_train[test_start:]
    ty_train = y_train[test_start:]
    return x_train, y_train, tx_train, ty_train, input_shape