predict-coin/data.py

274 lines
8.8 KiB
Python

import logging
import traceback
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding
from keras.layers import InputLayer
from keras.layers import LSTM
from keras import backend
import pymysql
import pickle
import os
import numpy
import time, datetime
# Region keys used to filter the hourly/daily aggregate tables; "all" is
# listed first and is also the default region of the loader functions.
regions = [
    "all",
    "Region_Arab",
    "Region_China",
    "Region_English",
    "Region_Germany",
    "Region_India",
    "Region_Indonesia",
    "Region_Japan",
    "Region_Philippines",
    "Region_Portuguese",
    "Region_Russian",
    "Region_Spanish",
    "Region_Thailand",
    "Region_Turkey",
    "Region_Vietnam",
]
# Hourly and daily aggregate queries. Region and date are always bound as
# query parameters, never interpolated into the SQL text.
_PAY_HOUR_SQL = '''SELECT coin, extra_coins, pay_users, create_at from pay_items_hour pih where region = %s and country = "all" and platform="all" and create_at >= "2021-02-23" and create_at <= %s'''
_GIFT_HOUR_SQL = '''SELECT coin, users, create_at from gift_items_hour pih where region = %s and country = "all" and create_at >= "2021-02-23" and create_at <= %s'''
_PAY_DAY_SQL = '''SELECT coin, extra_coins, pay_users, create_at from pay_items_day pid where region = %s and country = "all" and platform="all" and create_at = %s'''
_GIFT_DAY_SQL = '''SELECT coin, users, create_at from gift_items_day where region = %s and country = "all" and create_at = %s'''


def _insert_previous_day_total(cursor, rows, day_sql, region, total_of):
    """Insert the previous day's total into every hourly row, in place.

    For each row (whose last element is its ``create_at`` datetime) the daily
    table is queried for the day before, at most once per distinct date, and
    ``total_of(day_row)`` is inserted just before the row's last two elements.

    Raises:
        LookupError: if the daily table has no row for a required date.
    """
    totals_by_date = {}
    for row in rows:
        print(row[-1])
        date = str(row[-1].date() - datetime.timedelta(days=1))
        print(date)
        if date not in totals_by_date:
            cursor.execute(day_sql, (region, date))
            day_row = cursor.fetchone()
            if day_row is None:
                # Previously this surfaced as an opaque unpacking TypeError.
                raise LookupError(
                    "no daily total for region %r on %s" % (region, date)
                )
            totals_by_date[date] = total_of(day_row)
        row.insert(-2, totals_by_date[date])


def _collect_region(cursor, region, today):
    """Fetch the hourly pay and gift rows of one region, sorted by time.

    Returns:
        ``(pay_rows, gift_rows)`` where a pay row is
        ``[coin, extra_coins, pay_users, previous_day_total, hour, create_at]``
        and a gift row is ``[coin, users, previous_day_total, hour, create_at]``.
    """
    cursor.execute(_PAY_HOUR_SQL, (region, today))
    collect_pay = [
        [coin, extra_coins, pay_users, create_at.hour, create_at]
        for coin, extra_coins, pay_users, create_at in cursor.fetchall()
    ]
    print('共查找出', cursor.rowcount, '条数据')

    cursor.execute(_GIFT_HOUR_SQL, (region, today))
    collect_gift = [
        [coin, users, create_at.hour, create_at]
        for coin, users, create_at in cursor.fetchall()
    ]

    # Chronological order; create_at is the last element of every row.
    collect_pay.sort(key=lambda row: row[-1])
    collect_gift.sort(key=lambda row: row[-1])

    _insert_previous_day_total(
        cursor, collect_pay, _PAY_DAY_SQL, region,
        lambda day_row: day_row[0] + day_row[1],  # coin + extra_coins
    )
    _insert_previous_day_total(
        cursor, collect_gift, _GIFT_DAY_SQL, region,
        lambda day_row: day_row[0],  # coin
    )
    return collect_pay, collect_gift


def get_collect():
    """Return the hourly pay/gift rows for every region, using a pickle cache.

    The cache file ``./collect.pickle`` is tried first. If it cannot be loaded
    the data is collected from the database and, only when collection finished
    for all regions, written back to the cache.

    Returns:
        dict mapping ``"pay-<region>"`` and ``"gift-<region>"`` to lists of
        rows (see ``_collect_region`` for the row layout). If collection fails
        part-way, the error is logged and the regions gathered so far are
        returned (possibly an empty dict).
    """
    loadfile = "./collect.pickle"
    try:
        # NOTE(review): pickle executes code on load; only safe while the
        # cache file is written exclusively by this function.
        with open(loadfile, 'rb') as cache:
            return pickle.load(cache)
    except Exception as e:
        print(e)

    collect = {}
    complete = False
    try:
        # Open the database connection.
        # NOTE(review): credentials are hard-coded in source; they should be
        # moved to configuration/environment and rotated.
        db = pymysql.connect(host="sg-board1.livenono.com", port=3306,user="root",passwd="Nono-databoard",db="databoard",charset="utf8")
        try:
            cursor = db.cursor()
            try:
                today = time.strftime("%Y-%m-%d", time.localtime())
                for region in regions:
                    collect_pay, collect_gift = _collect_region(cursor, region, today)
                    collect["pay-" + region] = collect_pay
                    collect["gift-" + region] = collect_gift
                complete = True
            finally:
                cursor.close()
        finally:
            db.close()
    except Exception:
        logging.error(traceback.format_exc())

    if complete:
        # Only cache a complete result: caching a partial/empty dict would
        # make every later call skip the database and return broken data.
        try:
            with open(loadfile, 'wb') as cache:
                pickle.dump(collect, cache)
        except Exception:
            # Cache writing is best-effort; the collected data is still valid.
            logging.error(traceback.format_exc())
    return collect
def _pay_rows_by_day(rows):
    """Group flat hourly pay rows into one time-ordered list per calendar day.

    Each row's last element must be its ``create_at`` datetime. The returned
    list of days is in chronological order.
    """
    by_day = {}
    for row in sorted(rows, key=lambda r: r[-1]):
        by_day.setdefault(row[-1].date(), []).append(row)
    # dicts preserve insertion order, and rows were inserted chronologically.
    return list(by_day.values())


def load_pay_data(textNum = 80, region = "all"):
    """Build training samples from the hourly pay rows of ``region``.

    Rows come from ``get_collect()["pay-" + region]`` and are expected in the
    layout ``[coin, extra_coins, pay_users, previous_day_total, hour,
    create_at]``. Rows are grouped by calendar day; for every hour of a day
    one sample ``[hour, running_total / previous_day_total, running_total]``
    is emitted, labelled with that day's final running total.

    Args:
        textNum: number of trailing samples returned as the test split.
        region: region key, e.g. ``"all"`` or ``"Region_Japan"``.

    Returns:
        ``(x_train, y_train, tx_train, ty_train, input_shape)``. ``x_train``
        has shape ``(samples, 3, 1)``, ``y_train`` has shape ``(samples,)``;
        the test arrays are the last ``textNum`` samples and are NOT removed
        from the training arrays.

    Raises:
        KeyError: if the region is missing from the collected data.
        IndexError: if no samples could be built (fewer than two days).
        ZeroDivisionError: if a previous-day total is zero (for plain numeric
            values).
    """
    collect = get_collect()
    # TODO: model the relationship between gift and pay fluctuations.
    # get_collect stores a flat list of hourly rows, so group them by day
    # before running the per-day accumulation below.
    days = _pay_rows_by_day(collect["pay-" + region])

    x_train = []
    y_train = []
    # The first day is only used as "previous day", as in the original loop.
    for prev_day, cur_day in zip(days, days[1:]):
        total_coin = 0
        count = 0
        # NOTE(review): a day is truncated to the previous day's row count;
        # this is carried over from the original zip() and may be a leftover.
        for row in cur_day[:len(prev_day)]:
            total_coin += row[0] + row[1]  # coin + extra_coins
            # Features: hour, running total relative to yesterday's total,
            # running total.
            x_train.append([row[-2], total_coin / row[-3], total_coin])
            count += 1
        # Every sample of the day is labelled with the day's final total.
        y_train.extend([total_coin] * count)

    input_shape = (len(x_train[0]), 1)
    x_train = numpy.reshape(x_train, (len(x_train), input_shape[0], input_shape[1]))
    y_train = numpy.reshape(y_train, (len(y_train)))

    # Clamp the start index: a negative start would silently select fewer
    # samples than available when textNum exceeds the sample count.
    test_start = max(len(x_train) - textNum, 0)
    tx_train = x_train[test_start:]
    ty_train = y_train[test_start:]
    return x_train, y_train, tx_train, ty_train, input_shape
def _gift_rows_by_day(raw):
    """Return the gift rows as a chronologically sorted list of per-day lists.

    Accepts both layouts seen for ``collect["gift-<region>"]``: the flat list
    of hourly rows produced by ``get_collect`` and a dict mapping a day to its
    rows (presumably the layout of older cache files; verify before removing).
    In both layouts a row's last element is its ``create_at`` datetime.
    """
    if isinstance(raw, dict):
        days = list(raw.values())
    else:
        by_day = {}
        for row in sorted(raw, key=lambda r: r[-1]):
            by_day.setdefault(row[-1].date(), []).append(row)
        days = list(by_day.values())
    days.sort(key=lambda day: day[0][-1])
    return days


def load_gift_data(textNum = 80, region = "all"):
    """Build training samples from the hourly gift rows of ``region``.

    Rows come from ``get_collect()["gift-" + region]``; element 0 of a row is
    its coin amount. Rows are grouped by calendar day; for the n-th row of a
    day one sample ``[n, running_total / previous_day_sum, running_total]`` is
    emitted, labelled with that day's final running total. The previous day's
    sum is computed from the previous day's rows.

    Args:
        textNum: number of trailing samples returned as the test split.
        region: region key, e.g. ``"all"`` or ``"Region_Japan"``.

    Returns:
        ``(x_train, y_train, tx_train, ty_train, input_shape)``. ``x_train``
        has shape ``(samples, 3, 1)``, ``y_train`` has shape ``(samples,)``;
        the test arrays are the last ``textNum`` samples and are NOT removed
        from the training arrays.

    Raises:
        KeyError: if the region is missing from the collected data.
        IndexError: if no samples could be built (fewer than two days).
        ZeroDivisionError: if the previous day's coin sum is zero (for plain
            numeric values).
    """
    collect = get_collect()
    days = _gift_rows_by_day(collect["gift-" + region])

    x_train = []
    y_train = []
    # The first day only serves as "previous day" for the second one.
    for prev_day, cur_day in zip(days, days[1:]):
        last_total_coin = sum(row[0] for row in prev_day)
        total_coin = 0
        count = 0
        # As in the original zip(), a day is truncated to the previous day's
        # row count.
        for row in cur_day[:len(prev_day)]:
            total_coin += row[0]
            # Features: position within the day, running total relative to
            # the previous day's sum, running total.
            x_train.append([count, total_coin / last_total_coin, total_coin])
            count += 1
        # Every sample of the day is labelled with the day's final total.
        y_train.extend([total_coin] * count)

    input_shape = (len(x_train[0]), 1)
    x_train = numpy.reshape(x_train, (len(x_train), input_shape[0], input_shape[1]))
    y_train = numpy.reshape(y_train, (len(y_train)))

    # Clamp the start index: a negative start would silently select fewer
    # samples than available when textNum exceeds the sample count.
    test_start = max(len(x_train) - textNum, 0)
    tx_train = x_train[test_start:]
    ty_train = y_train[test_start:]
    return x_train, y_train, tx_train, ty_train, input_shape