Compare commits

..

No commits in common. "master" and "feature/autostore" have entirely different histories.

28 changed files with 380 additions and 3353 deletions

View File

@ -34,10 +34,9 @@ type Table struct {
name string
setting interface{}
updatesql string
selectsql string
insertsql string
duplicatesql string
updatesql string
selectsql string
insertsql string
}
func NewStore(uri string) *Store {
@ -55,7 +54,6 @@ func (store *Store) Table(name string) *Table {
table.name = name
table.insertsql = `INSERT INTO ` + table.name + `(%s) values(%s)`
table.duplicatesql = `INSERT INTO ` + table.name + `(%s) values(%s) ON DUPLICATE KEY UPDATE %s`
table.updatesql = `UPDATE ` + table.name + ` SET %s WHERE %s = ?`
table.selectsql = `SELECT %s FROM ` + table.name + ` WHERE %s `
return table
@ -145,7 +143,7 @@ func (queue *Queue) Pop() (result interface{}, err error) {
rows, err := tx.Query(selectsql, queue.cond.CondArgs...)
if err != nil {
return nil, fmt.Errorf("table: %s queue is empty. %s", queue.table.name, err.Error())
return nil, fmt.Errorf("table: %s queue is empty", queue.table.name)
}
var fields = make([]interface{}, len(queue.fieldIndex))
@ -228,83 +226,6 @@ func (t *Table) Insert(obj interface{}) error {
return err
}
// DUpdate describes one column assignment of an
// INSERT ... ON DUPLICATE KEY UPDATE statement built by InsertOrUpdate.
// Field must match the `field` struct tag of the column to update.
type DUpdate struct {
Field string // column name, i.e. the value of the struct field's `field` tag
Value interface{} // nil means "reuse the value being inserted for this column"; non-nil is used as-is
}
// InsertOrUpdate inserts obj into the table and, if the row collides with an
// existing unique key, applies the given column updates instead
// (INSERT ... ON DUPLICATE KEY UPDATE).
//
// obj must be a non-nil pointer to a struct. Only fields carrying a `field`
// tag are written; nil pointer/interface fields are left out of the INSERT.
// Nested structs are not supported.
//
// Each DUpdate names a column by its `field` tag. A DUpdate with a nil Value
// reuses the value that is being inserted for that column; if that struct
// field is nil (or no such field exists) the column is updated to NULL.
// A DUpdate with a non-nil Value uses that value verbatim.
//
// It returns an error when obj has no insertable field, when no DUpdate is
// supplied (the statement would be syntactically invalid), or when the
// database rejects the statement.
func (t *Table) InsertOrUpdate(obj interface{}, updates ...DUpdate) error {
	ov := reflect.ValueOf(obj).Elem()
	ot := reflect.TypeOf(obj)

	if len(updates) == 0 {
		return fmt.Errorf("table: %s InsertOrUpdate needs at least one DUpdate", t.name)
	}

	// Work on a private copy so the caller's slice is never mutated, and take
	// the address of each element rather than of the range variable: before
	// Go 1.22 `&u` inside `for _, u := range` yields the same pointer on every
	// iteration, which made all updates alias the last one.
	pending := make([]DUpdate, len(updates))
	copy(pending, updates)

	var sourceUpdate []*DUpdate // values taken from obj
	var otherUpdate []*DUpdate  // values supplied explicitly by the caller
	for i := range pending {
		u := &pending[i]
		if u.Value == nil {
			sourceUpdate = append(sourceUpdate, u)
		} else {
			otherUpdate = append(otherUpdate, u)
		}
	}

	fieldsql := ""
	argssql := ""
	var args []interface{}

	for i := 0; i < ov.NumField(); i++ {
		field := ov.Field(i)
		ftype := ot.Elem().Field(i)

		fname, ok := ftype.Tag.Lookup("field")
		if !ok {
			continue
		}

		// if flag, ok := ftype.Tag.Lookup("uid"); ok {
		// 	if flag == "auto" {
		// 		continue
		// 	}
		// }

		var value interface{}
		k := ftype.Type.Kind()
		if k == reflect.Ptr || k == reflect.Interface {
			if field.IsNil() {
				// Nothing is inserted for this column, so there is no value to
				// hand to a matching DUpdate either; it stays nil (NULL).
				continue
			}
			value = field.Elem().Interface()
		} else {
			value = field.Interface()
		}

		args = append(args, value)
		fieldsql += fname + ","
		argssql += "?,"

		for _, u := range sourceUpdate {
			if u.Field == fname {
				u.Value = value
				break
			}
		}
	}

	if fieldsql == "" {
		return fmt.Errorf("table: %s InsertOrUpdate found no field to insert", t.name)
	}

	// Same ordering as before: object-sourced updates first, explicit ones after.
	duplicateSet := ""
	for _, u := range sourceUpdate {
		duplicateSet += u.Field + " = ?,"
		args = append(args, u.Value)
	}
	for _, u := range otherUpdate {
		duplicateSet += u.Field + " = ?,"
		args = append(args, u.Value)
	}

	// Every fragment ends with a trailing comma that must be trimmed.
	ssql := fmt.Sprintf(t.duplicatesql, fieldsql[:len(fieldsql)-1], argssql[:len(argssql)-1], duplicateSet[:len(duplicateSet)-1])
	_, err := t.store.db.Exec(ssql, args...)
	return err
}
// InsertRetAutoID nil 不插入. 不支持嵌套. 并返回auto uid
func (t *Table) InsertRetAutoID(obj interface{}) (int64, error) {
ov := reflect.ValueOf(obj).Elem()
@ -427,10 +348,10 @@ func (t *Table) UpdateError(obj interface{}, err error) {
}
}
_, dberr := t.store.db.Exec("update "+t.name+" set operator = ?, error_msg = ? where "+uidname+" = ?", 10000, sql.NullString{String: err.Error(), Valid: true}, uidvalue)
_, dberr := t.store.db.Exec("update "+t.name+" set operator = ?, error_msg = ? where ? = ?", 10000, sql.NullString{String: err.Error(), Valid: true}, uidname, uidvalue)
if dberr != nil {
// email tell owner to deal with
panic(dberr)
panic(err)
}
}

View File

@ -5,24 +5,26 @@ import (
"encoding/json"
"testing"
"time"
"github.com/davecgh/go-spew/spew"
)
func TestAutoStore(t *testing.T) {
func estAutoStore(t *testing.T) {
uri := "root:@tcp(127.0.0.1:4000)/test?parseTime=true&loc=Local&charset=utf8mb4&collation=utf8mb4_unicode_ci"
store := NewStore(uri)
// queue := store.Table("streamer").Queue(TSreamer{}, CondWhere{Condition: "operator = 0"})
// re, _ := queue.Pop()
queue := store.Table("streamer").Queue(TSreamer{}, CondWhere{Condition: "operator = 0"})
re, _ := queue.Pop()
// pstreamer := re.(*TSreamer)
// m := make(map[string]interface{})
// json.Unmarshal(pstreamer.Iface.([]byte), &m)
// spew.Println(re.(*TSreamer), m)
pstreamer := re.(*TSreamer)
m := make(map[string]interface{})
json.Unmarshal(pstreamer.Iface.([]byte), &m)
spew.Println(re.(*TSreamer), m)
streamer := &TSreamer{}
streamer.Uid = 1
streamer.UserID = &sql.NullString{String: "xixi", Valid: true}
streamer.Uid = 2
streamer.UserID = &sql.NullString{String: "hehe", Valid: true}
streamer.Name = "streamer"
streamer.Operator = 0
streamer.Bit = 0b11
@ -39,7 +41,7 @@ func TestAutoStore(t *testing.T) {
now := time.Now()
streamer.UpdateTime = &now
err = store.Table("streamer").InsertOrUpdate(streamer, DUpdate{Field: "userid"})
err = store.Table("streamer").Insert(streamer)
if err != nil {
t.Error(err)
}

View File

@ -1,159 +0,0 @@
package main
import (
"database/sql"
"encoding/json"
"intimate"
"log"
"time"
"github.com/474420502/gcurl"
"github.com/tidwall/gjson"
)
// main runs the Mirrativ extractor loop: it pops streamers from the streamer
// queue, fetches each streamer's profile, and — when the streamer is currently
// live — fetches the live details and gift ranking, stores them as a
// CollectLog row, and finally writes the streamer back.
//
// NOTE(review): the CSRF token and cookie below are hard-coded session
// credentials; they should be moved to configuration.
func main() {
	sessionstr := `
-H 'authority: www.mirrativ.com'
-H 'accept: application/json'
-H 'x-timezone: Asia/Shanghai'
-H 'x-csrf-token: F3Ojd6RBtApP6YAZzVn-9jWN1of159VxAqOQL1Zn'
-H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36'
-H 'content-type: application/json'
-H 'sec-fetch-site: same-origin'
-H 'sec-fetch-mode: cors'
-H 'sec-fetch-dest: empty'
-H 'referer: https://www.mirrativ.com/live/O5Ia4iX9c5CeZj7DFtg52Q'
-H 'accept-language: zh-CN,zh;q=0.9,ja;q=0.8'
-H 'cookie: f=A2D75F0E-D218-11EA-A042-452BF6D21CE8; _ga=GA1.2.689947597.1596081392; mr_id=kxb65LddGMZf5C28jkR_tGCZD_ZFOAepD5gfXO7eNjfPMB8EKYvU1Vg_Y29V1lsa; _gid=GA1.2.2116692650.1600139685; lang=ja'`

	ps := intimate.NewPerfectShutdown()

	// Request templates; the query parameters are re-set for every streamer.
	gprofile := gcurl.Parse(`curl 'https://www.mirrativ.com/api/user/profile?user_id=103383701'` + sessionstr)
	tpProfile := gprofile.CreateTemporary(nil)
	tpProfileUserID := tpProfile.QueryParam("user_id")

	// The command word was misspelled "culr" here; corrected to "curl".
	g := gcurl.Parse(`curl 'https://www.mirrativ.com/api/live/live?live_id=O5Ia4iX9c5CeZj7DFtg52Q'` + sessionstr)
	tpLive := g.CreateTemporary(nil)
	tpLiveID := tpLive.QueryParam("live_id")

	var lasterr error
	queue := intimate.TStreamer.Queue(intimate.Streamer{}, intimate.ConditionDefault(intimate.PMirrativ))

	for !ps.IsClose() {
		istreamer, err := queue.Pop()
		if err != nil {
			// Only log when the error changes, to avoid flooding the log
			// while the queue stays empty.
			if lasterr != err {
				lasterr = err
				log.Println(err)
			}
			ps.Wait(time.Second * 5)
			continue
		}

		now := &sql.NullTime{Time: time.Now(), Valid: true}
		streamer := istreamer.(*intimate.Streamer)
		streamer.UpdateTime = now

		userid := *streamer.UserId
		log.Println(userid)

		tpProfileUserID.StringSet(userid)
		resp, err := tpProfile.Execute()
		if err != nil {
			log.Println(err)
			time.Sleep(time.Second)
			continue
		}

		clog := &intimate.CollectLog{}
		clog.Platform = intimate.PMirrativ
		clog.UpdateTime = now
		clog.UserId = userid
		clog.StreamerUid = streamer.Uid

		profilejson := gjson.ParseBytes(resp.Content())
		if result := profilejson.Get("follower_num"); result.Exists() {
			clog.Followers = &sql.NullInt64{Int64: result.Int(), Valid: true}
		}

		// A log row is only written while the streamer is live.
		if result := profilejson.Get("onlive.live_id"); result.Exists() {
			liveID := result.String()
			tpLiveID.StringSet(liveID)
			resp, err = tpLive.Execute()
			if err != nil {
				log.Println(err)
				continue
			}

			livejson := gjson.ParseBytes(resp.Content())
			if result := livejson.Get("total_viewer_num"); result.Exists() {
				clog.Views = &sql.NullInt64{Int64: result.Int(), Valid: true}
			}
			if result := livejson.Get("title"); result.Exists() {
				clog.LiveTitle = &sql.NullString{String: result.String(), Valid: true}
			}
			if result := livejson.Get("started_at"); result.Exists() {
				clog.LiveStartTime = &sql.NullTime{Time: time.Unix(result.Int(), 0), Valid: true}
			}
			if result := livejson.Get("heartbeated_at"); result.Exists() {
				clog.LiveEndTime = &sql.NullTime{Time: time.Unix(result.Int(), 0), Valid: true}
			}
			if result := livejson.Get("app_id"); result.Exists() {
				streamer.Channel = &sql.NullString{String: result.String(), Valid: true}
			}

			if result := livejson.Get("timeline.#.app.short_title"); result.Exists() {
				// Only the first timeline entry is used as the tag.
				if tls := result.Array(); len(tls) > 0 {
					tags := []string{tls[0].String()}
					jtags, _ := json.Marshal(tags) // marshalling a []string cannot fail
					streamer.Tags = jtags
					clog.Tags = jtags
				}
			} else {
				// NOTE(review): this terminates the whole program when the
				// timeline is missing — looks like a debugging leftover; confirm.
				log.Println(string(resp.Content()))
				return
			}

			if result := livejson.Get("gift_ranking_url"); result.Exists() {
				// streamer.Channel = &sql.NullString{String: result.String(), Valid: true}
				gifturl := "curl '" + result.String() + "&type=monthly&cursor='" + sessionstr
				ggift := gcurl.Parse(gifturl)
				tp := ggift.CreateTemporary(nil)
				tp.SetURLRawPath("/api/gift/ranking")
				pcursor := tp.QueryParam("cursor")

				// Sum the points of every page of the ranking.
				var gratuity int64
				for {
					giftdata, err := tp.Execute()
					if err != nil {
						// The response must not be touched on error; keep
						// whatever was accumulated so far and stop paging.
						log.Println(err)
						break
					}

					giftjson := gjson.ParseBytes(giftdata.Content())
					for _, rpoint := range giftjson.Get("ranking.#.point").Array() {
						gratuity += rpoint.Int()
					}

					ncursor := giftjson.Get("next_cursor").String()
					if ncursor == "" {
						break
					}
					pcursor.StringSet(ncursor)
				}

				// https://www.mirrativ.com/gift/ranking?live_id=O5Ia4iX9c5CeZj7DFtg52Q&obfuscated_user_id=PgIBEgc6jVc
				clog.Gratuity = &sql.NullInt64{Int64: gratuity, Valid: true}
			}

			cid, err := intimate.TClog.InsertRetAutoID(clog)
			if err != nil {
				// Keep the previous LatestLogUid when the insert failed.
				log.Println(err)
			} else {
				streamer.LatestLogUid = cid
			}
		}

		if err := intimate.TStreamer.Update(streamer); err != nil {
			log.Println(err)
		}
		time.Sleep(time.Second * 2)
	}
}

View File

@ -1,9 +0,0 @@
package main
import (
"testing"
)
// TestDo runs the program's main function under `go test`; it makes no
// assertions of its own.
func TestDo(t *testing.T) {
main()
}

View File

@ -11,10 +11,10 @@ import (
)
// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
// var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STNimo))
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STNimo))
// // estore 解析存储连接实例
// var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
// estore 解析存储连接实例
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
func main() {
Execute()
@ -28,25 +28,21 @@ type LiveInfo struct {
}
func Execute() {
adriver := intimate.GetChromeDriver()
wd := intimate.GetChromeDriver(3030)
count := 0
countlimit := 200
wd := adriver.Webdriver
waitfor := intimate.NewWaitFor(wd)
ps := intimate.NewPerfectShutdown()
queue := intimate.TStreamer.Queue(intimate.Streamer{}, intimate.ConditionDefault(intimate.PNimo))
for !ps.IsClose() {
istreamer, err := queue.Pop()
streamer, err := estore.Pop(intimate.PNimo)
if err != nil {
log.Println(err)
intimate.TStreamer.UpdateError(istreamer, err)
estore.UpdateError(streamer, err)
continue
}
streamer := istreamer.(*intimate.Streamer)
wd.Get(streamer.LiveUrl.String)
// wd.Get("https://www.nimo.tv/live/1253835677")
@ -75,25 +71,22 @@ func Execute() {
clog := &intimate.CollectLog{}
clog.Platform = intimate.PNimo
clog.Followers = &sql.NullInt64{Int64: li.Followers, Valid: true}
clog.Views = &sql.NullInt64{Int64: li.Views, Valid: true}
clog.UpdateTime = &utime
clog.Followers = sql.NullInt64{Int64: li.Followers, Valid: true}
clog.Views = sql.NullInt64{Int64: li.Views, Valid: true}
clog.UpdateTime = utime
clog.StreamerUid = streamer.Uid
var sum int64 = 0
for _, v := range li.Gratuity {
sum += v
}
clog.Gratuity = &sql.NullInt64{Int64: sum, Valid: true}
clog.Gratuity = sql.NullInt64{Int64: sum, Valid: true}
cuid, err := intimate.TClog.InsertRetAutoID(clog)
if err != nil {
panic(err)
}
cuid := estore.InsertClog(clog)
streamer.Channel = &sql.NullString{String: li.Channel, Valid: true}
streamer.Channel = sql.NullString{String: li.Channel, Valid: true}
streamer.LatestLogUid = cuid
streamer.UpdateTime = &utime
streamer.UpdateTime = utime
streamer.Operator = 0
switch {
@ -109,17 +102,14 @@ func Execute() {
streamer.UpdateInterval = 60
}
// estore.Update(streamer, "update_interval", streamer.UpdateInterval, "operator", streamer.Operator, "channel", streamer.Channel, "latest_log_uid", streamer.LatestLogUid, "update_time", streamer.UpdateTime)
err = intimate.TStreamer.Update(streamer)
if err != nil {
panic(err)
}
estore.Update(streamer, "update_interval", streamer.UpdateInterval, "operator", streamer.Operator, "channel", streamer.Channel, "latest_log_uid", streamer.LatestLogUid, "update_time", streamer.UpdateTime)
count++
if count >= countlimit {
count = 0
adriver.Close()
adriver = intimate.GetChromeDriver()
wd.Close()
wd.Quit()
wd = intimate.GetChromeDriver(3030)
}
}
}

View File

@ -34,17 +34,13 @@ func main() {
ps := intimate.NewPerfectShutdown()
ses := requests.NewSession()
streamerQueue := intimate.TStreamer.Queue(intimate.Streamer{}, intimate.ConditionDefault(intimate.Ptwitcasting))
var lasterr error
for !ps.IsClose() {
// streamer, err := estore.Pop(intimate.Ptwitcasting)
isteamer, err := streamerQueue.Pop()
if err != nil {
if lasterr != err {
lasterr = err
log.Println(err)
}
time.Sleep(time.Minute)
log.Println(err, isteamer)
continue
}

View File

@ -3,326 +3,134 @@ package main
import (
"database/sql"
"encoding/json"
"fmt"
"intimate"
"log"
"regexp"
"strings"
"time"
"github.com/tebeka/selenium"
)
// // sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
// var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))
// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))
// // estore 解析存储连接实例
// var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
// estore 解析存储连接实例
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
func main() {
adriver := intimate.GetChromeDriver()
wd := intimate.GetChromeDriver(3030)
ps := intimate.NewPerfectShutdown()
slqueue := intimate.TStreamerList.Queue(intimate.StreamerList{}, intimate.ConditionDefault(intimate.Ptwitch))
squeue := intimate.TStreamer.Queue(intimate.Streamer{}, intimate.ConditionDefault(intimate.Ptwitch))
var count = 0
var countlimt = 200
var recreate = time.Now()
var lasterr error = nil
// var err error
for !ps.IsClose() {
wd := adriver.Webdriver
// sourceChannel, err := sstore.Pop(intimate.TTwitchChannel)
isl, err := slqueue.Pop()
if err != nil {
if lasterr != err {
streamer, err := estore.Pop(intimate.Ptwitch)
if streamer == nil || err != nil {
if err != lasterr {
log.Println(err, lasterr)
lasterr = err
log.Println(err)
}
istreamer, err := squeue.Pop()
if err != nil {
if lasterr != err {
lasterr = err
log.Println(err)
ps.Wait(time.Minute)
continue
}
}
streamer := istreamer.(*intimate.Streamer)
Extractor(wd, streamer)
if err = intimate.TStreamer.Update(streamer); err != nil {
log.Println(err)
}
count++
if count >= countlimt || time.Now().Sub(recreate) >= time.Minute*120 {
count = 0
adriver.Close()
adriver = intimate.GetChromeDriver()
recreate = time.Now()
}
time.Sleep(time.Second * 2)
continue
}
streamerlist := isl.(*intimate.StreamerList)
var updateUrl map[string]string
json.Unmarshal(streamer.UpdateUrl.([]byte), &updateUrl)
liveUrl := updateUrl["live"]
liveUrl = strings.Replace(liveUrl, "/watchparty", "", -1)
log.Println(liveUrl)
weburl := streamerlist.Url + "?sort=VIEWER_COUNT"
err = wd.Get(weburl)
// err = wd.Get("https://www.twitch.tv/zoe_0601" + "/about")
err = wd.Get(liveUrl + "/about")
if err != nil {
log.Println(err)
// sstore.UpdateError(sourceChannel, err)
intimate.TStreamerList.UpdateError(streamerlist, err)
time.Sleep(time.Second * 10)
estore.UpdateError(streamer, err)
time.Sleep(time.Second * 5)
continue
}
wd.WaitWithTimeout(func(wd selenium.WebDriver) (bool, error) {
_, err := wd.FindElement(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
if err != nil {
return false, err
}
return true, nil
}, time.Second*10)
streamer.LiveUrl = sql.NullString{String: liveUrl, Valid: true}
clog := &intimate.CollectLog{}
clog.UserId = streamer.UserId
clog.Gratuity = sql.NullInt64{Int64: 0, Valid: false}
btn, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
time.Sleep(time.Millisecond * 500)
err = extractUserName(wd, streamer)
if err != nil {
log.Println(err)
_, err = wd.FindElement(selenium.ByXPATH, "//a[@data-a-target='browse-channels-button']")
if err == nil {
log.Println(streamer.UserId, "may be cancell")
streamer.Operator = 5
streamer.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true}
estore.UpdateStreamer(streamer)
}
continue
}
btn.Click()
var elements []selenium.WebElement
var liveurls = 0
var delayerror = 2
for i := 0; i < 200 && !ps.IsClose(); i++ {
elements, err = wd.FindElements(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
if err != nil {
log.Println(err)
break
}
time.Sleep(time.Millisecond * 200)
wd.KeyDown(selenium.EndKey)
time.Sleep(time.Millisecond * 200)
wd.KeyUp(selenium.EndKey)
time.Sleep(time.Millisecond * 2000)
if len(elements) == liveurls {
delayerror--
if delayerror <= 0 {
break
}
} else {
delayerror = 2
}
liveurls = len(elements)
}
articles, err := wd.FindElements(selenium.ByXPATH, "//article")
err = extractFollowers(wd, clog)
if err != nil {
log.Println(err)
continue
}
var streamers []*intimate.Streamer
for _, article := range articles {
e, err := article.FindElement(selenium.ByXPATH, ".//a[@data-a-target='preview-card-title-link' and @href]")
if err != nil {
log.Println(err)
continue
}
href, err := e.GetAttribute("href")
if err != nil {
log.Println(err)
continue
}
btns, err := article.FindElements(selenium.ByXPATH, ".//div[@class='tw-full-width tw-inline-block']//button")
if err != nil {
log.Println(err)
continue
}
var tags []string
for _, btn := range btns {
tag, err := btn.GetAttribute("data-a-target")
if err == nil {
tags = append(tags, tag)
err = extractViews(wd, clog) // views + tags + gratuity
if err != nil {
// 不直播时提取礼物 gratuity
wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
channelchat, err := wd.FindElement(selenium.ByXPATH, `//a[@data-a-target="channel-home-tab-Chat"]`)
btn, _ := web.FindElement(selenium.ByXPATH, `//button[@data-test-selector="expand-grabber"]`)
if (err == nil && channelchat != nil) || btn != nil {
if channelchat != nil {
channelchat.Click()
}
time.Sleep(time.Second)
extractGratuity(wd, clog)
return true, nil
}
}
return false, nil
streamer := &intimate.Streamer{}
matches := regexp.MustCompile(`https://www.twitch.tv/(\w+)`).FindStringSubmatch(href)
if len(matches) == 2 {
mc := matches[1]
streamer.UserId = &mc
} else {
log.Println(href)
continue
}
jtags, err := json.Marshal(tags)
if err != nil {
log.Println(err)
} else {
streamer.Tags = jtags
}
streamer.Platform = intimate.Ptwitch
streamer.LiveUrl = &sql.NullString{String: href, Valid: true}
streamer.Operator = 0
streamers = append(streamers, streamer)
// if estore.InsertStreamer(streamer) {
// // log.Println("streamer update tags", streamer.Uid, tags)
// if streamer.Tags != nil {
// estore.Update(streamer, "Tags", streamer.Tags)
// }
// }
}, time.Second*4)
}
for _, streamer := range streamers {
Extractor(wd, streamer)
streamer.UpdateTime = &sql.NullTime{Time: time.Now(), Valid: true}
if err = intimate.TStreamer.InsertOrUpdate(streamer,
intimate.DUpdate{Field: "tags"},
intimate.DUpdate{Field: "update_time"},
intimate.DUpdate{Field: "update_interval"},
); err != nil {
log.Println(err)
}
streamer.Platform = intimate.Ptwitch
clog.Platform = streamer.Platform
clog.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true}
lastClogId := estore.InsertClog(clog)
streamer.Operator = 10
streamer.LatestLogUid = lastClogId
if clog.Tags != nil {
streamer.Tags = clog.Tags
}
log.Println("streamer find", len(articles))
if len(articles) == 0 {
intimate.TStreamerList.UpdateError(streamerlist, fmt.Errorf(""))
switch fl := clog.Followers.Int64; {
case fl > 100000:
streamer.UpdateInterval = 120
case fl > 10000:
streamer.UpdateInterval = 240 * 2
case fl > 1000:
streamer.UpdateInterval = 360 * 2
case fl > 100:
streamer.UpdateInterval = 720 * 2
case fl > 0:
streamer.UpdateInterval = 1440 * 4
}
streamer.UpdateTime = clog.UpdateTime
estore.UpdateStreamer(streamer)
count++
if count >= countlimt || time.Now().Sub(recreate) >= time.Minute*120 {
if count >= countlimt {
count = 0
adriver.Close()
adriver = intimate.GetChromeDriver()
recreate = time.Now()
// wd.Quit()
wd = intimate.GetChromeDriver(3030)
}
}
adriver.Close()
}
func Extractor(wd selenium.WebDriver, streamer *intimate.Streamer) {
// streamer, err := estore.Pop(intimate.Ptwitch)
// if streamer == nil || err != nil {
// if err != lasterr {
// log.Println(err, lasterr)
// lasterr = err
// }
// time.Sleep(time.Second * 2)
// continue
// }
// var updateUrl map[string]string
// json.Unmarshal(streamer.UpdateUrl.([]byte), &updateUrl)
liveUrl := "https://www.twitch.tv/" + (*streamer.UserId)
// liveUrl = strings.Replace(liveUrl, "/watchparty", "", -1)
log.Println(liveUrl)
// err = wd.Get("https://www.twitch.tv/zoe_0601" + "/about")
err := wd.Get(liveUrl + "/about")
if err != nil {
errstr := fmt.Errorf("%s: %s", err.Error(), liveUrl+"/about")
log.Println(errstr)
intimate.TStreamer.UpdateError(streamer, errstr)
time.Sleep(time.Second * 5)
return
}
streamer.LiveUrl = &sql.NullString{String: liveUrl, Valid: true}
clog := &intimate.CollectLog{}
clog.UserId = *streamer.UserId
clog.Gratuity = &sql.NullInt64{Int64: 0, Valid: false}
time.Sleep(time.Millisecond * 500)
err = extractUserName(wd, streamer)
if err != nil {
_, err = wd.FindElement(selenium.ByXPATH, "//a[@data-a-target='browse-channels-button']")
if err == nil {
log.Println(*streamer.UserId, "may be cancell")
streamer.Operator = 5
intimate.TStreamer.UpdateError(streamer, fmt.Errorf(*streamer.UserId, "may be cancell"))
}
return
}
err = extractFollowers(wd, clog)
if err != nil {
// log.Println(err)
streamer.UpdateInterval += 30
return
}
err = extractViews(wd, clog) // views + tags + gratuity
if err != nil {
// 不直播时提取礼物 gratuity
wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
channelchat, err := wd.FindElement(selenium.ByXPATH, `//a[@data-a-target="channel-home-tab-Chat"]`)
btn, _ := web.FindElement(selenium.ByXPATH, `//button[@data-test-selector="expand-grabber"]`)
if (err == nil && channelchat != nil) || btn != nil {
if channelchat != nil {
channelchat.Click()
}
time.Sleep(time.Second)
extractGratuity(wd, clog)
return true, nil
}
return false, nil
}, time.Second*4)
}
streamer.Platform = intimate.Ptwitch
clog.Platform = streamer.Platform
clog.UpdateTime = &sql.NullTime{Time: time.Now(), Valid: true}
// clog.StreamerUid = streamer.Uid
lastClogId, err := intimate.TClog.InsertRetAutoID(clog)
if err != nil {
log.Println(err)
return
}
streamer.LatestLogUid = lastClogId
if clog.Tags != nil {
streamer.Tags = clog.Tags
}
switch fl := clog.Followers.Int64; {
case fl > 100000:
streamer.UpdateInterval = 120
case fl > 10000:
streamer.UpdateInterval = 240 * 2
case fl > 1000:
streamer.UpdateInterval = 360 * 2
case fl > 100:
streamer.UpdateInterval = 720 * 2
case fl > 0:
streamer.UpdateInterval = 1440 * 4
}
streamer.UpdateTime = clog.UpdateTime
// intimate.TStreamer.InsertOrUpdate(streamer)
// count++
// if count >= countlimt {
// count = 0
// // wd.Quit()
// wd = intimate.GetChromeDriver(3030)
// }
wd.Close()
wd.Quit()
}
func extractUserName(wd selenium.WebDriver, streamer *intimate.Streamer) error {
@ -331,7 +139,7 @@ func extractUserName(wd selenium.WebDriver, streamer *intimate.Streamer) error {
if err == nil {
if ltxt, err := label.Text(); err == nil && ltxt != "" {
// log.Println("label:", ltxt)
streamer.UserName = &sql.NullString{String: ltxt, Valid: true}
streamer.UserName = sql.NullString{String: ltxt, Valid: true}
return true, nil
}
}
@ -351,7 +159,7 @@ func extractFollowers(wd selenium.WebDriver, clog *intimate.CollectLog) error {
}
followers = regexp.MustCompile(`[\d,]+`).FindString(followers)
fint, _ := intimate.ParseNumber(followers)
clog.Followers = &sql.NullInt64{Int64: int64(fint), Valid: true}
clog.Followers = sql.NullInt64{Int64: int64(fint), Valid: true}
// log.Println("followers: ", followers, fint)
return true, nil
}, 4*time.Second)
@ -364,7 +172,7 @@ func extractViews(wd selenium.WebDriver, clog *intimate.CollectLog) error {
if txt, err := views.Text(); err == nil {
vint, _ := intimate.ParseNumber(txt)
clog.Views = &sql.NullInt64{Int64: vint, Valid: true}
clog.Views = sql.NullInt64{Int64: vint, Valid: true}
// log.Println("views:", txt)
views.Click()
@ -384,7 +192,7 @@ func extractTitle(wd selenium.WebDriver, clog *intimate.CollectLog) error {
title, err := web.FindElement(selenium.ByXPATH, `//h2[@data-a-target='stream-title']`)
if err == nil {
if txt, err := title.Text(); err == nil {
clog.LiveTitle = &sql.NullString{String: txt, Valid: true}
clog.LiveTitle = sql.NullString{String: txt, Valid: true}
return true, nil
}
}
@ -436,7 +244,7 @@ func extractGratuity(wd selenium.WebDriver, clog *intimate.CollectLog) error {
log.Println(err)
}
}
clog.Gratuity = &sql.NullInt64{Int64: gratuity, Valid: true}
clog.Gratuity = sql.NullInt64{Int64: gratuity, Valid: true}
}
return true, nil
}

View File

@ -9,9 +9,9 @@ type GetSet struct {
}
type StreamerList struct {
UrlHash string `field:"urlhash" uid:"true"` //
Platform string `field:"platform" ` //
Url string `field:"url" ` //
UrlHash string `field:"urlhash" ` //
Platform string `field:"platform" ` //
Url string `field:"url" ` //
Label *sql.NullString `field:"label" ` //
@ -46,7 +46,6 @@ type Streamer struct {
Channel *sql.NullString `field:"channel"` //
Tags interface{} `field:"tags"`
Ext interface{} `field:"ext"` //
// Comments interface{} `field:"comments"`
IsUpdateStreamer bool // 更新上面的内容
IsUpdateUrl bool
@ -90,7 +89,6 @@ type CollectLog struct {
Tags interface{} `field:"tags"`
Ext interface{} `field:"ext"` //
ErrorMsg *sql.NullString `field:"error_msg"` //
Comments interface{} `field:"comments"` //
}
// Get Simple Value

View File

@ -15,7 +15,4 @@ const (
// PNimo PNimo 平台
PNimo Platform = "nimo"
// PMirrativ is the Mirrativ platform.
PMirrativ Platform = "mirrativ"
)

View File

@ -1,6 +1,6 @@
CURPATH=`pwd`
BINPATH=$(dirname "$CURPATH")/bin
find $BINPATH -type f -name 'log' -exec truncate -s 0 {} +
find $BINPATH -type f -name 'log' -exec rm {} +

View File

@ -1,10 +0,0 @@
[supervisord]
nodaemon=true
[program:mirrativ_extractor]
directory = MYPATH/bin/mirrativ_extractor/
command= MYPATH/bin/mirrativ_extractor/mirrativ_extractor
autorestart=true
stderr_logfile=MYPATH/bin/mirrativ_extractor/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -1,13 +0,0 @@
[supervisord]
nodaemon=false
[program:mirrativ_task1]
environment=DISPLAY=":99"
directory = MYPATH/bin/mirrativ_task1/
command= MYPATH/bin/mirrativ_task1/mirrativ_task1
# process_name=%(program_name)s_%(process_num)02d ;多进程名称
# numprocs=1 ;启动多个进程
autorestart=true
stderr_logfile=MYPATH/bin/mirrativ_task1/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -0,0 +1,12 @@
[supervisord]
nodaemon=true
[program:openrec_source]
directory = MYPATH/bin/openrec_task2/
command= MYPATH/bin/openrec_task2/openrec_task2
process_name=%(program_name)s_%(process_num)02d ;多进程名称
numprocs=4 ;启动多个进程
autorestart=true
stderr_logfile=MYPATH/bin/openrec_task2/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -1,5 +1,5 @@
[supervisord]
nodaemon=true
nodaemon=false
[program:twitch_extractor]
environment=DISPLAY=":99"

View File

@ -0,0 +1,13 @@
[supervisord]
nodaemon=false
[program:twitch_extractor_p2]
environment=DISPLAY=":99",pac_proxy=http://localhost:1090/pac1
directory = MYPATH/bin/twitch_extractor
command= MYPATH/bin/twitch_extractor/twitch_extractor
process_name=%(program_name)s_%(process_num)02d ;多进程名称
numprocs=2 ;启动多个进程
autorestart=true
stderr_logfile=MYPATH/bin/twitch_extractor/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -1,13 +0,0 @@
[supervisord]
nodaemon=false
[program:twitch_task1]
environment=DISPLAY=":99"
directory = MYPATH/bin/twitch_task1
command= MYPATH/bin/twitch_task1/twitch_task1
# process_name=%(program_name)s_%(process_num)02d ;多进程名称
# numprocs=1 ;启动多个进程
autorestart=true
stderr_logfile=MYPATH/bin/twitch_task1/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -0,0 +1,13 @@
[supervisord]
nodaemon=false
[program:twitch_task2]
environment=DISPLAY=":99"
directory = MYPATH/bin/twitch_task2
command= MYPATH/bin/twitch_task2/twitch_task2
process_name=%(program_name)s_%(process_num)02d ;多进程名称
numprocs=6 ;启动多个进程
autorestart=true
stderr_logfile=MYPATH/bin/twitch_task2/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -1,83 +0,0 @@
package main
import (
"database/sql"
"intimate"
"log"
"time"
"github.com/474420502/gcurl"
"github.com/tidwall/gjson"
)
// main crawls the Mirrativ live catalog page by page and upserts every
// listed owner as a Streamer row (refreshing update_time on duplicates).
// When the catalog is exhausted it waits ten minutes and starts over.
//
// NOTE(review): the CSRF token and cookie below are hard-coded session
// credentials; they should be moved to configuration.
func main() {
	bcurl := `curl 'https://www.mirrativ.com/api/live/catalog?id=2&cursor=%s' \
-H 'authority: www.mirrativ.com' \
-H 'accept: application/json' \
-H 'x-timezone: Asia/Shanghai' \
-H 'x-csrf-token: F3Ojd6RBtApP6YAZzVn-9jWN1of159VxAqOQL1Zn' \
-H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36' \
-H 'content-type: application/json' \
-H 'sec-fetch-site: same-origin' \
-H 'sec-fetch-mode: cors' \
-H 'sec-fetch-dest: empty' \
-H 'referer: https://www.mirrativ.com/' \
-H 'accept-language: zh-CN,zh;q=0.9,ja;q=0.8' \
-H 'cookie: f=A2D75F0E-D218-11EA-A042-452BF6D21CE8; _ga=GA1.2.689947597.1596081392; mr_id=kxb65LddGMZf5C28jkR_tGCZD_ZFOAepD5gfXO7eNjfPMB8EKYvU1Vg_Y29V1lsa; _gid=GA1.2.2116692650.1600139685; lang=ja' \
--compressed`

	curl := gcurl.Parse(bcurl)
	tp := curl.CreateTemporary(nil)

	// The "%s" placeholder in the URL is never formatted; the cursor is
	// driven through the query parameter instead, starting empty.
	cursor := tp.QueryParam(`cursor`)
	cursor.StringSet("")

	ps := intimate.NewPerfectShutdown()
	for !ps.IsClose() {
		log.Println(tp.ParsedURL.String())
		resp, err := tp.Execute()
		if err != nil {
			log.Println(err)
			time.Sleep(time.Second * 2)
			continue
		}

		apijson := gjson.ParseBytes(resp.Content())
		next := apijson.Get("next_cursor").String()

		for _, liveinfo := range apijson.Get("list").Array() {
			// Entries carry their data under either "live_preview" or "live".
			prekey := "live"
			if liveinfo.Get("live_preview").Exists() {
				prekey = "live_preview"
			}

			owner := liveinfo.Get(prekey + ".owner")

			// Use String() for both the check and the stored value: Result.Str
			// is only populated for JSON strings, so a numeric user_id would
			// pass the check but be stored as "".
			userid := owner.Get("user_id").String()
			if userid == "" {
				continue
			}

			streamer := &intimate.Streamer{}
			streamer.Platform = intimate.PMirrativ
			streamer.Operator = 0
			streamer.UserId = &userid
			streamer.UserName = &sql.NullString{String: owner.Get("name").String(), Valid: true}
			streamer.UpdateInterval = 600
			streamer.UpdateTime = intimate.GetUpdateTimeNow()

			err = intimate.TStreamer.InsertOrUpdate(
				streamer,
				intimate.DUpdate{Field: "update_time"},
			)
			if err != nil {
				log.Println(err)
				panic(err)
			}
		}

		// An empty cursor means the catalog was fully walked: rest longer
		// before restarting from the first page.
		if next == "" {
			ps.Wait(time.Minute * 10)
		} else {
			ps.Wait(time.Second * 2)
		}
		cursor.StringSet(next)
	}
}

View File

@ -12,7 +12,7 @@ import (
)
// estore 解析存储连接实例
// var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
// Execute 执行
func Execute() {
@ -95,10 +95,7 @@ func Execute() {
}
streamer.UpdateInterval = 120
err = intimate.TStreamer.Insert(streamer)
if err != nil {
panic(err)
}
estore.InsertStreamer(streamer)
} else {
log.Println("userid is null.", room.String())

View File

@ -70,11 +70,11 @@ func Execute() {
if ok := queuedict[wurl]; !ok {
log.Println(wurl)
sl := &intimate.StreamerList{}
sl.Platform = string(intimate.Ptwitcasting)
sl.Platform = intimate.Ptwitcasting
sl.Url = wurl
sl.Operator = 0
sl.UpdateInterval = 120
sl.UpdateTime = &sql.NullTime{Time: time.Now(), Valid: true}
sl.UpdateTime = time.Now()
sl.UrlHash = intimate.GetUrlHash(sl.Url)
intimate.TStreamerList.Insert(sl)
@ -104,11 +104,11 @@ func Execute() {
sp.TagUrl[i] = wurl
if ok := queuedict[wurl]; !ok {
sl := &intimate.StreamerList{}
sl.Platform = string(intimate.Ptwitcasting)
sl.Platform = intimate.Ptwitcasting
sl.Url = wurl
sl.Operator = 0
sl.UpdateInterval = 120
sl.UpdateTime = &sql.NullTime{Time: time.Now(), Valid: true}
sl.UpdateTime = time.Now()
sl.UrlHash = intimate.GetUrlHash(sl.Url)
intimate.TStreamerList.Insert(sl)

View File

@ -20,13 +20,10 @@ import (
func Execute() {
ps := intimate.NewPerfectShutdown()
var adriver *intimate.AutoCloseDriver
for !ps.IsClose() {
var err error
adriver = intimate.GetChromeDriver()
wd := adriver.Webdriver
wd := intimate.GetChromeDriver(3030)
weburl := "https://www.twitch.tv/directory?sort=VIEWER_COUNT"
err = wd.Get(weburl)
@ -110,11 +107,21 @@ func Execute() {
if err != nil {
log.Println(err)
}
// TODO: Save href
// source := &intimate.Source{}
// source.Source = sql.NullString{String: href, Valid: true}
// source.Operator = 0
// source.Target = intimate.TTwitchChannel
// source.Url = weburl
// sstore.Insert(source)
}
log.Println("hrefs len:", len(hrefs))
adriver.Close()
ps.Wait(time.Minute * 5)
}
// sstore.Deduplicate(intimate.TTwitchChannel, "source")
wd.Close()
wd.Quit()
time.Sleep(time.Minute * 30)
}
}

2
tasks/twitch/twitch_task2/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
twitch_task2
log

View File

@ -0,0 +1,6 @@
package main
// main is the program entry point; it hands control straight to Execute.
func main() {
Execute()
}

View File

@ -0,0 +1,175 @@
package main
import (
"database/sql"
"encoding/json"
"intimate"
"log"
"regexp"
"time"
"github.com/tebeka/selenium"
)
// // sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
// var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))
// // estore 解析存储连接实例
// var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
// Execute collects the channel links listed on Twitch source pages and stores
// each channel as a streamer.
//
// Each iteration pops one source record from sstore, opens its URL with
// "?sort=VIEWER_COUNT" appended, presses the End key repeatedly to load more
// of the list, and then reads every <article> element on the page. For each
// article it extracts the channel link and tag buttons, builds an
// intimate.Streamer and hands it to estore. The loop runs until the
// PerfectShutdown reports that the program should close.
//
// A failure to pop a source record panics; page-level failures are logged and
// the iteration is skipped.
func Execute() {
	// Cleanup query kept for reference (removes duplicate sources, keeping the newest uid):
	// DELETE FROM source_twitch WHERE uid NOT IN (SELECT MAX(s.uid) FROM (SELECT uid, source FROM source_twitch ) s GROUP BY s.source) ;
	// XPath of the channel link inside an article:
	//article//a[@data-a-target='preview-card-title-link']

	wd := intimate.GetChromeDriver(3030)
	// Quit through a closure so that the driver held by wd at exit time is the
	// one that gets closed. The counter callback below replaces wd through a
	// pointer; a plain `defer wd.Quit()` would bind the original driver, which
	// by then has already been quit, and leak the current one.
	defer func() {
		if err := wd.Quit(); err != nil {
			log.Println(err)
		}
	}()

	ps := intimate.NewPerfectShutdown()

	// Replace the browser session from time to time.
	// NOTE(review): presumably the callback fires once AddWithReset has counted
	// up to the max limit (100) — confirm against intimate.Counter.
	counter := intimate.NewCounter()
	counter.SetMaxLimit(100)
	counter.SetMaxToDo(func(olist ...interface{}) error {
		owd := olist[0].(*selenium.WebDriver)
		if err := (*owd).Quit(); err != nil {
			log.Println(err)
		}
		*owd = intimate.GetChromeDriver(3030)
		return nil
	}, &wd)

	// Compiled once; the pattern is constant, so there is no need to rebuild it
	// for every article.
	userIDPattern := regexp.MustCompile(`https://www.twitch.tv/(\w+)`)

	for !ps.IsClose() {
		var err error

		sourceChannel, err := sstore.Pop(intimate.TTwitchChannel)
		if err != nil {
			panic(err)
		}

		weburl := sourceChannel.Source.String + "?sort=VIEWER_COUNT"
		err = wd.Get(weburl)
		if err != nil {
			log.Println(err)
			sstore.UpdateError(sourceChannel, err)
			time.Sleep(time.Second * 10)
			continue
		}

		// Best-effort wait (up to 10s) for the list to render; the result is
		// deliberately not checked because the lookups below report their own errors.
		wd.WaitWithTimeout(func(wd selenium.WebDriver) (bool, error) {
			_, err := wd.FindElement(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
			if err != nil {
				return false, err
			}
			return true, nil
		}, time.Second*10)

		btn, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
		if err != nil {
			log.Println(err)
			continue
		}
		btn.Click()

		// Scroll by pressing End until the number of matched elements stops
		// changing for two consecutive rounds, or 200 rounds have passed.
		// NOTE(review): the XPath ends in [last()], so it looks like it can match
		// at most one element and the count stabilises almost immediately — confirm
		// this is the intended stop condition.
		var elements []selenium.WebElement
		var liveurls = 0
		var delayerror = 2
		for i := 0; i < 200 && !ps.IsClose(); i++ {
			elements, err = wd.FindElements(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
			if err != nil {
				log.Println(err)
				break
			}
			time.Sleep(time.Millisecond * 200)
			wd.KeyDown(selenium.EndKey)
			time.Sleep(time.Millisecond * 200)
			wd.KeyUp(selenium.EndKey)
			time.Sleep(time.Millisecond * 2000)
			if len(elements) == liveurls {
				delayerror--
				if delayerror <= 0 {
					break
				}
			} else {
				delayerror = 2
			}
			liveurls = len(elements)
		}

		articles, err := wd.FindElements(selenium.ByXPATH, "//article")
		if err != nil {
			log.Println(err)
			continue
		}

		for _, article := range articles {
			e, err := article.FindElement(selenium.ByXPATH, ".//a[@data-a-target='preview-card-title-link' and @href]")
			if err != nil {
				log.Println(err)
				continue
			}

			href, err := e.GetAttribute("href")
			if err != nil {
				log.Println(err)
				continue
			}

			btns, err := article.FindElements(selenium.ByXPATH, ".//div[@class='tw-full-width tw-inline-block']//button")
			if err != nil {
				log.Println(err)
				continue
			}

			// Tags are the data-a-target values of the article's buttons;
			// buttons without that attribute are skipped.
			var tags []string
			for _, btn := range btns {
				tag, err := btn.GetAttribute("data-a-target")
				if err == nil {
					tags = append(tags, tag)
				}
			}

			streamer := &intimate.Streamer{}

			// The user id is the first path segment of the channel URL.
			matches := userIDPattern.FindStringSubmatch(href)
			if len(matches) == 2 {
				mc := matches[1]
				streamer.UserId = &mc
			} else {
				log.Println(href)
				continue
			}

			jtags, err := json.Marshal(tags)
			if err != nil {
				log.Println(err)
			} else {
				streamer.Tags = jtags
			}

			streamer.Platform = intimate.Ptwitch

			updateUrl := make(map[string]string)
			updateUrl["live"] = href
			streamer.LiveUrl = &sql.NullString{String: href, Valid: true}
			data, err := json.Marshal(updateUrl)
			if err != nil {
				log.Println(err)
				continue
			}
			streamer.UpdateUrl = data
			streamer.Operator = 0

			// NOTE(review): InsertStreamer's boolean result gates the tag update;
			// presumably it is true when the streamer already existed — confirm.
			if estore.InsertStreamer(streamer) {
				// log.Println("streamer update tags", streamer.Uid, tags)
				if streamer.Tags != nil {
					estore.Update(streamer, "Tags", streamer.Tags)
				}
			}
		}

		log.Println("streamer find", len(articles))
		if len(articles) == 0 {
			// NOTE(review): operator 5 presumably marks a source that yielded
			// no channels — confirm the meaning of this code.
			sourceChannel.Operator = 5
			sstore.UpdateOperator(sourceChannel)
		}
		counter.AddWithReset(1)
	}
}

View File

@ -1,8 +1,6 @@
package main
import (
"testing"
)
import "testing"
func TestMain(t *testing.T) {
main()

File diff suppressed because it is too large Load Diff

View File

@ -5,10 +5,9 @@ import (
"database/sql"
"fmt"
"log"
"net"
"os"
"os/exec"
"os/signal"
"runtime"
"strconv"
"strings"
"sync/atomic"
@ -94,43 +93,13 @@ func ParseDuration(dt string) (time.Duration, error) {
return tdt.Sub(zeroTime), nil
}
// AutoCloseDriver bundles a selenium WebDriver session with the port number
// that Close uses to find and kill the matching driver process.
type AutoCloseDriver struct {
// Webdriver is the selenium session used to drive the browser.
Webdriver selenium.WebDriver
// Port is the port number Close searches for (as "port=<Port>") in process command lines.
Port int
}
// Close force-kills every process whose command line contains
// "port=<adriver.Port>" (found with pgrep -f), along with the direct children
// of those processes.
//
// Nothing is returned: failures are logged. If pgrep fails or finds nothing,
// Close returns without killing anything.
func (adriver *AutoCloseDriver) Close() {
	data, err := exec.Command("/bin/bash", "-c", fmt.Sprintf(`pgrep -f "port=%d"`, adriver.Port)).Output()
	if err != nil {
		log.Println(err)
		log.Println(string(data))
		return
	}
	// log.Println(string(data))

	// pgrep prints one PID per line with a trailing newline. Split it into
	// clean tokens so that several PIDs become arguments instead of being
	// interpreted by the shell as separate command lines.
	pids := strings.Fields(string(data))
	if len(pids) == 0 {
		return
	}

	// Kill the children first. pkill -P takes a comma-separated parent list.
	// pkill also exits non-zero when no child matched, so a failure here is
	// logged but must not stop the parents from being killed below.
	killshell := "pkill -9 -P " + strings.Join(pids, ",")
	if err := exec.Command("/bin/bash", "-c", killshell).Run(); err != nil {
		log.Println(err)
	}

	if err := exec.Command("/bin/bash", "-c", "kill -9 "+strings.Join(pids, " ")).Run(); err != nil {
		log.Println(err)
	}
}
func GetChromeDriver() *AutoCloseDriver {
port := GetFreePort()
func GetChromeDriver(port int) selenium.WebDriver {
var err error
caps := selenium.Capabilities{"browserName": "chrome"}
chromecaps := chrome.Capabilities{}
// chromecaps.AddExtension("/home/eson/test/myblock.crx")
for _, epath := range []string{"../../../crx/myblock.crx", "../../crx/myblock.crx"} {
_, err := os.Stat(epath)
if err == nil {
@ -162,27 +131,28 @@ func GetChromeDriver() *AutoCloseDriver {
chromecaps.ExcludeSwitches = append(chromecaps.ExcludeSwitches, "enable-automation")
caps.AddChrome(chromecaps)
_, err = selenium.NewChromeDriverService("/usr/bin/chromedriver", port)
if err != nil {
panic(err)
}
wd, err := selenium.NewRemote(caps, fmt.Sprintf("http://localhost:%d/wd/hub", port))
if err != nil {
panic(err)
}
adriver := &AutoCloseDriver{}
adriver.Port = port
adriver.Webdriver = wd
runtime.SetFinalizer(wd, func(obj interface{}) {
if err := obj.(selenium.WebDriver).Close(); err != nil {
log.Println(err)
}
if err := obj.(selenium.WebDriver).Quit(); err != nil {
log.Println(err)
}
})
wd.ExecuteScript("windows.navigator.webdriver = undefined", nil)
if err != nil {
panic(err)
}
return adriver
return wd
}
// PerfectShutdown 完美关闭程序
@ -210,18 +180,6 @@ func (ps *PerfectShutdown) IsClose() bool {
return atomic.LoadInt32(&ps.loop) == 0
}
// Wait pauses for up to tm, checking roughly once per second whether the
// program has been asked to close. It returns false as soon as ps.IsClose()
// reports true, and true once more than tm has elapsed without that happening.
func (ps *PerfectShutdown) Wait(tm time.Duration) bool {
	started := time.Now()
	for {
		if time.Since(started) > tm {
			return true
		}
		if ps.IsClose() {
			return false
		}
		time.Sleep(time.Second)
	}
}
type Counter struct {
dcount int
count int
@ -347,17 +305,3 @@ func (wf *WaitFor) WaitWithTimeout(xpath string, timeout time.Duration, do func(
}, timeout)
}
// GetFreePort returns a TCP port on localhost that was free at the time of the
// call. It binds to port 0 so the operating system picks the port, reads the
// chosen number and releases the listener again.
//
// The port is not reserved: another process may take it between this call and
// the caller's own bind. Any resolve or listen error panics.
func GetFreePort() int {
	tcpAddr, resolveErr := net.ResolveTCPAddr("tcp", "localhost:0")
	if resolveErr != nil {
		panic(resolveErr)
	}

	listener, listenErr := net.ListenTCP("tcp", tcpAddr)
	if listenErr != nil {
		panic(listenErr)
	}

	port := listener.Addr().(*net.TCPAddr).Port
	listener.Close()
	return port
}