intimate/extractor/nimo_extractor/nimo_extractor.go
2020-09-03 14:17:54 +08:00

116 lines
3.3 KiB
Go

package main
import (
"database/sql"
"intimate"
"log"
"time"
"github.com/474420502/extractor"
"github.com/tebeka/selenium"
)
// Package-level store handles shared by the extractor.
var (
	// sstore is the source-store instance, i.e. the implementation that
	// stores source data. For the table layout see sql/intimate_source.sql.
	sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STNimo))

	// estore is the extractor-store connection instance.
	estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
)
// main is the program entry point; it simply hands control to Execute.
func main() {
Execute()
}
// LiveInfo holds the values scraped from a live-room page. Each field carries
// an `exp` struct tag containing the XPath expression for its value; Execute
// fills the struct by passing LiveInfo{} to the extractor's GetObjectByTag.
// NOTE(review): the `mth:"r:ExtractNumber"` tags presumably post-process the
// matched text into a number — confirm against the extractor library.
type LiveInfo struct {
// Followers is matched from the 'text c2' span inside the 'nimo-rm_followers' div.
Followers int64 `exp:"//div[contains(@class,'nimo-rm_followers')]//span[@class='text c2']" mth:"r:ExtractNumber"`
// Views is matched from the 'text c2' span inside the 'nimo-rm_audience' div.
Views int64 `exp:"//div[contains(@class,'nimo-rm_audience')]//span[@class='text c2']" mth:"r:ExtractNumber"`
// Channel is matched from the span inside the 'nimo-rm_type' div.
Channel string `exp:"//div[contains(@class,'nimo-rm_type')]//span"`
// Gratuity collects the 'nimo-currency__count' spans inside 'rank-item-after3'
// entries; Execute sums these values into a single gratuity figure.
Gratuity []int64 `exp:"//div[contains(@class,'rank-item-after3')]//span[contains(@class,'nimo-currency__count')]"`
}
// Execute runs the Nimo extraction loop until a shutdown is requested.
//
// Each iteration pops one streamer from the extractor store, opens its live
// URL in a Chrome WebDriver session, waits for the statistics elements to
// appear, extracts a LiveInfo from the page source, inserts a CollectLog entry
// and updates the streamer record (channel, latest log uid, update time,
// operator and update interval). The browser session is restarted after every
// countlimit processed pages and is quit when the loop ends.
func Execute() {
	// Number of pages handled by one browser session before it is restarted.
	const countlimit = 200

	wd := intimate.GetChromeDriver(3030)
	// wd is reassigned on every restart, so the deferred cleanup closes over
	// the variable and quits whichever session is current on return.
	defer func() {
		if err := wd.Quit(); err != nil {
			log.Println(err)
		}
	}()

	waitfor := intimate.NewWaitFor(wd)
	ps := intimate.NewPerfectShutdown()
	count := 0

	for !ps.IsClose() {
		streamer, err := estore.Pop(intimate.PNimo)
		if err != nil {
			log.Println(err)
			// NOTE(review): if Pop can return a nil streamer together with an
			// error, UpdateError needs a nil guard here, and a persistent
			// error makes this loop spin without a pause — confirm against
			// the store implementation.
			estore.UpdateError(streamer, err)
			continue
		}

		// Keep the best-effort flow on navigation errors, but make them visible.
		if err := wd.Get(streamer.LiveUrl.String); err != nil {
			log.Println(streamer.Uid, err)
		}
		// wd.Get("https://www.nimo.tv/live/1253835677")

		// Wait for a non-empty follower counter, then give the gift ranking
		// up to 7 seconds to show up.
		waitfor.Default("//div[contains(@class,'nimo-rm_followers')]//span[@class='text c2' and text() != '']", nil)
		waitfor.WaitWithTimeout("//div[contains(@class,'rank-item-top3')]", 7*time.Second, nil)

		element, err := wd.FindElement(selenium.ByXPATH, "//div[contains(@class,'rank-item-top3')]")
		if err != nil {
			log.Println(streamer.Uid, err)
		} else {
			// The click is attempted even when the move fails (as before);
			// each step now reports its own error instead of dropping Click's.
			if err := element.MoveTo(50, 50); err != nil {
				log.Println(streamer.Uid, err)
			}
			if err := element.Click(); err != nil {
				log.Println(streamer.Uid, err)
			}
		}

		waitfor.Default("//div[contains(@class,'nimo-rm_audience')]//span[@class='text c2']", nil)

		pagesource, err := wd.PageSource()
		if err != nil {
			// Without page source the extraction would yield all-zero values
			// that would be stored as real data; record the failure instead.
			log.Println(streamer.Uid, err)
			estore.UpdateError(streamer, err)
			continue
		}

		etor := extractor.ExtractHtmlString(pagesource)
		li := etor.GetObjectByTag(LiveInfo{}).(*LiveInfo)
		// log.Printf("%#v", li)

		utime := sql.NullTime{Time: time.Now(), Valid: true}

		// The stored gratuity is the sum of all extracted ranking amounts.
		var sum int64
		for _, v := range li.Gratuity {
			sum += v
		}

		clog := &intimate.CollectLog{}
		clog.Platform = intimate.PNimo
		clog.Followers = sql.NullInt64{Int64: li.Followers, Valid: true}
		clog.Views = sql.NullInt64{Int64: li.Views, Valid: true}
		clog.Gratuity = sql.NullInt64{Int64: sum, Valid: true}
		clog.UpdateTime = utime
		clog.StreamerUid = streamer.Uid
		cuid := estore.InsertClog(clog)

		streamer.Channel = sql.NullString{String: li.Channel, Valid: true}
		streamer.LatestLogUid = cuid
		streamer.UpdateTime = utime
		streamer.Operator = 0

		// The more followers a streamer has, the shorter the update interval.
		// NOTE(review): the unit of UpdateInterval is not visible here
		// (presumably minutes) — confirm against the store schema.
		switch {
		case li.Followers <= 1000:
			streamer.UpdateInterval = 720
		case li.Followers <= 10000:
			streamer.UpdateInterval = 360
		case li.Followers <= 100000:
			streamer.UpdateInterval = 180
		case li.Followers <= 1000000:
			streamer.UpdateInterval = 90
		default:
			streamer.UpdateInterval = 60
		}

		estore.Update(streamer, "update_interval", streamer.UpdateInterval, "operator", streamer.Operator, "channel", streamer.Channel, "latest_log_uid", streamer.LatestLogUid, "update_time", streamer.UpdateTime)

		count++
		if count >= countlimit {
			// Periodically replace the browser session with a fresh one.
			count = 0
			wd.Close()
			wd.Quit()
			wd = intimate.GetChromeDriver(3030)
			// The waiter is bound to a driver instance; rebuild it so it does
			// not keep using the session that was just quit.
			waitfor = intimate.NewWaitFor(wd)
		}
	}
}