intimate/tasks/twitch/twitch_task1/task_twitch.go
2020-09-14 15:15:41 +08:00

121 lines
2.9 KiB
Go

package main
import (
"intimate"
"log"
"time"
"github.com/tebeka/selenium"
)
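// // sstore is the source store instance, i.e. the implementation that stores the raw source data. See sql/intimate_source.sql for the table layout.
// var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))
// // estore is the extractor store connection instance.
// var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()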
// 获取类型的所有频道链接
// Execute 执行任务
func Execute() {
ps := intimate.NewPerfectShutdown()
var adriver *intimate.AutoCloseDriver
for !ps.IsClose() {
var err error
adriver = intimate.GetChromeDriver()
wd := adriver.Webdriver
weburl := "https://www.twitch.tv/directory?sort=VIEWER_COUNT"
err = wd.Get(weburl)
if err != nil {
panic(err)
}
cardCondition := func(wd selenium.WebDriver) (bool, error) {
elements, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]")
if err != nil {
return false, err
}
return len(elements) > 0, nil
}
wd.WaitWithTimeout(cardCondition, time.Second*15)
time.Sleep(time.Second)
e, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
if err != nil {
panic(err)
}
e.Click()
var lasthreflen = 0
var hrefs map[string]bool = make(map[string]bool)
var delayerror = 5
for i := 0; i <= 200; i++ {
cards, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]")
if err != nil {
log.Println(err)
break
}
if len(hrefs) == lasthreflen {
delayerror--
if delayerror <= 0 {
break
}
} else {
delayerror = 7
}
lasthreflen = len(hrefs)
for ii := 0; ii < 10; ii++ {
for _, card := range cards {
href, err := card.GetAttribute("href")
if err != nil {
log.Println(href, err)
continue
} else {
hrefs[href] = true
}
}
break
}
if ps.IsClose() {
break
}
if len(cards) > 10 {
log.Println(len(cards))
wd.ExecuteScript(`items = document.evaluate("//div[@data-target='directory-page__card-container']/../self::div[@data-target and @style]", document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
for (var i = 0; i < items.snapshotLength - 10; i++) { item = items.snapshotItem(i); item.remove() ;};`, nil)
}
time.Sleep(time.Millisecond * 200)
wd.KeyDown(selenium.EndKey)
time.Sleep(time.Millisecond * 200)
wd.KeyUp(selenium.EndKey)
time.Sleep(time.Millisecond * 2500)
}
for href := range hrefs {
sl := &intimate.StreamerList{}
sl.Url = href
sl.UrlHash = intimate.GetUrlHash(sl.Url)
sl.Platform = string(intimate.Ptwitch)
sl.UpdateTime = intimate.GetUpdateTimeNow()
err := intimate.TStreamerList.Insert(sl)
if err != nil {
log.Println(err)
}
}
log.Println("hrefs len:", len(hrefs))
adriver.Close()
ps.Wait(time.Minute * 5)
}
}