TODO: twitch_task2 fix 错误
This commit is contained in:
@@ -1,7 +1,6 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"intimate"
|
||||
"log"
|
||||
"time"
|
||||
@@ -9,100 +8,120 @@ import (
|
||||
"github.com/tebeka/selenium"
|
||||
)
|
||||
|
||||
// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
|
||||
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))
|
||||
// // sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
|
||||
// var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))
|
||||
|
||||
// estore 解析存储连接实例
|
||||
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
|
||||
// // estore 解析存储连接实例
|
||||
// var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
|
||||
|
||||
// 获取类型的所有频道链接
|
||||
|
||||
// Execute 执行任务
|
||||
func Execute() {
|
||||
var err error
|
||||
wd := intimate.GetChromeDriver(3030)
|
||||
|
||||
ps := intimate.NewPerfectShutdown()
|
||||
|
||||
weburl := "https://www.twitch.tv/directory?sort=VIEWER_COUNT"
|
||||
err = wd.Get(weburl)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
for !ps.IsClose() {
|
||||
var err error
|
||||
wd := intimate.GetChromeDriver(3030)
|
||||
|
||||
cardCondition := func(wd selenium.WebDriver) (bool, error) {
|
||||
elements, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]")
|
||||
weburl := "https://www.twitch.tv/directory?sort=VIEWER_COUNT"
|
||||
err = wd.Get(weburl)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return len(elements) > 0, nil
|
||||
}
|
||||
wd.WaitWithTimeout(cardCondition, time.Second*15)
|
||||
time.Sleep(time.Second)
|
||||
|
||||
e, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
e.Click()
|
||||
|
||||
var hrefs map[string]bool = make(map[string]bool)
|
||||
var delayerror = 5
|
||||
for i := 0; i <= 200; i++ {
|
||||
cards, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]")
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
break
|
||||
panic(err)
|
||||
}
|
||||
|
||||
if len(hrefs) == 0 {
|
||||
delayerror--
|
||||
if delayerror <= 0 {
|
||||
cardCondition := func(wd selenium.WebDriver) (bool, error) {
|
||||
elements, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]")
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return len(elements) > 0, nil
|
||||
}
|
||||
wd.WaitWithTimeout(cardCondition, time.Second*15)
|
||||
time.Sleep(time.Second)
|
||||
|
||||
e, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
e.Click()
|
||||
|
||||
var lasthreflen = 0
|
||||
var hrefs map[string]bool = make(map[string]bool)
|
||||
var delayerror = 5
|
||||
for i := 0; i <= 200; i++ {
|
||||
cards, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]")
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
break
|
||||
}
|
||||
} else {
|
||||
delayerror = 5
|
||||
}
|
||||
|
||||
for ii := 0; ii < 10; ii++ {
|
||||
for _, card := range cards {
|
||||
href, err := card.GetAttribute("href")
|
||||
if err != nil {
|
||||
log.Println(href, err)
|
||||
continue
|
||||
} else {
|
||||
hrefs[href] = true
|
||||
if len(hrefs) == lasthreflen {
|
||||
delayerror--
|
||||
if delayerror <= 0 {
|
||||
break
|
||||
}
|
||||
} else {
|
||||
delayerror = 7
|
||||
}
|
||||
break
|
||||
}
|
||||
lasthreflen = len(hrefs)
|
||||
|
||||
if ps.IsClose() {
|
||||
break
|
||||
}
|
||||
for ii := 0; ii < 10; ii++ {
|
||||
for _, card := range cards {
|
||||
href, err := card.GetAttribute("href")
|
||||
if err != nil {
|
||||
log.Println(href, err)
|
||||
continue
|
||||
} else {
|
||||
hrefs[href] = true
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
if len(cards) > 10 {
|
||||
log.Println(len(cards))
|
||||
wd.ExecuteScript(`items = document.evaluate("//div[@data-target='directory-page__card-container']/../self::div[@data-target and @style]", document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
if ps.IsClose() {
|
||||
break
|
||||
}
|
||||
|
||||
if len(cards) > 10 {
|
||||
log.Println(len(cards))
|
||||
wd.ExecuteScript(`items = document.evaluate("//div[@data-target='directory-page__card-container']/../self::div[@data-target and @style]", document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
for (var i = 0; i < items.snapshotLength - 10; i++) { item = items.snapshotItem(i); item.remove() ;};`, nil)
|
||||
}
|
||||
time.Sleep(time.Millisecond * 200)
|
||||
wd.KeyDown(selenium.EndKey)
|
||||
time.Sleep(time.Millisecond * 200)
|
||||
wd.KeyUp(selenium.EndKey)
|
||||
time.Sleep(time.Millisecond * 2500)
|
||||
}
|
||||
time.Sleep(time.Millisecond * 200)
|
||||
wd.KeyDown(selenium.EndKey)
|
||||
time.Sleep(time.Millisecond * 200)
|
||||
wd.KeyUp(selenium.EndKey)
|
||||
time.Sleep(time.Millisecond * 2500)
|
||||
|
||||
for href := range hrefs {
|
||||
|
||||
sl := &intimate.StreamerList{}
|
||||
sl.Url = href
|
||||
sl.UrlHash = intimate.GetUrlHash(sl.Url)
|
||||
sl.Platform = string(intimate.Ptwitch)
|
||||
sl.UpdateTime = intimate.GetUpdateTimeNow()
|
||||
err := intimate.TStreamerList.Insert(sl)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
}
|
||||
|
||||
// TODO: Save href
|
||||
// source := &intimate.Source{}
|
||||
// source.Source = sql.NullString{String: href, Valid: true}
|
||||
// source.Operator = 0
|
||||
// source.Target = intimate.TTwitchChannel
|
||||
// source.Url = weburl
|
||||
// sstore.Insert(source)
|
||||
}
|
||||
|
||||
log.Println("hrefs len:", len(hrefs))
|
||||
// sstore.Deduplicate(intimate.TTwitchChannel, "source")
|
||||
|
||||
wd.Close()
|
||||
wd.Quit()
|
||||
time.Sleep(time.Minute * 30)
|
||||
}
|
||||
|
||||
for href := range hrefs {
|
||||
|
||||
// TODO: Save href
|
||||
source := &intimate.Source{}
|
||||
source.Source = sql.NullString{String: href, Valid: true}
|
||||
source.Operator = 0
|
||||
source.Target = intimate.TTwitchChannel
|
||||
source.Url = weburl
|
||||
sstore.Insert(source)
|
||||
}
|
||||
|
||||
log.Println("hrefs len:", len(hrefs))
|
||||
sstore.Deduplicate(intimate.TTwitchChannel, "source")
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user