intimate/tasks/twitch/twitch_task1/task_twitch.go

77 lines
1.8 KiB
Go
Raw Normal View History

2020-07-22 12:00:02 +00:00
package main
import (
"database/sql"
"intimate"
"log"
"time"
"github.com/tebeka/selenium"
)
var (
	// sstore is the source store instance, i.e. the implementation used to
	// persist source data. For the table layout see sql/intimate_source.sql.
	sstore = intimate.NewStoreSource(string(intimate.STTwitch))

	// estore is the extractor store connection instance.
	estore = intimate.NewStoreExtractor()
)
// ChannelLink is the task type that collects channel links.
// It carries no state; all of the work happens in its Execute method.
type ChannelLink struct{}
// Execute 执行任务
func (cl *ChannelLink) Execute() {
2020-07-24 10:48:33 +00:00
var err error
wd := intimate.GetChromeDriver(3030)
weburl := "https://www.twitch.tv/directory?sort=VIEWER_COUNT"
err = wd.Get(weburl)
if err != nil {
panic(err)
}
cardCondition := func(wd selenium.WebDriver) (bool, error) {
elements, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]")
if err != nil {
return false, err
}
return len(elements) > 0, nil
}
wd.WaitWithTimeout(cardCondition, time.Second*30)
time.Sleep(time.Second)
e, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
if err != nil {
panic(err)
}
e.Click()
for i := 0; i <= 200; i++ {
wd.KeyDown(selenium.EndKey)
2020-07-26 16:35:41 +00:00
time.Sleep(time.Second * 2)
}
elements, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]")
if err != nil {
panic(err)
}
2020-07-24 10:48:33 +00:00
// xpath: //article//a[@data-a-target='preview-card-title-link']
for _, ele := range elements {
href, err := ele.GetAttribute("href")
if err != nil {
log.Println(err)
}
log.Println(href) // TODO: Save href
source := &intimate.Source{}
source.Source = sql.NullString{String: href, Valid: true}
source.Operator = 0
source.Target = intimate.TTwitchChannel
source.Url = weburl
sstore.Insert(source)
}
2020-07-24 10:48:33 +00:00
sstore.Deduplicate(intimate.TTwitchChannel, "source")
}