twitch source 非常多, 需要把增量的架构设计好. 修改原来架构.

This commit is contained in:
eson
2020-07-23 18:29:56 +08:00
parent 6d688b8450
commit cbdedb6795
11 changed files with 148 additions and 48 deletions

View File

@@ -1 +1,97 @@
package main
import (
"database/sql"
"fmt"
"intimate"
"log"
"time"
"github.com/tebeka/selenium"
"github.com/tebeka/selenium/chrome"
)
var (
	// sstore is the source-store instance, i.e. the implementation that
	// persists raw source data. See sql/intimate_source.sql for the table
	// layout.
	sstore = intimate.NewStoreSource(string(intimate.STTwitch))

	// estore is the extractor-store connection instance.
	estore = intimate.NewStoreExtractor()
)
// ChannelLink is the task that fetches all channel links of a category.
// It carries no state; the work is done by its Execute method.
type ChannelLink struct{}
// Execute runs the task: it starts a local ChromeDriver service, opens the
// Twitch directory sorted by viewer count, scrolls the page to load more
// cards, and stores the href of every card link it finds through sstore.
//
// Setup failures (extension loading, driver start, session creation, initial
// navigation, locating the sort button, collecting the cards) panic, as they
// did before. Failures of best-effort browser interactions are logged and the
// run continues.
func (cl *ChannelLink) Execute() {
	const (
		// driverPort is the port the local ChromeDriver service listens on.
		driverPort = 3030
		// cardXPath matches the anchor of every directory card.
		cardXPath = "//span/a[contains(@data-a-target,'card-') and @href]"
	)

	caps := selenium.Capabilities{"browserName": "chrome"}
	chromecaps := chrome.Capabilities{}
	if err := chromecaps.AddExtension("/home/eson/test/ssh-key/0.1.2_0.crx"); err != nil {
		panic(err)
	}
	chromecaps.Args = append(chromecaps.Args, "--disk-cache-dir=/home/eson/test/ssh-key/cache")
	chromecaps.ExcludeSwitches = append(chromecaps.ExcludeSwitches, "enable-automation")
	caps.AddChrome(chromecaps)

	// Keep the service handle so the ChromeDriver process is stopped when the
	// task finishes instead of being leaked.
	service, err := selenium.NewChromeDriverService("/usr/bin/chromedriver", driverPort)
	if err != nil {
		panic(err)
	}
	defer func() {
		if err := service.Stop(); err != nil {
			log.Println(err)
		}
	}()

	// Check the error before touching wd: on failure wd is nil and both the
	// deferred Close and ExecuteScript would dereference it.
	wd, err := selenium.NewRemote(caps, fmt.Sprintf("http://localhost:%d/wd/hub", driverPort))
	if err != nil {
		panic(err)
	}
	defer func() {
		if err := wd.Close(); err != nil {
			log.Println(err)
		}
	}()

	// Best effort: hide the webdriver flag from the page. The global object is
	// `window`; the former `windows` identifier does not exist and always threw.
	// NOTE(review): a plain assignment may not override a read-only getter —
	// confirm this actually changes navigator.webdriver in the target Chrome.
	if _, err := wd.ExecuteScript("window.navigator.webdriver = undefined", nil); err != nil {
		log.Println(err)
	}

	weburl := "https://www.twitch.tv/directory?sort=VIEWER_COUNT"
	if err := wd.Get(weburl); err != nil {
		panic(err)
	}

	// Wait until at least one card link is present before interacting.
	cardCondition := func(wd selenium.WebDriver) (bool, error) {
		cards, err := wd.FindElements(selenium.ByXPATH, cardXPath)
		if err != nil {
			return false, err
		}
		return len(cards) > 0, nil
	}
	if err := wd.WaitWithTimeout(cardCondition, time.Second*30); err != nil {
		log.Println(err)
	}
	time.Sleep(time.Second)

	sortMenu, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
	if err != nil {
		panic(err)
	}
	if err := sortMenu.Click(); err != nil {
		log.Println(err)
	}

	// Press End repeatedly so the page keeps loading further cards; the pause
	// gives each batch time to render.
	for i := 0; i <= 200; i++ {
		if err := wd.KeyDown(selenium.EndKey); err != nil {
			log.Println(err)
		}
		time.Sleep(time.Second * 3)
	}

	elements, err := wd.FindElements(selenium.ByXPATH, cardXPath)
	if err != nil {
		panic(err)
	}

	for _, ele := range elements {
		href, err := ele.GetAttribute("href")
		if err != nil {
			// Skip this card: storing a record with an empty href is useless.
			log.Println(err)
			continue
		}
		log.Println(href)

		source := &intimate.Source{}
		source.Source = sql.NullString{String: href, Valid: true}
		source.Operator = 0
		source.Target = intimate.TTwitchChannel
		source.Url = weburl
		sstore.Insert(source)
	}
}

View File

@@ -1,31 +1,10 @@
package main
import (
"fmt"
"testing"
"github.com/tebeka/selenium"
"github.com/tebeka/selenium/chrome"
)
// TestCase1 starts a ChromeDriver session, opens the Twitch directory page,
// and then runs ChannelLink.Execute. It makes no assertions; any failure
// surfaces as a panic.
//
// NOTE(review): the hunk header above this file (-1,31 +1,10) suggests the
// browser setup below is the pre-refactor body and only the last two
// statements are the current test — confirm against the repository.
func TestCase1(t *testing.T) {
// Chrome capabilities with the packed extension loaded.
caps := selenium.Capabilities{"browserName": "chrome"}
chromecaps := chrome.Capabilities{}
err := chromecaps.AddExtension("/home/eson/test/ssh-key/0.1.2_0.crx")
if err != nil {
panic(err)
}
caps.AddChrome(chromecaps)
// The service handle is discarded, so this ChromeDriver is never stopped here.
// NOTE(review): Execute also starts a ChromeDriver service on port 3030 —
// presumably the two conflict; verify.
_, err = selenium.NewChromeDriverService("/usr/bin/chromedriver", 3030)
if err != nil {
panic(err)
}
// Open a remote session against the service started above.
wd, err := selenium.NewRemote(caps, fmt.Sprintf("http://localhost:%d/wd/hub", 3030))
if err != nil {
panic(err)
}
err = wd.Get("https://www.twitch.tv/directory/all")
if err != nil {
panic(err)
}
// Run the channel-link task.
e := ChannelLink{}
e.Execute()
}