twitch source 非常多, 需要把增量的架构设计好. 修改原来架构.
This commit is contained in:
@@ -1 +1,97 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"intimate"
|
||||
"log"
|
||||
"time"
|
||||
|
||||
"github.com/tebeka/selenium"
|
||||
"github.com/tebeka/selenium/chrome"
|
||||
)
|
||||
|
||||
// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
|
||||
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))
|
||||
|
||||
// estore 解析存储连接实例
|
||||
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
|
||||
|
||||
// 获取类型的所有频道链接
|
||||
|
||||
// ChannelLink 频道链接
|
||||
type ChannelLink struct {
|
||||
}
|
||||
|
||||
// Execute 执行任务
|
||||
func (cl *ChannelLink) Execute() {
|
||||
caps := selenium.Capabilities{"browserName": "chrome"}
|
||||
chromecaps := chrome.Capabilities{}
|
||||
err := chromecaps.AddExtension("/home/eson/test/ssh-key/0.1.2_0.crx")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
chromecaps.Args = append(chromecaps.Args, "--disk-cache-dir=/home/eson/test/ssh-key/cache")
|
||||
chromecaps.ExcludeSwitches = append(chromecaps.ExcludeSwitches, "enable-automation")
|
||||
caps.AddChrome(chromecaps)
|
||||
_, err = selenium.NewChromeDriverService("/usr/bin/chromedriver", 3030)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
wd, err := selenium.NewRemote(caps, fmt.Sprintf("http://localhost:%d/wd/hub", 3030))
|
||||
defer func() {
|
||||
if err := wd.Close(); err != nil {
|
||||
log.Println(err)
|
||||
}
|
||||
}()
|
||||
wd.ExecuteScript("windows.navigator.webdriver = undefined", nil)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
weburl := "https://www.twitch.tv/directory?sort=VIEWER_COUNT"
|
||||
|
||||
err = wd.Get(weburl)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
cardCondition := func(wd selenium.WebDriver) (bool, error) {
|
||||
elements, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]")
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return len(elements) > 0, nil
|
||||
}
|
||||
wd.WaitWithTimeout(cardCondition, time.Second*30)
|
||||
time.Sleep(time.Second)
|
||||
|
||||
e, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
e.Click()
|
||||
|
||||
for i := 0; i <= 200; i++ {
|
||||
wd.KeyDown(selenium.EndKey)
|
||||
time.Sleep(time.Second * 3)
|
||||
}
|
||||
|
||||
elements, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
for _, ele := range elements {
|
||||
href, err := ele.GetAttribute("href")
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
}
|
||||
log.Println(href) // TODO: Save href
|
||||
source := &intimate.Source{}
|
||||
source.Source = sql.NullString{String: href, Valid: true}
|
||||
source.Operator = 0
|
||||
source.Target = intimate.TTwitchChannel
|
||||
source.Url = weburl
|
||||
sstore.Insert(source)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,31 +1,10 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/tebeka/selenium"
|
||||
"github.com/tebeka/selenium/chrome"
|
||||
)
|
||||
|
||||
func TestCase1(t *testing.T) {
|
||||
caps := selenium.Capabilities{"browserName": "chrome"}
|
||||
chromecaps := chrome.Capabilities{}
|
||||
err := chromecaps.AddExtension("/home/eson/test/ssh-key/0.1.2_0.crx")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
caps.AddChrome(chromecaps)
|
||||
_, err = selenium.NewChromeDriverService("/usr/bin/chromedriver", 3030)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
wd, err := selenium.NewRemote(caps, fmt.Sprintf("http://localhost:%d/wd/hub", 3030))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
err = wd.Get("https://www.twitch.tv/directory/all")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
e := ChannelLink{}
|
||||
e.Execute()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user