TODO: Twitch — get the list of all user IDs

This commit is contained in:
eson
2020-07-24 18:48:33 +08:00
parent cbdedb6795
commit ea650f91dc
10 changed files with 189 additions and 27 deletions

View File

@@ -0,0 +1,6 @@
package main
// main builds a zero-value ChannelLink and runs its Execute task.
func main() {
	task := &ChannelLink{}
	task.Execute()
}

View File

@@ -2,13 +2,11 @@ package main
import (
"database/sql"
"fmt"
"intimate"
"log"
"time"
"github.com/tebeka/selenium"
"github.com/tebeka/selenium/chrome"
)
// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
@@ -25,32 +23,10 @@ type ChannelLink struct {
// Execute 执行任务
func (cl *ChannelLink) Execute() {
caps := selenium.Capabilities{"browserName": "chrome"}
chromecaps := chrome.Capabilities{}
err := chromecaps.AddExtension("/home/eson/test/ssh-key/0.1.2_0.crx")
if err != nil {
panic(err)
}
chromecaps.Args = append(chromecaps.Args, "--disk-cache-dir=/home/eson/test/ssh-key/cache")
chromecaps.ExcludeSwitches = append(chromecaps.ExcludeSwitches, "enable-automation")
caps.AddChrome(chromecaps)
_, err = selenium.NewChromeDriverService("/usr/bin/chromedriver", 3030)
if err != nil {
panic(err)
}
wd, err := selenium.NewRemote(caps, fmt.Sprintf("http://localhost:%d/wd/hub", 3030))
defer func() {
if err := wd.Close(); err != nil {
log.Println(err)
}
}()
wd.ExecuteScript("windows.navigator.webdriver = undefined", nil)
if err != nil {
panic(err)
}
var err error
wd := intimate.GetChromeDriver(3030)
weburl := "https://www.twitch.tv/directory?sort=VIEWER_COUNT"
err = wd.Get(weburl)
if err != nil {
panic(err)
@@ -81,6 +57,7 @@ func (cl *ChannelLink) Execute() {
if err != nil {
panic(err)
}
// xpath: //article//a[@data-a-target='preview-card-title-link']
for _, ele := range elements {
href, err := ele.GetAttribute("href")
if err != nil {
@@ -94,4 +71,6 @@ func (cl *ChannelLink) Execute() {
source.Url = weburl
sstore.Insert(source)
}
sstore.Deduplicate(intimate.TTwitchChannel, "source")
}

View File

@@ -8,3 +8,7 @@ func TestCase1(t *testing.T) {
e := ChannelLink{}
e.Execute()
}
// TestLiveUrl is a placeholder test; it has no assertions yet.
func TestLiveUrl(t *testing.T) {}

View File

@@ -0,0 +1,6 @@
package main
// main allocates a UserList and runs its Execute task.
func main() {
	new(UserList).Execute()
}

View File

@@ -0,0 +1,114 @@
package main
import (
"database/sql"
"encoding/json"
"intimate"
"log"
"regexp"
"time"
"github.com/tebeka/selenium"
)
// sstore is the source-store instance, the implementation used to store source data. For the table layout see sql/intimate_source.sql.
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))
// estore is the extractor-store connection instance.
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
// UserList is the task type that collects the channel links of a category.
// It carries no state; all work happens in its Execute method.
type UserList struct{}
// Execute runs the task. It pops one source record of type
// intimate.TTwitchChannel from sstore, loads that record's URL (with
// "?sort=VIEWER_COUNT" appended) in the driver returned by
// intimate.GetChromeDriver(3030), presses the End key a couple of times, and
// then inserts one Streamer into estore for every preview-card title link
// found on the page.
//
// It panics when the source record cannot be popped, the page cannot be
// loaded, or a required element query fails. Failures that concern a single
// link, and failures of best-effort browser interactions, are logged and
// skipped.
func (cl *UserList) Execute() {
	// Duplicate-cleanup query kept for reference:
	// DELETE FROM source_twitch WHERE uid NOT IN (SELECT MAX(s.uid) FROM (SELECT uid, source FROM source_twitch ) s GROUP BY s.source) ;
	// Channel link xpath: //article//a[@data-a-target='preview-card-title-link']

	// NOTE(review): 3030 is presumably the chromedriver port — confirm against
	// intimate.GetChromeDriver.
	wd := intimate.GetChromeDriver(3030)

	sourceChannel, err := sstore.Pop(intimate.TTwitchChannel)
	if err != nil {
		panic(err)
	}

	weburl := sourceChannel.Source.String + "?sort=VIEWER_COUNT"
	if err = wd.Get(weburl); err != nil {
		panic(err)
	}

	err = wd.WaitWithTimeout(func(wd selenium.WebDriver) (bool, error) {
		// A failed lookup only means the element is not there yet. Report
		// "not done" rather than the error: an error returned from the
		// condition aborts the wait at once instead of polling again.
		if _, err := wd.FindElement(selenium.ByXPATH, "(//div/p[@class=''])[last()]"); err != nil {
			return false, nil
		}
		return true, nil
	}, time.Second*10)
	if err != nil {
		// Best effort: the queries below still decide whether to go on.
		log.Println(err)
	}

	btn, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
	if err != nil {
		panic(err)
	}
	if err := btn.Click(); err != nil {
		log.Println(err)
	}

	var elements []selenium.WebElement
	// NOTE(review): liveurls is never updated and the loop runs only twice, so
	// the stall counter below can never reach zero. The logic is kept as is;
	// the intended stop condition needs to be confirmed.
	liveurls := 0
	delayerror := 3
	for i := 0; i < 2; i++ {
		elements, err = wd.FindElements(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
		if err != nil {
			panic(err)
		}
		// Press End, then pause two seconds before the next check.
		if err := wd.KeyDown(selenium.EndKey); err != nil {
			log.Println(err)
		}
		time.Sleep(time.Second * 2)
		if len(elements) == liveurls {
			delayerror--
			if delayerror <= 0 {
				break
			}
		} else {
			delayerror = 3
		}
	}

	elements, err = wd.FindElements(selenium.ByXPATH, "//article//a[@data-a-target='preview-card-title-link' and @href]")
	if err != nil {
		panic(err)
	}

	// Compile once instead of once per link.
	userIDPattern := regexp.MustCompile(`https://www.twitch.tv/(\w+)`)

	for _, e := range elements {
		attr, err := e.GetAttribute("href")
		if err != nil {
			log.Println(err)
			continue
		}

		// The first capture group of the link is used as the user id.
		matches := userIDPattern.FindStringSubmatch(attr)
		if len(matches) != 2 {
			log.Println(attr)
			continue
		}

		data, err := json.Marshal(map[string]string{"live": attr})
		if err != nil {
			log.Println(err)
			continue
		}

		streamer := &intimate.Streamer{}
		streamer.UserId = matches[1]
		streamer.Platform = intimate.Ptwitch
		streamer.LiveUrl = sql.NullString{String: attr, Valid: true}
		streamer.UpdateUrl = data
		streamer.Operator = 0
		estore.InsertStreamer(streamer)
	}
}

View File

@@ -0,0 +1,7 @@
package main
import "testing"
// TestMain invokes the program's main function from within a test.
func TestMain(t *testing.T) {
	main()
}