完成twitch的期望功能
This commit is contained in:
@@ -25,7 +25,7 @@ type ChannelLink struct {
|
||||
func (cl *ChannelLink) Execute() {
|
||||
var err error
|
||||
wd := intimate.GetChromeDriver(3030)
|
||||
defer wd.Close()
|
||||
ps := intimate.NewPerfectShutdown()
|
||||
|
||||
weburl := "https://www.twitch.tv/directory?sort=VIEWER_COUNT"
|
||||
err = wd.Get(weburl)
|
||||
@@ -40,7 +40,7 @@ func (cl *ChannelLink) Execute() {
|
||||
}
|
||||
return len(elements) > 0, nil
|
||||
}
|
||||
wd.WaitWithTimeout(cardCondition, time.Second*30)
|
||||
wd.WaitWithTimeout(cardCondition, time.Second*15)
|
||||
time.Sleep(time.Second)
|
||||
|
||||
e, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
|
||||
@@ -68,15 +68,22 @@ func (cl *ChannelLink) Execute() {
|
||||
delayerror = 5
|
||||
}
|
||||
|
||||
for _, card := range cards {
|
||||
href, err := card.GetAttribute("href")
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
} else {
|
||||
hrefs[href] = true
|
||||
for ii := 0; ii < 10; ii++ {
|
||||
for _, card := range cards {
|
||||
href, err := card.GetAttribute("href")
|
||||
if err != nil {
|
||||
log.Println(href, err)
|
||||
continue
|
||||
} else {
|
||||
hrefs[href] = true
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
samecount = len(cards)
|
||||
if ps.IsClose() {
|
||||
break
|
||||
}
|
||||
|
||||
if len(cards) > 10 {
|
||||
log.Println(len(cards))
|
||||
@@ -88,7 +95,6 @@ func (cl *ChannelLink) Execute() {
|
||||
time.Sleep(time.Millisecond * 200)
|
||||
wd.KeyUp(selenium.EndKey)
|
||||
time.Sleep(time.Millisecond * 2500)
|
||||
|
||||
}
|
||||
|
||||
for href := range hrefs {
|
||||
|
||||
@@ -5,11 +5,7 @@ import (
|
||||
"encoding/json"
|
||||
"intimate"
|
||||
"log"
|
||||
"os"
|
||||
"os/signal"
|
||||
"regexp"
|
||||
"sync/atomic"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/tebeka/selenium"
|
||||
@@ -33,19 +29,18 @@ func (cl *UserList) Execute() {
|
||||
//article//a[@data-a-target='preview-card-title-link']
|
||||
|
||||
wd := intimate.GetChromeDriver(3030)
|
||||
defer wd.Close()
|
||||
ps := intimate.NewPerfectShutdown()
|
||||
counter := intimate.NewCounter()
|
||||
counter.SetMaxLimit(100)
|
||||
counter.SetMaxToDo(func(olist ...interface{}) error {
|
||||
owd := olist[0].(*selenium.WebDriver)
|
||||
(*owd).Close()
|
||||
(*owd).Quit()
|
||||
*owd = intimate.GetChromeDriver(3030)
|
||||
return nil
|
||||
}, &wd)
|
||||
|
||||
var loop int32 = 1
|
||||
var count = 0
|
||||
|
||||
go func() {
|
||||
signalchan := make(chan os.Signal)
|
||||
signal.Notify(signalchan, syscall.SIGKILL, syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGSTOP)
|
||||
log.Println("accept stop command:", <-signalchan)
|
||||
atomic.StoreInt32(&loop, 0)
|
||||
}()
|
||||
|
||||
for atomic.LoadInt32(&loop) > 0 {
|
||||
for !ps.IsClose() {
|
||||
|
||||
var err error
|
||||
sourceChannel, err := sstore.Pop(intimate.TTwitchChannel)
|
||||
@@ -56,7 +51,10 @@ func (cl *UserList) Execute() {
|
||||
weburl := sourceChannel.Source.String + "?sort=VIEWER_COUNT"
|
||||
err = wd.Get(weburl)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
log.Println(err)
|
||||
sstore.UpdateError(sourceChannel, err)
|
||||
time.Sleep(time.Second * 10)
|
||||
continue
|
||||
}
|
||||
|
||||
wd.WaitWithTimeout(func(wd selenium.WebDriver) (bool, error) {
|
||||
@@ -77,16 +75,17 @@ func (cl *UserList) Execute() {
|
||||
var elements []selenium.WebElement
|
||||
var liveurls = 0
|
||||
var delayerror = 2
|
||||
for i := 0; i < 200 && atomic.LoadInt32(&loop) > 0; i++ {
|
||||
for i := 0; i < 200 && !ps.IsClose(); i++ {
|
||||
elements, err = wd.FindElements(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
break
|
||||
}
|
||||
time.Sleep(time.Millisecond * 500)
|
||||
time.Sleep(time.Millisecond * 200)
|
||||
wd.KeyDown(selenium.EndKey)
|
||||
time.Sleep(time.Millisecond * 200)
|
||||
wd.KeyUp(selenium.EndKey)
|
||||
time.Sleep(time.Millisecond * 1500)
|
||||
time.Sleep(time.Millisecond * 2000)
|
||||
if len(elements) == liveurls {
|
||||
delayerror--
|
||||
if delayerror <= 0 {
|
||||
@@ -97,34 +96,62 @@ func (cl *UserList) Execute() {
|
||||
}
|
||||
liveurls = len(elements)
|
||||
}
|
||||
elements, err = wd.FindElements(selenium.ByXPATH, "//article//a[@data-a-target='preview-card-title-link' and @href]")
|
||||
articles, err := wd.FindElements(selenium.ByXPATH, "//article")
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, e := range elements {
|
||||
for _, article := range articles {
|
||||
|
||||
attr, err := e.GetAttribute("href")
|
||||
e, err := article.FindElement(selenium.ByXPATH, ".//a[@data-a-target='preview-card-title-link' and @href]")
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
|
||||
href, err := e.GetAttribute("href")
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
|
||||
btns, err := article.FindElements(selenium.ByXPATH, ".//div[@class='tw-full-width tw-inline-block']//button")
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
|
||||
var tags []string
|
||||
for _, btn := range btns {
|
||||
tag, err := btn.GetAttribute("data-a-target")
|
||||
if err == nil {
|
||||
tags = append(tags, tag)
|
||||
}
|
||||
}
|
||||
|
||||
streamer := &intimate.Streamer{}
|
||||
|
||||
matches := regexp.MustCompile(`https://www.twitch.tv/(\w+)`).FindStringSubmatch(attr)
|
||||
matches := regexp.MustCompile(`https://www.twitch.tv/(\w+)`).FindStringSubmatch(href)
|
||||
if len(matches) == 2 {
|
||||
streamer.UserId = matches[1]
|
||||
} else {
|
||||
log.Println(attr)
|
||||
log.Println(href)
|
||||
continue
|
||||
}
|
||||
|
||||
jtags, err := json.Marshal(tags)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
} else {
|
||||
streamer.Tags = jtags
|
||||
}
|
||||
|
||||
streamer.Platform = intimate.Ptwitch
|
||||
|
||||
updateUrl := make(map[string]string)
|
||||
updateUrl["live"] = attr
|
||||
streamer.LiveUrl = sql.NullString{String: attr, Valid: true}
|
||||
updateUrl["live"] = href
|
||||
streamer.LiveUrl = sql.NullString{String: href, Valid: true}
|
||||
data, err := json.Marshal(updateUrl)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
@@ -132,15 +159,19 @@ func (cl *UserList) Execute() {
|
||||
}
|
||||
streamer.UpdateUrl = data
|
||||
streamer.Operator = 0
|
||||
|
||||
estore.InsertStreamer(streamer)
|
||||
if estore.InsertStreamer(streamer) {
|
||||
// log.Println("streamer update tags", streamer.Uid, tags)
|
||||
estore.Update(streamer, "Tags", streamer.Tags)
|
||||
}
|
||||
}
|
||||
log.Println("streamer insert", len(elements))
|
||||
count++
|
||||
if count >= 100 {
|
||||
wd.Close()
|
||||
wd = intimate.GetChromeDriver(3030)
|
||||
count = 0
|
||||
log.Println("streamer find", len(articles))
|
||||
if len(articles) == 0 {
|
||||
sourceChannel.Operator = 5
|
||||
sstore.UpdateOperator(sourceChannel)
|
||||
}
|
||||
counter.AddWithReset(1)
|
||||
}
|
||||
|
||||
wd.Close()
|
||||
wd.Quit()
|
||||
}
|
||||
|
||||
@@ -1,221 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"intimate"
|
||||
"log"
|
||||
"os"
|
||||
"os/signal"
|
||||
"regexp"
|
||||
"sync/atomic"
|
||||
"syscall"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/tebeka/selenium"
|
||||
)
|
||||
|
||||
// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
|
||||
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))
|
||||
|
||||
// estore 解析存储连接实例
|
||||
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
|
||||
|
||||
func TestCase(t *testing.T) {
|
||||
var loop int32 = 1
|
||||
wd := intimate.GetChromeDriver(3030)
|
||||
|
||||
go func() {
|
||||
signalchan := make(chan os.Signal)
|
||||
signal.Notify(signalchan, syscall.SIGKILL, syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGSTOP)
|
||||
log.Println("accept stop command:", <-signalchan)
|
||||
atomic.StoreInt32(&loop, 0)
|
||||
}()
|
||||
|
||||
var lasterr error = nil
|
||||
// var err error
|
||||
|
||||
for atomic.LoadInt32(&loop) > 0 {
|
||||
streamer, err := estore.Pop(intimate.Ptwitch, 0)
|
||||
if streamer == nil || err != nil {
|
||||
if err != lasterr {
|
||||
log.Println(err, lasterr)
|
||||
lasterr = err
|
||||
}
|
||||
time.Sleep(time.Second * 2)
|
||||
continue
|
||||
}
|
||||
|
||||
var updateUrl map[string]string
|
||||
json.Unmarshal(streamer.UpdateUrl.([]byte), &updateUrl)
|
||||
liveUrl := updateUrl["live"]
|
||||
log.Println(liveUrl)
|
||||
|
||||
// err = wd.Get("https://www.twitch.tv/zoe_0601" + "/about")
|
||||
err = wd.Get(liveUrl + "/about")
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
//estore.UpdateError(streamer, err)
|
||||
continue
|
||||
}
|
||||
|
||||
streamer.LiveUrl = sql.NullString{String: liveUrl, Valid: true}
|
||||
clog := &intimate.CollectLog{}
|
||||
clog.UserId = streamer.UserId
|
||||
|
||||
time.Sleep(time.Millisecond * 500)
|
||||
extractUserName(wd, streamer)
|
||||
extractFollowers(wd, clog)
|
||||
err = extractViews(wd, clog) // views + tags + gratuity
|
||||
|
||||
if err != nil {
|
||||
// 不直播时提取礼物 gratuity
|
||||
wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
|
||||
channelchat, err := wd.FindElement(selenium.ByXPATH, `//a[@data-a-target="channel-home-tab-Chat"]`)
|
||||
btn, _ := web.FindElement(selenium.ByXPATH, `//button[@data-test-selector="expand-grabber"]`)
|
||||
if (err == nil && channelchat != nil) || btn != nil {
|
||||
if channelchat != nil {
|
||||
channelchat.Click()
|
||||
}
|
||||
time.Sleep(time.Second)
|
||||
extractGratuity(wd, clog)
|
||||
return true, nil
|
||||
}
|
||||
return false, nil
|
||||
|
||||
}, time.Second*4)
|
||||
}
|
||||
|
||||
streamer.Platform = intimate.Ptwitch
|
||||
clog.Platform = string(streamer.Platform)
|
||||
clog.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true}
|
||||
lastClogId := estore.InsertCollectLog(clog)
|
||||
|
||||
streamer.Operator = 100
|
||||
streamer.LatestLogUid = lastClogId
|
||||
estore.UpdateStreamer(streamer)
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
func extractUserName(wd selenium.WebDriver, streamer *intimate.Streamer) error {
|
||||
return wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
|
||||
label, err := web.FindElement(selenium.ByXPATH, "//a[@class='tw-interactive']//h1")
|
||||
if err == nil {
|
||||
if ltxt, err := label.Text(); err == nil {
|
||||
log.Println("label:", ltxt)
|
||||
streamer.UserName = sql.NullString{String: ltxt, Valid: true}
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
return false, err
|
||||
}, 6*time.Second)
|
||||
}
|
||||
|
||||
func extractFollowers(wd selenium.WebDriver, clog *intimate.CollectLog) error {
|
||||
return wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
|
||||
efollowers, err := web.FindElement(selenium.ByXPATH, "//div[@data-a-target='about-panel']//div[@class='tw-align-center']")
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
followers, err := efollowers.Text()
|
||||
if err != nil || followers == "" {
|
||||
return false, err
|
||||
}
|
||||
followers = regexp.MustCompile(`[\d,]+`).FindString(followers)
|
||||
fint, _ := intimate.ParseNumber(followers)
|
||||
clog.Followers = sql.NullInt64{Int64: int64(fint), Valid: true}
|
||||
log.Println("followers: ", followers, fint)
|
||||
return true, nil
|
||||
}, 6*time.Second)
|
||||
}
|
||||
|
||||
func extractViews(wd selenium.WebDriver, clog *intimate.CollectLog) error {
|
||||
return wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
|
||||
views, err := web.FindElement(selenium.ByXPATH, "//a[@data-a-target='home-live-overlay-button']/span")
|
||||
if views != nil {
|
||||
if txt, err := views.Text(); err == nil {
|
||||
|
||||
vint, _ := intimate.ParseNumber(txt)
|
||||
clog.Views = sql.NullInt64{Int64: vint, Valid: true}
|
||||
log.Println("views:", txt)
|
||||
views.Click()
|
||||
|
||||
extractTags(wd, clog)
|
||||
extractTitle(wd, clog)
|
||||
extractGratuity(wd, clog)
|
||||
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
return false, err
|
||||
}, time.Second*4)
|
||||
}
|
||||
|
||||
func extractTitle(wd selenium.WebDriver, clog *intimate.CollectLog) error {
|
||||
return wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
|
||||
title, err := web.FindElement(selenium.ByXPATH, `//h2[@data-a-target='stream-title']`)
|
||||
if err == nil {
|
||||
if txt, err := title.Text(); err == nil {
|
||||
clog.LiveTitle = sql.NullString{String: txt, Valid: true}
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
return false, err
|
||||
}, time.Second*4)
|
||||
}
|
||||
|
||||
func extractTags(wd selenium.WebDriver, clog *intimate.CollectLog) error {
|
||||
return wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
|
||||
tags, err := web.FindElements(selenium.ByXPATH, "//a[@aria-label and @data-a-target and @href]/div[@class and text()]")
|
||||
if len(tags) == 0 {
|
||||
return false, err
|
||||
}
|
||||
|
||||
var stags []string
|
||||
for _, tag := range tags {
|
||||
if txt, err := tag.Text(); err == nil {
|
||||
stags = append(stags, txt)
|
||||
} else {
|
||||
log.Println(err)
|
||||
}
|
||||
log.Println(tag.Text())
|
||||
}
|
||||
if len(stags) > 0 {
|
||||
if tagbuf, err := json.Marshal(stags); err == nil {
|
||||
clog.Tags = tagbuf
|
||||
} else {
|
||||
log.Println(err)
|
||||
}
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}, time.Second*4)
|
||||
}
|
||||
|
||||
func extractGratuity(wd selenium.WebDriver, clog *intimate.CollectLog) error {
|
||||
return wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
|
||||
btn, err := web.FindElement(selenium.ByXPATH, `//button[@data-test-selector="expand-grabber"]`)
|
||||
if err == nil {
|
||||
btn.Click()
|
||||
time.Sleep(time.Second)
|
||||
gifcount, err := web.FindElements(selenium.ByXPATH, `//div[@class="sub-gift-count tw-flex"]/p`)
|
||||
if err == nil {
|
||||
var gratuity int64 = 0
|
||||
for _, gc := range gifcount {
|
||||
if gtxt, err := gc.Text(); err == nil {
|
||||
gint, _ := intimate.ParseNumber(gtxt)
|
||||
gratuity += gint
|
||||
} else {
|
||||
log.Println(err)
|
||||
}
|
||||
}
|
||||
clog.Gratuity = sql.NullInt64{Int64: gratuity, Valid: true}
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
|
||||
return false, err
|
||||
}, time.Second*4)
|
||||
}
|
||||
Reference in New Issue
Block a user