From ac1ab81676f7a2745b0b442c889325f59483804c Mon Sep 17 00:00:00 2001
From: eson
Date: Tue, 28 Jul 2020 18:56:27 +0800
Subject: [PATCH] =?UTF-8?q?=E5=AE=8C=E6=88=90twitch=20=E6=95=B0=E6=8D=AE?=
 =?UTF-8?q?=E6=8F=90=E5=8F=96=E5=85=A5=E5=BA=93?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Reviewer notes (text between "---" and the first "diff --git" line is not
part of the commit message):

* The subject decodes (RFC 2047, UTF-8) to roughly "Finish twitch data
  extraction and storage".
* twitch_task1: Execute now gathers card hrefs into a map while scrolling,
  stops once the card count has stayed unchanged for five consecutive passes,
  and, when more than 10 cards are present, runs a script that removes all but
  the last 10 matched container divs before pressing End again.
* twitch_task2: drops the liveurls == 0 branch (both arms decremented
  delayerror by one) and replaces the Chrome driver after every 100 passes of
  the enclosing loop.
  NOTE(review): the earlier `defer wd.Close()` is presumably bound to the
  first driver value, so the replacement driver may never be closed on
  return -- confirm.
* twitch_task3 test: the inline waits are replaced by the helpers
  extractUserName, extractFollowers and extractViews (which, after clicking
  the live overlay, also calls extractTags, extractTitle and extractGratuity).
  When extractViews returns an error, gratuity is extracted through the chat
  tab instead. The result is stored via InsertCollectLog / UpdateStreamer with
  Operator = 100.
* utils.go: adds ParseNumber, which removes commas and parses a base-10 int64.
* NOTE(review): this copy was recovered from a whitespace-collapsed paste;
  indentation and the placement of blank context lines were reconstructed from
  the hunk line counts. Run `git apply --check` before applying.

 tasks/twitch/twitch_task1/task_twitch.go      |  54 +++--
 tasks/twitch/twitch_task2/task_twitch.go      |  14 +-
 tasks/twitch/twitch_task3/task_twitch_test.go | 201 ++++++++++++++----
 utils.go                                      |   8 +
 4 files changed, 212 insertions(+), 65 deletions(-)

diff --git a/tasks/twitch/twitch_task1/task_twitch.go b/tasks/twitch/twitch_task1/task_twitch.go
index bc7805d..18b852d 100644
--- a/tasks/twitch/twitch_task1/task_twitch.go
+++ b/tasks/twitch/twitch_task1/task_twitch.go
@@ -49,22 +49,51 @@ func (cl *ChannelLink) Execute() {
 	}
 	e.Click()
 
+	var hrefs map[string]bool = make(map[string]bool)
+	var delayerror = 5
+	var samecount = 0
 	for i := 0; i <= 200; i++ {
-		wd.KeyDown(selenium.EndKey)
-		time.Sleep(time.Second * 2)
-	}
-
-	elements, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]")
-	if err != nil {
-		panic(err)
-	}
-	// xpath: //article//a[@data-a-target='preview-card-title-link']
-	for _, ele := range elements {
-		href, err := ele.GetAttribute("href")
+		cards, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]")
 		if err != nil {
 			log.Println(err)
+			break
 		}
-		log.Println(href) // TODO: Save href
+
+		if len(cards) == samecount {
+			delayerror--
+			if delayerror <= 0 {
+				break
+			}
+		} else {
+			delayerror = 5
+		}
+
+		for _, card := range cards {
+			href, err := card.GetAttribute("href")
+			if err != nil {
+				log.Println(err)
+			} else {
+				hrefs[href] = true
+			}
+		}
+		samecount = len(cards)
+
+		if len(cards) > 10 {
+			log.Println(len(cards))
+			wd.ExecuteScript(`items = document.evaluate("//div[@data-target='directory-page__card-container']/../self::div[@data-target and @style]", document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
+			for (var i = 0; i < items.snapshotLength - 10; i++) { item = items.snapshotItem(i); item.remove() ;};`, nil)
+		}
+		time.Sleep(time.Millisecond * 200)
+		wd.KeyDown(selenium.EndKey)
+		time.Sleep(time.Millisecond * 200)
+		wd.KeyUp(selenium.EndKey)
+		time.Sleep(time.Millisecond * 2500)
+
+	}
+
+	for href := range hrefs {
+
+		// TODO: Save href
 		source := &intimate.Source{}
 		source.Source = sql.NullString{String: href, Valid: true}
 		source.Operator = 0
@@ -73,5 +102,6 @@ func (cl *ChannelLink) Execute() {
 		sstore.Insert(source)
 	}
 
+	log.Println("hrefs len:", len(hrefs))
 	sstore.Deduplicate(intimate.TTwitchChannel, "source")
 }
diff --git a/tasks/twitch/twitch_task2/task_twitch.go b/tasks/twitch/twitch_task2/task_twitch.go
index 7b721a5..c64ef1d 100644
--- a/tasks/twitch/twitch_task2/task_twitch.go
+++ b/tasks/twitch/twitch_task2/task_twitch.go
@@ -36,6 +36,7 @@ func (cl *UserList) Execute() {
 	defer wd.Close()
 
 	var loop int32 = 1
+	var count = 0
 
 	go func() {
 		signalchan := make(chan os.Signal)
@@ -87,12 +88,7 @@ func (cl *UserList) Execute() {
 			wd.KeyUp(selenium.EndKey)
 			time.Sleep(time.Millisecond * 1500)
 			if len(elements) == liveurls {
-				if liveurls == 0 {
-					delayerror -= 1
-				} else {
-					delayerror--
-				}
-
+				delayerror--
 				if delayerror <= 0 {
 					break
 				}
@@ -140,5 +136,11 @@ func (cl *UserList) Execute() {
 			estore.InsertStreamer(streamer)
 		}
 		log.Println("streamer insert", len(elements))
+		count++
+		if count >= 100 {
+			wd.Close()
+			wd = intimate.GetChromeDriver(3030)
+			count = 0
+		}
 	}
 }
diff --git a/tasks/twitch/twitch_task3/task_twitch_test.go b/tasks/twitch/twitch_task3/task_twitch_test.go
index 2c6c19a..db7b37b 100644
--- a/tasks/twitch/twitch_task3/task_twitch_test.go
+++ b/tasks/twitch/twitch_task3/task_twitch_test.go
@@ -1,6 +1,7 @@
 package main
 
 import (
+	"database/sql"
 	"encoding/json"
 	"intimate"
 	"log"
@@ -51,64 +52,170 @@ func TestCase(t *testing.T) {
 		liveUrl := updateUrl["live"]
 		log.Println(liveUrl)
 
-		err = wd.Get("https://www.twitch.tv/zoe_0601" + "/about")
+		// err = wd.Get("https://www.twitch.tv/zoe_0601" + "/about")
+		err = wd.Get(liveUrl + "/about")
 		if err != nil {
 			log.Println(err)
 			//estore.UpdateError(streamer, err)
 			continue
 		}
+		streamer.LiveUrl = sql.NullString{String: liveUrl, Valid: true}
+		clog := &intimate.CollectLog{}
+		clog.UserId = streamer.UserId
 
+		time.Sleep(time.Millisecond * 500)
-		wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
-			_, err = web.FindElement(selenium.ByXPATH, "//a[@class='tw-interactive']//h1/text()")
-			if err != nil {
-				return false, err
-			}
-			return true, err
-		}, 12)
+		extractUserName(wd, streamer)
+		extractFollowers(wd, clog)
+		err = extractViews(wd, clog) // views + tags + gratuity
 
-		label, err := wd.FindElement(selenium.ByXPATH, "//a[@class='tw-interactive']//h1")
 		if err != nil {
-			log.Println(err)
-			//estore.UpdateError(streamer, err)
-			continue
-		}
-		log.Println(label.Text())
+			// When the channel is not live, extract gifts (gratuity)
+			wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
+				channelchat, err := wd.FindElement(selenium.ByXPATH, `//a[@data-a-target="channel-home-tab-Chat"]`)
+				btn, _ := web.FindElement(selenium.ByXPATH, `//button[@data-test-selector="expand-grabber"]`)
+				if (err == nil && channelchat != nil) || btn != nil {
+					if channelchat != nil {
+						channelchat.Click()
+					}
+					time.Sleep(time.Second)
+					extractGratuity(wd, clog)
+					return true, nil
+				}
+				return false, nil
 
-		wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
-			followers, err := web.FindElement(selenium.ByXPATH, "//div[@data-a-target='about-panel']//div[@class='tw-align-center']/text()")
-			if err != nil {
-				return false, err
-			}
-			ft, err := followers.Text()
-			log.Println(ft)
-			if err != nil || ft != "" {
-				return false, err
-			}
-			return true, nil
-		}, 12)
-
-		followers, err := wd.FindElement(selenium.ByXPATH, "//div[@data-a-target='about-panel']//div[@class='tw-align-center']")
-		if err != nil {
-			log.Println(err)
-			//estore.UpdateError(streamer, err)
-			continue
-		}
-		fstr, err := followers.Text()
-		if err != nil {
-			log.Println(err)
-			//estore.UpdateError(streamer, err)
-			continue
-		}
-		log.Println(regexp.MustCompile(`[\d,]+`).FindString(fstr))
-		//div[@data-a-target="about-panel"]
-
-		if views, err := wd.FindElement(selenium.ByXPATH, "//a[@data-a-target='home-live-overlay-button']/span"); err == nil {
-			log.Println(views.Text())
-			views.Click()
+			}, time.Second*4)
 		}
 
-		streamer.Operator = 0
-		estore.UpdateOperator(streamer)
+		streamer.Platform = intimate.Ptwitch
+		clog.Platform = string(streamer.Platform)
+		clog.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true}
+		lastClogId := estore.InsertCollectLog(clog)
+
+		streamer.Operator = 100
+		streamer.LatestLogUid = lastClogId
+		estore.UpdateStreamer(streamer)
+
 	}
 
 }
+
+func extractUserName(wd selenium.WebDriver, streamer *intimate.Streamer) error {
+	return wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
+		label, err := web.FindElement(selenium.ByXPATH, "//a[@class='tw-interactive']//h1")
+		if err == nil {
+			if ltxt, err := label.Text(); err == nil {
+				log.Println("label:", ltxt)
+				streamer.UserName = sql.NullString{String: ltxt, Valid: true}
+				return true, nil
+			}
+		}
+		return false, err
+	}, 6*time.Second)
+}
+
+func extractFollowers(wd selenium.WebDriver, clog *intimate.CollectLog) error {
+	return wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
+		efollowers, err := web.FindElement(selenium.ByXPATH, "//div[@data-a-target='about-panel']//div[@class='tw-align-center']")
+		if err != nil {
+			return false, err
+		}
+		followers, err := efollowers.Text()
+		if err != nil || followers == "" {
+			return false, err
+		}
+		followers = regexp.MustCompile(`[\d,]+`).FindString(followers)
+		fint, _ := intimate.ParseNumber(followers)
+		clog.Followers = sql.NullInt64{Int64: int64(fint), Valid: true}
+		log.Println("followers: ", followers, fint)
+		return true, nil
+	}, 6*time.Second)
+}
+
+func extractViews(wd selenium.WebDriver, clog *intimate.CollectLog) error {
+	return wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
+		views, err := web.FindElement(selenium.ByXPATH, "//a[@data-a-target='home-live-overlay-button']/span")
+		if views != nil {
+			if txt, err := views.Text(); err == nil {
+
+				vint, _ := intimate.ParseNumber(txt)
+				clog.Views = sql.NullInt64{Int64: vint, Valid: true}
+				log.Println("views:", txt)
+				views.Click()
+
+				extractTags(wd, clog)
+				extractTitle(wd, clog)
+				extractGratuity(wd, clog)
+
+				return true, nil
+			}
+		}
+		return false, err
+	}, time.Second*4)
+}
+
+func extractTitle(wd selenium.WebDriver, clog *intimate.CollectLog) error {
+	return wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
+		title, err := web.FindElement(selenium.ByXPATH, `//h2[@data-a-target='stream-title']`)
+		if err == nil {
+			if txt, err := title.Text(); err == nil {
+				clog.LiveTitle = sql.NullString{String: txt, Valid: true}
+				return true, nil
+			}
+		}
+		return false, err
+	}, time.Second*4)
+}
+
+func extractTags(wd selenium.WebDriver, clog *intimate.CollectLog) error {
+	return wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
+		tags, err := web.FindElements(selenium.ByXPATH, "//a[@aria-label and @data-a-target and @href]/div[@class and text()]")
+		if len(tags) == 0 {
+			return false, err
+		}
+
+		var stags []string
+		for _, tag := range tags {
+			if txt, err := tag.Text(); err == nil {
+				stags = append(stags, txt)
+			} else {
+				log.Println(err)
+			}
+			log.Println(tag.Text())
+		}
+		if len(stags) > 0 {
+			if tagbuf, err := json.Marshal(stags); err == nil {
+				clog.Tags = tagbuf
+			} else {
+				log.Println(err)
+			}
+		}
+
+		return true, nil
+	}, time.Second*4)
+}
+
+func extractGratuity(wd selenium.WebDriver, clog *intimate.CollectLog) error {
+	return wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
+		btn, err := web.FindElement(selenium.ByXPATH, `//button[@data-test-selector="expand-grabber"]`)
+		if err == nil {
+			btn.Click()
+			time.Sleep(time.Second)
+			gifcount, err := web.FindElements(selenium.ByXPATH, `//div[@class="sub-gift-count tw-flex"]/p`)
+			if err == nil {
+				var gratuity int64 = 0
+				for _, gc := range gifcount {
+					if gtxt, err := gc.Text(); err == nil {
+						gint, _ := intimate.ParseNumber(gtxt)
+						gratuity += gint
+					} else {
+						log.Println(err)
+					}
+				}
+				clog.Gratuity = sql.NullInt64{Int64: gratuity, Valid: true}
+			}
+			return true, nil
+		}
+
+		return false, err
+	}, time.Second*4)
+}
diff --git a/utils.go b/utils.go
index 899b68d..39f1d19 100644
--- a/utils.go
+++ b/utils.go
@@ -4,6 +4,8 @@ import (
 	"fmt"
 	"log"
 	"runtime"
+	"strconv"
+	"strings"
 	"time"
 
 	"github.com/tebeka/selenium"
@@ -22,6 +24,12 @@ func init() {
 
 }
 
+// ParseNumber removes commas and parses the number.
+func ParseNumber(number string) (int64, error) {
+	number = strings.ReplaceAll(number, ",", "")
+	return strconv.ParseInt(number, 10, 64)
+}
+
 // ParseDuration time to duration eg: 1:40:00 -> time.Duration
 func ParseDuration(dt string) (time.Duration, error) {