1. 添加了屏蔽 Twitch 视频流的插件.

2. 流量减少99%
3. CPU使用率降低50%
4. 速度提高了400%
This commit is contained in:
eson
2020-07-27 19:30:54 +08:00
parent 41d3763b57
commit 1d2f2d14c5
18 changed files with 186 additions and 75 deletions

View File

@@ -70,12 +70,12 @@ func (or *OpenrecRanking) Execute(cxt *hunter.TaskContext) {
tp := cxt.Temporary()
content := resp.Content()
if len(content) <= 200 { // 末页退出
if len(content) <= 200 { //末页时没有内容返回, 末页退出
finishpoint := time.Now()
log.Println("任务Ranking UserId结束休眠, 下次启动时间:", finishpoint.Add(time.Minute*120))
for time.Now().Sub(finishpoint) < time.Minute*120 {
time.Sleep(time.Second)
if atomic.LoadInt32(&loop) > 0 {
if atomic.LoadInt32(&loop) <= 0 {
return
}
}
@@ -122,6 +122,7 @@ func (or *OpenrecRanking) Execute(cxt *hunter.TaskContext) {
}
}
// 修改url query 参数的page递增. 遍历所有页面
querys := tp.GetQuery()
page, err := strconv.Atoi(querys.Get("page"))
if err != nil {

View File

@@ -51,7 +51,7 @@ func (oer *OpenrecExtratorRanking) Execute(cxt *hunter.TaskContext) {
for atomic.LoadInt32(&loop) > 0 {
streamer, err := estore.Pop(intimate.Popenrec)
streamer, err := estore.Pop(intimate.Popenrec) //队列里弹出一个streamer行. 进行解析
if streamer == nil || err != nil {
if err != lasterr {
@@ -66,7 +66,7 @@ func (oer *OpenrecExtratorRanking) Execute(cxt *hunter.TaskContext) {
var updateUrl map[string]string
err = json.Unmarshal(streamer.UpdateUrl.([]byte), &updateUrl)
err = json.Unmarshal(streamer.UpdateUrl.([]byte), &updateUrl) // 反序列化update_url, 里面存了需要采集的url
if err != nil {
log.Println(err)
continue
@@ -74,7 +74,7 @@ func (oer *OpenrecExtratorRanking) Execute(cxt *hunter.TaskContext) {
// Check Userid
userUrl := updateUrl["user"]
tp := cxt.Session().Get(userUrl)
tp := cxt.Session().Get(userUrl) // 获取user url页面数据
resp, err := tp.Execute()
streamer.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true}
@@ -86,14 +86,14 @@ func (oer *OpenrecExtratorRanking) Execute(cxt *hunter.TaskContext) {
cookies := cxt.Session().GetCookies(tp.GetParsedURL())
scurl := updateUrl["supporters"]
scurl := updateUrl["supporters"] //获取打赏者的数据
curl := gcurl.ParseRawCURL(scurl)
supportersSession := curl.CreateSession()
temporary := curl.CreateTemporary(supportersSession)
supportersSession.SetCookies(temporary.GetParsedURL(), cookies)
var supporters []string
for {
for { // supporters 数据需要登录信息. 下面为赋值 supporters链接获取的uid token random码
supportersQuery := temporary.GetQuery()
@@ -122,13 +122,13 @@ func (oer *OpenrecExtratorRanking) Execute(cxt *hunter.TaskContext) {
log.Println(err)
}
supporterjson := gjson.ParseBytes(resp.Content())
supporterdata := supporterjson.Get("data")
supporterdata := supporterjson.Get("data") //解析supporters获取的json数据
if supporterdata.Type == gjson.Null {
break
}
supporters = append(supporters, string(resp.Content()))
page := supportersQuery.Get("page_number")
page := supportersQuery.Get("page_number") // page_number 加1
pageint, err := strconv.Atoi(page)
if err != nil {
log.Println(err)

2
tasks/twitch/twitch_task1/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
twitch_task1
log

View File

@@ -25,6 +25,7 @@ type ChannelLink struct {
func (cl *ChannelLink) Execute() {
var err error
wd := intimate.GetChromeDriver(3030)
defer wd.Close()
weburl := "https://www.twitch.tv/directory?sort=VIEWER_COUNT"
err = wd.Get(weburl)

2
tasks/twitch/twitch_task2/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
twitch_task2
log

View File

@@ -33,6 +33,7 @@ func (cl *UserList) Execute() {
//article//a[@data-a-target='preview-card-title-link']
wd := intimate.GetChromeDriver(3030)
defer wd.Close()
var loop int32 = 1
@@ -74,19 +75,20 @@ func (cl *UserList) Execute() {
var elements []selenium.WebElement
var liveurls = 0
var delayerror = 3
var delayerror = 2
for i := 0; i < 200 && atomic.LoadInt32(&loop) > 0; i++ {
elements, err = wd.FindElements(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
if err != nil {
log.Println(err)
break
}
time.Sleep(time.Millisecond * 500)
wd.KeyDown(selenium.EndKey)
wd.KeyUp(selenium.EndKey)
time.Sleep(time.Second * 2)
time.Sleep(time.Millisecond * 1500)
if len(elements) == liveurls {
if liveurls == 0 {
delayerror -= 2
delayerror -= 1
} else {
delayerror--
}
@@ -95,7 +97,7 @@ func (cl *UserList) Execute() {
break
}
} else {
delayerror = 3
delayerror = 2
}
liveurls = len(elements)
}

View File

@@ -0,0 +1,114 @@
package main
import (
"encoding/json"
"intimate"
"log"
"os"
"os/signal"
"regexp"
"sync/atomic"
"syscall"
"testing"
"time"
"github.com/tebeka/selenium"
)
var (
	// sstore is the source-store instance, i.e. the implementation used to
	// persist raw source data. For the table layout see
	// sql/intimate_source.sql.
	sstore = intimate.NewStoreSource(string(intimate.STTwitch))

	// estore is the extractor-store connection instance.
	estore = intimate.NewStoreExtractor()
)
// TestCase is an exploratory scraping run driven through `go test`. It pops
// Twitch streamers from the extractor store and logs what it can read from a
// channel "about" page: the channel title, the follower count and, when the
// live overlay button is present, its text.
//
// The loop only ends when SIGQUIT or SIGTERM is received, so the test does not
// terminate on its own.
func TestCase(t *testing.T) {
	var loop int32 = 1

	wd := intimate.GetChromeDriver(3030)
	// Release the browser window when the test returns.
	defer wd.Close()

	go func() {
		// signal.Notify does not block when delivering, so an unbuffered
		// channel can miss a signal that arrives before the receive below is
		// ready. SIGKILL and SIGSTOP cannot be caught, so registering them
		// has no effect and they are left out.
		signalchan := make(chan os.Signal, 1)
		signal.Notify(signalchan, syscall.SIGQUIT, syscall.SIGTERM)
		log.Println("accept stop command:", <-signalchan)
		atomic.StoreInt32(&loop, 0)
	}()

	const (
		// waitTimeout must be an explicit duration: a bare 12 would be
		// interpreted as 12 nanoseconds.
		waitTimeout = 12 * time.Second

		// The wait conditions and the lookups that follow them share the same
		// element XPaths. A trailing /text() step selects a text node, which
		// the driver cannot return as an element.
		titleXPath     = "//a[@class='tw-interactive']//h1"
		followersXPath = "//div[@data-a-target='about-panel']//div[@class='tw-align-center']"
	)

	// Compiled once instead of on every iteration.
	followerCount := regexp.MustCompile(`[\d,]+`)

	var lasterr error
	for atomic.LoadInt32(&loop) > 0 {
		streamer, err := estore.Pop(intimate.Ptwitch, 0)
		if streamer == nil || err != nil {
			// Only log when the error changes, to avoid flooding the log
			// while the queue is empty.
			if err != lasterr {
				log.Println(err, lasterr)
				lasterr = err
			}
			time.Sleep(time.Second * 2)
			continue
		}

		raw, ok := streamer.UpdateUrl.([]byte)
		if !ok {
			log.Printf("unexpected update url type %T", streamer.UpdateUrl)
			continue
		}
		var updateUrl map[string]string
		if err = json.Unmarshal(raw, &updateUrl); err != nil {
			log.Println(err)
			continue
		}
		liveUrl := updateUrl["live"]
		log.Println(liveUrl)

		// NOTE(review): the page visited is hard-coded rather than derived
		// from liveUrl; this looks like a debugging leftover — confirm before
		// relying on this test for other channels.
		err = wd.Get("https://www.twitch.tv/zoe_0601" + "/about")
		if err != nil {
			log.Println(err)
			//estore.UpdateError(streamer, err)
			continue
		}
		time.Sleep(time.Millisecond * 500)

		// Poll until the channel title is present. A failed lookup is reported
		// as "not yet" rather than as an error, because returning an error
		// from the condition ends the wait immediately.
		if err = wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
			_, findErr := web.FindElement(selenium.ByXPATH, titleXPath)
			return findErr == nil, nil
		}, waitTimeout); err != nil {
			log.Println("waiting for channel title:", err)
		}

		label, err := wd.FindElement(selenium.ByXPATH, titleXPath)
		if err != nil {
			log.Println(err)
			//estore.UpdateError(streamer, err)
			continue
		}
		log.Println(label.Text())

		// Poll until the follower panel has non-empty text.
		if err = wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
			elem, findErr := web.FindElement(selenium.ByXPATH, followersXPath)
			if findErr != nil {
				return false, nil
			}
			ft, textErr := elem.Text()
			return textErr == nil && ft != "", nil
		}, waitTimeout); err != nil {
			log.Println("waiting for follower count:", err)
		}

		followers, err := wd.FindElement(selenium.ByXPATH, followersXPath)
		if err != nil {
			log.Println(err)
			//estore.UpdateError(streamer, err)
			continue
		}
		fstr, err := followers.Text()
		if err != nil {
			log.Println(err)
			//estore.UpdateError(streamer, err)
			continue
		}
		// Keep only the first run of digits and thousands separators.
		log.Println(followerCount.FindString(fstr))

		// The live overlay button is optional; when it is found, log its text
		// and click it.
		if views, err := wd.FindElement(selenium.ByXPATH, "//a[@data-a-target='home-live-overlay-button']/span"); err == nil {
			log.Println(views.Text())
			if err := views.Click(); err != nil {
				log.Println(err)
			}
		}

		// Write the streamer back with Operator reset to 0.
		streamer.Operator = 0
		estore.UpdateOperator(streamer)
	}
}