1. 添加了 block Twitch 视频流的插件.
2. 流量减少 99%.
3. CPU 使用率降低 50%.
4. 速度提高了 400%.
This commit is contained in:
@@ -70,12 +70,12 @@ func (or *OpenrecRanking) Execute(cxt *hunter.TaskContext) {
|
||||
tp := cxt.Temporary()
|
||||
|
||||
content := resp.Content()
|
||||
if len(content) <= 200 { // 末页退出
|
||||
if len(content) <= 200 { //末页时没有内容返回, 末页退出
|
||||
finishpoint := time.Now()
|
||||
log.Println("任务Ranking UserId结束休眠, 下次启动时间:", finishpoint.Add(time.Minute*120))
|
||||
for time.Now().Sub(finishpoint) < time.Minute*120 {
|
||||
time.Sleep(time.Second)
|
||||
if atomic.LoadInt32(&loop) > 0 {
|
||||
if atomic.LoadInt32(&loop) <= 0 {
|
||||
return
|
||||
}
|
||||
}
|
||||
@@ -122,6 +122,7 @@ func (or *OpenrecRanking) Execute(cxt *hunter.TaskContext) {
|
||||
}
|
||||
}
|
||||
|
||||
// 修改url query 参数的page递增. 遍历所有页面
|
||||
querys := tp.GetQuery()
|
||||
page, err := strconv.Atoi(querys.Get("page"))
|
||||
if err != nil {
|
||||
|
||||
@@ -51,7 +51,7 @@ func (oer *OpenrecExtratorRanking) Execute(cxt *hunter.TaskContext) {
|
||||
|
||||
for atomic.LoadInt32(&loop) > 0 {
|
||||
|
||||
streamer, err := estore.Pop(intimate.Popenrec)
|
||||
streamer, err := estore.Pop(intimate.Popenrec) //队列里弹出一个streamer行. 进行解析
|
||||
|
||||
if streamer == nil || err != nil {
|
||||
if err != lasterr {
|
||||
@@ -66,7 +66,7 @@ func (oer *OpenrecExtratorRanking) Execute(cxt *hunter.TaskContext) {
|
||||
|
||||
var updateUrl map[string]string
|
||||
|
||||
err = json.Unmarshal(streamer.UpdateUrl.([]byte), &updateUrl)
|
||||
err = json.Unmarshal(streamer.UpdateUrl.([]byte), &updateUrl) // 反序列化update_url, 里面存了需要采集的url
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
@@ -74,7 +74,7 @@ func (oer *OpenrecExtratorRanking) Execute(cxt *hunter.TaskContext) {
|
||||
// Check Userid
|
||||
|
||||
userUrl := updateUrl["user"]
|
||||
tp := cxt.Session().Get(userUrl)
|
||||
tp := cxt.Session().Get(userUrl) // 获取user url页面数据
|
||||
resp, err := tp.Execute()
|
||||
streamer.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true}
|
||||
|
||||
@@ -86,14 +86,14 @@ func (oer *OpenrecExtratorRanking) Execute(cxt *hunter.TaskContext) {
|
||||
|
||||
cookies := cxt.Session().GetCookies(tp.GetParsedURL())
|
||||
|
||||
scurl := updateUrl["supporters"]
|
||||
scurl := updateUrl["supporters"] //获取打赏者的数据
|
||||
curl := gcurl.ParseRawCURL(scurl)
|
||||
supportersSession := curl.CreateSession()
|
||||
|
||||
temporary := curl.CreateTemporary(supportersSession)
|
||||
supportersSession.SetCookies(temporary.GetParsedURL(), cookies)
|
||||
var supporters []string
|
||||
for {
|
||||
for { // supporters 数据需要登录信息. 下面为赋值 supporters链接获取的uid token random码
|
||||
|
||||
supportersQuery := temporary.GetQuery()
|
||||
|
||||
@@ -122,13 +122,13 @@ func (oer *OpenrecExtratorRanking) Execute(cxt *hunter.TaskContext) {
|
||||
log.Println(err)
|
||||
}
|
||||
supporterjson := gjson.ParseBytes(resp.Content())
|
||||
supporterdata := supporterjson.Get("data")
|
||||
supporterdata := supporterjson.Get("data") //解析supporters获取的json数据
|
||||
if supporterdata.Type == gjson.Null {
|
||||
break
|
||||
}
|
||||
supporters = append(supporters, string(resp.Content()))
|
||||
|
||||
page := supportersQuery.Get("page_number")
|
||||
page := supportersQuery.Get("page_number") // page_number 加1
|
||||
pageint, err := strconv.Atoi(page)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
|
||||
2
tasks/twitch/twitch_task1/.gitignore
vendored
Normal file
2
tasks/twitch/twitch_task1/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
twitch_task1
|
||||
log
|
||||
@@ -25,6 +25,7 @@ type ChannelLink struct {
|
||||
func (cl *ChannelLink) Execute() {
|
||||
var err error
|
||||
wd := intimate.GetChromeDriver(3030)
|
||||
defer wd.Close()
|
||||
|
||||
weburl := "https://www.twitch.tv/directory?sort=VIEWER_COUNT"
|
||||
err = wd.Get(weburl)
|
||||
|
||||
2
tasks/twitch/twitch_task2/.gitignore
vendored
Normal file
2
tasks/twitch/twitch_task2/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
twitch_task2
|
||||
log
|
||||
@@ -33,6 +33,7 @@ func (cl *UserList) Execute() {
|
||||
//article//a[@data-a-target='preview-card-title-link']
|
||||
|
||||
wd := intimate.GetChromeDriver(3030)
|
||||
defer wd.Close()
|
||||
|
||||
var loop int32 = 1
|
||||
|
||||
@@ -74,19 +75,20 @@ func (cl *UserList) Execute() {
|
||||
|
||||
var elements []selenium.WebElement
|
||||
var liveurls = 0
|
||||
var delayerror = 3
|
||||
var delayerror = 2
|
||||
for i := 0; i < 200 && atomic.LoadInt32(&loop) > 0; i++ {
|
||||
elements, err = wd.FindElements(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
break
|
||||
}
|
||||
time.Sleep(time.Millisecond * 500)
|
||||
wd.KeyDown(selenium.EndKey)
|
||||
wd.KeyUp(selenium.EndKey)
|
||||
time.Sleep(time.Second * 2)
|
||||
time.Sleep(time.Millisecond * 1500)
|
||||
if len(elements) == liveurls {
|
||||
if liveurls == 0 {
|
||||
delayerror -= 2
|
||||
delayerror -= 1
|
||||
} else {
|
||||
delayerror--
|
||||
}
|
||||
@@ -95,7 +97,7 @@ func (cl *UserList) Execute() {
|
||||
break
|
||||
}
|
||||
} else {
|
||||
delayerror = 3
|
||||
delayerror = 2
|
||||
}
|
||||
liveurls = len(elements)
|
||||
}
|
||||
|
||||
114
tasks/twitch/twitch_task3/task_twitch_test.go
Normal file
114
tasks/twitch/twitch_task3/task_twitch_test.go
Normal file
@@ -0,0 +1,114 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"intimate"
|
||||
"log"
|
||||
"os"
|
||||
"os/signal"
|
||||
"regexp"
|
||||
"sync/atomic"
|
||||
"syscall"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/tebeka/selenium"
|
||||
)
|
||||
|
||||
// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
|
||||
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))
|
||||
|
||||
// estore 解析存储连接实例
|
||||
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
|
||||
|
||||
func TestCase(t *testing.T) {
|
||||
var loop int32 = 1
|
||||
wd := intimate.GetChromeDriver(3030)
|
||||
|
||||
go func() {
|
||||
signalchan := make(chan os.Signal)
|
||||
signal.Notify(signalchan, syscall.SIGKILL, syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGSTOP)
|
||||
log.Println("accept stop command:", <-signalchan)
|
||||
atomic.StoreInt32(&loop, 0)
|
||||
}()
|
||||
|
||||
var lasterr error = nil
|
||||
// var err error
|
||||
|
||||
for atomic.LoadInt32(&loop) > 0 {
|
||||
streamer, err := estore.Pop(intimate.Ptwitch, 0)
|
||||
if streamer == nil || err != nil {
|
||||
if err != lasterr {
|
||||
log.Println(err, lasterr)
|
||||
lasterr = err
|
||||
}
|
||||
time.Sleep(time.Second * 2)
|
||||
continue
|
||||
}
|
||||
|
||||
var updateUrl map[string]string
|
||||
json.Unmarshal(streamer.UpdateUrl.([]byte), &updateUrl)
|
||||
liveUrl := updateUrl["live"]
|
||||
log.Println(liveUrl)
|
||||
|
||||
err = wd.Get("https://www.twitch.tv/zoe_0601" + "/about")
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
//estore.UpdateError(streamer, err)
|
||||
continue
|
||||
}
|
||||
|
||||
time.Sleep(time.Millisecond * 500)
|
||||
wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
|
||||
_, err = web.FindElement(selenium.ByXPATH, "//a[@class='tw-interactive']//h1/text()")
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return true, err
|
||||
}, 12)
|
||||
|
||||
label, err := wd.FindElement(selenium.ByXPATH, "//a[@class='tw-interactive']//h1")
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
//estore.UpdateError(streamer, err)
|
||||
continue
|
||||
}
|
||||
log.Println(label.Text())
|
||||
|
||||
wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
|
||||
followers, err := web.FindElement(selenium.ByXPATH, "//div[@data-a-target='about-panel']//div[@class='tw-align-center']/text()")
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
ft, err := followers.Text()
|
||||
log.Println(ft)
|
||||
if err != nil || ft != "" {
|
||||
return false, err
|
||||
}
|
||||
return true, nil
|
||||
}, 12)
|
||||
|
||||
followers, err := wd.FindElement(selenium.ByXPATH, "//div[@data-a-target='about-panel']//div[@class='tw-align-center']")
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
//estore.UpdateError(streamer, err)
|
||||
continue
|
||||
}
|
||||
fstr, err := followers.Text()
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
//estore.UpdateError(streamer, err)
|
||||
continue
|
||||
}
|
||||
log.Println(regexp.MustCompile(`[\d,]+`).FindString(fstr))
|
||||
//div[@data-a-target="about-panel"]
|
||||
|
||||
if views, err := wd.FindElement(selenium.ByXPATH, "//a[@data-a-target='home-live-overlay-button']/span"); err == nil {
|
||||
log.Println(views.Text())
|
||||
views.Click()
|
||||
}
|
||||
|
||||
streamer.Operator = 0
|
||||
estore.UpdateOperator(streamer)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user