TODO: twitch_task2 fix 错误
This commit is contained in:
@@ -70,13 +70,13 @@ func Execute() {
|
||||
|
||||
if userid := room.Get("id").String(); userid != "" {
|
||||
|
||||
streamer.UserId = userid
|
||||
streamer.LiveUrl = sql.NullString{String: "https://www.nimo.tv/live/" + userid, Valid: true}
|
||||
streamer.UserId = &userid
|
||||
streamer.LiveUrl = &sql.NullString{String: "https://www.nimo.tv/live/" + userid, Valid: true}
|
||||
|
||||
channel := room.Get("roomTypeName").String()
|
||||
streamer.Channel = sql.NullString{String: channel, Valid: channel != ""}
|
||||
streamer.Channel = &sql.NullString{String: channel, Valid: channel != ""}
|
||||
username := room.Get("anchorName").String()
|
||||
streamer.UserName = sql.NullString{String: username, Valid: username != ""}
|
||||
streamer.UserName = &sql.NullString{String: username, Valid: username != ""}
|
||||
|
||||
if rtags := room.Get("anchorLabels"); rtags.IsArray() {
|
||||
|
||||
|
||||
@@ -11,11 +11,11 @@ import (
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
|
||||
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STOpenrec))
|
||||
// // sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
|
||||
// var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STOpenrec))
|
||||
|
||||
// estore 解析存储连接实例
|
||||
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
|
||||
// // estore 解析存储连接实例
|
||||
// var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
|
||||
|
||||
// Execute 执行方法
|
||||
func Execute() {
|
||||
@@ -71,7 +71,7 @@ func Execute() {
|
||||
|
||||
userid := User.Get("channel.id").String()
|
||||
streamer := &intimate.Streamer{}
|
||||
streamer.UserId = userid
|
||||
streamer.UserId = &userid
|
||||
streamer.Platform = intimate.Popenrec
|
||||
|
||||
updateUrl := make(map[string]interface{})
|
||||
@@ -83,15 +83,16 @@ func Execute() {
|
||||
|
||||
updateUrlBytes, err := json.Marshal(updateUrl)
|
||||
if err != nil {
|
||||
estore.UpdateError(streamer, err)
|
||||
intimate.TStreamer.UpdateError(streamer, err)
|
||||
continue
|
||||
}
|
||||
|
||||
streamer.UpdateUrl = updateUrlBytes
|
||||
estore.InsertStreamer(streamer)
|
||||
intimate.TStreamer.Insert(streamer)
|
||||
}
|
||||
}
|
||||
|
||||
log.Println("streamer count:", len(result.Array()), tp.ParsedURL.String())
|
||||
// 修改url query 参数的page递增. 遍历所有页面
|
||||
tp.QueryParam("page").IntAdd(1)
|
||||
time.Sleep(time.Second * 1)
|
||||
|
||||
2
tasks/openrec/openrec_task2/.gitignore
vendored
2
tasks/openrec/openrec_task2/.gitignore
vendored
@@ -1,2 +0,0 @@
|
||||
openrec_task2
|
||||
log
|
||||
@@ -1,5 +0,0 @@
|
||||
package main
|
||||
|
||||
func main() {
|
||||
Execute()
|
||||
}
|
||||
@@ -1,154 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"intimate"
|
||||
"log"
|
||||
"time"
|
||||
|
||||
"github.com/474420502/gcurl"
|
||||
"github.com/474420502/requests"
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
|
||||
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STOpenrec))
|
||||
|
||||
// estore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_extractor.sql
|
||||
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
|
||||
|
||||
func init() {
|
||||
|
||||
}
|
||||
|
||||
// Execute 执行方法
|
||||
func Execute() {
|
||||
|
||||
ps := intimate.NewPerfectShutdown()
|
||||
ses := requests.NewSession()
|
||||
|
||||
var lasterr error = nil
|
||||
|
||||
for !ps.IsClose() {
|
||||
|
||||
streamer, err := estore.Pop(intimate.Popenrec) //队列里弹出一个streamer行. 进行解析
|
||||
|
||||
if streamer == nil || err != nil {
|
||||
if err != lasterr {
|
||||
log.Println(err, lasterr)
|
||||
lasterr = err
|
||||
}
|
||||
time.Sleep(time.Second * 2)
|
||||
continue
|
||||
}
|
||||
|
||||
userId := streamer.UserId
|
||||
|
||||
var updateUrl map[string]string
|
||||
|
||||
err = json.Unmarshal(streamer.UpdateUrl.([]byte), &updateUrl) // 反序列化update_url, 里面存了需要采集的url
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
// Check Userid
|
||||
|
||||
userUrl := updateUrl["user"]
|
||||
log.Println(userUrl)
|
||||
tp := ses.Get(userUrl) // 获取user url页面数据
|
||||
resp, err := tp.Execute()
|
||||
streamer.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true}
|
||||
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
estore.UpdateError(streamer, err)
|
||||
continue
|
||||
}
|
||||
|
||||
cookies := ses.GetCookies(tp.GetParsedURL())
|
||||
|
||||
scurl := updateUrl["supporters"] //获取打赏者的数据
|
||||
curl := gcurl.Parse(scurl)
|
||||
supportersSession := curl.CreateSession()
|
||||
|
||||
temporary := curl.CreateTemporary(supportersSession)
|
||||
supportersSession.SetCookies(temporary.GetParsedURL(), cookies)
|
||||
var supporters []string
|
||||
for { // supporters 数据需要登录信息. 下面为赋值 supporters链接获取的uid token random码
|
||||
|
||||
supportersQuery := temporary.GetQuery()
|
||||
|
||||
for _, cookie := range cookies {
|
||||
if cookie.Name == "uuid" {
|
||||
supportersQuery.Set("Uuid", cookie.Value)
|
||||
continue
|
||||
}
|
||||
|
||||
if cookie.Name == "token" {
|
||||
supportersQuery.Set("Token", cookie.Value)
|
||||
continue
|
||||
}
|
||||
|
||||
if cookie.Name == "random" {
|
||||
supportersQuery.Set("Random", cookie.Value)
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
supportersQuery.Set("identify_id", userId)
|
||||
temporary.SetQuery(supportersQuery)
|
||||
|
||||
resp, err := temporary.Execute()
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
}
|
||||
supporterjson := gjson.ParseBytes(resp.Content())
|
||||
supporterdata := supporterjson.Get("data") //解析supporters获取的json数据
|
||||
if supporterdata.Type == gjson.Null {
|
||||
break
|
||||
}
|
||||
supporters = append(supporters, string(resp.Content()))
|
||||
|
||||
temporary.QueryParam("page_number").IntAdd(1)
|
||||
}
|
||||
|
||||
// cookies := cxt.Session().GetCookies(wf.GetParsedURL())
|
||||
ext := make(map[string]interface{})
|
||||
|
||||
ext["json_supporters"] = supporters
|
||||
ext["html_user"] = string(resp.Content())
|
||||
|
||||
liveUrl := updateUrl["live"]
|
||||
tp = ses.Get(liveUrl)
|
||||
resp, err = tp.Execute()
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
estore.UpdateError(streamer, err)
|
||||
continue
|
||||
}
|
||||
ext["html_live"] = string(resp.Content())
|
||||
ext["var_user_id"] = userId
|
||||
|
||||
extJsonBytes, err := json.Marshal(ext)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
estore.UpdateError(streamer, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// streamer.Platform = intimate.Popenrec
|
||||
streamer.UpdateInterval = 120
|
||||
streamer.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true}
|
||||
streamer.Operator = 0
|
||||
|
||||
source := &intimate.Source{}
|
||||
source.Target = intimate.TOpenrecUser
|
||||
source.Ext = string(extJsonBytes)
|
||||
source.StreamerId = sql.NullInt64{Int64: streamer.Uid, Valid: true}
|
||||
sstore.Insert(source)
|
||||
|
||||
estore.UpdateStreamer(streamer)
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,9 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestMain(t *testing.T) {
|
||||
main()
|
||||
}
|
||||
@@ -75,8 +75,10 @@ func Execute() {
|
||||
sl.Operator = 0
|
||||
sl.UpdateInterval = 120
|
||||
sl.UpdateTime = time.Now()
|
||||
sl.UrlHash = intimate.GetUrlHash(sl.Url)
|
||||
|
||||
estore.InsertStreamerList(sl)
|
||||
intimate.TStreamerList.Insert(sl)
|
||||
// estore.InsertStreamerList(sl)
|
||||
|
||||
queue.Put(wurl)
|
||||
queuedict[wurl] = true
|
||||
@@ -107,7 +109,8 @@ func Execute() {
|
||||
sl.Operator = 0
|
||||
sl.UpdateInterval = 120
|
||||
sl.UpdateTime = time.Now()
|
||||
estore.InsertStreamerList(sl)
|
||||
sl.UrlHash = intimate.GetUrlHash(sl.Url)
|
||||
intimate.TStreamerList.Insert(sl)
|
||||
|
||||
queue.Put(wurl)
|
||||
queuedict[wurl] = true
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"intimate"
|
||||
"log"
|
||||
"time"
|
||||
@@ -9,100 +8,120 @@ import (
|
||||
"github.com/tebeka/selenium"
|
||||
)
|
||||
|
||||
// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
|
||||
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))
|
||||
// // sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
|
||||
// var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))
|
||||
|
||||
// estore 解析存储连接实例
|
||||
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
|
||||
// // estore 解析存储连接实例
|
||||
// var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
|
||||
|
||||
// 获取类型的所有频道链接
|
||||
|
||||
// Execute 执行任务
|
||||
func Execute() {
|
||||
var err error
|
||||
wd := intimate.GetChromeDriver(3030)
|
||||
|
||||
ps := intimate.NewPerfectShutdown()
|
||||
|
||||
weburl := "https://www.twitch.tv/directory?sort=VIEWER_COUNT"
|
||||
err = wd.Get(weburl)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
for !ps.IsClose() {
|
||||
var err error
|
||||
wd := intimate.GetChromeDriver(3030)
|
||||
|
||||
cardCondition := func(wd selenium.WebDriver) (bool, error) {
|
||||
elements, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]")
|
||||
weburl := "https://www.twitch.tv/directory?sort=VIEWER_COUNT"
|
||||
err = wd.Get(weburl)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return len(elements) > 0, nil
|
||||
}
|
||||
wd.WaitWithTimeout(cardCondition, time.Second*15)
|
||||
time.Sleep(time.Second)
|
||||
|
||||
e, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
e.Click()
|
||||
|
||||
var hrefs map[string]bool = make(map[string]bool)
|
||||
var delayerror = 5
|
||||
for i := 0; i <= 200; i++ {
|
||||
cards, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]")
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
break
|
||||
panic(err)
|
||||
}
|
||||
|
||||
if len(hrefs) == 0 {
|
||||
delayerror--
|
||||
if delayerror <= 0 {
|
||||
cardCondition := func(wd selenium.WebDriver) (bool, error) {
|
||||
elements, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]")
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return len(elements) > 0, nil
|
||||
}
|
||||
wd.WaitWithTimeout(cardCondition, time.Second*15)
|
||||
time.Sleep(time.Second)
|
||||
|
||||
e, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
e.Click()
|
||||
|
||||
var lasthreflen = 0
|
||||
var hrefs map[string]bool = make(map[string]bool)
|
||||
var delayerror = 5
|
||||
for i := 0; i <= 200; i++ {
|
||||
cards, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]")
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
break
|
||||
}
|
||||
} else {
|
||||
delayerror = 5
|
||||
}
|
||||
|
||||
for ii := 0; ii < 10; ii++ {
|
||||
for _, card := range cards {
|
||||
href, err := card.GetAttribute("href")
|
||||
if err != nil {
|
||||
log.Println(href, err)
|
||||
continue
|
||||
} else {
|
||||
hrefs[href] = true
|
||||
if len(hrefs) == lasthreflen {
|
||||
delayerror--
|
||||
if delayerror <= 0 {
|
||||
break
|
||||
}
|
||||
} else {
|
||||
delayerror = 7
|
||||
}
|
||||
break
|
||||
}
|
||||
lasthreflen = len(hrefs)
|
||||
|
||||
if ps.IsClose() {
|
||||
break
|
||||
}
|
||||
for ii := 0; ii < 10; ii++ {
|
||||
for _, card := range cards {
|
||||
href, err := card.GetAttribute("href")
|
||||
if err != nil {
|
||||
log.Println(href, err)
|
||||
continue
|
||||
} else {
|
||||
hrefs[href] = true
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
if len(cards) > 10 {
|
||||
log.Println(len(cards))
|
||||
wd.ExecuteScript(`items = document.evaluate("//div[@data-target='directory-page__card-container']/../self::div[@data-target and @style]", document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
if ps.IsClose() {
|
||||
break
|
||||
}
|
||||
|
||||
if len(cards) > 10 {
|
||||
log.Println(len(cards))
|
||||
wd.ExecuteScript(`items = document.evaluate("//div[@data-target='directory-page__card-container']/../self::div[@data-target and @style]", document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
for (var i = 0; i < items.snapshotLength - 10; i++) { item = items.snapshotItem(i); item.remove() ;};`, nil)
|
||||
}
|
||||
time.Sleep(time.Millisecond * 200)
|
||||
wd.KeyDown(selenium.EndKey)
|
||||
time.Sleep(time.Millisecond * 200)
|
||||
wd.KeyUp(selenium.EndKey)
|
||||
time.Sleep(time.Millisecond * 2500)
|
||||
}
|
||||
time.Sleep(time.Millisecond * 200)
|
||||
wd.KeyDown(selenium.EndKey)
|
||||
time.Sleep(time.Millisecond * 200)
|
||||
wd.KeyUp(selenium.EndKey)
|
||||
time.Sleep(time.Millisecond * 2500)
|
||||
|
||||
for href := range hrefs {
|
||||
|
||||
sl := &intimate.StreamerList{}
|
||||
sl.Url = href
|
||||
sl.UrlHash = intimate.GetUrlHash(sl.Url)
|
||||
sl.Platform = string(intimate.Ptwitch)
|
||||
sl.UpdateTime = intimate.GetUpdateTimeNow()
|
||||
err := intimate.TStreamerList.Insert(sl)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
}
|
||||
|
||||
// TODO: Save href
|
||||
// source := &intimate.Source{}
|
||||
// source.Source = sql.NullString{String: href, Valid: true}
|
||||
// source.Operator = 0
|
||||
// source.Target = intimate.TTwitchChannel
|
||||
// source.Url = weburl
|
||||
// sstore.Insert(source)
|
||||
}
|
||||
|
||||
log.Println("hrefs len:", len(hrefs))
|
||||
// sstore.Deduplicate(intimate.TTwitchChannel, "source")
|
||||
|
||||
wd.Close()
|
||||
wd.Quit()
|
||||
time.Sleep(time.Minute * 30)
|
||||
}
|
||||
|
||||
for href := range hrefs {
|
||||
|
||||
// TODO: Save href
|
||||
source := &intimate.Source{}
|
||||
source.Source = sql.NullString{String: href, Valid: true}
|
||||
source.Operator = 0
|
||||
source.Target = intimate.TTwitchChannel
|
||||
source.Url = weburl
|
||||
sstore.Insert(source)
|
||||
}
|
||||
|
||||
log.Println("hrefs len:", len(hrefs))
|
||||
sstore.Deduplicate(intimate.TTwitchChannel, "source")
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user