7 Commits

Author SHA1 Message Date
eson
ee28db4ad8 todo: profile live 2020-09-15 19:06:03 +08:00
eson
925b7d42c7 add: mirrativ init 2020-09-15 17:09:22 +08:00
eson
36c277c3ce fix: update_interval = 0 if that is error 2020-09-15 10:39:34 +08:00
eson
072468005c pkill 添加-9 强制kill 2020-09-14 15:15:41 +08:00
eson
ef7b59ce3d TODO: streamerlist 2020-09-11 18:52:04 +08:00
eson
c4d0140b42 for save 2020-09-10 17:33:52 +08:00
eson
0b8a6fd810 Merge tag 'v0.6.0' into develop
重构后最高效率版本
2020-09-09 17:27:09 +08:00
13 changed files with 2676 additions and 53 deletions

View File

@@ -427,10 +427,10 @@ func (t *Table) UpdateError(obj interface{}, err error) {
}
}
_, dberr := t.store.db.Exec("update "+t.name+" set operator = ?, error_msg = ? where ? = ?", 10000, sql.NullString{String: err.Error(), Valid: true}, uidname, uidvalue)
_, dberr := t.store.db.Exec("update "+t.name+" set operator = ?, error_msg = ? where "+uidname+" = ?", 10000, sql.NullString{String: err.Error(), Valid: true}, uidvalue)
if dberr != nil {
// email tell owner to deal with
panic(err)
panic(dberr)
}
}

View File

@@ -0,0 +1 @@
package main

View File

@@ -0,0 +1,51 @@
package main
import (
"intimate"
"log"
"testing"
"time"
"github.com/474420502/gcurl"
)
// main is the (work-in-progress) Mirrativ streamer extractor. It pops queued
// Mirrativ streamers one at a time and requests each streamer's profile from
// the Mirrativ API, looping until ps.IsClose() reports true.
func main() {
ps := intimate.NewPerfectShutdown()
// Request template for the profile endpoint. The user_id in the URL is only
// a sample value; it is overwritten per streamer through tpProfileUserID.
gprofile := gcurl.Parse(`https://www.mirrativ.com/api/user/profile?user_id=103383701`)
tpProfile := gprofile.CreateTemporary(nil)
tpProfileUserID := tpProfile.QueryParam("user_id")
// Same template setup for the live endpoint.
// NOTE(review): tpLiveID is never used below, which the Go compiler rejects
// ("declared and not used") — presumably the live request is still to be written.
g := gcurl.Parse(`https://www.mirrativ.com/api/live/live?live_id=O5Ia4iX9c5CeZj7DFtg52Q`)
tpLive := g.CreateTemporary(nil)
tpLiveID := tpLive.QueryParam("live_id")
// Queue of streamer rows selected by the default condition for the Mirrativ platform.
queue := intimate.TStreamer.Queue(intimate.Streamer{}, intimate.ConditionDefault(intimate.PMirrativ))
for !ps.IsClose() {
istreamer, err := queue.Pop()
if err != nil {
// Pop failed: log it and retry after a short pause.
log.Println(err)
time.Sleep(time.Second * 2)
continue
}
streamer := istreamer.(*intimate.Streamer)
// NOTE(review): dereferences UserId without a nil check — confirm queued rows always carry one.
userid := *streamer.UserId
// Point the profile request at this streamer before executing it.
tpProfileUserID.StringSet(userid)
resp, err := tpProfile.Execute()
if err != nil {
log.Println(err)
time.Sleep(time.Second)
continue
}
// NOTE(review): profilejson is unused (another compile error), and gcurl.Parse
// is the same call used above to parse request URLs — confirm it is the
// intended way to read the response body.
profilejson := gcurl.Parse(string(resp.Content()))
}
}
// TestDo runs main directly so the extractor loop can be launched through
// `go test`. It makes no assertions and returns only when main returns.
func TestDo(t *testing.T) {
main()
}

View File

@@ -28,10 +28,10 @@ type LiveInfo struct {
}
func Execute() {
wd := intimate.GetChromeDriver(3030)
adriver := intimate.GetChromeDriver()
count := 0
countlimit := 200
wd := adriver.Webdriver
waitfor := intimate.NewWaitFor(wd)
ps := intimate.NewPerfectShutdown()
@@ -118,9 +118,8 @@ func Execute() {
count++
if count >= countlimit {
count = 0
wd.Close()
wd.Quit()
wd = intimate.GetChromeDriver(3030)
adriver.Close()
adriver = intimate.GetChromeDriver()
}
}
}

View File

@@ -34,13 +34,17 @@ func main() {
ps := intimate.NewPerfectShutdown()
ses := requests.NewSession()
streamerQueue := intimate.TStreamer.Queue(intimate.Streamer{}, intimate.ConditionDefault(intimate.Ptwitcasting))
var lasterr error
for !ps.IsClose() {
// streamer, err := estore.Pop(intimate.Ptwitcasting)
isteamer, err := streamerQueue.Pop()
if err != nil {
log.Println(err, isteamer)
if lasterr != err {
lasterr = err
log.Println(err)
}
time.Sleep(time.Minute)
continue
}

View File

@@ -7,7 +7,6 @@ import (
"intimate"
"log"
"regexp"
"strings"
"time"
"github.com/tebeka/selenium"
@@ -20,22 +19,57 @@ import (
// var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
func main() {
wd := intimate.GetChromeDriver(3040)
adriver := intimate.GetChromeDriver()
ps := intimate.NewPerfectShutdown()
queue := intimate.TStreamerList.Queue(intimate.StreamerList{}, intimate.ConditionDefault(intimate.Ptwitch))
slqueue := intimate.TStreamerList.Queue(intimate.StreamerList{}, intimate.ConditionDefault(intimate.Ptwitch))
squeue := intimate.TStreamer.Queue(intimate.Streamer{}, intimate.ConditionDefault(intimate.Ptwitch))
var count = 0
var countlimt = 200
var recreate = time.Now()
// var lasterr error = nil
var lasterr error = nil
// var err error
for !ps.IsClose() {
wd := adriver.Webdriver
// sourceChannel, err := sstore.Pop(intimate.TTwitchChannel)
isl, err := queue.Pop()
isl, err := slqueue.Pop()
if err != nil {
panic(err)
if lasterr != err {
lasterr = err
log.Println(err)
}
istreamer, err := squeue.Pop()
if err != nil {
if lasterr != err {
lasterr = err
log.Println(err)
ps.Wait(time.Minute)
continue
}
}
streamer := istreamer.(*intimate.Streamer)
Extractor(wd, streamer)
if err = intimate.TStreamer.Update(streamer); err != nil {
log.Println(err)
}
count++
if count >= countlimt || time.Now().Sub(recreate) >= time.Minute*120 {
count = 0
adriver.Close()
adriver = intimate.GetChromeDriver()
recreate = time.Now()
}
continue
}
streamerlist := isl.(*intimate.StreamerList)
weburl := streamerlist.Url + "?sort=VIEWER_COUNT"
@@ -155,9 +189,11 @@ func main() {
for _, streamer := range streamers {
Extractor(wd, streamer)
streamer.UpdateTime = &sql.NullTime{Time: time.Now(), Valid: true}
if err = intimate.TStreamer.InsertOrUpdate(streamer,
intimate.DUpdate{Field: "tags"},
intimate.DUpdate{Field: "update_time"},
intimate.DUpdate{Field: "update_interval"},
); err != nil {
log.Println(err)
}
@@ -169,15 +205,16 @@ func main() {
}
count++
if count >= countlimt {
if count >= countlimt || time.Now().Sub(recreate) >= time.Minute*120 {
count = 0
wd = intimate.GetChromeDriver(3031)
adriver.Close()
adriver = intimate.GetChromeDriver()
recreate = time.Now()
}
}
wd.Close()
wd.Quit()
adriver.Close()
}
func Extractor(wd selenium.WebDriver, streamer *intimate.Streamer) {
@@ -193,16 +230,16 @@ func Extractor(wd selenium.WebDriver, streamer *intimate.Streamer) {
// var updateUrl map[string]string
// json.Unmarshal(streamer.UpdateUrl.([]byte), &updateUrl)
liveUrl := streamer.LiveUrl.String
liveUrl = strings.Replace(liveUrl, "/watchparty", "", -1)
liveUrl := "https://www.twitch.tv/" + (*streamer.UserId)
// liveUrl = strings.Replace(liveUrl, "/watchparty", "", -1)
log.Println(liveUrl)
// err = wd.Get("https://www.twitch.tv/zoe_0601" + "/about")
err := wd.Get(liveUrl + "/about")
if err != nil {
log.Println(err)
intimate.TStreamer.UpdateError(streamer, err)
errstr := fmt.Errorf("%s: %s", err.Error(), liveUrl+"/about")
log.Println(errstr)
intimate.TStreamer.UpdateError(streamer, errstr)
time.Sleep(time.Second * 5)
return
}
@@ -217,15 +254,17 @@ func Extractor(wd selenium.WebDriver, streamer *intimate.Streamer) {
if err != nil {
_, err = wd.FindElement(selenium.ByXPATH, "//a[@data-a-target='browse-channels-button']")
if err == nil {
log.Println(streamer.UserId, "may be cancell")
log.Println(*streamer.UserId, "may be cancell")
streamer.Operator = 5
streamer.UpdateTime = &sql.NullTime{Time: time.Now(), Valid: true}
intimate.TStreamer.UpdateError(streamer, fmt.Errorf(""))
intimate.TStreamer.UpdateError(streamer, fmt.Errorf(*streamer.UserId, "may be cancell"))
}
return
}
err = extractFollowers(wd, clog)
if err != nil {
// log.Println(err)
streamer.UpdateInterval += 30
return
}
@@ -258,7 +297,6 @@ func Extractor(wd selenium.WebDriver, streamer *intimate.Streamer) {
return
}
streamer.Operator = 10
streamer.LatestLogUid = lastClogId
if clog.Tags != nil {
streamer.Tags = clog.Tags

View File

@@ -46,6 +46,7 @@ type Streamer struct {
Channel *sql.NullString `field:"channel"` //
Tags interface{} `field:"tags"`
Ext interface{} `field:"ext"` //
Comments interface{} `field:"comments"`
IsUpdateStreamer bool // 更新上面的内容
IsUpdateUrl bool
@@ -89,6 +90,7 @@ type CollectLog struct {
Tags interface{} `field:"tags"`
Ext interface{} `field:"ext"` //
ErrorMsg *sql.NullString `field:"error_msg"` //
Comments interface{} `field:"comments"` //
}
// Get Simple Value

View File

@@ -15,4 +15,7 @@ const (
// PNimo PNimo 平台
PNimo Platform = "nimo"
// PMirrativ is the Mirrativ platform
PMirrativ Platform = "mirrativ"
)

View File

@@ -0,0 +1,81 @@
package main
import (
"database/sql"
"intimate"
"log"
"time"
"github.com/474420502/gcurl"
"github.com/tidwall/gjson"
)
// main crawls the Mirrativ live catalog and stores every broadcaster it finds
// as a streamer row.
//
// It replays a browser-captured curl request against /api/live/catalog and
// follows next_cursor from page to page. For each catalog entry the owner is
// read from "live_preview.owner" when a live_preview object exists, otherwise
// from "live.owner". Entries without a user_id are skipped. When the catalog
// is exhausted (empty next_cursor) the crawl restarts from the first page
// after a five-minute pause; between pages it pauses two seconds.
//
// The loop never returns. A request error is logged and the same page is
// retried; a database error is logged and then panics.
func main() {
	// The header lines of the captured command are kept verbatim (no leading
	// whitespace) so the request sent is exactly the captured one.
	bcurl := `curl 'https://www.mirrativ.com/api/live/catalog?id=2&cursor=%s' \
-H 'authority: www.mirrativ.com' \
-H 'accept: application/json' \
-H 'x-timezone: Asia/Shanghai' \
-H 'x-csrf-token: F3Ojd6RBtApP6YAZzVn-9jWN1of159VxAqOQL1Zn' \
-H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36' \
-H 'content-type: application/json' \
-H 'sec-fetch-site: same-origin' \
-H 'sec-fetch-mode: cors' \
-H 'sec-fetch-dest: empty' \
-H 'referer: https://www.mirrativ.com/' \
-H 'accept-language: zh-CN,zh;q=0.9,ja;q=0.8' \
-H 'cookie: f=A2D75F0E-D218-11EA-A042-452BF6D21CE8; _ga=GA1.2.689947597.1596081392; mr_id=kxb65LddGMZf5C28jkR_tGCZD_ZFOAepD5gfXO7eNjfPMB8EKYvU1Vg_Y29V1lsa; _gid=GA1.2.2116692650.1600139685; lang=ja' \
--compressed`

	curl := gcurl.Parse(bcurl)
	tp := curl.CreateTemporary(nil)

	// The literal "%s" placeholder in the URL is replaced by setting the
	// cursor query parameter; an empty cursor requests the first page.
	cursor := tp.QueryParam(`cursor`)
	cursor.StringSet("")

	for {
		log.Println(tp.ParsedURL.String())
		resp, err := tp.Execute()
		if err != nil {
			log.Println(err)
			time.Sleep(time.Second * 2)
			continue
		}

		apijson := gjson.ParseBytes(resp.Content())
		next := apijson.Get("next_cursor").String()

		for _, liveinfo := range apijson.Get("list").Array() {
			prekey := "live"
			if liveinfo.Get("live_preview").Exists() {
				prekey = "live_preview"
			}
			owner := liveinfo.Get(prekey + ".owner")

			// Use String() for both the emptiness check and the stored value.
			// Result.Str is only populated for JSON string values, so a
			// numeric user_id would pass the check yet be saved as "".
			userID := owner.Get("user_id").String()
			if userID == "" {
				continue
			}

			streamer := &intimate.Streamer{}
			streamer.Platform = intimate.PMirrativ
			streamer.Operator = 0
			streamer.UserId = &userID
			streamer.UserName = &sql.NullString{String: liveinfo.Get("name").String(), Valid: true}
			streamer.UpdateInterval = 60
			streamer.UpdateTime = intimate.GetUpdateTimeNow()

			// Insert a new row; for an existing streamer only update_time is refreshed.
			err = intimate.TStreamer.InsertOrUpdate(
				streamer,
				intimate.DUpdate{Field: "update_time"},
			)
			if err != nil {
				log.Println(err)
				panic(err)
			}
		}

		if next == "" {
			// End of the catalog: wait before starting over from the first page.
			time.Sleep(time.Minute * 5)
		} else {
			time.Sleep(time.Second * 2)
		}
		cursor.StringSet(next)
	}
}

View File

@@ -0,0 +1,9 @@
package main
import (
"testing"
)
// TestMain is an empty placeholder: it has no body and asserts nothing.
// NOTE(review): the name TestMain is conventionally the package-level hook
// func TestMain(m *testing.M); with a *testing.T parameter this is presumably
// run as an ordinary test — consider renaming it to avoid confusion.
func TestMain(t *testing.T) {
}

View File

@@ -20,10 +20,13 @@ import (
func Execute() {
ps := intimate.NewPerfectShutdown()
var adriver *intimate.AutoCloseDriver
for !ps.IsClose() {
var err error
wd := intimate.GetChromeDriver(3030)
adriver = intimate.GetChromeDriver()
wd := adriver.Webdriver
weburl := "https://www.twitch.tv/directory?sort=VIEWER_COUNT"
err = wd.Get(weburl)
@@ -107,21 +110,11 @@ func Execute() {
if err != nil {
log.Println(err)
}
// TODO: Save href
// source := &intimate.Source{}
// source.Source = sql.NullString{String: href, Valid: true}
// source.Operator = 0
// source.Target = intimate.TTwitchChannel
// source.Url = weburl
// sstore.Insert(source)
}
log.Println("hrefs len:", len(hrefs))
// sstore.Deduplicate(intimate.TTwitchChannel, "source")
wd.Close()
wd.Quit()
time.Sleep(time.Minute * 30)
adriver.Close()
ps.Wait(time.Minute * 5)
}
}

2364
testfile/mirrativ.json Normal file

File diff suppressed because it is too large Load Diff

100
utils.go
View File

@@ -5,9 +5,10 @@ import (
"database/sql"
"fmt"
"log"
"net"
"os"
"os/exec"
"os/signal"
"runtime"
"strconv"
"strings"
"sync/atomic"
@@ -93,7 +94,42 @@ func ParseDuration(dt string) (time.Duration, error) {
return tdt.Sub(zeroTime), nil
}
func GetChromeDriver(port int) selenium.WebDriver {
// AutoCloseDriver bundles a selenium session with a port number so that the
// processes behind the session can be located and killed by Close.
type AutoCloseDriver struct {
// Webdriver is the selenium session used to drive the browser.
Webdriver selenium.WebDriver
// Port identifies the driver's processes: Close looks for processes whose
// command line contains "port=<Port>".
Port int
}
// Close force-terminates the processes that belong to this driver.
//
// It looks up every process whose command line contains "port=<Port>" with
// pgrep, sends SIGKILL to the children of those processes (pkill -9 -P), and
// then sends the default kill signal to the matched processes themselves.
// Close is best-effort: failures are logged and never returned.
func (adriver *AutoCloseDriver) Close() {
	// Anchor the pattern after the port number so that, for example, port 3030
	// does not also match a process started with port=30301.
	findshell := fmt.Sprintf(`pgrep -f 'port=%d( |$)'`, adriver.Port)
	data, err := exec.Command("/bin/bash", "-c", findshell).Output()
	if err != nil {
		log.Println(err)
		log.Println(string(data))
		return
	}

	// pgrep prints one PID per line with a trailing newline. Split on
	// whitespace so that several matches cannot inject extra lines into the
	// shell commands built below.
	pids := strings.Fields(string(data))
	if len(pids) == 0 {
		return
	}

	// pkill -P takes a comma-separated list of parent PIDs. It exits non-zero
	// when no child matched, which must not stop us from killing the parents.
	killshell := fmt.Sprintf("pkill -9 -P %s", strings.Join(pids, ","))
	if err = exec.Command("/bin/bash", "-c", killshell).Run(); err != nil {
		log.Println(err)
	}

	if err = exec.Command("/bin/bash", "-c", "kill "+strings.Join(pids, " ")).Run(); err != nil {
		log.Println(err)
	}
}
func GetChromeDriver() *AutoCloseDriver {
port := GetFreePort()
var err error
caps := selenium.Capabilities{"browserName": "chrome"}
@@ -131,28 +167,44 @@ func GetChromeDriver(port int) selenium.WebDriver {
chromecaps.ExcludeSwitches = append(chromecaps.ExcludeSwitches, "enable-automation")
caps.AddChrome(chromecaps)
_, err = selenium.NewChromeDriverService("/usr/bin/chromedriver", port)
if err != nil {
panic(err)
}
wd, err := selenium.NewRemote(caps, fmt.Sprintf("http://localhost:%d/wd/hub", port))
if err != nil {
panic(err)
}
runtime.SetFinalizer(wd, func(obj interface{}) {
if err := obj.(selenium.WebDriver).Close(); err != nil {
log.Println(err)
}
if err := obj.(selenium.WebDriver).Quit(); err != nil {
log.Println(err)
}
})
adriver := &AutoCloseDriver{}
adriver.Port = port
adriver.Webdriver = wd
// runtime.SetFinalizer(adriver, func(obj interface{}) {
// adriver := obj.(*AutoCloseDriver)
// adriver.Webdriver.Close()
// adriver.Webdriver.Quit()
// killshell := fmt.Sprintf("pkill -P `pgrep -f 'port=%d '` && pkill -f 'port=%d '", port, port)
// log.Println(killshell)
// // log.Printf(fmt.Sprintf("kill -9 $(lsof -t -i:%d)", port))
// // cmd := exec.Command("sh", "-c", killshell)
// // err = cmd.Run()
// // if err != nil {
// // log.Println(err)
// // }
// })
wd.ExecuteScript("windows.navigator.webdriver = undefined", nil)
if err != nil {
panic(err)
}
return wd
return adriver
}
// PerfectShutdown 完美关闭程序
@@ -180,6 +232,18 @@ func (ps *PerfectShutdown) IsClose() bool {
return atomic.LoadInt32(&ps.loop) == 0
}
// Wait blocks for up to tm while polling the shutdown flag at most one second
// apart. It returns false as soon as IsClose reports true, and true once tm
// has fully elapsed without a shutdown being requested.
func (ps *PerfectShutdown) Wait(tm time.Duration) bool {
	deadline := time.Now().Add(tm)
	for {
		if ps.IsClose() {
			return false
		}

		remaining := time.Until(deadline)
		if remaining <= 0 {
			return true
		}

		// Cap each nap at one second so shutdown is noticed promptly, and at
		// the time left so the call never overshoots tm.
		if remaining > time.Second {
			remaining = time.Second
		}
		time.Sleep(remaining)
	}
}
type Counter struct {
dcount int
count int
@@ -305,3 +369,17 @@ func (wf *WaitFor) WaitWithTimeout(xpath string, timeout time.Duration, do func(
}, timeout)
}
// GetFreePort returns a TCP port on localhost that was unused at the moment of
// the call. It binds a listener to port 0 so the operating system picks the
// port, reads the assigned number, and releases the listener before returning.
// It panics if the address cannot be resolved or the listener cannot be opened.
func GetFreePort() int {
	loopback, resolveErr := net.ResolveTCPAddr("tcp", "localhost:0")
	if resolveErr != nil {
		panic(resolveErr)
	}

	listener, listenErr := net.ListenTCP("tcp", loopback)
	if listenErr != nil {
		panic(listenErr)
	}
	defer listener.Close()

	bound := listener.Addr().(*net.TCPAddr)
	return bound.Port
}