add new nimo code

change more
This commit is contained in:
eson 2020-08-28 19:07:12 +08:00
parent eee4e30585
commit 5d50119825
30 changed files with 10540 additions and 110 deletions

View File

@ -27,7 +27,7 @@ type UserLive struct {
Title string `exp:"//h1[contains(@class,'MovieTitle__Title')]"`
LiveStartTime string `exp:"//meta[@itemprop='uploadDate']/@content"`
LiveEndTime string `exp:"//meta[@itemprop='duration']/@content"`
Tags []string `exp:"//a[contains(@class,'TagButton')]"`
Tags []string `exp:"//div[contains(@class,'MovieMetaContent__TagContainer')]//a[@role ='button']"`
}
// Execute 执行
@ -135,8 +135,9 @@ func Execute() {
streamer.Uid = source.StreamerId.Int64
streamer.UpdateTime = source.UpdateTime
streamer.Tags = clog.Tags
if clog.Tags != nil {
streamer.Tags = clog.Tags
}
clog.Platform = intimate.Popenrec
clog.UserId = userId
clog.UpdateTime = source.UpdateTime

View File

@ -113,42 +113,38 @@ func main() {
clog.LiveTitle = sql.NullString{String: ldata.LiveTitle, Valid: true}
clog.Tags = tags
// fl, err := intimate.ParseNumberEx(ldata.Follower)
// if err == nil {
clog.Followers = sql.NullInt64{Int64: int64(ldata.Follower), Valid: true}
switch {
case ldata.Follower <= 100:
streamer.UpdateInterval = 360
streamer.UpdateInterval = 720
case ldata.Follower <= 1000:
streamer.UpdateInterval = 320
case ldata.Follower <= 10000:
streamer.UpdateInterval = 240
case ldata.Follower <= 100:
streamer.UpdateInterval = 120
default:
streamer.UpdateInterval = 60
streamer.UpdateInterval = 120
}
streamer.UpdateInterval = 60
// } else {
// log.Println(err)
// }
clog.Views = sql.NullInt64{Int64: ldata.MaxViews, Valid: true}
// views, err := strconv.Atoi(ldata.MaxViews)
// if err == nil {
// clog.Views = sql.NullInt64{Int64: int64(views), Valid: true}
// } else {
// clog.Views = sql.NullInt64{Int64: int64(0), Valid: true}
// // log.Println(err, streamer.UserId)
// }
// st, err := strconv.Atoi(ldata.LiveStart)
if ldata.LiveStart != "" {
st, err := time.Parse("Mon, 02 Jan 2006 15:04:05 -0700", ldata.LiveStart)
if err == nil {
startTime := st
clog.LiveStartTime = sql.NullTime{Time: startTime, Valid: true}
dt, err := strconv.Atoi(ldata.LiveDuration)
if time.Now().Sub(startTime) >= time.Hour*24*90 {
liveduration := time.Now().Sub(startTime)
switch {
case liveduration >= time.Hour*24*240:
streamer.Operator = 5
case liveduration >= time.Hour*24*60:
streamer.UpdateInterval = 60 * 24 * 30
case liveduration >= time.Hour*24*30:
streamer.UpdateInterval = 60 * 24 * 15
case liveduration >= time.Hour*24*15:
streamer.UpdateInterval = 60 * 24 * 7
case liveduration >= time.Hour*24*7:
streamer.UpdateInterval = 60 * 24 * 3
}
if err == nil {

View File

@ -22,21 +22,14 @@ func main() {
wd := intimate.GetChromeDriver(3030)
ps := intimate.NewPerfectShutdown()
counter := intimate.NewCounter()
counter.SetMaxLimit(200)
counter.SetMaxToDo(func(olist ...interface{}) error {
owd := olist[0].(*selenium.WebDriver)
(*owd).Close()
(*owd).Quit()
*owd = intimate.GetChromeDriver(3030)
return nil
}, &wd)
var count = 0
var countlimt = 200
var lasterr error = nil
// var err error
for !ps.IsClose() {
streamer, err := estore.Pop(intimate.Ptwitch, 0)
streamer, err := estore.PopNoWait(intimate.Ptwitch, "tags is NULL", 15)
if streamer == nil || err != nil {
if err != lasterr {
log.Println(err, lasterr)
@ -117,18 +110,23 @@ func main() {
case fl > 100000:
streamer.UpdateInterval = 120
case fl > 10000:
streamer.UpdateInterval = 240
streamer.UpdateInterval = 240 * 2
case fl > 1000:
streamer.UpdateInterval = 360
streamer.UpdateInterval = 360 * 2
case fl > 100:
streamer.UpdateInterval = 720
streamer.UpdateInterval = 720 * 2
case fl > 0:
streamer.UpdateInterval = 1440
streamer.UpdateInterval = 1440 * 4
}
streamer.UpdateTime = clog.UpdateTime
estore.UpdateStreamer(streamer)
counter.AddWithReset(1)
count++
if count >= countlimt {
count = 0
// wd.Quit()
wd = intimate.GetChromeDriver(3030)
}
}
wd.Close()

7
go.mod
View File

@ -5,13 +5,14 @@ go 1.15
require (
github.com/474420502/extractor v0.9.6
github.com/474420502/focus v0.12.0
github.com/474420502/gcurl v0.2.0
github.com/474420502/requests v1.7.0
github.com/474420502/gcurl v0.4.4
github.com/474420502/requests v1.9.1
github.com/davecgh/go-spew v1.1.1
github.com/go-sql-driver/mysql v1.5.0
github.com/lestrrat-go/libxml2 v0.0.0-20200215080510-6483566f52cb
github.com/tebeka/selenium v0.9.9
github.com/tidwall/gjson v1.6.0
github.com/tidwall/pretty v1.0.1 // indirect
golang.org/x/net v0.0.0-20200707034311-ab3426394381 // indirect
golang.org/x/net v0.0.0-20200822124328-c89045814202 // indirect
gopkg.in/yaml.v2 v2.3.0
)

20
go.sum
View File

@ -8,10 +8,28 @@ github.com/474420502/focus v0.12.0 h1:+icbmj7IEOefvTegHt5EpcHt6WFbe2miIrceUJx2Ev
github.com/474420502/focus v0.12.0/go.mod h1:d0PMjtMxFz1a9HIhwyFPkWa+JF+0LgOrEUfd8iZka6s=
github.com/474420502/gcurl v0.2.0 h1:m6+vw4NX4f5Tfp7c3nuaIgHUE/7zTX6K3xK+pTCBoCo=
github.com/474420502/gcurl v0.2.0/go.mod h1:kJZDbgXn5wbAaR+hhBi4Sbw44P4igJ7qYXC6mejLuhQ=
github.com/474420502/gcurl v0.3.0 h1:j0YP55ANQUHDF86pL5rCqfC+dX887uL9GxHFE3Zj31k=
github.com/474420502/gcurl v0.3.0/go.mod h1:kJZDbgXn5wbAaR+hhBi4Sbw44P4igJ7qYXC6mejLuhQ=
github.com/474420502/gcurl v0.4.0 h1:HOnuYhZD2rwvAXdrF1tqS6gQYYcpnpcPVdOVbDsFT4E=
github.com/474420502/gcurl v0.4.0/go.mod h1:kJZDbgXn5wbAaR+hhBi4Sbw44P4igJ7qYXC6mejLuhQ=
github.com/474420502/gcurl v0.4.1 h1:js/s+w1yBkpSU5vo3kXLQs5F+NMwbNWI23Bcx3LDnnQ=
github.com/474420502/gcurl v0.4.1/go.mod h1:IOyP4j8lUCJaHqODzRyeZQcrQy9DGDalLc3z1gVuiJ4=
github.com/474420502/gcurl v0.4.3 h1:CwyKPrv8mzSL+pa7mrYpuVNEC7rst6MdHx0Enn+7cUk=
github.com/474420502/gcurl v0.4.3/go.mod h1:IOyP4j8lUCJaHqODzRyeZQcrQy9DGDalLc3z1gVuiJ4=
github.com/474420502/gcurl v0.4.4 h1:ZILu7RRjDBGHpTGmuWGKf1NZZbZsC7AHPlI8RHqs9As=
github.com/474420502/gcurl v0.4.4/go.mod h1:7w4knyVJa1ia4I1xd0krG51fKLGwMmNn5sfG2zPWbqM=
github.com/474420502/htmlquery v1.2.4-0.20200812072201-e871dd09247a h1:E1T6CYQKsUn7fMvNbeKfISjBLfOJjZX4KpWwStT20Kc=
github.com/474420502/htmlquery v1.2.4-0.20200812072201-e871dd09247a/go.mod h1:AoSN890esHwNKecV0tCs+W0ele1xgFL1Jqk6UcrdxgU=
github.com/474420502/requests v1.7.0 h1:oaBwVrxZ7yZ+hDOKwHm2NflYib2y1geIUxBxQ2U48mw=
github.com/474420502/requests v1.7.0/go.mod h1:SLXrQ5dL9c7dkIeKNUCBAjOIt3J9KFCS2RQjWJecNwo=
github.com/474420502/requests v1.8.1 h1:zR2PE97PY6qTfwCtUl7fJ2Sg7JEXcXZg2nxgxrOutFU=
github.com/474420502/requests v1.8.1/go.mod h1:SLXrQ5dL9c7dkIeKNUCBAjOIt3J9KFCS2RQjWJecNwo=
github.com/474420502/requests v1.8.2 h1:DiNLp8IlHjZKgNJbzLcwt89YosWBfkmjEMNmUXxF+Hs=
github.com/474420502/requests v1.8.2/go.mod h1:SLXrQ5dL9c7dkIeKNUCBAjOIt3J9KFCS2RQjWJecNwo=
github.com/474420502/requests v1.9.0 h1:c/oj1G4pGulIzljZ0WqoF8dkOCWjfJLmoyH+DCVRGH8=
github.com/474420502/requests v1.9.0/go.mod h1:x2T9l+e40R6kxxMvNm+YSZ9D6BHAXUDak4kQElIPJ9A=
github.com/474420502/requests v1.9.1 h1:gCDmBCW1ym8zOvKKBGjEG9wafMY7imYk2u28+Oy1WEc=
github.com/474420502/requests v1.9.1/go.mod h1:x2T9l+e40R6kxxMvNm+YSZ9D6BHAXUDak4kQElIPJ9A=
github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802 h1:1BDTz0u9nC3//pOCMdNH+CiXJVYJh5UQNCOBG7jbELc=
@ -112,6 +130,8 @@ golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/
golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200707034311-ab3426394381 h1:VXak5I6aEWmAXeQjA+QSZzlgNrpq9mjcfDemuexIKsU=
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200822124328-c89045814202 h1:VvcQYSHwXgi7W+TpUR6A9g6Up98WAHf3f/ulnJ62IyA=
golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=

View File

@ -12,4 +12,7 @@ const (
// Ptwitcasting twitcasting 平台
Ptwitcasting Platform = "twitcasting"
// PNimo PNimo 平台
PNimo Platform = "nimo"
)

View File

@ -228,15 +228,13 @@ func NewStoreExtractor() *StoreExtractor {
return &StoreExtractor{db: db}
}
// PopNoWait pops one unprocessed record for the platform that also matches the caller-supplied SQL condition.
func (store *StoreExtractor) Pop(platform Platform, operators ...int32) (*Streamer, error) {
func (store *StoreExtractor) PopNoWait(platform Platform, condition string, operators ...int32) (*Streamer, error) {
tx, err := store.db.Begin()
if err != nil {
return nil, err
}
var args = []interface{}{string(platform)}
selectSQL := `select uid, update_time, user_id, update_url, is_update_streamer, update_interval from ` + StreamerTable + ` where platform = ? and TIMESTAMPDIFF(MINUTE , update_time, CURRENT_TIMESTAMP()) >= update_interval`
selectSQL := `select uid, update_time, user_id, tags, live_url, update_url, is_update_streamer, update_interval from ` + StreamerTable + ` where platform = ? and ` + condition
if len(operators) == 0 {
selectSQL += " and operator = ?"
args = append(args, 0)
@ -264,7 +262,52 @@ func (store *StoreExtractor) Pop(platform Platform, operators ...int32) (*Stream
s := &Streamer{}
// uid, url, target_type, source, ext, operator
err = row.Scan(&s.Uid, &s.UpdateTime, &s.UserId, &s.UpdateUrl, &s.IsUpdateStreamer, &s.UpdateInterval)
err = row.Scan(&s.Uid, &s.UpdateTime, &s.UserId, &s.Tags, &s.LiveUrl, &s.UpdateUrl, &s.IsUpdateStreamer, &s.UpdateInterval)
if err != nil {
return nil, err
}
s.Set("LastOperator", s.Operator)
_, err = tx.Exec("update "+StreamerTable+" set operator = ? where uid = ?", OperatorWait, s.Uid)
return s, nil
}
// Pop 弹出一条未处理的数据
func (store *StoreExtractor) Pop(platform Platform, operators ...int32) (*Streamer, error) {
tx, err := store.db.Begin()
if err != nil {
return nil, err
}
var args = []interface{}{string(platform)}
selectSQL := `select uid, update_time, user_id, tags, live_url, update_url, is_update_streamer, update_interval from ` + StreamerTable + ` where platform = ? and TIMESTAMPDIFF(MINUTE , update_time, CURRENT_TIMESTAMP()) >= update_interval`
if len(operators) == 0 {
selectSQL += " and operator = ?"
args = append(args, 0)
} else {
for _, operator := range operators {
selectSQL += " and operator = ?"
args = append(args, operator)
}
}
defer func() {
err := tx.Commit()
if err != nil {
log.Println(err)
err = tx.Rollback()
if err != nil {
log.Println(err)
}
}
store.popCount++
}()
// log.Println(selectSQL + ` limit 1 for update`)
row := tx.QueryRow(selectSQL+` limit 1 for update`, args...)
s := &Streamer{}
// uid, url, target_type, source, ext, operator
err = row.Scan(&s.Uid, &s.UpdateTime, &s.UserId, &s.Tags, &s.LiveUrl, &s.UpdateUrl, &s.IsUpdateStreamer, &s.UpdateInterval)
if err != nil {
return nil, err
}
@ -316,9 +359,9 @@ func (store *StoreExtractor) InsertStreamerList(streamerlist IGet) (isExists boo
}
// InsertStreamer Streamer表, 插入数据
func (store *StoreExtractor) InsertStreamer(streamer IGet) (isExists bool) {
func (store *StoreExtractor) InsertStreamer(streamer *Streamer) (isExists bool) {
// select uid from table where platform = ? and user_id = ?
selectSQL := "SELECT is_update_url, uid FROM " + StreamerTable + " WHERE platform = ? AND user_id = ?"
// selectSQL := "SELECT is_update_url, uid FROM " + StreamerTable + " WHERE platform = ? AND user_id = ?"
tx, err := store.db.Begin()
if err != nil {
panic(err)
@ -335,18 +378,17 @@ func (store *StoreExtractor) InsertStreamer(streamer IGet) (isExists bool) {
}
}()
row := tx.QueryRow(selectSQL+` LIMIT 1 FOR UPDATE`, streamer.Get("Platform"), streamer.Get("UserId"))
var isUpdateUrl bool
var Uid int64
if err = row.Scan(&isUpdateUrl, &Uid); err == nil {
if isUpdateUrl {
tx.Exec("UPDATE "+StreamerTable+" SET update_url = ?", streamer.Get("UpdateUrl"))
}
streamer.(ISet).Set("Uid", Uid)
return true
}
streamer.UpdateTime = sql.NullTime{Time: time.Now().Add(-time.Hour * 100000), Valid: true}
_, err = tx.Exec("INSERT IGNORE INTO "+StreamerTable+"(platform, user_id, user_name, live_url, update_url, tags, update_time) VALUES(?,?,?,?,?,?,?);",
streamer.Platform,
streamer.UserId,
streamer.UserName,
streamer.LiveUrl,
streamer.UpdateUrl,
streamer.Tags,
streamer.UpdateTime,
)
_, err = tx.Exec("INSERT INTO "+StreamerTable+"(platform, user_id, update_url, tags, update_time) VALUES(?,?,?,?,?);", streamer.Get("Platform"), streamer.Get("UserId"), streamer.Get("UpdateUrl"), streamer.Get("Tags"), time.Now().Add(-time.Hour*100000))
if err != nil {
panic(err)
}

1
supervisor_conf/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
conf.d

View File

@ -0,0 +1,7 @@
# Remove every regular file named 'log' under the sibling bin/ directory
# (the parent of the current working directory, plus /bin).
CURPATH=$(pwd)
BINPATH="$(dirname "$CURPATH")/bin"
# Quote the path so directories containing spaces or glob characters are not split.
find "$BINPATH" -type f -name 'log' -exec rm {} +

View File

@ -1 +1,8 @@
ln -sf `pwd`/*.conf /etc/supervisor/conf.d/
# Render the *.conf templates of the current directory into ./conf.d, replacing
# the MYPATH placeholder with the parent directory, then link the rendered
# files into supervisor's conf.d.
CURPATH=$(pwd)
SUPPATH=$(dirname "$CURPATH")
# Paths are quoted so a checkout location containing spaces is handled; the
# globs stay outside the quotes so they still expand.
mkdir -p "$CURPATH/conf.d"
cp *.conf "$CURPATH/conf.d/"
# '#' is the sed delimiter here, so SUPPATH must not itself contain '#'.
sed -i "s#MYPATH#$SUPPATH#g" "$CURPATH"/conf.d/*.conf
ln -sf "$CURPATH"/conf.d/*.conf /etc/supervisor/conf.d/

View File

@ -2,9 +2,9 @@
nodaemon=true
[program:openrec_extractor]
directory = /home/eson/test/intimate/bin/openrec_extractor/
command= /home/eson/test/intimate/bin/openrec_extractor/openrec_extractor
directory = MYPATH/bin/openrec_extractor/
command= MYPATH/bin/openrec_extractor/openrec_extractor
autorestart=true
stderr_logfile=/home/eson/test/intimate/bin/openrec_extractor/log
stderr_logfile=MYPATH/bin/openrec_extractor/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -2,9 +2,9 @@
nodaemon=true
[program:openrec_ranking]
directory = /home/eson/test/intimate/bin/openrec_task1
command= /home/eson/test/intimate/bin/openrec_task1/openrec_task1
directory = MYPATH/bin/openrec_task1
command= MYPATH/bin/openrec_task1/openrec_task1
autorestart=true
stderr_logfile=/home/eson/test/intimate/bin/openrec_task1/log
stderr_logfile=MYPATH/bin/openrec_task1/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -2,11 +2,11 @@
nodaemon=true
[program:openrec_source]
directory = /home/eson/test/intimate/bin/openrec_task2/
command= /home/eson/test/intimate/bin/openrec_task2/openrec_task2
directory = MYPATH/bin/openrec_task2/
command= MYPATH/bin/openrec_task2/openrec_task2
process_name=%(program_name)s_%(process_num)02d ;多进程名称
numprocs=4 ;启动多个进程
autorestart=true
stderr_logfile=/home/eson/test/intimate/bin/openrec_task2/log
stderr_logfile=MYPATH/bin/openrec_task2/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -1,12 +0,0 @@
# [supervisord]
# nodaemon=true
# [program:openrec_extractor]
# directory = /home/eson/test/intimate/extractor/openrec_extractor
# command= /home/eson/test/intimate/extractor/openrec_extractor/openrec_extractor
# autorestart=true
# # stdout_logfile=/home/eson/test/intimate/extractor/openrec/stdout
# # stdout_logfile_maxbytes=0
# stderr_logfile=/home/eson/test/intimate/extractor/openrec/log
# stderr_logfile_maxbytes=0
# stopsignal=QUIT

View File

@ -3,12 +3,12 @@ nodaemon=false
[program:twitcasting_extractor]
environment=DISPLAY=":99"
directory = /home/eson/test/intimate/bin/twitcasting_extractor/
command= /home/eson/test/intimate/bin/twitcasting_extractor/twitcasting_extractor
directory = MYPATH/bin/twitcasting_extractor/
command= MYPATH/bin/twitcasting_extractor/twitcasting_extractor
process_name=%(program_name)s_%(process_num)02d ;多进程名称
numprocs=3 ;启动多个进程
autorestart=true
stderr_logfile=/home/eson/test/intimate/bin/twitcasting_extractor/log
stderr_logfile=MYPATH/bin/twitcasting_extractor/log
# stderr_logfile=%(supervisorctl.var.directory)s/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -3,11 +3,11 @@ nodaemon=false
[program:twitcasting_task1]
environment=DISPLAY=":99"
directory = /home/eson/test/intimate/bin/twitcasting_task1/
command= /home/eson/test/intimate/bin/twitcasting_task1/twitcasting_task1
directory = MYPATH/bin/twitcasting_task1/
command= MYPATH/bin/twitcasting_task1/twitcasting_task1
# process_name=%(program_name)s_%(process_num)02d ;多进程名称
# numprocs=1 ;启动多个进程
autorestart=true
stderr_logfile=/home/eson/test/intimate/bin/twitcasting_task1/log
stderr_logfile=MYPATH/bin/twitcasting_task1/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -3,11 +3,11 @@ nodaemon=false
[program:twitch_extractor]
environment=DISPLAY=":99"
directory = /home/eson/test/intimate/bin/twitch_extractor
command= /home/eson/test/intimate/bin/twitch_extractor/twitch_extractor
directory = MYPATH/bin/twitch_extractor
command= MYPATH/bin/twitch_extractor/twitch_extractor
process_name=%(program_name)s_%(process_num)02d ;多进程名称
numprocs=5 ;启动多个进程
autorestart=true
stderr_logfile=/home/eson/test/intimate/bin/twitch_extractor/log
stderr_logfile=MYPATH/bin/twitch_extractor/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -3,11 +3,11 @@ nodaemon=true
[program:twitch_extractor_p1]
environment=DISPLAY=":99",pac_proxy=http://localhost:1090/pac
directory = /home/eson/test/intimate/bin/twitch_extractor
command= /home/eson/test/intimate/bin/twitch_extractor/twitch_extractor
directory = MYPATH/bin/twitch_extractor
command= MYPATH/bin/twitch_extractor/twitch_extractor
process_name=%(program_name)s_%(process_num)02d ;多进程名称
numprocs=2 ;启动多个进程
autorestart=true
stderr_logfile=/home/eson/test/intimate/bin/twitch_extractor/log
stderr_logfile=MYPATH/bin/twitch_extractor/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -3,11 +3,11 @@ nodaemon=false
[program:twitch_extractor_p2]
environment=DISPLAY=":99",pac_proxy=http://localhost:1090/pac1
directory = /home/eson/test/intimate/bin/twitch_extractor
command= /home/eson/test/intimate/bin/twitch_extractor/twitch_extractor
directory = MYPATH/bin/twitch_extractor
command= MYPATH/bin/twitch_extractor/twitch_extractor
process_name=%(program_name)s_%(process_num)02d ;多进程名称
numprocs=2 ;启动多个进程
autorestart=true
stderr_logfile=/home/eson/test/intimate/bin/twitch_extractor/log
stderr_logfile=MYPATH/bin/twitch_extractor/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -3,11 +3,11 @@ nodaemon=false
[program:twitch_task2]
environment=DISPLAY=":99"
directory = /home/eson/test/intimate/bin/twitch_task2
command= /home/eson/test/intimate/bin/twitch_task2/twitch_task2
directory = MYPATH/bin/twitch_task2
command= MYPATH/bin/twitch_task2/twitch_task2
process_name=%(program_name)s_%(process_num)02d ;多进程名称
numprocs=6 ;启动多个进程
autorestart=true
stderr_logfile=/home/eson/test/intimate/bin/twitch_task2/log
stderr_logfile=MYPATH/bin/twitch_task2/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -3,11 +3,6 @@
nodaemon=true
[program:xvfb-99]
# directory = /home/eson/test/intimate/extractor/openrec_extractor
command=/usr/bin/Xvfb :99 -screen 0 1280x720x24 -ac -nolisten tcp -dpi 96 +extension RANDR -nolisten tcp
autorestart=true
# # stdout_logfile=/home/eson/test/intimate/extractor/openrec/stdout
# # stdout_logfile_maxbytes=0
# stderr_logfile=/home/eson/test/intimate/extractor/openrec/log
# stderr_logfile_maxbytes=0
# stopsignal=QUIT

View File

@ -12,4 +12,7 @@ const (
// STTwitcasting STTwitcasting源table名称
STTwitcasting SourceTable = "source_twitcasting"
// STNimo nimo源table名称
STNimo SourceTable = "source_nimo"
)

View File

@ -0,0 +1,106 @@
package main
import (
"database/sql"
"encoding/json"
"intimate"
"log"
"time"
"github.com/474420502/gcurl"
"github.com/tidwall/gjson"
)
// estore is the package-wide store handle obtained from
// intimate.NewStoreExtractor when the package is initialised.
var estore = intimate.NewStoreExtractor()
// Execute crawls the Nimo live-room listing API page by page and hands one
// streamer record per room to estore.InsertStreamer.
//
// The loop runs until the PerfectShutdown handle reports closed, or until a
// response does not carry an array at data.result.liveRoomViewList (the body
// is logged and the function returns). A failed request is logged and retried
// after one second. An empty page resets the page parameter to 1 and pauses
// for ten minutes before the next request.
func Execute() {
	// Request template; reproduced verbatim.
	const listRoomsCmd = `curl 'https://api.nimo.tv/oversea/nimo/api/v2/liveRoom/liveRoomPage-1-100-/HK/1028/1000' \
-H 'authority: api.nimo.tv' \
-H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36' \
-H 'content-type: multipart/form-data; boundary=----WebKitFormBoundary3bCA1lzvhj4kBR4Q' \
-H 'accept: */*' \
-H 'origin: https://www.nimo.tv' \
-H 'sec-fetch-site: same-site' \
-H 'sec-fetch-mode: cors' \
-H 'sec-fetch-dest: empty' \
-H 'referer: https://www.nimo.tv/lives' \
-H 'accept-language: zh-CN,zh;q=0.9' \
--data-binary $'------WebKitFormBoundary3bCA1lzvhj4kBR4Q\r\nContent-Disposition: form-data; name="keyType"\r\n\r\n0\r\n------WebKitFormBoundary3bCA1lzvhj4kBR4Q\r\nContent-Disposition: form-data; name="body"\r\n\r\n{"deviceType":7,"requestSource":"WEB","iNetType":5}\r\n------WebKitFormBoundary3bCA1lzvhj4kBR4Q--\r\n' \
--compressed`

	parsed := gcurl.Parse(listRoomsCmd)
	session := parsed.CreateSession()
	req := parsed.CreateTemporary(session)
	// NOTE(review): presumably this binds the page number inside the URL path
	// ("liveRoomPage-1-...") so it can be changed between requests — confirm
	// against the gcurl PathParam documentation.
	page := req.PathParam("liveRoomPage-(1)-")

	shutdown := intimate.NewPerfectShutdown()
	for !shutdown.IsClose() {
		resp, err := req.Execute()
		if err != nil {
			log.Println(err)
			time.Sleep(time.Second)
			continue
		}

		roomlist := gjson.ParseBytes(resp.Content()).Get("data.result.liveRoomViewList")
		if !roomlist.IsArray() {
			// Unexpected payload: dump it and stop crawling.
			log.Println("json is error")
			log.Println(string(resp.Content()))
			return
		}

		rooms := roomlist.Array()
		log.Println(req.GetURLRawPath(), "rooms:", len(rooms))
		if len(rooms) == 0 {
			// Ran past the last page: start over from page 1 after a pause.
			page.IntSet(1)
			time.Sleep(10 * time.Minute)
			continue
		}

		for _, room := range rooms {
			userid := room.Get("id").String()
			if userid == "" {
				log.Println("userid is null.", room.String())
				continue
			}

			channel := room.Get("roomTypeName").String()
			username := room.Get("anchorName").String()

			streamer := &intimate.Streamer{}
			streamer.Platform = intimate.PNimo
			streamer.UserId = userid
			streamer.LiveUrl = sql.NullString{String: "https://www.nimo.tv/live/" + userid, Valid: true}
			streamer.Channel = sql.NullString{String: channel, Valid: channel != ""}
			streamer.UserName = sql.NullString{String: username, Valid: username != ""}

			if labels := room.Get("anchorLabels"); labels.IsArray() {
				// Deliberately starts as a nil slice: an empty label array
				// is therefore marshalled as JSON null, not [].
				var tags []string
				for _, label := range labels.Array() {
					tags = append(tags, label.Get("labelName").String())
				}
				encoded, err := json.Marshal(tags)
				if err != nil {
					panic(err)
				}
				streamer.Tags = encoded
			}

			streamer.UpdateInterval = 120
			estore.InsertStreamer(streamer)
		}

		page.IntAdd(1)
	}
}

View File

@ -0,0 +1,55 @@
package main
import (
"fmt"
"io/ioutil"
"os"
"testing"
"github.com/474420502/gcurl"
)
// CrawlContent requests page 1 (30 rooms per page) of the Nimo live-room
// listing API and returns the raw response body. The args are accepted but
// not used. It panics if the request fails.
func CrawlContent(args ...interface{}) []byte {
	// The single %d verb is the page number in the URL path.
	const cmdTemplate = `curl 'https://api.nimo.tv/oversea/nimo/api/v2/liveRoom/liveRoomPage-%d-30-/HK/1028/1000' \
-H 'authority: api.nimo.tv' \
-H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36' \
-H 'content-type: multipart/form-data; boundary=----WebKitFormBoundary3bCA1lzvhj4kBR4Q' \
-H 'accept: */*' \
-H 'origin: https://www.nimo.tv' \
-H 'sec-fetch-site: same-site' \
-H 'sec-fetch-mode: cors' \
-H 'sec-fetch-dest: empty' \
-H 'referer: https://www.nimo.tv/lives' \
-H 'accept-language: zh-CN,zh;q=0.9' \
--data-binary $'------WebKitFormBoundary3bCA1lzvhj4kBR4Q\r\nContent-Disposition: form-data; name="keyType"\r\n\r\n0\r\n------WebKitFormBoundary3bCA1lzvhj4kBR4Q\r\nContent-Disposition: form-data; name="body"\r\n\r\n{"deviceType":7,"requestSource":"WEB","iNetType":5}\r\n------WebKitFormBoundary3bCA1lzvhj4kBR4Q--\r\n' \
--compressed`

	firstPage := fmt.Sprintf(cmdTemplate, 1)
	req := gcurl.Parse(firstPage).CreateTemporary(nil)

	resp, err := req.Execute()
	if err != nil {
		panic(err)
	}
	return resp.Content()
}
// openTestFile returns the full contents of the fixture file
// ../../../testfile/nimo1.json, resolved against the working directory.
// Its arguments are ignored; the variadic signature matches the type of the
// Crawl variable. It panics if the file cannot be opened or read.
func openTestFile(...interface{}) []byte {
	f, err := os.Open("../../../testfile/nimo1.json")
	if err != nil {
		panic(err)
	}
	// Release the descriptor once reading is done; without this every call
	// leaked an open file.
	defer f.Close()

	data, err := ioutil.ReadAll(f)
	if err != nil {
		panic(err)
	}
	return data
}
// Crawl is a function variable with the same signature as CrawlContent and
// openTestFile.
// NOTE(review): nothing in the visible code assigns or calls it — confirm
// whether it is still needed.
var Crawl func(...interface{}) []byte
// Test calls Execute directly and makes no assertions of its own.
// NOTE(review): Execute is presumably this package's crawler entry point, in
// which case the test performs real requests and store writes — confirm.
func Test(t *testing.T) {
Execute()
}

View File

@ -34,7 +34,7 @@ func Execute() {
-H 'if-none-match: W/"25edb-aUYBdmLqZcr6DW4ZWKX9r2aqolg"' \
--compressed`
g := gcurl.ParseRawCURL(turl)
g := gcurl.Parse(turl)
tp := g.Temporary()
for !ps.IsClose() {

View File

@ -69,7 +69,7 @@ func Execute() {
cookies := ses.GetCookies(tp.GetParsedURL())
scurl := updateUrl["supporters"] //获取打赏者的数据
curl := gcurl.ParseRawCURL(scurl)
curl := gcurl.Parse(scurl)
supportersSession := curl.CreateSession()
temporary := curl.CreateTemporary(supportersSession)

View File

@ -4,7 +4,7 @@ import (
"testing"
)
func estCase1(t *testing.T) {
func TestCase1(t *testing.T) {
Execute()
}

View File

@ -159,7 +159,9 @@ func Execute() {
streamer.Operator = 0
if estore.InsertStreamer(streamer) {
// log.Println("streamer update tags", streamer.Uid, tags)
estore.Update(streamer, "Tags", streamer.Tags)
if streamer.Tags != nil {
estore.Update(streamer, "Tags", streamer.Tags)
}
}
}
log.Println("streamer find", len(articles))

View File

@ -2,6 +2,6 @@ package main
import "testing"
func estMain(t *testing.T) {
func TestMain(t *testing.T) {
main()
}

10205
testfile/nimo1.json Normal file

File diff suppressed because it is too large Load Diff