Merge branch 'release/v0.5.4'

This commit is contained in:
eson 2020-08-17 14:53:10 +08:00
commit d2b05e864c
38 changed files with 326 additions and 440 deletions

1
.gitignore vendored
View File

@ -1,3 +1,4 @@
bin
*.log
log
screenlog.*

View File

@ -7,13 +7,17 @@ src=`pwd`
for path in `ls -d $source_tasks`
do
echo $path
cd $path && go build
projectname=${path##*/}
projectworkspace=$src/bin/$projectname
cd $path && mkdir $projectworkspace -p && go build -o $projectworkspace/$projectname
cd $src
done
for path in `ls -d $extractor_tasks`
do
echo $path
cd $path && go build
projectname=${path##*/}
projectworkspace=$src/bin/$projectname
cd $path && mkdir $projectworkspace -p && go build -o $projectworkspace/$projectname
cd $src
done

View File

@ -15,6 +15,5 @@ import (
*/
func main() {
oe := &OpenrecExtractor{}
oe.Execute()
Execute()
}

View File

@ -3,43 +3,40 @@ package main
import (
"database/sql"
"encoding/json"
"errors"
"intimate"
"log"
"os"
"os/signal"
"regexp"
"strconv"
"strings"
"sync/atomic"
"syscall"
"time"
"github.com/474420502/extractor"
"github.com/tidwall/gjson"
)
var estore = intimate.NewStoreExtractor()
var sstore = intimate.NewStoreSource(string(intimate.STOpenrec))
// OpenrecExtractor 提取方法
type OpenrecExtractor struct {
user *intimate.ExtractorSource
userLive *intimate.ExtractorSource
supporters *intimate.ExtractorSource
// UserInfo is the struct that information extracted from the user page HTML
// is stored in. Each `exp` tag holds the XPath expression that locates the
// value; the `mth` tag presumably names a conversion method applied to the
// matched text (TODO confirm against the extractor library).
type UserInfo struct {
// UserName is the text of the profile name paragraph.
UserName string `exp:"//p[ contains(@class, 'c-global__user__profile__list__name__text')]"`
// Followers is the follower counter shown on the user page.
Followers int64 `exp:"//p[@class='c-global__user__count__row__right js-userCountFollowers']" mth:"r:ParseNumber"`
// Views is the live viewer count from the video thumbnail footer; Execute
// treats a non-zero value as "currently live streaming".
Views int64 `exp:"//ul[@class='c-contents']//p[@class='c-thumbnailVideo__footer__liveCount']" mth:"r:ExtractNumber"`
}
func (oe *OpenrecExtractor) Execute() {
// UserLive is the struct that information extracted from the live page HTML
// is stored in. Each `exp` tag holds the XPath expression that locates the
// value.
type UserLive struct {
// Title is the text of the MovieTitle heading.
Title string `exp:"//h1[contains(@class,'MovieTitle__Title')]"`
// LiveStartTime is the raw uploadDate meta content; Execute parses it with
// the layout "2006-01-02T15:04:05Z07:00".
LiveStartTime string `exp:"//meta[@itemprop='uploadDate']/@content"`
// LiveEndTime holds the raw duration meta content, not a timestamp; Execute
// parses it with intimate.ParseDuration and adds it to the start time.
LiveEndTime string `exp:"//meta[@itemprop='duration']/@content"`
// Tags collects the text of every TagButton link.
Tags []string `exp:"//a[contains(@class,'TagButton')]"`
}
var loop int32 = 1
// Execute 执行
func Execute() {
go func() {
signalchan := make(chan os.Signal)
signal.Notify(signalchan, syscall.SIGKILL, syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGSTOP)
log.Println("accept stop command:", <-signalchan)
atomic.StoreInt32(&loop, 0)
}()
ps := intimate.NewPerfectShutdown()
var lasterr error = nil
for atomic.LoadInt32(&loop) > 0 {
for !ps.IsClose() {
var err error
source, err := sstore.Pop(intimate.TOpenrecUser, 0)
@ -51,6 +48,7 @@ func (oe *OpenrecExtractor) Execute() {
time.Sleep(time.Second * 5)
continue
}
lasterr = nil
sdata := source.Ext.([]byte)
datamap := gjson.ParseBytes(sdata).Map()
@ -60,28 +58,80 @@ func (oe *OpenrecExtractor) Execute() {
streamer := &intimate.Streamer{}
streamer.UserId = userId
streamer.Platform = intimate.Popenrec
// streamer.Platform = intimate.Popenrec 不需要更新字段
htmlUser := datamap["html_user"]
oe.user = intimate.NewExtractorSource(&htmlUser)
oe.user.CreateExtractor()
userEtor := extractor.ExtractHtmlString(htmlUser.String())
ui, ok1 := userEtor.GetObjectByTag(UserInfo{}).(*UserInfo)
htmlLive := datamap["html_live"]
oe.userLive = intimate.NewExtractorSource(&htmlLive)
oe.userLive.CreateExtractor()
liveEtor := extractor.ExtractHtmlString(htmlLive.String())
ul, ok2 := liveEtor.GetObjectByTag(UserLive{}).(*UserLive)
jsonSupporters := datamap["json_supporters"]
oe.supporters = intimate.NewExtractorSource(&jsonSupporters)
clog := &intimate.CollectLog{}
// log.Println(anchorId)
if ok1 {
clog.Followers = sql.NullInt64{Int64: ui.Followers, Valid: true}
clog.Views = sql.NullInt64{Int64: ui.Views, Valid: true}
if ui.Views != 0 {
clog.IsLiveStreaming = true
}
streamer.UserName = sql.NullString{String: ui.UserName, Valid: true}
oe.extractFollowers(clog)
oe.extractUserName(streamer)
oe.extractViewsAndLiveStreaming(clog)
oe.extractGiversAndGratuity(clog)
oe.extractLive(clog)
oe.extractTags(clog)
giverjson := jsonSupporters
var givers []interface{}
var gratuity int64 = 0
for _, v := range giverjson.Array() {
giverSource := gjson.Parse(v.String())
for _, item := range giverSource.Get("data.items").Array() {
givers = append(givers, item.Map())
gratuity += item.Get("total_yells").Int()
}
}
giversbytes, err := json.Marshal(givers)
if err != nil {
log.Println(err)
clog.ErrorMsg = sql.NullString{String: err.Error(), Valid: true}
} else {
clog.Giver = giversbytes
}
clog.Gratuity = sql.NullInt64{Int64: gratuity, Valid: true}
} else {
log.Println("UserInfo may be not exists")
estore.UpdateError(streamer, errors.New("UserInfo may be not exists"))
continue
}
//log.Println(ul)
if ok2 {
clog.LiveTitle = sql.NullString{String: ul.Title, Valid: true}
startTime, err := time.ParseInLocation("2006-01-02T15:04:05Z07:00", ul.LiveStartTime, time.Local)
if err != nil {
log.Println(err)
} else {
clog.LiveStartTime = sql.NullTime{Time: startTime.Local(), Valid: true}
duration, err := intimate.ParseDuration(ul.LiveEndTime)
if err != nil {
log.Println(err)
} else {
endTime := startTime.Add(duration)
clog.LiveStartTime = sql.NullTime{Time: endTime.Local(), Valid: true}
}
}
if tags, err := json.Marshal(ul.Tags); err == nil {
clog.Tags = tags
} else {
log.Println("json error", ul.Tags, clog.Tags)
}
}
streamer.Uid = source.StreamerId.Int64
streamer.UpdateTime = source.UpdateTime
@ -90,149 +140,28 @@ func (oe *OpenrecExtractor) Execute() {
clog.Platform = intimate.Popenrec
clog.UserId = userId
clog.UpdateTime = source.UpdateTime
clog.StreamerUid = streamer.Uid
logUid := estore.InsertClog(clog)
LiveUrl := "https://www.openrec.tv/live/" + userId
streamer.LiveUrl = sql.NullString{String: LiveUrl, Valid: true}
streamer.LatestLogUid = logUid
streamer.Operator = 0
estore.UpdateStreamer(streamer)
// streamer.Operator = 0
log.Println(streamer.UserId)
estore.Update(streamer,
"user_name", streamer.UserName,
"user_id", streamer.UserId,
"live_url", streamer.LiveUrl,
"latest_log_uid", streamer.LatestLogUid,
"update_time", streamer.UpdateTime,
"tags", streamer.Tags,
)
source.Operator = int32(intimate.OperatorExtractorOK)
sstore.UpdateOperator(source)
}
}
// extractFollowers reads the follower counter from the user page and stores
// it in clog under "Followers".
//
// Nothing is stored when the XPath query fails, when the counter node is
// absent, or when its text (after removing thousands separators) is not a
// valid integer, so a failed extraction is never recorded as a valid zero.
func (oe *OpenrecExtractor) extractFollowers(clog intimate.ISet) {
	extractor := oe.user.GetExtractor()
	xp, err := extractor.XPathResult("//p[@class='c-global__user__count__row__right js-userCountFollowers']/text()")
	if err != nil {
		// The result must not be used after a failed query.
		log.Println(err)
		return
	}
	if !xp.NodeIter().Next() {
		log.Println("不存在粉丝数")
		return
	}

	followers := strings.ReplaceAll(xp.String(), ",", "")
	followersInt, err := strconv.ParseInt(followers, 10, 64)
	if err != nil {
		log.Println(err)
		return
	}
	clog.Set("Followers", sql.NullInt64{Int64: followersInt, Valid: true})
}
// extractUserName looks up the profile name text on the user page and, when a
// node is found, stores it in streamer under "UserName". Query errors are
// logged and leave streamer untouched.
func (oe *OpenrecExtractor) extractUserName(streamer intimate.ISet) {
	result, err := oe.user.GetExtractor().XPathResult("//p[ contains(@class, 'c-global__user__profile__list__name__text')]/text()")
	if err != nil {
		log.Println(err)
		return
	}
	if !result.NodeIter().Next() {
		return
	}
	name := sql.NullString{String: result.String(), Valid: true}
	streamer.Set("UserName", name)
}
// liveCountDigits matches the first run of digits and thousands separators in
// the live viewer-count text. It is compiled once instead of on every call.
var liveCountDigits = regexp.MustCompile(`[0-9,]+`)

// extractViewsAndLiveStreaming reads the live viewer count from the user page.
// When the count node exists it stores the number under "Views" and sets
// "IsLiveStreaming" to true; when the query fails or the node is absent it
// leaves clog untouched.
func (oe *OpenrecExtractor) extractViewsAndLiveStreaming(clog intimate.ISet) {
	extractor := oe.user.GetExtractor()
	xp, err := extractor.XPathResult("//ul[@class='c-contents']//p[@class='c-thumbnailVideo__footer__liveCount']/text()")
	if err != nil {
		// The result must not be used after a failed query.
		log.Println(err)
		return
	}
	if !xp.NodeIter().Next() {
		return
	}

	views := strings.ReplaceAll(liveCountDigits.FindString(xp.String()), ",", "")
	viewsint, err := strconv.Atoi(views)
	if err != nil {
		// Best effort: the failure is logged and the count falls back to 0,
		// while the presence of the node still marks the user as live.
		log.Println(err)
	}
	clog.Set("Views", sql.NullInt64{Int64: int64(viewsint), Valid: true})
	clog.Set("IsLiveStreaming", true)
}
// extractGiversAndGratuity walks every supporters page held in oe.supporters,
// collects each entry of "data.items" as a map, and sums their "total_yells".
// The collected entries are stored as JSON under "Giver" (or the marshal error
// under "ErrorMsg"), and the sum is stored under "Gratuity".
func (oe *OpenrecExtractor) extractGiversAndGratuity(clog intimate.ISet) {
	supporters := oe.supporters.GetSource()

	var (
		givers []interface{}
		total  int64
	)
	for _, page := range supporters.Array() {
		// Each array element is itself a JSON document stored as a string.
		items := gjson.Parse(page.String()).Get("data.items").Array()
		for _, it := range items {
			givers = append(givers, it.Map())
			total += it.Get("total_yells").Int()
		}
	}

	if encoded, err := json.Marshal(givers); err == nil {
		clog.Set("Giver", encoded)
	} else {
		log.Println(err)
		clog.Set("ErrorMsg", sql.NullString{String: err.Error(), Valid: true})
	}
	clog.Set("Gratuity", sql.NullInt64{Int64: total, Valid: true})
}
// liveTitlePattern captures the text of the MovieTitle heading (at most 50
// characters). It is compiled once instead of on every call.
var liveTitlePattern = regexp.MustCompile("MovieTitle__Title[^>]+>(.{1,50})</h1>")

// extractLive fills the live-related fields of clog from the live page:
// "LiveTitle" from the title heading, "LiveStartTime" from the uploadDate meta
// tag, and "LiveEndTime" as start time plus the duration meta tag.
//
// Extraction stops at the first step that fails (no title match, query error,
// missing node, unparsable value); fields gathered before that point are kept
// and no zero-value time is stored as valid.
func (oe *OpenrecExtractor) extractLive(clog intimate.ISet) {
	extractor := oe.userLive.GetExtractor()

	mathes := liveTitlePattern.FindStringSubmatch(oe.userLive.GetSource().Str)
	if len(mathes) != 2 {
		return
	}
	clog.Set("LiveTitle", sql.NullString{String: mathes[1], Valid: true})

	content, err := extractor.XPathResult("//meta[@itemprop='uploadDate']/@content")
	if err != nil {
		log.Println(err)
		return
	}
	iter := content.NodeIter()
	if !iter.Next() {
		return
	}
	tm, err := time.ParseInLocation("2006-01-02T15:04:05Z07:00", iter.Node().NodeValue(), time.Local)
	if err != nil {
		log.Println(err)
		return
	}
	clog.Set("LiveStartTime", sql.NullTime{Time: tm.Local(), Valid: true})

	duration, err := extractor.XPathResult("//meta[@itemprop='duration']/@content")
	if err != nil {
		log.Println(err)
		return
	}
	diter := duration.NodeIter()
	if !diter.Next() {
		return
	}
	dt, err := intimate.ParseDuration(diter.Node().NodeValue())
	if err != nil {
		log.Println(err)
		return
	}
	endtm := tm.Add(dt)
	clog.Set("LiveEndTime", sql.NullTime{Time: endtm.Local(), Valid: true})
}
// tagButtonPattern captures the text (at most 100 characters) of every element
// whose opening tag mentions TagButton. It is compiled once instead of on
// every call.
var tagButtonPattern = regexp.MustCompile(`<[^>]+TagButton[^>]+>([^<]{1,100})<`)

// extractTags collects the tag button texts from the raw live page HTML and
// stores them as a JSON array under "Tags". When no tag is found the stored
// JSON is `null`, because the slice stays nil. A marshal error is logged and
// leaves clog untouched.
func (oe *OpenrecExtractor) extractTags(clog intimate.ISet) {
	var tags []string
	for _, m := range tagButtonPattern.FindAllStringSubmatch(oe.userLive.GetSource().Str, -1) {
		tags = append(tags, m[1])
	}

	tagsBytes, err := json.Marshal(tags)
	if err != nil {
		log.Println(err)
		return
	}
	clog.Set("Tags", tagsBytes)
}

View File

@ -7,7 +7,6 @@ import (
"testing"
"time"
"github.com/474420502/hunter"
"github.com/lestrrat-go/libxml2"
)
@ -91,29 +90,6 @@ func TestCase(t *testing.T) {
t.Error(xr)
}
// TestUserName checks that the user-name XPath finds a node in test.html and
// logs the extracted name. The test fails when the fixture cannot be read, the
// query errors, or no node matches.
func TestUserName(t *testing.T) {
	f, err := os.Open("test.html")
	if err != nil {
		t.Fatal(err)
	}
	// Release the fixture handle when the test returns.
	defer f.Close()

	data, err := ioutil.ReadAll(f)
	if err != nil {
		t.Fatal(err)
	}

	extractor := hunter.NewExtractor(data)
	xp, err := extractor.XPathResult("//p[ contains(@class, 'c-global__user__profile__list__name__text')]/text()")
	if err != nil {
		t.Fatal(err)
	}
	if !xp.NodeIter().Next() {
		t.Fatal("user name node not found in test.html")
	}
	// Log rather than Error: printing the name must not fail a passing test.
	t.Log(xp.String())
}
func TestExtractor(t *testing.T) {
oe := &OpenrecExtractor{}
oe.Execute()
Execute()
}

View File

@ -38,7 +38,7 @@ func main() {
streamer, err := estore.Pop(intimate.Ptwitcasting)
if err != nil {
log.Println(err, streamer.UserId)
log.Println(err, streamer)
}
streamer.LiveUrl = sql.NullString{String: "https://twitcasting.tv/" + streamer.UserId, Valid: true}
@ -49,7 +49,7 @@ func main() {
continue
}
var ldata *LiveData
etor := extractor.ExtractXml(resp.Content())
etor := extractor.ExtractHtml(resp.Content())
ldata = etor.GetObjectByTag(LiveData{}).(*LiveData)
ldata.MaxViews = regexp.MustCompile("\\d+").FindString(ldata.MaxViews)
coincount := 0
@ -59,11 +59,14 @@ func main() {
giverurl := streamer.LiveUrl.String + "/backers/" + strconv.Itoa(i)
resp, err = ses.Get(giverurl).Execute()
if err != nil {
estore.UpdateError(streamer, err)
log.Panic(err)
}
etor := extractor.ExtractXml(resp.Content())
etor := extractor.ExtractHtml(resp.Content())
xp, err := etor.XPaths("//td[@class='tw-memorial-table-recent-point']")
if err != nil {
estore.UpdateError(streamer, err)
log.Panic(err)
}
@ -90,7 +93,7 @@ func main() {
streamer.Platform = intimate.Ptwitcasting
streamer.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true}
streamer.UserName = sql.NullString{String: ldata.UserName, Valid: true}
streamer.Operator = 10
streamer.Operator = 0
// streamer.UpdateInterval = 60
clog := &intimate.CollectLog{}
clog.UserId = streamer.UserId

View File

@ -10,7 +10,7 @@ import (
// Follower string `exp:".//span[@class='tw-user-nav-list-count']" method:"Text"`
// }
func TestMain(t *testing.T) {
func estMain(t *testing.T) {
main()
}

View File

@ -4,9 +4,6 @@ import (
"database/sql"
"reflect"
"time"
"github.com/474420502/hunter"
"github.com/tidwall/gjson"
)
type GetSet struct {
@ -104,36 +101,3 @@ func (cl *CollectLog) Get(field string) interface{} {
// Set assigns value to the struct field named field using reflection. The
// reflect package panics if no such field exists or if value is not assignable
// to the field's type.
func (cl *CollectLog) Set(field string, value interface{}) {
	target := reflect.ValueOf(cl).Elem().FieldByName(field)
	target.Set(reflect.ValueOf(value))
}
// ExtractorSource pairs a gjson result with the hunter extractor built from
// its string content. The extractor is only available after CreateExtractor
// has been called.
type ExtractorSource struct {
	source    *gjson.Result
	extractor *hunter.Extractor
}

// NewExtractorSource returns an ExtractorSource holding gr and no extractor.
func NewExtractorSource(gr *gjson.Result) *ExtractorSource {
	return &ExtractorSource{source: gr}
}

// SetSource replaces the source with gr and drops any existing extractor.
func (es *ExtractorSource) SetSource(gr *gjson.Result) {
	es.source, es.extractor = gr, nil
}

// Clear drops both the source and the extractor.
func (es *ExtractorSource) Clear() {
	es.source, es.extractor = nil, nil
}

// CreateExtractor builds the extractor from the source's string content. The
// source must have been set beforehand.
func (es *ExtractorSource) CreateExtractor() {
	raw := es.source.String()
	es.extractor = hunter.NewExtractor([]byte(raw))
}

// GetSource returns the current source, which may be nil.
func (es *ExtractorSource) GetSource() *gjson.Result {
	return es.source
}

// GetExtractor returns the current extractor, or nil if CreateExtractor has
// not been called since the source was last set.
func (es *ExtractorSource) GetExtractor() *hunter.Extractor {
	return es.extractor
}

9
go.mod
View File

@ -1,13 +1,12 @@
module intimate
go 1.14
go 1.15
require (
github.com/474420502/extractor v0.5.2
github.com/474420502/extractor v0.9.5
github.com/474420502/focus v0.12.0
github.com/474420502/gcurl v0.1.2
github.com/474420502/hunter v0.3.4
github.com/474420502/requests v1.6.0
github.com/474420502/gcurl v0.2.0
github.com/474420502/requests v1.7.0
github.com/go-sql-driver/mysql v1.5.0
github.com/lestrrat-go/libxml2 v0.0.0-20200215080510-6483566f52cb
github.com/tebeka/selenium v0.9.9

16
go.sum
View File

@ -2,20 +2,20 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMT
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU=
cloud.google.com/go v0.41.0/go.mod h1:OauMR7DV8fzvZIl2qg6rkaIhD/vmgk4iwEw/h6ercmg=
github.com/474420502/extractor v0.5.1 h1:A1heJJSYbV9nEaUHfl3/1HYXcsBQfsTzAHikgwg2IF0=
github.com/474420502/extractor v0.5.1/go.mod h1:vkqsbi7wXPqyi5Q5dchcGjiaWHbgOJOAEcwonBiAs/E=
github.com/474420502/extractor v0.5.2 h1:ndgrAkxJjQg0Nrbq3AX2/xAnmIJNxSHRFGQ78wEtWj4=
github.com/474420502/extractor v0.5.2/go.mod h1:vkqsbi7wXPqyi5Q5dchcGjiaWHbgOJOAEcwonBiAs/E=
github.com/474420502/extractor v0.9.5 h1:nM3/1tNL8BHS3PN9eXvm8Ve3hqTReKNB+ba8VpdL8bw=
github.com/474420502/extractor v0.9.5/go.mod h1:vJnXWmvO5bJDW4Yag0GoE2GxtHRg03TAxp2oXN1DcSY=
github.com/474420502/focus v0.12.0 h1:+icbmj7IEOefvTegHt5EpcHt6WFbe2miIrceUJx2Evo=
github.com/474420502/focus v0.12.0/go.mod h1:d0PMjtMxFz1a9HIhwyFPkWa+JF+0LgOrEUfd8iZka6s=
github.com/474420502/gcurl v0.1.2 h1:ON9Yz3IgAdtDlFlHfkAJ3aIEBDxH0RiViPE5ST5ohKg=
github.com/474420502/gcurl v0.1.2/go.mod h1:hws5q/Ao64bXLLDnldz9VyTQUndTWc/i5DzdEazFfoM=
github.com/474420502/htmlquery v1.2.4-0.20200810165859-a0e2c521c7c2 h1:4F1tpJ+sEkb3N+XD+Wb9MFiQmOMm3bHp8QUP+BQvkVk=
github.com/474420502/htmlquery v1.2.4-0.20200810165859-a0e2c521c7c2/go.mod h1:AoSN890esHwNKecV0tCs+W0ele1xgFL1Jqk6UcrdxgU=
github.com/474420502/hunter v0.3.4 h1:fyLAgI84jWe3IcqsISC53j1w3CXI1FERxX//Potns0M=
github.com/474420502/hunter v0.3.4/go.mod h1:pe4Xr/I+2agvq339vS/OZV+EiHAWtpXQs75rioSW9oA=
github.com/474420502/gcurl v0.2.0 h1:m6+vw4NX4f5Tfp7c3nuaIgHUE/7zTX6K3xK+pTCBoCo=
github.com/474420502/gcurl v0.2.0/go.mod h1:kJZDbgXn5wbAaR+hhBi4Sbw44P4igJ7qYXC6mejLuhQ=
github.com/474420502/htmlquery v1.2.4-0.20200812072201-e871dd09247a h1:E1T6CYQKsUn7fMvNbeKfISjBLfOJjZX4KpWwStT20Kc=
github.com/474420502/htmlquery v1.2.4-0.20200812072201-e871dd09247a/go.mod h1:AoSN890esHwNKecV0tCs+W0ele1xgFL1Jqk6UcrdxgU=
github.com/474420502/requests v1.6.0 h1:f4h4j40eT0P5whhg9LdkotD8CaKjtuDu/vz9iSUkCgY=
github.com/474420502/requests v1.6.0/go.mod h1:SLXrQ5dL9c7dkIeKNUCBAjOIt3J9KFCS2RQjWJecNwo=
github.com/474420502/requests v1.7.0 h1:oaBwVrxZ7yZ+hDOKwHm2NflYib2y1geIUxBxQ2U48mw=
github.com/474420502/requests v1.7.0/go.mod h1:SLXrQ5dL9c7dkIeKNUCBAjOIt3J9KFCS2RQjWJecNwo=
github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802 h1:1BDTz0u9nC3//pOCMdNH+CiXJVYJh5UQNCOBG7jbELc=

View File

@ -382,7 +382,8 @@ func (store *StoreExtractor) UpdateOperator(isource IGet) {
// UpdateStreamer Streamer表, 插入数据
func (store *StoreExtractor) UpdateStreamer(streamer IGet) {
_, err := store.db.Exec("UPDATE "+StreamerTable+" SET user_name = ?, live_url = ?, channel = ?, latest_log_uid = ?, tags = ?, ext = ?, operator = ?, update_time = ?, update_interval = ? WHERE uid = ?;",
// log.Printf("UPDATE "+StreamerTable+" SET user_name = %v, live_url = %v, channel = %v, latest_log_uid = %v, tags = %v, ext = %v, operator = %v, update_time = %v, update_interval = %v WHERE uid = %v", streamer.Get("UserName"), streamer.Get("LiveUrl"), streamer.Get("Channel"), streamer.Get("LatestLogUid"), streamer.Get("Tags"), streamer.Get("Ext"), streamer.Get("Operator"), streamer.Get("UpdateTime"), streamer.Get("UpdateInterval"), streamer.Get("Uid"))
_, err := store.db.Exec("UPDATE "+StreamerTable+" SET user_name = ?, live_url = ?, channel = ?, latest_log_uid = ?, tags = ?, ext = ?, operator = ?, update_time = ?, update_interval = ? WHERE uid = ?",
streamer.Get("UserName"), streamer.Get("LiveUrl"), streamer.Get("Channel"), streamer.Get("LatestLogUid"), streamer.Get("Tags"), streamer.Get("Ext"), streamer.Get("Operator"), streamer.Get("UpdateTime"), streamer.Get("UpdateInterval"), streamer.Get("Uid"))
if err != nil {
panic(err)
@ -396,12 +397,14 @@ func (store *StoreExtractor) Update(streamer IGet, fieldvalues ...interface{}) {
for i := 0; i < len(fieldvalues); i += 2 {
field := fieldvalues[i]
values = append(values, fieldvalues[i+1])
updateSQL += field.(string) + " = ? "
updateSQL += field.(string) + " = ?,"
}
updateSQL = updateSQL[0 : len(updateSQL)-1]
updateSQL += "WHERE uid = ?"
values = append(values, streamer.Get("Uid"))
_, err := store.db.Exec(updateSQL, values...)
if err != nil {
log.Println(updateSQL)
panic(err)
}
}

View File

@ -5,8 +5,7 @@ import (
)
func TestStoreInsert(t *testing.T) {
// ht := hunter.NewHunter(openrecRanking)
// ht.Execute()
}
func TestStoreInsertCase1(t *testing.T) {

View File

@ -0,0 +1 @@
# Symlink every supervisor config in the current directory into supervisord's
# conf.d directory. The directory is quoted so paths with spaces still work.
ln -sf "$(pwd)"/*.conf /etc/supervisor/conf.d/

View File

@ -0,0 +1,10 @@
[supervisord]
nodaemon=true
[program:openrec_extractor]
directory = /home/eson/test/intimate/bin/openrec_extractor/
command= /home/eson/test/intimate/bin/openrec_extractor/openrec_extractor
autorestart=true
stderr_logfile=/home/eson/test/intimate/bin/openrec_extractor/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -0,0 +1,10 @@
[supervisord]
nodaemon=true
[program:openrec_ranking]
directory = /home/eson/test/intimate/bin/openrec_task1
command= /home/eson/test/intimate/bin/openrec_task1/openrec_task1
autorestart=true
stderr_logfile=/home/eson/test/intimate/bin/openrec_task1/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -0,0 +1,12 @@
[supervisord]
nodaemon=true
[program:openrec_source]
directory = /home/eson/test/intimate/bin/openrec_task2/
command= /home/eson/test/intimate/bin/openrec_task2/openrec_task2
process_name=%(program_name)s_%(process_num)02d ; name of each process instance
numprocs=4 ; number of process instances to start
autorestart=true
stderr_logfile=/home/eson/test/intimate/bin/openrec_task2/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

12
supervisor_conf/test.conf Normal file
View File

@ -0,0 +1,12 @@
# [supervisord]
# nodaemon=true
# [program:openrec_extractor]
# directory = /home/eson/test/intimate/extractor/openrec_extractor
# command= /home/eson/test/intimate/extractor/openrec_extractor/openrec_extractor
# autorestart=true
# # stdout_logfile=/home/eson/test/intimate/extractor/openrec/stdout
# # stdout_logfile_maxbytes=0
# stderr_logfile=/home/eson/test/intimate/extractor/openrec/log
# stderr_logfile_maxbytes=0
# stopsignal=QUIT

View File

@ -0,0 +1,14 @@
[supervisord]
nodaemon=false
[program:twitcasting_extractor]
environment=DISPLAY=":99"
directory = /home/eson/test/intimate/bin/twitcasting_extractor/
command= /home/eson/test/intimate/bin/twitcasting_extractor/twitcasting_extractor
process_name=%(program_name)s_%(process_num)02d ; name of each process instance
numprocs=3 ; number of process instances to start
autorestart=true
stderr_logfile=/home/eson/test/intimate/bin/twitcasting_extractor/log
# stderr_logfile=%(supervisorctl.var.directory)s/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -0,0 +1,13 @@
[supervisord]
nodaemon=false
[program:twitcasting_task1]
environment=DISPLAY=":99"
directory = /home/eson/test/intimate/bin/twitcasting_task1/
command= /home/eson/test/intimate/bin/twitcasting_task1/twitcasting_task1
# process_name=%(program_name)s_%(process_num)02d ; name of each process instance
# numprocs=1 ; number of process instances to start
autorestart=true
stderr_logfile=/home/eson/test/intimate/bin/twitcasting_task1/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -0,0 +1,13 @@
[supervisord]
nodaemon=false
[program:twitch_extractor]
environment=DISPLAY=":99"
directory = /home/eson/test/intimate/bin/twitch_extractor
command= /home/eson/test/intimate/bin/twitch_extractor/twitch_extractor
process_name=%(program_name)s_%(process_num)02d ; name of each process instance
numprocs=5 ; number of process instances to start
autorestart=true
stderr_logfile=/home/eson/test/intimate/bin/twitch_extractor/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -0,0 +1,13 @@
[supervisord]
nodaemon=true
[program:twitch_extractor_p1]
environment=DISPLAY=":99",pac_proxy=http://localhost:1090/pac
directory = /home/eson/test/intimate/bin/twitch_extractor
command= /home/eson/test/intimate/bin/twitch_extractor/twitch_extractor
process_name=%(program_name)s_%(process_num)02d ; name of each process instance
numprocs=2 ; number of process instances to start
autorestart=true
stderr_logfile=/home/eson/test/intimate/bin/twitch_extractor/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -0,0 +1,13 @@
[supervisord]
nodaemon=false
[program:twitch_extractor_p2]
environment=DISPLAY=":99",pac_proxy=http://localhost:1090/pac1
directory = /home/eson/test/intimate/bin/twitch_extractor
command= /home/eson/test/intimate/bin/twitch_extractor/twitch_extractor
process_name=%(program_name)s_%(process_num)02d ; name of each process instance
numprocs=2 ; number of process instances to start
autorestart=true
stderr_logfile=/home/eson/test/intimate/bin/twitch_extractor/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

View File

@ -0,0 +1,13 @@
[supervisord]
nodaemon=false
[program:twitch_task2]
environment=DISPLAY=":99"
directory = /home/eson/test/intimate/bin/twitch_task2
command= /home/eson/test/intimate/bin/twitch_task2/twitch_task2
process_name=%(program_name)s_%(process_num)02d ; name of each process instance
numprocs=6 ; number of process instances to start
autorestart=true
stderr_logfile=/home/eson/test/intimate/bin/twitch_task2/log
stderr_logfile_maxbytes=0
stopsignal=QUIT

13
supervisor_conf/xvfb.conf Normal file
View File

@ -0,0 +1,13 @@
[supervisord]
nodaemon=true
[program:xvfb-99]
# directory = /home/eson/test/intimate/extractor/openrec_extractor
command=/usr/bin/Xvfb :99 -screen 0 1280x720x24 -ac -nolisten tcp -dpi 96 +extension RANDR -nolisten tcp
autorestart=true
# # stdout_logfile=/home/eson/test/intimate/extractor/openrec/stdout
# # stdout_logfile_maxbytes=0
# stderr_logfile=/home/eson/test/intimate/extractor/openrec/log
# stderr_logfile_maxbytes=0
# stopsignal=QUIT

View File

@ -1,8 +1,5 @@
package main
import "github.com/474420502/hunter"
func main() {
ht := hunter.NewHunter(openrecRanking)
ht.Execute()
Execute()
}

View File

@ -4,29 +4,24 @@ import (
"encoding/json"
"intimate"
"log"
"os"
"os/signal"
"strconv"
"sync/atomic"
"syscall"
"time"
"github.com/474420502/hunter"
"github.com/474420502/gcurl"
"github.com/tidwall/gjson"
)
var openrecRanking *OpenrecRanking
// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STOpenrec))
// estore 解析存储连接实例
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
func init() {
// Execute 执行方法
func Execute() {
openrecRanking = &OpenrecRanking{}
openrecRanking.PreCurlUrl = `curl 'https://public.openrec.tv/external/api/v5/channel-ranks?period=monthly&date=&tag=&page=1' \
ps := intimate.NewPerfectShutdown()
turl := `curl 'https://public.openrec.tv/external/api/v5/channel-ranks?period=monthly&date=&tag=&page=1' \
-H 'authority: public.openrec.tv' \
-H 'accept: application/json, text/javascript, */*; q=0.01' \
-H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36' \
@ -39,43 +34,25 @@ func init() {
-H 'if-none-match: W/"25edb-aUYBdmLqZcr6DW4ZWKX9r2aqolg"' \
--compressed`
}
g := gcurl.ParseRawCURL(turl)
tp := g.Temporary()
// OpenrecRanking 获取排名任务
type OpenrecRanking struct {
hunter.PreCurlUrl
}
for !ps.IsClose() {
// Execute 执行方法
func (or *OpenrecRanking) Execute(cxt *hunter.TaskContext) {
var loop int32 = 1
go func() {
signalchan := make(chan os.Signal)
signal.Notify(signalchan, syscall.SIGKILL, syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGSTOP)
log.Println("accept stop command:", <-signalchan)
atomic.StoreInt32(&loop, 0)
}()
for atomic.LoadInt32(&loop) > 0 {
resp, err := cxt.Hunt()
resp, err := tp.Execute()
if err != nil {
log.Println(err)
time.Sleep(time.Second * 2)
continue
}
tp := cxt.Temporary()
content := resp.Content()
if len(content) <= 200 { //末页时没有内容返回, 末页退出
finishpoint := time.Now()
log.Println("任务Ranking UserId结束休眠, 下次启动时间:", finishpoint.Add(time.Minute*120))
for time.Now().Sub(finishpoint) < time.Minute*120 {
time.Sleep(time.Second)
if atomic.LoadInt32(&loop) <= 0 {
if ps.IsClose() {
return
}
}
@ -93,13 +70,6 @@ func (or *OpenrecRanking) Execute(cxt *hunter.TaskContext) {
for _, User := range result.Array() {
userid := User.Get("channel.id").String()
// data := &intimate.Source{}
// data.Source = sql.NullString{String: userid, Valid: len(userid) > 0}
// data.Url = tp.GetRawURL()
// data.TargetType = string(intimate.TTOpenrecUser)
// sstore.Insert(data)
streamer := &intimate.Streamer{}
streamer.UserId = userid
streamer.Platform = intimate.Popenrec
@ -123,17 +93,7 @@ func (or *OpenrecRanking) Execute(cxt *hunter.TaskContext) {
}
// 修改url query 参数的page递增. 遍历所有页面
querys := tp.GetQuery()
page, err := strconv.Atoi(querys.Get("page"))
if err != nil {
log.Println(err)
return
}
page++
querys.Set("page", strconv.Itoa(page))
tp.SetQuery(querys)
tp.QueryParam("page").IntAdd(1)
time.Sleep(time.Second * 1)
}
}

View File

@ -3,67 +3,14 @@ package main
import (
"testing"
"time"
"github.com/tidwall/gjson"
"github.com/474420502/hunter"
)
// OpenrecRankingTest is the hunter task that TestRanking runs to check the
// ranking response.
type OpenrecRankingTest struct {
	hunter.PreCurlUrl
}

// Execute performs the request and reports problems on the *testing.T shared
// under the "test" key: the body must be valid JSON, the first entry must have
// rank 1, and the request's page query parameter must still be "1". A request
// error panics.
func (or *OpenrecRankingTest) Execute(cxt *hunter.TaskContext) {
	resp, err := cxt.Hunt()
	if err != nil {
		panic(err)
	}

	t := cxt.GetShare("test").(*testing.T)
	body := resp.Content()

	if valid := gjson.ValidBytes(body); !valid {
		t.Error("source is not json format.")
	}

	if rank := gjson.GetBytes(body, "0.rank"); rank.Int() != 1 {
		t.Error("rank is error. result raw is ", rank.Raw)
	}

	if page := cxt.Temporary().GetQuery().Get("page"); page != "1" {
		t.Error("Temporary page error")
	}
}
// TestRanking runs OpenrecRankingTest through a hunter against the monthly
// channel-ranks endpoint, passing t to the task via the "test" share.
func TestRanking(t *testing.T) {
	const curlBash = `curl 'https://public.openrec.tv/external/api/v5/channel-ranks?period=monthly&date=&tag=&page=1' \
-H 'authority: public.openrec.tv' \
-H 'accept: application/json, text/javascript, */*; q=0.01' \
-H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36' \
-H 'origin: https://www.openrec.tv' \
-H 'sec-fetch-site: same-site' \
-H 'sec-fetch-mode: cors' \
-H 'sec-fetch-dest: empty' \
-H 'referer: https://www.openrec.tv/ranking' \
-H 'accept-language: zh-CN,zh;q=0.9' \
-H 'if-none-match: W/"25edb-aUYBdmLqZcr6DW4ZWKX9r2aqolg"' \
--compressed`

	task := &OpenrecRankingTest{PreCurlUrl: hunter.PreCurlUrl(curlBash)}

	ht := hunter.NewHunter(task)
	ht.SetShare("test", t)
	ht.Execute()
}
// TestTimeAdd verifies that the elapsed time measured from a starting point
// is at least as long as the pause taken after it. It fails only when the
// measurement is wrong, and uses a short pause to keep the suite fast.
func TestTimeAdd(t *testing.T) {
	const pause = 20 * time.Millisecond

	finishpoint := time.Now()
	time.Sleep(pause)

	if elapsed := time.Since(finishpoint); elapsed < pause {
		t.Errorf("elapsed %v, want at least %v", elapsed, pause)
	}
}
func TestRankingInsert(t *testing.T) {
ht := hunter.NewHunter(openrecRanking)
ht.Execute()
func TestMain(t *testing.T) {
main()
}

View File

@ -1,8 +1,5 @@
package main
import "github.com/474420502/hunter"
func main() {
ht := hunter.NewHunter(oer)
ht.Execute()
Execute()
}

View File

@ -5,21 +5,13 @@ import (
"encoding/json"
"intimate"
"log"
"os"
"os/signal"
"strconv"
"sync/atomic"
"syscall"
"time"
"github.com/474420502/gcurl"
"github.com/474420502/requests"
"github.com/tidwall/gjson"
"github.com/474420502/hunter"
)
var oer *OpenrecExtratorRanking
// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STOpenrec))
@ -27,29 +19,18 @@ var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STOpe
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
func init() {
oer = &OpenrecExtratorRanking{}
}
// OpenrecExtratorRanking 获取用户信息
type OpenrecExtratorRanking struct {
// Store *intimate.Store
}
// Execute 执行方法
func (oer *OpenrecExtratorRanking) Execute(cxt *hunter.TaskContext) {
func Execute() {
var loop int32 = 1
go func() {
signalchan := make(chan os.Signal)
signal.Notify(signalchan, syscall.SIGKILL, syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGSTOP)
log.Println("accept stop command:", <-signalchan)
atomic.StoreInt32(&loop, 0)
}()
ps := intimate.NewPerfectShutdown()
ses := requests.NewSession()
var lasterr error = nil
for atomic.LoadInt32(&loop) > 0 {
for !ps.IsClose() {
streamer, err := estore.Pop(intimate.Popenrec) //队列里弹出一个streamer行. 进行解析
@ -74,7 +55,8 @@ func (oer *OpenrecExtratorRanking) Execute(cxt *hunter.TaskContext) {
// Check Userid
userUrl := updateUrl["user"]
tp := cxt.Session().Get(userUrl) // 获取user url页面数据
log.Println(userUrl)
tp := ses.Get(userUrl) // 获取user url页面数据
resp, err := tp.Execute()
streamer.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true}
@ -84,7 +66,7 @@ func (oer *OpenrecExtratorRanking) Execute(cxt *hunter.TaskContext) {
continue
}
cookies := cxt.Session().GetCookies(tp.GetParsedURL())
cookies := ses.GetCookies(tp.GetParsedURL())
scurl := updateUrl["supporters"] //获取打赏者的数据
curl := gcurl.ParseRawCURL(scurl)
@ -128,16 +110,17 @@ func (oer *OpenrecExtratorRanking) Execute(cxt *hunter.TaskContext) {
}
supporters = append(supporters, string(resp.Content()))
page := supportersQuery.Get("page_number") // page_number 加1
pageint, err := strconv.Atoi(page)
if err != nil {
log.Println(err)
break
}
pageint++
page = strconv.Itoa(pageint)
supportersQuery.Set("page_number", page)
temporary.SetQuery(supportersQuery)
temporary.QueryParam("page_number").IntAdd(1)
// page := supportersQuery.Get("page_number") // page_number 加1
// pageint, err := strconv.Atoi(page)
// if err != nil {
// log.Println(err)
// break
// }
// pageint++
// page = strconv.Itoa(pageint)
// supportersQuery.Set("page_number", page)
// temporary.SetQuery(supportersQuery)
}
// cookies := cxt.Session().GetCookies(wf.GetParsedURL())
@ -147,7 +130,7 @@ func (oer *OpenrecExtratorRanking) Execute(cxt *hunter.TaskContext) {
ext["html_user"] = string(resp.Content())
liveUrl := updateUrl["live"]
tp = cxt.Session().Get(liveUrl)
tp = ses.Get(liveUrl)
resp, err = tp.Execute()
if err != nil {
log.Println(err)
@ -164,7 +147,10 @@ func (oer *OpenrecExtratorRanking) Execute(cxt *hunter.TaskContext) {
continue
}
streamer.Operator = int32(intimate.OperatorOK)
// streamer.Platform = intimate.Popenrec
streamer.UpdateInterval = 120
streamer.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true}
streamer.Operator = 0
source := &intimate.Source{}
source.Target = intimate.TOpenrecUser
@ -172,7 +158,7 @@ func (oer *OpenrecExtratorRanking) Execute(cxt *hunter.TaskContext) {
source.StreamerId = sql.NullInt64{Int64: streamer.Uid, Valid: true}
sstore.Insert(source)
estore.UpdateOperator(streamer)
estore.UpdateStreamer(streamer)
}
}

View File

@ -2,11 +2,8 @@ package main
import (
"testing"
"github.com/474420502/hunter"
)
func TestOpenrecUser(t *testing.T) {
ht := hunter.NewHunter(oer)
ht.Execute()
func TestMain(t *testing.T) {
main()
}

View File

@ -58,7 +58,7 @@ func Execute() {
// log.Panic(err)
}
etor := extractor.ExtractXml(resp.Content())
etor := extractor.ExtractHtml(resp.Content())
result, err := etor.XPaths("//p[@class='taglist']/a[contains(@class, 'tag')]/@href")
if err != nil {
panic(err)
@ -89,7 +89,7 @@ func Execute() {
continue
}
var splist = xps.ForEachTag(SearchProfile{})
var splist = xps.ForEachObjectByTag(SearchProfile{})
for _, isp := range splist {
sp := isp.(*SearchProfile)
if sp.LiveUrl == "" {

View File

@ -1,6 +1,5 @@
package main
func main() {
e := ChannelLink{}
e.Execute()
Execute()
}

View File

@ -17,12 +17,8 @@ var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
// 获取类型的所有频道链接
// ChannelLink 频道链接
type ChannelLink struct {
}
// Execute 执行任务
func (cl *ChannelLink) Execute() {
func Execute() {
var err error
wd := intimate.GetChromeDriver(3030)
ps := intimate.NewPerfectShutdown()

View File

@ -4,9 +4,8 @@ import (
"testing"
)
func TestCase1(t *testing.T) {
e := ChannelLink{}
e.Execute()
func estCase1(t *testing.T) {
Execute()
}
func TestLiveUrl(t *testing.T) {

View File

@ -1,6 +1,6 @@
package main
func main() {
ul := UserList{}
ul.Execute()
Execute()
}

View File

@ -19,17 +19,12 @@ var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
// 获取类型的所有频道链接
// UserList 频道链接
type UserList struct {
}
// Execute 执行任务
func (cl *UserList) Execute() {
func Execute() {
// DELETE FROM source_twitch WHERE uid NOT IN (SELECT MAX(s.uid) FROM (SELECT uid, source FROM source_twitch ) s GROUP BY s.source) ;
//article//a[@data-a-target='preview-card-title-link']
wd := intimate.GetChromeDriver(3030)
defer wd.Close()
defer wd.Quit()
ps := intimate.NewPerfectShutdown()
counter := intimate.NewCounter()
@ -174,7 +169,4 @@ func (cl *UserList) Execute() {
}
counter.AddWithReset(1)
}
wd.Close()
wd.Quit()
}

View File

@ -2,6 +2,6 @@ package main
import "testing"
func TestMain(t *testing.T) {
func estMain(t *testing.T) {
main()
}

View File

@ -29,14 +29,16 @@ func init() {
}
// ParseNumber 去逗号解析数字
func ParseNumber(number string) (int64, error) {
number = strings.ReplaceAll(number, ",", "")
return strconv.ParseInt(number, 10, 64)
// ParseNumber parses a base-10 integer that may contain thousands separators
// (commas) and surrounding whitespace, for example " 1,234\n" yields 1234.
// All leading and trailing Unicode whitespace is removed, not just spaces,
// because text taken from HTML nodes is often padded with newlines or tabs.
// It returns the strconv.ParseInt error when the cleaned text is not a valid
// integer.
func ParseNumber(num string) (int64, error) {
	num = strings.TrimSpace(num)
	num = strings.ReplaceAll(num, ",", "")
	return strconv.ParseInt(num, 10, 64)
}
// ParseNumberEx 解析带字符的数字
func ParseNumberEx(num string) (float64, error) {
num = strings.Trim(num, " ")
num = strings.ReplaceAll(num, ",", "")
last := num[len(num)-1]
factor := 1.0
switch {