19 Commits

Author SHA1 Message Date
eson
0bff7169ec Extractor upgrade 2020-08-05 18:49:47 +08:00
eson
6158976986 add twitcasting test 2020-08-04 14:13:39 +08:00
eson
826d15876a fix windows quit 2020-08-04 14:12:00 +08:00
eson
93ec2e78a6 Merge tag 'v0.4.0' into develop
v0.4.0
2020-07-31 18:05:51 +08:00
eson
2161de6e33 Merge branch 'release/v0.4.0' 2020-07-31 18:05:28 +08:00
eson
ac47b28153 Merge branch 'feature/add-twitch' into develop 2020-07-31 18:05:06 +08:00
eson
0c54cbf8d8 完成twitch的期望功能 2020-07-31 18:04:10 +08:00
eson
ac1ab81676 完成twitch 数据提取入库 2020-07-28 18:56:27 +08:00
eson
1d2f2d14c5 1. 添加了block twitch 视频流的插件.
2. 流量减少99%
3. cpu使用率降低50%
4. 速度提高了400%
2020-07-27 19:30:54 +08:00
41d3763b57 TODO: finish extractor data 2020-07-27 00:35:41 +08:00
eson
ea650f91dc TODO: twitch get all userid list 2020-07-24 18:48:33 +08:00
eson
cbdedb6795 twitch source 非常多, 需要把增量的架构设计好. 修改原来架构. 2020-07-23 18:29:56 +08:00
eson
6d688b8450 fix: libxml2 leak 2020-07-22 20:00:02 +08:00
eson
b9f2f5cf22 Merge tag 'v0.3.0' into develop
finish
2020-07-21 15:07:24 +08:00
eson
9c4b3eb60b Merge branch 'release/v0.3.0' 2020-07-21 15:07:18 +08:00
eson
079488a2ba v0.3.0版本重构. 以 主播 为目标单位. 2020-07-21 15:05:56 +08:00
eson
d7a6da287d TODO: extractor 的重构. 2020-07-20 18:54:34 +08:00
eson
f0f83a9f00 1.最后一次数据存储结构重构.
2.数据最接近需求方.
2020-07-20 18:13:54 +08:00
eson
6369387179 Merge tag 'v0.2.1' into develop
测试完整
2020-07-17 19:21:33 +08:00
36 changed files with 1544 additions and 258 deletions

4
.gitignore vendored
View File

@@ -6,5 +6,7 @@ screenlog.*
intimate intimate
*.gz *.gz
debug.test debug.test
myblock
run.sh
stop.sh

View File

@@ -17,7 +17,7 @@ func init() {
InitConfig.Load() InitConfig.Load()
// storeOpenrec = NewStore() // storeOpenrec = NewStore()
log.SetFlags(log.Llongfile | log.Ldate) log.SetFlags(log.Llongfile | log.Ltime)
} }
// Config 配置 // Config 配置

BIN
crx/myblock.crx Normal file

Binary file not shown.

28
crx/myblock.pem Normal file
View File

@@ -0,0 +1,28 @@
-----BEGIN PRIVATE KEY-----
MIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQDSG09DSvB03TOe
eOmQwfiCIf0wa2WRB31ewxa6i/PRgEKeJSUvIsIuaECUer2ss+J3rwSS2lDpGuiw
FnsVyZqKI/+Rcuc83YJGYg6OAzVMz6UL8YCWhXu3huTJ+V+a5iNereIC69ZERRJt
nXlWqsq6HKya+6BP9sX9CI4GTHQrnWBysAxsswhdnnnRvu+GxglWafSIzuS6OizT
1M1CmkZxNvDJhTSOR7SJlIYm2kM5/fIL53BdndF2IGAjfV1WV7AjwhTfun5cViEO
i8niQUIMY4L0AiO9grFD1g1xIYkeuVBoLxOUBzPxJwQmb64gseb9Dvt0BKLRGoou
SIOyE+KVAgMBAAECggEAI4b6J2kR0VUBEDwmVHO0K38HUstqNHSVgrNO0dLt8sAz
I44o5DhGqPW4a9L4ZS5SrkWyKonPcic6buISRIwfPVoacjQBfVWAXJnil6lbtyYK
ZMNcqLcgBRfCcpOgEq91DiKta6yIwekDFXVyCdFd78v+9ML1J+hUsLVkXJTLdP88
PGamRWVd6vGy3QMRjyM29GLPgS+/6Vrp1cptSuYNqYhlszohmu8lBvzjH9jbPh9d
GFrrd8Bs7IRCdtKZig/3fbln4JEyyOYE+gcT2jplPksB6mR/5DBIdkVbeuFwGB0+
h1/PKlprNQt7+Ei0HhHnTib7lZP8WGo4HkSi7PsAGQKBgQD1Ptho0wJiI2+6gL1O
iNsEJVKIQ2Sxdx3wI/qudphM99t6xKCpPyVI2Nd9PBf2jbZjGAaz+P/KQYxEqb6i
PRcQ+i99wCQoRfnRvUbKA4goEpKwRXmvn+499dm6D5pEuumOXGQYCmaFXuLTRN/I
BL6GNgLtoZAlLjUXaWtk8TszGQKBgQDbUf3p3HLpCjRvRDW/vA5xj+08t7xtF9uO
NilGK79uOA4VnxE2w3ioYqQ7t3I8J/0rAzGKq3tylg4QX6UpQ4b2koRr2B3cqoAk
dsRdNWAHwCNepz8hTLsZyuihzbNv2nHmoqhzjK/FcrBHx5NAM+T6OBpLzQBnbUzk
3wIcqm223QKBgQDo/IRxyY0pGMtLXoT6ODACF0b6JzRhGG37tuKvngGAlbQQRP7w
6wmL1F2cH1wQon7UU34CupqfVnhgvvZZgToJqfU2PTTcgeYc6Pl4b7SJhWOQTOCX
BZQ7jvYCulHv27aIxaNd53uQVx2cYoFKr58lN+i+QtADUoujq0YYxshb+QKBgQDW
ZOti7kZCeuBRGIu2V56C8uBFp5MBzf2polZsqx1iIFfcWPfZ4fGUIYFMgwKfvbOl
lWSbmxB9LiSnaugoU0OezBG43rYqXV4Qxy0jtKagTPoGcFWtNrX7+7e3XD8Zi6Am
hkFHW3MEAB5EvNq8Oz6OP8Os78SCVn2BimMlJJFF3QKBgQCF+aEAiBv+ivcmHUeP
2eBq9nLltPFAfXJ/p31MMQ6Jgo36DBqUeoLeyq/WfIXvwqbVbP9fANZrKoTPbI97
dilCHUoO33rafXJy6jtaggtpz14tt9soecTop0vM/rU7tGtfBe6NXg9LRl+oDJCU
37I3a9Is+2CLyAUXWCk9mLfFsQ==
-----END PRIVATE KEY-----

View File

@@ -1,7 +1,6 @@
package main package main
import ( import (
"net/http"
_ "net/http/pprof" _ "net/http/pprof"
) )
@@ -16,11 +15,6 @@ import (
*/ */
func main() { func main() {
go func() {
http.ListenAndServe("0.0.0.0:8899", nil)
}()
oe := &OpenrecExtractor{} oe := &OpenrecExtractor{}
oe.Execute() oe.Execute()
} }

View File

@@ -8,7 +8,6 @@ import (
"os" "os"
"os/signal" "os/signal"
"regexp" "regexp"
"runtime"
"strconv" "strconv"
"strings" "strings"
"sync/atomic" "sync/atomic"
@@ -18,6 +17,9 @@ import (
"github.com/tidwall/gjson" "github.com/tidwall/gjson"
) )
var estore = intimate.NewStoreExtractor()
var sstore = intimate.NewStoreSource(string(intimate.STOpenrec))
// OpenrecExtractor 提取方法 // OpenrecExtractor 提取方法
type OpenrecExtractor struct { type OpenrecExtractor struct {
user *intimate.ExtractorSource user *intimate.ExtractorSource
@@ -36,44 +38,40 @@ func (oe *OpenrecExtractor) Execute() {
atomic.StoreInt32(&loop, 0) atomic.StoreInt32(&loop, 0)
}() }()
extractorStore := intimate.NewExtractorStore()
store := intimate.NewSourceStore("source_openrec")
var lasterr error = nil var lasterr error = nil
for atomic.LoadInt32(&loop) > 0 { for atomic.LoadInt32(&loop) > 0 {
var err error
runtime.GC() source, err := sstore.Pop(intimate.TOpenrecUser, 0)
time.Sleep(time.Nanosecond)
source, err := store.Pop(string(intimate.TTOpenrecRanking), 100)
if err != nil { if err != nil {
if err != lasterr { if err != lasterr {
log.Println(err, lasterr) log.Println(err, lasterr)
lasterr = err lasterr = err
} }
time.Sleep(time.Second * 2) time.Sleep(time.Second * 5)
continue continue
} }
sdata := source.Ext.([]byte)
datamap := gjson.ParseBytes(sdata).Map()
source.Operator = int32(intimate.OperatorError) source.Operator = int32(intimate.OperatorError)
userId := source.Source.String userId := datamap["var_user_id"].String()
streamer := &intimate.Streamer{} streamer := &intimate.Streamer{}
streamer.UserId = userId streamer.UserId = userId
streamer.Platform = string(intimate.Popenrec) streamer.Platform = intimate.Popenrec
sdata := source.Ext.([]byte) htmlUser := datamap["html_user"]
if gjson.ValidBytes(sdata) { oe.user = intimate.NewExtractorSource(&htmlUser)
result := gjson.ParseBytes(sdata)
datamap := result.Map()
oe.user = intimate.NewExtractorSource(datamap["user"])
oe.user.CreateExtractor() oe.user.CreateExtractor()
oe.userLive = intimate.NewExtractorSource(datamap["user_live"]) htmlLive := datamap["html_live"]
oe.userLive = intimate.NewExtractorSource(&htmlLive)
oe.userLive.CreateExtractor() oe.userLive.CreateExtractor()
oe.supporters = intimate.NewExtractorSource(datamap["supporters"]) jsonSupporters := datamap["json_supporters"]
oe.supporters = intimate.NewExtractorSource(&jsonSupporters)
clog := &intimate.CollectLog{} clog := &intimate.CollectLog{}
// log.Println(anchorId) // log.Println(anchorId)
@@ -85,38 +83,26 @@ func (oe *OpenrecExtractor) Execute() {
oe.extractLive(clog) oe.extractLive(clog)
oe.extractTags(clog) oe.extractTags(clog)
streamer.Uid = source.StreamerId.Int64
streamer.UpdateTime = source.UpdateTime streamer.UpdateTime = source.UpdateTime
streamer.Tags = clog.Tags
LiveUrl := "https://www.openrec.tv/live/" + userId
streamer.LiveUrl = sql.NullString{String: LiveUrl, Valid: true}
streamUid, err := extractorStore.InsertStreamer(streamer)
if err != nil {
log.Println(err)
source.ErrorMsg = sql.NullString{String: err.Error(), Valid: true}
store.UpdateOperator(source)
return
}
clog.StreamerUid = streamUid
clog.Platform = string(intimate.Popenrec) clog.Platform = string(intimate.Popenrec)
clog.UserId = userId clog.UserId = userId
clog.UpdateTime = source.UpdateTime clog.UpdateTime = source.UpdateTime
logUid, err := extractorStore.InsertCollectLog(clog) logUid := estore.InsertClog(clog)
if err != nil {
source.ErrorMsg = sql.NullString{String: err.Error(), Valid: true} LiveUrl := "https://www.openrec.tv/live/" + userId
store.UpdateOperator(source) streamer.LiveUrl = sql.NullString{String: LiveUrl, Valid: true}
return streamer.LatestLogUid = logUid
streamer.Operator = 0
estore.UpdateStreamer(streamer)
source.Operator = int32(intimate.OperatorExtractorOK)
sstore.UpdateOperator(source)
} }
extractorStore.UpdateStreamerLogUid(logUid, streamUid)
source.Operator = int32(intimate.OperatorExtractorOK)
store.UpdateOperator(source)
} else {
log.Println("data is not json:\n", string(sdata))
}
}
} }
func (oe *OpenrecExtractor) extractFollowers(clog intimate.ISet) { func (oe *OpenrecExtractor) extractFollowers(clog intimate.ISet) {
@@ -138,15 +124,15 @@ func (oe *OpenrecExtractor) extractFollowers(clog intimate.ISet) {
clog.Set("Followers", sql.NullInt64{Int64: followersInt, Valid: true}) clog.Set("Followers", sql.NullInt64{Int64: followersInt, Valid: true})
} }
func (oe *OpenrecExtractor) extractUserName(ai intimate.ISet) { func (oe *OpenrecExtractor) extractUserName(streamer intimate.ISet) {
extractor := oe.user.GetExtractor() extractor := oe.user.GetExtractor()
xp, err := extractor.XPathResult("//p[@class='c-global__user__profile__list__name__text official-icon--after']/text()") xp, err := extractor.XPathResult("//p[ contains(@class, 'c-global__user__profile__list__name__text')]/text()")
if err != nil { if err != nil {
log.Println(err) log.Println(err)
} else { } else {
if xp.NodeIter().Next() { if xp.NodeIter().Next() {
userName := xp.String() userName := xp.String()
ai.Set("UserName", userName) streamer.Set("UserName", sql.NullString{String: userName, Valid: true})
} }
} }
} }
@@ -158,6 +144,7 @@ func (oe *OpenrecExtractor) extractViewsAndLiveStreaming(clog intimate.ISet) {
if err != nil { if err != nil {
log.Println(err) log.Println(err)
} }
if xp.NodeIter().Next() { if xp.NodeIter().Next() {
views := regexp.MustCompile(`[0-9,]+`).FindString(xp.String()) views := regexp.MustCompile(`[0-9,]+`).FindString(xp.String())
views = strings.ReplaceAll(views, ",", "") views = strings.ReplaceAll(views, ",", "")
@@ -167,7 +154,7 @@ func (oe *OpenrecExtractor) extractViewsAndLiveStreaming(clog intimate.ISet) {
} }
clog.Set("Views", sql.NullInt64{Int64: int64(viewsint), Valid: true}) clog.Set("Views", sql.NullInt64{Int64: int64(viewsint), Valid: true})
clog.Set("IsLiveStreaming", int32(1)) clog.Set("IsLiveStreaming", true)
} }
} }
@@ -214,7 +201,7 @@ func (oe *OpenrecExtractor) extractLive(clog intimate.ISet) {
if err != nil { if err != nil {
log.Println(err) log.Println(err)
} }
log.Println(iter.Node().NodeValue(), tm.Local()) // log.Println(iter.Node().NodeValue(), tm.Local())
clog.Set("LiveStartTime", sql.NullTime{Time: tm.Local(), Valid: true}) clog.Set("LiveStartTime", sql.NullTime{Time: tm.Local(), Valid: true})
duration, err := extractor.XPathResult("//meta[@itemprop='duration']/@content") duration, err := extractor.XPathResult("//meta[@itemprop='duration']/@content")

View File

@@ -7,6 +7,7 @@ import (
"testing" "testing"
"time" "time"
"github.com/474420502/hunter"
"github.com/lestrrat-go/libxml2" "github.com/lestrrat-go/libxml2"
) )
@@ -90,6 +91,28 @@ func TestCase(t *testing.T) {
t.Error(xr) t.Error(xr)
} }
func TestUserName(t *testing.T) {
f, err := os.Open("test.html")
if err != nil {
panic(err)
}
data, err := ioutil.ReadAll(f)
if err != nil {
panic(err)
}
extractor := hunter.NewExtractor(data)
xp, err := extractor.XPathResult("//p[ contains(@class, 'c-global__user__profile__list__name__text')]/text()")
if err != nil {
t.Error(err)
} else {
if xp.NodeIter().Next() {
userName := xp.String()
t.Error(userName)
}
}
}
func TestExtractor(t *testing.T) { func TestExtractor(t *testing.T) {
oe := &OpenrecExtractor{} oe := &OpenrecExtractor{}
oe.Execute() oe.Execute()

4
extractor/twitch_extractor/.gitignore vendored Normal file
View File

@@ -0,0 +1,4 @@
*.html
log
screenlog.*
twitch_extractor

View File

@@ -0,0 +1,247 @@
package main
import (
"database/sql"
"encoding/json"
"intimate"
"log"
"regexp"
"time"
"github.com/tebeka/selenium"
)
// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))
// estore 解析存储连接实例
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
func main() {
wd := intimate.GetChromeDriver(3030)
ps := intimate.NewPerfectShutdown()
counter := intimate.NewCounter()
counter.SetMaxLimit(200)
counter.SetMaxToDo(func(olist ...interface{}) error {
owd := olist[0].(*selenium.WebDriver)
(*owd).Close()
(*owd).Quit()
*owd = intimate.GetChromeDriver(3030)
return nil
}, &wd)
var lasterr error = nil
// var err error
for !ps.IsClose() {
streamer, err := estore.Pop(intimate.Ptwitch, 0)
if streamer == nil || err != nil {
if err != lasterr {
log.Println(err, lasterr)
lasterr = err
}
time.Sleep(time.Second * 2)
continue
}
var updateUrl map[string]string
json.Unmarshal(streamer.UpdateUrl.([]byte), &updateUrl)
liveUrl := updateUrl["live"]
log.Println(liveUrl)
// err = wd.Get("https://www.twitch.tv/zoe_0601" + "/about")
err = wd.Get(liveUrl + "/about")
if err != nil {
log.Println(err)
estore.UpdateError(streamer, err)
time.Sleep(time.Second * 5)
continue
}
streamer.LiveUrl = sql.NullString{String: liveUrl, Valid: true}
clog := &intimate.CollectLog{}
clog.UserId = streamer.UserId
clog.Gratuity = sql.NullInt64{Int64: 0, Valid: false}
time.Sleep(time.Millisecond * 500)
err = extractUserName(wd, streamer)
if err != nil {
continue
}
err = extractFollowers(wd, clog)
if err != nil {
continue
}
err = extractViews(wd, clog) // views + tags + gratuity
if err != nil {
// 不直播时提取礼物 gratuity
wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
channelchat, err := wd.FindElement(selenium.ByXPATH, `//a[@data-a-target="channel-home-tab-Chat"]`)
btn, _ := web.FindElement(selenium.ByXPATH, `//button[@data-test-selector="expand-grabber"]`)
if (err == nil && channelchat != nil) || btn != nil {
if channelchat != nil {
channelchat.Click()
}
time.Sleep(time.Second)
extractGratuity(wd, clog)
return true, nil
}
return false, nil
}, time.Second*4)
}
streamer.Platform = intimate.Ptwitch
clog.Platform = string(streamer.Platform)
clog.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true}
lastClogId := estore.InsertClog(clog)
streamer.Operator = 10
streamer.LatestLogUid = lastClogId
if clog.Tags != nil {
streamer.Tags = clog.Tags
}
switch fl := clog.Followers.Int64; {
case fl > 100000:
streamer.UpdateInterval = 120
case fl > 10000:
streamer.UpdateInterval = 240
case fl > 1000:
streamer.UpdateInterval = 360
case fl > 100:
streamer.UpdateInterval = 720
case fl > 0:
streamer.UpdateInterval = 1440
}
streamer.UpdateTime = clog.UpdateTime
estore.UpdateStreamer(streamer)
counter.AddWithReset(1)
}
wd.Close()
wd.Quit()
}
func extractUserName(wd selenium.WebDriver, streamer *intimate.Streamer) error {
return wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
label, err := web.FindElement(selenium.ByXPATH, "//a[@class='tw-interactive']//h1")
if err == nil {
if ltxt, err := label.Text(); err == nil && ltxt != "" {
// log.Println("label:", ltxt)
streamer.UserName = sql.NullString{String: ltxt, Valid: true}
return true, nil
}
}
return false, err
}, 15*time.Second)
}
func extractFollowers(wd selenium.WebDriver, clog *intimate.CollectLog) error {
return wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
efollowers, err := web.FindElement(selenium.ByXPATH, "//div[@data-a-target='about-panel']//div[@class='tw-align-center']")
if err != nil {
return false, err
}
followers, err := efollowers.Text()
if err != nil || followers == "" {
return false, err
}
followers = regexp.MustCompile(`[\d,]+`).FindString(followers)
fint, _ := intimate.ParseNumber(followers)
clog.Followers = sql.NullInt64{Int64: int64(fint), Valid: true}
// log.Println("followers: ", followers, fint)
return true, nil
}, 4*time.Second)
}
func extractViews(wd selenium.WebDriver, clog *intimate.CollectLog) error {
return wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
views, err := web.FindElement(selenium.ByXPATH, "//a[@data-a-target='home-live-overlay-button']/span")
if views != nil {
if txt, err := views.Text(); err == nil {
vint, _ := intimate.ParseNumber(txt)
clog.Views = sql.NullInt64{Int64: vint, Valid: true}
// log.Println("views:", txt)
views.Click()
extractTags(wd, clog)
extractTitle(wd, clog)
extractGratuity(wd, clog)
return true, nil
}
}
return false, err
}, time.Second*4)
}
func extractTitle(wd selenium.WebDriver, clog *intimate.CollectLog) error {
return wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
title, err := web.FindElement(selenium.ByXPATH, `//h2[@data-a-target='stream-title']`)
if err == nil {
if txt, err := title.Text(); err == nil {
clog.LiveTitle = sql.NullString{String: txt, Valid: true}
return true, nil
}
}
return false, err
}, time.Second*4)
}
func extractTags(wd selenium.WebDriver, clog *intimate.CollectLog) error {
return wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
tags, err := web.FindElements(selenium.ByXPATH, "//a[@aria-label and @data-a-target and @href]/div[@class and text()]")
if len(tags) == 0 {
return false, err
}
var stags []string
for _, tag := range tags {
if txt, err := tag.Text(); err == nil {
stags = append(stags, txt)
} else {
log.Println(err)
}
}
if len(stags) > 0 {
if tagbuf, err := json.Marshal(stags); err == nil {
clog.Tags = tagbuf
} else {
log.Println(err)
}
}
return true, nil
}, time.Second*4)
}
func extractGratuity(wd selenium.WebDriver, clog *intimate.CollectLog) error {
return wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
btn, err := web.FindElement(selenium.ByXPATH, `//button[@data-test-selector="expand-grabber"]`)
if err == nil {
btn.Click()
time.Sleep(time.Second)
gifcount, err := web.FindElements(selenium.ByXPATH, `//div[@class="sub-gift-count tw-flex"]/p`)
if err == nil {
var gratuity int64 = 0
for _, gc := range gifcount {
if gtxt, err := gc.Text(); err == nil {
gint, _ := intimate.ParseNumber(gtxt)
gratuity += gint
} else {
log.Println(err)
}
}
clog.Gratuity = sql.NullInt64{Int64: gratuity, Valid: true}
}
return true, nil
}
return false, err
}, time.Second*4)
}

View File

@@ -0,0 +1,9 @@
package main
import (
"testing"
)
func TestCase0(t *testing.T) {
main()
}

View File

@@ -3,6 +3,7 @@ package intimate
import ( import (
"database/sql" "database/sql"
"reflect" "reflect"
"time"
"github.com/474420502/hunter" "github.com/474420502/hunter"
"github.com/tidwall/gjson" "github.com/tidwall/gjson"
@@ -11,16 +12,56 @@ import (
type GetSet struct { type GetSet struct {
} }
type StreamerList struct {
UrlHash []byte //
Platform Platform //
Url string //
Label sql.NullString //
Serialize interface{}
UpdateInterval int32
UpdateTime time.Time //
ErrorMsg sql.NullString
Operator int32
LastOperator int32
}
// Get Simple Value
func (sl *StreamerList) Get(field string) interface{} {
return reflect.ValueOf(sl).Elem().FieldByName(field).Interface()
}
// Set Simple Value
func (sl *StreamerList) Set(field string, value interface{}) {
reflect.ValueOf(sl).Elem().FieldByName(field).Set(reflect.ValueOf(value))
}
type Streamer struct { type Streamer struct {
Uid int64 // Uid int64 //
Platform string // Platform Platform //
UserId string // UserId string //
UserName string //
UserName sql.NullString //
LiveUrl sql.NullString // LiveUrl sql.NullString //
Channel sql.NullString // Channel sql.NullString //
LatestLogUid int64 Tags interface{}
Ext interface{} // Ext interface{} //
IsUpdateStreamer bool // 更新上面的内容
IsUpdateUrl bool
UpdateInterval int32
UpdateUrl interface{}
LatestLogUid int64
UpdateTime sql.NullTime // UpdateTime sql.NullTime //
ErrorMsg sql.NullString
Operator int32
LastOperator int32
} }
// Get Simple Value // Get Simple Value
@@ -39,8 +80,8 @@ type CollectLog struct {
Platform string // Platform string //
UserId string // 平台的UserId UserId string // 平台的UserId
IsLiveStreaming int32 // IsLiveStreaming bool //
IsError int32 // IsError bool //
Followers sql.NullInt64 // Followers sql.NullInt64 //
Views sql.NullInt64 // Views sql.NullInt64 //
Giver interface{} // Giver interface{} //
@@ -65,21 +106,31 @@ func (cl *CollectLog) Set(field string, value interface{}) {
} }
type ExtractorSource struct { type ExtractorSource struct {
source gjson.Result source *gjson.Result
extractor *hunter.Extractor extractor *hunter.Extractor
} }
func NewExtractorSource(gr gjson.Result) *ExtractorSource { func NewExtractorSource(gr *gjson.Result) *ExtractorSource {
es := &ExtractorSource{} es := &ExtractorSource{}
es.source = gr es.SetSource(gr)
return es return es
} }
func (es *ExtractorSource) CreateExtractor() { func (es *ExtractorSource) SetSource(gr *gjson.Result) {
es.extractor = hunter.NewExtractor([]byte(es.source.Str)) es.source = gr
es.extractor = nil
} }
func (es *ExtractorSource) GetSource() gjson.Result { func (es *ExtractorSource) Clear() {
es.source = nil
es.extractor = nil
}
func (es *ExtractorSource) CreateExtractor() {
es.extractor = hunter.NewExtractor([]byte(es.source.String()))
}
func (es *ExtractorSource) GetSource() *gjson.Result {
return es.source return es.source
} }

6
go.mod
View File

@@ -3,10 +3,14 @@ module intimate
go 1.14 go 1.14
require ( require (
github.com/474420502/extractor v0.2.2
github.com/474420502/focus v0.12.0
github.com/474420502/gcurl v0.1.2 github.com/474420502/gcurl v0.1.2
github.com/474420502/hunter v0.3.0 github.com/474420502/hunter v0.3.4
github.com/474420502/requests v1.6.0
github.com/go-sql-driver/mysql v1.5.0 github.com/go-sql-driver/mysql v1.5.0
github.com/lestrrat-go/libxml2 v0.0.0-20200215080510-6483566f52cb github.com/lestrrat-go/libxml2 v0.0.0-20200215080510-6483566f52cb
github.com/tebeka/selenium v0.9.9
github.com/tidwall/gjson v1.6.0 github.com/tidwall/gjson v1.6.0
github.com/tidwall/pretty v1.0.1 // indirect github.com/tidwall/pretty v1.0.1 // indirect
golang.org/x/net v0.0.0-20200707034311-ab3426394381 // indirect golang.org/x/net v0.0.0-20200707034311-ab3426394381 // indirect

12
go.sum
View File

@@ -2,12 +2,16 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMT
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU=
cloud.google.com/go v0.41.0/go.mod h1:OauMR7DV8fzvZIl2qg6rkaIhD/vmgk4iwEw/h6ercmg= cloud.google.com/go v0.41.0/go.mod h1:OauMR7DV8fzvZIl2qg6rkaIhD/vmgk4iwEw/h6ercmg=
github.com/474420502/extractor v0.2.2 h1:hGao2iZt5CEI8oqYjQW938osQdHKgNWL/bwRJQNgHTM=
github.com/474420502/extractor v0.2.2/go.mod h1:OVFijdKLDghigpIYISHzlognL5q8eeVenT2fRhCyFns=
github.com/474420502/focus v0.12.0 h1:+icbmj7IEOefvTegHt5EpcHt6WFbe2miIrceUJx2Evo= github.com/474420502/focus v0.12.0 h1:+icbmj7IEOefvTegHt5EpcHt6WFbe2miIrceUJx2Evo=
github.com/474420502/focus v0.12.0/go.mod h1:d0PMjtMxFz1a9HIhwyFPkWa+JF+0LgOrEUfd8iZka6s= github.com/474420502/focus v0.12.0/go.mod h1:d0PMjtMxFz1a9HIhwyFPkWa+JF+0LgOrEUfd8iZka6s=
github.com/474420502/gcurl v0.1.2 h1:ON9Yz3IgAdtDlFlHfkAJ3aIEBDxH0RiViPE5ST5ohKg= github.com/474420502/gcurl v0.1.2 h1:ON9Yz3IgAdtDlFlHfkAJ3aIEBDxH0RiViPE5ST5ohKg=
github.com/474420502/gcurl v0.1.2/go.mod h1:hws5q/Ao64bXLLDnldz9VyTQUndTWc/i5DzdEazFfoM= github.com/474420502/gcurl v0.1.2/go.mod h1:hws5q/Ao64bXLLDnldz9VyTQUndTWc/i5DzdEazFfoM=
github.com/474420502/hunter v0.3.0 h1:0VPi1MInxjHOta3da4v0ALWK0y3/X4/6nUSLFvdbiFU= github.com/474420502/hunter v0.3.4 h1:fyLAgI84jWe3IcqsISC53j1w3CXI1FERxX//Potns0M=
github.com/474420502/hunter v0.3.0/go.mod h1:pe4Xr/I+2agvq339vS/OZV+EiHAWtpXQs75rioSW9oA= github.com/474420502/hunter v0.3.4/go.mod h1:pe4Xr/I+2agvq339vS/OZV+EiHAWtpXQs75rioSW9oA=
github.com/474420502/libxml2 v0.0.0-20200803084225-29e441d26406 h1:nLvl2D2y+hxCglLnRmLqwRGwmUsXQt8ga46zGySTU1I=
github.com/474420502/libxml2 v0.0.0-20200803084225-29e441d26406/go.mod h1:bUbcte7hFuLijGG6/+gGxurW3XvxE/CBdfAAlsIWj34=
github.com/474420502/requests v1.6.0 h1:f4h4j40eT0P5whhg9LdkotD8CaKjtuDu/vz9iSUkCgY= github.com/474420502/requests v1.6.0 h1:f4h4j40eT0P5whhg9LdkotD8CaKjtuDu/vz9iSUkCgY=
github.com/474420502/requests v1.6.0/go.mod h1:SLXrQ5dL9c7dkIeKNUCBAjOIt3J9KFCS2RQjWJecNwo= github.com/474420502/requests v1.6.0/go.mod h1:SLXrQ5dL9c7dkIeKNUCBAjOIt3J9KFCS2RQjWJecNwo=
github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
@@ -66,6 +70,8 @@ github.com/rogpeppe/go-charset v0.0.0-20180617210344-2471d30d28b4/go.mod h1:qgYe
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/tebeka/selenium v0.9.9 h1:cNziB+etNgyH/7KlNI7RMC1ua5aH1+5wUlFQyzeMh+w= github.com/tebeka/selenium v0.9.9 h1:cNziB+etNgyH/7KlNI7RMC1ua5aH1+5wUlFQyzeMh+w=
github.com/tebeka/selenium v0.9.9/go.mod h1:5Fr8+pUvU6B1OiPfkdCKdXZyr5znvVkxuPd0NOdZCQc= github.com/tebeka/selenium v0.9.9/go.mod h1:5Fr8+pUvU6B1OiPfkdCKdXZyr5znvVkxuPd0NOdZCQc=
github.com/tidwall/gjson v1.3.2/go.mod h1:P256ACg0Mn+j1RXIDXoss50DeIABTYK1PULOJHhxOls= github.com/tidwall/gjson v1.3.2/go.mod h1:P256ACg0Mn+j1RXIDXoss50DeIABTYK1PULOJHhxOls=
@@ -162,6 +168,8 @@ gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU= gopkg.in/yaml.v2 v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU=
gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a h1:LJwr7TCTghdatWv40WobzlKXc9c4s8oGa7QKJUtHhWA= honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a h1:LJwr7TCTghdatWv40WobzlKXc9c4s8oGa7QKJUtHhWA=

View File

@@ -4,6 +4,12 @@ package intimate
type Platform string type Platform string
const ( const (
// Popenrec openrec源table名称 // Popenrec openrec 平台
Popenrec Platform = "openrec" Popenrec Platform = "openrec"
// Ptwitch twitch 平台
Ptwitch Platform = "twitch"
// Ptwitcasting twitcasting 平台
Ptwitcasting Platform = "twitcasting"
) )

View File

@@ -8,15 +8,20 @@ import (
// Source 的结构体 // Source 的结构体
type Source struct { type Source struct {
Uid int64 // Uid int64 //
Url string // Url string //
TargetType string //
StreamerId sql.NullInt64 //
Source sql.NullString // Source sql.NullString //
PassGob sql.NullString // PassGob sql.NullString //
Ext interface{} // Ext interface{} //
UpdateTime sql.NullTime // UpdateTime sql.NullTime //
Operator int32 //
ErrorMsg sql.NullString // ErrorMsg sql.NullString //
Target Target //
Operator int32 //
LastOperator int32 LastOperator int32
} }

View File

@@ -1,47 +1,77 @@
create database if not exists `intimate_extractor`; create database if not exists `intimate_extractor`;
use intimate_extractor; use intimate_extractor;
CREATE TABLE IF NOT EXISTS `streamer_list` (
`urlhash` varchar(32) NOT NULL COMMENT '平台',
`url` text COMMENT 'url获取streamer列表的url',
`platform` varchar(255) NOT NULL COMMENT '平台',
`label` varchar(255) DEFAULT NULL COMMENT '必须的时候打上标签',
`serialize` blob DEFAULT NULL COMMENT '保存进程的必要计算数据',
`update_interval` int DEFAULT 120 COMMENT '分钟单位, 默认120分钟, 下次更新的时间间隔',
`update_time` Timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
`error_msg` text DEFAULT NULL COMMENT '错误信息',
`operator` int DEFAULT 0 COMMENT '操作标志位, 根据不同解析方法有不同标志',
PRIMARY KEY (`urlhash`),
KEY `platform_idx` (`platform`),
KEY `update_time_idx` (`update_time`),
KEY `operator_idx` (`operator`)
)
CREATE TABLE IF NOT EXISTS `streamer` ( CREATE TABLE IF NOT EXISTS `streamer` (
`uid` bigint AUTO_INCREMENT, `uid` bigint AUTO_INCREMENT COMMENT '自增UID, 便于查询定位',
`platform` varchar(255) NOT NULL, `platform` varchar(255) NOT NULL COMMENT '平台',
`user_id` varchar(255) NOT NULL, `user_id` varchar(255) NOT NULL COMMENT '用户唯一UID',
`user_name` varchar(255) NOT NULL, `user_name` varchar(255) DEFAULT NULL COMMENT '用户名字 区别于ID',
`live_url` text, `live_url` text COMMENT '直播的url',
`channel` varchar(128) DEFAULT NULL, `channel` varchar(128) DEFAULT NULL COMMENT'所属 频道,分类 未必所有平台都有明确的标签',
`latest_log_uid` bigint, `tag` json DEFAULT NULL COMMENT 'streamer 最新的tag',
`ext` json DEFAULT NULL, `ext` json DEFAULT NULL COMMENT '扩展类型, 把一些可能需要但是没字段的数据放在json扩展',
`update_time` Timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
`is_update_streamer` tinyint(1) DEFAULT 0 COMMENT '是否需要持续更新streamer的信息. 1为需要,0则否',
`is_update_url` tinyint(1) DEFAULT 0 COMMENT '是否需要持续更新update_url. 1为需要,0则否',
`update_url` json DEFAULT NULL COMMENT '更新数据的url, 如直播url, profile url等',
`update_interval` int DEFAULT 30 COMMENT '分钟单位, 默认30分钟, 下次更新的时间间隔',
`update_time` Timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
`latest_log_uid` bigint COMMENT '最新更新的日志表的uid, 方便关联',
`error_msg` text DEFAULT NULL COMMENT '错误信息',
`operator` int DEFAULT 0 COMMENT '操作标志位, 根据不同解析方法有不同标志',
PRIMARY KEY (`uid`), PRIMARY KEY (`uid`),
UNIQUE KEY `platform_anchor_id_idx` (`platform`, `user_id`), UNIQUE KEY `platform_user_id_idx` (`platform`, `user_id`),
KEY `platform_idx` (`platform`), KEY `platform_idx` (`platform`),
KEY `user_id_idx` (`user_id`), KEY `user_id_idx` (`user_id`),
KEY `user_name_idx` (`user_name`), KEY `user_name_idx` (`user_name`),
KEY `channel_idx` (`channel`), KEY `channel_idx` (`channel`),
KEY `update_time_idx` (`update_time`) KEY `update_time_idx` (`update_time`),
KEY `operator_idx` (`operator`)
); );
CREATE TABLE IF NOT EXISTS `collect_log` ( CREATE TABLE IF NOT EXISTS `collect_log` (
`log_uid` bigint AUTO_INCREMENT, `log_uid` bigint AUTO_INCREMENT COMMENT '日志自增UID',
`streamer_uid` bigint, `streamer_uid` bigint COMMENT '对应streamer表的UID',
`platform` varchar(255) NOT NULL, `platform` varchar(255) NOT NULL COMMENT '平台名称, 方便于搜索日志分类',
`user_id` varchar(255) NOT NULL, `user_id` varchar(255) NOT NULL COMMENT '用户UID',
`is_live_streaming` tinyint(1) DEFAULT 0, `is_live_streaming` tinyint(1) DEFAULT 0 COMMENT '是否正在直播',
`is_error` tinyint(1) DEFAULT 0, `is_error` tinyint(1) DEFAULT 0 COMMENT '是否采集数据的时候出错, 便于定位错误',
`followers` bigint(11) DEFAULT NULL, `followers` bigint(11) DEFAULT NULL COMMENT '关注数',
`views` bigint(11) DEFAULT NULL, `views` bigint(11) DEFAULT NULL COMMENT '当前直播的观众',
`giver` json DEFAULT NULL, `giver` json DEFAULT NULL COMMENT '打赏礼物者,和一些金额数据等, 数据类型异于平台',
`gratuity` bigint(11) DEFAULT NULL, `gratuity` bigint(11) DEFAULT NULL COMMENT '打赏值的总值, 数据类型异于平台',
`live_title` text DEFAULT NULL, `live_title` text DEFAULT NULL COMMENT '直播标题',
`live_start_time` Timestamp NULL DEFAULT NULL, `live_start_time` Timestamp NULL DEFAULT NULL COMMENT '直播开始时间',
`live_end_time` Timestamp NULL DEFAULT NULL, `live_end_time` Timestamp NULL DEFAULT NULL COMMENT '直播结束时间',
`update_time` Timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, `update_time` Timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '数据日志更新时间',
`tags` json DEFAULT NULL, `tags` json DEFAULT NULL COMMENT '主播直播的类型标签, 非永久固定',
`ext` json DEFAULT NULL, `ext` json DEFAULT NULL COMMENT '扩展字段, 用于一些数据不存在的字段, 便于记录扩展',
`error_msg` text DEFAULT NULL, `error_msg` text DEFAULT NULL COMMENT '错误信息',
PRIMARY KEY (`log_uid`), PRIMARY KEY (`log_uid`),
KEY `streamer_uid_idx` (`streamer_uid`), KEY `streamer_uid_idx` (`streamer_uid`),

View File

@@ -2,15 +2,38 @@ create database if not exists `intimate_source`;
use intimate_source; use intimate_source;
CREATE TABLE IF NOT EXISTS `source_openrec` ( CREATE TABLE IF NOT EXISTS `source_openrec` (
uid bigint AUTO_INCREMENT, uid bigint AUTO_INCREMENT COMMENT '自增UID',
`url` text NOT NULL,
`target_type` varchar(64) NOT NULL, `streamer_id` bigint DEFAULT NULL COMMENT 'streamer uid, 关联主播',
`source` longtext DEFAULT NULL, `url` text NOT NULL COMMENT '获取源数据地址',
`ext` json DEFAULT NULL, `source` longtext DEFAULT NULL COMMENT '源数据',
`pass_gob` blob DEFAULT NULL, `ext` json DEFAULT NULL COMMENT '扩展字段',
`update_time` Timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, `serialize` blob DEFAULT NULL COMMENT '需要给下个任务传递 序列花数据, 非必要不用',
`operator` int DEFAULT 0, `update_time` Timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新数据',
`error_msg` text DEFAULT NULL, `error_msg` text DEFAULT NULL COMMENT '错误信息',
`target_type` varchar(64) NOT NULL COMMENT '目标类型',
`operator` int DEFAULT 0 COMMENT '操作标志位, 根据不同解析方法有不同标志',
PRIMARY KEY(`uid`),
KEY `operator_idx` (`operator`),
KEY `update_time_idx` (`update_time`),
KEY `target_type_idx` (`target_type`)
);
CREATE TABLE IF NOT EXISTS `source_twitch` (
uid bigint AUTO_INCREMENT COMMENT '自增UID',
`streamer_id` bigint DEFAULT NULL COMMENT 'streamer uid, 关联主播',
`url` text NOT NULL COMMENT '获取源数据地址',
`source` longtext DEFAULT NULL COMMENT '源数据',
`ext` json DEFAULT NULL COMMENT '扩展字段',
`serialize` blob DEFAULT NULL COMMENT '需要给下个任务传递 序列花数据, 非必要不用',
`update_time` Timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新数据',
`error_msg` text DEFAULT NULL COMMENT '错误信息',
`target_type` varchar(64) NOT NULL COMMENT '目标类型',
`operator` int DEFAULT 0 COMMENT '操作标志位, 根据不同解析方法有不同标志',
PRIMARY KEY(`uid`), PRIMARY KEY(`uid`),
KEY `operator_idx` (`operator`), KEY `operator_idx` (`operator`),
KEY `update_time_idx` (`update_time`), KEY `update_time_idx` (`update_time`),

10
sql/remake_database.sh Normal file
View File

@@ -0,0 +1,10 @@
# /bin/bash
USER=root
HOST=127.0.0.1
PORT=4000
# mysql -h $HOST -u $USER -P $PORT -c "drop database intimate_source";
# mysql -h $HOST -u $USER -P $PORT -c "drop database intimate_extractor";
mysql -h $HOST -u $USER -P $PORT < ./intimate_extractor.sql;
mysql -h $HOST -u $USER -P $PORT < ./intimate_source.sql;

313
store.go
View File

@@ -1,9 +1,12 @@
package intimate package intimate
import ( import (
"crypto/md5"
"database/sql" "database/sql"
"fmt"
"log" "log"
"os" "strings"
"time"
_ "github.com/go-sql-driver/mysql" _ "github.com/go-sql-driver/mysql"
) )
@@ -36,23 +39,33 @@ type IGetSet interface {
} }
// SourceStore 储存 // SourceStore 储存
type SourceStore struct { type StoreSource struct {
table string table string
db *sql.DB db *sql.DB
popCount int
errorCount int errorCount int
errorLimit int errorLimit int
} }
func (store *StoreSource) PopCount() int {
return store.popCount
}
func (store *StoreSource) Close() error {
return store.db.Close()
}
// NewSourceStore 创建一个存储实例 // NewSourceStore 创建一个存储实例
func NewSourceStore(table string) *SourceStore { func NewStoreSource(table string) *StoreSource {
db, err := sql.Open("mysql", InitConfig.Database.SourceURI) db, err := sql.Open("mysql", InitConfig.Database.SourceURI)
if err != nil { if err != nil {
panic(err) panic(err)
} }
return &SourceStore{table: table, db: db} return &StoreSource{table: table, db: db}
} }
func (store *SourceStore) errorAlarm(err error) { func (store *StoreSource) errorAlarm(err error) {
if err != nil { if err != nil {
log.Println("store error: ", err) log.Println("store error: ", err)
// 报警. 如果数据插入有问题 // 报警. 如果数据插入有问题
@@ -68,56 +81,67 @@ func (store *SourceStore) errorAlarm(err error) {
} }
// Insert 插入数据 // Insert 插入数据
func (store *SourceStore) Insert(isource IGet) { func (store *StoreSource) Insert(isource IGet) {
_, err := store.db.Exec("insert into "+store.table+"(url, target_type, source, ext, operator, error_msg) values(?,?,?,?,?,?)", isource.Get("Url"), isource.Get("TargetType"), isource.Get("Source"), isource.Get("Ext"), isource.Get("Operator"), isource.Get("ErrorMsg")) _, err := store.db.Exec("insert into "+store.table+"(url, target_type, source, ext, operator, error_msg, streamer_id) values(?,?,?,?,?,?,?)", isource.Get("Url"), isource.Get("Target"), isource.Get("Source"), isource.Get("Ext"), isource.Get("Operator"), isource.Get("ErrorMsg"), isource.Get("StreamerId"))
if err != nil { if err != nil {
log.Panic(err) panic(err)
}
}
// Deduplicate 去重
func (store *StoreSource) Deduplicate(target Target, field string) {
sql := `DELETE FROM ` + store.table + ` WHERE uid NOT IN (SELECT MAX(s.uid) FROM (SELECT uid, ` + field + ` FROM ` + store.table + ` force index(target_type_idx) WHERE target_type = "` + string(target) + `" ) s GROUP BY s.` + string(field) + `) ;`
_, err := store.db.Exec(sql)
if err != nil {
panic(err)
} }
} }
// Update 更新数据 // Update 更新数据
func (store *SourceStore) Update(isource IGet) { func (store *StoreSource) Update(isource IGet) {
_, err := store.db.Exec("update "+store.table+" set ext = ?, pass_gob = ?, operator = ?, error_msg = ? where uid = ?", isource.Get("Ext"), isource.Get("PassGob"), isource.Get("Operator"), isource.Get("ErrorMsg"), isource.Get("Uid")) _, err := store.db.Exec("update "+store.table+" set ext = ?, pass_gob = ?, operator = ?, error_msg = ? where uid = ?", isource.Get("Ext"), isource.Get("PassGob"), isource.Get("Operator"), isource.Get("ErrorMsg"), isource.Get("Uid"))
if err != nil { if err != nil {
log.Panic(err) panic(err)
} }
} }
// UpdateOperator 更新数据操作标志位 // UpdateOperator 更新数据操作标志位
func (store *SourceStore) UpdateOperator(isource IGet) { func (store *StoreSource) UpdateOperator(isource IGet) {
_, err := store.db.Exec("update "+store.table+" set operator = ?, error_msg = ? where uid = ?", isource.Get("Operator"), isource.Get("ErrorMsg"), isource.Get("Uid")) _, err := store.db.Exec("update "+store.table+" set operator = ?, error_msg = ? where uid = ?", isource.Get("Operator"), isource.Get("ErrorMsg"), isource.Get("Uid"))
if err != nil { if err != nil {
log.Panic(err) panic(err)
} }
} }
// UpdateError 更新错误数据 // UpdateError 更新错误数据
func (store *SourceStore) UpdateError(isource IGetSet, err error) { func (store *StoreSource) UpdateError(isource IGetSet, err error) {
isource.Set("Operator", int32(OperatorError)) isource.Set("Operator", int32(OperatorError)+isource.Get("Operator").(int32))
isource.Set("ErrorMsg", sql.NullString{String: err.Error(), Valid: true}) isource.Set("ErrorMsg", sql.NullString{String: err.Error(), Valid: true})
_, dberr := store.db.Exec("update "+store.table+" set operator = ?, error_msg = ? where uid = ?", isource.Get("Operator"), isource.Get("ErrorMsg"), isource.Get("Uid")) _, dberr := store.db.Exec("update "+store.table+" set operator = ?, error_msg = ? where uid = ?", isource.Get("Operator"), isource.Get("ErrorMsg"), isource.Get("Uid"))
if dberr != nil { if dberr != nil {
log.Panic(err) // email tell owner to deal with
panic(err)
} }
} }
// Restore 恢复Operator数据状态 // Restore 恢复Operator数据状态
func (store *SourceStore) Restore(isource IGet) { func (store *StoreSource) Restore(isource IGet) {
_, err := store.db.Exec("update "+store.table+" set operator = ? where uid = ?", isource.Get("LastOperator"), isource.Get("Uid")) _, dberr := store.db.Exec("update "+store.table+" set operator = ? where uid = ?", isource.Get("LastOperator"), isource.Get("Uid"))
if err != nil { if dberr != nil {
log.Panic(err) // email tell owner to deal with
panic(dberr)
} }
} }
// Pop 弹出一条未处理的数据 // Pop 弹出一条未处理的数据
func (store *SourceStore) Pop(targetType string, operators ...int32) (*Source, error) { func (store *StoreSource) Pop(targetType Target, operators ...int32) (*Source, error) {
tx, err := store.db.Begin() tx, err := store.db.Begin()
if err != nil { if err != nil {
return nil, err return nil, err
} }
var args = []interface{}{targetType} var args = []interface{}{string(targetType)}
selectSQL := `select uid, url, target_type, source, ext, operator, update_time from ` + store.table + ` where target_type = ?` selectSQL := `select uid, url, target_type, source, ext, operator, update_time, streamer_id from ` + store.table + ` where target_type = ?`
if len(operators) == 0 { if len(operators) == 0 {
selectSQL += " and operator = ?" selectSQL += " and operator = ?"
args = append(args, 0) args = append(args, 0)
@@ -140,14 +164,16 @@ func (store *SourceStore) Pop(targetType string, operators ...int32) (*Source, e
log.Println(err) log.Println(err)
} }
} }
store.popCount++
}() }()
s := &Source{} s := &Source{}
// uid, url, target_type, source, ext, operator // uid, url, target_type, source, ext, operator
err = row.Scan(&s.Uid, &s.Url, &s.TargetType, &s.Source, &s.Ext, &s.Operator, &s.UpdateTime) err = row.Scan(&s.Uid, &s.Url, &s.Target, &s.Source, &s.Ext, &s.Operator, &s.UpdateTime, &s.StreamerId)
if err != nil { if err != nil {
return nil, err return nil, err
} }
s.Set("LastOperator", s.Operator) s.Set("LastOperator", s.Operator)
_, err = tx.Exec("update "+store.table+" set operator = ? where uid = ?", OperatorWait, s.Uid) _, err = tx.Exec("update "+store.table+" set operator = ? where uid = ?", OperatorWait, s.Uid)
return s, nil return s, nil
@@ -159,16 +185,28 @@ const StreamerTable string = "streamer"
// CollectLogTable 采集日志表 // CollectLogTable 采集日志表
const CollectLogTable string = "collect_log" const CollectLogTable string = "collect_log"
type ExtractorStore struct { // StreamerListTable 主播表名称
const StreamerListTable string = "streamer_list"
type StoreExtractor struct {
db *sql.DB db *sql.DB
popCount int
errorCount int errorCount int
errorLimit int errorLimit int
} }
func (store *ExtractorStore) errorAlarm(err error) { func (store *StoreExtractor) PopCount() int {
return store.popCount
}
func (store *StoreExtractor) Close() error {
return store.db.Close()
}
func (store *StoreExtractor) errorAlarm(err error) {
if err != nil { if err != nil {
log.Panic("store error: ", err) log.Println("store error: ", err)
// 报警. 如果数据插入有问题 // 报警. 如果数据插入有问题
store.errorCount++ store.errorCount++
if store.errorCount >= store.errorLimit { if store.errorCount >= store.errorLimit {
@@ -181,108 +219,223 @@ func (store *ExtractorStore) errorAlarm(err error) {
} }
} }
func NewExtractorStore() *ExtractorStore { // NewStoreExtractor 生成一个extractor库的相关链接
func NewStoreExtractor() *StoreExtractor {
db, err := sql.Open("mysql", InitConfig.Database.ExtractorURI) db, err := sql.Open("mysql", InitConfig.Database.ExtractorURI)
if err != nil { if err != nil {
log.Panic(err) panic(err)
} }
return &ExtractorStore{db: db} return &StoreExtractor{db: db}
} }
/* // Pop 弹出一条未处理的数据
`uid` bigint, func (store *StoreExtractor) Pop(platform Platform, operators ...int32) (*Streamer, error) {
`platform` varchar(255) NOT NULL,
`anchor_id` varchar(255) NOT NULL,
`anchor_name` varchar(255) NOT NULL,
`live_url` text,
`channel` varchar(128) DEFAULT NULL,
`show_type` varchar(255) DEFAULT NULL,
*/
// UpdateStreamerLogUid Streamer表, 插入数据 tx, err := store.db.Begin()
func (store *ExtractorStore) UpdateStreamerLogUid(logUid, streamerUid int64) error {
_, err := store.db.Exec("update "+StreamerTable+" set latest_log_uid = ? where uid = ?", logUid, streamerUid)
if err != nil { if err != nil {
log.Panic(err) return nil, err
} }
return err var args = []interface{}{string(platform)}
selectSQL := `select uid, update_time, user_id, update_url, is_update_streamer, update_interval from ` + StreamerTable + ` where platform = ? and TIMESTAMPDIFF(MINUTE , update_time, CURRENT_TIMESTAMP()) >= update_interval`
if len(operators) == 0 {
selectSQL += " and operator = ?"
args = append(args, 0)
} else {
for _, operator := range operators {
selectSQL += " and operator = ?"
args = append(args, operator)
}
}
defer func() {
err := tx.Commit()
if err != nil {
log.Println(err)
err = tx.Rollback()
if err != nil {
log.Println(err)
}
}
store.popCount++
}()
// log.Println(selectSQL + ` limit 1 for update`)
row := tx.QueryRow(selectSQL+` limit 1 for update`, args...)
s := &Streamer{}
// uid, url, target_type, source, ext, operator
err = row.Scan(&s.Uid, &s.UpdateTime, &s.UserId, &s.UpdateUrl, &s.IsUpdateStreamer, &s.UpdateInterval)
if err != nil {
return nil, err
}
s.Set("LastOperator", s.Operator)
_, err = tx.Exec("update "+StreamerTable+" set operator = ? where uid = ?", OperatorWait, s.Uid)
return s, nil
}
// UpdateStreamerList streamerlist表, 更新数据
func (store *StoreExtractor) UpdateStreamerList(streamer IGet, fieldvalues ...interface{}) {
updateSQL := "UPDATE " + StreamerListTable + " SET "
var values []interface{}
for i := 0; i < len(fieldvalues); i += 2 {
field := fieldvalues[i]
values = append(values, fieldvalues[i+1])
updateSQL += field.(string) + " = ? "
}
updateSQL += "WHERE urlhash = ?"
values = append(values, streamer.Get("UrlHash"))
_, err := store.db.Exec(updateSQL, values...)
if err != nil {
panic(err)
}
}
// InsertStreamer streamerlist表, 插入数据
func (store *StoreExtractor) InsertStreamerList(streamerlist IGet) (isExists bool) {
urlstr := streamerlist.Get("Url").(string)
_, err := store.db.Exec("insert into streamer_list(urlhash, url, platform, label, serialize, update_interval, error_msg, operator) values(?,?,?,?,?,?,?,?)",
fmt.Sprintf("%x", md5.Sum([]byte(urlstr))),
urlstr,
streamerlist.Get("Platform"),
streamerlist.Get("Label"),
streamerlist.Get("Serialize"),
streamerlist.Get("UpdateInterval"),
streamerlist.Get("ErrorMsg"),
streamerlist.Get("Operator"),
)
if err != nil {
if !strings.HasPrefix(err.Error(), "Error 1062") {
log.Println(err)
}
return true
}
return false
} }
// InsertStreamer Streamer表, 插入数据 // InsertStreamer Streamer表, 插入数据
func (store *ExtractorStore) InsertStreamer(isource IGet) (Uid int64, err error) { func (store *StoreExtractor) InsertStreamer(streamer IGet) (isExists bool) {
// select uid from table where platform = ? and user_id = ? // select uid from table where platform = ? and user_id = ?
selectSQL := "select uid from " + StreamerTable + " where platform = ? and user_id = ?" selectSQL := "SELECT is_update_url, uid FROM " + StreamerTable + " WHERE platform = ? AND user_id = ?"
tx, err := store.db.Begin() tx, err := store.db.Begin()
if err != nil { if err != nil {
log.Println(err) panic(err)
return 0, err
} }
defer func() { defer func() {
err = tx.Commit() err = tx.Commit()
if err != nil { if err != nil {
log.Println(err) rerr := tx.Rollback()
err = tx.Rollback() if rerr != nil {
if err != nil { log.Println(rerr)
log.Println(err)
} }
Uid = 0 panic(err)
} }
}() }()
row := tx.QueryRow(selectSQL+` limit 1 for update`, isource.Get("Platform"), isource.Get("UserId")) row := tx.QueryRow(selectSQL+` LIMIT 1 FOR UPDATE`, streamer.Get("Platform"), streamer.Get("UserId"))
var isUpdateUrl bool
var uid int64 var Uid int64
if err = row.Scan(&uid); err == nil { if err = row.Scan(&isUpdateUrl, &Uid); err == nil {
return uid, nil if isUpdateUrl {
} else { tx.Exec("UPDATE "+StreamerTable+" SET update_url = ?", streamer.Get("UpdateUrl"))
log.Println(err) }
streamer.(ISet).Set("Uid", Uid)
return true
} }
result, err := tx.Exec("insert into "+StreamerTable+"(platform, user_id, user_name, live_url, channel, latest_log_uid, ext) values(?,?,?,?,?,?,?);", isource.Get("Platform"), isource.Get("UserId"), isource.Get("UserName"), isource.Get("LiveUrl"), isource.Get("Channel"), isource.Get("LatestLogUid"), isource.Get("Ext")) _, err = tx.Exec("INSERT INTO "+StreamerTable+"(platform, user_id, update_url, update_time) VALUES(?,?,?,?);", streamer.Get("Platform"), streamer.Get("UserId"), streamer.Get("UpdateUrl"), time.Now().Add(-time.Minute*60))
if err != nil { if err != nil {
log.Println(err) panic(err)
return 0, nil
} }
return false
return result.LastInsertId()
} }
// InsertCollectLog CollectLog表插入数据 // UpdateError 更新错误数据
func (store *ExtractorStore) InsertCollectLog(isource IGet) (int64, error) { func (store *StoreExtractor) UpdateError(isource IGetSet, err error) {
isource.Set("Operator", int32(OperatorError)+isource.Get("Operator").(int32))
isource.Set("ErrorMsg", sql.NullString{String: err.Error(), Valid: true})
_, dberr := store.db.Exec("update "+StreamerTable+" set operator = ?, error_msg = ? where uid = ?", isource.Get("Operator"), isource.Get("ErrorMsg"), isource.Get("Uid"))
if dberr != nil {
// email tell owner to deal with
panic(err)
}
}
// UpdateStreamerLog 只更新Streamer的关联日志和时间戳
func (store *StoreExtractor) UpdateStreamerLog(latestUid int64, streamerUid int64) {
_, err := store.db.Exec("UPDATE "+StreamerTable+" SET latest_log_uid = ?, update_time = CURRENT_TIMESTAMP() WHERE uid = ?", latestUid, streamerUid)
if err != nil {
panic(err)
}
}
// UpdateOperator Streamer表, 插入数据
func (store *StoreExtractor) UpdateOperator(isource IGet) {
_, err := store.db.Exec("update "+StreamerTable+" set operator = ?, error_msg = ? where uid = ?", isource.Get("Operator"), isource.Get("ErrorMsg"), isource.Get("Uid"))
if err != nil {
panic(err)
}
}
// UpdateStreamer Streamer表, 插入数据
func (store *StoreExtractor) UpdateStreamer(streamer IGet) {
_, err := store.db.Exec("UPDATE "+StreamerTable+" SET user_name = ?, live_url = ?, channel = ?, latest_log_uid = ?, tags = ?, ext = ?, operator = ?, update_time = ?, update_interval = ? WHERE uid = ?;",
streamer.Get("UserName"), streamer.Get("LiveUrl"), streamer.Get("Channel"), streamer.Get("LatestLogUid"), streamer.Get("Tags"), streamer.Get("Ext"), streamer.Get("Operator"), streamer.Get("UpdateTime"), streamer.Get("UpdateInterval"), streamer.Get("Uid"))
if err != nil {
panic(err)
}
}
// Update Streamer表, 更新指定的字段
func (store *StoreExtractor) Update(streamer IGet, fieldvalues ...interface{}) {
updateSQL := "UPDATE " + StreamerTable + " SET "
var values []interface{}
for i := 0; i < len(fieldvalues); i += 2 {
field := fieldvalues[i]
values = append(values, fieldvalues[i+1])
updateSQL += field.(string) + " = ? "
}
updateSQL += "WHERE uid = ?"
values = append(values, streamer.Get("Uid"))
_, err := store.db.Exec(updateSQL, values...)
if err != nil {
panic(err)
}
}
// InsertClog CollectLog表插入数据
func (store *StoreExtractor) InsertClog(clog IGet) int64 {
tx, err := store.db.Begin() tx, err := store.db.Begin()
defer func() { defer func() {
if err := recover(); err != nil { if err := recover(); err != nil {
log.Println(err) tx.Rollback()
err = tx.Rollback() log.Panic(err)
if err != nil {
log.Println(err)
}
os.Exit(0)
} }
}() }()
if err != nil { if err != nil {
log.Panic(err) panic(err)
} }
result, err := tx.Exec("insert into "+CollectLogTable+"(streamer_uid, platform, user_id, is_live_streaming, is_error, followers, views, giver, gratuity, live_title, live_start_time, live_end_time, update_time, tags, ext, error_msg) values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)", result, err := tx.Exec("insert into "+CollectLogTable+"(streamer_uid, platform, user_id, is_live_streaming, is_error, followers, views, giver, gratuity, live_title, live_start_time, live_end_time, update_time, tags, ext, error_msg) values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
isource.Get("StreamerUid"), isource.Get("Platform"), isource.Get("UserId"), isource.Get("IsLiveStreaming"), isource.Get("IsError"), isource.Get("Followers"), isource.Get("Views"), isource.Get("Giver"), isource.Get("Gratuity"), isource.Get("LiveTitle"), isource.Get("LiveStartTime"), isource.Get("LiveEndTime"), isource.Get("UpdateTime"), isource.Get("Tags"), isource.Get("Ext"), isource.Get("ErrorMsg"), clog.Get("StreamerUid"), clog.Get("Platform"), clog.Get("UserId"), clog.Get("IsLiveStreaming"), clog.Get("IsError"), clog.Get("Followers"), clog.Get("Views"), clog.Get("Giver"), clog.Get("Gratuity"), clog.Get("LiveTitle"), clog.Get("LiveStartTime"), clog.Get("LiveEndTime"), clog.Get("UpdateTime"), clog.Get("Tags"), clog.Get("Ext"), clog.Get("ErrorMsg"),
) )
if err != nil { if err != nil {
log.Panic(err) panic(err)
} }
logUid, err := result.LastInsertId() logUid, err := result.LastInsertId()
if err != nil { if err != nil {
log.Panic(err) panic(err)
} }
_, err = tx.Exec("update "+StreamerTable+" set latest_log_uid = ? where uid = ?", logUid, isource.Get("StreamerUid")) _, err = tx.Exec("update "+StreamerTable+" set latest_log_uid = ? where uid = ?", logUid, clog.Get("StreamerUid"))
if err = tx.Commit(); err != nil { if err = tx.Commit(); err != nil {
log.Panic(err) panic(err)
} }
return result.LastInsertId() return logUid
} }

View File

@@ -6,5 +6,10 @@ type SourceTable string
const ( const (
// STOpenrec openrec源table名称 // STOpenrec openrec源table名称
STOpenrec SourceTable = "source_openrec" STOpenrec SourceTable = "source_openrec"
)
// STTwitch twitch源table名称
STTwitch SourceTable = "source_twitch"
// STTwitcasting STTwitcasting源table名称
STTwitcasting SourceTable = "source_twitcasting"
)

View File

@@ -1,12 +1,18 @@
package intimate package intimate
// TargetType 源的 目标类型 列表 // Target 源的 目标类型 列表
type TargetType string type Target string
const ( const (
// TTOpenrecRanking openrec源TargetType名称 // TOpenrecRanking 获取排名 Target名称
TTOpenrecRanking TargetType = "openrec_ranking" TOpenrecRanking Target = "openrec_ranking"
// TTOpenrecUser openrec源TargetType名称 // TOpenrecUser 获取用户列表 源Target名称
TTOpenrecUser TargetType = "openrec_ranking" TOpenrecUser Target = "openrec_user"
// TTwitchChannel twitch 获取类别操作目标
TTwitchChannel Target = "twitch_channel"
// TTwitchUser twitch 获取类别操作目标
TTwitchUser Target = "twitch_user"
) )

View File

@@ -1,7 +1,7 @@
package main package main
import ( import (
"database/sql" "encoding/json"
"intimate" "intimate"
"log" "log"
"os" "os"
@@ -17,8 +17,11 @@ import (
var openrecRanking *OpenrecRanking var openrecRanking *OpenrecRanking
// store 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql // sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
var store *intimate.SourceStore = intimate.NewSourceStore(string(intimate.STOpenrec)) var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STOpenrec))
// estore 解析存储连接实例
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
func init() { func init() {
@@ -67,31 +70,59 @@ func (or *OpenrecRanking) Execute(cxt *hunter.TaskContext) {
tp := cxt.Temporary() tp := cxt.Temporary()
content := resp.Content() content := resp.Content()
if len(content) <= 200 { // 末页退出 if len(content) <= 200 { //末页时没有内容返回, 末页退出
finishpoint := time.Now() finishpoint := time.Now()
log.Println("任务结束休眠, 下次启动时间:", finishpoint.Add(time.Minute*30)) log.Println("任务Ranking UserId结束休眠, 下次启动时间:", finishpoint.Add(time.Minute*120))
for time.Now().Sub(finishpoint) < time.Minute*60 { for time.Now().Sub(finishpoint) < time.Minute*120 {
time.Sleep(time.Second) time.Sleep(time.Second)
if atomic.LoadInt32(&loop) > 0 { if atomic.LoadInt32(&loop) <= 0 {
return return
} }
} }
log.Println("获取Ranking UserId启动:", time.Now())
querys := tp.GetQuery()
querys.Set("page", strconv.Itoa(1))
tp.SetQuery(querys)
continue continue
} }
result := gjson.ParseBytes(content) result := gjson.ParseBytes(content)
if result.IsArray() { if result.IsArray() {
for _, User := range result.Array() { for _, User := range result.Array() {
data := &intimate.Source{}
userid := User.Get("channel.id").String() userid := User.Get("channel.id").String()
data.Source = sql.NullString{String: userid, Valid: len(userid) > 0} // data := &intimate.Source{}
data.Url = tp.GetRawURL() // data.Source = sql.NullString{String: userid, Valid: len(userid) > 0}
data.TargetType = string(intimate.TTOpenrecUser) // data.Url = tp.GetRawURL()
store.Insert(data) // data.TargetType = string(intimate.TTOpenrecUser)
// sstore.Insert(data)
streamer := &intimate.Streamer{}
streamer.UserId = userid
streamer.Platform = intimate.Popenrec
updateUrl := make(map[string]interface{})
supportersUrl := "curl 'https://www.openrec.tv/viewapp/api/v6/supporters?identify_id=sumomo_xqx&month=&Uuid=B96EE988-E3A2-4A44-A543-611A8B4BC683&Token=46598c320408bd69ae3c63298f6f4a3a97354175&Random=AZVXNAAXQVMOSVWNDPIQ&page_number=1' -H 'accept: application/json, text/javascript, */*; q=0.01' -H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36' -H 'cookie: uuid=B96EE988-E3A2-4A44-A543-611A8B4BC683;' --compressed"
updateUrl["supporters"] = supportersUrl
updateUrl["user"] = "https://www.openrec.tv/user/" + userid
updateUrl["live"] = "https://www.openrec.tv/live/" + userid
updateUrlBytes, err := json.Marshal(updateUrl)
if err != nil {
estore.UpdateError(streamer, err)
continue
}
streamer.UpdateUrl = updateUrlBytes
estore.InsertStreamer(streamer)
} }
} }
// 修改url query 参数的page递增. 遍历所有页面
querys := tp.GetQuery() querys := tp.GetQuery()
page, err := strconv.Atoi(querys.Get("page")) page, err := strconv.Atoi(querys.Get("page"))
if err != nil { if err != nil {
@@ -102,6 +133,7 @@ func (or *OpenrecRanking) Execute(cxt *hunter.TaskContext) {
page++ page++
querys.Set("page", strconv.Itoa(page)) querys.Set("page", strconv.Itoa(page))
tp.SetQuery(querys) tp.SetQuery(querys)
time.Sleep(time.Second * 2)
time.Sleep(time.Second * 1)
} }
} }

View File

@@ -2,6 +2,7 @@ package main
import ( import (
"testing" "testing"
"time"
"github.com/tidwall/gjson" "github.com/tidwall/gjson"
@@ -56,6 +57,12 @@ func TestRanking(t *testing.T) {
ht.Execute() ht.Execute()
} }
func TestTimeAdd(t *testing.T) {
finishpoint := time.Now()
time.Sleep(time.Second * 2)
t.Error(time.Now().Sub(finishpoint) > time.Second*1)
}
func TestRankingInsert(t *testing.T) { func TestRankingInsert(t *testing.T) {
ht := hunter.NewHunter(openrecRanking) ht := hunter.NewHunter(openrecRanking)
ht.Execute() ht.Execute()

View File

@@ -20,8 +20,11 @@ import (
var oer *OpenrecExtratorRanking var oer *OpenrecExtratorRanking
// store 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql // sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
var store *intimate.SourceStore = intimate.NewSourceStore(string(intimate.STOpenrec)) var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STOpenrec))
// estore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_extractor.sql
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
func init() { func init() {
oer = &OpenrecExtratorRanking{} oer = &OpenrecExtratorRanking{}
@@ -48,9 +51,9 @@ func (oer *OpenrecExtratorRanking) Execute(cxt *hunter.TaskContext) {
for atomic.LoadInt32(&loop) > 0 { for atomic.LoadInt32(&loop) > 0 {
source, err := store.Pop(string(intimate.TTOpenrecUser)) streamer, err := estore.Pop(intimate.Popenrec) //队列里弹出一个streamer行. 进行解析
if source == nil || err != nil { if streamer == nil || err != nil {
if err != lasterr { if err != lasterr {
log.Println(err, lasterr) log.Println(err, lasterr)
lasterr = err lasterr = err
@@ -59,29 +62,38 @@ func (oer *OpenrecExtratorRanking) Execute(cxt *hunter.TaskContext) {
continue continue
} }
userId := source.Source.String userId := streamer.UserId
userUrl := "https://www.openrec.tv/user/" + userId
tp := cxt.Session().Get(userUrl) var updateUrl map[string]string
err = json.Unmarshal(streamer.UpdateUrl.([]byte), &updateUrl) // 反序列化update_url, 里面存了需要采集的url
if err != nil {
log.Println(err)
continue
}
// Check Userid
userUrl := updateUrl["user"]
tp := cxt.Session().Get(userUrl) // 获取user url页面数据
resp, err := tp.Execute() resp, err := tp.Execute()
source.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true} streamer.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true}
if err != nil { if err != nil {
log.Println(err) log.Println(err)
store.UpdateError(source, err) estore.UpdateError(streamer, err)
continue continue
} }
cookies := cxt.Session().GetCookies(tp.GetParsedURL()) cookies := cxt.Session().GetCookies(tp.GetParsedURL())
scurl := "https://www.openrec.tv/viewapp/api/v6/supporters?identify_id=sumomo_xqx&month=&Uuid=B96EE988-E3A2-4A44-A543-611A8B4BC683&Token=46598c320408bd69ae3c63298f6f4a3a97354175&Random=AZVXNAAXQVMOSVWNDPIQ&page_number=1 -H 'accept: application/json, text/javascript, */*; q=0.01' -H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36' -H 'cookie: uuid=B96EE988-E3A2-4A44-A543-611A8B4BC683;' --compressed" scurl := updateUrl["supporters"] //获取打赏者的数据
curl := gcurl.ParseRawCURL(scurl) curl := gcurl.ParseRawCURL(scurl)
supportersSession := curl.CreateSession() supportersSession := curl.CreateSession()
temporary := curl.CreateTemporary(supportersSession) temporary := curl.CreateTemporary(supportersSession)
supportersSession.SetCookies(temporary.GetParsedURL(), cookies) supportersSession.SetCookies(temporary.GetParsedURL(), cookies)
var supporters []string var supporters []string
for { for { // supporters 数据需要登录信息. 下面为赋值 supporters链接获取的uid token random码
supportersQuery := temporary.GetQuery() supportersQuery := temporary.GetQuery()
@@ -110,13 +122,13 @@ func (oer *OpenrecExtratorRanking) Execute(cxt *hunter.TaskContext) {
log.Println(err) log.Println(err)
} }
supporterjson := gjson.ParseBytes(resp.Content()) supporterjson := gjson.ParseBytes(resp.Content())
supporterdata := supporterjson.Get("data") supporterdata := supporterjson.Get("data") //解析supporters获取的json数据
if supporterdata.Type == gjson.Null { if supporterdata.Type == gjson.Null {
break break
} }
supporters = append(supporters, string(resp.Content())) supporters = append(supporters, string(resp.Content()))
page := supportersQuery.Get("page_number") page := supportersQuery.Get("page_number") // page_number 加1
pageint, err := strconv.Atoi(page) pageint, err := strconv.Atoi(page)
if err != nil { if err != nil {
log.Println(err) log.Println(err)
@@ -131,28 +143,36 @@ func (oer *OpenrecExtratorRanking) Execute(cxt *hunter.TaskContext) {
// cookies := cxt.Session().GetCookies(wf.GetParsedURL()) // cookies := cxt.Session().GetCookies(wf.GetParsedURL())
ext := make(map[string]interface{}) ext := make(map[string]interface{})
ext["supporters"] = supporters ext["json_supporters"] = supporters
ext["user"] = string(resp.Content()) ext["html_user"] = string(resp.Content())
tp = cxt.Session().Get("https://www.openrec.tv/live/" + userId) liveUrl := updateUrl["live"]
tp = cxt.Session().Get(liveUrl)
resp, err = tp.Execute() resp, err = tp.Execute()
if err != nil { if err != nil {
log.Println(err) log.Println(err)
store.UpdateError(source, err) estore.UpdateError(streamer, err)
continue continue
} }
ext["user_live"] = string(resp.Content()) ext["html_live"] = string(resp.Content())
ext["var_user_id"] = userId
extJsonBytes, err := json.Marshal(ext) extJsonBytes, err := json.Marshal(ext)
if err != nil { if err != nil {
log.Println(err) log.Println(err)
store.UpdateError(source, err) estore.UpdateError(streamer, err)
continue continue
} }
source.Operator = int32(intimate.OperatorOK) streamer.Operator = int32(intimate.OperatorOK)
source := &intimate.Source{}
source.Target = intimate.TOpenrecUser
source.Ext = string(extJsonBytes) source.Ext = string(extJsonBytes)
store.Update(source) source.StreamerId = sql.NullInt64{Int64: streamer.Uid, Valid: true}
sstore.Insert(source)
estore.UpdateOperator(streamer)
} }
} }

View File

@@ -0,0 +1 @@
package main

View File

@@ -0,0 +1,108 @@
package main
import (
"intimate"
"time"
"github.com/474420502/extractor"
"github.com/474420502/focus/compare"
"github.com/474420502/focus/tree/heap"
"log"
"testing"
"github.com/474420502/requests"
)
// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitcasting))
// estore 解析存储连接实例
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
func TestMain(t *testing.T) {
searchurl := "https://twitcasting.tv/rankingindex.php"
queuedict := make(map[string]bool)
queue := heap.New(compare.String)
queue.Put(searchurl)
queuedict[searchurl] = true
for surl, ok := queue.Pop(); ok; surl, ok = queue.Pop() {
ses := requests.NewSession()
resp, err := ses.Get(surl.(string)).Execute()
if err != nil {
panic(err)
}
etor := extractor.ExtractXml(resp.Content())
// doc, err := libxml2.ParseHTML(resp.Content())
// if err != nil {
// panic(err)
// }
// defer doc.Free()
result, err := etor.XPath("//*[contains(@class, 'tag')]/@href")
if err != nil {
panic(err)
}
// result, err := doc.Find("//*[contains(@class, 'tag')]/@href")
// if err != nil {
// panic(err)
// }
// defer result.Free()
iter := result.NodeIter()
for iter.Next() {
wurl := "https://twitcasting.tv" + iter.Node().NodeValue()
if ok := queuedict[wurl]; !ok {
log.Println(wurl)
sl := &intimate.StreamerList{}
sl.Platform = intimate.Ptwitcasting
sl.Url = wurl
sl.Operator = 0
sl.UpdateInterval = 120
sl.UpdateTime = time.Now()
estore.InsertStreamerList(sl)
queue.Put(wurl)
queuedict[wurl] = true
}
}
// doc.Find("//div[@class='tw-search-result-row']")
xps, err := etor.XPaths("//div[@class='tw-search-result-row']")
if err != nil {
log.Println(surl, err)
continue
}
// xps.ForEachTag(SearchProfile{})
// texts, errs := xps.ForEachText(".//span[@class='username']")
// if len(errs) > 0 {
// t.Error(errs)
// }
var splist = xps.ForEachTag(SearchProfile{})
for _, isp := range splist {
sp := isp.(*SearchProfile)
sp.UserId = sp.LiveUrl[1:]
// log.Println(sp.(SearchProfile))
}
for _, isp := range splist {
log.Println(isp.(*SearchProfile))
}
log.Println("finish remain", queue.Size())
}
}
type SearchProfile struct {
UserName string `exp:".//span[@class='username']" method:"Text"`
UserId string // `exp:".//span[@class='fullname']" method:"Text"`
LiveUrl string `exp:".//div[@class='usertext']/a[@href]" method:"Attribute,href Value"`
}

2
tasks/twitch/twitch_task1/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
twitch_task1
log

View File

@@ -0,0 +1,6 @@
package main
func main() {
e := ChannelLink{}
e.Execute()
}

View File

@@ -0,0 +1,112 @@
package main
import (
"database/sql"
"intimate"
"log"
"time"
"github.com/tebeka/selenium"
)
// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))
// estore 解析存储连接实例
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
// 获取类型的所有频道链接
// ChannelLink 频道链接
type ChannelLink struct {
}
// Execute 执行任务
func (cl *ChannelLink) Execute() {
var err error
wd := intimate.GetChromeDriver(3030)
ps := intimate.NewPerfectShutdown()
weburl := "https://www.twitch.tv/directory?sort=VIEWER_COUNT"
err = wd.Get(weburl)
if err != nil {
panic(err)
}
cardCondition := func(wd selenium.WebDriver) (bool, error) {
elements, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]")
if err != nil {
return false, err
}
return len(elements) > 0, nil
}
wd.WaitWithTimeout(cardCondition, time.Second*15)
time.Sleep(time.Second)
e, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
if err != nil {
panic(err)
}
e.Click()
var hrefs map[string]bool = make(map[string]bool)
var delayerror = 5
for i := 0; i <= 200; i++ {
cards, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]")
if err != nil {
log.Println(err)
break
}
if len(hrefs) == 0 {
delayerror--
if delayerror <= 0 {
break
}
} else {
delayerror = 5
}
for ii := 0; ii < 10; ii++ {
for _, card := range cards {
href, err := card.GetAttribute("href")
if err != nil {
log.Println(href, err)
continue
} else {
hrefs[href] = true
}
}
break
}
if ps.IsClose() {
break
}
if len(cards) > 10 {
log.Println(len(cards))
wd.ExecuteScript(`items = document.evaluate("//div[@data-target='directory-page__card-container']/../self::div[@data-target and @style]", document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
for (var i = 0; i < items.snapshotLength - 10; i++) { item = items.snapshotItem(i); item.remove() ;};`, nil)
}
time.Sleep(time.Millisecond * 200)
wd.KeyDown(selenium.EndKey)
time.Sleep(time.Millisecond * 200)
wd.KeyUp(selenium.EndKey)
time.Sleep(time.Millisecond * 2500)
}
for href := range hrefs {
// TODO: Save href
source := &intimate.Source{}
source.Source = sql.NullString{String: href, Valid: true}
source.Operator = 0
source.Target = intimate.TTwitchChannel
source.Url = weburl
sstore.Insert(source)
}
log.Println("hrefs len:", len(hrefs))
sstore.Deduplicate(intimate.TTwitchChannel, "source")
}

View File

@@ -0,0 +1,14 @@
package main
import (
"testing"
)
func TestCase1(t *testing.T) {
e := ChannelLink{}
e.Execute()
}
func TestLiveUrl(t *testing.T) {
}

2
tasks/twitch/twitch_task2/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
twitch_task2
log

View File

@@ -0,0 +1,6 @@
package main
func main() {
ul := UserList{}
ul.Execute()
}

View File

@@ -0,0 +1,180 @@
package main
import (
"database/sql"
"encoding/json"
"intimate"
"log"
"regexp"
"time"
"github.com/tebeka/selenium"
)
// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))
// estore 解析存储连接实例
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
// 获取类型的所有频道链接
// UserList 频道链接
type UserList struct {
}
// Execute 执行任务
func (cl *UserList) Execute() {
// DELETE FROM source_twitch WHERE uid NOT IN (SELECT MAX(s.uid) FROM (SELECT uid, source FROM source_twitch ) s GROUP BY s.source) ;
//article//a[@data-a-target='preview-card-title-link']
wd := intimate.GetChromeDriver(3030)
defer wd.Close()
defer wd.Quit()
ps := intimate.NewPerfectShutdown()
counter := intimate.NewCounter()
counter.SetMaxLimit(100)
counter.SetMaxToDo(func(olist ...interface{}) error {
owd := olist[0].(*selenium.WebDriver)
if err := (*owd).Quit(); err != nil {
log.Println(err)
}
*owd = intimate.GetChromeDriver(3030)
return nil
}, &wd)
for !ps.IsClose() {
var err error
sourceChannel, err := sstore.Pop(intimate.TTwitchChannel)
if err != nil {
panic(err)
}
weburl := sourceChannel.Source.String + "?sort=VIEWER_COUNT"
err = wd.Get(weburl)
if err != nil {
log.Println(err)
sstore.UpdateError(sourceChannel, err)
time.Sleep(time.Second * 10)
continue
}
wd.WaitWithTimeout(func(wd selenium.WebDriver) (bool, error) {
_, err := wd.FindElement(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
if err != nil {
return false, err
}
return true, nil
}, time.Second*10)
btn, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
if err != nil {
log.Println(err)
continue
}
btn.Click()
var elements []selenium.WebElement
var liveurls = 0
var delayerror = 2
for i := 0; i < 200 && !ps.IsClose(); i++ {
elements, err = wd.FindElements(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
if err != nil {
log.Println(err)
break
}
time.Sleep(time.Millisecond * 200)
wd.KeyDown(selenium.EndKey)
time.Sleep(time.Millisecond * 200)
wd.KeyUp(selenium.EndKey)
time.Sleep(time.Millisecond * 2000)
if len(elements) == liveurls {
delayerror--
if delayerror <= 0 {
break
}
} else {
delayerror = 2
}
liveurls = len(elements)
}
articles, err := wd.FindElements(selenium.ByXPATH, "//article")
if err != nil {
log.Println(err)
continue
}
for _, article := range articles {
e, err := article.FindElement(selenium.ByXPATH, ".//a[@data-a-target='preview-card-title-link' and @href]")
if err != nil {
log.Println(err)
continue
}
href, err := e.GetAttribute("href")
if err != nil {
log.Println(err)
continue
}
btns, err := article.FindElements(selenium.ByXPATH, ".//div[@class='tw-full-width tw-inline-block']//button")
if err != nil {
log.Println(err)
continue
}
var tags []string
for _, btn := range btns {
tag, err := btn.GetAttribute("data-a-target")
if err == nil {
tags = append(tags, tag)
}
}
streamer := &intimate.Streamer{}
matches := regexp.MustCompile(`https://www.twitch.tv/(\w+)`).FindStringSubmatch(href)
if len(matches) == 2 {
streamer.UserId = matches[1]
} else {
log.Println(href)
continue
}
jtags, err := json.Marshal(tags)
if err != nil {
log.Println(err)
} else {
streamer.Tags = jtags
}
streamer.Platform = intimate.Ptwitch
updateUrl := make(map[string]string)
updateUrl["live"] = href
streamer.LiveUrl = sql.NullString{String: href, Valid: true}
data, err := json.Marshal(updateUrl)
if err != nil {
log.Println(err)
continue
}
streamer.UpdateUrl = data
streamer.Operator = 0
if estore.InsertStreamer(streamer) {
// log.Println("streamer update tags", streamer.Uid, tags)
estore.Update(streamer, "Tags", streamer.Tags)
}
}
log.Println("streamer find", len(articles))
if len(articles) == 0 {
sourceChannel.Operator = 5
sstore.UpdateOperator(sourceChannel)
}
counter.AddWithReset(1)
}
wd.Close()
wd.Quit()
}

View File

@@ -0,0 +1,7 @@
package main
import "testing"
func TestMain(t *testing.T) {
main()
}

195
utils.go
View File

@@ -1,8 +1,19 @@
package intimate package intimate
import ( import (
"fmt"
"log" "log"
"os"
"os/signal"
"runtime"
"strconv"
"strings"
"sync/atomic"
"syscall"
"time" "time"
"github.com/tebeka/selenium"
"github.com/tebeka/selenium/chrome"
) )
var zeroTime time.Time var zeroTime time.Time
@@ -17,6 +28,12 @@ func init() {
} }
// ParseNumber 去逗号解析数字
func ParseNumber(number string) (int64, error) {
number = strings.ReplaceAll(number, ",", "")
return strconv.ParseInt(number, 10, 64)
}
// ParseDuration time to duration eg: 1:40:00 -> time.Duration // ParseDuration time to duration eg: 1:40:00 -> time.Duration
func ParseDuration(dt string) (time.Duration, error) { func ParseDuration(dt string) (time.Duration, error) {
@@ -37,8 +54,184 @@ func ParseDuration(dt string) (time.Duration, error) {
tdt, err := time.Parse("15:04:05", string(parse)) tdt, err := time.Parse("15:04:05", string(parse))
if err != nil { if err != nil {
return time.Duration(0), err return time.Duration(0), err
} }
return tdt.Sub(zeroTime), nil return tdt.Sub(zeroTime), nil
} }
func GetChromeDriver(port int) selenium.WebDriver {
var err error
caps := selenium.Capabilities{"browserName": "chrome"}
chromecaps := chrome.Capabilities{}
for _, epath := range []string{"../../../crx/myblock.crx", "../../crx/myblock.crx"} {
_, err := os.Stat(epath)
if err == nil {
err := chromecaps.AddExtension(epath)
if err != nil {
panic(err)
}
break
}
}
if proxy := os.Getenv("chrome_proxy"); proxy != "" {
log.Println("proxy-server", proxy)
chromecaps.Args = append(chromecaps.Args, "--proxy-server="+proxy)
}
if proxy := os.Getenv("pac_proxy"); proxy != "" {
log.Println("--proxy-pac-url=" + proxy)
chromecaps.Args = append(chromecaps.Args, "--proxy-pac-url="+proxy)
}
// chromecaps.Args = append(chromecaps.Args, "--proxy-pac-url=http://127.0.0.1:1081/pac")
chromecaps.Args = append(chromecaps.Args, "--disk-cache-dir=/tmp/chromedriver-cache")
chromecaps.Args = append(chromecaps.Args, "--disable-gpu", "--disable-images", "--start-maximized", "--disable-infobars")
// chromecaps.Args = append(chromecaps.Args, "--headless")
chromecaps.Args = append(chromecaps.Args, "--no-sandbox")
chromecaps.Args = append(chromecaps.Args, "--disable-dev-shm-usage", "--mute-audio", "--safebrowsing-disable-auto-update")
chromecaps.ExcludeSwitches = append(chromecaps.ExcludeSwitches, "enable-automation")
caps.AddChrome(chromecaps)
_, err = selenium.NewChromeDriverService("/usr/bin/chromedriver", port)
if err != nil {
panic(err)
}
wd, err := selenium.NewRemote(caps, fmt.Sprintf("http://localhost:%d/wd/hub", port))
if err != nil {
panic(err)
}
runtime.SetFinalizer(wd, func(obj interface{}) {
if err := obj.(selenium.WebDriver).Close(); err != nil {
log.Println(err)
}
if err := obj.(selenium.WebDriver).Quit(); err != nil {
log.Println(err)
}
})
wd.ExecuteScript("windows.navigator.webdriver = undefined", nil)
if err != nil {
panic(err)
}
return wd
}
// PerfectShutdown 完美关闭程序
type PerfectShutdown struct {
loop int32
}
// NewPerfectShutdown 创建完美关闭程序
func NewPerfectShutdown() *PerfectShutdown {
ps := &PerfectShutdown{}
ps.loop = 1
go func() {
signalchan := make(chan os.Signal)
signal.Notify(signalchan, syscall.SIGINT, syscall.SIGKILL, syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGSTOP)
log.Println("accept stop command:", <-signalchan)
atomic.StoreInt32(&ps.loop, 0)
}()
return ps
}
// IsClose 判断是否要关闭
func (ps *PerfectShutdown) IsClose() bool {
return atomic.LoadInt32(&ps.loop) == 0
}
type Counter struct {
dcount int
count int
maxLimit int
minLimit int
minobj []interface{}
maxobj []interface{}
maxLimitToDo func(obj ...interface{}) error
minLimitToDo func(obj ...interface{}) error
}
func NewCounter() *Counter {
c := &Counter{}
return c
}
// SetDefault 设置默认值
func (c *Counter) SetDefault(n int) {
c.dcount = n
}
// Reset 最置count为defaultCount值
func (c *Counter) Reset() {
c.count = c.dcount
}
// SetCount 设置count到最大值的时候执行do函数
func (c *Counter) SetCount(count int) {
c.count = count
}
// GetCount 设置count到最大值的时候执行do函数
func (c *Counter) GetCount() int {
return c.count
}
// SetMinLimit 设置最小限制
func (c *Counter) SetMinLimit(n int) {
c.minLimit = n
}
// SetMaxLimit 设置最大限制
func (c *Counter) SetMaxLimit(n int) {
c.maxLimit = n
}
// SetMaxToDo 设置count到最大值的时候执行do函数
func (c *Counter) SetMaxToDo(do func(obj ...interface{}) error, obj ...interface{}) {
c.maxLimitToDo = do
c.maxobj = obj
}
// SetMinToDo 设置count到最小值的时候执行do函数
func (c *Counter) SetMinToDo(do func(obj ...interface{}) error, obj ...interface{}) {
c.minLimitToDo = do
c.minobj = obj
}
// AddWithReset 操作 count 默认值为0, 当触发限制时, 重置为默认值
func (c *Counter) AddWithReset(n int) error {
c.count += n
if c.maxLimitToDo != nil {
if c.count >= c.maxLimit {
defer c.Reset()
return c.maxLimitToDo(c.maxobj...)
}
}
if c.minLimitToDo != nil {
if c.count <= c.minLimit {
defer c.Reset()
return c.minLimitToDo(c.minobj...)
}
}
return nil
}
// Add 操作 count 默认值为0
func (c *Counter) Add(n int) error {
c.count += n
if c.maxLimitToDo != nil {
if c.count >= c.maxLimit {
return c.maxLimitToDo(c.maxobj...)
}
}
if c.minLimitToDo != nil {
if c.count <= c.minLimit {
return c.minLimitToDo(c.minobj...)
}
}
return nil
}

1
xvfb.sh Normal file
View File

@@ -0,0 +1 @@
screen -dmS xvfb-99 Xvfb :99 -screen 0 1280x720x24 -ac -nolisten tcp -dpi 96 +extension RANDR -nolisten tcp