From a50c879d83a4fba2110c0acbe6fbf1f97c3e7859 Mon Sep 17 00:00:00 2001 From: eson Date: Fri, 14 Aug 2020 19:26:03 +0800 Subject: [PATCH] =?UTF-8?q?todo:=20openrec=20=E9=87=8D=E6=9E=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- extractor/openrec_extractor/openrec_extractor.go | 10 ++++++++++ .../twitcasting_extractor/twitcasting_extractor.go | 4 ++-- go.mod | 2 +- go.sum | 8 ++++++-- tasks/twitcasting/twitcasting_task1/twitcasting.go | 2 +- utils.go | 8 +++++--- 6 files changed, 25 insertions(+), 9 deletions(-) diff --git a/extractor/openrec_extractor/openrec_extractor.go b/extractor/openrec_extractor/openrec_extractor.go index b8a8943..4ac446f 100644 --- a/extractor/openrec_extractor/openrec_extractor.go +++ b/extractor/openrec_extractor/openrec_extractor.go @@ -10,6 +10,7 @@ import ( "strings" "time" + "github.com/474420502/extractor" "github.com/tidwall/gjson" ) @@ -23,6 +24,12 @@ type OpenrecExtractor struct { supporters *intimate.ExtractorSource } +type UserInfo struct { + UserName string `exp:"//p[ contains(@class, 'c-global__user__profile__list__name__text')]"` + Followers int `exp:"//p[@class='c-global__user__count__row__right js-userCountFollowers']" mth:"r:ParseNumber"` + Views int `exp:"//ul[@class='c-contents']//p[@class='c-thumbnailVideo__footer__liveCount']" mth:"r:ExtractNumber"` +} + func (oe *OpenrecExtractor) Execute() { ps := intimate.NewPerfectShutdown() @@ -55,6 +62,9 @@ func (oe *OpenrecExtractor) Execute() { oe.user = intimate.NewExtractorSource(&htmlUser) oe.user.CreateExtractor() + userEtor := extractor.ExtractHtmlString(htmlUser.String()) + log.Println(userEtor.GetObjectByTag(UserInfo{})) + htmlLive := datamap["html_live"] oe.userLive = intimate.NewExtractorSource(&htmlLive) oe.userLive.CreateExtractor() diff --git a/extractor/twitcasting_extractor/twitcasting_extractor.go b/extractor/twitcasting_extractor/twitcasting_extractor.go index 6351c3d..9f0b031 100644 --- a/extractor/twitcasting_extractor/twitcasting_extractor.go +++ b/extractor/twitcasting_extractor/twitcasting_extractor.go @@ -38,7 +38,7 @@ func main() { streamer, err := estore.Pop(intimate.Ptwitcasting) if err != nil { - log.Println(err, streamer.UserId) + log.Println(err, streamer) } streamer.LiveUrl = sql.NullString{String: "https://twitcasting.tv/" + streamer.UserId, Valid: true} @@ -93,7 +93,7 @@ func main() { streamer.Platform = intimate.Ptwitcasting streamer.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true} streamer.UserName = sql.NullString{String: ldata.UserName, Valid: true} - streamer.Operator = 10 + streamer.Operator = 0 // streamer.UpdateInterval = 60 clog := &intimate.CollectLog{} clog.UserId = streamer.UserId diff --git a/go.mod b/go.mod index 36800bf..16c756a 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module intimate go 1.14 require ( - github.com/474420502/extractor v0.7.2 + github.com/474420502/extractor v0.9.4-0.20200814111732-bc270321f8f9 github.com/474420502/focus v0.12.0 github.com/474420502/gcurl v0.2.0 github.com/474420502/hunter v0.3.4 diff --git a/go.sum b/go.sum index 5ed6621..1573290 100644 --- a/go.sum +++ b/go.sum @@ -2,8 +2,12 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMT cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= cloud.google.com/go v0.41.0/go.mod h1:OauMR7DV8fzvZIl2qg6rkaIhD/vmgk4iwEw/h6ercmg= -github.com/474420502/extractor v0.7.2 h1:idZnsekOKRV8fpJwsRcr6Ol7KSphKXe9tc+JJXgGqQ4= -github.com/474420502/extractor v0.7.2/go.mod h1:92J6QZKstpAKGhv+DibemhQbR/d6lJ+ftyR/ZHmeJ0w= +github.com/474420502/extractor v0.9.2 h1:pDDLpxq4bsWjwU3H8epBxDifnmMVBdT0onye2MXCJ8A= +github.com/474420502/extractor v0.9.2/go.mod h1:Ss0KTfwsdB4XBpNda/V50rx21V9bl6/eQmyl50mjAS4= +github.com/474420502/extractor v0.9.3 h1:Cjri64DbgWQQ64EjPiBSQfUH9l0cYlzU8py0PQu8pAs= +github.com/474420502/extractor v0.9.3/go.mod h1:Ss0KTfwsdB4XBpNda/V50rx21V9bl6/eQmyl50mjAS4= +github.com/474420502/extractor v0.9.4-0.20200814111732-bc270321f8f9 h1:cxgsTQwRJSiML4yBL40n/0pD/FbEqkCIXE7qq6hJyLg= +github.com/474420502/extractor v0.9.4-0.20200814111732-bc270321f8f9/go.mod h1:Ss0KTfwsdB4XBpNda/V50rx21V9bl6/eQmyl50mjAS4= github.com/474420502/focus v0.12.0 h1:+icbmj7IEOefvTegHt5EpcHt6WFbe2miIrceUJx2Evo= github.com/474420502/focus v0.12.0/go.mod h1:d0PMjtMxFz1a9HIhwyFPkWa+JF+0LgOrEUfd8iZka6s= github.com/474420502/gcurl v0.1.2 h1:ON9Yz3IgAdtDlFlHfkAJ3aIEBDxH0RiViPE5ST5ohKg= diff --git a/tasks/twitcasting/twitcasting_task1/twitcasting.go b/tasks/twitcasting/twitcasting_task1/twitcasting.go index 89204c2..a11e724 100644 --- a/tasks/twitcasting/twitcasting_task1/twitcasting.go +++ b/tasks/twitcasting/twitcasting_task1/twitcasting.go @@ -89,7 +89,7 @@ func Execute() { continue } - var splist = xps.ForEachTag(SearchProfile{}) + var splist = xps.ForEachObjectByTag(SearchProfile{}) for _, isp := range splist { sp := isp.(*SearchProfile) if sp.LiveUrl == "" { diff --git a/utils.go b/utils.go index cb7d782..2ed1a79 100644 --- a/utils.go +++ b/utils.go @@ -29,14 +29,16 @@ func init() { } // ParseNumber 去逗号解析数字 -func ParseNumber(number string) (int64, error) { - number = strings.ReplaceAll(number, ",", "") - return strconv.ParseInt(number, 10, 64) +func ParseNumber(num string) (int64, error) { + num = strings.Trim(num, " ") + num = strings.ReplaceAll(num, ",", "") + return strconv.ParseInt(num, 10, 64) } // ParseNumberEx 解析带字符的数字 func ParseNumberEx(num string) (float64, error) { num = strings.Trim(num, " ") + num = strings.ReplaceAll(num, ",", "") last := num[len(num)-1] factor := 1.0 switch {