254 lines
5.8 KiB
Go
254 lines
5.8 KiB
Go
package main
|
|
|
|
import (
|
|
"database/sql"
|
|
"encoding/json"
|
|
"intimate"
|
|
"io/ioutil"
|
|
"log"
|
|
"os"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/474420502/hunter"
|
|
"github.com/474420502/requests"
|
|
"github.com/lestrrat-go/libxml2"
|
|
"github.com/tidwall/gjson"
|
|
)
|
|
|
|
// TestCase1 checks that an RFC 3339 timestamp carrying a +09:00 offset is
// parsed to the expected instant, and logs the local/UTC renderings for
// manual inspection.
func TestCase1(t *testing.T) {
	const date = "2020-07-13T18:58:24+09:00"

	// time.RFC3339 is the layout "2006-01-02T15:04:05Z07:00".
	tm, err := time.Parse(time.RFC3339, date)
	if err != nil {
		t.Fatalf("parsing %q: %v", date, err)
	}

	// 18:58:24 at +09:00 is 09:58:24 UTC; compare instants, not wall clocks.
	want := time.Date(2020, time.July, 13, 9, 58, 24, 0, time.UTC)
	if !tm.Equal(want) {
		t.Errorf("parsed %q as %v, want %v", date, tm.UTC(), want)
	}

	// Diagnostics only: t.Log does not mark the test as failed.
	t.Log(time.Now())
	t.Log(tm.Local().UTC(), tm.Local())
}
|
|
|
|
// TestCase2 checks that two "H:MM:SS" clock strings can be parsed with the
// "15:04:05" layout and subtracted to obtain the elapsed duration.
func TestCase2(t *testing.T) {
	const (
		layout    = "15:04:05"
		duration1 = "0:00:00"
		duration2 = "4:56:04"
	)

	// Check each parse on its own so the first error is not overwritten by
	// the second call.
	tm2, err := time.Parse(layout, duration2)
	if err != nil {
		t.Fatalf("parsing %q: %v", duration2, err)
	}
	tm1, err := time.Parse(layout, duration1)
	if err != nil {
		t.Fatalf("parsing %q: %v", duration1, err)
	}

	got := tm2.Sub(tm1)
	want := 4*time.Hour + 56*time.Minute + 4*time.Second
	if got != want {
		t.Errorf("%q - %q = %v, want %v", duration2, duration1, got, want)
	}

	// Diagnostics only: t.Log does not mark the test as failed.
	t.Log(got)
}
|
|
|
|
func TestCase(t *testing.T) {
|
|
f, _ := os.Open("./test.html")
|
|
data, _ := ioutil.ReadAll(f)
|
|
|
|
doc, err := libxml2.ParseHTML(data)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
// doc.CreateElement("meta")
|
|
// "<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">"
|
|
|
|
xresult, err := doc.Find("/html/head")
|
|
ele, err := doc.CreateElement(`META`)
|
|
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
ele.SetAttribute("charset", "utf-8")
|
|
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
iter := xresult.NodeIter()
|
|
if iter.Next() {
|
|
n := iter.Node()
|
|
|
|
err = n.AddChild(ele)
|
|
// childs, err := n.ChildNodes()
|
|
if err != nil {
|
|
t.Error(err)
|
|
}
|
|
t.Error(n)
|
|
}
|
|
|
|
xr, err := doc.Find("//h1[ contains(@class, 'MovieTitle__Title')]")
|
|
if err != nil {
|
|
panic(nil)
|
|
}
|
|
|
|
t.Error(xr)
|
|
}
|
|
|
|
func TestExtractor(t *testing.T) {
|
|
|
|
ses := requests.NewSession()
|
|
tp := ses.Get("https://www.openrec.tv/user/Riowh/supporters")
|
|
tp.Execute()
|
|
|
|
// t.Error(ses.GetCookies(wf.GetParsedURL()))
|
|
|
|
collect := intimate.NewExtractorStore()
|
|
store := intimate.NewSourceStore("source_openrec")
|
|
source, err := store.Pop(string(intimate.TTOpenrecRanking), 100)
|
|
|
|
anchorId := source.GetSource().String
|
|
|
|
ai := &intimate.AnchorInfo{}
|
|
ai.SetAnchorId(anchorId)
|
|
ai.SetPlatform(string(intimate.Popenrec))
|
|
|
|
sdata := source.GetExt().([]byte)
|
|
|
|
if gjson.ValidBytes(sdata) {
|
|
result := gjson.ParseBytes(sdata)
|
|
m := result.Map()
|
|
|
|
user := m["user"]
|
|
|
|
clog := &intimate.CollectLog{}
|
|
extractor := hunter.NewExtractor([]byte(user.Str))
|
|
xp, err := extractor.XPathResult("//p[@class='c-global__user__count__row__right js-userCountFollowers']/text()")
|
|
if err != nil {
|
|
t.Error(err)
|
|
}
|
|
if !xp.NodeIter().Next() {
|
|
t.Error("不存在粉丝数")
|
|
}
|
|
|
|
followers := strings.ReplaceAll(xp.String(), ",", "")
|
|
followersInt, err := strconv.ParseInt(followers, 10, 64)
|
|
|
|
if err != nil {
|
|
t.Error(err)
|
|
}
|
|
|
|
var anchorName string
|
|
xp, err = extractor.XPathResult("//p[@class='c-global__user__profile__list__name__text official-icon--after']/text()")
|
|
if xp.NodeIter().Next() {
|
|
anchorName = xp.String()
|
|
} else {
|
|
t.Error(err)
|
|
}
|
|
|
|
t.Error(source.GetSource())
|
|
t.Error(anchorName)
|
|
|
|
ai.SetAnchorName(anchorName)
|
|
|
|
// c-contents
|
|
xp, err = extractor.XPathResult("//ul[@class='c-contents']//p[@class='c-thumbnailVideo__footer__liveCount']/text()")
|
|
if err != nil {
|
|
log.Println(err)
|
|
}
|
|
if xp.NodeIter().Next() {
|
|
views := regexp.MustCompile(`[0-9,]+`).FindString(xp.String())
|
|
views = strings.ReplaceAll(views, ",", "")
|
|
viewsint, err := strconv.Atoi(views)
|
|
if err != nil {
|
|
t.Error(err)
|
|
}
|
|
|
|
clog.SetViews(sql.NullInt64{Int64: int64(viewsint), Valid: true})
|
|
clog.SetIsShowing(1)
|
|
}
|
|
|
|
var givers []interface{}
|
|
var gratuity int64 = 0
|
|
giverjson := m["supporters"]
|
|
for _, v := range giverjson.Array() {
|
|
giverSource := gjson.Parse(v.String())
|
|
for _, item := range giverSource.Get("data.items").Array() {
|
|
givers = append(givers, item.Map())
|
|
gratuity += item.Get("total_yells").Int()
|
|
}
|
|
}
|
|
|
|
giversbytes, err := json.Marshal(givers)
|
|
if err != nil {
|
|
t.Error(err)
|
|
clog.SetErrorMsg(sql.NullString{String: err.Error(), Valid: true})
|
|
} else {
|
|
clog.SetGiver(giversbytes)
|
|
}
|
|
|
|
// MovieToolbar__Views-g5e6ic-13 iDRGyA
|
|
livejson := m["user_live"]
|
|
|
|
// f, err := os.OpenFile("./test.html", os.O_CREATE|os.O_TRUNC|os.O_RDWR, os.ModePerm)
|
|
// if err != nil {
|
|
// panic(err)
|
|
// }
|
|
// f.WriteString(livejson.String())
|
|
|
|
extractor = hunter.NewExtractor([]byte(livejson.Str))
|
|
// xr, err := extractor.XPathResult("//h1[ contains(@class, 'MovieTitle__Title')]")
|
|
// if err != nil {
|
|
// t.Error(err)
|
|
// }
|
|
|
|
mathes := regexp.MustCompile("MovieTitle__Title.*>(.+)</h1>").FindStringSubmatch(livejson.Str)
|
|
if len(mathes) == 2 {
|
|
|
|
clog.SetShowTitle(sql.NullString{String: mathes[1], Valid: true})
|
|
|
|
content, err := extractor.XPathResult("//meta[@itemprop='uploadDate']/@content")
|
|
if err != nil {
|
|
t.Error(err)
|
|
}
|
|
|
|
iter := content.NodeIter()
|
|
if iter.Next() {
|
|
tm, err := time.ParseInLocation("2006-01-02T15:04:05Z07:00", iter.Node().NodeValue(), time.Local)
|
|
if err != nil {
|
|
t.Error(err)
|
|
}
|
|
clog.SetShowStartTime(sql.NullTime{Time: tm.Local(), Valid: true})
|
|
|
|
duration, err := extractor.XPathResult("//meta[@itemprop='duration']/@content")
|
|
if err != nil {
|
|
t.Error(err)
|
|
}
|
|
|
|
diter := duration.NodeIter()
|
|
if diter.Next() {
|
|
|
|
dt, err := intimate.ParseDuration(diter.Node().NodeValue())
|
|
if err != nil {
|
|
log.Println(err)
|
|
}
|
|
endtm := tm.Add(dt)
|
|
clog.SetShowEndTime(sql.NullTime{Time: endtm.Local(), Valid: true})
|
|
}
|
|
}
|
|
}
|
|
|
|
LiveUrl := "https://www.openrec.tv/live/" + anchorId
|
|
ai.SetLiveUrl(sql.NullString{String: LiveUrl, Valid: true})
|
|
|
|
Uid, err := collect.InsertAnchorInfo(ai)
|
|
if err != nil {
|
|
t.Error(err)
|
|
return
|
|
}
|
|
|
|
clog.SetUid(Uid)
|
|
clog.SetGratuity(sql.NullInt64{Int64: gratuity, Valid: true})
|
|
clog.SetPlatform(string(intimate.Popenrec))
|
|
clog.SetFollowers(sql.NullInt64{Int64: int64(followersInt), Valid: true})
|
|
clog.SetAnchorId(anchorId)
|
|
clog.SetUpdateTime(source.GetUpdateTime())
|
|
|
|
collect.InsertCollectLog(clog)
|
|
|
|
} else {
|
|
t.Error("data is not json:\n", string(sdata))
|
|
}
|
|
|
|
}
|