intimate/extractor/openrec/openrec_test.go

161 lines
3.2 KiB
Go
Raw Normal View History

2020-07-10 04:05:33 +00:00
package main
import (
2020-07-14 11:00:34 +00:00
"database/sql"
"intimate"
"io/ioutil"
2020-07-14 11:00:34 +00:00
"log"
"os"
"regexp"
2020-07-10 04:05:33 +00:00
"testing"
"time"
2020-07-10 04:05:33 +00:00
"github.com/lestrrat-go/libxml2"
2020-07-14 11:00:34 +00:00
"github.com/tidwall/gjson"
2020-07-10 04:05:33 +00:00
)
// TestCase0 reads the ./test.html fixture and logs every tag label captured by
// the TagButton__Button anchor pattern.
//
// The matches are reported with t.Log (visible under `go test -v`) so that a
// successful run is not marked as a failure.
func TestCase0(t *testing.T) {
	f, err := os.Open("./test.html")
	if err != nil {
		t.Fatal(err)
	}
	// Release the file handle once the fixture has been read.
	defer f.Close()

	data, err := ioutil.ReadAll(f)
	if err != nil {
		t.Fatal(err)
	}

	// The capture group holds the 1-100 characters between the opening tag
	// that carries the TagButton__Button class and the closing </a.
	tagPattern := regexp.MustCompile(`TagButton__Button[^>]+>(.{1,100})</a`)
	matches := tagPattern.FindAllStringSubmatch(string(data), -1)
	t.Log(matches)
}
// TestCase1 checks that an RFC 3339 timestamp carrying a +09:00 offset parses
// to the expected instant, and logs the local and UTC renderings for
// inspection.
func TestCase1(t *testing.T) {
	const date = "2020-07-13T18:58:24+09:00"

	// time.RFC3339 is the layout "2006-01-02T15:04:05Z07:00".
	tm, err := time.Parse(time.RFC3339, date)
	if err != nil {
		t.Fatalf("parsing %q: %v", date, err)
	}

	// 18:58:24 at +09:00 is 09:58:24 UTC on the same day.
	want := time.Date(2020, time.July, 13, 9, 58, 24, 0, time.UTC)
	if !tm.Equal(want) {
		t.Errorf("parsed %q as %v, want %v", date, tm.UTC(), want)
	}

	// Informational output only; visible with `go test -v`.
	t.Log(time.Now())
	t.Log(tm.Local().UTC(), tm.Local())
}
// TestCase2 checks that an "H:MM:SS" duration string can be turned into a
// time.Duration by parsing it as a clock time and subtracting the parsed
// zero mark "0:00:00".
//
// Because the strings are parsed as clock times, this approach only covers
// values whose hour field is a valid hour of the day.
func TestCase2(t *testing.T) {
	const (
		layout = "15:04:05"
		zero   = "0:00:00"
		length = "4:56:04"
	)

	// Each parse error is checked on its own so the first one cannot be
	// overwritten by the second call.
	end, err := time.Parse(layout, length)
	if err != nil {
		t.Fatalf("parsing %q: %v", length, err)
	}
	start, err := time.Parse(layout, zero)
	if err != nil {
		t.Fatalf("parsing %q: %v", zero, err)
	}

	got := end.Sub(start)
	want := 4*time.Hour + 56*time.Minute + 4*time.Second
	if got != want {
		t.Errorf("duration of %q = %v, want %v", length, got, want)
	}
	t.Log(got)
}
func TestCase(t *testing.T) {
f, _ := os.Open("./test.html")
data, _ := ioutil.ReadAll(f)
doc, err := libxml2.ParseHTML(data)
if err != nil {
panic(err)
}
// doc.CreateElement("meta")
// "<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">"
xresult, err := doc.Find("/html/head")
ele, err := doc.CreateElement(`META`)
if err != nil {
panic(err)
}
ele.SetAttribute("charset", "utf-8")
if err != nil {
panic(err)
}
iter := xresult.NodeIter()
if iter.Next() {
n := iter.Node()
err = n.AddChild(ele)
// childs, err := n.ChildNodes()
if err != nil {
t.Error(err)
2020-07-14 11:00:34 +00:00
}
t.Error(n)
2020-07-14 11:00:34 +00:00
}
xr, err := doc.Find("//h1[ contains(@class, 'MovieTitle__Title')]")
if err != nil {
panic(nil)
}
t.Error(xr)
2020-07-14 11:00:34 +00:00
}
2020-07-10 04:05:33 +00:00
func TestExtractor(t *testing.T) {
2020-07-14 11:00:34 +00:00
collect := intimate.NewExtractorStore()
store := intimate.NewSourceStore("source_openrec")
for {
source, err := store.Pop(string(intimate.TTOpenrecRanking), 100)
2020-07-14 11:00:34 +00:00
if err != nil {
log.Println(err)
return
2020-07-14 11:00:34 +00:00
}
anchorId := source.GetSource().String
2020-07-14 11:00:34 +00:00
ai := &intimate.AnchorInfo{}
ai.SetAnchorId(anchorId)
ai.SetPlatform(string(intimate.Popenrec))
sdata := source.GetExt().([]byte)
2020-07-14 11:00:34 +00:00
if gjson.ValidBytes(sdata) {
result := gjson.ParseBytes(sdata)
datamap := result.Map()
2020-07-14 11:00:34 +00:00
oe := &OpenrecExtractor{}
oe.user = intimate.NewExtractorSource(datamap["user"])
oe.user.CreateExtractor()
2020-07-14 11:00:34 +00:00
oe.userLive = intimate.NewExtractorSource(datamap["user_live"])
oe.userLive.CreateExtractor()
2020-07-14 11:00:34 +00:00
oe.supporters = intimate.NewExtractorSource(datamap["supporters"])
2020-07-14 11:00:34 +00:00
clog := &intimate.CollectLog{}
oe.extractFollowers(clog)
oe.extractAnchorName(ai)
oe.extractViewsAndLiveStreaming(clog)
oe.extractGiversAndGratuity(clog)
oe.extractLive(clog)
oe.extractTags(clog)
2020-07-14 11:00:34 +00:00
ai.Set("UpdateTime", source.GetUpdateTime())
LiveUrl := "https://www.openrec.tv/live/" + anchorId
ai.Set("LiveUrl", sql.NullString{String: LiveUrl, Valid: true})
2020-07-14 11:00:34 +00:00
Uid, err := collect.InsertAnchorInfo(ai)
2020-07-14 11:00:34 +00:00
if err != nil {
t.Error(err)
return
2020-07-14 11:00:34 +00:00
}
clog.Set("Uid", Uid)
clog.Set("Platform", string(intimate.Popenrec))
clog.Set("AnchorId", anchorId)
clog.Set("UpdateTime", source.GetUpdateTime())
2020-07-14 11:00:34 +00:00
collect.InsertCollectLog(clog)
} else {
t.Error("data is not json:\n", string(sdata))
}
2020-07-14 11:00:34 +00:00
}
2020-07-10 04:05:33 +00:00
}