2020-07-10 04:05:33 +00:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
2020-07-14 11:00:34 +00:00
|
|
|
"database/sql"
|
|
|
|
"intimate"
|
2020-07-15 07:44:21 +00:00
|
|
|
"io/ioutil"
|
2020-07-14 11:00:34 +00:00
|
|
|
"log"
|
|
|
|
"os"
|
|
|
|
"regexp"
|
2020-07-10 04:05:33 +00:00
|
|
|
"testing"
|
2020-07-15 07:44:21 +00:00
|
|
|
"time"
|
2020-07-10 04:05:33 +00:00
|
|
|
|
2020-07-15 07:44:21 +00:00
|
|
|
"github.com/lestrrat-go/libxml2"
|
2020-07-14 11:00:34 +00:00
|
|
|
"github.com/tidwall/gjson"
|
2020-07-10 04:05:33 +00:00
|
|
|
)
|
|
|
|
|
2020-07-16 03:02:30 +00:00
|
|
|
// TestCase0 reads the local ./test.html fixture and logs every anchor label
// captured by the TagButton__Button pattern.
//
// It is an exploratory test: nothing is asserted about the matches, so they
// are reported with t.Log rather than being recorded as a failure.
func TestCase0(t *testing.T) {
	f, err := os.Open("./test.html")
	if err != nil {
		// Fail only this test instead of aborting the whole test binary.
		t.Fatal(err)
	}
	// Release the file descriptor once the fixture has been read.
	defer f.Close()

	data, err := ioutil.ReadAll(f)
	if err != nil {
		t.Fatal(err)
	}

	// Each match holds the full hit plus one capture group: the 1-100
	// characters between the opening tag's '>' and the closing "</a".
	matches := regexp.MustCompile(`TagButton__Button[^>]+>(.{1,100})</a`).FindAllStringSubmatch(string(data), -1)
	t.Log(matches)
}
|
|
|
|
|
2020-07-15 07:44:21 +00:00
|
|
|
// TestCase1 checks that an RFC 3339 timestamp carrying a +09:00 offset is
// parsed into the expected instant, then logs the current time and the parsed
// value in UTC and in the local zone for manual inspection.
func TestCase1(t *testing.T) {
	date := "2020-07-13T18:58:24+09:00"

	// time.RFC3339 is the layout "2006-01-02T15:04:05Z07:00".
	tm, err := time.Parse(time.RFC3339, date)
	if err != nil {
		t.Fatalf("parsing %q: %v", date, err)
	}

	// 18:58:24 at +09:00 is 09:58:24 in UTC.
	want := time.Date(2020, time.July, 13, 9, 58, 24, 0, time.UTC)
	if !tm.Equal(want) {
		t.Errorf("parsed %q as %v, want %v", date, tm.UTC(), want)
	}

	t.Log(time.Now())
	t.Log(tm.Local().UTC(), tm.Local())
}
|
2020-07-15 07:44:21 +00:00
|
|
|
|
|
|
|
// TestCase2 parses two "H:MM:SS" clock strings with the "15:04:05" layout and
// checks that subtracting them yields the elapsed duration between the two.
func TestCase2(t *testing.T) {
	duration1 := "0:00:00"
	duration2 := "4:56:04"

	// Check each parse separately so the first error is not overwritten by
	// the second call.
	tm2, err := time.Parse("15:04:05", duration2)
	if err != nil {
		t.Fatalf("parsing %q: %v", duration2, err)
	}
	tm1, err := time.Parse("15:04:05", duration1)
	if err != nil {
		t.Fatalf("parsing %q: %v", duration1, err)
	}

	got := tm2.Sub(tm1)
	want := 4*time.Hour + 56*time.Minute + 4*time.Second
	if got != want {
		t.Errorf("%s - %s = %v, want %v", duration2, duration1, got, want)
	}
	t.Log(got)
}
|
|
|
|
|
2020-07-15 07:44:21 +00:00
|
|
|
func TestCase(t *testing.T) {
|
|
|
|
f, _ := os.Open("./test.html")
|
|
|
|
data, _ := ioutil.ReadAll(f)
|
|
|
|
|
|
|
|
doc, err := libxml2.ParseHTML(data)
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
// doc.CreateElement("meta")
|
|
|
|
// "<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">"
|
|
|
|
|
|
|
|
xresult, err := doc.Find("/html/head")
|
|
|
|
ele, err := doc.CreateElement(`META`)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
ele.SetAttribute("charset", "utf-8")
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
iter := xresult.NodeIter()
|
|
|
|
if iter.Next() {
|
|
|
|
n := iter.Node()
|
|
|
|
|
|
|
|
err = n.AddChild(ele)
|
|
|
|
// childs, err := n.ChildNodes()
|
|
|
|
if err != nil {
|
|
|
|
t.Error(err)
|
2020-07-14 11:00:34 +00:00
|
|
|
}
|
2020-07-15 07:44:21 +00:00
|
|
|
t.Error(n)
|
2020-07-14 11:00:34 +00:00
|
|
|
}
|
2020-07-15 07:44:21 +00:00
|
|
|
|
|
|
|
xr, err := doc.Find("//h1[ contains(@class, 'MovieTitle__Title')]")
|
|
|
|
if err != nil {
|
|
|
|
panic(nil)
|
|
|
|
}
|
|
|
|
|
|
|
|
t.Error(xr)
|
2020-07-14 11:00:34 +00:00
|
|
|
}
|
|
|
|
|
2020-07-10 04:05:33 +00:00
|
|
|
func TestExtractor(t *testing.T) {
|
2020-07-14 11:00:34 +00:00
|
|
|
collect := intimate.NewExtractorStore()
|
|
|
|
store := intimate.NewSourceStore("source_openrec")
|
2020-07-15 07:44:21 +00:00
|
|
|
|
2020-07-16 07:25:55 +00:00
|
|
|
for {
|
|
|
|
source, err := store.Pop(string(intimate.TTOpenrecRanking), 100)
|
2020-07-14 11:00:34 +00:00
|
|
|
if err != nil {
|
2020-07-16 07:25:55 +00:00
|
|
|
log.Println(err)
|
|
|
|
return
|
2020-07-14 11:00:34 +00:00
|
|
|
}
|
2020-07-15 10:22:40 +00:00
|
|
|
|
2020-07-16 07:25:55 +00:00
|
|
|
anchorId := source.GetSource().String
|
2020-07-14 11:00:34 +00:00
|
|
|
|
2020-07-16 07:25:55 +00:00
|
|
|
ai := &intimate.AnchorInfo{}
|
|
|
|
ai.SetAnchorId(anchorId)
|
|
|
|
ai.SetPlatform(string(intimate.Popenrec))
|
2020-07-15 10:22:40 +00:00
|
|
|
|
2020-07-16 07:25:55 +00:00
|
|
|
sdata := source.GetExt().([]byte)
|
2020-07-14 11:00:34 +00:00
|
|
|
|
2020-07-16 07:25:55 +00:00
|
|
|
if gjson.ValidBytes(sdata) {
|
|
|
|
result := gjson.ParseBytes(sdata)
|
|
|
|
datamap := result.Map()
|
2020-07-14 11:00:34 +00:00
|
|
|
|
2020-07-16 07:25:55 +00:00
|
|
|
oe := &OpenrecExtractor{}
|
|
|
|
oe.user = intimate.NewExtractorSource(datamap["user"])
|
|
|
|
oe.user.CreateExtractor()
|
2020-07-14 11:00:34 +00:00
|
|
|
|
2020-07-16 07:25:55 +00:00
|
|
|
oe.userLive = intimate.NewExtractorSource(datamap["user_live"])
|
|
|
|
oe.userLive.CreateExtractor()
|
2020-07-14 11:00:34 +00:00
|
|
|
|
2020-07-16 07:25:55 +00:00
|
|
|
oe.supporters = intimate.NewExtractorSource(datamap["supporters"])
|
2020-07-14 11:00:34 +00:00
|
|
|
|
2020-07-16 07:25:55 +00:00
|
|
|
clog := &intimate.CollectLog{}
|
2020-07-15 07:44:21 +00:00
|
|
|
|
2020-07-16 07:25:55 +00:00
|
|
|
oe.extractFollowers(clog)
|
|
|
|
oe.extractAnchorName(ai)
|
|
|
|
oe.extractViewsAndLiveStreaming(clog)
|
|
|
|
oe.extractGiversAndGratuity(clog)
|
|
|
|
oe.extractLive(clog)
|
|
|
|
oe.extractTags(clog)
|
2020-07-14 11:00:34 +00:00
|
|
|
|
2020-07-16 07:25:55 +00:00
|
|
|
ai.Set("UpdateTime", source.GetUpdateTime())
|
2020-07-15 07:44:21 +00:00
|
|
|
|
2020-07-16 07:25:55 +00:00
|
|
|
LiveUrl := "https://www.openrec.tv/live/" + anchorId
|
|
|
|
ai.Set("LiveUrl", sql.NullString{String: LiveUrl, Valid: true})
|
2020-07-14 11:00:34 +00:00
|
|
|
|
2020-07-16 07:25:55 +00:00
|
|
|
Uid, err := collect.InsertAnchorInfo(ai)
|
2020-07-14 11:00:34 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Error(err)
|
2020-07-16 07:25:55 +00:00
|
|
|
return
|
2020-07-14 11:00:34 +00:00
|
|
|
}
|
|
|
|
|
2020-07-16 07:25:55 +00:00
|
|
|
clog.Set("Uid", Uid)
|
|
|
|
clog.Set("Platform", string(intimate.Popenrec))
|
|
|
|
clog.Set("AnchorId", anchorId)
|
|
|
|
clog.Set("UpdateTime", source.GetUpdateTime())
|
2020-07-14 11:00:34 +00:00
|
|
|
|
2020-07-16 07:25:55 +00:00
|
|
|
collect.InsertCollectLog(clog)
|
|
|
|
} else {
|
|
|
|
t.Error("data is not json:\n", string(sdata))
|
2020-07-15 10:22:40 +00:00
|
|
|
}
|
2020-07-14 11:00:34 +00:00
|
|
|
}
|
2020-07-10 04:05:33 +00:00
|
|
|
|
|
|
|
}
|