package main import ( "database/sql" "intimate" "io/ioutil" "log" "os" "regexp" "testing" "time" "github.com/lestrrat-go/libxml2" "github.com/tidwall/gjson" ) func TestCase0(t *testing.T) { f, err := os.Open("./test.html") if err != nil { panic(err) } data, err := ioutil.ReadAll(f) if err != nil { panic(err) } matheslist := regexp.MustCompile(`TagButton__Button[^>]+>(.{1,100})" xresult, err := doc.Find("/html/head") ele, err := doc.CreateElement(`META`) if err != nil { panic(err) } ele.SetAttribute("charset", "utf-8") if err != nil { panic(err) } iter := xresult.NodeIter() if iter.Next() { n := iter.Node() err = n.AddChild(ele) // childs, err := n.ChildNodes() if err != nil { t.Error(err) } t.Error(n) } xr, err := doc.Find("//h1[ contains(@class, 'MovieTitle__Title')]") if err != nil { panic(nil) } t.Error(xr) } func TestExtractor(t *testing.T) { collect := intimate.NewExtractorStore() store := intimate.NewSourceStore("source_openrec") for { source, err := store.Pop(string(intimate.TTOpenrecRanking), 100) if err != nil { log.Println(err) return } anchorId := source.GetSource().String ai := &intimate.AnchorInfo{} ai.SetAnchorId(anchorId) ai.SetPlatform(string(intimate.Popenrec)) sdata := source.GetExt().([]byte) if gjson.ValidBytes(sdata) { result := gjson.ParseBytes(sdata) datamap := result.Map() oe := &OpenrecExtractor{} oe.user = intimate.NewExtractorSource(datamap["user"]) oe.user.CreateExtractor() oe.userLive = intimate.NewExtractorSource(datamap["user_live"]) oe.userLive.CreateExtractor() oe.supporters = intimate.NewExtractorSource(datamap["supporters"]) clog := &intimate.CollectLog{} oe.extractFollowers(clog) oe.extractAnchorName(ai) oe.extractViewsAndLiveStreaming(clog) oe.extractGiversAndGratuity(clog) oe.extractLive(clog) oe.extractTags(clog) ai.Set("UpdateTime", source.GetUpdateTime()) LiveUrl := "https://www.openrec.tv/live/" + anchorId ai.Set("LiveUrl", sql.NullString{String: LiveUrl, Valid: true}) Uid, err := collect.InsertAnchorInfo(ai) if err != nil { t.Error(err) return } clog.Set("Uid", Uid) clog.Set("Platform", string(intimate.Popenrec)) clog.Set("AnchorId", anchorId) clog.Set("UpdateTime", source.GetUpdateTime()) collect.InsertCollectLog(clog) } else { t.Error("data is not json:\n", string(sdata)) } } }