package main import ( "database/sql" "encoding/json" "intimate" "io/ioutil" "log" "os" "regexp" "strconv" "strings" "testing" "time" "github.com/474420502/hunter" "github.com/474420502/requests" "github.com/lestrrat-go/libxml2" "github.com/tidwall/gjson" ) func TestCase1(t *testing.T) { date := "2020-07-13T18:58:24+09:00" tm, err := time.Parse("2006-01-02T15:04:05Z07:00", date) t.Error(err) t.Error(time.Now()) t.Error(tm.Local().UTC(), tm.Local()) } func TestCase2(t *testing.T) { duration1 := "0:00:00" duration2 := "4:56:04" tm2, err := time.Parse("15:04:05", duration2) tm1, err := time.Parse("15:04:05", duration1) tm2.Sub(tm1) t.Error(err) t.Error(tm2.Sub(tm1)) } func TestCase(t *testing.T) { f, _ := os.Open("./test.html") data, _ := ioutil.ReadAll(f) doc, err := libxml2.ParseHTML(data) if err != nil { panic(err) } // doc.CreateElement("meta") // "" xresult, err := doc.Find("/html/head") ele, err := doc.CreateElement(`META`) if err != nil { panic(err) } ele.SetAttribute("charset", "utf-8") if err != nil { panic(err) } iter := xresult.NodeIter() if iter.Next() { n := iter.Node() err = n.AddChild(ele) // childs, err := n.ChildNodes() if err != nil { t.Error(err) } t.Error(n) } xr, err := doc.Find("//h1[ contains(@class, 'MovieTitle__Title')]") if err != nil { panic(nil) } t.Error(xr) } func TestExtractor(t *testing.T) { ses := requests.NewSession() tp := ses.Get("https://www.openrec.tv/user/Riowh/supporters") tp.Execute() // t.Error(ses.GetCookies(wf.GetParsedURL())) collect := intimate.NewExtractorStore() store := intimate.NewSourceStore("source_openrec") source, err := store.Pop(string(intimate.TTOpenrecRanking), 100) anchorId := source.GetSource().String ai := &intimate.AnchorInfo{} ai.SetAnchorId(anchorId) ai.SetPlatform(string(intimate.Popenrec)) sdata := source.GetExt().([]byte) if gjson.ValidBytes(sdata) { result := gjson.ParseBytes(sdata) m := result.Map() user := m["user"] clog := &intimate.CollectLog{} extractor := hunter.NewExtractor([]byte(user.Str)) xp, err := extractor.XPathResult("//p[@class='c-global__user__count__row__right js-userCountFollowers']/text()") if err != nil { t.Error(err) } if !xp.NodeIter().Next() { t.Error("不存在粉丝数") } followers := strings.ReplaceAll(xp.String(), ",", "") followersInt, err := strconv.ParseInt(followers, 10, 64) if err != nil { t.Error(err) } var anchorName string xp, err = extractor.XPathResult("//p[@class='c-global__user__profile__list__name__text official-icon--after']/text()") if xp.NodeIter().Next() { anchorName = xp.String() } else { t.Error(err) } t.Error(source.GetSource()) t.Error(anchorName) ai.SetAnchorName(anchorName) // c-contents xp, err = extractor.XPathResult("//ul[@class='c-contents']//p[@class='c-thumbnailVideo__footer__liveCount']/text()") if err != nil { log.Println(err) } if xp.NodeIter().Next() { views := regexp.MustCompile(`[0-9,]+`).FindString(xp.String()) views = strings.ReplaceAll(views, ",", "") viewsint, err := strconv.Atoi(views) if err != nil { t.Error(err) } clog.SetViews(sql.NullInt64{Int64: int64(viewsint), Valid: true}) clog.SetIsShowing(1) } var givers []interface{} var gratuity int64 = 0 giverjson := m["supporters"] for _, v := range giverjson.Array() { giverSource := gjson.Parse(v.String()) for _, item := range giverSource.Get("data.items").Array() { givers = append(givers, item.Map()) gratuity += item.Get("total_yells").Int() } } giversbytes, err := json.Marshal(givers) if err != nil { t.Error(err) clog.SetErrorMsg(sql.NullString{String: err.Error(), Valid: true}) } else { clog.SetGiver(giversbytes) } // MovieToolbar__Views-g5e6ic-13 iDRGyA livejson := m["user_live"] // f, err := os.OpenFile("./test.html", os.O_CREATE|os.O_TRUNC|os.O_RDWR, os.ModePerm) // if err != nil { // panic(err) // } // f.WriteString(livejson.String()) extractor = hunter.NewExtractor([]byte(livejson.Str)) // xr, err := extractor.XPathResult("//h1[ contains(@class, 'MovieTitle__Title')]") // if err != nil { // t.Error(err) // } mathes := regexp.MustCompile("MovieTitle__Title.*>(.+)").FindStringSubmatch(livejson.Str) if len(mathes) == 2 { clog.SetShowTitle(sql.NullString{String: mathes[1], Valid: true}) content, err := extractor.XPathResult("//meta[@itemprop='uploadDate']/@content") if err != nil { t.Error(err) } iter := content.NodeIter() if iter.Next() { tm, err := time.ParseInLocation("2006-01-02T15:04:05Z07:00", iter.Node().NodeValue(), time.Local) if err != nil { t.Error(err) } clog.SetShowStartTime(sql.NullTime{Time: tm.Local(), Valid: true}) duration, err := extractor.XPathResult("//meta[@itemprop='duration']/@content") if err != nil { t.Error(err) } diter := duration.NodeIter() if diter.Next() { dt, err := intimate.ParseDuration(diter.Node().NodeValue()) if err != nil { log.Println(err) } endtm := tm.Add(dt) clog.SetShowEndTime(sql.NullTime{Time: endtm.Local(), Valid: true}) } } } LiveUrl := "https://www.openrec.tv/live/" + anchorId ai.SetLiveUrl(sql.NullString{String: LiveUrl, Valid: true}) Uid, err := collect.InsertAnchorInfo(ai) if err != nil { t.Error(err) return } clog.SetUid(Uid) clog.SetGratuity(sql.NullInt64{Int64: gratuity, Valid: true}) clog.SetPlatform(string(intimate.Popenrec)) clog.SetFollowers(sql.NullInt64{Int64: int64(followersInt), Valid: true}) clog.SetAnchorId(anchorId) clog.SetUpdateTime(source.GetUpdateTime()) collect.InsertCollectLog(clog) } else { t.Error("data is not json:\n", string(sdata)) } }