package main import ( "database/sql" "encoding/json" "intimate" "log" "regexp" "strconv" "strings" "time" "github.com/tidwall/gjson" ) // OpenrecExtractor 提取方法 type OpenrecExtractor struct { user *intimate.ExtractorSource userLive *intimate.ExtractorSource supporters *intimate.ExtractorSource } func (oe *OpenrecExtractor) extractFollowers(clog intimate.ISet) { extractor := oe.user.GetExtractor() xp, err := extractor.XPathResult("//p[@class='c-global__user__count__row__right js-userCountFollowers']/text()") if err != nil { log.Println(err) } if !xp.NodeIter().Next() { log.Println("不存在粉丝数") } followers := strings.ReplaceAll(xp.String(), ",", "") followersInt, err := strconv.ParseInt(followers, 10, 64) if err != nil { log.Println(err) } clog.Set("Followers", sql.NullInt64{Int64: followersInt, Valid: true}) } func (oe *OpenrecExtractor) extractAnchorName(ai intimate.ISet) { extractor := oe.user.GetExtractor() xp, err := extractor.XPathResult("//p[@class='c-global__user__profile__list__name__text official-icon--after']/text()") if xp.NodeIter().Next() { anchorName := xp.String() ai.Set("AnchorName", anchorName) } else { log.Println(err) } } func (oe *OpenrecExtractor) extractViewsAndLiveStreaming(clog intimate.ISet) { extractor := oe.user.GetExtractor() // c-contents xp, err := extractor.XPathResult("//ul[@class='c-contents']//p[@class='c-thumbnailVideo__footer__liveCount']/text()") if err != nil { log.Println(err) } if xp.NodeIter().Next() { views := regexp.MustCompile(`[0-9,]+`).FindString(xp.String()) views = strings.ReplaceAll(views, ",", "") viewsint, err := strconv.Atoi(views) if err != nil { log.Println(err) } clog.Set("Views", sql.NullInt64{Int64: int64(viewsint), Valid: true}) clog.Set("IsLiveStreaming", int32(1)) } } func (oe *OpenrecExtractor) extractGiversAndGratuity(clog intimate.ISet) { // extractor := oe.user.GetExtractor() giverjson := oe.supporters.GetSource() var givers []interface{} var gratuity int64 = 0 for _, v := range giverjson.Array() { giverSource := gjson.Parse(v.String()) for _, item := range giverSource.Get("data.items").Array() { givers = append(givers, item.Map()) gratuity += item.Get("total_yells").Int() } } giversbytes, err := json.Marshal(givers) if err != nil { log.Println(err) clog.Set("ErrorMsg", sql.NullString{String: err.Error(), Valid: true}) } else { clog.Set("Giver", giversbytes) } clog.Set("Gratuity", sql.NullInt64{Int64: gratuity, Valid: true}) } func (oe *OpenrecExtractor) extractLive(clog intimate.ISet) { extractor := oe.userLive.GetExtractor() mathes := regexp.MustCompile("MovieTitle__Title[^>]+>(.{1,50})").FindStringSubmatch(oe.userLive.GetSource().Str) if len(mathes) == 2 { clog.Set("LiveTitle", sql.NullString{String: mathes[1], Valid: true}) content, err := extractor.XPathResult("//meta[@itemprop='uploadDate']/@content") if err != nil { log.Println(err) } iter := content.NodeIter() if iter.Next() { tm, err := time.ParseInLocation("2006-01-02T15:04:05Z07:00", iter.Node().NodeValue(), time.Local) if err != nil { log.Println(err) } clog.Set("LiveStartTime", sql.NullTime{Time: tm.Local(), Valid: true}) duration, err := extractor.XPathResult("//meta[@itemprop='duration']/@content") if err != nil { log.Println(err) } diter := duration.NodeIter() if diter.Next() { dt, err := intimate.ParseDuration(diter.Node().NodeValue()) if err != nil { log.Println(err) } endtm := tm.Add(dt) clog.Set("LiveEndTime", sql.NullTime{Time: endtm.Local(), Valid: true}) } } } } func (oe *OpenrecExtractor) extractTags(clog intimate.ISet) { var tags []string matheslist := regexp.MustCompile(`TagButton__Button[^>]+>(.{1,100})