// Package main crawls twitcasting.tv ranking/tag pages to discover
// streamers and persist them through the intimate storage layer.
package main

import (
	"database/sql"
	"encoding/json"
	"log"
	"net/url"
	"time"

	"intimate"

	"github.com/474420502/extractor"
	"github.com/474420502/focus/compare"
	"github.com/474420502/focus/tree/heap"
	"github.com/474420502/requests"
)

// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
// (source-data store instance; see sql/intimate_source.sql for the schema).
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitcasting))

// estore 解析存储连接实例 (extracted-data store connection instance).
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()

// SearchProfile is the shape extracted from one search-result row on
// twitcasting.tv. The `exp`/`method` tags drive the extractor package's
// reflection-based XPath binding — do not rename or retag fields casually.
type SearchProfile struct {
	UserName string `exp:".//span[@class='username']" method:"Text"`
	UserId   string // derived from LiveUrl below, not extracted // `exp:".//span[@class='fullname']" method:"Text"`
	LiveUrl  string `exp:".//div[@class='usertext']/a[@href]" method:"AttributeValue,href"`

	Tag    []string `exp:".//a[contains(@class, 'tag tag-mini')]" method:"Text"`
	TagUrl []string `exp:".//a[contains(@class, 'tag tag-mini')]" method:"AttributeValue,href"`
}

// Execute crawls twitcasting.tv starting from the ranking index, following
// tag links breadth-ish (ordered by a string-compare heap), and records every
// discovered tag page (StreamerList) and streamer profile (Streamer) until
// the queue drains or a shutdown is requested.
func Execute() {
	homeurl := "https://twitcasting.tv"
	searchurl := "https://twitcasting.tv/rankingindex.php"

	// queuedict de-duplicates URLs across the whole run; queue drives order.
	queuedict := make(map[string]bool)
	queue := heap.New(compare.String)
	queue.Put(searchurl)
	queuedict[searchurl] = true

	ses := requests.NewSession()
	// NOTE(review): timeout unit depends on the requests lib — presumably
	// seconds; confirm against its documentation.
	ses.Config().SetTimeout(15)

	// enqueueTagURL records a newly discovered tag page as a StreamerList row
	// and schedules it for crawling. Returns true if the URL was unseen.
	// (Extracted: this construction was duplicated verbatim in two places.)
	enqueueTagURL := func(wurl string) bool {
		if queuedict[wurl] {
			return false
		}
		sl := &intimate.StreamerList{}
		sl.Platform = string(intimate.Ptwitcasting)
		sl.Url = wurl
		sl.Operator = 0
		sl.UpdateInterval = 120
		sl.UpdateTime = &sql.NullTime{Time: time.Now(), Valid: true}
		sl.UrlHash = intimate.GetUrlHash(sl.Url)
		intimate.TStreamerList.Insert(sl)
		queue.Put(wurl)
		queuedict[wurl] = true
		return true
	}

	ps := intimate.NewPerfectShutdown()
	for surl, ok := queue.Pop(); ok && !ps.IsClose(); surl, ok = queue.Pop() {
		u, err := url.Parse(surl.(string))
		if err != nil {
			log.Println(err)
			continue
		}

		resp, err := ses.Get(u.String()).Execute()
		if err != nil {
			log.Println(err)
			log.Println(u.String(), surl)
			continue
		}

		etor := extractor.ExtractHtml(resp.Content())

		// Tag links from the page's tag list — each one is a page to crawl.
		result, err := etor.XPaths("//p[@class='taglist']/a[contains(@class, 'tag')]/@href")
		if err != nil {
			// BUGFIX: was panic(err). A transient page-layout change must not
			// kill the crawler; log and skip, matching every other error path
			// in this loop.
			log.Println(surl, err)
			continue
		}
		for _, href := range result.GetTexts() {
			wurl := homeurl + href
			if enqueueTagURL(wurl) {
				log.Println(wurl)
			}
		}

		xps, err := etor.XPaths("//div[@class='tw-search-result-row']")
		if err != nil {
			log.Println(surl, err)
			continue
		}

		splist := xps.ForEachObjectByTag(SearchProfile{})
		for _, isp := range splist {
			sp := isp.(*SearchProfile)
			if sp.LiveUrl == "" {
				continue // no profile link extracted — nothing to derive
			}
			sp.UserId = sp.LiveUrl[1:] // LiveUrl is "/<userid>"; drop the slash
			for i := 0; i < len(sp.TagUrl); i++ {
				wurl := homeurl + sp.TagUrl[i]
				sp.TagUrl[i] = wurl // rewrite relative tag href to absolute
				enqueueTagURL(wurl)
			}
		}
		log.Println("find user:", len(splist))

		for _, isp := range splist {
			sp := isp.(*SearchProfile)
			// BUGFIX: previously rows with an empty LiveUrl (and therefore an
			// empty UserId, since the loop above skipped them) were still
			// inserted, persisting blank streamer records.
			if sp.LiveUrl == "" {
				continue
			}
			streamer := &intimate.Streamer{}
			streamer.Platform = intimate.Ptwitcasting
			streamer.LiveUrl = &sql.NullString{String: sp.LiveUrl, Valid: true}
			if btags, err := json.Marshal(sp.Tag); err != nil {
				log.Println(err) // best-effort: keep the row, just without tags
			} else {
				streamer.Tags = btags
			}
			streamer.UpdateInterval = 120
			streamer.UpdateTime = intimate.GetUpdateTimeNow()
			streamer.UserName = &sql.NullString{String: sp.UserName, Valid: true}
			streamer.UserId = &sp.UserId
			streamer.Operator = 0
			intimate.TStreamer.Insert(streamer)
		}
		log.Println("finish remain", queue.Size())
	}
}