Extractor upgrade
This commit is contained in:
@@ -1,6 +1,10 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"intimate"
|
||||
"time"
|
||||
|
||||
"github.com/474420502/extractor"
|
||||
"github.com/474420502/focus/compare"
|
||||
"github.com/474420502/focus/tree/heap"
|
||||
|
||||
@@ -8,9 +12,14 @@ import (
|
||||
"testing"
|
||||
|
||||
"github.com/474420502/requests"
|
||||
"github.com/lestrrat-go/libxml2"
|
||||
)
|
||||
|
||||
// sstore is the source store instance used to persist raw source data.
// For the table layout see sql/intimate_source.sql.
|
||||
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitcasting))
|
||||
|
||||
// estore is the extractor store connection instance.
|
||||
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
|
||||
|
||||
func TestMain(t *testing.T) {
|
||||
|
||||
searchurl := "https://twitcasting.tv/rankingindex.php"
|
||||
@@ -27,30 +36,73 @@ func TestMain(t *testing.T) {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
doc, err := libxml2.ParseHTML(resp.Content())
|
||||
etor := extractor.ExtractXml(resp.Content())
|
||||
|
||||
// doc, err := libxml2.ParseHTML(resp.Content())
|
||||
// if err != nil {
|
||||
// panic(err)
|
||||
// }
|
||||
// defer doc.Free()
|
||||
|
||||
result, err := etor.XPath("//*[contains(@class, 'tag')]/@href")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
defer doc.Free()
|
||||
result, err := doc.Find("//*[contains(@class, 'tag')]/@href")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
defer result.Free()
|
||||
|
||||
// result, err := doc.Find("//*[contains(@class, 'tag')]/@href")
|
||||
// if err != nil {
|
||||
// panic(err)
|
||||
// }
|
||||
// defer result.Free()
|
||||
|
||||
iter := result.NodeIter()
|
||||
for iter.Next() {
|
||||
|
||||
log.Println(iter.Node().NodeValue())
|
||||
wurl := "https://twitcasting.tv" + iter.Node().NodeValue()
|
||||
if ok := queuedict[wurl]; !ok {
|
||||
log.Println(wurl)
|
||||
sl := &intimate.StreamerList{}
|
||||
sl.Platform = intimate.Ptwitcasting
|
||||
sl.Url = wurl
|
||||
sl.Operator = 0
|
||||
sl.UpdateInterval = 120
|
||||
sl.UpdateTime = time.Now()
|
||||
estore.InsertStreamerList(sl)
|
||||
queue.Put(wurl)
|
||||
queuedict[wurl] = true
|
||||
}
|
||||
}
|
||||
|
||||
doc.Find("//div[@class='tw-search-result-row']")
|
||||
// doc.Find("//div[@class='tw-search-result-row']")
|
||||
xps, err := etor.XPaths("//div[@class='tw-search-result-row']")
|
||||
if err != nil {
|
||||
log.Println(surl, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// xps.ForEachTag(SearchProfile{})
|
||||
|
||||
// texts, errs := xps.ForEachText(".//span[@class='username']")
|
||||
// if len(errs) > 0 {
|
||||
// t.Error(errs)
|
||||
// }
|
||||
var splist = xps.ForEachTag(SearchProfile{})
|
||||
for _, isp := range splist {
|
||||
sp := isp.(*SearchProfile)
|
||||
sp.UserId = sp.LiveUrl[1:]
|
||||
// log.Println(sp.(SearchProfile))
|
||||
}
|
||||
|
||||
for _, isp := range splist {
|
||||
log.Println(isp.(*SearchProfile))
|
||||
}
|
||||
|
||||
log.Println("finish remain", queue.Size())
|
||||
}
|
||||
}
|
||||
|
||||
// SearchProfile describes one entry matched by the
// "//div[@class='tw-search-result-row']" query in TestMain, where a zero value
// is handed to xps.ForEachTag and the results are asserted to *SearchProfile.
// NOTE(review): the `exp` / `method` struct tags are presumably interpreted by
// the extractor package as an XPath expression relative to the row and the
// extraction method to apply — confirm against the extractor documentation.
type SearchProfile struct {
|
||||
// UserName is tagged with the span of class "username" and method "Text".
UserName string `exp:".//span[@class='username']" method:"Text"`
|
||||
// UserId has no active tag (the one below is commented out); TestMain
// fills it in afterwards from LiveUrl with its first byte dropped.
UserId string // `exp:".//span[@class='fullname']" method:"Text"`
|
||||
// LiveUrl is tagged with the anchor under div.usertext and the method
// "Attribute,href Value"; presumably this yields the href value — TODO confirm.
LiveUrl string `exp:".//div[@class='usertext']/a[@href]" method:"Attribute,href Value"`
|
||||
}
|
||||
|
||||
@@ -29,13 +29,16 @@ func (cl *UserList) Execute() {
|
||||
//article//a[@data-a-target='preview-card-title-link']
|
||||
|
||||
wd := intimate.GetChromeDriver(3030)
|
||||
defer wd.Close()
|
||||
defer wd.Quit()
|
||||
ps := intimate.NewPerfectShutdown()
|
||||
counter := intimate.NewCounter()
|
||||
counter.SetMaxLimit(100)
|
||||
counter.SetMaxToDo(func(olist ...interface{}) error {
|
||||
owd := olist[0].(*selenium.WebDriver)
|
||||
(*owd).Quit()
|
||||
if err := (*owd).Quit(); err != nil {
|
||||
log.Println(err)
|
||||
}
|
||||
*owd = intimate.GetChromeDriver(3030)
|
||||
return nil
|
||||
}, &wd)
|
||||
|
||||
Reference in New Issue
Block a user