TODO: finish extractor data
This commit is contained in:
@@ -5,7 +5,11 @@ import (
|
||||
"encoding/json"
|
||||
"intimate"
|
||||
"log"
|
||||
"os"
|
||||
"os/signal"
|
||||
"regexp"
|
||||
"sync/atomic"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/tebeka/selenium"
|
||||
@@ -27,88 +31,112 @@ type UserList struct {
|
||||
func (cl *UserList) Execute() {
|
||||
// DELETE FROM source_twitch WHERE uid NOT IN (SELECT MAX(s.uid) FROM (SELECT uid, source FROM source_twitch ) s GROUP BY s.source) ;
|
||||
//article//a[@data-a-target='preview-card-title-link']
|
||||
var err error
|
||||
|
||||
wd := intimate.GetChromeDriver(3030)
|
||||
|
||||
sourceChannel, err := sstore.Pop(intimate.TTwitchChannel)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var loop int32 = 1
|
||||
|
||||
weburl := sourceChannel.Source.String + "?sort=VIEWER_COUNT"
|
||||
err = wd.Get(weburl)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
go func() {
|
||||
signalchan := make(chan os.Signal)
|
||||
signal.Notify(signalchan, syscall.SIGKILL, syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGSTOP)
|
||||
log.Println("accept stop command:", <-signalchan)
|
||||
atomic.StoreInt32(&loop, 0)
|
||||
}()
|
||||
|
||||
wd.WaitWithTimeout(func(wd selenium.WebDriver) (bool, error) {
|
||||
_, err := wd.FindElement(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return true, nil
|
||||
}, time.Second*10)
|
||||
for atomic.LoadInt32(&loop) > 0 {
|
||||
|
||||
btn, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
btn.Click()
|
||||
|
||||
var elements []selenium.WebElement
|
||||
var liveurls = 0
|
||||
var delayerror = 3
|
||||
for i := 0; i < 2; i++ {
|
||||
elements, err = wd.FindElements(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
|
||||
var err error
|
||||
sourceChannel, err := sstore.Pop(intimate.TTwitchChannel)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
wd.KeyDown(selenium.EndKey)
|
||||
time.Sleep(time.Second * 2)
|
||||
if len(elements) == liveurls {
|
||||
delayerror--
|
||||
if delayerror <= 0 {
|
||||
|
||||
weburl := sourceChannel.Source.String + "?sort=VIEWER_COUNT"
|
||||
err = wd.Get(weburl)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
wd.WaitWithTimeout(func(wd selenium.WebDriver) (bool, error) {
|
||||
_, err := wd.FindElement(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return true, nil
|
||||
}, time.Second*10)
|
||||
|
||||
btn, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
btn.Click()
|
||||
|
||||
var elements []selenium.WebElement
|
||||
var liveurls = 0
|
||||
var delayerror = 3
|
||||
for i := 0; i < 200 && atomic.LoadInt32(&loop) > 0; i++ {
|
||||
elements, err = wd.FindElements(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
break
|
||||
}
|
||||
} else {
|
||||
delayerror = 3
|
||||
wd.KeyDown(selenium.EndKey)
|
||||
wd.KeyUp(selenium.EndKey)
|
||||
time.Sleep(time.Second * 2)
|
||||
if len(elements) == liveurls {
|
||||
if liveurls == 0 {
|
||||
delayerror -= 2
|
||||
} else {
|
||||
delayerror--
|
||||
}
|
||||
|
||||
if delayerror <= 0 {
|
||||
break
|
||||
}
|
||||
} else {
|
||||
delayerror = 3
|
||||
}
|
||||
liveurls = len(elements)
|
||||
}
|
||||
}
|
||||
elements, err = wd.FindElements(selenium.ByXPATH, "//article//a[@data-a-target='preview-card-title-link' and @href]")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
for _, e := range elements {
|
||||
|
||||
attr, err := e.GetAttribute("href")
|
||||
elements, err = wd.FindElements(selenium.ByXPATH, "//article//a[@data-a-target='preview-card-title-link' and @href]")
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
streamer := &intimate.Streamer{}
|
||||
|
||||
matches := regexp.MustCompile(`https://www.twitch.tv/(\w+)`).FindStringSubmatch(attr)
|
||||
if len(matches) == 2 {
|
||||
streamer.UserId = matches[1]
|
||||
} else {
|
||||
log.Println(attr)
|
||||
continue
|
||||
for _, e := range elements {
|
||||
|
||||
attr, err := e.GetAttribute("href")
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
streamer := &intimate.Streamer{}
|
||||
|
||||
matches := regexp.MustCompile(`https://www.twitch.tv/(\w+)`).FindStringSubmatch(attr)
|
||||
if len(matches) == 2 {
|
||||
streamer.UserId = matches[1]
|
||||
} else {
|
||||
log.Println(attr)
|
||||
continue
|
||||
}
|
||||
|
||||
streamer.Platform = intimate.Ptwitch
|
||||
|
||||
updateUrl := make(map[string]string)
|
||||
updateUrl["live"] = attr
|
||||
streamer.LiveUrl = sql.NullString{String: attr, Valid: true}
|
||||
data, err := json.Marshal(updateUrl)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
streamer.UpdateUrl = data
|
||||
streamer.Operator = 0
|
||||
|
||||
estore.InsertStreamer(streamer)
|
||||
}
|
||||
|
||||
streamer.Platform = intimate.Ptwitch
|
||||
|
||||
updateUrl := make(map[string]string)
|
||||
updateUrl["live"] = attr
|
||||
streamer.LiveUrl = sql.NullString{String: attr, Valid: true}
|
||||
data, err := json.Marshal(updateUrl)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
streamer.UpdateUrl = data
|
||||
streamer.Operator = 0
|
||||
|
||||
estore.InsertStreamer(streamer)
|
||||
log.Println("streamer insert", len(elements))
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user