TODO: finish extractor data
This commit is contained in:
parent
ea650f91dc
commit
41d3763b57
BIN
crx/0.1.2_0.crx
Normal file
BIN
crx/0.1.2_0.crx
Normal file
Binary file not shown.
28
crx/0.1.2_0.pem
Normal file
28
crx/0.1.2_0.pem
Normal file
|
@ -0,0 +1,28 @@
|
|||
-----BEGIN PRIVATE KEY-----
|
||||
MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQC4DYEiDPfw094p
|
||||
axusu7+kP9J9waL3+794PORnMoBnoKEBuEAfeWLZPtX338nq3dmyiVvNsNKFimcH
|
||||
vVFye5uHdFOfXZ7f6oIBzeY0616dhcASjV7nj0HkuhDhZQGvBWegQrFJrQQDG99T
|
||||
kXv3mH8e4W6qvwpyRedXqQJC3gE0LOKvUGlWABQwwq9NF5Uf6vJyXYMv7uBMwpWc
|
||||
liAtJ6v9742IIGjmZhhaQELliOUucbaeilzxJ29XyVXGhEXNERAXbswSGectHvoz
|
||||
HsBo4YVzKaR2b8wRXGT1W7am1MwFmOzZcX8tQAXTnoThsAL5tLHi+HJ5QRNbSf6h
|
||||
C44x8bcdAgMBAAECggEAW+G5/pK00FAkj06+MRxFTqzh2w/o2J2f91mGuJH4kkMZ
|
||||
Pa+Kq2vA0i1RSf45YfvAqyVxZB0K68mtJ6r2Vw2oFhYXjO6C2svKfTYZ339E66SS
|
||||
v3A92aGlxpawyKTRE1vCYLoKoXozD45BjgmJ9o/1nifyRGE8yNFm7VcdHt6PgUix
|
||||
914dJWeSwF94tnRqPJwfOXJkpTXdiWQunGWBOH2nK6y/r2xlLiR4EXDA/4LwMegh
|
||||
5XHWA4YOG0jQc5a/U5w+899/JKvduo5ZU738jKrtcqD8b2G76R+VTxzbv11ROm6E
|
||||
AMo1nTHRtbPAKDbSSPWgrzjxQPVGbJPJ+BnzF2V3rQKBgQDnC8OxPuhFvEdjmijW
|
||||
1lPWkB4NBJ9uBtWEMhHe5PTeHs0sfm9AZvM8npObMNcOvmQH3bGX2aY9XoEHZbjK
|
||||
ixM5miVazdEt0y7UonzcXqjpOvjiqlaAcMP+2Y6ejqi1JD3sflyi/GmLNCtlbRsZ
|
||||
Fx7sgPNk+LueGvwK35TWIsL7hwKBgQDL7mpjYk5V3osb1AGqcaJYsVa9Qm2izsAl
|
||||
g13sxollDLazaitwAt3r+FMtLVgJPptTlV37QF1WbSCfGCYjjFRP0WQN1lTlZqUN
|
||||
4QNKQ6SI/Wp4qjl127T2n/1toc7Mhjs00V+RJiFYpN5cdvXniBXjJC0oh30tL+L8
|
||||
Cvws2QYJOwKBgQDJEqD1QSUNg4SxdvkxtwbxhSzR8YL6UzJAwP5yd9lu8Wln3oTd
|
||||
jHsE95DID6Ipr6IIgnRLDdyyLeumz20ZwB00FSWLN/FiqxZncR2u/yaLC4qMYOe1
|
||||
Ee5QfW+0J71FH8xQY8wk//yua/GUbHaXyFpeQv8PkbReLWfJ4rh5/3inQwKBgQCl
|
||||
7M9dG6BXF6Ihu0a7soeAGJJVnRXtUMFgBFnIi+VAda61nh3Hnl2IYFz0th8aLnlc
|
||||
8XwtMLqA1nujVpe5drUm2FzLMWeT2wdSmpD9vLnDyET39rCX53J+87/UksHbASBt
|
||||
IinaxKZ/JG3T1+rOPphoXofroQnFWWAa6KkzqETT/wKBgDv2KDnZqYMSNy4xtz96
|
||||
IdOpYioocSvRS5kUUwokAIU9CYIo5+iyaJYok25u6OaRNOr1vqzqeG5j8Rdc/kQc
|
||||
70df/gZ2Gejn+3BYJtwtgeB25KfCjd+jhTHOOgLbnK0tX3h8X5wkpA0628inwMhg
|
||||
Q9GxE6HDisGIr0S5PWnZFh34
|
||||
-----END PRIVATE KEY-----
|
10
sql/remake_database.sh
Normal file
10
sql/remake_database.sh
Normal file
|
@ -0,0 +1,10 @@
|
|||
#!/bin/bash
|
||||
USER=root
|
||||
HOST=127.0.0.1
|
||||
PORT=4000
|
||||
|
||||
# mysql -h $HOST -u $USER -P $PORT -e "drop database intimate_source";
|
||||
# mysql -h $HOST -u $USER -P $PORT -e "drop database intimate_extractor";
|
||||
|
||||
mysql -h $HOST -u $USER -P $PORT < ./intimate_extractor.sql;
|
||||
mysql -h $HOST -u $USER -P $PORT < ./intimate_source.sql;
|
3
store.go
3
store.go
|
@ -87,7 +87,8 @@ func (store *StoreSource) Insert(isource IGet) {
|
|||
|
||||
// Deduplicate removes duplicate rows (去重).
|
||||
func (store *StoreSource) Deduplicate(target Target, field string) {
|
||||
_, err := store.db.Exec(`DELETE FROM ` + store.table + ` WHERE uid NOT IN (SELECT MAX(s.uid) FROM (SELECT uid, ` + field + ` FROM ` + store.table + `force index(target_type_idx) WHERE target_type = "` + string(target) + `" ) s GROUP BY s.` + string(target) + `) ;`)
|
||||
sql := `DELETE FROM ` + store.table + ` WHERE uid NOT IN (SELECT MAX(s.uid) FROM (SELECT uid, ` + field + ` FROM ` + store.table + ` force index(target_type_idx) WHERE target_type = "` + string(target) + `" ) s GROUP BY s.` + string(field) + `) ;`
|
||||
_, err := store.db.Exec(sql)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
|
|
@ -50,7 +50,7 @@ func (cl *ChannelLink) Execute() {
|
|||
|
||||
for i := 0; i <= 200; i++ {
|
||||
wd.KeyDown(selenium.EndKey)
|
||||
time.Sleep(time.Second * 3)
|
||||
time.Sleep(time.Second * 2)
|
||||
}
|
||||
|
||||
elements, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]")
|
||||
|
|
|
@ -5,7 +5,11 @@ import (
|
|||
"encoding/json"
|
||||
"intimate"
|
||||
"log"
|
||||
"os"
|
||||
"os/signal"
|
||||
"regexp"
|
||||
"sync/atomic"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/tebeka/selenium"
|
||||
|
@ -27,88 +31,112 @@ type UserList struct {
|
|||
func (cl *UserList) Execute() {
|
||||
// DELETE FROM source_twitch WHERE uid NOT IN (SELECT MAX(s.uid) FROM (SELECT uid, source FROM source_twitch ) s GROUP BY s.source) ;
|
||||
//article//a[@data-a-target='preview-card-title-link']
|
||||
var err error
|
||||
|
||||
wd := intimate.GetChromeDriver(3030)
|
||||
|
||||
sourceChannel, err := sstore.Pop(intimate.TTwitchChannel)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var loop int32 = 1
|
||||
|
||||
weburl := sourceChannel.Source.String + "?sort=VIEWER_COUNT"
|
||||
err = wd.Get(weburl)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
go func() {
|
||||
signalchan := make(chan os.Signal)
|
||||
signal.Notify(signalchan, syscall.SIGKILL, syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGSTOP)
|
||||
log.Println("accept stop command:", <-signalchan)
|
||||
atomic.StoreInt32(&loop, 0)
|
||||
}()
|
||||
|
||||
wd.WaitWithTimeout(func(wd selenium.WebDriver) (bool, error) {
|
||||
_, err := wd.FindElement(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return true, nil
|
||||
}, time.Second*10)
|
||||
for atomic.LoadInt32(&loop) > 0 {
|
||||
|
||||
btn, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
btn.Click()
|
||||
|
||||
var elements []selenium.WebElement
|
||||
var liveurls = 0
|
||||
var delayerror = 3
|
||||
for i := 0; i < 2; i++ {
|
||||
elements, err = wd.FindElements(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
|
||||
var err error
|
||||
sourceChannel, err := sstore.Pop(intimate.TTwitchChannel)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
wd.KeyDown(selenium.EndKey)
|
||||
time.Sleep(time.Second * 2)
|
||||
if len(elements) == liveurls {
|
||||
delayerror--
|
||||
if delayerror <= 0 {
|
||||
|
||||
weburl := sourceChannel.Source.String + "?sort=VIEWER_COUNT"
|
||||
err = wd.Get(weburl)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
wd.WaitWithTimeout(func(wd selenium.WebDriver) (bool, error) {
|
||||
_, err := wd.FindElement(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return true, nil
|
||||
}, time.Second*10)
|
||||
|
||||
btn, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
btn.Click()
|
||||
|
||||
var elements []selenium.WebElement
|
||||
var liveurls = 0
|
||||
var delayerror = 3
|
||||
for i := 0; i < 200 && atomic.LoadInt32(&loop) > 0; i++ {
|
||||
elements, err = wd.FindElements(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
break
|
||||
}
|
||||
} else {
|
||||
delayerror = 3
|
||||
wd.KeyDown(selenium.EndKey)
|
||||
wd.KeyUp(selenium.EndKey)
|
||||
time.Sleep(time.Second * 2)
|
||||
if len(elements) == liveurls {
|
||||
if liveurls == 0 {
|
||||
delayerror -= 2
|
||||
} else {
|
||||
delayerror--
|
||||
}
|
||||
|
||||
if delayerror <= 0 {
|
||||
break
|
||||
}
|
||||
} else {
|
||||
delayerror = 3
|
||||
}
|
||||
liveurls = len(elements)
|
||||
}
|
||||
}
|
||||
elements, err = wd.FindElements(selenium.ByXPATH, "//article//a[@data-a-target='preview-card-title-link' and @href]")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
for _, e := range elements {
|
||||
|
||||
attr, err := e.GetAttribute("href")
|
||||
elements, err = wd.FindElements(selenium.ByXPATH, "//article//a[@data-a-target='preview-card-title-link' and @href]")
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
streamer := &intimate.Streamer{}
|
||||
|
||||
matches := regexp.MustCompile(`https://www.twitch.tv/(\w+)`).FindStringSubmatch(attr)
|
||||
if len(matches) == 2 {
|
||||
streamer.UserId = matches[1]
|
||||
} else {
|
||||
log.Println(attr)
|
||||
continue
|
||||
for _, e := range elements {
|
||||
|
||||
attr, err := e.GetAttribute("href")
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
streamer := &intimate.Streamer{}
|
||||
|
||||
matches := regexp.MustCompile(`https://www.twitch.tv/(\w+)`).FindStringSubmatch(attr)
|
||||
if len(matches) == 2 {
|
||||
streamer.UserId = matches[1]
|
||||
} else {
|
||||
log.Println(attr)
|
||||
continue
|
||||
}
|
||||
|
||||
streamer.Platform = intimate.Ptwitch
|
||||
|
||||
updateUrl := make(map[string]string)
|
||||
updateUrl["live"] = attr
|
||||
streamer.LiveUrl = sql.NullString{String: attr, Valid: true}
|
||||
data, err := json.Marshal(updateUrl)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
streamer.UpdateUrl = data
|
||||
streamer.Operator = 0
|
||||
|
||||
estore.InsertStreamer(streamer)
|
||||
}
|
||||
|
||||
streamer.Platform = intimate.Ptwitch
|
||||
|
||||
updateUrl := make(map[string]string)
|
||||
updateUrl["live"] = attr
|
||||
streamer.LiveUrl = sql.NullString{String: attr, Valid: true}
|
||||
data, err := json.Marshal(updateUrl)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
streamer.UpdateUrl = data
|
||||
streamer.Operator = 0
|
||||
|
||||
estore.InsertStreamer(streamer)
|
||||
log.Println("streamer insert", len(elements))
|
||||
}
|
||||
}
|
||||
|
|
9
utils.go
9
utils.go
|
@ -51,10 +51,11 @@ func ParseDuration(dt string) (time.Duration, error) {
|
|||
func GetChromeDriver(port int) selenium.WebDriver {
|
||||
caps := selenium.Capabilities{"browserName": "chrome"}
|
||||
chromecaps := chrome.Capabilities{}
|
||||
err := chromecaps.AddExtension("/home/eson/test/ssh-key/0.1.2_0.crx")
|
||||
err := chromecaps.AddExtension("../../../crx/0.1.2_0.crx")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
chromecaps.Args = append(chromecaps.Args, "--proxy-pac-url=http://127.0.0.1:1081/pac")
|
||||
chromecaps.Args = append(chromecaps.Args, "--disk-cache-dir=/tmp/chromedriver-cache")
|
||||
chromecaps.ExcludeSwitches = append(chromecaps.ExcludeSwitches, "enable-automation")
|
||||
caps.AddChrome(chromecaps)
|
||||
|
@ -63,8 +64,12 @@ func GetChromeDriver(port int) selenium.WebDriver {
|
|||
panic(err)
|
||||
}
|
||||
wd, err := selenium.NewRemote(caps, fmt.Sprintf("http://localhost:%d/wd/hub", port))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
runtime.SetFinalizer(wd, func(obj interface{}) {
|
||||
if err := wd.Close(); err != nil {
|
||||
log.Println(obj)
|
||||
if err := obj.(selenium.WebDriver).Close(); err != nil {
|
||||
log.Println(err)
|
||||
}
|
||||
})
|
||||
|
|
Loading…
Reference in New Issue
Block a user