diff --git a/crx/0.1.2_0.crx b/crx/0.1.2_0.crx new file mode 100644 index 0000000..54ac5ce Binary files /dev/null and b/crx/0.1.2_0.crx differ diff --git a/crx/0.1.2_0.pem b/crx/0.1.2_0.pem new file mode 100644 index 0000000..6b412b5 --- /dev/null +++ b/crx/0.1.2_0.pem @@ -0,0 +1,28 @@ +-----BEGIN PRIVATE KEY----- +MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQC4DYEiDPfw094p +axusu7+kP9J9waL3+794PORnMoBnoKEBuEAfeWLZPtX338nq3dmyiVvNsNKFimcH +vVFye5uHdFOfXZ7f6oIBzeY0616dhcASjV7nj0HkuhDhZQGvBWegQrFJrQQDG99T +kXv3mH8e4W6qvwpyRedXqQJC3gE0LOKvUGlWABQwwq9NF5Uf6vJyXYMv7uBMwpWc +liAtJ6v9742IIGjmZhhaQELliOUucbaeilzxJ29XyVXGhEXNERAXbswSGectHvoz +HsBo4YVzKaR2b8wRXGT1W7am1MwFmOzZcX8tQAXTnoThsAL5tLHi+HJ5QRNbSf6h +C44x8bcdAgMBAAECggEAW+G5/pK00FAkj06+MRxFTqzh2w/o2J2f91mGuJH4kkMZ +Pa+Kq2vA0i1RSf45YfvAqyVxZB0K68mtJ6r2Vw2oFhYXjO6C2svKfTYZ339E66SS +v3A92aGlxpawyKTRE1vCYLoKoXozD45BjgmJ9o/1nifyRGE8yNFm7VcdHt6PgUix +914dJWeSwF94tnRqPJwfOXJkpTXdiWQunGWBOH2nK6y/r2xlLiR4EXDA/4LwMegh +5XHWA4YOG0jQc5a/U5w+899/JKvduo5ZU738jKrtcqD8b2G76R+VTxzbv11ROm6E +AMo1nTHRtbPAKDbSSPWgrzjxQPVGbJPJ+BnzF2V3rQKBgQDnC8OxPuhFvEdjmijW +1lPWkB4NBJ9uBtWEMhHe5PTeHs0sfm9AZvM8npObMNcOvmQH3bGX2aY9XoEHZbjK +ixM5miVazdEt0y7UonzcXqjpOvjiqlaAcMP+2Y6ejqi1JD3sflyi/GmLNCtlbRsZ +Fx7sgPNk+LueGvwK35TWIsL7hwKBgQDL7mpjYk5V3osb1AGqcaJYsVa9Qm2izsAl +g13sxollDLazaitwAt3r+FMtLVgJPptTlV37QF1WbSCfGCYjjFRP0WQN1lTlZqUN +4QNKQ6SI/Wp4qjl127T2n/1toc7Mhjs00V+RJiFYpN5cdvXniBXjJC0oh30tL+L8 +Cvws2QYJOwKBgQDJEqD1QSUNg4SxdvkxtwbxhSzR8YL6UzJAwP5yd9lu8Wln3oTd +jHsE95DID6Ipr6IIgnRLDdyyLeumz20ZwB00FSWLN/FiqxZncR2u/yaLC4qMYOe1 +Ee5QfW+0J71FH8xQY8wk//yua/GUbHaXyFpeQv8PkbReLWfJ4rh5/3inQwKBgQCl +7M9dG6BXF6Ihu0a7soeAGJJVnRXtUMFgBFnIi+VAda61nh3Hnl2IYFz0th8aLnlc +8XwtMLqA1nujVpe5drUm2FzLMWeT2wdSmpD9vLnDyET39rCX53J+87/UksHbASBt +IinaxKZ/JG3T1+rOPphoXofroQnFWWAa6KkzqETT/wKBgDv2KDnZqYMSNy4xtz96 +IdOpYioocSvRS5kUUwokAIU9CYIo5+iyaJYok25u6OaRNOr1vqzqeG5j8Rdc/kQc +70df/gZ2Gejn+3BYJtwtgeB25KfCjd+jhTHOOgLbnK0tX3h8X5wkpA0628inwMhg +Q9GxE6HDisGIr0S5PWnZFh34 +-----END PRIVATE KEY----- diff --git a/sql/remake_database.sh b/sql/remake_database.sh new file mode 100644 index 0000000..180663b --- /dev/null +++ b/sql/remake_database.sh @@ -0,0 +1,10 @@ +# /bin/bash +USER=root +HOST=127.0.0.1 +PORT=4000 + +# mysql -h $HOST -u $USER -P $PORT -c "drop database intimate_source"; +# mysql -h $HOST -u $USER -P $PORT -c "drop database intimate_extractor"; + +mysql -h $HOST -u $USER -P $PORT < ./intimate_extractor.sql; +mysql -h $HOST -u $USER -P $PORT < ./intimate_source.sql; diff --git a/store.go b/store.go index 2c07234..45f7f06 100644 --- a/store.go +++ b/store.go @@ -87,7 +87,8 @@ func (store *StoreSource) Insert(isource IGet) { // Deduplicate 去重 func (store *StoreSource) Deduplicate(target Target, field string) { - _, err := store.db.Exec(`DELETE FROM ` + store.table + ` WHERE uid NOT IN (SELECT MAX(s.uid) FROM (SELECT uid, ` + field + ` FROM ` + store.table + `force index(target_type_idx) WHERE target_type = "` + string(target) + `" ) s GROUP BY s.` + string(target) + `) ;`) + sql := `DELETE FROM ` + store.table + ` WHERE uid NOT IN (SELECT MAX(s.uid) FROM (SELECT uid, ` + field + ` FROM ` + store.table + ` force index(target_type_idx) WHERE target_type = "` + string(target) + `" ) s GROUP BY s.` + string(field) + `) ;` + _, err := store.db.Exec(sql) if err != nil { panic(err) } diff --git a/tasks/twitch/twitch_task1/task_twitch.go b/tasks/twitch/twitch_task1/task_twitch.go index 4f0aa50..cdff148 100644 --- a/tasks/twitch/twitch_task1/task_twitch.go +++ b/tasks/twitch/twitch_task1/task_twitch.go @@ -50,7 +50,7 @@ func (cl *ChannelLink) Execute() { for i := 0; i <= 200; i++ { wd.KeyDown(selenium.EndKey) - time.Sleep(time.Second * 3) + time.Sleep(time.Second * 2) } elements, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]") diff --git a/tasks/twitch/twitch_task2/task_twitch.go b/tasks/twitch/twitch_task2/task_twitch.go index eb3780a..178f0b8 100644 --- a/tasks/twitch/twitch_task2/task_twitch.go +++ b/tasks/twitch/twitch_task2/task_twitch.go @@ -5,7 +5,11 @@ import ( "encoding/json" "intimate" "log" + "os" + "os/signal" "regexp" + "sync/atomic" + "syscall" "time" "github.com/tebeka/selenium" @@ -27,88 +31,112 @@ type UserList struct { func (cl *UserList) Execute() { // DELETE FROM source_twitch WHERE uid NOT IN (SELECT MAX(s.uid) FROM (SELECT uid, source FROM source_twitch ) s GROUP BY s.source) ; //article//a[@data-a-target='preview-card-title-link'] - var err error + wd := intimate.GetChromeDriver(3030) - sourceChannel, err := sstore.Pop(intimate.TTwitchChannel) - if err != nil { - panic(err) - } + var loop int32 = 1 - weburl := sourceChannel.Source.String + "?sort=VIEWER_COUNT" - err = wd.Get(weburl) - if err != nil { - panic(err) - } + go func() { + signalchan := make(chan os.Signal) + signal.Notify(signalchan, syscall.SIGKILL, syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGSTOP) + log.Println("accept stop command:", <-signalchan) + atomic.StoreInt32(&loop, 0) + }() - wd.WaitWithTimeout(func(wd selenium.WebDriver) (bool, error) { - _, err := wd.FindElement(selenium.ByXPATH, "(//div/p[@class=''])[last()]") - if err != nil { - return false, err - } - return true, nil - }, time.Second*10) + for atomic.LoadInt32(&loop) > 0 { - btn, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']") - if err != nil { - panic(err) - } - btn.Click() - - var elements []selenium.WebElement - var liveurls = 0 - var delayerror = 3 - for i := 0; i < 2; i++ { - elements, err = wd.FindElements(selenium.ByXPATH, "(//div/p[@class=''])[last()]") + var err error + sourceChannel, err := sstore.Pop(intimate.TTwitchChannel) if err != nil { panic(err) } - wd.KeyDown(selenium.EndKey) - time.Sleep(time.Second * 2) - if len(elements) == liveurls { - delayerror-- - if delayerror <= 0 { + + weburl := sourceChannel.Source.String + "?sort=VIEWER_COUNT" + err = wd.Get(weburl) + if err != nil { + panic(err) + } + + wd.WaitWithTimeout(func(wd selenium.WebDriver) (bool, error) { + _, err := wd.FindElement(selenium.ByXPATH, "(//div/p[@class=''])[last()]") + if err != nil { + return false, err + } + return true, nil + }, time.Second*10) + + btn, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']") + if err != nil { + log.Println(err) + continue + } + btn.Click() + + var elements []selenium.WebElement + var liveurls = 0 + var delayerror = 3 + for i := 0; i < 200 && atomic.LoadInt32(&loop) > 0; i++ { + elements, err = wd.FindElements(selenium.ByXPATH, "(//div/p[@class=''])[last()]") + if err != nil { + log.Println(err) break } - } else { - delayerror = 3 + wd.KeyDown(selenium.EndKey) + wd.KeyUp(selenium.EndKey) + time.Sleep(time.Second * 2) + if len(elements) == liveurls { + if liveurls == 0 { + delayerror -= 2 + } else { + delayerror-- + } + + if delayerror <= 0 { + break + } + } else { + delayerror = 3 + } + liveurls = len(elements) } - } - elements, err = wd.FindElements(selenium.ByXPATH, "//article//a[@data-a-target='preview-card-title-link' and @href]") - if err != nil { - panic(err) - } - - for _, e := range elements { - - attr, err := e.GetAttribute("href") + elements, err = wd.FindElements(selenium.ByXPATH, "//article//a[@data-a-target='preview-card-title-link' and @href]") if err != nil { log.Println(err) continue } - streamer := &intimate.Streamer{} - matches := regexp.MustCompile(`https://www.twitch.tv/(\w+)`).FindStringSubmatch(attr) - if len(matches) == 2 { - streamer.UserId = matches[1] - } else { - log.Println(attr) - continue + for _, e := range elements { + + attr, err := e.GetAttribute("href") + if err != nil { + log.Println(err) + continue + } + streamer := &intimate.Streamer{} + + matches := regexp.MustCompile(`https://www.twitch.tv/(\w+)`).FindStringSubmatch(attr) + if len(matches) == 2 { + streamer.UserId = matches[1] + } else { + log.Println(attr) + continue + } + + streamer.Platform = intimate.Ptwitch + + updateUrl := make(map[string]string) + updateUrl["live"] = attr + streamer.LiveUrl = sql.NullString{String: attr, Valid: true} + data, err := json.Marshal(updateUrl) + if err != nil { + log.Println(err) + continue + } + streamer.UpdateUrl = data + streamer.Operator = 0 + + estore.InsertStreamer(streamer) } - - streamer.Platform = intimate.Ptwitch - - updateUrl := make(map[string]string) - updateUrl["live"] = attr - streamer.LiveUrl = sql.NullString{String: attr, Valid: true} - data, err := json.Marshal(updateUrl) - if err != nil { - log.Println(err) - continue - } - streamer.UpdateUrl = data - streamer.Operator = 0 - - estore.InsertStreamer(streamer) + log.Println("streamer insert", len(elements)) } } diff --git a/utils.go b/utils.go index 64f9efe..774de66 100644 --- a/utils.go +++ b/utils.go @@ -51,10 +51,11 @@ func ParseDuration(dt string) (time.Duration, error) { func GetChromeDriver(port int) selenium.WebDriver { caps := selenium.Capabilities{"browserName": "chrome"} chromecaps := chrome.Capabilities{} - err := chromecaps.AddExtension("/home/eson/test/ssh-key/0.1.2_0.crx") + err := chromecaps.AddExtension("../../../crx/0.1.2_0.crx") if err != nil { panic(err) } + chromecaps.Args = append(chromecaps.Args, "--proxy-pac-url=http://127.0.0.1:1081/pac") chromecaps.Args = append(chromecaps.Args, "--disk-cache-dir=/tmp/chromedriver-cache") chromecaps.ExcludeSwitches = append(chromecaps.ExcludeSwitches, "enable-automation") caps.AddChrome(chromecaps) @@ -63,8 +64,12 @@ func GetChromeDriver(port int) selenium.WebDriver { panic(err) } wd, err := selenium.NewRemote(caps, fmt.Sprintf("http://localhost:%d/wd/hub", port)) + if err != nil { + panic(err) + } runtime.SetFinalizer(wd, func(obj interface{}) { - if err := wd.Close(); err != nil { + log.Println(obj) + if err := obj.(selenium.WebDriver).Close(); err != nil { log.Println(err) } })