2020-07-22 12:00:02 +00:00
package main
2020-07-23 10:29:56 +00:00
import (
"intimate"
"log"
"time"
"github.com/tebeka/selenium"
)
2020-09-08 10:24:51 +00:00
// // sstore is the source-store instance, i.e. the implementation that stores source data. For the table layout, see sql/intimate_source.sql
// var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))
2020-07-23 10:29:56 +00:00
2020-09-08 10:24:51 +00:00
// // estore is the extractor-store connection instance
// var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
2020-07-23 10:29:56 +00:00
// 获取类型的所有频道链接
// Execute 执行任务
2020-08-13 11:11:53 +00:00
func Execute ( ) {
2020-09-08 10:24:51 +00:00
2020-07-31 10:04:10 +00:00
ps := intimate . NewPerfectShutdown ( )
2020-09-11 10:52:04 +00:00
var adriver * intimate . AutoCloseDriver
2020-07-23 10:29:56 +00:00
2020-09-08 10:24:51 +00:00
for ! ps . IsClose ( ) {
2020-09-10 09:33:52 +00:00
2020-09-08 10:24:51 +00:00
var err error
2020-09-11 10:52:04 +00:00
adriver = intimate . GetChromeDriver ( )
2020-09-10 09:33:52 +00:00
wd := adriver . Webdriver
2020-07-23 10:29:56 +00:00
2020-09-08 10:24:51 +00:00
weburl := "https://www.twitch.tv/directory?sort=VIEWER_COUNT"
err = wd . Get ( weburl )
2020-07-23 10:29:56 +00:00
if err != nil {
2020-09-08 10:24:51 +00:00
panic ( err )
2020-07-23 10:29:56 +00:00
}
2020-09-08 10:24:51 +00:00
cardCondition := func ( wd selenium . WebDriver ) ( bool , error ) {
elements , err := wd . FindElements ( selenium . ByXPATH , "//span/a[contains(@data-a-target,'card-') and @href]" )
if err != nil {
return false , err
}
return len ( elements ) > 0 , nil
}
wd . WaitWithTimeout ( cardCondition , time . Second * 15 )
time . Sleep ( time . Second )
2020-07-23 10:29:56 +00:00
2020-09-08 10:24:51 +00:00
e , err := wd . FindElement ( selenium . ByXPATH , "//button[@data-a-target='browse-sort-menu']" )
2020-07-23 10:29:56 +00:00
if err != nil {
2020-09-08 10:24:51 +00:00
panic ( err )
2020-07-28 10:56:27 +00:00
}
2020-09-08 10:24:51 +00:00
e . Click ( )
var lasthreflen = 0
var hrefs map [ string ] bool = make ( map [ string ] bool )
var delayerror = 5
for i := 0 ; i <= 200 ; i ++ {
cards , err := wd . FindElements ( selenium . ByXPATH , "//span/a[contains(@data-a-target,'card-') and @href]" )
if err != nil {
log . Println ( err )
2020-07-28 10:56:27 +00:00
break
}
2020-09-08 10:24:51 +00:00
if len ( hrefs ) == lasthreflen {
delayerror --
if delayerror <= 0 {
break
2020-07-31 10:04:10 +00:00
}
2020-09-08 10:24:51 +00:00
} else {
delayerror = 7
}
lasthreflen = len ( hrefs )
for ii := 0 ; ii < 10 ; ii ++ {
for _ , card := range cards {
href , err := card . GetAttribute ( "href" )
if err != nil {
log . Println ( href , err )
continue
} else {
hrefs [ href ] = true
}
}
break
2020-07-28 10:56:27 +00:00
}
2020-08-04 06:12:00 +00:00
2020-09-08 10:24:51 +00:00
if ps . IsClose ( ) {
break
}
2020-07-28 10:56:27 +00:00
2020-09-08 10:24:51 +00:00
if len ( cards ) > 10 {
log . Println ( len ( cards ) )
wd . ExecuteScript ( ` items = document . evaluate ( "//div[@data-target='directory-page__card-container']/../self::div[@data-target and @style]" , document , null , XPathResult . ORDERED_NODE_SNAPSHOT_TYPE , null ) ;
2020-07-28 10:56:27 +00:00
for ( var i = 0 ; i < items . snapshotLength - 10 ; i ++ ) { item = items . snapshotItem ( i ) ; item . remove ( ) ; } ; ` , nil )
2020-09-08 10:24:51 +00:00
}
time . Sleep ( time . Millisecond * 200 )
wd . KeyDown ( selenium . EndKey )
time . Sleep ( time . Millisecond * 200 )
wd . KeyUp ( selenium . EndKey )
time . Sleep ( time . Millisecond * 2500 )
2020-07-28 10:56:27 +00:00
}
2020-09-08 10:24:51 +00:00
for href := range hrefs {
2020-07-28 10:56:27 +00:00
2020-09-08 10:24:51 +00:00
sl := & intimate . StreamerList { }
sl . Url = href
sl . UrlHash = intimate . GetUrlHash ( sl . Url )
sl . Platform = string ( intimate . Ptwitch )
sl . UpdateTime = intimate . GetUpdateTimeNow ( )
err := intimate . TStreamerList . Insert ( sl )
if err != nil {
log . Println ( err )
}
}
2020-07-24 10:48:33 +00:00
2020-09-08 10:24:51 +00:00
log . Println ( "hrefs len:" , len ( hrefs ) )
2020-09-11 10:52:04 +00:00
adriver . Close ( )
ps . Wait ( time . Minute * 5 )
}
2020-07-23 10:29:56 +00:00
}