2020-07-22 12:00:02 +00:00
package main
2020-07-23 10:29:56 +00:00
import (
"database/sql"
"intimate"
"log"
"time"
"github.com/tebeka/selenium"
)
var (
	// sstore is the source store instance, the implementation that stores
	// the source data. For the table layout see sql/intimate_source.sql.
	sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))

	// estore is the extractor store connection instance.
	estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
)
// ChannelLink is the channel-link task: it fetches all channel links of the
// type. The struct itself has no fields.
type ChannelLink struct{}
// Execute 执行任务
func ( cl * ChannelLink ) Execute ( ) {
2020-07-24 10:48:33 +00:00
var err error
wd := intimate . GetChromeDriver ( 3030 )
2020-07-31 10:04:10 +00:00
ps := intimate . NewPerfectShutdown ( )
2020-07-23 10:29:56 +00:00
weburl := "https://www.twitch.tv/directory?sort=VIEWER_COUNT"
err = wd . Get ( weburl )
if err != nil {
panic ( err )
}
cardCondition := func ( wd selenium . WebDriver ) ( bool , error ) {
elements , err := wd . FindElements ( selenium . ByXPATH , "//span/a[contains(@data-a-target,'card-') and @href]" )
if err != nil {
return false , err
}
return len ( elements ) > 0 , nil
}
2020-07-31 10:04:10 +00:00
wd . WaitWithTimeout ( cardCondition , time . Second * 15 )
2020-07-23 10:29:56 +00:00
time . Sleep ( time . Second )
e , err := wd . FindElement ( selenium . ByXPATH , "//button[@data-a-target='browse-sort-menu']" )
if err != nil {
panic ( err )
}
e . Click ( )
2020-07-28 10:56:27 +00:00
var hrefs map [ string ] bool = make ( map [ string ] bool )
var delayerror = 5
2020-07-23 10:29:56 +00:00
for i := 0 ; i <= 200 ; i ++ {
2020-07-28 10:56:27 +00:00
cards , err := wd . FindElements ( selenium . ByXPATH , "//span/a[contains(@data-a-target,'card-') and @href]" )
2020-07-23 10:29:56 +00:00
if err != nil {
log . Println ( err )
2020-07-28 10:56:27 +00:00
break
}
2020-08-04 06:12:00 +00:00
if len ( hrefs ) == 0 {
2020-07-28 10:56:27 +00:00
delayerror --
if delayerror <= 0 {
break
}
} else {
delayerror = 5
2020-07-23 10:29:56 +00:00
}
2020-07-28 10:56:27 +00:00
2020-07-31 10:04:10 +00:00
for ii := 0 ; ii < 10 ; ii ++ {
for _ , card := range cards {
href , err := card . GetAttribute ( "href" )
if err != nil {
log . Println ( href , err )
continue
} else {
hrefs [ href ] = true
}
2020-07-28 10:56:27 +00:00
}
2020-07-31 10:04:10 +00:00
break
2020-07-28 10:56:27 +00:00
}
2020-08-04 06:12:00 +00:00
2020-07-31 10:04:10 +00:00
if ps . IsClose ( ) {
break
}
2020-07-28 10:56:27 +00:00
if len ( cards ) > 10 {
log . Println ( len ( cards ) )
wd . ExecuteScript ( ` items = document . evaluate ( "//div[@data-target='directory-page__card-container']/../self::div[@data-target and @style]" , document , null , XPathResult . ORDERED_NODE_SNAPSHOT_TYPE , null ) ;
for ( var i = 0 ; i < items . snapshotLength - 10 ; i ++ ) { item = items . snapshotItem ( i ) ; item . remove ( ) ; } ; ` , nil )
}
time . Sleep ( time . Millisecond * 200 )
wd . KeyDown ( selenium . EndKey )
time . Sleep ( time . Millisecond * 200 )
wd . KeyUp ( selenium . EndKey )
time . Sleep ( time . Millisecond * 2500 )
}
for href := range hrefs {
// TODO: Save href
2020-07-23 10:29:56 +00:00
source := & intimate . Source { }
source . Source = sql . NullString { String : href , Valid : true }
source . Operator = 0
source . Target = intimate . TTwitchChannel
source . Url = weburl
sstore . Insert ( source )
}
2020-07-24 10:48:33 +00:00
2020-07-28 10:56:27 +00:00
log . Println ( "hrefs len:" , len ( hrefs ) )
2020-07-24 10:48:33 +00:00
sstore . Deduplicate ( intimate . TTwitchChannel , "source" )
2020-07-23 10:29:56 +00:00
}