完成chrome 组件的例子. TODO: id_com. --> crash 重启
This commit is contained in:
parent
9c7f62d759
commit
056877030f
3
go.mod
3
go.mod
|
@ -6,5 +6,8 @@ require (
|
||||||
github.com/474420502/focus v0.8.1
|
github.com/474420502/focus v0.8.1
|
||||||
github.com/474420502/gurl v0.0.2
|
github.com/474420502/gurl v0.0.2
|
||||||
github.com/474420502/requests v1.5.0
|
github.com/474420502/requests v1.5.0
|
||||||
|
github.com/Pallinder/go-randomdata v1.1.0
|
||||||
|
github.com/lestrrat-go/libxml2 v0.0.0-20200215080510-6483566f52cb
|
||||||
|
github.com/pkg/errors v0.9.1 // indirect
|
||||||
github.com/tebeka/selenium v0.9.9
|
github.com/tebeka/selenium v0.9.9
|
||||||
)
|
)
|
||||||
|
|
5
go.sum
5
go.sum
|
@ -15,6 +15,7 @@ github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802 h1:1BDTz0u9nC3//pOC
|
||||||
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
|
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
|
||||||
github.com/BurntSushi/xgbutil v0.0.0-20160919175755-f7c97cef3b4e h1:4ZrkT/RzpnROylmoQL57iVUL57wGKTR5O6KpVnbm2tA=
|
github.com/BurntSushi/xgbutil v0.0.0-20160919175755-f7c97cef3b4e h1:4ZrkT/RzpnROylmoQL57iVUL57wGKTR5O6KpVnbm2tA=
|
||||||
github.com/BurntSushi/xgbutil v0.0.0-20160919175755-f7c97cef3b4e/go.mod h1:uw9h2sd4WWHOPdJ13MQpwK5qYWKYDumDqxWWIknEQ+k=
|
github.com/BurntSushi/xgbutil v0.0.0-20160919175755-f7c97cef3b4e/go.mod h1:uw9h2sd4WWHOPdJ13MQpwK5qYWKYDumDqxWWIknEQ+k=
|
||||||
|
github.com/Pallinder/go-randomdata v1.1.0 h1:gUubB1IEUliFmzjqjhf+bgkg1o6uoFIkRsP3VrhEcx8=
|
||||||
github.com/Pallinder/go-randomdata v1.1.0/go.mod h1:yHmJgulpD2Nfrm0cR9tI/+oAgRqCQQixsA8HyRZfV9Y=
|
github.com/Pallinder/go-randomdata v1.1.0/go.mod h1:yHmJgulpD2Nfrm0cR9tI/+oAgRqCQQixsA8HyRZfV9Y=
|
||||||
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
|
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
|
||||||
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
|
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
|
||||||
|
@ -50,6 +51,10 @@ github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5m
|
||||||
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
|
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
|
||||||
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
|
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
|
||||||
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
|
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
|
||||||
|
github.com/lestrrat-go/libxml2 v0.0.0-20200215080510-6483566f52cb h1:qqNmX9V9n4byPp7LUvUf7CPhMPYO9ol4ElpbD3DgzuY=
|
||||||
|
github.com/lestrrat-go/libxml2 v0.0.0-20200215080510-6483566f52cb/go.mod h1:fy/ZVbgyB83mtricxwSW3zqIRXWOVpKG2PvdUDFeC58=
|
||||||
|
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
|
||||||
|
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
github.com/rogpeppe/go-charset v0.0.0-20180617210344-2471d30d28b4/go.mod h1:qgYeAmZ5ZIpBWTGllZSQnw97Dj+woV0toclVaRGI8pc=
|
github.com/rogpeppe/go-charset v0.0.0-20180617210344-2471d30d28b4/go.mod h1:qgYeAmZ5ZIpBWTGllZSQnw97Dj+woV0toclVaRGI8pc=
|
||||||
|
|
22
hunter.go
22
hunter.go
|
@ -1,6 +1,7 @@
|
||||||
package hunter
|
package hunter
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"log"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
|
||||||
pqueue "github.com/474420502/focus/priority_queue"
|
pqueue "github.com/474420502/focus/priority_queue"
|
||||||
|
@ -50,13 +51,6 @@ func NewPriorityHunter(queueCreator func() *pqueue.PriorityQueue) *Hunter {
|
||||||
hunter := &Hunter{}
|
hunter := &Hunter{}
|
||||||
hunter.createQueue = queueCreator
|
hunter.createQueue = queueCreator
|
||||||
|
|
||||||
// hunter.task = &BaseTask{}
|
|
||||||
// hunter.task.SetParent(nil)
|
|
||||||
// hunter.task.SetChildren(hunter.createQueue())
|
|
||||||
|
|
||||||
// hunter.cxt = NewContext()
|
|
||||||
// hunter.cxt.curNode = hunter.task
|
|
||||||
|
|
||||||
hunter.share = make(map[string]interface{})
|
hunter.share = make(map[string]interface{})
|
||||||
return hunter
|
return hunter
|
||||||
}
|
}
|
||||||
|
@ -156,10 +150,22 @@ func (hunter *Hunter) recursionTasks(cxt *TaskContext) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Stop 停止任务
|
// Stop 停止任务
|
||||||
func (hunter *Hunter) Stop() {
|
func (hunter *Hunter) close(itask ITask) {
|
||||||
|
if iclose, ok := itask.(IClose); ok {
|
||||||
|
if err := iclose.Close(); err != nil {
|
||||||
|
log.Println(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Stop 停止任务
|
||||||
|
func (hunter *Hunter) Stop() {
|
||||||
|
for _, task := range hunter.tasks {
|
||||||
|
hunter.close(task)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// AddTask 执行任务
|
// AddTask 执行任务
|
||||||
func (hunter *Hunter) AddTask(task ITask) {
|
func (hunter *Hunter) AddTask(task ITask) {
|
||||||
hunter.tasks = append(hunter.tasks, task)
|
hunter.tasks = append(hunter.tasks, task)
|
||||||
|
|
|
@ -7,7 +7,7 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
log.Println("测试最好使用 docker run -p 80:80 kennethreitz/httpbin")
|
log.Println("recommend: docker run -p 80:80 kennethreitz/httpbin")
|
||||||
}
|
}
|
||||||
|
|
||||||
type WebGet struct {
|
type WebGet struct {
|
||||||
|
|
81
pre_base_driver_com.go
Normal file
81
pre_base_driver_com.go
Normal file
|
@ -0,0 +1,81 @@
|
||||||
|
package hunter
|
||||||
|
|
||||||
|
import (
|
||||||
|
"log"
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
"github.com/474420502/requests"
|
||||||
|
"github.com/tebeka/selenium"
|
||||||
|
)
|
||||||
|
|
||||||
|
// https://github.com/tebeka/selenium
|
||||||
|
|
||||||
|
// PreBaseDriverUrl is the shared base of the WebDriver-backed URL components.
// It keeps the target URL together with the selenium service and driver
// handles, and its methods expose the loaded page as a response value.
|
||||||
|
type PreBaseDriverUrl struct {
|
||||||
|
PreUrl string // address handed to the driver's Get call in Hunt
|
||||||
|
Port int // port used for the driver service; PreChromeUrl.Before picks a random one while it is 0
|
||||||
|
service *selenium.Service // driver service handle; stopped by Close, nil until one is started
|
||||||
|
driver selenium.WebDriver // browser session used by Content and Hunt, returned by GetResponse
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close 如果需要在最后执行销毁操作, 继承覆盖该方法
|
||||||
|
func (u *PreBaseDriverUrl) Close() error {
|
||||||
|
|
||||||
|
if u.service != nil {
|
||||||
|
// 直接退出, 所有销毁 直接忽略webdriver.Quit(). // Delete Session
|
||||||
|
if err := u.service.Stop(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// IResponse interface {
|
||||||
|
// Content() []byte
|
||||||
|
// GetStatus() string
|
||||||
|
// GetStatusCode() int
|
||||||
|
// GetHeader() http.Header
|
||||||
|
// GetCookie() []*http.Cookie
|
||||||
|
|
||||||
|
// // 返回不同的自定义的Response, 也可以是其他定义的结构体如WebDriver
|
||||||
|
// GetResponse() interface{}
|
||||||
|
// }
|
||||||
|
|
||||||
|
// Content 内容
|
||||||
|
func (u *PreBaseDriverUrl) Content() []byte {
|
||||||
|
content, err := u.driver.PageSource()
|
||||||
|
if err != nil {
|
||||||
|
log.Println(err)
|
||||||
|
}
|
||||||
|
return []byte(content)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetStatusCode 暂时为空
|
||||||
|
func (u *PreBaseDriverUrl) GetStatusCode() int {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetStatus 内容 暂时为空
|
||||||
|
func (u *PreBaseDriverUrl) GetStatus() string {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetHeader 暂时为空
|
||||||
|
func (u *PreBaseDriverUrl) GetHeader() http.Header {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetCookie 暂时为空
|
||||||
|
func (u *PreBaseDriverUrl) GetCookie() []*http.Cookie {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetResponse 返回 webdriver
|
||||||
|
func (u *PreBaseDriverUrl) GetResponse() interface{} {
|
||||||
|
return u.driver
|
||||||
|
}
|
||||||
|
|
||||||
|
func (u *PreBaseDriverUrl) Hunt() (requests.IResponse, error) {
|
||||||
|
err := u.driver.Get(string(u.PreUrl))
|
||||||
|
return u, err
|
||||||
|
}
|
46
pre_chrome_driver_com.go
Normal file
46
pre_chrome_driver_com.go
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
package hunter
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
|
||||||
|
"github.com/Pallinder/go-randomdata"
|
||||||
|
"github.com/tebeka/selenium"
|
||||||
|
)
|
||||||
|
|
||||||
|
// PreChromeUrl is the Chrome flavour of the URL pre-processing component.
// It embeds PreBaseDriverUrl and, in Before, backs it with a chromedriver
// service and a Chrome session.
|
||||||
|
type PreChromeUrl struct {
|
||||||
|
PreBaseDriverUrl
|
||||||
|
}
|
||||||
|
|
||||||
|
// Before 驱动的预处理
|
||||||
|
func (u *PreChromeUrl) Before(ctx *TaskContext) {
|
||||||
|
|
||||||
|
var err error
|
||||||
|
var service *selenium.Service
|
||||||
|
|
||||||
|
if u.service == nil {
|
||||||
|
for i := 0; i < 20; i++ {
|
||||||
|
if u.Port == 0 {
|
||||||
|
u.Port = randomdata.Number(10000, 50000)
|
||||||
|
}
|
||||||
|
service, err = selenium.NewChromeDriverService("chromedriver", u.Port)
|
||||||
|
if err != nil {
|
||||||
|
log.Println(i, err)
|
||||||
|
} else {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
u.service = service
|
||||||
|
}
|
||||||
|
|
||||||
|
if u.driver == nil {
|
||||||
|
caps := selenium.Capabilities{"browserName": "chrome"}
|
||||||
|
wd, err := selenium.NewRemote(caps, fmt.Sprintf("http://localhost:%d/wd/hub", u.Port))
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
u.driver = wd
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,61 +0,0 @@
|
||||||
package hunter
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"log"
|
|
||||||
"runtime"
|
|
||||||
|
|
||||||
"github.com/474420502/requests"
|
|
||||||
"github.com/tebeka/selenium"
|
|
||||||
)
|
|
||||||
|
|
||||||
// https://github.com/tebeka/selenium
|
|
||||||
|
|
||||||
// PreDriverUrl Task的 curl bash 预处理组件
|
|
||||||
type PreDriverUrl struct {
|
|
||||||
url string
|
|
||||||
service *selenium.Service
|
|
||||||
driver selenium.WebDriver
|
|
||||||
}
|
|
||||||
|
|
||||||
func (u *PreDriverUrl) Before(ctx *TaskContext) {
|
|
||||||
service, err := selenium.NewChromeDriverService("chromedriver", 1030)
|
|
||||||
if err != nil {
|
|
||||||
log.Panic(err)
|
|
||||||
}
|
|
||||||
if err != nil {
|
|
||||||
panic(err) // panic is used only as an example and is not otherwise recommended.
|
|
||||||
}
|
|
||||||
u.service = service
|
|
||||||
|
|
||||||
caps := selenium.Capabilities{"browserName": "chrome"}
|
|
||||||
wd, err := selenium.NewRemote(caps, fmt.Sprintf("http://localhost:%d/wd/hub", 1030))
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
u.driver = wd
|
|
||||||
|
|
||||||
runtime.SetFinalizer(&[]interface{}{service, wd}, func(obj interface{}) {
|
|
||||||
iobj := obj.([]interface{})
|
|
||||||
service := iobj[0].(*selenium.Service)
|
|
||||||
service.Stop()
|
|
||||||
|
|
||||||
wd := iobj[1].(selenium.WebDriver)
|
|
||||||
wd.Quit()
|
|
||||||
})
|
|
||||||
|
|
||||||
err = wd.Get(string(u.url))
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
ele, err := wd.FindElement(selenium.ByXPATH, "//title")
|
|
||||||
log.Println(ele.Text())
|
|
||||||
log.Println(ele.TagName())
|
|
||||||
}
|
|
||||||
|
|
||||||
func (u *PreDriverUrl) Hunt() (requests.IResponse, error) {
|
|
||||||
err := u.driver.Get(string(u.url))
|
|
||||||
|
|
||||||
return nil, err
|
|
||||||
}
|
|
|
@ -1,9 +1,15 @@
|
||||||
package hunter
|
package hunter
|
||||||
|
|
||||||
import "testing"
|
import (
|
||||||
|
"log"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/lestrrat-go/libxml2"
|
||||||
|
"github.com/tebeka/selenium"
|
||||||
|
)
|
||||||
|
|
||||||
type WebPreDriverUrl struct {
|
type WebPreDriverUrl struct {
|
||||||
PreDriverUrl
|
PreChromeUrl
|
||||||
}
|
}
|
||||||
|
|
||||||
func (web *WebPreDriverUrl) Execute(cxt *TaskContext) {
|
func (web *WebPreDriverUrl) Execute(cxt *TaskContext) {
|
||||||
|
@ -12,9 +18,51 @@ func (web *WebPreDriverUrl) Execute(cxt *TaskContext) {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
cxt.SetShare("test", resp.Content())
|
cxt.SetShare("test", resp.Content())
|
||||||
|
wd := resp.GetResponse().(selenium.WebDriver)
|
||||||
|
ele, err := wd.FindElement(selenium.ByXPATH, "//title")
|
||||||
|
if err != nil {
|
||||||
|
log.Panic(err)
|
||||||
|
}
|
||||||
|
title, err := ele.GetAttribute("text")
|
||||||
|
if err != nil {
|
||||||
|
log.Panic(err)
|
||||||
|
}
|
||||||
|
cxt.SetShare("driver-title", title)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestDriver(t *testing.T) {
|
func TestDriver(t *testing.T) {
|
||||||
hunter := NewHunter(&WebPreDriverUrl{PreDriverUrl("http://httpbin.org")}) // first params PreCurlUrl
|
preurl := &WebPreDriverUrl{}
|
||||||
|
preurl.PreUrl = "http://httpbin.org"
|
||||||
|
|
||||||
|
hunter := NewHunter(preurl) // first params PreCurlUrl
|
||||||
hunter.Execute()
|
hunter.Execute()
|
||||||
|
defer hunter.Stop()
|
||||||
|
|
||||||
|
content := hunter.GetShare("test").([]byte)
|
||||||
|
if content != nil {
|
||||||
|
doc, err := libxml2.ParseHTML(content)
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
} else {
|
||||||
|
if result, err := doc.Find("//title"); err == nil {
|
||||||
|
iter := result.NodeIter()
|
||||||
|
if iter.Next() {
|
||||||
|
n := iter.Node()
|
||||||
|
if n.TextContent() != "httpbin.org" {
|
||||||
|
t.Error(n.TextContent())
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
t.Error("can't xpath title")
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
title := hunter.GetShare("driver-title").(string)
|
||||||
|
if title != "httpbin.org" {
|
||||||
|
t.Error(title)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user