完成chrome 组件的例子. TODO: id_com. --> crash 重启
This commit is contained in:
parent
9c7f62d759
commit
056877030f
3
go.mod
3
go.mod
|
@ -6,5 +6,8 @@ require (
|
|||
github.com/474420502/focus v0.8.1
|
||||
github.com/474420502/gurl v0.0.2
|
||||
github.com/474420502/requests v1.5.0
|
||||
github.com/Pallinder/go-randomdata v1.1.0
|
||||
github.com/lestrrat-go/libxml2 v0.0.0-20200215080510-6483566f52cb
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/tebeka/selenium v0.9.9
|
||||
)
|
||||
|
|
5
go.sum
5
go.sum
|
@ -15,6 +15,7 @@ github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802 h1:1BDTz0u9nC3//pOC
|
|||
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
|
||||
github.com/BurntSushi/xgbutil v0.0.0-20160919175755-f7c97cef3b4e h1:4ZrkT/RzpnROylmoQL57iVUL57wGKTR5O6KpVnbm2tA=
|
||||
github.com/BurntSushi/xgbutil v0.0.0-20160919175755-f7c97cef3b4e/go.mod h1:uw9h2sd4WWHOPdJ13MQpwK5qYWKYDumDqxWWIknEQ+k=
|
||||
github.com/Pallinder/go-randomdata v1.1.0 h1:gUubB1IEUliFmzjqjhf+bgkg1o6uoFIkRsP3VrhEcx8=
|
||||
github.com/Pallinder/go-randomdata v1.1.0/go.mod h1:yHmJgulpD2Nfrm0cR9tI/+oAgRqCQQixsA8HyRZfV9Y=
|
||||
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
|
||||
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
|
||||
|
@ -50,6 +51,10 @@ github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5m
|
|||
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
|
||||
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
|
||||
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
|
||||
github.com/lestrrat-go/libxml2 v0.0.0-20200215080510-6483566f52cb h1:qqNmX9V9n4byPp7LUvUf7CPhMPYO9ol4ElpbD3DgzuY=
|
||||
github.com/lestrrat-go/libxml2 v0.0.0-20200215080510-6483566f52cb/go.mod h1:fy/ZVbgyB83mtricxwSW3zqIRXWOVpKG2PvdUDFeC58=
|
||||
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
|
||||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/rogpeppe/go-charset v0.0.0-20180617210344-2471d30d28b4/go.mod h1:qgYeAmZ5ZIpBWTGllZSQnw97Dj+woV0toclVaRGI8pc=
|
||||
|
|
22
hunter.go
22
hunter.go
|
@ -1,6 +1,7 @@
|
|||
package hunter
|
||||
|
||||
import (
|
||||
"log"
|
||||
"strconv"
|
||||
|
||||
pqueue "github.com/474420502/focus/priority_queue"
|
||||
|
@ -50,13 +51,6 @@ func NewPriorityHunter(queueCreator func() *pqueue.PriorityQueue) *Hunter {
|
|||
hunter := &Hunter{}
|
||||
hunter.createQueue = queueCreator
|
||||
|
||||
// hunter.task = &BaseTask{}
|
||||
// hunter.task.SetParent(nil)
|
||||
// hunter.task.SetChildren(hunter.createQueue())
|
||||
|
||||
// hunter.cxt = NewContext()
|
||||
// hunter.cxt.curNode = hunter.task
|
||||
|
||||
hunter.share = make(map[string]interface{})
|
||||
return hunter
|
||||
}
|
||||
|
@ -156,10 +150,22 @@ func (hunter *Hunter) recursionTasks(cxt *TaskContext) {
|
|||
}
|
||||
|
||||
// Stop 停止任务
|
||||
func (hunter *Hunter) Stop() {
|
||||
func (hunter *Hunter) close(itask ITask) {
|
||||
if iclose, ok := itask.(IClose); ok {
|
||||
if err := iclose.Close(); err != nil {
|
||||
log.Println(err)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Stop 停止任务
|
||||
func (hunter *Hunter) Stop() {
|
||||
for _, task := range hunter.tasks {
|
||||
hunter.close(task)
|
||||
}
|
||||
}
|
||||
|
||||
// AddTask 执行任务
|
||||
func (hunter *Hunter) AddTask(task ITask) {
|
||||
hunter.tasks = append(hunter.tasks, task)
|
||||
|
|
|
@ -7,7 +7,7 @@ import (
|
|||
)
|
||||
|
||||
func init() {
|
||||
log.Println("测试最好使用 docker run -p 80:80 kennethreitz/httpbin")
|
||||
log.Println("recommend: docker run -p 80:80 kennethreitz/httpbin")
|
||||
}
|
||||
|
||||
type WebGet struct {
|
||||
|
|
81
pre_base_driver_com.go
Normal file
81
pre_base_driver_com.go
Normal file
|
@ -0,0 +1,81 @@
|
|||
package hunter
|
||||
|
||||
import (
|
||||
"log"
|
||||
"net/http"
|
||||
|
||||
"github.com/474420502/requests"
|
||||
"github.com/tebeka/selenium"
|
||||
)
|
||||
|
||||
// https://github.com/tebeka/selenium
|
||||
|
||||
// PreBaseDriverUrl Task的 curl bash 预处理组件
|
||||
type PreBaseDriverUrl struct {
|
||||
PreUrl string
|
||||
Port int
|
||||
service *selenium.Service
|
||||
driver selenium.WebDriver
|
||||
}
|
||||
|
||||
// Close 如果需要在最后执行销毁操作, 继承覆盖该方法
|
||||
func (u *PreBaseDriverUrl) Close() error {
|
||||
|
||||
if u.service != nil {
|
||||
// 直接退出, 所有销毁 直接忽略webdriver.Quit(). // Delete Session
|
||||
if err := u.service.Stop(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// IResponse interface {
|
||||
// Content() []byte
|
||||
// GetStatus() string
|
||||
// GetStatusCode() int
|
||||
// GetHeader() http.Header
|
||||
// GetCookie() []*http.Cookie
|
||||
|
||||
// // 返回不同的自定义的Response, 也可以是其他定义的结构体如WebDriver
|
||||
// GetResponse() interface{}
|
||||
// }
|
||||
|
||||
// Content 内容
|
||||
func (u *PreBaseDriverUrl) Content() []byte {
|
||||
content, err := u.driver.PageSource()
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
}
|
||||
return []byte(content)
|
||||
}
|
||||
|
||||
// GetStatusCode 暂时为空
|
||||
func (u *PreBaseDriverUrl) GetStatusCode() int {
|
||||
return 0
|
||||
}
|
||||
|
||||
// GetStatus 内容 暂时为空
|
||||
func (u *PreBaseDriverUrl) GetStatus() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
// GetHeader 暂时为空
|
||||
func (u *PreBaseDriverUrl) GetHeader() http.Header {
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetCookie 暂时为空
|
||||
func (u *PreBaseDriverUrl) GetCookie() []*http.Cookie {
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetResponse 返回 webdriver
|
||||
func (u *PreBaseDriverUrl) GetResponse() interface{} {
|
||||
return u.driver
|
||||
}
|
||||
|
||||
func (u *PreBaseDriverUrl) Hunt() (requests.IResponse, error) {
|
||||
err := u.driver.Get(string(u.PreUrl))
|
||||
return u, err
|
||||
}
|
46
pre_chrome_driver_com.go
Normal file
46
pre_chrome_driver_com.go
Normal file
|
@ -0,0 +1,46 @@
|
|||
package hunter
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
|
||||
"github.com/Pallinder/go-randomdata"
|
||||
"github.com/tebeka/selenium"
|
||||
)
|
||||
|
||||
// PreChromeUrl Chrome的url预处理
|
||||
type PreChromeUrl struct {
|
||||
PreBaseDriverUrl
|
||||
}
|
||||
|
||||
// Before 驱动的预处理
|
||||
func (u *PreChromeUrl) Before(ctx *TaskContext) {
|
||||
|
||||
var err error
|
||||
var service *selenium.Service
|
||||
|
||||
if u.service == nil {
|
||||
for i := 0; i < 20; i++ {
|
||||
if u.Port == 0 {
|
||||
u.Port = randomdata.Number(10000, 50000)
|
||||
}
|
||||
service, err = selenium.NewChromeDriverService("chromedriver", u.Port)
|
||||
if err != nil {
|
||||
log.Println(i, err)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
u.service = service
|
||||
}
|
||||
|
||||
if u.driver == nil {
|
||||
caps := selenium.Capabilities{"browserName": "chrome"}
|
||||
wd, err := selenium.NewRemote(caps, fmt.Sprintf("http://localhost:%d/wd/hub", u.Port))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
u.driver = wd
|
||||
}
|
||||
}
|
|
@ -1,61 +0,0 @@
|
|||
package hunter
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"runtime"
|
||||
|
||||
"github.com/474420502/requests"
|
||||
"github.com/tebeka/selenium"
|
||||
)
|
||||
|
||||
// https://github.com/tebeka/selenium
|
||||
|
||||
// PreDriverUrl Task的 curl bash 预处理组件
|
||||
type PreDriverUrl struct {
|
||||
url string
|
||||
service *selenium.Service
|
||||
driver selenium.WebDriver
|
||||
}
|
||||
|
||||
func (u *PreDriverUrl) Before(ctx *TaskContext) {
|
||||
service, err := selenium.NewChromeDriverService("chromedriver", 1030)
|
||||
if err != nil {
|
||||
log.Panic(err)
|
||||
}
|
||||
if err != nil {
|
||||
panic(err) // panic is used only as an example and is not otherwise recommended.
|
||||
}
|
||||
u.service = service
|
||||
|
||||
caps := selenium.Capabilities{"browserName": "chrome"}
|
||||
wd, err := selenium.NewRemote(caps, fmt.Sprintf("http://localhost:%d/wd/hub", 1030))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
u.driver = wd
|
||||
|
||||
runtime.SetFinalizer(&[]interface{}{service, wd}, func(obj interface{}) {
|
||||
iobj := obj.([]interface{})
|
||||
service := iobj[0].(*selenium.Service)
|
||||
service.Stop()
|
||||
|
||||
wd := iobj[1].(selenium.WebDriver)
|
||||
wd.Quit()
|
||||
})
|
||||
|
||||
err = wd.Get(string(u.url))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
ele, err := wd.FindElement(selenium.ByXPATH, "//title")
|
||||
log.Println(ele.Text())
|
||||
log.Println(ele.TagName())
|
||||
}
|
||||
|
||||
func (u *PreDriverUrl) Hunt() (requests.IResponse, error) {
|
||||
err := u.driver.Get(string(u.url))
|
||||
|
||||
return nil, err
|
||||
}
|
|
@ -1,9 +1,15 @@
|
|||
package hunter
|
||||
|
||||
import "testing"
|
||||
import (
|
||||
"log"
|
||||
"testing"
|
||||
|
||||
"github.com/lestrrat-go/libxml2"
|
||||
"github.com/tebeka/selenium"
|
||||
)
|
||||
|
||||
type WebPreDriverUrl struct {
|
||||
PreDriverUrl
|
||||
PreChromeUrl
|
||||
}
|
||||
|
||||
func (web *WebPreDriverUrl) Execute(cxt *TaskContext) {
|
||||
|
@ -12,9 +18,51 @@ func (web *WebPreDriverUrl) Execute(cxt *TaskContext) {
|
|||
panic(err)
|
||||
}
|
||||
cxt.SetShare("test", resp.Content())
|
||||
wd := resp.GetResponse().(selenium.WebDriver)
|
||||
ele, err := wd.FindElement(selenium.ByXPATH, "//title")
|
||||
if err != nil {
|
||||
log.Panic(err)
|
||||
}
|
||||
title, err := ele.GetAttribute("text")
|
||||
if err != nil {
|
||||
log.Panic(err)
|
||||
}
|
||||
cxt.SetShare("driver-title", title)
|
||||
}
|
||||
|
||||
func TestDriver(t *testing.T) {
|
||||
hunter := NewHunter(&WebPreDriverUrl{PreDriverUrl("http://httpbin.org")}) // first params PreCurlUrl
|
||||
preurl := &WebPreDriverUrl{}
|
||||
preurl.PreUrl = "http://httpbin.org"
|
||||
|
||||
hunter := NewHunter(preurl) // first params PreCurlUrl
|
||||
hunter.Execute()
|
||||
defer hunter.Stop()
|
||||
|
||||
content := hunter.GetShare("test").([]byte)
|
||||
if content != nil {
|
||||
doc, err := libxml2.ParseHTML(content)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
} else {
|
||||
if result, err := doc.Find("//title"); err == nil {
|
||||
iter := result.NodeIter()
|
||||
if iter.Next() {
|
||||
n := iter.Node()
|
||||
if n.TextContent() != "httpbin.org" {
|
||||
t.Error(n.TextContent())
|
||||
}
|
||||
} else {
|
||||
t.Error("can't xpath title")
|
||||
}
|
||||
} else {
|
||||
t.Error(err)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
title := hunter.GetShare("driver-title").(string)
|
||||
if title != "httpbin.org" {
|
||||
t.Error(title)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user