完成chrome 组件的例子. TODO: id_com. --> crash 重启

This commit is contained in:
huangsimin 2020-04-13 18:04:13 +08:00
parent 9c7f62d759
commit 056877030f
9 changed files with 206 additions and 73 deletions

3
go.mod
View File

@ -6,5 +6,8 @@ require (
github.com/474420502/focus v0.8.1
github.com/474420502/gurl v0.0.2
github.com/474420502/requests v1.5.0
github.com/Pallinder/go-randomdata v1.1.0
github.com/lestrrat-go/libxml2 v0.0.0-20200215080510-6483566f52cb
github.com/pkg/errors v0.9.1 // indirect
github.com/tebeka/selenium v0.9.9
)

5
go.sum
View File

@ -15,6 +15,7 @@ github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802 h1:1BDTz0u9nC3//pOC
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/BurntSushi/xgbutil v0.0.0-20160919175755-f7c97cef3b4e h1:4ZrkT/RzpnROylmoQL57iVUL57wGKTR5O6KpVnbm2tA=
github.com/BurntSushi/xgbutil v0.0.0-20160919175755-f7c97cef3b4e/go.mod h1:uw9h2sd4WWHOPdJ13MQpwK5qYWKYDumDqxWWIknEQ+k=
github.com/Pallinder/go-randomdata v1.1.0 h1:gUubB1IEUliFmzjqjhf+bgkg1o6uoFIkRsP3VrhEcx8=
github.com/Pallinder/go-randomdata v1.1.0/go.mod h1:yHmJgulpD2Nfrm0cR9tI/+oAgRqCQQixsA8HyRZfV9Y=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
@ -50,6 +51,10 @@ github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5m
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
github.com/lestrrat-go/libxml2 v0.0.0-20200215080510-6483566f52cb h1:qqNmX9V9n4byPp7LUvUf7CPhMPYO9ol4ElpbD3DgzuY=
github.com/lestrrat-go/libxml2 v0.0.0-20200215080510-6483566f52cb/go.mod h1:fy/ZVbgyB83mtricxwSW3zqIRXWOVpKG2PvdUDFeC58=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-charset v0.0.0-20180617210344-2471d30d28b4/go.mod h1:qgYeAmZ5ZIpBWTGllZSQnw97Dj+woV0toclVaRGI8pc=

View File

@ -1,6 +1,7 @@
package hunter
import (
"log"
"strconv"
pqueue "github.com/474420502/focus/priority_queue"
@ -50,13 +51,6 @@ func NewPriorityHunter(queueCreator func() *pqueue.PriorityQueue) *Hunter {
hunter := &Hunter{}
hunter.createQueue = queueCreator
// hunter.task = &BaseTask{}
// hunter.task.SetParent(nil)
// hunter.task.SetChildren(hunter.createQueue())
// hunter.cxt = NewContext()
// hunter.cxt.curNode = hunter.task
hunter.share = make(map[string]interface{})
return hunter
}
@ -156,10 +150,22 @@ func (hunter *Hunter) recursionTasks(cxt *TaskContext) {
}
// Stop 停止任务
func (hunter *Hunter) Stop() {
// close shuts a single task down when the task opts in by implementing IClose.
// Tasks that do not implement IClose are left untouched. An error returned by
// Close is logged rather than propagated.
func (hunter *Hunter) close(itask ITask) {
	closer, ok := itask.(IClose)
	if !ok {
		return
	}
	if err := closer.Close(); err != nil {
		log.Println(err)
	}
}
// Stop walks hunter.tasks in slice order and hands each task to close.
func (hunter *Hunter) Stop() {
	for i := range hunter.tasks {
		hunter.close(hunter.tasks[i])
	}
}
// AddTask 执行任务
func (hunter *Hunter) AddTask(task ITask) {
hunter.tasks = append(hunter.tasks, task)

View File

@ -7,7 +7,7 @@ import (
)
// init logs, at package load time, the recommended docker command for running
// a local httpbin container to test against.
func init() {
log.Println("测试最好使用 docker run -p 80:80 kennethreitz/httpbin")
log.Println("recommend: docker run -p 80:80 kennethreitz/httpbin")
}
type WebGet struct {

81
pre_base_driver_com.go Normal file
View File

@ -0,0 +1,81 @@
package hunter
import (
"log"
"net/http"
"github.com/474420502/requests"
"github.com/tebeka/selenium"
)
// https://github.com/tebeka/selenium
// PreBaseDriverUrl is the shared selenium WebDriver URL pre-processing component for a Task.
// It keeps the target URL together with the selenium service and driver handles.
type PreBaseDriverUrl struct {
// PreUrl is the address Hunt asks the driver to open.
PreUrl string
// Port is the local port the WebDriver service listens on.
Port int
// service is the selenium service stopped by Close; nil means none is held.
service *selenium.Service
// driver is the WebDriver used by Hunt and Content and returned by GetResponse.
driver selenium.WebDriver
}
// Close stops the selenium service, if one is held, and returns any error from
// stopping it. Embedding types can override it when they need extra teardown.
// The WebDriver session is deliberately not quit here; only the service is stopped.
func (u *PreBaseDriverUrl) Close() error {
	if u.service == nil {
		return nil
	}
	return u.service.Stop()
}
// IResponse interface {
// Content() []byte
// GetStatus() string
// GetStatusCode() int
// GetHeader() http.Header
// GetCookie() []*http.Cookie
// // 返回不同的自定义的Response, 也可以是其他定义的结构体如WebDriver
// GetResponse() interface{}
// }
// Content returns the driver's current page source as bytes.
// An error from the driver is logged; whatever source string came back is
// still converted and returned.
func (u *PreBaseDriverUrl) Content() []byte {
	var (
		page string
		err  error
	)
	if page, err = u.driver.PageSource(); err != nil {
		log.Println(err)
	}
	return []byte(page)
}
// GetStatusCode is a placeholder for now: it always reports 0.
func (u *PreBaseDriverUrl) GetStatusCode() int {
	var code int
	return code
}
// GetStatus is a placeholder for now: it always reports the empty string.
func (u *PreBaseDriverUrl) GetStatus() string {
	var status string
	return status
}
// GetHeader is a placeholder for now: it always reports a nil header.
func (u *PreBaseDriverUrl) GetHeader() http.Header {
	var header http.Header
	return header
}
// GetCookie is a placeholder for now: it always reports a nil cookie slice.
func (u *PreBaseDriverUrl) GetCookie() []*http.Cookie {
	var cookies []*http.Cookie
	return cookies
}
// GetResponse exposes the underlying WebDriver as the response object, so
// callers can type-assert it back to selenium.WebDriver.
func (u *PreBaseDriverUrl) GetResponse() interface{} {
	var resp interface{} = u.driver
	return resp
}
// Hunt asks the driver to open PreUrl. The component itself is returned as the
// response in every case, together with the navigation error, if any.
func (u *PreBaseDriverUrl) Hunt() (requests.IResponse, error) {
	if err := u.driver.Get(u.PreUrl); err != nil {
		return u, err
	}
	return u, nil
}

46
pre_chrome_driver_com.go Normal file
View File

@ -0,0 +1,46 @@
package hunter
import (
"fmt"
"log"
"github.com/Pallinder/go-randomdata"
"github.com/tebeka/selenium"
)
// PreChromeUrl is the Chrome flavour of the URL pre-processing component.
// It embeds PreBaseDriverUrl and adds the Chrome-specific Before step.
type PreChromeUrl struct {
PreBaseDriverUrl
}
// Before prepares the Chrome driver before the task runs.
//
// When no service is held yet, it starts a chromedriver service, trying up to
// 20 times and logging each failure. When no driver is held yet, it opens a
// remote Chrome session on localhost at u.Port and panics if the session
// cannot be created. ctx is not used.
func (u *PreChromeUrl) Before(ctx *TaskContext) {
	if u.service == nil {
		// A zero Port means "pick one for me". Record that up front so every
		// failed attempt draws a fresh port; previously the first random port
		// stuck in u.Port and all remaining retries reused it. A port supplied
		// by the caller is never replaced.
		pickPort := u.Port == 0
		for i := 0; i < 20; i++ {
			if pickPort {
				u.Port = randomdata.Number(10000, 50000)
			}
			service, err := selenium.NewChromeDriverService("chromedriver", u.Port)
			if err == nil {
				u.service = service
				break
			}
			log.Println(i, err)
		}
	}

	if u.driver == nil {
		caps := selenium.Capabilities{"browserName": "chrome"}
		wd, err := selenium.NewRemote(caps, fmt.Sprintf("http://localhost:%d/wd/hub", u.Port))
		if err != nil {
			panic(err)
		}
		u.driver = wd
	}
}

View File

@ -1,61 +0,0 @@
package hunter
import (
"fmt"
"log"
"runtime"
"github.com/474420502/requests"
"github.com/tebeka/selenium"
)
// https://github.com/tebeka/selenium
// PreDriverUrl is the selenium WebDriver URL pre-processing component for a Task.
type PreDriverUrl struct {
// url is the address loaded by Before and Hunt.
url string
// service is the chromedriver service started by Before.
service *selenium.Service
// driver is the remote WebDriver session opened by Before.
driver selenium.WebDriver
}
// Before starts a chromedriver service on the fixed port 1030, opens a remote
// Chrome session against it, loads u.url and logs the text and tag name of the
// page's //title element. It panics when the service cannot be started, the
// session cannot be created, or the page cannot be loaded.
func (u *PreDriverUrl) Before(ctx *TaskContext) {
service, err := selenium.NewChromeDriverService("chromedriver", 1030)
if err != nil {
log.Panic(err)
}
// NOTE(review): this second check cannot be reached with a non-nil err,
// because log.Panic above already panics in that case.
if err != nil {
panic(err) // panic is used only as an example and is not otherwise recommended.
}
u.service = service
caps := selenium.Capabilities{"browserName": "chrome"}
wd, err := selenium.NewRemote(caps, fmt.Sprintf("http://localhost:%d/wd/hub", 1030))
if err != nil {
panic(err)
}
u.driver = wd
// NOTE(review): the finalizer is registered on a pointer to a temporary slice
// that nothing else references, and the callback asserts obj to []interface{}
// although a *[]interface{} was registered — looks like the assertion would
// fail when the finalizer runs; confirm.
runtime.SetFinalizer(&[]interface{}{service, wd}, func(obj interface{}) {
iobj := obj.([]interface{})
service := iobj[0].(*selenium.Service)
service.Stop()
wd := iobj[1].(selenium.WebDriver)
wd.Quit()
})
err = wd.Get(string(u.url))
if err != nil {
panic(err)
}
// NOTE(review): the error from FindElement is not checked before ele is used.
ele, err := wd.FindElement(selenium.ByXPATH, "//title")
log.Println(ele.Text())
log.Println(ele.TagName())
}
// Hunt asks the driver to load u.url. It never yields a response value; only
// the navigation error, if any, is returned.
func (u *PreDriverUrl) Hunt() (requests.IResponse, error) {
	return nil, u.driver.Get(u.url)
}

View File

@ -1,9 +1,15 @@
package hunter
import "testing"
import (
"log"
"testing"
"github.com/lestrrat-go/libxml2"
"github.com/tebeka/selenium"
)
// WebPreDriverUrl is the task type used by the driver test; it embeds the URL
// pre-processing component and supplies Execute.
type WebPreDriverUrl struct {
PreDriverUrl
PreChromeUrl
}
func (web *WebPreDriverUrl) Execute(cxt *TaskContext) {
@ -12,9 +18,51 @@ func (web *WebPreDriverUrl) Execute(cxt *TaskContext) {
panic(err)
}
cxt.SetShare("test", resp.Content())
wd := resp.GetResponse().(selenium.WebDriver)
ele, err := wd.FindElement(selenium.ByXPATH, "//title")
if err != nil {
log.Panic(err)
}
title, err := ele.GetAttribute("text")
if err != nil {
log.Panic(err)
}
cxt.SetShare("driver-title", title)
}
// TestDriver runs a hunter against http://httpbin.org and checks that the page
// title is "httpbin.org", both when parsed from the shared page content and
// when read through the WebDriver.
func TestDriver(t *testing.T) {
hunter := NewHunter(&WebPreDriverUrl{PreDriverUrl("http://httpbin.org")}) // first params PreCurlUrl
preurl := &WebPreDriverUrl{}
preurl.PreUrl = "http://httpbin.org"
hunter := NewHunter(preurl) // first params PreCurlUrl
hunter.Execute()
// NOTE(review): Stop is deferred only after Execute has returned, so it does
// not run if Execute panics.
defer hunter.Stop()
// Page source that Execute stored under the "test" share key.
content := hunter.GetShare("test").([]byte)
if content != nil {
doc, err := libxml2.ParseHTML(content)
if err != nil {
t.Error(err)
} else {
if result, err := doc.Find("//title"); err == nil {
iter := result.NodeIter()
if iter.Next() {
n := iter.Node()
if n.TextContent() != "httpbin.org" {
t.Error(n.TextContent())
}
} else {
t.Error("can't xpath title")
}
} else {
t.Error(err)
}
}
}
// Title that Execute read through the WebDriver and stored under "driver-title".
title := hunter.GetShare("driver-title").(string)
if title != "httpbin.org" {
t.Error(title)
}
}

View File

@ -29,6 +29,11 @@ type IIdentity interface {
GetID() string
}
// IClose is implemented by tasks that need to release resources when they are closed.
type IClose interface {
// Close releases the resources and reports any error from doing so.
Close() error
}
// ITaskNode 任务节点
type ITaskNode interface {
Parent() ITaskNode