Trie 优化 设置的前缀, 改了构建的方式

This commit is contained in:
huangsimin 2018-11-29 16:19:40 +08:00
parent c28147be31
commit 420f307ad2
7 changed files with 325 additions and 41 deletions

View File

@ -12,7 +12,7 @@ type parseFunction struct {
ExecuteFunction func(u *CURL, soption string)
ParamCURL *CURL
ParamData string
Prioty int
Priority int
}
// Execute 执行 函数
@ -29,7 +29,7 @@ func (nodes *parseQueue) Swap(i, j int) {
// Less Priority Want Less
func (nodes *parseQueue) Less(i, j int) bool {
ns := *nodes
return ns[i].Prioty < ns[j].Prioty
return ns[i].Priority < ns[j].Priority
}
// Push 实现heap.Interface接口定义的额外方法

View File

@ -7,17 +7,17 @@ import (
func TestPQueue(t *testing.T) {
PQExec := newPQueueExecute()
PQExec.Push(&parseFunction{Prioty: 5})
PQExec.Push(&parseFunction{Prioty: 10})
PQExec.Push(&parseFunction{Prioty: 4})
PQExec.Push(&parseFunction{Prioty: 4})
PQExec.Push(&parseFunction{Prioty: 20})
PQExec.Push(&parseFunction{Prioty: 10})
PQExec.Push(&parseFunction{Prioty: 15})
PQExec.Push(&parseFunction{Priority: 5})
PQExec.Push(&parseFunction{Priority: 10})
PQExec.Push(&parseFunction{Priority: 4})
PQExec.Push(&parseFunction{Priority: 4})
PQExec.Push(&parseFunction{Priority: 20})
PQExec.Push(&parseFunction{Priority: 10})
PQExec.Push(&parseFunction{Priority: 15})
content := ""
for PQExec.Len() > 0 {
content += strconv.Itoa(PQExec.Pop().Prioty)
content += strconv.Itoa(PQExec.Pop().Priority)
content += " "
}
if content != "4 4 5 10 10 15 20 " {

67
option.go Normal file
View File

@ -0,0 +1,67 @@
package curl2info
func init() {
optionTrie = NewTrie()
oelist := []*optionExecute{
{"-H", 10, parseHeader, nil},
{"-X", 10, parseOptX, nil},
{"-A", 15, parseUserAgent, &extract{re: "^-A +(.+)", execute: extractData}},
{"-I", 15, parseOptI, nil},
{"-d", 10, parseBodyASCII, &extract{re: "^-d +(.+)", execute: extractData}},
{"-u", 15, parseUser, &extract{re: "^-u +(.+)", execute: extractData}},
{"-k", 15, parseInsecure, nil},
// Body
{"--data", 10, parseBodyASCII, &extract{re: "--data +(.+)", execute: extractData}},
{"--data-urlencode", 10, parseBodyURLEncode, &extract{re: "--data-urlencode +(.+)", execute: extractData}},
{"--data-binary", 10, parseBodyBinary, &extract{re: "--data-binary +(.+)", execute: extractData}},
{"--data-ascii", 10, parseBodyASCII, &extract{re: "--data-ascii +(.+)", execute: extractData}},
{"--data-raw", 10, parseBodyRaw, &extract{re: "--data-raw +(.+)", execute: extractData}},
//"--"
{"--header", 10, parseHeader, nil},
{"--insecure", 15, parseInsecure, nil},
{"--call", 10, parseCallBack, &extract{re: "--call +(.+)", execute: extractData}},
{"--user-agent", 15, parseUserAgent, &extract{re: "--user-agent +(.+)", execute: extractData}},
{"--user", 15, parseUser, &extract{re: "--user +(.+)", execute: extractData}},
{"--connect-timeout", 15, parseTimeout, &extract{re: "--connect-timeout +(.+)", execute: extractData}},
}
for _, oe := range oelist {
optionTrie.Insert(oe)
}
// log.Println("support options:", optionTrie.AllWords())
}
// extract pairs a pattern with the function that applies it; it is used to
// pull the configured data out of an option string.
type extract struct {
	re      string
	execute func(re, soption string) string
}

// Execute runs the stored function with the stored pattern on soption and
// returns whatever that function returns.
func (et *extract) Execute(soption string) string {
	pattern, apply := et.re, et.execute
	return apply(pattern, soption)
}
// optionTrie is the package-level prefix tree of supported options; init
// assigns it and inserts every optionExecute entry into it.
var optionTrie *Trie
// optionExecute describes one supported option: the prefix it is registered
// under, its priority, and how to turn a matching option string into a
// parseFunction (see BuildFunction).
type optionExecute struct {
	Prefix   string
	Priority int
	Execute  func(*CURL, string) // handler; becomes ExecuteFunction of the built parseFunction
	Extract  *extract            // optional extractor (method plus pattern) applied to the option string; may be nil
}
// GetWord implements TrieWord. The key is the option prefix followed by a
// single space.
func (oe *optionExecute) GetWord() string {
	const separator = " "
	return oe.Prefix + separator
}
// BuildFunction creates the parseFunction for one occurrence of this option.
// The handler and priority come from oe. ParamData is soption itself, or the
// result of running oe.Extract on soption when an extractor is configured.
func (oe *optionExecute) BuildFunction(curl *CURL, soption string) *parseFunction {
	step := &parseFunction{
		ParamCURL:       curl,
		ParamData:       soption,
		ExecuteFunction: oe.Execute,
		Priority:        oe.Priority,
	}
	if extractor := oe.Extract; extractor != nil {
		step.ParamData = extractor.Execute(soption)
	}
	return step
}

View File

@ -99,7 +99,39 @@ func (curl *CURL) CreateWorkflow(ses *requests.Session) *requests.Workflow {
return wf
}
// ParseRawCURL curl_bash
func init() {
optionTrie = NewTrie()
oelist := []*optionExecute{
{"-H", 10, parseHeader, nil},
{"-X", 10, parseOptX, nil},
{"-A", 15, parseUserAgent, &extract{re: "^-A +(.+)", execute: extractData}},
{"-I", 15, parseOptI, nil},
{"-d", 10, parseBodyASCII, &extract{re: "^-d +(.+)", execute: extractData}},
{"-u", 15, parseUser, &extract{re: "^-u +(.+)", execute: extractData}},
{"-k", 15, parseInsecure, nil},
// Body
{"--data", 10, parseBodyASCII, &extract{re: "--data +(.+)", execute: extractData}},
{"--data-urlencode", 10, parseBodyURLEncode, &extract{re: "--data-urlencode +(.+)", execute: extractData}},
{"--data-binary", 10, parseBodyBinary, &extract{re: "--data-binary +(.+)", execute: extractData}},
{"--data-ascii", 10, parseBodyASCII, &extract{re: "--data-ascii +(.+)", execute: extractData}},
{"--data-raw", 10, parseBodyRaw, &extract{re: "--data-raw +(.+)", execute: extractData}},
//"--"
{"--header", 10, parseHeader, nil},
{"--insecure", 15, parseInsecure, nil},
{"--call", 10, parseCallBack, &extract{re: "--call +(.+)", execute: extractData}},
{"--user-agent", 15, parseUserAgent, &extract{re: "--user-agent +(.+)", execute: extractData}},
{"--user", 15, parseUser, &extract{re: "--user +(.+)", execute: extractData}},
{"--connect-timeout", 15, parseTimeout, &extract{re: "--connect-timeout +(.+)", execute: extractData}},
}
for _, oe := range oelist {
optionTrie.Insert(oe)
}
log.Println("support options:", optionTrie.AllWords())
}
// ParseRawCURL curl_bash 可以用trie改进 没空改
func ParseRawCURL(scurl string) (cURL *CURL, err error) {
defer func() {
@ -110,20 +142,17 @@ func ParseRawCURL(scurl string) (cURL *CURL, err error) {
}()
executor := newPQueueExecute()
curl := NewCURL()
if scurl[0] == '"' && scurl[len(scurl)-1] == '"' {
scurl = strings.Trim(scurl, `"`)
scurl = strings.TrimSpace(scurl)
} else if scurl[0] == '\'' && scurl[len(scurl)-1] == '\'' {
scurl = strings.Trim(scurl, `'`)
scurl = strings.TrimSpace(scurl)
} else {
scurl = strings.TrimSpace(scurl)
}
scurl = strings.TrimSpace(scurl)
scurl = strings.TrimLeft(scurl, "curl")
mathches := regexp.MustCompile(`--[^ ]+ +'[^']+'|--[^ ]+ +[^ ]+|-[A-Za-z] +'[^']+'|-[A-Za-z] +[^ ]+| '[^']+'|--[a-z]+ {0,}`).FindAllString(scurl, -1)
for _, m := range mathches {
m = strings.TrimSpace(m)
@ -135,7 +164,7 @@ func ParseRawCURL(scurl string) (cURL *CURL, err error) {
}
curl.ParsedURL = purl
case '-':
exec := judgeAndParseOptions(curl, m)
exec := judgeOptions(curl, m)
if exec != nil {
executor.Push(exec)
}
@ -154,27 +183,38 @@ func ParseRawCURL(scurl string) (cURL *CURL, err error) {
return curl, nil
}
// judgeOptions finds the registered option that soption starts with and
// returns the parseFunction built for it. It logs and returns nil when no
// registered option matches.
//
// Words in optionTrie end with a space (optionExecute.GetWord appends one),
// while the option tokens handed to this function have been whitespace-trimmed.
// A bare flag such as "--insecure" or "-I" therefore never reaches a word node
// on its own. A trailing space is appended to the lookup key only, so such
// flags match; for tokens that already matched, the result is unchanged. The
// untouched soption is what BuildFunction receives.
func judgeOptions(u *CURL, soption string) *parseFunction {
	word := TrieStrWord(soption + " ")
	if ioe := optionTrie.SearchMostPrefix(&word); ioe != nil {
		// Only optionExecute values are inserted into optionTrie; the checked
		// assertion just avoids a panic if that ever changes.
		if oe, ok := ioe.(*optionExecute); ok {
			return oe.BuildFunction(u, soption)
		}
	}

	log.Println(soption, " no haved this option")
	return nil
}
func judgeAndParseOptions(u *CURL, soption string) *parseFunction {
switch prefix := soption[0:2]; prefix {
case "-H":
return &parseFunction{ParamCURL: u, ParamData: soption, ExecuteFunction: parseHeader, Prioty: 10}
return &parseFunction{ParamCURL: u, ParamData: soption, ExecuteFunction: parseHeader, Priority: 10}
case "-X":
return &parseFunction{ParamCURL: u, ParamData: soption, ExecuteFunction: parseOptX, Prioty: 10}
return &parseFunction{ParamCURL: u, ParamData: soption, ExecuteFunction: parseOptX, Priority: 10}
case "-A": // User-Agent 先后顺序的问题
data := extractData("^-A +(.+)", soption)
return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseUserAgent, Prioty: 15}
return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseUserAgent, Priority: 15}
case "-I":
return &parseFunction{ParamCURL: u, ParamData: soption, ExecuteFunction: parseOptI, Prioty: 15}
return &parseFunction{ParamCURL: u, ParamData: soption, ExecuteFunction: parseOptI, Priority: 15}
case "--":
return parseLongOption(u, soption)
case "-d":
data := extractData("^-d +(.+)", soption)
return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseBodyASCII, Prioty: 10}
return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseBodyASCII, Priority: 10}
case "-u":
data := extractData("^-u +(.+)", soption)
return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseUser, Prioty: 15}
return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseUser, Priority: 15}
case "-k": // -k, --insecure Allow insecure server connections when using SSL
return &parseFunction{ParamCURL: u, ParamData: soption, ExecuteFunction: parseInsecure, Prioty: 15}
return &parseFunction{ParamCURL: u, ParamData: soption, ExecuteFunction: parseInsecure, Priority: 15}
}
return nil
}
@ -193,39 +233,35 @@ func parseLongOption(u *CURL, soption string) *parseFunction {
dtype := datas[1]
data := strings.Trim(datas[2], "'")
if u.Method != "" {
u.Method = "POST"
}
switch dtype {
case "binary":
return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseBodyBinary, Prioty: 10}
return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseBodyBinary, Priority: 10}
case "ascii":
return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseBodyASCII, Prioty: 10}
return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseBodyASCII, Priority: 10}
case "raw":
return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseBodyRaw, Prioty: 10}
return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseBodyRaw, Priority: 10}
case "urlencode":
return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseBodyURLEncode, Prioty: 10}
return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseBodyURLEncode, Priority: 10}
case "data":
return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseBodyASCII, Prioty: 10}
return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseBodyASCII, Priority: 10}
}
case regexp.MustCompile("^--header").MatchString(soption):
return &parseFunction{ParamCURL: u, ParamData: soption, ExecuteFunction: parseHeader, Prioty: 10}
return &parseFunction{ParamCURL: u, ParamData: soption, ExecuteFunction: parseHeader, Priority: 10}
case regexp.MustCompile("^--call").MatchString(soption):
data := extractData("^--call +(.+)", soption)
return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseCallBack, Prioty: 10}
return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseCallBack, Priority: 10}
case regexp.MustCompile("^--user-agent").MatchString(soption):
data := extractData("^--user-agent +(.+)", soption)
return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseUserAgent, Prioty: 15}
return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseUserAgent, Priority: 15}
case regexp.MustCompile("^--user").MatchString(soption):
data := extractData("^--user +(.+)", soption)
return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseUser, Prioty: 15}
return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseUser, Priority: 15}
case regexp.MustCompile("^--insecure").MatchString(soption):
return &parseFunction{ParamCURL: u, ParamData: soption, ExecuteFunction: parseInsecure, Prioty: 15}
return &parseFunction{ParamCURL: u, ParamData: soption, ExecuteFunction: parseInsecure, Priority: 15}
case regexp.MustCompile("^--connect-timeout").MatchString(soption):
data := extractData("^--connect-timeout +(.+)", soption)
return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseTimeout, Prioty: 15}
return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseTimeout, Priority: 15}
}
log.Println("can't parseOption", soption)
@ -273,16 +309,28 @@ func parseOptX(u *CURL, soption string) {
}
func parseBodyURLEncode(u *CURL, data string) {
if u.Method != "" {
u.Method = "POST"
}
u.Body.SetPrefix(requests.TypeURLENCODED)
u.Body.SetIOBody(data)
}
func parseBodyRaw(u *CURL, data string) {
if u.Method != "" {
u.Method = "POST"
}
u.Body.SetPrefix(requests.TypeURLENCODED)
u.Body.SetIOBody(data)
}
func parseBodyASCII(u *CURL, data string) {
if u.Method != "" {
u.Method = "POST"
}
u.Body.SetPrefix(requests.TypeURLENCODED)
if data[0] != '@' {
@ -304,6 +352,10 @@ func parseBodyASCII(u *CURL, data string) {
// 处理@ 并且替/r/n符号
func parseBodyBinary(u *CURL, data string) {
if u.Method != "" {
u.Method = "POST"
}
u.Body.SetPrefix(requests.TypeURLENCODED)
if data[0] != '@' {

View File

@ -3,6 +3,7 @@ package curl2info
import (
"log"
"reflect"
"regexp"
"testing"
)
@ -47,7 +48,10 @@ func TestTouTiaoCURL(t *testing.T) {
if err != nil {
t.Error(err)
}
t.Log(curl, "Content:\n", resp.Content())
if !regexp.MustCompile(`"data".*comment_count.*"context"`).Match([]byte(resp.Content())) {
t.Error(curl, resp.Content())
}
}
func TestErrorCurl(t *testing.T) {
@ -63,7 +67,10 @@ func TestErrorCurl(t *testing.T) {
if err != nil {
t.Error(err)
}
t.Log("Content:\n", resp.Content(), reflect.TypeOf(curl.Body.GetIOBody()))
if !regexp.MustCompile(`App Growing`).Match([]byte(resp.Content())) {
t.Error(resp.Content(), curl, reflect.TypeOf(curl.Body.GetIOBody()))
}
}
func TestCurlTimeout(t *testing.T) {

157
structure.go Normal file
View File

@ -0,0 +1,157 @@
package curl2info
// TrieWord is the interface a value must implement to be inserted into, or
// looked up in, a Trie.
type TrieWord interface {
	// GetWord returns the string whose bytes form the key path in the trie.
	GetWord() string
}
// TrieStrWord is the simplest TrieWord: a plain string that is its own key.
type TrieStrWord string

// GetWord returns the underlying string.
func (tsw *TrieStrWord) GetWord() string {
	word := *tsw
	return string(word)
}
// Trie is one node of a byte-keyed prefix tree. The node returned by NewTrie
// acts as the root, and all operations are methods on it.
type Trie struct {
	isWord bool           // true when a word inserted via Insert ends at this node
	value  interface{}    // the TrieWord stored by Insert for the word ending here; nil otherwise
	char   byte           // byte on the edge from prev to this node (zero value on the root)
	prev   *Trie          // parent node; nil on the root
	next   map[byte]*Trie // child nodes keyed by their char
}
// NewTrie returns an empty node with its child map initialised, ready to be
// used as the root of a trie.
func NewTrie() *Trie {
	root := new(Trie)
	root.next = map[byte]*Trie{}
	return root
}
// Insert stores iword under the bytes of iword.GetWord(), creating any nodes
// missing along the path. The final node is flagged as a word and keeps iword
// as its value; inserting the same word again replaces the stored value.
func (trie *Trie) Insert(iword TrieWord) {
	node := trie
	for _, b := range []byte(iword.GetWord()) {
		child, found := node.next[b]
		if !found {
			child = NewTrie()
			child.char = b
			child.prev = node
			node.next[b] = child
		}
		node = child
	}
	node.isWord, node.value = true, iword
}
// AllWords returns every word stored below this node. The order is
// unspecified because the children are kept in maps. The result is nil when
// nothing is stored.
func (trie *Trie) AllWords() []string {
	var words []string
	for key := range trie.next {
		look(trie.next[key], "", &words)
	}
	return words
}
// look walks the subtree rooted at cur depth-first. content is the word built
// from the ancestors of cur; cur's own byte is appended to it, and every
// completed word found is appended to *result. Children are visited in map
// order, so the order of the collected words is unspecified.
func look(cur *Trie, content string, result *[]string) {
	// The trie is keyed by raw bytes. string(cur.char) would treat the byte as
	// a code point and UTF-8 encode it, corrupting any byte >= 0x80; converting
	// via a one-element byte slice appends the byte unchanged.
	content += string([]byte{cur.char})
	if cur.isWord {
		*result = append(*result, content)
	}
	for _, child := range cur.next {
		look(child, content, result)
	}
}
// Remove deletes word from the trie. It does nothing when the path for word
// does not exist. Nodes that are still a prefix of longer words are kept and
// only lose their word flag and value. Otherwise the now-useless branch is
// pruned up to the nearest ancestor that is a word, has other children, or is
// the root.
func (trie *Trie) Remove(word string) {
	node := trie
	for i := 0; i < len(word); i++ {
		child, ok := node.next[word[i]]
		if !ok {
			return // no such path, nothing to remove
		}
		node = child
	}

	node.isWord = false
	node.value = nil
	if len(node.next) > 0 {
		return // still part of longer words
	}

	// Climb while the branch serves only the removed word. edge tracks the
	// key, in the current node's child map, of the branch to cut.
	edge := node.char
	for !node.isWord && node.prev != nil {
		edge = node.char
		node = node.prev
		if len(node.next) > 1 {
			// This ancestor has other branches: stop here and cut only ours.
			// Returning here instead would skip the delete below and leave a
			// dangling, word-less branch behind.
			break
		}
	}
	delete(node.next, edge)
}
// SearchMostPrefix returns the value stored for the longest inserted word
// that is a prefix of iword.GetWord(), or nil when no inserted word is a
// prefix of it. A word stored on the root itself (the empty word) is not
// considered.
func (trie *Trie) SearchMostPrefix(iword TrieWord) interface{} {
	var longest interface{}
	node := trie
	for _, b := range []byte(iword.GetWord()) {
		child, found := node.next[b]
		if !found {
			break
		}
		if child.isWord {
			longest = child.value
		}
		node = child
	}
	return longest
}
// Match follows the exact path for iword.GetWord() and returns the value held
// by the node it ends on. It returns nil when the path does not exist; the
// value is also nil on nodes where no word has been stored.
func (trie *Trie) Match(iword TrieWord) interface{} {
	node := trie
	for _, b := range []byte(iword.GetWord()) {
		child, found := node.next[b]
		if !found {
			return nil
		}
		node = child
	}
	return node.value
}
// StartsWith reports whether the trie contains a path spelling prefix, i.e.
// whether every byte of prefix can be followed from this node. An empty
// prefix always yields true.
func (trie *Trie) StartsWith(prefix string) bool {
	node := trie
	for _, b := range []byte(prefix) {
		child, found := node.next[b]
		if !found {
			return false
		}
		node = child
	}
	return true
}

1
structure_test.go Normal file
View File

@ -0,0 +1 @@
package curl2info