From 420f307ad2a2fdabafd567ef9a070046abcd8322 Mon Sep 17 00:00:00 2001 From: huangsimin Date: Thu, 29 Nov 2018 16:19:40 +0800 Subject: [PATCH] =?UTF-8?q?Trie=20=E4=BC=98=E5=8C=96=20=E8=AE=BE=E7=BD=AE?= =?UTF-8?q?=E7=9A=84=E5=89=8D=E7=BC=80,=20=E6=94=B9=E4=BA=86=E6=9E=84?= =?UTF-8?q?=E5=BB=BA=E7=9A=84=E6=96=B9=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- base.go | 4 +- base_test.go | 16 ++--- option.go | 67 +++++++++++++++++++ parse_curl.go | 110 ++++++++++++++++++++++--------- parse_curl_test.go | 11 +++- structure.go | 157 +++++++++++++++++++++++++++++++++++++++++++++ structure_test.go | 1 + 7 files changed, 325 insertions(+), 41 deletions(-) create mode 100644 option.go create mode 100644 structure.go create mode 100644 structure_test.go diff --git a/base.go b/base.go index a3585a8..38ae84b 100644 --- a/base.go +++ b/base.go @@ -12,7 +12,7 @@ type parseFunction struct { ExecuteFunction func(u *CURL, soption string) ParamCURL *CURL ParamData string - Prioty int + Priority int } // Execute 执行 函数 @@ -29,7 +29,7 @@ func (nodes *parseQueue) Swap(i, j int) { // Less Priority Want Less func (nodes *parseQueue) Less(i, j int) bool { ns := *nodes - return ns[i].Prioty < ns[j].Prioty + return ns[i].Priority < ns[j].Priority } // Push 实现heap.Interface接口定义的额外方法 diff --git a/base_test.go b/base_test.go index 02956be..320dbc0 100644 --- a/base_test.go +++ b/base_test.go @@ -7,17 +7,17 @@ import ( func TestPQueue(t *testing.T) { PQExec := newPQueueExecute() - PQExec.Push(&parseFunction{Prioty: 5}) - PQExec.Push(&parseFunction{Prioty: 10}) - PQExec.Push(&parseFunction{Prioty: 4}) - PQExec.Push(&parseFunction{Prioty: 4}) - PQExec.Push(&parseFunction{Prioty: 20}) - PQExec.Push(&parseFunction{Prioty: 10}) - PQExec.Push(&parseFunction{Prioty: 15}) + PQExec.Push(&parseFunction{Priority: 5}) + PQExec.Push(&parseFunction{Priority: 10}) + PQExec.Push(&parseFunction{Priority: 4}) + PQExec.Push(&parseFunction{Priority: 4}) + PQExec.Push(&parseFunction{Priority: 20}) + PQExec.Push(&parseFunction{Priority: 10}) + PQExec.Push(&parseFunction{Priority: 15}) content := "" for PQExec.Len() > 0 { - content += strconv.Itoa(PQExec.Pop().Prioty) + content += strconv.Itoa(PQExec.Pop().Priority) content += " " } if content != "4 4 5 10 10 15 20 " { diff --git a/option.go b/option.go new file mode 100644 index 0000000..280badf --- /dev/null +++ b/option.go @@ -0,0 +1,67 @@ +package curl2info + +func init() { + optionTrie = NewTrie() + oelist := []*optionExecute{ + {"-H", 10, parseHeader, nil}, + {"-X", 10, parseOptX, nil}, + {"-A", 15, parseUserAgent, &extract{re: "^-A +(.+)", execute: extractData}}, + {"-I", 15, parseOptI, nil}, + {"-d", 10, parseBodyASCII, &extract{re: "^-d +(.+)", execute: extractData}}, + {"-u", 15, parseUser, &extract{re: "^-u +(.+)", execute: extractData}}, + {"-k", 15, parseInsecure, nil}, + // Body + {"--data", 10, parseBodyASCII, &extract{re: "--data +(.+)", execute: extractData}}, + {"--data-urlencode", 10, parseBodyURLEncode, &extract{re: "--data-urlencode +(.+)", execute: extractData}}, + {"--data-binary", 10, parseBodyBinary, &extract{re: "--data-binary +(.+)", execute: extractData}}, + {"--data-ascii", 10, parseBodyASCII, &extract{re: "--data-ascii +(.+)", execute: extractData}}, + {"--data-raw", 10, parseBodyRaw, &extract{re: "--data-raw +(.+)", execute: extractData}}, + //"--" + {"--header", 10, parseHeader, nil}, + {"--insecure", 15, parseInsecure, nil}, + {"--call", 10, parseCallBack, &extract{re: "--call +(.+)", execute: extractData}}, + {"--user-agent", 15, parseUserAgent, &extract{re: "--user-agent +(.+)", execute: extractData}}, + {"--user", 15, parseUser, &extract{re: "--user +(.+)", execute: extractData}}, + {"--connect-timeout", 15, parseTimeout, &extract{re: "--connect-timeout +(.+)", execute: extractData}}, + } + + for _, oe := range oelist { + optionTrie.Insert(oe) + } + + // log.Println("support options:", optionTrie.AllWords()) +} + +// extract 用于提取设置的数据 +type extract struct { + re string + execute func(re, soption string) string +} + +func (et *extract) Execute(soption string) string { + return et.execute(et.re, soption) +} + +// OptionTrie 设置的前缀树 +var optionTrie *Trie + +type optionExecute struct { + Prefix string + + Priority int + + Execute func(*CURL, string) // 执行函数 + Extract *extract // 提取的方法结构与参数 +} + +func (oe *optionExecute) GetWord() string { + return oe.Prefix + " " +} + +func (oe *optionExecute) BuildFunction(curl *CURL, soption string) *parseFunction { + data := soption + if oe.Extract != nil { + data = oe.Extract.Execute(data) + } + return &parseFunction{ParamCURL: curl, ParamData: data, ExecuteFunction: oe.Execute, Priority: oe.Priority} +} diff --git a/parse_curl.go b/parse_curl.go index 9c56913..b1fadc9 100644 --- a/parse_curl.go +++ b/parse_curl.go @@ -99,7 +99,39 @@ func (curl *CURL) CreateWorkflow(ses *requests.Session) *requests.Workflow { return wf } -// ParseRawCURL curl_bash +func init() { + optionTrie = NewTrie() + oelist := []*optionExecute{ + {"-H", 10, parseHeader, nil}, + {"-X", 10, parseOptX, nil}, + {"-A", 15, parseUserAgent, &extract{re: "^-A +(.+)", execute: extractData}}, + {"-I", 15, parseOptI, nil}, + {"-d", 10, parseBodyASCII, &extract{re: "^-d +(.+)", execute: extractData}}, + {"-u", 15, parseUser, &extract{re: "^-u +(.+)", execute: extractData}}, + {"-k", 15, parseInsecure, nil}, + // Body + {"--data", 10, parseBodyASCII, &extract{re: "--data +(.+)", execute: extractData}}, + {"--data-urlencode", 10, parseBodyURLEncode, &extract{re: "--data-urlencode +(.+)", execute: extractData}}, + {"--data-binary", 10, parseBodyBinary, &extract{re: "--data-binary +(.+)", execute: extractData}}, + {"--data-ascii", 10, parseBodyASCII, &extract{re: "--data-ascii +(.+)", execute: extractData}}, + {"--data-raw", 10, parseBodyRaw, &extract{re: "--data-raw +(.+)", execute: extractData}}, + //"--" + {"--header", 10, parseHeader, nil}, + {"--insecure", 15, parseInsecure, nil}, + {"--call", 10, parseCallBack, &extract{re: "--call +(.+)", execute: extractData}}, + {"--user-agent", 15, parseUserAgent, &extract{re: "--user-agent +(.+)", execute: extractData}}, + {"--user", 15, parseUser, &extract{re: "--user +(.+)", execute: extractData}}, + {"--connect-timeout", 15, parseTimeout, &extract{re: "--connect-timeout +(.+)", execute: extractData}}, + } + + for _, oe := range oelist { + optionTrie.Insert(oe) + } + + log.Println("support options:", optionTrie.AllWords()) +} + +// ParseRawCURL curl_bash 可以用trie改进 没空改 func ParseRawCURL(scurl string) (cURL *CURL, err error) { defer func() { @@ -110,20 +142,17 @@ func ParseRawCURL(scurl string) (cURL *CURL, err error) { }() executor := newPQueueExecute() - curl := NewCURL() if scurl[0] == '"' && scurl[len(scurl)-1] == '"' { scurl = strings.Trim(scurl, `"`) - scurl = strings.TrimSpace(scurl) } else if scurl[0] == '\'' && scurl[len(scurl)-1] == '\'' { scurl = strings.Trim(scurl, `'`) - scurl = strings.TrimSpace(scurl) - } else { - scurl = strings.TrimSpace(scurl) } + scurl = strings.TrimSpace(scurl) scurl = strings.TrimLeft(scurl, "curl") + mathches := regexp.MustCompile(`--[^ ]+ +'[^']+'|--[^ ]+ +[^ ]+|-[A-Za-z] +'[^']+'|-[A-Za-z] +[^ ]+| '[^']+'|--[a-z]+ {0,}`).FindAllString(scurl, -1) for _, m := range mathches { m = strings.TrimSpace(m) @@ -135,7 +164,7 @@ func ParseRawCURL(scurl string) (cURL *CURL, err error) { } curl.ParsedURL = purl case '-': - exec := judgeAndParseOptions(curl, m) + exec := judgeOptions(curl, m) if exec != nil { executor.Push(exec) } @@ -154,27 +183,38 @@ func ParseRawCURL(scurl string) (cURL *CURL, err error) { return curl, nil } +func judgeOptions(u *CURL, soption string) *parseFunction { + word := TrieStrWord(soption) + if ioe := optionTrie.SearchMostPrefix(&word); ioe != nil { + oe := ioe.(*optionExecute) + return oe.BuildFunction(u, soption) + } + + log.Println(soption, " no haved this option") + return nil +} + func judgeAndParseOptions(u *CURL, soption string) *parseFunction { switch prefix := soption[0:2]; prefix { case "-H": - return &parseFunction{ParamCURL: u, ParamData: soption, ExecuteFunction: parseHeader, Prioty: 10} + return &parseFunction{ParamCURL: u, ParamData: soption, ExecuteFunction: parseHeader, Priority: 10} case "-X": - return &parseFunction{ParamCURL: u, ParamData: soption, ExecuteFunction: parseOptX, Prioty: 10} + return &parseFunction{ParamCURL: u, ParamData: soption, ExecuteFunction: parseOptX, Priority: 10} case "-A": // User-Agent 先后顺序的问题 data := extractData("^-A +(.+)", soption) - return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseUserAgent, Prioty: 15} + return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseUserAgent, Priority: 15} case "-I": - return &parseFunction{ParamCURL: u, ParamData: soption, ExecuteFunction: parseOptI, Prioty: 15} + return &parseFunction{ParamCURL: u, ParamData: soption, ExecuteFunction: parseOptI, Priority: 15} case "--": return parseLongOption(u, soption) case "-d": data := extractData("^-d +(.+)", soption) - return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseBodyASCII, Prioty: 10} + return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseBodyASCII, Priority: 10} case "-u": data := extractData("^-u +(.+)", soption) - return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseUser, Prioty: 15} + return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseUser, Priority: 15} case "-k": // -k, --insecure Allow insecure server connections when using SSL - return &parseFunction{ParamCURL: u, ParamData: soption, ExecuteFunction: parseInsecure, Prioty: 15} + return &parseFunction{ParamCURL: u, ParamData: soption, ExecuteFunction: parseInsecure, Priority: 15} } return nil } @@ -193,39 +233,35 @@ func parseLongOption(u *CURL, soption string) *parseFunction { dtype := datas[1] data := strings.Trim(datas[2], "'") - if u.Method != "" { - u.Method = "POST" - } - switch dtype { case "binary": - return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseBodyBinary, Prioty: 10} + return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseBodyBinary, Priority: 10} case "ascii": - return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseBodyASCII, Prioty: 10} + return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseBodyASCII, Priority: 10} case "raw": - return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseBodyRaw, Prioty: 10} + return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseBodyRaw, Priority: 10} case "urlencode": - return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseBodyURLEncode, Prioty: 10} + return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseBodyURLEncode, Priority: 10} case "data": - return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseBodyASCII, Prioty: 10} + return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseBodyASCII, Priority: 10} } case regexp.MustCompile("^--header").MatchString(soption): - return &parseFunction{ParamCURL: u, ParamData: soption, ExecuteFunction: parseHeader, Prioty: 10} + return &parseFunction{ParamCURL: u, ParamData: soption, ExecuteFunction: parseHeader, Priority: 10} case regexp.MustCompile("^--call").MatchString(soption): data := extractData("^--call +(.+)", soption) - return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseCallBack, Prioty: 10} + return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseCallBack, Priority: 10} case regexp.MustCompile("^--user-agent").MatchString(soption): data := extractData("^--user-agent +(.+)", soption) - return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseUserAgent, Prioty: 15} + return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseUserAgent, Priority: 15} case regexp.MustCompile("^--user").MatchString(soption): data := extractData("^--user +(.+)", soption) - return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseUser, Prioty: 15} + return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseUser, Priority: 15} case regexp.MustCompile("^--insecure").MatchString(soption): - return &parseFunction{ParamCURL: u, ParamData: soption, ExecuteFunction: parseInsecure, Prioty: 15} + return &parseFunction{ParamCURL: u, ParamData: soption, ExecuteFunction: parseInsecure, Priority: 15} case regexp.MustCompile("^--connect-timeout").MatchString(soption): data := extractData("^--connect-timeout +(.+)", soption) - return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseTimeout, Prioty: 15} + return &parseFunction{ParamCURL: u, ParamData: data, ExecuteFunction: parseTimeout, Priority: 15} } log.Println("can't parseOption", soption) @@ -273,16 +309,28 @@ func parseOptX(u *CURL, soption string) { } func parseBodyURLEncode(u *CURL, data string) { + if u.Method != "" { + u.Method = "POST" + } + u.Body.SetPrefix(requests.TypeURLENCODED) u.Body.SetIOBody(data) } func parseBodyRaw(u *CURL, data string) { + if u.Method != "" { + u.Method = "POST" + } + u.Body.SetPrefix(requests.TypeURLENCODED) u.Body.SetIOBody(data) } func parseBodyASCII(u *CURL, data string) { + if u.Method != "" { + u.Method = "POST" + } + u.Body.SetPrefix(requests.TypeURLENCODED) if data[0] != '@' { @@ -304,6 +352,10 @@ func parseBodyASCII(u *CURL, data string) { // 处理@ 并且替/r/n符号 func parseBodyBinary(u *CURL, data string) { + if u.Method != "" { + u.Method = "POST" + } + u.Body.SetPrefix(requests.TypeURLENCODED) if data[0] != '@' { diff --git a/parse_curl_test.go b/parse_curl_test.go index 952dd27..78cb979 100644 --- a/parse_curl_test.go +++ b/parse_curl_test.go @@ -3,6 +3,7 @@ package curl2info import ( "log" "reflect" + "regexp" "testing" ) @@ -47,7 +48,10 @@ func TestTouTiaoCURL(t *testing.T) { if err != nil { t.Error(err) } - t.Log(curl, "Content:\n", resp.Content()) + + if !regexp.MustCompile(`"data".*comment_count.*"context"`).Match([]byte(resp.Content())) { + t.Error(curl, resp.Content()) + } } func TestErrorCurl(t *testing.T) { @@ -63,7 +67,10 @@ func TestErrorCurl(t *testing.T) { if err != nil { t.Error(err) } - t.Log("Content:\n", resp.Content(), reflect.TypeOf(curl.Body.GetIOBody())) + + if !regexp.MustCompile(`App Growing`).Match([]byte(resp.Content())) { + t.Error(resp.Content(), curl, reflect.TypeOf(curl.Body.GetIOBody())) + } } func TestCurlTimeout(t *testing.T) { diff --git a/structure.go b/structure.go new file mode 100644 index 0000000..09c2c1b --- /dev/null +++ b/structure.go @@ -0,0 +1,157 @@ +package curl2info + +// TrieWord Trie 需要的Word接口 +type TrieWord interface { + GetWord() string +} + +// TrieStrWord 最简单的TrieWord 结构 +type TrieStrWord string + +// GetWord 获取单词 +func (tsw *TrieStrWord) GetWord() string { + return (string)(*tsw) +} + +// Trie 前缀树 +type Trie struct { + isWord bool + value interface{} + char byte + prev *Trie + next map[byte]*Trie +} + +// NewTrie Initialize your data structure here. +func NewTrie() *Trie { + return &Trie{next: make(map[byte]*Trie)} +} + +// Insert a word into the trie. +func (trie *Trie) Insert(iword TrieWord) { + cur := trie + word := iword.GetWord() + l := len(word) + + for i := 0; i < l; i++ { + c := word[i] + if next, ok := cur.next[c]; ok { + cur = next + } else { + create := NewTrie() + cur.next[c] = create + create.char = c + create.prev = cur + cur = create + } + } + + cur.isWord = true + cur.value = iword +} + +// AllWords 所有单词 +func (trie *Trie) AllWords() []string { + var result []string + for _, v := range trie.next { + look(v, "", &result) + } + return result +} + +func look(cur *Trie, content string, result *[]string) { + content += string(cur.char) + if cur.isWord { + *result = append(*result, content) + } + for _, v := range cur.next { + look(v, content, result) + } +} + +// Remove 移除单词 +func (trie *Trie) Remove(word string) { + cur := trie + l := len(word) + for i := 0; i < l; i++ { + c := word[i] + if next, ok := cur.next[c]; ok { + cur = next + } else { + return + } + } + + if cur != nil { + cur.isWord = false + cur.value = nil + + lastchar := cur.char + + if len(cur.next) == 0 { + for cur.isWord != true && cur.prev != nil { + lastchar = cur.char + cur = cur.prev + if len(cur.next) > 1 { + return + } + } + delete(cur.next, lastchar) + } + } +} + +// SearchMostPrefix Returns if the word is in the trie. +func (trie *Trie) SearchMostPrefix(iword TrieWord) interface{} { + cur := trie + word := iword.GetWord() + + l := len(word) + + var result interface{} + for i := 0; i < l; i++ { + c := word[i] + if next, ok := cur.next[c]; ok { + cur = next + if cur.isWord { + result = cur.value + } + } else { + return result + } + } + return result +} + +// Match Returns if the word is in the trie. +func (trie *Trie) Match(iword TrieWord) interface{} { + cur := trie + word := iword.GetWord() + + l := len(word) + for i := 0; i < l; i++ { + c := word[i] + if next, ok := cur.next[c]; ok { + cur = next + } else { + return nil + } + } + + return cur.value +} + +// StartsWith Returns if there is any word in the trie that starts with the given prefix. */ +func (trie *Trie) StartsWith(prefix string) bool { + cur := trie + l := len(prefix) + for i := 0; i < l; i++ { + c := prefix[i] + if next, ok := cur.next[c]; ok { + cur = next + } else { + return false + } + } + return true +} diff --git a/structure_test.go b/structure_test.go new file mode 100644 index 0000000..810a7cd --- /dev/null +++ b/structure_test.go @@ -0,0 +1 @@ +package curl2info