From f4cd890053840b883261a47e802c0fdcbfc87b81 Mon Sep 17 00:00:00 2001 From: eson <474420502@qq.com> Date: Mon, 19 Aug 2019 03:46:12 +0800 Subject: [PATCH 01/12] TODO: Tried best interface. eg. type GetInterfaceType interface --- tree/tried/tried.go | 96 ++++++++++++++++++++++++++++++++++++++++ tree/tried/tried_test.go | 71 +++++++++++++++++++++++++++++ 2 files changed, 167 insertions(+) create mode 100644 tree/tried/tried.go create mode 100644 tree/tried/tried_test.go diff --git a/tree/tried/tried.go b/tree/tried/tried.go new file mode 100644 index 0000000..b4678f2 --- /dev/null +++ b/tree/tried/tried.go @@ -0,0 +1,96 @@ +package tried + +type Tried struct { + root *Node + datasize uint + // wordIndex func () +} + +type Node struct { + data []*Node + value interface{} +} + +func New() *Tried { + tried := &Tried{} + tried.root = new(Node) + return tried +} + +func (tried *Tried) Put(words string, values ...interface{}) { + cur := tried.root + var n *Node + for i := 0; i < len(words); i++ { + w := uint(words[i] - 'a') + + if cur.data == nil { + cur.data = make([]*Node, 26) + } + + if n = cur.data[w]; n == nil { + n = new(Node) + cur.data[w] = n + } + cur = n + } + + vlen := len(values) + switch vlen { + case 0: + cur.value = tried + case 1: + cur.value = values[0] + case 2: + // TODO: 执行函数 values[1] 为函数类型 func (cur *Node, value interface{}) ...可以插入, 也可以不插入 + default: + panic("unknow select to do") + } + +} + +func (tried *Tried) Get(words string) interface{} { + cur := tried.root + var n *Node + for i := 0; i < len(words); i++ { + w := uint(words[i] - 'a') //TODO: 升级Index 函数 + if n = cur.data[w]; n == nil { + return nil + } + cur = n + } + return n.value +} + +func (tried *Tried) Has(words string) bool { + return tried.Get(words) != nil +} + +func (tried *Tried) Traversal(every func(cidx uint, value interface{}) bool) { + + var traversal func(*Node) + traversal = func(cur *Node) { + if cur != nil { + for i, n := range cur.data { + if n != nil { + if n.value != nil { + if !every(uint(i), n.value) { + return + } + } + traversal(n) + } + } + } + } + + root := tried.root + traversal(root) +} + +// func (tried *Tried) String() []string { +// var result []string +// tried.Traversal(func(cidx uint, value interface{}) bool { +// result = append(result, spew.) +// }) +// return result +// } diff --git a/tree/tried/tried_test.go b/tree/tried/tried_test.go new file mode 100644 index 0000000..fbc6d1d --- /dev/null +++ b/tree/tried/tried_test.go @@ -0,0 +1,71 @@ +package tried + +import ( + "testing" +) + +func TestTried_PutAndGet1(t *testing.T) { + tried := New() + tried.Put("asdf") + tried.Put("hehe", "hehe") + tried.Put("xixi", 3) + + var result interface{} + + result = tried.Get("asdf") + if result != tried { + t.Error("result should be 3") + } + + result = tried.Get("xixi") + if result != 3 { + t.Error("result should be 3") + } + + result = tried.Get("hehe") + if result != "hehe" { + t.Error("result should be hehe") + } + + result = tried.Get("haha") + if result != nil { + t.Error("result should be nil") + } + + result = tried.Get("b") + if result != nil { + t.Error("result should be nil") + } +} + +func TestTried_Traversal(t *testing.T) { + tried := New() + tried.Put("asdf") + tried.Put("abdf", "ab") + tried.Put("hehe", "hehe") + tried.Put("xixi", 3) + + var result []interface{} + tried.Traversal(func(idx uint, v interface{}) bool { + // t.Error(idx, v) + result = append(result, v) + return true + }) + + if result[0] != "ab" { + t.Error(result[0]) + } + + if result[1] != tried { + t.Error(result[1]) + } + + if result[2] != "hehe" { + t.Error(result[2]) + } + + if result[3] != 3 { + t.Error(result[3]) + } + +} From ebbce1e0a2ecccb6cda6822846d3f2d744eb8ad8 Mon Sep 17 00:00:00 2001 From: huangsimin Date: Mon, 19 Aug 2019 14:30:24 +0800 Subject: [PATCH 02/12] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E4=B8=BAInterface?= =?UTF-8?q?=E7=9A=84=E5=BD=A2=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- go.sum | 1 + tree/tried/tried.go | 50 ++++++++++++++++++++++----- tree/tried/tried_test.go | 73 +++++++++++++++++++++++++++++++++------- 3 files changed, 102 insertions(+), 22 deletions(-) diff --git a/go.sum b/go.sum index b3f692e..fc26243 100644 --- a/go.sum +++ b/go.sum @@ -4,3 +4,4 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/emirpasic/gods v1.12.0 h1:QAUIPSaCu4G+POclxeqb3F+WPpdKqFGlw36+yOzGlrg= github.com/emirpasic/gods v1.12.0/go.mod h1:YfzfFFoVP/catgzJb4IKIqXjX78Ha8FMSDh3ymbK86o= +github.com/petar/GoLLRB v0.0.0-20190514000832-33fb24c13b99 h1:KcEvVBAvyHkUdFAygKAzwB6LAcZ6LS32WHmRD2VyXMI= diff --git a/tree/tried/tried.go b/tree/tried/tried.go index b4678f2..77f5a6c 100644 --- a/tree/tried/tried.go +++ b/tree/tried/tried.go @@ -1,9 +1,30 @@ package tried +type TriedString string + +func (ts TriedString) Size() uint { + return uint(len(ts)) +} + +func (ts TriedString) WordIndex(idx uint) uint { + w := ts[idx] + if w >= 'a' && w <= 'z' { + return uint(w) - 'a' + } else if w >= 'A' && w <= 'Z' { + return uint(w) - 'A' + 26 + } else { + return uint(w) - '0' + 52 + } +} + +type ObjectIndex interface { + WordIndex(idx uint) uint + Size() uint +} + type Tried struct { root *Node datasize uint - // wordIndex func () } type Node struct { @@ -14,17 +35,28 @@ type Node struct { func New() *Tried { tried := &Tried{} tried.root = new(Node) + tried.datasize = 62 return tried } -func (tried *Tried) Put(words string, values ...interface{}) { +func (tried *Tried) wordIndex(w byte) uint { + if w >= 'a' && w <= 'z' { + return uint(w) - 'a' + } else if w >= 'A' && w <= 'Z' { + return uint(w) - 'A' + 26 + } else { + return uint(w) - '0' + 52 + } +} + +func (tried *Tried) Put(words ObjectIndex, values ...interface{}) { cur := tried.root var n *Node - for i := 0; i < len(words); i++ { - w := uint(words[i] - 'a') + for i := uint(0); i < words.Size(); i++ { + w := words.WordIndex(i) if cur.data == nil { - cur.data = make([]*Node, 26) + cur.data = make([]*Node, tried.datasize) } if n = cur.data[w]; n == nil { @@ -48,11 +80,11 @@ func (tried *Tried) Put(words string, values ...interface{}) { } -func (tried *Tried) Get(words string) interface{} { +func (tried *Tried) Get(words ObjectIndex) interface{} { cur := tried.root var n *Node - for i := 0; i < len(words); i++ { - w := uint(words[i] - 'a') //TODO: 升级Index 函数 + for i := uint(0); i < words.Size(); i++ { + w := words.WordIndex(i) //TODO: 升级Index 函数 if n = cur.data[w]; n == nil { return nil } @@ -61,7 +93,7 @@ func (tried *Tried) Get(words string) interface{} { return n.value } -func (tried *Tried) Has(words string) bool { +func (tried *Tried) Has(words ObjectIndex) bool { return tried.Get(words) != nil } diff --git a/tree/tried/tried_test.go b/tree/tried/tried_test.go index fbc6d1d..9ff23c3 100644 --- a/tree/tried/tried_test.go +++ b/tree/tried/tried_test.go @@ -2,37 +2,40 @@ package tried import ( "testing" + + "github.com/Pallinder/go-randomdata" ) func TestTried_PutAndGet1(t *testing.T) { tried := New() - tried.Put("asdf") - tried.Put("hehe", "hehe") - tried.Put("xixi", 3) + + tried.Put(TriedString("asdf")) + tried.Put(TriedString("hehe"), "hehe") + tried.Put(TriedString("xixi"), 3) var result interface{} - result = tried.Get("asdf") + result = tried.Get(TriedString("asdf")) if result != tried { t.Error("result should be 3") } - result = tried.Get("xixi") + result = tried.Get(TriedString("xixi")) if result != 3 { t.Error("result should be 3") } - result = tried.Get("hehe") + result = tried.Get(TriedString("hehe")) if result != "hehe" { t.Error("result should be hehe") } - result = tried.Get("haha") + result = tried.Get(TriedString("haha")) if result != nil { t.Error("result should be nil") } - result = tried.Get("b") + result = tried.Get(TriedString("b")) if result != nil { t.Error("result should be nil") } @@ -40,10 +43,10 @@ func TestTried_PutAndGet1(t *testing.T) { func TestTried_Traversal(t *testing.T) { tried := New() - tried.Put("asdf") - tried.Put("abdf", "ab") - tried.Put("hehe", "hehe") - tried.Put("xixi", 3) + tried.Put(TriedString("asdf")) + tried.Put(TriedString("abdf"), "ab") + tried.Put(TriedString("hehe"), "hehe") + tried.Put(TriedString("xixi"), 3) var result []interface{} tried.Traversal(func(idx uint, v interface{}) bool { @@ -67,5 +70,49 @@ func TestTried_Traversal(t *testing.T) { if result[3] != 3 { t.Error(result[3]) } - +} + +func BenchmarkTried_Put(b *testing.B) { + + var data []TriedString + b.N = 10000 + count := 1000 + + for i := 0; i < b.N; i++ { + data = append(data, TriedString(randomdata.RandStringRunes(10)+randomdata.RandStringRunes(4))) + } + + b.ResetTimer() + b.N = b.N * count + for c := 0; c < count; c++ { + tried := New() + for _, v := range data { + tried.Put(v) + } + } +} + +func BenchmarkTried_Get(b *testing.B) { + + var data []TriedString + b.N = 10000 + count := 1000 + + for i := 0; i < b.N; i++ { + data = append(data, TriedString(randomdata.RandStringRunes(10)+randomdata.RandStringRunes(4))) + } + + b.N = b.N * count + + tried := New() + for _, v := range data { + tried.Put(v) + } + + b.ResetTimer() + for c := 0; c < count; c++ { + for _, v := range data { + tried.Get(v) + } + } } From 874acd62439589b0dfc83caa4c02c053b574c356 Mon Sep 17 00:00:00 2001 From: huangsimin Date: Mon, 19 Aug 2019 15:08:49 +0800 Subject: [PATCH 03/12] =?UTF-8?q?=E5=87=BD=E6=95=B0=E5=BD=A2=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tree/tried/tried.go | 14 +++++++------- tree/tried/tried_test.go | 40 ++++++++++++++++++++-------------------- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/tree/tried/tried.go b/tree/tried/tried.go index 77f5a6c..2465ade 100644 --- a/tree/tried/tried.go +++ b/tree/tried/tried.go @@ -49,11 +49,11 @@ func (tried *Tried) wordIndex(w byte) uint { } } -func (tried *Tried) Put(words ObjectIndex, values ...interface{}) { +func (tried *Tried) Put(words string, values ...interface{}) { cur := tried.root var n *Node - for i := uint(0); i < words.Size(); i++ { - w := words.WordIndex(i) + for i := 0; i < len(words); i++ { + w := tried.wordIndex(words[i]) if cur.data == nil { cur.data = make([]*Node, tried.datasize) @@ -80,11 +80,11 @@ func (tried *Tried) Put(words ObjectIndex, values ...interface{}) { } -func (tried *Tried) Get(words ObjectIndex) interface{} { +func (tried *Tried) Get(words string) interface{} { cur := tried.root var n *Node - for i := uint(0); i < words.Size(); i++ { - w := words.WordIndex(i) //TODO: 升级Index 函数 + for i := 0; i < len(words); i++ { + w := tried.wordIndex(words[i]) //TODO: 升级Index 函数 if n = cur.data[w]; n == nil { return nil } @@ -93,7 +93,7 @@ func (tried *Tried) Get(words ObjectIndex) interface{} { return n.value } -func (tried *Tried) Has(words ObjectIndex) bool { +func (tried *Tried) Has(words string) bool { return tried.Get(words) != nil } diff --git a/tree/tried/tried_test.go b/tree/tried/tried_test.go index 9ff23c3..51f9600 100644 --- a/tree/tried/tried_test.go +++ b/tree/tried/tried_test.go @@ -9,33 +9,33 @@ import ( func TestTried_PutAndGet1(t *testing.T) { tried := New() - tried.Put(TriedString("asdf")) - tried.Put(TriedString("hehe"), "hehe") - tried.Put(TriedString("xixi"), 3) + tried.Put(("asdf")) + tried.Put(("hehe"), "hehe") + tried.Put(("xixi"), 3) var result interface{} - result = tried.Get(TriedString("asdf")) + result = tried.Get("asdf") if result != tried { t.Error("result should be 3") } - result = tried.Get(TriedString("xixi")) + result = tried.Get("xixi") if result != 3 { t.Error("result should be 3") } - result = tried.Get(TriedString("hehe")) + result = tried.Get("hehe") if result != "hehe" { t.Error("result should be hehe") } - result = tried.Get(TriedString("haha")) + result = tried.Get("haha") if result != nil { t.Error("result should be nil") } - result = tried.Get(TriedString("b")) + result = tried.Get("b") if result != nil { t.Error("result should be nil") } @@ -43,10 +43,10 @@ func TestTried_PutAndGet1(t *testing.T) { func TestTried_Traversal(t *testing.T) { tried := New() - tried.Put(TriedString("asdf")) - tried.Put(TriedString("abdf"), "ab") - tried.Put(TriedString("hehe"), "hehe") - tried.Put(TriedString("xixi"), 3) + tried.Put("asdf") + tried.Put(("abdf"), "ab") + tried.Put(("hehe"), "hehe") + tried.Put(("xixi"), 3) var result []interface{} tried.Traversal(func(idx uint, v interface{}) bool { @@ -74,12 +74,12 @@ func TestTried_Traversal(t *testing.T) { func BenchmarkTried_Put(b *testing.B) { - var data []TriedString - b.N = 10000 - count := 1000 + var data []string + b.N = 100000 + count := 50 for i := 0; i < b.N; i++ { - data = append(data, TriedString(randomdata.RandStringRunes(10)+randomdata.RandStringRunes(4))) + data = append(data, (randomdata.RandStringRunes(10) + randomdata.RandStringRunes(4))) } b.ResetTimer() @@ -94,12 +94,12 @@ func BenchmarkTried_Put(b *testing.B) { func BenchmarkTried_Get(b *testing.B) { - var data []TriedString - b.N = 10000 - count := 1000 + var data []string + b.N = 100000 + count := 50 for i := 0; i < b.N; i++ { - data = append(data, TriedString(randomdata.RandStringRunes(10)+randomdata.RandStringRunes(4))) + data = append(data, (randomdata.RandStringRunes(10) + randomdata.RandStringRunes(4))) } b.N = b.N * count From 86253b5bfbbe0a5a518cabd28b30cbf546e548b1 Mon Sep 17 00:00:00 2001 From: huangsimin Date: Mon, 19 Aug 2019 15:24:22 +0800 Subject: [PATCH 04/12] =?UTF-8?q?=E5=86=8D=E6=AC=A1=E4=BF=AE=E6=94=B9TypeS?= =?UTF-8?q?tring=20=E5=AF=B9=E6=AF=94?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tree/tried/tried.go | 42 ++++++------- tree/tried/tried_test.go | 130 +++++++++++++++++++++------------------ 2 files changed, 90 insertions(+), 82 deletions(-) diff --git a/tree/tried/tried.go b/tree/tried/tried.go index 2465ade..3253131 100644 --- a/tree/tried/tried.go +++ b/tree/tried/tried.go @@ -7,16 +7,20 @@ func (ts TriedString) Size() uint { } func (ts TriedString) WordIndex(idx uint) uint { - w := ts[idx] - if w >= 'a' && w <= 'z' { - return uint(w) - 'a' - } else if w >= 'A' && w <= 'Z' { - return uint(w) - 'A' + 26 - } else { - return uint(w) - '0' + 52 - } + return uint(ts[idx]) - 'a' } +// func (ts TriedString) WordIndex(idx uint) uint { +// w := ts[idx] +// if w >= 'a' && w <= 'z' { +// return uint(w) - 'a' +// } else if w >= 'A' && w <= 'Z' { +// return uint(w) - 'A' + 26 +// } else { +// return uint(w) - '0' + 52 +// } +// } + type ObjectIndex interface { WordIndex(idx uint) uint Size() uint @@ -40,20 +44,14 @@ func New() *Tried { } func (tried *Tried) wordIndex(w byte) uint { - if w >= 'a' && w <= 'z' { - return uint(w) - 'a' - } else if w >= 'A' && w <= 'Z' { - return uint(w) - 'A' + 26 - } else { - return uint(w) - '0' + 52 - } + return uint(w) - 'a' } -func (tried *Tried) Put(words string, values ...interface{}) { +func (tried *Tried) Put(words ObjectIndex, values ...interface{}) { cur := tried.root var n *Node - for i := 0; i < len(words); i++ { - w := tried.wordIndex(words[i]) + for i := uint(0); i < words.Size(); i++ { + w := words.WordIndex(i) if cur.data == nil { cur.data = make([]*Node, tried.datasize) @@ -80,11 +78,11 @@ func (tried *Tried) Put(words string, values ...interface{}) { } -func (tried *Tried) Get(words string) interface{} { +func (tried *Tried) Get(words ObjectIndex) interface{} { cur := tried.root var n *Node - for i := 0; i < len(words); i++ { - w := tried.wordIndex(words[i]) //TODO: 升级Index 函数 + for i := uint(0); i < words.Size(); i++ { + w := words.WordIndex(i) //TODO: 升级Index 函数 if n = cur.data[w]; n == nil { return nil } @@ -93,7 +91,7 @@ func (tried *Tried) Get(words string) interface{} { return n.value } -func (tried *Tried) Has(words string) bool { +func (tried *Tried) Has(words ObjectIndex) bool { return tried.Get(words) != nil } diff --git a/tree/tried/tried_test.go b/tree/tried/tried_test.go index 51f9600..73df52d 100644 --- a/tree/tried/tried_test.go +++ b/tree/tried/tried_test.go @@ -6,80 +6,85 @@ import ( "github.com/Pallinder/go-randomdata" ) -func TestTried_PutAndGet1(t *testing.T) { - tried := New() +// func TestTried_PutAndGet1(t *testing.T) { +// tried := New() - tried.Put(("asdf")) - tried.Put(("hehe"), "hehe") - tried.Put(("xixi"), 3) +// tried.Put(("asdf")) +// tried.Put(("hehe"), "hehe") +// tried.Put(("xixi"), 3) - var result interface{} +// var result interface{} - result = tried.Get("asdf") - if result != tried { - t.Error("result should be 3") - } +// result = tried.Get("asdf") +// if result != tried { +// t.Error("result should be 3") +// } - result = tried.Get("xixi") - if result != 3 { - t.Error("result should be 3") - } +// result = tried.Get("xixi") +// if result != 3 { +// t.Error("result should be 3") +// } - result = tried.Get("hehe") - if result != "hehe" { - t.Error("result should be hehe") - } +// result = tried.Get("hehe") +// if result != "hehe" { +// t.Error("result should be hehe") +// } - result = tried.Get("haha") - if result != nil { - t.Error("result should be nil") - } +// result = tried.Get("haha") +// if result != nil { +// t.Error("result should be nil") +// } - result = tried.Get("b") - if result != nil { - t.Error("result should be nil") - } -} +// result = tried.Get("b") +// if result != nil { +// t.Error("result should be nil") +// } +// } -func TestTried_Traversal(t *testing.T) { - tried := New() - tried.Put("asdf") - tried.Put(("abdf"), "ab") - tried.Put(("hehe"), "hehe") - tried.Put(("xixi"), 3) +// func TestTried_Traversal(t *testing.T) { +// tried := New() +// tried.Put("asdf") +// tried.Put(("abdf"), "ab") +// tried.Put(("hehe"), "hehe") +// tried.Put(("xixi"), 3) - var result []interface{} - tried.Traversal(func(idx uint, v interface{}) bool { - // t.Error(idx, v) - result = append(result, v) - return true - }) +// var result []interface{} +// tried.Traversal(func(idx uint, v interface{}) bool { +// // t.Error(idx, v) +// result = append(result, v) +// return true +// }) - if result[0] != "ab" { - t.Error(result[0]) - } +// if result[0] != "ab" { +// t.Error(result[0]) +// } - if result[1] != tried { - t.Error(result[1]) - } +// if result[1] != tried { +// t.Error(result[1]) +// } - if result[2] != "hehe" { - t.Error(result[2]) - } +// if result[2] != "hehe" { +// t.Error(result[2]) +// } - if result[3] != 3 { - t.Error(result[3]) - } -} +// if result[3] != 3 { +// t.Error(result[3]) +// } +// } func BenchmarkTried_Put(b *testing.B) { - var data []string - b.N = 100000 - count := 50 + var data []TriedString + b.N = 1000000 + count := 10 for i := 0; i < b.N; i++ { - data = append(data, (randomdata.RandStringRunes(10) + randomdata.RandStringRunes(4))) + var content []rune + for c := 0; c < randomdata.Number(5, 15); c++ { + char := randomdata.Number(0, 26) + 'a' + content = append(content, rune(byte(char))) + } + data = append(data, TriedString(string(content))) } b.ResetTimer() @@ -94,12 +99,17 @@ func BenchmarkTried_Put(b *testing.B) { func BenchmarkTried_Get(b *testing.B) { - var data []string - b.N = 100000 - count := 50 + var data []TriedString + b.N = 1000000 + count := 10 for i := 0; i < b.N; i++ { - data = append(data, (randomdata.RandStringRunes(10) + randomdata.RandStringRunes(4))) + var content []rune + for c := 0; c < randomdata.Number(5, 15); c++ { + char := randomdata.Number(0, 26) + 'a' + content = append(content, rune(byte(char))) + } + data = append(data, TriedString(content)) } b.N = b.N * count From b4829ba058808b1f2b559b4afa15bec6c46168ec Mon Sep 17 00:00:00 2001 From: huangsimin Date: Mon, 19 Aug 2019 15:28:09 +0800 Subject: [PATCH 05/12] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E4=B8=BA=E5=87=BD?= =?UTF-8?q?=E6=95=B0=E5=BD=A2=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tree/tried/tried.go | 14 ++--- tree/tried/tried_test.go | 112 +++++++++++++++++++-------------------- 2 files changed, 63 insertions(+), 63 deletions(-) diff --git a/tree/tried/tried.go b/tree/tried/tried.go index 3253131..fb93ebc 100644 --- a/tree/tried/tried.go +++ b/tree/tried/tried.go @@ -47,11 +47,11 @@ func (tried *Tried) wordIndex(w byte) uint { return uint(w) - 'a' } -func (tried *Tried) Put(words ObjectIndex, values ...interface{}) { +func (tried *Tried) Put(words string, values ...interface{}) { cur := tried.root var n *Node - for i := uint(0); i < words.Size(); i++ { - w := words.WordIndex(i) + for i := 0; i < len(words); i++ { + w := tried.wordIndex(words[i]) if cur.data == nil { cur.data = make([]*Node, tried.datasize) @@ -78,11 +78,11 @@ func (tried *Tried) Put(words ObjectIndex, values ...interface{}) { } -func (tried *Tried) Get(words ObjectIndex) interface{} { +func (tried *Tried) Get(words string) interface{} { cur := tried.root var n *Node - for i := uint(0); i < words.Size(); i++ { - w := words.WordIndex(i) //TODO: 升级Index 函数 + for i := 0; i < len(words); i++ { + w := tried.wordIndex(words[i]) //TODO: 升级Index 函数 if n = cur.data[w]; n == nil { return nil } @@ -91,7 +91,7 @@ func (tried *Tried) Get(words ObjectIndex) interface{} { return n.value } -func (tried *Tried) Has(words ObjectIndex) bool { +func (tried *Tried) Has(words string) bool { return tried.Get(words) != nil } diff --git a/tree/tried/tried_test.go b/tree/tried/tried_test.go index 73df52d..1713c87 100644 --- a/tree/tried/tried_test.go +++ b/tree/tried/tried_test.go @@ -6,75 +6,75 @@ import ( "github.com/Pallinder/go-randomdata" ) -// func TestTried_PutAndGet1(t *testing.T) { -// tried := New() +func TestTried_PutAndGet1(t *testing.T) { + tried := New() -// tried.Put(("asdf")) -// tried.Put(("hehe"), "hehe") -// tried.Put(("xixi"), 3) + tried.Put(("asdf")) + tried.Put(("hehe"), "hehe") + tried.Put(("xixi"), 3) -// var result interface{} + var result interface{} -// result = tried.Get("asdf") -// if result != tried { -// t.Error("result should be 3") -// } + result = tried.Get("asdf") + if result != tried { + t.Error("result should be 3") + } -// result = tried.Get("xixi") -// if result != 3 { -// t.Error("result should be 3") -// } + result = tried.Get("xixi") + if result != 3 { + t.Error("result should be 3") + } -// result = tried.Get("hehe") -// if result != "hehe" { -// t.Error("result should be hehe") -// } + result = tried.Get("hehe") + if result != "hehe" { + t.Error("result should be hehe") + } -// result = tried.Get("haha") -// if result != nil { -// t.Error("result should be nil") -// } + result = tried.Get("haha") + if result != nil { + t.Error("result should be nil") + } -// result = tried.Get("b") -// if result != nil { -// t.Error("result should be nil") -// } -// } + result = tried.Get("b") + if result != nil { + t.Error("result should be nil") + } +} -// func TestTried_Traversal(t *testing.T) { -// tried := New() -// tried.Put("asdf") -// tried.Put(("abdf"), "ab") -// tried.Put(("hehe"), "hehe") -// tried.Put(("xixi"), 3) +func TestTried_Traversal(t *testing.T) { + tried := New() + tried.Put("asdf") + tried.Put(("abdf"), "ab") + tried.Put(("hehe"), "hehe") + tried.Put(("xixi"), 3) -// var result []interface{} -// tried.Traversal(func(idx uint, v interface{}) bool { -// // t.Error(idx, v) -// result = append(result, v) -// return true -// }) + var result []interface{} + tried.Traversal(func(idx uint, v interface{}) bool { + // t.Error(idx, v) + result = append(result, v) + return true + }) -// if result[0] != "ab" { -// t.Error(result[0]) -// } + if result[0] != "ab" { + t.Error(result[0]) + } -// if result[1] != tried { -// t.Error(result[1]) -// } + if result[1] != tried { + t.Error(result[1]) + } -// if result[2] != "hehe" { -// t.Error(result[2]) -// } + if result[2] != "hehe" { + t.Error(result[2]) + } -// if result[3] != 3 { -// t.Error(result[3]) -// } -// } + if result[3] != 3 { + t.Error(result[3]) + } +} func BenchmarkTried_Put(b *testing.B) { - var data []TriedString + var data []string b.N = 1000000 count := 10 @@ -84,7 +84,7 @@ func BenchmarkTried_Put(b *testing.B) { char := randomdata.Number(0, 26) + 'a' content = append(content, rune(byte(char))) } - data = append(data, TriedString(string(content))) + data = append(data, (string(content))) } b.ResetTimer() @@ -99,7 +99,7 @@ func BenchmarkTried_Put(b *testing.B) { func BenchmarkTried_Get(b *testing.B) { - var data []TriedString + var data []string b.N = 1000000 count := 10 @@ -109,7 +109,7 @@ func BenchmarkTried_Get(b *testing.B) { char := randomdata.Number(0, 26) + 'a' content = append(content, rune(byte(char))) } - data = append(data, TriedString(content)) + data = append(data, string(content)) } b.N = b.N * count From 5a7a4f2c9201e76f91260eb94357393abbabf945 Mon Sep 17 00:00:00 2001 From: huangsimin Date: Mon, 19 Aug 2019 19:03:58 +0800 Subject: [PATCH 06/12] =?UTF-8?q?TODO:=20wordIndexUpperLower=20=E5=8E=9F?= =?UTF-8?q?=E5=9B=A0:=20=E8=BE=B9=E7=95=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tree/tried/tried.go | 41 +++++----- tree/tried/tried_index.go | 152 ++++++++++++++++++++++++++++++++++++++ tree/tried/tried_test.go | 99 ++++++++++++++++++++----- 3 files changed, 251 insertions(+), 41 deletions(-) create mode 100644 tree/tried/tried_index.go diff --git a/tree/tried/tried.go b/tree/tried/tried.go index fb93ebc..abfe0b8 100644 --- a/tree/tried/tried.go +++ b/tree/tried/tried.go @@ -1,15 +1,5 @@ package tried -type TriedString string - -func (ts TriedString) Size() uint { - return uint(len(ts)) -} - -func (ts TriedString) WordIndex(idx uint) uint { - return uint(ts[idx]) - 'a' -} - // func (ts TriedString) WordIndex(idx uint) uint { // w := ts[idx] // if w >= 'a' && w <= 'z' { @@ -21,14 +11,9 @@ func (ts TriedString) WordIndex(idx uint) uint { // } // } -type ObjectIndex interface { - WordIndex(idx uint) uint - Size() uint -} - type Tried struct { - root *Node - datasize uint + root *Node + wiStore *wordIndexStore } type Node struct { @@ -36,25 +21,34 @@ type Node struct { value interface{} } +// New 默认 WordIndexLower 意味着只支持小写 func New() *Tried { tried := &Tried{} tried.root = new(Node) - tried.datasize = 62 + + tried.wiStore = WordIndexDict[WordIndexLower] return tried } -func (tried *Tried) wordIndex(w byte) uint { - return uint(w) - 'a' +// NewWithWordType 选择单词的类型 WordIndexLower 意味着只支持小写 +func NewWithWordType(t WordIndexType) *Tried { + tried := &Tried{} + tried.root = new(Node) + + tried.wiStore = WordIndexDict[t] + + return tried } func (tried *Tried) Put(words string, values ...interface{}) { cur := tried.root var n *Node + for i := 0; i < len(words); i++ { - w := tried.wordIndex(words[i]) + w := tried.wiStore.Byte2Index(words[i]) if cur.data == nil { - cur.data = make([]*Node, tried.datasize) + cur.data = make([]*Node, tried.wiStore.DataSize) } if n = cur.data[w]; n == nil { @@ -81,8 +75,9 @@ func (tried *Tried) Put(words string, values ...interface{}) { func (tried *Tried) Get(words string) interface{} { cur := tried.root var n *Node + for i := 0; i < len(words); i++ { - w := tried.wordIndex(words[i]) //TODO: 升级Index 函数 + w := tried.wiStore.Byte2Index(words[i]) //TODO: 升级Index 函数 if n = cur.data[w]; n == nil { return nil } diff --git a/tree/tried/tried_index.go b/tree/tried/tried_index.go new file mode 100644 index 0000000..00c2a80 --- /dev/null +++ b/tree/tried/tried_index.go @@ -0,0 +1,152 @@ +package tried + +var WordIndexDict map[WordIndexType]*wordIndexStore + +func init() { + WordIndexDict = make(map[WordIndexType]*wordIndexStore) + WordIndexDict[WordIndexLower] = &wordIndexStore{WordIndexLower, wordIndexLower, indexWordLower, 26} + WordIndexDict[WordIndexUpper] = &wordIndexStore{WordIndexUpper, wordIndexUpper, indexWordUpper, 26} + WordIndexDict[WordIndexDigital] = &wordIndexStore{WordIndexDigital, wordIndexDigital, indexWordDigital, 10} + WordIndexDict[WordIndexUpperLower] = &wordIndexStore{WordIndexUpperLower, wordIndexUpperLower, indexWordUpperLower, 52} + WordIndexDict[WordIndexLowerDigital] = &wordIndexStore{WordIndexLowerDigital, wordIndexLowerDigital, indexWordLowerDigital, 36} + WordIndexDict[WordIndexUpperDigital] = &wordIndexStore{WordIndexUpperDigital, wordIndexUpperDigital, indexWordUpperDigital, 36} + WordIndexDict[WordIndexUpperLowerDigital] = &wordIndexStore{WordIndexUpperLowerDigital, wordIndexUpperLowerDigital, indexWordUpperLowerDigital, 62} + WordIndexDict[WordIndex256] = &wordIndexStore{WordIndex256, wordIndex256, indexWord256, 256} + WordIndexDict[WordIndex32to126] = &wordIndexStore{WordIndex32to126, wordIndex32to126, indexWord32to126, ('~' - ' ' + 1)} +} + +// WordIndexType 单词统计的类型 eg. WordIndexLower 意味Put的单词只支持小写... +type WordIndexType int + +const ( + _ WordIndexType = iota + WordIndexLower + WordIndexUpper + WordIndexDigital + WordIndexUpperLower + WordIndexLowerDigital + WordIndexUpperDigital + WordIndexUpperLowerDigital + WordIndex256 + WordIndex32to126 +) + +type wordIndexStore struct { + Type WordIndexType + Byte2Index func(byte) uint + Index2Byte func(uint) byte + DataSize uint +} + +func wordIndexLower(w byte) uint { + return uint(w) - 'a' +} + +func indexWordLower(w uint) byte { + return byte(w) + 'a' +} + +// +func wordIndexUpper(w byte) uint { + return uint(w) - 'A' +} + +func indexWordUpper(w uint) byte { + return byte(w) + 'A' +} + +// +func wordIndexDigital(w byte) uint { + return uint(w) - '0' +} + +func indexWordDigital(w uint) byte { + return byte(w) + '0' +} + +// +func wordIndexUpperLower(w byte) uint { + iw := uint(w) + if iw > 'a' { + return iw - 'a' + } + return iw - 'A' + 26 +} + +func indexWordUpperLower(w uint) byte { + + if w >= 26 { + return byte(w) + 'A' + } + return byte(w) + 'a' +} + +// +func wordIndexLowerDigital(w byte) uint { + iw := uint(w) + if iw > 'a' { + return iw - 'a' + } + return iw - '0' + 26 +} + +func indexWordLowerDigital(w uint) byte { + if w >= 26 { + return byte(w) + '0' + } + return byte(w) + 'a' +} + +// +func wordIndexUpperDigital(w byte) uint { + iw := uint(w) + if iw > 'A' { + return iw - 'A' + } + return iw - '0' + 26 +} + +func indexWordUpperDigital(w uint) byte { + if w >= 26 { + return byte(w) + '0' + } + return byte(w) + 'a' +} + +// +func wordIndexUpperLowerDigital(w byte) uint { + iw := uint(w) + if iw > 'a' { + return iw - 'a' + } else if iw > 'A' { + return iw - 'A' + 26 + } + return iw - '0' + 52 +} + +func indexWordUpperLowerDigital(w uint) byte { + if w >= 52 { + return byte(w) + '0' + } else if w >= 26 { + return byte(w) + 'A' + } + return byte(w) + 'a' +} + +// wordIndex256 all byte +func wordIndex256(w byte) uint { + return uint(w) +} + +func indexWord256(w uint) byte { + return byte(w) +} + +// wordIndex32to126 空格-~ 0-9 a-z A-Z 符号等 +func wordIndex32to126(w byte) uint { + return uint(w) - ' ' +} + +func indexWord32to126(w uint) byte { + return byte(w) + ' ' +} diff --git a/tree/tried/tried_test.go b/tree/tried/tried_test.go index 1713c87..422c18c 100644 --- a/tree/tried/tried_test.go +++ b/tree/tried/tried_test.go @@ -1,11 +1,44 @@ package tried import ( + "bytes" + "encoding/gob" + "os" "testing" "github.com/Pallinder/go-randomdata" ) +func TestTried_NewWith(t *testing.T) { + tried := NewWithWordType(WordIndex32to126) + words := "~ 23fd " + tried.Put(words) + if tried.Get(words) == nil { + t.Error("should be not nil") + } + + tried = NewWithWordType(WordIndexLower) + words = "az" + tried.Put(words) + if tried.Get(words) == nil { + t.Error("should be not nil") + } + + tried = NewWithWordType(WordIndexUpper) + words = "AZ" + tried.Put(words) + if tried.Get(words) == nil { + t.Error("should be not nil") + } + + tried = NewWithWordType(WordIndexUpperLower) + words = "AZazsdfsd" + tried.Put(words) + if tried.Get(words) == nil { + t.Error("should be not nil") + } +} + func TestTried_PutAndGet1(t *testing.T) { tried := New() @@ -72,20 +105,49 @@ func TestTried_Traversal(t *testing.T) { } } +func TesStoreData(t *testing.T) { + var l []string + const N = 1000000 + for i := 0; i < N; i++ { + var content []rune + for c := 0; c < randomdata.Number(5, 15); c++ { + char := randomdata.Number(0, 26) + 'a' + content = append(content, rune(byte(char))) + } + l = append(l, (string(content))) + } + + var result bytes.Buffer + encoder := gob.NewEncoder(&result) + encoder.Encode(l) + lbytes := result.Bytes() + f, _ := os.OpenFile("tried.log", os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0666) + f.Write(lbytes) +} + +func Load() []string { + var result []string + f, _ := os.Open("tried.log") + gob.NewDecoder(f).Decode(&result) + return result +} + func BenchmarkTried_Put(b *testing.B) { var data []string b.N = 1000000 count := 10 - for i := 0; i < b.N; i++ { - var content []rune - for c := 0; c < randomdata.Number(5, 15); c++ { - char := randomdata.Number(0, 26) + 'a' - content = append(content, rune(byte(char))) - } - data = append(data, (string(content))) - } + // for i := 0; i < b.N; i++ { + // var content []rune + // for c := 0; c < randomdata.Number(5, 15); c++ { + // char := randomdata.Number(0, 26) + 'a' + // content = append(content, rune(byte(char))) + // } + // data = append(data, (string(content))) + // } + + data = Load() b.ResetTimer() b.N = b.N * count @@ -98,19 +160,20 @@ func BenchmarkTried_Put(b *testing.B) { } func BenchmarkTried_Get(b *testing.B) { - + b.StopTimer() var data []string b.N = 1000000 count := 10 - for i := 0; i < b.N; i++ { - var content []rune - for c := 0; c < randomdata.Number(5, 15); c++ { - char := randomdata.Number(0, 26) + 'a' - content = append(content, rune(byte(char))) - } - data = append(data, string(content)) - } + // for i := 0; i < b.N; i++ { + // var content []rune + // for c := 0; c < randomdata.Number(5, 15); c++ { + // char := randomdata.Number(0, 26) + 'a' + // content = append(content, rune(byte(char))) + // } + // data = append(data, string(content)) + // } + data = Load() b.N = b.N * count @@ -119,7 +182,7 @@ func BenchmarkTried_Get(b *testing.B) { tried.Put(v) } - b.ResetTimer() + b.StartTimer() for c := 0; c < count; c++ { for _, v := range data { tried.Get(v) From 6679cba338b17b008b5e0d7c0f276aa7653d5c7e Mon Sep 17 00:00:00 2001 From: eson <474420502@qq.com> Date: Tue, 20 Aug 2019 01:39:14 +0800 Subject: [PATCH 07/12] =?UTF-8?q?TODO:=20Test=20Prefix=20=E6=89=80?= =?UTF-8?q?=E6=9C=89Index2word=E5=87=BD=E6=95=B0=E6=98=AF=E5=90=A6?= =?UTF-8?q?=E6=AD=A3=E7=A1=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tree/tried/tried.go | 41 +++++++++++++++++++----- tree/tried/tried_index.go | 10 +++--- tree/tried/tried_test.go | 66 +++++++++++++++++++++++++++++++++++---- 3 files changed, 99 insertions(+), 18 deletions(-) diff --git a/tree/tried/tried.go b/tree/tried/tried.go index abfe0b8..d0052de 100644 --- a/tree/tried/tried.go +++ b/tree/tried/tried.go @@ -1,5 +1,7 @@ package tried +import "github.com/davecgh/go-spew/spew" + // func (ts TriedString) WordIndex(idx uint) uint { // w := ts[idx] // if w >= 'a' && w <= 'z' { @@ -90,6 +92,10 @@ func (tried *Tried) Has(words string) bool { return tried.Get(words) != nil } +func (tried *Tried) HasPrefix(words string) bool { + return tried.Get(words) != nil +} + func (tried *Tried) Traversal(every func(cidx uint, value interface{}) bool) { var traversal func(*Node) @@ -112,10 +118,31 @@ func (tried *Tried) Traversal(every func(cidx uint, value interface{}) bool) { traversal(root) } -// func (tried *Tried) String() []string { -// var result []string -// tried.Traversal(func(cidx uint, value interface{}) bool { -// result = append(result, spew.) -// }) -// return result -// } +func (tried *Tried) WordsArray() []string { + var result []string + + var traversal func([]rune, *Node) + traversal = func(prefix []rune, cur *Node) { + + for i, n := range cur.data { + if n != nil { + prefix = append(prefix, rune(tried.wiStore.Index2Byte(uint(i)))) + traversal(prefix, n) + if n.value != nil { + result = append(result, string(prefix)) + } + } + } + + } + + if tried.root != nil { + traversal([]rune{}, tried.root) + } + + return result +} + +func (tried *Tried) String() string { + return spew.Sprint(tried.WordsArray()) +} diff --git a/tree/tried/tried_index.go b/tree/tried/tried_index.go index 00c2a80..62a489a 100644 --- a/tree/tried/tried_index.go +++ b/tree/tried/tried_index.go @@ -67,7 +67,7 @@ func indexWordDigital(w uint) byte { // func wordIndexUpperLower(w byte) uint { iw := uint(w) - if iw > 'a' { + if iw >= 'a' { return iw - 'a' } return iw - 'A' + 26 @@ -84,7 +84,7 @@ func indexWordUpperLower(w uint) byte { // func wordIndexLowerDigital(w byte) uint { iw := uint(w) - if iw > 'a' { + if iw >= 'a' { return iw - 'a' } return iw - '0' + 26 @@ -100,7 +100,7 @@ func indexWordLowerDigital(w uint) byte { // func wordIndexUpperDigital(w byte) uint { iw := uint(w) - if iw > 'A' { + if iw >= 'A' { return iw - 'A' } return iw - '0' + 26 @@ -116,9 +116,9 @@ func indexWordUpperDigital(w uint) byte { // func wordIndexUpperLowerDigital(w byte) uint { iw := uint(w) - if iw > 'a' { + if iw >= 'a' { return iw - 'a' - } else if iw > 'A' { + } else if iw >= 'A' { return iw - 'A' + 26 } return iw - '0' + 52 diff --git a/tree/tried/tried_test.go b/tree/tried/tried_test.go index 422c18c..a9585de 100644 --- a/tree/tried/tried_test.go +++ b/tree/tried/tried_test.go @@ -10,12 +10,9 @@ import ( ) func TestTried_NewWith(t *testing.T) { - tried := NewWithWordType(WordIndex32to126) - words := "~ 23fd " - tried.Put(words) - if tried.Get(words) == nil { - t.Error("should be not nil") - } + + var tried *Tried + var words string tried = NewWithWordType(WordIndexLower) words = "az" @@ -37,6 +34,63 @@ func TestTried_NewWith(t *testing.T) { if tried.Get(words) == nil { t.Error("should be not nil") } + + tried = NewWithWordType(WordIndexUpperDigital) + words = "AZ021365546987" + tried.Put(words) + if tried.Get(words) == nil { + t.Error("should be not nil") + } + + tried = NewWithWordType(WordIndexLowerDigital) + words = "azfdgyjmnbjhkpuizxasd021365546987" + tried.Put(words) + if tried.Get(words) == nil { + t.Error("should be not nil") + } + + tried = NewWithWordType(WordIndexUpperLowerDigital) + words = "AZazsdfsd131209" + tried.Put(words) + if tried.Get(words) == nil { + t.Error("should be not nil") + } + + tried = NewWithWordType(WordIndex256) + words = "21`3tcdbxcfhyop8901zc[]\\'/?()#$%^&**! 09-阿萨德发生的官方说的对符合规定" + tried.Put(words) + if tried.Get(words) == nil { + t.Error("should be not nil") + } + + tried = NewWithWordType(WordIndex32to126) + words = " 21`3tcdbxcfhyop8901zc[]\\'/?()#$%^&**! " + tried.Put(words) + if tried.Get(words) == nil { + t.Error("should be not nil") + } +} + +func TestTried_String(t *testing.T) { + var tried *Tried + var wordsCollection []string + var wordsList [][]string + var triedList []*Tried + + triedList = append(triedList, NewWithWordType(WordIndexLower)) + wordsList = append(wordsList, []string{"adazx", "assdfhgnvb", "ewqyiouyasdfmzvxz"}) + + for i := 0; i < len(triedList); i++ { + tried = triedList[i] + wordsCollection = wordsList[i] + for _, words := range wordsCollection { + tried.Put(words) + if tried.Get(words) == nil { + t.Error("should be not nil") + } + } + t.Error(tried.WordsArray()) + } } func TestTried_PutAndGet1(t *testing.T) { From e63bfa5193c5f50807bc380f686e9dcb32174818 Mon Sep 17 00:00:00 2001 From: huangsimin Date: Tue, 20 Aug 2019 11:11:10 +0800 Subject: [PATCH 08/12] =?UTF-8?q?=E5=AE=8C=E6=88=90WordsArray?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tree/tried/tried.go | 6 +- tree/tried/tried_index.go | 14 ++--- tree/tried/tried_test.go | 127 +++++++++++++++++++++++--------------- 3 files changed, 86 insertions(+), 61 deletions(-) diff --git a/tree/tried/tried.go b/tree/tried/tried.go index d0052de..4b511bb 100644 --- a/tree/tried/tried.go +++ b/tree/tried/tried.go @@ -126,10 +126,10 @@ func (tried *Tried) WordsArray() []string { for i, n := range cur.data { if n != nil { - prefix = append(prefix, rune(tried.wiStore.Index2Byte(uint(i)))) - traversal(prefix, n) + nextPrefix := append(prefix, rune(tried.wiStore.Index2Byte(uint(i)))) + traversal(nextPrefix, n) if n.value != nil { - result = append(result, string(prefix)) + result = append(result, string(nextPrefix)) } } } diff --git a/tree/tried/tried_index.go b/tree/tried/tried_index.go index 62a489a..657773a 100644 --- a/tree/tried/tried_index.go +++ b/tree/tried/tried_index.go @@ -76,7 +76,7 @@ func wordIndexUpperLower(w byte) uint { func indexWordUpperLower(w uint) byte { if w >= 26 { - return byte(w) + 'A' + return byte(w) - 26 + 'A' } return byte(w) + 'a' } @@ -92,7 +92,7 @@ func wordIndexLowerDigital(w byte) uint { func indexWordLowerDigital(w uint) byte { if w >= 26 { - return byte(w) + '0' + return byte(w) - 26 + '0' } return byte(w) + 'a' } @@ -108,9 +108,9 @@ func wordIndexUpperDigital(w byte) uint { func indexWordUpperDigital(w uint) byte { if w >= 26 { - return byte(w) + '0' + return byte(w) - 26 + '0' } - return byte(w) + 'a' + return byte(w) + 'A' } // @@ -126,14 +126,14 @@ func wordIndexUpperLowerDigital(w byte) uint { func indexWordUpperLowerDigital(w uint) byte { if w >= 52 { - return byte(w) + '0' + return byte(w) - 52 + '0' } else if w >= 26 { - return byte(w) + 'A' + return byte(w) - 26 + 'A' } return byte(w) + 'a' } -// wordIndex256 all byte +// wordIndex256 all byte 不支持中文 func wordIndex256(w byte) uint { return uint(w) } diff --git a/tree/tried/tried_test.go b/tree/tried/tried_test.go index a9585de..5ec2f7e 100644 --- a/tree/tried/tried_test.go +++ b/tree/tried/tried_test.go @@ -4,70 +4,55 @@ import ( "bytes" "encoding/gob" "os" + "sort" "testing" + "github.com/davecgh/go-spew/spew" + "github.com/Pallinder/go-randomdata" ) func TestTried_NewWith(t *testing.T) { - var tried *Tried - var words string + var wordsCollection []string + var wordsList [][]string + var triedList []*Tried - tried = NewWithWordType(WordIndexLower) - words = "az" - tried.Put(words) - if tried.Get(words) == nil { - t.Error("should be not nil") - } + triedList = append(triedList, NewWithWordType(WordIndexLower)) + wordsList = append(wordsList, []string{"adazx", "assdfhgnvb", "ewqyiouyasdfmzvxz"}) - tried = NewWithWordType(WordIndexUpper) - words = "AZ" - tried.Put(words) - if tried.Get(words) == nil { - t.Error("should be not nil") - } + triedList = append(triedList, NewWithWordType(WordIndexUpper)) + wordsList = append(wordsList, []string{"ADFSZ", "DEFASEWRQWER", "GFHJERQWREWTNBVFGFH"}) - tried = NewWithWordType(WordIndexUpperLower) - words = "AZazsdfsd" - tried.Put(words) - if tried.Get(words) == nil { - t.Error("should be not nil") - } + triedList = append(triedList, NewWithWordType(WordIndexUpperLower)) + wordsList = append(wordsList, []string{"adazxAZDSAFASZRETHGFTUIPK", "assdfhgDSFGnvb", "yaXZLMPOIQsdGHFfmFBzvxz"}) - tried = NewWithWordType(WordIndexUpperDigital) - words = "AZ021365546987" - tried.Put(words) - if tried.Get(words) == nil { - t.Error("should be not nil") - } + triedList = append(triedList, NewWithWordType(WordIndexUpperDigital)) + wordsList = append(wordsList, []string{"AZ3428934470193", "ZPQPDEK09876543629812", "AZEWIRU0192456FDEWR9032"}) - tried = NewWithWordType(WordIndexLowerDigital) - words = "azfdgyjmnbjhkpuizxasd021365546987" - tried.Put(words) - if tried.Get(words) == nil { - t.Error("should be not nil") - } + triedList = append(triedList, NewWithWordType(WordIndexLowerDigital)) + wordsList = append(wordsList, []string{"az3428934470193", "zpqwe0987654362sf9812", "az21301az09azdstr540"}) - tried = NewWithWordType(WordIndexUpperLowerDigital) - words = "AZazsdfsd131209" - tried.Put(words) - if tried.Get(words) == nil { - t.Error("should be not nil") - } + triedList = append(triedList, NewWithWordType(WordIndexUpperLowerDigital)) + wordsList = append(wordsList, []string{"azAZ09", "aRGFDSFDSzAasdZ06789", "A28374JHFudfsu09qwzzdsw874FDSAZfer"}) - tried = NewWithWordType(WordIndex256) - words = "21`3tcdbxcfhyop8901zc[]\\'/?()#$%^&**! 09-阿萨德发生的官方说的对符合规定" - tried.Put(words) - if tried.Get(words) == nil { - t.Error("should be not nil") - } + triedList = append(triedList, NewWithWordType(WordIndex256)) + wordsList = append(wordsList, []string{"21`3tcdbxcfhyop8901zc[]\\'/?()#$%^&**! 09-阿萨德发生的官方说的对符合规定", "符号!@$*#))(#*", "╜╝╞╟╠╡╢╣╤╥╦╧╨╩╪╫╬╭╮╯╰╱╲╳▁▂▃▄▅▆▇█ ▉ ▊▋▌▍▎▏"}) - tried = NewWithWordType(WordIndex32to126) - words = " 21`3tcdbxcfhyop8901zc[]\\'/?()#$%^&**! " - tried.Put(words) - if tried.Get(words) == nil { - t.Error("should be not nil") + triedList = append(triedList, NewWithWordType(WordIndex32to126)) + wordsList = append(wordsList, []string{" 21`3tcdbxcfhyop8901zc[]\\'/?()#$%^&**! ", "AZaz09~ dys!@#$)(*^$#", "<>.,?/"}) + + for i := 0; i < len(triedList); i++ { + tried = triedList[i] + wordsCollection = wordsList[i] + for _, words := range wordsCollection { + tried.Put(words) + + if tried.Get(words) == nil { + t.Error("should be not nil the type is ", tried.wiStore.Type) + } + } + // t.Error(tried.WordsArray()) } } @@ -80,16 +65,56 @@ func TestTried_String(t *testing.T) { triedList = append(triedList, NewWithWordType(WordIndexLower)) wordsList = append(wordsList, []string{"adazx", "assdfhgnvb", "ewqyiouyasdfmzvxz"}) + triedList = append(triedList, NewWithWordType(WordIndexUpper)) + wordsList = append(wordsList, []string{"ADFSZ", "DEFASEWRQWER", "GFHJERQWREWTNBVFGFH"}) + + triedList = append(triedList, NewWithWordType(WordIndexUpperLower)) + wordsList = append(wordsList, []string{"adazxAZDSAFASZRETHGFTUIPK", "assdfhgDSFGnvb", "yaXZLMPOIQsdGHFfmFBzvxz"}) + + triedList = append(triedList, NewWithWordType(WordIndexUpperDigital)) + wordsList = append(wordsList, []string{"AZ3428934470193", "ZPQPDEK09876543629812", "AZEWIRU0192456FDEWR9032"}) + + triedList = append(triedList, NewWithWordType(WordIndexLowerDigital)) + wordsList = append(wordsList, []string{"az3428934470193", "zpqwe0987654362sf9812", "az21301az09azdstr540"}) + + triedList = append(triedList, NewWithWordType(WordIndexUpperLowerDigital)) + wordsList = append(wordsList, []string{"azAZ09", "aRGFDSFDSzAasdZ06789", "A28374JHFudfsu09qwzzdsw874FDSAZfer"}) + + triedList = append(triedList, NewWithWordType(WordIndex256)) + wordsList = append(wordsList, []string{"21`3tcdbxcfhyop8901zc[]\\'/?()#$%^&**!\x01 09-213", "!@$*#))(#*", `\/213dsfsdf`}) + + triedList = append(triedList, NewWithWordType(WordIndex32to126)) + wordsList = append(wordsList, []string{" 21`3tcdbxcfhyop8901zc[]\\'/?()#$%^&**! ", "AZaz09~ dys!@#$)(*^$#", "<>.,?/"}) + for i := 0; i < len(triedList); i++ { tried = triedList[i] wordsCollection = wordsList[i] for _, words := range wordsCollection { tried.Put(words) if tried.Get(words) == nil { - t.Error("should be not nil") + t.Error("should be not nil the type is ", tried.wiStore.Type) } } - t.Error(tried.WordsArray()) + sort.Slice(wordsCollection, func(i, j int) bool { + if wordsCollection[i] < wordsCollection[j] { + return true + } + return false + }) + + resultArray := tried.WordsArray() + sort.Slice(resultArray, func(i, j int) bool { + if resultArray[i] < resultArray[j] { + return true + } + return false + }) + result1 := spew.Sprint(resultArray) + result2 := spew.Sprint(wordsCollection) + if result1 != result2 { + t.Error(result1, " != ", result2) + } + // t.Error(tried.WordsArray()) } } From ddef34cf20f03d5a91e24050ba3f48055dd82d03 Mon Sep 17 00:00:00 2001 From: huangsimin Date: Tue, 20 Aug 2019 11:13:26 +0800 Subject: [PATCH 09/12] =?UTF-8?q?=E8=A1=A5=E5=85=85=E7=BC=BA=E5=B0=91?= =?UTF-8?q?=E7=9A=84=E6=B5=8B=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tree/tried/tried_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tree/tried/tried_test.go b/tree/tried/tried_test.go index 5ec2f7e..4e5378f 100644 --- a/tree/tried/tried_test.go +++ b/tree/tried/tried_test.go @@ -68,6 +68,9 @@ func TestTried_String(t *testing.T) { triedList = append(triedList, NewWithWordType(WordIndexUpper)) wordsList = append(wordsList, []string{"ADFSZ", "DEFASEWRQWER", "GFHJERQWREWTNBVFGFH"}) + triedList = append(triedList, NewWithWordType(WordIndexDigital)) + wordsList = append(wordsList, []string{"093875239457", "09123406534", "0912340846"}) + triedList = append(triedList, NewWithWordType(WordIndexUpperLower)) wordsList = append(wordsList, []string{"adazxAZDSAFASZRETHGFTUIPK", "assdfhgDSFGnvb", "yaXZLMPOIQsdGHFfmFBzvxz"}) From edecf0453e549cb77e3be2cc5aa176bda41d4e31 Mon Sep 17 00:00:00 2001 From: huangsimin Date: Tue, 20 Aug 2019 11:38:24 +0800 Subject: [PATCH 10/12] =?UTF-8?q?tried=20WordsArray=E6=94=AF=E6=8C=81?= =?UTF-8?q?=E4=B8=AD=E6=96=87,=20=E5=B9=B6=E4=B8=94=E5=8D=95=E5=85=83?= =?UTF-8?q?=E6=B5=8B=E8=AF=95=E9=80=9A=E8=BF=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tree/tried/tried.go | 14 ++++++++------ tree/tried/tried_index.go | 2 +- tree/tried/tried_test.go | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/tree/tried/tried.go b/tree/tried/tried.go index 4b511bb..4c58fff 100644 --- a/tree/tried/tried.go +++ b/tree/tried/tried.go @@ -46,8 +46,10 @@ func (tried *Tried) Put(words string, values ...interface{}) { cur := tried.root var n *Node - for i := 0; i < len(words); i++ { - w := tried.wiStore.Byte2Index(words[i]) + bytes := []byte(words) + + for i := 0; i < len(bytes); i++ { + w := tried.wiStore.Byte2Index(bytes[i]) if cur.data == nil { cur.data = make([]*Node, tried.wiStore.DataSize) @@ -121,12 +123,12 @@ func (tried *Tried) Traversal(every func(cidx uint, value interface{}) bool) { func (tried *Tried) WordsArray() []string { var result []string - var traversal func([]rune, *Node) - traversal = func(prefix []rune, cur *Node) { + var traversal func([]byte, *Node) + traversal = func(prefix []byte, cur *Node) { for i, n := range cur.data { if n != nil { - nextPrefix := append(prefix, rune(tried.wiStore.Index2Byte(uint(i)))) + nextPrefix := append(prefix, tried.wiStore.Index2Byte(uint(i))) traversal(nextPrefix, n) if n.value != nil { result = append(result, string(nextPrefix)) @@ -137,7 +139,7 @@ func (tried *Tried) WordsArray() []string { } if tried.root != nil { - traversal([]rune{}, tried.root) + traversal([]byte{}, tried.root) } return result diff --git a/tree/tried/tried_index.go b/tree/tried/tried_index.go index 657773a..c73b70b 100644 --- a/tree/tried/tried_index.go +++ b/tree/tried/tried_index.go @@ -133,7 +133,7 @@ func indexWordUpperLowerDigital(w uint) byte { return byte(w) + 'a' } -// wordIndex256 all byte 不支持中文 +// wordIndex256 all byte 支持中文 func wordIndex256(w byte) uint { return uint(w) } diff --git a/tree/tried/tried_test.go b/tree/tried/tried_test.go index 4e5378f..b6b7789 100644 --- a/tree/tried/tried_test.go +++ b/tree/tried/tried_test.go @@ -84,7 +84,7 @@ func TestTried_String(t *testing.T) { wordsList = append(wordsList, []string{"azAZ09", "aRGFDSFDSzAasdZ06789", "A28374JHFudfsu09qwzzdsw874FDSAZfer"}) triedList = append(triedList, NewWithWordType(WordIndex256)) - wordsList = append(wordsList, []string{"21`3tcdbxcfhyop8901zc[]\\'/?()#$%^&**!\x01 09-213", "!@$*#))(#*", `\/213dsfsdf`}) + wordsList = append(wordsList, []string{"21`3tcdbxcf囉hyop打算8901zc[]\\'/?()#$%^&**!\x01 09-213", "的支持中文", "!@$*#)中文)(#*", `\/213dsfsdf`}) triedList = append(triedList, NewWithWordType(WordIndex32to126)) wordsList = append(wordsList, []string{" 21`3tcdbxcfhyop8901zc[]\\'/?()#$%^&**! ", "AZaz09~ dys!@#$)(*^$#", "<>.,?/"}) From b92f270429eb8c524158ec6650a4bfbfc0d448eb Mon Sep 17 00:00:00 2001 From: huangsimin Date: Tue, 20 Aug 2019 15:03:05 +0800 Subject: [PATCH 11/12] =?UTF-8?q?=E5=89=8D=E7=BC=80=E6=A0=91=20=E5=AE=8C?= =?UTF-8?q?=E6=88=90=E5=9F=BA=E6=9C=AC=E5=8A=9F=E8=83=BD,=20=E5=87=86?= =?UTF-8?q?=E5=A4=87=E6=B7=BB=E5=8A=A0=E4=B8=80=E4=B8=AA=E9=AB=98=E5=BA=A6?= =?UTF-8?q?=E5=B1=9E=E6=80=A7=E8=BE=BE=E5=88=B0=E9=95=BF=E5=BA=A6=E6=A3=80?= =?UTF-8?q?=E7=B4=A2=E7=9A=84=E7=9B=AE=E7=9A=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tree/tried/tried.go | 74 +++++++++++++++++---- tree/tried/tried_test.go | 140 ++++++++++++++++++++++++++++++++++----- 2 files changed, 185 insertions(+), 29 deletions(-) diff --git a/tree/tried/tried.go b/tree/tried/tried.go index 4c58fff..3150406 100644 --- a/tree/tried/tried.go +++ b/tree/tried/tried.go @@ -62,26 +62,20 @@ func (tried *Tried) Put(words string, values ...interface{}) { cur = n } - vlen := len(values) - switch vlen { - case 0: - cur.value = tried - case 1: + if values != nil { cur.value = values[0] - case 2: - // TODO: 执行函数 values[1] 为函数类型 func (cur *Node, value interface{}) ...可以插入, 也可以不插入 - default: - panic("unknow select to do") + } else { + cur.value = tried } - } func (tried *Tried) Get(words string) interface{} { cur := tried.root var n *Node + bytes := []byte(words) - for i := 0; i < len(words); i++ { - w := tried.wiStore.Byte2Index(words[i]) //TODO: 升级Index 函数 + for i := 0; i < len(bytes); i++ { + w := tried.wiStore.Byte2Index(bytes[i]) //TODO: 升级Index 函数 if n = cur.data[w]; n == nil { return nil } @@ -95,7 +89,61 @@ func (tried *Tried) Has(words string) bool { } func (tried *Tried) HasPrefix(words string) bool { - return tried.Get(words) != nil + cur := tried.root + var n *Node + bytes := []byte(words) + + for i := 0; i < len(bytes); i++ { + w := tried.wiStore.Byte2Index(bytes[i]) //TODO: 升级Index 函数 + if n = cur.data[w]; n == nil { + return false + } + cur = n + } + return true +} + +func (tried *Tried) PrefixWords(words string) []string { + cur := tried.root + var n *Node + bytes := []byte(words) + + var header []byte + for i := 0; i < len(bytes); i++ { + curbyte := bytes[i] + header = append(header, curbyte) + w := tried.wiStore.Byte2Index(curbyte) + if n = cur.data[w]; n == nil { + return nil + } + cur = n + } + + var result []string + + var traversal func([]byte, *Node) + traversal = func(prefix []byte, cur *Node) { + + for i, n := range cur.data { + if n != nil { + nextPrefix := append(prefix, tried.wiStore.Index2Byte(uint(i))) + traversal(nextPrefix, n) + if n.value != nil { + result = append(result, string(append(header, nextPrefix...))) + } + } + } + + } + // 拼接头 + if n != nil { + if n.value != nil { + result = append(result, string(header)) + } + traversal([]byte{}, n) + } + + return result } func (tried *Tried) Traversal(every func(cidx uint, value interface{}) bool) { diff --git a/tree/tried/tried_test.go b/tree/tried/tried_test.go index b6b7789..5cbd88a 100644 --- a/tree/tried/tried_test.go +++ b/tree/tried/tried_test.go @@ -12,6 +12,127 @@ import ( "github.com/Pallinder/go-randomdata" ) +func CompareSliceWithSorted(source, words []string) (bool, string) { + sort.Slice(words, func(i, j int) bool { + if words[i] < words[j] { + return true + } + return false + }) + + // source := tried.WordsArray() + sort.Slice(source, func(i, j int) bool { + if source[i] < source[j] { + return true + } + return false + }) + result1 := spew.Sprint(source) + result2 := spew.Sprint(words) + + if result1 != result2 { + return false, spew.Sprint(result1, " != ", result2) + } + return true, "" +} + +func TestTried_Has(t *testing.T) { + var tried *Tried + tried = NewWithWordType(WordIndexLower) + tried.Put("ads") + tried.Put("zadads") + tried.Put("asdgdf") + if !tried.Has("ads") { + t.Error("ads is exist, but not has") + } + + if !tried.HasPrefix("ad") { + t.Error("ads is exist, but not HasPrefix") + } + + if !tried.HasPrefix("za") { + t.Error("ads is exist, but not HasPrefix") + } + + if tried.HasPrefix("fsdf") { + t.Error("fsdf is not exist, but HasPrefix") + } + + if len(tried.String()) < 10 { + t.Error(tried.WordsArray()) + } +} +func TestTried_PrefixWords(t *testing.T) { + + var tried *Tried + var wordsCollection []string + var input []string + + var wordsList [][]string + var inputParams [][]string + var triedList []*Tried + + triedList = append(triedList, NewWithWordType(WordIndexLower)) + inputParams = append(inputParams, []string{"ad", "adf"}) + wordsList = append(wordsList, []string{"ad", "adfsxzcdas", "adfadsasd"}) + + triedList = append(triedList, NewWithWordType(WordIndexUpper)) + inputParams = append(inputParams, []string{"AD", "ADF"}) + wordsList = append(wordsList, []string{"AD", "ADFSXZCDAS", "ADFADSASD"}) + + triedList = append(triedList, NewWithWordType(WordIndexUpperLower)) + inputParams = append(inputParams, []string{"aD", "aDf"}) + wordsList = append(wordsList, []string{"aDF", "aDfsxzcdas", "aDfadsasd"}) + + triedList = append(triedList, NewWithWordType(WordIndexUpperDigital)) + inputParams = append(inputParams, []string{"A09D", "A09DF"}) + wordsList = append(wordsList, []string{"A09D", "A09DFSXZCD312AS", "A09DFA32DSASD"}) + + triedList = append(triedList, NewWithWordType(WordIndexLowerDigital)) + inputParams = append(inputParams, []string{"a09d", "a09df"}) + wordsList = append(wordsList, []string{"a09d", "a09dfsxzcd312as", "a09dfa32dsasd"}) + + triedList = append(triedList, NewWithWordType(WordIndexUpperLowerDigital)) + inputParams = append(inputParams, []string{"A09d", "A09dZ"}) + wordsList = append(wordsList, []string{"A09d", "A09dZsxzcd312as", "A09dZa32dsasd"}) + + triedList = append(triedList, NewWithWordType(WordIndex256)) + inputParams = append(inputParams, []string{"阿萨德", "阿萨德!"}) + wordsList = append(wordsList, []string{"阿萨德", "阿萨德!@$*#))(#*", "阿萨德!╜╝╞╟╠╡╢╣╤╥╦╧╨╩╪╫╬╭╮╯╰╱╲╳▁▂▃▄▅▆▇█ ▉ ▊▋▌▍▎▏"}) + + triedList = append(triedList, NewWithWordType(WordIndex32to126)) + inputParams = append(inputParams, []string{" `", " `<"}) + wordsList = append(wordsList, []string{" `21`3tcdbxcfhyop8901zc[]\\'/?()#$%^&**! ", " `.,?/"}) + + for i := 0; i < len(triedList); i++ { + tried = triedList[i] + input = inputParams[i] + wordsCollection = wordsList[i] + for _, words := range wordsCollection { + tried.Put(words) + } + var prefixWords []string + prefixWords = tried.PrefixWords(input[0]) + if ok, errorResult := CompareSliceWithSorted(prefixWords, wordsCollection); !ok { + t.Error(errorResult) + } + + prefixWords = tried.PrefixWords(input[1]) + if ok, _ := CompareSliceWithSorted(prefixWords, wordsCollection); ok { + t.Error("should be not ok") + } + if len(prefixWords) != 2 { + t.Error(prefixWords, " Size of Array should be 2") + } + + if ok, errorResult := CompareSliceWithSorted(prefixWords, wordsCollection[1:]); !ok { + t.Error(errorResult) + } + + // t.Error(tried.WordsArray()) + } +} + func TestTried_NewWith(t *testing.T) { var tried *Tried var wordsCollection []string @@ -98,25 +219,12 @@ func TestTried_String(t *testing.T) { t.Error("should be not nil the type is ", tried.wiStore.Type) } } - sort.Slice(wordsCollection, func(i, j int) bool { - if wordsCollection[i] < wordsCollection[j] { - return true - } - return false - }) resultArray := tried.WordsArray() - sort.Slice(resultArray, func(i, j int) bool { - if resultArray[i] < resultArray[j] { - return true - } - return false - }) - result1 := spew.Sprint(resultArray) - result2 := spew.Sprint(wordsCollection) - if result1 != result2 { - t.Error(result1, " != ", result2) + if ok, errorResult := CompareSliceWithSorted(resultArray, wordsCollection); !ok { + t.Error(errorResult) } + // t.Error(tried.WordsArray()) } } From c7dd13034213bf0c048bd32a204365d530716d55 Mon Sep 17 00:00:00 2001 From: huangsimin Date: Tue, 20 Aug 2019 15:37:35 +0800 Subject: [PATCH 12/12] =?UTF-8?q?Tried=20=E5=9F=BA=E6=9C=AC=E5=8A=9F?= =?UTF-8?q?=E8=83=BD=E5=AE=8C=E6=88=90.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tree/tried/tried.go | 1 - tree/tried/tried_test.go | 5 ++++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tree/tried/tried.go b/tree/tried/tried.go index 3150406..a4b5e0a 100644 --- a/tree/tried/tried.go +++ b/tree/tried/tried.go @@ -27,7 +27,6 @@ type Node struct { func New() *Tried { tried := &Tried{} tried.root = new(Node) - tried.wiStore = WordIndexDict[WordIndexLower] return tried } diff --git a/tree/tried/tried_test.go b/tree/tried/tried_test.go index 5cbd88a..a92e15f 100644 --- a/tree/tried/tried_test.go +++ b/tree/tried/tried_test.go @@ -317,7 +317,10 @@ func TesStoreData(t *testing.T) { func Load() []string { var result []string - f, _ := os.Open("tried.log") + f, err := os.Open("tried.log") + if err != nil { + panic("先执行TesStoreData 然后再测试Benchmark") + } gob.NewDecoder(f).Decode(&result) return result }