Merge branch 'feature/todo-trie' into develop

This commit is contained in:
huangsimin 2019-08-20 15:38:08 +08:00
commit 2f0c0e8d89
4 changed files with 734 additions and 0 deletions

1
go.sum
View File

@ -4,3 +4,4 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/emirpasic/gods v1.12.0 h1:QAUIPSaCu4G+POclxeqb3F+WPpdKqFGlw36+yOzGlrg=
github.com/emirpasic/gods v1.12.0/go.mod h1:YfzfFFoVP/catgzJb4IKIqXjX78Ha8FMSDh3ymbK86o=
github.com/petar/GoLLRB v0.0.0-20190514000832-33fb24c13b99 h1:KcEvVBAvyHkUdFAygKAzwB6LAcZ6LS32WHmRD2VyXMI=

197
tree/tried/tried.go Normal file
View File

@ -0,0 +1,197 @@
package tried
import "github.com/davecgh/go-spew/spew"
// func (ts TriedString) WordIndex(idx uint) uint {
// w := ts[idx]
// if w >= 'a' && w <= 'z' {
// return uint(w) - 'a'
// } else if w >= 'A' && w <= 'Z' {
// return uint(w) - 'A' + 26
// } else {
// return uint(w) - '0' + 52
// }
// }
// Tried is a byte-wise trie. Each byte of a stored word is translated to a
// child slot through wiStore, so the set of bytes a trie accepts is decided
// by the index store chosen at construction time.
type Tried struct {
// root is the entry node; it corresponds to the empty word.
root *Node
// wiStore supplies the byte<->slot mapping and the number of child slots
// allocated per node.
wiStore *wordIndexStore
}
// Node is a single trie node.
type Node struct {
// data holds the child nodes, indexed by the slot produced by the trie's
// Byte2Index function. It stays nil until Put first descends through the node.
data []*Node
// value is the payload assigned by Put to the node that ends a word; nil
// means no word ends at this node.
value interface{}
}
// New returns an empty trie that uses the WordIndexLower index store, i.e.
// it only supports lowercase words.
func New() *Tried {
	return &Tried{
		root:    &Node{},
		wiStore: WordIndexDict[WordIndexLower],
	}
}
// NewWithWordType returns an empty trie whose accepted bytes are selected by
// t; for example WordIndexLower means only lowercase words are supported.
// The index store is looked up in WordIndexDict.
func NewWithWordType(t WordIndexType) *Tried {
	store := WordIndexDict[t]
	result := &Tried{root: &Node{}}
	result.wiStore = store
	return result
}
// Put inserts words into the trie, creating any missing nodes along the way.
//
// If at least one value is supplied, the first one is stored at the node that
// ends the word; additional values are ignored. When no value is supplied the
// trie itself is stored as a non-nil marker, so Get/Has report the word as
// present.
//
// Every byte of words must belong to the trie's alphabet; a byte that maps
// outside the allocated child slots makes the slice index panic.
func (tried *Tried) Put(words string, values ...interface{}) {
	cur := tried.root
	// Index the string directly: it yields the same bytes as []byte(words)
	// without copying.
	for i := 0; i < len(words); i++ {
		w := tried.wiStore.Byte2Index(words[i])
		if cur.data == nil {
			// Child slots are allocated lazily, only for nodes that get children.
			cur.data = make([]*Node, tried.wiStore.DataSize)
		}
		next := cur.data[w]
		if next == nil {
			next = new(Node)
			cur.data[w] = next
		}
		cur = next
	}

	// len() rather than a nil check: an empty but non-nil variadic slice
	// (Put(w, vals...) with len(vals) == 0) must not reach values[0].
	if len(values) > 0 {
		cur.value = values[0]
		return
	}
	cur.value = tried
}
// Get returns the value stored for words, or nil when the word is not in the
// trie. For the empty word it returns the value held by the root node.
//
// Lookups never panic: a node without children, or a byte that maps outside
// the allocated child slots, is treated as "not found".
func (tried *Tried) Get(words string) interface{} {
	cur := tried.root
	for i := 0; i < len(words); i++ {
		w := tried.wiStore.Byte2Index(words[i])
		// Covers both a nil child slice (len 0) and an out-of-alphabet byte.
		if w >= uint(len(cur.data)) {
			return nil
		}
		next := cur.data[w]
		if next == nil {
			return nil
		}
		cur = next
	}
	return cur.value
}
// Has reports whether Get returns a non-nil value for words.
func (tried *Tried) Has(words string) bool {
	if value := tried.Get(words); value == nil {
		return false
	}
	return true
}
// HasPrefix reports whether the trie contains a node path spelling words,
// i.e. whether some stored word starts with words. The empty prefix always
// matches.
//
// A node without children, or a byte that maps outside the allocated child
// slots, yields false instead of an index panic.
func (tried *Tried) HasPrefix(words string) bool {
	cur := tried.root
	for i := 0; i < len(words); i++ {
		w := tried.wiStore.Byte2Index(words[i])
		// Covers both a nil child slice (len 0) and an out-of-alphabet byte.
		if w >= uint(len(cur.data)) {
			return false
		}
		next := cur.data[w]
		if next == nil {
			return false
		}
		cur = next
	}
	return true
}
// PrefixWords returns every stored word that starts with words, including
// words itself when it is stored. It returns nil when nothing matches.
//
// The prefix itself (if stored) comes first; below it, the words under a
// child are emitted before the child's own word. An empty prefix matches
// every stored word. A node without children, or a byte that maps outside
// the allocated child slots, is treated as "no match" rather than panicking.
func (tried *Tried) PrefixWords(words string) []string {
	// Walk down to the node that spells the prefix.
	cur := tried.root
	for i := 0; i < len(words); i++ {
		w := tried.wiStore.Byte2Index(words[i])
		if w >= uint(len(cur.data)) {
			return nil
		}
		next := cur.data[w]
		if next == nil {
			return nil
		}
		cur = next
	}

	var result []string
	if cur.value != nil {
		result = append(result, words)
	}

	var traversal func(prefix []byte, node *Node)
	traversal = func(prefix []byte, node *Node) {
		for i, child := range node.data {
			if child == nil {
				continue
			}
			// next may share its backing array with siblings and descendants.
			// That is safe here: deeper levels only write past len(next), and
			// string(next) copies the bytes before the next sibling overwrites
			// the last position.
			next := append(prefix, tried.wiStore.Index2Byte(uint(i)))
			traversal(next, child)
			if child.value != nil {
				result = append(result, string(next))
			}
		}
	}
	traversal([]byte(words), cur)
	return result
}
// Traversal visits every node that holds a value, parents before children and
// children in ascending slot order, calling every with the node's slot index
// inside its parent (cidx) and the stored value.
//
// Returning false from every stops the whole traversal immediately; no
// further node is visited at any depth.
func (tried *Tried) Traversal(every func(cidx uint, value interface{}) bool) {
	// walk reports whether the traversal should continue, so that a stop
	// request propagates through every recursion level.
	var walk func(*Node) bool
	walk = func(cur *Node) bool {
		if cur == nil {
			return true
		}
		for i, n := range cur.data {
			if n == nil {
				continue
			}
			if n.value != nil && !every(uint(i), n.value) {
				return false
			}
			if !walk(n) {
				return false
			}
		}
		return true
	}
	walk(tried.root)
}
// WordsArray collects every word stored below the root. For each child the
// words found underneath it are emitted before the child's own word. The
// result is nil when nothing is stored.
func (tried *Tried) WordsArray() []string {
	var words []string

	var walk func(path []byte, node *Node)
	walk = func(path []byte, node *Node) {
		for idx, child := range node.data {
			if child == nil {
				continue
			}
			childPath := append(path, tried.wiStore.Index2Byte(uint(idx)))
			walk(childPath, child)
			if child.value == nil {
				continue
			}
			words = append(words, string(childPath))
		}
	}

	if root := tried.root; root != nil {
		walk([]byte{}, root)
	}
	return words
}
// String renders the stored words (as returned by WordsArray) with
// spew.Sprint.
func (tried *Tried) String() string {
	words := tried.WordsArray()
	return spew.Sprint(words)
}

152
tree/tried/tried_index.go Normal file
View File

@ -0,0 +1,152 @@
package tried
// WordIndexDict maps every supported WordIndexType to its index store: the
// byte->slot function, the slot->byte function and the number of child slots
// a node needs for that alphabet.
var WordIndexDict = map[WordIndexType]*wordIndexStore{
	WordIndexLower: {
		Type: WordIndexLower, Byte2Index: wordIndexLower, Index2Byte: indexWordLower, DataSize: 26,
	},
	WordIndexUpper: {
		Type: WordIndexUpper, Byte2Index: wordIndexUpper, Index2Byte: indexWordUpper, DataSize: 26,
	},
	WordIndexDigital: {
		Type: WordIndexDigital, Byte2Index: wordIndexDigital, Index2Byte: indexWordDigital, DataSize: 10,
	},
	WordIndexUpperLower: {
		Type: WordIndexUpperLower, Byte2Index: wordIndexUpperLower, Index2Byte: indexWordUpperLower, DataSize: 52,
	},
	WordIndexLowerDigital: {
		Type: WordIndexLowerDigital, Byte2Index: wordIndexLowerDigital, Index2Byte: indexWordLowerDigital, DataSize: 36,
	},
	WordIndexUpperDigital: {
		Type: WordIndexUpperDigital, Byte2Index: wordIndexUpperDigital, Index2Byte: indexWordUpperDigital, DataSize: 36,
	},
	WordIndexUpperLowerDigital: {
		Type: WordIndexUpperLowerDigital, Byte2Index: wordIndexUpperLowerDigital, Index2Byte: indexWordUpperLowerDigital, DataSize: 62,
	},
	WordIndex256: {
		Type: WordIndex256, Byte2Index: wordIndex256, Index2Byte: indexWord256, DataSize: 256,
	},
	WordIndex32to126: {
		Type: WordIndex32to126, Byte2Index: wordIndex32to126, Index2Byte: indexWord32to126, DataSize: '~' - ' ' + 1,
	},
}
// WordIndexType selects the alphabet a trie accepts, e.g. WordIndexLower
// means that words passed to Put may only contain lowercase letters.
type WordIndexType int
// The zero value is deliberately skipped, so an unset WordIndexType matches
// none of the constants below.
const (
_ WordIndexType = iota
// WordIndexLower accepts 'a'-'z'.
WordIndexLower
// WordIndexUpper accepts 'A'-'Z'.
WordIndexUpper
// WordIndexDigital accepts '0'-'9'.
WordIndexDigital
// WordIndexUpperLower accepts 'a'-'z' and 'A'-'Z'.
WordIndexUpperLower
// WordIndexLowerDigital accepts 'a'-'z' and '0'-'9'.
WordIndexLowerDigital
// WordIndexUpperDigital accepts 'A'-'Z' and '0'-'9'.
WordIndexUpperDigital
// WordIndexUpperLowerDigital accepts 'a'-'z', 'A'-'Z' and '0'-'9'.
WordIndexUpperLowerDigital
// WordIndex256 accepts every byte value.
WordIndex256
// WordIndex32to126 accepts the bytes from ' ' (32) through '~' (126).
WordIndex32to126
)
// wordIndexStore bundles everything a trie needs to know about one alphabet.
type wordIndexStore struct {
// Type is the WordIndexType this store is registered under.
Type WordIndexType
// Byte2Index maps a byte of a word to its child slot.
Byte2Index func(byte) uint
// Index2Byte maps a child slot back to the byte it represents.
Index2Byte func(uint) byte
// DataSize is the number of child slots allocated per node.
DataSize uint
}
// wordIndexLower maps 'a'-'z' to the slots 0-25. The subtraction is done in
// uint, so bytes below 'a' wrap around to very large values.
func wordIndexLower(w byte) uint {
	const base uint = 'a'
	return uint(w) - base
}
// indexWordLower is the inverse of wordIndexLower: slots 0-25 become 'a'-'z'.
func indexWordLower(w uint) byte {
	const base byte = 'a'
	return base + byte(w)
}
//
// wordIndexUpper maps 'A'-'Z' to the slots 0-25. The subtraction is done in
// uint, so bytes below 'A' wrap around to very large values.
func wordIndexUpper(w byte) uint {
	const base uint = 'A'
	return uint(w) - base
}
// indexWordUpper is the inverse of wordIndexUpper: slots 0-25 become 'A'-'Z'.
func indexWordUpper(w uint) byte {
	const base byte = 'A'
	return base + byte(w)
}
//
// wordIndexDigital maps '0'-'9' to the slots 0-9. The subtraction is done in
// uint, so bytes below '0' wrap around to very large values.
func wordIndexDigital(w byte) uint {
	const base uint = '0'
	return uint(w) - base
}
// indexWordDigital is the inverse of wordIndexDigital: slots 0-9 become '0'-'9'.
func indexWordDigital(w uint) byte {
	const base byte = '0'
	return base + byte(w)
}
//
// wordIndexUpperLower maps 'a'-'z' to the slots 0-25 and 'A'-'Z' to 26-51.
//
// Any other byte yields the maximum uint, which is outside every slot range,
// so an invalid byte can never alias onto a valid letter's slot through
// unsigned wrap-around.
func wordIndexUpperLower(w byte) uint {
	switch {
	case w >= 'a' && w <= 'z':
		return uint(w - 'a')
	case w >= 'A' && w <= 'Z':
		return uint(w-'A') + 26
	default:
		return ^uint(0)
	}
}
// indexWordUpperLower is the inverse of wordIndexUpperLower: slots 0-25
// become 'a'-'z' and slots from 26 upward become 'A' onward.
func indexWordUpperLower(w uint) byte {
	letter := byte(w)
	if w < 26 {
		return letter + 'a'
	}
	return letter - 26 + 'A'
}
//
// wordIndexLowerDigital maps 'a'-'z' to the slots 0-25 and '0'-'9' to 26-35.
//
// Any other byte yields the maximum uint, which is outside every slot range,
// so an invalid byte can never alias onto a valid character's slot through
// unsigned wrap-around.
func wordIndexLowerDigital(w byte) uint {
	switch {
	case w >= 'a' && w <= 'z':
		return uint(w - 'a')
	case w >= '0' && w <= '9':
		return uint(w-'0') + 26
	default:
		return ^uint(0)
	}
}
// indexWordLowerDigital is the inverse of wordIndexLowerDigital: slots 0-25
// become 'a'-'z' and slots from 26 upward become '0' onward.
func indexWordLowerDigital(w uint) byte {
	char := byte(w)
	if w < 26 {
		return char + 'a'
	}
	return char - 26 + '0'
}
//
// wordIndexUpperDigital maps 'A'-'Z' to the slots 0-25 and '0'-'9' to 26-35.
//
// Any other byte yields the maximum uint, which is outside every slot range,
// so an invalid byte can never alias onto a valid character's slot through
// unsigned wrap-around.
func wordIndexUpperDigital(w byte) uint {
	switch {
	case w >= 'A' && w <= 'Z':
		return uint(w - 'A')
	case w >= '0' && w <= '9':
		return uint(w-'0') + 26
	default:
		return ^uint(0)
	}
}
// indexWordUpperDigital is the inverse of wordIndexUpperDigital: slots 0-25
// become 'A'-'Z' and slots from 26 upward become '0' onward.
func indexWordUpperDigital(w uint) byte {
	char := byte(w)
	if w < 26 {
		return char + 'A'
	}
	return char - 26 + '0'
}
//
// wordIndexUpperLowerDigital maps 'a'-'z' to the slots 0-25, 'A'-'Z' to
// 26-51 and '0'-'9' to 52-61.
//
// Any other byte yields the maximum uint, which is outside every slot range,
// so an invalid byte can never alias onto a valid character's slot through
// unsigned wrap-around.
func wordIndexUpperLowerDigital(w byte) uint {
	switch {
	case w >= 'a' && w <= 'z':
		return uint(w - 'a')
	case w >= 'A' && w <= 'Z':
		return uint(w-'A') + 26
	case w >= '0' && w <= '9':
		return uint(w-'0') + 52
	default:
		return ^uint(0)
	}
}
// indexWordUpperLowerDigital is the inverse of wordIndexUpperLowerDigital:
// slots 0-25 become 'a'-'z', 26-51 become 'A'-'Z' and slots from 52 upward
// become '0' onward.
func indexWordUpperLowerDigital(w uint) byte {
	char := byte(w)
	switch {
	case w >= 52:
		return char - 52 + '0'
	case w >= 26:
		return char - 26 + 'A'
	default:
		return char + 'a'
	}
}
// wordIndex256 uses the byte value itself as the slot, so every byte is
// accepted (which also covers multi-byte encoded text such as Chinese).
func wordIndex256(w byte) uint {
	slot := uint(w)
	return slot
}
// indexWord256 is the inverse of wordIndex256: the slot number is the byte.
func indexWord256(w uint) byte {
	char := byte(w)
	return char
}
// wordIndex32to126 maps the bytes from space (32) through '~' (126), which
// include 0-9, a-z, A-Z and punctuation, to the slots 0-94. The subtraction
// is done in uint, so bytes below space wrap around to very large values.
func wordIndex32to126(w byte) uint {
	const base uint = ' '
	return uint(w) - base
}
// indexWord32to126 is the inverse of wordIndex32to126: slot 0 is the space
// character and slot 94 is '~'.
func indexWord32to126(w uint) byte {
	const base byte = ' '
	return base + byte(w)
}

384
tree/tried/tried_test.go Normal file
View File

@ -0,0 +1,384 @@
package tried
import (
"bytes"
"encoding/gob"
"os"
"sort"
"testing"
"github.com/davecgh/go-spew/spew"
"github.com/Pallinder/go-randomdata"
)
// CompareSliceWithSorted sorts both slices in place (ascending) and compares
// their spew renderings. It returns true and an empty string when they match,
// otherwise false and a message showing both renderings.
//
// Callers can observe the sorting: both arguments are reordered.
func CompareSliceWithSorted(source, words []string) (bool, string) {
	sort.Strings(words)
	sort.Strings(source)

	result1, result2 := spew.Sprint(source), spew.Sprint(words)
	if result1 == result2 {
		return true, ""
	}
	return false, spew.Sprint(result1, " != ", result2)
}
func TestTried_Has(t *testing.T) {
var tried *Tried
tried = NewWithWordType(WordIndexLower)
tried.Put("ads")
tried.Put("zadads")
tried.Put("asdgdf")
if !tried.Has("ads") {
t.Error("ads is exist, but not has")
}
if !tried.HasPrefix("ad") {
t.Error("ads is exist, but not HasPrefix")
}
if !tried.HasPrefix("za") {
t.Error("ads is exist, but not HasPrefix")
}
if tried.HasPrefix("fsdf") {
t.Error("fsdf is not exist, but HasPrefix")
}
if len(tried.String()) < 10 {
t.Error(tried.WordsArray())
}
}
// TestTried_PrefixWords exercises PrefixWords for several index types.
//
// The three parallel slices share an index: triedList[i] is the trie,
// wordsList[i] the words put into it, and inputParams[i] two prefixes. The
// first prefix is expected to match all three words, the second exactly two.
func TestTried_PrefixWords(t *testing.T) {
var tried *Tried
var wordsCollection []string
var input []string
var wordsList [][]string
var inputParams [][]string
var triedList []*Tried
triedList = append(triedList, NewWithWordType(WordIndexLower))
inputParams = append(inputParams, []string{"ad", "adf"})
wordsList = append(wordsList, []string{"ad", "adfsxzcdas", "adfadsasd"})
triedList = append(triedList, NewWithWordType(WordIndexUpper))
inputParams = append(inputParams, []string{"AD", "ADF"})
wordsList = append(wordsList, []string{"AD", "ADFSXZCDAS", "ADFADSASD"})
triedList = append(triedList, NewWithWordType(WordIndexUpperLower))
inputParams = append(inputParams, []string{"aD", "aDf"})
wordsList = append(wordsList, []string{"aDF", "aDfsxzcdas", "aDfadsasd"})
triedList = append(triedList, NewWithWordType(WordIndexUpperDigital))
inputParams = append(inputParams, []string{"A09D", "A09DF"})
wordsList = append(wordsList, []string{"A09D", "A09DFSXZCD312AS", "A09DFA32DSASD"})
triedList = append(triedList, NewWithWordType(WordIndexLowerDigital))
inputParams = append(inputParams, []string{"a09d", "a09df"})
wordsList = append(wordsList, []string{"a09d", "a09dfsxzcd312as", "a09dfa32dsasd"})
triedList = append(triedList, NewWithWordType(WordIndexUpperLowerDigital))
inputParams = append(inputParams, []string{"A09d", "A09dZ"})
wordsList = append(wordsList, []string{"A09d", "A09dZsxzcd312as", "A09dZa32dsasd"})
triedList = append(triedList, NewWithWordType(WordIndex256))
inputParams = append(inputParams, []string{"阿萨德", "阿萨德!"})
wordsList = append(wordsList, []string{"阿萨德", "阿萨德!@$*#))(#*", "阿萨德!╜╝╞╟╠╡╢╣╤╥╦╧╨╩╪╫╬╭╮╯╰╱╲╳▁▂▃▄▅▆▇█ ▉ ▊▋▌▍▎▏"})
triedList = append(triedList, NewWithWordType(WordIndex32to126))
inputParams = append(inputParams, []string{" `", " `<"})
wordsList = append(wordsList, []string{" `21`3tcdbxcfhyop8901zc[]\\'/?()#$%^&**! ", " `<AZaz09~ dys!@#$)(*^$#", " `<>.,?/"})
for i := 0; i < len(triedList); i++ {
tried = triedList[i]
input = inputParams[i]
wordsCollection = wordsList[i]
for _, words := range wordsCollection {
tried.Put(words)
}
var prefixWords []string
// The first prefix must return every inserted word.
prefixWords = tried.PrefixWords(input[0])
// CompareSliceWithSorted sorts wordsCollection in place; the
// wordsCollection[1:] comparison further down relies on that order.
if ok, errorResult := CompareSliceWithSorted(prefixWords, wordsCollection); !ok {
t.Error(errorResult)
}
// The second prefix must return a strict subset of the inserted words.
prefixWords = tried.PrefixWords(input[1])
if ok, _ := CompareSliceWithSorted(prefixWords, wordsCollection); ok {
t.Error("should be not ok")
}
if len(prefixWords) != 2 {
t.Error(prefixWords, " Size of Array should be 2")
}
// NOTE(review): this assumes the word not matched by input[1] sorts first
// in every fixture - confirm when adding fixtures.
if ok, errorResult := CompareSliceWithSorted(prefixWords, wordsCollection[1:]); !ok {
t.Error(errorResult)
}
// t.Error(tried.WordsArray())
}
}
// TestTried_NewWith builds one trie per index type and checks that every word
// can be read back with Get immediately after it was Put.
//
// triedList[i] and wordsList[i] belong together.
func TestTried_NewWith(t *testing.T) {
var tried *Tried
var wordsCollection []string
var wordsList [][]string
var triedList []*Tried
triedList = append(triedList, NewWithWordType(WordIndexLower))
wordsList = append(wordsList, []string{"adazx", "assdfhgnvb", "ewqyiouyasdfmzvxz"})
triedList = append(triedList, NewWithWordType(WordIndexUpper))
wordsList = append(wordsList, []string{"ADFSZ", "DEFASEWRQWER", "GFHJERQWREWTNBVFGFH"})
triedList = append(triedList, NewWithWordType(WordIndexUpperLower))
wordsList = append(wordsList, []string{"adazxAZDSAFASZRETHGFTUIPK", "assdfhgDSFGnvb", "yaXZLMPOIQsdGHFfmFBzvxz"})
triedList = append(triedList, NewWithWordType(WordIndexUpperDigital))
wordsList = append(wordsList, []string{"AZ3428934470193", "ZPQPDEK09876543629812", "AZEWIRU0192456FDEWR9032"})
triedList = append(triedList, NewWithWordType(WordIndexLowerDigital))
wordsList = append(wordsList, []string{"az3428934470193", "zpqwe0987654362sf9812", "az21301az09azdstr540"})
triedList = append(triedList, NewWithWordType(WordIndexUpperLowerDigital))
wordsList = append(wordsList, []string{"azAZ09", "aRGFDSFDSzAasdZ06789", "A28374JHFudfsu09qwzzdsw874FDSAZfer"})
triedList = append(triedList, NewWithWordType(WordIndex256))
wordsList = append(wordsList, []string{"21`3tcdbxcfhyop8901zc[]\\'/?()#$%^&**! 09-阿萨德发生的官方说的对符合规定", "符号!@$*#))(#*", "╜╝╞╟╠╡╢╣╤╥╦╧╨╩╪╫╬╭╮╯╰╱╲╳▁▂▃▄▅▆▇█ ▉ ▊▋▌▍▎▏"})
triedList = append(triedList, NewWithWordType(WordIndex32to126))
wordsList = append(wordsList, []string{" 21`3tcdbxcfhyop8901zc[]\\'/?()#$%^&**! ", "AZaz09~ dys!@#$)(*^$#", "<>.,?/"})
for i := 0; i < len(triedList); i++ {
tried = triedList[i]
wordsCollection = wordsList[i]
for _, words := range wordsCollection {
tried.Put(words)
// Put without an explicit value stores a non-nil marker, so Get must
// not return nil for a word that was just inserted.
if tried.Get(words) == nil {
t.Error("should be not nil the type is ", tried.wiStore.Type)
}
}
// t.Error(tried.WordsArray())
}
}
// TestTried_String builds one trie per index type, checks that every word can
// be read back with Get, and then checks that WordsArray returns exactly the
// inserted words (compared after sorting).
//
// triedList[i] and wordsList[i] belong together.
func TestTried_String(t *testing.T) {
var tried *Tried
var wordsCollection []string
var wordsList [][]string
var triedList []*Tried
triedList = append(triedList, NewWithWordType(WordIndexLower))
wordsList = append(wordsList, []string{"adazx", "assdfhgnvb", "ewqyiouyasdfmzvxz"})
triedList = append(triedList, NewWithWordType(WordIndexUpper))
wordsList = append(wordsList, []string{"ADFSZ", "DEFASEWRQWER", "GFHJERQWREWTNBVFGFH"})
triedList = append(triedList, NewWithWordType(WordIndexDigital))
wordsList = append(wordsList, []string{"093875239457", "09123406534", "0912340846"})
triedList = append(triedList, NewWithWordType(WordIndexUpperLower))
wordsList = append(wordsList, []string{"adazxAZDSAFASZRETHGFTUIPK", "assdfhgDSFGnvb", "yaXZLMPOIQsdGHFfmFBzvxz"})
triedList = append(triedList, NewWithWordType(WordIndexUpperDigital))
wordsList = append(wordsList, []string{"AZ3428934470193", "ZPQPDEK09876543629812", "AZEWIRU0192456FDEWR9032"})
triedList = append(triedList, NewWithWordType(WordIndexLowerDigital))
wordsList = append(wordsList, []string{"az3428934470193", "zpqwe0987654362sf9812", "az21301az09azdstr540"})
triedList = append(triedList, NewWithWordType(WordIndexUpperLowerDigital))
wordsList = append(wordsList, []string{"azAZ09", "aRGFDSFDSzAasdZ06789", "A28374JHFudfsu09qwzzdsw874FDSAZfer"})
triedList = append(triedList, NewWithWordType(WordIndex256))
wordsList = append(wordsList, []string{"21`3tcdbxcf囉hyop打算8901zc[]\\'/?()#$%^&**!\x01 09-213", "的支持中文", "!@$*#)中文)(#*", `\/213dsfsdf`})
triedList = append(triedList, NewWithWordType(WordIndex32to126))
wordsList = append(wordsList, []string{" 21`3tcdbxcfhyop8901zc[]\\'/?()#$%^&**! ", "AZaz09~ dys!@#$)(*^$#", "<>.,?/"})
for i := 0; i < len(triedList); i++ {
tried = triedList[i]
wordsCollection = wordsList[i]
for _, words := range wordsCollection {
tried.Put(words)
if tried.Get(words) == nil {
t.Error("should be not nil the type is ", tried.wiStore.Type)
}
}
// WordsArray must round-trip the inserted words; CompareSliceWithSorted
// sorts both slices in place before comparing them.
resultArray := tried.WordsArray()
if ok, errorResult := CompareSliceWithSorted(resultArray, wordsCollection); !ok {
t.Error(errorResult)
}
// t.Error(tried.WordsArray())
}
}
// TestTried_PutAndGet1 checks the values returned by Get: the trie itself for
// a word stored without an explicit value, the explicit value otherwise, and
// nil for words that were never stored.
func TestTried_PutAndGet1(t *testing.T) {
	tried := New()

	tried.Put("asdf")
	tried.Put("hehe", "hehe")
	tried.Put("xixi", 3)

	var result interface{}

	// Put without a value stores the trie itself as the marker.
	result = tried.Get("asdf")
	if result != tried {
		t.Error("result should be the trie itself")
	}

	result = tried.Get("xixi")
	if result != 3 {
		t.Error("result should be 3")
	}

	result = tried.Get("hehe")
	if result != "hehe" {
		t.Error("result should be hehe")
	}

	// "haha" shares only its first byte with a stored word.
	result = tried.Get("haha")
	if result != nil {
		t.Error("result should be nil")
	}

	// "b" has no node at all below the root.
	result = tried.Get("b")
	if result != nil {
		t.Error("result should be nil")
	}
}
// TestTried_Traversal checks that Traversal visits the stored values in
// ascending slot order along each path: "abdf", "asdf", "hehe", "xixi".
func TestTried_Traversal(t *testing.T) {
	tried := New()
	tried.Put("asdf")
	tried.Put("abdf", "ab")
	tried.Put("hehe", "hehe")
	tried.Put("xixi", 3)

	var result []interface{}
	tried.Traversal(func(idx uint, v interface{}) bool {
		// t.Error(idx, v)
		result = append(result, v)
		return true
	})

	// Guard the indexing below so a regression fails the test with a message
	// instead of panicking with an index-out-of-range.
	if len(result) != 4 {
		t.Fatalf("expected 4 visited values, got %d: %v", len(result), result)
	}

	if result[0] != "ab" {
		t.Error(result[0])
	}

	// "asdf" was stored without a value, so the trie itself is the marker.
	if result[1] != tried {
		t.Error(result[1])
	}

	if result[2] != "hehe" {
		t.Error(result[2])
	}

	if result[3] != 3 {
		t.Error(result[3])
	}
}
// TesStoreData generates N random words and writes them, gob-encoded, to
// "tried.log" in the working directory. Load reads that file for the
// benchmarks.
//
// The name lacks the "Test" prefix, so `go test` does not run it
// automatically. NOTE(review): this looks deliberate, to avoid regenerating
// the file on every run - confirm before renaming.
func TesStoreData(t *testing.T) {
	const N = 1000000

	l := make([]string, 0, N)
	for i := 0; i < N; i++ {
		// Draw the length once per word. Calling randomdata.Number in the loop
		// condition re-rolled the bound on every iteration.
		// NOTE(review): presumably Number(5, 15) yields a length in [5, 15) -
		// verify against go-randomdata.
		size := randomdata.Number(5, 15)
		content := make([]rune, 0, size)
		for c := 0; c < size; c++ {
			char := randomdata.Number(0, 26) + 'a'
			content = append(content, rune(byte(char)))
		}
		l = append(l, string(content))
	}

	var result bytes.Buffer
	if err := gob.NewEncoder(&result).Encode(l); err != nil {
		t.Fatalf("encoding word list: %v", err)
	}

	f, err := os.OpenFile("tried.log", os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0666)
	if err != nil {
		t.Fatalf("opening tried.log: %v", err)
	}
	if _, err := f.Write(result.Bytes()); err != nil {
		f.Close()
		t.Fatalf("writing tried.log: %v", err)
	}
	// Close explicitly and check the error: a failed close can mean the data
	// never reached the disk.
	if err := f.Close(); err != nil {
		t.Fatalf("closing tried.log: %v", err)
	}
}
// Load reads the gob-encoded word list from "tried.log" in the working
// directory and returns it.
//
// It panics when the file cannot be opened (the message asks to run
// TesStoreData first, then the benchmarks) or when its contents cannot be
// decoded, so a benchmark never runs silently on an empty data set.
func Load() []string {
	f, err := os.Open("tried.log")
	if err != nil {
		panic("先执行TesStoreData 然后再测试Benchmark")
	}
	defer f.Close()

	var result []string
	if err := gob.NewDecoder(f).Decode(&result); err != nil {
		panic("tried.log: cannot decode word list: " + err.Error())
	}
	return result
}
// BenchmarkTried_Put measures Put by inserting the whole word list returned
// by Load into a fresh trie, `count` times.
//
// Load panics unless tried.log exists in the working directory.
//
// NOTE(review): b.N is overwritten by hand instead of being used as the loop
// bound; the reported ns/op is therefore only meaningful if the loaded list
// really holds 1000000 words - confirm against TesStoreData.
func BenchmarkTried_Put(b *testing.B) {
var data []string
b.N = 1000000
count := 10
// for i := 0; i < b.N; i++ {
// var content []rune
// for c := 0; c < randomdata.Number(5, 15); c++ {
// char := randomdata.Number(0, 26) + 'a'
// content = append(content, rune(byte(char)))
// }
// data = append(data, (string(content)))
// }
data = Load()
// Exclude the time spent loading the fixture from the measurement.
b.ResetTimer()
// Scale the operation count to the number of passes made below.
b.N = b.N * count
for c := 0; c < count; c++ {
tried := New()
for _, v := range data {
tried.Put(v)
}
}
}
// BenchmarkTried_Get measures Get by looking up every word of the list
// returned by Load, `count` times, in a trie that already holds all of them.
//
// Load panics unless tried.log exists in the working directory.
//
// NOTE(review): b.N is overwritten by hand instead of being used as the loop
// bound; the reported ns/op is therefore only meaningful if the loaded list
// really holds 1000000 words - confirm against TesStoreData.
func BenchmarkTried_Get(b *testing.B) {
// Keep fixture loading and trie construction out of the measurement.
b.StopTimer()
var data []string
b.N = 1000000
count := 10
// for i := 0; i < b.N; i++ {
// var content []rune
// for c := 0; c < randomdata.Number(5, 15); c++ {
// char := randomdata.Number(0, 26) + 'a'
// content = append(content, rune(byte(char)))
// }
// data = append(data, string(content))
// }
data = Load()
// Scale the operation count to the number of passes made below.
b.N = b.N * count
tried := New()
for _, v := range data {
tried.Put(v)
}
b.StartTimer()
for c := 0; c < count; c++ {
for _, v := range data {
tried.Get(v)
}
}
}