Radix Tree
Search
Insert
Insert ‘water’ at the root
Insert ‘slower’ while keeping ‘slow’
Insert ‘test’ which is a prefix of ‘tester’
Insert ‘team’ while splitting ‘test’ and creating a new edge label ‘st’
Insert ‘toast’ while splitting ‘te’ and moving previous strings a level lower
Implementation
package main
import (
"fmt"
"math/rand"
"strings"
"time"
)
// node is one vertex of the radix tree. key is the label of the edge leading
// into the node; the key a node represents is the concatenation of the labels
// on the path from the root down to it.
type node[T any] struct {
// children holds the sub-trees below this node.
children []*node[T]
// childrenIndex is parallel to children: childrenIndex[i] is the first byte
// of children[i].key and is what lookups compare against to pick a branch.
childrenIndex []byte
// key is this node's edge label (a fragment of the full key, not all of it).
key string
// value is the payload stored for the key ending at this node.
value T
// hasValue reports whether a value was stored here by set. It is false for
// nodes that only exist as branch points and for logically deleted keys.
hasValue bool
}
// get walks down from n following key and returns the node whose path spells
// exactly key, or nil when no such node exists. The returned node is not
// guaranteed to hold a value; callers must consult hasValue themselves.
// A nil receiver yields nil.
func (n *node[T]) get(key string) *node[T] {
	cur, rest := n, key
	for cur != nil {
		matched := getLongestPublicPrefixIndex(rest, cur.key)
		if matched != len(cur.key) {
			// The remaining key diverges inside this node's label.
			return nil
		}
		rest = rest[matched:]
		if rest == "" {
			return cur
		}
		// Pick the child whose label starts with the next unmatched byte.
		var next *node[T]
		for i, first := range cur.childrenIndex {
			if first == rest[0] {
				next = cur.children[i]
				break
			}
		}
		cur = next
	}
	return nil
}
// delete removes key from the subtree rooted at n and returns the value that
// was stored under it, or the zero value of T when key is not present.
//
// Deletion follows the usual radix-tree scheme: the node representing key
// loses its value; a node left with neither value nor children is emptied so
// that its parent unlinks it; and a value-less node left with exactly one
// child is merged with that child by concatenating the two labels.
// A nil receiver is treated as an empty tree.
func (n *node[T]) delete(key string) (oldV T) {
	if n == nil {
		return oldV
	}
	l := getLongestPublicPrefixIndex(key, n.key)
	if l != len(n.key) {
		// key diverges inside this node's label, so it is not stored below.
		return oldV
	}
	key = key[l:]

	if len(key) == 0 {
		// n is the node addressed by key. Only report a value that is really
		// stored, and clear it so a later lookup cannot observe the old one.
		if n.hasValue {
			oldV = n.value
		}
		var zero T
		n.value, n.hasValue = zero, false
		n.compact()
		return oldV
	}

	for i, first := range n.childrenIndex {
		if first != key[0] {
			continue
		}
		child := n.children[i]
		oldV = child.delete(key)
		// Back in the parent's view: a child that ended up empty is unlinked.
		if len(child.children) == 0 && !child.hasValue {
			last := len(n.children) - 1
			copy(n.children[i:], n.children[i+1:])
			n.children[last] = nil // drop the stale pointer from the backing array
			n.children = n.children[:last]
			n.childrenIndex = append(n.childrenIndex[:i], n.childrenIndex[i+1:]...)
		}
		n.compact()
		return oldV
	}
	return oldV
}

// compact restores the tree shape of a node that stores no value: with no
// children it is reset to the zero node (the parent then unlinks it), and with
// a single child it absorbs that child, joining the two edge labels.
func (n *node[T]) compact() {
	if n.hasValue {
		return
	}
	switch len(n.children) {
	case 0:
		*n = node[T]{}
	case 1:
		only := n.children[0]
		only.key = n.key + only.key
		*n = *only
	}
}
// set stores value under key in the subtree rooted at n and returns the value
// previously stored under key, or the zero value of T when there was none.
//
// When key shares only part of n's label, n is split: the common prefix stays
// on n and the rest of the old label, together with n's old value and
// children, moves into a new child one level lower. n must not be nil.
func (n *node[T]) set(key string, value T) (oldValue T) {
	l := getLongestPublicPrefixIndex(key, n.key)
	if l < len(n.key) {
		prefix, suffix := n.key[:l], n.key[l:]
		lower := &node[T]{
			children:      n.children,
			childrenIndex: n.childrenIndex,
			key:           suffix,
			value:         n.value,
			hasValue:      n.hasValue,
		}
		*n = node[T]{
			key:           prefix,
			children:      []*node[T]{lower},
			childrenIndex: []byte{suffix[0]},
		}
	}
	key = key[l:]

	if len(key) == 0 {
		// key ends exactly at n. A node without a value may still hold data
		// from an earlier logical delete, so only report a value that is set.
		if n.hasValue {
			oldValue = n.value
		}
		n.value, n.hasValue = value, true
		return oldValue
	}

	// After a split the only child starts with a byte that differs from
	// key[0], so this search can only succeed on the unsplit path.
	for i, first := range n.childrenIndex {
		if first == key[0] {
			return n.children[i].set(key, value)
		}
	}

	n.children = append(n.children, &node[T]{key: key, value: value, hasValue: true})
	n.childrenIndex = append(n.childrenIndex, key[0])
	return oldValue
}
// RadixTreeMap is a map from string keys to values of type T that is backed
// by a radix tree.
type RadixTreeMap[T any] struct {
// root is the top node of the tree; it stays nil until Set stores the first
// non-empty key.
root *node[T]
}
// NewRadixTreeMap returns a new, empty RadixTreeMap whose root is not yet
// allocated.
func NewRadixTreeMap[T any]() *RadixTreeMap[T] {
	return new(RadixTreeMap[T])
}
// Get returns the value stored under key. It returns the zero value of T when
// key is empty or when no value is stored under key.
func (t *RadixTreeMap[T]) Get(key string) T {
	var zero T
	if len(key) == 0 {
		return zero
	}
	// get also returns nodes that hold no value: branch points created by a
	// split, and logically deleted keys whose node may still carry the old
	// value. Only a node flagged hasValue represents a stored key.
	if n := t.root.get(key); n != nil && n.hasValue {
		return n.value
	}
	return zero
}
// Delete removes key from the map by delegating to the root node's delete and
// returns whatever that call reports as the removed value.
func (t *RadixTreeMap[T]) Delete(key string) T {
	removed := t.root.delete(key)
	return removed
}
// Set stores value under key and returns the previous value reported by the
// tree, or the zero value of T when the map was empty. An empty key is
// ignored: nothing is stored and the zero value is returned.
func (t *RadixTreeMap[T]) Set(key string, value T) (oldValue T) {
	switch {
	case key == "":
		// Empty keys are not stored.
	case t.root == nil:
		// First entry: it becomes the root and there is no previous value.
		t.root = &node[T]{key: key, value: value, hasValue: true}
	default:
		oldValue = t.root.set(key, value)
	}
	return oldValue
}
// min returns the smaller of a and b.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
// getLongestPublicPrefixIndex returns the length in bytes of the longest
// common prefix of str1 and str2, i.e. the index of the first byte at which
// they differ, or the length of the shorter string when one is a prefix of
// the other.
func getLongestPublicPrefixIndex(str1, str2 string) int {
	// Clamp str1 so that every index into it is also valid for str2.
	if len(str2) < len(str1) {
		str1 = str1[:len(str2)]
	}
	for i := 0; i < len(str1); i++ {
		if str1[i] != str2[i] {
			return i
		}
	}
	return len(str1)
}
//----------------------- for test -----------------------
// printTree dumps the subtree rooted at root to standard output. Each node is
// bracketed by a line of "->" and a line of "<-", each repeated weight times
// (at least once), and children are printed with weight+1. Between the
// brackets a node with a non-empty label prints "label : value". A nil root
// prints nothing.
func printTree[T any](root *node[T], weight int) {
	if root == nil {
		return
	}
	depth := weight
	if depth < 1 {
		depth = 1
	}
	fmt.Println(strings.Repeat("->", depth))
	if root.key != "" {
		fmt.Println(root.key, ":", root.value)
	}
	for _, child := range root.children {
		printTree(child, depth+1)
	}
	fmt.Println(strings.Repeat("<-", depth))
}
// generateStringMap builds totalCount random strings of exactly eachLength
// bytes drawn from the alphabet aA bB ... zZ 0-9 and returns them as a map in
// which every string is both key and value, together with the summed length
// of all generated strings. Duplicate strings collapse into one map entry but
// are still counted in the returned total.
func generateStringMap(totalCount, eachLength int) (map[string]string, int64) {
	alphabet := make([]byte, 0, 62)
	for c := byte('a'); c <= 'z'; c++ {
		// c-' ' is the upper-case counterpart of the lower-case letter c.
		alphabet = append(alphabet, c, c-' ')
	}
	alphabet = append(alphabet, "0123456789"...)

	samples := map[string]string{}
	var totalBytes int64
	for n := 0; n < totalCount; n++ {
		var sb strings.Builder
		for j := 0; j < eachLength; j++ {
			sb.WriteByte(alphabet[rand.Intn(len(alphabet))])
		}
		s := sb.String()
		samples[s] = s
		totalBytes += int64(len(s))
	}
	return samples, totalBytes
}
// test is a self-checking benchmark. It generates 1,000,000 random keys of
// 1024 bytes, then inserts, looks up and deletes every one of them in a
// RadixTreeMap, printing the wall-clock time of each phase. It panics as soon
// as a Set reports a previous value or a Get/Delete returns something other
// than the stored value. Finally it prints what is left of the tree.
func test() {
	begin := time.Now()
	samples, totalBytes := generateStringMap(1000000, 1024)
	fmt.Println("生成样本用时:", time.Since(begin))
	fmt.Println("样本量:", len(samples))
	// Doubled, presumably because each string is held as both key and value.
	fmt.Println("样本size:", totalBytes*2)

	begin = time.Now()
	tree := NewRadixTreeMap[string]()
	for key, want := range samples {
		prev := tree.Set(key, want)
		if prev == "" {
			continue
		}
		panic("Set result is not nil" + prev + want)
	}
	fmt.Println("SetAll 用时:", time.Since(begin))

	begin = time.Now()
	for key, want := range samples {
		got := tree.Get(key)
		if got == want {
			continue
		}
		panic("Get not equal " + got + want)
	}
	fmt.Println("GetAll 用时:", time.Since(begin))

	begin = time.Now()
	for key, want := range samples {
		got := tree.Delete(key)
		if got == want {
			continue
		}
		panic("Delete not equal " + got + want)
	}
	fmt.Println("DeleteAll 用时:", time.Since(begin))

	printTree(tree.root, 5)
	fmt.Println(tree.root)
}
// main runs the benchmark in test. The small hand-written scenario that used
// to sit unreachable behind an unconditional return now lives in demo; call
// demo() here instead of test() to run it.
func main() {
	test()
}

// demo inserts a handful of overlapping keys, deletes them again (including
// "c", which was never inserted), printing the result of every Delete, and
// finally dumps the remaining tree.
func demo() {
	t := NewRadixTreeMap[string]()
	t.Set("b", "b")
	t.Set("f", "f")
	t.Set("a", "a")
	t.Set("abc", "abc")
	t.Set("ab", "ab")
	t.Set("abd", "abd")
	t.Set("acd", "acd")
	fmt.Println(t.Delete("b"))
	fmt.Println(t.Delete("a"))
	fmt.Println(t.Delete("ab"))
	fmt.Println(t.Delete("abc"))
	fmt.Println(t.Delete("acd"))
	fmt.Println(t.Delete("abd"))
	fmt.Println(t.Delete("c"))
	fmt.Println(t.Delete("f"))
	printTree(t.root, 5)
}
Performance
CPU:i5-7200U,双核4线程
生成样本用时: 42.0999128s
样本量: 1000000
样本size: 2048000000
SetAll 用时: 1.5978911s
GetAll 用时: 2.3042968s
DeleteAll 用时: 2.0477653s