Files
tsnet-proxy/vendor/github.com/gaissmai/bart/node.go
2025-04-09 01:00:12 +01:00

730 lines
19 KiB
Go

// Copyright (c) 2024 Karl Gaissmaier
// SPDX-License-Identifier: MIT
package bart
import (
"net/netip"
"slices"
"github.com/gaissmai/bart/internal/sparse"
)
const (
strideLen = 8 // octet
maxTreeDepth = 16 // 16 for IPv6
maxNodeChildren = 256 // 256
maxNodePrefixes = 512 // 512
)
// stridePath, max 16 octets deep
type stridePath [maxTreeDepth]uint8
// node is a level node in the multibit-trie.
// A node has prefixes and children, forming the multibit trie.
//
// The prefixes form a complete binary tree, see the artlookup.pdf
// paper in the doc folder to understand the data structure.
//
// In contrast to the ART algorithm, sparse arrays
// (popcount-compressed slices) are used instead of fixed-size arrays.
//
// The array slots are also not pre-allocated (alloted) as described
// in the ART algorithm, fast backtracking with a bitset vector is used
// to get the longest-prefix-match.
//
// The sparse child array recursively spans the trie with a branching factor of 256
// and also records path-compressed leaves in the free node slots.
type node[V any] struct {
// prefixes contains the routes indexed as a complete binary tree with payload V
// with the help of the baseIndex function from the ART algorithm.
prefixes sparse.Array[V]
// children, recursively spans the trie with a branching factor of 256.
//
// Sorry, here we have to use a mixture of generics and interfaces:
//
// Without path compression, the definition would naturally be:
// children sparse.Array[*node[V]]
//
// ... but with path compression the child can now be a node or a path compressed leaf.
//
// With path compression we could define:
// type noder[V any] interface {
// isLeaf[V]() bool
// }
//
// and:
// children sparse.Array[noder[V]]
//
// But we use the empty interface{} instead, by intention, see below!
//
// The empty interface{} consumes less memory and type assertions are faster than
// indirect method calls like node.isLeaf()
children sparse.Array[interface{}]
}
// isEmpty returns true if node has neither prefixes nor children
func (n *node[V]) isEmpty() bool {
return n.prefixes.Len() == 0 && n.children.Len() == 0
}
// leaf is a prefix with value, used as a path compressed child
type leaf[V any] struct {
prefix netip.Prefix
value V
}
// cloneOrCopy, helper function,
// deep copy if v implements the Cloner interface.
func cloneOrCopy[V any](val V) V {
if cloner, ok := any(val).(Cloner[V]); ok {
return cloner.Clone()
}
// just a shallow copy
return val
}
// cloneLeaf returns a clone of the leaf
// if the value implements the Cloner interface.
func (l *leaf[V]) cloneLeaf() *leaf[V] {
return &leaf[V]{l.prefix, cloneOrCopy(l.value)}
}
// insertAtDepth insert a prefix/val into a node tree at depth.
// n must not be nil, prefix must be valid and already in canonical form.
//
// If a path compression has to be resolved because a new value is added
// that collides with a leaf, the compressed leaf is then reinserted
// one depth down in the node trie.
func (n *node[V]) insertAtDepth(pfx netip.Prefix, val V, depth int) (exists bool) {
ip := pfx.Addr()
bits := pfx.Bits()
lastIdx, lastBits := lastOctetIdxAndBits(bits)
octets := ip.AsSlice()
octets = octets[:lastIdx+1]
// find the proper trie node to insert prefix
// start with prefix octet at depth
for ; depth < len(octets); depth++ {
octet := octets[depth]
addr := uint(octet)
// last significant octet: insert/override prefix/val into node
if depth == lastIdx {
return n.prefixes.InsertAt(pfxToIdx(octet, lastBits), val)
}
if !n.children.Test(addr) {
// insert prefix path compressed
return n.children.InsertAt(addr, &leaf[V]{pfx, val})
}
// get the child: node or leaf
switch kid := n.children.MustGet(addr).(type) {
case *node[V]:
n = kid
continue // descend down to next trie level
case *leaf[V]:
// reached a path compressed prefix
// override value in slot if prefixes are equal
if kid.prefix == pfx {
kid.value = val
// exists
return true
}
// create new node
// push the leaf down
// insert new child at current leaf position (addr)
// descend down, replace n with new child
newNode := new(node[V])
newNode.insertAtDepth(kid.prefix, kid.value, depth+1)
n.children.InsertAt(addr, newNode)
n = newNode
default:
panic("logic error, wrong node type")
}
}
panic("unreachable")
}
// insertAtDepthPersist is the immutable version of insertAtDepth.
// All visited nodes are cloned during insertion.
func (n *node[V]) insertAtDepthPersist(pfx netip.Prefix, val V, depth int) (exists bool) {
ip := pfx.Addr()
bits := pfx.Bits()
lastIdx, lastBits := lastOctetIdxAndBits(bits)
octets := ip.AsSlice()
octets = octets[:lastIdx+1]
// find the proper trie node to insert prefix
// start with prefix octet at depth
for ; depth < len(octets); depth++ {
octet := octets[depth]
addr := uint(octet)
// last significant octet: insert/override prefix/val into node
if depth == lastIdx {
return n.prefixes.InsertAt(pfxToIdx(octet, lastBits), val)
}
if !n.children.Test(addr) {
// insert new prefix path compressed
return n.children.InsertAt(addr, &leaf[V]{pfx, val})
}
// get the child: node or leaf, but clone the path down
switch kid := n.children.MustGet(addr).(type) {
case *node[V]:
// proceed to next level
kid = kid.cloneFlat()
n.children.InsertAt(addr, kid)
n = kid
continue // descend down to next trie level
case *leaf[V]:
kid = kid.cloneLeaf()
// override value in slot if prefixes are equal
if kid.prefix == pfx {
// exists
return n.children.InsertAt(addr, &leaf[V]{pfx, val})
}
// create new node
// push the leaf down
// insert new child at current leaf position (addr)
// descend down, replace n with new child
newNode := new(node[V])
newNode.insertAtDepth(kid.prefix, kid.value, depth+1)
n.children.InsertAt(addr, newNode)
n = newNode
default:
panic("logic error, wrong node type")
}
}
panic("unreachable")
}
// purgeAndCompress, purge empty nodes or compress nodes with single prefix or leaf.
func (n *node[V]) purgeAndCompress(parentStack []*node[V], childPath []uint8, is4 bool) {
// unwind the stack
for i := len(parentStack) - 1; i >= 0; i-- {
parent := parentStack[i]
addr := uint(childPath[i])
pfxCount := n.prefixes.Len()
childCount := n.children.Len()
switch {
case n.isEmpty():
// purge empty node
parent.children.DeleteAt(addr)
case pfxCount == 1 && childCount == 0:
// make leaf from prefix idx, shift leaf one level up
// and override current node with new leaf
idx, _ := n.prefixes.FirstSet()
val := n.prefixes.Items[0]
path := stridePath{}
copy(path[:], childPath)
pfx := cidrFromPath(path, i+1, is4, idx)
parent.children.InsertAt(addr, &leaf[V]{pfx, val})
case pfxCount == 0 && childCount == 1:
// if single child is a leaf, shift it up one level
// and override current node with this leaf
if leafPtr, ok := n.children.Items[0].(*leaf[V]); ok {
parent.children.InsertAt(addr, leafPtr)
}
}
n = parent
}
}
// lpmGet does a route lookup for idx in the 8-bit (stride) routing table
// at this depth and returns (baseIdx, value, true) if a matching
// longest prefix exists, or ok=false otherwise.
//
// The prefixes in the stride form a complete binary tree (CBT) using the baseIndex function.
// In contrast to the ART algorithm, I do not use an allotment approach but map
// the backtracking in the CBT by a bitset operation with a precalculated backtracking path
// for the respective idx.
func (n *node[V]) lpmGet(idx uint) (baseIdx uint, val V, ok bool) {
// top is the idx of the longest-prefix-match
if top, ok := n.prefixes.IntersectionTop(lpmLookupTbl[idx]); ok {
return top, n.prefixes.MustGet(top), true
}
// not found (on this level)
return 0, val, false
}
// lpmTest for faster lpm tests without value returns.
func (n *node[V]) lpmTest(idx uint) bool {
return n.prefixes.IntersectsAny(lpmLookupTbl[idx])
}
// cloneRec, clones the node recursive.
func (n *node[V]) cloneRec() *node[V] {
if n == nil {
return nil
}
c := new(node[V])
if n.isEmpty() {
return c
}
// shallow
c.prefixes = *(n.prefixes.Copy())
_, isCloner := any(*new(V)).(Cloner[V])
// deep copy if V implements Cloner[V]
if isCloner {
for i, val := range c.prefixes.Items {
c.prefixes.Items[i] = cloneOrCopy(val)
}
}
// shallow
c.children = *(n.children.Copy())
// deep copy of nodes and leaves
for i, kidAny := range c.children.Items {
switch kid := kidAny.(type) {
case *node[V]:
// clone the child node rec-descent
c.children.Items[i] = kid.cloneRec()
case *leaf[V]:
// deep copy if V implements Cloner[V]
c.children.Items[i] = kid.cloneLeaf()
default:
panic("logic error, wrong node type")
}
}
return c
}
// cloneFlat, copies the node and clone the values in prefixes and path compressed leaves
// if V implements Cloner. Used in the various ...Persist functions.
func (n *node[V]) cloneFlat() *node[V] {
if n == nil {
return nil
}
c := new(node[V])
if n.isEmpty() {
return c
}
// shallow copy
c.prefixes = *(n.prefixes.Copy())
c.children = *(n.children.Copy())
if _, ok := any(*new(V)).(Cloner[V]); !ok {
// if V doesn't implement Cloner[V], return early
return c
}
// deep copy of values in prefixes
for i, val := range c.prefixes.Items {
c.prefixes.Items[i] = cloneOrCopy(val)
}
// deep copy of values in path compressed leaves
for i, kidAny := range c.children.Items {
if kidLeaf, ok := kidAny.(*leaf[V]); ok {
c.children.Items[i] = kidLeaf.cloneLeaf()
}
}
return c
}
// allRec runs recursive the trie, starting at this node and
// the yield function is called for each route entry with prefix and value.
// If the yield function returns false the recursion ends prematurely and the
// false value is propagated.
//
// The iteration order is not defined, just the simplest and fastest recursive implementation.
func (n *node[V]) allRec(path stridePath, depth int, is4 bool, yield func(netip.Prefix, V) bool) bool {
// for all prefixes in this node do ...
allIndices := n.prefixes.AsSlice(make([]uint, 0, maxNodePrefixes))
for _, idx := range allIndices {
cidr := cidrFromPath(path, depth, is4, idx)
// callback for this prefix and val
if !yield(cidr, n.prefixes.MustGet(idx)) {
// early exit
return false
}
}
// for all children (nodes and leaves) in this node do ...
allChildAddrs := n.children.AsSlice(make([]uint, 0, maxNodeChildren))
for i, addr := range allChildAddrs {
switch kid := n.children.Items[i].(type) {
case *node[V]:
// rec-descent with this node
path[depth] = byte(addr)
if !kid.allRec(path, depth+1, is4, yield) {
// early exit
return false
}
case *leaf[V]:
// callback for this leaf
if !yield(kid.prefix, kid.value) {
// early exit
return false
}
default:
panic("logic error, wrong node type")
}
}
return true
}
// allRecSorted runs recursive the trie, starting at node and
// the yield function is called for each route entry with prefix and value.
// The iteration is in prefix sort order.
//
// If the yield function returns false the recursion ends prematurely and the
// false value is propagated.
func (n *node[V]) allRecSorted(path stridePath, depth int, is4 bool, yield func(netip.Prefix, V) bool) bool {
// get slice of all child octets, sorted by addr
allChildAddrs := n.children.AsSlice(make([]uint, 0, maxNodeChildren))
// get slice of all indexes, sorted by idx
allIndices := n.prefixes.AsSlice(make([]uint, 0, maxNodePrefixes))
// sort indices in CIDR sort order
slices.SortFunc(allIndices, cmpIndexRank)
childCursor := 0
// yield indices and childs in CIDR sort order
for _, pfxIdx := range allIndices {
pfxOctet, _ := idxToPfx(pfxIdx)
// yield all childs before idx
for j := childCursor; j < len(allChildAddrs); j++ {
childAddr := allChildAddrs[j]
if childAddr >= uint(pfxOctet) {
break
}
// yield the node (rec-descent) or leaf
switch kid := n.children.Items[j].(type) {
case *node[V]:
path[depth] = byte(childAddr)
if !kid.allRecSorted(path, depth+1, is4, yield) {
return false
}
case *leaf[V]:
if !yield(kid.prefix, kid.value) {
return false
}
default:
panic("logic error, wrong node type")
}
childCursor++
}
// yield the prefix for this idx
cidr := cidrFromPath(path, depth, is4, pfxIdx)
// n.prefixes.Items[i] not possible after sorting allIndices
if !yield(cidr, n.prefixes.MustGet(pfxIdx)) {
return false
}
}
// yield the rest of leaves and nodes (rec-descent)
for j := childCursor; j < len(allChildAddrs); j++ {
addr := allChildAddrs[j]
switch kid := n.children.Items[j].(type) {
case *node[V]:
path[depth] = byte(addr)
if !kid.allRecSorted(path, depth+1, is4, yield) {
return false
}
case *leaf[V]:
if !yield(kid.prefix, kid.value) {
return false
}
default:
panic("logic error, wrong node type")
}
}
return true
}
// unionRec combines two nodes, changing the receiver node.
// If there are duplicate entries, the value is taken from the other node.
// Count duplicate entries to adjust the t.size struct members.
func (n *node[V]) unionRec(o *node[V], depth int) (duplicates int) {
// for all prefixes in other node do ...
allIndices := o.prefixes.AsSlice(make([]uint, 0, maxNodePrefixes))
for i, oIdx := range allIndices {
// insert/overwrite prefix/value from oNode to nNode
exists := n.prefixes.InsertAt(oIdx, o.prefixes.Items[i])
// this prefix is duplicate in n and o
if exists {
duplicates++
}
}
// for all child addrs in other node do ...
allOtherChildAddrs := o.children.AsSlice(make([]uint, 0, maxNodeChildren))
LOOP:
for i, addr := range allOtherChildAddrs {
// 6 possible combinations for this child and other child child
//
// THIS, OTHER:
// ----------
// NULL, node <-- easy, insert at cloned node
// NULL, leaf <-- easy, insert at cloned leaf
// node, node <-- easy, union rec-descent
// node, leaf <-- easy, insert other cloned leaf at depth+1
// leaf, node <-- complex, push this leaf down, union rec-descent
// leaf, leaf <-- complex, push this leaf down, insert other cloned leaf at depth+1
//
// try to get child at same addr from n
thisChild, thisExists := n.children.Get(addr)
if !thisExists {
switch otherChild := o.children.Items[i].(type) {
case *node[V]: // NULL, node
if !thisExists {
n.children.InsertAt(addr, otherChild.cloneRec())
continue LOOP
}
case *leaf[V]: // NULL, leaf
if !thisExists {
n.children.InsertAt(addr, otherChild.cloneLeaf())
continue LOOP
}
default:
panic("logic error, wrong node type")
}
}
switch otherChild := o.children.Items[i].(type) {
case *node[V]:
switch this := thisChild.(type) {
case *node[V]: // node, node
// both childs have node in octet, call union rec-descent on child nodes
duplicates += this.unionRec(otherChild, depth+1)
continue LOOP
case *leaf[V]: // leaf, node
// create new node
nc := new(node[V])
// push this leaf down
nc.insertAtDepth(this.prefix, this.value, depth+1)
// insert new node at current addr
n.children.InsertAt(addr, nc)
// union rec-descent new node with other node
duplicates += nc.unionRec(otherChild, depth+1)
continue LOOP
}
case *leaf[V]:
switch this := thisChild.(type) {
case *node[V]: // node, leaf
clonedLeaf := otherChild.cloneLeaf()
if this.insertAtDepth(clonedLeaf.prefix, clonedLeaf.value, depth+1) {
duplicates++
}
continue LOOP
case *leaf[V]: // leaf, leaf
// create new node
nc := new(node[V])
// push this leaf down
nc.insertAtDepth(this.prefix, this.value, depth+1)
// insert at depth cloned leaf
clonedLeaf := otherChild.cloneLeaf()
if nc.insertAtDepth(clonedLeaf.prefix, clonedLeaf.value, depth+1) {
duplicates++
}
// insert the new node at current addr
n.children.InsertAt(addr, nc)
continue LOOP
}
default:
panic("logic error, wrong node type")
}
}
return duplicates
}
// eachLookupPrefix does an all prefix match in the 8-bit (stride) routing table
// at this depth and calls yield() for any matching CIDR.
func (n *node[V]) eachLookupPrefix(octets []byte, depth int, is4 bool, pfxLen int, yield func(netip.Prefix, V) bool) (ok bool) {
if n.prefixes.Len() == 0 {
return true
}
// octets as array, needed below more than once
var path stridePath
copy(path[:], octets)
// backtracking the CBT
for idx := pfxToIdx(octets[depth], pfxLen); idx > 0; idx >>= 1 {
if n.prefixes.Test(idx) {
val := n.prefixes.MustGet(idx)
cidr := cidrFromPath(path, depth, is4, idx)
if !yield(cidr, val) {
return false
}
}
}
return true
}
// eachSubnet calls yield() for any covered CIDR by parent prefix in natural CIDR sort order.
func (n *node[V]) eachSubnet(octets []byte, depth int, is4 bool, pfxLen int, yield func(netip.Prefix, V) bool) bool {
// octets as array, needed below more than once
var path stridePath
copy(path[:], octets)
pfxFirstAddr := uint(octets[depth])
pfxLastAddr := uint(octets[depth] | ^netMask(pfxLen))
// 1. collect all indices in n covered by prefix
allCoveredIndices := make([]uint, 0, maxNodePrefixes)
for _, idx := range n.prefixes.AsSlice(make([]uint, 0, maxNodePrefixes)) {
thisOctet, thisPfxLen := idxToPfx(idx)
thisFirstAddr := uint(thisOctet)
thisLastAddr := uint(thisOctet | ^netMask(thisPfxLen))
if thisFirstAddr >= pfxFirstAddr && thisLastAddr <= pfxLastAddr {
allCoveredIndices = append(allCoveredIndices, idx)
}
}
// sort indices in CIDR sort order
slices.SortFunc(allCoveredIndices, cmpIndexRank)
// 2. collect all covered child addrs by prefix
allCoveredChildAddrs := make([]uint, 0, maxNodeChildren)
for _, addr := range n.children.AsSlice(make([]uint, 0, maxNodeChildren)) {
if addr >= pfxFirstAddr && addr <= pfxLastAddr {
allCoveredChildAddrs = append(allCoveredChildAddrs, addr)
}
}
// 3. yield covered indices, pathcomp prefixes and childs in CIDR sort order
addrCursor := 0
// yield indices and childs in CIDR sort order
for _, pfxIdx := range allCoveredIndices {
pfxOctet, _ := idxToPfx(pfxIdx)
// yield all childs before idx
for j := addrCursor; j < len(allCoveredChildAddrs); j++ {
addr := allCoveredChildAddrs[j]
if addr >= uint(pfxOctet) {
break
}
// yield the node or leaf?
switch kid := n.children.MustGet(addr).(type) {
case *node[V]:
path[depth] = byte(addr)
if !kid.allRecSorted(path, depth+1, is4, yield) {
return false
}
case *leaf[V]:
if !yield(kid.prefix, kid.value) {
return false
}
default:
panic("logic error, wrong node type")
}
addrCursor++
}
// yield the prefix for this idx
cidr := cidrFromPath(path, depth, is4, pfxIdx)
// n.prefixes.Items[i] not possible after sorting allIndices
if !yield(cidr, n.prefixes.MustGet(pfxIdx)) {
return false
}
}
// yield the rest of leaves and nodes (rec-descent)
for _, addr := range allCoveredChildAddrs[addrCursor:] {
// yield the node or leaf?
switch kid := n.children.MustGet(addr).(type) {
case *node[V]:
path[depth] = byte(addr)
if !kid.allRecSorted(path, depth+1, is4, yield) {
return false
}
case *leaf[V]:
if !yield(kid.prefix, kid.value) {
return false
}
default:
panic("logic error, wrong node type")
}
}
return true
}