2023-04-12 19:36:57 +00:00
|
|
|
"""Bluesky / AT Protocol Merkle search tree implementation.
|
|
|
|
|
|
|
|
* https://atproto.com/guides/data-repos
|
|
|
|
* https://atproto.com/lexicons/com-atproto-sync
|
|
|
|
* https://hal.inria.fr/hal-02303490/document
|
|
|
|
|
|
|
|
Heavily based on:
|
|
|
|
https://github.com/snarfed/atproto/blob/main/packages/repo/src/mst/mst.ts
|
|
|
|
|
|
|
|
Huge thanks to the Bluesky team for working in the public, in open source, and to
|
2023-04-13 04:26:17 +00:00
|
|
|
Daniel Holmgren and Devin Ivy for this code specifically!
|
|
|
|
|
|
|
|
Notable differences:
|
|
|
|
* All in memory, no block storage (yet)
|
|
|
|
X * MST class is mutable, not immutable
|
|
|
|
|
|
|
|
From that file:
|
2023-04-12 19:36:57 +00:00
|
|
|
|
|
|
|
This is an implementation of a Merkle Search Tree (MST)
|
|
|
|
The data structure is described here: https://hal.inria.fr/hal-02303490/document
|
|
|
|
The MST is an ordered, insert-order-independent, deterministic tree.
|
|
|
|
Keys are laid out in alphabetic order.
|
|
|
|
The key insight of an MST is that each key is hashed and starting 0s are counted
|
|
|
|
to determine which layer it falls on (5 zeros for ~32 fanout).
|
|
|
|
This is a merkle tree, so each subtree is referred to by its hash (CID).
|
|
|
|
When a leaf is changed, every tree on the path to that leaf is changed as well,
|
|
|
|
thereby updating the root hash.
|
|
|
|
|
|
|
|
For atproto, we use SHA-256 as the key hashing algorithm, and ~4 fanout
|
|
|
|
(2-bits of zero per layer).
|
|
|
|
|
|
|
|
A couple notes on CBOR encoding:
|
|
|
|
|
|
|
|
There are never two neighboring subtrees.
|
|
|
|
Therefore, we can represent a node as an array of
|
|
|
|
leaves & pointers to their right neighbor (possibly null),
|
|
|
|
along with a pointer to the left-most subtree (also possibly null).
|
|
|
|
|
|
|
|
Most keys in a subtree will have overlap.
|
|
|
|
We do compression on prefixes by describing keys as:
|
|
|
|
* the length of the prefix that it shares in common with the preceding key
|
|
|
|
* the rest of the string
|
|
|
|
|
|
|
|
For example:
|
|
|
|
|
|
|
|
If the first leaf in a tree is `bsky/posts/abcdefg` and the second is
|
|
|
|
`bsky/posts/abcdehi` Then the first will be described as `prefix: 0, key:
|
|
|
|
'bsky/posts/abcdefg'`, and the second will be described as `prefix: 16, key:
|
|
|
|
'hi'.`
|
|
|
|
"""
|
2023-04-13 04:26:17 +00:00
|
|
|
from collections import namedtuple
|
|
|
|
from hashlib import sha256
|
2023-04-12 19:36:57 +00:00
|
|
|
from os.path import commonprefix
|
|
|
|
import re
|
|
|
|
|
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
# Serialized form of a single leaf inside an MST node.
#   p: int, length of prefix that this key shares with the prev key
#   k: bytes, the rest of the key outside the shared prefix
#   v: str CID, value
#   t: str CID, next subtree (to the right of leaf), or None
Entry = namedtuple('Entry', 'p k v t')

# Serialized form of an MST node.
#   l: str CID, left-most subtree, or None
#   e: list of Entry
Data = namedtuple('Data', 'l e')

# In-memory leaf held in an MST's entries.
#   key: str, presumably the record key -- TODO confirm
#   value: presumably a CID -- TODO confirm
Leaf = namedtuple('Leaf', 'key value')
|
2023-04-12 19:36:57 +00:00
|
|
|
|
|
|
|
|
|
|
|
class MST:
|
2023-04-13 04:26:17 +00:00
|
|
|
"""Merkle search tree class.
|
|
|
|
|
|
|
|
Attributes:
|
|
|
|
entries: sequence of :class:`MST` and :class:`Leaf`
|
|
|
|
layer: int, this MST's layer in the root MST
|
|
|
|
pointer: :class:`CID`
|
|
|
|
outdated_pointer: boolean, ???
|
|
|
|
"""
|
|
|
|
entries = None
|
|
|
|
layer = None
|
|
|
|
pointer = None
|
|
|
|
outdated_pointer = False
|
2023-04-12 19:36:57 +00:00
|
|
|
|
|
|
|
# def __init__(
|
|
|
|
# pointer: CID,
|
|
|
|
# entries: NodeEntry[],
|
|
|
|
# layer: number,
|
|
|
|
# ):
|
|
|
|
# assert pointer
|
2023-04-13 04:26:17 +00:00
|
|
|
# self.entries = entries
|
|
|
|
# self.layer = layer
|
|
|
|
# self.pointer = pointer
|
2023-04-12 19:36:57 +00:00
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
def __init__(self):
|
|
|
|
"""Constructor."""
|
|
|
|
# self.pointer = cid_for_entries(entries)
|
|
|
|
# { layer = None } = opts or {}
|
|
|
|
# return MST(pointer, entries, layer)
|
2023-04-12 19:36:57 +00:00
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
# def from_data(
|
2023-04-12 19:36:57 +00:00
|
|
|
# data: NodeData,
|
|
|
|
# opts?: Partial<MstOpts>,
|
|
|
|
# ):
|
|
|
|
# """
|
|
|
|
# Returns:
|
|
|
|
# MST
|
|
|
|
# """
|
|
|
|
# { layer = None } = opts or {}
|
2023-04-13 04:26:17 +00:00
|
|
|
# entries = deserialize_node_data(data, opts)
|
|
|
|
# pointer = cid_for_cbor(data)
|
2023-04-12 19:36:57 +00:00
|
|
|
# return new MST(pointer, entries, layer)
|
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
def __eq__(self, other):
|
|
|
|
if isinstance(other, MST):
|
|
|
|
return self.get_pointer() == other.get_pointer()
|
|
|
|
|
2023-04-12 19:36:57 +00:00
|
|
|
# # Getters (lazy load)
|
|
|
|
# # -------------------
|
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
# We don't want to load entries of every subtree, just the ones we need
|
|
|
|
def get_entries(self):
|
|
|
|
"""
|
|
|
|
Returns:
|
|
|
|
sequence of :class:`MST` and :class:`Leaf`
|
|
|
|
"""
|
|
|
|
# if self.entries:
|
|
|
|
# return [...self.entries]
|
2023-04-12 19:36:57 +00:00
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
# if self.pointer:
|
|
|
|
# data = self.storage.read_obj(self.pointer, node_data_def)
|
|
|
|
# first_leaf = data.e[0]
|
2023-04-12 19:36:57 +00:00
|
|
|
# layer =
|
2023-04-13 04:26:17 +00:00
|
|
|
# first_leaf != undefined
|
|
|
|
# ? leading_zeros_on_hash(first_leaf.k)
|
2023-04-12 19:36:57 +00:00
|
|
|
# : undefined
|
2023-04-13 04:26:17 +00:00
|
|
|
# self.entries = deserialize_node_data(self.storage, data, {
|
2023-04-12 19:36:57 +00:00
|
|
|
# layer,
|
|
|
|
# })
|
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
# return self.entries
|
2023-04-12 19:36:57 +00:00
|
|
|
# throw new Error('No entries or CID provided')
|
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
def get_pointer(self):
|
|
|
|
"""Returns this MST's root CID ??? pointer. Calculates it if needed.
|
|
|
|
|
|
|
|
We don't hash the node on every mutation for performance reasons. Instead
|
|
|
|
we keep track of whether the pointer is outdated and only (recursively)
|
|
|
|
calculate when needed.
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
:class:`CID`
|
|
|
|
"""
|
|
|
|
# if not self.outdated_pointer:
|
|
|
|
# return self.pointer
|
|
|
|
# entries = self.get_entries()
|
2023-04-12 19:36:57 +00:00
|
|
|
# outdated = entries.filter(
|
2023-04-13 04:26:17 +00:00
|
|
|
# (e) => e.is_tree() and e.outdated_pointer,
|
2023-04-12 19:36:57 +00:00
|
|
|
# ) as MST[]
|
|
|
|
# if outdated.length > 0:
|
2023-04-13 04:26:17 +00:00
|
|
|
# Promise.all(outdated.map((e) => e.get_pointer()))
|
|
|
|
# entries = self.get_entries()
|
|
|
|
# self.pointer = cid_for_entries(entries)
|
|
|
|
# self.outdated_pointer = false
|
|
|
|
# return self.pointer
|
|
|
|
|
|
|
|
def get_layer(self):
|
|
|
|
"""Returns this MST's layer, and sets self.layer.
|
|
|
|
|
|
|
|
In most cases, we get the layer of a node from a hint on creation. In the
|
|
|
|
case of the topmost node in the tree, we look for a key in the node &
|
|
|
|
determine the layer. In the case where we don't find one, we recurse down
|
|
|
|
until we do. If we still can't find one, then we have an empty tree and the
|
|
|
|
node is layer 0.
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
int
|
|
|
|
"""
|
|
|
|
# self.layer = self.attempt_get_layer()
|
|
|
|
# if self.layer == None:
|
|
|
|
# self.layer = 0
|
|
|
|
# return self.layer
|
2023-04-12 19:36:57 +00:00
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
def attempt_get_layer(self):
|
|
|
|
"""Returns this MST's layer, and sets self.layer.
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
int or None
|
|
|
|
"""
|
|
|
|
# if self.layer != None:
|
|
|
|
# return self.layer
|
|
|
|
# entries = self.get_entries()
|
|
|
|
# layer = layer_for_entries(entries)
|
2023-04-12 19:36:57 +00:00
|
|
|
# if layer == None:
|
|
|
|
# for entry in entries:
|
2023-04-13 04:26:17 +00:00
|
|
|
# if entry.is_tree():
|
|
|
|
# child_layer = entry.attempt_get_layer()
|
|
|
|
# if child_layer != None:
|
|
|
|
# layer = child_layer + 1
|
2023-04-12 19:36:57 +00:00
|
|
|
# break
|
|
|
|
# if layer != None:
|
2023-04-13 04:26:17 +00:00
|
|
|
# self.layer = layer
|
2023-04-12 19:36:57 +00:00
|
|
|
# return layer
|
|
|
|
|
|
|
|
# Core functionality
|
|
|
|
# -------------------
|
|
|
|
|
|
|
|
def add(self, key, value=None, known_zeros=None):
|
2023-04-13 04:26:17 +00:00
|
|
|
"""Adds a new leaf for the given key/value pair.
|
|
|
|
|
2023-04-12 19:36:57 +00:00
|
|
|
Args:
|
|
|
|
key: str
|
|
|
|
value: :class:`CID`
|
|
|
|
known_zeros: int
|
2023-04-13 04:26:17 +00:00
|
|
|
|
|
|
|
Raises:
|
|
|
|
ValueError if a leaf with that key already exists
|
2023-04-12 19:36:57 +00:00
|
|
|
"""
|
|
|
|
ensure_valid_key(key)
|
2023-04-13 04:26:17 +00:00
|
|
|
# key_zeros = known_zeros ?? (leading_zeros_on_hash(key))
|
|
|
|
# layer = self.get_layer()
|
|
|
|
# new_leaf = new Leaf(key, value)
|
|
|
|
# if key_zeros == layer:
|
|
|
|
# # it belongs in self layer
|
|
|
|
# index = self.find_gt_or_equal_leaf_index(key)
|
|
|
|
# found = self.at_index(index)
|
|
|
|
# if found.is_leaf() and found.key == key:
|
2023-04-12 19:36:57 +00:00
|
|
|
# throw new Error(`There is already a value at key: ${key}`)
|
2023-04-13 04:26:17 +00:00
|
|
|
# prev_node = self.at_index(index - 1)
|
|
|
|
# if not prev_node or prev_node.is_leaf():
|
2023-04-12 19:36:57 +00:00
|
|
|
# # if entry before is a leaf, (or we're on far left) we can just splice in
|
2023-04-13 04:26:17 +00:00
|
|
|
# return self.splice_in(new_leaf, index)
|
2023-04-12 19:36:57 +00:00
|
|
|
# else:
|
|
|
|
# # else we try to split the subtree around the key
|
2023-04-13 04:26:17 +00:00
|
|
|
# split_sub_tree = prev_node.split_around(key)
|
|
|
|
# return self.replace_with_split(
|
2023-04-12 19:36:57 +00:00
|
|
|
# index - 1,
|
2023-04-13 04:26:17 +00:00
|
|
|
# split_sub_tree[0],
|
|
|
|
# new_leaf,
|
|
|
|
# split_sub_tree[1],
|
2023-04-12 19:36:57 +00:00
|
|
|
# )
|
2023-04-13 04:26:17 +00:00
|
|
|
# else if key_zeros < layer:
|
2023-04-12 19:36:57 +00:00
|
|
|
# # it belongs on a lower layer
|
2023-04-13 04:26:17 +00:00
|
|
|
# index = self.find_gt_or_equal_leaf_index(key)
|
|
|
|
# prev_node = self.at_index(index - 1)
|
|
|
|
# if prev_node and prev_node.is_tree():
|
2023-04-12 19:36:57 +00:00
|
|
|
# # if entry before is a tree, we add it to that tree
|
2023-04-13 04:26:17 +00:00
|
|
|
# new_subtree = prev_node.add(key, value, key_zeros)
|
|
|
|
# return self.update_entry(index - 1, new_subtree)
|
2023-04-12 19:36:57 +00:00
|
|
|
# else:
|
2023-04-13 04:26:17 +00:00
|
|
|
# sub_tree = self.create_child()
|
|
|
|
# new_sub_tree = sub_tree.add(key, value, key_zeros)
|
|
|
|
# return self.splice_in(new_sub_tree, index)
|
2023-04-12 19:36:57 +00:00
|
|
|
# else:
|
|
|
|
# # it belongs on a higher layer & we must push the rest of the tree down
|
2023-04-13 04:26:17 +00:00
|
|
|
# split = self.split_around(key)
|
2023-04-12 19:36:57 +00:00
|
|
|
# # if the newly added key has >=2 more leading zeros than the current highest layer
|
|
|
|
# # then we need to add in structural nodes in between as well
|
|
|
|
# left: MST | None = split[0]
|
|
|
|
# right: MST | None = split[1]
|
2023-04-13 04:26:17 +00:00
|
|
|
# layer = self.get_layer()
|
|
|
|
# extra_layers_to_add = key_zeros - layer
|
2023-04-12 19:36:57 +00:00
|
|
|
# # intentionally starting at 1, since first layer is taken care of by split
|
2023-04-13 04:26:17 +00:00
|
|
|
# for i in range(1, extra_layers_to_add):
|
2023-04-12 19:36:57 +00:00
|
|
|
# if left != None:
|
2023-04-13 04:26:17 +00:00
|
|
|
# left = left.create_parent()
|
2023-04-12 19:36:57 +00:00
|
|
|
# if right != None:
|
2023-04-13 04:26:17 +00:00
|
|
|
# right = right.create_parent()
|
2023-04-12 19:36:57 +00:00
|
|
|
# updated: NodeEntry[] = []
|
|
|
|
# if left:
|
|
|
|
# updated.push(left)
|
|
|
|
# updated.push(new Leaf(key, value))
|
|
|
|
# if right:
|
|
|
|
# updated.push(right)
|
2023-04-13 04:26:17 +00:00
|
|
|
# new_root = MST.create(updated, {
|
|
|
|
# layer: key_zeros,
|
2023-04-12 19:36:57 +00:00
|
|
|
# })
|
2023-04-13 04:26:17 +00:00
|
|
|
# new_root.outdated_pointer = true
|
|
|
|
# return new_root
|
2023-04-12 19:36:57 +00:00
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
def get(self, key):
|
|
|
|
"""Gets the value at the given key.
|
|
|
|
|
|
|
|
Args:
|
|
|
|
key: str
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
:class:`CID` or None
|
|
|
|
"""
|
|
|
|
# index = self.find_gt_or_equal_leaf_index(key)
|
|
|
|
# found = self.at_index(index)
|
|
|
|
# if found and found.is_leaf() and found.key == key:
|
2023-04-12 19:36:57 +00:00
|
|
|
# return found.value
|
2023-04-13 04:26:17 +00:00
|
|
|
# prev = self.at_index(index - 1)
|
|
|
|
# if prev and prev.is_tree():
|
2023-04-12 19:36:57 +00:00
|
|
|
# return prev.get(key)
|
|
|
|
# return None
|
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
def update(self, key, value):
|
|
|
|
"""Edits the value at the given key
|
|
|
|
|
|
|
|
Args:
|
|
|
|
key: str
|
|
|
|
value: :class:`CID`
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
MST
|
|
|
|
|
|
|
|
Raises:
|
|
|
|
KeyError if key doesn't exist
|
|
|
|
"""
|
|
|
|
# ensure_valid_key(key)
|
|
|
|
# index = self.find_gt_or_equal_leaf_index(key)
|
|
|
|
# found = self.at_index(index)
|
|
|
|
# if found and found.is_leaf() and found.key == key:
|
|
|
|
# return self.update_entry(index, new Leaf(key, value))
|
|
|
|
# prev = self.at_index(index - 1)
|
|
|
|
# if prev and prev.is_tree():
|
|
|
|
# updated_tree = prev.update(key, value)
|
|
|
|
# return self.update_entry(index - 1, updated_tree)
|
2023-04-12 19:36:57 +00:00
|
|
|
# throw new Error(`Could not find a record with key: ${key}`)
|
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
def delete(self, key):
|
|
|
|
"""Deletes the value at the given key.
|
2023-04-12 19:36:57 +00:00
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
Args:
|
|
|
|
key: str
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
:class:`MST`
|
|
|
|
|
|
|
|
Raises:
|
|
|
|
KeyError if key doesn't exist
|
|
|
|
"""
|
|
|
|
# altered = self.delete_recurse(key)
|
|
|
|
# return altered.trim_top()
|
|
|
|
|
|
|
|
def delete_recurse(self, key):
|
|
|
|
"""Deletes the value and subtree, if any, at the given key.
|
|
|
|
|
|
|
|
Args:
|
|
|
|
key: str
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
:class:`MST`
|
|
|
|
"""
|
|
|
|
# index = self.find_gt_or_equal_leaf_index(key)
|
|
|
|
# found = self.at_index(index)
|
|
|
|
# # if found, remove it on self level
|
|
|
|
# if found.is_leaf() and found.key == key:
|
|
|
|
# prev = self.at_index(index - 1)
|
|
|
|
# next = self.at_index(index + 1)
|
|
|
|
# if prev.is_tree() and next.is_tree():
|
|
|
|
# merged = prev.append_merge(next)
|
|
|
|
# return self.new_tree([
|
|
|
|
# ...(self.slice(0, index - 1)),
|
2023-04-12 19:36:57 +00:00
|
|
|
# merged,
|
2023-04-13 04:26:17 +00:00
|
|
|
# ...(self.slice(index + 2)),
|
2023-04-12 19:36:57 +00:00
|
|
|
# ])
|
|
|
|
# else:
|
2023-04-13 04:26:17 +00:00
|
|
|
# return self.remove_entry(index)
|
2023-04-12 19:36:57 +00:00
|
|
|
# # else recurse down to find it
|
2023-04-13 04:26:17 +00:00
|
|
|
# prev = self.at_index(index - 1)
|
|
|
|
# if prev.is_tree():
|
|
|
|
# subtree = prev.delete_recurse(key)
|
|
|
|
# sub_tree_entries = subtree.get_entries()
|
|
|
|
# if sub_tree_entries.length == 0:
|
|
|
|
# return self.remove_entry(index - 1)
|
2023-04-12 19:36:57 +00:00
|
|
|
# else:
|
2023-04-13 04:26:17 +00:00
|
|
|
# return self.update_entry(index - 1, subtree)
|
2023-04-12 19:36:57 +00:00
|
|
|
# else:
|
|
|
|
# throw new Error(`Could not find a record with key: ${key}`)
|
|
|
|
|
|
|
|
# # Simple Operations
|
|
|
|
# # -------------------
|
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
def update_entry(self, index, entry):
|
|
|
|
"""Updates an entry in place.
|
|
|
|
|
|
|
|
Args:
|
|
|
|
index: int
|
|
|
|
entry: :class:`MST` or :class:`Leaf`
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
MST
|
|
|
|
"""
|
2023-04-12 19:36:57 +00:00
|
|
|
# update = [
|
2023-04-13 04:26:17 +00:00
|
|
|
# ...(self.slice(0, index)),
|
2023-04-12 19:36:57 +00:00
|
|
|
# entry,
|
2023-04-13 04:26:17 +00:00
|
|
|
# ...(self.slice(index + 1)),
|
2023-04-12 19:36:57 +00:00
|
|
|
# ]
|
2023-04-13 04:26:17 +00:00
|
|
|
# return self.new_tree(update)
|
2023-04-12 19:36:57 +00:00
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
def remove_entry(self, index):
|
|
|
|
"""Removes the entry at a given index.
|
|
|
|
|
|
|
|
Args:
|
|
|
|
index: int
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
MST
|
|
|
|
"""
|
2023-04-12 19:36:57 +00:00
|
|
|
# updated = [
|
2023-04-13 04:26:17 +00:00
|
|
|
# ...(self.slice(0, index)),
|
|
|
|
# ...(self.slice(index + 1)),
|
2023-04-12 19:36:57 +00:00
|
|
|
# ]
|
2023-04-13 04:26:17 +00:00
|
|
|
# return self.new_tree(updated)
|
2023-04-12 19:36:57 +00:00
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
def append(self, entry):
|
|
|
|
"""Appends an entry to the end of the node.
|
2023-04-12 19:36:57 +00:00
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
Args:
|
|
|
|
entry: :class:`MST` or :class:`Leaf`
|
2023-04-12 19:36:57 +00:00
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
Returns:
|
|
|
|
MST
|
|
|
|
"""
|
|
|
|
# entries = self.get_entries()
|
|
|
|
# return self.new_tree([...entries, entry])
|
|
|
|
|
|
|
|
def prepend(self, entry):
|
|
|
|
"""Prepends an entry to the start of the node.
|
|
|
|
|
|
|
|
Args:
|
|
|
|
entry: :class:`MST` or :class:`Leaf`
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
MST
|
|
|
|
"""
|
|
|
|
# entries = self.get_entries()
|
|
|
|
# return self.new_tree([entry, ...entries])
|
|
|
|
|
|
|
|
def at_index(self, index):
|
|
|
|
"""Returns the entry at a given index.
|
|
|
|
|
|
|
|
Args:
|
|
|
|
index: int
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
:class:`MST` or :class:`Leaf` or None
|
|
|
|
"""
|
|
|
|
# entries = self.get_entries()
|
2023-04-12 19:36:57 +00:00
|
|
|
# return entries[index] ?? None
|
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
def slice(self, start=None, end=None):
|
|
|
|
"""Returns a slice of this node.
|
|
|
|
|
|
|
|
Args:
|
|
|
|
start: int, optional
|
|
|
|
end: int, optional
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
sequence of :class:`MST` and :class:`Leaf`
|
|
|
|
"""
|
|
|
|
# entries = self.get_entries()
|
2023-04-12 19:36:57 +00:00
|
|
|
# return entries.slice(start, end)
|
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
def splice_in(self, entry, index):
|
|
|
|
"""Inserts an entry at a given index.
|
|
|
|
|
|
|
|
Args:
|
|
|
|
entry: :class:`MST` or :class:`Leaf`
|
|
|
|
index: int
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
MST
|
|
|
|
"""
|
2023-04-12 19:36:57 +00:00
|
|
|
# update = [
|
2023-04-13 04:26:17 +00:00
|
|
|
# ...(self.slice(0, index)),
|
2023-04-12 19:36:57 +00:00
|
|
|
# entry,
|
2023-04-13 04:26:17 +00:00
|
|
|
# ...(self.slice(index)),
|
2023-04-12 19:36:57 +00:00
|
|
|
# ]
|
2023-04-13 04:26:17 +00:00
|
|
|
# return self.new_tree(update)
|
|
|
|
|
|
|
|
def replace_with_split(self, index, left=None, leaf=None, right=None):
|
|
|
|
"""Replaces an entry with [ Maybe(tree), Leaf, Maybe(tree) ].
|
|
|
|
|
|
|
|
Args:
|
|
|
|
index: int
|
|
|
|
left: :class:`MST` or :class:`Leaf`
|
|
|
|
leaf: :class:`Leaf`
|
|
|
|
right: :class:`MST` or :class:`Leaf`
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
MST
|
|
|
|
"""
|
|
|
|
# update = self.slice(0, index)
|
2023-04-12 19:36:57 +00:00
|
|
|
# if left:
|
|
|
|
# update.push(left)
|
|
|
|
# update.push(leaf)
|
|
|
|
# if right:
|
|
|
|
# update.push(right)
|
2023-04-13 04:26:17 +00:00
|
|
|
# update.push(...(self.slice(index + 1)))
|
|
|
|
# return self.new_tree(update)
|
2023-04-12 19:36:57 +00:00
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
def trim_top(self):
|
|
|
|
"""Trims the top and return its subtree, if necessary.
|
|
|
|
|
|
|
|
Only if the topmost node in the tree only points to another tree.
|
|
|
|
Otherwise, does nothing.
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
MST
|
|
|
|
"""
|
|
|
|
# entries = self.get_entries()
|
|
|
|
# if entries.length == 1 and entries[0].is_tree():
|
|
|
|
# return entries[0].trim_top()
|
2023-04-12 19:36:57 +00:00
|
|
|
# else:
|
2023-04-13 04:26:17 +00:00
|
|
|
# return self
|
2023-04-12 19:36:57 +00:00
|
|
|
|
|
|
|
# # Subtree & Splits
|
|
|
|
# # -------------------
|
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
def split_around(self, key):
|
|
|
|
"""Recursively splits a subtree around a given key.
|
|
|
|
|
|
|
|
Args:
|
|
|
|
key: str
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
tuple, (:class:`MST` or None, :class:`MST` or None)
|
|
|
|
"""
|
|
|
|
# index = self.find_gt_or_equal_leaf_index(key)
|
2023-04-12 19:36:57 +00:00
|
|
|
# # split tree around key
|
2023-04-13 04:26:17 +00:00
|
|
|
# left_data = self.slice(0, index)
|
|
|
|
# right_data = self.slice(index)
|
|
|
|
# left = self.new_tree(left_data)
|
|
|
|
# right = self.new_tree(right_data)
|
2023-04-12 19:36:57 +00:00
|
|
|
|
|
|
|
# # if the far right of the left side is a subtree,
|
|
|
|
# # we need to split it on the key as well
|
2023-04-13 04:26:17 +00:00
|
|
|
# last_in_left = left_data[left_data.length - 1]
|
|
|
|
# if last_in_left.is_tree():
|
|
|
|
# left = left.remove_entry(left_data.length - 1)
|
|
|
|
# split = last_in_left.split_around(key)
|
2023-04-12 19:36:57 +00:00
|
|
|
# if split[0]:
|
|
|
|
# left = left.append(split[0])
|
|
|
|
# if split[1]:
|
|
|
|
# right = right.prepend(split[1])
|
|
|
|
|
|
|
|
# return [
|
2023-04-13 04:26:17 +00:00
|
|
|
# (left.get_entries()).length > 0 ? left : None,
|
|
|
|
# (right.get_entries()).length > 0 ? right : None,
|
2023-04-12 19:36:57 +00:00
|
|
|
# ]
|
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
def append_merge(self, to_merge):
|
|
|
|
"""Merges another tree with this one.
|
2023-04-12 19:36:57 +00:00
|
|
|
# # The simple merge case where every key in the right tree is greater than every key in the left tree
|
|
|
|
# # (used primarily for deletes)
|
2023-04-13 04:26:17 +00:00
|
|
|
|
|
|
|
Args:
|
|
|
|
to_merge: :class:`MST`
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
MST
|
|
|
|
"""
|
|
|
|
# assert self.get_layer() == to_merge.get_layer(), \
|
2023-04-12 19:36:57 +00:00
|
|
|
# 'Trying to merge two nodes from different layers of the MST'
|
2023-04-13 04:26:17 +00:00
|
|
|
# self_entries = self.get_entries()
|
|
|
|
# to_merge_entries = to_merge.get_entries()
|
|
|
|
# last_in_left = self_entries[self_entries.length - 1]
|
|
|
|
# first_in_right = to_merge_entries[0]
|
|
|
|
# if last_in_left.is_tree() and first_in_right.is_tree():
|
|
|
|
# merged = last_in_left.append_merge(first_in_right)
|
|
|
|
# return self.new_tree([
|
|
|
|
# ...self_entries.slice(0, self_entries.length - 1),
|
2023-04-12 19:36:57 +00:00
|
|
|
# merged,
|
2023-04-13 04:26:17 +00:00
|
|
|
# ...to_merge_entries.slice(1),
|
2023-04-12 19:36:57 +00:00
|
|
|
# ])
|
|
|
|
# else:
|
2023-04-13 04:26:17 +00:00
|
|
|
# return self.new_tree([...self_entries, ...to_merge_entries])
|
2023-04-12 19:36:57 +00:00
|
|
|
|
|
|
|
# # Create relatives
|
|
|
|
# # -------------------
|
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
def create_child(self):
|
|
|
|
"""
|
|
|
|
Returns:
|
|
|
|
MST
|
|
|
|
"""
|
|
|
|
# layer = self.get_layer()
|
2023-04-12 19:36:57 +00:00
|
|
|
# return MST.create([], {
|
|
|
|
# layer: layer - 1,
|
|
|
|
# })
|
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
def create_parent(self):
|
|
|
|
"""
|
|
|
|
Returns:
|
|
|
|
MST
|
|
|
|
"""
|
|
|
|
# layer = self.get_layer()
|
|
|
|
# parent = MST.create([self], {
|
2023-04-12 19:36:57 +00:00
|
|
|
# layer: layer + 1,
|
|
|
|
# })
|
2023-04-13 04:26:17 +00:00
|
|
|
# parent.outdated_pointer = true
|
2023-04-12 19:36:57 +00:00
|
|
|
# return parent
|
|
|
|
|
|
|
|
# # Finding insertion points
|
|
|
|
# # -------------------
|
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
def find_gt_or_equal_leaf_index(self, key):
|
|
|
|
"""Finds the index of the first leaf node greater than or equal to the value.
|
|
|
|
|
|
|
|
Args:
|
|
|
|
key: str
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
int
|
|
|
|
"""
|
|
|
|
# entries = self.get_entries()
|
|
|
|
# maybe_index = entries.find_index(
|
|
|
|
# (entry) => entry.is_leaf() and entry.key >= key,
|
2023-04-12 19:36:57 +00:00
|
|
|
# )
|
|
|
|
# # if we can't find, we're on the end
|
2023-04-13 04:26:17 +00:00
|
|
|
# return maybe_index >= 0 ? maybe_index : entries.length
|
2023-04-12 19:36:57 +00:00
|
|
|
|
|
|
|
# # List operations (partial tree traversal)
|
|
|
|
# # -------------------
|
|
|
|
|
|
|
|
# # @TODO write tests for these
|
|
|
|
|
|
|
|
# # Walk tree starting at key
|
2023-04-13 04:26:17 +00:00
|
|
|
# def walk_leaves_from(key: string): AsyncIterable<Leaf>:
|
|
|
|
# index = self.find_gt_or_equal_leaf_index(key)
|
|
|
|
# entries = self.get_entries()
|
2023-04-12 19:36:57 +00:00
|
|
|
# prev = entries[index - 1]
|
2023-04-13 04:26:17 +00:00
|
|
|
# if prev and prev.is_tree():
|
|
|
|
# for e in prev.walk_leaves_from(key):
|
2023-04-12 19:36:57 +00:00
|
|
|
# yield e
|
|
|
|
# for entry in entries[index:]:
|
2023-04-13 04:26:17 +00:00
|
|
|
# if entry.is_leaf():
|
2023-04-12 19:36:57 +00:00
|
|
|
# yield entry
|
|
|
|
# else:
|
2023-04-13 04:26:17 +00:00
|
|
|
# for e in entry.walk_leaves_from(key):
|
2023-04-12 19:36:57 +00:00
|
|
|
# yield e
|
|
|
|
|
|
|
|
# def list(
|
|
|
|
# count = Number.MAX_SAFE_INTEGER,
|
|
|
|
# after?: string,
|
|
|
|
# before?: string,
|
|
|
|
# ):
|
|
|
|
# """
|
|
|
|
# Returns:
|
|
|
|
# Leaf[]
|
|
|
|
# """
|
|
|
|
# vals: Leaf[] = []
|
2023-04-13 04:26:17 +00:00
|
|
|
# for leaf in self.walk_leaves_from(after or ''):
|
2023-04-12 19:36:57 +00:00
|
|
|
# if leaf.key == after:
|
|
|
|
# continue
|
|
|
|
# if vals.length >= count:
|
|
|
|
# break
|
|
|
|
# if before and leaf.key >= before:
|
|
|
|
# break
|
|
|
|
# vals.push(leaf)
|
|
|
|
# return vals
|
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
# def list_with_prefix(
|
2023-04-12 19:36:57 +00:00
|
|
|
# prefix: string,
|
|
|
|
# count = Number.MAX_SAFE_INTEGER,
|
|
|
|
# ):
|
|
|
|
# """
|
|
|
|
# Returns:
|
|
|
|
# Leaf[]
|
|
|
|
# """
|
|
|
|
# vals: Leaf[] = []
|
2023-04-13 04:26:17 +00:00
|
|
|
# for leaf in self.walk_leaves_from(prefix):
|
|
|
|
# if vals.length >= count or !leaf.key.starts_with(prefix):
|
2023-04-12 19:36:57 +00:00
|
|
|
# break
|
|
|
|
# vals.push(leaf)
|
|
|
|
# return vals
|
|
|
|
|
|
|
|
# # Full tree traversal
|
|
|
|
# # -------------------
|
|
|
|
|
|
|
|
# # Walk full tree & emit nodes, consumer can bail at any point by returning false
|
|
|
|
# def walk(): AsyncIterable<NodeEntry>:
|
2023-04-13 04:26:17 +00:00
|
|
|
# yield self
|
|
|
|
# entries = self.get_entries()
|
2023-04-12 19:36:57 +00:00
|
|
|
# for entry in entries:
|
2023-04-13 04:26:17 +00:00
|
|
|
# if entry.is_tree():
|
2023-04-12 19:36:57 +00:00
|
|
|
# for e in entry.walk():
|
|
|
|
# yield e
|
|
|
|
# else:
|
|
|
|
# yield entry
|
|
|
|
|
|
|
|
# # Walk full tree & emit nodes, consumer can bail at any point by returning false
|
|
|
|
# def paths():
|
|
|
|
# """
|
|
|
|
# Returns:
|
2023-04-13 04:26:17 +00:00
|
|
|
# sequence of :class:`MST` and :class:`Leaf`
|
2023-04-12 19:36:57 +00:00
|
|
|
# """
|
2023-04-13 04:26:17 +00:00
|
|
|
# entries = self.get_entries()
|
2023-04-12 19:36:57 +00:00
|
|
|
# paths: NodeEntry[][] = []
|
|
|
|
# for entry in entries:
|
2023-04-13 04:26:17 +00:00
|
|
|
# if entry.is_leaf():
|
2023-04-12 19:36:57 +00:00
|
|
|
# paths.push([entry])
|
2023-04-13 04:26:17 +00:00
|
|
|
# if entry.is_tree():
|
|
|
|
# sub_paths = entry.paths()
|
|
|
|
# paths = [...paths, ...sub_paths.map((p) => [entry, ...p])]
|
2023-04-12 19:36:57 +00:00
|
|
|
# return paths
|
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
def all_nodes(self):
|
|
|
|
"""Walks the tree and returns all nodes.
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
sequence of :class:`MST` and :class:`Leaf`
|
|
|
|
"""
|
2023-04-12 19:36:57 +00:00
|
|
|
# nodes: NodeEntry[] = []
|
2023-04-13 04:26:17 +00:00
|
|
|
# for entry in self.walk():
|
2023-04-12 19:36:57 +00:00
|
|
|
# nodes.push(entry)
|
|
|
|
# return nodes
|
|
|
|
|
|
|
|
# # Walks tree & returns all cids
|
2023-04-13 04:26:17 +00:00
|
|
|
# def all_cids():
|
2023-04-12 19:36:57 +00:00
|
|
|
# """
|
|
|
|
# Returns:
|
|
|
|
# CidSet
|
|
|
|
# """
|
|
|
|
# cids = new CidSet()
|
2023-04-13 04:26:17 +00:00
|
|
|
# entries = self.get_entries()
|
2023-04-12 19:36:57 +00:00
|
|
|
# for entry in entries:
|
2023-04-13 04:26:17 +00:00
|
|
|
# if entry.is_leaf():
|
2023-04-12 19:36:57 +00:00
|
|
|
# cids.add(entry.value)
|
|
|
|
# else:
|
2023-04-13 04:26:17 +00:00
|
|
|
# subtree_cids = entry.all_cids()
|
|
|
|
# cids.add_set(subtree_cids)
|
|
|
|
# cids.add(self.get_pointer())
|
2023-04-12 19:36:57 +00:00
|
|
|
# return cids
|
|
|
|
|
|
|
|
# # Walks tree & returns all leaves
|
|
|
|
# def leaves():
|
|
|
|
# leaves: Leaf[] = []
|
2023-04-13 04:26:17 +00:00
|
|
|
# for entry in self.walk():
|
|
|
|
# if entry.is_leaf():
|
2023-04-12 19:36:57 +00:00
|
|
|
# leaves.push(entry)
|
|
|
|
# return leaves
|
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
def leaf_count(self):
    """Returns the total number of leaves in this MST.

    Returns:
      int
    """
    # Python sequences have no JavaScript-style ``.length`` attribute; the
    # original ``self.leaves().length`` raised AttributeError. Use len().
    # NOTE(review): leaves() is still commented out in this port, so this
    # will raise AttributeError until that method is implemented.
    return len(self.leaves())
|
2023-04-12 19:36:57 +00:00
|
|
|
|
|
|
|
# # Reachable tree traversal
|
|
|
|
# # -------------------
|
|
|
|
|
|
|
|
# # Walk reachable branches of tree & emit nodes, consumer can bail at any point by returning false
|
2023-04-13 04:26:17 +00:00
|
|
|
# def walk_reachable(): AsyncIterable<NodeEntry>:
|
|
|
|
# yield self
|
|
|
|
# entries = self.get_entries()
|
2023-04-12 19:36:57 +00:00
|
|
|
# for entry in entries:
|
2023-04-13 04:26:17 +00:00
|
|
|
# if entry.is_tree():
|
2023-04-12 19:36:57 +00:00
|
|
|
# try:
|
2023-04-13 04:26:17 +00:00
|
|
|
# for e in entry.walk_reachable():
|
2023-04-12 19:36:57 +00:00
|
|
|
# yield e
|
|
|
|
# catch (err):
|
|
|
|
# if err instanceof MissingBlockError:
|
|
|
|
# continue
|
|
|
|
# else:
|
|
|
|
# throw err
|
|
|
|
# else:
|
|
|
|
# yield entry
|
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
# def reachable_leaves():
|
2023-04-12 19:36:57 +00:00
|
|
|
# """
|
|
|
|
# Returns:
|
|
|
|
# Leaf[]
|
|
|
|
# """
|
|
|
|
# leaves: Leaf[] = []
|
2023-04-13 04:26:17 +00:00
|
|
|
# for entry in self.walk_reachable():
|
|
|
|
# if entry.is_leaf():
|
2023-04-12 19:36:57 +00:00
|
|
|
# leaves.push(entry)
|
|
|
|
# return leaves
|
|
|
|
|
|
|
|
# # Sync Protocol
|
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
# def write_to_car_stream(car: BlockWriter):
|
2023-04-12 19:36:57 +00:00
|
|
|
# """
|
|
|
|
# Returns:
|
|
|
|
# void
|
|
|
|
# """
|
2023-04-13 04:26:17 +00:00
|
|
|
# entries = self.get_entries()
|
2023-04-12 19:36:57 +00:00
|
|
|
# leaves = new CidSet()
|
2023-04-13 04:26:17 +00:00
|
|
|
# to_fetch = new CidSet()
|
|
|
|
# to_fetch.add(self.get_pointer())
|
2023-04-12 19:36:57 +00:00
|
|
|
# for entry in entries:
|
2023-04-13 04:26:17 +00:00
|
|
|
# if entry.is_leaf():
|
2023-04-12 19:36:57 +00:00
|
|
|
# leaves.add(entry.value)
|
|
|
|
# else:
|
2023-04-13 04:26:17 +00:00
|
|
|
# to_fetch.add(entry.get_pointer())
|
|
|
|
# while (to_fetch.size() > 0):
|
|
|
|
# next_layer = new CidSet()
|
|
|
|
# fetched = self.storage.get_blocks(to_fetch.to_list())
|
2023-04-12 19:36:57 +00:00
|
|
|
# if fetched.missing.length > 0:
|
|
|
|
# throw new MissingBlocksError('mst node', fetched.missing)
|
2023-04-13 04:26:17 +00:00
|
|
|
# for cid in to_fetch.to_list():
|
|
|
|
# found = parse.get_and_parse_by_def(
|
2023-04-12 19:36:57 +00:00
|
|
|
# fetched.blocks,
|
|
|
|
# cid,
|
2023-04-13 04:26:17 +00:00
|
|
|
# node_data_def,
|
2023-04-12 19:36:57 +00:00
|
|
|
# )
|
|
|
|
# car.put({ cid, bytes: found.bytes })
|
2023-04-13 04:26:17 +00:00
|
|
|
# entries = deserialize_node_data(self.storage, found.obj)
|
2023-04-12 19:36:57 +00:00
|
|
|
|
|
|
|
# for entry in entries:
|
2023-04-13 04:26:17 +00:00
|
|
|
# if entry.is_leaf():
|
2023-04-12 19:36:57 +00:00
|
|
|
# leaves.add(entry.value)
|
|
|
|
# else:
|
2023-04-13 04:26:17 +00:00
|
|
|
# next_layer.add(entry.get_pointer())
|
|
|
|
# to_fetch = next_layer
|
|
|
|
# leaf_data = self.storage.get_blocks(leaves.to_list())
|
|
|
|
# if leaf_data.missing.length > 0:
|
|
|
|
# throw new MissingBlocksError('mst leaf', leaf_data.missing)
|
2023-04-12 19:36:57 +00:00
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
# for leaf in leaf_data.blocks.entries():
|
2023-04-12 19:36:57 +00:00
|
|
|
# car.put(leaf)
|
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
def cids_for_path(self, key):
    """Returns the CIDs along the path to a given key.

    NOTE: not implemented yet. The body below is only commented-out reference
    logic (still partly TypeScript syntax), so calling this currently
    returns None.

    Args:
      key: str

    Returns:
      sequence of :class:`CID` (intended, once the logic below is ported)
    """
    # TODO: port the reference logic below to Python. As written, it starts
    # with this node's own pointer, appends the leaf's value if the key is
    # found in this node, and otherwise recurses into the subtree immediately
    # before where the key would sort.
    #
    # cids: CID[] = [self.get_pointer()]
    # index = self.find_gt_or_equal_leaf_index(key)
    # found = self.at_index(index)
    # if found and found.is_leaf() and found.key == key:
    #     return [...cids, found.value]
    # prev = self.at_index(index - 1)
    # if prev and prev.is_tree():
    #     return [...cids, ...(prev.cids_for_path(key))]
    # return cids
|
2023-04-12 19:36:57 +00:00
|
|
|
|
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
def leading_zeros_on_hash(self, key):
    """Returns the number of leading zeros in a key's hash.

    Zeros are counted in 2-bit chunks of the key's SHA-256 digest, ie the
    result is the number of leading zero *bits* divided by two, rounded down.
    This matches the ~4 fanout (2 bits of zero per layer) described in the
    module docstring.

    Args:
      key: str or bytes. str is encoded as UTF-8 before hashing.

    Returns:
      int
    """
    if isinstance(key, str):
        # hashlib only accepts bytes-like input
        key = key.encode('utf-8')

    leading_zeros = 0
    # Iterate the raw digest (ints 0-255), *not* the hex digest, whose
    # elements are one-character strings that can't be compared to ints.
    for byte in sha256(key).digest():
        # Each threshold is one more all-zero leading 2-bit pair in this byte.
        if byte < 64:
            leading_zeros += 1
        if byte < 16:
            leading_zeros += 1
        if byte < 4:
            leading_zeros += 1
        if byte == 0:
            # whole byte is zero; keep counting into the next byte
            leading_zeros += 1
        else:
            break

    return leading_zeros
|
|
|
|
|
|
|
|
|
|
|
|
def layer_for_entries(entries):
    """Intended to return the MST layer that a node's entries are on.

    NOTE: not implemented yet. The body below is only commented-out reference
    logic, so calling this currently returns None for every input.

    Args:
      entries: sequence of :class:`MST` and :class:`Leaf`

    Returns:
      number | None
    """
    # TODO: port the reference logic below to Python. As written, it finds the
    # first leaf entry and returns the leading zero count of its key's hash,
    # or None if there is no leaf.
    #
    # NOTE(review): this calls leading_zeros_on_hash with a single argument,
    # but that function is currently defined with a (self, key) signature;
    # reconcile when porting.
    #
    # first_leaf = entries.find((entry) => entry.is_leaf())
    # if not first_leaf or first_leaf.is_tree():
    #     return None
    # return leading_zeros_on_hash(first_leaf.key)
|
2023-04-12 19:36:57 +00:00
|
|
|
|
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
# def deserialize_node_data = (
|
2023-04-12 19:36:57 +00:00
|
|
|
# storage: ReadableBlockstore,
|
|
|
|
# data: NodeData,
|
|
|
|
# opts?: Partial<MstOpts>,
|
|
|
|
# ):
|
|
|
|
# """
|
|
|
|
# Returns:
|
2023-04-13 04:26:17 +00:00
|
|
|
# sequence of :class:`MST` and :class:`Leaf`
|
2023-04-12 19:36:57 +00:00
|
|
|
# """
|
|
|
|
# { layer } = opts or {}
|
|
|
|
# entries: NodeEntry[] = []
|
|
|
|
# if (data.l != None):
|
|
|
|
# entries.push(
|
|
|
|
# MST.load(storage, data.l,:
|
|
|
|
# layer: layer ? layer - 1 : undefined,
|
|
|
|
# )
|
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
# last_key = ''
|
2023-04-12 19:36:57 +00:00
|
|
|
# for entry in data.e:
|
2023-04-13 04:26:17 +00:00
|
|
|
# key_str = uint8arrays.to_string(entry.k, 'ascii')
|
|
|
|
# key = last_key.slice(0, entry.p) + key_str
|
2023-04-12 19:36:57 +00:00
|
|
|
# ensure_valid_key(key)
|
|
|
|
# entries.push(new Leaf(key, entry.v))
|
2023-04-13 04:26:17 +00:00
|
|
|
# last_key = key
|
2023-04-12 19:36:57 +00:00
|
|
|
# if entry.t != None:
|
|
|
|
# entries.push(
|
|
|
|
# MST.load(storage, entry.t,:
|
|
|
|
# layer: layer ? layer - 1 : undefined,
|
|
|
|
# )
|
|
|
|
|
|
|
|
# return entries
|
|
|
|
|
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
def serialize_node_data(entries):
    """Intended to convert a node's entries into its serializable form.

    NOTE: not implemented yet. The body below is only commented-out reference
    logic (still partly TypeScript syntax), so calling this currently
    returns None.

    Args:
      entries: sequence of :class:`MST` and :class:`Leaf`

    Returns:
      :class:`Data` (intended, once the logic below is ported)
    """
    # TODO: port the reference logic below to Python. As written, it stores an
    # optional leftmost subtree pointer in `l`, then emits one `e` item per
    # leaf with: `p`, the length of the prefix shared with the previous key;
    # `k`, the rest of the key; `v`, the leaf's value; and `t`, the pointer of
    # the subtree that immediately follows the leaf, if any.
    #
    # NOTE(review): the pseudo-code calls count_prefix_len, but the helper
    # defined in this file is named common_prefix_len.
    # NOTE(review): entries[0] and entries[i + 1] are read without bounds
    # checks; a direct Python port would raise IndexError on an empty
    # sequence and on the last entry. Guard these when porting.
    #
    # data: NodeData =:
    #     l: None,
    #     e: [],
    #
    # i = 0
    # if entries[0].is_tree():
    #     i += 1
    #     data.l = entries[0].pointer
    #
    # last_key = ''
    # while i < entries.length:
    #     leaf = entries[i]
    #     next = entries[i + 1]
    #     if not leaf.is_leaf():
    #         throw new Error('Not a valid node: two subtrees next to each other')
    #     i += 1
    #
    #     subtree: CID | None = None
    #     if next.is_tree():
    #         subtree = next.pointer
    #         i += 1
    #
    #     ensure_valid_key(leaf.key)
    #     prefix_len = count_prefix_len(last_key, leaf.key)
    #     data.e.push({
    #         'p': prefix_len,
    #         'k': uint8arrays.from_string(leaf.key.slice(prefix_len), 'ascii'),
    #         'v': leaf.value,
    #         't': subtree,
    #     })
    #
    #     last_key = leaf.key
    #
    # return data
|
|
|
|
|
|
|
|
|
|
|
|
def common_prefix_len(a, b):
    """Counts how many leading characters two strings have in common.

    Args:
      a, b: str

    Returns:
      int, length of the longest prefix shared by a and b
    """
    shared = commonprefix((a, b))
    return len(shared)
|
|
|
|
|
|
|
|
|
2023-04-13 04:26:17 +00:00
|
|
|
def cid_for_entries(entries):
    """Computes the CID of a node made up of the given entries.

    Serializes the entries with serialize_node_data, then passes the result
    to cid_for_cbor.

    Args:
      entries: sequence of :class:`MST` and :class:`Leaf`

    Returns:
      CID
    """
    return cid_for_cbor(serialize_node_data(entries))
|
|
|
|
# Empty serialized node. String keys and None replace the JavaScript-style
# bare keys and `null`, which are undefined names in Python and raise
# NameError when this literal is evaluated.
#   'l': pointer to the leftmost subtree, or None
#   'e': list of serialized leaf entries
NodeData = {
    'l': None,
    'e': [],
}
|
2023-04-12 19:36:57 +00:00
|
|
|
|
|
|
|
def ensure_valid_key(key):
    """Checks that a key is a valid MST key.

    A valid key is at most 256 characters long and consists of exactly two
    non-empty segments separated by a single ``/``, each containing only
    ASCII letters, digits, and the characters ``_ - : .``

    Args:
      key: str

    Raises:
      ValueError if key is not a valid MST key.
    """
    # Raw string so that \- isn't treated as an (invalid) str escape sequence.
    # fullmatch instead of match + $, since $ also matches just before a
    # trailing newline and would let keys like 'a/b\n' through. + instead of *
    # rejects empty segments.
    valid_segment = re.compile(r'[a-zA-Z0-9_\-:.]+')
    segments = key.split('/')

    if (len(key) > 256
            or len(segments) != 2
            or not all(valid_segment.fullmatch(seg) for seg in segments)):
        raise ValueError(f'Invalid MST key: {key}')
|