I am working on a project and have fleshed out this class but I am not passing my test cases. Here is the implementation of my B-tree class:
class BTree:
def __init__(self, M: int, L: int):
self.M = M
self.L = L
self.root_addr: Address = DISK.new()
DISK.write(self.root_addr, BTreeNode(self.root_addr, None, None, True))
def _find_leaf(self, key: KT) -> Address:
current_addr = self.root_addr
while True:
node = DISK.read(current_addr)
if node.is_leaf:
return current_addr
idx = node.find_idx(key)
if idx < len(node.keys) and node.keys[idx] == key:
idx += 1
current_addr = node.children_addrs[idx]
def insert(self, key: KT, value: VT) -> None:
leaf_addr = self._find_leaf(key)
leaf = DISK.read(leaf_addr)
leaf.insert_data(key, value)
if len(leaf.keys) > self.M - 1:
self._split_node(leaf_addr)
DISK.write(leaf_addr, leaf)
def _split_node(self, node_addr: Address) -> None:
node = DISK.read(node_addr)
middle_index = len(node.keys) // 2
middle_key = node.keys[middle_index]
left_node = BTreeNode(DISK.new(), node.parent_addr, None, node.is_leaf)
right_node = BTreeNode(DISK.new(), node.parent_addr, None, node.is_leaf)
left_node.keys = node.keys[:middle_index]
right_node.keys = node.keys[middle_index + 1:]
if node.is_leaf:
left_node.data = node.data[:middle_index]
right_node.data = node.data[middle_index + 1:]
else:
left_node.children_addrs = node.children_addrs[:middle_index + 1]
right_node.children_addrs = node.children_addrs[middle_index + 1:]
DISK.write(left_node.my_addr, left_node)
DISK.write(right_node.my_addr, right_node)
if node.parent_addr is not None:
parent = DISK.read(node.parent_addr)
insert_index = parent.find_idx(middle_key)
parent.keys.insert(insert_index, middle_key)
parent.children_addrs[insert_index] = left_node.my_addr
parent.children_addrs.insert(insert_index + 1, right_node.my_addr)
DISK.write(node.parent_addr, parent)
else:
new_root = BTreeNode(DISK.new(), None, None, False)
new_root.keys = [middle_key]
new_root.children_addrs = [left_node.my_addr, right_node.my_addr]
self.root_addr = new_root.my_addr
DISK.write(new_root.my_addr, new_root)
def find(self, key: KT) -> Optional[VT]:
current_addr = self.root_addr
while True:
node = DISK.read(current_addr)
idx = node.find_idx(key)
if node.is_leaf:
if idx < len(node.keys) and node.keys[idx] == key:
return node.data[idx]
return None
else:
if idx < len(node.keys) and node.keys[idx] == key:
idx += 1
current_addr = node.children_addrs[idx]
Here is the tests that are failing:
def btree_properties_recurse(root_node_addr, node, M, L):
assert sorted(node.keys) == node.keys # Keys should remain sorted so that a binary search is possible
if node.is_leaf:
# Leaf node general properties
assert len(node.children_addrs) == 0
assert len(node.keys) == len(node.data)
assert len(node.data) <= L
else:
# Non-leaf node general properties
assert len(node.data) == 0
assert len(node.keys) == len(node.children_addrs) - 1
assert len(node.children_addrs) <= M
if node.my_addr == root_node_addr:
# Root node properties
assert node.parent_addr is None
assert node.index_in_parent is None
if not node.is_leaf:
assert len(node.children_addrs) >= 2
else:
# Non-root node properties
assert node.parent_addr is not None
assert node.index_in_parent is not None
if node.is_leaf:
assert len(node.data) >= (L+1)//2
else:
assert len(node.children_addrs) >= (M+1)//2
# Run the assertions on all children
for child_addr in node.children_addrs:
btree_properties_recurse(root_node_addr, DISK.read(child_addr), M, L)
def test_btree_properties_even() -> None:
M = 6
L = 6
btree = BTree(M, L)
for i in range(100):
btree.insert(i, str(i))
for i in range(0, -100, -1):
btree.insert(i, str(i))
root_addr = btree.root_addr
btree_properties_recurse(root_addr, DISK.read(root_addr), M, L)
The tests are fairly generic and my implementation is not working as intended. I have tried to understand with assert statements that I have since removed. Any guidance is welcome, my tests do run, but the assertions are always wrong. I believe my _split_node function is incorrect.