Lindenii Project Forge
Fix some btree and swiss bugs

- Handle the case where the key is equal to the promoted median after splitting a full child
- Descend with the new index after merging children
- Fix weird slice alloc bug in swiss
// SPDX-License-Identifier: MPL-2.0

use bytes;
use sort;

// Three-way lexicographic comparison of two byte slices. Returns -1, 0 or 1.
// When one slice is a prefix of the other, the shorter one orders first.
fn keycmp(a: []u8, b: []u8) int = {
	let n = if (len(a) < len(b)) len(a) else len(b);
	for (let i = 0z; i < n; i += 1) {
		if (a[i] < b[i]) return -1;
		if (a[i] > b[i]) return 1;
	};
	if (len(a) < len(b)) return -1;
	if (len(a) > len(b)) return 1;
	return 0;
};

// Comparison callback for [[sort::lbisect]]: both arguments are pointers to
// []u8 keys, which are compared with [[keycmp]].
fn cmp_u8slice(a: const *opaque, b: const *opaque) int = {
	let sa = *(a: *[]u8);
	let sb = *(b: *[]u8);
	return keycmp(sa, sb);
};

// Allocates an empty node for a tree of minimum degree t. The key and value
// slices are given capacity 2t - 1; internal nodes additionally get a child
// slice of capacity 2t, while leaves get an empty child slice.
//
// On nomem, everything allocated so far is released before returning.
fn node_new(t: size, leaf: bool) (*node | nomem) = {
	let capk = 2 * t - 1;
	let capc = if (leaf) 0z else 2z * t;

	let empty_keys: [][]u8 = [];
	let keys = alloc(empty_keys, capk)?;

	let empty_vals: []*opaque = [];
	let vals = match (alloc(empty_vals, capk)) {
	case let v: []*opaque =>
		yield v;
	case nomem =>
		free(keys);
		return nomem;
	};

	let children: []*node = [];
	if (!leaf) {
		let empty_children: []*node = [];
		children = match (alloc(empty_children, capc)) {
		case let c: []*node =>
			yield c;
		case nomem =>
			free(keys);
			free(vals);
			return nomem;
		};
	};

	match (alloc(node {
		leaf = leaf,
		keys = keys,
		vals = vals,
		children = children,
	})) {
	case let nd: *node =>
		return nd;
	case nomem =>
		free(keys);
		free(vals);
		free(children);
		return nomem;
	};
};

// Splits the child x.children[i] around its median entry (index t - 1).
// The entries after the median move to a new right sibling, the child keeps
// the entries before it, and the median itself is inserted into x at index i
// with the new sibling at child index i + 1.
//
// NOTE(review): indexes the child at t - 1 and t.. unconditionally, so the
// child is presumably expected to be full (2t - 1 keys); the visible caller
// checks this.
fn split_child(m: *map, x: *node, i: size) (void | nomem) = {
	const t = m.t;
	let y = x.children[i];
	let z = node_new(t, y.leaf)?;

	let medk = y.keys[t - 1];
	let medv = y.vals[t - 1];

	append(z.keys, y.keys[t..]...)?;
	append(z.vals, y.vals[t..]...)?;
	if (!y.leaf) {
		append(z.children, y.children[t..]...)?;
	};

	// Truncate the left half; this also drops the median from y.
	y.keys = y.keys[..t - 1];
	y.vals = y.vals[..t - 1];
	if (!y.leaf) {
		y.children = y.children[..t];
	};

	insert(x.keys[i], medk)?;
	insert(x.vals[i], medv)?;
	insert(x.children[i + 1], z)?;
};

// Inserts key/val into the subtree rooted at x, replacing the value if the
// key is already present. Full children are split before descending into
// them.
//
// NOTE(review): the leaf path inserts without checking capacity, so x is
// presumably required to be non-full by the caller.
fn insert_nonfull(
	m: *map,
	x: *node,
	key: []u8,
	val: *opaque,
) (void | nomem) = {
	let i = sort::lbisect((x.keys: []const opaque), size([]u8),
		(&key: const *opaque), &cmp_u8slice);
	if (i < len(x.keys) && bytes::equal(x.keys[i], key)) {
		x.vals[i] = val;
		return;
	};

	if (x.leaf) {
		insert(x.keys[i], key)?;
		insert(x.vals[i], val)?;
		return;
	};

	if (len(x.children[i].keys) == 2 * m.t - 1) {
		split_child(m, x, i)?;
		// The split promoted the child's median into x.keys[i].
		let cmp = cmp_u8slice((&key: const *opaque),
			(&x.keys[i]: const *opaque));
		if (cmp == 0) {
			// The key is the promoted median itself: update it here
			// rather than inserting a duplicate further down.
			x.vals[i] = val;
			return;
		};
		if (cmp > 0) {
			// The key belongs in the new right sibling.
			i += 1;
		};
	};
	insert_nonfull(m, x.children[i], key, val)?;
};

// Merges x.children[i + 1] and the separator x.keys[i] into x.children[i],
// then removes the separator and the right child from x. The emptied right
// node and its slice storage are freed; the keys, values and child pointers
// it held now live in the left node.
fn merge_children(m: *map, x: *node, i: size) void = {
	let left = x.children[i];
	let right = x.children[i + 1];

	insert(left.keys[len(left.keys)], x.keys[i])!;
	insert(left.vals[len(left.vals)], x.vals[i])!;

	append(left.keys, right.keys...)!;
	append(left.vals, right.vals...)!;
	if (!left.leaf) {
		append(left.children, right.children...)!;
	};

	delete(x.keys[i]);
	delete(x.vals[i]);
	delete(x.children[i + 1]);

	// Nothing references the right node any more; release its shell.
	free(right.keys);
	free(right.vals);
	free(right.children);
	free(right);
};

// Makes sure x.children[i] holds at least t keys before a deletion descends
// into it. In order of preference: borrow an entry through the parent from
// the left sibling, borrow from the right sibling, or merge with a sibling.
//
// When i is the last child and a merge is needed, the child is merged into
// its left sibling, so afterwards it lives at index i - 1 and x has one
// child fewer. Callers must recompute the index after this call.
fn ensure_child_has_space(m: *map, x: *node, i: size) void = {
	const t = m.t;
	let c = x.children[i];
	if (len(c.keys) >= t) return;

	if (i > 0 && len(x.children[i - 1].keys) >= t) {
		// Rotate right: separator moves down to the front of c, the
		// left sibling's last entry moves up to replace it.
		let ls = x.children[i - 1];
		insert(c.keys[0], x.keys[i - 1])!;
		insert(c.vals[0], x.vals[i - 1])!;
		if (!c.leaf) {
			let moved = ls.children[len(ls.children) - 1];
			insert(c.children[0], moved)!;
			delete(ls.children[len(ls.children) - 1]);
		};
		x.keys[i - 1] = ls.keys[len(ls.keys) - 1];
		x.vals[i - 1] = ls.vals[len(ls.vals) - 1];
		delete(ls.keys[len(ls.keys) - 1]);
		delete(ls.vals[len(ls.vals) - 1]);
		return;
	};

	if (i + 1 < len(x.children) && len(x.children[i + 1].keys) >= t) {
		// Rotate left: separator moves down to the end of c, the right
		// sibling's first entry moves up to replace it.
		let rs = x.children[i + 1];
		insert(c.keys[len(c.keys)], x.keys[i])!;
		insert(c.vals[len(c.vals)], x.vals[i])!;
		if (!c.leaf) {
			let moved = rs.children[0];
			insert(c.children[len(c.children)], moved)!;
			delete(rs.children[0]);
		};
		x.keys[i] = rs.keys[0];
		x.vals[i] = rs.vals[0];
		delete(rs.keys[0]);
		delete(rs.vals[0]);
		return;
	};

	if (i + 1 < len(x.children)) {
		merge_children(m, x, i);
	} else {
		merge_children(m, x, i - 1);
	};
};

// Removes and returns the last (key, value) entry of the rightmost leaf in
// the subtree rooted at x.
fn pop_max(m: *map, x: *node) ([]u8, *opaque) = {
	let cur = x;
	for (!cur.leaf) {
		let last_before = len(cur.children) - 1;
		ensure_child_has_space(m, cur, last_before);
		// A merge may have removed a child, so recompute the index of
		// the last child before descending.
		let last = len(cur.children) - 1;
		cur = cur.children[last];
	};
	let k = cur.keys[len(cur.keys) - 1];
	let v = cur.vals[len(cur.vals) - 1];
	delete(cur.keys[len(cur.keys) - 1]);
	delete(cur.vals[len(cur.vals) - 1]);
	return (k, v);
};

// Removes and returns the first (key, value) entry of the leftmost leaf in
// the subtree rooted at x.
fn pop_min(m: *map, x: *node) ([]u8, *opaque) = {
	let cur = x;
	for (!cur.leaf) {
		// Child 0 keeps index 0 whether it borrows or merges.
		ensure_child_has_space(m, cur, 0);
		cur = cur.children[0];
	};
	let k = cur.keys[0];
	let v = cur.vals[0];
	delete(cur.keys[0]);
	delete(cur.vals[0]);
	return (k, v);
};

// Deletes key from the subtree rooted at x. Returns the value that was
// stored for it, or void if the key is not present.
fn delete_rec(m: *map, x: *node, key: []u8) (*opaque | void) = {
	let i = sort::lbisect((x.keys: []const opaque), size([]u8),
		(&key: const *opaque), &cmp_u8slice);

	if (i < len(x.keys) && bytes::equal(x.keys[i], key)) {
		if (x.leaf) {
			let ret = x.vals[i];
			delete(x.keys[i]);
			delete(x.vals[i]);
			return ret;
		};

		const t = m.t;
		let y = x.children[i];
		let z = x.children[i + 1];
		if (len(y.keys) >= t) {
			// Replace the entry with its in-order predecessor.
			let (pk, pv) = pop_max(m, y);
			let ret = x.vals[i];
			x.keys[i] = pk;
			x.vals[i] = pv;
			return ret;
		} else if (len(z.keys) >= t) {
			// Replace the entry with its in-order successor.
			let (sk, sv) = pop_min(m, z);
			let ret = x.vals[i];
			x.keys[i] = sk;
			x.vals[i] = sv;
			return ret;
		} else {
			// Both neighbours are minimal: pull the entry down into
			// the merged child (y survives the merge) and delete it
			// from there.
			merge_children(m, x, i);
			return delete_rec(m, y, key);
		};
	};

	if (x.leaf) {
		return;
	};

	ensure_child_has_space(m, x, i);
	// If the last child was merged into its left sibling, i now points one
	// past the end; clamp it to the merged child.
	if (i >= len(x.children)) {
		i = len(x.children) - 1;
	};
	return delete_rec(m, x.children[i], key);
};
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors
// SPDX-FileCopyrightText: 2025 Runxi Yu

// Number of slots in one group.
export def GROUP_SIZE: size = 8z;

// Control byte of a slot that has never held an entry since the last
// (re)initialisation of its group.
export def CTRL_EMPTY: u8 = 0x80;

// Control byte of a tombstone slot. unchecked_put reuses such a slot and
// decrements the map's tombstone counter when it does.
export def CTRL_DELETED: u8 = 0xFE;

// A group of GROUP_SIZE slots: one control byte, key and value per slot.
export type group = struct {
	ctrl: [GROUP_SIZE]u8,
	keys: [GROUP_SIZE][]u8,
	vals: [GROUP_SIZE]nullable *opaque,
};

// Resets every slot of the group to empty with no key and a null value.
fn group_set_empty(g: *group) void = {
	for (let i = 0z; i < GROUP_SIZE; i += 1) {
		g.ctrl[i] = CTRL_EMPTY;
		g.keys[i] = [];
		g.vals[i] = null;
	};
};

// Reports whether a control byte marks an occupied slot, i.e. its high bit
// is clear (occupied slots store the 7-bit h2 tag).
fn is_full_ctrl(c: u8) bool = (c & 0x80) == 0 && c != CTRL_DELETED;

// Upper hash bits, used to pick the starting group.
fn h1(h: u64) u64 = h >> 7u64;

// Low 7 hash bits, stored in the control byte of an occupied slot.
fn h2(h: u64) u8 = (h & 0x7Fu64): u8;

// Advances a probe sequence: the step index grows by one and the group
// offset advances by the new step index, wrapped by mask.
fn probe_next(off: size, idx: size, mask: size) (size, size) = {
	let nidx = idx + 1;
	let noff = (off + nidx) & mask;
	return (noff, nidx);
};

// Total number of slots across all groups.
fn capacity_slots(m: *map) size = (m.group_mask + 1) * GROUP_SIZE;

// Maximum number of occupied plus tombstone slots allowed before an insert
// triggers growth: 7/8 of the total slot count.
fn max_used_with_tombs(m: *map) size = {
	return (capacity_slots(m) * 7z) / 8z;
};

// Doubles the number of groups if occupied plus tombstone slots have reached
// the limit from max_used_with_tombs; otherwise does nothing.
fn ensure_capacity_for_insert(m: *map) (void | nomem) = {
	if (m.used + m.tombs < max_used_with_tombs(m)) {
		return;
	};
	return resize(m, (m.group_mask + 1) * 2);
};

// Rebuilds the table at its current size, re-inserting every occupied slot
// into a fresh group array so that all tombstones are dropped. Aborts if the
// new group array cannot be allocated.
fn rehash_in_place(m: *map) void = {
	if (len(m.groups) == 0) return;

	let new_groups: []group = alloc([group{...}...], (m.group_mask + 1))!;
	for (let i = 0z; i < len(new_groups); i += 1) {
		group_set_empty(&new_groups[i]);
	};

	let old = m.groups;
	let old_mask = m.group_mask;
	m.groups = new_groups;
	m.used = 0;
	m.tombs = 0;

	for (let gi = 0z; gi <= old_mask; gi += 1) {
		let g = &old[gi];
		for (let si = 0z; si < GROUP_SIZE; si += 1) {
			let c = g.ctrl[si];
			if (!is_full_ctrl(c)) continue;
			let k = g.keys[si];
			let v = g.vals[si];
			unchecked_put(m, k, v);
		};
	};
	free(old);
};

// Replaces the group array with one of new_groups_len groups (at least one)
// and re-inserts every occupied slot of the old array. On nomem the map is
// left untouched.
//
// NOTE(review): group_mask is set to new_groups_len - 1, so new_groups_len
// is presumably expected to be a power of two; the visible caller doubles
// the current count.
fn resize(m: *map, new_groups_len: size) (void | nomem) = {
	if (new_groups_len == 0) new_groups_len = 1;

	let gs = match (alloc([group{...}...], new_groups_len)) {
	case let a: []group =>
		yield a;
	case nomem =>
		return nomem;
	};
	for (let i = 0z; i < len(gs); i += 1) {
		group_set_empty(&gs[i]);
	};

	let old = m.groups;
	m.groups = gs;
	m.group_mask = new_groups_len - 1;
	m.used = 0;
	m.tombs = 0;

	// Bound the walk by the old array's length rather than the old mask so
	// that an empty old array is never indexed.
	for (let gi = 0z; gi < len(old); gi += 1) {
		let g = &old[gi];
		for (let si = 0z; si < GROUP_SIZE; si += 1) {
			let c = g.ctrl[si];
			if (!is_full_ctrl(c)) continue;
			unchecked_put(m, g.keys[si], g.vals[si]);
		};
	};

	if (len(old) != 0) {
		free(old);
	};
};

// Stores key/val in the first usable slot along the key's probe sequence
// without looking for an existing entry with the same key and without any
// capacity check. Within the group that contains the first empty slot, an
// earlier tombstone is preferred over the empty slot.
//
// NOTE(review): loops until an empty slot is found, so the table presumably
// must always contain at least one empty slot.
fn unchecked_put(m: *map, key: []u8, val: nullable *opaque) void = {
	let hv = m.hash64(m.hash_params, key): u64;
	let t = h2(hv);
	let mask = m.group_mask;
	let off: size = (h1(hv): size) & mask;
	let idx: size = 0;

	for (true) {
		let g = &m.groups[off];
		let first_dead: (size | void) = void;

		for (let i = 0z; i < GROUP_SIZE; i += 1) {
			let c = g.ctrl[i];
			if (is_full_ctrl(c)) {
				continue;
			} else if (c == CTRL_DELETED) {
				if (first_dead is void) first_dead = i;
			} else {
				let slot = match (first_dead) {
				case void =>
					yield i;
				case let di: size =>
					yield di;
				};
				g.keys[slot] = key;
				g.vals[slot] = val;
				g.ctrl[slot] = t;
				m.used += 1;
				if (slot != i) {
					// A tombstone was recycled.
					m.tombs -= 1;
				};
				return;
			};
		};

		let next = probe_next(off, idx, mask);
		off = next.0;
		idx = next.1;
	};
};
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors
// SPDX-FileCopyrightText: 2025 Runxi Yu

use errors;

// Creates a new [[map]] with an initial number of groups and hash function.
//
// n_groups must be greater than zero; [[errors::invalid]] is returned
// otherwise. The group count is rounded up to the next power of two. If any
// allocation fails, nomem is returned and nothing is leaked.
export fn new(
	n_groups: size,
	hash64: *fn(hash_params: nullable *opaque, key: []u8) size,
	hash_params: nullable *opaque,
) (*map | errors::invalid | nomem) = {
	if (n_groups == 0) {
		return errors::invalid;
	};

	// Round up to a power of two so that group_mask can be count - 1.
	let v: size = 1;
	for (v < n_groups) {
		v *= 2;
	};
	let groups_count = v;

	let gs = match (alloc([group{...}...], groups_count)) {
	case let a: []group =>
		yield a;
	case nomem =>
		return nomem;
	};
	for (let i = 0z; i < len(gs); i += 1) {
		group_set_empty(&gs[i]);
	};

	match (alloc(map {
		vt = &_vt,
		group_mask = groups_count - 1,
		used = 0,
		tombs = 0,
		hash64 = hash64,
		hash_params = hash_params,
		groups = gs,
	})) {
	case let m: *map =>
		return m;
	case nomem =>
		// Do not leak the group array if the map header cannot be
		// allocated.
		free(gs);
		return nomem;
	};
};