Lindenii Project Forge
Login

hare-ds

Data structures for Hare
Commit info
ID
5b01423f06103e2b09f42bf59d0532e39c70569d
Author
Runxi Yu <me@runxiyu.org>
Author date
Wed, 17 Sep 2025 01:43:19 +0800
Committer
Runxi Yu <me@runxiyu.org>
Committer date
Wed, 17 Sep 2025 03:11:30 +0800
Actions
Fix some btree and swiss bugs

o Handle case where key is equal to promoted median after splitting full
  child
o Descend with new index after merging children
o Fix weird slice alloc bug in swiss
// SPDX-License-Identifier: MPL-2.0

use bytes;
use sort;

// Lexicographically compares two byte-slice keys.
//
// Returns -1 if a sorts before b, 1 if a sorts after b, and 0 if they are
// identical. When one key is a prefix of the other, the shorter key sorts
// first.
fn keycmp(a: []u8, b: []u8) int = {
	const shared = if (len(b) > len(a)) len(a) else len(b);
	for (let pos = 0z; pos < shared; pos += 1) {
		if (a[pos] != b[pos]) {
			return if (a[pos] < b[pos]) -1 else 1;
		};
	};
	// All shared bytes match, so the lengths decide the ordering.
	if (len(a) == len(b)) {
		return 0;
	};
	return if (len(a) < len(b)) -1 else 1;
};

// Comparison callback over type-erased pointers: both arguments are treated
// as pointers to []u8 keys and ordered with keycmp.
fn cmp_u8slice(a: const *opaque, b: const *opaque) int = {
	const lhs = a: *[]u8;
	const rhs = b: *[]u8;
	return keycmp(*lhs, *rhs);
};

// Allocates an empty B-tree node for minimum degree t.
//
// The key and value slices start with length zero and capacity 2t - 1. A
// non-leaf node additionally gets a child slice with capacity 2t; a leaf gets
// an empty child slice. Returns nomem if any allocation fails, in which case
// every allocation made earlier in this call is released first.
fn node_new(t: size, leaf: bool) (*node | nomem) = {
	let capk = 2 * t - 1;
	let capc = if (leaf) 0z else 2z * t;

	let empty_keys: [][]u8 = [];
	let keys = alloc(empty_keys, capk)?;

	let empty_vals: []*opaque = [];
	let vals = match (alloc(empty_vals, capk)) {
	case let v: []*opaque =>
		yield v;
	case nomem =>
		free(keys);
		return nomem;
	};

	let children: []*node = [];
	if (!leaf) {
		let empty_children: []*node = [];
		children = match (alloc(empty_children, capc)) {
		case let c: []*node =>
			yield c;
		case nomem =>
			free(keys);
			free(vals);
			return nomem;
		};
	};

	let nd = match (alloc(node {
		leaf = leaf,
		keys = keys,
		vals = vals,
		children = children,
	})) {
	case let p: *node =>
		yield p;
	case nomem =>
		free(keys);
		free(vals);
		// A leaf never allocated a child slice, so only free it for
		// internal nodes.
		if (!leaf) {
			free(children);
		};
		return nomem;
	};
	return nd;
};

// Splits the child x.children[i] around the entry at index t - 1, where t is
// m.t.
//
// The entries after that median move into a newly allocated sibling z, the
// child keeps the entries before it, and the median key/value pair is
// inserted into x at index i with z placed at x.children[i + 1]. Returns nomem
// if allocating z or growing any slice fails.
//
// NOTE(review): the caller visible here only invokes this on a child holding
// 2t - 1 keys; the y.keys[t..] slicing relies on the child having at least t
// keys.
// NOTE(review): if one of the later append/insert calls fails, z has been
// allocated but is neither linked into x nor freed here - confirm whether
// that is acceptable.
fn split_child(m: *map, x: *node, i: size) (void | nomem) = {
	const t = m.t;
	let y = x.children[i];
	// The new right-hand sibling has the same leaf-ness as the child.
	let z = node_new(t, y.leaf)?;

	// Remember the median before y is truncated below.
	let medk = y.keys[t - 1];
	let medv = y.vals[t - 1];

	// Copy everything after the median into z.
	append(z.keys, y.keys[t..]...)?;
	append(z.vals, y.vals[t..]...)?;
	if (!y.leaf) {
		append(z.children, y.children[t..]...)?;
	};

	// Shrink y to the entries before the median (and its first t children).
	y.keys = y.keys[..t - 1];
	y.vals = y.vals[..t - 1];
	if (!y.leaf) {
		y.children = y.children[..t];
	};

	// Promote the median into x and link z immediately to the right of y.
	insert(x.keys[i], medk)?;
	insert(x.vals[i], medv)?;
	insert(x.children[i + 1], z)?;
};

// Inserts key/val into the subtree rooted at x, or overwrites the value if
// the key is already present.
//
// A child that already holds 2 * m.t - 1 keys is split before descending
// into it. Returns nomem if a split or a slice insertion fails.
fn insert_nonfull(m: *map, x: *node, key: []u8, val: *opaque) (void | nomem) = {
	// Index of the first key in x that is not less than key.
	let i = sort::lbisect((x.keys: []const opaque), size([]u8),
		(&key: const *opaque), &cmp_u8slice);

	// Key already stored in this node: replace its value in place.
	if (i < len(x.keys) && bytes::equal(x.keys[i], key)) {
		x.vals[i] = val;
		return;
	};

	if (x.leaf) {
		insert(x.keys[i], key)?;
		insert(x.vals[i], val)?;
		return;
	};

	if (len(x.children[i].keys) == 2 * m.t - 1) {
		split_child(m, x, i)?;
		// The split promoted the child's median to x.keys[i]; compare
		// against it to decide where the key belongs now.
		let cmp = cmp_u8slice((&key: const *opaque),
			(&x.keys[i]: const *opaque));
		if (cmp == 0) {
			// The promoted median is the key itself: update it here
			// instead of inserting a duplicate further down.
			x.vals[i] = val;
			return;
		};
		if (cmp > 0) {
			// The key belongs in the new right-hand sibling.
			i += 1;
		};
	};
	insert_nonfull(m, x.children[i], key, val)?;
};

// Merges x.children[i + 1] into x.children[i].
//
// The separator entry x.keys[i]/x.vals[i] is moved down to the end of the
// left child, followed by all keys, values and (for internal nodes) children
// of the right child. The separator and the right child's slot are then
// removed from x. Allocation failures while growing the left child abort the
// program (`!`).
//
// NOTE(review): the right node and its slices are not freed in this
// function - confirm that they are released elsewhere.
fn merge_children(m: *map, x: *node, i: size) void = {
	let left = x.children[i];
	let right = x.children[i + 1];

	// Inserting at index len(...) places the separator at the end of left.
	insert(left.keys[len(left.keys)], x.keys[i])!;
	insert(left.vals[len(left.vals)], x.vals[i])!;

	append(left.keys, right.keys...)!;
	append(left.vals, right.vals...)!;
	if (!left.leaf) {
		append(left.children, right.children...)!;
	};

	// Drop the separator and the now-merged right child from the parent.
	delete(x.keys[i]);
	delete(x.vals[i]);
	delete(x.children[i + 1]);
};

// Makes sure x.children[i] holds at least t keys (t is m.t), so that an entry
// can be removed from it afterwards.
//
// Strategies, tried in order:
//   1. nothing to do if the child already has t or more keys;
//   2. borrow one entry through the parent from the left sibling, if that
//      sibling has at least t keys;
//   3. borrow one entry through the parent from the right sibling, if that
//      sibling has at least t keys;
//   4. otherwise merge with the right sibling, or with the left sibling when
//      the child is the last one.
//
// After a merge with the left sibling the child's contents live at index
// i - 1 and x has one child fewer, so callers must not keep using i blindly.
fn ensure_child_has_space(m: *map, x: *node, i: size) void = {
	const t = m.t;
	let c = x.children[i];

	if (len(c.keys) >= t) return;

	if (i > 0 && len(x.children[i - 1].keys) >= t) {
		let ls = x.children[i - 1];

		// Rotate right: the separator moves down to the front of c.
		insert(c.keys[0], x.keys[i - 1])!;
		insert(c.vals[0], x.vals[i - 1])!;

		if (!c.leaf) {
			// The left sibling's last child becomes c's first child.
			let moved = ls.children[len(ls.children) - 1];
			insert(c.children[0], moved)!;
			delete(ls.children[len(ls.children) - 1]);
		};

		// The left sibling's last entry becomes the new separator.
		x.keys[i - 1] = ls.keys[len(ls.keys) - 1];
		x.vals[i - 1] = ls.vals[len(ls.vals) - 1];
		delete(ls.keys[len(ls.keys) - 1]);
		delete(ls.vals[len(ls.vals) - 1]);
		return;
	};

	if (i + 1 < len(x.children) && len(x.children[i + 1].keys) >= t) {
		let rs = x.children[i + 1];

		// Rotate left: the separator moves down to the end of c.
		insert(c.keys[len(c.keys)], x.keys[i])!;
		insert(c.vals[len(c.vals)], x.vals[i])!;

		if (!c.leaf) {
			// The right sibling's first child becomes c's last child.
			let moved = rs.children[0];
			insert(c.children[len(c.children)], moved)!;
			delete(rs.children[0]);
		};

		// The right sibling's first entry becomes the new separator.
		x.keys[i] = rs.keys[0];
		x.vals[i] = rs.vals[0];
		delete(rs.keys[0]);
		delete(rs.vals[0]);
		return;
	};

	// Neither sibling can lend an entry: merge with a neighbour.
	if (i + 1 < len(x.children)) {
		merge_children(m, x, i);
	} else {
		merge_children(m, x, i - 1);
	};
};

// Removes the last entry of the right-most leaf in the subtree rooted at x
// and returns it as a (key, value) pair.
//
// On the way down, every right-most child is topped up via
// ensure_child_has_space before it is entered.
fn pop_max(m: *map, x: *node) ([]u8, *opaque) = {
	let cur = x;
	for (!cur.leaf) {
		ensure_child_has_space(m, cur, len(cur.children) - 1);
		// Rebalancing may have merged the last two children, which
		// shrinks cur.children; re-read the index before descending.
		let last = len(cur.children) - 1;
		cur = cur.children[last];
	};
	let k = cur.keys[len(cur.keys) - 1];
	let v = cur.vals[len(cur.vals) - 1];
	delete(cur.keys[len(cur.keys) - 1]);
	delete(cur.vals[len(cur.vals) - 1]);
	return (k, v);
};

// Removes the first entry of the left-most leaf in the subtree rooted at x
// and returns it as a (key, value) pair.
//
// Each left-most child is passed through ensure_child_has_space before the
// walk descends into it.
fn pop_min(m: *map, x: *node) ([]u8, *opaque) = {
	let nd = x;
	for (!nd.leaf) {
		ensure_child_has_space(m, nd, 0);
		nd = nd.children[0];
	};
	// Capture the entry before it is deleted from the leaf.
	const first = (nd.keys[0], nd.vals[0]);
	delete(nd.keys[0]);
	delete(nd.vals[0]);
	return first;
};

// Removes key from the subtree rooted at x.
//
// Returns the value that was stored under the key, or void if the key is not
// present in the subtree.
fn delete_rec(m: *map, x: *node, key: []u8) (*opaque | void) = {
	// Index of the first key in x that is not less than key.
	let i = sort::lbisect((x.keys: []const opaque), size([]u8),
		(&key: const *opaque), &cmp_u8slice);

	if (i < len(x.keys) && bytes::equal(x.keys[i], key)) {
		// Key found in a leaf: remove it directly.
		if (x.leaf) {
			let ret = x.vals[i];
			delete(x.keys[i]);
			delete(x.vals[i]);
			return ret;
		};

		// Key found in an internal node; y and z are the children on
		// either side of it.
		const t = m.t;
		let y = x.children[i];
		let z = x.children[i + 1];

		if (len(y.keys) >= t) {
			// Replace the entry with the largest entry of the left
			// subtree.
			let (pk, pv) = pop_max(m, y);
			let ret = x.vals[i];
			x.keys[i] = pk;
			x.vals[i] = pv;
			return ret;
		} else if (len(z.keys) >= t) {
			// Replace the entry with the smallest entry of the
			// right subtree.
			let (sk, sv) = pop_min(m, z);
			let ret = x.vals[i];
			x.keys[i] = sk;
			x.vals[i] = sv;
			return ret;
		} else {
			// Both neighbours are short: merge them (the key moves
			// down into y) and continue the deletion there.
			merge_children(m, x, i);
			return delete_rec(m, y, key);
		};
	};

	// Key is not in this node; a leaf has nowhere further to look.
	if (x.leaf) {
		return;
	};

	ensure_child_has_space(m, x, i);
	// A merge with the left sibling removes one child from x, so the
	// index may now point past the end; clamp it to the last child.
	if (i >= len(x.children)) {
		i = len(x.children) - 1;
	};
	return delete_rec(m, x.children[i], key);
};
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors
// SPDX-FileCopyrightText: 2025 Runxi Yu

// Number of slots in each [[group]].
export def GROUP_SIZE: size = 8z;
// Control byte of a slot that holds no entry; group_set_empty writes this to
// every slot.
export def CTRL_EMPTY: u8 = 0x80;
// Control byte of a deleted slot; unchecked_put may reuse such a slot and
// then decrements the map's tombstone counter.
export def CTRL_DELETED: u8 = 0xFE;

// A fixed-size bucket of GROUP_SIZE slots. Slot i is described by ctrl[i],
// keys[i] and vals[i].
export type group = struct {
	// Per-slot control byte: CTRL_EMPTY, CTRL_DELETED, or the h2 tag of
	// the stored key (high bit clear).
	ctrl: [GROUP_SIZE]u8,
	// Per-slot key; set to an empty slice by group_set_empty.
	keys: [GROUP_SIZE][]u8,
	// Per-slot value; set to null by group_set_empty.
	vals: [GROUP_SIZE]nullable *opaque,
};

// Resets every slot of g: the control byte becomes CTRL_EMPTY, the key an
// empty slice and the value null.
fn group_set_empty(g: *group) void = {
	let slot = 0z;
	for (slot < GROUP_SIZE) {
		g.ctrl[slot] = CTRL_EMPTY;
		g.keys[slot] = [];
		g.vals[slot] = null;
		slot += 1;
	};
};

// Reports whether control byte c marks an occupied slot: its high bit must
// be clear and it must not be CTRL_DELETED.
fn is_full_ctrl(c: u8) bool = {
	if (c == CTRL_DELETED) {
		return false;
	};
	return (c & 0x80) == 0;
};

// Returns the hash with its low seven bits shifted out; unchecked_put masks
// this value to pick the starting group.
fn h1(h: u64) u64 = {
	return h >> 7u64;
};
// Returns the low seven bits of the hash as a byte; unchecked_put stores
// this as the control byte of an occupied slot.
fn h2(h: u64) u8 = {
	const low7 = h & 0x7Fu64;
	return low7: u8;
};

// Advances a probe sequence by one step.
//
// The step counter is incremented and the new offset is the old offset plus
// that counter, wrapped with mask. Returns (new offset, new step counter).
fn probe_next(off: size, idx: size, mask: size) (size, size) = {
	const step = idx + 1;
	return ((off + step) & mask, step);
};

// Total number of slots in the map: (group_mask + 1) groups of GROUP_SIZE
// slots each.
fn capacity_slots(m: *map) size = {
	const ngroups = m.group_mask + 1;
	return ngroups * GROUP_SIZE;
};

// Threshold for used plus deleted slots: seven eighths of the total slot
// count, rounded down.
fn max_used_with_tombs(m: *map) size = (capacity_slots(m) * 7z) / 8z;

// Doubles the number of groups once used plus deleted slots reach the
// threshold from max_used_with_tombs; otherwise does nothing. Returns nomem
// if the resize cannot allocate.
fn ensure_capacity_for_insert(m: *map) (void | nomem) = {
	const occupied = m.used + m.tombs;
	if (occupied >= max_used_with_tombs(m)) {
		return resize(m, (m.group_mask + 1) * 2);
	};
};

// Rebuilds the map with the same number of groups, re-inserting every
// occupied slot into a fresh group array and thereby dropping all deleted
// slots. Does nothing when the map has no groups. Aborts if the new group
// array cannot be allocated.
fn rehash_in_place(m: *map) void = {
	if (len(m.groups) == 0) {
		return;
	};

	let fresh: []group = alloc([group{...}...], (m.group_mask + 1))!;
	for (let i = 0z; i < len(fresh); i += 1) {
		group_set_empty(&fresh[i]);
	};

	// Swap in the empty table and reset the counters; unchecked_put
	// rebuilds them as entries are re-inserted.
	let stale = m.groups;
	const mask = m.group_mask;
	m.groups = fresh;
	m.used = 0;
	m.tombs = 0;

	for (let gi = 0z; gi <= mask; gi += 1) {
		let src = &stale[gi];
		for (let si = 0z; si < GROUP_SIZE; si += 1) {
			if (is_full_ctrl(src.ctrl[si])) {
				unchecked_put(m, src.keys[si], src.vals[si]);
			};
		};
	};
	free(stale);
};

// Replaces the group array with a new one of new_groups_len groups (at least
// one) and re-inserts every occupied slot of the old array into it.
//
// Returns nomem, leaving the map untouched, if the new array cannot be
// allocated.
//
// NOTE(review): group_mask is set to new_groups_len - 1, so callers are
// presumably expected to pass a power of two - confirm.
fn resize(m: *map, new_groups_len: size) (void | nomem) = {
	if (new_groups_len == 0) new_groups_len = 1;
	let gs: []group = match (alloc([group{...}...]: []group, new_groups_len)) {
	case let a: []group => yield a;
	case nomem => return nomem;
	};
	for (let i = 0z; i < len(gs); i += 1) {
		group_set_empty(&gs[i]);
	};

	// Install the empty table and reset the counters; unchecked_put
	// rebuilds them as entries are migrated.
	let old = m.groups;
	m.groups = gs;
	m.group_mask = new_groups_len - 1;
	m.used = 0;
	m.tombs = 0;

	// Bounded by len(old) so that an empty old table is never indexed.
	for (let gi = 0z; gi < len(old); gi += 1) {
		let g = &old[gi];
		for (let si = 0z; si < GROUP_SIZE; si += 1) {
			let c = g.ctrl[si];
			if (!is_full_ctrl(c)) continue;
			unchecked_put(m, g.keys[si], g.vals[si]);
		};
	};
	if (len(old) != 0) {
		free(old);
	};
};

// Stores key/val in the first usable slot along the key's probe sequence,
// without checking for an existing entry or for available capacity.
//
// Within each probed group, the entry goes into the first deleted slot seen
// before the group's first empty slot, or into that empty slot if no deleted
// slot preceded it. A group without an empty slot is skipped. The loop only
// ends once an empty slot is found.
fn unchecked_put(m: *map, key: []u8, val: nullable *opaque) void = {
	const hv = m.hash64(m.hash_params, key): u64;
	const tag = h2(hv);
	const mask = m.group_mask;
	let off: size = (h1(hv): size) & mask;
	let idx: size = 0;

	for (true) {
		let g = &m.groups[off];
		let first_dead: (size | void) = void;
		for (let i = 0z; i < GROUP_SIZE; i += 1) {
			const c = g.ctrl[i];
			if (is_full_ctrl(c)) {
				continue;
			};
			if (c == CTRL_DELETED) {
				// Remember only the earliest deleted slot.
				if (first_dead is void) {
					first_dead = i;
				};
				continue;
			};

			// Slot i is empty: place the entry here or in the
			// earlier deleted slot.
			const slot = match (first_dead) {
			case let di: size => yield di;
			case void => yield i;
			};
			g.keys[slot] = key;
			g.vals[slot] = val;
			g.ctrl[slot] = tag;
			m.used += 1;
			if (slot != i) {
				// A deleted slot was reused.
				m.tombs -= 1;
			};
			return;
		};
		const next = probe_next(off, idx, mask);
		off = next.0;
		idx = next.1;
	};
};
// SPDX-License-Identifier: Apache-2.0 AND MPL-2.0
// SPDX-FileCopyrightText: 2024 The Cockroach Authors
// SPDX-FileCopyrightText: 2025 Runxi Yu

use errors;

// Creates a new [[map]] with an initial number of groups and hash function.
//
// n_groups must be greater than zero, otherwise [[errors::invalid]] is
// returned. The group count is rounded up to the next power of two. Returns
// nomem if the group array or the map itself cannot be allocated; nothing is
// leaked in that case.
export fn new(
	n_groups: size,
	hash64: *fn(hash_params: nullable *opaque, key: []u8) size,
	hash_params: nullable *opaque,
) (*map | errors::invalid | nomem) = {
	if (n_groups == 0) {
		return errors::invalid;
	};

	// Round up to a power of two so that group_mask works as a bit mask.
	let v: size = 1;
	for (v < n_groups) {
		v *= 2;
	};
	let groups_count = v;

	let gs: []group = match (alloc([group{...}...]: []group, groups_count)) {
	case let a: []group => yield a;
	case nomem => return nomem;
	};
	for (let i = 0z; i < len(gs); i += 1) {
		group_set_empty(&gs[i]);
	};

	let m = match (alloc(map {
		vt = &_vt,
		group_mask = groups_count - 1,
		used = 0,
		tombs = 0,
		hash64 = hash64,
		hash_params = hash_params,
		groups = gs,
	})) {
	case let p: *map =>
		yield p;
	case nomem =>
		// Release the group array allocated above.
		free(gs);
		return nomem;
	};
	return m;
};