Lindenii Project Forge
Login

hare-git

WIP Git library for Hare
Commit info
ID
73f66064338fc974a71e78dccc1266e714f9cb7d
Author
Runxi Yu <me@runxiyu.org>
Author date
Sun, 14 Sep 2025 05:45:40 +0800
Committer
Runxi Yu <me@runxiyu.org>
Committer date
Sun, 14 Sep 2025 05:54:41 +0800
Actions
Packfile parsing (probably very buggy)
use bytes;
use compress::zlib;
use errors;
use fmt;
use fs;
use io;
use strconv;
use strings;

// Builds the repository-relative path of the loose object file for id:
// "objects/<first two hex characters>/<remaining hex characters>". The result
// is heap-allocated by fmt::asprintf; the caller frees it.
fn loose_relpath(id: oid) (str | nomem) = {
	const digits = oid_string(id)?;
	defer free(digits);

	// The two slices borrow from digits, which stays alive until the
	// formatted path below has been produced.
	const fanout = strings::bytesub(digits, 0z, 2z)!;
	const rest = strings::bytesub(digits, 2z, strings::end)!;

	return fmt::asprintf("objects/{}/{}", fanout, rest);
};

// Reads the loose object named by id from below r.root and parses it.
//
// The file is zlib-inflated in full, split into its "<type> <size>" header and
// body at the first NUL byte, checked against the declared size and against id
// (via verify_oid over the whole inflated buffer), and then handed to the
// parser matching its type. Returns errors::invalid when the header is
// malformed, the size or hash does not match, or the type is not one of
// "blob", "tree" or "commit".
export fn read_loose(
	r: repo,
	id: oid,
) (object | fs::error | io::error | errors::invalid | strconv::invalid | strconv::overflow | nomem) = {
	const path = loose_relpath(id)?;
	defer free(path);

	const file = fs::open(r.root, path)?;
	defer io::close(file)!;

	let inflater = zlib::decompress(file)?;
	defer io::close(&inflater.vtable)!;

	let raw = io::drain(&inflater.vtable)?;
	defer free(raw);

	// The header ends at the first NUL; everything after it is the body.
	const nul = match (bytes::index(raw, 0u8)) {
	case void =>
		return errors::invalid;
	case let at: size =>
		yield at;
	};
	const header = raw[..nul];
	const body = raw[nul + 1z ..];

	// Inside the header a single space separates type name and size.
	const sp = match (bytes::index(header, ' ')) {
	case void =>
		return errors::invalid;
	case let at: size =>
		yield at;
	};

	const ty = strings::fromutf8_unsafe(header[..sp]);
	const declared = strconv::stoz(
		strings::fromutf8_unsafe(header[sp + 1z ..]))?;

	// Size check first, hash check second, as cheap-before-expensive.
	if (declared != len(body) || !verify_oid(raw, id)) {
		return errors::invalid;
	};

	if (ty == "blob") {
		const b = parse_blob(id, body)?;
		return (b: object);
	};
	if (ty == "tree") {
		const t = parse_tree(id, body)?;
		return (t: object);
	};
	if (ty == "commit") {
		const c = parse_commit(id, body)?;
		return (c: object);
	};
	return errors::invalid;
};

// Reads the loose object named by id from below r.root and returns its
// numeric type code (OBJ_BLOB, OBJ_TREE or OBJ_COMMIT) together with a freshly
// allocated copy of its body. The caller owns and must free the body.
//
// Like read_loose, the inflated object is validated before anything is
// returned: the header must be "<type> <size>", the declared size must match
// the body length, and the whole buffer must hash to id. Any violation, or an
// unsupported type name, yields errors::invalid.
export fn read_loose_typed(
	r: repo,
	id: oid,
) ((u8, []u8) | fs::error | io::error | errors::invalid | errors::noentry | strconv::invalid | strconv::overflow | nomem) = {
	const rel = loose_relpath(id)?;
	defer free(rel);

	let fh = fs::open(r.root, rel)?;
	defer io::close(fh)!;

	let zr = zlib::decompress(fh)?;
	defer io::close(&zr.vtable)!;

	let buf = io::drain(&zr.vtable)?;
	defer free(buf);

	// The header ends at the first NUL; everything after it is the body.
	const nul = match (bytes::index(buf, 0u8)) {
	case void =>
		return errors::invalid;
	case let at: size =>
		yield at;
	};
	const header = buf[..nul];
	const body = buf[nul + 1z ..];

	const sp = match (bytes::index(header, ' ')) {
	case void =>
		return errors::invalid;
	case let at: size =>
		yield at;
	};

	const ty = strings::fromutf8_unsafe(header[..sp]);
	const szs = strings::fromutf8_unsafe(header[sp + 1z ..]);
	const expect = strconv::stoz(szs)?;
	if (expect != len(body)) {
		return errors::invalid;
	};

	// Same integrity check read_loose performs: without it a corrupted
	// loose object would be handed out unverified.
	if (!verify_oid(buf, id)) {
		return errors::invalid;
	};

	const code: u8 = if (ty == "blob") {
		yield OBJ_BLOB;
	} else if (ty == "tree") {
		yield OBJ_TREE;
	} else if (ty == "commit") {
		yield OBJ_COMMIT;
	} else {
		return errors::invalid;
	};

	// body borrows from buf, which is freed on return, so hand out a copy.
	let out = alloc(body...)?;
	return (code, out);
};
use bytes;
use crypto::sha256;
use errors;
use fmt;
use fs;
use hash;
use io;
use strconv;
use strings;

// Any parsed Git object this library can return: a blob, a tree or a commit.
// Release one with [[object_finish]].
export type object = (blob | tree | commit);

// Numeric object type codes, as compared against the type field decoded from a
// packfile entry header. COMMIT, TREE and BLOB are complete objects; OFS_DELTA
// and REF_DELTA are deltas that must be applied to a base object.
// NOTE(review): OBJ_TAG is declared but the readers in this file do not handle
// it (they return errors::invalid for it) — confirm whether tags are planned.
export def OBJ_COMMIT: u8    = 1u8;
export def OBJ_TREE: u8      = 2u8;
export def OBJ_BLOB: u8      = 3u8;
export def OBJ_TAG: u8       = 4u8;
export def OBJ_OFS_DELTA: u8 = 6u8;
export def OBJ_REF_DELTA: u8 = 7u8;

// Releases the resources held by o by forwarding it to the finisher of
// whichever concrete type it holds.
export fn object_finish(o: object) void = {
	match (o) {
	case let as_commit: commit =>
		commit_finish(as_commit);
	case let as_tree: tree =>
		tree_finish(as_tree);
	case let as_blob: blob =>
		blob_finish(as_blob);
	};
};

// Reports whether the SHA-256 digest of buf equals want. buf is hashed
// exactly as given, so callers pass the complete object including its header.
export fn verify_oid(buf: []u8, want: oid) bool = {
	let hasher = sha256::sha256();
	defer hash::close(&hasher);

	hash::write(&hasher, buf);

	let digest: oid = [0...];
	hash::sum(&hasher, digest);

	return bytes::equal(digest[..], want[..]);
};

// Reports whether an object of type ty with the given body hashes to want.
//
// The digest is SHA-256 over "<ty> <decimal len(body)>\0" followed by body,
// i.e. the header is reconstructed rather than read. Only "blob", "tree" and
// "commit" are accepted; any other type name yields false.
export fn verify_typed(ty: str, body: []u8, want: oid) bool = {
	if (ty != "blob" && ty != "tree" && ty != "commit") {
		return false;
	};

	let hasher = sha256::sha256();
	defer hash::close(&hasher);

	// Header: type name, space, decimal body length, NUL terminator.
	hash::write(&hasher, strings::toutf8(ty));
	hash::write(&hasher, strings::toutf8(" "));
	const declared = strconv::ztos(len(body));
	hash::write(&hasher, strings::toutf8(declared));
	hash::write(&hasher, strings::toutf8("\x00"));

	hash::write(&hasher, body);

	let digest: oid = [0...];
	hash::sum(&hasher, digest);

	return bytes::equal(digest[..], want[..]);
};

// Reads and parses the object named by id, trying the loose object store
// first and the packfiles second.
//
// The packfiles are consulted only when the loose lookup fails with
// errors::noentry; every other loose-read error is returned unchanged.
// NOTE(review): when the pack lookup also reports errors::noentry this
// function returns errors::invalid, so "object does not exist" and "object is
// corrupt" are indistinguishable to the caller — confirm that is intended.
export fn read_object(
	r: repo,
	id: oid,
) (object | fs::error | io::error | errors::invalid | strconv::invalid | strconv::overflow | nomem) = {
	match (read_loose(r, id)) {
	case let o: object =>
		return o;
	case let fe: fs::error =>
		if (fe is errors::noentry) {
			void; // no loose object: fall through to the packfile lookup below
			void;
		} else {
			return fe;
		};
	case let e: (io::error | errors::invalid | strconv::invalid | strconv::overflow | nomem) =>
		return e;
	};

	// Reached only when the loose object file does not exist.
	match (read_packed(r, id)) {
	case let o: object =>
		return o;
	case let fe: fs::error =>
		if (fe is errors::noentry) {
			// Not loose and not packed either.
			return errors::invalid;
		} else {
			return fe;
		};
	case let e: (io::error | errors::invalid | strconv::invalid | strconv::overflow | nomem) =>
		return e;
	};
};
use compress::zlib;
use crypto::sha256;
use endian;
use errors;
use fmt;
use fs;
use io;
use strconv;
use strings;

// First big-endian 32-bit word expected in a pack index file (the bytes
// 0xff 't' 'O' 'c'), and the only index version idx_lookup accepts.
def IDX_MAGIC: u32 = 0xff744f63u32;
def IDX_V2: u32    = 2u32;

// First big-endian 32-bit word expected in a packfile (ASCII "PACK"), and the
// only pack version the readers in this file accept.
def PACK_MAGIC: u32 = 0x5041434bu32;
def PACK_V2: u32    = 2u32;

// Location of an object inside a packfile, as produced by idx_lookup.
type pack_loc = struct {
	// Path of the .pack file relative to the repository root. Allocated by
	// idx_lookup; whoever ends up holding the pack_loc frees it.
	pack_rel: str,
	// Byte offset of the object's entry header, measured from the start of
	// the packfile (it is passed to io::seek with io::whence::SET).
	ofs: u64,
};

// Compares the first sha256::SZ bytes of a with b byte by byte. Returns -1 if
// a sorts before b, 1 if it sorts after, and 0 if they are equal. a must hold
// at least sha256::SZ bytes.
fn cmp_oid(a: []u8, b: oid) i32 = {
	for (let k = 0z; k < sha256::SZ; k += 1z) {
		// The first differing byte decides the ordering.
		if (a[k] != b[k]) {
			return if (a[k] < b[k]) -1 else 1;
		};
	};
	return 0;
};

// Counts how many of the first idx entries of off32 have their most
// significant bit set. off32 is read as consecutive big-endian 32-bit values.
fn count_large_before(off32: []u8, idx: size) size = {
	let flagged = 0z;
	// Walk the table by byte offset, four bytes per entry.
	for (let at = 0z; at < idx * 4z; at += 4z) {
		const entry = endian::begetu32(off32[at .. at + 4z]);
		if ((entry & 0x8000_0000u32) != 0u32) {
			flagged += 1z;
		};
	};
	return flagged;
};

// Reads and parses the object named by id from the repository's packfiles:
// the pack indexes are searched for its location, then the object is read
// from that location. Fails with errors::noentry if no index lists id.
export fn read_packed(
	r: repo,
	id: oid,
) (object | fs::error | io::error | errors::invalid | strconv::invalid | strconv::overflow | errors::noentry | nomem) = {
	// read_from_pack_at takes over the location and frees its path.
	const where = find_in_indexes(r, id)?;
	return read_from_pack_at(r, where, id);
};

// Searches every "*.idx" file in objects/pack for id and returns the location
// reported by the first index that contains it. The returned pack_loc owns
// its pack_rel string. Returns errors::noentry when no index lists id; any
// other lookup error aborts the search and is returned as is.
fn find_in_indexes(
	r: repo,
	id: oid,
) (pack_loc | errors::noentry | fs::error | io::error | errors::invalid | nomem) = {
	const dir = "objects/pack";
	let entries = fs::iter(r.root, dir)?;
	defer fs::finish(entries);

	for (true) {
		match (fs::next(entries)) {
		case let de: fs::dirent =>
			if (!strings::hassuffix(de.name, ".idx")) {
				continue;
			};

			// The path is needed only for this one lookup; the defer
			// releases it on every way out of this case.
			const rel = fmt::asprintf("{}/{}", dir, de.name)?;
			defer free(rel);

			match (idx_lookup(r, rel, id)) {
			case let pl: pack_loc =>
				return pl;
			case errors::noentry =>
				// Not in this index, try the next one.
				continue;
			case let fe: fs::error =>
				return fe;
			case let ioe: io::error =>
				return ioe;
			case let inv: errors::invalid =>
				return inv;
			case nomem =>
				return nomem;
			};
		case done =>
			break;
		case let fe: fs::error =>
			return fe;
		};
	};

	return errors::noentry;
};

// Looks id up in the version 2 pack index at idx_rel (relative to r.root).
//
// On success returns the path of the matching ".pack" file (idx_rel with its
// ".idx" suffix replaced; allocated, owned by the caller) and the byte offset
// of the object inside it. Returns errors::noentry if the index does not list
// id, and errors::invalid if the index is truncated, has the wrong magic or
// version, or contains inconsistent tables.
//
// Layout as read here: magic and version (4 bytes each), a 256-entry fanout
// table, nobj object names, nobj CRC entries, nobj 32-bit offsets, a table of
// 64-bit offsets, and a trailer of two hashes.
fn idx_lookup(
	r: repo,
	idx_rel: const str,
	id: oid,
) (pack_loc | errors::noentry | fs::error | io::error | errors::invalid | nomem) = {
	let file = fs::open(r.root, idx_rel)?;
	defer io::close(file)!;

	let buf = io::drain(file)?;
	defer free(buf);

	// Header plus fanout table is the minimum any valid index contains.
	if (len(buf) < 8z + 256z*4z) {
		return errors::invalid;
	};

	let off = 0z;
	let magic = endian::begetu32(buf[off..off+4]);
	off += 4z;
	if (magic != IDX_MAGIC) {
		return errors::invalid;
	};

	let ver = endian::begetu32(buf[off..off+4]);
	off += 4z;
	if (ver != IDX_V2) {
		return errors::invalid;
	};

	// fanout[b] is the number of objects whose first byte is <= b, so the
	// last entry is the total object count.
	let fanout: [256]u32 = [0...];
	for (let i = 0z; i < 256z; i += 1z) {
		fanout[i] = endian::begetu32(buf[off..off+4]);
		off += 4z;
	};
	let nobj = fanout[255]: size;

	let need = off
		+ nobj * sha256::SZ
		+ nobj * 4z
		+ nobj * 4z
		+ 2z * sha256::SZ;
	if (need > len(buf)) {
		return errors::invalid;
	};

	let names_off = off;
	let crcs_off = names_off + nobj * sha256::SZ;
	let off32_off = crcs_off + nobj * 4z;

	// The 64-bit table has one slot per 32-bit entry with its top bit set.
	let large_count = 0z;
	for (let i = 0z; i < nobj; i += 1z) {
		let o32 = endian::begetu32(buf[off32_off + i*4z .. off32_off + i*4z + 4z]);
		if ((o32 & 0x8000_0000u32) != 0u32) {
			large_count += 1z;
		};
	};

	let off64_off = off32_off + nobj * 4z;
	let trailer_off = off64_off + large_count * 8z;
	if (trailer_off + 2z * sha256::SZ > len(buf)) {
		return errors::invalid;
	};

	// Narrow the search to the names sharing id's first byte.
	let first = (id[0]: u8): size;
	let lo: size = if (first == 0z) { yield 0z; } else { yield fanout[first - 1z]: size; };
	let hi: size = fanout[first]: size;

	// A corrupt fanout table must not send the search outside the name
	// table, where slicing would abort instead of reporting an error.
	if (lo > hi || hi > nobj) {
		return errors::invalid;
	};

	// Binary search over the sorted names in [lo, hi).
	let found = false;
	let idx = 0z;
	let left = lo;
	let right = hi;
	for (left < right) {
		let m = left + (right - left) / 2z;
		let cand = buf[names_off + m*sha256::SZ .. names_off + (m+1z)*sha256::SZ];

		let c = cmp_oid(cand, id);
		if (c == 0) {
			found = true;
			idx = m;
			break;
		} else if (c < 0) {
			left = m + 1z;
		} else {
			right = m;
		};
	};

	if (!found) {
		return errors::noentry;
	};

	let o32 = endian::begetu32(buf[off32_off + idx*4z .. off32_off + idx*4z + 4z]);
	let ofs: u64 = 0u64;
	if ((o32 & 0x8000_0000u32) == 0u32) {
		ofs = (o32: u64);
	} else {
		// With the top bit set, the low 31 bits are the slot number in
		// the 64-bit offset table (not a count of earlier large entries).
		const slot = (o32 & 0x7fff_ffffu32): size;
		if (slot >= large_count) {
			return errors::invalid;
		};
		const p = off64_off + slot * 8z;
		ofs = endian::begetu64(buf[p .. p + 8z]);
	};

	if (!strings::hassuffix(idx_rel, ".idx")) {
		return errors::invalid;
	};

	// "<stem>.idx" -> "<stem>.pack"
	let stem = strings::bytesub(idx_rel, 0z, len(idx_rel) - 4z)!;
	let packpath = fmt::asprintf("{}{}", stem, ".pack")?;

	return pack_loc { pack_rel = packpath, ofs = ofs };
};

// Reads the object stored at loc inside its packfile, resolves it if it is a
// delta, verifies that the result hashes to want, and parses it.
//
// Takes ownership of loc.pack_rel and frees it before returning. Returns
// errors::invalid if the pack header is wrong, the entry type is unsupported,
// or the reconstructed object does not hash to want.
// NOTE(review): body is freed when this function returns, so the parse_*
// functions are assumed to copy whatever they keep — confirm.
fn read_from_pack_at(
	r: repo,
	loc: pack_loc,
	want: oid,
) (object | fs::error | io::error | errors::invalid | strconv::invalid | strconv::overflow | errors::noentry | nomem) = {
	defer free(loc.pack_rel);

	let h = fs::open(r.root, loc.pack_rel)?;
	defer io::close(h)!;

	// 12-byte pack header; only magic and version are checked here.
	let header: [12]u8 = [0...];
	match (io::readall(h, header)) {
	case size =>
		void;
	case io::EOF =>
		return errors::invalid;
	case let ioe: io::error =>
		return ioe;
	};
	let magic = endian::begetu32(header[..4]);
	let ver = endian::begetu32(header[4..8]);
	if (magic != PACK_MAGIC || ver != PACK_V2) {
		return errors::invalid;
	};

	io::seek(h, (loc.ofs: i64), io::whence::SET)?;
	const hdr = read_obj_header(h)?;

	let full_ty: u8 = 0u8;
	let body: []u8 = [];
	defer if (len(body) != 0) {
		free(body);
	};

	// Error propagation uses ? throughout so that every error the delta
	// resolvers declare (including the strconv ones) reaches the caller.
	switch (hdr.0) {
	case OBJ_COMMIT, OBJ_TREE, OBJ_BLOB =>
		body = inflate_section(h)?;
		full_ty = hdr.0;
	case OBJ_REF_DELTA =>
		const t = resolve_ref_delta(r, h)?;
		full_ty = t.0;
		body = t.1;
	case OBJ_OFS_DELTA =>
		const t = resolve_ofs_delta(r, h, loc)?;
		full_ty = t.0;
		body = t.1;
	case =>
		return errors::invalid;
	};

	let tystr = if (full_ty == OBJ_BLOB) {
		yield "blob";
	} else if (full_ty == OBJ_TREE) {
		yield "tree";
	} else if (full_ty == OBJ_COMMIT) {
		yield "commit";
	} else {
		yield "";
	};
	if (tystr == "" || !verify_typed(tystr, body, want)) {
		return errors::invalid;
	};

	if (full_ty == OBJ_BLOB) {
		const b = parse_blob(want, body)?;
		return (b: object);
	};
	if (full_ty == OBJ_TREE) {
		const t = parse_tree(want, body)?;
		return (t: object);
	};
	if (full_ty == OBJ_COMMIT) {
		const c = parse_commit(want, body)?;
		return (c: object);
	};

	return errors::invalid;
};

// Decodes the variable-length entry header at the current position of h.
//
// Returns (type, size, consumed): the 3-bit type code from bits 4-6 of the
// first byte, the size assembled from the low 4 bits of the first byte plus 7
// bits from each continuation byte (least significant group first), and the
// number of header bytes read. Returns errors::invalid if the stream ends
// inside the header or the size does not fit in a size.
fn read_obj_header(h: io::handle) ((u8, size, size) | io::error | errors::invalid) = {
	let consumed = 0z;

	let b0: [1]u8 = [0];
	match (io::readall(h, b0)) {
	case size =>
		void;
	case io::EOF =>
		return errors::invalid;
	case let ioe: io::error =>
		return ioe;
	};
	consumed += 1z;

	let ty = (b0[0] >> 4) & 0x07u8;
	let sz: size = (b0[0] & 0x0fu8): size;

	// The first byte contributed 4 size bits; each further byte adds 7.
	let shift = 4z;
	if ((b0[0] & 0x80u8) != 0u8) {
		for (true) {
			let bb: [1]u8 = [0];
			match (io::readall(h, bb)) {
			case size =>
				void;
			case io::EOF =>
				return errors::invalid;
			case let ioe: io::error =>
				return ioe;
			};
			consumed += 1z;

			// An overlong header would shift past the width of
			// size; reject it rather than compute garbage.
			if (shift >= size(size) * 8z) {
				return errors::invalid;
			};

			let v = (bb[0] & 0x7fu8): size;
			sz += v << shift;
			if ((bb[0] & 0x80u8) == 0u8) {
				break;
			};
			shift += 7z;
		};
	};

	return (ty, sz, consumed);
};

// Inflates one zlib stream starting at the current position of h and returns
// all of its decompressed bytes. The caller frees the result.
fn inflate_section(h: io::handle) ([]u8 | io::error | nomem) = {
	let inflater = zlib::decompress(h)?;
	defer io::close(&inflater.vtable)!;

	return io::drain(&inflater.vtable);
};

// Resolves a REF_DELTA entry whose payload starts at the current position of
// h: reads the id of the base object, inflates the delta that follows, loads
// the fully resolved base by id and applies the delta to it.
//
// Returns the base's type code and the reconstructed body, which the caller
// frees. The intermediate base body and delta are released here.
fn resolve_ref_delta(
	r: repo,
	h: io::handle,
) ((u8, []u8) | fs::error | io::error | errors::invalid | errors::noentry | strconv::invalid | strconv::overflow | nomem) = {
	let base: oid = [0...];
	match (io::readall(h, base)) {
	case size =>
		void;
	case io::EOF =>
		return errors::invalid;
	case let ioe: io::error =>
		return ioe;
	};

	let delta = inflate_section(h)?;
	defer free(delta);

	let bt = read_resolved_body_by_id(r, base)?;
	// apply_delta returns a new buffer, so the base body is no longer
	// needed once it returns (or fails).
	defer free(bt.1);

	let out = apply_delta(bt.1, delta)?;
	return (bt.0, out);
};

// Decodes the variable-length distance stored at the current position of h.
//
// Each byte carries 7 value bits and a continuation flag in its top bit. The
// first byte supplies the initial value; for every following byte the running
// value is incremented by one, shifted left by 7 and the new bits are added.
// Returns errors::invalid if the stream ends before the final byte.
fn read_ofs_distance(h: io::handle) (u64 | io::error | errors::invalid) = {
	let byte: [1]u8 = [0];
	let dist: u64 = 0u64;

	for (let first = true; true; first = false) {
		match (io::readall(h, byte)) {
		case size =>
			void;
		case io::EOF =>
			return errors::invalid;
		case let ioe: io::error =>
			return ioe;
		};

		const low = (byte[0] & 0x7fu8): u64;
		if (first) {
			dist = low;
		} else {
			dist = ((dist + 1u64) << 7u64) + low;
		};

		// A clear top bit marks the last byte.
		if ((byte[0] & 0x80u8) == 0u8) {
			break;
		};
	};

	return dist;
};

// Resolves an OFS_DELTA entry whose payload starts at the current position of
// h. The base object lives in the same packfile, at loc.ofs minus the encoded
// distance; it is loaded fully resolved and the inflated delta is applied.
//
// Returns the base's type code and the reconstructed body, which the caller
// frees. Returns errors::invalid if the distance is zero (the entry would be
// its own base) or does not leave a positive base offset.
fn resolve_ofs_delta(
	r: repo,
	h: io::handle,
	loc: pack_loc,
) ((u8, []u8) | fs::error | io::error | errors::invalid | errors::noentry | strconv::invalid | strconv::overflow | nomem) = {
	let dist = read_ofs_distance(h)?;
	// dist == 0 would make the base this very entry and recurse forever;
	// dist >= loc.ofs would place the base at or before offset zero.
	if (dist == 0u64 || dist >= loc.ofs) {
		return errors::invalid;
	};
	let base_ofs: u64 = loc.ofs - dist;

	let bt = read_resolved_body_at_ofs(r, loc.pack_rel, base_ofs)?;
	// apply_delta returns a new buffer, so release the base body on every
	// way out, including the error paths below.
	defer free(bt.1);

	let delta = inflate_section(h)?;
	defer free(delta);

	let out = apply_delta(bt.1, delta)?;
	return (bt.0, out);
};

// Loads the object named by id as a (type code, body) pair with all deltas
// applied. Packfiles are searched first; if no index lists id the loose object
// store is used instead. The caller frees the returned body.
fn read_resolved_body_by_id(
	r: repo,
	id: oid,
) ((u8, []u8) | fs::error | io::error | errors::invalid | errors::noentry | strconv::invalid | strconv::overflow | nomem) = {
	match (find_in_indexes(r, id)) {
	case let found: pack_loc =>
		// The location's path is ours to release once the read is done.
		defer free(found.pack_rel);
		return read_resolved_body_at_ofs(r, found.pack_rel, found.ofs);
	case errors::noentry =>
		return read_loose_typed(r, id);
	case let fe: fs::error =>
		return fe;
	case let ioe: io::error =>
		return ioe;
	case let inv: errors::invalid =>
		return inv;
	case nomem =>
		return nomem;
	};
};

// Loads the entry at byte offset ofs of the packfile pack_rel (relative to
// r.root) as a (type code, body) pair with all deltas applied. Delta chains
// are followed recursively, by id for REF_DELTA and by offset within the same
// pack for OFS_DELTA. The caller frees the returned body.
//
// Returns errors::invalid for a bad pack header, an unsupported entry type, a
// truncated entry, or an OFS_DELTA whose distance is zero or reaches to or
// before the start of the file.
fn read_resolved_body_at_ofs(
	r: repo,
	pack_rel: str,
	ofs: u64,
) ((u8, []u8) | fs::error | io::error | errors::invalid | errors::noentry | strconv::invalid | strconv::overflow | nomem) = {
	let h = fs::open(r.root, pack_rel)?;
	defer io::close(h)!;

	// 12-byte pack header; only magic and version are checked here.
	let header: [12]u8 = [0...];
	match (io::readall(h, header)) {
	case size =>
		void;
	case io::EOF =>
		return errors::invalid;
	case let ioe: io::error =>
		return ioe;
	};
	let magic = endian::begetu32(header[..4]);
	let ver = endian::begetu32(header[4..8]);
	if (magic != PACK_MAGIC || ver != PACK_V2) {
		return errors::invalid;
	};

	io::seek(h, (ofs: i64), io::whence::SET)?;
	const hdr = read_obj_header(h)?;

	switch (hdr.0) {
	case OBJ_COMMIT, OBJ_TREE, OBJ_BLOB =>
		// Complete object: the inflated payload is the body.
		let body = inflate_section(h)?;
		return (hdr.0, body);
	case OBJ_REF_DELTA =>
		let base: oid = [0...];
		match (io::readall(h, base)) {
		case size =>
			void;
		case io::EOF =>
			return errors::invalid;
		case let ioe: io::error =>
			return ioe;
		};
		let delta = inflate_section(h)?;
		defer free(delta);
		let bt = read_resolved_body_by_id(r, base)?;
		// The result is a new buffer; the base body is done after this.
		defer free(bt.1);
		let out = apply_delta(bt.1, delta)?;
		return (bt.0, out);
	case OBJ_OFS_DELTA =>
		let dist = read_ofs_distance(h)?;
		// dist == 0 would name this entry as its own base and recurse
		// forever; dist >= ofs would place the base at or before 0.
		if (dist == 0u64 || dist >= ofs) {
			return errors::invalid;
		};
		let base_ofs: u64 = ofs - dist;

		let delta = inflate_section(h)?;
		defer free(delta);
		let bt = read_resolved_body_at_ofs(r, pack_rel, base_ofs)?;
		defer free(bt.1);
		let out = apply_delta(bt.1, delta)?;
		return (bt.0, out);
	case =>
		return errors::invalid;
	};
};

// Applies delta to base and returns the reconstructed buffer, which the
// caller frees.
//
// The delta starts with two varints: the expected length of base and the
// length of the result. Instructions follow until the delta is exhausted:
//   - top bit set: copy from base. Bits 0-3 select which of four offset bytes
//     follow and bits 4-6 which of three length bytes follow (each little
//     endian, absent bytes are zero); a length of zero means 0x10000.
//   - top bit clear, non-zero: insert the next "op" bytes of the delta.
//   - zero: rejected.
// Returns errors::invalid if the base length does not match, an instruction is
// truncated or reaches outside base or the output, or the output is not
// filled exactly. The partially built output is released on failure.
fn apply_delta(base: []u8, delta: []u8) ([]u8 | errors::invalid | nomem) = {
	let i = 0z;

	let srcsz = read_varint(delta, &i)?;
	let dstsz = read_varint(delta, &i)?;

	if (srcsz != len(base)) {
		return errors::invalid;
	};
	let out: []u8 = alloc([0u8...], dstsz)?;
	// Every error return below would otherwise leak the output buffer.
	let ok = false;
	defer if (!ok) {
		free(out);
	};
	let outpos = 0z;

	for (i < len(delta)) {
		let op = delta[i];
		i += 1z;

		if ((op & 0x80u8) != 0u8) {
			// Copy instruction: gather the optional offset bytes...
			let off = 0z;
			let n = 0z;

			if ((op & 0x01u8) != 0u8) {
				if (i >= len(delta)) {
					return errors::invalid;
				};
				off |= (delta[i]: size);
				i += 1z;
			};
			if ((op & 0x02u8) != 0u8) {
				if (i >= len(delta)) {
					return errors::invalid;
				};
				off |= (delta[i]: size) << 8z;
				i += 1z;
			};
			if ((op & 0x04u8) != 0u8) {
				if (i >= len(delta)) {
					return errors::invalid;
				};
				off |= (delta[i]: size) << 16z;
				i += 1z;
			};
			if ((op & 0x08u8) != 0u8) {
				if (i >= len(delta)) {
					return errors::invalid;
				};
				off |= (delta[i]: size) << 24z;
				i += 1z;
			};
			// ...then the optional length bytes.
			if ((op & 0x10u8) != 0u8) {
				if (i >= len(delta)) {
					return errors::invalid;
				};
				n |= (delta[i]: size);
				i += 1z;
			};
			if ((op & 0x20u8) != 0u8) {
				if (i >= len(delta)) {
					return errors::invalid;
				};
				n |= (delta[i]: size) << 8z;
				i += 1z;
			};
			if ((op & 0x40u8) != 0u8) {
				if (i >= len(delta)) {
					return errors::invalid;
				};
				n |= (delta[i]: size) << 16z;
				i += 1z;
			};
			if (n == 0z) {
				n = 0x10000z;
			};

			if (off + n > len(base) || outpos + n > len(out)) {
				return errors::invalid;
			};

			out[outpos .. outpos + n] = base[off .. off + n];
			outpos += n;
		} else if (op != 0u8) {
			// Insert instruction: op itself is the literal length.
			let n = (op: size);
			if (i + n > len(delta) || outpos + n > len(out)) {
				return errors::invalid;
			};
			out[outpos .. outpos + n] = delta[i .. i + n];
			i += n;
			outpos += n;
		} else {
			return errors::invalid;
		};
	};

	if (outpos != len(out)) {
		return errors::invalid;
	};

	ok = true;
	return out;
};

// Decodes a little-endian base-128 varint from buf starting at *ip and
// advances *ip past it. Each byte contributes its low 7 bits, least
// significant group first; a clear top bit marks the last byte.
//
// Returns errors::invalid if buf ends before the final byte or if the encoding
// has more groups than fit in a size.
fn read_varint(buf: []u8, ip: *size) (size | errors::invalid) = {
	let res = 0z;
	let shift = 0z;
	for (true) {
		if (*ip >= len(buf)) {
			return errors::invalid;
		};
		let b = buf[*ip];
		*ip += 1z;

		// An overlong encoding would shift past the width of size;
		// reject it rather than compute garbage.
		if (shift >= size(size) * 8z) {
			return errors::invalid;
		};

		res |= ((b & 0x7fu8): size) << shift;
		if ((b & 0x80u8) == 0u8) {
			break;
		};
		shift += 7z;
	};
	return res;
};