Lindenii Project Forge
Login

hare-git

Git library for Hare

Warning: Due to various recent migrations, viewing non-HEAD refs may be broken.

/git/packed.ha (raw)

use compress::zlib;
use crypto::sha256;
use endian;
use errors;
use fmt;
use fs;
use io;
use strconv;
use strings;

def IDX_MAGIC: u32 = 0xff744f63u32;
def IDX_V2: u32    = 2u32;

def PACK_MAGIC: u32 = 0x5041434bu32;
def PACK_V2: u32    = 2u32;

type pack_loc = struct {
	pack_rel: str,
	ofs: u64,
};

fn cmp_oid(a: []u8, b: oid) i32 = {
	for (let i = 0z; i < sha256::SZ; i += 1z) {
		let av = a[i];
		let bv = b[i];
		if (av < bv) {
			return -1;
		};
		if (av > bv) {
			return 1;
		};
	};
	return 0;
};

fn count_large_before(off32: []u8, idx: size) size = {
	let n = 0z;
	for (let i = 0z; i < idx; i += 1z) {
		let o32 = endian::begetu32(off32[i*4z .. i*4z + 4z]);
		if ((o32 & 0x8000_0000u32) != 0u32) {
			n += 1z;
		};
	};
	return n;
};

// Reads a packed object by its ID from the given repository.
export fn read_packed(
	r: repo,
	id: oid,
) (object | fs::error | io::error | errors::invalid | strconv::invalid | strconv::overflow | errors::noentry | nomem) = {
	let loc = find_in_indexes(r, id)?;
	return read_from_pack_at(r, loc, id);
};

fn find_in_indexes(
	r: repo,
	id: oid,
) (pack_loc | errors::noentry | fs::error | io::error | errors::invalid | nomem) = {
	const dir = "objects/pack";
	let it = fs::iter(r.root, dir)?;
	defer fs::finish(it);

	for (true) {
		match (fs::next(it)) {
		case let de: fs::dirent =>
			if (!strings::hassuffix(de.name, ".idx")) {
				continue;
			};

			{
				let rel = fmt::asprintf("{}/{}", dir, de.name)?;
				match (idx_lookup(r, rel, id)) {
				case let pl: pack_loc =>
					free(rel);
					return pl;
				case errors::noentry =>
					free(rel);
					continue;
				case let fe: fs::error =>
					free(rel);
					return fe;
				case let ioe: io::error =>
					free(rel);
					return ioe;
				case let inv: errors::invalid =>
					free(rel);
					return inv;
				case nomem =>
					free(rel);
					return nomem;
				};
			};

		case done =>
			break;
		case let fe: fs::error =>
			return fe;
		};
	};

	return errors::noentry;
};

fn idx_lookup(
	r: repo,
	idx_rel: const str,
	id: oid,
) (pack_loc | errors::noentry | fs::error | io::error | errors::invalid | nomem) = {
	let h = fs::open(r.root, idx_rel)?;
	defer io::close(h)!;

	let buf = io::drain(h)?;
	defer free(buf);

	if (len(buf) < 8z + 256z*4z) {
		return errors::invalid;
	};

	let off = 0z;
	let magic = endian::begetu32(buf[off..off+4]);
	off += 4z;
	if (magic != IDX_MAGIC) {
		return errors::invalid;
	};

	let ver = endian::begetu32(buf[off..off+4]);
	off += 4z;
	if (ver != IDX_V2) {
		return errors::invalid;
	};

	let fanout: [256]u32 = [0...];
	for (let i = 0z; i < 256z; i += 1z) {
		fanout[i] = endian::begetu32(buf[off..off+4]);
		off += 4z;
	};
	let nobj = fanout[255]: size;

	let need = off
		+ nobj * sha256::SZ
		+ nobj * 4z
		+ nobj * 4z
		+ 2z * sha256::SZ;
	if (need > len(buf)) {
		return errors::invalid;
	};

	let names_off = off;
	let crcs_off = names_off + nobj * sha256::SZ;
	let off32_off = crcs_off + nobj * 4z;

	let large_count = 0z;
	for (let i = 0z; i < nobj; i += 1z) {
		let o32 = endian::begetu32(buf[off32_off + i*4z .. off32_off + i*4z + 4z]);
		if ((o32 & 0x8000_0000u32) != 0u32) {
			large_count += 1z;
		};
	};

	let off64_off = off32_off + nobj * 4z;
	let trailer_off = off64_off + large_count * 8z;
	if (trailer_off + 2z * sha256::SZ > len(buf)) {
		return errors::invalid;
	};

	let first = (id[0]: u8): size;
	let lo: size = if (first == 0u8) { yield 0z; } else { yield fanout[first - 1z]: size; };
	let hi: size = fanout[first]: size;

	let found = false;
	let idx = 0z;
	let l = lo;
	let h = hi;
	for (l < h) {
		let m = l + (h - l) / 2z;
		let cand = buf[names_off + m*sha256::SZ .. names_off + (m+1z)*sha256::SZ];

		let c = cmp_oid(cand, id);
		if (c == 0) {
			found = true;
			idx = m;
			break;
		} else if (c < 0) {
			l = m + 1z;
		} else {
			h = m;
		};
	};

	if (!found) {
		return errors::noentry;
	};

	let o32 = endian::begetu32(buf[off32_off + idx*4z .. off32_off + idx*4z + 4z]);
	let ofs: u64 = 0u64;
	if ((o32 & 0x8000_0000u32) == 0u32) {
		ofs = (o32: u64);
	} else {
		let nlarge_before = count_large_before(buf[off32_off..], idx);
		let p = off64_off + nlarge_before * 8z;
		let o64be = endian::begetu64(buf[p .. p + 8z]);
		ofs = o64be;
	};

	if (!strings::hassuffix(idx_rel, ".idx")) {
		return errors::invalid;
	};

	let stem = strings::bytesub(idx_rel, 0z, len(idx_rel) - 4z)!;
	let packpath = fmt::asprintf("{}{}", stem, ".pack")?;

	return pack_loc { pack_rel = packpath, ofs = ofs };
};

fn read_from_pack_at(
	r: repo,
	loc: pack_loc,
	want: oid,
) (object | fs::error | io::error | errors::invalid | strconv::invalid | strconv::overflow | errors::noentry | nomem) = {
	defer free(loc.pack_rel);

	let h = fs::open(r.root, loc.pack_rel)?;
	defer io::close(h)!;

	let header: [12]u8 = [0...];
	match (io::readall(h, header)) {
	case size =>
		void;
	case io::EOF =>
		return errors::invalid;
	case let ioe: io::error =>
		return ioe;
	};
	let magic = endian::begetu32(header[..4]);
	let ver = endian::begetu32(header[4..8]);
	if (magic != PACK_MAGIC || ver != PACK_V2) {
		return errors::invalid;
	};

	io::seek(h, (loc.ofs: i64), io::whence::SET)?;
	let ty: u8 = 0u8;

	match (read_obj_header(h)) {
	case let t: (u8, size, size) =>
		ty = t.0;
	case let ioe: io::error =>
		return ioe;
	case =>
		return errors::invalid;
	};

	let full_ty: u8 = 0u8;
	let body: []u8 = [];
	defer if (len(body) != 0) {
		free(body);
	};

	switch (ty) {
	case OBJ_COMMIT =>
		body = inflate_section(h)?;
		full_ty = OBJ_COMMIT;
	case OBJ_TREE =>
		body = inflate_section(h)?;
		full_ty = OBJ_TREE;
	case OBJ_BLOB =>
		body = inflate_section(h)?;
		full_ty = OBJ_BLOB;
	case OBJ_REF_DELTA =>
		match (resolve_ref_delta(r, h)) {
		case let t: (u8, []u8) =>
			full_ty = t.0;
			body = t.1;
		case let e: (fs::error | io::error | errors::invalid | errors::noentry | nomem) =>
			return e;
		};
	case OBJ_OFS_DELTA =>
		match (resolve_ofs_delta(r, h, loc)) {
		case let t: (u8, []u8) =>
			full_ty = t.0;
			body = t.1;
		case let e: (fs::error | io::error | errors::invalid | errors::noentry | nomem) =>
			return e;
		};
	case =>
		return errors::invalid;
	};

	let tystr = if (full_ty == OBJ_BLOB) {
		yield "blob";
	} else if (full_ty == OBJ_TREE) {
		yield "tree";
	} else if (full_ty == OBJ_COMMIT) {
		yield "commit";
	} else {
		yield "";
	};
	if (tystr == "" || !verify_typed(tystr, body, want)) {
		return errors::invalid;
	};

	if (full_ty == OBJ_BLOB) {
		const b = parse_blob(want, body)?;
		return (b: object);
	};
	if (full_ty == OBJ_TREE) {
		const t = parse_tree(want, body)?;
		return (t: object);
	};
	if (full_ty == OBJ_COMMIT) {
		const c = parse_commit(want, body)?;
		return (c: object);
	};

	return errors::invalid;
};

fn read_obj_header(h: io::handle) ((u8, size, size) | io::error | errors::invalid) = {
	let consumed = 0z;

	let b0: [1]u8 = [0];
	match (io::readall(h, b0)) {
	case size =>
		void;
	case io::EOF =>
		return errors::invalid;
	case let ioe: io::error =>
		return ioe;
	};
	consumed += 1z;

	let ty = (b0[0] >> 4) & 0x07u8;
	let sz: size = (b0[0] & 0x0fu8): size;

	let shift = 4z;
	if ((b0[0] & 0x80u8) != 0u8) {
		for (true) {
			let bb: [1]u8 = [0];
			match (io::readall(h, bb)) {
			case size =>
				void;
			case io::EOF =>
				return errors::invalid;
			case let ioe: io::error =>
				return ioe;
			};
			consumed += 1z;

			let v = (bb[0] & 0x7fu8): size;
			sz += v << shift;
			if ((bb[0] & 0x80u8) == 0u8) {
				break;
			};
			shift += 7z;
		};
	};

	return (ty, sz, consumed);
};

fn inflate_section(h: io::handle) ([]u8 | io::error | nomem) = {
	let zr = zlib::decompress(h)?;
	defer io::close(&zr.vtable)!;

	let out = io::drain(&zr.vtable)?;
	return out;
};

fn resolve_ref_delta(
	r: repo,
	h: io::handle,
) ((u8, []u8) | fs::error | io::error | errors::invalid | errors::noentry | strconv::invalid | strconv::overflow | nomem) = {
	let base: oid = [0...];
	match (io::readall(h, base)) {
	case size =>
		void;
	case io::EOF =>
		return errors::invalid;
	case let ioe: io::error =>
		return ioe;
	};

	let delta = inflate_section(h)?;
	defer free(delta);

	let bt = read_resolved_body_by_id(r, base)?;
	let out = apply_delta(bt.1, delta)?;
	return (bt.0, out);
};

fn read_ofs_distance(h: io::handle) (u64 | io::error | errors::invalid) = {
	let b: [1]u8 = [0];
	match (io::readall(h, b)) {
	case size =>
		void;
	case io::EOF =>
		return errors::invalid;
	case let ioe: io::error =>
		return ioe;
	};

	let dist: u64 = (b[0] & 0x7fu8): u64;

	if ((b[0] & 0x80u8) != 0u8) {
		for (true) {
			match (io::readall(h, b)) {
			case size =>
				void;
			case io::EOF =>
				return errors::invalid;
			case let ioe: io::error =>
				return ioe;
			};

			dist = ((dist + 1u64) << 7u64) + ((b[0] & 0x7fu8): u64);

			if ((b[0] & 0x80u8) == 0u8) {
				break;
			};
		};
	};

	return dist;
};

fn resolve_ofs_delta(
	r: repo,
	h: io::handle,
	loc: pack_loc,
) ((u8, []u8) | fs::error | io::error | errors::invalid | errors::noentry | strconv::invalid | strconv::overflow | nomem) = {
	let dist = read_ofs_distance(h)?;
	let base_ofs: u64 = if (loc.ofs > dist) {
		yield loc.ofs - dist;
	} else {
		yield 0u64;
	};
	if (base_ofs == 0u64) {
		return errors::invalid;
	};

	let bt = read_resolved_body_at_ofs(r, loc.pack_rel, base_ofs)?;
	let delta = inflate_section(h)?;
	defer free(delta);

	let out = apply_delta(bt.1, delta)?;
	return (bt.0, out);
};

fn read_resolved_body_by_id(
	r: repo,
	id: oid,
) ((u8, []u8) | fs::error | io::error | errors::invalid | errors::noentry | strconv::invalid | strconv::overflow | nomem) = {
	match (find_in_indexes(r, id)) {
	case let pl: pack_loc =>
		let res = read_resolved_body_at_ofs(r, pl.pack_rel, pl.ofs);
		free(pl.pack_rel);
		return res;
	case errors::noentry =>
		return read_loose_typed(r, id);
	case let fe: fs::error =>
		return fe;
	case let ioe: io::error =>
		return ioe;
	case let inv: errors::invalid =>
		return inv;
	case nomem =>
		return nomem;
	};
};

fn read_resolved_body_at_ofs(
	r: repo,
	pack_rel: str,
	ofs: u64,
) ((u8, []u8) | fs::error | io::error | errors::invalid | errors::noentry | strconv::invalid | strconv::overflow | nomem) = {
	let h = fs::open(r.root, pack_rel)?;
	defer io::close(h)!;

	let header: [12]u8 = [0...];
	match (io::readall(h, header)) {
	case size =>
		void;
	case io::EOF =>
		return errors::invalid;
	case let ioe: io::error =>
		return ioe;
	};
	let magic = endian::begetu32(header[..4]);
	let ver = endian::begetu32(header[4..8]);
	if (magic != PACK_MAGIC || ver != PACK_V2) {
		return errors::invalid;
	};

	io::seek(h, (ofs: i64), io::whence::SET)?;
	match (read_obj_header(h)) {
	case let t: (u8, size, size) =>
		switch (t.0) {
		case OBJ_COMMIT =>
			let body = inflate_section(h)?;
			return (OBJ_COMMIT, body);
		case OBJ_TREE =>
			let body = inflate_section(h)?;
			return (OBJ_TREE, body);
		case OBJ_BLOB =>
			let body = inflate_section(h)?;
			return (OBJ_BLOB, body);
		case OBJ_REF_DELTA =>
			let base: oid = [0...];
			match (io::readall(h, base)) {
			case size =>
				void;
			case io::EOF =>
				return errors::invalid;
			case let ioe: io::error =>
				return ioe;
			};
			let delta = inflate_section(h)?;
			defer free(delta);
			let bt = read_resolved_body_by_id(r, base)?;
			let out = apply_delta(bt.1, delta)?;
			return (bt.0, out);
		case OBJ_OFS_DELTA =>
			let dist = read_ofs_distance(h)?;
			let base_ofs: u64 = if (ofs > dist) {
				yield ofs - dist;
			} else {
				yield 0u64;
			};
			if (base_ofs == 0u64) {
				return errors::invalid;
			};

			let delta = inflate_section(h)?;
			defer free(delta);
			let bt = read_resolved_body_at_ofs(r, pack_rel, base_ofs)?;
			let out = apply_delta(bt.1, delta)?;
			return (bt.0, out);
		case =>
			return errors::invalid;
		};
	case let ioe: io::error =>
		return ioe;
	case =>
		return errors::invalid;
	};
};

fn apply_delta(base: []u8, delta: []u8) ([]u8 | errors::invalid | nomem) = {
	let i = 0z;

	let srcsz = read_varint(delta, &i)?;
	let dstsz = read_varint(delta, &i)?;

	if (srcsz != len(base)) {
		return errors::invalid;
	};
	let out: []u8 = alloc([0u8...], dstsz)?;
	let outpos = 0z;

	for (i < len(delta)) {
		let op = delta[i];
		i += 1z;

		if ((op & 0x80u8) != 0u8) {
			let off = 0z;
			let n = 0z;

			if ((op & 0x01u8) != 0u8) {
				if (i >= len(delta)) {
					return errors::invalid;
				};
				off |= (delta[i]: size);
				i += 1z;
			};
			if ((op & 0x02u8) != 0u8) {
				if (i >= len(delta)) {
					return errors::invalid;
				};
				off |= (delta[i]: size) << 8z;
				i += 1z;
			};
			if ((op & 0x04u8) != 0u8) {
				if (i >= len(delta)) {
					return errors::invalid;
				};
				off |= (delta[i]: size) << 16z;
				i += 1z;
			};
			if ((op & 0x08u8) != 0u8) {
				if (i >= len(delta)) {
					return errors::invalid;
				};
				off |= (delta[i]: size) << 24z;
				i += 1z;
			};
			if ((op & 0x10u8) != 0u8) {
				if (i >= len(delta)) {
					return errors::invalid;
				};
				n |= (delta[i]: size);
				i += 1z;
			};
			if ((op & 0x20u8) != 0u8) {
				if (i >= len(delta)) {
					return errors::invalid;
				};
				n |= (delta[i]: size) << 8z;
				i += 1z;
			};
			if ((op & 0x40u8) != 0u8) {
				if (i >= len(delta)) {
					return errors::invalid;
				};
				n |= (delta[i]: size) << 16z;
				i += 1z;
			};
			if (n == 0z) {
				n = 0x10000z;
			};

			if (off + n > len(base) || outpos + n > len(out)) {
				return errors::invalid;
			};

			out[outpos .. outpos + n] = base[off .. off + n];
			outpos += n;
		} else if (op != 0u8) {
			let n = (op: size);
			if (i + n > len(delta) || outpos + n > len(out)) {
				return errors::invalid;
			};
			out[outpos .. outpos + n] = delta[i .. i + n];
			i += n;
			outpos += n;
		} else {
			return errors::invalid;
		};
	};

	if (outpos != len(out)) {
		return errors::invalid;
	};

	return out;
};

fn read_varint(buf: []u8, ip: *size) (size | errors::invalid) = {
	let res = 0z;
	let shift = 0z;
	for (true) {
		if (*ip >= len(buf)) {
			return errors::invalid;
		};
		let b = buf[*ip];
		*ip += 1z;

		res |= ((b & 0x7fu8): size) << shift;
		if ((b & 0x80u8) == 0u8) {
			break;
		};
		shift += 7z;
	};
	return res;
};