Lindenii Project Forge
Packfile parsing (probably very buggy)
use bytes; use compress::zlib; use errors; use fmt; use fs; use io; use strconv; use strings; fn loose_relpath(id: oid) (str | nomem) = { const hex = oid_string(id)?; defer free(hex); const dir = strings::bytesub(hex, 0z, 2z)!; const file = strings::bytesub(hex, 2z, strings::end)!; return fmt::asprintf("objects/{}/{}", dir, file); }; export fn read_loose( r: repo, id: oid, ) (object | fs::error | io::error | errors::invalid | strconv::invalid | strconv::overflow | nomem) = { const rel = loose_relpath(id)?; defer free(rel); const fh = fs::open(r.root, rel)?; defer io::close(fh)!; let zr = zlib::decompress(fh)?; defer io::close(&zr.vtable)!; let buf = io::drain(&zr.vtable)?; defer free(buf); let mnul = bytes::index(buf, 0u8); if (mnul is void) { return errors::invalid; }; let nul = mnul: size; const header = buf[..nul]; const body = buf[nul + 1z ..]; let msp = bytes::index(header, ' '); if (msp is void) { return errors::invalid; }; let sp = msp: size; const ty = strings::fromutf8_unsafe(header[..sp]); const szs = strings::fromutf8_unsafe(header[sp + 1z ..]); const expect = strconv::stoz(szs)?; if (expect != len(body)) { return errors::invalid; }; if (!verify_oid(buf, id)) { return errors::invalid; }; if (ty == "blob") { const b = parse_blob(id, body)?; return (b: object); } else if (ty == "tree") { const t = parse_tree(id, body)?; return (t: object); } else if (ty == "commit") { const c = parse_commit(id, body)?; return (c: object); } else { return errors::invalid; }; };
export fn read_loose_typed( r: repo, id: oid, ) ((u8, []u8) | fs::error | io::error | errors::invalid | errors::noentry | strconv::invalid | strconv::overflow | nomem) = { const rel = loose_relpath(id)?; defer free(rel); let fh = fs::open(r.root, rel)?; defer io::close(fh)!; let zr = zlib::decompress(fh)?; defer io::close(&zr.vtable)!; let buf = io::drain(&zr.vtable)?; defer free(buf); let mnul = bytes::index(buf, 0u8); if (mnul is void) { return errors::invalid; }; let nul = mnul: size; const header = buf[..nul]; const body = buf[nul + 1z ..]; let msp = bytes::index(header, ' '); if (msp is void) { return errors::invalid; }; let sp = msp: size; const ty = strings::fromutf8_unsafe(header[..sp]); const szs = strings::fromutf8_unsafe(header[sp + 1z ..]); const expect = strconv::stoz(szs)?; if (expect != len(body)) { return errors::invalid; }; let code: u8 = 0u8; if (ty == "blob") { code = OBJ_BLOB; } else if (ty == "tree") { code = OBJ_TREE; } else if (ty == "commit") { code = OBJ_COMMIT; } else { return errors::invalid; }; let out = alloc(body...)?; return (code, out); };
use bytes; use crypto::sha256; use errors;
use fmt;
use fs; use hash; use io; use strconv;
use strings;
export type object = (blob | tree | commit);
export def OBJ_COMMIT: u8 = 1u8; export def OBJ_TREE: u8 = 2u8; export def OBJ_BLOB: u8 = 3u8; export def OBJ_TAG: u8 = 4u8; export def OBJ_OFS_DELTA: u8 = 6u8; export def OBJ_REF_DELTA: u8 = 7u8;
export fn object_finish(o: object) void = { match (o) { case let b: blob => blob_finish(b); case let t: tree => tree_finish(t); case let c: commit => commit_finish(c); }; }; export fn verify_oid(buf: []u8, want: oid) bool = { let st = sha256::sha256(); hash::write(&st, buf); let got: oid = [0...]; hash::sum(&st, got); hash::close(&st); if (bytes::equal(got[..], want[..])) { return true; } else { return false; }; };
export fn verify_typed(ty: str, body: []u8, want: oid) bool = { let st = sha256::sha256(); defer hash::close(&st); if (ty == "blob") { hash::write(&st, strings::toutf8("blob")); } else if (ty == "tree") { hash::write(&st, strings::toutf8("tree")); } else if (ty == "commit") { hash::write(&st, strings::toutf8("commit")); } else { return false; }; hash::write(&st, strings::toutf8(" ")); let szs = strconv::ztos(len(body)); hash::write(&st, strings::toutf8(szs)); hash::write(&st, strings::toutf8("\x00")); hash::write(&st, body); let got: oid = [0...]; hash::sum(&st, got); if (bytes::equal(got[..], want[..])) { return true; } else { return false; }; };
export fn read_object( r: repo, id: oid, ) (object | fs::error | io::error | errors::invalid | strconv::invalid | strconv::overflow | nomem) = { match (read_loose(r, id)) { case let o: object => return o; case let fe: fs::error => if (fe is errors::noentry) {
void; // fallthrough to the packfile thing
void;
} else { return fe; }; case let e: (io::error | errors::invalid | strconv::invalid | strconv::overflow | nomem) => return e; }; match (read_packed(r, id)) { case let o: object => return o; case let fe: fs::error => if (fe is errors::noentry) { return errors::invalid; } else { return fe; }; case let e: (io::error | errors::invalid | strconv::invalid | strconv::overflow | nomem) => return e; }; };
use compress::zlib; use crypto::sha256; use endian;
use errors;
use fmt;
use fs; use io; use strconv;
use strings; def IDX_MAGIC: u32 = 0xff744f63u32; def IDX_V2: u32 = 2u32; def PACK_MAGIC: u32 = 0x5041434bu32; def PACK_V2: u32 = 2u32; type pack_loc = struct { pack_rel: str, ofs: u64, }; fn cmp_oid(a: []u8, b: oid) i32 = { for (let i = 0z; i < sha256::SZ; i += 1z) { let av = a[i]; let bv = b[i]; if (av < bv) { return -1; }; if (av > bv) { return 1; }; }; return 0; }; fn count_large_before(off32: []u8, idx: size) size = { let n = 0z; for (let i = 0z; i < idx; i += 1z) { let o32 = endian::begetu32(off32[i*4z .. i*4z + 4z]); if ((o32 & 0x8000_0000u32) != 0u32) { n += 1z; }; }; return n; };
export fn read_packed( r: repo, id: oid, ) (object | fs::error | io::error | errors::invalid | strconv::invalid | strconv::overflow | errors::noentry | nomem) = {
let loc = find_in_indexes(r, id)?; return read_from_pack_at(r, loc, id); }; fn find_in_indexes( r: repo, id: oid, ) (pack_loc | errors::noentry | fs::error | io::error | errors::invalid | nomem) = { const dir = "objects/pack"; let it = fs::iter(r.root, dir)?; defer fs::finish(it); for (true) { match (fs::next(it)) { case let de: fs::dirent => if (!strings::hassuffix(de.name, ".idx")) { continue; }; { let rel = fmt::asprintf("{}/{}", dir, de.name)?; match (idx_lookup(r, rel, id)) { case let pl: pack_loc => free(rel); return pl; case errors::noentry => free(rel); continue; case let fe: fs::error => free(rel); return fe; case let ioe: io::error => free(rel); return ioe; case let inv: errors::invalid => free(rel); return inv; case nomem => free(rel); return nomem; }; }; case done => break; case let fe: fs::error => return fe; }; };
return errors::noentry; };
fn idx_lookup( r: repo, idx_rel: const str, id: oid, ) (pack_loc | errors::noentry | fs::error | io::error | errors::invalid | nomem) = { let h = fs::open(r.root, idx_rel)?; defer io::close(h)!; let buf = io::drain(h)?; defer free(buf); if (len(buf) < 8z + 256z*4z) { return errors::invalid; }; let off = 0z; let magic = endian::begetu32(buf[off..off+4]); off += 4z; if (magic != IDX_MAGIC) { return errors::invalid; }; let ver = endian::begetu32(buf[off..off+4]); off += 4z; if (ver != IDX_V2) { return errors::invalid; }; let fanout: [256]u32 = [0...]; for (let i = 0z; i < 256z; i += 1z) { fanout[i] = endian::begetu32(buf[off..off+4]); off += 4z; }; let nobj = fanout[255]: size; let need = off + nobj * sha256::SZ + nobj * 4z + nobj * 4z + 2z * sha256::SZ; if (need > len(buf)) { return errors::invalid; }; let names_off = off; let crcs_off = names_off + nobj * sha256::SZ; let off32_off = crcs_off + nobj * 4z; let large_count = 0z; for (let i = 0z; i < nobj; i += 1z) { let o32 = endian::begetu32(buf[off32_off + i*4z .. off32_off + i*4z + 4z]); if ((o32 & 0x8000_0000u32) != 0u32) { large_count += 1z; }; }; let off64_off = off32_off + nobj * 4z; let trailer_off = off64_off + large_count * 8z; if (trailer_off + 2z * sha256::SZ > len(buf)) { return errors::invalid; }; let first = (id[0]: u8): size; let lo: size = if (first == 0u8) { yield 0z; } else { yield fanout[first - 1z]: size; }; let hi: size = fanout[first]: size; let found = false; let idx = 0z; let l = lo; let h = hi; for (l < h) { let m = l + (h - l) / 2z; let cand = buf[names_off + m*sha256::SZ .. names_off + (m+1z)*sha256::SZ]; let c = cmp_oid(cand, id); if (c == 0) { found = true; idx = m; break; } else if (c < 0) { l = m + 1z; } else { h = m; }; }; if (!found) { return errors::noentry; }; let o32 = endian::begetu32(buf[off32_off + idx*4z .. off32_off + idx*4z + 4z]); let ofs: u64 = 0u64; if ((o32 & 0x8000_0000u32) == 0u32) { ofs = (o32: u64); } else { let nlarge_before = count_large_before(buf[off32_off..], idx); let p = off64_off + nlarge_before * 8z; let o64be = endian::begetu64(buf[p .. p + 8z]); ofs = o64be; }; if (!strings::hassuffix(idx_rel, ".idx")) { return errors::invalid; }; let stem = strings::bytesub(idx_rel, 0z, len(idx_rel) - 4z)!; let packpath = fmt::asprintf("{}{}", stem, ".pack")?; return pack_loc { pack_rel = packpath, ofs = ofs }; }; fn read_from_pack_at( r: repo, loc: pack_loc, want: oid, ) (object | fs::error | io::error | errors::invalid | strconv::invalid | strconv::overflow | errors::noentry | nomem) = { defer free(loc.pack_rel); let h = fs::open(r.root, loc.pack_rel)?; defer io::close(h)!; let header: [12]u8 = [0...]; match (io::readall(h, header)) { case size => void; case io::EOF => return errors::invalid; case let ioe: io::error => return ioe; }; let magic = endian::begetu32(header[..4]); let ver = endian::begetu32(header[4..8]); if (magic != PACK_MAGIC || ver != PACK_V2) { return errors::invalid; }; io::seek(h, (loc.ofs: i64), io::whence::SET)?; let ty: u8 = 0u8; match (read_obj_header(h)) { case let t: (u8, size, size) => ty = t.0; case let ioe: io::error => return ioe; case => return errors::invalid; }; let full_ty: u8 = 0u8; let body: []u8 = []; defer if (len(body) != 0) { free(body); }; switch (ty) { case OBJ_COMMIT => body = inflate_section(h)?; full_ty = OBJ_COMMIT; case OBJ_TREE => body = inflate_section(h)?; full_ty = OBJ_TREE; case OBJ_BLOB => body = inflate_section(h)?; full_ty = OBJ_BLOB; case OBJ_REF_DELTA => match (resolve_ref_delta(r, h)) { case let t: (u8, []u8) => full_ty = t.0; body = t.1; case let e: (fs::error | io::error | errors::invalid | errors::noentry | nomem) => return e; }; case OBJ_OFS_DELTA => match (resolve_ofs_delta(r, h, loc)) { case let t: (u8, []u8) => full_ty = t.0; body = t.1; case let e: (fs::error | io::error | errors::invalid | errors::noentry | nomem) => return e; }; case => return errors::invalid; }; let tystr = if (full_ty == OBJ_BLOB) { yield "blob"; } else if (full_ty == OBJ_TREE) { yield "tree"; } else if (full_ty == OBJ_COMMIT) { yield "commit"; } else { yield ""; }; if (tystr == "" || !verify_typed(tystr, body, want)) { return errors::invalid; }; if (full_ty == OBJ_BLOB) { const b = parse_blob(want, body)?; return (b: object); }; if (full_ty == OBJ_TREE) { const t = parse_tree(want, body)?; return (t: object); }; if (full_ty == OBJ_COMMIT) { const c = parse_commit(want, body)?; return (c: object); }; return errors::invalid; }; fn read_obj_header(h: io::handle) ((u8, size, size) | io::error | errors::invalid) = { let consumed = 0z; let b0: [1]u8 = [0]; match (io::readall(h, b0)) { case size => void; case io::EOF => return errors::invalid; case let ioe: io::error => return ioe; }; consumed += 1z; let ty = (b0[0] >> 4) & 0x07u8; let sz: size = (b0[0] & 0x0fu8): size; let shift = 4z; if ((b0[0] & 0x80u8) != 0u8) { for (true) { let bb: [1]u8 = [0]; match (io::readall(h, bb)) { case size => void; case io::EOF => return errors::invalid; case let ioe: io::error => return ioe; }; consumed += 1z; let v = (bb[0] & 0x7fu8): size; sz += v << shift; if ((bb[0] & 0x80u8) == 0u8) { break; }; shift += 7z; }; }; return (ty, sz, consumed); }; fn inflate_section(h: io::handle) ([]u8 | io::error | nomem) = { let zr = zlib::decompress(h)?; defer io::close(&zr.vtable)!; let out = io::drain(&zr.vtable)?; return out; }; fn resolve_ref_delta( r: repo, h: io::handle, ) ((u8, []u8) | fs::error | io::error | errors::invalid | errors::noentry | strconv::invalid | strconv::overflow | nomem) = { let base: oid = [0...]; match (io::readall(h, base)) { case size => void; case io::EOF => return errors::invalid; case let ioe: io::error => return ioe; }; let delta = inflate_section(h)?; defer free(delta); let bt = read_resolved_body_by_id(r, base)?; let out = apply_delta(bt.1, delta)?; return (bt.0, out); }; fn read_ofs_distance(h: io::handle) (u64 | io::error | errors::invalid) = { let b: [1]u8 = [0]; match (io::readall(h, b)) { case size => void; case io::EOF => return errors::invalid; case let ioe: io::error => return ioe; }; let dist: u64 = (b[0] & 0x7fu8): u64; if ((b[0] & 0x80u8) != 0u8) { for (true) { match (io::readall(h, b)) { case size => void; case io::EOF => return errors::invalid; case let ioe: io::error => return ioe; }; dist = ((dist + 1u64) << 7u64) + ((b[0] & 0x7fu8): u64); if ((b[0] & 0x80u8) == 0u8) { break; }; }; }; return dist; }; fn resolve_ofs_delta( r: repo, h: io::handle, loc: pack_loc, ) ((u8, []u8) | fs::error | io::error | errors::invalid | errors::noentry | strconv::invalid | strconv::overflow | nomem) = { let dist = read_ofs_distance(h)?; let base_ofs: u64 = if (loc.ofs > dist) { yield loc.ofs - dist; } else { yield 0u64; }; if (base_ofs == 0u64) { return errors::invalid; }; let bt = read_resolved_body_at_ofs(r, loc.pack_rel, base_ofs)?; let delta = inflate_section(h)?; defer free(delta); let out = apply_delta(bt.1, delta)?; return (bt.0, out); }; fn read_resolved_body_by_id( r: repo, id: oid, ) ((u8, []u8) | fs::error | io::error | errors::invalid | errors::noentry | strconv::invalid | strconv::overflow | nomem) = { match (find_in_indexes(r, id)) { case let pl: pack_loc => let res = read_resolved_body_at_ofs(r, pl.pack_rel, pl.ofs); free(pl.pack_rel); return res; case errors::noentry => return read_loose_typed(r, id); case let fe: fs::error => return fe; case let ioe: io::error => return ioe; case let inv: errors::invalid => return inv; case nomem => return nomem; }; }; fn read_resolved_body_at_ofs( r: repo, pack_rel: str, ofs: u64, ) ((u8, []u8) | fs::error | io::error | errors::invalid | errors::noentry | strconv::invalid | strconv::overflow | nomem) = { let h = fs::open(r.root, pack_rel)?; defer io::close(h)!; let header: [12]u8 = [0...]; match (io::readall(h, header)) { case size => void; case io::EOF => return errors::invalid; case let ioe: io::error => return ioe; }; let magic = endian::begetu32(header[..4]); let ver = endian::begetu32(header[4..8]); if (magic != PACK_MAGIC || ver != PACK_V2) { return errors::invalid; }; io::seek(h, (ofs: i64), io::whence::SET)?; match (read_obj_header(h)) { case let t: (u8, size, size) => switch (t.0) { case OBJ_COMMIT => let body = inflate_section(h)?; return (OBJ_COMMIT, body); case OBJ_TREE => let body = inflate_section(h)?; return (OBJ_TREE, body); case OBJ_BLOB => let body = inflate_section(h)?; return (OBJ_BLOB, body); case OBJ_REF_DELTA => let base: oid = [0...]; match (io::readall(h, base)) { case size => void; case io::EOF => return errors::invalid; case let ioe: io::error => return ioe; }; let delta = inflate_section(h)?; defer free(delta); let bt = read_resolved_body_by_id(r, base)?; let out = apply_delta(bt.1, delta)?; return (bt.0, out); case OBJ_OFS_DELTA => let dist = read_ofs_distance(h)?; let base_ofs: u64 = if (ofs > dist) { yield ofs - dist; } else { yield 0u64; }; if (base_ofs == 0u64) { return errors::invalid; }; let delta = inflate_section(h)?; defer free(delta); let bt = read_resolved_body_at_ofs(r, pack_rel, base_ofs)?; let out = apply_delta(bt.1, delta)?; return (bt.0, out); case => return errors::invalid; }; case let ioe: io::error => return ioe; case => return errors::invalid; }; }; fn apply_delta(base: []u8, delta: []u8) ([]u8 | errors::invalid | nomem) = { let i = 0z; let srcsz = read_varint(delta, &i)?; let dstsz = read_varint(delta, &i)?; if (srcsz != len(base)) { return errors::invalid; }; let out: []u8 = alloc([0u8...], dstsz)?; let outpos = 0z; for (i < len(delta)) { let op = delta[i]; i += 1z; if ((op & 0x80u8) != 0u8) { let off = 0z; let n = 0z; if ((op & 0x01u8) != 0u8) { if (i >= len(delta)) { return errors::invalid; }; off |= (delta[i]: size); i += 1z; }; if ((op & 0x02u8) != 0u8) { if (i >= len(delta)) { return errors::invalid; }; off |= (delta[i]: size) << 8z; i += 1z; }; if ((op & 0x04u8) != 0u8) { if (i >= len(delta)) { return errors::invalid; }; off |= (delta[i]: size) << 16z; i += 1z; }; if ((op & 0x08u8) != 0u8) { if (i >= len(delta)) { return errors::invalid; }; off |= (delta[i]: size) << 24z; i += 1z; }; if ((op & 0x10u8) != 0u8) { if (i >= len(delta)) { return errors::invalid; }; n |= (delta[i]: size); i += 1z; }; if ((op & 0x20u8) != 0u8) { if (i >= len(delta)) { return errors::invalid; }; n |= (delta[i]: size) << 8z; i += 1z; }; if ((op & 0x40u8) != 0u8) { if (i >= len(delta)) { return errors::invalid; }; n |= (delta[i]: size) << 16z; i += 1z; }; if (n == 0z) { n = 0x10000z; }; if (off + n > len(base) || outpos + n > len(out)) { return errors::invalid; }; out[outpos .. outpos + n] = base[off .. off + n]; outpos += n; } else if (op != 0u8) { let n = (op: size); if (i + n > len(delta) || outpos + n > len(out)) { return errors::invalid; }; out[outpos .. outpos + n] = delta[i .. i + n]; i += n; outpos += n; } else { return errors::invalid; }; }; if (outpos != len(out)) { return errors::invalid; }; return out; }; fn read_varint(buf: []u8, ip: *size) (size | errors::invalid) = { let res = 0z; let shift = 0z; for (true) { if (*ip >= len(buf)) { return errors::invalid; }; let b = buf[*ip]; *ip += 1z; res |= ((b & 0x7fu8): size) << shift; if ((b & 0x80u8) == 0u8) { break; }; shift += 7z; }; return res; };