Lindenii Project Forge
Add serialization functions and use objtype more
use bytes;
use errors;
use encoding::utf8;
use strings;
use strconv;

// Author/committer identity and its associated timestamp
// and timezone offset.
//
// "when" is the decimal timestamp taken from the ident line and "ofs" is
// the timezone offset in minutes, negative for a '-' offset.
export type ident = struct {
	name: []u8,
	email: []u8,
	when: i64,
	ofs: i32,
};

// Frees resources associated with an [[ident]].
export fn ident_finish(p: ident) void = {
	free(p.name);
	free(p.email);
};

// Parses an [[ident]] from its canonical byte-slice representation,
// "name <email> when (+|-)HHMM".
//
// The single space separating the name from '<' is not part of the name and
// is dropped, so that [[serialize_ident]] (which writes that separator
// itself) reproduces the input. The name and email are copied; the result
// must be finished with [[ident_finish]].
//
// Returns errors::invalid if the delimiters are missing, if the timezone is
// shorter than five bytes, or if it does not start with '+' or '-'. Errors
// from decoding or converting the numeric fields are propagated.
fn parse_ident(
	line: []u8,
) (ident | errors::invalid | strconv::invalid | strconv::overflow |
	utf8::invalid | nomem) = {
	const lt = match (bytes::index(line, '<')) {
	case let at: size =>
		yield at;
	case void =>
		return errors::invalid;
	};
	// The closing bracket is searched for after the opening one only.
	const gt = match (bytes::index(line[lt + 1z..], '>')) {
	case let at: size =>
		yield lt + 1z + at;
	case void =>
		return errors::invalid;
	};

	let name_b = line[..lt];
	if (len(name_b) != 0 && name_b[len(name_b) - 1] == ' ') {
		// Separator between name and '<', not part of the name.
		name_b = name_b[..len(name_b) - 1];
	};
	const email_b = line[lt + 1z..gt];

	let rest = line[gt + 1z..];
	if (len(rest) == 0 || rest[0] != ' ') {
		return errors::invalid;
	};
	rest = rest[1..];

	const sp = match (bytes::index(rest, ' ')) {
	case let at: size =>
		yield at;
	case void =>
		return errors::invalid;
	};
	const when_s = strings::fromutf8(rest[..sp])?;
	const tz_b = rest[sp + 1z..];
	if (len(tz_b) < 5) {
		return errors::invalid;
	};
	const when = strconv::stoi64(when_s)?;

	let sign: i32 = 1;
	if (tz_b[0] == '-') {
		sign = -1;
	} else if (tz_b[0] != '+') {
		// Anything other than an explicit sign is malformed.
		return errors::invalid;
	};

	const hh = strconv::stou32(strings::fromutf8(tz_b[1..3])?,
		strconv::base::DEC)?;
	const mm = strconv::stou32(strings::fromutf8(tz_b[3..5])?,
		strconv::base::DEC)?;
	const mins: i32 = (hh: i32) * 60 + (mm: i32);
	const ofs: i32 = sign * mins;

	const name = alloc(name_b...)?;
	const email = match (alloc(email_b...)) {
	case let e: []u8 =>
		yield e;
	case nomem =>
		// Do not leak the name copy when the second copy fails.
		free(name);
		return nomem;
	};

	return ident {
		name = name,
		email = email,
		when = when,
		ofs = ofs,
	};
};
// Returns the canonical form for an ident, laid out as
// "name <email> when (+|-)HHMM". The result is freshly allocated and
// must be freed by the caller.
export fn serialize_ident(p: ident) ([]u8 | nomem) = {
	const stamp = strings::toutf8(strconv::i64tos(p.when));

	// Split the signed minute offset into a sign byte and a magnitude.
	let sign: u8 = '+';
	let mins = p.ofs;
	if (mins < 0) {
		sign = '-';
		mins = -mins;
	};
	const hh: u32 = ((mins / 60): u32);
	const mm: u32 = ((mins % 60): u32);

	const nname = len(p.name);
	const nemail = len(p.email);
	const nstamp = len(stamp);

	// name, " <", email, "> ", when, " ", then sign and four digits.
	const total = nname + 2z + nemail + 2z + nstamp + 1z + 5z;
	let out = alloc([0u8...], total)?;

	let at = 0z;
	out[at..at + nname] = p.name;
	at += nname;

	out[at] = ' ';
	out[at + 1z] = '<';
	at += 2z;

	out[at..at + nemail] = p.email;
	at += nemail;

	out[at] = '>';
	out[at + 1z] = ' ';
	at += 2z;

	out[at..at + nstamp] = stamp;
	at += nstamp;

	out[at] = ' ';
	at += 1z;

	out[at] = sign;
	out[at + 1z] = ('0' + (hh / 10u32): u8);
	out[at + 2z] = ('0' + (hh % 10u32): u8);
	out[at + 3z] = ('0' + (mm / 10u32): u8);
	out[at + 4z] = ('0' + (mm % 10u32): u8);

	return out;
};
use bytes;
use compress::zlib;
use encoding::utf8;
use errors;
use fmt;
use fs;
use io;
use strconv;
use strings;

// Find the path to a loose object with the given ID,
// relative to the repository root.
//
// The first two characters of the object ID string name the directory and
// the remainder names the file. The caller must free the result.
fn loose_relpath(id: oid) (str | nomem) = {
	const hex = oid_string(id)?;
	defer free(hex);

	const dir = strings::bytesub(hex, 0z, 2z)!;
	const file = strings::bytesub(hex, 2z, strings::end)!;
	return fmt::asprintf("objects/{}/{}", dir, file);
};

// Splits an inflated loose object, structured as "type size\0body", into its
// type name and its body. Both results borrow from buf.
//
// Returns errors::invalid if the NUL or the space separator is missing, or if
// the declared size differs from the actual body length.
fn loose_split_header(
	buf: []u8,
) ((str, []u8) | errors::invalid | strconv::invalid | strconv::overflow |
	utf8::invalid) = {
	const nul = match (bytes::index(buf, 0u8)) {
	case let at: size =>
		yield at;
	case void =>
		return errors::invalid;
	};
	const header = buf[..nul];
	const body = buf[nul + 1z..];

	const sp = match (bytes::index(header, ' ')) {
	case let at: size =>
		yield at;
	case void =>
		return errors::invalid;
	};
	const ty = strings::fromutf8(header[..sp])?;
	const szs = strings::fromutf8(header[sp + 1z..])?;

	const expect = strconv::stoz(szs)?;
	if (expect != len(body)) {
		return errors::invalid;
	};
	return (ty, body);
};

// Reads a loose object from the repository by its ID.
//
// The file is inflated, its header is checked against the body length, and
// the whole buffer is verified against the ID with [[verify_oid]] before the
// body is parsed. An unknown type name yields errors::invalid.
export fn read_loose(
	r: repo,
	id: oid,
) (object | fs::error | io::error | errors::invalid | strconv::invalid |
	strconv::overflow | nomem | utf8::invalid) = {
	const rel = loose_relpath(id)?;
	defer free(rel);

	const fh = fs::open(r.root, rel)?;
	defer io::close(fh)!;

	let zr = zlib::decompress(fh)?;
	defer io::close(&zr.vtable)!;

	let buf = io::drain(&zr.vtable)?;
	defer free(buf);

	const parts = loose_split_header(buf)?;
	const ty = parts.0;
	const body = parts.1;

	if (!verify_oid(buf, id)) {
		return errors::invalid;
	};

	// The parsers copy what they keep, so freeing buf afterwards is safe.
	if (ty == "blob") {
		const b = parse_blob(body)?;
		return (b: object);
	} else if (ty == "tree") {
		const t = parse_tree(body)?;
		return (t: object);
	} else if (ty == "commit") {
		const c = parse_commit(body)?;
		return (c: object);
	} else if (ty == "tag") {
		const g = parse_tag(body)?;
		return (g: object);
	} else {
		return errors::invalid;
	};
};

// Reads a loose object from the repository by its ID,
// returning its type and raw data.
//
// The returned data is a copy of the body and must be freed by the caller.
// Unlike [[read_loose]], the object ID is not verified here. A header that is
// not valid UTF-8 is reported as errors::invalid, since this function's
// result type has no UTF-8 error member.
export fn read_loose_typed(
	r: repo,
	id: oid,
) ((objtype, []u8) | fs::error | io::error | errors::invalid |
	errors::noentry | strconv::invalid | strconv::overflow | nomem) = {
	const rel = loose_relpath(id)?;
	defer free(rel);

	let fh = fs::open(r.root, rel)?;
	defer io::close(fh)!;

	let zr = zlib::decompress(fh)?;
	defer io::close(&zr.vtable)!;

	let buf = io::drain(&zr.vtable)?;
	defer free(buf);

	const parts = match (loose_split_header(buf)) {
	case let p: (str, []u8) =>
		yield p;
	case utf8::invalid =>
		return errors::invalid;
	case let e: (errors::invalid | strconv::invalid | strconv::overflow) =>
		return e;
	};
	const ty = parts.0;
	const body = parts.1;

	const code: objtype = if (ty == "blob") {
		yield objtype::OBJ_BLOB;
	} else if (ty == "tree") {
		yield objtype::OBJ_TREE;
	} else if (ty == "commit") {
		yield objtype::OBJ_COMMIT;
	} else if (ty == "tag") {
		yield objtype::OBJ_TAG;
	} else {
		return errors::invalid;
	};

	let out = alloc(body...)?;
	return (code, out);
};
use strconv; use strings;
// A Git blob, holding only its raw data.
export type blob = struct {
	data: []u8,
};

// Releases the data owned by a [[blob]].
export fn blob_finish(b: blob) void = {
	free(b.data);
};

// Builds a blob from raw object data.
// The bytes are duplicated, so the caller keeps ownership of body and
// the returned blob must be released with [[blob_finish]].
export fn parse_blob(body: []u8) (blob | nomem) = {
	const copy = alloc(body...)?;
	return blob {
		data = copy,
	};
};
// Serializes a blob as "blob <size>\0<data>", where size is the decimal
// length of the data. The result is freshly allocated and must be freed
// by the caller.
export fn serialize_blob(b: blob) ([]u8 | nomem) = {
	const prefix = strings::toutf8("blob ");
	const digits = strings::toutf8(strconv::ztos(len(b.data)));

	// Offset of the first data byte: prefix, size digits, then one NUL.
	const data_at = len(prefix) + len(digits) + 1z;
	let out = alloc([0u8...], data_at + len(b.data))?;

	const digits_at = len(prefix);
	out[..digits_at] = prefix;
	out[digits_at..digits_at + len(digits)] = digits;
	out[data_at - 1z] = 0u8;
	out[data_at..data_at + len(b.data)] = b.data;
	return out;
};
use bytes;
use encoding::utf8;
use errors;
use strconv;
use strings;

// A Git commit object.
export type commit = struct {
	tree: oid,
	parents: []oid,
	author: ident,
	committer: ident,
	message: []u8,
	// other raw headers?
};

// Frees resources associated with a [[commit]].
export fn commit_finish(c: commit) void = {
	free(c.parents);
	ident_finish(c.author);
	ident_finish(c.committer);
	free(c.message);
};

// Parses a commit from its raw data.
//
// Header lines are read up to the first empty line; everything after it is
// copied as the message. The "tree", "parent", "author" and "committer"
// headers are recorded; "gpgsig" and "gpgsig-sha256" headers are skipped
// together with their continuation lines (lines starting with a space);
// any other header line is ignored.
//
// On success the result must be finished with [[commit_finish]]. On any
// error, everything allocated so far is released before returning.
export fn parse_commit(
	body: []u8,
) (commit | errors::invalid | strconv::invalid | strconv::overflow |
	utf8::invalid | nomem) = {
	let c = commit {
		tree = [0...],
		parents = [],
		author = ident { name = [], email = [], when = 0, ofs = 0 },
		committer = ident { name = [], email = [], when = 0, ofs = 0 },
		message = [],
	};
	// Release partial results on every early return.
	let ok = false;
	defer if (!ok) {
		commit_finish(c);
	};

	let i = 0z;
	for (true) {
		const rel = match (bytes::index(body[i..], '\n')) {
		case let n: size =>
			yield n;
		case void =>
			return errors::invalid;
		};
		const line = body[i..i + rel];

		// An empty line ends the headers.
		if (len(line) == 0) {
			i += rel + 1z;
			break;
		};

		if (bytes::hasprefix(line, strings::toutf8("tree "))) {
			const hex = strings::fromutf8(line[5..])?;
			match (parse_oid(hex)) {
			case let o: oid =>
				c.tree = o;
			case nomem =>
				return nomem;
			case =>
				return errors::invalid;
			};
		} else if (bytes::hasprefix(line, strings::toutf8("parent "))) {
			const hex = strings::fromutf8(line[7..])?;
			match (parse_oid(hex)) {
			case let o: oid =>
				// Report allocation failure instead of aborting.
				append(c.parents, o)?;
			case nomem =>
				return nomem;
			case =>
				return errors::invalid;
			};
		} else if (bytes::hasprefix(line, strings::toutf8("author "))) {
			const per = parse_ident(line[7..])?;
			ident_finish(c.author);
			c.author = per;
		} else if (bytes::hasprefix(line, strings::toutf8("committer "))) {
			const per = parse_ident(line[10..])?;
			ident_finish(c.committer);
			c.committer = per;
		} else if (
			bytes::hasprefix(line, strings::toutf8("gpgsig "))
			|| bytes::hasprefix(line, strings::toutf8("gpgsig-sha256 "))
		) {
			// Skip the signature header and its continuation lines.
			i += rel + 1z;
			for (true) {
				if (i >= len(body)) {
					return errors::invalid;
				};
				const next = match (bytes::index(body[i..], '\n')) {
				case let n: size =>
					yield n;
				case void =>
					return errors::invalid;
				};
				if (body[i] != ' ') {
					break;
				};
				i += next + 1z;
			};
			continue;
		};

		i += rel + 1z;
	};

	c.message = alloc(body[i..]...)?;
	ok = true;
	return c;
};
// Serializes a commit into its on-disk format: a "commit <size>\0" header
// followed by the "tree", "parent", "author" and "committer" lines, an empty
// line, and the message. Only the fields stored in [[commit]] are written.
//
// The result is freshly allocated and must be freed by the caller. All
// temporary buffers are released on every return path.
export fn serialize_commit(c: commit) ([]u8 | nomem) = {
	const treehex = oid_string(c.tree)?;
	defer free(treehex);

	let parenthex: []const str = [];
	// Frees both the strings and the slice that holds them.
	defer {
		for (let hex .. parenthex) {
			free(hex);
		};
		free(parenthex);
	};
	for (let i = 0z; i < len(c.parents); i += 1z) {
		const hex = oid_string(c.parents[i])?;
		match (append(parenthex, hex)) {
		case void =>
			void;
		case nomem =>
			// Not yet owned by parenthex; free it here.
			free(hex);
			return nomem;
		};
	};

	let authorb = serialize_ident(c.author)?;
	defer free(authorb);
	let committerb = serialize_ident(c.committer)?;
	defer free(committerb);

	// Each header line is its keyword plus space, its value, and '\n'.
	let bodylen = 0z;
	bodylen += 5z + len(treehex) + 1z;
	for (let i = 0z; i < len(parenthex); i += 1z) {
		bodylen += 7z + len(parenthex[i]) + 1z;
	};
	bodylen += 7z + len(authorb) + 1z;
	bodylen += 10z + len(committerb) + 1z;
	bodylen += 1z + len(c.message);

	const sizes = strconv::ztos(bodylen);
	const ty = strings::toutf8("commit ");
	const sizesb = strings::toutf8(sizes);
	let hlen = len(ty) + len(sizesb) + 1z;

	let out = alloc([0u8...], hlen + bodylen)?;
	let pos = 0z;

	out[pos..pos + len(ty)] = ty;
	pos += len(ty);
	out[pos..pos + len(sizesb)] = sizesb;
	pos += len(sizesb);
	out[pos] = 0u8;
	pos += 1z;

	{
		const pre = strings::toutf8("tree ");
		out[pos..pos + len(pre)] = pre;
		pos += len(pre);
		const hb = strings::toutf8(treehex);
		out[pos..pos + len(hb)] = hb;
		pos += len(hb);
		out[pos] = '\n';
		pos += 1z;
	};

	for (let i = 0z; i < len(parenthex); i += 1z) {
		const pre = strings::toutf8("parent ");
		out[pos..pos + len(pre)] = pre;
		pos += len(pre);
		const hb = strings::toutf8(parenthex[i]);
		out[pos..pos + len(hb)] = hb;
		pos += len(hb);
		out[pos] = '\n';
		pos += 1z;
	};

	{
		const pre = strings::toutf8("author ");
		out[pos..pos + len(pre)] = pre;
		pos += len(pre);
		out[pos..pos + len(authorb)] = authorb;
		pos += len(authorb);
		out[pos] = '\n';
		pos += 1z;
	};

	{
		const pre = strings::toutf8("committer ");
		out[pos..pos + len(pre)] = pre;
		pos += len(pre);
		out[pos..pos + len(committerb)] = committerb;
		pos += len(committerb);
		out[pos] = '\n';
		pos += 1z;
	};

	// Empty line between the headers and the message.
	out[pos] = '\n';
	pos += 1z;
	out[pos..pos + len(c.message)] = c.message;

	return out;
};
use bytes;
use encoding::utf8;
use errors;
use strconv;
use strings;

// A Git annotated tag object.
export type tag = struct {
	target: oid,
	target_type: objtype,
	name: []u8,
	tagger: (void | ident),
	message: []u8,
};

// Frees resources associated with a [[tag]].
export fn tag_finish(t: tag) void = {
	free(t.name);
	match (t.tagger) {
	case let id: ident =>
		ident_finish(id);
	case void =>
		void;
	};
	free(t.message);
};

// Parses a tag from its raw data.
//
// Header lines are read up to the first empty line; everything after it is
// copied as the message. The "object" and "type" headers are required and
// their absence yields errors::invalid. "tag" and "tagger" are recorded when
// present; "gpgsig" and "gpgsig-sha256" headers are skipped together with
// their continuation lines (lines starting with a space); any other header
// line is ignored.
//
// On success the result must be finished with [[tag_finish]]. On any error,
// everything allocated so far is released before returning.
export fn parse_tag(
	body: []u8,
) (tag | errors::invalid | strconv::invalid | strconv::overflow |
	utf8::invalid | nomem) = {
	let t = tag {
		target = [0...],
		target_type = objtype::OBJ_INVALID,
		name = [],
		tagger = void,
		message = [],
	};
	// Release partial results on every early return.
	let ok = false;
	defer if (!ok) {
		tag_finish(t);
	};

	let have_target = false;
	let have_type = false;

	let i = 0z;
	for (true) {
		const rel = match (bytes::index(body[i..], '\n')) {
		case let n: size =>
			yield n;
		case void =>
			return errors::invalid;
		};
		const line = body[i..i + rel];

		// An empty line ends the headers.
		if (len(line) == 0) {
			i += rel + 1z;
			break;
		};

		if (bytes::hasprefix(line, strings::toutf8("object "))) {
			const hex = strings::fromutf8(line[7..])?;
			match (parse_oid(hex)) {
			case let o: oid =>
				t.target = o;
				have_target = true;
			case nomem =>
				// Keep allocation failure distinct from bad input.
				return nomem;
			case =>
				return errors::invalid;
			};
		} else if (bytes::hasprefix(line, strings::toutf8("type "))) {
			const ty = strings::fromutf8(line[5..])?;
			if (ty == "commit") {
				t.target_type = objtype::OBJ_COMMIT;
			} else if (ty == "tree") {
				t.target_type = objtype::OBJ_TREE;
			} else if (ty == "blob") {
				t.target_type = objtype::OBJ_BLOB;
			} else if (ty == "tag") {
				t.target_type = objtype::OBJ_TAG;
			} else {
				return errors::invalid;
			};
			have_type = true;
		} else if (bytes::hasprefix(line, strings::toutf8("tag "))) {
			const name = alloc(line[4..]...)?;
			free(t.name);
			t.name = name;
		} else if (bytes::hasprefix(line, strings::toutf8("tagger "))) {
			const per = parse_ident(line[7..])?;
			match (t.tagger) {
			case let old: ident =>
				ident_finish(old);
			case void =>
				void;
			};
			t.tagger = per;
		} else if (
			bytes::hasprefix(line, strings::toutf8("gpgsig "))
			|| bytes::hasprefix(line, strings::toutf8("gpgsig-sha256 "))
		) {
			// Skip the signature header and its continuation lines.
			i += rel + 1z;
			for (true) {
				if (i >= len(body)) {
					return errors::invalid;
				};
				const next = match (bytes::index(body[i..], '\n')) {
				case let n: size =>
					yield n;
				case void =>
					return errors::invalid;
				};
				if (body[i] != ' ') {
					break;
				};
				i += next + 1z;
			};
			continue;
		};

		i += rel + 1z;
	};

	if (!have_target || !have_type) {
		return errors::invalid;
	};

	t.message = alloc(body[i..]...)?;
	ok = true;
	return t;
};
// Serializes a tag into the on-disk format: a "tag <size>\0" header followed
// by the "object", "type" and "tag" lines, a "tagger" line when the tag has
// a tagger, an empty line, and the message.
//
// Returns errors::invalid if the target type is not commit, tree, blob or
// tag. The result is freshly allocated and must be freed by the caller; the
// temporary buffers are released on every return path.
export fn serialize_tag(t: tag) ([]u8 | errors::invalid | nomem) = {
	const tyname = switch (t.target_type) {
	case objtype::OBJ_COMMIT =>
		yield "commit";
	case objtype::OBJ_TREE =>
		yield "tree";
	case objtype::OBJ_BLOB =>
		yield "blob";
	case objtype::OBJ_TAG =>
		yield "tag";
	case =>
		return errors::invalid;
	};
	const tynameb = strings::toutf8(tyname);

	const objhex = oid_string(t.target)?;
	defer free(objhex);

	// The serialized tagger is owned by this function and freed on exit;
	// it stays empty (and have_tagger false) when there is no tagger.
	let have_tagger = false;
	let taggerb: []u8 = [];
	defer free(taggerb);
	match (t.tagger) {
	case let id: ident =>
		taggerb = serialize_ident(id)?;
		have_tagger = true;
	case void =>
		void;
	};

	// Each header line is its keyword plus space, its value, and '\n'.
	let bodylen = 0z;
	bodylen += 7z + len(objhex) + 1z;
	bodylen += 5z + len(tynameb) + 1z;
	bodylen += 4z + len(t.name) + 1z;
	if (have_tagger) {
		bodylen += 7z + len(taggerb) + 1z;
	};
	bodylen += 1z + len(t.message);

	const sizes = strconv::ztos(bodylen);
	const ty = strings::toutf8("tag ");
	const sizesb = strings::toutf8(sizes);
	let hlen = len(ty) + len(sizesb) + 1z;

	let out = alloc([0u8...], hlen + bodylen)?;
	let pos = 0z;

	out[pos..pos + len(ty)] = ty;
	pos += len(ty);
	out[pos..pos + len(sizesb)] = sizesb;
	pos += len(sizesb);
	out[pos] = 0u8;
	pos += 1z;

	{
		const pre = strings::toutf8("object ");
		out[pos..pos + len(pre)] = pre;
		pos += len(pre);
		const hb = strings::toutf8(objhex);
		out[pos..pos + len(hb)] = hb;
		pos += len(hb);
		out[pos] = '\n';
		pos += 1z;
	};

	{
		const pre = strings::toutf8("type ");
		out[pos..pos + len(pre)] = pre;
		pos += len(pre);
		out[pos..pos + len(tynameb)] = tynameb;
		pos += len(tynameb);
		out[pos] = '\n';
		pos += 1z;
	};

	{
		const pre = strings::toutf8("tag ");
		out[pos..pos + len(pre)] = pre;
		pos += len(pre);
		out[pos..pos + len(t.name)] = t.name;
		pos += len(t.name);
		out[pos] = '\n';
		pos += 1z;
	};

	if (have_tagger) {
		const pre = strings::toutf8("tagger ");
		out[pos..pos + len(pre)] = pre;
		pos += len(pre);
		out[pos..pos + len(taggerb)] = taggerb;
		pos += len(taggerb);
		out[pos] = '\n';
		pos += 1z;
	};

	// Empty line between the headers and the message.
	out[pos] = '\n';
	pos += 1z;
	out[pos..pos + len(t.message)] = t.message;

	return out;
};
use bytes;
use crypto::sha256;
use encoding::utf8;
use errors;
use fs;
use io;
use strconv;
use strings;

// A Git tree object.
export type tree = struct {
	entries: []tree_entry,
};

// Frees resources associated with a [[tree]].
export fn tree_finish(t: tree) void = {
	for (let entry .. t.entries) {
		tree_entry_finish(entry);
	};
	free(t.entries);
};

// A single entry in a Git tree. In general, the oid
// either refers to a blob (file) or another tree (directory).
export type tree_entry = struct {
	mode: u32,
	name: []u8,
	oid: oid,
};

// Frees resources associated with a [[tree_entry]].
export fn tree_entry_finish(te: tree_entry) void = {
	free(te.name);
};

// Parses a tree from its raw data.
//
// The body is a sequence of entries, each laid out as an octal mode, a
// space, the entry name, a NUL byte, and sha256::SZ raw object ID bytes.
// Returns errors::invalid if an entry is truncated or lacks a separator.
//
// On success the result must be finished with [[tree_finish]]. On any error,
// the entries parsed so far are released before returning.
export fn parse_tree(
	body: []u8,
) (tree | errors::invalid | strconv::invalid | strconv::overflow |
	utf8::invalid | nomem) = {
	let entries: []tree_entry = [];
	// Release partial results on every early return.
	let ok = false;
	defer if (!ok) {
		tree_finish(tree { entries = entries });
	};

	let i = 0z;
	for (i < len(body)) {
		const sp = match (bytes::index(body[i..], ' ')) {
		case let j: size =>
			yield j;
		case void =>
			return errors::invalid;
		};
		const mode_b = body[i..i + sp];
		i += sp + 1z;

		const nul = match (bytes::index(body[i..], 0u8)) {
		case let j: size =>
			yield j;
		case void =>
			return errors::invalid;
		};
		const name_b = body[i..i + nul];
		i += nul + 1z;

		if (i + sha256::SZ > len(body)) {
			return errors::invalid;
		};
		let child: oid = [0...];
		child[..] = body[i..i + sha256::SZ];
		i += sha256::SZ;

		const mode_s = strings::fromutf8(mode_b)?;
		const mode = strconv::stou32(mode_s, strconv::base::OCT)?;

		const name = alloc(name_b...)?;
		match (append(entries, tree_entry {
			mode = mode,
			name = name,
			oid = child,
		})) {
		case void =>
			void;
		case nomem =>
			// Not yet owned by entries; free it here.
			free(name);
			return nomem;
		};
	};

	ok = true;
	return tree { entries = entries };
};

// Looks up a tree entry by name. The returned pointer refers into the
// tree's own entry list.
fn tree_entry_by_name_raw(
	t: *const tree,
	name: []const u8,
) (*const tree_entry | void) = {
	for (let i = 0z; i < len(t.entries); i += 1z) {
		if (bytes::equal(t.entries[i].name, name)) {
			return &t.entries[i];
		};
	};
	return void;
};

// Looks up the tree or blob at the given '/'-separated path, starting from
// root and reading each intermediate tree from the repository.
//
// An empty path returns root itself (not a copy). Otherwise the returned
// object was read from the repository and must be finished by the caller.
//
// errors::invalid is returned if a path component is empty or missing, if
// the path ends with '/', if an intermediate component is not a tree, if the
// final component is neither a tree nor a blob, or if reading any object
// fails. Every object read along the way, other than the one returned, is
// released exactly once.
export fn tree_at_path(
	r: repo,
	root: const tree,
	path: const []u8,
) (tree | blob | errors::invalid | fs::error | io::error |
	strconv::invalid | strconv::overflow | nomem) = {
	if (len(path) == 0) {
		return root;
	};

	// cur is the tree being searched. It is only owned (and so only
	// freed here) once it has been replaced by a tree read from the
	// repository; the caller's root is never freed.
	let cur: tree = root;
	let owned = false;
	defer if (owned) {
		tree_finish(cur);
	};

	let i = 0z;
	for (i < len(path)) {
		const j = match (bytes::index(path[i..], '/')) {
		case let k: size =>
			yield i + k;
		case void =>
			yield len(path);
		};
		if (j == i) {
			return errors::invalid;
		};

		const ent = match (tree_entry_by_name_raw(&cur, path[i..j])) {
		case let p: *const tree_entry =>
			yield p;
		case void =>
			return errors::invalid;
		};
		const last = (j == len(path));

		match (read_object(r, ent.oid)) {
		case let t: tree =>
			if (last) {
				return t;
			};
			// Descend: drop the previous intermediate tree, if any.
			if (owned) {
				tree_finish(cur);
			};
			cur = t;
			owned = true;
		case let b: blob =>
			if (last) {
				return b;
			};
			blob_finish(b);
			return errors::invalid;
		case let c: commit =>
			commit_finish(c);
			return errors::invalid;
		case let g: tag =>
			tag_finish(g);
			return errors::invalid;
		case =>
			return errors::invalid;
		};

		i = j + 1z;
		if (i >= len(path)) {
			return errors::invalid;
		};
	};

	return errors::invalid;
};
// Serializes a tree as "tree <size>\0" followed by its entries, each written
// as the octal mode, a space, the name, a NUL byte, and the raw object ID.
// The result is freshly allocated and must be freed by the caller.
export fn serialize_tree(t: tree) ([]u8 | nomem) = {
	// First pass: total size of the entry list.
	let total = 0z;
	for (let ent .. t.entries) {
		const octal = strconv::u32tos(ent.mode, strconv::base::OCT);
		total += len(strings::toutf8(octal)) + 1z;
		total += len(ent.name) + 1z;
		total += (sha256::SZ: size);
	};

	const prefix = strings::toutf8("tree ");
	const digits = strings::toutf8(strconv::ztos(total));
	// Offset of the first entry: prefix, size digits, then one NUL.
	const entries_at = len(prefix) + len(digits) + 1z;

	let out = alloc([0u8...], entries_at + total)?;

	// The size digits are copied out before the second pass below.
	out[..len(prefix)] = prefix;
	out[len(prefix)..len(prefix) + len(digits)] = digits;
	out[entries_at - 1z] = 0u8;

	// Second pass: write the entries.
	let at = entries_at;
	for (let ent .. t.entries) {
		const octal = strings::toutf8(
			strconv::u32tos(ent.mode, strconv::base::OCT));
		out[at..at + len(octal)] = octal;
		at += len(octal);

		out[at] = ' ';
		at += 1z;

		out[at..at + len(ent.name)] = ent.name;
		at += len(ent.name);

		out[at] = 0u8;
		at += 1z;

		out[at..at + (sha256::SZ: size)] = ent.oid[..];
		at += (sha256::SZ: size);
	};

	return out;
};
use bytes;
use crypto::sha256;
use errors;
use fmt;
use fs;
use hash;
use io;
use strconv;
use strings;

// Numeric tags for object and pack entry types.
//
// Objects are normally represented by the [[object]] tagged union, so these
// tags are rarely needed. They are meant for places where a fully parsed
// object is undesirable or unavailable.
export type objtype = enum u8 {
	OBJ_INVALID = 0u8,
	OBJ_COMMIT = 1u8,
	OBJ_TREE = 2u8,
	OBJ_BLOB = 3u8,
	OBJ_TAG = 4u8,
	OBJ_FUTURE = 5u8,
	OBJ_OFS_DELTA = 6u8,
	OBJ_REF_DELTA = 7u8,
};

// Any Git object.
export type object = (blob | tree | commit | tag);

// Releases whatever the given object owns by dispatching to the finisher
// for its concrete type.
export fn object_finish(o: object) void = {
	match (o) {
	case let c: commit =>
		commit_finish(c);
	case let t: tree =>
		tree_finish(t);
	case let b: blob =>
		blob_finish(b);
	case let g: tag =>
		tag_finish(g);
	case =>
		abort("Unknown object type being freed...");
	};
};
// Verifies that the given buffer (which must be the exact on-disk format
// structured as "type size\0body") matches the given object ID.
//
// The buffer is hashed with SHA-256 as-is and the digest is compared with
// want. Returns true if they are equal.
export fn verify_oid(buf: []u8, want: oid) bool = {
	let st = sha256::sha256();
	defer hash::close(&st);
	hash::write(&st, buf);

	let got: oid = [0...];
	hash::sum(&st, got);

	return bytes::equal(got[..], want[..]);
};
// Verifies that the given typed body matches the given object ID.
//
// The hash is computed over the type name, a space, the decimal body length,
// a NUL byte, and the body, as a real serialized Git object has. Returns
// false if the digest differs from want, or if ty is not one of OBJ_BLOB,
// OBJ_TREE, OBJ_COMMIT or OBJ_TAG.
export fn verify_typed(ty: objtype, body: []u8, want: oid) bool = {
	const name = switch (ty) {
	case objtype::OBJ_BLOB =>
		yield "blob";
	case objtype::OBJ_TREE =>
		yield "tree";
	case objtype::OBJ_COMMIT =>
		yield "commit";
	case objtype::OBJ_TAG =>
		yield "tag";
	case =>
		return false;
	};

	let st = sha256::sha256();
	defer hash::close(&st);

	hash::write(&st, strings::toutf8(name));
	hash::write(&st, strings::toutf8(" "));
	let szs = strconv::ztos(len(body));
	hash::write(&st, strings::toutf8(szs));
	hash::write(&st, strings::toutf8("\x00"));
	hash::write(&st, body);

	let got: oid = [0...];
	hash::sum(&st, got);

	return bytes::equal(got[..], want[..]);
};

// Reads a Git object from the repository by its ID.
//
// The loose object store is tried first. Only when the loose object file
// does not exist are the packs consulted; any other failure is returned
// immediately. An object found in neither place yields errors::invalid, as
// does object data that is not valid UTF-8 where text is required.
export fn read_object(
	r: repo,
	id: oid,
) (object | fs::error | io::error | errors::invalid | strconv::invalid |
	strconv::overflow | nomem) = {
	match (read_loose(r, id)) {
	case let o: object =>
		return o;
	case let fe: fs::error =>
		// A missing loose file just means: look in the packs.
		if (!(fe is errors::noentry)) {
			return fe;
		};
	case utf8::invalid =>
		return errors::invalid;
	case let e: (io::error | errors::invalid | strconv::invalid |
		strconv::overflow | nomem) =>
		return e;
	};

	match (read_packed(r, id)) {
	case let o: object =>
		return o;
	case let fe: fs::error =>
		if (fe is errors::noentry) {
			return errors::invalid;
		} else {
			return fe;
		};
	case errors::noentry =>
		// No pack index contains the object.
		return errors::invalid;
	case utf8::invalid =>
		return errors::invalid;
	case let e: (io::error | errors::invalid | strconv::invalid |
		strconv::overflow | nomem) =>
		return e;
	};
};
use compress::zlib; use crypto::sha256; use encoding::utf8; use endian; use errors; use fmt; use fs; use io; use strconv; use strings; def IDX_MAGIC: u32 = 0xff744f63u32; def IDX_V2: u32 = 2u32; def PACK_MAGIC: u32 = 0x5041434bu32; def PACK_V2: u32 = 2u32; type pack_loc = struct { pack_rel: str, ofs: u64, }; fn cmp_oid(a: []u8, b: oid) i32 = { for (let i = 0z; i < sha256::SZ; i += 1z) { let av = a[i]; let bv = b[i]; if (av < bv) { return -1; }; if (av > bv) { return 1; }; }; return 0; }; fn count_large_before(off32: []u8, idx: size) size = { let n = 0z; for (let i = 0z; i < idx; i += 1z) { let o32 = endian::begetu32(off32[i*4z .. i*4z + 4z]); if ((o32 & 0x8000_0000u32) != 0u32) { n += 1z; }; }; return n; }; // Reads a packed object by its ID from the given repository. export fn read_packed( r: repo, id: oid, ) (object | fs::error | io::error | errors::invalid | strconv::invalid | strconv::overflow | errors::noentry | utf8::invalid | nomem) = { let loc = find_in_indexes(r, id)?; return read_from_pack_at(r, loc, id); }; fn find_in_indexes( r: repo, id: oid, ) (pack_loc | errors::noentry | fs::error | io::error | errors::invalid | nomem) = { const dir = "objects/pack"; let it = fs::iter(r.root, dir)?; defer fs::finish(it); for (true) { match (fs::next(it)) { case let de: fs::dirent => if (!strings::hassuffix(de.name, ".idx")) { continue; }; { let rel = fmt::asprintf("{}/{}", dir, de.name)?; match (idx_lookup(r, rel, id)) { case let pl: pack_loc => free(rel); return pl; case errors::noentry => free(rel); continue; case let fe: fs::error => free(rel); return fe; case let ioe: io::error => free(rel); return ioe; case let inv: errors::invalid => free(rel); return inv; case nomem => free(rel); return nomem; }; }; case done => break; case let fe: fs::error => return fe; }; }; return errors::noentry; }; fn idx_lookup( r: repo, idx_rel: const str, id: oid, ) (pack_loc | errors::noentry | fs::error | io::error | errors::invalid | nomem) = { let h = fs::open(r.root, idx_rel)?; 
defer io::close(h)!; let buf = io::drain(h)?; defer free(buf); if (len(buf) < 8z + 256z*4z) { return errors::invalid; }; let off = 0z; let magic = endian::begetu32(buf[off..off+4]); off += 4z; if (magic != IDX_MAGIC) { return errors::invalid; }; let ver = endian::begetu32(buf[off..off+4]); off += 4z; if (ver != IDX_V2) { return errors::invalid; }; let fanout: [256]u32 = [0...]; for (let i = 0z; i < 256z; i += 1z) { fanout[i] = endian::begetu32(buf[off..off+4]); off += 4z; }; let nobj = fanout[255]: size; let need = off + nobj * sha256::SZ + nobj * 4z + nobj * 4z + 2z * sha256::SZ; if (need > len(buf)) { return errors::invalid; }; let names_off = off; let crcs_off = names_off + nobj * sha256::SZ; let off32_off = crcs_off + nobj * 4z; let large_count = 0z; for (let i = 0z; i < nobj; i += 1z) { let o32 = endian::begetu32(buf[off32_off + i*4z .. off32_off + i*4z + 4z]); if ((o32 & 0x8000_0000u32) != 0u32) { large_count += 1z; }; }; let off64_off = off32_off + nobj * 4z; let trailer_off = off64_off + large_count * 8z; if (trailer_off + 2z * sha256::SZ > len(buf)) { return errors::invalid; }; let first = (id[0]: u8): size; let lo: size = if (first == 0u8) { yield 0z; } else { yield fanout[first - 1z]: size; }; let hi: size = fanout[first]: size; let found = false; let idx = 0z; let l = lo; let h = hi; for (l < h) { let m = l + (h - l) / 2z; let cand = buf[names_off + m*sha256::SZ .. names_off + (m+1z)*sha256::SZ]; let c = cmp_oid(cand, id); if (c == 0) { found = true; idx = m; break; } else if (c < 0) { l = m + 1z; } else { h = m; }; }; if (!found) { return errors::noentry; }; let o32 = endian::begetu32(buf[off32_off + idx*4z .. off32_off + idx*4z + 4z]); let ofs: u64 = 0u64; if ((o32 & 0x8000_0000u32) == 0u32) { ofs = (o32: u64); } else { let nlarge_before = count_large_before(buf[off32_off..], idx); let p = off64_off + nlarge_before * 8z; let o64be = endian::begetu64(buf[p .. 
p + 8z]); ofs = o64be; }; if (!strings::hassuffix(idx_rel, ".idx")) { return errors::invalid; }; let stem = strings::bytesub(idx_rel, 0z, len(idx_rel) - 4z)!; let packpath = fmt::asprintf("{}{}", stem, ".pack")?; return pack_loc { pack_rel = packpath, ofs = ofs }; };

// Reads the object located at "loc" inside a packfile, checks it against the
// expected id "want" with verify_typed, and parses it into an [[object]].
//
// This function frees loc.pack_rel before returning, so the caller must not
// use or free it afterwards. The raw object body is freed on return as well.
// NOTE(review): this assumes the parse_* helpers copy whatever they keep from
// "body"; confirm against their implementations.
//
// Returns errors::invalid if the pack header is not PACK_MAGIC/PACK_V2, if the
// object type is unknown, or if verification fails.
fn read_from_pack_at(
	r: repo,
	loc: pack_loc,
	want: oid,
) (object | fs::error | io::error | errors::invalid | strconv::invalid
	| strconv::overflow | errors::noentry | utf8::invalid | nomem) = {
	defer free(loc.pack_rel);

	let h = fs::open(r.root, loc.pack_rel)?;
	defer io::close(h)!;

	// Validate the 12-byte pack header before trusting any offset.
	let header: [12]u8 = [0...];
	match (io::readall(h, header)) {
	case size => void;
	case io::EOF =>
		return errors::invalid;
	case let ioe: io::error =>
		return ioe;
	};
	let magic = endian::begetu32(header[..4]);
	let ver = endian::begetu32(header[4..8]);
	if (magic != PACK_MAGIC || ver != PACK_V2) {
		return errors::invalid;
	};

	io::seek(h, (loc.ofs: i64), io::whence::SET)?;

	let ty: objtype = objtype::OBJ_INVALID;
	match (read_obj_header(h)) {
	case let t: (u8, size, size) =>
		ty = (t.0: objtype);
	case let ioe: io::error =>
		return ioe;
	case =>
		return errors::invalid;
	};

	// full_ty is the type of the fully resolved object; for delta entries
	// it is the type of the base the delta chain ends in.
	let full_ty: objtype = objtype::OBJ_INVALID;
	let body: []u8 = [];
	defer if (len(body) != 0) {
		free(body);
	};

	switch (ty) {
	case objtype::OBJ_COMMIT =>
		body = inflate_section(h)?;
		full_ty = objtype::OBJ_COMMIT;
	case objtype::OBJ_TREE =>
		body = inflate_section(h)?;
		full_ty = objtype::OBJ_TREE;
	case objtype::OBJ_BLOB =>
		body = inflate_section(h)?;
		full_ty = objtype::OBJ_BLOB;
	case objtype::OBJ_TAG =>
		body = inflate_section(h)?;
		full_ty = objtype::OBJ_TAG;
	case objtype::OBJ_REF_DELTA =>
		// Every error the resolver can produce is part of this
		// function's result type, so propagate it directly.
		const res = resolve_ref_delta(r, h)?;
		full_ty = res.0;
		body = res.1;
	case objtype::OBJ_OFS_DELTA =>
		const res = resolve_ofs_delta(r, h, loc)?;
		full_ty = res.0;
		body = res.1;
	case =>
		return errors::invalid;
	};

	// Verify BEFORE parsing using objtype (not a string).
	if (!verify_typed(full_ty, body, want)) {
		return errors::invalid;
	};

	// Parse into a structured object.
	if (full_ty == objtype::OBJ_BLOB) {
		const b = parse_blob(body)?;
		return (b: object);
	} else if (full_ty == objtype::OBJ_TREE) {
		const t = parse_tree(body)?;
		return (t: object);
	} else if (full_ty == objtype::OBJ_COMMIT) {
		const c = parse_commit(body)?;
		return (c: object);
	} else if (full_ty == objtype::OBJ_TAG) {
		const g = parse_tag(body)?;
		return (g: object);
	};

	return errors::invalid;
};

// Reads a pack entry header from "h".
//
// The first byte carries the 3-bit type in bits 4-6 and the low 4 bits of the
// size; while the top bit of a byte is set, another byte follows and
// contributes 7 more size bits. Returns (type, size, bytes consumed).
//
// Returns errors::invalid on a premature end of input or when the encoded size
// needs more bits than "size" can hold.
fn read_obj_header(h: io::handle) ((u8, size, size) | io::error | errors::invalid) = {
	let consumed = 0z;

	let b0: [1]u8 = [0];
	match (io::readall(h, b0)) {
	case size => void;
	case io::EOF =>
		return errors::invalid;
	case let ioe: io::error =>
		return ioe;
	};
	consumed += 1z;

	let ty = (b0[0] >> 4) & 0x07u8;
	let sz: size = (b0[0] & 0x0fu8): size;
	let shift = 4z;

	if ((b0[0] & 0x80u8) != 0u8) {
		for (true) {
			// The header comes from untrusted data: refuse a
			// continuation that would shift past the width of size.
			if (shift >= size(size) * 8z) {
				return errors::invalid;
			};

			let bb: [1]u8 = [0];
			match (io::readall(h, bb)) {
			case size => void;
			case io::EOF =>
				return errors::invalid;
			case let ioe: io::error =>
				return ioe;
			};
			consumed += 1z;

			let v = (bb[0] & 0x7fu8): size;
			sz += v << shift;
			if ((bb[0] & 0x80u8) == 0u8) {
				break;
			};
			shift += 7z;
		};
	};

	return (ty, sz, consumed);
};

// Decompresses the zlib stream starting at the current position of "h" and
// returns the inflated bytes. The caller owns the returned slice.
fn inflate_section(h: io::handle) ([]u8 | io::error | nomem) = {
	let zr = zlib::decompress(h)?;
	defer io::close(&zr.vtable)!;
	let out = io::drain(&zr.vtable)?;
	return out;
};

// Resolves an OBJ_REF_DELTA entry whose payload starts at the current
// position of "h": reads the base object id, inflates the delta, loads the
// base object by id and applies the delta to it.
//
// Returns the base's object type together with the reconstructed body, which
// the caller owns.
fn resolve_ref_delta(
	r: repo,
	h: io::handle,
) ((objtype, []u8) | fs::error | io::error | errors::invalid | errors::noentry
	| strconv::invalid | strconv::overflow | nomem) = {
	let base: oid = [0...];
	match (io::readall(h, base)) {
	case size => void;
	case io::EOF =>
		return errors::invalid;
	case let ioe: io::error =>
		return ioe;
	};

	let delta = inflate_section(h)?;
	defer free(delta);

	let bt = read_resolved_body_by_id(r, base)?;
	// apply_delta copies out of the base, so the base body is only needed
	// until it returns; release it on success and on failure.
	defer free(bt.1);

	let out = apply_delta(bt.1, delta)?;
	return (bt.0, out);
};

// Reads the base distance of an OBJ_OFS_DELTA entry from "h".
//
// Each byte contributes its low 7 bits; while the top bit is set another byte
// follows, and the accumulated value is incremented by one before being
// shifted left by 7 for the next byte.
fn read_ofs_distance(h: io::handle) (u64 | io::error | errors::invalid) = {
	let b: [1]u8 = [0];
	match (io::readall(h, b)) {
	case size => void;
	case io::EOF =>
		return errors::invalid;
	case let ioe: io::error =>
		return ioe;
	};

	let dist: u64 = (b[0] & 0x7fu8): u64;
	if ((b[0] & 0x80u8) != 0u8) {
		for (true) {
			match (io::readall(h, b)) {
			case size => void;
			case io::EOF =>
				return errors::invalid;
			case let ioe: io::error =>
				return ioe;
			};
			dist = ((dist + 1u64) << 7u64) + ((b[0] & 0x7fu8): u64);
			if ((b[0] & 0x80u8) == 0u8) {
				break;
			};
		};
	};

	return dist;
};

// Resolves an OBJ_OFS_DELTA entry whose payload starts at the current
// position of "h". The base lives in the same pack (loc.pack_rel), "distance"
// bytes before loc.ofs.
//
// Returns errors::invalid if the distance does not point strictly inside the
// pack before this entry. The caller owns the returned body.
fn resolve_ofs_delta(
	r: repo,
	h: io::handle,
	loc: pack_loc,
) ((objtype, []u8) | fs::error | io::error | errors::invalid | errors::noentry
	| strconv::invalid | strconv::overflow | nomem) = {
	let dist = read_ofs_distance(h)?;

	// A distance at or beyond our own offset cannot name a valid entry.
	let base_ofs: u64 = if (loc.ofs > dist) {
		yield loc.ofs - dist;
	} else {
		yield 0u64;
	};
	if (base_ofs == 0u64) {
		return errors::invalid;
	};

	let bt = read_resolved_body_at_ofs(r, loc.pack_rel, base_ofs)?;
	// The base body is only an input to apply_delta; release it on every
	// exit path, including a failed inflate below.
	defer free(bt.1);

	let delta = inflate_section(h)?;
	defer free(delta);

	let out = apply_delta(bt.1, delta)?;
	return (bt.0, out);
};

// Loads the fully resolved (type, body) pair for object "id": from a pack if
// find_in_indexes locates it, otherwise from loose storage via
// read_loose_typed. The caller owns the returned body.
fn read_resolved_body_by_id(
	r: repo,
	id: oid,
) ((objtype, []u8) | fs::error | io::error | errors::invalid | errors::noentry
	| strconv::invalid | strconv::overflow | nomem) = {
	match (find_in_indexes(r, id)) {
	case let pl: pack_loc =>
		let res = read_resolved_body_at_ofs(r, pl.pack_rel, pl.ofs);
		// pack_rel is owned by the pack_loc we were handed; release it
		// regardless of whether the read succeeded.
		free(pl.pack_rel);
		return res;
	case errors::noentry =>
		return read_loose_typed(r, id);
	case let fe: fs::error =>
		return fe;
	case let ioe: io::error =>
		return ioe;
	case let inv: errors::invalid =>
		return inv;
	case nomem =>
		return nomem;
	};
};

// Reads the entry at byte offset "ofs" of the pack "pack_rel" (relative to
// r.root) and returns its fully resolved (type, body) pair, following delta
// chains recursively. Unlike [[read_from_pack_at]], this neither verifies the
// object id nor takes ownership of "pack_rel".
//
// The caller owns the returned body.
fn read_resolved_body_at_ofs(
	r: repo,
	pack_rel: str,
	ofs: u64,
) ((objtype, []u8) | fs::error | io::error | errors::invalid | errors::noentry
	| strconv::invalid | strconv::overflow | nomem) = {
	let h = fs::open(r.root, pack_rel)?;
	defer io::close(h)!;

	let header: [12]u8 = [0...];
	match (io::readall(h, header)) {
	case size => void;
	case io::EOF =>
		return errors::invalid;
	case let ioe: io::error =>
		return ioe;
	};
	let magic = endian::begetu32(header[..4]);
	let ver = endian::begetu32(header[4..8]);
	if (magic != PACK_MAGIC || ver != PACK_V2) {
		return errors::invalid;
	};

	io::seek(h, (ofs: i64), io::whence::SET)?;

	match (read_obj_header(h)) {
	case let t: (u8, size, size) =>
		switch ((t.0: objtype)) {
		case objtype::OBJ_COMMIT =>
			let body = inflate_section(h)?;
			return (objtype::OBJ_COMMIT, body);
		case objtype::OBJ_TREE =>
			let body = inflate_section(h)?;
			return (objtype::OBJ_TREE, body);
		case objtype::OBJ_BLOB =>
			let body = inflate_section(h)?;
			return (objtype::OBJ_BLOB, body);
		case objtype::OBJ_TAG =>
			let body = inflate_section(h)?;
			return (objtype::OBJ_TAG, body);
		case objtype::OBJ_REF_DELTA =>
			let base: oid = [0...];
			match (io::readall(h, base)) {
			case size => void;
			case io::EOF =>
				return errors::invalid;
			case let ioe: io::error =>
				return ioe;
			};
			let delta = inflate_section(h)?;
			defer free(delta);
			let bt = read_resolved_body_by_id(r, base)?;
			// The base body is consumed by apply_delta only.
			defer free(bt.1);
			let out = apply_delta(bt.1, delta)?;
			return (bt.0, out);
		case objtype::OBJ_OFS_DELTA =>
			let dist = read_ofs_distance(h)?;
			let base_ofs: u64 = if (ofs > dist) {
				yield ofs - dist;
			} else {
				yield 0u64;
			};
			if (base_ofs == 0u64) {
				return errors::invalid;
			};
			let delta = inflate_section(h)?;
			defer free(delta);
			let bt = read_resolved_body_at_ofs(r, pack_rel, base_ofs)?;
			// The base body is consumed by apply_delta only.
			defer free(bt.1);
			let out = apply_delta(bt.1, delta)?;
			return (bt.0, out);
		case =>
			return errors::invalid;
		};
	case let ioe: io::error =>
		return ioe;
	case =>
		return errors::invalid;
	};
};

// Applies the delta instruction stream "delta" to "base" and returns the
// newly allocated result, which the caller owns. Neither input is modified or
// freed.
//
// The delta starts with two varints: the expected base size and the result
// size. Then follow instructions: a byte with the top bit set copies a range
// from the base (its low bits select which offset/length bytes follow; a
// length of zero means 0x10000), a non-zero byte without the top bit inserts
// that many literal bytes from the delta, and a zero byte is rejected.
//
// Returns errors::invalid if the base size does not match, an instruction is
// truncated or out of range, or the result is not filled exactly.
fn apply_delta(base: []u8, delta: []u8) ([]u8 | errors::invalid | nomem) = {
	let i = 0z;
	let srcsz = read_varint(delta, &i)?;
	let dstsz = read_varint(delta, &i)?;
	if (srcsz != len(base)) {
		return errors::invalid;
	};

	let out: []u8 = alloc([0u8...], dstsz)?;
	// Every early return below is an error; make sure the partially
	// filled buffer does not leak on those paths.
	let ok = false;
	defer if (!ok) {
		free(out);
	};

	let outpos = 0z;
	for (i < len(delta)) {
		let op = delta[i];
		i += 1z;

		if ((op & 0x80u8) != 0u8) {
			// Copy from base: bits 0-3 select offset bytes
			// (little-endian), bits 4-6 select length bytes.
			let off = 0z;
			let n = 0z;
			if ((op & 0x01u8) != 0u8) {
				if (i >= len(delta)) {
					return errors::invalid;
				};
				off |= (delta[i]: size);
				i += 1z;
			};
			if ((op & 0x02u8) != 0u8) {
				if (i >= len(delta)) {
					return errors::invalid;
				};
				off |= (delta[i]: size) << 8z;
				i += 1z;
			};
			if ((op & 0x04u8) != 0u8) {
				if (i >= len(delta)) {
					return errors::invalid;
				};
				off |= (delta[i]: size) << 16z;
				i += 1z;
			};
			if ((op & 0x08u8) != 0u8) {
				if (i >= len(delta)) {
					return errors::invalid;
				};
				off |= (delta[i]: size) << 24z;
				i += 1z;
			};
			if ((op & 0x10u8) != 0u8) {
				if (i >= len(delta)) {
					return errors::invalid;
				};
				n |= (delta[i]: size);
				i += 1z;
			};
			if ((op & 0x20u8) != 0u8) {
				if (i >= len(delta)) {
					return errors::invalid;
				};
				n |= (delta[i]: size) << 8z;
				i += 1z;
			};
			if ((op & 0x40u8) != 0u8) {
				if (i >= len(delta)) {
					return errors::invalid;
				};
				n |= (delta[i]: size) << 16z;
				i += 1z;
			};
			if (n == 0z) {
				n = 0x10000z;
			};
			if (off + n > len(base) || outpos + n > len(out)) {
				return errors::invalid;
			};
			out[outpos .. outpos + n] = base[off .. off + n];
			outpos += n;
		} else if (op != 0u8) {
			// Insert: the next "op" bytes of the delta are literal.
			let n = (op: size);
			if (i + n > len(delta) || outpos + n > len(out)) {
				return errors::invalid;
			};
			out[outpos .. outpos + n] = delta[i .. i + n];
			i += n;
			outpos += n;
		} else {
			return errors::invalid;
		};
	};

	if (outpos != len(out)) {
		return errors::invalid;
	};

	ok = true;
	return out;
};

// Decodes a little-endian base-128 varint from "buf" starting at index *ip,
// advancing *ip past the bytes consumed. Each byte contributes its low 7 bits
// and the top bit marks that another byte follows.
//
// Returns errors::invalid if the buffer ends before the final byte or if the
// value needs more bits than "size" can hold.
fn read_varint(buf: []u8, ip: *size) (size | errors::invalid) = {
	let res = 0z;
	let shift = 0z;
	for (true) {
		if (*ip >= len(buf)) {
			return errors::invalid;
		};
		// Untrusted input: never shift by the full width of size or
		// more.
		if (shift >= size(size) * 8z) {
			return errors::invalid;
		};
		let b = buf[*ip];
		*ip += 1z;
		res |= ((b & 0x7fu8): size) << shift;
		if ((b & 0x80u8) == 0u8) {
			break;
		};
		shift += 7z;
	};
	return res;
};