From ef9d69afc6f40e8a5cb2a7ec515efd6c2c92040f Mon Sep 17 00:00:00 2001
From: Runxi Yu
Date: Sun, 21 Sep 2025 11:06:21 +0800
Subject: [PATCH] Add serialization functions and use objtype more

---
Review note: serialize_commit and serialize_tag now release their temporary
buffers on every exit path. The obj_commit.ha and obj_tag.ha hunk sizes and
the diffstat were updated to match; the post-image hashes on their index
lines were not recomputed.

 git/ident.ha      | 58 ++++++++++++++++++++++++++++++
 git/loose.ha      |  4 +-
 git/obj_blob.ha   | 27 ++++++++++++++
 git/obj_commit.ha | 94 +++++++++++++++++++++++++++++++++++++++++++++++++
 git/obj_tag.ha    | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++
 git/obj_tree.ha   | 50 ++++++++++++++++++++++++++
 git/object.ha     | 33 +++++++----------
 git/packed.ha     | 36 ++++++-------------

diff --git a/git/ident.ha b/git/ident.ha
index 74e9e48c80d802fbcec3389518c61ad3776710ef..d6c038c6f807d2525953b703cf78d266fe7b91ed 100644
--- a/git/ident.ha
+++ b/git/ident.ha
@@ -74,3 +74,61 @@ let name = alloc(name_b...)?;
 	let email = alloc(email_b...)?;
 	return ident { name = name, email = email, when = when, ofs = ofs };
 };
+
+// Returns the canonical form for an ident.
+export fn serialize_ident(p: ident) ([]u8 | nomem) = {
+	const whens = strconv::i64tos(p.when);
+	const whenb = strings::toutf8(whens);
+
+	let mins = p.ofs;
+	let sign: u8 = '+';
+	if (mins < 0) {
+		sign = '-';
+		mins = -mins;
+	};
+	let hh: u32 = ((mins / 60): u32);
+	let mm: u32 = ((mins % 60): u32);
+
+	let outlen = len(p.name) + 2z + len(p.email) + 2z + len(whenb) + 1z + 5z;
+	let out = alloc([0u8...], outlen)?;
+	let pos = 0z;
+
+	out[pos .. pos + len(p.name)] = p.name;
+	pos += len(p.name);
+
+	out[pos] = ' ';
+	pos += 1z;
+
+	out[pos] = '<';
+	pos += 1z;
+
+	out[pos .. pos + len(p.email)] = p.email;
+	pos += len(p.email);
+
+	out[pos] = '>';
+	pos += 1z;
+
+	out[pos] = ' ';
+	pos += 1z;
+
+	out[pos .. pos + len(whenb)] = whenb;
+	pos += len(whenb);
+
+	out[pos] = ' ';
+	pos += 1z;
+
+	out[pos] = sign;
+	pos += 1z;
+
+	out[pos] = ('0' + (hh / 10u32): u8);
+	pos += 1z;
+	out[pos] = ('0' + (hh % 10u32): u8);
+	pos += 1z;
+
+	out[pos] = ('0' + (mm / 10u32): u8);
+	pos += 1z;
+	out[pos] = ('0' + (mm % 10u32): u8);
+	pos += 1z;
+
+	return out;
+};
diff --git a/git/loose.ha b/git/loose.ha
index 04640b338f8005fd15ed71e93466a034edd28510..9d633d34780d6be3a9fe6c2bd4bf7410d38b42cd 100644
--- a/git/loose.ha
+++ b/git/loose.ha
@@ -77,9 +77,9 @@ return (c: object);
 	} else if (ty == "tag") {
 		const g = parse_tag(body)?;
 		return (g: object);
-	} else {
-		return errors::invalid;
 	};
+
+	return errors::invalid;
 };
 
 // Reads a loose object from the repository by its ID,
diff --git a/git/obj_blob.ha b/git/obj_blob.ha
index 66b308790bdd62a1217f734af92faa75bf188788..4a94f87ecc20cdc445142daa1011c1198073cb46 100644
--- a/git/obj_blob.ha
+++ b/git/obj_blob.ha
@@ -1,3 +1,6 @@
+use strconv;
+use strings;
+
 // A simple Git blob with its object ID and raw data.
 export type blob = struct {
 	data: []u8,
@@ -15,3 +18,27 @@ export fn parse_blob(body: []u8) (blob | nomem) = {
 	let data = alloc(body...)?;
 	return blob { data = data };
 };
+
+
+// Serializes a blob into the on-disk format.
+export fn serialize_blob(b: blob) ([]u8 | nomem) = {
+	const sizes = strconv::ztos(len(b.data));
+	const ty = strings::toutf8("blob ");
+	const sizesb = strings::toutf8(sizes);
+
+	let hlen = len(ty) + len(sizesb) + 1z;
+	let out = alloc([0u8...], hlen + len(b.data))?;
+	let pos = 0z;
+
+	out[pos .. pos + len(ty)] = ty;
+	pos += len(ty);
+
+	out[pos .. pos + len(sizesb)] = sizesb;
+	pos += len(sizesb);
+
+	out[pos] = 0u8;
+	pos += 1z;
+
+	out[pos .. pos + len(b.data)] = b.data;
+	return out;
+};
diff --git a/git/obj_commit.ha b/git/obj_commit.ha
index 15d59cf024bc0b244f1fc8f55fdecd6d3776a784..f48a389ccea92bb2a8cffc890e350053b46b17a7 100644
--- a/git/obj_commit.ha
+++ b/git/obj_commit.ha
@@ -106,3 +106,97 @@ c.message = alloc(body[i..]...)?;
 
 	return c;
 };
+// Serializes a commit into its on-disk format.
+//
+// The caller owns the returned buffer and must free it.
+export fn serialize_commit(c: commit) ([]u8 | nomem) = {
+	const treehex = oid_string(c.tree)?;
+	defer free(treehex);
+
+	// The parent ID strings and the slice holding them are temporaries;
+	// release them on every exit path, including early nomem returns.
+	let parenthex: []const str = [];
+	defer {
+		for (let i = 0z; i < len(parenthex); i += 1z) {
+			free(parenthex[i]);
+		};
+		free(parenthex);
+	};
+	for (let i = 0z; i < len(c.parents); i += 1z) {
+		const hex = oid_string(c.parents[i])?;
+		match (append(parenthex, hex)) {
+		case void =>
+			void;
+		case nomem =>
+			// Not yet tracked by parenthex, so free it here.
+			free(hex);
+			return nomem;
+		};
+	};
+
+	let authorb = serialize_ident(c.author)?;
+	defer free(authorb);
+
+	let committerb = serialize_ident(c.committer)?;
+	defer free(committerb);
+
+	let bodylen = 0z;
+	bodylen += 5z + len(treehex) + 1z;
+	for (let i = 0z; i < len(parenthex); i += 1z) {
+		bodylen += 7z + len(parenthex[i]) + 1z;
+	};
+	bodylen += 7z + len(authorb) + 1z;
+	bodylen += 10z + len(committerb) + 1z;
+	bodylen += 1z + len(c.message);
+
+	const sizes = strconv::ztos(bodylen);
+	const ty = strings::toutf8("commit ");
+	const sizesb = strings::toutf8(sizes);
+	let hlen = len(ty) + len(sizesb) + 1z;
+
+	let out = alloc([0u8...], hlen + bodylen)?;
+	let pos = 0z;
+
+	out[pos .. pos + len(ty)] = ty;
+	pos += len(ty);
+	out[pos .. pos + len(sizesb)] = sizesb;
+	pos += len(sizesb);
+	out[pos] = 0u8;
+	pos += 1z;
+
+	{
+		const pre = strings::toutf8("tree ");
+		out[pos .. pos + len(pre)] = pre; pos += len(pre);
+		const hb = strings::toutf8(treehex);
+		out[pos .. pos + len(hb)] = hb; pos += len(hb);
+		out[pos] = '\n'; pos += 1z;
+	};
+
+	for (let i = 0z; i < len(parenthex); i += 1z) {
+		const pre = strings::toutf8("parent ");
+		out[pos .. pos + len(pre)] = pre; pos += len(pre);
+		const hb = strings::toutf8(parenthex[i]);
+		out[pos .. pos + len(hb)] = hb; pos += len(hb);
+		out[pos] = '\n'; pos += 1z;
+	};
+
+	{
+		const pre = strings::toutf8("author ");
+		out[pos .. pos + len(pre)] = pre; pos += len(pre);
+		out[pos .. pos + len(authorb)] = authorb; pos += len(authorb);
+		out[pos] = '\n'; pos += 1z;
+	};
+
+	{
+		const pre = strings::toutf8("committer ");
+		out[pos .. pos + len(pre)] = pre; pos += len(pre);
+		out[pos .. pos + len(committerb)] = committerb; pos += len(committerb);
+		out[pos] = '\n'; pos += 1z;
+	};
+
+	out[pos] = '\n';
+	pos += 1z;
+	out[pos .. pos + len(c.message)] = c.message;
+
+	return out;
+};
diff --git a/git/obj_tag.ha b/git/obj_tag.ha
index 3c0718395bc3bc2405296fb75c35f1070bc78b95..5b72bb537b977adda471df6b92a329512d891bf4 100644
--- a/git/obj_tag.ha
+++ b/git/obj_tag.ha
@@ -123,3 +123,99 @@ t.message = alloc(body[i..]...)?;
 
 	return t;
 };
+
+// Serializes a tag into the on-disk format.
+//
+// Returns errors::invalid if the target type is not a commit, tree, blob or
+// tag. The caller owns the returned buffer and must free it.
+export fn serialize_tag(t: tag) ([]u8 | errors::invalid | nomem) = {
+	let tyname: (const str | void) = void;
+	switch (t.target_type) {
+	case objtype::OBJ_COMMIT => tyname = "commit";
+	case objtype::OBJ_TREE => tyname = "tree";
+	case objtype::OBJ_BLOB => tyname = "blob";
+	case objtype::OBJ_TAG => tyname = "tag";
+	case => return errors::invalid;
+	};
+	const tynameb = strings::toutf8((tyname: const str));
+
+	const objhex = oid_string(t.target)?;
+	defer free(objhex);
+
+	// The serialized tagger is a temporary; release it on every exit path
+	// once it has been copied into the output buffer.
+	let taggerb: ([]u8 | void) = void;
+	defer match (taggerb) {
+	case let tb: []u8 =>
+		free(tb);
+	case void =>
+		void;
+	};
+	match (t.tagger) {
+	case let id: ident =>
+		taggerb = serialize_ident(id)?;
+	case void =>
+		void;
+	};
+
+	let bodylen = 0z;
+	bodylen += 7z + len(objhex) + 1z;
+	bodylen += 5z + len(tynameb) + 1z;
+	bodylen += 4z + len(t.name) + 1z;
+	match (taggerb) {
+	case let tb: []u8 =>
+		bodylen += 7z + len(tb) + 1z;
+	case void =>
+		void;
+	};
+	bodylen += 1z + len(t.message);
+
+	const sizes = strconv::ztos(bodylen);
+	const ty = strings::toutf8("tag ");
+	const sizesb = strings::toutf8(sizes);
+	let hlen = len(ty) + len(sizesb) + 1z;
+
+	let out = alloc([0u8...], hlen + bodylen)?;
+	let pos = 0z;
+
+	out[pos .. pos + len(ty)] = ty; pos += len(ty);
+	out[pos .. pos + len(sizesb)] = sizesb; pos += len(sizesb);
+	out[pos] = 0u8; pos += 1z;
+
+	{
+		const pre = strings::toutf8("object ");
+		out[pos .. pos + len(pre)] = pre; pos += len(pre);
+		const hb = strings::toutf8(objhex);
+		out[pos .. pos + len(hb)] = hb; pos += len(hb);
+		out[pos] = '\n'; pos += 1z;
+	};
+
+	{
+		const pre = strings::toutf8("type ");
+		out[pos .. pos + len(pre)] = pre; pos += len(pre);
+		out[pos .. pos + len(tynameb)] = tynameb; pos += len(tynameb);
+		out[pos] = '\n'; pos += 1z;
+	};
+
+	{
+		const pre = strings::toutf8("tag ");
+		out[pos .. pos + len(pre)] = pre; pos += len(pre);
+		out[pos .. pos + len(t.name)] = t.name; pos += len(t.name);
+		out[pos] = '\n'; pos += 1z;
+	};
+
+	match (taggerb) {
+	case let tb: []u8 =>
+		const pre = strings::toutf8("tagger ");
+		out[pos .. pos + len(pre)] = pre; pos += len(pre);
+		out[pos .. pos + len(tb)] = tb; pos += len(tb);
+		out[pos] = '\n'; pos += 1z;
+	case void =>
+		void;
+	};
+
+	out[pos] = '\n'; pos += 1z;
+	out[pos .. pos + len(t.message)] = t.message;
+
+	return out;
+};
diff --git a/git/obj_tree.ha b/git/obj_tree.ha
index 1532af0bd210b346e828623ed836c8845e419a6b..f1c9be7d567c4d35b3964f97054b5a0f7d930f3b 100644
--- a/git/obj_tree.ha
+++ b/git/obj_tree.ha
@@ -154,3 +154,53 @@ };
 
 	return errors::invalid;
 };
+
+// Serializes a tree into the on-disk format.
+export fn serialize_tree(t: tree) ([]u8 | nomem) = {
+	let bodylen = 0z;
+	for (let e .. t.entries) {
+		const modes = strconv::u32tos(e.mode, strconv::base::OCT);
+		bodylen += len(strings::toutf8(modes));
+		bodylen += 1z;
+		bodylen += len(e.name);
+		bodylen += 1z;
+		bodylen += (sha256::SZ: size);
+	};
+
+	const sizes = strconv::ztos(bodylen);
+	const ty = strings::toutf8("tree ");
+	const sizesb = strings::toutf8(sizes);
+
+	let hlen = len(ty) + len(sizesb) + 1z;
+	let out = alloc([0u8...], hlen + bodylen)?;
+	let pos = 0z;
+
+	out[pos .. pos + len(ty)] = ty;
+	pos += len(ty);
+	out[pos .. pos + len(sizesb)] = sizesb;
+	pos += len(sizesb);
+	out[pos] = 0u8;
+	pos += 1z;
+
+	for (let e .. t.entries) {
+		const modes = strconv::u32tos(e.mode, strconv::base::OCT);
+		const modeb = strings::toutf8(modes);
+
+		out[pos .. pos + len(modeb)] = modeb;
+		pos += len(modeb);
+
+		out[pos] = ' ';
+		pos += 1z;
+
+		out[pos .. pos + len(e.name)] = e.name;
+		pos += len(e.name);
+
+		out[pos] = 0u8;
+		pos += 1z;
+
+		out[pos .. pos + (sha256::SZ: size)] = e.oid[..];
+		pos += (sha256::SZ: size);
+	};
+
+	return out;
+};
diff --git a/git/object.ha b/git/object.ha
index 077210189d637b69c075ffb335ad17661c9755c0..d1960aab8239c478dd03959c987b1e20c0c42448 100644
--- a/git/object.ha
+++ b/git/object.ha
@@ -43,37 +43,34 @@ abort("Unknown object type being freed...");
 	};
 };
 
-// Verifies that the given body matches the given object ID.
+// Verifies that the given buffer (which must be the exact on-disk format
+// structured as "type size\0body") matches the given object ID.
 export fn verify_oid(buf: []u8, want: oid) bool = {
 	let st = sha256::sha256();
 	hash::write(&st, buf);
+
 	let got: oid = [0...];
 	hash::sum(&st, got);
 	hash::close(&st);
 
-	if (bytes::equal(got[..], want[..])) {
-		return true;
-	} else {
-		return false;
-	};
+	return bytes::equal(got[..], want[..]);
 };
 
-// Verifies that the given body matches the given object ID.
-//
-// Adds the type and size, as a real serialized git object has.
-export fn verify_typed(ty: str, body: []u8, want: oid) bool = {
+// Verifies that the given typed body matches the given object ID.
+export fn verify_typed(ty: objtype, body: []u8, want: oid) bool = {
 	let st = sha256::sha256();
 	defer hash::close(&st);
 
-	if (ty == "blob") {
+	switch (ty) {
+	case objtype::OBJ_BLOB =>
 		hash::write(&st, strings::toutf8("blob"));
-	} else if (ty == "tree") {
+	case objtype::OBJ_TREE =>
 		hash::write(&st, strings::toutf8("tree"));
-	} else if (ty == "commit") {
+	case objtype::OBJ_COMMIT =>
 		hash::write(&st, strings::toutf8("commit"));
-	} else if (ty == "tag") {
+	case objtype::OBJ_TAG =>
 		hash::write(&st, strings::toutf8("tag"));
-	} else {
+	case =>
 		return false;
 	};
 
@@ -87,11 +84,7 @@
 	let got: oid = [0...];
 	hash::sum(&st, got);
 
-	if (bytes::equal(got[..], want[..])) {
-		return true;
-	} else {
-		return false;
-	};
+	return bytes::equal(got[..], want[..]);
 };
 
 // Reads a Git object from the repository by its ID.
diff --git a/git/packed.ha b/git/packed.ha
index 9729a4944e48e50a818afefce969edd7c9dc0372..4b4eacd35e9c49b1ab783d779e2e122bf87db54a 100644
--- a/git/packed.ha
+++ b/git/packed.ha
@@ -272,17 +272,17 @@ body = inflate_section(h)?;
 		full_ty = objtype::OBJ_TAG;
 	case objtype::OBJ_REF_DELTA =>
 		match (resolve_ref_delta(r, h)) {
-		case let t: (objtype, []u8) =>
-			full_ty = t.0;
-			body = t.1;
+		case let t2: (objtype, []u8) =>
+			full_ty = t2.0;
+			body = t2.1;
 		case let e: (fs::error | io::error | errors::invalid | errors::noentry | nomem) =>
 			return e;
 		};
 	case objtype::OBJ_OFS_DELTA =>
 		match (resolve_ofs_delta(r, h, loc)) {
-		case let t: (objtype, []u8) =>
-			full_ty = t.0;
-			body = t.1;
+		case let t2: (objtype, []u8) =>
+			full_ty = t2.0;
+			body = t2.1;
 		case let e: (fs::error | io::error | errors::invalid | errors::noentry | nomem) =>
 			return e;
 		};
@@ -290,34 +290,22 @@ case =>
 		return errors::invalid;
 	};
 
-	let tystr = if (full_ty == objtype::OBJ_BLOB) {
-		yield "blob";
-	} else if (full_ty == objtype::OBJ_TREE) {
-		yield "tree";
-	} else if (full_ty == objtype::OBJ_COMMIT) {
-		yield "commit";
-	} else if (full_ty == objtype::OBJ_TAG) {
-		yield "tag";
-	} else {
-		yield "";
-	};
-	if (tystr == "" || !verify_typed(tystr, body, want)) {
+	// Verify BEFORE parsing using objtype (not a string).
+	if (!verify_typed(full_ty, body, want)) {
 		return errors::invalid;
 	};
 
+	// Parse into a structured object.
 	if (full_ty == objtype::OBJ_BLOB) {
 		const b = parse_blob(body)?;
 		return (b: object);
-	};
-	if (full_ty == objtype::OBJ_TREE) {
+	} else if (full_ty == objtype::OBJ_TREE) {
 		const t = parse_tree(body)?;
 		return (t: object);
-	};
-	if (full_ty == objtype::OBJ_COMMIT) {
+	} else if (full_ty == objtype::OBJ_COMMIT) {
 		const c = parse_commit(body)?;
 		return (c: object);
-	};
-	if (full_ty == objtype::OBJ_TAG) {
+	} else if (full_ty == objtype::OBJ_TAG) {
 		const g = parse_tag(body)?;
 		return (g: object);
 	};
-- 
2.48.1