From bc2f6115693874b2e8feed80ff5539743d257920 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 11 May 2026 20:46:48 -0400 Subject: [PATCH 01/93] t/helper: add 'test-tool bitmap write' subcommand In f16eb1c091 (pseudo-merge: fix disk reads from find_pseudo_merge(), 2026-03-31), we noted that `apply_pseudo_merges_for_commit()` is never triggered by the existing test suite, and that this bears further investigation. This patch is the first one to begin that investigation. The following patches will expose and fix a variety of bugs in the implementation of pseudo-merge bitmaps. In order to do so, however, many of these tests require very precise selection of which commits receive bitmaps and which do not. To date, there isn't a standard approach to easily facilitate this. Address this by introducing a `test-tool bitmap write` subcommand that writes a bitmap for a given packfile, reading the set of commits which should receive individual bitmaps from stdin like so: test-tool bitmap write <pack> </path/to/commits.list where "<pack>" is the filename for a specific packfile (e.g., "pack-abc123.pack"), and "/path/to/commits.list" is a list of commit OIDs which will receive bitmaps. The helper respects `bitmapPseudoMerge.*` configuration for creating pseudo-merge bitmaps alongside the regular commit bitmaps. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- t/helper/test-bitmap.c | 113 +++++++++++++++++++++++++++++++++++++++- t/t5310-pack-bitmaps.sh | 24 +++++++++ 2 files changed, 136 insertions(+), 1 deletion(-) diff --git a/t/helper/test-bitmap.c b/t/helper/test-bitmap.c index 16a01669e4149a..381e9b58b2c42b 100644 --- a/t/helper/test-bitmap.c +++ b/t/helper/test-bitmap.c @@ -2,7 +2,10 @@ #include "test-tool.h" #include "git-compat-util.h" +#include "hex.h" +#include "odb.h" #include "pack-bitmap.h" +#include "pseudo-merge.h" #include "setup.h" static int bitmap_list_commits(void) @@ -35,6 +38,111 @@ static int bitmap_dump_pseudo_merge_objects(uint32_t n) return test_bitmap_pseudo_merge_objects(the_repository, n); } +static int add_packed_object(const struct object_id *oid, + struct packed_git *pack, + uint32_t pos, + void *_data) +{ + struct packing_data *packed = _data; + struct object_entry *entry; + struct object_info oi = OBJECT_INFO_INIT; + enum object_type type; + + oi.typep = &type; + + entry = packlist_alloc(packed, oid); + entry->idx.offset = nth_packed_object_offset(pack, pos); + if (packed_object_info(pack, entry->idx.offset, &oi) < 0) + die("could not get type of object %s", + oid_to_hex(oid)); + oe_set_type(entry, type); + oe_set_in_pack(packed, entry, pack); + + return 0; +} + +static int idx_oid_cmp(const void *va, const void *vb) +{ + const struct pack_idx_entry *a = *(const struct pack_idx_entry **)va; + const struct pack_idx_entry *b = *(const struct pack_idx_entry **)vb; + + return oidcmp(&a->oid, &b->oid); +} + +static int bitmap_write(const char *basename) +{ + struct packed_git *p = NULL; + struct packing_data packed = { 0 }; + struct bitmap_writer writer; + struct pack_idx_entry **index; + struct strbuf buf = STRBUF_INIT; + uint32_t i; + + prepare_repo_settings(the_repository); + repo_for_each_pack(the_repository, p) { + if (!strcmp(pack_basename(p), basename)) + break; + } + + if (!p) + die("could not find pack '%s'", basename); + + 
if (open_pack_index(p)) + die("cannot open pack index for '%s'", p->pack_name); + + prepare_packing_data(the_repository, &packed); + + for_each_object_in_pack(p, add_packed_object, &packed, + ODB_FOR_EACH_OBJECT_PACK_ORDER); + + /* + * Build the index array now that data.packed.objects[] is + * fully allocated (packlist_alloc() may have reallocated it + * during the loop above). + */ + ALLOC_ARRAY(index, p->num_objects); + for (i = 0; i < p->num_objects; i++) + index[i] = &packed.objects[i].idx; + + bitmap_writer_init(&writer, the_repository, &packed, NULL); + bitmap_writer_build_type_index(&writer, index); + + while (strbuf_getline_lf(&buf, stdin) != EOF) { + struct object_id oid; + struct commit *c; + + if (get_oid_hex(buf.buf, &oid)) + die("invalid OID: %s", buf.buf); + + c = lookup_commit(the_repository, &oid); + if (!c || repo_parse_commit(the_repository, c)) + die("could not parse commit %s", buf.buf); + + bitmap_writer_push_commit(&writer, c, 0); + } + + select_pseudo_merges(&writer); + if (bitmap_writer_build(&writer) < 0) + die("failed to build bitmaps"); + + bitmap_writer_set_checksum(&writer, p->hash); + + QSORT(index, p->num_objects, idx_oid_cmp); + + strbuf_reset(&buf); + strbuf_addstr(&buf, p->pack_name); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".bitmap"); + bitmap_writer_finish(&writer, index, buf.buf, 0); + + bitmap_writer_free(&writer); + strbuf_release(&buf); + free(index); + clear_packing_data(&packed); + + return 0; +} + int cmd__bitmap(int argc, const char **argv) { setup_git_directory(); @@ -51,13 +159,16 @@ int cmd__bitmap(int argc, const char **argv) return bitmap_dump_pseudo_merge_commits(atoi(argv[2])); if (argc == 3 && !strcmp(argv[1], "dump-pseudo-merge-objects")) return bitmap_dump_pseudo_merge_objects(atoi(argv[2])); + if (argc == 3 && !strcmp(argv[1], "write")) + return bitmap_write(argv[2]); usage("\ttest-tool bitmap list-commits\n" "\ttest-tool bitmap list-commits-with-offset\n" "\ttest-tool bitmap dump-hashes\n" 
"\ttest-tool bitmap dump-pseudo-merges\n" "\ttest-tool bitmap dump-pseudo-merge-commits \n" - "\ttest-tool bitmap dump-pseudo-merge-objects "); + "\ttest-tool bitmap dump-pseudo-merge-objects \n" + "\ttest-tool bitmap write < "); return -1; } diff --git a/t/t5310-pack-bitmaps.sh b/t/t5310-pack-bitmaps.sh index f693cb56691988..efeb71593bf7f6 100755 --- a/t/t5310-pack-bitmaps.sh +++ b/t/t5310-pack-bitmaps.sh @@ -648,4 +648,28 @@ test_expect_success 'truncated bitmap fails gracefully (lookup table)' ' test_grep corrupted.bitmap.index stderr ' +test_expect_success 'test-tool bitmap write determines bitmap selection' ' + test_when_finished "rm -fr bitmap-write-helper" && + git init bitmap-write-helper && + ( + cd bitmap-write-helper && + + test_commit_bulk 64 && + git repack -ad && + + pack="$(ls .git/objects/pack/pack-*.pack)" && + + git rev-parse HEAD >in && + test-tool bitmap write "$(basename $pack)" bitmaps.raw && + sort bitmaps.raw >bitmaps && + test_cmp in bitmaps && + + git rev-list --count --objects --use-bitmap-index HEAD >actual && + git rev-list --count --objects HEAD >expect && + test_cmp expect actual + ) +' + test_done From 49369d8290c3a5c95d835df85fdf53eba7562496 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 11 May 2026 20:46:51 -0400 Subject: [PATCH 02/93] t5333: demonstrate various pseudo-merge bugs Using the test helper introduced via the previous commit, add various failing tests demonstrating bugs in the pseudo-merge implementation. These are all marked as failing with one exception. The "sampleRate=0" test describes a latent bug, which is only reachable through a code path that is itself masked by a separate bug. A future commit will fix that bug, and, in turn, cause the aforementioned test to fail. Accordingly, that commit will mark the test as failing, and it will be re-marked as passing in a separate commit which fixes the once-latent bug. For the rest: the following commits will explain and fix the underlying bugs in detail. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- t/t5333-pseudo-merge-bitmaps.sh | 198 ++++++++++++++++++++++++++++++++ 1 file changed, 198 insertions(+) diff --git a/t/t5333-pseudo-merge-bitmaps.sh b/t/t5333-pseudo-merge-bitmaps.sh index 1f7a5d82ee4d44..0e9638c31c3d41 100755 --- a/t/t5333-pseudo-merge-bitmaps.sh +++ b/t/t5333-pseudo-merge-bitmaps.sh @@ -462,4 +462,202 @@ test_expect_success 'use pseudo-merge in boundary traversal' ' ) ' +test_expect_failure 'apply pseudo-merges during fill-in traversal' ' + test_when_finished "rm -fr pseudo-merge-fill-in-traversal" && + git init pseudo-merge-fill-in-traversal && + ( + cd pseudo-merge-fill-in-traversal && + + git config bitmapPseudoMerge.test.pattern refs/tags/ && + git config bitmapPseudoMerge.test.maxMerges 1 && + git config bitmapPseudoMerge.test.stableThreshold never && + + test_commit_bulk 64 && + tag_everything && + git repack -ad && + + pack=$(ls .git/objects/pack/pack-*.pack) && + git rev-parse HEAD~63 >in && + test-tool bitmap write "$(basename $pack)" merges && + test_line_count = 1 merges && + + test_commit stale && + + git rev-list --count --objects HEAD >expect && + + : >trace2.txt && + GIT_TRACE2_EVENT=$PWD/trace2.txt \ + git rev-list --count --objects --use-bitmap-index HEAD >actual && + test_pseudo_merges_satisfied 1 in && + while read oid + do + echo "create refs/group-$side/$oid $oid" || return 1 + done in && + test-tool bitmap write "$(basename $pack)" merges && + test_line_count = 2 merges && + + test_commit stale && + + git rev-list --count --objects HEAD >expect && + + : >trace2.txt && + GIT_TRACE2_EVENT=$PWD/trace2.txt \ + git rev-list --count --objects --use-bitmap-index HEAD >actual && + test_pseudo_merges_satisfied 2 in && + test-tool bitmap write "$(basename $pack)" merges && + test_line_count = 2 merges && + + test_commit stale && + + git rev-list --count --objects HEAD >expect && + + : >trace2.txt && + GIT_TRACE2_EVENT=$PWD/trace2.txt \ + git rev-list --count --objects 
--use-bitmap-index HEAD >actual && + test_pseudo_merges_satisfied 2 in && + GIT_TEST_DATE_NOW=$test_tick \ + test-tool bitmap write "$(basename $pack)" merges && + test_line_count = 1 merges + ) +' + +test_expect_success 'sampleRate=0 does not cause division by zero' ' + test_when_finished "rm -fr pseudo-merge-sample-rate-zero" && + git init pseudo-merge-sample-rate-zero && + ( + cd pseudo-merge-sample-rate-zero && + + test_commit_bulk 64 && + tag_everything && + git repack -ad && + + pack="$(ls .git/objects/pack/pack-*.pack)" && + + git config bitmapPseudoMerge.test.pattern "refs/tags/" && + git config bitmapPseudoMerge.test.maxMerges 1 && + git config bitmapPseudoMerge.test.sampleRate 0 && + git config bitmapPseudoMerge.test.threshold now && + git config bitmapPseudoMerge.test.stableThreshold never && + + git rev-parse HEAD~63 >in && + test-tool bitmap write "$(basename $pack)" Date: Mon, 11 May 2026 20:46:54 -0400 Subject: [PATCH 03/93] pack-bitmap-write: sort pseudo-merge commit lookup table in pack order The pseudo-merge commit lookup table stores each commit's position in the pack- or pseudo-pack order, and is used to perform a binary search in order to determine which pseudo-merge(s) a given commit belongs to. However, the table was previously sorted in lexical order (via `oid_array_sort()`), causing the binary search to fail. While this causes pseudo-merge bitmaps to be de-facto broken for fill-in traversal, there are a couple of important points to keep in mind: * Pseudo-merge application during the initial phases of a bitmap-based traversal are applied via `cascade_pseudo_merges_1()`. This function enumerates the known pseudo-merges and determines if its parents are a subset of the traversal roots. This is a different path than the fill-in traversal, where we are looking for any pseudo-merges which may be satisfied after visiting some commit along an object walk, which involves the aforementioned (broken) binary search. 
As a consequence, any pseudo-merges we apply at this stage are done so correctly. * While this bug makes applying pseudo-merges during fill-in traversal effectively broken, it does not produce wrong results. Instead of applying the *wrong* pseudo-merge, we will simply fail to find satisfied pseudo-merges, leaving the traversal to use the existing fill-in routines. Fix this by sorting the table by bit position before writing, matching the order that the reader's binary search expects. This does produce a change to the on-disk format insofar as the actual code now complies with the documented format (for more details, refer to: Documentation/technical/bitmap-format.adoc). Given that this never worked in the first place, such a change should be OK to perform. If an out-of-tree implementation of pseudo-merges happened to generate bitmaps that comply with the documented format, they will continue to be read and interpreted as normal. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pack-bitmap-write.c | 21 ++++++++++++++++++++- t/t5333-pseudo-merge-bitmaps.sh | 2 +- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 8338d7217ef48f..86ed6a5d78cd04 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -819,6 +819,20 @@ static void write_selected_commits_v1(struct bitmap_writer *writer, } } +static int pseudo_merge_commit_pos_cmp(const void *_va, const void *_vb, + void *_data) +{ + struct bitmap_writer *writer = _data; + uint32_t pos_a = find_object_pos(writer, _va, NULL); + uint32_t pos_b = find_object_pos(writer, _vb, NULL); + + if (pos_a < pos_b) + return -1; + if (pos_a > pos_b) + return 1; + return 0; +} + static void write_pseudo_merges(struct bitmap_writer *writer, struct hashfile *f) { @@ -876,7 +890,12 @@ static void write_pseudo_merges(struct bitmap_writer *writer, oid_array_append(&commits, &kh_key(writer->pseudo_merge_commits, i)); } - oid_array_sort(&commits); + /* + * Sort the 
commits by their bit position so that the lookup + * table can be binary searched by the reader (see + * find_pseudo_merge()). + */ + QSORT_S(commits.oid, commits.nr, pseudo_merge_commit_pos_cmp, writer); /* write lookup table (non-extended) */ for (i = 0; i < commits.nr; i++) { diff --git a/t/t5333-pseudo-merge-bitmaps.sh b/t/t5333-pseudo-merge-bitmaps.sh index 0e9638c31c3d41..3d7a7668121f49 100755 --- a/t/t5333-pseudo-merge-bitmaps.sh +++ b/t/t5333-pseudo-merge-bitmaps.sh @@ -462,7 +462,7 @@ test_expect_success 'use pseudo-merge in boundary traversal' ' ) ' -test_expect_failure 'apply pseudo-merges during fill-in traversal' ' +test_expect_success 'apply pseudo-merges during fill-in traversal' ' test_when_finished "rm -fr pseudo-merge-fill-in-traversal" && git init pseudo-merge-fill-in-traversal && ( From b1e3fcdb9b087b4d69836dd5a228648008ff419a Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 11 May 2026 20:46:57 -0400 Subject: [PATCH 04/93] pack-bitmap: fix inverted binary search in `pseudo_merge_at()` The binary search in `pseudo_merge_at()` has its "lo" and "hi" updates swapped: when the midpoint's offset is less than the target, it sets `hi = mi` (searching left) instead of `lo = mi + 1` (searching right), and vice versa. This means that lookups for pseudo-merges whose offset is not near the midpoint of the pseudo-merge table are likely to fail. In practice, with a single pseudo-merge group this is masked because the lone entry is always at the midpoint. With multiple groups, the inverted comparisons cause lookups to search in the wrong direction, potentially missing entries. Swap the "lo" and "hi" assignments to search in the correct direction, making it possible to apply pseudo-merges during fill-in when more than one pseudo-merge exists in a group. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pseudo-merge.c | 4 ++-- t/t5333-pseudo-merge-bitmaps.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pseudo-merge.c b/pseudo-merge.c index ff18b6c364245e..fb71c7617924a7 100644 --- a/pseudo-merge.c +++ b/pseudo-merge.c @@ -559,9 +559,9 @@ static struct pseudo_merge *pseudo_merge_at(const struct pseudo_merge_map *pm, if (got == want) return use_pseudo_merge(pm, &pm->v[mi]); else if (got < want) - hi = mi; - else lo = mi + 1; + else + hi = mi; } warning(_("could not find pseudo-merge for commit %s at offset %"PRIuMAX), diff --git a/t/t5333-pseudo-merge-bitmaps.sh b/t/t5333-pseudo-merge-bitmaps.sh index 3d7a7668121f49..5411fbf1e04516 100755 --- a/t/t5333-pseudo-merge-bitmaps.sh +++ b/t/t5333-pseudo-merge-bitmaps.sh @@ -496,7 +496,7 @@ test_expect_success 'apply pseudo-merges during fill-in traversal' ' ) ' -test_expect_failure 'apply pseudo-merges from multiple groups during fill-in' ' +test_expect_success 'apply pseudo-merges from multiple groups during fill-in' ' test_when_finished "rm -fr pseudo-merge-fill-in-multi" && git init pseudo-merge-fill-in-multi && ( From 8b5f199f302869cc60cf9390ce46003b6b3fab48 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 11 May 2026 20:47:00 -0400 Subject: [PATCH 05/93] pack-bitmap: fix pseudo-merge lookup for shared commits When a commit appears in more than one pseudo-merge group, its entry in the commit lookup table has the high bit set in its offset field, indicating that the offset points to an "extended" table containing the set of pseudo-merges for that commit. There are three bugs in this path: * The `next_ext` offset in `write_pseudo_merges()` undercounts the per-entry size of the lookup table (8 vs. 12 bytes). * `nth_pseudo_merge_ext()` calls `read_pseudo_merge_commit_at()` on a pseudo-merge bitmap offset, misinterpreting it as a 12-byte commit table entry. 
* The error check after `pseudo_merge_ext_at()` in `apply_pseudo_merges_for_commit()` tests `< -1` instead of `< 0`, silently swallowing errors from `error()`. The first bug is on the write side: each commit lookup entry contains a 4- and 8-byte unsigned value for a total of 12 bytes, but the calculation assumes that the entry only contains 8 bytes of data. This makes `next_ext` too small, so the extended-table offsets that get written point into the middle of the non-extended lookup table rather than past it. The reader then interprets non-extended lookup data as extended entries, producing garbage. The second bug is on the read side and is independently fatal: even with a correctly positioned extended table, `nth_pseudo_merge_ext()` feeds the offset it reads (which points at pseudo-merge bitmap data) to `read_pseudo_merge_commit_at()`. That function tries to parse 12 bytes as a `pseudo_merge_commit` struct, clobbering `merge->pseudo_merge_ofs` with whatever happens to be at that location. The caller only needs `pseudo_merge_ofs`, so the fix is to store the offset directly rather than re-parsing a commit table entry. The `commit_pos` field is left untouched, retaining the value that `find_pseudo_merge()` set earlier. The third bug is latent. With the first two fixes applied, the extended table is correctly written and read, so `pseudo_merge_ext_at()` does not fail during normal operation. The `< -1` vs `< 0` distinction only matters when the bitmap file is corrupt or truncated, in which case the error would be silently ignored and the code would proceed with uninitialized data. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pack-bitmap-write.c | 2 +- pseudo-merge.c | 4 ++-- t/t5333-pseudo-merge-bitmaps.sh | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 86ed6a5d78cd04..1c8070f99c03ca 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -877,7 +877,7 @@ static void write_pseudo_merges(struct bitmap_writer *writer, next_ext = st_add(hashfile_total(f), st_mult(kh_size(writer->pseudo_merge_commits), - sizeof(uint64_t))); + sizeof(uint32_t) + sizeof(uint64_t))); table_start = hashfile_total(f); diff --git a/pseudo-merge.c b/pseudo-merge.c index fb71c7617924a7..34e1da00b4e200 100644 --- a/pseudo-merge.c +++ b/pseudo-merge.c @@ -600,7 +600,7 @@ static int nth_pseudo_merge_ext(const struct pseudo_merge_map *pm, return error(_("out-of-bounds read: (%"PRIuMAX" >= %"PRIuMAX")"), (uintmax_t)ofs, (uintmax_t)pm->map_size); - read_pseudo_merge_commit_at(merge, pm->map + ofs); + merge->pseudo_merge_ofs = ofs; return 0; } @@ -671,7 +671,7 @@ int apply_pseudo_merges_for_commit(const struct pseudo_merge_map *pm, off_t ofs = merge_commit.pseudo_merge_ofs & ~((uint64_t)1<<63); uint32_t i; - if (pseudo_merge_ext_at(pm, &ext, ofs) < -1) { + if (pseudo_merge_ext_at(pm, &ext, ofs) < 0) { warning(_("could not read extended pseudo-merge table " "for commit %s"), oid_to_hex(&commit->object.oid)); diff --git a/t/t5333-pseudo-merge-bitmaps.sh b/t/t5333-pseudo-merge-bitmaps.sh index 5411fbf1e04516..90459da5e63c31 100755 --- a/t/t5333-pseudo-merge-bitmaps.sh +++ b/t/t5333-pseudo-merge-bitmaps.sh @@ -549,7 +549,7 @@ test_expect_success 'apply pseudo-merges from multiple groups during fill-in' ' ) ' -test_expect_failure 'apply pseudo-merges with overlapping groups during fill-in' ' +test_expect_success 'apply pseudo-merges with overlapping groups during fill-in' ' test_when_finished "rm -fr pseudo-merge-fill-in-overlap" && git init pseudo-merge-fill-in-overlap && ( From 
78e85e05f3341c70d1b9a147eef8c61478cc15f9 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 11 May 2026 20:47:03 -0400 Subject: [PATCH 06/93] pack-bitmap: parse commits in `find_pseudo_merge_group_for_ref()` `find_pseudo_merge_group_for_ref()` uses the commit's date to classify it as either "stable" (older than the stable threshold) or "unstable" (otherwise). However, to find the relevant commit from a given OID, the function `find_pseudo_merge_group_for_ref()` uses `lookup_commit()`, which does not parse commits. Because an unparsed commit has its "date" set to zero, every candidate is placed in the "stable" bucket regardless of its actual committer timestamp. This means the `bitmapPseudoMerge.*.threshold` and `stableThreshold` configuration options have no effect: the stable/unstable split is always determined by comparing against zero rather than the real commit date. The net result is that pseudo-merge groups are partitioned by `stableSize` instead of the intended decay-based sizing, and the `sampleRate` knob (which only applies to the unstable path) is never exercised. Fix this by calling `repo_parse_commit()` after `lookup_commit()`, bailing out of the callback if parsing fails. The corresponding test configures two pseudo-merge groups that both match all tags. The "stable" group uses `threshold=1.month.ago`, and the "all" group uses `threshold=now`. The test uses our custom "GIT_TEST_DATE_NOW" environment variable by setting it to the value of "$test_tick" to align Git's notion of "now" (and therefore "1.month.ago") with the `test_tick` timestamps, so the commits appear to be younger than one month: only the "all" group matches them, producing exactly one pseudo-merge. Without the fix every commit has `date == 0`, which satisfies `date <= threshold` for both groups (since 0 is older than one month ago), and the "stable" group erroneously matches as well. 
Now that commits are correctly classified as "unstable", the bug described in the test exercising the "sampleRate=0" test is reachable, and the test is marked as failing. It will be fixed in a following commit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pseudo-merge.c | 2 ++ t/t5333-pseudo-merge-bitmaps.sh | 22 ++++++++++++---------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/pseudo-merge.c b/pseudo-merge.c index 34e1da00b4e200..d79e5fb649a8b5 100644 --- a/pseudo-merge.c +++ b/pseudo-merge.c @@ -236,6 +236,8 @@ static int find_pseudo_merge_group_for_ref(const struct reference *ref, void *_d c = lookup_commit(the_repository, maybe_peeled); if (!c) return 0; + if (repo_parse_commit(the_repository, c)) + return 0; if (!packlist_find(writer->to_pack, maybe_peeled)) return 0; diff --git a/t/t5333-pseudo-merge-bitmaps.sh b/t/t5333-pseudo-merge-bitmaps.sh index 90459da5e63c31..0032a16606bd70 100755 --- a/t/t5333-pseudo-merge-bitmaps.sh +++ b/t/t5333-pseudo-merge-bitmaps.sh @@ -592,32 +592,34 @@ test_expect_success 'apply pseudo-merges with overlapping groups during fill-in' ) ' -test_expect_failure 'pseudo-merge commits are correctly classified by date' ' +test_expect_success 'pseudo-merge commits are correctly classified by date' ' test_when_finished "rm -fr pseudo-merge-date-classification" && git init pseudo-merge-date-classification && ( cd pseudo-merge-date-classification && test_commit_bulk 64 && + tag_everything && git repack -ad && pack="$(ls .git/objects/pack/pack-*.pack)" && # Configure two pseudo-merge groups: one that only - # matches "stable" refs (older than one month), and one - # that matches all refs. With 64 freshly-created tags - # (all younger than one month) the stable group should - # have zero pseudo-merges and the catch-all group should - # have one. + # matches "stable" refs (older than one month), and + # one that matches all refs. 
With 64 tags whose + # commits are all younger than one month, the + # "stable" group should have zero pseudo-merges and + # the "all" group should have one. # # Use GIT_TEST_DATE_NOW to align "now" (and therefore # "1.month.ago") with the test_tick timestamps so that # the commits are within the last month. # - # This exercises the date-based classification in - # find_pseudo_merge_group_for_ref(), which requires - # that commits are parsed before inspecting their date. + # Without parsing the commit, its date field would + # be zero, causing it to satisfy date <= threshold + # for the "stable" group as well, and both groups + # would produce pseudo-merges. git config bitmapPseudoMerge.stable.pattern "refs/tags/" && git config bitmapPseudoMerge.stable.maxMerges 64 && git config bitmapPseudoMerge.stable.stableThreshold never && @@ -637,7 +639,7 @@ test_expect_failure 'pseudo-merge commits are correctly classified by date' ' ) ' -test_expect_success 'sampleRate=0 does not cause division by zero' ' +test_expect_failure 'sampleRate=0 does not cause division by zero' ' test_when_finished "rm -fr pseudo-merge-sample-rate-zero" && git init pseudo-merge-sample-rate-zero && ( From 03c7a30ceeaee8d70ff2e2cbd9b4bce896841bfa Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 11 May 2026 20:47:06 -0400 Subject: [PATCH 07/93] pack-bitmap: reject pseudo-merge "sampleRate" of 0 The "bitmapPseudoMerge.*.sampleRate" configuration controls what fraction of unstable commits are included in each pseudo-merge group. The config validation accepts values in the range `[0, 1]`, but a value of exactly 0 causes a division by zero in `select_pseudo_merges_1()`: if (j % (uint32_t)(1.0 / group->sample_rate)) When `sample_rate` is 0, `1.0 / 0.0` produces `+inf`, and casting infinity to `uint32_t` is undefined behavior in C. On most platforms this yields 0, making the subsequent modulo operation (`j % 0`) a fatal arithmetic trap. 
This path was not previously reachable because an earlier bug caused all pseudo-merge candidates to be classified as "stable" (where the sampling rate is not used), regardless of their actual commit date. Now that the date classification is fixed, the unstable path is exercised and the division by zero can fire. Fix this by changing the validation to require a strict lower bound and thus reject 0. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/config/bitmap-pseudo-merge.adoc | 4 ++-- pseudo-merge.c | 4 ++-- t/t5333-pseudo-merge-bitmaps.sh | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/config/bitmap-pseudo-merge.adoc b/Documentation/config/bitmap-pseudo-merge.adoc index 1f264eca99b847..6bf52c80ba786c 100644 --- a/Documentation/config/bitmap-pseudo-merge.adoc +++ b/Documentation/config/bitmap-pseudo-merge.adoc @@ -47,8 +47,8 @@ will be updated more often than a reference pointing at an old commit. bitmapPseudoMerge..sampleRate:: Determines the proportion of non-bitmapped commits (among reference tips) which are selected for inclusion in an - unstable pseudo-merge bitmap. Must be between `0` and `1` - (inclusive). The default is `1`. + unstable pseudo-merge bitmap. Must be greater than `0` and + less than or equal to `1`. The default is `1`. 
bitmapPseudoMerge..threshold:: Determines the minimum age of non-bitmapped commits (among diff --git a/pseudo-merge.c b/pseudo-merge.c index d79e5fb649a8b5..75bed043602744 100644 --- a/pseudo-merge.c +++ b/pseudo-merge.c @@ -169,8 +169,8 @@ static int pseudo_merge_config(const char *var, const char *value, } } else if (!strcmp(key, "samplerate")) { group->sample_rate = git_config_double(var, value, ctx->kvi); - if (!(0 <= group->sample_rate && group->sample_rate <= 1)) { - warning(_("%s must be between 0 and 1, using default"), var); + if (!(0 < group->sample_rate && group->sample_rate <= 1)) { + warning(_("%s must be between 0 (exclusive) and 1, using default"), var); group->sample_rate = DEFAULT_PSEUDO_MERGE_SAMPLE_RATE; } } else if (!strcmp(key, "threshold")) { diff --git a/t/t5333-pseudo-merge-bitmaps.sh b/t/t5333-pseudo-merge-bitmaps.sh index 0032a16606bd70..5bfbbd4214e1cf 100755 --- a/t/t5333-pseudo-merge-bitmaps.sh +++ b/t/t5333-pseudo-merge-bitmaps.sh @@ -639,7 +639,7 @@ test_expect_success 'pseudo-merge commits are correctly classified by date' ' ) ' -test_expect_failure 'sampleRate=0 does not cause division by zero' ' +test_expect_success 'sampleRate=0 does not cause division by zero' ' test_when_finished "rm -fr pseudo-merge-sample-rate-zero" && git init pseudo-merge-sample-rate-zero && ( From 84780db63657057bee11d898ad6c211730d4212f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 11 May 2026 20:47:09 -0400 Subject: [PATCH 08/93] Documentation: fix broken `sampleRate` in gitpacking(7) The documentation explaining some sample configurations for bitmap pseudo-merges incorrectly uses a sample rate outside of the allowed (0,1] range. This dates back to faf558b23ef (pseudo-merge: implement support for selecting pseudo-merge commits, 2024-05-23), and was likely written when the allowable range for this configuration was the integral values between (0,100]. Fix this to conform to the actual allowable range for this configuration. 
Noticed-by: Elijah Newren Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/gitpacking.adoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/gitpacking.adoc b/Documentation/gitpacking.adoc index a56596e2d1d84d..e6de6ec8249172 100644 --- a/Documentation/gitpacking.adoc +++ b/Documentation/gitpacking.adoc @@ -150,7 +150,7 @@ with a configuration like so: pattern = "refs/" threshold = now stableThreshold = never - sampleRate = 100 + sampleRate = 1 maxMerges = 64 ---- @@ -177,7 +177,7 @@ like: pattern = "refs/virtual/([0-9]+)/(heads|tags)/" threshold = now stableThreshold = never - sampleRate = 100 + sampleRate = 1 maxMerges = 64 ---- From 5e6e8dc7860374d79bad3e2a3ade0c2d391bbad6 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 11 May 2026 20:47:12 -0400 Subject: [PATCH 09/93] pack-bitmap: prevent pattern leak on pseudo-merge re-assignment When "bitmapPseudoMerge.*.pattern" appears more than once for the same group, `pseudo_merge_config()` frees the old `regex_t *` pointer but does not call `regfree()` on it first. This leaks whatever internal state `regcomp()` allocated. The final cleanup path in `pseudo_merge_group_release()` does call `regfree()` before `free()`, so only the intermediate replacement is affected. Fix this by guarding the replacement with a NULL check and calling `regfree()` before `free()` when the pointer is non-NULL. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pseudo-merge.c | 5 ++++- t/t5333-pseudo-merge-bitmaps.sh | 29 +++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/pseudo-merge.c b/pseudo-merge.c index 75bed043602744..22b8600d689de5 100644 --- a/pseudo-merge.c +++ b/pseudo-merge.c @@ -150,7 +150,10 @@ static int pseudo_merge_config(const char *var, const char *value, if (!strcmp(key, "pattern")) { struct strbuf re = STRBUF_INIT; - free(group->pattern); + if (group->pattern) { + regfree(group->pattern); + free(group->pattern); + } if (*value != '^') strbuf_addch(&re, '^'); strbuf_addstr(&re, value); diff --git a/t/t5333-pseudo-merge-bitmaps.sh b/t/t5333-pseudo-merge-bitmaps.sh index 5bfbbd4214e1cf..305d6771082d55 100755 --- a/t/t5333-pseudo-merge-bitmaps.sh +++ b/t/t5333-pseudo-merge-bitmaps.sh @@ -662,4 +662,33 @@ test_expect_success 'sampleRate=0 does not cause division by zero' ' ) ' +test_expect_success 'duplicate pseudo-merge pattern does not leak' ' + test_when_finished "rm -fr pseudo-merge-dup-pattern" && + git init pseudo-merge-dup-pattern && + ( + cd pseudo-merge-dup-pattern && + + test_commit_bulk 64 && + tag_everything && + git repack -ad && + + pack=$(ls .git/objects/pack/pack-*.pack) && + + # Set the same group'\''s pattern twice. The second + # assignment should cleanly release the compiled regex + # from the first without leaking. 
+ git config bitmapPseudoMerge.test.pattern "refs/tags/" && + git config --add bitmapPseudoMerge.test.pattern "refs/tags/" && + git config bitmapPseudoMerge.test.maxMerges 1 && + git config bitmapPseudoMerge.test.threshold now && + git config bitmapPseudoMerge.test.stableThreshold never && + + git rev-parse HEAD~63 >in && + test-tool bitmap write "$(basename $pack)" merges && + test_line_count = 1 merges + ) +' + test_done From 0acbaf9000bd50bfbec24e5c40ab96bbd5e79281 Mon Sep 17 00:00:00 2001 From: Kristofer Karlsson Date: Tue, 12 May 2026 06:11:26 +0000 Subject: [PATCH 10/93] merge: use repo_in_merge_bases for octopus up-to-date check The octopus merge path checks whether each remote head is already an ancestor of HEAD by computing all merge-bases via repo_get_merge_bases() and comparing the first result's OID to the remote head. This is more expensive than necessary: repo_get_merge_bases() calls paint_down_to_common() with min_generation=0, performs the full STALE drain, and may run remove_redundant(), when all we need is a yes/no reachability answer. Replace this with repo_in_merge_bases(), which answers the is-ancestor question directly. When generation numbers are available, repo_in_merge_bases() uses can_all_from_reach() -- a DFS bounded by generation number that stops as soon as the target is found or ruled out, without entering paint_down_to_common() at all. Without generation numbers, it still benefits from a tighter min_generation floor. 
Signed-off-by: Kristofer Karlsson Signed-off-by: Junio C Hamano --- builtin/merge.c | 18 ++++-------------- t/t6408-merge-up-to-date.sh | 10 ++++++++++ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/builtin/merge.c b/builtin/merge.c index 2cbce56f8da9f7..862107cf412c3c 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -1735,21 +1735,11 @@ int cmd_merge(int argc, struct commit_list *j; for (j = remoteheads; j; j = j->next) { - struct commit_list *common_one = NULL; - struct commit *common_item; - - /* - * Here we *have* to calculate the individual - * merge_bases again, otherwise "git merge HEAD^ - * HEAD^^" would be missed. - */ - if (repo_get_merge_bases(the_repository, head_commit, - j->item, &common_one) < 0) + int ret = repo_in_merge_bases(the_repository, + j->item, head_commit); + if (ret < 0) exit(128); - - common_item = common_one->item; - commit_list_free(common_one); - if (!oideq(&common_item->object.oid, &j->item->object.oid)) { + if (!ret) { up_to_date = 0; break; } diff --git a/t/t6408-merge-up-to-date.sh b/t/t6408-merge-up-to-date.sh index 7763c1ba98080d..be0840efb697f2 100755 --- a/t/t6408-merge-up-to-date.sh +++ b/t/t6408-merge-up-to-date.sh @@ -89,4 +89,14 @@ test_expect_success 'merge fast-forward octopus' ' test "$expect" = "$current" ' +test_expect_success 'merge octopus already up to date' ' + + git reset --hard c2 && + test_tick && + git merge c0 c1 && + expect=$(git rev-parse c2) && + current=$(git rev-parse HEAD) && + test "$expect" = "$current" +' + test_done From 7f582faa060f958410fe53091553c13edd953376 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Thu, 14 May 2026 16:25:25 +0000 Subject: [PATCH 11/93] promisor-remote: document caller filtering contract promisor_remote_get_direct() does not, on its happy path, filter out OIDs that are already present in the local object store: every OID the caller supplies is written to the fetch subprocess's stdin and ends up in the response pack. 
The only filtering it performs is in remove_fetched_oids(), and that only runs after a fetch failure when falling back to a different configured promisor remote. Almost every existing caller already filters locally-present OIDs out itself (typically with odb_read_object_info_extended() and OBJECT_INFO_FOR_PREFETCH, or odb_has_object() with no fetch flag). But the existing API comment does not state this expectation, so a new caller is easy to write incorrectly (I missed this originally and wrote two problematic callers). Omitting the filter still "works" in the sense that the desired objects end up local, but it silently makes the fetch request -- and the response pack -- larger than necessary, defeating part of the point of batching. Spell the contract out so future callers know to filter (and deduplicate) themselves, and point them at the helpers they should use to check local presence without accidentally triggering a lazy fetch. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- promisor-remote.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/promisor-remote.h b/promisor-remote.h index 3d4d2de01818ea..301f5ac5cb4b58 100644 --- a/promisor-remote.h +++ b/promisor-remote.h @@ -29,6 +29,17 @@ int repo_has_promisor_remote(struct repository *r); * Fetches all requested objects from all promisor remotes, trying them one at * a time until all objects are fetched. * + * Callers are responsible for filtering out OIDs that are already present + * locally before calling this function: every supplied OID is sent in the + * fetch request, even if the object already exists in the local object + * store. (Only after a fetch failure does this function fall back to + * stripping already-present OIDs from the list before trying the next + * configured promisor remote.) Callers should also deduplicate the OIDs. 
+ * + * To test for local presence without triggering a lazy fetch (which would + * defeat the purpose of batching), use odb_has_object(..., 0) or + * odb_read_object_info_extended() with OBJECT_INFO_FOR_PREFETCH. + * * If oid_nr is 0, this function returns immediately. */ void promisor_remote_get_direct(struct repository *repo, From adfb1e4993074fd478c96d3b9da30706e5dd030a Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Thu, 14 May 2026 16:25:26 +0000 Subject: [PATCH 12/93] patch-ids.h: add missing trailing parenthesis in documentation comment Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- patch-ids.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/patch-ids.h b/patch-ids.h index 490d739371666e..57534ee7222560 100644 --- a/patch-ids.h +++ b/patch-ids.h @@ -37,7 +37,7 @@ int has_commit_patch_id(struct commit *commit, struct patch_ids *); * struct patch_id *cur; * for (cur = patch_id_iter_first(commit, ids); * cur; - * cur = patch_id_iter_next(cur, ids) { + * cur = patch_id_iter_next(cur, ids)) { * ... look at cur->commit * } */ From 463c1bfc2b65357569055916e348e1bd9c7a5b25 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Thu, 14 May 2026 16:25:27 +0000 Subject: [PATCH 13/93] builtin/log: prefetch necessary blobs for `git cherry` In partial clones, `git cherry` fetches necessary blobs on-demand one at a time, which can be very slow. We would like to prefetch all necessary blobs upfront. To do so, we need to be able to first figure out which blobs are needed. `git cherry` does its work in a two-phase approach: first computing header-only IDs (based on file paths and modes), then falling back to full content-based IDs only when header-only IDs collide -- or, more accurately, whenever the oidhash() of the header-only object_ids collide. 
patch-ids.c handles this by creating an ids->patches hashmap that has all the data we need, but the problem is that any attempt to query the hashmap will invoke the patch_id_neq() function on any colliding objects, which causes the on-demand fetching. Insert a new prefetch_cherry_blobs() function before checking for collisions. Use a temporary replacement on the ids->patches.cmpfn in order to enumerate the blobs that would be needed without yet fetching them, and then fetch them all at once, then restore the old ids->patches.cmpfn. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- builtin/log.c | 131 ++++++++++++++++++++++++++++++++++++++++++++++ t/t3500-cherry.sh | 27 ++++++++++ 2 files changed, 158 insertions(+) diff --git a/builtin/log.c b/builtin/log.c index 8c0939dd42ada2..e464b30af4bcae 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -21,10 +21,12 @@ #include "color.h" #include "commit.h" #include "diff.h" +#include "diffcore.h" #include "diff-merges.h" #include "revision.h" #include "log-tree.h" #include "oid-array.h" +#include "oidset.h" #include "tag.h" #include "reflog-walk.h" #include "patch-ids.h" @@ -43,9 +45,11 @@ #include "utf8.h" #include "commit-reach.h" +#include "promisor-remote.h" #include "range-diff.h" #include "tmp-objdir.h" #include "tree.h" +#include "userdiff.h" #include "write-or-die.h" #define MAIL_DEFAULT_WRAP 72 @@ -2602,6 +2606,131 @@ static void print_commit(char sign, struct commit *commit, int verbose, } } +/* + * Enumerate blob OIDs from a single commit's diff, inserting them into blobs. + * Skips files whose userdiff driver explicitly declares binary status + * (drv->binary > 0), since patch-ID uses oid_to_hex() for those and + * never reads blob content. Use userdiff_find_by_path() since + * diff_filespec_load_driver() is static in diff.c. + * + * Clean up with diff_queue_clear() (from diffcore.h). 
+ */ +static void collect_diff_blob_oids(struct commit *commit, + struct diff_options *opts, + struct oidset *blobs) +{ + struct diff_queue_struct *q; + + /* + * Merge commits are filtered out by patch_id_defined() in patch-ids.c, + * so we'll never be called with one. + */ + assert(!commit->parents || !commit->parents->next); + + if (commit->parents) + diff_tree_oid(&commit->parents->item->object.oid, + &commit->object.oid, "", opts); + else + diff_root_tree_oid(&commit->object.oid, "", opts); + diffcore_std(opts); + + q = &diff_queued_diff; + for (int i = 0; i < q->nr; i++) { + struct diff_filepair *p = q->queue[i]; + struct userdiff_driver *drv; + + /* Skip binary files */ + drv = userdiff_find_by_path(opts->repo->index, p->one->path); + if (drv && drv->binary > 0) + continue; + + if (DIFF_FILE_VALID(p->one) && + odb_read_object_info_extended(opts->repo->objects, + &p->one->oid, NULL, + OBJECT_INFO_FOR_PREFETCH)) + oidset_insert(blobs, &p->one->oid); + if (DIFF_FILE_VALID(p->two) && + odb_read_object_info_extended(opts->repo->objects, + &p->two->oid, NULL, + OBJECT_INFO_FOR_PREFETCH)) + oidset_insert(blobs, &p->two->oid); + } + diff_queue_clear(q); +} + +static int always_match(const void *cmp_data UNUSED, + const struct hashmap_entry *entry1 UNUSED, + const struct hashmap_entry *entry2 UNUSED, + const void *keydata UNUSED) +{ + return 0; +} + +/* + * Prefetch blobs for git cherry in partial clones. + * + * Called between the revision walk (which builds the head-side + * commit list) and the has_commit_patch_id() comparison loop. + * + * Uses a cmpfn-swap trick to avoid reading blobs: temporarily + * replaces the hashmap's comparison function with a trivial + * always-match function, so hashmap_get()/hashmap_get_next() match + * any entry with the same oidhash bucket. These are the set of oids + * that would trigger patch_id_neq() during normal lookup and cause + * blobs to be read on demand, and we want to prefetch them all at + * once instead. 
+ */ +static void prefetch_cherry_blobs(struct repository *repo, + struct commit_list *list, + struct patch_ids *ids) +{ + struct oidset blobs = OIDSET_INIT; + hashmap_cmp_fn original_cmpfn; + + /* Exit if we're not in a partial clone */ + if (!repo_has_promisor_remote(repo)) + return; + + /* Save original cmpfn, replace with always_match */ + original_cmpfn = ids->patches.cmpfn; + ids->patches.cmpfn = always_match; + + /* Find header-only collisions, gather blobs from those commits */ + for (struct commit_list *l = list; l; l = l->next) { + struct commit *c = l->item; + bool match_found = false; + for (struct patch_id *cur = patch_id_iter_first(c, ids); + cur; + cur = patch_id_iter_next(cur, ids)) { + match_found = true; + collect_diff_blob_oids(cur->commit, &ids->diffopts, + &blobs); + } + if (match_found) + collect_diff_blob_oids(c, &ids->diffopts, &blobs); + } + + /* Restore original cmpfn */ + ids->patches.cmpfn = original_cmpfn; + + /* If we have any blobs to fetch, fetch them */ + if (oidset_size(&blobs)) { + struct oid_array to_fetch = OID_ARRAY_INIT; + struct oidset_iter iter; + const struct object_id *oid; + + oidset_iter_init(&blobs, &iter); + while ((oid = oidset_iter_next(&iter))) + oid_array_append(&to_fetch, oid); + + promisor_remote_get_direct(repo, to_fetch.oid, to_fetch.nr); + + oid_array_clear(&to_fetch); + } + + oidset_clear(&blobs); +} + int cmd_cherry(int argc, const char **argv, const char *prefix, @@ -2673,6 +2802,8 @@ int cmd_cherry(int argc, commit_list_insert(commit, &list); } + prefetch_cherry_blobs(the_repository, list, &ids); + for (struct commit_list *l = list; l; l = l->next) { char sign = '+'; diff --git a/t/t3500-cherry.sh b/t/t3500-cherry.sh index 78c3eac54b599e..3e66827d7641d8 100755 --- a/t/t3500-cherry.sh +++ b/t/t3500-cherry.sh @@ -78,4 +78,31 @@ test_expect_success 'cherry ignores whitespace' ' test_cmp expect actual ' +# Reuse the expect file from the previous test, in a partial clone +test_expect_success 'cherry in partial 
clone does bulk prefetch' ' + test_config uploadpack.allowfilter 1 && + test_config uploadpack.allowanysha1inwant 1 && + test_when_finished "rm -rf copy" && + + git clone --bare --filter=blob:none file://"$(pwd)" copy && + ( + cd copy && + GIT_TRACE2_EVENT="$(pwd)/trace.output" git cherry upstream-with-space feature-without-space >actual && + test_cmp ../expect actual && + + grep "child_start.*fetch.negotiationAlgorithm" trace.output >fetches && + test_line_count = 1 fetches && + test_trace2_data promisor fetch_count 4 actual && + test_cmp ../expect actual && + + ! grep "child_start.*fetch.negotiationAlgorithm" trace2.output && + ! grep "\"key\":\"fetch_count\"" trace2.output + ) +' + test_done From 854061ea5484fffc6c54941332115abbd7fc950f Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Thu, 14 May 2026 16:25:28 +0000 Subject: [PATCH 14/93] grep: prefetch necessary blobs In partial clones, `git grep` fetches necessary blobs on-demand one at a time, which can be very slow. In partial clones, add an extra preliminary walk over the tree similar to grep_tree() which collects the blobs of interest, and then prefetches them. 
Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- builtin/grep.c | 143 ++++++++++++++++++++++++++++++++++++++++++++++++ t/t7810-grep.sh | 58 ++++++++++++++++++++ 2 files changed, 201 insertions(+) diff --git a/builtin/grep.c b/builtin/grep.c index e33285e5e69289..85656d8d3f4328 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -28,9 +28,12 @@ #include "object-file.h" #include "object-name.h" #include "odb.h" +#include "oid-array.h" +#include "oidset.h" #include "packfile.h" #include "pager.h" #include "path.h" +#include "promisor-remote.h" #include "read-cache-ll.h" #include "write-or-die.h" @@ -692,6 +695,144 @@ static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec, return hit; } +static void collect_blob_oids_for_tree(struct repository *repo, + const struct pathspec *pathspec, + struct tree_desc *tree, + struct strbuf *base, + int tn_len, + struct oidset *blob_oids) +{ + struct name_entry entry; + int old_baselen = base->len; + struct strbuf name = STRBUF_INIT; + enum interesting match = entry_not_interesting; + + while (tree_entry(tree, &entry)) { + if (match != all_entries_interesting) { + strbuf_addstr(&name, base->buf + tn_len); + match = tree_entry_interesting(repo->index, + &entry, &name, + pathspec); + strbuf_reset(&name); + + if (match == all_entries_not_interesting) + break; + if (match == entry_not_interesting) + continue; + } + + strbuf_add(base, entry.path, tree_entry_len(&entry)); + + if (S_ISREG(entry.mode)) { + if (!odb_has_object(repo->objects, &entry.oid, 0)) + oidset_insert(blob_oids, &entry.oid); + } else if (S_ISDIR(entry.mode)) { + enum object_type type; + struct tree_desc sub_tree; + void *data; + unsigned long size; + + data = odb_read_object(repo->objects, &entry.oid, + &type, &size); + if (!data) + die(_("unable to read tree (%s)"), + oid_to_hex(&entry.oid)); + + strbuf_addch(base, '/'); + init_tree_desc(&sub_tree, &entry.oid, data, size); + collect_blob_oids_for_tree(repo, pathspec, &sub_tree, + base, 
tn_len, blob_oids); + free(data); + } + /* + * ...no else clause for S_ISGITLINK: submodules have their + * own promisor configuration and would need separate fetches + * anyway. + */ + + strbuf_setlen(base, old_baselen); + } + + strbuf_release(&name); +} + +static void collect_blob_oids_for_treeish(struct grep_opt *opt, + const struct pathspec *pathspec, + const struct object_id *tree_ish_oid, + const char *name, + struct oidset *blob_oids) +{ + struct tree_desc tree; + void *data; + unsigned long size; + struct strbuf base = STRBUF_INIT; + int len; + + data = odb_read_object_peeled(opt->repo->objects, tree_ish_oid, + OBJ_TREE, &size, NULL); + + if (!data) + return; + + len = name ? strlen(name) : 0; + if (len) { + strbuf_add(&base, name, len); + strbuf_addch(&base, ':'); + } + init_tree_desc(&tree, tree_ish_oid, data, size); + + collect_blob_oids_for_tree(opt->repo, pathspec, &tree, + &base, base.len, blob_oids); + + strbuf_release(&base); + free(data); +} + +static void prefetch_grep_blobs(struct grep_opt *opt, + const struct pathspec *pathspec, + const struct object_array *list) +{ + struct oidset blob_oids = OIDSET_INIT; + + /* Exit if we're not in a partial clone */ + if (!repo_has_promisor_remote(opt->repo)) + return; + + /* For each tree, gather the blobs in it */ + for (int i = 0; i < list->nr; i++) { + struct object *real_obj; + + obj_read_lock(); + real_obj = deref_tag(opt->repo, list->objects[i].item, + NULL, 0); + obj_read_unlock(); + + if (real_obj && + (real_obj->type == OBJ_COMMIT || + real_obj->type == OBJ_TREE)) + collect_blob_oids_for_treeish(opt, pathspec, + &real_obj->oid, + list->objects[i].name, + &blob_oids); + } + + /* Prefetch the blobs we found */ + if (oidset_size(&blob_oids)) { + struct oid_array to_fetch = OID_ARRAY_INIT; + struct oidset_iter iter; + const struct object_id *oid; + + oidset_iter_init(&blob_oids, &iter); + while ((oid = oidset_iter_next(&iter))) + oid_array_append(&to_fetch, oid); + + 
promisor_remote_get_direct(opt->repo, to_fetch.oid, to_fetch.nr); + + oid_array_clear(&to_fetch); + } + oidset_clear(&blob_oids); +} + static int grep_object(struct grep_opt *opt, const struct pathspec *pathspec, struct object *obj, const char *name, const char *path) { @@ -732,6 +873,8 @@ static int grep_objects(struct grep_opt *opt, const struct pathspec *pathspec, int hit = 0; const unsigned int nr = list->nr; + prefetch_grep_blobs(opt, pathspec, list); + for (i = 0; i < nr; i++) { struct object *real_obj; diff --git a/t/t7810-grep.sh b/t/t7810-grep.sh index 64ac4f04ee97ad..3d08fd2a0c70b1 100755 --- a/t/t7810-grep.sh +++ b/t/t7810-grep.sh @@ -1929,4 +1929,62 @@ test_expect_success 'grep does not report i-t-a and assume unchanged with -L' ' test_cmp expected actual ' +test_expect_success 'grep of revision in partial clone batches prefetch and honors pathspec' ' + test_when_finished "rm -rf grep-partial-src grep-partial" && + + git init grep-partial-src && + ( + cd grep-partial-src && + git config uploadpack.allowfilter 1 && + git config uploadpack.allowanysha1inwant 1 && + mkdir a b && + echo "needle in haystack" >a/matches.txt && + echo "nothing to see here" >a/nomatch.txt && + echo "needle again" >b/matches.md && + git add . && + git commit -m "initial" + ) && + + git clone --no-checkout --filter=blob:none \ + "file://$(pwd)/grep-partial-src" grep-partial && + + # All three blobs are missing immediately after a blobless clone. + git -C grep-partial rev-list --quiet --objects \ + --missing=print HEAD >missing && + test_line_count = 3 missing && + + # A pathspec-limited grep should prefetch only the two blobs + # in a/. It should fetch both blobs in one batched request. + GIT_TRACE2_EVENT="$(pwd)/grep-trace-pathspec" \ + git -C grep-partial grep -c "needle" HEAD -- "a/*.txt" >result && + + # Only a/matches.txt contains "needle" among the matched paths. 
+ test_line_count = 1 result && + + # Exactly the two a/*.txt blobs should have been requested, and + # the server packed those two objects in the response. + test_trace2_data promisor fetch_count 2 missing && + test_line_count = 1 missing && + + # A second grep without a pathspec must recurse into both + # subdirectories, but should request only the still-missing blob + # from the promisor. + GIT_TRACE2_EVENT="$(pwd)/grep-trace-all" \ + git -C grep-partial grep -c "needle" HEAD >result && + + test_line_count = 2 result && + test_trace2_data promisor fetch_count 1 missing && + test_line_count = 0 missing +' + test_done From ef8d51a8a3e1c57201aa2c116ad27b0db580123a Mon Sep 17 00:00:00 2001 From: Kristofer Karlsson Date: Thu, 14 May 2026 16:51:31 +0000 Subject: [PATCH 15/93] revision: use priority queue in limit_list() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit limit_list() maintains a date-sorted work queue of commits using a linked list with commit_list_insert_by_date() for insertion. Each insertion walks the list to find the right position — O(n) per insert. In repositories with merge-heavy histories, the symmetric difference can contain thousands of commits, making this O(n) insertion the dominant cost. Replace the sorted linked list with a prio_queue (binary heap). This gives O(log n) insertion and O(log n) extraction instead of O(n) insertion and O(1) extraction, which is a net win when the queue is large. The still_interesting() and everybody_uninteresting() helpers are updated to scan the prio_queue's contiguous array instead of walking a linked list. process_parents() already accepts both a commit_list and a prio_queue parameter, so the change in limit_list() simply switches which one is passed. 
Benchmark: git rev-list --left-right --count HEAD~N...HEAD Repository: 2.3M commits, merge-heavy DAG (monorepo) Best of 5 runs, times in seconds: commits in symmetric diff baseline patched speedup -------------- -------- ------- ------- 10 0.01 0.01 1.0x 50 0.01 0.01 1.0x 3751 21.23 8.49 2.5x 4524 21.70 8.29 2.6x 10130 20.10 6.65 3.0x No change for small traversals; 2.5-3.0x faster when the queue grows to thousands of commits. Signed-off-by: Kristofer Karlsson Signed-off-by: Junio C Hamano --- revision.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/revision.c b/revision.c index 599b3a66c369ca..2b1b3bb10e7ca1 100644 --- a/revision.c +++ b/revision.c @@ -473,10 +473,10 @@ static struct commit *handle_commit(struct rev_info *revs, die("%s is unknown object", name); } -static int everybody_uninteresting(struct commit_list *orig, +static int everybody_uninteresting(struct prio_queue *orig, struct commit **interesting_cache) { - struct commit_list *list = orig; + size_t i; if (*interesting_cache) { struct commit *commit = *interesting_cache; @@ -484,9 +484,8 @@ static int everybody_uninteresting(struct commit_list *orig, return 0; } - while (list) { - struct commit *commit = list->item; - list = list->next; + for (i = 0; i < orig->nr; i++) { + struct commit *commit = orig->array[i].data; if (commit->object.flags & UNINTERESTING) continue; @@ -1300,20 +1299,17 @@ static void cherry_pick_list(struct commit_list *list, struct rev_info *revs) /* How many extra uninteresting commits we want to see.. */ #define SLOP 5 -static int still_interesting(struct commit_list *src, timestamp_t date, int slop, +static int still_interesting(struct prio_queue *src, timestamp_t date, int slop, struct commit **interesting_cache) { /* - * No source list at all? We're definitely done.. + * Since src is sorted by date, it is enough to peek at the + * first entry to compare dates. No entry at all means done. 
*/ - if (!src) + struct commit *commit = prio_queue_peek(src); + if (!commit) return 0; - - /* - * Does the destination list contain entries with a date - * before the source list? Definitely _not_ done. - */ - if (date <= src->item->date) + if (date <= commit->date) return SLOP; /* @@ -1451,6 +1447,7 @@ static int limit_list(struct rev_info *revs) struct commit_list *newlist = NULL; struct commit_list **p = &newlist; struct commit *interesting_cache = NULL; + struct prio_queue queue = { .compare = compare_commits_by_commit_date }; if (revs->ancestry_path_implicit_bottoms) { collect_bottom_commits(original_list, @@ -1461,6 +1458,11 @@ static int limit_list(struct rev_info *revs) while (original_list) { struct commit *commit = pop_commit(&original_list); + prio_queue_put(&queue, commit); + } + + while (queue.nr) { + struct commit *commit = prio_queue_get(&queue); struct object *obj = &commit->object; if (commit == interesting_cache) @@ -1468,11 +1470,13 @@ static int limit_list(struct rev_info *revs) if (revs->max_age != -1 && (commit->date < revs->max_age)) obj->flags |= UNINTERESTING; - if (process_parents(revs, commit, &original_list, NULL) < 0) + if (process_parents(revs, commit, NULL, &queue) < 0) { + clear_prio_queue(&queue); return -1; + } if (obj->flags & UNINTERESTING) { mark_parents_uninteresting(revs, commit); - slop = still_interesting(original_list, date, slop, &interesting_cache); + slop = still_interesting(&queue, date, slop, &interesting_cache); if (slop) continue; break; @@ -1509,7 +1513,7 @@ static int limit_list(struct rev_info *revs) } } - commit_list_free(original_list); + clear_prio_queue(&queue); revs->commits = newlist; return 0; } From 5f6744d3eba6264ce78c8b507b1a1d3c0e540c37 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Thu, 14 May 2026 13:37:34 -0500 Subject: [PATCH 16/93] odb: split `struct odb_transaction` into separate header The current ODB transaction interface is colocated with other ODB interfaces in "odb.{c,h}". 
Subsequent commits will expand `struct odb_transaction` to support write operations on the transaction directly. To keep things organized and prevent "odb.{c,h}" from becoming more unwieldy, split out `struct odb_transaction` into a separate header. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- Makefile | 1 + builtin/add.c | 1 + builtin/unpack-objects.c | 1 + builtin/update-index.c | 1 + cache-tree.c | 1 + meson.build | 1 + object-file.c | 1 + odb.c | 25 ------------------------- odb.h | 31 ------------------------------- odb/transaction.c | 28 ++++++++++++++++++++++++++++ odb/transaction.h | 38 ++++++++++++++++++++++++++++++++++++++ read-cache.c | 1 + 12 files changed, 74 insertions(+), 56 deletions(-) create mode 100644 odb/transaction.c create mode 100644 odb/transaction.h diff --git a/Makefile b/Makefile index dbf00220541ce1..6342db13e53232 100644 --- a/Makefile +++ b/Makefile @@ -1219,6 +1219,7 @@ LIB_OBJS += odb.o LIB_OBJS += odb/source.o LIB_OBJS += odb/source-files.o LIB_OBJS += odb/streaming.o +LIB_OBJS += odb/transaction.o LIB_OBJS += oid-array.o LIB_OBJS += oidmap.o LIB_OBJS += oidset.o diff --git a/builtin/add.c b/builtin/add.c index 7737ab878bfceb..c859f665199efa 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -16,6 +16,7 @@ #include "run-command.h" #include "object-file.h" #include "odb.h" +#include "odb/transaction.h" #include "parse-options.h" #include "path.h" #include "preload-index.h" diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 6fc64e9e4b8d5a..bc9b1e047e2e4e 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -9,6 +9,7 @@ #include "hex.h" #include "object-file.h" #include "odb.h" +#include "odb/transaction.h" #include "object.h" #include "delta.h" #include "pack.h" diff --git a/builtin/update-index.c b/builtin/update-index.c index 8a5907767bf297..bcc43852ef47aa 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -19,6 +19,7 @@ #include "tree-walk.h" #include 
"object-file.h" #include "odb.h" +#include "odb/transaction.h" #include "refs.h" #include "resolve-undo.h" #include "parse-options.h" diff --git a/cache-tree.c b/cache-tree.c index 60bcc07c3b8357..f056869cfdbcf1 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -10,6 +10,7 @@ #include "cache-tree.h" #include "object-file.h" #include "odb.h" +#include "odb/transaction.h" #include "read-cache-ll.h" #include "replace-object.h" #include "repository.h" diff --git a/meson.build b/meson.build index 8309942d184847..6dc23b3af2f387 100644 --- a/meson.build +++ b/meson.build @@ -405,6 +405,7 @@ libgit_sources = [ 'odb/source.c', 'odb/source-files.c', 'odb/streaming.c', + 'odb/transaction.c', 'oid-array.c', 'oidmap.c', 'oidset.c', diff --git a/object-file.c b/object-file.c index f0b029ff0b2cb0..bfbb632cf8b971 100644 --- a/object-file.c +++ b/object-file.c @@ -21,6 +21,7 @@ #include "object-file.h" #include "odb.h" #include "odb/streaming.h" +#include "odb/transaction.h" #include "oidtree.h" #include "pack.h" #include "packfile.h" diff --git a/odb.c b/odb.c index 350e23f3c0798d..8c3cbc1b53e11d 100644 --- a/odb.c +++ b/odb.c @@ -1069,28 +1069,3 @@ void odb_reprepare(struct object_database *o) obj_read_unlock(); } - -struct odb_transaction *odb_transaction_begin(struct object_database *odb) -{ - if (odb->transaction) - return NULL; - - odb->transaction = odb_transaction_files_begin(odb->sources); - - return odb->transaction; -} - -void odb_transaction_commit(struct odb_transaction *transaction) -{ - if (!transaction) - return; - - /* - * Ensure the transaction ending matches the pending transaction. 
- */ - ASSERT(transaction == transaction->source->odb->transaction); - - transaction->commit(transaction); - transaction->source->odb->transaction = NULL; - free(transaction); -} diff --git a/odb.h b/odb.h index 9aee260105ae54..ec5367b13ed8bc 100644 --- a/odb.h +++ b/odb.h @@ -35,24 +35,6 @@ struct packed_git; struct packfile_store; struct cached_object_entry; -/* - * A transaction may be started for an object database prior to writing new - * objects via odb_transaction_begin(). These objects are not committed until - * odb_transaction_commit() is invoked. Only a single transaction may be pending - * at a time. - * - * Each ODB source is expected to implement its own transaction handling. - */ -struct odb_transaction; -typedef void (*odb_transaction_commit_fn)(struct odb_transaction *transaction); -struct odb_transaction { - /* The ODB source the transaction is opened against. */ - struct odb_source *source; - - /* The ODB source specific callback invoked to commit a transaction. */ - odb_transaction_commit_fn commit; -}; - /* * The object database encapsulates access to objects in a repository. It * manages one or more sources that store the actual objects which are @@ -154,19 +136,6 @@ void odb_close(struct object_database *o); */ void odb_reprepare(struct object_database *o); -/* - * Starts an ODB transaction. Subsequent objects are written to the transaction - * and not committed until odb_transaction_commit() is invoked on the - * transaction. If the ODB already has a pending transaction, NULL is returned. - */ -struct odb_transaction *odb_transaction_begin(struct object_database *odb); - -/* - * Commits an ODB transaction making the written objects visible. If the - * specified transaction is NULL, the function is a no-op. - */ -void odb_transaction_commit(struct odb_transaction *transaction); - /* * Find source by its object directory path. Returns a `NULL` pointer in case * the source could not be found. 
diff --git a/odb/transaction.c b/odb/transaction.c new file mode 100644 index 00000000000000..9bf3f347dcf261 --- /dev/null +++ b/odb/transaction.c @@ -0,0 +1,28 @@ +#include "git-compat-util.h" +#include "object-file.h" +#include "odb/transaction.h" + +struct odb_transaction *odb_transaction_begin(struct object_database *odb) +{ + if (odb->transaction) + return NULL; + + odb->transaction = odb_transaction_files_begin(odb->sources); + + return odb->transaction; +} + +void odb_transaction_commit(struct odb_transaction *transaction) +{ + if (!transaction) + return; + + /* + * Ensure the transaction ending matches the pending transaction. + */ + ASSERT(transaction == transaction->source->odb->transaction); + + transaction->commit(transaction); + transaction->source->odb->transaction = NULL; + free(transaction); +} diff --git a/odb/transaction.h b/odb/transaction.h new file mode 100644 index 00000000000000..a56e392f217f4f --- /dev/null +++ b/odb/transaction.h @@ -0,0 +1,38 @@ +#ifndef ODB_TRANSACTION_H +#define ODB_TRANSACTION_H + +#include "odb.h" +#include "odb/source.h" + +/* + * A transaction may be started for an object database prior to writing new + * objects via odb_transaction_begin(). These objects are not committed until + * odb_transaction_commit() is invoked. Only a single transaction may be pending + * at a time. + * + * Each ODB source is expected to implement its own transaction handling. + */ +struct odb_transaction; +typedef void (*odb_transaction_commit_fn)(struct odb_transaction *transaction); +struct odb_transaction { + /* The ODB source the transaction is opened against. */ + struct odb_source *source; + + /* The ODB source specific callback invoked to commit a transaction. */ + odb_transaction_commit_fn commit; +}; + +/* + * Starts an ODB transaction. Subsequent objects are written to the transaction + * and not committed until odb_transaction_commit() is invoked on the + * transaction. 
If the ODB already has a pending transaction, NULL is returned. + */ +struct odb_transaction *odb_transaction_begin(struct object_database *odb); + +/* + * Commits an ODB transaction making the written objects visible. If the + * specified transaction is NULL, the function is a no-op. + */ +void odb_transaction_commit(struct odb_transaction *transaction); + +#endif diff --git a/read-cache.c b/read-cache.c index 5049f9baca9c5e..8147c7e94a8b8e 100644 --- a/read-cache.c +++ b/read-cache.c @@ -20,6 +20,7 @@ #include "dir.h" #include "object-file.h" #include "odb.h" +#include "odb/transaction.h" #include "oid-array.h" #include "tree.h" #include "commit.h" From 10cdbc7423b2f5f9c666c48fe8a6e6e044595799 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Thu, 14 May 2026 13:37:35 -0500 Subject: [PATCH 17/93] odb/transaction: use pluggable `begin_transaction()` Each ODB source is expected to provide an ODB transaction implementation that should be used when starting a transaction. With d6fc6fe6f8 (odb/source: make `begin_transaction()` function pluggable, 2026-03-05), the `struct odb_source` now provides a pluggable callback for beginning transactions. Use the callback provided by the ODB source accordingly. 
Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- odb/transaction.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/odb/transaction.c b/odb/transaction.c index 9bf3f347dcf261..592ac840759a07 100644 --- a/odb/transaction.c +++ b/odb/transaction.c @@ -1,5 +1,5 @@ #include "git-compat-util.h" -#include "object-file.h" +#include "odb/source.h" #include "odb/transaction.h" struct odb_transaction *odb_transaction_begin(struct object_database *odb) @@ -7,7 +7,7 @@ struct odb_transaction *odb_transaction_begin(struct object_database *odb) if (odb->transaction) return NULL; - odb->transaction = odb_transaction_files_begin(odb->sources); + odb_source_begin_transaction(odb->sources, &odb->transaction); return odb->transaction; } From 970f63519e494b590c807972af6c40477e14bc61 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Thu, 14 May 2026 13:37:36 -0500 Subject: [PATCH 18/93] odb: update `struct odb_write_stream` read() callback The `read()` callback used by `struct odb_write_stream` currently returns a pointer to an internal buffer along with the number of bytes read. This makes buffer ownership unclear and provides no way to report errors. Update the interface to instead require the caller to provide a buffer, and have the callback return the number of bytes written to it or a negative value on error. While at it, also move the `struct odb_write_stream` definition to "odb/streaming.h". Call sites are updated accordingly. 
Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/unpack-objects.c | 20 ++++++++------------ object-file.c | 15 ++++++++++++--- odb.h | 6 +----- odb/streaming.c | 5 +++++ odb/streaming.h | 18 ++++++++++++++++++ 5 files changed, 44 insertions(+), 20 deletions(-) diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index bc9b1e047e2e4e..64e58e79fdac15 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -9,6 +9,7 @@ #include "hex.h" #include "object-file.h" #include "odb.h" +#include "odb/streaming.h" #include "odb/transaction.h" #include "object.h" #include "delta.h" @@ -360,24 +361,21 @@ static void unpack_non_delta_entry(enum object_type type, unsigned long size, struct input_zstream_data { git_zstream *zstream; - unsigned char buf[8192]; int status; }; -static const void *feed_input_zstream(struct odb_write_stream *in_stream, - unsigned long *readlen) +static ssize_t feed_input_zstream(struct odb_write_stream *in_stream, + unsigned char *buf, size_t buf_len) { struct input_zstream_data *data = in_stream->data; git_zstream *zstream = data->zstream; void *in = fill(1); - if (in_stream->is_finished) { - *readlen = 0; - return NULL; - } + if (in_stream->is_finished) + return 0; - zstream->next_out = data->buf; - zstream->avail_out = sizeof(data->buf); + zstream->next_out = buf; + zstream->avail_out = buf_len; zstream->next_in = in; zstream->avail_in = len; @@ -385,9 +383,7 @@ static const void *feed_input_zstream(struct odb_write_stream *in_stream, in_stream->is_finished = data->status != Z_OK; use(len - zstream->avail_in); - *readlen = sizeof(data->buf) - zstream->avail_out; - - return data->buf; + return buf_len - zstream->avail_out; } static void stream_blob(unsigned long size, unsigned nr) diff --git a/object-file.c b/object-file.c index bfbb632cf8b971..a1afca23c5e34b 100644 --- a/object-file.c +++ b/object-file.c @@ -1066,6 +1066,7 @@ int odb_source_loose_write_stream(struct odb_source *source, struct 
git_hash_ctx c, compat_c; struct strbuf tmp_file = STRBUF_INIT; struct strbuf filename = STRBUF_INIT; + unsigned char buf[8192]; int dirlen; char hdr[MAX_HEADER_LEN]; int hdrlen; @@ -1098,9 +1099,17 @@ int odb_source_loose_write_stream(struct odb_source *source, unsigned char *in0 = stream.next_in; if (!stream.avail_in && !in_stream->is_finished) { - const void *in = in_stream->read(in_stream, &stream.avail_in); - stream.next_in = (void *)in; - in0 = (unsigned char *)in; + ssize_t read_len = odb_write_stream_read(in_stream, buf, + sizeof(buf)); + if (read_len < 0) { + close(fd); + err = -1; + goto cleanup; + } + + stream.avail_in = read_len; + stream.next_in = buf; + in0 = buf; /* All data has been read. */ if (in_stream->is_finished) flush = 1; diff --git a/odb.h b/odb.h index ec5367b13ed8bc..6faeaa05891d7b 100644 --- a/odb.h +++ b/odb.h @@ -529,11 +529,7 @@ static inline int odb_write_object(struct object_database *odb, return odb_write_object_ext(odb, buf, len, type, oid, NULL, 0); } -struct odb_write_stream { - const void *(*read)(struct odb_write_stream *, unsigned long *len); - void *data; - int is_finished; -}; +struct odb_write_stream; int odb_write_object_stream(struct object_database *odb, struct odb_write_stream *stream, size_t len, diff --git a/odb/streaming.c b/odb/streaming.c index 5927a12954ba59..a68dd2cbe37821 100644 --- a/odb/streaming.c +++ b/odb/streaming.c @@ -232,6 +232,11 @@ struct odb_read_stream *odb_read_stream_open(struct object_database *odb, return st; } +ssize_t odb_write_stream_read(struct odb_write_stream *st, void *buf, size_t sz) +{ + return st->read(st, buf, sz); +} + int odb_stream_blob_to_fd(struct object_database *odb, int fd, const struct object_id *oid, diff --git a/odb/streaming.h b/odb/streaming.h index c7861f7e13c606..65ced911fecd69 100644 --- a/odb/streaming.h +++ b/odb/streaming.h @@ -47,6 +47,24 @@ int odb_read_stream_close(struct odb_read_stream *stream); */ ssize_t odb_read_stream_read(struct odb_read_stream *stream, 
void *buf, size_t len); +/* + * A stream that provides an object to be written to the object database without + * loading all of it into memory. + */ +struct odb_write_stream { + ssize_t (*read)(struct odb_write_stream *, unsigned char *, size_t); + void *data; + int is_finished; +}; + +/* + * Read data from the stream into the buffer. Returns 0 when finished and the + * number of bytes read on success. Returns a negative error code in case + * reading from the stream fails. + */ +ssize_t odb_write_stream_read(struct odb_write_stream *stream, void *buf, + size_t len); + /* * Look up the object by its ID and write the full contents to the file * descriptor. The object must be a blob, or the function will fail. When From 8a1f5ecf287bf73c2dee102d726709f444b77c44 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Thu, 14 May 2026 13:37:37 -0500 Subject: [PATCH 19/93] object-file: remove flags from transaction packfile writes The `index_blob_packfile_transaction()` function handles streaming a blob from an fd to compute its object ID and conditionally writes the object directly to a packfile if the INDEX_WRITE_OBJECT flag is set. A subsequent commit will make these packfile object writes part of the transaction interface. Consequently, having the object write be conditional on this flag is a bit awkward. In preparation for this change, introduce a dedicated `hash_blob_stream()` helper that only computes the OID from a `struct odb_write_stream`. This is invoked by `index_fd()` instead when the INDEX_WRITE_OBJECT is not set. The object write performed via `index_blob_packfile_transaction()` is made unconditional accordingly. 
Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- object-file.c | 132 +++++++++++++++++++++++++++++------------------- odb/streaming.c | 46 +++++++++++++++++ odb/streaming.h | 12 +++++ 3 files changed, 138 insertions(+), 52 deletions(-) diff --git a/object-file.c b/object-file.c index a1afca23c5e34b..a59030911fff05 100644 --- a/object-file.c +++ b/object-file.c @@ -1397,11 +1397,10 @@ static int already_written(struct odb_transaction_files *transaction, } /* Lazily create backing packfile for the state */ -static void prepare_packfile_transaction(struct odb_transaction_files *transaction, - unsigned flags) +static void prepare_packfile_transaction(struct odb_transaction_files *transaction) { struct transaction_packfile *state = &transaction->packfile; - if (!(flags & INDEX_WRITE_OBJECT) || state->f) + if (state->f) return; state->f = create_tmp_packfile(transaction->base.source->odb->repo, @@ -1414,6 +1413,39 @@ static void prepare_packfile_transaction(struct odb_transaction_files *transacti die_errno("unable to write pack header"); } +static int hash_blob_stream(struct odb_write_stream *stream, + const struct git_hash_algo *hash_algo, + struct object_id *result_oid, size_t size) +{ + unsigned char buf[16384]; + struct git_hash_ctx ctx; + unsigned header_len; + size_t bytes_hashed = 0; + + header_len = format_object_header((char *)buf, sizeof(buf), + OBJ_BLOB, size); + hash_algo->init_fn(&ctx); + git_hash_update(&ctx, buf, header_len); + + while (!stream->is_finished) { + ssize_t read_result = odb_write_stream_read(stream, buf, + sizeof(buf)); + + if (read_result < 0) + return -1; + + git_hash_update(&ctx, buf, read_result); + bytes_hashed += read_result; + } + + if (bytes_hashed != size) + return -1; + + git_hash_final_oid(result_oid, &ctx); + + return 0; +} + /* * Read the contents from fd for size bytes, streaming it to the * packfile in state while updating the hash in ctx. 
Signal a failure @@ -1431,15 +1463,13 @@ static void prepare_packfile_transaction(struct odb_transaction_files *transacti */ static int stream_blob_to_pack(struct transaction_packfile *state, struct git_hash_ctx *ctx, off_t *already_hashed_to, - int fd, size_t size, const char *path, - unsigned flags) + int fd, size_t size, const char *path) { git_zstream s; unsigned char ibuf[16384]; unsigned char obuf[16384]; unsigned hdrlen; int status = Z_OK; - int write_object = (flags & INDEX_WRITE_OBJECT); off_t offset = 0; git_deflate_init(&s, pack_compression_level); @@ -1474,20 +1504,18 @@ static int stream_blob_to_pack(struct transaction_packfile *state, status = git_deflate(&s, size ? 0 : Z_FINISH); if (!s.avail_out || status == Z_STREAM_END) { - if (write_object) { - size_t written = s.next_out - obuf; - - /* would we bust the size limit? */ - if (state->nr_written && - pack_size_limit_cfg && - pack_size_limit_cfg < state->offset + written) { - git_deflate_abort(&s); - return -1; - } - - hashwrite(state->f, obuf, written); - state->offset += written; + size_t written = s.next_out - obuf; + + /* would we bust the size limit? 
*/ + if (state->nr_written && + pack_size_limit_cfg && + pack_size_limit_cfg < state->offset + written) { + git_deflate_abort(&s); + return -1; } + + hashwrite(state->f, obuf, written); + state->offset += written; s.next_out = obuf; s.avail_out = sizeof(obuf); } @@ -1575,8 +1603,7 @@ static void flush_packfile_transaction(struct odb_transaction_files *transaction */ static int index_blob_packfile_transaction(struct odb_transaction_files *transaction, struct object_id *result_oid, int fd, - size_t size, const char *path, - unsigned flags) + size_t size, const char *path) { struct transaction_packfile *state = &transaction->packfile; off_t seekback, already_hashed_to; @@ -1584,7 +1611,7 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac unsigned char obuf[16384]; unsigned header_len; struct hashfile_checkpoint checkpoint; - struct pack_idx_entry *idx = NULL; + struct pack_idx_entry *idx; seekback = lseek(fd, 0, SEEK_CUR); if (seekback == (off_t)-1) @@ -1595,33 +1622,26 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac transaction->base.source->odb->repo->hash_algo->init_fn(&ctx); git_hash_update(&ctx, obuf, header_len); - /* Note: idx is non-NULL when we are writing */ - if ((flags & INDEX_WRITE_OBJECT) != 0) { - CALLOC_ARRAY(idx, 1); - - prepare_packfile_transaction(transaction, flags); - hashfile_checkpoint_init(state->f, &checkpoint); - } + CALLOC_ARRAY(idx, 1); + prepare_packfile_transaction(transaction); + hashfile_checkpoint_init(state->f, &checkpoint); already_hashed_to = 0; while (1) { - prepare_packfile_transaction(transaction, flags); - if (idx) { - hashfile_checkpoint(state->f, &checkpoint); - idx->offset = state->offset; - crc32_begin(state->f); - } + prepare_packfile_transaction(transaction); + hashfile_checkpoint(state->f, &checkpoint); + idx->offset = state->offset; + crc32_begin(state->f); + if (!stream_blob_to_pack(state, &ctx, &already_hashed_to, - fd, size, path, flags)) + fd, size, 
path)) break; /* * Writing this object to the current pack will make * it too big; we need to truncate it, start a new * pack, and write into it. */ - if (!idx) - BUG("should not happen"); hashfile_truncate(state->f, &checkpoint); state->offset = checkpoint.offset; flush_packfile_transaction(transaction); @@ -1629,8 +1649,6 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac return error("cannot seek back"); } git_hash_final_oid(result_oid, &ctx); - if (!idx) - return 0; idx->crc32 = crc32_end(state->f); if (already_written(transaction, result_oid)) { @@ -1668,18 +1686,28 @@ int index_fd(struct index_state *istate, struct object_id *oid, ret = index_core(istate, oid, fd, xsize_t(st->st_size), type, path, flags); } else { - struct object_database *odb = the_repository->objects; - struct odb_transaction_files *files_transaction; - struct odb_transaction *transaction; - - transaction = odb_transaction_begin(odb); - files_transaction = container_of(odb->transaction, - struct odb_transaction_files, - base); - ret = index_blob_packfile_transaction(files_transaction, oid, fd, - xsize_t(st->st_size), - path, flags); - odb_transaction_commit(transaction); + struct odb_write_stream stream; + odb_write_stream_from_fd(&stream, fd, xsize_t(st->st_size)); + + if (flags & INDEX_WRITE_OBJECT) { + struct object_database *odb = the_repository->objects; + struct odb_transaction_files *files_transaction; + struct odb_transaction *transaction; + + transaction = odb_transaction_begin(odb); + files_transaction = container_of(odb->transaction, + struct odb_transaction_files, + base); + ret = index_blob_packfile_transaction(files_transaction, oid, fd, + xsize_t(st->st_size), path); + odb_transaction_commit(transaction); + } else { + ret = hash_blob_stream(&stream, + the_repository->hash_algo, oid, + xsize_t(st->st_size)); + } + + odb_write_stream_release(&stream); } close(fd); diff --git a/odb/streaming.c b/odb/streaming.c index 
a68dd2cbe37821..20531e864c9561 100644 --- a/odb/streaming.c +++ b/odb/streaming.c @@ -237,6 +237,11 @@ ssize_t odb_write_stream_read(struct odb_write_stream *st, void *buf, size_t sz) return st->read(st, buf, sz); } +void odb_write_stream_release(struct odb_write_stream *st) +{ + free(st->data); +} + int odb_stream_blob_to_fd(struct object_database *odb, int fd, const struct object_id *oid, @@ -292,3 +297,44 @@ int odb_stream_blob_to_fd(struct object_database *odb, odb_read_stream_close(st); return result; } + +struct read_object_fd_data { + int fd; + size_t remaining; +}; + +static ssize_t read_object_fd(struct odb_write_stream *stream, + unsigned char *buf, size_t len) +{ + struct read_object_fd_data *data = stream->data; + ssize_t read_result; + size_t count; + + if (stream->is_finished) + return 0; + + count = data->remaining < len ? data->remaining : len; + read_result = read_in_full(data->fd, buf, count); + if (read_result < 0 || (size_t)read_result != count) + return -1; + + data->remaining -= count; + if (!data->remaining) + stream->is_finished = 1; + + return read_result; +} + +void odb_write_stream_from_fd(struct odb_write_stream *stream, int fd, + size_t size) +{ + struct read_object_fd_data *data; + + CALLOC_ARRAY(data, 1); + data->fd = fd; + data->remaining = size; + + stream->data = data; + stream->read = read_object_fd; + stream->is_finished = 0; +} diff --git a/odb/streaming.h b/odb/streaming.h index 65ced911fecd69..2a8cac19a43c00 100644 --- a/odb/streaming.h +++ b/odb/streaming.h @@ -5,6 +5,7 @@ #define STREAMING_H 1 #include "object.h" +#include "odb.h" struct object_database; struct odb_read_stream; @@ -65,6 +66,11 @@ struct odb_write_stream { ssize_t odb_write_stream_read(struct odb_write_stream *stream, void *buf, size_t len); +/* + * Releases memory allocated for underlying stream data. + */ +void odb_write_stream_release(struct odb_write_stream *stream); + /* * Look up the object by its ID and write the full contents to the file * descriptor. 
The object must be a blob, or the function will fail. When @@ -82,4 +88,10 @@ int odb_stream_blob_to_fd(struct object_database *odb, struct stream_filter *filter, int can_seek); +/* + * Sets up an ODB write stream that reads from an fd. + */ +void odb_write_stream_from_fd(struct odb_write_stream *stream, int fd, + size_t size); + #endif /* STREAMING_H */ From d4c92e2ac975f256ccc207c65bf46e3be75a2115 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Thu, 14 May 2026 13:37:38 -0500 Subject: [PATCH 20/93] object-file: avoid fd seekback by checking object size upfront In certain scenarios, Git handles writing blobs that exceed "core.bigFileThreshold" differently by streaming the object directly into a packfile. When there is an active ODB transaction, these blobs are streamed to the same packfile instead of using a separate packfile for each. If "pack.packSizeLimit" is configured and streaming another object causes the packfile to exceed the configured limit, the packfile is truncated back to the previous object and the object write is restarted in a new packfile. This works fine, but requires the fd being read from to save a checkpoint so it becomes possible to rewind the input source via seeking back to a known offset at the beginning. In a subsequent commit, blob streaming is converted to use `struct odb_write_stream` as a more generic input source instead of an fd which doesn't provide a mechanism for rewinding. For this use case though, rewinding the fd is not strictly necessary because the inflated size of the object is known and can be used to approximate whether writing the object would cause the packfile to exceed the configured limit prior to writing anything. These blobs written to the packfile are never deltified thus the size difference between what is written versus the inflated size is due to zlib compression. 
While this does prevent packfiles from being filled to the potential maximum in some cases, it should be good enough and still prevents the packfile from exceeding any configured limit. Use the inflated blob size to determine whether writing an object to a packfile will exceed the configured "pack.packSizeLimit". Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- object-file.c | 86 +++++++++++++++------------------------------------ 1 file changed, 25 insertions(+), 61 deletions(-) diff --git a/object-file.c b/object-file.c index a59030911fff05..6d7afdb7239e9e 100644 --- a/object-file.c +++ b/object-file.c @@ -1448,29 +1448,17 @@ static int hash_blob_stream(struct odb_write_stream *stream, /* * Read the contents from fd for size bytes, streaming it to the - * packfile in state while updating the hash in ctx. Signal a failure - * by returning a negative value when the resulting pack would exceed - * the pack size limit and this is not the first object in the pack, - * so that the caller can discard what we wrote from the current pack - * by truncating it and opening a new one. The caller will then call - * us again after rewinding the input fd. - * - * The already_hashed_to pointer is kept untouched by the caller to - * make sure we do not hash the same byte when we are called - * again. This way, the caller does not have to checkpoint its hash - * status before calling us just in case we ask it to call us again - * with a new pack. + * packfile in state while updating the hash in ctx. 
*/ -static int stream_blob_to_pack(struct transaction_packfile *state, - struct git_hash_ctx *ctx, off_t *already_hashed_to, - int fd, size_t size, const char *path) +static void stream_blob_to_pack(struct transaction_packfile *state, + struct git_hash_ctx *ctx, int fd, size_t size, + const char *path) { git_zstream s; unsigned char ibuf[16384]; unsigned char obuf[16384]; unsigned hdrlen; int status = Z_OK; - off_t offset = 0; git_deflate_init(&s, pack_compression_level); @@ -1487,15 +1475,9 @@ static int stream_blob_to_pack(struct transaction_packfile *state, if ((size_t)read_result != rsize) die("failed to read %u bytes from '%s'", (unsigned)rsize, path); - offset += rsize; - if (*already_hashed_to < offset) { - size_t hsize = offset - *already_hashed_to; - if (rsize < hsize) - hsize = rsize; - if (hsize) - git_hash_update(ctx, ibuf, hsize); - *already_hashed_to = offset; - } + + git_hash_update(ctx, ibuf, rsize); + s.next_in = ibuf; s.avail_in = rsize; size -= rsize; @@ -1506,14 +1488,6 @@ static int stream_blob_to_pack(struct transaction_packfile *state, if (!s.avail_out || status == Z_STREAM_END) { size_t written = s.next_out - obuf; - /* would we bust the size limit? 
*/ - if (state->nr_written && - pack_size_limit_cfg && - pack_size_limit_cfg < state->offset + written) { - git_deflate_abort(&s); - return -1; - } - hashwrite(state->f, obuf, written); state->offset += written; s.next_out = obuf; @@ -1530,7 +1504,6 @@ static int stream_blob_to_pack(struct transaction_packfile *state, } } git_deflate_end(&s); - return 0; } static void flush_packfile_transaction(struct odb_transaction_files *transaction) @@ -1606,48 +1579,39 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac size_t size, const char *path) { struct transaction_packfile *state = &transaction->packfile; - off_t seekback, already_hashed_to; struct git_hash_ctx ctx; unsigned char obuf[16384]; unsigned header_len; struct hashfile_checkpoint checkpoint; struct pack_idx_entry *idx; - seekback = lseek(fd, 0, SEEK_CUR); - if (seekback == (off_t)-1) - return error("cannot find the current offset"); - header_len = format_object_header((char *)obuf, sizeof(obuf), OBJ_BLOB, size); transaction->base.source->odb->repo->hash_algo->init_fn(&ctx); git_hash_update(&ctx, obuf, header_len); + /* + * If writing another object to the packfile could result in it + * exceeding the configured size limit, flush the current packfile + * transaction. + * + * Note that this uses the inflated object size as an approximation. + * Blob objects written in this manner are not delta-compressed, so + * the difference between the inflated and on-disk size is limited + * to zlib compression and is sufficient for this check. 
+ */ + if (state->nr_written && pack_size_limit_cfg && + pack_size_limit_cfg < state->offset + size) + flush_packfile_transaction(transaction); + CALLOC_ARRAY(idx, 1); prepare_packfile_transaction(transaction); hashfile_checkpoint_init(state->f, &checkpoint); - already_hashed_to = 0; - - while (1) { - prepare_packfile_transaction(transaction); - hashfile_checkpoint(state->f, &checkpoint); - idx->offset = state->offset; - crc32_begin(state->f); - - if (!stream_blob_to_pack(state, &ctx, &already_hashed_to, - fd, size, path)) - break; - /* - * Writing this object to the current pack will make - * it too big; we need to truncate it, start a new - * pack, and write into it. - */ - hashfile_truncate(state->f, &checkpoint); - state->offset = checkpoint.offset; - flush_packfile_transaction(transaction); - if (lseek(fd, seekback, SEEK_SET) == (off_t)-1) - return error("cannot seek back"); - } + hashfile_checkpoint(state->f, &checkpoint); + idx->offset = state->offset; + crc32_begin(state->f); + stream_blob_to_pack(state, &ctx, fd, size, path); git_hash_final_oid(result_oid, &ctx); idx->crc32 = crc32_end(state->f); From 45a75d6187dd8e85470e70aaada6346576333370 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Thu, 14 May 2026 13:37:39 -0500 Subject: [PATCH 21/93] object-file: generalize packfile writes to use odb_write_stream The `index_blob_packfile_transaction()` function streams blob data directly from an fd. This makes it difficult to reuse as part of a generic transactional object writing interface. Refactor the packfile write path to operate on a `struct odb_write_stream`, allowing callers to supply data from arbitrary sources. 
Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- object-file.c | 56 +++++++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/object-file.c b/object-file.c index 6d7afdb7239e9e..0d492e6962d2ae 100644 --- a/object-file.c +++ b/object-file.c @@ -1447,18 +1447,19 @@ static int hash_blob_stream(struct odb_write_stream *stream, } /* - * Read the contents from fd for size bytes, streaming it to the + * Read the contents from the stream provided, streaming it to the * packfile in state while updating the hash in ctx. */ static void stream_blob_to_pack(struct transaction_packfile *state, - struct git_hash_ctx *ctx, int fd, size_t size, - const char *path) + struct git_hash_ctx *ctx, size_t size, + struct odb_write_stream *stream) { git_zstream s; unsigned char ibuf[16384]; unsigned char obuf[16384]; unsigned hdrlen; int status = Z_OK; + size_t bytes_read = 0; git_deflate_init(&s, pack_compression_level); @@ -1467,23 +1468,21 @@ static void stream_blob_to_pack(struct transaction_packfile *state, s.avail_out = sizeof(obuf) - hdrlen; while (status != Z_STREAM_END) { - if (size && !s.avail_in) { - size_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf); - ssize_t read_result = read_in_full(fd, ibuf, rsize); - if (read_result < 0) - die_errno("failed to read from '%s'", path); - if ((size_t)read_result != rsize) - die("failed to read %u bytes from '%s'", - (unsigned)rsize, path); + if (!stream->is_finished && !s.avail_in) { + ssize_t rsize = odb_write_stream_read(stream, ibuf, + sizeof(ibuf)); + + if (rsize < 0) + die("failed to read blob data"); git_hash_update(ctx, ibuf, rsize); s.next_in = ibuf; s.avail_in = rsize; - size -= rsize; + bytes_read += rsize; } - status = git_deflate(&s, size ? 0 : Z_FINISH); + status = git_deflate(&s, stream->is_finished ? 
Z_FINISH : 0); if (!s.avail_out || status == Z_STREAM_END) { size_t written = s.next_out - obuf; @@ -1503,6 +1502,11 @@ static void stream_blob_to_pack(struct transaction_packfile *state, die("unexpected deflate failure: %d", status); } } + + if (bytes_read != size) + die("read %" PRIuMAX " bytes of blob data, but expected %" PRIuMAX " bytes", + (uintmax_t)bytes_read, (uintmax_t)size); + git_deflate_end(&s); } @@ -1574,10 +1578,13 @@ static void flush_packfile_transaction(struct odb_transaction_files *transaction * binary blobs, they generally do not want to get any conversion, and * callers should avoid this code path when filters are requested. */ -static int index_blob_packfile_transaction(struct odb_transaction_files *transaction, - struct object_id *result_oid, int fd, - size_t size, const char *path) +static int index_blob_packfile_transaction(struct odb_transaction *base, + struct odb_write_stream *stream, + size_t size, struct object_id *result_oid) { + struct odb_transaction_files *transaction = container_of(base, + struct odb_transaction_files, + base); struct transaction_packfile *state = &transaction->packfile; struct git_hash_ctx ctx; unsigned char obuf[16384]; @@ -1611,7 +1618,7 @@ static int index_blob_packfile_transaction(struct odb_transaction_files *transac hashfile_checkpoint(state->f, &checkpoint); idx->offset = state->offset; crc32_begin(state->f); - stream_blob_to_pack(state, &ctx, fd, size, path); + stream_blob_to_pack(state, &ctx, size, stream); git_hash_final_oid(result_oid, &ctx); idx->crc32 = crc32_end(state->f); @@ -1655,15 +1662,12 @@ int index_fd(struct index_state *istate, struct object_id *oid, if (flags & INDEX_WRITE_OBJECT) { struct object_database *odb = the_repository->objects; - struct odb_transaction_files *files_transaction; - struct odb_transaction *transaction; - - transaction = odb_transaction_begin(odb); - files_transaction = container_of(odb->transaction, - struct odb_transaction_files, - base); - ret = 
index_blob_packfile_transaction(files_transaction, oid, fd, - xsize_t(st->st_size), path); + struct odb_transaction *transaction = odb_transaction_begin(odb); + + ret = index_blob_packfile_transaction(odb->transaction, + &stream, + xsize_t(st->st_size), + oid); odb_transaction_commit(transaction); } else { ret = hash_blob_stream(&stream, From 08b6afb2a2dbf762e3d9fa7abd78090b9afbd1a8 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Thu, 14 May 2026 13:37:40 -0500 Subject: [PATCH 22/93] odb/transaction: make `write_object_stream()` pluggable How an ODB transaction handles writing objects is expected to vary between implementations. Introduce a new `write_object_stream()` callback in `struct odb_transaction` to make this function pluggable. Rename `index_blob_packfile_transaction()` to `odb_transaction_files_write_object_stream()` and wire it up for use with `struct odb_transaction_files` accordingly. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- object-file.c | 16 +++++++++------- odb/transaction.c | 7 +++++++ odb/transaction.h | 25 ++++++++++++++++++++++--- 3 files changed, 38 insertions(+), 10 deletions(-) diff --git a/object-file.c b/object-file.c index 0d492e6962d2ae..23f665df9091cc 100644 --- a/object-file.c +++ b/object-file.c @@ -1578,9 +1578,10 @@ static void flush_packfile_transaction(struct odb_transaction_files *transaction * binary blobs, they generally do not want to get any conversion, and * callers should avoid this code path when filters are requested. 
*/ -static int index_blob_packfile_transaction(struct odb_transaction *base, - struct odb_write_stream *stream, - size_t size, struct object_id *result_oid) +static int odb_transaction_files_write_object_stream(struct odb_transaction *base, + struct odb_write_stream *stream, + size_t size, + struct object_id *result_oid) { struct odb_transaction_files *transaction = container_of(base, struct odb_transaction_files, @@ -1664,10 +1665,10 @@ int index_fd(struct index_state *istate, struct object_id *oid, struct object_database *odb = the_repository->objects; struct odb_transaction *transaction = odb_transaction_begin(odb); - ret = index_blob_packfile_transaction(odb->transaction, - &stream, - xsize_t(st->st_size), - oid); + ret = odb_transaction_write_object_stream(odb->transaction, + &stream, + xsize_t(st->st_size), + oid); odb_transaction_commit(transaction); } else { ret = hash_blob_stream(&stream, @@ -2132,6 +2133,7 @@ struct odb_transaction *odb_transaction_files_begin(struct odb_source *source) transaction = xcalloc(1, sizeof(*transaction)); transaction->base.source = source; transaction->base.commit = odb_transaction_files_commit; + transaction->base.write_object_stream = odb_transaction_files_write_object_stream; return &transaction->base; } diff --git a/odb/transaction.c b/odb/transaction.c index 592ac840759a07..b16e07aebfc5ac 100644 --- a/odb/transaction.c +++ b/odb/transaction.c @@ -26,3 +26,10 @@ void odb_transaction_commit(struct odb_transaction *transaction) transaction->source->odb->transaction = NULL; free(transaction); } + +int odb_transaction_write_object_stream(struct odb_transaction *transaction, + struct odb_write_stream *stream, + size_t len, struct object_id *oid) +{ + return transaction->write_object_stream(transaction, stream, len, oid); +} diff --git a/odb/transaction.h b/odb/transaction.h index a56e392f217f4f..854fda06f576e4 100644 --- a/odb/transaction.h +++ b/odb/transaction.h @@ -12,14 +12,24 @@ * * Each ODB source is expected to implement 
its own transaction handling. */ -struct odb_transaction; -typedef void (*odb_transaction_commit_fn)(struct odb_transaction *transaction); struct odb_transaction { /* The ODB source the transaction is opened against. */ struct odb_source *source; /* The ODB source specific callback invoked to commit a transaction. */ - odb_transaction_commit_fn commit; + void (*commit)(struct odb_transaction *transaction); + + /* + * This callback is expected to write the given object stream into + * the ODB transaction. Note that for now, only blobs support streaming. + * + * The resulting object ID shall be written into the out pointer. The + * callback is expected to return 0 on success, a negative error code + * otherwise. + */ + int (*write_object_stream)(struct odb_transaction *transaction, + struct odb_write_stream *stream, size_t len, + struct object_id *oid); }; /* @@ -35,4 +45,13 @@ struct odb_transaction *odb_transaction_begin(struct object_database *odb); */ void odb_transaction_commit(struct odb_transaction *transaction); +/* + * Writes the object in the provided stream into the transaction. The resulting + * object ID is written into the out pointer. Returns 0 on success, a negative + * error code otherwise. + */ +int odb_transaction_write_object_stream(struct odb_transaction *transaction, + struct odb_write_stream *stream, + size_t len, struct object_id *oid); + #endif From 822d403651aa6baff064095f98d8d8349d876eb8 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:31 +0200 Subject: [PATCH 23/93] odb: introduce "in-memory" source Next to our typical object database sources, each object database also has an implicit source of "cached" objects. These cached objects only exist in memory and serve some use cases: - They contain evergreen objects that we expect to always exist, like for example the empty tree.
- They can be used to store temporary objects that we don't want to persist to disk, which is used by git-blame(1) to create a fake worktree commit. Overall, their use is somewhat restricted though. For example, we don't provide the ability to use it as a temporary object database source that allows the user to write objects, but discard them after Git exits. So while these cached objects behave almost like a source, they aren't used as one. This is about to change over the following commits, where we will turn cached objects into a new "in-memory" source. This will allow us to use it exactly the same as any other source by providing the same common interface as the "files" source. For now, the in-memory source only hosts the cached objects and doesn't provide any logic yet. This will change with subsequent commits, where we move respective functionality into the source. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Makefile | 1 + meson.build | 1 + odb.c | 21 +++++++++++++-------- odb.h | 4 ++-- odb/source-inmemory.c | 12 ++++++++++++ odb/source-inmemory.h | 35 +++++++++++++++++++++++++++++++++++ odb/source.h | 3 +++ 7 files changed, 67 insertions(+), 10 deletions(-) create mode 100644 odb/source-inmemory.c create mode 100644 odb/source-inmemory.h diff --git a/Makefile b/Makefile index 22a8993482b7bd..3cda12c4556a6f 100644 --- a/Makefile +++ b/Makefile @@ -1218,6 +1218,7 @@ LIB_OBJS += object.o LIB_OBJS += odb.o LIB_OBJS += odb/source.o LIB_OBJS += odb/source-files.o +LIB_OBJS += odb/source-inmemory.o LIB_OBJS += odb/streaming.o LIB_OBJS += odb/transaction.o LIB_OBJS += oid-array.o diff --git a/meson.build b/meson.build index 6dc23b3af2f387..ffa73ce7ce811c 100644 --- a/meson.build +++ b/meson.build @@ -404,6 +404,7 @@ libgit_sources = [ 'odb.c', 'odb/source.c', 'odb/source-files.c', + 'odb/source-inmemory.c', 'odb/streaming.c', 'odb/transaction.c', 'oid-array.c', diff --git a/odb.c b/odb.c index 40a5e9c4e0ae53..60e1eead25602a 100644 ---
a/odb.c +++ b/odb.c @@ -14,6 +14,7 @@ #include "object-file.h" #include "object-name.h" #include "odb.h" +#include "odb/source-inmemory.h" #include "packfile.h" #include "path.h" #include "promisor-remote.h" @@ -53,9 +54,9 @@ static const struct cached_object *find_cached_object(struct object_database *ob .type = OBJ_TREE, .buf = "", }; - const struct cached_object_entry *co = object_store->cached_objects; + const struct cached_object_entry *co = object_store->inmemory_objects->objects; - for (size_t i = 0; i < object_store->cached_object_nr; i++, co++) + for (size_t i = 0; i < object_store->inmemory_objects->objects_nr; i++, co++) if (oideq(&co->oid, oid)) return &co->value; @@ -792,9 +793,10 @@ int odb_pretend_object(struct object_database *odb, find_cached_object(odb, oid)) return 0; - ALLOC_GROW(odb->cached_objects, - odb->cached_object_nr + 1, odb->cached_object_alloc); - co = &odb->cached_objects[odb->cached_object_nr++]; + ALLOC_GROW(odb->inmemory_objects->objects, + odb->inmemory_objects->objects_nr + 1, + odb->inmemory_objects->objects_alloc); + co = &odb->inmemory_objects->objects[odb->inmemory_objects->objects_nr++]; co->value.size = len; co->value.type = type; co_buf = xmalloc(len); @@ -1083,6 +1085,7 @@ struct object_database *odb_new(struct repository *repo, o->sources = odb_source_new(o, primary_source, true); o->sources_tail = &o->sources->next; o->alternate_db = xstrdup_or_null(secondary_sources); + o->inmemory_objects = odb_source_inmemory_new(o); free(to_free); @@ -1123,9 +1126,11 @@ void odb_free(struct object_database *o) odb_close(o); odb_free_sources(o); - for (size_t i = 0; i < o->cached_object_nr; i++) - free((char *) o->cached_objects[i].value.buf); - free(o->cached_objects); + for (size_t i = 0; i < o->inmemory_objects->objects_nr; i++) + free((char *) o->inmemory_objects->objects[i].value.buf); + free(o->inmemory_objects->objects); + free(o->inmemory_objects->base.path); + free(o->inmemory_objects); 
string_list_clear(&o->submodule_source_paths, 0); diff --git a/odb.h b/odb.h index 9eb8355aca540b..c3a7edf9c848dd 100644 --- a/odb.h +++ b/odb.h @@ -8,6 +8,7 @@ #include "thread-utils.h" struct cached_object_entry; +struct odb_source_inmemory; struct packed_git; struct repository; struct strbuf; @@ -80,8 +81,7 @@ struct object_database { * to write them into the object store (e.g. a browse-only * application). */ - struct cached_object_entry *cached_objects; - size_t cached_object_nr, cached_object_alloc; + struct odb_source_inmemory *inmemory_objects; /* * A fast, rough count of the number of objects in the repository. diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c new file mode 100644 index 00000000000000..c7ac5c24f08e44 --- /dev/null +++ b/odb/source-inmemory.c @@ -0,0 +1,12 @@ +#include "git-compat-util.h" +#include "odb/source-inmemory.h" + +struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) +{ + struct odb_source_inmemory *source; + + CALLOC_ARRAY(source, 1); + odb_source_init(&source->base, odb, ODB_SOURCE_INMEMORY, "source", false); + + return source; +} diff --git a/odb/source-inmemory.h b/odb/source-inmemory.h new file mode 100644 index 00000000000000..15db068ef70bb2 --- /dev/null +++ b/odb/source-inmemory.h @@ -0,0 +1,35 @@ +#ifndef ODB_SOURCE_INMEMORY_H +#define ODB_SOURCE_INMEMORY_H + +#include "odb/source.h" + +struct cached_object_entry; + +/* + * An in-memory source that you can write objects to that shall be made + * available for reading, but that shouldn't ever be persisted to disk. Note + * that any objects written to this source will be stored in memory, so the + * number of objects you can store is limited by available system memory. + */ +struct odb_source_inmemory { + struct odb_source base; + + struct cached_object_entry *objects; + size_t objects_nr, objects_alloc; +}; + +/* Create a new in-memory object database source. 
*/ +struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb); + +/* + * Cast the given object database source to the in-memory backend. This will + * cause a BUG in case the source doesn't use this backend. + */ +static inline struct odb_source_inmemory *odb_source_inmemory_downcast(struct odb_source *source) +{ + if (source->type != ODB_SOURCE_INMEMORY) + BUG("trying to downcast source of type '%d' to in-memory", source->type); + return container_of(source, struct odb_source_inmemory, base); +} + +#endif diff --git a/odb/source.h b/odb/source.h index f706e0608a4855..0a440884e4f0ab 100644 --- a/odb/source.h +++ b/odb/source.h @@ -13,6 +13,9 @@ enum odb_source_type { /* The "files" backend that uses loose objects and packfiles. */ ODB_SOURCE_FILES, + + /* The "in-memory" backend that stores objects in memory. */ + ODB_SOURCE_INMEMORY, }; struct object_id; From 8caa2e090f1b83df7c0fc82ed7f7c8772f3ec5f4 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:32 +0200 Subject: [PATCH 24/93] odb/source-inmemory: implement `free()` callback Implement the `free()` callback function for the "in-memory" source. Note that this requires us to define `struct cached_object_entry` in "odb/source-inmemory.h", as it is accessed in both "odb.c" and "odb/source-inmemory.c" now. This will be fixed in subsequent commits though. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 25 ++++--------------------- odb/source-inmemory.c | 12 ++++++++++++ odb/source-inmemory.h | 9 ++++++++- 3 files changed, 24 insertions(+), 22 deletions(-) diff --git a/odb.c b/odb.c index 60e1eead25602a..1d65825ed3978a 100644 --- a/odb.c +++ b/odb.c @@ -32,21 +32,6 @@ KHASH_INIT(odb_path_map, const char * /* key: odb_path */, struct odb_source *, 1, fspathhash, fspatheq) -/* - * This is meant to hold a *small* number of objects that you would - * want odb_read_object() to be able to return, but yet you do not want - * to write them into the object store (e.g. a browse-only - * application). - */ -struct cached_object_entry { - struct object_id oid; - struct cached_object { - enum object_type type; - const void *buf; - unsigned long size; - } value; -}; - static const struct cached_object *find_cached_object(struct object_database *object_store, const struct object_id *oid) { @@ -1109,6 +1094,10 @@ static void odb_free_sources(struct object_database *o) odb_source_free(o->sources); o->sources = next; } + + odb_source_free(&o->inmemory_objects->base); + o->inmemory_objects = NULL; + kh_destroy_odb_path_map(o->source_by_path); o->source_by_path = NULL; } @@ -1126,12 +1115,6 @@ void odb_free(struct object_database *o) odb_close(o); odb_free_sources(o); - for (size_t i = 0; i < o->inmemory_objects->objects_nr; i++) - free((char *) o->inmemory_objects->objects[i].value.buf); - free(o->inmemory_objects->objects); - free(o->inmemory_objects->base.path); - free(o->inmemory_objects); - string_list_clear(&o->submodule_source_paths, 0); free(o); diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index c7ac5c24f08e44..ccbb622eaef031 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -1,6 +1,16 @@ #include "git-compat-util.h" #include "odb/source-inmemory.h" +static void odb_source_inmemory_free(struct odb_source *source) +{ + struct odb_source_inmemory *inmemory = 
odb_source_inmemory_downcast(source); + for (size_t i = 0; i < inmemory->objects_nr; i++) + free((char *) inmemory->objects[i].value.buf); + free(inmemory->objects); + free(inmemory->base.path); + free(inmemory); +} + struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) { struct odb_source_inmemory *source; @@ -8,5 +18,7 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) CALLOC_ARRAY(source, 1); odb_source_init(&source->base, odb, ODB_SOURCE_INMEMORY, "source", false); + source->base.free = odb_source_inmemory_free; + return source; } diff --git a/odb/source-inmemory.h b/odb/source-inmemory.h index 15db068ef70bb2..d1b05a3996468a 100644 --- a/odb/source-inmemory.h +++ b/odb/source-inmemory.h @@ -3,7 +3,14 @@ #include "odb/source.h" -struct cached_object_entry; +struct cached_object_entry { + struct object_id oid; + struct cached_object { + enum object_type type; + const void *buf; + unsigned long size; + } value; +}; /* * An in-memory source that you can write objects to that shall be made From 87de1b31e04fc5ce4f47c2a8dbfdc90b25e5bdbe Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:33 +0200 Subject: [PATCH 25/93] odb: fix unnecessary call to `find_cached_object()` The function `odb_pretend_object()` writes an object into the in-memory object database source. The effect of this is that the object will now become readable, but it won't ever be persisted to disk. Before storing the object, we first verify whether the object already exists. This is done by calling `odb_has_object()` to check all sources, followed by `find_cached_object()` to check whether we have already stored the object in our in-memory source. This is unnecessary though, as `odb_has_object()` already checks the in-memory source transitively via: - `odb_has_object()` - `odb_read_object_info_extended()` - `do_oid_object_info_extended()` - `find_cached_object()` Drop the explicit call to `find_cached_object()`. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/odb.c b/odb.c index 1d65825ed3978a..ea3fcf5e118d72 100644 --- a/odb.c +++ b/odb.c @@ -774,8 +774,7 @@ int odb_pretend_object(struct object_database *odb, char *co_buf; hash_object_file(odb->repo->hash_algo, buf, len, type, oid); - if (odb_has_object(odb, oid, 0) || - find_cached_object(odb, oid)) + if (odb_has_object(odb, oid, 0)) return 0; ALLOC_GROW(odb->inmemory_objects->objects, From ec45c1e8bf8958bdcca2b324573d02ac934c51ea Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:34 +0200 Subject: [PATCH 26/93] odb/source-inmemory: implement `read_object_info()` callback Implement the `read_object_info()` callback function for the in-memory source. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 39 +------------------------------ odb/source-inmemory.c | 53 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 38 deletions(-) diff --git a/odb.c b/odb.c index ea3fcf5e118d72..6a3912adac2f0e 100644 --- a/odb.c +++ b/odb.c @@ -32,25 +32,6 @@ KHASH_INIT(odb_path_map, const char * /* key: odb_path */, struct odb_source *, 1, fspathhash, fspatheq) -static const struct cached_object *find_cached_object(struct object_database *object_store, - const struct object_id *oid) -{ - static const struct cached_object empty_tree = { - .type = OBJ_TREE, - .buf = "", - }; - const struct cached_object_entry *co = object_store->inmemory_objects->objects; - - for (size_t i = 0; i < object_store->inmemory_objects->objects_nr; i++, co++) - if (oideq(&co->oid, oid)) - return &co->value; - - if (oid->algo && oideq(oid, hash_algos[oid->algo].empty_tree)) - return &empty_tree; - - return NULL; -} - int odb_mkstemp(struct object_database *odb, struct strbuf *temp_filename, const char *pattern) { @@ -570,7 +551,6 @@ static int do_oid_object_info_extended(struct object_database 
*odb, const struct object_id *oid, struct object_info *oi, unsigned flags) { - const struct cached_object *co; const struct object_id *real = oid; int already_retried = 0; @@ -580,25 +560,8 @@ static int do_oid_object_info_extended(struct object_database *odb, if (is_null_oid(real)) return -1; - co = find_cached_object(odb, real); - if (co) { - if (oi) { - if (oi->typep) - *(oi->typep) = co->type; - if (oi->sizep) - *(oi->sizep) = co->size; - if (oi->disk_sizep) - *(oi->disk_sizep) = 0; - if (oi->delta_base_oid) - oidclr(oi->delta_base_oid, odb->repo->hash_algo); - if (oi->contentp) - *oi->contentp = xmemdupz(co->buf, co->size); - if (oi->mtimep) - *oi->mtimep = 0; - oi->whence = OI_CACHED; - } + if (!odb_source_read_object_info(&odb->inmemory_objects->base, oid, oi, flags)) return 0; - } odb_prepare_alternates(odb); diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index ccbb622eaef031..12c80f9b34a58a 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -1,5 +1,57 @@ #include "git-compat-util.h" +#include "odb.h" #include "odb/source-inmemory.h" +#include "repository.h" + +static const struct cached_object *find_cached_object(struct odb_source_inmemory *source, + const struct object_id *oid) +{ + static const struct cached_object empty_tree = { + .type = OBJ_TREE, + .buf = "", + }; + const struct cached_object_entry *co = source->objects; + + for (size_t i = 0; i < source->objects_nr; i++, co++) + if (oideq(&co->oid, oid)) + return &co->value; + + if (oid->algo && oideq(oid, hash_algos[oid->algo].empty_tree)) + return &empty_tree; + + return NULL; +} + +static int odb_source_inmemory_read_object_info(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, + enum object_info_flags flags UNUSED) +{ + struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); + const struct cached_object *object; + + object = find_cached_object(inmemory, oid); + if (!object) + return -1; + + if (oi) { + if 
(oi->typep) + *(oi->typep) = object->type; + if (oi->sizep) + *(oi->sizep) = object->size; + if (oi->disk_sizep) + *(oi->disk_sizep) = 0; + if (oi->delta_base_oid) + oidclr(oi->delta_base_oid, source->odb->repo->hash_algo); + if (oi->contentp) + *oi->contentp = xmemdupz(object->buf, object->size); + if (oi->mtimep) + *oi->mtimep = 0; + oi->whence = OI_CACHED; + } + + return 0; +} static void odb_source_inmemory_free(struct odb_source *source) { @@ -19,6 +71,7 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) odb_source_init(&source->base, odb, ODB_SOURCE_INMEMORY, "source", false); source->base.free = odb_source_inmemory_free; + source->base.read_object_info = odb_source_inmemory_read_object_info; return source; } From 8d9c1e421ce36be06ff304ce166593cf2e4ef66f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:35 +0200 Subject: [PATCH 27/93] odb/source-inmemory: implement `read_object_stream()` callback Implement the `read_object_stream()` callback function for the in-memory source. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb/source-inmemory.c | 52 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index 12c80f9b34a58a..39f0e799c74519 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -1,6 +1,7 @@ #include "git-compat-util.h" #include "odb.h" #include "odb/source-inmemory.h" +#include "odb/streaming.h" #include "repository.h" static const struct cached_object *find_cached_object(struct odb_source_inmemory *source, @@ -53,6 +54,56 @@ static int odb_source_inmemory_read_object_info(struct odb_source *source, return 0; } +struct odb_read_stream_inmemory { + struct odb_read_stream base; + const unsigned char *buf; + size_t offset; +}; + +static ssize_t odb_read_stream_inmemory_read(struct odb_read_stream *stream, + char *buf, size_t buf_len) +{ + struct odb_read_stream_inmemory *inmemory = + container_of(stream, struct odb_read_stream_inmemory, base); + size_t bytes = buf_len; + + if (buf_len > inmemory->base.size - inmemory->offset) + bytes = inmemory->base.size - inmemory->offset; + + memcpy(buf, inmemory->buf + inmemory->offset, bytes); + inmemory->offset += bytes; + + return bytes; +} + +static int odb_read_stream_inmemory_close(struct odb_read_stream *stream UNUSED) +{ + return 0; +} + +static int odb_source_inmemory_read_object_stream(struct odb_read_stream **out, + struct odb_source *source, + const struct object_id *oid) +{ + struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); + struct odb_read_stream_inmemory *stream; + const struct cached_object *object; + + object = find_cached_object(inmemory, oid); + if (!object) + return -1; + + CALLOC_ARRAY(stream, 1); + stream->base.read = odb_read_stream_inmemory_read; + stream->base.close = odb_read_stream_inmemory_close; + stream->base.size = object->size; + stream->base.type = object->type; + stream->buf = object->buf; + + *out = &stream->base; 
+ return 0; +} + static void odb_source_inmemory_free(struct odb_source *source) { struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); @@ -72,6 +123,7 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) source->base.free = odb_source_inmemory_free; source->base.read_object_info = odb_source_inmemory_read_object_info; + source->base.read_object_stream = odb_source_inmemory_read_object_stream; return source; } From f611f4ba41de07a89649c74c01477cf55b20bc31 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:36 +0200 Subject: [PATCH 28/93] odb/source-inmemory: implement `write_object()` callback Implement the `write_object()` callback function for the in-memory source. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 16 ++-------------- odb/source-inmemory.c | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/odb.c b/odb.c index 6a3912adac2f0e..24e929f03cbccf 100644 --- a/odb.c +++ b/odb.c @@ -733,24 +733,12 @@ int odb_pretend_object(struct object_database *odb, void *buf, unsigned long len, enum object_type type, struct object_id *oid) { - struct cached_object_entry *co; - char *co_buf; - hash_object_file(odb->repo->hash_algo, buf, len, type, oid); if (odb_has_object(odb, oid, 0)) return 0; - ALLOC_GROW(odb->inmemory_objects->objects, - odb->inmemory_objects->objects_nr + 1, - odb->inmemory_objects->objects_alloc); - co = &odb->inmemory_objects->objects[odb->inmemory_objects->objects_nr++]; - co->value.size = len; - co->value.type = type; - co_buf = xmalloc(len); - memcpy(co_buf, buf, len); - co->value.buf = co_buf; - oidcpy(&co->oid, oid); - return 0; + return odb_source_write_object(&odb->inmemory_objects->base, + buf, len, type, oid, NULL, 0); } void *odb_read_object(struct object_database *odb, diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index 39f0e799c74519..4848011df5189c 100644 --- 
a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -1,4 +1,5 @@ #include "git-compat-util.h" +#include "object-file.h" #include "odb.h" #include "odb/source-inmemory.h" #include "odb/streaming.h" @@ -104,6 +105,29 @@ static int odb_source_inmemory_read_object_stream(struct odb_read_stream **out, return 0; } +static int odb_source_inmemory_write_object(struct odb_source *source, + const void *buf, unsigned long len, + enum object_type type, + struct object_id *oid, + struct object_id *compat_oid UNUSED, + enum odb_write_object_flags flags UNUSED) +{ + struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); + struct cached_object_entry *object; + + hash_object_file(source->odb->repo->hash_algo, buf, len, type, oid); + + ALLOC_GROW(inmemory->objects, inmemory->objects_nr + 1, + inmemory->objects_alloc); + object = &inmemory->objects[inmemory->objects_nr++]; + object->value.size = len; + object->value.type = type; + object->value.buf = xmemdupz(buf, len); + oidcpy(&object->oid, oid); + + return 0; +} + static void odb_source_inmemory_free(struct odb_source *source) { struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); @@ -124,6 +148,7 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) source->base.free = odb_source_inmemory_free; source->base.read_object_info = odb_source_inmemory_read_object_info; source->base.read_object_stream = odb_source_inmemory_read_object_stream; + source->base.write_object = odb_source_inmemory_write_object; return source; } From 197c8a85e37720e54afda1ed92bf8b393cca92f1 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:37 +0200 Subject: [PATCH 29/93] odb/source-inmemory: implement `write_object_stream()` callback Implement the `write_object_stream()` callback function for the in-memory source. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb/source-inmemory.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index 4848011df5189c..d05a13df45ea5f 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -128,6 +128,45 @@ static int odb_source_inmemory_write_object(struct odb_source *source, return 0; } +static int odb_source_inmemory_write_object_stream(struct odb_source *source, + struct odb_write_stream *stream, + size_t len, + struct object_id *oid) +{ + char buf[16384]; + size_t total_read = 0; + char *data; + int ret; + + CALLOC_ARRAY(data, len); + while (!stream->is_finished) { + ssize_t bytes_read; + + bytes_read = odb_write_stream_read(stream, buf, sizeof(buf)); + if (total_read + bytes_read > len) { + ret = error("object stream yielded more bytes than expected"); + goto out; + } + + memcpy(data + total_read, buf, bytes_read); + total_read += bytes_read; + } + + if (total_read != len) { + ret = error("object stream yielded less bytes than expected"); + goto out; + } + + ret = odb_source_inmemory_write_object(source, data, len, OBJ_BLOB, oid, + NULL, 0); + if (ret < 0) + goto out; + +out: + free(data); + return ret; +} + static void odb_source_inmemory_free(struct odb_source *source) { struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); @@ -149,6 +188,7 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) source->base.read_object_info = odb_source_inmemory_read_object_info; source->base.read_object_stream = odb_source_inmemory_read_object_stream; source->base.write_object = odb_source_inmemory_write_object; + source->base.write_object_stream = odb_source_inmemory_write_object_stream; return source; } From 550d7b7c89a9cf80794c72e8c7d036164a5b1927 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:38 +0200 Subject: [PATCH 30/93] cbtree: allow using 
arbitrary wrapper structures for nodes The cbtree subsystem allows the user to store arbitrary data in a prefix-free set of strings. This is used by us to store object IDs in a way that we can easily iterate through them in lexicographic order, and so that we can easily perform lookups with shortened object IDs. In its current form, it is not easily possible to store arbitrary data with the tree nodes. There are a couple of approaches such a caller could try to use, but none of them really work: - One may embed the `struct cb_node` in a custom structure. This does not work though as `struct cb_node` contains a flex array, and embedding such a struct in another struct is forbidden. - One may use a `union` over `struct cb_node` and one's own data type, which _is_ allowed even if the struct contains a flex array. This does not work though, as the compiler may align members of the struct so that the node key would not immediately start where the flex array starts. - One may allocate `struct cb_node` such that it has room for both its key and the custom data. This has the downside though that if the custom data is itself a pointer to allocated memory, then the leak checker will not consider the pointer to be alive anymore. Refactor the cbtree to drop the flex array and instead take in an explicit offset for where to find the key, which allows the caller to embed `struct cb_node` in a wrapper struct. Note that this change has the downside that we now have a bit of padding in our structure, which grows the size from 60 to 64 bytes on a 64 bit system. On the other hand though, it allows us to get rid of the memory copies that we previously had to do to ensure proper alignment. This seems like a reasonable tradeoff.
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- cbtree.c | 25 ++++++++++++++++++------- cbtree.h | 17 +++++++++-------- oidtree.c | 33 ++++++++++++++------------------- 3 files changed, 41 insertions(+), 34 deletions(-) diff --git a/cbtree.c b/cbtree.c index 4ab794bddce0c6..8f5edbb80ace51 100644 --- a/cbtree.c +++ b/cbtree.c @@ -7,6 +7,11 @@ #include "git-compat-util.h" #include "cbtree.h" +static inline uint8_t *cb_node_key(struct cb_tree *t, struct cb_node *node) +{ + return (uint8_t *) node + t->key_offset; +} + static struct cb_node *cb_node_of(const void *p) { return (struct cb_node *)((uintptr_t)p - 1); @@ -33,6 +38,7 @@ struct cb_node *cb_insert(struct cb_tree *t, struct cb_node *node, size_t klen) uint8_t c; int newdirection; struct cb_node **wherep, *p; + uint8_t *node_key, *p_key; assert(!((uintptr_t)node & 1)); /* allocations must be aligned */ @@ -41,23 +47,26 @@ struct cb_node *cb_insert(struct cb_tree *t, struct cb_node *node, size_t klen) return NULL; /* success */ } + node_key = cb_node_key(t, node); + /* see if a node already exists */ - p = cb_internal_best_match(t->root, node->k, klen); + p = cb_internal_best_match(t->root, node_key, klen); + p_key = cb_node_key(t, p); /* find first differing byte */ for (newbyte = 0; newbyte < klen; newbyte++) { - if (p->k[newbyte] != node->k[newbyte]) + if (p_key[newbyte] != node_key[newbyte]) goto different_byte_found; } return p; /* element exists, let user deal with it */ different_byte_found: - newotherbits = p->k[newbyte] ^ node->k[newbyte]; + newotherbits = p_key[newbyte] ^ node_key[newbyte]; newotherbits |= newotherbits >> 1; newotherbits |= newotherbits >> 2; newotherbits |= newotherbits >> 4; newotherbits = (newotherbits & ~(newotherbits >> 1)) ^ 255; - c = p->k[newbyte]; + c = p_key[newbyte]; newdirection = (1 + (newotherbits | c)) >> 8; node->byte = newbyte; @@ -78,7 +87,7 @@ struct cb_node *cb_insert(struct cb_tree *t, struct cb_node *node, size_t klen) break; if (q->byte == 
newbyte && q->otherbits > newotherbits) break; - c = q->byte < klen ? node->k[q->byte] : 0; + c = q->byte < klen ? node_key[q->byte] : 0; direction = (1 + (q->otherbits | c)) >> 8; wherep = q->child + direction; } @@ -93,7 +102,7 @@ struct cb_node *cb_lookup(struct cb_tree *t, const uint8_t *k, size_t klen) { struct cb_node *p = cb_internal_best_match(t->root, k, klen); - return p && !memcmp(p->k, k, klen) ? p : NULL; + return p && !memcmp(cb_node_key(t, p), k, klen) ? p : NULL; } static int cb_descend(struct cb_node *p, cb_iter fn, void *arg) @@ -115,6 +124,7 @@ int cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen, struct cb_node *p = t->root; struct cb_node *top = p; size_t i = 0; + uint8_t *p_key; if (!p) return 0; /* empty tree */ @@ -130,8 +140,9 @@ int cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen, top = p; } + p_key = cb_node_key(t, p); for (i = 0; i < klen; i++) { - if (p->k[i] != kpfx[i]) + if (p_key[i] != kpfx[i]) return 0; /* "best" match failed */ } diff --git a/cbtree.h b/cbtree.h index c374b1b3db9d82..4647d4a32f87c6 100644 --- a/cbtree.h +++ b/cbtree.h @@ -6,9 +6,9 @@ * * This is adapted to store arbitrary data (not just NUL-terminated C strings * and allocates no memory internally. The user needs to allocate - * "struct cb_node" and fill cb_node.k[] with arbitrary match data - * for memcmp. - * If "klen" is variable, then it should be embedded into "c_node.k[]" + * "struct cb_node" and provide `key_offset` to indicate where the key can be + * found relative to the `struct cb_node` for memcmp. + * If "klen" is variable, then it should be embedded into the key. * Recursion is bound by the maximum value of "klen" used. 
*/ #ifndef CBTREE_H @@ -23,18 +23,19 @@ struct cb_node { */ uint32_t byte; uint8_t otherbits; - uint8_t k[FLEX_ARRAY]; /* arbitrary data, unaligned */ }; struct cb_tree { struct cb_node *root; + ptrdiff_t key_offset; }; -#define CBTREE_INIT { 0 } - -static inline void cb_init(struct cb_tree *t) +static inline void cb_init(struct cb_tree *t, + ptrdiff_t key_offset) { - struct cb_tree blank = CBTREE_INIT; + struct cb_tree blank = { + .key_offset = key_offset, + }; memcpy(t, &blank, sizeof(*t)); } diff --git a/oidtree.c b/oidtree.c index ab9fe7ec7aecce..117649753fbc1f 100644 --- a/oidtree.c +++ b/oidtree.c @@ -6,9 +6,14 @@ #include "oidtree.h" #include "hash.h" +struct oidtree_node { + struct cb_node base; + struct object_id key; +}; + void oidtree_init(struct oidtree *ot) { - cb_init(&ot->tree); + cb_init(&ot->tree, offsetof(struct oidtree_node, key)); mem_pool_init(&ot->mem_pool, 0); } @@ -22,20 +27,13 @@ void oidtree_clear(struct oidtree *ot) void oidtree_insert(struct oidtree *ot, const struct object_id *oid) { - struct cb_node *on; - struct object_id k; + struct oidtree_node *on; if (!oid->algo) BUG("oidtree_insert requires oid->algo"); - on = mem_pool_alloc(&ot->mem_pool, sizeof(*on) + sizeof(*oid)); - - /* - * Clear the padding and copy the result in separate steps to - * respect the 4-byte alignment needed by struct object_id. - */ - oidcpy(&k, oid); - memcpy(on->k, &k, sizeof(k)); + on = mem_pool_alloc(&ot->mem_pool, sizeof(*on)); + oidcpy(&on->key, oid); /* * n.b. Current callers won't get us duplicates, here. If a @@ -43,7 +41,7 @@ void oidtree_insert(struct oidtree *ot, const struct object_id *oid) * that won't be freed until oidtree_clear. 
Currently it's not * worth maintaining a free list */ - cb_insert(&ot->tree, on, sizeof(*oid)); + cb_insert(&ot->tree, &on->base, sizeof(*oid)); } bool oidtree_contains(struct oidtree *ot, const struct object_id *oid) @@ -73,21 +71,18 @@ struct oidtree_each_data { static int iter(struct cb_node *n, void *cb_data) { + struct oidtree_node *node = container_of(n, struct oidtree_node, base); struct oidtree_each_data *data = cb_data; - struct object_id k; - - /* Copy to provide 4-byte alignment needed by struct object_id. */ - memcpy(&k, n->k, sizeof(k)); - if (data->algo != GIT_HASH_UNKNOWN && data->algo != k.algo) + if (data->algo != GIT_HASH_UNKNOWN && data->algo != node->key.algo) return 0; if (data->last_nibble_at) { - if ((k.hash[*data->last_nibble_at] ^ data->last_byte) & 0xf0) + if ((node->key.hash[*data->last_nibble_at] ^ data->last_byte) & 0xf0) return 0; } - return data->cb(&k, data->cb_data); + return data->cb(&node->key, data->cb_data); } int oidtree_each(struct oidtree *ot, const struct object_id *prefix, From 449650decf49b1fe5b1dac1c48dfb919e9b57b0d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:39 +0200 Subject: [PATCH 31/93] oidtree: add ability to store data The oidtree data structure is currently only used to store object IDs, without any associated data. So consequently, it can only really be used to track which object IDs exist, and we can use the tree structure to efficiently operate on OID prefixes. But there are valid use cases where we want to both: - Store object IDs in a sorted order. - Associate arbitrary data with them. Refactor the oidtree interface so that it allows us to store arbitrary payloads within the respective nodes. This will be used in the next commit. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- loose.c | 2 +- object-file.c | 3 ++- oidtree.c | 37 ++++++++++++++++++++++++++++++++----- oidtree.h | 12 ++++++++++-- t/unit-tests/u-oidtree.c | 26 +++++++++++++++++++++++--- 5 files changed, 68 insertions(+), 12 deletions(-) diff --git a/loose.c b/loose.c index 07333be6969fcc..f7a3dd1a72f0fc 100644 --- a/loose.c +++ b/loose.c @@ -57,7 +57,7 @@ static int insert_loose_map(struct odb_source *source, inserted |= insert_oid_pair(map->to_compat, oid, compat_oid); inserted |= insert_oid_pair(map->to_storage, compat_oid, oid); if (inserted) - oidtree_insert(files->loose->cache, compat_oid); + oidtree_insert(files->loose->cache, compat_oid, NULL); return inserted; } diff --git a/object-file.c b/object-file.c index 7b1a12f8eb6006..8705251e4d1e29 100644 --- a/object-file.c +++ b/object-file.c @@ -1858,6 +1858,7 @@ static int for_each_object_wrapper_cb(const struct object_id *oid, } static int for_each_prefixed_object_wrapper_cb(const struct object_id *oid, + void *node_data UNUSED, void *cb_data) { struct for_each_object_wrapper_data *data = cb_data; @@ -2003,7 +2004,7 @@ static int append_loose_object(const struct object_id *oid, const char *path UNUSED, void *data) { - oidtree_insert(data, oid); + oidtree_insert(data, oid, NULL); return 0; } diff --git a/oidtree.c b/oidtree.c index 117649753fbc1f..e43f18026e1041 100644 --- a/oidtree.c +++ b/oidtree.c @@ -9,6 +9,7 @@ struct oidtree_node { struct cb_node base; struct object_id key; + void *data; }; void oidtree_init(struct oidtree *ot) @@ -25,15 +26,22 @@ void oidtree_clear(struct oidtree *ot) } } -void oidtree_insert(struct oidtree *ot, const struct object_id *oid) +struct oidtree_data { + struct object_id oid; +}; + +void oidtree_insert(struct oidtree *ot, const struct object_id *oid, + void *data) { struct oidtree_node *on; + struct cb_node *node; if (!oid->algo) BUG("oidtree_insert requires oid->algo"); on = mem_pool_alloc(&ot->mem_pool, sizeof(*on)); 
oidcpy(&on->key, oid); + on->data = data; /* * n.b. Current callers won't get us duplicates, here. If a @@ -41,13 +49,19 @@ void oidtree_insert(struct oidtree *ot, const struct object_id *oid) * that won't be freed until oidtree_clear. Currently it's not * worth maintaining a free list */ - cb_insert(&ot->tree, &on->base, sizeof(*oid)); + node = cb_insert(&ot->tree, &on->base, sizeof(*oid)); + if (node) { + struct oidtree_node *preexisting = container_of(node, struct oidtree_node, base); + preexisting->data = data; + } } -bool oidtree_contains(struct oidtree *ot, const struct object_id *oid) +static struct oidtree_node *oidtree_lookup(struct oidtree *ot, + const struct object_id *oid) { struct object_id k; size_t klen = sizeof(k); + struct cb_node *node; oidcpy(&k, oid); @@ -58,7 +72,20 @@ bool oidtree_contains(struct oidtree *ot, const struct object_id *oid) klen += BUILD_ASSERT_OR_ZERO(offsetof(struct object_id, hash) < offsetof(struct object_id, algo)); - return !!cb_lookup(&ot->tree, (const uint8_t *)&k, klen); + node = cb_lookup(&ot->tree, (const uint8_t *)&k, klen); + return node ? container_of(node, struct oidtree_node, base) : NULL; +} + +bool oidtree_contains(struct oidtree *ot, const struct object_id *oid) +{ + struct oidtree_node *node = oidtree_lookup(ot, oid); + return node ? 1 : 0; +} + +void *oidtree_get(struct oidtree *ot, const struct object_id *oid) +{ + struct oidtree_node *node = oidtree_lookup(ot, oid); + return node ? 
node->data : NULL; } struct oidtree_each_data { @@ -82,7 +109,7 @@ static int iter(struct cb_node *n, void *cb_data) return 0; } - return data->cb(&node->key, data->cb_data); + return data->cb(&node->key, node->data, data->cb_data); } int oidtree_each(struct oidtree *ot, const struct object_id *prefix, diff --git a/oidtree.h b/oidtree.h index 2b7bad2e60a51d..baa5a436eadaf8 100644 --- a/oidtree.h +++ b/oidtree.h @@ -29,18 +29,26 @@ void oidtree_init(struct oidtree *ot); */ void oidtree_clear(struct oidtree *ot); -/* Insert the object ID into the tree. */ -void oidtree_insert(struct oidtree *ot, const struct object_id *oid); +/* + * Insert the object ID into the tree and store the given pointer alongside + * with it. The data pointer of any preexisting entry will be overwritten. + */ +void oidtree_insert(struct oidtree *ot, const struct object_id *oid, + void *data); /* Check whether the tree contains the given object ID. */ bool oidtree_contains(struct oidtree *ot, const struct object_id *oid); +/* Get the payload stored with the given object ID. */ +void *oidtree_get(struct oidtree *ot, const struct object_id *oid); + /* * Callback function used for `oidtree_each()`. Returning a non-zero exit code * will cause iteration to stop. The exit code will be propagated to the caller * of `oidtree_each()`. 
*/ typedef int (*oidtree_each_cb)(const struct object_id *oid, + void *node_data, void *cb_data); /* diff --git a/t/unit-tests/u-oidtree.c b/t/unit-tests/u-oidtree.c index d4d05c7dc3e4f7..f0d5ebb733bfce 100644 --- a/t/unit-tests/u-oidtree.c +++ b/t/unit-tests/u-oidtree.c @@ -19,7 +19,7 @@ static int fill_tree_loc(struct oidtree *ot, const char *hexes[], size_t n) for (size_t i = 0; i < n; i++) { struct object_id oid; cl_parse_any_oid(hexes[i], &oid); - oidtree_insert(ot, &oid); + oidtree_insert(ot, &oid, NULL); } return 0; } @@ -38,9 +38,9 @@ struct expected_hex_iter { const char *query; }; -static int check_each_cb(const struct object_id *oid, void *data) +static int check_each_cb(const struct object_id *oid, void *node_data UNUSED, void *cb_data) { - struct expected_hex_iter *hex_iter = data; + struct expected_hex_iter *hex_iter = cb_data; struct object_id expected; cl_assert(hex_iter->i < hex_iter->expected_hexes.nr); @@ -105,3 +105,23 @@ void test_oidtree__each(void) check_each(&ot, "32100", "321", NULL); check_each(&ot, "32", "320", "321", NULL); } + +void test_oidtree__insert_overwrites_data(void) +{ + struct object_id oid; + struct oidtree ot; + int a, b; + + cl_parse_any_oid("1", &oid); + + oidtree_init(&ot); + + oidtree_insert(&ot, &oid, NULL); + cl_assert_equal_p(oidtree_get(&ot, &oid), NULL); + oidtree_insert(&ot, &oid, &a); + cl_assert_equal_p(oidtree_get(&ot, &oid), &a); + oidtree_insert(&ot, &oid, &b); + cl_assert_equal_p(oidtree_get(&ot, &oid), &b); + + oidtree_clear(&ot); +} From c04907694601556de0ce862ad4f80fc55ec38c62 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:40 +0200 Subject: [PATCH 32/93] odb/source-inmemory: convert to use oidtree The in-memory source stores its objects in a simple array that we grow as needed. This has a couple of downsides: - The object lookup is O(n). This doesn't matter in practice because we only store a small number of objects. 
- We don't have an easy way to iterate over all objects in lexicographic order. - We don't have an easy way to compute unique object ID prefixes. Refactor the code to use an oidtree instead. This is the same data structure used by our loose object source, and thus it means we get a bunch of functionality for free. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb/source-inmemory.c | 72 +++++++++++++++++++++++++++++++------------ odb/source-inmemory.h | 13 ++------ 2 files changed, 54 insertions(+), 31 deletions(-) diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index d05a13df45ea5f..3b51cc7fefd86b 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -3,20 +3,29 @@ #include "odb.h" #include "odb/source-inmemory.h" #include "odb/streaming.h" +#include "oidtree.h" #include "repository.h" -static const struct cached_object *find_cached_object(struct odb_source_inmemory *source, - const struct object_id *oid) +struct inmemory_object { + enum object_type type; + const void *buf; + unsigned long size; +}; + +static const struct inmemory_object *find_cached_object(struct odb_source_inmemory *source, + const struct object_id *oid) { - static const struct cached_object empty_tree = { + static const struct inmemory_object empty_tree = { .type = OBJ_TREE, .buf = "", }; - const struct cached_object_entry *co = source->objects; + const struct inmemory_object *object; - for (size_t i = 0; i < source->objects_nr; i++, co++) - if (oideq(&co->oid, oid)) - return &co->value; + if (source->objects) { + object = oidtree_get(source->objects, oid); + if (object) + return object; + } if (oid->algo && oideq(oid, hash_algos[oid->algo].empty_tree)) return &empty_tree; @@ -30,7 +39,7 @@ static int odb_source_inmemory_read_object_info(struct odb_source *source, enum object_info_flags flags UNUSED) { struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); - const struct cached_object *object; + const struct inmemory_object 
*object; object = find_cached_object(inmemory, oid); if (!object) @@ -88,7 +97,7 @@ static int odb_source_inmemory_read_object_stream(struct odb_read_stream **out, { struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); struct odb_read_stream_inmemory *stream; - const struct cached_object *object; + const struct inmemory_object *object; object = find_cached_object(inmemory, oid); if (!object) @@ -113,17 +122,23 @@ static int odb_source_inmemory_write_object(struct odb_source *source, enum odb_write_object_flags flags UNUSED) { struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); - struct cached_object_entry *object; + struct inmemory_object *object; hash_object_file(source->odb->repo->hash_algo, buf, len, type, oid); - ALLOC_GROW(inmemory->objects, inmemory->objects_nr + 1, - inmemory->objects_alloc); - object = &inmemory->objects[inmemory->objects_nr++]; - object->value.size = len; - object->value.type = type; - object->value.buf = xmemdupz(buf, len); - oidcpy(&object->oid, oid); + if (!inmemory->objects) { + CALLOC_ARRAY(inmemory->objects, 1); + oidtree_init(inmemory->objects); + } else if (oidtree_contains(inmemory->objects, oid)) { + return 0; + } + + CALLOC_ARRAY(object, 1); + object->size = len; + object->type = type; + object->buf = xmemdupz(buf, len); + + oidtree_insert(inmemory->objects, oid, object); return 0; } @@ -167,12 +182,29 @@ static int odb_source_inmemory_write_object_stream(struct odb_source *source, return ret; } +static int inmemory_object_free(const struct object_id *oid UNUSED, + void *node_data, + void *cb_data UNUSED) +{ + struct inmemory_object *object = node_data; + free((void *) object->buf); + free(object); + return 0; +} + static void odb_source_inmemory_free(struct odb_source *source) { struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); - for (size_t i = 0; i < inmemory->objects_nr; i++) - free((char *) inmemory->objects[i].value.buf); - free(inmemory->objects); 
+ + if (inmemory->objects) { + struct object_id null_oid = { 0 }; + + oidtree_each(inmemory->objects, &null_oid, 0, + inmemory_object_free, NULL); + oidtree_clear(inmemory->objects); + free(inmemory->objects); + } + free(inmemory->base.path); free(inmemory); } diff --git a/odb/source-inmemory.h b/odb/source-inmemory.h index d1b05a3996468a..a88fc2e320ed5c 100644 --- a/odb/source-inmemory.h +++ b/odb/source-inmemory.h @@ -3,14 +3,7 @@ #include "odb/source.h" -struct cached_object_entry { - struct object_id oid; - struct cached_object { - enum object_type type; - const void *buf; - unsigned long size; - } value; -}; +struct oidtree; /* * An in-memory source that you can write objects to that shall be made @@ -20,9 +13,7 @@ struct cached_object_entry { */ struct odb_source_inmemory { struct odb_source base; - - struct cached_object_entry *objects; - size_t objects_nr, objects_alloc; + struct oidtree *objects; }; /* Create a new in-memory object database source. */ From 4babe3b673882adf853526475192ae7e3007877c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:41 +0200 Subject: [PATCH 33/93] odb/source-inmemory: implement `for_each_object()` callback Implement the `for_each_object()` callback function for the in-memory source. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb/source-inmemory.c | 88 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 72 insertions(+), 16 deletions(-) diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index 3b51cc7fefd86b..f60eecbdbbdfff 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -33,6 +33,28 @@ static const struct inmemory_object *find_cached_object(struct odb_source_inmemo return NULL; } +static void populate_object_info(struct odb_source_inmemory *source, + struct object_info *oi, + const struct inmemory_object *object) +{ + if (!oi) + return; + + if (oi->typep) + *(oi->typep) = object->type; + if (oi->sizep) + *(oi->sizep) = object->size; + if (oi->disk_sizep) + *(oi->disk_sizep) = 0; + if (oi->delta_base_oid) + oidclr(oi->delta_base_oid, source->base.odb->repo->hash_algo); + if (oi->contentp) + *oi->contentp = xmemdupz(object->buf, object->size); + if (oi->mtimep) + *oi->mtimep = 0; + oi->whence = OI_CACHED; +} + static int odb_source_inmemory_read_object_info(struct odb_source *source, const struct object_id *oid, struct object_info *oi, @@ -45,22 +67,7 @@ static int odb_source_inmemory_read_object_info(struct odb_source *source, if (!object) return -1; - if (oi) { - if (oi->typep) - *(oi->typep) = object->type; - if (oi->sizep) - *(oi->sizep) = object->size; - if (oi->disk_sizep) - *(oi->disk_sizep) = 0; - if (oi->delta_base_oid) - oidclr(oi->delta_base_oid, source->odb->repo->hash_algo); - if (oi->contentp) - *oi->contentp = xmemdupz(object->buf, object->size); - if (oi->mtimep) - *oi->mtimep = 0; - oi->whence = OI_CACHED; - } - + populate_object_info(inmemory, oi, object); return 0; } @@ -114,6 +121,54 @@ static int odb_source_inmemory_read_object_stream(struct odb_read_stream **out, return 0; } +struct odb_source_inmemory_for_each_object_data { + struct odb_source_inmemory *inmemory; + const struct object_info *request; + odb_for_each_object_cb cb; + void *cb_data; +}; + +static int 
odb_source_inmemory_for_each_object_cb(const struct object_id *oid, + void *node_data, void *cb_data) +{ + struct odb_source_inmemory_for_each_object_data *data = cb_data; + struct inmemory_object *object = node_data; + + if (data->request) { + struct object_info oi = *data->request; + populate_object_info(data->inmemory, &oi, object); + return data->cb(oid, &oi, data->cb_data); + } else { + return data->cb(oid, NULL, data->cb_data); + } +} + +static int odb_source_inmemory_for_each_object(struct odb_source *source, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + const struct odb_for_each_object_options *opts) +{ + struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); + struct odb_source_inmemory_for_each_object_data payload = { + .inmemory = inmemory, + .request = request, + .cb = cb, + .cb_data = cb_data, + }; + struct object_id null_oid = { 0 }; + + if ((opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) || + (opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local)) + return 0; + if (!inmemory->objects) + return 0; + + return oidtree_each(inmemory->objects, + opts->prefix ? 
opts->prefix : &null_oid, opts->prefix_hex_len, + odb_source_inmemory_for_each_object_cb, &payload); +} + static int odb_source_inmemory_write_object(struct odb_source *source, const void *buf, unsigned long len, enum object_type type, @@ -219,6 +274,7 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) source->base.free = odb_source_inmemory_free; source->base.read_object_info = odb_source_inmemory_read_object_info; source->base.read_object_stream = odb_source_inmemory_read_object_stream; + source->base.for_each_object = odb_source_inmemory_for_each_object; source->base.write_object = odb_source_inmemory_write_object; source->base.write_object_stream = odb_source_inmemory_write_object_stream; From 3bd2856d3448943c4037d454f3e9cc0135330e73 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:42 +0200 Subject: [PATCH 34/93] odb/source-inmemory: implement `find_abbrev_len()` callback Implement the `find_abbrev_len()` callback function for the in-memory source. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb/source-inmemory.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index f60eecbdbbdfff..44d9bbedeca6f2 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -169,6 +169,44 @@ static int odb_source_inmemory_for_each_object(struct odb_source *source, odb_source_inmemory_for_each_object_cb, &payload); } +struct find_abbrev_len_data { + const struct object_id *oid; + unsigned len; +}; + +static int find_abbrev_len_cb(const struct object_id *oid, + struct object_info *oi UNUSED, + void *cb_data) +{ + struct find_abbrev_len_data *data = cb_data; + unsigned len = oid_common_prefix_hexlen(oid, data->oid); + if (len != hash_algos[oid->algo].hexsz && len >= data->len) + data->len = len + 1; + return 0; +} + +static int odb_source_inmemory_find_abbrev_len(struct odb_source *source, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + struct odb_for_each_object_options opts = { + .prefix = oid, + .prefix_hex_len = min_len, + }; + struct find_abbrev_len_data data = { + .oid = oid, + .len = min_len, + }; + int ret; + + ret = odb_source_inmemory_for_each_object(source, NULL, find_abbrev_len_cb, + &data, &opts); + *out = data.len; + + return ret; +} + static int odb_source_inmemory_write_object(struct odb_source *source, const void *buf, unsigned long len, enum object_type type, @@ -275,6 +313,7 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) source->base.read_object_info = odb_source_inmemory_read_object_info; source->base.read_object_stream = odb_source_inmemory_read_object_stream; source->base.for_each_object = odb_source_inmemory_for_each_object; + source->base.find_abbrev_len = odb_source_inmemory_find_abbrev_len; source->base.write_object = odb_source_inmemory_write_object; source->base.write_object_stream = odb_source_inmemory_write_object_stream; 
From 27d219132afe13db43d9732caeb37a14c026e717 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:43 +0200 Subject: [PATCH 35/93] odb/source-inmemory: implement `count_objects()` callback Implement the `count_objects()` callback function for the in-memory source. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb/source-inmemory.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index 44d9bbedeca6f2..674dbcad3001a6 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -207,6 +207,25 @@ static int odb_source_inmemory_find_abbrev_len(struct odb_source *source, return ret; } +static int count_objects_cb(const struct object_id *oid UNUSED, + struct object_info *oi UNUSED, + void *cb_data) +{ + unsigned long *counter = cb_data; + (*counter)++; + return 0; +} + +static int odb_source_inmemory_count_objects(struct odb_source *source, + enum odb_count_objects_flags flags UNUSED, + unsigned long *out) +{ + struct odb_for_each_object_options opts = { 0 }; + *out = 0; + return odb_source_inmemory_for_each_object(source, NULL, count_objects_cb, + out, &opts); +} + static int odb_source_inmemory_write_object(struct odb_source *source, const void *buf, unsigned long len, enum object_type type, @@ -314,6 +333,7 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) source->base.read_object_stream = odb_source_inmemory_read_object_stream; source->base.for_each_object = odb_source_inmemory_for_each_object; source->base.find_abbrev_len = odb_source_inmemory_find_abbrev_len; + source->base.count_objects = odb_source_inmemory_count_objects; source->base.write_object = odb_source_inmemory_write_object; source->base.write_object_stream = odb_source_inmemory_write_object_stream; From 7357196c49d537588d6c450fa3a902fac13cfbb9 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:44 +0200 Subject: [PATCH 36/93] 
odb/source-inmemory: implement `freshen_object()` callback Implement the `freshen_object()` callback function for the in-memory source. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb/source-inmemory.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index 674dbcad3001a6..8934e0f54785de 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -294,6 +294,15 @@ static int odb_source_inmemory_write_object_stream(struct odb_source *source, return ret; } +static int odb_source_inmemory_freshen_object(struct odb_source *source, + const struct object_id *oid) +{ + struct odb_source_inmemory *inmemory = odb_source_inmemory_downcast(source); + if (find_cached_object(inmemory, oid)) + return 1; + return 0; +} + static int inmemory_object_free(const struct object_id *oid UNUSED, void *node_data, void *cb_data UNUSED) @@ -336,6 +345,7 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) source->base.count_objects = odb_source_inmemory_count_objects; source->base.write_object = odb_source_inmemory_write_object; source->base.write_object_stream = odb_source_inmemory_write_object_stream; + source->base.freshen_object = odb_source_inmemory_freshen_object; return source; } From 314fa0199ddc1a37069ab7c006a5b0bb8e72f45d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:45 +0200 Subject: [PATCH 37/93] odb/source-inmemory: stub out remaining functions Stub out remaining functions that we either don't need or that are basically no-ops. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb/source-inmemory.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c index 8934e0f54785de..e004566d768b01 100644 --- a/odb/source-inmemory.c +++ b/odb/source-inmemory.c @@ -303,6 +303,32 @@ static int odb_source_inmemory_freshen_object(struct odb_source *source, return 0; } +static int odb_source_inmemory_begin_transaction(struct odb_source *source UNUSED, + struct odb_transaction **out UNUSED) +{ + return error("in-memory source does not support transactions"); +} + +static int odb_source_inmemory_read_alternates(struct odb_source *source UNUSED, + struct strvec *out UNUSED) +{ + return 0; +} + +static int odb_source_inmemory_write_alternate(struct odb_source *source UNUSED, + const char *alternate UNUSED) +{ + return error("in-memory source does not support alternates"); +} + +static void odb_source_inmemory_close(struct odb_source *source UNUSED) +{ +} + +static void odb_source_inmemory_reprepare(struct odb_source *source UNUSED) +{ +} + static int inmemory_object_free(const struct object_id *oid UNUSED, void *node_data, void *cb_data UNUSED) @@ -338,6 +364,8 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) odb_source_init(&source->base, odb, ODB_SOURCE_INMEMORY, "source", false); source->base.free = odb_source_inmemory_free; + source->base.close = odb_source_inmemory_close; + source->base.reprepare = odb_source_inmemory_reprepare; source->base.read_object_info = odb_source_inmemory_read_object_info; source->base.read_object_stream = odb_source_inmemory_read_object_stream; source->base.for_each_object = odb_source_inmemory_for_each_object; @@ -346,6 +374,9 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb) source->base.write_object = odb_source_inmemory_write_object; source->base.write_object_stream = odb_source_inmemory_write_object_stream; 
source->base.freshen_object = odb_source_inmemory_freshen_object; + source->base.begin_transaction = odb_source_inmemory_begin_transaction; + source->base.read_alternates = odb_source_inmemory_read_alternates; + source->base.write_alternate = odb_source_inmemory_write_alternate; return source; } From fdf74cb2cab6a4a95fd6e7e589ac6a4508bf358f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:46 +0200 Subject: [PATCH 38/93] odb: generic in-memory source Make the in-memory source generic. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 8 ++++---- odb.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/odb.c b/odb.c index 24e929f03cbccf..965ef68e4eca22 100644 --- a/odb.c +++ b/odb.c @@ -560,7 +560,7 @@ static int do_oid_object_info_extended(struct object_database *odb, if (is_null_oid(real)) return -1; - if (!odb_source_read_object_info(&odb->inmemory_objects->base, oid, oi, flags)) + if (!odb_source_read_object_info(odb->inmemory_objects, oid, oi, flags)) return 0; odb_prepare_alternates(odb); @@ -737,7 +737,7 @@ int odb_pretend_object(struct object_database *odb, if (odb_has_object(odb, oid, 0)) return 0; - return odb_source_write_object(&odb->inmemory_objects->base, + return odb_source_write_object(odb->inmemory_objects, buf, len, type, oid, NULL, 0); } @@ -1020,7 +1020,7 @@ struct object_database *odb_new(struct repository *repo, o->sources = odb_source_new(o, primary_source, true); o->sources_tail = &o->sources->next; o->alternate_db = xstrdup_or_null(secondary_sources); - o->inmemory_objects = odb_source_inmemory_new(o); + o->inmemory_objects = &odb_source_inmemory_new(o)->base; free(to_free); @@ -1045,7 +1045,7 @@ static void odb_free_sources(struct object_database *o) o->sources = next; } - odb_source_free(&o->inmemory_objects->base); + odb_source_free(o->inmemory_objects); o->inmemory_objects = NULL; kh_destroy_odb_path_map(o->source_by_path); diff --git a/odb.h b/odb.h index 
c3a7edf9c848dd..73553ed5a7b1ea 100644 --- a/odb.h +++ b/odb.h @@ -81,7 +81,7 @@ struct object_database { * to write them into the object store (e.g. a browse-only * application). */ - struct odb_source_inmemory *inmemory_objects; + struct odb_source *inmemory_objects; /* * A fast, rough count of the number of objects in the repository. From d2902a45498793f8dc69abc6448f517b69437eec Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 10 Apr 2026 14:12:47 +0200 Subject: [PATCH 39/93] t/unit-tests: add tests for the in-memory object source While the in-memory object source is a full-fledged source, our code base only exercises parts of its functionality because we only use it in git-blame(1). Implement unit tests to verify that the yet-unused functionality of the backend works as expected. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Makefile | 1 + t/meson.build | 1 + t/unit-tests/u-odb-inmemory.c | 313 ++++++++++++++++++++++++++++++++++ 3 files changed, 315 insertions(+) create mode 100644 t/unit-tests/u-odb-inmemory.c diff --git a/Makefile b/Makefile index 3cda12c4556a6f..68b4daa1ad275f 100644 --- a/Makefile +++ b/Makefile @@ -1529,6 +1529,7 @@ CLAR_TEST_SUITES += u-hash CLAR_TEST_SUITES += u-hashmap CLAR_TEST_SUITES += u-list-objects-filter-options CLAR_TEST_SUITES += u-mem-pool +CLAR_TEST_SUITES += u-odb-inmemory CLAR_TEST_SUITES += u-oid-array CLAR_TEST_SUITES += u-oidmap CLAR_TEST_SUITES += u-oidtree diff --git a/t/meson.build b/t/meson.build index 7528e5cda5fef0..db5e01c49b9b2b 100644 --- a/t/meson.build +++ b/t/meson.build @@ -6,6 +6,7 @@ clar_test_suites = [ 'unit-tests/u-hashmap.c', 'unit-tests/u-list-objects-filter-options.c', 'unit-tests/u-mem-pool.c', + 'unit-tests/u-odb-inmemory.c', 'unit-tests/u-oid-array.c', 'unit-tests/u-oidmap.c', 'unit-tests/u-oidtree.c', diff --git a/t/unit-tests/u-odb-inmemory.c b/t/unit-tests/u-odb-inmemory.c new file mode 100644 index 00000000000000..482502ef4b1e11 --- /dev/null +++ 
b/t/unit-tests/u-odb-inmemory.c @@ -0,0 +1,313 @@ +#include "unit-test.h" +#include "hex.h" +#include "odb/source-inmemory.h" +#include "odb/streaming.h" +#include "oidset.h" +#include "repository.h" +#include "strbuf.h" + +#define RANDOM_OID "da39a3ee5e6b4b0d3255bfef95601890afd80709" +#define FOOBAR_OID "f6ea0495187600e7b2288c8ac19c5886383a4632" + +static struct repository repo = { + .hash_algo = &hash_algos[GIT_HASH_SHA1], +}; +static struct object_database *odb; + +static void cl_assert_object_info(struct odb_source_inmemory *source, + const struct object_id *oid, + enum object_type expected_type, + const char *expected_content) +{ + enum object_type actual_type; + unsigned long actual_size; + void *actual_content; + struct object_info oi = { + .typep = &actual_type, + .sizep = &actual_size, + .contentp = &actual_content, + }; + + cl_must_pass(odb_source_read_object_info(&source->base, oid, &oi, 0)); + cl_assert_equal_u(actual_size, strlen(expected_content)); + cl_assert_equal_u(actual_type, expected_type); + cl_assert_equal_s((char *) actual_content, expected_content); + + free(actual_content); +} + +void test_odb_inmemory__initialize(void) +{ + odb = odb_new(&repo, "", ""); +} + +void test_odb_inmemory__cleanup(void) +{ + odb_free(odb); +} + +void test_odb_inmemory__new(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + cl_assert_equal_i(source->base.type, ODB_SOURCE_INMEMORY); + odb_source_free(&source->base); +} + +void test_odb_inmemory__read_missing_object(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + struct object_id oid; + const char *end; + + cl_must_pass(parse_oid_hex_algop(RANDOM_OID, &oid, &end, repo.hash_algo)); + cl_must_fail(odb_source_read_object_info(&source->base, &oid, NULL, 0)); + + odb_source_free(&source->base); +} + +void test_odb_inmemory__read_empty_tree(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + cl_assert_object_info(source, 
repo.hash_algo->empty_tree, OBJ_TREE, ""); + odb_source_free(&source->base); +} + +void test_odb_inmemory__read_written_object(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + const char data[] = "foobar"; + struct object_id written_oid; + + cl_must_pass(odb_source_write_object(&source->base, data, strlen(data), + OBJ_BLOB, &written_oid, NULL, 0)); + cl_assert_equal_s(oid_to_hex(&written_oid), FOOBAR_OID); + cl_assert_object_info(source, &written_oid, OBJ_BLOB, "foobar"); + + odb_source_free(&source->base); +} + +void test_odb_inmemory__read_stream_object(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + struct odb_read_stream *stream; + struct object_id written_oid; + const char data[] = "foobar"; + char buf[3] = { 0 }; + + cl_must_pass(odb_source_write_object(&source->base, data, strlen(data), + OBJ_BLOB, &written_oid, NULL, 0)); + + cl_must_pass(odb_source_read_object_stream(&stream, &source->base, + &written_oid)); + cl_assert_equal_i(stream->type, OBJ_BLOB); + cl_assert_equal_u(stream->size, 6); + + cl_assert_equal_i(odb_read_stream_read(stream, buf, 2), 2); + cl_assert_equal_s(buf, "fo"); + cl_assert_equal_i(odb_read_stream_read(stream, buf, 2), 2); + cl_assert_equal_s(buf, "ob"); + cl_assert_equal_i(odb_read_stream_read(stream, buf, 2), 2); + cl_assert_equal_s(buf, "ar"); + cl_assert_equal_i(odb_read_stream_read(stream, buf, 2), 0); + + odb_read_stream_close(stream); + odb_source_free(&source->base); +} + +static int add_one_object(const struct object_id *oid, + struct object_info *oi UNUSED, + void *payload) +{ + struct oidset *actual_oids = payload; + cl_must_pass(oidset_insert(actual_oids, oid)); + return 0; +} + +void test_odb_inmemory__for_each_object(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + struct odb_for_each_object_options opts = { 0 }; + struct oidset expected_oids = OIDSET_INIT; + struct oidset actual_oids = OIDSET_INIT; + struct strbuf buf = 
STRBUF_INIT; + + cl_must_pass(odb_source_for_each_object(&source->base, NULL, + add_one_object, &actual_oids, &opts)); + cl_assert_equal_u(oidset_size(&actual_oids), 0); + + for (int i = 0; i < 10; i++) { + struct object_id written_oid; + + strbuf_reset(&buf); + strbuf_addf(&buf, "%d", i); + + cl_must_pass(odb_source_write_object(&source->base, buf.buf, buf.len, + OBJ_BLOB, &written_oid, NULL, 0)); + cl_must_pass(oidset_insert(&expected_oids, &written_oid)); + } + + cl_must_pass(odb_source_for_each_object(&source->base, NULL, + add_one_object, &actual_oids, &opts)); + cl_assert_equal_b(oidset_equal(&expected_oids, &actual_oids), true); + + odb_source_free(&source->base); + oidset_clear(&expected_oids); + oidset_clear(&actual_oids); + strbuf_release(&buf); +} + +static int abort_after_two_objects(const struct object_id *oid UNUSED, + struct object_info *oi UNUSED, + void *payload) +{ + unsigned *counter = payload; + (*counter)++; + if (*counter == 2) + return 123; + return 0; +} + +void test_odb_inmemory__for_each_object_can_abort_iteration(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + struct odb_for_each_object_options opts = { 0 }; + struct object_id written_oid; + unsigned counter = 0; + + cl_must_pass(odb_source_write_object(&source->base, "1", 1, + OBJ_BLOB, &written_oid, NULL, 0)); + cl_must_pass(odb_source_write_object(&source->base, "2", 1, + OBJ_BLOB, &written_oid, NULL, 0)); + cl_must_pass(odb_source_write_object(&source->base, "3", 1, + OBJ_BLOB, &written_oid, NULL, 0)); + + cl_assert_equal_i(odb_source_for_each_object(&source->base, NULL, + abort_after_two_objects, + &counter, &opts), + 123); + cl_assert_equal_u(counter, 2); + + odb_source_free(&source->base); +} + +void test_odb_inmemory__count_objects(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + struct object_id written_oid; + unsigned long count; + + cl_must_pass(odb_source_count_objects(&source->base, 0, &count)); + 
cl_assert_equal_u(count, 0); + + cl_must_pass(odb_source_write_object(&source->base, "1", 1, + OBJ_BLOB, &written_oid, NULL, 0)); + cl_must_pass(odb_source_write_object(&source->base, "2", 1, + OBJ_BLOB, &written_oid, NULL, 0)); + cl_must_pass(odb_source_write_object(&source->base, "3", 1, + OBJ_BLOB, &written_oid, NULL, 0)); + + cl_must_pass(odb_source_count_objects(&source->base, 0, &count)); + cl_assert_equal_u(count, 3); + + odb_source_free(&source->base); +} + +void test_odb_inmemory__find_abbrev_len(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + struct object_id oid1, oid2; + unsigned abbrev_len; + + /* + * The two blobs we're about to write share the first 10 hex characters + * of their object IDs ("a09f43dc45"), so at least 11 characters are + * needed to tell them apart: + * + * "368317" -> a09f43dc4562d45115583f5094640ae237df55f7 + * "514796" -> a09f43dc45fef837235eb7e6b1a6ca5e169a3981 + * + * With only one blob written we expect a length of 4. + */ + cl_must_pass(odb_source_write_object(&source->base, "368317", strlen("368317"), + OBJ_BLOB, &oid1, NULL, 0)); + cl_must_pass(odb_source_find_abbrev_len(&source->base, &oid1, 4, + &abbrev_len)); + cl_assert_equal_u(abbrev_len, 4); + + /* + * With both objects present, the shared 10-character prefix means we + * need at least 11 characters to uniquely identify either object. 
+ */ + cl_must_pass(odb_source_write_object(&source->base, "514796", strlen("514796"), + OBJ_BLOB, &oid2, NULL, 0)); + cl_must_pass(odb_source_find_abbrev_len(&source->base, &oid1, 4, + &abbrev_len)); + cl_assert_equal_u(abbrev_len, 11); + + odb_source_free(&source->base); +} + +void test_odb_inmemory__freshen_object(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + struct object_id written_oid; + struct object_id oid; + const char *end; + + cl_must_pass(parse_oid_hex_algop(RANDOM_OID, &oid, &end, repo.hash_algo)); + cl_assert_equal_i(odb_source_freshen_object(&source->base, &oid), 0); + + cl_must_pass(odb_source_write_object(&source->base, "foobar", + strlen("foobar"), OBJ_BLOB, + &written_oid, NULL, 0)); + cl_assert_equal_i(odb_source_freshen_object(&source->base, + &written_oid), 1); + + odb_source_free(&source->base); +} + +struct membuf_write_stream { + struct odb_write_stream base; + const char *buf; + size_t offset; + size_t size; +}; + +static ssize_t membuf_write_stream_read(struct odb_write_stream *stream, + unsigned char *buf, size_t len) +{ + struct membuf_write_stream *s = container_of(stream, struct membuf_write_stream, base); + size_t chunk_size = 2; + + if (chunk_size > len) + chunk_size = len; + if (chunk_size > s->size - s->offset) + chunk_size = s->size - s->offset; + + memcpy(buf, s->buf + s->offset, chunk_size); + + s->offset += chunk_size; + if (s->offset == s->size) + s->base.is_finished = 1; + + return chunk_size; +} + +void test_odb_inmemory__write_object_stream(void) +{ + struct odb_source_inmemory *source = odb_source_inmemory_new(odb); + const char data[] = "foobar"; + struct membuf_write_stream stream = { + .base.read = membuf_write_stream_read, + .buf = data, + .size = strlen(data), + }; + struct object_id written_oid; + + cl_must_pass(odb_source_write_object_stream(&source->base, &stream.base, + strlen(data), &written_oid)); + cl_assert_equal_s(oid_to_hex(&written_oid), FOOBAR_OID); + 
cl_assert_object_info(source, &written_oid, OBJ_BLOB, "foobar"); + + odb_source_free(&source->base); +} From 34a891a2d30865316be2628949d4f1b005f65662 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Fri, 15 May 2026 09:33:53 +0200 Subject: [PATCH 40/93] trailer: change strbuf in-place in unfold_value() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid an allocation by doing s/\n\s*/ /g (replacing NL and any following whitespace with a SP) right in the strbuf instead of copying the result to a temporary one and swapping them in the end. We can safely do that because the replacement is never longer than the original string. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- trailer.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/trailer.c b/trailer.c index 470f86a4a2f83e..6d8ec7fa8d88b5 100644 --- a/trailer.c +++ b/trailer.c @@ -988,10 +988,9 @@ static int ends_with_blank_line(const char *buf, size_t len) static void unfold_value(struct strbuf *val) { - struct strbuf out = STRBUF_INIT; size_t i; + size_t pos = 0; - strbuf_grow(&out, val->len); i = 0; while (i < val->len) { char c = val->buf[i++]; @@ -999,18 +998,14 @@ static void unfold_value(struct strbuf *val) /* Collapse continuation down to a single space. 
*/ while (i < val->len && isspace(val->buf[i])) i++; - strbuf_addch(&out, ' '); - } else { - strbuf_addch(&out, c); + c = ' '; } + val->buf[pos++] = c; } + strbuf_setlen(val, pos); /* Empty lines may have left us with whitespace cruft at the edges */ - strbuf_trim(&out); - - /* output goes back to val as if we modified it in-place */ - strbuf_swap(&out, val); - strbuf_release(&out); + strbuf_trim(val); } static struct trailer_block *trailer_block_new(void) From 15abb27e3a12f8cf423eeca679634168ae40c526 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Fri, 15 May 2026 15:48:09 +0000 Subject: [PATCH 41/93] diff-format.adoc: remove mention of diff-tree specific output In the "Raw output format" section, we start by mentioning that 'git diff-tree' prints the hashes of what is being compared. This is only true in --stdin mode, and is already mentioned in the description of '--stdin' in git-diff-tree.adoc. Remove this sentence such that we only focus on the common output between diff-tree, diff-index, diff-files and diff --raw. Signed-off-by: Junio C Hamano --- Documentation/diff-format.adoc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Documentation/diff-format.adoc b/Documentation/diff-format.adoc index 9f7e9882418349..7f18c64f1efc1f 100644 --- a/Documentation/diff-format.adoc +++ b/Documentation/diff-format.adoc @@ -19,9 +19,7 @@ compared differs: `git-diff-files [...]`:: compares the index and the files on the filesystem. -The `git-diff-tree` command begins its output by printing the hash of -what is being compared. After that, all the commands print one output -line per changed file. +All the commands print one output line per changed file.
An output line is formatted this way: From 4cd5d8a3c84a7d6f66f9bed67c1b974043b8a639 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Fri, 15 May 2026 15:48:10 +0000 Subject: [PATCH 42/93] diff-format.adoc: 'git diff-files' prints two lines for unmerged files Since 10637b84d9 (diff-files: -1/-2/-3 to diff against unmerged stage., 2005-11-29), for unmerged entries 'git diff-files' prints both an "unmerged" line ('U'), as well as an "in-place edit" line ('M') comparing stage 2 (by default) with the working tree. The "Raw output format" documentation however mentions that all commands print a single line per changed file. Adjust diff-format.adoc to also mention this special case, for completeness. Signed-off-by: Philippe Blain Signed-off-by: Junio C Hamano --- Documentation/diff-format.adoc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/diff-format.adoc b/Documentation/diff-format.adoc index 7f18c64f1efc1f..43d91ef8684b0a 100644 --- a/Documentation/diff-format.adoc +++ b/Documentation/diff-format.adoc @@ -19,7 +19,9 @@ compared differs: `git-diff-files [...]`:: compares the index and the files on the filesystem. -All the commands print one output line per changed file. +All the commands print one output line per changed file, +except `git diff-files` in the case of an unmerged file, which prints +both an "unmerged" and an "in-place edit" line. An output line is formatted this way: From 6d09e798bcfba92ef071abb27ad807985681122c Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Fri, 15 May 2026 15:48:11 +0000 Subject: [PATCH 43/93] diff-format.adoc: mode and hash are 0* for unmerged paths from index only In the "Raw output format" section, we mention that the 'mode' and 'sha1' for "src" and "dst" are 0* if "(creation|deletion) or unmerged". For unmerged entries, 'mode' and 'sha1' are in fact 0* only when we are looking at the index, i.e. on the left side for 'git diff-files' and on the right side for 'git diff-index --cached'.
Be more precise by mentioning this, and while at it uniformize the wording of the "work tree out of sync with the index" case. Signed-off-by: Philippe Blain Signed-off-by: Junio C Hamano --- Documentation/diff-format.adoc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/diff-format.adoc b/Documentation/diff-format.adoc index 43d91ef8684b0a..ef5df140fe5814 100644 --- a/Documentation/diff-format.adoc +++ b/Documentation/diff-format.adoc @@ -37,13 +37,13 @@ unmerged :000000 000000 0000000 0000000 U file6 That is, from the left to the right: . a colon. -. mode for "src"; 000000 if creation or unmerged. +. mode for "src"; 000000 if creation, or if "src" is from the index and is unmerged. . a space. -. mode for "dst"; 000000 if deletion or unmerged. +. mode for "dst"; 000000 if deletion, or if "dst" is from the index and is unmerged. . a space. -. sha1 for "src"; 0\{40\} if creation or unmerged. +. sha1 for "src"; 0\{40\} if creation, or if "src" is from the index and is unmerged. . a space. -. sha1 for "dst"; 0\{40\} if deletion, unmerged or "work tree out of sync with the index". +. sha1 for "dst"; 0\{40\} if deletion, if "dst" is from the index and is unmerged, or if "dst" is from the work tree and is out of sync with the index. . a space. . status, followed by optional "score" number. . a tab or a NUL when `-z` option is used. From 499f9048e0ef09285fe112dc387324404fb99b1d Mon Sep 17 00:00:00 2001 From: Pushkar Singh Date: Sat, 16 May 2026 18:33:48 +0000 Subject: [PATCH 44/93] stash: add coverage for show --include-untracked Add a test for 'git stash show --include-untracked' to cover the case where untracked files saved in the stash are included in the output. While stash creation and restoration of untracked files are already tested, there is currently no explicit test covering the output behavior of 'stash show --include-untracked'. 
Signed-off-by: Pushkar Singh Signed-off-by: Junio C Hamano --- t/t3903-stash.sh | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/t/t3903-stash.sh b/t/t3903-stash.sh index 70879941c22f8c..d4867536b98271 100755 --- a/t/t3903-stash.sh +++ b/t/t3903-stash.sh @@ -1790,4 +1790,21 @@ test_expect_success 'stash.index=false overridden by --index' ' test_cmp expect file ' +test_expect_success 'stash show --include-untracked includes untracked files' ' + git reset --hard && + + echo tracked >tracked && + git add tracked && + git commit -m "base" && + + echo change >>tracked && + echo untracked >untracked && + + git stash push --include-untracked && + test_path_is_missing untracked && + + git stash show --include-untracked >actual && + test_grep "untracked" actual +' + test_done From e0fcba2d9cf86ba45943066924f111006d55ba08 Mon Sep 17 00:00:00 2001 From: Karthik Nayak Date: Sun, 17 May 2026 19:32:05 +0200 Subject: [PATCH 45/93] refs/files: skip lock files during consistency checks Consistency checks in the files reference backend involve two steps: 1. Iterate over all entries within the 'refs/' directory and call `files_fsck_ref()` on each. 2. Iterate over all root refs via `for_each_root_ref()` and call `files_fsck_ref()` on each. `files_fsck_ref()` then runs all fsck checks defined in `fsck_refs_fn[]`. Step 2 goes through the refs API and only sees valid refs, but step 1 iterates the directory directly and may also encounter intermediate '*.lock' files. Currently, `files_fsck_refs_name()`, one of the functions in `fsck_refs_fn[]`, filters out lock files itself. The other function, `files_fsck_refs_content()`, has no such check and would parse the lock file. Any new function added to `fsck_refs_fn[]` would have the same problem. Move the filter up into `files_fsck_refs_dir()`, where the directory iteration happens. 
Since step 2 cannot produce lock files, this is the only site where the filter is needed, and individual checks no longer have to re-implement it. Signed-off-by: Karthik Nayak Signed-off-by: Junio C Hamano --- refs/files-backend.c | 22 ++++++++++----------- t/t0602-reffiles-fsck.sh | 41 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 11 deletions(-) diff --git a/refs/files-backend.c b/refs/files-backend.c index b3b0c25f84e503..1504a1e2f36c56 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -3864,22 +3864,12 @@ static int files_fsck_refs_content(struct ref_store *ref_store, static int files_fsck_refs_name(struct ref_store *ref_store UNUSED, struct fsck_options *o, const char *refname, - const char *path, + const char *path UNUSED, int mode UNUSED) { struct strbuf sb = STRBUF_INIT; - const char *filename; int ret = 0; - filename = basename((char *) path); - - /* - * Ignore the files ending with ".lock" as they may be lock files - * However, do not allow bare ".lock" files. - */ - if (filename[0] != '.' && ends_with(filename, ".lock")) - goto cleanup; - if (is_root_ref(refname)) goto cleanup; @@ -3939,6 +3929,7 @@ static int files_fsck_refs_dir(struct ref_store *ref_store, struct strbuf refname = STRBUF_INIT; struct strbuf sb = STRBUF_INIT; struct dir_iterator *iter; + const char *filename; int iter_status; int ret = 0; @@ -3962,6 +3953,15 @@ static int files_fsck_refs_dir(struct ref_store *ref_store, strbuf_addf(&refname, "worktrees/%s/", wt->id); strbuf_addf(&refname, "refs/%s", iter->relative_path); + filename = basename((char *) iter->path.buf); + + /* + * Ignore the files ending with ".lock" as they may be lock files. + * However, do not skip invalid refnames with '.lock' suffix. + */ + if (filename[0] != '.' 
&& ends_with(filename, ".lock")) + continue; + if (files_fsck_ref(ref_store, o, refname.buf, iter->path.buf, iter->st.st_mode) < 0) ret = -1; diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh index 3c1f553b8120ec..13259821a02ef1 100755 --- a/t/t0602-reffiles-fsck.sh +++ b/t/t0602-reffiles-fsck.sh @@ -87,6 +87,47 @@ test_expect_success 'ref name should be checked' ' ) ' +test_expect_success 'lock files should be ignored' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + git commit --allow-empty -m initial && + git checkout -b branch-1 && + + touch .git/refs/heads/branch-1.lock && + git refs verify 2>err && + test_must_be_empty err && + + echo "foobar" >.git/refs/heads/branch-2 && + test_must_fail git refs verify 2>err && + cat >expect <<-EOF && + error: refs/heads/branch-2: badRefContent: foobar + EOF + test_cmp expect err + ) +' + +test_expect_success 'bare lock files should not be ignored' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + git commit --allow-empty -m initial && + git checkout -b branch-1 && + + # invalid refname should be reported + cp .git/refs/heads/branch-1 .git/refs/heads/.branch-1.lock && + # invalid refname and content should be reported + touch .git/refs/heads/.lock && + + test_must_fail git refs verify 2>err && + test_grep "error: refs/heads/.branch-1.lock: badRefName: invalid refname format" err && + test_grep "error: refs/heads/.lock: badRefName: invalid refname format" err && + test_grep "error: refs/heads/.lock: badRefContent: " err + ) +' + test_expect_success 'ref name check should be adapted into fsck messages' ' test_when_finished "rm -rf repo" && git init repo && From c13d0f7bd4e696d5db3345e00f763df23347b6ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Mon, 18 May 2026 22:25:01 +0200 Subject: [PATCH 46/93] strbuf: use st_add3() in strbuf_grow() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
Simplify the code by calling st_add3() to do overflow checks instead of open-coding it. This changes the error message to include the offending summands, which can be helpful when tracking down the cause. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- strbuf.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/strbuf.c b/strbuf.c index 3e04addc22febb..8610965d531298 100644 --- a/strbuf.c +++ b/strbuf.c @@ -106,12 +106,10 @@ void strbuf_attach(struct strbuf *sb, void *buf, size_t len, size_t alloc) void strbuf_grow(struct strbuf *sb, size_t extra) { int new_buf = !sb->alloc; - if (unsigned_add_overflows(extra, 1) || - unsigned_add_overflows(sb->len, extra + 1)) - die("you want to use way too much memory"); + size_t new_len = st_add3(sb->len, extra, 1); if (new_buf) sb->buf = NULL; - ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc); + ALLOC_GROW(sb->buf, new_len, sb->alloc); if (new_buf) sb->buf[0] = '\0'; } From 29d9fdcf1098672b2146c60eea5202e840615772 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Mon, 18 May 2026 22:25:02 +0200 Subject: [PATCH 47/93] use __builtin_add_overflow() in st_add() with Clang MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clang and GCC optimize away comparisons of overflow checks by checking the carry flag on x64. GCC does the same on ARM64, but Clang currently (version 22.1) doesn't. It does this optimization for overflow checks that use its builtin function __builtin_add_overflow(), though. Provide a non-generic lookalike for size_t that does the same checks as before as a fallback and use the original with Clang. Use it on all platforms for simplicity. 
On an Apple M1 I get a nice speedup for a command that builds lots of strings using a strbuf, which exercises the st_add3() in strbuf_grow() for every line of output: Benchmark 1: ./git_main cat-file --batch-all-objects --batch-check='%(objectname)' Time (mean ± σ): 120.4 ms ± 0.2 ms [User: 113.8 ms, System: 6.0 ms] Range (min … max): 120.1 ms … 121.1 ms 24 runs Benchmark 2: ./git cat-file --batch-all-objects --batch-check='%(objectname)' Time (mean ± σ): 115.5 ms ± 0.1 ms [User: 108.6 ms, System: 5.8 ms] Range (min … max): 115.2 ms … 115.8 ms 25 runs Summary ./git cat-file --batch-all-objects --batch-check='%(objectname)' ran 1.04 ± 0.00 times faster than ./git_main cat-file --batch-all-objects --batch-check='%(objectname)' Suggested-by: Jeff King Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- git-compat-util.h | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/git-compat-util.h b/git-compat-util.h index ae1bdc90a4cd6a..5b1d15fe4f3562 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -614,12 +614,30 @@ static inline bool strip_suffix(const char *str, const char *suffix, int git_open_cloexec(const char *name, int flags); #define git_open(name) git_open_cloexec(name, O_RDONLY) -static inline size_t st_add(size_t a, size_t b) + +/* + * Help Clang; GCC generates the same instructions for both variants on + * x64 and aarch64. 
+ */ +#ifdef __clang__ +#define st_add_overflow __builtin_add_overflow +#else +static inline bool st_add_overflow(size_t a, size_t b, size_t *out) { if (unsigned_add_overflows(a, b)) + return true; + *out = a + b; + return false; +} +#endif + +static inline size_t st_add(size_t a, size_t b) +{ + size_t result; + if (st_add_overflow(a, b, &result)) die("size_t overflow: %"PRIuMAX" + %"PRIuMAX, (uintmax_t)a, (uintmax_t)b); - return a + b; + return result; } #define st_add3(a,b,c) st_add(st_add((a),(b)),(c)) #define st_add4(a,b,c,d) st_add(st_add3((a),(b),(c)),(d)) From 6a1964c22a2655a4c995d9a8bbf25512b1a2bc16 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 18 May 2026 21:19:01 -0400 Subject: [PATCH 48/93] quote.h: bump strvec forward declaration to the top We usually put forward declarations at the top of header files, rather than next to the functions that need them. In theory placing it next to the function has some explanatory value, but it's also just as likely to become stale if other uses are added. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- quote.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quote.h b/quote.h index 0300c291041b02..400397b11a7cfc 100644 --- a/quote.h +++ b/quote.h @@ -2,6 +2,7 @@ #define QUOTE_H struct strbuf; +struct strvec; extern int quote_path_fully; @@ -77,7 +78,6 @@ int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc); * still modify arg in place, but unlike sq_dequote_to_argv, the strvec * will duplicate and take ownership of the strings. 
*/ -struct strvec; int sq_dequote_to_strvec(char *arg, struct strvec *); int unquote_c_style(struct strbuf *, const char *quoted, const char **endp); From c5f52d03e223e1ea0c4639d22ad1ca180ec85097 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 18 May 2026 21:19:34 -0400 Subject: [PATCH 49/93] quote: drop sq_dequote_to_argv() The last caller went away in f9dbb64fad (config: parse more robust format in GIT_CONFIG_PARAMETERS, 2021-01-12), when we switched to using sq_dequote_step(). The "to_argv()" form is not a great interface. If you care about raw speed, then sq_dequote_step() lets you work incrementally without extra allocations. If you care about simplicity, then sq_dequote_to_strvec() puts the result in an encapsulated data structure. With sq_dequote_to_argv(), you have a data dependency on the original string but still have to remember to manually free the argv array itself (but not its elements). So it's sort of a worst-of-both-worlds middle ground. Let's get rid of it. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- quote.c | 5 ----- quote.h | 12 +++--------- 2 files changed, 3 insertions(+), 14 deletions(-) diff --git a/quote.c b/quote.c index b9f6bdc775c468..cff78af3a4d7db 100644 --- a/quote.c +++ b/quote.c @@ -202,11 +202,6 @@ static int sq_dequote_to_argv_internal(char *arg, return 0; } -int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc) -{ - return sq_dequote_to_argv_internal(arg, argv, nr, alloc, NULL); -} - int sq_dequote_to_strvec(char *arg, struct strvec *array) { return sq_dequote_to_argv_internal(arg, NULL, NULL, NULL, array); diff --git a/quote.h b/quote.h index 400397b11a7cfc..989f2388c05974 100644 --- a/quote.h +++ b/quote.h @@ -68,15 +68,9 @@ char *sq_dequote_step(char *src, char **next); /* * Same as the above, but can be used to unwrap many arguments in the - * same string separated by space. Like sq_quote, it works in place, - * modifying arg and appending pointers into it to argv. 
- */ -int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc); - -/* - * Same as above, but store the unquoted strings in a strvec. We will - * still modify arg in place, but unlike sq_dequote_to_argv, the strvec - * will duplicate and take ownership of the strings. + * same string separated by space. The strvec will duplicate and take + * ownership of the strings, but note that "arg" is still modified in-place + * during parsing. */ int sq_dequote_to_strvec(char *arg, struct strvec *); From b56ab270aab71168ab7d0731f0a3853dac7aa62f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 18 May 2026 21:20:59 -0400 Subject: [PATCH 50/93] quote: simplify internals of dequoting Our sq_dequote_to_argv_internal() helper was wrapped by the to_argv() and to_strvec() forms. Now that we have only the latter, we can stop wrapping it and drop the argv-only bits. Note that in theory sq_dequote_to_strvec() could take a const input string, which would be friendlier to its callers. We couldn't do that with the to_argv() form because it reused the input string to hold the output elements. But since we're built on sq_dequote_step(), which munges the input, we'd have to rework the parser. Since no callers care about it currently, we'll leave that for another day. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- quote.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/quote.c b/quote.c index cff78af3a4d7db..235fac8e47093f 100644 --- a/quote.c +++ b/quote.c @@ -171,9 +171,7 @@ char *sq_dequote(char *arg) return sq_dequote_step(arg, NULL); } -static int sq_dequote_to_argv_internal(char *arg, - const char ***argv, int *nr, int *alloc, - struct strvec *array) +int sq_dequote_to_strvec(char *arg, struct strvec *array) { char *next = arg; @@ -191,22 +189,12 @@ static int sq_dequote_to_argv_internal(char *arg, c = *++next; } while (isspace(c)); } - if (argv) { - ALLOC_GROW(*argv, *nr + 1, *alloc); - (*argv)[(*nr)++] = dequoted; - } - if (array) - strvec_push(array, dequoted); + strvec_push(array, dequoted); } while (next); return 0; } -int sq_dequote_to_strvec(char *arg, struct strvec *array) -{ - return sq_dequote_to_argv_internal(arg, NULL, NULL, NULL, array); -} - /* 1 means: quote as octal * 0 means: quote as octal if (quote_path_fully) * -1 means: never quote From bb50ec6b26a37789208c18d0d3f7cbf047090145 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 19 May 2026 11:52:05 +0200 Subject: [PATCH 51/93] setup: replace use of `the_repository` in static functions Replace the use of `the_repository` in "setup.c" for all static functions. For now, we simply add `the_repository` to invocations of these functions. This will be addressed in subsequent commits, where we'll move up `the_repository` one more layer to callers of "setup.c". 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- setup.c | 188 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 100 insertions(+), 88 deletions(-) diff --git a/setup.c b/setup.c index 7ec4427368a2a7..ba2898473a58a3 100644 --- a/setup.c +++ b/setup.c @@ -50,13 +50,13 @@ const char *tmp_original_cwd; * /dir/repolink/file (repolink points to /dir/repo) -> file * /dir/repo (exactly equal to work tree) -> (empty string) */ -static int abspath_part_inside_repo(char *path) +static int abspath_part_inside_repo(struct repository *repo, char *path) { size_t len; size_t wtlen; char *path0; int off; - const char *work_tree = precompose_string_if_needed(repo_get_work_tree(the_repository)); + const char *work_tree = precompose_string_if_needed(repo_get_work_tree(repo)); struct strbuf realpath = STRBUF_INIT; if (!work_tree) @@ -132,7 +132,7 @@ char *prefix_path_gently(const char *prefix, int len, free(sanitized); return NULL; } - if (abspath_part_inside_repo(sanitized)) { + if (abspath_part_inside_repo(the_repository, sanitized)) { free(sanitized); return NULL; } @@ -509,7 +509,7 @@ void setup_work_tree(void) initialized = 1; } -static void setup_original_cwd(void) +static void setup_original_cwd(struct repository *repo) { struct strbuf tmp = STRBUF_INIT; const char *worktree = NULL; @@ -535,9 +535,9 @@ static void setup_original_cwd(void) /* Normalize the directory */ if (!strbuf_realpath(&tmp, tmp_original_cwd, 0)) { - trace2_data_string("setup", the_repository, + trace2_data_string("setup", repo, "realpath-path", tmp_original_cwd); - trace2_data_string("setup", the_repository, + trace2_data_string("setup", repo, "realpath-failure", strerror(errno)); free((char*)tmp_original_cwd); tmp_original_cwd = NULL; @@ -552,7 +552,7 @@ static void setup_original_cwd(void) * Get our worktree; we only protect the current working directory * if it's in the worktree. 
*/ - worktree = repo_get_work_tree(the_repository); + worktree = repo_get_work_tree(repo); if (!worktree) goto no_prevention_needed; @@ -747,7 +747,10 @@ static int check_repo_format(const char *var, const char *value, return read_worktree_config(var, value, ctx, vdata); } -static int check_repository_format_gently(const char *gitdir, struct repository_format *candidate, int *nongit_ok) +static int check_repository_format_gently(struct repository *repo, + const char *gitdir, + struct repository_format *candidate, + int *nongit_ok) { struct strbuf sb = STRBUF_INIT; struct strbuf err = STRBUF_INIT; @@ -776,7 +779,7 @@ static int check_repository_format_gently(const char *gitdir, struct repository_ die("%s", err.buf); } - the_repository->repository_format_precious_objects = candidate->precious_objects; + repo->repository_format_precious_objects = candidate->precious_objects; string_list_clear(&candidate->unknown_extensions, 0); string_list_clear(&candidate->v1_only_extensions, 0); @@ -1034,7 +1037,8 @@ const char *read_gitfile_gently(const char *path, int *return_error_code) return error_code ? 
NULL : path; } -static void setup_git_env_internal(const char *git_dir, +static void setup_git_env_internal(struct repository *repo, + const char *git_dir, bool skip_initializing_odb) { char *git_replace_ref_base; @@ -1052,7 +1056,7 @@ static void setup_git_env_internal(const char *git_dir, args.disable_ref_updates = true; args.skip_initializing_odb = skip_initializing_odb; - repo_set_gitdir(the_repository, git_dir, &args); + repo_set_gitdir(repo, git_dir, &args); strvec_clear(&to_free); if (getenv(NO_REPLACE_OBJECTS_ENVIRONMENT)) @@ -1064,7 +1068,7 @@ static void setup_git_env_internal(const char *git_dir, shallow_file = getenv(GIT_SHALLOW_FILE_ENVIRONMENT); if (shallow_file) - set_alternate_shallow_file(the_repository, shallow_file, 0); + set_alternate_shallow_file(repo, shallow_file, 0); if (git_env_bool(NO_LAZY_FETCH_ENVIRONMENT, 0)) fetch_if_missing = 0; @@ -1072,30 +1076,31 @@ static void setup_git_env_internal(const char *git_dir, void setup_git_env(const char *git_dir) { - setup_git_env_internal(git_dir, false); + setup_git_env_internal(the_repository, git_dir, false); } -static void set_git_dir_1(const char *path, bool skip_initializing_odb) +static void set_git_dir_1(struct repository *repo, const char *path, bool skip_initializing_odb) { xsetenv(GIT_DIR_ENVIRONMENT, path, 1); - setup_git_env_internal(path, skip_initializing_odb); + setup_git_env_internal(repo, path, skip_initializing_odb); } static void update_relative_gitdir(const char *name UNUSED, const char *old_cwd, const char *new_cwd, - void *data UNUSED) + void *data) { + struct repository *repo = data; char *path = reparent_relative_path(old_cwd, new_cwd, - repo_get_git_dir(the_repository)); + repo_get_git_dir(repo)); trace_printf_key(&trace_setup_key, "setup: move $GIT_DIR to '%s'", path); - set_git_dir_1(path, true); + set_git_dir_1(repo, path, true); free(path); } -static void set_git_dir(const char *path, int make_realpath) +static void set_git_dir(struct repository *repo, const char *path, 
int make_realpath) { struct strbuf realpath = STRBUF_INIT; @@ -1104,14 +1109,15 @@ static void set_git_dir(const char *path, int make_realpath) path = realpath.buf; } - set_git_dir_1(path, false); + set_git_dir_1(repo, path, false); if (!is_absolute_path(path)) - chdir_notify_register(NULL, update_relative_gitdir, NULL); + chdir_notify_register(NULL, update_relative_gitdir, repo); strbuf_release(&realpath); } -static const char *setup_explicit_git_dir(const char *gitdirenv, +static const char *setup_explicit_git_dir(struct repository *repo, + const char *gitdirenv, struct strbuf *cwd, struct repository_format *repo_fmt, int *nongit_ok) @@ -1139,7 +1145,7 @@ static const char *setup_explicit_git_dir(const char *gitdirenv, die(_("not a git repository: '%s'"), gitdirenv); } - if (check_repository_format_gently(gitdirenv, repo_fmt, nongit_ok)) { + if (check_repository_format_gently(repo, gitdirenv, repo_fmt, nongit_ok)) { free(gitfile); return NULL; } @@ -1155,7 +1161,7 @@ static const char *setup_explicit_git_dir(const char *gitdirenv, } /* #18, #26 */ - set_git_dir(gitdirenv, 0); + set_git_dir(repo, gitdirenv, 0); free(gitfile); return NULL; } @@ -1177,7 +1183,7 @@ static const char *setup_explicit_git_dir(const char *gitdirenv, } else if (!git_env_bool(GIT_IMPLICIT_WORK_TREE_ENVIRONMENT, 1)) { /* #16d */ - set_git_dir(gitdirenv, 0); + set_git_dir(repo, gitdirenv, 0); free(gitfile); return NULL; } @@ -1185,18 +1191,18 @@ static const char *setup_explicit_git_dir(const char *gitdirenv, set_git_work_tree("."); /* set_git_work_tree() must have been called by now */ - worktree = repo_get_work_tree(the_repository); + worktree = repo_get_work_tree(repo); /* both repo_get_work_tree() and cwd are already normalized */ if (!strcmp(cwd->buf, worktree)) { /* cwd == worktree */ - set_git_dir(gitdirenv, 0); + set_git_dir(repo, gitdirenv, 0); free(gitfile); return NULL; } offset = dir_inside_of(cwd->buf, worktree); if (offset >= 0) { /* cwd inside worktree? 
*/ - set_git_dir(gitdirenv, 1); + set_git_dir(repo, gitdirenv, 1); if (chdir(worktree)) die_errno(_("cannot chdir to '%s'"), worktree); strbuf_addch(cwd, '/'); @@ -1205,17 +1211,18 @@ static const char *setup_explicit_git_dir(const char *gitdirenv, } /* cwd outside worktree */ - set_git_dir(gitdirenv, 0); + set_git_dir(repo, gitdirenv, 0); free(gitfile); return NULL; } -static const char *setup_discovered_git_dir(const char *gitdir, +static const char *setup_discovered_git_dir(struct repository *repo, + const char *gitdir, struct strbuf *cwd, int offset, struct repository_format *repo_fmt, int *nongit_ok) { - if (check_repository_format_gently(gitdir, repo_fmt, nongit_ok)) + if (check_repository_format_gently(repo, gitdir, repo_fmt, nongit_ok)) return NULL; /* --work-tree is set without --git-dir; use discovered one */ @@ -1227,14 +1234,14 @@ static const char *setup_discovered_git_dir(const char *gitdir, gitdir = to_free = real_pathdup(gitdir, 1); if (chdir(cwd->buf)) die_errno(_("cannot come back to cwd")); - ret = setup_explicit_git_dir(gitdir, cwd, repo_fmt, nongit_ok); + ret = setup_explicit_git_dir(repo, gitdir, cwd, repo_fmt, nongit_ok); free(to_free); return ret; } /* #16.2, #17.2, #20.2, #21.2, #24, #25, #28, #29 (see t1510) */ if (is_bare_repository_cfg > 0) { - set_git_dir(gitdir, (offset != cwd->len)); + set_git_dir(repo, gitdir, (offset != cwd->len)); if (chdir(cwd->buf)) die_errno(_("cannot come back to cwd")); return NULL; @@ -1243,7 +1250,7 @@ static const char *setup_discovered_git_dir(const char *gitdir, /* #0, #1, #5, #8, #9, #12, #13 */ set_git_work_tree("."); if (strcmp(gitdir, DEFAULT_GIT_DIR_ENVIRONMENT)) - set_git_dir(gitdir, 0); + set_git_dir(repo, gitdir, 0); inside_git_dir = 0; inside_work_tree = 1; if (offset >= cwd->len) @@ -1258,13 +1265,14 @@ static const char *setup_discovered_git_dir(const char *gitdir, } /* #16.1, #17.1, #20.1, #21.1, #22.1 (see t1510) */ -static const char *setup_bare_git_dir(struct strbuf *cwd, int offset, 
+static const char *setup_bare_git_dir(struct repository *repo, + struct strbuf *cwd, int offset, struct repository_format *repo_fmt, int *nongit_ok) { int root_len; - if (check_repository_format_gently(".", repo_fmt, nongit_ok)) + if (check_repository_format_gently(repo, ".", repo_fmt, nongit_ok)) return NULL; setenv(GIT_IMPLICIT_WORK_TREE_ENVIRONMENT, "0", 1); @@ -1276,7 +1284,7 @@ static const char *setup_bare_git_dir(struct strbuf *cwd, int offset, gitdir = offset == cwd->len ? "." : xmemdupz(cwd->buf, offset); if (chdir(cwd->buf)) die_errno(_("cannot come back to cwd")); - return setup_explicit_git_dir(gitdir, cwd, repo_fmt, nongit_ok); + return setup_explicit_git_dir(repo, gitdir, cwd, repo_fmt, nongit_ok); } inside_git_dir = 1; @@ -1286,10 +1294,10 @@ static const char *setup_bare_git_dir(struct strbuf *cwd, int offset, die_errno(_("cannot come back to cwd")); root_len = offset_1st_component(cwd->buf); strbuf_setlen(cwd, offset > root_len ? offset : root_len); - set_git_dir(cwd->buf, 0); + set_git_dir(repo, cwd->buf, 0); } else - set_git_dir(".", 0); + set_git_dir(repo, ".", 0); return NULL; } @@ -1827,7 +1835,7 @@ const char *enter_repo(const char *path, unsigned flags) } if (is_git_directory(".")) { - set_git_dir(".", 0); + set_git_dir(the_repository, ".", 0); check_repository_format(NULL); return path; } @@ -1891,18 +1899,18 @@ const char *setup_git_directory_gently(int *nongit_ok) switch (setup_git_directory_gently_1(&dir, &gitdir, &report, 1)) { case GIT_DIR_EXPLICIT: - prefix = setup_explicit_git_dir(gitdir.buf, &cwd, &repo_fmt, nongit_ok); + prefix = setup_explicit_git_dir(the_repository, gitdir.buf, &cwd, &repo_fmt, nongit_ok); break; case GIT_DIR_DISCOVERED: if (dir.len < cwd.len && chdir(dir.buf)) die(_("cannot change to '%s'"), dir.buf); - prefix = setup_discovered_git_dir(gitdir.buf, &cwd, dir.len, + prefix = setup_discovered_git_dir(the_repository, gitdir.buf, &cwd, dir.len, &repo_fmt, nongit_ok); break; case GIT_DIR_BARE: if (dir.len < cwd.len 
&& chdir(dir.buf)) die(_("cannot change to '%s'"), dir.buf); - prefix = setup_bare_git_dir(&cwd, dir.len, &repo_fmt, nongit_ok); + prefix = setup_bare_git_dir(the_repository, &cwd, dir.len, &repo_fmt, nongit_ok); break; case GIT_DIR_HIT_CEILING: if (!nongit_ok) @@ -2044,7 +2052,7 @@ const char *setup_git_directory_gently(int *nongit_ok) free(payload); } - setup_original_cwd(); + setup_original_cwd(the_repository); strbuf_release(&dir); strbuf_release(&gitdir); @@ -2110,7 +2118,7 @@ void check_repository_format(struct repository_format *fmt) struct repository_format repo_fmt = REPOSITORY_FORMAT_INIT; if (!fmt) fmt = &repo_fmt; - check_repository_format_gently(repo_get_git_dir(the_repository), fmt, NULL); + check_repository_format_gently(the_repository, repo_get_git_dir(the_repository), fmt, NULL); startup_info->have_repository = 1; repo_set_hash_algo(the_repository, fmt->hash_algo); repo_set_compat_hash_algo(the_repository, fmt->compat_hash_algo); @@ -2239,7 +2247,9 @@ const char *get_template_dir(const char *option_template) #define GIT_DEFAULT_HASH_ENVIRONMENT "GIT_DEFAULT_HASH" -static void copy_templates_1(struct strbuf *path, struct strbuf *template_path, +static void copy_templates_1(struct repository *repo, + struct strbuf *path, + struct strbuf *template_path, DIR *dir) { size_t path_baselen = path->len; @@ -2253,7 +2263,7 @@ static void copy_templates_1(struct strbuf *path, struct strbuf *template_path, * with the way the namespace under .git/ is organized, should * be really carefully chosen. 
*/ - safe_create_dir(the_repository, path->buf, 1); + safe_create_dir(repo, path->buf, 1); while ((de = readdir(dir)) != NULL) { struct stat st_git, st_template; int exists = 0; @@ -2281,7 +2291,7 @@ static void copy_templates_1(struct strbuf *path, struct strbuf *template_path, die_errno(_("cannot opendir '%s'"), template_path->buf); strbuf_addch(path, '/'); strbuf_addch(template_path, '/'); - copy_templates_1(path, template_path, subdir); + copy_templates_1(repo, path, template_path, subdir); closedir(subdir); } else if (exists) @@ -2306,7 +2316,7 @@ static void copy_templates_1(struct strbuf *path, struct strbuf *template_path, } } -static void copy_templates(const char *option_template) +static void copy_templates(struct repository *repo, const char *option_template) { const char *template_dir = get_template_dir(option_template); struct strbuf path = STRBUF_INIT; @@ -2347,9 +2357,9 @@ static void copy_templates(const char *option_template) goto close_free_return; } - strbuf_addstr(&path, repo_get_common_dir(the_repository)); + strbuf_addstr(&path, repo_get_common_dir(repo)); strbuf_complete(&path, '/'); - copy_templates_1(&path, &template_path, dir); + copy_templates_1(repo, &path, &template_path, dir); close_free_return: closedir(dir); free_return: @@ -2443,13 +2453,13 @@ void initialize_repository_version(int hash_algo, strbuf_release(&repo_version); } -static int is_reinit(void) +static int is_reinit(struct repository *repo) { struct strbuf buf = STRBUF_INIT; char junk[2]; int ret; - repo_git_path_replace(the_repository, &buf, "HEAD"); + repo_git_path_replace(repo, &buf, "HEAD"); ret = !access(buf.buf, R_OK) || readlink(buf.buf, junk, sizeof(junk) - 1) != -1; strbuf_release(&buf); return ret; @@ -2459,7 +2469,7 @@ void create_reference_database(const char *initial_branch, int quiet) { struct strbuf err = STRBUF_INIT; char *to_free = NULL; - int reinit = is_reinit(); + int reinit = is_reinit(the_repository); if 
(ref_store_create_on_disk(get_main_ref_store(the_repository), 0, &err)) die("failed to set up refs db: %s", err.buf); @@ -2493,7 +2503,8 @@ void create_reference_database(const char *initial_branch, int quiet) free(to_free); } -static int create_default_files(const char *template_path, +static int create_default_files(struct repository *repo, + const char *template_path, const char *original_git_dir, const struct repository_format *fmt, int init_shared_repository) @@ -2502,7 +2513,7 @@ static int create_default_files(const char *template_path, struct strbuf path = STRBUF_INIT; int reinit; int filemode; - const char *work_tree = repo_get_work_tree(the_repository); + const char *work_tree = repo_get_work_tree(repo); /* * First copy the templates -- we might have the default @@ -2513,19 +2524,19 @@ static int create_default_files(const char *template_path, * values (since we've just potentially changed what's available on * disk). */ - copy_templates(template_path); - repo_config_clear(the_repository); - repo_settings_reset_shared_repository(the_repository); - repo_config(the_repository, git_default_config, NULL); + copy_templates(repo, template_path); + repo_config_clear(repo); + repo_settings_reset_shared_repository(repo); + repo_config(repo, git_default_config, NULL); - reinit = is_reinit(); + reinit = is_reinit(repo); /* * We must make sure command-line options continue to override any * values we might have just re-read from the config. */ if (init_shared_repository != -1) - repo_settings_set_shared_repository(the_repository, + repo_settings_set_shared_repository(repo, init_shared_repository); is_bare_repository_cfg = !work_tree; @@ -2534,14 +2545,14 @@ static int create_default_files(const char *template_path, * We would have created the above under user's umask -- under * shared-repository settings, we would need to fix them up. 
*/ - if (repo_settings_get_shared_repository(the_repository)) { - adjust_shared_perm(the_repository, repo_get_git_dir(the_repository)); + if (repo_settings_get_shared_repository(repo)) { + adjust_shared_perm(repo, repo_get_git_dir(repo)); } initialize_repository_version(fmt->hash_algo, fmt->ref_storage_format, reinit); /* Check filemode trustability */ - repo_git_path_replace(the_repository, &path, "config"); + repo_git_path_replace(repo, &path, "config"); filemode = TEST_FILEMODE; if (TEST_FILEMODE && !lstat(path.buf, &st1)) { struct stat st2; @@ -2552,22 +2563,22 @@ static int create_default_files(const char *template_path, if (filemode && !reinit && (st1.st_mode & S_IXUSR)) filemode = 0; } - repo_config_set(the_repository, "core.filemode", filemode ? "true" : "false"); + repo_config_set(repo, "core.filemode", filemode ? "true" : "false"); if (is_bare_repository()) - repo_config_set(the_repository, "core.bare", "true"); + repo_config_set(repo, "core.bare", "true"); else { - repo_config_set(the_repository, "core.bare", "false"); + repo_config_set(repo, "core.bare", "false"); /* allow template config file to override the default */ - if (repo_settings_get_log_all_ref_updates(the_repository) == LOG_REFS_UNSET) - repo_config_set(the_repository, "core.logallrefupdates", "true"); + if (repo_settings_get_log_all_ref_updates(repo) == LOG_REFS_UNSET) + repo_config_set(repo, "core.logallrefupdates", "true"); if (needs_work_tree_config(original_git_dir, work_tree)) - repo_config_set(the_repository, "core.worktree", work_tree); + repo_config_set(repo, "core.worktree", work_tree); } if (!reinit) { /* Check if symlink is supported in the work tree */ - repo_git_path_replace(the_repository, &path, "tXXXXXX"); + repo_git_path_replace(repo, &path, "tXXXXXX"); if (!close(xmkstemp(path.buf)) && !unlink(path.buf) && !symlink("testing", path.buf) && @@ -2575,12 +2586,12 @@ static int create_default_files(const char *template_path, S_ISLNK(st1.st_mode)) unlink(path.buf); /* good */ 
else - repo_config_set(the_repository, "core.symlinks", "false"); + repo_config_set(repo, "core.symlinks", "false"); /* Check if the filesystem is case-insensitive */ - repo_git_path_replace(the_repository, &path, "CoNfIg"); + repo_git_path_replace(repo, &path, "CoNfIg"); if (!access(path.buf, F_OK)) - repo_config_set(the_repository, "core.ignorecase", "true"); + repo_config_set(repo, "core.ignorecase", "true"); probe_utf8_pathname_composition(); } @@ -2588,23 +2599,23 @@ static int create_default_files(const char *template_path, return reinit; } -static void create_object_directory(void) +static void create_object_directory(struct repository *repo) { struct strbuf path = STRBUF_INIT; size_t baselen; - strbuf_addstr(&path, repo_get_object_directory(the_repository)); + strbuf_addstr(&path, repo_get_object_directory(repo)); baselen = path.len; - safe_create_dir(the_repository, path.buf, 1); + safe_create_dir(repo, path.buf, 1); strbuf_setlen(&path, baselen); strbuf_addstr(&path, "/pack"); - safe_create_dir(the_repository, path.buf, 1); + safe_create_dir(repo, path.buf, 1); strbuf_setlen(&path, baselen); strbuf_addstr(&path, "/info"); - safe_create_dir(the_repository, path.buf, 1); + safe_create_dir(repo, path.buf, 1); strbuf_release(&path); } @@ -2682,7 +2693,8 @@ static int read_default_format_config(const char *key, const char *value, return ret; } -static void repository_format_configure(struct repository_format *repo_fmt, +static void repository_format_configure(struct repository *repo, + struct repository_format *repo_fmt, int hash, enum ref_storage_format ref_format) { struct default_format_config cfg = { @@ -2719,7 +2731,7 @@ static void repository_format_configure(struct repository_format *repo_fmt, } else if (cfg.hash != GIT_HASH_UNKNOWN) { repo_fmt->hash_algo = cfg.hash; } - repo_set_hash_algo(the_repository, repo_fmt->hash_algo); + repo_set_hash_algo(repo, repo_fmt->hash_algo); env = getenv("GIT_DEFAULT_REF_FORMAT"); if (repo_fmt->version >= 0 && @@ 
-2758,7 +2770,7 @@ static void repository_format_configure(struct repository_format *repo_fmt, free(backend); } - repo_set_ref_storage_format(the_repository, repo_fmt->ref_storage_format, + repo_set_ref_storage_format(repo, repo_fmt->ref_storage_format, repo_fmt->ref_storage_payload); } @@ -2782,12 +2794,12 @@ int init_db(const char *git_dir, const char *real_git_dir, if (!exist_ok && !stat(real_git_dir, &st)) die(_("%s already exists"), real_git_dir); - set_git_dir(real_git_dir, 1); + set_git_dir(the_repository, real_git_dir, 1); git_dir = repo_get_git_dir(the_repository); separate_git_dir(git_dir, original_git_dir); } else { - set_git_dir(git_dir, 1); + set_git_dir(the_repository, git_dir, 1); git_dir = repo_get_git_dir(the_repository); } startup_info->have_repository = 1; @@ -2800,7 +2812,7 @@ int init_db(const char *git_dir, const char *real_git_dir, */ check_repository_format(&repo_fmt); - repository_format_configure(&repo_fmt, hash, ref_storage_format); + repository_format_configure(the_repository, &repo_fmt, hash, ref_storage_format); /* * Ensure `core.hidedotfiles` is processed. 
This must happen after we @@ -2811,12 +2823,12 @@ int init_db(const char *git_dir, const char *real_git_dir, safe_create_dir(the_repository, git_dir, 0); - reinit = create_default_files(template_dir, original_git_dir, + reinit = create_default_files(the_repository, template_dir, original_git_dir, &repo_fmt, init_shared_repository); if (!(flags & INIT_DB_SKIP_REFDB)) create_reference_database(initial_branch, flags & INIT_DB_QUIET); - create_object_directory(); + create_object_directory(the_repository); if (repo_settings_get_shared_repository(the_repository)) { char buf[10]; From ce70cbc294f2f1f9a853307d35a78baa16207e58 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 19 May 2026 11:52:06 +0200 Subject: [PATCH 52/93] setup: stop using `the_repository` in `is_inside_git_dir()` The function `is_inside_git_dir()` verifies whether or not the current working directory is located inside the gitdir of `the_repository`. This is done by taking the gitdir path and verifying that it's a prefix of the current working directory. This information is cached so that we don't have to re-do this check multiple times. Furthermore, we proactively set the value in multiple locations so that we don't even have to perform the check when we have discovered the repository. While we could simply move the caching variable into the repository, the current layout doesn't really feel sensible in the first place: - It can easily lead to false positives or negatives if at any point in time we may switch the current working directory. - We don't call the function in a hot loop, and neither is it overly expensive to compute. Drop the caching infrastructure and instead compute the property ad-hoc via an injected repository. Note that there is one small gotcha: we often end up with relative gitdir paths, and if so `is_inside_dir()` might fail. This wasn't an issue before because of how we proactively set the cached value during repository discovery. 
Now that we stop doing that it becomes a problem though, which we work around by resolving the gitdir via `realpath()`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/rev-parse.c | 2 +- setup.c | 14 ++++++-------- setup.h | 2 +- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/builtin/rev-parse.c b/builtin/rev-parse.c index 218b5f34d6a893..a216be63cf7cec 100644 --- a/builtin/rev-parse.c +++ b/builtin/rev-parse.c @@ -1063,7 +1063,7 @@ int cmd_rev_parse(int argc, continue; } if (!strcmp(arg, "--is-inside-git-dir")) { - printf("%s\n", is_inside_git_dir() ? "true" + printf("%s\n", is_inside_git_dir(the_repository) ? "true" : "false"); continue; } diff --git a/setup.c b/setup.c index ba2898473a58a3..80f3ba0d621f44 100644 --- a/setup.c +++ b/setup.c @@ -26,7 +26,6 @@ #include "trace2.h" #include "worktree.h" -static int inside_git_dir = -1; static int inside_work_tree = -1; static int work_tree_config_is_bogus; enum allowed_bare_repo { @@ -299,7 +298,7 @@ void verify_filename(const char *prefix, */ void verify_non_filename(const char *prefix, const char *arg) { - if (!is_inside_work_tree() || is_inside_git_dir()) + if (!is_inside_work_tree() || is_inside_git_dir(the_repository)) return; if (*arg == '-') return; /* flag */ @@ -470,11 +469,12 @@ int is_nonbare_repository_dir(struct strbuf *path) return ret; } -int is_inside_git_dir(void) +int is_inside_git_dir(struct repository *repo) { - if (inside_git_dir < 0) - inside_git_dir = is_inside_dir(repo_get_git_dir(the_repository)); - return inside_git_dir; + struct strbuf buf = STRBUF_INIT; + int ret = is_inside_dir(strbuf_realpath(&buf, repo_get_git_dir(repo), 1)); + strbuf_release(&buf); + return ret; } int is_inside_work_tree(void) @@ -1251,7 +1251,6 @@ static const char *setup_discovered_git_dir(struct repository *repo, set_git_work_tree("."); if (strcmp(gitdir, DEFAULT_GIT_DIR_ENVIRONMENT)) set_git_dir(repo, gitdir, 0); - inside_git_dir = 0; inside_work_tree = 1; if (offset >= 
cwd->len) return NULL; @@ -1287,7 +1286,6 @@ static const char *setup_bare_git_dir(struct repository *repo, return setup_explicit_git_dir(repo, gitdir, cwd, repo_fmt, nongit_ok); } - inside_git_dir = 1; inside_work_tree = 0; if (offset != cwd->len) { if (chdir(cwd->buf)) diff --git a/setup.h b/setup.h index 80bc6e5f078af8..115bda647c6e94 100644 --- a/setup.h +++ b/setup.h @@ -4,7 +4,7 @@ #include "refs.h" #include "string-list.h" -int is_inside_git_dir(void); +int is_inside_git_dir(struct repository *repo); int is_inside_work_tree(void); int get_common_dir_noenv(struct strbuf *sb, const char *gitdir); int get_common_dir(struct strbuf *sb, const char *gitdir); From 8da5ecdb4d72594ee126e949bfe813c0f89fe692 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 19 May 2026 11:52:07 +0200 Subject: [PATCH 53/93] setup: stop using `the_repository` in `is_inside_work_tree()` As with `is_inside_git_dir()` in the preceding commit, `is_inside_work_tree()` determines whether the current working directory is located inside the worktree of `the_repository`. Perform the same refactoring by dropping the caching mechanism and injecting the repository that shall be checked. Note that, as in the preceding commit, we're also resolving the worktree path via `realpath()`. In theory this step is not necessary as we always set the worktree path via `repo_set_worktree()`, and that function already resolves the path for us. But resolving the path a second time is unlikely to matter performance-wise, and it feels fragile to rely on the repository's worktree path being absolute. We thus perform the same extra step even though it's ultimately not required. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/ls-files.c | 2 +- builtin/rev-parse.c | 4 ++-- object-name.c | 2 +- setup.c | 25 ++++++++++++++----------- setup.h | 2 +- submodule.c | 2 +- 6 files changed, 20 insertions(+), 17 deletions(-) diff --git a/builtin/ls-files.c b/builtin/ls-files.c index b148607f7a1468..09d95111b35b9f 100644 --- a/builtin/ls-files.c +++ b/builtin/ls-files.c @@ -703,7 +703,7 @@ int cmd_ls_files(int argc, if (dir.exclude_per_dir) exc_given = 1; - if (require_work_tree && !is_inside_work_tree()) + if (require_work_tree && !is_inside_work_tree(repo)) setup_work_tree(); if (recurse_submodules && diff --git a/builtin/rev-parse.c b/builtin/rev-parse.c index a216be63cf7cec..2fcd6851d18397 100644 --- a/builtin/rev-parse.c +++ b/builtin/rev-parse.c @@ -1006,7 +1006,7 @@ int cmd_rev_parse(int argc, } if (!strcmp(arg, "--show-cdup")) { const char *pfx = prefix; - if (!is_inside_work_tree()) { + if (!is_inside_work_tree(the_repository)) { const char *work_tree = repo_get_work_tree(the_repository); if (work_tree) @@ -1068,7 +1068,7 @@ int cmd_rev_parse(int argc, continue; } if (!strcmp(arg, "--is-inside-work-tree")) { - printf("%s\n", is_inside_work_tree() ? "true" + printf("%s\n", is_inside_work_tree(the_repository) ? 
"true" : "false"); continue; } diff --git a/object-name.c b/object-name.c index 21dcdc4a0e7c55..37a9ce8e872572 100644 --- a/object-name.c +++ b/object-name.c @@ -1703,7 +1703,7 @@ static char *resolve_relative_path(struct repository *r, const char *rel) if (!starts_with(rel, "./") && !starts_with(rel, "../")) return NULL; - if (r != the_repository || !is_inside_work_tree()) + if (r != the_repository || !is_inside_work_tree(the_repository)) die(_("relative path syntax can't be used outside working tree")); /* die() inside prefix_path() if resolved path is outside worktree */ diff --git a/setup.c b/setup.c index 80f3ba0d621f44..041e08b98ddf89 100644 --- a/setup.c +++ b/setup.c @@ -26,7 +26,6 @@ #include "trace2.h" #include "worktree.h" -static int inside_work_tree = -1; static int work_tree_config_is_bogus; enum allowed_bare_repo { ALLOWED_BARE_REPO_EXPLICIT = 0, @@ -298,7 +297,7 @@ void verify_filename(const char *prefix, */ void verify_non_filename(const char *prefix, const char *arg) { - if (!is_inside_work_tree() || is_inside_git_dir(the_repository)) + if (!is_inside_work_tree(the_repository) || is_inside_git_dir(the_repository)) return; if (*arg == '-') return; /* flag */ @@ -477,11 +476,20 @@ int is_inside_git_dir(struct repository *repo) return ret; } -int is_inside_work_tree(void) +int is_inside_work_tree(struct repository *repo) { - if (inside_work_tree < 0) - inside_work_tree = is_inside_dir(repo_get_work_tree(the_repository)); - return inside_work_tree; + struct strbuf buf = STRBUF_INIT; + const char *worktree; + int ret; + + worktree = repo_get_work_tree(repo); + if (!worktree) + return 0; + + ret = is_inside_dir(strbuf_realpath(&buf, worktree, 1)); + + strbuf_release(&buf); + return ret; } void setup_work_tree(void) @@ -798,13 +806,10 @@ static int check_repository_format_gently(struct repository *repo, if (!has_common) { if (candidate->is_bare != -1) { is_bare_repository_cfg = candidate->is_bare; - if (is_bare_repository_cfg == 1) - inside_work_tree = 
-1; } if (candidate->work_tree) { free(git_work_tree_cfg); git_work_tree_cfg = xstrdup(candidate->work_tree); - inside_work_tree = -1; } } @@ -1251,7 +1256,6 @@ static const char *setup_discovered_git_dir(struct repository *repo, set_git_work_tree("."); if (strcmp(gitdir, DEFAULT_GIT_DIR_ENVIRONMENT)) set_git_dir(repo, gitdir, 0); - inside_work_tree = 1; if (offset >= cwd->len) return NULL; @@ -1286,7 +1290,6 @@ static const char *setup_bare_git_dir(struct repository *repo, return setup_explicit_git_dir(repo, gitdir, cwd, repo_fmt, nongit_ok); } - inside_work_tree = 0; if (offset != cwd->len) { if (chdir(cwd->buf)) die_errno(_("cannot come back to cwd")); diff --git a/setup.h b/setup.h index 115bda647c6e94..71d3f918837873 100644 --- a/setup.h +++ b/setup.h @@ -5,7 +5,7 @@ #include "string-list.h" int is_inside_git_dir(struct repository *repo); -int is_inside_work_tree(void); +int is_inside_work_tree(struct repository *repo); int get_common_dir_noenv(struct strbuf *sb, const char *gitdir); int get_common_dir(struct strbuf *sb, const char *gitdir); diff --git a/submodule.c b/submodule.c index b1a0363f9d2a96..a939ff5072726f 100644 --- a/submodule.c +++ b/submodule.c @@ -2620,7 +2620,7 @@ int get_superproject_working_tree(struct strbuf *buf) int code; ssize_t len; - if (!is_inside_work_tree()) + if (!is_inside_work_tree(the_repository)) /* * FIXME: * We might have a superproject, but it is harder From 2c46e933fa1c2f4ea7e49a26d5dcabaadcfcecb6 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 19 May 2026 11:52:08 +0200 Subject: [PATCH 54/93] setup: stop using `the_repository` in `prefix_path()` Stop using `the_repository` in `prefix_path()` and instead accept the repository as a parameter. The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/blame.c | 2 +- builtin/check-attr.c | 2 +- builtin/checkout-index.c | 4 ++-- builtin/mv.c | 5 +++-- builtin/sparse-checkout.c | 3 ++- builtin/update-index.c | 6 +++--- line-log.c | 2 +- object-name.c | 2 +- pathspec.c | 2 +- setup.c | 15 ++++++++------- setup.h | 4 ++-- t/helper/test-path-utils.c | 2 +- 12 files changed, 26 insertions(+), 23 deletions(-) diff --git a/builtin/blame.c b/builtin/blame.c index f3a11eff44ffc7..ffbd3ce5c5a2e3 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -708,7 +708,7 @@ static unsigned parse_score(const char *arg) static char *add_prefix(const char *prefix, const char *path) { - return prefix_path(prefix, prefix ? strlen(prefix) : 0, path); + return prefix_path(the_repository, prefix, prefix ? strlen(prefix) : 0, path); } static int git_blame_config(const char *var, const char *value, diff --git a/builtin/check-attr.c b/builtin/check-attr.c index 51ed48ce4370c3..04b86e42ae66ca 100644 --- a/builtin/check-attr.c +++ b/builtin/check-attr.c @@ -67,7 +67,7 @@ static void check_attr(const char *prefix, struct attr_check *check, { char *full_path = - prefix_path(prefix, prefix ? strlen(prefix) : 0, file); + prefix_path(the_repository, prefix, prefix ? 
strlen(prefix) : 0, file); if (collect_all) { git_all_attrs(the_repository->index, full_path, check); diff --git a/builtin/checkout-index.c b/builtin/checkout-index.c index 188128aebd9bc0..311b94ff3174a6 100644 --- a/builtin/checkout-index.c +++ b/builtin/checkout-index.c @@ -303,7 +303,7 @@ int cmd_checkout_index(int argc, die("git checkout-index: don't mix '--all' and explicit filenames"); if (read_from_stdin) die("git checkout-index: don't mix '--stdin' and explicit filenames"); - p = prefix_path(prefix, prefix_length, arg); + p = prefix_path(repo, prefix, prefix_length, arg); err |= checkout_file(repo->index, p, prefix); free(p); } @@ -325,7 +325,7 @@ int cmd_checkout_index(int argc, die("line is badly quoted"); strbuf_swap(&buf, &unquoted); } - p = prefix_path(prefix, prefix_length, buf.buf); + p = prefix_path(repo, prefix, prefix_length, buf.buf); err |= checkout_file(repo->index, p, prefix); free(p); } diff --git a/builtin/mv.c b/builtin/mv.c index 2215d34e31f29a..948b3306390337 100644 --- a/builtin/mv.c +++ b/builtin/mv.c @@ -71,7 +71,7 @@ static void internal_prefix_pathspec(struct strvec *out, trimmed = xmemdupz(pathspec[i], to_copy); maybe_basename = (flags & DUP_BASENAME) ? 
basename(trimmed) : trimmed; - prefixed_path = prefix_path(prefix, prefixlen, maybe_basename); + prefixed_path = prefix_path(the_repository, prefix, prefixlen, maybe_basename); strvec_push(out, prefixed_path); free(prefixed_path); @@ -394,7 +394,8 @@ int cmd_mv(int argc, for (j = 0; j < last - first; j++) { const struct cache_entry *ce = the_repository->index->cache[first + j]; const char *path = ce->name; - char *prefixed_path = prefix_path(dst_with_slash, dst_with_slash_len, path + length + 1); + char *prefixed_path = prefix_path(the_repository, dst_with_slash, + dst_with_slash_len, path + length + 1); strvec_push(&sources, path); strvec_push(&destinations, prefixed_path); diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index f4aa405da93760..2af50fb2f9cb22 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -735,7 +735,8 @@ static void sanitize_paths(struct repository *repo, int prefix_len = strlen(prefix); for (i = 0; i < args->nr; i++) { - char *prefixed_path = prefix_path(prefix, prefix_len, args->v[i]); + char *prefixed_path = prefix_path(the_repository, prefix, + prefix_len, args->v[i]); strvec_replace(args, i, prefixed_path); free(prefixed_path); } diff --git a/builtin/update-index.c b/builtin/update-index.c index 8a5907767bf297..7434112b8e69b0 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -655,7 +655,7 @@ static int do_unresolve(int ac, const char **av, for (i = 1; i < ac; i++) { const char *arg = av[i]; - char *p = prefix_path(prefix, prefix_length, arg); + char *p = prefix_path(the_repository, prefix, prefix_length, arg); err |= unresolve_one(p); free(p); } @@ -1158,7 +1158,7 @@ int cmd_update_index(int argc, } setup_work_tree(); - p = prefix_path(prefix, prefix_length, path); + p = prefix_path(the_repository, prefix, prefix_length, path); update_one(p); if (set_executable_bit) chmod_path(set_executable_bit, p); @@ -1208,7 +1208,7 @@ int cmd_update_index(int argc, die("line is badly quoted"); 
strbuf_swap(&buf, &unquoted); } - p = prefix_path(prefix, prefix_length, buf.buf); + p = prefix_path(the_repository, prefix, prefix_length, buf.buf); update_one(p); if (set_executable_bit) chmod_path(set_executable_bit, p); diff --git a/line-log.c b/line-log.c index 858a899cd2a61d..346c60c554b278 100644 --- a/line-log.c +++ b/line-log.c @@ -589,7 +589,7 @@ parse_lines(struct repository *r, struct commit *commit, range_part = xstrndup(item->string, name_part - item->string); name_part++; - full_name = prefix_path(prefix, prefix ? strlen(prefix) : 0, + full_name = prefix_path(r, prefix, prefix ? strlen(prefix) : 0, name_part); spec = alloc_filespec(full_name); diff --git a/object-name.c b/object-name.c index 37a9ce8e872572..9ac86f19c77bbd 100644 --- a/object-name.c +++ b/object-name.c @@ -1707,7 +1707,7 @@ static char *resolve_relative_path(struct repository *r, const char *rel) die(_("relative path syntax can't be used outside working tree")); /* die() inside prefix_path() if resolved path is outside worktree */ - return prefix_path(startup_info->prefix, + return prefix_path(the_repository, startup_info->prefix, startup_info->prefix ? 
strlen(startup_info->prefix) : 0, rel); } diff --git a/pathspec.c b/pathspec.c index 5993c4afa0eb37..f78b22709ccb67 100644 --- a/pathspec.c +++ b/pathspec.c @@ -486,7 +486,7 @@ static void init_pathspec_item(struct pathspec_item *item, unsigned flags, match = xstrdup(copyfrom); prefixlen = 0; } else { - match = prefix_path_gently(prefix, prefixlen, + match = prefix_path_gently(the_repository, prefix, prefixlen, &prefixlen, copyfrom); if (!match) { const char *hint_path; diff --git a/setup.c b/setup.c index 041e08b98ddf89..adad6ceec0cfa7 100644 --- a/setup.c +++ b/setup.c @@ -117,7 +117,8 @@ static int abspath_part_inside_repo(struct repository *repo, char *path) * ../../sub1/sub2/foo -> sub1/sub2/foo (but no remaining prefix) * `pwd`/../bar -> sub1/bar (no remaining prefix) */ -char *prefix_path_gently(const char *prefix, int len, +char *prefix_path_gently(struct repository *repo, + const char *prefix, int len, int *remaining_prefix, const char *path) { const char *orig = path; @@ -130,7 +131,7 @@ char *prefix_path_gently(const char *prefix, int len, free(sanitized); return NULL; } - if (abspath_part_inside_repo(the_repository, sanitized)) { + if (abspath_part_inside_repo(repo, sanitized)) { free(sanitized); return NULL; } @@ -146,13 +147,13 @@ char *prefix_path_gently(const char *prefix, int len, return sanitized; } -char *prefix_path(const char *prefix, int len, const char *path) +char *prefix_path(struct repository *repo, const char *prefix, int len, const char *path) { - char *r = prefix_path_gently(prefix, len, NULL, path); + char *r = prefix_path_gently(repo, prefix, len, NULL, path); if (!r) { - const char *hint_path = repo_get_work_tree(the_repository); + const char *hint_path = repo_get_work_tree(repo); if (!hint_path) - hint_path = repo_get_git_dir(the_repository); + hint_path = repo_get_git_dir(repo); die(_("'%s' is outside repository at '%s'"), path, absolute_path(hint_path)); } @@ -162,7 +163,7 @@ char *prefix_path(const char *prefix, int len, const 
char *path) int path_inside_repo(const char *prefix, const char *path) { int len = prefix ? strlen(prefix) : 0; - char *r = prefix_path_gently(prefix, len, NULL, path); + char *r = prefix_path_gently(the_repository, prefix, len, NULL, path); if (r) { free(r); return 1; diff --git a/setup.h b/setup.h index 71d3f918837873..24034572b150c7 100644 --- a/setup.h +++ b/setup.h @@ -138,8 +138,8 @@ const char *enter_repo(const char *path, unsigned flags); const char *setup_git_directory_gently(int *); const char *setup_git_directory(void); -char *prefix_path(const char *prefix, int len, const char *path); -char *prefix_path_gently(const char *prefix, int len, int *remaining, const char *path); +char *prefix_path(struct repository *repo, const char *prefix, int len, const char *path); +char *prefix_path_gently(struct repository *repo, const char *prefix, int len, int *remaining, const char *path); int check_filename(const char *prefix, const char *name); void verify_filename(const char *prefix, diff --git a/t/helper/test-path-utils.c b/t/helper/test-path-utils.c index 874542ec3462a5..163fdeefb0f7d9 100644 --- a/t/helper/test-path-utils.c +++ b/t/helper/test-path-utils.c @@ -379,7 +379,7 @@ int cmd__path_utils(int argc, const char **argv) int nongit_ok; setup_git_directory_gently(&nongit_ok); while (argc > 3) { - char *pfx = prefix_path(prefix, prefix_len, argv[3]); + char *pfx = prefix_path(the_repository, prefix, prefix_len, argv[3]); puts(pfx); free(pfx); From e6a380201e841b4e4aa0a7b9c1b65330cc90377f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 19 May 2026 11:52:09 +0200 Subject: [PATCH 55/93] setup: stop using `the_repository` in `path_inside_repo()` Stop using `the_repository` in `path_inside_repo()` and instead accept the repository as a parameter. The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/diff.c | 4 ++-- setup.c | 4 ++-- setup.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/builtin/diff.c b/builtin/diff.c index 0b23c41456837f..7ddebce2ac7cec 100644 --- a/builtin/diff.c +++ b/builtin/diff.c @@ -471,8 +471,8 @@ int cmd_diff(int argc, * as a colourful "diff" replacement. */ if (nongit || ((argc == i + 2) && - (!path_inside_repo(prefix, argv[i]) || - !path_inside_repo(prefix, argv[i + 1])))) + (!path_inside_repo(the_repository, prefix, argv[i]) || + !path_inside_repo(the_repository, prefix, argv[i + 1])))) no_index = DIFF_NO_INDEX_IMPLICIT; } diff --git a/setup.c b/setup.c index adad6ceec0cfa7..4ef6216e829d3f 100644 --- a/setup.c +++ b/setup.c @@ -160,10 +160,10 @@ char *prefix_path(struct repository *repo, const char *prefix, int len, const ch return r; } -int path_inside_repo(const char *prefix, const char *path) +int path_inside_repo(struct repository *repo, const char *prefix, const char *path) { int len = prefix ? 
strlen(prefix) : 0; - char *r = prefix_path_gently(the_repository, prefix, len, NULL, path); + char *r = prefix_path_gently(repo, prefix, len, NULL, path); if (r) { free(r); return 1; diff --git a/setup.h b/setup.h index 24034572b150c7..c3247d7fc8d4b7 100644 --- a/setup.h +++ b/setup.h @@ -146,7 +146,7 @@ void verify_filename(const char *prefix, const char *name, int diagnose_misspelt_rev); void verify_non_filename(const char *prefix, const char *name); -int path_inside_repo(const char *prefix, const char *path); +int path_inside_repo(struct repository *repo, const char *prefix, const char *path); void sanitize_stdfds(void); int daemonize(void); From 6e7e50cc7b9beab495c579249ba411a348bbdca4 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 19 May 2026 11:52:10 +0200 Subject: [PATCH 56/93] setup: stop using `the_repository` in `verify_filename()` Stop using `the_repository` in `verify_filename()` and instead accept the repository as a parameter. The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/grep.c | 2 +- builtin/reset.c | 2 +- builtin/rev-parse.c | 4 ++-- revision.c | 2 +- setup.c | 5 +++-- setup.h | 3 ++- 6 files changed, 10 insertions(+), 8 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index e33285e5e69289..b0e350cf8938fb 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1163,7 +1163,7 @@ int cmd_grep(int argc, if (!seen_dashdash) { int j; for (j = i; j < argc; j++) - verify_filename(prefix, argv[j], j == i && allow_revs); + verify_filename(the_repository, prefix, argv[j], j == i && allow_revs); } parse_pathspec(&pathspec, 0, diff --git a/builtin/reset.c b/builtin/reset.c index 3590be57a5f03c..1ac374d31b9a5e 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -285,7 +285,7 @@ static void parse_args(struct pathspec *pathspec, rev = *argv++; } else { /* Otherwise we treat this as a filename */ - verify_filename(prefix, argv[0], 1); + verify_filename(the_repository, prefix, argv[0], 1); } } diff --git a/builtin/rev-parse.c b/builtin/rev-parse.c index 2fcd6851d18397..8fdb75413d33a6 100644 --- a/builtin/rev-parse.c +++ b/builtin/rev-parse.c @@ -749,7 +749,7 @@ int cmd_rev_parse(int argc, if (as_is) { if (show_file(arg, output_prefix) && as_is < 2) - verify_filename(prefix, arg, 0); + verify_filename(the_repository, prefix, arg, 0); continue; } @@ -1173,7 +1173,7 @@ int cmd_rev_parse(int argc, as_is = 1; if (!show_file(arg, output_prefix)) continue; - verify_filename(prefix, arg, 1); + verify_filename(the_repository, prefix, arg, 1); } strbuf_release(&buf); if (verify) { diff --git a/revision.c b/revision.c index 599b3a66c369ca..5d5324437969e9 100644 --- a/revision.c +++ b/revision.c @@ -3067,7 +3067,7 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s * but the latter we have checked in the main loop. 
*/ for (j = i; j < argc; j++) - verify_filename(revs->prefix, argv[j], j == i); + verify_filename(the_repository, revs->prefix, argv[j], j == i); strvec_pushv(&prune_data, argv + i); break; diff --git a/setup.c b/setup.c index 4ef6216e829d3f..e673663cabb582 100644 --- a/setup.c +++ b/setup.c @@ -280,7 +280,8 @@ static int looks_like_pathspec(const char *arg) * diagnose_misspelt_rev == 0 for the next ones (because we already * saw a filename, there's not ambiguity anymore). */ -void verify_filename(const char *prefix, +void verify_filename(struct repository *repo, + const char *prefix, const char *arg, int diagnose_misspelt_rev) { @@ -288,7 +289,7 @@ void verify_filename(const char *prefix, die(_("option '%s' must come before non-option arguments"), arg); if (looks_like_pathspec(arg) || check_filename(prefix, arg)) return; - die_verify_filename(the_repository, prefix, arg, diagnose_misspelt_rev); + die_verify_filename(repo, prefix, arg, diagnose_misspelt_rev); } /* diff --git a/setup.h b/setup.h index c3247d7fc8d4b7..24a6f666294b71 100644 --- a/setup.h +++ b/setup.h @@ -142,7 +142,8 @@ char *prefix_path(struct repository *repo, const char *prefix, int len, const ch char *prefix_path_gently(struct repository *repo, const char *prefix, int len, int *remaining, const char *path); int check_filename(const char *prefix, const char *name); -void verify_filename(const char *prefix, +void verify_filename(struct repository *repo, + const char *prefix, const char *name, int diagnose_misspelt_rev); void verify_non_filename(const char *prefix, const char *name); From 920dba458188c41b4c2d354101c662bfedf6fe02 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 19 May 2026 11:52:11 +0200 Subject: [PATCH 57/93] setup: stop using `the_repository` in `verify_non_filename()` Stop using `the_repository` in `verify_non_filename()` and instead accept the repository as a parameter. 
The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/checkout.c | 2 +- builtin/grep.c | 2 +- builtin/reset.c | 2 +- revision.c | 4 ++-- setup.c | 4 ++-- setup.h | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/builtin/checkout.c b/builtin/checkout.c index 1345e8574a79c8..f82adcb740287e 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -1484,7 +1484,7 @@ static int parse_branchname_arg(int argc, const char **argv, * it would be extremely annoying. */ if (argc) - verify_non_filename(opts->prefix, arg); + verify_non_filename(the_repository, opts->prefix, arg); } else if (opts->accept_pathspec) { argcount++; argv++; diff --git a/builtin/grep.c b/builtin/grep.c index b0e350cf8938fb..4ec0c016b1f11c 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1151,7 +1151,7 @@ int cmd_grep(int argc, object = parse_object_or_die(the_repository, &oid, arg); if (!seen_dashdash) - verify_non_filename(prefix, arg); + verify_non_filename(the_repository, prefix, arg); add_object_array_with_path(object, arg, &list, oc.mode, oc.path); object_context_release(&oc); } diff --git a/builtin/reset.c b/builtin/reset.c index 1ac374d31b9a5e..11f57605b510ae 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -281,7 +281,7 @@ static void parse_args(struct pathspec *pathspec, * Ok, argv[0] looks like a commit/tree; it should not * be a filename. 
*/ - verify_non_filename(prefix, argv[0]); + verify_non_filename(the_repository, prefix, argv[0]); rev = *argv++; } else { /* Otherwise we treat this as a filename */ diff --git a/revision.c b/revision.c index 5d5324437969e9..b5fe3ef95d7e6b 100644 --- a/revision.c +++ b/revision.c @@ -2072,7 +2072,7 @@ static int handle_dotdot_1(const char *a_name, const char *b_name, return -1; if (!cant_be_filename) { - verify_non_filename(revs->prefix, full_name); + verify_non_filename(the_repository, revs->prefix, full_name); } a_obj = parse_object(revs->repo, &a_oid); @@ -2225,7 +2225,7 @@ static int handle_revision_arg_1(const char *arg_, struct rev_info *revs, int fl goto out; } if (!cant_be_filename) - verify_non_filename(revs->prefix, arg); + verify_non_filename(the_repository, revs->prefix, arg); object = get_reference(revs, arg, &oid, flags ^ local_flags); if (!object) { ret = (revs->ignore_missing || revs->do_not_die_on_missing_objects) ? 0 : -1; diff --git a/setup.c b/setup.c index e673663cabb582..759aba4e2c9fe5 100644 --- a/setup.c +++ b/setup.c @@ -297,9 +297,9 @@ void verify_filename(struct repository *repo, * and we parsed the arg as a refname. It should not be interpretable * as a filename. 
*/ -void verify_non_filename(const char *prefix, const char *arg) +void verify_non_filename(struct repository *repo, const char *prefix, const char *arg) { - if (!is_inside_work_tree(the_repository) || is_inside_git_dir(the_repository)) + if (!is_inside_work_tree(repo) || is_inside_git_dir(repo)) return; if (*arg == '-') return; /* flag */ diff --git a/setup.h b/setup.h index 24a6f666294b71..364c2c728a69d6 100644 --- a/setup.h +++ b/setup.h @@ -146,7 +146,7 @@ void verify_filename(struct repository *repo, const char *prefix, const char *name, int diagnose_misspelt_rev); -void verify_non_filename(const char *prefix, const char *name); +void verify_non_filename(struct repository *repo, const char *prefix, const char *name); int path_inside_repo(struct repository *repo, const char *prefix, const char *path); void sanitize_stdfds(void); From ea1d0f886da89edb28a0a64f19e7f4a67a50c6ef Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 19 May 2026 11:52:12 +0200 Subject: [PATCH 58/93] setup: stop using `the_repository` in `enter_repo()` Stop using `the_repository` in `enter_repo()` and instead accept the repository as a parameter. The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/receive-pack.c | 2 +- builtin/upload-archive.c | 2 +- builtin/upload-pack.c | 2 +- daemon.c | 4 ++-- http-backend.c | 2 +- setup.c | 4 ++-- setup.h | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index f0771590a73ef0..322d178c926452 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -2643,7 +2643,7 @@ int cmd_receive_pack(int argc, setup_path(); - if (!enter_repo(service_dir, 0)) + if (!enter_repo(the_repository, service_dir, 0)) die("'%s' does not appear to be a git repository", service_dir); repo_config(the_repository, receive_pack_config, NULL); diff --git a/builtin/upload-archive.c b/builtin/upload-archive.c index 25312bb2a52887..718e74b3acf85d 100644 --- a/builtin/upload-archive.c +++ b/builtin/upload-archive.c @@ -31,7 +31,7 @@ int cmd_upload_archive_writer(int argc, if (argc != 2) usage(upload_archive_usage); - if (!enter_repo(argv[1], 0)) + if (!enter_repo(the_repository, argv[1], 0)) die("'%s' does not appear to be a git repository", argv[1]); init_archivers(); diff --git a/builtin/upload-pack.c b/builtin/upload-pack.c index 30498fafea3a8b..32831fb8796acc 100644 --- a/builtin/upload-pack.c +++ b/builtin/upload-pack.c @@ -59,7 +59,7 @@ int cmd_upload_pack(int argc, if (strict) enter_repo_flags |= ENTER_REPO_STRICT; - if (!enter_repo(dir, enter_repo_flags)) + if (!enter_repo(the_repository, dir, enter_repo_flags)) die("'%s' does not appear to be a git repository", dir); switch (determine_protocol_version_server()) { diff --git a/daemon.c b/daemon.c index 0a7b1aae447912..947dd906554963 100644 --- a/daemon.c +++ b/daemon.c @@ -244,14 +244,14 @@ static const char *path_ok(const char *directory, struct hostinfo *hi) } enter_repo_flags = strict_paths ? 
ENTER_REPO_STRICT : 0; - path = enter_repo(dir, enter_repo_flags); + path = enter_repo(the_repository, dir, enter_repo_flags); if (!path && base_path && base_path_relaxed) { /* * if we fail and base_path_relaxed is enabled, try without * prefixing the base path */ dir = directory; - path = enter_repo(dir, enter_repo_flags); + path = enter_repo(the_repository, dir, enter_repo_flags); } if (!path) { diff --git a/http-backend.c b/http-backend.c index 1a171c5c5a0b02..c7566b1d12d35f 100644 --- a/http-backend.c +++ b/http-backend.c @@ -809,7 +809,7 @@ int cmd_main(int argc UNUSED, const char **argv UNUSED) not_found(&hdr, "Request not supported: '%s'", dir); setup_path(); - if (!enter_repo(dir, 0)) + if (!enter_repo(the_repository, dir, 0)) not_found(&hdr, "Not a git repository: '%s'", dir); if (!getenv("GIT_HTTP_EXPORT_ALL") && access("git-daemon-export-ok", F_OK) ) diff --git a/setup.c b/setup.c index 759aba4e2c9fe5..cb479cd91a871b 100644 --- a/setup.c +++ b/setup.c @@ -1765,7 +1765,7 @@ enum discovery_result discover_git_directory_reason(struct strbuf *commondir, return result; } -const char *enter_repo(const char *path, unsigned flags) +const char *enter_repo(struct repository *repo, const char *path, unsigned flags) { static struct strbuf validated_path = STRBUF_INIT; static struct strbuf used_path = STRBUF_INIT; @@ -1838,7 +1838,7 @@ const char *enter_repo(const char *path, unsigned flags) } if (is_git_directory(".")) { - set_git_dir(the_repository, ".", 0); + set_git_dir(repo, ".", 0); check_repository_format(NULL); return path; } diff --git a/setup.h b/setup.h index 364c2c728a69d6..d0cfdfd44a67a8 100644 --- a/setup.h +++ b/setup.h @@ -134,7 +134,7 @@ enum { * links. User relative paths are also returned as they are given, * except DWIM suffixing. 
*/ -const char *enter_repo(const char *path, unsigned flags); +const char *enter_repo(struct repository *repo, const char *path, unsigned flags); const char *setup_git_directory_gently(int *); const char *setup_git_directory(void); From bd2851d84ffe438cf4621da48abbff1877935d9a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 19 May 2026 11:52:13 +0200 Subject: [PATCH 59/93] setup: stop using `the_repository` in `setup_work_tree()` Stop using `the_repository` in `setup_work_tree()` and instead accept the repository as a parameter. The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. Note that the function tracks two bits of information via global variables. This of course doesn't make much sense anymore now that we can set up worktrees for arbitrary repositories: - We track whether the worktree has already been initialized and, if so, we skip the call to `chdir_notify()` and setenv(3p). It does not make much sense to store this info in the repository, as we _would_ want to update the environment when switching back and forth between worktrees. So instead of storing this info in the repository, we drop this state entirely and live with the fact that we may execute the logic twice. It should ultimately be idempotent though and thus not be much of a problem. - We track whether the worktree configuration is bogus. If so, and if later on some caller tries to set up the worktree, then we'll die instead. This is indeed information that we can move into the repository itself.
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- blame.c | 2 +- builtin/check-attr.c | 2 +- builtin/clone.c | 2 +- builtin/describe.c | 2 +- builtin/diff-index.c | 2 +- builtin/diff.c | 4 ++-- builtin/difftool.c | 2 +- builtin/grep.c | 2 +- builtin/ls-files.c | 2 +- builtin/read-tree.c | 2 +- builtin/reset.c | 2 +- builtin/rm.c | 2 +- builtin/sparse-checkout.c | 16 ++++++++-------- builtin/submodule--helper.c | 2 +- builtin/update-index.c | 10 +++++----- git.c | 2 +- repository.h | 1 + setup.c | 15 ++++----------- setup.h | 2 +- t/helper/test-subprocess.c | 4 +++- wt-status.c | 2 +- 21 files changed, 38 insertions(+), 42 deletions(-) diff --git a/blame.c b/blame.c index a3c49d132e4ae1..977cbb70974f8c 100644 --- a/blame.c +++ b/blame.c @@ -2813,7 +2813,7 @@ void setup_scoreboard(struct blame_scoreboard *sb, } if (!sb->contents_from) - setup_work_tree(); + setup_work_tree(the_repository); sb->final = fake_working_tree_commit(sb->repo, &sb->revs->diffopt, diff --git a/builtin/check-attr.c b/builtin/check-attr.c index 04b86e42ae66ca..98f64d5b922e6c 100644 --- a/builtin/check-attr.c +++ b/builtin/check-attr.c @@ -117,7 +117,7 @@ int cmd_check_attr(int argc, int cnt, i, doubledash, filei; if (!is_bare_repository()) - setup_work_tree(); + setup_work_tree(the_repository); repo_config(the_repository, git_default_config, NULL); diff --git a/builtin/clone.c b/builtin/clone.c index d23b0cafcfec30..09f6d976587cab 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -669,7 +669,7 @@ static int checkout(int submodule_progress, } /* We need to be in the new work tree for the checkout */ - setup_work_tree(); + setup_work_tree(the_repository); repo_hold_locked_index(the_repository, &lock_file, LOCK_DIE_ON_ERROR); diff --git a/builtin/describe.c b/builtin/describe.c index bffeed13a3cb14..1c47d7c0b7c38d 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -781,7 +781,7 @@ int cmd_describe(int argc, struct rev_info revs; int fd; - setup_work_tree(); + 
setup_work_tree(the_repository); prepare_repo_settings(the_repository); the_repository->settings.command_requires_full_index = 0; repo_read_index(the_repository); diff --git a/builtin/diff-index.c b/builtin/diff-index.c index 522dacfc4cf097..3db7cffede578c 100644 --- a/builtin/diff-index.c +++ b/builtin/diff-index.c @@ -69,7 +69,7 @@ int cmd_diff_index(int argc, rev.max_count != -1 || rev.min_age != -1 || rev.max_age != -1) usage(diff_cache_usage); if (!(option & DIFF_INDEX_CACHED)) { - setup_work_tree(); + setup_work_tree(the_repository); if (repo_read_index_preload(the_repository, &rev.diffopt.pathspec, 0) < 0) { perror("repo_read_index_preload"); return -1; diff --git a/builtin/diff.c b/builtin/diff.c index 7ddebce2ac7cec..1ede873ac1895d 100644 --- a/builtin/diff.c +++ b/builtin/diff.c @@ -159,7 +159,7 @@ static void builtin_diff_index(struct rev_info *revs, revs->max_age != -1) usage(builtin_diff_usage); if (!(option & DIFF_INDEX_CACHED)) { - setup_work_tree(); + setup_work_tree(the_repository); if (repo_read_index_preload(the_repository, &revs->diffopt.pathspec, 0) < 0) { die_errno("repo_read_index_preload"); @@ -281,7 +281,7 @@ static void builtin_diff_files(struct rev_info *revs, int argc, const char **arg (revs->diffopt.output_format & DIFF_FORMAT_PATCH)) diff_merges_set_dense_combined_if_unset(revs); - setup_work_tree(); + setup_work_tree(the_repository); if (repo_read_index_preload(the_repository, &revs->diffopt.pathspec, 0) < 0) { die_errno("repo_read_index_preload"); diff --git a/builtin/difftool.c b/builtin/difftool.c index e4bc1f831696a8..2a21005f2ee264 100644 --- a/builtin/difftool.c +++ b/builtin/difftool.c @@ -767,7 +767,7 @@ int cmd_difftool(int argc, die(_("difftool requires worktree or --no-index")); if (!no_index){ - setup_work_tree(); + setup_work_tree(repo); setenv(GIT_DIR_ENVIRONMENT, absolute_path(repo_get_git_dir(repo)), 1); setenv(GIT_WORK_TREE_ENVIRONMENT, absolute_path(repo_get_work_tree(repo)), 1); } else if (dir_diff) diff --git 
a/builtin/grep.c b/builtin/grep.c index 4ec0c016b1f11c..679f8b567a1578 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1272,7 +1272,7 @@ int cmd_grep(int argc, die(_("--[no-]exclude-standard cannot be used for tracked contents")); } else if (!list.nr) { if (!cached) - setup_work_tree(); + setup_work_tree(the_repository); hit = grep_cache(&opt, &pathspec, cached); } else { diff --git a/builtin/ls-files.c b/builtin/ls-files.c index 09d95111b35b9f..e1a22b41b94c08 100644 --- a/builtin/ls-files.c +++ b/builtin/ls-files.c @@ -704,7 +704,7 @@ int cmd_ls_files(int argc, exc_given = 1; if (require_work_tree && !is_inside_work_tree(repo)) - setup_work_tree(); + setup_work_tree(repo); if (recurse_submodules && (show_deleted || show_others || show_unmerged || diff --git a/builtin/read-tree.c b/builtin/read-tree.c index 460b21e40ac914..999a82ecdfd737 100644 --- a/builtin/read-tree.c +++ b/builtin/read-tree.c @@ -229,7 +229,7 @@ int cmd_read_tree(int argc, opts.preserve_ignored = 0; /* otherwise, opts.preserve_ignored is irrelevant */ if (opts.merge && !opts.index_only) - setup_work_tree(); + setup_work_tree(the_repository); if (opts.skip_sparse_checkout) ensure_full_index(the_repository->index); diff --git a/builtin/reset.c b/builtin/reset.c index 11f57605b510ae..3be6bd0121afe5 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -468,7 +468,7 @@ int cmd_reset(int argc, trace2_cmd_mode(reset_type_names[reset_type]); if (reset_type != SOFT && (reset_type != MIXED || repo_get_work_tree(the_repository))) - setup_work_tree(); + setup_work_tree(the_repository); if (reset_type == MIXED && is_bare_repository()) die(_("%s reset is not allowed in a bare repository"), diff --git a/builtin/rm.c b/builtin/rm.c index 05d89e98c3c6b8..081d0bc3754c52 100644 --- a/builtin/rm.c +++ b/builtin/rm.c @@ -296,7 +296,7 @@ int cmd_rm(int argc, die(_("No pathspec was given. 
Which files should I remove?")); if (!index_only) - setup_work_tree(); + setup_work_tree(the_repository); prepare_repo_settings(the_repository); the_repository->settings.command_requires_full_index = 0; diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 2af50fb2f9cb22..d89acbeb533bd8 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -63,7 +63,7 @@ static int sparse_checkout_list(int argc, const char **argv, const char *prefix, int res; struct repo_config_values *cfg = repo_config_values(the_repository); - setup_work_tree(); + setup_work_tree(the_repository); if (!cfg->apply_sparse_checkout) die(_("this worktree is not sparse")); @@ -229,7 +229,7 @@ static int update_working_directory(struct repository *r, o.dst_index = r->index; o.skip_sparse_checkout = 0; - setup_work_tree(); + setup_work_tree(the_repository); repo_hold_locked_index(r, &lock_file, LOCK_DIE_ON_ERROR); @@ -468,7 +468,7 @@ static int sparse_checkout_init(int argc, const char **argv, const char *prefix, OPT_END(), }; - setup_work_tree(); + setup_work_tree(the_repository); repo_read_index(repo); init_opts.cone_mode = -1; @@ -802,7 +802,7 @@ static int sparse_checkout_add(int argc, const char **argv, const char *prefix, int ret; struct repo_config_values *cfg = repo_config_values(the_repository); - setup_work_tree(); + setup_work_tree(the_repository); if (!cfg->apply_sparse_checkout) die(_("no sparse-checkout to add to")); @@ -856,7 +856,7 @@ static int sparse_checkout_set(int argc, const char **argv, const char *prefix, struct strvec patterns = STRVEC_INIT; int ret; - setup_work_tree(); + setup_work_tree(the_repository); repo_read_index(repo); set_opts.cone_mode = -1; @@ -912,7 +912,7 @@ static int sparse_checkout_reapply(int argc, const char **argv, }; struct repo_config_values *cfg = repo_config_values(the_repository); - setup_work_tree(); + setup_work_tree(the_repository); if (!cfg->apply_sparse_checkout) die(_("must be in a sparse-checkout to reapply 
sparsity patterns")); @@ -975,7 +975,7 @@ static int sparse_checkout_clean(int argc, const char **argv, OPT_END(), }; - setup_work_tree(); + setup_work_tree(the_repository); if (!cfg->apply_sparse_checkout) die(_("must be in a sparse-checkout to clean directories")); if (!core_sparse_checkout_cone) @@ -1053,7 +1053,7 @@ static int sparse_checkout_disable(int argc, const char **argv, * forcibly return to a dense checkout regardless of initial state. */ - setup_work_tree(); + setup_work_tree(the_repository); argc = parse_options(argc, argv, prefix, builtin_sparse_checkout_disable_options, builtin_sparse_checkout_disable_usage, 0); diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index 2f589e3b378d3f..1cc82a134db22e 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -1250,7 +1250,7 @@ static int compute_summary_module_list(struct object_id *head_oid, if (!info->cached) { if (diff_cmd == DIFF_INDEX) - setup_work_tree(); + setup_work_tree(the_repository); if (repo_read_index_preload(the_repository, &rev.diffopt.pathspec, 0) < 0) { perror("repo_read_index_preload"); ret = -1; diff --git a/builtin/update-index.c b/builtin/update-index.c index 7434112b8e69b0..d6dabacfd1275d 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -732,7 +732,7 @@ struct refresh_params { static int refresh(struct refresh_params *o, unsigned int flag) { - setup_work_tree(); + setup_work_tree(the_repository); repo_read_index(the_repository); *o->has_errors |= refresh_index(the_repository->index, o->flags | flag, NULL, NULL, NULL); @@ -901,7 +901,7 @@ static enum parse_opt_result reupdate_callback( BUG_ON_OPT_ARG(arg); /* consume remaining arguments. 
*/ - setup_work_tree(); + setup_work_tree(the_repository); *has_errors = do_reupdate(ctx->argv + 1, prefix); if (*has_errors) the_repository->index->cache_changed = 0; @@ -1157,7 +1157,7 @@ int cmd_update_index(int argc, transaction = NULL; } - setup_work_tree(); + setup_work_tree(the_repository); p = prefix_path(the_repository, prefix, prefix_length, path); update_one(p); if (set_executable_bit) @@ -1199,7 +1199,7 @@ int cmd_update_index(int argc, struct strbuf buf = STRBUF_INIT; struct strbuf unquoted = STRBUF_INIT; - setup_work_tree(); + setup_work_tree(the_repository); while (getline_fn(&buf, stdin) != EOF) { char *p; if (!nul_term_line && buf.buf[0] == '"') { @@ -1253,7 +1253,7 @@ int cmd_update_index(int argc, report(_("Untracked cache disabled")); break; case UC_TEST: - setup_work_tree(); + setup_work_tree(the_repository); return !test_if_untracked_cache_is_supported(); case UC_ENABLE: case UC_FORCE: diff --git a/git.c b/git.c index 5a40eab8a26a66..eaede42c4e8644 100644 --- a/git.c +++ b/git.c @@ -497,7 +497,7 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv, struct commit_pager_choice(); if (!help && p->option & NEED_WORK_TREE) - setup_work_tree(); + setup_work_tree(the_repository); trace_argv_printf(argv, "trace: built-in: git"); trace2_cmd_name(p->cmd); diff --git a/repository.h b/repository.h index 4969d8b8ebed60..832451fc616593 100644 --- a/repository.h +++ b/repository.h @@ -114,6 +114,7 @@ struct repository { * A NULL value indicates that there is no working directory. 
*/ char *worktree; + bool worktree_config_is_bogus; /* * Path from the root of the top-level superproject down to this diff --git a/setup.c b/setup.c index cb479cd91a871b..50324f8f37081a 100644 --- a/setup.c +++ b/setup.c @@ -26,7 +26,6 @@ #include "trace2.h" #include "worktree.h" -static int work_tree_config_is_bogus; enum allowed_bare_repo { ALLOWED_BARE_REPO_EXPLICIT = 0, ALLOWED_BARE_REPO_ALL, @@ -494,18 +493,14 @@ int is_inside_work_tree(struct repository *repo) return ret; } -void setup_work_tree(void) +void setup_work_tree(struct repository *repo) { const char *work_tree; - static int initialized = 0; - if (initialized) - return; - - if (work_tree_config_is_bogus) + if (repo->worktree_config_is_bogus) die(_("unable to set up work tree using invalid config")); - work_tree = repo_get_work_tree(the_repository); + work_tree = repo_get_work_tree(repo); if (!work_tree || chdir_notify(work_tree)) die(_("this operation must be run in a work tree")); @@ -515,8 +510,6 @@ void setup_work_tree(void) */ if (getenv(GIT_WORK_TREE_ENVIRONMENT)) setenv(GIT_WORK_TREE_ENVIRONMENT, ".", 1); - - initialized = 1; } static void setup_original_cwd(struct repository *repo) @@ -1164,7 +1157,7 @@ static const char *setup_explicit_git_dir(struct repository *repo, if (git_work_tree_cfg) { /* #22.2, #30 */ warning("core.bare and core.worktree do not make sense"); - work_tree_config_is_bogus = 1; + repo->worktree_config_is_bogus = true; } /* #18, #26 */ diff --git a/setup.h b/setup.h index d0cfdfd44a67a8..8fed365637ec2b 100644 --- a/setup.h +++ b/setup.h @@ -56,7 +56,7 @@ const char *resolve_gitdir_gently(const char *suspect, int *return_error_code); void die_upon_dubious_ownership(const char *gitfile, const char *worktree, const char *gitdir); -void setup_work_tree(void); +void setup_work_tree(struct repository *repo); /* * discover_git_directory_reason() is similar to discover_git_directory(), diff --git a/t/helper/test-subprocess.c b/t/helper/test-subprocess.c index 
c344f1694df28d..8a070e47cddb51 100644 --- a/t/helper/test-subprocess.c +++ b/t/helper/test-subprocess.c @@ -1,3 +1,5 @@ +#define USE_THE_REPOSITORY_VARIABLE + #include "test-tool.h" #include "run-command.h" #include "setup.h" @@ -11,7 +13,7 @@ int cmd__subprocess(int argc, const char **argv) if (nogit) die("No git repo found"); if (argc > 1 && !strcmp(argv[1], "--setup-work-tree")) { - setup_work_tree(); + setup_work_tree(the_repository); argv++; } cp.git_cmd = 1; diff --git a/wt-status.c b/wt-status.c index c12fca70781299..b17372390cf96c 100644 --- a/wt-status.c +++ b/wt-status.c @@ -1206,7 +1206,7 @@ static void wt_longstatus_print_verbose(struct wt_status *s) status_printf_ln(s, c, "--------------------------------------------------"); status_printf_ln(s, c, _("Changes not staged for commit:")); - setup_work_tree(); + setup_work_tree(the_repository); rev.diffopt.a_prefix = "i/"; rev.diffopt.b_prefix = "w/"; run_diff_files(&rev, 0); From 7a6a82fba02fb6644647e5beddb54d978918cec0 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 19 May 2026 11:52:14 +0200 Subject: [PATCH 60/93] setup: stop using `the_repository` in `set_git_work_tree()` Stop using `the_repository` in `set_git_work_tree()` and instead accept the repository as a parameter. The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. As with the preceding commit, we track whether the worktree has been initialized already via a global variable so that we can die in case the repository is re-initialized with a different worktree path. Store this info in the `struct repository` instead so that we correctly handle this per repository.
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/clone.c | 2 +- builtin/init-db.c | 6 +++--- repository.h | 1 + setup.c | 24 +++++++++++------------- setup.h | 2 +- 5 files changed, 17 insertions(+), 18 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index 09f6d976587cab..8844e3d4811281 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -1116,7 +1116,7 @@ int cmd_clone(int argc, die_errno(_("could not create work tree dir '%s'"), work_tree); junk_work_tree = work_tree; - set_git_work_tree(work_tree); + set_git_work_tree(the_repository, work_tree); } if (real_git_dir) { diff --git a/builtin/init-db.c b/builtin/init-db.c index bb853e69f5426e..e626b0d8b7ccc6 100644 --- a/builtin/init-db.c +++ b/builtin/init-db.c @@ -237,9 +237,9 @@ int cmd_init_db(int argc, if (!git_work_tree_cfg) git_work_tree_cfg = xgetcwd(); if (work_tree) - set_git_work_tree(work_tree); + set_git_work_tree(the_repository, work_tree); else - set_git_work_tree(git_work_tree_cfg); + set_git_work_tree(the_repository, git_work_tree_cfg); if (access(repo_get_work_tree(the_repository), X_OK)) die_errno (_("Cannot access work tree '%s'"), repo_get_work_tree(the_repository)); @@ -248,7 +248,7 @@ int cmd_init_db(int argc, if (real_git_dir) die(_("--separate-git-dir incompatible with bare repository")); if (work_tree) - set_git_work_tree(work_tree); + set_git_work_tree(the_repository, work_tree); } flags |= INIT_DB_EXIST_OK; diff --git a/repository.h b/repository.h index 832451fc616593..d391aff8ab4a90 100644 --- a/repository.h +++ b/repository.h @@ -114,6 +114,7 @@ struct repository { * A NULL value indicates that there is no working directory. 
*/ char *worktree; + bool worktree_initialized; bool worktree_config_is_bogus; /* diff --git a/setup.c b/setup.c index 50324f8f37081a..796ac5792fe9fd 100644 --- a/setup.c +++ b/setup.c @@ -1152,7 +1152,7 @@ static const char *setup_explicit_git_dir(struct repository *repo, /* #3, #7, #11, #15, #19, #23, #27, #31 (see t1510) */ if (work_tree_env) - set_git_work_tree(work_tree_env); + set_git_work_tree(repo, work_tree_env); else if (is_bare_repository_cfg > 0) { if (git_work_tree_cfg) { /* #22.2, #30 */ @@ -1167,7 +1167,7 @@ static const char *setup_explicit_git_dir(struct repository *repo, } else if (git_work_tree_cfg) { /* #6, #14 */ if (is_absolute_path(git_work_tree_cfg)) - set_git_work_tree(git_work_tree_cfg); + set_git_work_tree(repo, git_work_tree_cfg); else { char *core_worktree; if (chdir(gitdirenv)) @@ -1177,7 +1177,7 @@ static const char *setup_explicit_git_dir(struct repository *repo, core_worktree = xgetcwd(); if (chdir(cwd->buf)) die_errno(_("cannot come back to cwd")); - set_git_work_tree(core_worktree); + set_git_work_tree(repo, core_worktree); free(core_worktree); } } @@ -1188,7 +1188,7 @@ static const char *setup_explicit_git_dir(struct repository *repo, return NULL; } else /* #2, #10 */ - set_git_work_tree("."); + set_git_work_tree(repo, "."); /* set_git_work_tree() must have been called by now */ worktree = repo_get_work_tree(repo); @@ -1248,7 +1248,7 @@ static const char *setup_discovered_git_dir(struct repository *repo, } /* #0, #1, #5, #8, #9, #12, #13 */ - set_git_work_tree("."); + set_git_work_tree(repo, "."); if (strcmp(gitdir, DEFAULT_GIT_DIR_ENVIRONMENT)) set_git_dir(repo, gitdir, 0); if (offset >= cwd->len) @@ -1839,29 +1839,27 @@ const char *enter_repo(struct repository *repo, const char *path, unsigned flags return NULL; } -static int git_work_tree_initialized; - /* * Note. This works only before you used a work tree. 
This was added * primarily to support git-clone to work in a new repository it just * created, and is not meant to flip between different work trees. */ -void set_git_work_tree(const char *new_work_tree) +void set_git_work_tree(struct repository *repo, const char *new_work_tree) { - if (git_work_tree_initialized) { + if (repo->worktree_initialized) { struct strbuf realpath = STRBUF_INIT; strbuf_realpath(&realpath, new_work_tree, 1); new_work_tree = realpath.buf; - if (strcmp(new_work_tree, the_repository->worktree)) + if (strcmp(new_work_tree, repo->worktree)) die("internal error: work tree has already been set\n" "Current worktree: %s\nNew worktree: %s", - the_repository->worktree, new_work_tree); + repo->worktree, new_work_tree); strbuf_release(&realpath); return; } - git_work_tree_initialized = 1; - repo_set_worktree(the_repository, new_work_tree); + repo->worktree_initialized = true; + repo_set_worktree(repo, new_work_tree); } const char *setup_git_directory_gently(int *nongit_ok) diff --git a/setup.h b/setup.h index 8fed365637ec2b..1a37089fa0aa54 100644 --- a/setup.h +++ b/setup.h @@ -96,7 +96,7 @@ static inline int discover_git_directory(struct strbuf *commondir, return 0; } -void set_git_work_tree(const char *tree); +void set_git_work_tree(struct repository *repo, const char *tree); /* Flags that can be passed to `enter_repo()`. */ enum { From 27b76d1862b970527cd4abb2a1725138a933a118 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 19 May 2026 11:52:15 +0200 Subject: [PATCH 61/93] setup: stop using `the_repository` in `setup_git_env()` Stop using `the_repository` in `setup_git_env()` and instead accept the repository as a parameter. The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. Furthermore, the function is never used outside of "setup.c". Drop the declaration in "environment.h" and make it static. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- environment.h | 2 -- setup.c | 6 +++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/environment.h b/environment.h index 123a71cdc8d14e..9eb97b3869c9b1 100644 --- a/environment.h +++ b/environment.h @@ -147,8 +147,6 @@ void repo_config_values_init(struct repo_config_values *cfg); * Please do not add new global config variables here. */ # ifdef USE_THE_REPOSITORY_VARIABLE -void setup_git_env(const char *git_dir); - /* * Returns true iff we have a configured git repository (either via * setup_git_directory, or in the environment via $GIT_DIR). diff --git a/setup.c b/setup.c index 796ac5792fe9fd..8965f8ccaf20b1 100644 --- a/setup.c +++ b/setup.c @@ -1074,9 +1074,9 @@ static void setup_git_env_internal(struct repository *repo, fetch_if_missing = 0; } -void setup_git_env(const char *git_dir) +static void setup_git_env(struct repository *repo, const char *git_dir) { - setup_git_env_internal(the_repository, git_dir, false); + setup_git_env_internal(repo, git_dir, false); } static void set_git_dir_1(struct repository *repo, const char *path, bool skip_initializing_odb) @@ -1988,7 +1988,7 @@ const char *setup_git_directory_gently(int *nongit_ok) const char *gitdir = getenv(GIT_DIR_ENVIRONMENT); if (!gitdir) gitdir = DEFAULT_GIT_DIR_ENVIRONMENT; - setup_git_env(gitdir); + setup_git_env(the_repository, gitdir); } if (startup_info->have_repository) { repo_set_hash_algo(the_repository, repo_fmt.hash_algo); From a80a8e3ea6a070185840219778df24db832899f6 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 19 May 2026 11:52:16 +0200 Subject: [PATCH 62/93] setup: stop using `the_repository` in `setup_git_directory_gently()` Stop using `the_repository` in `setup_git_directory_gently()` and instead accept the repository as a parameter. The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/check-ref-format.c | 5 ++++- builtin/diff.c | 2 +- builtin/hash-object.c | 2 +- builtin/help.c | 2 +- builtin/stripspace.c | 2 +- git.c | 6 +++--- http-fetch.c | 2 +- imap-send.c | 2 +- remote-curl.c | 4 ++-- setup.c | 36 ++++++++++++++++++------------------ setup.h | 2 +- t/helper/test-path-utils.c | 2 +- t/helper/test-subprocess.c | 2 +- 13 files changed, 36 insertions(+), 33 deletions(-) diff --git a/builtin/check-ref-format.c b/builtin/check-ref-format.c index 5d80afeec05e3d..e42b0444ead269 100644 --- a/builtin/check-ref-format.c +++ b/builtin/check-ref-format.c @@ -1,6 +1,9 @@ /* * GIT - The information manager from hell */ + +#define USE_THE_REPOSITORY_VARIABLE + #include "builtin.h" #include "refs.h" #include "setup.h" @@ -41,7 +44,7 @@ static int check_ref_format_branch(const char *arg) const char *name; int nongit; - setup_git_directory_gently(&nongit); + setup_git_directory_gently(the_repository, &nongit); if (check_branch_ref(&sb, arg) || !skip_prefix(sb.buf, "refs/heads/", &name)) die("'%s' is not a valid branch name", arg); diff --git a/builtin/diff.c b/builtin/diff.c index 1ede873ac1895d..4b46e394cecb8d 100644 --- a/builtin/diff.c +++ b/builtin/diff.c @@ -455,7 +455,7 @@ int cmd_diff(int argc, break; } - prefix = setup_git_directory_gently(&nongit); + prefix = setup_git_directory_gently(the_repository, &nongit); if (!nongit) { prepare_repo_settings(the_repository); diff --git a/builtin/hash-object.c b/builtin/hash-object.c index 5d900a6b8c953e..d7905bedc2dfd3 100644 --- a/builtin/hash-object.c +++ b/builtin/hash-object.c @@ -102,7 +102,7 @@ int cmd_hash_object(int argc, if (flags & INDEX_WRITE_OBJECT) prefix = setup_git_directory(); else - prefix = setup_git_directory_gently(&nongit); + prefix = setup_git_directory_gently(the_repository, &nongit); if (nongit && !the_hash_algo) repo_set_hash_algo(the_repository, GIT_HASH_DEFAULT); diff --git a/builtin/help.c b/builtin/help.c 
index c0aece4da39c81..a140339999debe 100644 --- a/builtin/help.c +++ b/builtin/help.c @@ -740,7 +740,7 @@ int cmd_help(int argc, return 0; } - setup_git_directory_gently(&nongit); + setup_git_directory_gently(the_repository, &nongit); repo_config(the_repository, git_help_config, NULL); if (parsed_help_format != HELP_FORMAT_NONE) diff --git a/builtin/stripspace.c b/builtin/stripspace.c index 4a566cbc5de672..18705f1a5b1150 100644 --- a/builtin/stripspace.c +++ b/builtin/stripspace.c @@ -54,7 +54,7 @@ int cmd_stripspace(int argc, usage_with_options(stripspace_usage, options); if (mode == STRIP_COMMENTS || mode == COMMENT_LINES) { - setup_git_directory_gently(&nongit); + setup_git_directory_gently(the_repository, &nongit); repo_config(the_repository, git_default_config, NULL); } diff --git a/git.c b/git.c index eaede42c4e8644..2cc018fc5cc112 100644 --- a/git.c +++ b/git.c @@ -84,7 +84,7 @@ static int list_cmds(const char *spec) * Set up the repository so we can pick up any repo-level config (like * completion.commands). 
*/ - setup_git_directory_gently(&nongit); + setup_git_directory_gently(the_repository, &nongit); while (*spec) { const char *sep = strchrnul(spec, ','); @@ -386,7 +386,7 @@ static int handle_alias(struct strvec *args, struct string_list *expanded_aliase int nongit_ok; /* Aliases expect GIT_PREFIX, GIT_DIR etc to be set */ - setup_git_directory_gently(&nongit_ok); + setup_git_directory_gently(the_repository, &nongit_ok); commit_pager_choice(); @@ -480,7 +480,7 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv, struct prefix = setup_git_directory(); no_repo = 0; } else if (run_setup & RUN_SETUP_GENTLY) { - prefix = setup_git_directory_gently(&no_repo); + prefix = setup_git_directory_gently(the_repository, &no_repo); } else { prefix = NULL; } diff --git a/http-fetch.c b/http-fetch.c index 1922e23fcd3f97..f9b6ecb0616fe0 100644 --- a/http-fetch.c +++ b/http-fetch.c @@ -109,7 +109,7 @@ int cmd_main(int argc, const char **argv) struct strvec index_pack_args = STRVEC_INIT; int ret; - setup_git_directory_gently(&nongit); + setup_git_directory_gently(the_repository, &nongit); while (arg < argc && argv[arg][0] == '-') { const char *p; diff --git a/imap-send.c b/imap-send.c index af02c6a689495e..cfd6a5120c50e4 100644 --- a/imap-send.c +++ b/imap-send.c @@ -1799,7 +1799,7 @@ int cmd_main(int argc, const char **argv) int nongit_ok; int ret; - setup_git_directory_gently(&nongit_ok); + setup_git_directory_gently(the_repository, &nongit_ok); repo_config(the_repository, git_imap_config, &server); argc = parse_options(argc, (const char **)argv, "", imap_send_options, imap_send_usage, 0); diff --git a/remote-curl.c b/remote-curl.c index aba60d571282d3..a84fc860ec0dd4 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -1557,7 +1557,7 @@ int cmd_main(int argc, const char **argv) int nongit; int ret = 1; - setup_git_directory_gently(&nongit); + setup_git_directory_gently(the_repository, &nongit); if (argc < 2) { error(_("remote-curl: usage: git remote-curl []")); 
goto cleanup; @@ -1605,7 +1605,7 @@ int cmd_main(int argc, const char **argv) break; if (starts_with(buf.buf, "fetch ")) { if (nongit) { - setup_git_directory_gently(&nongit); + setup_git_directory_gently(the_repository, &nongit); if (nongit) die(_("remote-curl: fetch attempted without a local repo")); } diff --git a/setup.c b/setup.c index 8965f8ccaf20b1..c12c6cbda2fa99 100644 --- a/setup.c +++ b/setup.c @@ -1862,7 +1862,7 @@ void set_git_work_tree(struct repository *repo, const char *new_work_tree) repo_set_worktree(repo, new_work_tree); } -const char *setup_git_directory_gently(int *nongit_ok) +const char *setup_git_directory_gently(struct repository *repo, int *nongit_ok) { static struct strbuf cwd = STRBUF_INIT; struct strbuf dir = STRBUF_INIT, gitdir = STRBUF_INIT, report = STRBUF_INIT; @@ -1877,7 +1877,7 @@ const char *setup_git_directory_gently(int *nongit_ok) * configuration (including the per-repo config file that we * ignored previously). */ - repo_config_clear(the_repository); + repo_config_clear(repo); /* * Let's assume that we are in a git repository. 
@@ -1893,18 +1893,18 @@ const char *setup_git_directory_gently(int *nongit_ok) switch (setup_git_directory_gently_1(&dir, &gitdir, &report, 1)) { case GIT_DIR_EXPLICIT: - prefix = setup_explicit_git_dir(the_repository, gitdir.buf, &cwd, &repo_fmt, nongit_ok); + prefix = setup_explicit_git_dir(repo, gitdir.buf, &cwd, &repo_fmt, nongit_ok); break; case GIT_DIR_DISCOVERED: if (dir.len < cwd.len && chdir(dir.buf)) die(_("cannot change to '%s'"), dir.buf); - prefix = setup_discovered_git_dir(the_repository, gitdir.buf, &cwd, dir.len, + prefix = setup_discovered_git_dir(repo, gitdir.buf, &cwd, dir.len, &repo_fmt, nongit_ok); break; case GIT_DIR_BARE: if (dir.len < cwd.len && chdir(dir.buf)) die(_("cannot change to '%s'"), dir.buf); - prefix = setup_bare_git_dir(the_repository, &cwd, dir.len, &repo_fmt, nongit_ok); + prefix = setup_bare_git_dir(repo, &cwd, dir.len, &repo_fmt, nongit_ok); break; case GIT_DIR_HIT_CEILING: if (!nongit_ok) @@ -1984,30 +1984,30 @@ const char *setup_git_directory_gently(int *nongit_ok) startup_info->have_repository || /* GIT_DIR_EXPLICIT */ getenv(GIT_DIR_ENVIRONMENT)) { - if (!the_repository->gitdir) { + if (!repo->gitdir) { const char *gitdir = getenv(GIT_DIR_ENVIRONMENT); if (!gitdir) gitdir = DEFAULT_GIT_DIR_ENVIRONMENT; - setup_git_env(the_repository, gitdir); + setup_git_env(repo, gitdir); } if (startup_info->have_repository) { - repo_set_hash_algo(the_repository, repo_fmt.hash_algo); - repo_set_compat_hash_algo(the_repository, + repo_set_hash_algo(repo, repo_fmt.hash_algo); + repo_set_compat_hash_algo(repo, repo_fmt.compat_hash_algo); - repo_set_ref_storage_format(the_repository, + repo_set_ref_storage_format(repo, repo_fmt.ref_storage_format, repo_fmt.ref_storage_payload); - the_repository->repository_format_worktree_config = + repo->repository_format_worktree_config = repo_fmt.worktree_config; - the_repository->repository_format_relative_worktrees = + repo->repository_format_relative_worktrees = repo_fmt.relative_worktrees; - 
the_repository->repository_format_submodule_path_cfg = + repo->repository_format_submodule_path_cfg = repo_fmt.submodule_path_cfg; /* take ownership of repo_fmt.partial_clone */ - the_repository->repository_format_partial_clone = + repo->repository_format_partial_clone = repo_fmt.partial_clone; repo_fmt.partial_clone = NULL; - the_repository->repository_format_precious_objects = + repo->repository_format_precious_objects = repo_fmt.precious_objects; } } @@ -2040,13 +2040,13 @@ const char *setup_git_directory_gently(int *nongit_ok) format = ref_storage_format_by_name(backend); if (format == REF_STORAGE_FORMAT_UNKNOWN) die(_("unknown ref storage format: '%s'"), backend); - repo_set_ref_storage_format(the_repository, format, payload); + repo_set_ref_storage_format(repo, format, payload); free(backend); free(payload); } - setup_original_cwd(the_repository); + setup_original_cwd(repo); strbuf_release(&dir); strbuf_release(&gitdir); @@ -2138,7 +2138,7 @@ void check_repository_format(struct repository_format *fmt) */ const char *setup_git_directory(void) { - return setup_git_directory_gently(NULL); + return setup_git_directory_gently(the_repository, NULL); } const char *resolve_gitdir_gently(const char *suspect, int *return_error_code) diff --git a/setup.h b/setup.h index 1a37089fa0aa54..18092fbf1630f5 100644 --- a/setup.h +++ b/setup.h @@ -136,7 +136,7 @@ enum { */ const char *enter_repo(struct repository *repo, const char *path, unsigned flags); -const char *setup_git_directory_gently(int *); +const char *setup_git_directory_gently(struct repository *repo, int *); const char *setup_git_directory(void); char *prefix_path(struct repository *repo, const char *prefix, int len, const char *path); char *prefix_path_gently(struct repository *repo, const char *prefix, int len, int *remaining, const char *path); diff --git a/t/helper/test-path-utils.c b/t/helper/test-path-utils.c index 163fdeefb0f7d9..15eb44485cda3d 100644 --- a/t/helper/test-path-utils.c +++ 
b/t/helper/test-path-utils.c @@ -377,7 +377,7 @@ int cmd__path_utils(int argc, const char **argv) const char *prefix = argv[2]; int prefix_len = strlen(prefix); int nongit_ok; - setup_git_directory_gently(&nongit_ok); + setup_git_directory_gently(the_repository, &nongit_ok); while (argc > 3) { char *pfx = prefix_path(the_repository, prefix, prefix_len, argv[3]); diff --git a/t/helper/test-subprocess.c b/t/helper/test-subprocess.c index 8a070e47cddb51..a8194d24b31db2 100644 --- a/t/helper/test-subprocess.c +++ b/t/helper/test-subprocess.c @@ -9,7 +9,7 @@ int cmd__subprocess(int argc, const char **argv) struct child_process cp = CHILD_PROCESS_INIT; int nogit = 0; - setup_git_directory_gently(&nogit); + setup_git_directory_gently(the_repository, &nogit); if (nogit) die("No git repo found"); if (argc > 1 && !strcmp(argv[1], "--setup-work-tree")) { From f9210dbc8add0ddd6bf31eb479d0d1d40a42850c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 19 May 2026 11:52:17 +0200 Subject: [PATCH 63/93] setup: stop using `the_repository` in `setup_git_directory()` Stop using `the_repository` in `setup_git_directory()` and instead accept the repository as a parameter. The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- archive.c | 2 +- builtin/grep.c | 2 +- builtin/hash-object.c | 2 +- builtin/merge-file.c | 2 +- builtin/rev-parse.c | 4 ++-- git.c | 2 +- http-push.c | 2 +- scalar.c | 4 ++-- setup.c | 4 ++-- setup.h | 2 +- t/helper/test-advise.c | 2 +- t/helper/test-bitmap.c | 2 +- t/helper/test-bloom.c | 2 +- t/helper/test-cache-tree.c | 2 +- t/helper/test-config.c | 2 +- t/helper/test-dump-cache-tree.c | 2 +- t/helper/test-dump-fsmonitor.c | 2 +- t/helper/test-dump-split-index.c | 2 +- t/helper/test-dump-untracked-cache.c | 2 +- t/helper/test-find-pack.c | 2 +- t/helper/test-fsmonitor-client.c | 2 +- t/helper/test-lazy-init-name-hash.c | 2 +- t/helper/test-match-trees.c | 2 +- t/helper/test-pack-deltas.c | 2 +- t/helper/test-pack-mtimes.c | 2 +- t/helper/test-partial-clone.c | 4 +++- t/helper/test-path-walk.c | 2 +- t/helper/test-reach.c | 2 +- t/helper/test-read-cache.c | 2 +- t/helper/test-read-graph.c | 2 +- t/helper/test-read-midx.c | 2 +- t/helper/test-ref-store.c | 2 +- t/helper/test-revision-walking.c | 2 +- t/helper/test-scrap-cache-tree.c | 2 +- t/helper/test-serve-v2.c | 2 +- t/helper/test-submodule-config.c | 2 +- t/helper/test-submodule-nested-repo-config.c | 2 +- t/helper/test-submodule.c | 10 +++++----- t/helper/test-userdiff.c | 2 +- t/helper/test-write-cache.c | 2 +- 40 files changed, 49 insertions(+), 47 deletions(-) diff --git a/archive.c b/archive.c index fcd474c682ffe5..51229107a57495 100644 --- a/archive.c +++ b/archive.c @@ -786,7 +786,7 @@ int write_archive(int argc, const char **argv, const char *prefix, * die ourselves; but its error message will be more specific * than what we could write here. 
*/ - setup_git_directory(); + setup_git_directory(the_repository); } parse_treeish_arg(argv, &args, remote); diff --git a/builtin/grep.c b/builtin/grep.c index 679f8b567a1578..560133feb88783 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1064,7 +1064,7 @@ int cmd_grep(int argc, use_index = 0; else /* die the same way as if we did it at the beginning */ - setup_git_directory(); + setup_git_directory(the_repository); } /* Ignore --recurse-submodules if --no-index is given or implied */ if (!use_index) diff --git a/builtin/hash-object.c b/builtin/hash-object.c index d7905bedc2dfd3..f306b0643f4a04 100644 --- a/builtin/hash-object.c +++ b/builtin/hash-object.c @@ -100,7 +100,7 @@ int cmd_hash_object(int argc, hash_object_usage, 0); if (flags & INDEX_WRITE_OBJECT) - prefix = setup_git_directory(); + prefix = setup_git_directory(the_repository); else prefix = setup_git_directory_gently(the_repository, &nongit); diff --git a/builtin/merge-file.c b/builtin/merge-file.c index 59a97922086a58..8fa576523927e8 100644 --- a/builtin/merge-file.c +++ b/builtin/merge-file.c @@ -110,7 +110,7 @@ int cmd_merge_file(int argc, if (!repo && object_id) /* emit the correct "not a git repo" error in this case */ - setup_git_directory(); + setup_git_directory(the_repository); for (i = 0; i < 3; i++) { char *fname; diff --git a/builtin/rev-parse.c b/builtin/rev-parse.c index 8fdb75413d33a6..0a01ff7a753938 100644 --- a/builtin/rev-parse.c +++ b/builtin/rev-parse.c @@ -739,7 +739,7 @@ int cmd_rev_parse(int argc, /* No options; just report on whether we're in a git repo or not. */ if (argc == 1) { - setup_git_directory(); + setup_git_directory(the_repository); repo_config(the_repository, git_default_config, NULL); return 0; } @@ -774,7 +774,7 @@ int cmd_rev_parse(int argc, /* The rest of the options require a git repository. 
*/ if (!did_repo_setup) { - prefix = setup_git_directory(); + prefix = setup_git_directory(the_repository); repo_config(the_repository, git_default_config, NULL); did_repo_setup = 1; diff --git a/git.c b/git.c index 2cc018fc5cc112..42cd5f5b3c5f4d 100644 --- a/git.c +++ b/git.c @@ -477,7 +477,7 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv, struct run_setup = RUN_SETUP_GENTLY; if (run_setup & RUN_SETUP) { - prefix = setup_git_directory(); + prefix = setup_git_directory(the_repository); no_repo = 0; } else if (run_setup & RUN_SETUP_GENTLY) { prefix = setup_git_directory_gently(the_repository, &no_repo); diff --git a/http-push.c b/http-push.c index d143fe28455623..520d6c3b6ade1f 100644 --- a/http-push.c +++ b/http-push.c @@ -1788,7 +1788,7 @@ int cmd_main(int argc, const char **argv) if (delete_branch && rs.nr != 1) die("You must specify only one branch name when deleting a remote branch"); - gitdir = setup_git_directory(); + gitdir = setup_git_directory(the_repository); memset(remote_dir_exists, -1, 256); diff --git a/scalar.c b/scalar.c index 4efb6ac36d888e..a80d8ee3ff54a7 100644 --- a/scalar.c +++ b/scalar.c @@ -58,7 +58,7 @@ static void setup_enlistment_directory(int argc, const char **argv, } strbuf_setlen(&path, len); - setup_git_directory(); + setup_git_directory(the_repository); if (!the_repository->worktree) die(_("Scalar enlistments require a worktree")); @@ -514,7 +514,7 @@ static int cmd_clone(int argc, const char **argv) goto cleanup; } - setup_git_directory(); + setup_git_directory(the_repository); /* common-main already logs `argv` */ trace2_def_repo(the_repository); diff --git a/setup.c b/setup.c index c12c6cbda2fa99..5dc27caf156b24 100644 --- a/setup.c +++ b/setup.c @@ -2136,9 +2136,9 @@ void check_repository_format(struct repository_format *fmt) * directory is not a strict subdirectory of the work tree root. The * prefix always ends with a '/' character. 
*/ -const char *setup_git_directory(void) +const char *setup_git_directory(struct repository *repo) { - return setup_git_directory_gently(the_repository, NULL); + return setup_git_directory_gently(repo, NULL); } const char *resolve_gitdir_gently(const char *suspect, int *return_error_code) diff --git a/setup.h b/setup.h index 18092fbf1630f5..b779661ce7df47 100644 --- a/setup.h +++ b/setup.h @@ -137,7 +137,7 @@ enum { const char *enter_repo(struct repository *repo, const char *path, unsigned flags); const char *setup_git_directory_gently(struct repository *repo, int *); -const char *setup_git_directory(void); +const char *setup_git_directory(struct repository *repo); char *prefix_path(struct repository *repo, const char *prefix, int len, const char *path); char *prefix_path_gently(struct repository *repo, const char *prefix, int len, int *remaining, const char *path); diff --git a/t/helper/test-advise.c b/t/helper/test-advise.c index 81ed93a05c9ac7..8f9db2693ed87a 100644 --- a/t/helper/test-advise.c +++ b/t/helper/test-advise.c @@ -11,7 +11,7 @@ int cmd__advise_if_enabled(int argc, const char **argv) if (argc != 2) die("usage: %s ", argv[0]); - setup_git_directory(); + setup_git_directory(the_repository); repo_config(the_repository, git_default_config, NULL); /* diff --git a/t/helper/test-bitmap.c b/t/helper/test-bitmap.c index 16a01669e4149a..d9b9a83b8f0596 100644 --- a/t/helper/test-bitmap.c +++ b/t/helper/test-bitmap.c @@ -37,7 +37,7 @@ static int bitmap_dump_pseudo_merge_objects(uint32_t n) int cmd__bitmap(int argc, const char **argv) { - setup_git_directory(); + setup_git_directory(the_repository); if (argc == 2 && !strcmp(argv[1], "list-commits")) return bitmap_list_commits(); diff --git a/t/helper/test-bloom.c b/t/helper/test-bloom.c index 3283544bd33db6..0c65befbf00f7d 100644 --- a/t/helper/test-bloom.c +++ b/t/helper/test-bloom.c @@ -52,7 +52,7 @@ static const char *const bloom_usage = "\n" int cmd__bloom(int argc, const char **argv) { - 
setup_git_directory(); + setup_git_directory(the_repository); if (argc < 2) usage(bloom_usage); diff --git a/t/helper/test-cache-tree.c b/t/helper/test-cache-tree.c index ff61d0ca7e2fe3..d42e2600921029 100644 --- a/t/helper/test-cache-tree.c +++ b/t/helper/test-cache-tree.c @@ -33,7 +33,7 @@ int cmd__cache_tree(int argc, const char **argv) OPT_END() }; - setup_git_directory(); + setup_git_directory(the_repository); argc = parse_options(argc, argv, NULL, options, test_cache_tree_usage, 0); diff --git a/t/helper/test-config.c b/t/helper/test-config.c index 9f8cca7c48cfe3..cfb3f4b111939c 100644 --- a/t/helper/test-config.c +++ b/t/helper/test-config.c @@ -102,7 +102,7 @@ int cmd__config(int argc, const char **argv) return 0; } - setup_git_directory(); + setup_git_directory(the_repository); git_configset_init(&cs); diff --git a/t/helper/test-dump-cache-tree.c b/t/helper/test-dump-cache-tree.c index 3f0c7d0ed07ec9..ccb41a423975f8 100644 --- a/t/helper/test-dump-cache-tree.c +++ b/t/helper/test-dump-cache-tree.c @@ -66,7 +66,7 @@ int cmd__dump_cache_tree(int ac UNUSED, const char **av UNUSED) struct cache_tree *another = cache_tree(); int ret; - setup_git_directory(); + setup_git_directory(the_repository); if (repo_read_index(the_repository) < 0) die("unable to read index file"); istate = *the_repository->index; diff --git a/t/helper/test-dump-fsmonitor.c b/t/helper/test-dump-fsmonitor.c index efd017ca357e0b..c991cbbb8a542c 100644 --- a/t/helper/test-dump-fsmonitor.c +++ b/t/helper/test-dump-fsmonitor.c @@ -9,7 +9,7 @@ int cmd__dump_fsmonitor(int ac UNUSED, const char **av UNUSED) { struct index_state *istate = the_repository->index; - setup_git_directory(); + setup_git_directory(the_repository); if (do_read_index(istate, the_repository->index_file, 0) < 0) die("unable to read index file"); if (!istate->fsmonitor_last_update) { diff --git a/t/helper/test-dump-split-index.c b/t/helper/test-dump-split-index.c index f855a3862c97bb..aae0a40a74bcbc 100644 --- 
a/t/helper/test-dump-split-index.c +++ b/t/helper/test-dump-split-index.c @@ -17,7 +17,7 @@ int cmd__dump_split_index(int ac UNUSED, const char **av) { struct split_index *si; - setup_git_directory(); + setup_git_directory(the_repository); do_read_index(the_repository->index, av[1], 1); printf("own %s\n", oid_to_hex(&the_repository->index->oid)); diff --git a/t/helper/test-dump-untracked-cache.c b/t/helper/test-dump-untracked-cache.c index 01a109496bee78..24308bd371b6be 100644 --- a/t/helper/test-dump-untracked-cache.c +++ b/t/helper/test-dump-untracked-cache.c @@ -54,7 +54,7 @@ int cmd__dump_untracked_cache(int ac UNUSED, const char **av UNUSED) xsetenv("GIT_CONFIG_KEY_0", "core.untrackedCache", 1); xsetenv("GIT_CONFIG_VALUE_0", "keep", 1); - setup_git_directory(); + setup_git_directory(the_repository); if (repo_read_index(the_repository) < 0) die("unable to read index file"); uc = the_repository->index->untracked; diff --git a/t/helper/test-find-pack.c b/t/helper/test-find-pack.c index fc4b8a77b3007a..28d5b1fe094345 100644 --- a/t/helper/test-find-pack.c +++ b/t/helper/test-find-pack.c @@ -25,7 +25,7 @@ int cmd__find_pack(int argc, const char **argv) struct object_id oid; struct packed_git *p; int count = -1, actual_count = 0; - const char *prefix = setup_git_directory(); + const char *prefix = setup_git_directory(the_repository); struct option options[] = { OPT_INTEGER('c', "check-count", &count, "expected number of packs"), diff --git a/t/helper/test-fsmonitor-client.c b/t/helper/test-fsmonitor-client.c index 02bfe92e8d55b7..dc1dff23fb8ed5 100644 --- a/t/helper/test-fsmonitor-client.c +++ b/t/helper/test-fsmonitor-client.c @@ -210,7 +210,7 @@ int cmd__fsmonitor_client(int argc, const char **argv) subcmd = argv[0]; - setup_git_directory(); + setup_git_directory(the_repository); if (!strcmp(subcmd, "query")) return !!do_send_query(token); diff --git a/t/helper/test-lazy-init-name-hash.c b/t/helper/test-lazy-init-name-hash.c index 40f5df4412adf1..e542985c943452 
100644 --- a/t/helper/test-lazy-init-name-hash.c +++ b/t/helper/test-lazy-init-name-hash.c @@ -211,7 +211,7 @@ int cmd__lazy_init_name_hash(int argc, const char **argv) const char *prefix; uint64_t avg_single, avg_multi; - prefix = setup_git_directory(); + prefix = setup_git_directory(the_repository); argc = parse_options(argc, argv, prefix, options, usage, 0); diff --git a/t/helper/test-match-trees.c b/t/helper/test-match-trees.c index 2ed064b9716ac8..006ce5278e23a0 100644 --- a/t/helper/test-match-trees.c +++ b/t/helper/test-match-trees.c @@ -13,7 +13,7 @@ int cmd__match_trees(int ac UNUSED, const char **av) struct object_id hash1, hash2, shifted; struct tree *one, *two; - setup_git_directory(); + setup_git_directory(the_repository); if (repo_get_oid(the_repository, av[1], &hash1)) die("cannot parse %s as an object name", av[1]); diff --git a/t/helper/test-pack-deltas.c b/t/helper/test-pack-deltas.c index 4981401eaa6664..c493b75e02a99a 100644 --- a/t/helper/test-pack-deltas.c +++ b/t/helper/test-pack-deltas.c @@ -95,7 +95,7 @@ int cmd__pack_deltas(int argc, const char **argv) if (argc || num_objects < 0) usage_with_options(usage_str, options); - setup_git_directory(); + setup_git_directory(the_repository); f = hashfd(the_repository->hash_algo, 1, ""); write_pack_header(f, num_objects); diff --git a/t/helper/test-pack-mtimes.c b/t/helper/test-pack-mtimes.c index 7a8ee1de24ba83..b7740567999299 100644 --- a/t/helper/test-pack-mtimes.c +++ b/t/helper/test-pack-mtimes.c @@ -32,7 +32,7 @@ int cmd__pack_mtimes(int argc, const char **argv) struct strbuf buf = STRBUF_INIT; struct packed_git *p; - setup_git_directory(); + setup_git_directory(the_repository); if (argc != 2) usage(pack_mtimes_usage); diff --git a/t/helper/test-partial-clone.c b/t/helper/test-partial-clone.c index d84880074930b2..a7aab426d0194a 100644 --- a/t/helper/test-partial-clone.c +++ b/t/helper/test-partial-clone.c @@ -1,3 +1,5 @@ +#define USE_THE_REPOSITORY_VARIABLE + #include "test-tool.h" #include 
"hex.h" #include "repository.h" @@ -32,7 +34,7 @@ static void object_info(const char *gitdir, const char *oid_hex) int cmd__partial_clone(int argc, const char **argv) { - setup_git_directory(); + setup_git_directory(the_repository); if (argc < 4) die("too few arguments"); diff --git a/t/helper/test-path-walk.c b/t/helper/test-path-walk.c index fe63002c2be27d..69676b15a53f73 100644 --- a/t/helper/test-path-walk.c +++ b/t/helper/test-path-walk.c @@ -89,7 +89,7 @@ int cmd__path_walk(int argc, const char **argv) OPT_END(), }; - setup_git_directory(); + setup_git_directory(the_repository); revs.repo = the_repository; argc = parse_options(argc, argv, NULL, diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c index 3131b54a871c1b..5d86a96c17e4e5 100644 --- a/t/helper/test-reach.c +++ b/t/helper/test-reach.c @@ -39,7 +39,7 @@ int cmd__reach(int ac, const char **av) struct strbuf buf = STRBUF_INIT; struct repository *r = the_repository; - setup_git_directory(); + setup_git_directory(the_repository); if (ac < 2) exit(1); diff --git a/t/helper/test-read-cache.c b/t/helper/test-read-cache.c index 9ae71cefb302c9..6b08ba8f078d00 100644 --- a/t/helper/test-read-cache.c +++ b/t/helper/test-read-cache.c @@ -19,7 +19,7 @@ int cmd__read_cache(int argc, const char **argv) if (argc == 2) cnt = strtol(argv[1], NULL, 0); - setup_git_directory(); + setup_git_directory(the_repository); repo_config(the_repository, git_default_config, NULL); for (i = 0; i < cnt; i++) { diff --git a/t/helper/test-read-graph.c b/t/helper/test-read-graph.c index 6a5f64e473f2b6..9f07b9c25a66e3 100644 --- a/t/helper/test-read-graph.c +++ b/t/helper/test-read-graph.c @@ -76,7 +76,7 @@ int cmd__read_graph(int argc, const char **argv) struct odb_source *source; int ret = 0; - setup_git_directory(); + setup_git_directory(the_repository); source = the_repository->objects->sources; prepare_repo_settings(the_repository); diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c index 
388d29e2b53db3..790000fb26c270 100644 --- a/t/helper/test-read-midx.c +++ b/t/helper/test-read-midx.c @@ -14,7 +14,7 @@ static struct multi_pack_index *setup_midx(const char *object_dir) { struct odb_source *source; - setup_git_directory(); + setup_git_directory(the_repository); source = odb_find_source(the_repository->objects, object_dir); if (!source) source = odb_add_to_alternates_memory(the_repository->objects, diff --git a/t/helper/test-ref-store.c b/t/helper/test-ref-store.c index 74edf2029a28fc..3866d0aca49bc2 100644 --- a/t/helper/test-ref-store.c +++ b/t/helper/test-ref-store.c @@ -340,7 +340,7 @@ int cmd__ref_store(int argc UNUSED, const char **argv) const char *func; struct command *cmd; - setup_git_directory(); + setup_git_directory(the_repository); argv = get_store(argv + 1, &refs); diff --git a/t/helper/test-revision-walking.c b/t/helper/test-revision-walking.c index 071f5bd1e21974..70051eeaf848e7 100644 --- a/t/helper/test-revision-walking.c +++ b/t/helper/test-revision-walking.c @@ -56,7 +56,7 @@ int cmd__revision_walking(int argc, const char **argv) if (argc < 2) return 1; - setup_git_directory(); + setup_git_directory(the_repository); if (!strcmp(argv[1], "run-twice")) { printf("1st\n"); diff --git a/t/helper/test-scrap-cache-tree.c b/t/helper/test-scrap-cache-tree.c index 64fff6e9e3ce87..7b5ce501d9eec0 100644 --- a/t/helper/test-scrap-cache-tree.c +++ b/t/helper/test-scrap-cache-tree.c @@ -12,7 +12,7 @@ int cmd__scrap_cache_tree(int ac UNUSED, const char **av UNUSED) { struct lock_file index_lock = LOCK_INIT; - setup_git_directory(); + setup_git_directory(the_repository); repo_hold_locked_index(the_repository, &index_lock, LOCK_DIE_ON_ERROR); if (repo_read_index(the_repository) < 0) die("unable to read index file"); diff --git a/t/helper/test-serve-v2.c b/t/helper/test-serve-v2.c index 63a200b8d46f68..27f3ed89478fde 100644 --- a/t/helper/test-serve-v2.c +++ b/t/helper/test-serve-v2.c @@ -23,7 +23,7 @@ int cmd__serve_v2(int argc, const char 
**argv) N_("exit immediately after advertising capabilities")), OPT_END() }; - const char *prefix = setup_git_directory(); + const char *prefix = setup_git_directory(the_repository); /* ignore all unknown cmdline switches for now */ argc = parse_options(argc, argv, prefix, options, serve_usage, diff --git a/t/helper/test-submodule-config.c b/t/helper/test-submodule-config.c index cbe93f2f9e0178..3f302921793378 100644 --- a/t/helper/test-submodule-config.c +++ b/t/helper/test-submodule-config.c @@ -34,7 +34,7 @@ int cmd__submodule_config(int argc, const char **argv) if (my_argc % 2 != 0) die_usage(argc, argv, "Wrong number of arguments."); - setup_git_directory(); + setup_git_directory(the_repository); while (*arg) { struct object_id commit_oid; diff --git a/t/helper/test-submodule-nested-repo-config.c b/t/helper/test-submodule-nested-repo-config.c index 2710341cd5db11..7e31d3fe47a105 100644 --- a/t/helper/test-submodule-nested-repo-config.c +++ b/t/helper/test-submodule-nested-repo-config.c @@ -19,7 +19,7 @@ int cmd__submodule_nested_repo_config(int argc, const char **argv) if (argc < 3) die_usage(argv, "Wrong number of arguments."); - setup_git_directory(); + setup_git_directory(the_repository); if (repo_submodule_init(&subrepo, the_repository, argv[1], null_oid(the_hash_algo))) { die_usage(argv, "Submodule not found."); diff --git a/t/helper/test-submodule.c b/t/helper/test-submodule.c index 0133852e1ebfc7..3c5c4c4a090e98 100644 --- a/t/helper/test-submodule.c +++ b/t/helper/test-submodule.c @@ -99,7 +99,7 @@ static int cmd__submodule_is_active(int argc, const char **argv) if (argc != 1) usage_with_options(submodule_is_active_usage, options); - setup_git_directory(); + setup_git_directory(the_repository); return !is_submodule_active(the_repository, argv[0]); } @@ -142,7 +142,7 @@ static int cmd__submodule_config_list(int argc, const char **argv) argc = parse_options(argc, argv, "test-tools", options, usage, PARSE_OPT_KEEP_ARGV0); - setup_git_directory(); + 
setup_git_directory(the_repository); if (argc == 2) return print_config_from_gitmodules(the_repository, argv[1]); @@ -161,7 +161,7 @@ static int cmd__submodule_config_set(int argc, const char **argv) argc = parse_options(argc, argv, "test-tools", options, usage, PARSE_OPT_KEEP_ARGV0); - setup_git_directory(); + setup_git_directory(the_repository); /* Equivalent to ACTION_SET in builtin/config.c */ if (argc == 3) { @@ -183,7 +183,7 @@ static int cmd__submodule_config_unset(int argc, const char **argv) NULL }; - setup_git_directory(); + setup_git_directory(the_repository); if (argc == 2) { if (!is_writing_gitmodules_ok()) @@ -202,7 +202,7 @@ static int cmd__submodule_config_writeable(int argc, const char **argv UNUSED) "test-tool submodule config-writeable", NULL }; - setup_git_directory(); + setup_git_directory(the_repository); if (argc == 1) return is_writing_gitmodules_ok() ? 0 : -1; diff --git a/t/helper/test-userdiff.c b/t/helper/test-userdiff.c index aa3a9894d257ef..fc34c589b3d106 100644 --- a/t/helper/test-userdiff.c +++ b/t/helper/test-userdiff.c @@ -40,7 +40,7 @@ int cmd__userdiff(int argc, const char **argv) return error("unknown argument %s", argv[1]); if (want & USERDIFF_DRIVER_TYPE_CUSTOM) { - setup_git_directory(); + setup_git_directory(the_repository); repo_config(the_repository, cmd__userdiff_config, NULL); } diff --git a/t/helper/test-write-cache.c b/t/helper/test-write-cache.c index b37dd2c5d6db64..98e1477c98d1f7 100644 --- a/t/helper/test-write-cache.c +++ b/t/helper/test-write-cache.c @@ -12,7 +12,7 @@ int cmd__write_cache(int argc, const char **argv) int i, cnt = 1; if (argc == 2) cnt = strtol(argv[1], NULL, 0); - setup_git_directory(); + setup_git_directory(the_repository); repo_read_index(the_repository); for (i = 0; i < cnt; i++) { repo_hold_locked_index(the_repository, &index_lock, From 9cae7229c99bb606dcbe81b454bf19ada82769a4 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 19 May 2026 11:52:18 +0200 Subject: [PATCH 64/93] setup: 
stop using `the_repository` in `upgrade_repository_format()` Stop using `the_repository` in `upgrade_repository_format()` and instead accept the repository as a parameter. The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- list-objects-filter-options.c | 2 +- repository.h | 2 +- setup.c | 6 +++--- worktree.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index cef67e591954a9..bc5d98f9e613cf 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -378,7 +378,7 @@ void partial_clone_register( */ return; } else { - if (upgrade_repository_format(1) < 0) + if (upgrade_repository_format(the_repository, 1) < 0) die(_("unable to upgrade repository format to support partial clone")); /* Add promisor config for the remote */ diff --git a/repository.h b/repository.h index d391aff8ab4a90..c3ec0f4b790b00 100644 --- a/repository.h +++ b/repository.h @@ -281,6 +281,6 @@ void repo_update_index_if_able(struct repository *, struct lock_file *); * Return 1 if upgrade repository format to target_version succeeded, * 0 if no upgrade is necessary, and -1 when upgrade is not possible. 
*/ -int upgrade_repository_format(int target_version); +int upgrade_repository_format(struct repository *repo, int target_version); #endif /* REPOSITORY_H */ diff --git a/setup.c b/setup.c index 5dc27caf156b24..ed0c14e98e0b85 100644 --- a/setup.c +++ b/setup.c @@ -811,7 +811,7 @@ static int check_repository_format_gently(struct repository *repo, return 0; } -int upgrade_repository_format(int target_version) +int upgrade_repository_format(struct repository *repo, int target_version) { struct strbuf sb = STRBUF_INIT; struct strbuf err = STRBUF_INIT; @@ -819,7 +819,7 @@ int upgrade_repository_format(int target_version) struct repository_format repo_fmt = REPOSITORY_FORMAT_INIT; int ret; - repo_common_path_append(the_repository, &sb, "config"); + repo_common_path_append(repo, &sb, "config"); read_repository_format(&repo_fmt, sb.buf); strbuf_release(&sb); @@ -841,7 +841,7 @@ int upgrade_repository_format(int target_version) } strbuf_addf(&repo_version, "%d", target_version); - repo_config_set(the_repository, "core.repositoryformatversion", repo_version.buf); + repo_config_set(repo, "core.repositoryformatversion", repo_version.buf); ret = 1; diff --git a/worktree.c b/worktree.c index 340b4ed777cc25..97eddc39166953 100644 --- a/worktree.c +++ b/worktree.c @@ -1104,7 +1104,7 @@ void write_worktree_linking_files(const char *dotgit, const char *gitdir, strbuf_realpath(&repo, repo.buf, 1); if (use_relative_paths && !the_repository->repository_format_relative_worktrees) { - if (upgrade_repository_format(1) < 0) + if (upgrade_repository_format(the_repository, 1) < 0) die(_("unable to upgrade repository format to support relative worktrees")); if (repo_config_set_gently(the_repository, "extensions.relativeWorktrees", "true")) die(_("unable to set extensions.relativeWorktrees setting")); From 602254dfb032b47349076132ab07dd3951aa2c3d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 19 May 2026 11:52:19 +0200 Subject: [PATCH 65/93] setup: stop using `the_repository` in 
`check_repository_format()` Stop using `the_repository` in `check_repository_format()` and instead accept the repository as a parameter. The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. Furthermore, the function is never used outside "setup.c". Drop its declaration in "setup.h" and make it static. Note that this requires us to reorder the function. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- setup.c | 58 ++++++++++++++++++++++++++++++++------------------------- setup.h | 10 ---------- 2 files changed, 33 insertions(+), 35 deletions(-) diff --git a/setup.c b/setup.c index ed0c14e98e0b85..406984b62c2501 100644 --- a/setup.c +++ b/setup.c @@ -1758,6 +1758,37 @@ enum discovery_result discover_git_directory_reason(struct strbuf *commondir, return result; } +/* + * Check the repository format version in the path found in repo_get_git_dir(repo), + * and die if it is a version we don't understand. Generally one would + * set_git_dir() before calling this, and use it only for "are we in a valid + * repo?". + * + * If successful and fmt is not NULL, fill fmt with data. 
+ */ +static void check_repository_format(struct repository *repo, struct repository_format *fmt) +{ + struct repository_format repo_fmt = REPOSITORY_FORMAT_INIT; + if (!fmt) + fmt = &repo_fmt; + check_repository_format_gently(repo, repo_get_git_dir(repo), fmt, NULL); + startup_info->have_repository = 1; + repo_set_hash_algo(repo, fmt->hash_algo); + repo_set_compat_hash_algo(repo, fmt->compat_hash_algo); + repo_set_ref_storage_format(repo, + fmt->ref_storage_format, + fmt->ref_storage_payload); + repo->repository_format_worktree_config = + fmt->worktree_config; + repo->repository_format_submodule_path_cfg = + fmt->submodule_path_cfg; + repo->repository_format_relative_worktrees = + fmt->relative_worktrees; + repo->repository_format_partial_clone = + xstrdup_or_null(fmt->partial_clone); + clear_repository_format(&repo_fmt); +} + const char *enter_repo(struct repository *repo, const char *path, unsigned flags) { static struct strbuf validated_path = STRBUF_INIT; @@ -1832,7 +1863,7 @@ const char *enter_repo(struct repository *repo, const char *path, unsigned flags if (is_git_directory(".")) { set_git_dir(repo, ".", 0); - check_repository_format(NULL); + check_repository_format(repo, NULL); return path; } @@ -2107,29 +2138,6 @@ int git_config_perm(const char *var, const char *value) return -(i & 0666); } -void check_repository_format(struct repository_format *fmt) -{ - struct repository_format repo_fmt = REPOSITORY_FORMAT_INIT; - if (!fmt) - fmt = &repo_fmt; - check_repository_format_gently(the_repository, repo_get_git_dir(the_repository), fmt, NULL); - startup_info->have_repository = 1; - repo_set_hash_algo(the_repository, fmt->hash_algo); - repo_set_compat_hash_algo(the_repository, fmt->compat_hash_algo); - repo_set_ref_storage_format(the_repository, - fmt->ref_storage_format, - fmt->ref_storage_payload); - the_repository->repository_format_worktree_config = - fmt->worktree_config; - the_repository->repository_format_submodule_path_cfg = - fmt->submodule_path_cfg; - 
the_repository->repository_format_relative_worktrees = - fmt->relative_worktrees; - the_repository->repository_format_partial_clone = - xstrdup_or_null(fmt->partial_clone); - clear_repository_format(&repo_fmt); -} - /* * Returns the "prefix", a path to the current working directory * relative to the work tree root, or NULL, if the current working @@ -2804,7 +2812,7 @@ int init_db(const char *git_dir, const char *real_git_dir, * config file, so this will not fail. What we are catching * is an attempt to reinitialize new repository with an old tool. */ - check_repository_format(&repo_fmt); + check_repository_format(the_repository, &repo_fmt); repository_format_configure(the_repository, &repo_fmt, hash, ref_storage_format); diff --git a/setup.h b/setup.h index b779661ce7df47..a820041af05ffb 100644 --- a/setup.h +++ b/setup.h @@ -221,16 +221,6 @@ void clear_repository_format(struct repository_format *format); int verify_repository_format(const struct repository_format *format, struct strbuf *err); -/* - * Check the repository format version in the path found in repo_get_git_dir(the_repository), - * and die if it is a version we don't understand. Generally one would - * set_git_dir() before calling this, and use it only for "are we in a valid - * repo?". - * - * If successful and fmt is not NULL, fill fmt with data. - */ -void check_repository_format(struct repository_format *fmt); - const char *get_template_dir(const char *option_template); #define INIT_DB_QUIET (1 << 0) From 779fbcd9ebe8590e13ecd072d2e37dcbebd86ce5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 19 May 2026 11:52:20 +0200 Subject: [PATCH 66/93] setup: stop using `the_repository` in `initialize_repository_version()` Stop using `the_repository` in `initialize_repository_version()` and instead accept the repository as a parameter. The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/clone.c | 4 ++-- refs.c | 2 +- setup.c | 29 +++++++++++++++-------------- setup.h | 3 ++- 4 files changed, 20 insertions(+), 18 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index 8844e3d4811281..24fe0eead55d40 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -1229,7 +1229,7 @@ int cmd_clone(int argc, * * This is sufficient for Git commands to discover the Git directory. */ - initialize_repository_version(GIT_HASH_UNKNOWN, + initialize_repository_version(the_repository, GIT_HASH_UNKNOWN, the_repository->ref_storage_format, 1); refs_create_refdir_stubs(the_repository, git_dir, NULL); @@ -1442,7 +1442,7 @@ int cmd_clone(int argc, * ours to the same thing. */ hash_algo = hash_algo_by_ptr(transport_get_hash_algo(transport)); - initialize_repository_version(hash_algo, the_repository->ref_storage_format, 1); + initialize_repository_version(the_repository, hash_algo, the_repository->ref_storage_format, 1); repo_set_hash_algo(the_repository, hash_algo); create_reference_database(NULL, 1); diff --git a/refs.c b/refs.c index 844785219d5826..c36a322f4c810c 100644 --- a/refs.c +++ b/refs.c @@ -3453,7 +3453,7 @@ int repo_migrate_ref_storage_format(struct repository *repo, * repository format so that clients will use the new ref store. * We also need to swap out the repository's main ref store. */ - initialize_repository_version(hash_algo_by_ptr(repo->hash_algo), format, 1); + initialize_repository_version(the_repository, hash_algo_by_ptr(repo->hash_algo), format, 1); /* * Unset the old ref store and release it. 
`get_main_ref_store()` will diff --git a/setup.c b/setup.c index 406984b62c2501..e09483ba346db4 100644 --- a/setup.c +++ b/setup.c @@ -2385,7 +2385,8 @@ static int needs_work_tree_config(const char *git_dir, const char *work_tree) return 1; } -void initialize_repository_version(int hash_algo, +void initialize_repository_version(struct repository *repo, + int hash_algo, enum ref_storage_format ref_storage_format, int reinit) { @@ -2402,35 +2403,35 @@ void initialize_repository_version(int hash_algo, */ if (hash_algo != GIT_HASH_SHA1_LEGACY || ref_storage_format != REF_STORAGE_FORMAT_FILES || - the_repository->ref_storage_payload) + repo->ref_storage_payload) target_version = GIT_REPO_VERSION_READ; if (hash_algo != GIT_HASH_SHA1_LEGACY && hash_algo != GIT_HASH_UNKNOWN) - repo_config_set(the_repository, "extensions.objectformat", + repo_config_set(repo, "extensions.objectformat", hash_algos[hash_algo].name); else if (reinit) - repo_config_set_gently(the_repository, "extensions.objectformat", NULL); + repo_config_set_gently(repo, "extensions.objectformat", NULL); - if (the_repository->ref_storage_payload) { + if (repo->ref_storage_payload) { struct strbuf ref_uri = STRBUF_INIT; strbuf_addf(&ref_uri, "%s://%s", ref_storage_format_to_name(ref_storage_format), - the_repository->ref_storage_payload); - repo_config_set(the_repository, "extensions.refstorage", ref_uri.buf); + repo->ref_storage_payload); + repo_config_set(repo, "extensions.refstorage", ref_uri.buf); strbuf_release(&ref_uri); } else if (ref_storage_format != REF_STORAGE_FORMAT_FILES) { - repo_config_set(the_repository, "extensions.refstorage", + repo_config_set(repo, "extensions.refstorage", ref_storage_format_to_name(ref_storage_format)); } else if (reinit) { - repo_config_set_gently(the_repository, "extensions.refstorage", NULL); + repo_config_set_gently(repo, "extensions.refstorage", NULL); } if (reinit) { struct strbuf config = STRBUF_INIT; struct repository_format repo_fmt = REPOSITORY_FORMAT_INIT; - 
repo_common_path_append(the_repository, &config, "config"); + repo_common_path_append(repo, &config, "config"); read_repository_format(&repo_fmt, config.buf); if (repo_fmt.v1_only_extensions.nr) @@ -2440,17 +2441,17 @@ void initialize_repository_version(int hash_algo, clear_repository_format(&repo_fmt); } - repo_config_get_bool(the_repository, "init.defaultSubmodulePathConfig", + repo_config_get_bool(repo, "init.defaultSubmodulePathConfig", &default_submodule_path_config); if (default_submodule_path_config) { /* extensions.submodulepathconfig requires at least version 1 */ if (target_version == 0) target_version = 1; - repo_config_set(the_repository, "extensions.submodulepathconfig", "true"); + repo_config_set(repo, "extensions.submodulepathconfig", "true"); } strbuf_addf(&repo_version, "%d", target_version); - repo_config_set(the_repository, "core.repositoryformatversion", repo_version.buf); + repo_config_set(repo, "core.repositoryformatversion", repo_version.buf); strbuf_release(&repo_version); } @@ -2551,7 +2552,7 @@ static int create_default_files(struct repository *repo, adjust_shared_perm(repo, repo_get_git_dir(repo)); } - initialize_repository_version(fmt->hash_algo, fmt->ref_storage_format, reinit); + initialize_repository_version(repo, fmt->hash_algo, fmt->ref_storage_format, reinit); /* Check filemode trustability */ repo_git_path_replace(repo, &path, "config"); diff --git a/setup.h b/setup.h index a820041af05ffb..c33b675ccfd95f 100644 --- a/setup.h +++ b/setup.h @@ -232,7 +232,8 @@ int init_db(const char *git_dir, const char *real_git_dir, enum ref_storage_format ref_storage_format, const char *initial_branch, int init_shared_repository, unsigned int flags); -void initialize_repository_version(int hash_algo, +void initialize_repository_version(struct repository *repo, + int hash_algo, enum ref_storage_format ref_storage_format, int reinit); void create_reference_database(const char *initial_branch, int quiet); From 
15053894cb55afdd50b938df9c89d98d2af0548a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 19 May 2026 11:52:21 +0200 Subject: [PATCH 67/93] setup: stop using `the_repository` in `create_reference_database()` Stop using `the_repository` in `create_reference_database()` and instead accept the repository as a parameter. The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/clone.c | 2 +- setup.c | 13 +++++++------ setup.h | 2 +- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index 24fe0eead55d40..53a41629e673eb 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -1444,7 +1444,7 @@ int cmd_clone(int argc, hash_algo = hash_algo_by_ptr(transport_get_hash_algo(transport)); initialize_repository_version(the_repository, hash_algo, the_repository->ref_storage_format, 1); repo_set_hash_algo(the_repository, hash_algo); - create_reference_database(NULL, 1); + create_reference_database(the_repository, NULL, 1); /* * Before fetching from the remote, download and install bundle diff --git a/setup.c b/setup.c index e09483ba346db4..9c493195687040 100644 --- a/setup.c +++ b/setup.c @@ -2468,13 +2468,14 @@ static int is_reinit(struct repository *repo) return ret; } -void create_reference_database(const char *initial_branch, int quiet) +void create_reference_database(struct repository *repo, + const char *initial_branch, int quiet) { struct strbuf err = STRBUF_INIT; char *to_free = NULL; - int reinit = is_reinit(the_repository); + int reinit = is_reinit(repo); - if (ref_store_create_on_disk(get_main_ref_store(the_repository), 0, &err)) + if (ref_store_create_on_disk(get_main_ref_store(repo), 0, &err)) die("failed to set up refs db: %s", err.buf); /* @@ -2486,14 +2487,14 @@ void create_reference_database(const char *initial_branch, int quiet) if (!initial_branch) initial_branch = to_free = - 
repo_default_branch_name(the_repository, quiet); + repo_default_branch_name(repo, quiet); ref = xstrfmt("refs/heads/%s", initial_branch); if (check_refname_format(ref, 0) < 0) die(_("invalid initial branch name: '%s'"), initial_branch); - if (refs_update_symref(get_main_ref_store(the_repository), "HEAD", ref, NULL) < 0) + if (refs_update_symref(get_main_ref_store(repo), "HEAD", ref, NULL) < 0) exit(1); free(ref); } @@ -2830,7 +2831,7 @@ int init_db(const char *git_dir, const char *real_git_dir, &repo_fmt, init_shared_repository); if (!(flags & INIT_DB_SKIP_REFDB)) - create_reference_database(initial_branch, flags & INIT_DB_QUIET); + create_reference_database(the_repository, initial_branch, flags & INIT_DB_QUIET); create_object_directory(the_repository); if (repo_settings_get_shared_repository(the_repository)) { diff --git a/setup.h b/setup.h index c33b675ccfd95f..21737e9bd69108 100644 --- a/setup.h +++ b/setup.h @@ -236,7 +236,7 @@ void initialize_repository_version(struct repository *repo, int hash_algo, enum ref_storage_format ref_storage_format, int reinit); -void create_reference_database(const char *initial_branch, int quiet); +void create_reference_database(struct repository *repo, const char *initial_branch, int quiet); /* * NOTE NOTE NOTE!! From df69f40c34de003ebc43cfe514526b11ffdec113 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 19 May 2026 11:52:22 +0200 Subject: [PATCH 68/93] setup: stop using `the_repository` in `init_db()` Stop using `the_repository` in `init_db()` and instead accept the repository as a parameter. The injection of `the_repository` is thus bumped one level higher, where callers now pass it in explicitly. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/clone.c | 2 +- builtin/init-db.c | 2 +- setup.c | 43 ++++++++++++++++++++++--------------------- setup.h | 3 ++- 4 files changed, 26 insertions(+), 24 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index 53a41629e673eb..d60d1b60bc238c 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -1186,7 +1186,7 @@ int cmd_clone(int argc, * repository, and reference backends may persist that information into * their on-disk data structures. */ - init_db(git_dir, real_git_dir, option_template, GIT_HASH_UNKNOWN, + init_db(the_repository, git_dir, real_git_dir, option_template, GIT_HASH_UNKNOWN, ref_storage_format, NULL, do_not_override_repo_unix_permissions, INIT_DB_QUIET | INIT_DB_SKIP_REFDB); diff --git a/builtin/init-db.c b/builtin/init-db.c index e626b0d8b7ccc6..c55517ad94d447 100644 --- a/builtin/init-db.c +++ b/builtin/init-db.c @@ -252,7 +252,7 @@ int cmd_init_db(int argc, } flags |= INIT_DB_EXIST_OK; - ret = init_db(git_dir, real_git_dir, template_dir, hash_algo, + ret = init_db(the_repository, git_dir, real_git_dir, template_dir, hash_algo, ref_storage_format, initial_branch, init_shared_repository, flags); diff --git a/setup.c b/setup.c index 9c493195687040..6aee839d8c8c07 100644 --- a/setup.c +++ b/setup.c @@ -2778,7 +2778,8 @@ static void repository_format_configure(struct repository *repo, repo_fmt->ref_storage_payload); } -int init_db(const char *git_dir, const char *real_git_dir, +int init_db(struct repository *repo, + const char *git_dir, const char *real_git_dir, const char *template_dir, int hash, enum ref_storage_format ref_storage_format, const char *initial_branch, @@ -2798,13 +2799,13 @@ int init_db(const char *git_dir, const char *real_git_dir, if (!exist_ok && !stat(real_git_dir, &st)) die(_("%s already exists"), real_git_dir); - set_git_dir(the_repository, real_git_dir, 1); - git_dir = repo_get_git_dir(the_repository); + set_git_dir(repo, real_git_dir, 1); + 
git_dir = repo_get_git_dir(repo); separate_git_dir(git_dir, original_git_dir); } else { - set_git_dir(the_repository, git_dir, 1); - git_dir = repo_get_git_dir(the_repository); + set_git_dir(repo, git_dir, 1); + git_dir = repo_get_git_dir(repo); } startup_info->have_repository = 1; @@ -2814,27 +2815,27 @@ int init_db(const char *git_dir, const char *real_git_dir, * config file, so this will not fail. What we are catching * is an attempt to reinitialize new repository with an old tool. */ - check_repository_format(the_repository, &repo_fmt); + check_repository_format(repo, &repo_fmt); - repository_format_configure(the_repository, &repo_fmt, hash, ref_storage_format); + repository_format_configure(repo, &repo_fmt, hash, ref_storage_format); /* * Ensure `core.hidedotfiles` is processed. This must happen after we * have set up the repository format such that we can evaluate * includeIf conditions correctly in the case of re-initialization. */ - repo_config(the_repository, git_default_core_config, NULL); + repo_config(repo, git_default_core_config, NULL); - safe_create_dir(the_repository, git_dir, 0); + safe_create_dir(repo, git_dir, 0); - reinit = create_default_files(the_repository, template_dir, original_git_dir, + reinit = create_default_files(repo, template_dir, original_git_dir, &repo_fmt, init_shared_repository); if (!(flags & INIT_DB_SKIP_REFDB)) - create_reference_database(the_repository, initial_branch, flags & INIT_DB_QUIET); - create_object_directory(the_repository); + create_reference_database(repo, initial_branch, flags & INIT_DB_QUIET); + create_object_directory(repo); - if (repo_settings_get_shared_repository(the_repository)) { + if (repo_settings_get_shared_repository(repo)) { char buf[10]; /* We do not spell "group" and such, so that * the configuration can be read by older version @@ -2842,29 +2843,29 @@ int init_db(const char *git_dir, const char *real_git_dir, * and compatibility values for PERM_GROUP and * PERM_EVERYBODY. 
*/ - if (repo_settings_get_shared_repository(the_repository) < 0) + if (repo_settings_get_shared_repository(repo) < 0) /* force to the mode value */ - xsnprintf(buf, sizeof(buf), "0%o", -repo_settings_get_shared_repository(the_repository)); - else if (repo_settings_get_shared_repository(the_repository) == PERM_GROUP) + xsnprintf(buf, sizeof(buf), "0%o", -repo_settings_get_shared_repository(repo)); + else if (repo_settings_get_shared_repository(repo) == PERM_GROUP) xsnprintf(buf, sizeof(buf), "%d", OLD_PERM_GROUP); - else if (repo_settings_get_shared_repository(the_repository) == PERM_EVERYBODY) + else if (repo_settings_get_shared_repository(repo) == PERM_EVERYBODY) xsnprintf(buf, sizeof(buf), "%d", OLD_PERM_EVERYBODY); else BUG("invalid value for shared_repository"); - repo_config_set(the_repository, "core.sharedrepository", buf); - repo_config_set(the_repository, "receive.denyNonFastforwards", "true"); + repo_config_set(repo, "core.sharedrepository", buf); + repo_config_set(repo, "receive.denyNonFastforwards", "true"); } if (!(flags & INIT_DB_QUIET)) { int len = strlen(git_dir); if (reinit) - printf(repo_settings_get_shared_repository(the_repository) + printf(repo_settings_get_shared_repository(repo) ? _("Reinitialized existing shared Git repository in %s%s\n") : _("Reinitialized existing Git repository in %s%s\n"), git_dir, len && git_dir[len-1] != '/' ? "/" : ""); else - printf(repo_settings_get_shared_repository(the_repository) + printf(repo_settings_get_shared_repository(repo) ? _("Initialized empty shared Git repository in %s%s\n") : _("Initialized empty Git repository in %s%s\n"), git_dir, len && git_dir[len-1] != '/' ? 
"/" : ""); diff --git a/setup.h b/setup.h index 21737e9bd69108..9409326fe47c70 100644 --- a/setup.h +++ b/setup.h @@ -227,7 +227,8 @@ const char *get_template_dir(const char *option_template); #define INIT_DB_EXIST_OK (1 << 1) #define INIT_DB_SKIP_REFDB (1 << 2) -int init_db(const char *git_dir, const char *real_git_dir, +int init_db(struct repository *repo, + const char *git_dir, const char *real_git_dir, const char *template_dir, int hash_algo, enum ref_storage_format ref_storage_format, const char *initial_branch, int init_shared_repository, From 22235136406ce54b752ec1aa7df76bfb00805bbc Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:57:39 -0400 Subject: [PATCH 69/93] midx-write: handle noop writes when converting incremental chains When updating a MIDX, we optimize out writes that will result in an identical MIDX as the one we already have on disk. See b3bab9d2729 (midx-write: extract function to test whether MIDX needs updating, 2025-12-10) for more details on exactly which writes are optimized out. If `midx_needs_update()` can't rule out any of the obvious cases (e.g., the checksum is invalid, we're requesting a different version, or performing compaction which always requires an update), then we compare the packs we're writing to the packs we already know about. If there are an equal number of packs being written as there are in any existing MIDX layer(s), then we compare the packs by their name. This comparison fails when we have an incremental MIDX chain with at least two layers, since we do not recursively peel through earlier layers, instead treating the `->pack_names` array of the tip MIDX layer as containing all `m->num_packs + m->num_packs_in_base` packs. Adjust this to instead look through the MIDX layers one by one when comparing pack names. While we're at it, fix a typo above in the same function. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx-write.c | 18 ++++++++++-------- t/t5334-incremental-multi-pack-index.sh | 16 ++++++++++++++++ 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/midx-write.c b/midx-write.c index a25cab75abad11..9328f65a20187a 100644 --- a/midx-write.c +++ b/midx-write.c @@ -1152,7 +1152,7 @@ static bool midx_needs_update(struct multi_pack_index *midx, struct write_midx_c /* * Ensure that we have a valid checksum before consulting the - * exisiting MIDX in order to determine if we can avoid an + * existing MIDX in order to determine if we can avoid an * update. * * This is necessary because the given MIDX is loaded directly @@ -1208,14 +1208,16 @@ static bool midx_needs_update(struct multi_pack_index *midx, struct write_midx_c BUG("same pack added twice?"); } - for (uint32_t i = 0; i < ctx->nr; i++) { - strbuf_reset(&buf); - strbuf_addstr(&buf, midx->pack_names[i]); - strbuf_strip_suffix(&buf, ".idx"); + for (struct multi_pack_index *m = midx; m; m = m->base_midx) { + for (uint32_t i = 0; i < m->num_packs; i++) { + strbuf_reset(&buf); + strbuf_addstr(&buf, m->pack_names[i]); + strbuf_strip_suffix(&buf, ".idx"); - if (!strset_contains(&packs, buf.buf)) - goto out; - strset_remove(&packs, buf.buf); + if (!strset_contains(&packs, buf.buf)) + goto out; + strset_remove(&packs, buf.buf); + } } needed = false; diff --git a/t/t5334-incremental-multi-pack-index.sh b/t/t5334-incremental-multi-pack-index.sh index 99c7d44d8e9d34..c9f5b4e87aa035 100755 --- a/t/t5334-incremental-multi-pack-index.sh +++ b/t/t5334-incremental-multi-pack-index.sh @@ -132,4 +132,20 @@ test_expect_success 'relink existing MIDX layer' ' ' +test_expect_success 'non-incremental write with existing incremental chain' ' + git init non-incremental-write-with-existing && + test_when_finished "rm -fr non-incremental-write-with-existing" && + + ( + cd non-incremental-write-with-existing && + + git config set maintenance.auto false && + + 
write_midx_layer && + write_midx_layer && + + git multi-pack-index write + ) +' + test_done From ddaa7a6fb79038a30b59341ed3f0f2097014ccbf Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:57:42 -0400 Subject: [PATCH 70/93] midx: use `strset` for retained MIDX files Both `clear_midx_files_ext()` and `clear_incremental_midx_files_ext()` build a list of filenames to keep while pruning stale MIDX files. Today they hand-roll an array instead of using a `strset`, thus requiring us to pass an additional length parameter and making lookups linear. Replace the bare array with a `strset` which can be passed around as a single parameter. Though it improves lookup performance, the difference is likely immeasurable given how small the keep_hashes array typically is. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx.c | 57 +++++++++++++++++++++++++++------------------------------ 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/midx.c b/midx.c index 81d6ab11e6eb0e..f75e3c9fa6dcfe 100644 --- a/midx.c +++ b/midx.c @@ -758,8 +758,7 @@ int midx_checksum_valid(struct multi_pack_index *m) } struct clear_midx_data { - char **keep; - uint32_t keep_nr; + struct strset keep; const char *ext; }; @@ -767,15 +766,12 @@ static void clear_midx_file_ext(const char *full_path, size_t full_path_len UNUS const char *file_name, void *_data) { struct clear_midx_data *data = _data; - uint32_t i; if (!(starts_with(file_name, "multi-pack-index-") && ends_with(file_name, data->ext))) return; - for (i = 0; i < data->keep_nr; i++) { - if (!strcmp(data->keep[i], file_name)) - return; - } + if (strset_contains(&data->keep, file_name)) + return; if (unlink(full_path)) die_errno(_("failed to remove %s"), full_path); } @@ -783,48 +779,49 @@ static void clear_midx_file_ext(const char *full_path, size_t full_path_len UNUS void clear_midx_files_ext(struct odb_source *source, const char *ext, const char *keep_hash) { - struct clear_midx_data data; - memset(&data, 0, 
sizeof(struct clear_midx_data)); + struct clear_midx_data data = { + .keep = STRSET_INIT, + .ext = ext, + }; if (keep_hash) { - ALLOC_ARRAY(data.keep, 1); + struct strbuf buf = STRBUF_INIT; + strbuf_addf(&buf, "multi-pack-index-%s.%s", keep_hash, ext); + + strset_add(&data.keep, buf.buf); - data.keep[0] = xstrfmt("multi-pack-index-%s.%s", keep_hash, ext); - data.keep_nr = 1; + strbuf_release(&buf); } - data.ext = ext; - for_each_file_in_pack_dir(source->path, - clear_midx_file_ext, - &data); + for_each_file_in_pack_dir(source->path, clear_midx_file_ext, &data); - if (keep_hash) - free(data.keep[0]); - free(data.keep); + strset_clear(&data.keep); } void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext, char **keep_hashes, uint32_t hashes_nr) { - struct clear_midx_data data; + struct clear_midx_data data = { + .keep = STRSET_INIT, + .ext = ext, + }; + struct strbuf buf = STRBUF_INIT; uint32_t i; - memset(&data, 0, sizeof(struct clear_midx_data)); + for (i = 0; i < hashes_nr; i++) { + strbuf_reset(&buf); + strbuf_addf(&buf, "multi-pack-index-%s.%s", keep_hashes[i], + ext); - ALLOC_ARRAY(data.keep, hashes_nr); - for (i = 0; i < hashes_nr; i++) - data.keep[i] = xstrfmt("multi-pack-index-%s.%s", keep_hashes[i], - ext); - data.keep_nr = hashes_nr; - data.ext = ext; + strset_add(&data.keep, buf.buf); + } for_each_file_in_pack_subdir(source->path, "multi-pack-index.d", clear_midx_file_ext, &data); - for (i = 0; i < hashes_nr; i++) - free(data.keep[i]); - free(data.keep); + strbuf_release(&buf); + strset_clear(&data.keep); } void clear_midx_file(struct repository *r) From 3a5ebfac2f8910f335dc1f269e8a8cbdcacb7157 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:57:45 -0400 Subject: [PATCH 71/93] midx: build `keep_hashes` array in order Instead of filling the keep_hashes array using reverse indexing (e.g., `keep_hashes[count - i - 1]`) while traversing linked lists forward, collect linked list nodes into a temporary `layers` array 
and then iterate it backwards to fill `keep_hashes` sequentially. This makes the filling logic easier to follow, since each segment of the array is filled with a simple forward-marching index. Moreover, this change prepares us for a subsequent commit that will switch to using a `strvec`. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx-write.c | 64 ++++++++++++++++++++++++++++------------------------ 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/midx-write.c b/midx-write.c index 9328f65a20187a..55c778a97cbe59 100644 --- a/midx-write.c +++ b/midx-write.c @@ -1731,6 +1731,9 @@ static int write_midx_internal(struct write_midx_opts *opts) FILE *chainf = fdopen_lock_file(&lk, "w"); struct strbuf final_midx_name = STRBUF_INIT; struct multi_pack_index *m = ctx.base_midx; + struct multi_pack_index **layers = NULL; + size_t layers_nr = 0, layers_alloc = 0; + size_t j = 0; if (!chainf) { error_errno(_("unable to open multi-pack-index chain file")); @@ -1751,46 +1754,49 @@ static int write_midx_internal(struct write_midx_opts *opts) strbuf_release(&final_midx_name); if (ctx.compact) { - struct multi_pack_index *m; - uint32_t num_layers_before_from = 0; - uint32_t i; + struct multi_pack_index *mp; - for (m = ctx.base_midx; m; m = m->base_midx) - num_layers_before_from++; - - m = ctx.base_midx; - for (i = 0; i < num_layers_before_from; i++) { - uint32_t j = num_layers_before_from - i - 1; - - keep_hashes[j] = xstrdup(midx_get_checksum_hex(m)); - m = m->base_midx; + for (mp = ctx.base_midx; mp; mp = mp->base_midx) { + ALLOC_GROW(layers, layers_nr + 1, layers_alloc); + layers[layers_nr++] = mp; } + while (layers_nr) + keep_hashes[j++] = + xstrdup(midx_get_checksum_hex(layers[--layers_nr])); - keep_hashes[i] = xstrdup(hash_to_hex_algop(midx_hash, - r->hash_algo)); + keep_hashes[j++] = + xstrdup(hash_to_hex_algop(midx_hash, + r->hash_algo)); - i = 0; - for (m = ctx.m; - m && midx_hashcmp(m, ctx.compact_to, r->hash_algo); - m = m->base_midx) { - 
keep_hashes[keep_hashes_nr - i - 1] = - xstrdup(midx_get_checksum_hex(m)); - i++; + for (mp = ctx.m; + mp && midx_hashcmp(mp, ctx.compact_to, + r->hash_algo); + mp = mp->base_midx) { + ALLOC_GROW(layers, layers_nr + 1, layers_alloc); + layers[layers_nr++] = mp; } + while (layers_nr) + keep_hashes[j++] = + xstrdup(midx_get_checksum_hex(layers[--layers_nr])); } else { - keep_hashes[ctx.num_multi_pack_indexes_before] = + for (; m; m = m->base_midx) { + ALLOC_GROW(layers, layers_nr + 1, layers_alloc); + layers[layers_nr++] = m; + } + while (layers_nr) + keep_hashes[j++] = + xstrdup(midx_get_checksum_hex(layers[--layers_nr])); + + keep_hashes[j++] = xstrdup(hash_to_hex_algop(midx_hash, r->hash_algo)); + } - for (uint32_t i = 0; i < ctx.num_multi_pack_indexes_before; i++) { - uint32_t j = ctx.num_multi_pack_indexes_before - i - 1; + ASSERT(j == keep_hashes_nr); - keep_hashes[j] = xstrdup(midx_get_checksum_hex(m)); - m = m->base_midx; - } - } + free(layers); - for (uint32_t i = 0; i < keep_hashes_nr; i++) + for (uint32_t i = 0; i < j; i++) fprintf(get_lock_file_fp(&lk), "%s\n", keep_hashes[i]); } else { keep_hashes[ctx.num_multi_pack_indexes_before] = From 046a8686a406ebff7ca01dd4a3fabf9a679e010f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:57:48 -0400 Subject: [PATCH 72/93] midx: use `strvec` for `keep_hashes` The `keep_hashes` array in `write_midx_internal()` accumulates the checksums of MIDX files that should be retained when pruning stale entries from the MIDX chain. For similar reasons as in a previous commit, rewrite this using a strvec, requiring us to pass one fewer parameter. Unlike the aforementioned previous commit, use a `strvec` instead of a `string_list`, which provides a more ergonomic interface to adjust the values at a particular index. The ordering is important here, as this value is used to determine the contents of the resulting `multi-pack-index-chain` file when writing with "--incremental". 
Since the previous commit already builds the array in forward order, the conversion is straightforward: replace indexed assignments with `strvec_push()`, drop the pre-counting and `CALLOC_ARRAY()`, and simplify cleanup via `strvec_clear()`. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx-write.c | 84 ++++++++++++++++++---------------------------------- midx.c | 20 ++++++------- 2 files changed, 38 insertions(+), 66 deletions(-) diff --git a/midx-write.c b/midx-write.c index 55c778a97cbe59..5d9409a9741eee 100644 --- a/midx-write.c +++ b/midx-write.c @@ -29,8 +29,7 @@ extern void clear_midx_files_ext(struct odb_source *source, const char *ext, const char *keep_hash); extern void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext, - const char **keep_hashes, - uint32_t hashes_nr); + const struct strvec *keep_hashes); extern int cmp_idx_or_pack_name(const char *idx_or_pack_name, const char *idx_name); @@ -1109,8 +1108,7 @@ static int link_midx_to_chain(struct multi_pack_index *m) } static void clear_midx_files(struct odb_source *source, - const char **hashes, uint32_t hashes_nr, - unsigned incremental) + const struct strvec *hashes, unsigned incremental) { /* * if incremental: @@ -1124,13 +1122,15 @@ static void clear_midx_files(struct odb_source *source, */ struct strbuf buf = STRBUF_INIT; const char *exts[] = { MIDX_EXT_BITMAP, MIDX_EXT_REV, MIDX_EXT_MIDX }; - uint32_t i, j; + uint32_t i; for (i = 0; i < ARRAY_SIZE(exts); i++) { - clear_incremental_midx_files_ext(source, exts[i], - hashes, hashes_nr); - for (j = 0; j < hashes_nr; j++) - clear_midx_files_ext(source, exts[i], hashes[j]); + clear_incremental_midx_files_ext(source, exts[i], hashes); + if (hashes) { + for (size_t j = 0; j < hashes->nr; j++) + clear_midx_files_ext(source, exts[i], + hashes->v[j]); + } } if (incremental) @@ -1267,8 +1267,7 @@ static int write_midx_internal(struct write_midx_opts *opts) int pack_name_concat_len = 0; int dropped_packs = 0; int result 
= -1; - const char **keep_hashes = NULL; - size_t keep_hashes_nr = 0; + struct strvec keep_hashes = STRVEC_INIT; struct chunkfile *cf; trace2_region_enter("midx", "write_midx_internal", r); @@ -1708,32 +1707,12 @@ static int write_midx_internal(struct write_midx_opts *opts) if (ctx.num_multi_pack_indexes_before == UINT32_MAX) die(_("too many multi-pack-indexes")); - if (ctx.compact) { - struct multi_pack_index *m; - - /* - * Keep all MIDX layers excluding those in the range [from, to]. - */ - for (m = ctx.base_midx; m; m = m->base_midx) - keep_hashes_nr++; - for (m = ctx.m; - m && midx_hashcmp(m, ctx.compact_to, r->hash_algo); - m = m->base_midx) - keep_hashes_nr++; - - keep_hashes_nr++; /* include the compacted layer */ - } else { - keep_hashes_nr = ctx.num_multi_pack_indexes_before + 1; - } - CALLOC_ARRAY(keep_hashes, keep_hashes_nr); - if (ctx.incremental) { FILE *chainf = fdopen_lock_file(&lk, "w"); struct strbuf final_midx_name = STRBUF_INIT; struct multi_pack_index *m = ctx.base_midx; struct multi_pack_index **layers = NULL; size_t layers_nr = 0, layers_alloc = 0; - size_t j = 0; if (!chainf) { error_errno(_("unable to open multi-pack-index chain file")); @@ -1761,12 +1740,12 @@ static int write_midx_internal(struct write_midx_opts *opts) layers[layers_nr++] = mp; } while (layers_nr) - keep_hashes[j++] = - xstrdup(midx_get_checksum_hex(layers[--layers_nr])); + strvec_push(&keep_hashes, + midx_get_checksum_hex(layers[--layers_nr])); - keep_hashes[j++] = - xstrdup(hash_to_hex_algop(midx_hash, - r->hash_algo)); + strvec_push(&keep_hashes, + hash_to_hex_algop(midx_hash, + r->hash_algo)); for (mp = ctx.m; mp && midx_hashcmp(mp, ctx.compact_to, @@ -1776,31 +1755,29 @@ static int write_midx_internal(struct write_midx_opts *opts) layers[layers_nr++] = mp; } while (layers_nr) - keep_hashes[j++] = - xstrdup(midx_get_checksum_hex(layers[--layers_nr])); + strvec_push(&keep_hashes, + midx_get_checksum_hex(layers[--layers_nr])); } else { for (; m; m = m->base_midx) { 
ALLOC_GROW(layers, layers_nr + 1, layers_alloc); layers[layers_nr++] = m; } while (layers_nr) - keep_hashes[j++] = - xstrdup(midx_get_checksum_hex(layers[--layers_nr])); + strvec_push(&keep_hashes, + midx_get_checksum_hex(layers[--layers_nr])); - keep_hashes[j++] = - xstrdup(hash_to_hex_algop(midx_hash, - r->hash_algo)); + strvec_push(&keep_hashes, + hash_to_hex_algop(midx_hash, + r->hash_algo)); } - ASSERT(j == keep_hashes_nr); - free(layers); - for (uint32_t i = 0; i < j; i++) - fprintf(get_lock_file_fp(&lk), "%s\n", keep_hashes[i]); + for (size_t i = 0; i < keep_hashes.nr; i++) + fprintf(get_lock_file_fp(&lk), "%s\n", keep_hashes.v[i]); } else { - keep_hashes[ctx.num_multi_pack_indexes_before] = - xstrdup(hash_to_hex_algop(midx_hash, r->hash_algo)); + strvec_push(&keep_hashes, + hash_to_hex_algop(midx_hash, r->hash_algo)); } if (ctx.m || ctx.base_midx) @@ -1809,8 +1786,7 @@ static int write_midx_internal(struct write_midx_opts *opts) if (commit_lock_file(&lk) < 0) die_errno(_("could not write multi-pack-index")); - clear_midx_files(opts->source, keep_hashes, keep_hashes_nr, - ctx.incremental); + clear_midx_files(opts->source, &keep_hashes, ctx.incremental); result = 0; cleanup: @@ -1826,11 +1802,7 @@ static int write_midx_internal(struct write_midx_opts *opts) free(ctx.entries); free(ctx.pack_perm); free(ctx.pack_order); - if (keep_hashes) { - for (uint32_t i = 0; i < keep_hashes_nr; i++) - free((char *)keep_hashes[i]); - free(keep_hashes); - } + strvec_clear(&keep_hashes); strbuf_release(&midx_name); close_midx(midx_to_free); diff --git a/midx.c b/midx.c index f75e3c9fa6dcfe..bcb8c999015980 100644 --- a/midx.c +++ b/midx.c @@ -12,6 +12,7 @@ #include "chunk-format.h" #include "pack-bitmap.h" #include "pack-revindex.h" +#include "strvec.h" #define MIDX_PACK_ERROR ((void *)(intptr_t)-1) @@ -19,8 +20,7 @@ int midx_checksum_valid(struct multi_pack_index *m); void clear_midx_files_ext(struct odb_source *source, const char *ext, const char *keep_hash); void 
clear_incremental_midx_files_ext(struct odb_source *source, const char *ext, - char **keep_hashes, - uint32_t hashes_nr); + const struct strvec *keep_hashes); int cmp_idx_or_pack_name(const char *idx_or_pack_name, const char *idx_name); @@ -799,22 +799,22 @@ void clear_midx_files_ext(struct odb_source *source, const char *ext, } void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext, - char **keep_hashes, - uint32_t hashes_nr) + const struct strvec *keep_hashes) { struct clear_midx_data data = { .keep = STRSET_INIT, .ext = ext, }; struct strbuf buf = STRBUF_INIT; - uint32_t i; - for (i = 0; i < hashes_nr; i++) { - strbuf_reset(&buf); - strbuf_addf(&buf, "multi-pack-index-%s.%s", keep_hashes[i], - ext); + if (keep_hashes) { + for (size_t i = 0; i < keep_hashes->nr; i++) { + strbuf_reset(&buf); + strbuf_addf(&buf, "multi-pack-index-%s.%s", + keep_hashes->v[i], ext); - strset_add(&data.keep, buf.buf); + strset_add(&data.keep, buf.buf); + } } for_each_file_in_pack_subdir(source->path, "multi-pack-index.d", From 8d342ed4b5dfbaa16f8bbc4537907d1e1224358e Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:57:51 -0400 Subject: [PATCH 73/93] midx: introduce `--no-write-chain-file` for incremental MIDX writes When writing an incremental MIDX layer, the MIDX machinery writes the new layer into the multi-pack-index.d directory and then updates the multi-pack-index-chain file to include the freshly written layer. Future callers however may not wish to immediately update the MIDX chain itself, preferring instead to write out new layer(s) themselves before atomically updating the chain. Concretely, the new incremental MIDX-based repacking strategy will want to do exactly this (that is, assemble the new MIDX chain itself before writing a new chain file and atomically linking it into place). 
Introduce a `--no-write-chain-file` flag that: * writes the new MIDX layer into the multi-pack-index.d directory * prints its checksum * does not update the multi-pack-index-chain file. The MIDX chain file (and thus, the lock protecting it) remain untouched, allowing callers to assemble the chain themselves. This flag requires `--incremental`, since the notion of a separate layer only makes sense for incremental MIDXs. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-multi-pack-index.adoc | 17 ++++++++-- builtin/multi-pack-index.c | 28 +++++++++++++++-- midx-write.c | 42 ++++++++++++++++--------- midx.h | 1 + t/t5334-incremental-multi-pack-index.sh | 17 ++++++++++ t/t5335-compact-multi-pack-index.sh | 36 +++++++++++++++++++++ 6 files changed, 123 insertions(+), 18 deletions(-) diff --git a/Documentation/git-multi-pack-index.adoc b/Documentation/git-multi-pack-index.adoc index 3a5aa227784071..c26196815e218a 100644 --- a/Documentation/git-multi-pack-index.adoc +++ b/Documentation/git-multi-pack-index.adoc @@ -11,9 +11,9 @@ SYNOPSIS [verse] 'git multi-pack-index' [] write [--preferred-pack=] [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs] - [--refs-snapshot=] + [--refs-snapshot=] [--[no-]write-chain-file] 'git multi-pack-index' [] compact [--[no-]incremental] - [--[no-]bitmap] + [--[no-]bitmap] [--[no-]write-chain-file] 'git multi-pack-index' [] verify 'git multi-pack-index' [] expire 'git multi-pack-index' [] repack [--batch-size=] @@ -83,6 +83,13 @@ marker). and packs not present in an existing MIDX layer. Migrates non-incremental MIDXs to incremental ones when necessary. + + --[no-]write-chain-file:: + When used with `--incremental`, write a new MIDX layer + but do not update the multi-pack-index-chain file. + The checksum of the new layer is printed to standard + output, allowing the caller to assemble and write the + chain itself. Requires `--incremental`. 
-- compact:: @@ -97,6 +104,12 @@ compact:: --[no-]bitmap:: Control whether or not a multi-pack bitmap is written. + + --[no-]write-chain-file:: + When used with `--incremental`, write a new compacted + MIDX layer but do not update the multi-pack-index-chain + file. The checksum of the new layer is printed to + standard output. Requires `--incremental`. -- + Note that the compact command requires writing a version-2 midx that diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index 0f72d96c02da4c..f861b4b839463b 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -16,11 +16,11 @@ #define BUILTIN_MIDX_WRITE_USAGE \ N_("git multi-pack-index [] write [--preferred-pack=]\n" \ " [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs]\n" \ - " [--refs-snapshot=]") + " [--refs-snapshot=] [--[no-]write-chain-file]") #define BUILTIN_MIDX_COMPACT_USAGE \ N_("git multi-pack-index [] compact [--[no-]incremental]\n" \ - " [--[no-]bitmap] ") + " [--[no-]bitmap] [--[no-]write-chain-file] ") #define BUILTIN_MIDX_VERIFY_USAGE \ N_("git multi-pack-index [] verify") @@ -153,6 +153,9 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX), OPT_BIT(0, "incremental", &opts.flags, N_("write a new incremental MIDX"), MIDX_WRITE_INCREMENTAL), + OPT_NEGBIT(0, "write-chain-file", &opts.flags, + N_("write the multi-pack-index chain file"), + MIDX_WRITE_NO_CHAIN), OPT_BOOL(0, "stdin-packs", &opts.stdin_packs, N_("write multi-pack index containing only given indexes")), OPT_FILENAME(0, "refs-snapshot", &opts.refs_snapshot, @@ -178,6 +181,15 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, if (argc) usage_with_options(builtin_multi_pack_index_write_usage, options); + + if (opts.flags & MIDX_WRITE_NO_CHAIN && + !(opts.flags & MIDX_WRITE_INCREMENTAL)) { + error(_("cannot use %s without %s"), + "--no-write-chain-file", "--incremental"); + 
usage_with_options(builtin_multi_pack_index_write_usage, + options); + } + source = handle_object_dir_option(repo); FREE_AND_NULL(options); @@ -221,6 +233,9 @@ static int cmd_multi_pack_index_compact(int argc, const char **argv, MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX), OPT_BIT(0, "incremental", &opts.flags, N_("write a new incremental MIDX"), MIDX_WRITE_INCREMENTAL), + OPT_NEGBIT(0, "write-chain-file", &opts.flags, + N_("write the multi-pack-index chain file"), + MIDX_WRITE_NO_CHAIN), OPT_END(), }; @@ -239,6 +254,15 @@ static int cmd_multi_pack_index_compact(int argc, const char **argv, if (argc != 2) usage_with_options(builtin_multi_pack_index_compact_usage, options); + + if (opts.flags & MIDX_WRITE_NO_CHAIN && + !(opts.flags & MIDX_WRITE_INCREMENTAL)) { + error(_("cannot use %s without %s"), + "--no-write-chain-file", "--incremental"); + usage_with_options(builtin_multi_pack_index_compact_usage, + options); + } + source = handle_object_dir_option(the_repository); FREE_AND_NULL(options); diff --git a/midx-write.c b/midx-write.c index 5d9409a9741eee..38c898e5ff5ef1 100644 --- a/midx-write.c +++ b/midx-write.c @@ -1257,7 +1257,7 @@ static int write_midx_internal(struct write_midx_opts *opts) unsigned char midx_hash[GIT_MAX_RAWSZ]; uint32_t start_pack; struct hashfile *f = NULL; - struct lock_file lk; + struct lock_file lk = LOCK_INIT; struct tempfile *incr; struct write_midx_context ctx = { .preferred_pack_idx = NO_PREFERRED_PACK, @@ -1601,11 +1601,14 @@ static int write_midx_internal(struct write_midx_opts *opts) } if (ctx.incremental) { - struct strbuf lock_name = STRBUF_INIT; + if (!(opts->flags & MIDX_WRITE_NO_CHAIN)) { + struct strbuf lock_name = STRBUF_INIT; - get_midx_chain_filename(opts->source, &lock_name); - hold_lock_file_for_update(&lk, lock_name.buf, LOCK_DIE_ON_ERROR); - strbuf_release(&lock_name); + get_midx_chain_filename(opts->source, &lock_name); + hold_lock_file_for_update(&lk, lock_name.buf, + LOCK_DIE_ON_ERROR); + strbuf_release(&lock_name); + 
} incr = mks_tempfile_m(midx_name.buf, 0444); if (!incr) { @@ -1707,16 +1710,23 @@ static int write_midx_internal(struct write_midx_opts *opts) if (ctx.num_multi_pack_indexes_before == UINT32_MAX) die(_("too many multi-pack-indexes")); + if (!is_lock_file_locked(&lk)) + printf("%s\n", hash_to_hex_algop(midx_hash, r->hash_algo)); + else if (opts->flags & MIDX_WRITE_NO_CHAIN) + BUG("lockfile held with MIDX_WRITE_NO_CHAIN set?"); + if (ctx.incremental) { - FILE *chainf = fdopen_lock_file(&lk, "w"); struct strbuf final_midx_name = STRBUF_INIT; struct multi_pack_index *m = ctx.base_midx; struct multi_pack_index **layers = NULL; size_t layers_nr = 0, layers_alloc = 0; - if (!chainf) { - error_errno(_("unable to open multi-pack-index chain file")); - goto cleanup; + if (is_lock_file_locked(&lk)){ + FILE *chainf = fdopen_lock_file(&lk, "w"); + if (!chainf) { + error_errno(_("unable to open multi-pack-index chain file")); + goto cleanup; + } } if (link_midx_to_chain(ctx.base_midx) < 0) @@ -1773,8 +1783,10 @@ static int write_midx_internal(struct write_midx_opts *opts) free(layers); - for (size_t i = 0; i < keep_hashes.nr; i++) - fprintf(get_lock_file_fp(&lk), "%s\n", keep_hashes.v[i]); + if (is_lock_file_locked(&lk)) + for (size_t i = 0; i < keep_hashes.nr; i++) + fprintf(get_lock_file_fp(&lk), "%s\n", + keep_hashes.v[i]); } else { strvec_push(&keep_hashes, hash_to_hex_algop(midx_hash, r->hash_algo)); @@ -1783,10 +1795,12 @@ static int write_midx_internal(struct write_midx_opts *opts) if (ctx.m || ctx.base_midx) odb_close(ctx.repo->objects); - if (commit_lock_file(&lk) < 0) - die_errno(_("could not write multi-pack-index")); + if (is_lock_file_locked(&lk)) { + if (commit_lock_file(&lk) < 0) + die_errno(_("could not write multi-pack-index")); - clear_midx_files(opts->source, &keep_hashes, ctx.incremental); + clear_midx_files(opts->source, &keep_hashes, ctx.incremental); + } result = 0; cleanup: diff --git a/midx.h b/midx.h index 08f3728e5204b8..5b193882dcf0f1 100644 --- 
a/midx.h +++ b/midx.h @@ -83,6 +83,7 @@ struct multi_pack_index { #define MIDX_WRITE_BITMAP_LOOKUP_TABLE (1 << 4) #define MIDX_WRITE_INCREMENTAL (1 << 5) #define MIDX_WRITE_COMPACT (1 << 6) +#define MIDX_WRITE_NO_CHAIN (1 << 7) #define MIDX_EXT_REV "rev" #define MIDX_EXT_BITMAP "bitmap" diff --git a/t/t5334-incremental-multi-pack-index.sh b/t/t5334-incremental-multi-pack-index.sh index c9f5b4e87aa035..66d6894761b4a3 100755 --- a/t/t5334-incremental-multi-pack-index.sh +++ b/t/t5334-incremental-multi-pack-index.sh @@ -96,6 +96,23 @@ test_expect_success 'show object from second pack' ' git cat-file -p 2.2 ' +test_expect_success 'write MIDX layer with --no-write-chain-file' ' + test_commit no-write-chain-file && + git repack -d && + + cp "$midx_chain" "$midx_chain.bak" && + layer="$(git multi-pack-index write --bitmap --incremental \ + --no-write-chain-file)" && + + test_cmp "$midx_chain.bak" "$midx_chain" && + test_path_is_file "$midxdir/multi-pack-index-$layer.midx" +' + +test_expect_success 'write non-incremental MIDX layer with --no-write-chain-file' ' + test_must_fail git multi-pack-index write --bitmap --no-write-chain-file 2>err && + test_grep "cannot use --no-write-chain-file without --incremental" err +' + for reuse in false single multi do test_expect_success "full clone (pack.allowPackReuse=$reuse)" ' diff --git a/t/t5335-compact-multi-pack-index.sh b/t/t5335-compact-multi-pack-index.sh index 40f3844282f04e..1a65d48b62b848 100755 --- a/t/t5335-compact-multi-pack-index.sh +++ b/t/t5335-compact-multi-pack-index.sh @@ -290,4 +290,40 @@ test_expect_success 'MIDX compaction with bitmaps (non-trivial)' ' ) ' +test_expect_success 'MIDX compaction with --no-write-chain-file' ' + git init midx-compact-with--no-write-chain-file && + ( + cd midx-compact-with--no-write-chain-file && + + git config maintenance.auto false && + + write_packs A B C D && + + test_line_count = 4 $midx_chain && + cp "$midx_chain" "$midx_chain".bak && + + layer="$(git multi-pack-index compact 
--incremental \ + --no-write-chain-file \ + "$(nth_line 2 "$midx_chain")" \ + "$(nth_line 3 "$midx_chain")")" && + + test_cmp "$midx_chain.bak" "$midx_chain" && + + # After writing the new layer, insert it into the chain + # manually. This is done in order to make $layer visible + # to the read-midx test helper below, and matches what + # the MIDX command would do without --no-write-chain-file. + { + nth_line 1 "$midx_chain.bak" && + echo $layer && + nth_line 4 "$midx_chain.bak" + } >$midx_chain && + + test-tool read-midx $objdir $layer >midx.data && + grep "^pack-B-.*\.idx" midx.data && + grep "^pack-C-.*\.idx" midx.data + + ) +' + test_done From 0cd2255e64b4775520a6acbbb1868437fc26662d Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:57:54 -0400 Subject: [PATCH 74/93] midx: support custom `--base` for incremental MIDX writes Both `compact` and `write --incremental` fix the base of the resulting MIDX layer: `compact` always places the compacted result on top of "from's" immediate parent in the chain, and `write --incremental` always appends a new layer to the existing tip. In both cases the base is not configurable. Future callers need additional flexibility. For instance, the incremental MIDX-based repacking code may wish to write a layer based on some intermediate ancestor rather than the current tip, or produce a root layer when replacing the bottommost entries in the chain. Introduce a new `--base` option for both subcommands to specify the checksum of the MIDX layer to use as the base. The given checksum must refer to a valid layer in the MIDX chain that is an ancestor of the topmost layer being written or compacted. The special value "none" is accepted to produce a root layer with no parent. This will be needed when the incremental repacking machinery determines that the bottommost layers of the chain should be replaced. 
If no `--base` is given, behavior is unchanged: `compact` uses "from's" immediate parent in the chain, and `write` appends to the existing tip. For the `write` subcommand, `--base` requires `--no-write-chain-file`. A plain `write --incremental` appends a new layer to the live chain tip with no mechanism to atomically replace it; overriding the base would produce a layer that does not extend the tip, breaking chain invariants. With `--no-write-chain-file` the chain is left unmodified and the caller is responsible for assembling a valid chain. For `compact`, no such restriction applies. The compaction operation atomically replaces the compacted range in the chain file, so writing the result on top of any valid ancestor preserves chain invariants. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-multi-pack-index.adoc | 17 +++++- builtin/multi-pack-index.c | 24 ++++++-- midx-write.c | 34 ++++++++++- midx.h | 5 +- t/t5334-incremental-multi-pack-index.sh | 30 ++++++++++ t/t5335-compact-multi-pack-index.sh | 77 +++++++++++++++++++++++++ 6 files changed, 178 insertions(+), 9 deletions(-) diff --git a/Documentation/git-multi-pack-index.adoc b/Documentation/git-multi-pack-index.adoc index c26196815e218a..c6d23aeeb9a037 100644 --- a/Documentation/git-multi-pack-index.adoc +++ b/Documentation/git-multi-pack-index.adoc @@ -12,8 +12,10 @@ SYNOPSIS 'git multi-pack-index' [] write [--preferred-pack=] [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs] [--refs-snapshot=] [--[no-]write-chain-file] + [--base=] 'git multi-pack-index' [] compact [--[no-]incremental] - [--[no-]bitmap] [--[no-]write-chain-file] + [--[no-]bitmap] [--base=] [--[no-]write-chain-file] + 'git multi-pack-index' [] verify 'git multi-pack-index' [] expire 'git multi-pack-index' [] repack [--batch-size=] @@ -90,6 +92,13 @@ marker). The checksum of the new layer is printed to standard output, allowing the caller to assemble and write the chain itself. Requires `--incremental`. 
+ + --base=<checksum>:: + Specify the checksum of an existing MIDX layer to use + as the base when writing a new incremental layer. + The special value `none` indicates that the new layer + should have no base (i.e., it becomes a root layer). + Requires `--no-write-chain-file`. -- compact:: @@ -110,6 +119,12 @@ compact:: MIDX layer but do not update the multi-pack-index-chain file. The checksum of the new layer is printed to standard output. Requires `--incremental`. + + --base=<checksum>:: + Specify the checksum of an existing MIDX layer to use + as the base for the compacted result, instead of using + the immediate parent of `<from>`. The special value + `none` indicates that the result should have no base. -- + Note that the compact command requires writing a version-2 midx that diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index f861b4b839463b..00ffb36394d08c 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -16,11 +16,13 @@ #define BUILTIN_MIDX_WRITE_USAGE \ N_("git multi-pack-index [] write [--preferred-pack=]\n" \ " [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs]\n" \ - " [--refs-snapshot=] [--[no-]write-chain-file]") + " [--refs-snapshot=] [--[no-]write-chain-file]\n" \ + " [--base=]") #define BUILTIN_MIDX_COMPACT_USAGE \ N_("git multi-pack-index [] compact [--[no-]incremental]\n" \ - " [--[no-]bitmap] [--[no-]write-chain-file] ") + " [--[no-]bitmap] [--base=] [--[no-]write-chain-file]\n" \ + " ") #define BUILTIN_MIDX_VERIFY_USAGE \ N_("git multi-pack-index [] verify") @@ -63,6 +65,7 @@ static char const * const builtin_multi_pack_index_usage[] = { static struct opts_multi_pack_index { char *object_dir; const char *preferred_pack; + const char *incremental_base; char *refs_snapshot; unsigned long batch_size; unsigned flags; @@ -151,6 +154,8 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, N_("pack for reuse when computing a multi-pack bitmap")), OPT_BIT(0, "bitmap", &opts.flags, N_("write multi-pack 
bitmap"), MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX), + OPT_STRING(0, "base", &opts.incremental_base, N_("checksum"), + N_("base MIDX for incremental writes")), OPT_BIT(0, "incremental", &opts.flags, N_("write a new incremental MIDX"), MIDX_WRITE_INCREMENTAL), OPT_NEGBIT(0, "write-chain-file", &opts.flags, @@ -190,6 +195,13 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, options); } + if (opts.incremental_base && + !(opts.flags & MIDX_WRITE_NO_CHAIN)) { + error(_("cannot use --base without --no-write-chain-file")); + usage_with_options(builtin_multi_pack_index_write_usage, + options); + } + source = handle_object_dir_option(repo); FREE_AND_NULL(options); @@ -201,7 +213,8 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, ret = write_midx_file_only(source, &packs, opts.preferred_pack, - opts.refs_snapshot, opts.flags); + opts.refs_snapshot, + opts.incremental_base, opts.flags); string_list_clear(&packs, 0); free(opts.refs_snapshot); @@ -229,6 +242,8 @@ static int cmd_multi_pack_index_compact(int argc, const char **argv, struct option *options; static struct option builtin_multi_pack_index_compact_options[] = { + OPT_STRING(0, "base", &opts.incremental_base, N_("checksum"), + N_("base MIDX for incremental writes")), OPT_BIT(0, "bitmap", &opts.flags, N_("write multi-pack bitmap"), MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX), OPT_BIT(0, "incremental", &opts.flags, @@ -290,7 +305,8 @@ static int cmd_multi_pack_index_compact(int argc, const char **argv, die(_("MIDX %s must be an ancestor of %s"), argv[0], argv[1]); } - ret = write_midx_file_compact(source, from_midx, to_midx, opts.flags); + ret = write_midx_file_compact(source, from_midx, to_midx, + opts.incremental_base, opts.flags); return ret; } diff --git a/midx-write.c b/midx-write.c index 38c898e5ff5ef1..561e9eedc0e6ef 100644 --- a/midx-write.c +++ b/midx-write.c @@ -1247,6 +1247,7 @@ struct write_midx_opts { const char *preferred_pack_name; const char *refs_snapshot; + const 
char *incremental_base; unsigned flags; }; @@ -1330,11 +1331,32 @@ static int write_midx_internal(struct write_midx_opts *opts) /* * If compacting MIDX layer(s) in the range [from, to], then the - * compacted MIDX will share the same base MIDX as 'from'. + * compacted MIDX will share the same base MIDX as 'from', + * unless a custom --base is specified (see below). */ if (ctx.compact) ctx.base_midx = ctx.compact_from->base_midx; + if (opts->incremental_base) { + if (!strcmp(opts->incremental_base, "none")) { + ctx.base_midx = NULL; + } else { + while (ctx.base_midx) { + const char *cmp = midx_get_checksum_hex(ctx.base_midx); + if (!strcmp(opts->incremental_base, cmp)) + break; + + ctx.base_midx = ctx.base_midx->base_midx; + } + + if (!ctx.base_midx) { + error(_("could not find base MIDX '%s'"), + opts->incremental_base); + goto cleanup; + } + } + } + ctx.nr = 0; ctx.alloc = ctx.m ? ctx.m->num_packs + ctx.m->num_packs_in_base : 16; ctx.info = NULL; @@ -1827,7 +1849,8 @@ static int write_midx_internal(struct write_midx_opts *opts) int write_midx_file(struct odb_source *source, const char *preferred_pack_name, - const char *refs_snapshot, unsigned flags) + const char *refs_snapshot, + unsigned flags) { struct write_midx_opts opts = { .source = source, @@ -1842,13 +1865,16 @@ int write_midx_file(struct odb_source *source, int write_midx_file_only(struct odb_source *source, struct string_list *packs_to_include, const char *preferred_pack_name, - const char *refs_snapshot, unsigned flags) + const char *refs_snapshot, + const char *incremental_base, + unsigned flags) { struct write_midx_opts opts = { .source = source, .packs_to_include = packs_to_include, .preferred_pack_name = preferred_pack_name, .refs_snapshot = refs_snapshot, + .incremental_base = incremental_base, .flags = flags, }; @@ -1858,12 +1884,14 @@ int write_midx_file_only(struct odb_source *source, int write_midx_file_compact(struct odb_source *source, struct multi_pack_index *from, struct multi_pack_index 
*to, + const char *incremental_base, unsigned flags) { struct write_midx_opts opts = { .source = source, .compact_from = from, .compact_to = to, + .incremental_base = incremental_base, .flags = flags | MIDX_WRITE_COMPACT, }; diff --git a/midx.h b/midx.h index 5b193882dcf0f1..77dd66de02bd38 100644 --- a/midx.h +++ b/midx.h @@ -132,10 +132,13 @@ int write_midx_file(struct odb_source *source, int write_midx_file_only(struct odb_source *source, struct string_list *packs_to_include, const char *preferred_pack_name, - const char *refs_snapshot, unsigned flags); + const char *refs_snapshot, + const char *incremental_base, + unsigned flags); int write_midx_file_compact(struct odb_source *source, struct multi_pack_index *from, struct multi_pack_index *to, + const char *incremental_base, unsigned flags); void clear_midx_file(struct repository *r); int verify_midx_file(struct odb_source *source, unsigned flags); diff --git a/t/t5334-incremental-multi-pack-index.sh b/t/t5334-incremental-multi-pack-index.sh index 66d6894761b4a3..68a103d13d23c3 100755 --- a/t/t5334-incremental-multi-pack-index.sh +++ b/t/t5334-incremental-multi-pack-index.sh @@ -113,6 +113,36 @@ test_expect_success 'write non-incremental MIDX layer with --no-write-chain-file test_grep "cannot use --no-write-chain-file without --incremental" err ' +test_expect_success 'write MIDX layer with --base without --no-write-chain-file' ' + test_must_fail git multi-pack-index write --bitmap --incremental \ + --base=none 2>err && + test_grep "cannot use --base without --no-write-chain-file" err +' + +test_expect_success 'write MIDX layer with --base=none and --no-write-chain-file' ' + test_commit base-none && + git repack -d && + + cp "$midx_chain" "$midx_chain.bak" && + layer="$(git multi-pack-index write --bitmap --incremental \ + --no-write-chain-file --base=none)" && + + test_cmp "$midx_chain.bak" "$midx_chain" && + test_path_is_file "$midxdir/multi-pack-index-$layer.midx" +' + +test_expect_success 'write MIDX layer 
with --base= and --no-write-chain-file' ' + test_commit base-hash && + git repack -d && + + cp "$midx_chain" "$midx_chain.bak" && + layer="$(git multi-pack-index write --bitmap --incremental \ + --no-write-chain-file --base="$(nth_line 1 "$midx_chain")")" && + + test_cmp "$midx_chain.bak" "$midx_chain" && + test_path_is_file "$midxdir/multi-pack-index-$layer.midx" +' + for reuse in false single multi do test_expect_success "full clone (pack.allowPackReuse=$reuse)" ' diff --git a/t/t5335-compact-multi-pack-index.sh b/t/t5335-compact-multi-pack-index.sh index 1a65d48b62b848..ec1dafe89fcfce 100755 --- a/t/t5335-compact-multi-pack-index.sh +++ b/t/t5335-compact-multi-pack-index.sh @@ -304,6 +304,7 @@ test_expect_success 'MIDX compaction with --no-write-chain-file' ' layer="$(git multi-pack-index compact --incremental \ --no-write-chain-file \ + --base="$(nth_line 1 "$midx_chain")" \ "$(nth_line 2 "$midx_chain")" \ "$(nth_line 3 "$midx_chain")")" && @@ -326,4 +327,80 @@ test_expect_success 'MIDX compaction with --no-write-chain-file' ' ) ' +test_expect_success 'MIDX compaction with --base' ' + git init midx-compact-with--base && + ( + cd midx-compact-with--base && + + git config maintenance.auto false && + + write_packs A B C D && + + test_line_count = 4 "$midx_chain" && + + cp "$midx_chain" "$midx_chain.bak" && + + git multi-pack-index compact --incremental \ + --base="$(nth_line 1 "$midx_chain")" \ + "$(nth_line 3 "$midx_chain")" \ + "$(nth_line 4 "$midx_chain")" && + test_line_count = 2 $midx_chain && + + nth_line 1 "$midx_chain.bak" >expect && + nth_line 1 "$midx_chain" >actual && + + test_cmp expect actual + ) +' + +test_expect_success 'MIDX compaction with --base=none' ' + git init midx-compact-base-none && + ( + cd midx-compact-base-none && + + git config maintenance.auto false && + + write_packs A B C D && + + test_line_count = 4 $midx_chain && + + cp "$midx_chain" "$midx_chain".bak && + + # Compact the two bottommost layers (A and B) into a new + # root layer 
with no parent. + git multi-pack-index compact --incremental \ + --base=none \ + "$(nth_line 1 "$midx_chain")" \ + "$(nth_line 2 "$midx_chain")" && + + test_line_count = 3 $midx_chain && + + # The upper layers (C and D) should be preserved + # unchanged. + nth_line 3 "$midx_chain.bak" >expect && + nth_line 4 "$midx_chain.bak" >>expect && + nth_line 2 "$midx_chain" >actual && + nth_line 3 "$midx_chain" >>actual && + + test_cmp expect actual + ) +' + +test_expect_success 'MIDX compaction with bogus --base checksum' ' + git init midx-compact-bogus-base && + ( + cd midx-compact-bogus-base && + + git config maintenance.auto false && + + write_packs A B C && + + test_must_fail git multi-pack-index compact --incremental \ + --base=deadbeef \ + "$(nth_line 2 "$midx_chain")" \ + "$(nth_line 3 "$midx_chain")" 2>err && + test_grep "could not find base MIDX" err + ) +' + test_done From f0ef2afb8be0aa37b80bc1cf1f1a9acfb208f00f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:57:57 -0400 Subject: [PATCH 75/93] repack: track the ODB source via existing_packs Store the ODB source in the `existing_packs` struct and use that in place of the raw `repo->objects->sources` access within `cmd_repack()`. The source used is still assigned from the first source in the list, so there are no functional changes in this commit. The changes instead serve two purposes (one immediate, one not): - The incremental MIDX-based repacking machinery will need to know what source is being used to read the existing MIDX/chain (should one exist). - In the future, if "git repack" is taught how to operate on other object sources, this field will serve as the authoritative value for that source. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 5 ++--- repack.c | 2 ++ repack.h | 1 + 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 4c5a82c2c8d7de..24be147d39a1ed 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -417,7 +417,7 @@ int cmd_repack(int argc, * midx_has_unknown_packs() will make the decision for * us. */ - if (!get_multi_pack_index(repo->objects->sources)) + if (!get_multi_pack_index(existing.source)) midx_must_contain_cruft = 1; } @@ -564,8 +564,7 @@ int cmd_repack(int argc, unsigned flags = 0; if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL, 0)) flags |= MIDX_WRITE_INCREMENTAL; - write_midx_file(repo->objects->sources, - NULL, NULL, flags); + write_midx_file(existing.source, NULL, NULL, flags); } cleanup: diff --git a/repack.c b/repack.c index 596841027af93f..2ee6b51420aa54 100644 --- a/repack.c +++ b/repack.c @@ -154,6 +154,8 @@ void existing_packs_collect(struct existing_packs *existing, string_list_append(&existing->non_kept_packs, buf.buf); } + existing->source = existing->repo->objects->sources; + string_list_sort(&existing->kept_packs); string_list_sort(&existing->non_kept_packs); string_list_sort(&existing->cruft_packs); diff --git a/repack.h b/repack.h index bc9f2e1a5de984..c0e9f0ca647c50 100644 --- a/repack.h +++ b/repack.h @@ -56,6 +56,7 @@ struct packed_git; struct existing_packs { struct repository *repo; + struct odb_source *source; struct string_list kept_packs; struct string_list non_kept_packs; struct string_list cruft_packs; From ee6fb5823822bb03bd8dc5b7e7645e5b319033f0 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:58:00 -0400 Subject: [PATCH 76/93] midx: expose `midx_layer_contains_pack()` Rename the function `midx_contains_pack_1()` to instead be called `midx_layer_contains_pack()` and make it accessible. 
Unlike `midx_contains_pack()` (which recurses through the entire chain), this function checks only a single MIDX layer. This will be used by a subsequent commit to determine whether a given pack belongs to the tip MIDX layer specifically, rather than to any layer in the chain. No functional changes are present in this commit. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx.c | 6 +++--- midx.h | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/midx.c b/midx.c index bcb8c999015980..dc86c8e7fee879 100644 --- a/midx.c +++ b/midx.c @@ -667,8 +667,8 @@ static int midx_pack_names_cmp(const void *a, const void *b, void *m_) m->pack_names[*(const size_t *)b]); } -static int midx_contains_pack_1(struct multi_pack_index *m, - const char *idx_or_pack_name) +int midx_layer_contains_pack(struct multi_pack_index *m, + const char *idx_or_pack_name) { uint32_t first = 0, last = m->num_packs; @@ -709,7 +709,7 @@ static int midx_contains_pack_1(struct multi_pack_index *m, int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name) { for (; m; m = m->base_midx) - if (midx_contains_pack_1(m, idx_or_pack_name)) + if (midx_layer_contains_pack(m, idx_or_pack_name)) return 1; return 0; } diff --git a/midx.h b/midx.h index 77dd66de02bd38..3ee12dd08ec087 100644 --- a/midx.h +++ b/midx.h @@ -119,6 +119,8 @@ struct object_id *nth_midxed_object_oid(struct object_id *oid, int fill_midx_entry(struct multi_pack_index *m, const struct object_id *oid, struct pack_entry *e); int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name); +int midx_layer_contains_pack(struct multi_pack_index *m, + const char *idx_or_pack_name); int midx_preferred_pack(struct multi_pack_index *m, uint32_t *pack_int_id); int prepare_multi_pack_index_one(struct odb_source *source); From 1505990d72585cbdf35cd596a2167c2a8a4edda1 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:58:03 -0400 Subject: [PATCH 77/93] repack-midx: 
factor out `repack_prepare_midx_command()` The `write_midx_included_packs()` function assembles and executes a `git multi-pack-index write` command, constructing the argument list inline. Future commits will introduce additional callers that need to construct similar `git multi-pack-index` commands (for both `write` and `compact` subcommands), so extract the common portions of the command setup into a reusable `repack_prepare_midx_command()` helper. The extracted helper sets `git_cmd`, pushes `multi-pack-index` and a subcommand, and handles `--progress`/`--no-progress` and `--bitmap` flags. The remaining arguments that are specific to the `write` subcommand (such as `--stdin-packs`) are left to the caller. No functional changes are included in this patch. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- repack-midx.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/repack-midx.c b/repack-midx.c index 0682b80c4278d4..5634dc186d0cad 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -275,6 +275,23 @@ static void remove_redundant_bitmaps(struct string_list *include, strbuf_release(&path); } +static void repack_prepare_midx_command(struct child_process *cmd, + struct repack_write_midx_opts *opts, + const char *subcommand) +{ + cmd->git_cmd = 1; + + strvec_pushl(&cmd->args, "multi-pack-index", subcommand, NULL); + + if (opts->show_progress) + strvec_push(&cmd->args, "--progress"); + else + strvec_push(&cmd->args, "--no-progress"); + + if (opts->write_bitmaps) + strvec_push(&cmd->args, "--bitmap"); +} + int write_midx_included_packs(struct repack_write_midx_opts *opts) { struct child_process cmd = CHILD_PROCESS_INIT; @@ -289,18 +306,9 @@ int write_midx_included_packs(struct repack_write_midx_opts *opts) goto done; cmd.in = -1; - cmd.git_cmd = 1; - strvec_push(&cmd.args, "multi-pack-index"); - strvec_pushl(&cmd.args, "write", "--stdin-packs", NULL); - - if (opts->show_progress) - strvec_push(&cmd.args, 
"--progress"); - else - strvec_push(&cmd.args, "--no-progress"); - - if (opts->write_bitmaps) - strvec_push(&cmd.args, "--bitmap"); + repack_prepare_midx_command(&cmd, opts, "write"); + strvec_push(&cmd.args, "--stdin-packs"); if (preferred) strvec_pushf(&cmd.args, "--preferred-pack=%s", From 6e38bcc51014e89a430bbd4f708170f5f7795b76 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:58:07 -0400 Subject: [PATCH 78/93] repack-midx: extract `repack_fill_midx_stdin_packs()` The function `write_midx_included_packs()` manages the lifecycle of writing packs to stdin when running `git multi-pack-index write` as a child process. Extract a standalone `repack_fill_midx_stdin_packs()` helper, which handles `--stdin-packs` argument setup, starting the command, writing pack names to its standard input, and finishing the command. This simplifies `write_midx_included_packs()` and prepares for a subsequent commit where the same helper is called with `cmd->out = -1` to capture the MIDX's checksum from the command's standard output, which is needed when writing MIDX layers with `--no-write-chain-file`. No functional changes are included in this patch. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- repack-midx.c | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/repack-midx.c b/repack-midx.c index 5634dc186d0cad..3fe83715da420f 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -292,23 +292,42 @@ static void repack_prepare_midx_command(struct child_process *cmd, strvec_push(&cmd->args, "--bitmap"); } +static int repack_fill_midx_stdin_packs(struct child_process *cmd, + struct string_list *include) +{ + struct string_list_item *item; + FILE *in; + int ret; + + cmd->in = -1; + + strvec_push(&cmd->args, "--stdin-packs"); + + ret = start_command(cmd); + if (ret) + return ret; + + in = xfdopen(cmd->in, "w"); + for_each_string_list_item(item, include) + fprintf(in, "%s\n", item->string); + fclose(in); + + return finish_command(cmd); +} + int write_midx_included_packs(struct repack_write_midx_opts *opts) { struct child_process cmd = CHILD_PROCESS_INIT; struct string_list include = STRING_LIST_INIT_DUP; struct string_list_item *item; struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry); - FILE *in; int ret = 0; midx_included_packs(&include, opts); if (!include.nr) goto done; - cmd.in = -1; - repack_prepare_midx_command(&cmd, opts, "write"); - strvec_push(&cmd.args, "--stdin-packs"); if (preferred) strvec_pushf(&cmd.args, "--preferred-pack=%s", @@ -350,16 +369,7 @@ int write_midx_included_packs(struct repack_write_midx_opts *opts) strvec_pushf(&cmd.args, "--refs-snapshot=%s", opts->refs_snapshot); - ret = start_command(&cmd); - if (ret) - goto done; - - in = xfdopen(cmd.in, "w"); - for_each_string_list_item(item, &include) - fprintf(in, "%s\n", item->string); - fclose(in); - - ret = finish_command(&cmd); + ret = repack_fill_midx_stdin_packs(&cmd, &include); done: if (!ret && opts->write_bitmaps) remove_redundant_bitmaps(&include, opts->packdir); From d0ac3969f4b8a859d23c9f45cab873cbbf8cdfb8 Mon Sep 17 00:00:00 2001 From: Taylor 
Blau Date: Tue, 19 May 2026 11:58:10 -0400 Subject: [PATCH 79/93] repack-geometry: prepare for incremental MIDX repacking Teach `pack_geometry_init()` to optionally restrict the set of repacking candidates to only packs in the tip MIDX layer when a `midx_layer_threshold` is configured. If the tip layer has fewer packs than the threshold, those packs are excluded entirely; otherwise only packs in that layer participate in the geometric repack. Also track whether any tip-layer packs were included in the rollup (`midx_tip_rewritten`), which a subsequent commit will use to decide how to update the MIDX chain after repacking. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- repack-geometry.c | 35 +++++++++++++++++++++++++++++++++++ repack.h | 4 ++++ 2 files changed, 39 insertions(+) diff --git a/repack-geometry.c b/repack-geometry.c index 7cebd0cb45f0ea..2408b8a3cc2023 100644 --- a/repack-geometry.c +++ b/repack-geometry.c @@ -4,6 +4,7 @@ #include "repack.h" #include "repository.h" #include "hex.h" +#include "midx.h" #include "packfile.h" static uint32_t pack_geometry_weight(struct packed_git *p) @@ -31,8 +32,28 @@ void pack_geometry_init(struct pack_geometry *geometry, { struct packed_git *p; struct strbuf buf = STRBUF_INIT; + struct multi_pack_index *m = get_multi_pack_index(existing->source); repo_for_each_pack(existing->repo, p) { + if (geometry->midx_layer_threshold_set && m && + p->multi_pack_index) { + /* + * When writing MIDX layers incrementally, + * ignore packs unless they are in the most + * recent MIDX layer *and* there are at least + * 'midx_layer_threshold' packs in that layer. + * + * Otherwise 'p' is either in an older layer, or + * the youngest layer does not have enough packs + * to consider its packs as candidates for + * repacking. In either of those cases we want + * to ignore the pack. 
+ */ + if (m->num_packs < geometry->midx_layer_threshold || + !midx_layer_contains_pack(m, pack_basename(p))) + continue; + } + if (args->local && !p->pack_local) /* * When asked to only repack local packfiles we skip @@ -173,6 +194,20 @@ void pack_geometry_split(struct pack_geometry *geometry) geometry->promisor_split = compute_pack_geometry_split(geometry->promisor_pack, geometry->promisor_pack_nr, geometry->split_factor); + for (uint32_t i = 0; i < geometry->split; i++) { + struct packed_git *p = geometry->pack[i]; + /* + * During incremental MIDX/bitmap repacking, any packs + * included in the rollup are either (a) not MIDX'd, or + * (b) contained in the tip layer iff it has at least + * the threshold number of packs. + * + * In the latter case, we can safely conclude that the + * tip of the MIDX chain will be rewritten. + */ + if (p->multi_pack_index) + geometry->midx_tip_rewritten = true; + } } struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry) diff --git a/repack.h b/repack.h index c0e9f0ca647c50..77d24ee45fb6ae 100644 --- a/repack.h +++ b/repack.h @@ -108,6 +108,10 @@ struct pack_geometry { uint32_t promisor_pack_nr, promisor_pack_alloc; uint32_t promisor_split; + uint32_t midx_layer_threshold; + bool midx_layer_threshold_set; + bool midx_tip_rewritten; + int split_factor; }; From d376967fbfb0b366c08be50a1c41d6b30c5e6d89 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:58:13 -0400 Subject: [PATCH 80/93] builtin/repack.c: convert `--write-midx` to an `OPT_CALLBACK` Change the --write-midx (-m) flag from an OPT_BOOL to an OPT_CALLBACK that accepts an optional mode argument. Introduce an enum with REPACK_WRITE_MIDX_NONE and REPACK_WRITE_MIDX_DEFAULT to distinguish between the two states, and update all existing boolean checks accordingly. For now, passing no argument (or just `-m`) selects the default mode, preserving existing behavior. A subsequent commit will add a new mode for writing incremental MIDXs. 
Extract repack_write_midx() as a dispatcher that selects the appropriate MIDX-writing implementation based on the mode. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 50 ++++++++++++++++++++++++++++++++++++------------ repack-midx.c | 14 +++++++++++++- repack.h | 8 +++++++- 3 files changed, 58 insertions(+), 14 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 24be147d39a1ed..5d366340c34b78 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -97,6 +97,24 @@ static int repack_config(const char *var, const char *value, return git_default_config(var, value, ctx, cb); } +static int option_parse_write_midx(const struct option *opt, const char *arg, + int unset) +{ + enum repack_write_midx_mode *cfg = opt->value; + + if (unset) { + *cfg = REPACK_WRITE_MIDX_NONE; + return 0; + } + + if (!arg || !*arg) + *cfg = REPACK_WRITE_MIDX_DEFAULT; + else + return error(_("unknown value for %s: %s"), opt->long_name, arg); + + return 0; +} + int cmd_repack(int argc, const char **argv, const char *prefix, @@ -119,7 +137,7 @@ int cmd_repack(int argc, struct string_list keep_pack_list = STRING_LIST_INIT_NODUP; struct pack_objects_args po_args = PACK_OBJECTS_ARGS_INIT; struct pack_objects_args cruft_po_args = PACK_OBJECTS_ARGS_INIT; - int write_midx = 0; + enum repack_write_midx_mode write_midx = REPACK_WRITE_MIDX_NONE; const char *cruft_expiration = NULL; const char *expire_to = NULL; const char *filter_to = NULL; @@ -185,8 +203,14 @@ int cmd_repack(int argc, N_("do not repack this pack")), OPT_INTEGER('g', "geometric", &geometry.split_factor, N_("find a geometric progression with factor ")), - OPT_BOOL('m', "write-midx", &write_midx, - N_("write a multi-pack index of the resulting packs")), + OPT_CALLBACK_F(0, "write-midx", &write_midx, + N_("mode"), + N_("write a multi-pack index of the resulting packs"), + PARSE_OPT_OPTARG, option_parse_write_midx), + OPT_SET_INT_F('m', NULL, &write_midx, + N_("write a multi-pack index of the 
resulting packs"), + REPACK_WRITE_MIDX_DEFAULT, + PARSE_OPT_HIDDEN), OPT_STRING(0, "expire-to", &expire_to, N_("dir"), N_("pack prefix to store a pack containing pruned objects")), OPT_STRING(0, "filter-to", &filter_to, N_("dir"), @@ -221,14 +245,16 @@ int cmd_repack(int argc, pack_everything |= ALL_INTO_ONE; if (write_bitmaps < 0) { - if (!write_midx && + if (write_midx == REPACK_WRITE_MIDX_NONE && (!(pack_everything & ALL_INTO_ONE) || !is_bare_repository())) write_bitmaps = 0; } if (po_args.pack_kept_objects < 0) - po_args.pack_kept_objects = write_bitmaps > 0 && !write_midx; + po_args.pack_kept_objects = write_bitmaps > 0 && + write_midx == REPACK_WRITE_MIDX_NONE; - if (write_bitmaps && !(pack_everything & ALL_INTO_ONE) && !write_midx) + if (write_bitmaps && !(pack_everything & ALL_INTO_ONE) && + write_midx == REPACK_WRITE_MIDX_NONE) die(_(incremental_bitmap_conflict_error)); if (write_bitmaps && po_args.local && @@ -244,7 +270,7 @@ int cmd_repack(int argc, write_bitmaps = 0; } - if (write_midx && write_bitmaps) { + if (write_midx != REPACK_WRITE_MIDX_NONE && write_bitmaps) { struct strbuf path = STRBUF_INIT; strbuf_addf(&path, "%s/%s_XXXXXX", @@ -297,7 +323,7 @@ int cmd_repack(int argc, } if (repo_has_promisor_remote(repo)) strvec_push(&cmd.args, "--exclude-promisor-objects"); - if (!write_midx) { + if (write_midx == REPACK_WRITE_MIDX_NONE) { if (write_bitmaps > 0) strvec_push(&cmd.args, "--write-bitmap-index"); else if (write_bitmaps < 0) @@ -519,7 +545,7 @@ int cmd_repack(int argc, if (delete_redundant && pack_everything & ALL_INTO_ONE) existing_packs_mark_for_deletion(&existing, &names); - if (write_midx) { + if (write_midx != REPACK_WRITE_MIDX_NONE) { struct repack_write_midx_opts opts = { .existing = &existing, .geometry = &geometry, @@ -528,11 +554,11 @@ int cmd_repack(int argc, .packdir = packdir, .show_progress = show_progress, .write_bitmaps = write_bitmaps > 0, - .midx_must_contain_cruft = midx_must_contain_cruft + .midx_must_contain_cruft = 
midx_must_contain_cruft, + .mode = write_midx, }; - ret = write_midx_included_packs(&opts); - + ret = repack_write_midx(&opts); if (ret) goto cleanup; } diff --git a/repack-midx.c b/repack-midx.c index 3fe83715da420f..b1ca37970809a3 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -315,7 +315,7 @@ static int repack_fill_midx_stdin_packs(struct child_process *cmd, return finish_command(cmd); } -int write_midx_included_packs(struct repack_write_midx_opts *opts) +static int write_midx_included_packs(struct repack_write_midx_opts *opts) { struct child_process cmd = CHILD_PROCESS_INIT; struct string_list include = STRING_LIST_INIT_DUP; @@ -378,3 +378,15 @@ int write_midx_included_packs(struct repack_write_midx_opts *opts) return ret; } + +int repack_write_midx(struct repack_write_midx_opts *opts) +{ + switch (opts->mode) { + case REPACK_WRITE_MIDX_NONE: + BUG("write_midx mode is NONE?"); + case REPACK_WRITE_MIDX_DEFAULT: + return write_midx_included_packs(opts); + default: + BUG("unhandled write_midx mode: %d", opts->mode); + } +} diff --git a/repack.h b/repack.h index 77d24ee45fb6ae..81907fcce7ff94 100644 --- a/repack.h +++ b/repack.h @@ -134,6 +134,11 @@ void pack_geometry_release(struct pack_geometry *geometry); struct tempfile; +enum repack_write_midx_mode { + REPACK_WRITE_MIDX_NONE, + REPACK_WRITE_MIDX_DEFAULT, +}; + struct repack_write_midx_opts { struct existing_packs *existing; struct pack_geometry *geometry; @@ -143,10 +148,11 @@ struct repack_write_midx_opts { int show_progress; int write_bitmaps; int midx_must_contain_cruft; + enum repack_write_midx_mode mode; }; void midx_snapshot_refs(struct repository *repo, struct tempfile *f); -int write_midx_included_packs(struct repack_write_midx_opts *opts); +int repack_write_midx(struct repack_write_midx_opts *opts); int write_filtered_pack(const struct write_pack_opts *opts, struct existing_packs *existing, From b0d6e7b0d0b1c20fc847f371dcc261a0c7b27be1 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 
2026 11:58:16 -0400 Subject: [PATCH 81/93] packfile: ensure `close_pack_revindex()` frees in-memory revindex The following commit will introduce a case where we write a MIDX bitmap over packs that do not themselves have on-disk *.rev files. This case is supported within Git, and we will simply fall back to generating the revindex in memory. But we don't ever release that memory, causing a leak that is exposed by a test introduced in the following commit. (As far as I could find, we never free()'d memory allocated as a byproduct of creating an in-memory revindex, likely because that code predates the leak-checking niceties we have in the test suite now.) Rectify this by calling `FREE_AND_NULL()` on the `p->revindex` field when calling `close_pack_revindex()`. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- packfile.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packfile.c b/packfile.c index b012d648adaf2e..a1e88fdb223b8b 100644 --- a/packfile.c +++ b/packfile.c @@ -420,6 +420,8 @@ void close_pack_index(struct packed_git *p) static void close_pack_revindex(struct packed_git *p) { + FREE_AND_NULL(p->revindex); + if (!p->revindex_map) return; From 1da62fb5c868447640c3697e9f2ec0004e24951f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:58:19 -0400 Subject: [PATCH 82/93] repack: implement incremental MIDX repacking Implement the `write_midx_incremental()` function, which builds and maintains an incremental MIDX chain as part of the geometric repacking process. Unlike the default mode which writes a single flat MIDX, the incremental mode constructs a compaction plan that determines which MIDX layers to write, compact, or copy, and then executes each step using `git multi-pack-index` subcommands with the --no-write-chain-file flag. The repacking strategy works as follows: * Acquire the lock guarding the multi-pack-index-chain. * A new MIDX layer is always written containing the newly created pack(s). 
If the tip MIDX layer was rewritten during geometric repacking, any surviving packs from that layer are also included. * Starting from the new layer, adjacent MIDX layers are merged together as long as the accumulated object count exceeds half the object count of the next deeper layer (controlled by 'repack.midxSplitFactor'). * Remaining layers in the chain are evaluated pairwise and either compacted or copied as-is, following the same merging condition. * Write the contents of the new multi-pack-index chain, atomically move it into place, and then release the lock. * Delete any now-unused MIDX layers. After writing the new layer, the strategy is evaluated among the existing MIDX layers in order from oldest to newest. Each step that writes a new MIDX layer uses "--no-write-chain-file" to avoid updating the multi-pack-index-chain file. After all steps are complete, the new chain file is written and then atomically moved into place. At present, this functionality is exposed behind a new enum value, `REPACK_WRITE_MIDX_INCREMENTAL`, but has no external callers. A subsequent commit will expose this mode via `git repack --write-midx=incremental`. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 5 + repack-midx.c | 593 +++++++++++++++++++++++++++++++++++++++++++++-- repack.h | 3 + 3 files changed, 588 insertions(+), 13 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 5d366340c34b78..75c57736780efd 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -42,6 +42,9 @@ static const char incremental_bitmap_conflict_error[] = N_( "--no-write-bitmap-index or disable the pack.writeBitmaps configuration." 
); +#define DEFAULT_MIDX_SPLIT_FACTOR 2 +#define DEFAULT_MIDX_NEW_LAYER_THRESHOLD 8 + struct repack_config_ctx { struct pack_objects_args *po_args; struct pack_objects_args *cruft_po_args; @@ -555,6 +558,8 @@ int cmd_repack(int argc, .show_progress = show_progress, .write_bitmaps = write_bitmaps > 0, .midx_must_contain_cruft = midx_must_contain_cruft, + .midx_split_factor = DEFAULT_MIDX_SPLIT_FACTOR, + .midx_new_layer_threshold = DEFAULT_MIDX_NEW_LAYER_THRESHOLD, .mode = write_midx, }; diff --git a/repack-midx.c b/repack-midx.c index b1ca37970809a3..f97331fb1b743a 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -2,12 +2,16 @@ #include "repack.h" #include "hash.h" #include "hex.h" +#include "lockfile.h" +#include "midx.h" #include "odb.h" #include "oidset.h" #include "pack-bitmap.h" +#include "path.h" #include "refs.h" #include "run-command.h" #include "tempfile.h" +#include "trace2.h" struct midx_snapshot_ref_data { struct repository *repo; @@ -293,26 +297,30 @@ static void repack_prepare_midx_command(struct child_process *cmd, } static int repack_fill_midx_stdin_packs(struct child_process *cmd, - struct string_list *include) + struct string_list *include, + struct string_list *out) { + struct strbuf in_buf = STRBUF_INIT; + struct strbuf out_buf = STRBUF_INIT; struct string_list_item *item; - FILE *in; int ret; - cmd->in = -1; - strvec_push(&cmd->args, "--stdin-packs"); - ret = start_command(cmd); - if (ret) - return ret; - - in = xfdopen(cmd->in, "w"); for_each_string_list_item(item, include) - fprintf(in, "%s\n", item->string); - fclose(in); + strbuf_addf(&in_buf, "%s\n", item->string); + + ret = pipe_command(cmd, in_buf.buf, in_buf.len, + out ? 
&out_buf : NULL, 0, NULL, 0); + + if (out) + string_list_split_f(out, out_buf.buf, "\n", -1, + STRING_LIST_SPLIT_NONEMPTY); + + strbuf_release(&in_buf); + strbuf_release(&out_buf); - return finish_command(cmd); + return ret; } static int write_midx_included_packs(struct repack_write_midx_opts *opts) @@ -369,7 +377,7 @@ static int write_midx_included_packs(struct repack_write_midx_opts *opts) strvec_pushf(&cmd.args, "--refs-snapshot=%s", opts->refs_snapshot); - ret = repack_fill_midx_stdin_packs(&cmd, &include); + ret = repack_fill_midx_stdin_packs(&cmd, &include, NULL); done: if (!ret && opts->write_bitmaps) remove_redundant_bitmaps(&include, opts->packdir); @@ -379,6 +387,563 @@ static int write_midx_included_packs(struct repack_write_midx_opts *opts) return ret; } +struct midx_compaction_step { + union { + struct multi_pack_index *copy; + struct string_list write; + struct { + struct multi_pack_index *from; + struct multi_pack_index *to; + } compact; + } u; + + uint32_t objects_nr; + char *csum; + + enum { + MIDX_COMPACTION_STEP_UNKNOWN, + MIDX_COMPACTION_STEP_COPY, + MIDX_COMPACTION_STEP_WRITE, + MIDX_COMPACTION_STEP_COMPACT, + } type; +}; + +static const char *midx_compaction_step_base(const struct midx_compaction_step *step) +{ + switch (step->type) { + case MIDX_COMPACTION_STEP_UNKNOWN: + BUG("cannot use UNKNOWN step as a base"); + case MIDX_COMPACTION_STEP_COPY: + return midx_get_checksum_hex(step->u.copy); + case MIDX_COMPACTION_STEP_WRITE: + BUG("cannot use WRITE step as a base"); + case MIDX_COMPACTION_STEP_COMPACT: + return midx_get_checksum_hex(step->u.compact.to); + default: + BUG("unhandled midx compaction step type %d", step->type); + } +} + +static int midx_compaction_step_exec_copy(struct midx_compaction_step *step) +{ + step->csum = xstrdup(midx_get_checksum_hex(step->u.copy)); + return 0; +} + +static int midx_compaction_step_exec_write(struct midx_compaction_step *step, + struct repack_write_midx_opts *opts, + const char *base) +{ + struct 
child_process cmd = CHILD_PROCESS_INIT; + struct string_list hash = STRING_LIST_INIT_DUP; + struct string_list_item *item; + const char *preferred_pack = NULL; + int ret = 0; + + if (!step->u.write.nr) { + ret = error(_("no packs to write MIDX during compaction")); + goto out; + } + + for_each_string_list_item(item, &step->u.write) { + if (item->util) + preferred_pack = item->string; + } + + repack_prepare_midx_command(&cmd, opts, "write"); + strvec_pushl(&cmd.args, "--incremental", "--no-write-chain-file", NULL); + strvec_pushf(&cmd.args, "--base=%s", base ? base : "none"); + + if (preferred_pack) { + struct strbuf buf = STRBUF_INIT; + + strbuf_addstr(&buf, preferred_pack); + strbuf_strip_suffix(&buf, ".idx"); + strbuf_addstr(&buf, ".pack"); + + strvec_pushf(&cmd.args, "--preferred-pack=%s", buf.buf); + + strbuf_release(&buf); + } + + ret = repack_fill_midx_stdin_packs(&cmd, &step->u.write, &hash); + if (hash.nr != 1) { + ret = error(_("expected exactly one line during MIDX write, " + "got: %"PRIuMAX), + (uintmax_t)hash.nr); + goto out; + } + + step->csum = xstrdup(hash.items[0].string); + +out: + string_list_clear(&hash, 0); + + return ret; +} + +static int midx_compaction_step_exec_compact(struct midx_compaction_step *step, + struct repack_write_midx_opts *opts) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct strbuf buf = STRBUF_INIT; + FILE *out = NULL; + int ret; + + repack_prepare_midx_command(&cmd, opts, "compact"); + strvec_pushl(&cmd.args, "--incremental", "--no-write-chain-file", + midx_get_checksum_hex(step->u.compact.from), + midx_get_checksum_hex(step->u.compact.to), NULL); + + cmd.out = -1; + + ret = start_command(&cmd); + if (ret) + goto out; + + out = xfdopen(cmd.out, "r"); + while (strbuf_getline_lf(&buf, out) != EOF) { + if (step->csum) { + ret = error(_("unexpected MIDX output: '%s'"), buf.buf); + fclose(out); + out = NULL; + finish_command(&cmd); + goto out; + } + step->csum = strbuf_detach(&buf, NULL); + } + + ret = 
finish_command(&cmd); + +out: + if (out) + fclose(out); + strbuf_release(&buf); + + return ret; +} + +static int midx_compaction_step_exec(struct midx_compaction_step *step, + struct repack_write_midx_opts *opts, + const char *base) +{ + switch (step->type) { + case MIDX_COMPACTION_STEP_UNKNOWN: + BUG("cannot execute UNKNOWN midx compaction step"); + case MIDX_COMPACTION_STEP_COPY: + return midx_compaction_step_exec_copy(step); + case MIDX_COMPACTION_STEP_WRITE: + return midx_compaction_step_exec_write(step, opts, base); + case MIDX_COMPACTION_STEP_COMPACT: + return midx_compaction_step_exec_compact(step, opts); + default: + BUG("unhandled midx compaction step type %d", step->type); + } +} + +static void midx_compaction_step_release(struct midx_compaction_step *step) +{ + if (step->type == MIDX_COMPACTION_STEP_WRITE) + string_list_clear(&step->u.write, 0); + free(step->csum); +} + +static int repack_make_midx_compaction_plan(struct repack_write_midx_opts *opts, + struct midx_compaction_step **steps_p, + size_t *steps_nr_p) +{ + struct multi_pack_index *m; + struct midx_compaction_step *steps = NULL; + struct midx_compaction_step step = { 0 }; + struct strbuf buf = STRBUF_INIT; + size_t steps_nr = 0, steps_alloc = 0; + uint32_t i; + int ret = 0; + + trace2_region_enter("repack", "make_midx_compaction_plan", + opts->existing->repo); + + odb_reprepare(opts->existing->repo->objects); + m = get_multi_pack_index(opts->existing->source); + + for (i = 0; m && i < m->num_packs + m->num_packs_in_base; i++) { + if (prepare_midx_pack(m, i)) { + ret = error(_("could not load pack %"PRIu32" from MIDX"), + i); + goto out; + } + } + + trace2_region_enter("repack", "steps:write", opts->existing->repo); + + /* + * The first MIDX in the resulting chain is always going to be + * new. + * + * At a minimum, it will include all of the newly written packs. 
+ * If there is an existing MIDX whose tip layer contains packs + * that were repacked, it will also include any of its packs + * which were *not* rolled up as part of the geometric repack + * (if any), and the previous tip will be replaced. + * + * It may grow to include the packs from zero or more MIDXs from + * the old chain, beginning either at the old tip (if the MIDX + * was *not* rewritten) or the old tip's base MIDX layer + * (otherwise). + */ + step.type = MIDX_COMPACTION_STEP_WRITE; + string_list_init_dup(&step.u.write); + + for (i = 0; i < opts->names->nr; i++) { + strbuf_reset(&buf); + strbuf_addf(&buf, "pack-%s.idx", opts->names->items[i].string); + string_list_append(&step.u.write, buf.buf); + + trace2_data_string("repack", opts->existing->repo, + "include:fresh", + step.u.write.items[step.u.write.nr - 1].string); + } + for (i = 0; i < opts->geometry->split; i++) { + struct packed_git *p = opts->geometry->pack[i]; + if (unsigned_add_overflows(step.objects_nr, p->num_objects)) { + ret = error(_("too many objects in MIDX compaction step")); + goto out; + } + + step.objects_nr += p->num_objects; + } + trace2_data_intmax("repack", opts->existing->repo, + "include:fresh:objects_nr", + (uintmax_t)step.objects_nr); + + /* + * Now handle any existing packs which were *not* rewritten. + * + * The list of packs in opts->geometry only contains MIDX'd + * packs from the newest layer when that layer has more than + * 'repack.midxNewLayerThreshold' number of packs. + * + * If the MIDX tip was rewritten (that is, one or more of those + * packs appear below the split line), then add all packs above + * the split line to the new layer, as the old one is no longer + * usable. + * + * If the MIDX tip was not rewritten (that is, all MIDX'd packs + * from the youngest layer appear below the split line, or were + * not included in the geometric repack at all because there + * were too few of them), ignore them since we'll retain the + * existing layer as-is. 
+ */ + for (i = opts->geometry->split; i < opts->geometry->pack_nr; i++) { + struct packed_git *p = opts->geometry->pack[i]; + struct string_list_item *item; + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".idx"); + + if (p->multi_pack_index && + !opts->geometry->midx_tip_rewritten) { + trace2_data_string("repack", opts->existing->repo, + "exclude:unmodified", buf.buf); + continue; + } + + trace2_data_string("repack", opts->existing->repo, + "include:unmodified", buf.buf); + trace2_data_string("repack", opts->existing->repo, + "include:unmodified:midx", + p->multi_pack_index ? "true" : "false"); + + item = string_list_append(&step.u.write, buf.buf); + if (p->multi_pack_index || i == opts->geometry->pack_nr - 1) + item->util = (void *)1; /* mark as preferred */ + + if (unsigned_add_overflows(step.objects_nr, p->num_objects)) { + ret = error(_("too many objects in MIDX compaction step")); + goto out; + } + + step.objects_nr += p->num_objects; + } + trace2_data_intmax("repack", opts->existing->repo, + "include:unmodified:objects_nr", + (uintmax_t)step.objects_nr); + + /* + * If the MIDX tip was rewritten, then we no longer consider it + * a candidate for compaction, since it will not exist in the + * MIDX chain being built. + */ + if (opts->geometry->midx_tip_rewritten) + m = m->base_midx; + + trace2_data_string("repack", opts->existing->repo, "midx:rewrote-tip", + opts->geometry->midx_tip_rewritten ? "true" : "false"); + + trace2_region_enter("repack", "compact", opts->existing->repo); + + /* + * Compact additional MIDX layers into this proposed one until + * the merging condition is violated. 
+ */ + while (m) { + uint32_t preferred_pack_idx; + + trace2_data_string("repack", opts->existing->repo, + "candidate", midx_get_checksum_hex(m)); + + if (step.objects_nr < m->num_objects / opts->midx_split_factor) { + /* + * Stop compacting MIDX layer as soon as the + * merged size is less than half the size of the + * next layer in the chain. + */ + trace2_data_string("repack", opts->existing->repo, + "compact", "violated"); + trace2_data_intmax("repack", opts->existing->repo, + "objects_nr", + (uintmax_t)step.objects_nr); + trace2_data_intmax("repack", opts->existing->repo, + "next_objects_nr", + (uintmax_t)m->num_objects); + trace2_data_intmax("repack", opts->existing->repo, + "split_factor", + (uintmax_t)opts->midx_split_factor); + + break; + } + + if (midx_preferred_pack(m, &preferred_pack_idx) < 0) { + ret = error(_("could not find preferred pack for MIDX " + "%s"), midx_get_checksum_hex(m)); + goto out; + } + + for (i = 0; i < m->num_packs; i++) { + struct string_list_item *item; + uint32_t pack_int_id = i + m->num_packs_in_base; + struct packed_git *p = nth_midxed_pack(m, pack_int_id); + + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(p)); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_addstr(&buf, ".idx"); + + trace2_data_string("repack", opts->existing->repo, + "midx:pack", buf.buf); + + item = string_list_append(&step.u.write, buf.buf); + if (pack_int_id == preferred_pack_idx) + item->util = (void *)1; /* mark as preferred */ + } + + if (unsigned_add_overflows(step.objects_nr, m->num_objects)) { + ret = error(_("too many objects in MIDX compaction step")); + goto out; + } + step.objects_nr += m->num_objects; + + m = m->base_midx; + } + + if (step.u.write.nr > 0) { + /* + * As long as there is at least one new pack to write + * (and thus the MIDX is non-empty), add it to the plan. 
+ */ + ALLOC_GROW(steps, steps_nr + 1, steps_alloc); + steps[steps_nr++] = step; + } + + trace2_data_intmax("repack", opts->existing->repo, + "step:objects_nr", (uintmax_t)step.objects_nr); + trace2_data_intmax("repack", opts->existing->repo, + "step:packs_nr", (uintmax_t)step.u.write.nr); + + trace2_region_leave("repack", "compact", opts->existing->repo); + trace2_region_leave("repack", "steps:write", opts->existing->repo); + + trace2_region_enter("repack", "steps:rest", opts->existing->repo); + + /* + * Then start over, repeat, and either compact or keep as-is + * each MIDX layer until we have exhausted the chain. + * + * Finally, evaluate the remainder of the chain (if any) and + * either compact a sequence of adjacent layers, or keep + * individual layers as-is according to the same merging + * condition as above. + */ + while (m) { + struct multi_pack_index *next = m; + + ALLOC_GROW(steps, steps_nr + 1, steps_alloc); + + memset(&step, 0, sizeof(step)); + step.type = MIDX_COMPACTION_STEP_UNKNOWN; + + trace2_region_enter("repack", "step", opts->existing->repo); + + trace2_data_string("repack", opts->existing->repo, + "from", midx_get_checksum_hex(m)); + + while (next) { + uint32_t proposed_objects_nr; + if (unsigned_add_overflows(step.objects_nr, next->num_objects)) { + ret = error(_("too many objects in MIDX compaction step")); + trace2_region_leave("repack", "step", opts->existing->repo); + goto out; + } + + proposed_objects_nr = step.objects_nr + next->num_objects; + + trace2_data_string("repack", opts->existing->repo, + "proposed", + midx_get_checksum_hex(next)); + trace2_data_intmax("repack", opts->existing->repo, + "proposed:objects_nr", + (uintmax_t)next->num_objects); + + if (!next->base_midx) { + /* + * If we are at the end of the MIDX + * chain, there is nothing to compact, + * so mark it and stop. 
+ */ + step.objects_nr = proposed_objects_nr; + break; + } + + if (proposed_objects_nr < next->base_midx->num_objects / opts->midx_split_factor) { + /* + * If there is a MIDX following this + * one, but our accumulated size is less + * than half of its size, compacting + * them would violate the merging + * condition, so stop here. + */ + + trace2_data_string("repack", opts->existing->repo, + "compact:violated:at", + midx_get_checksum_hex(next->base_midx)); + trace2_data_intmax("repack", opts->existing->repo, + "compact:violated:at:objects_nr", + (uintmax_t)next->base_midx->num_objects); + break; + } + + /* + * Otherwise, it is OK to compact the next layer + * into this one. Do so, and then continue + * through the remainder of the chain. + */ + step.objects_nr = proposed_objects_nr; + trace2_data_intmax("repack", opts->existing->repo, + "step:objects_nr", + (uintmax_t)step.objects_nr); + next = next->base_midx; + } + + if (m == next) { + step.type = MIDX_COMPACTION_STEP_COPY; + step.u.copy = m; + + trace2_data_string("repack", opts->existing->repo, + "type", "copy"); + } else { + step.type = MIDX_COMPACTION_STEP_COMPACT; + step.u.compact.from = next; + step.u.compact.to = m; + + trace2_data_string("repack", opts->existing->repo, + "to", midx_get_checksum_hex(m)); + trace2_data_string("repack", opts->existing->repo, + "type", "compact"); + } + + m = next->base_midx; + steps[steps_nr++] = step; + trace2_region_leave("repack", "step", opts->existing->repo); + } + + trace2_region_leave("repack", "steps:rest", opts->existing->repo); + +out: + *steps_p = steps; + *steps_nr_p = steps_nr; + + strbuf_release(&buf); + + trace2_region_leave("repack", "make_midx_compaction_plan", + opts->existing->repo); + + return ret; +} + +static int write_midx_incremental(struct repack_write_midx_opts *opts) +{ + struct midx_compaction_step *steps = NULL; + struct strbuf lock_name = STRBUF_INIT; + struct lock_file lf; + size_t steps_nr = 0; + size_t i; + int ret = 0; + + 
get_midx_chain_filename(opts->existing->source, &lock_name); + if (safe_create_leading_directories(opts->existing->repo, + lock_name.buf)) + die_errno(_("unable to create leading directories of %s"), + lock_name.buf); + hold_lock_file_for_update(&lf, lock_name.buf, LOCK_DIE_ON_ERROR); + + if (!fdopen_lock_file(&lf, "w")) { + ret = error_errno(_("unable to open multi-pack-index chain file")); + goto done; + } + + if (repack_make_midx_compaction_plan(opts, &steps, &steps_nr) < 0) { + ret = error(_("unable to generate compaction plan")); + goto done; + } + + for (i = 0; i < steps_nr; i++) { + struct midx_compaction_step *step = &steps[i]; + char *base = NULL; + + if (i + 1 < steps_nr) + base = xstrdup(midx_compaction_step_base(&steps[i + 1])); + + if (midx_compaction_step_exec(step, opts, base) < 0) { + ret = error(_("unable to execute compaction step %"PRIuMAX), + (uintmax_t)i); + free(base); + goto done; + } + + free(base); + } + + i = steps_nr; + while (i--) { + struct midx_compaction_step *step = &steps[i]; + if (!step->csum) + BUG("missing result for compaction step %"PRIuMAX, + (uintmax_t)i); + fprintf(get_lock_file_fp(&lf), "%s\n", step->csum); + } + + commit_lock_file(&lf); + +done: + strbuf_release(&lock_name); + for (i = 0; i < steps_nr; i++) + midx_compaction_step_release(&steps[i]); + free(steps); + return ret; +} + int repack_write_midx(struct repack_write_midx_opts *opts) { switch (opts->mode) { @@ -386,6 +951,8 @@ int repack_write_midx(struct repack_write_midx_opts *opts) BUG("write_midx mode is NONE?"); case REPACK_WRITE_MIDX_DEFAULT: return write_midx_included_packs(opts); + case REPACK_WRITE_MIDX_INCREMENTAL: + return write_midx_incremental(opts); default: BUG("unhandled write_midx mode: %d", opts->mode); } diff --git a/repack.h b/repack.h index 81907fcce7ff94..831ccfb1c6ce77 100644 --- a/repack.h +++ b/repack.h @@ -137,6 +137,7 @@ struct tempfile; enum repack_write_midx_mode { REPACK_WRITE_MIDX_NONE, REPACK_WRITE_MIDX_DEFAULT, + 
REPACK_WRITE_MIDX_INCREMENTAL, }; struct repack_write_midx_opts { @@ -148,6 +149,8 @@ struct repack_write_midx_opts { int show_progress; int write_bitmaps; int midx_must_contain_cruft; + int midx_split_factor; + int midx_new_layer_threshold; enum repack_write_midx_mode mode; }; From 938af8926099882ff87f8ffa4115c34ed63d9e8b Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:58:22 -0400 Subject: [PATCH 83/93] repack: introduce `--write-midx=incremental` Expose the incremental MIDX repacking mode (implemented in an earlier commit) via a new --write-midx=incremental option for `git repack`. Add "incremental" as a recognized argument to the --write-midx OPT_CALLBACK, mapping it to REPACK_WRITE_MIDX_INCREMENTAL. When this mode is active and --geometric is in use, set the midx_layer_threshold on the pack geometry so that only packs in sufficiently large tip layers are considered for repacking. Two new configuration options control the compaction behavior: - repack.midxSplitFactor (default: 2): the factor used in the geometric merging condition for MIDX layers. - repack.midxNewLayerThreshold (default: 8): the minimum number of packs in the tip MIDX layer before its packs are considered as candidates for geometric repacking. Add tests exercising the new mode across a variety of scenarios including basic geometric violations, multi-round chain integrity, branching and merging histories, cross-layer object uniqueness, and threshold-based compaction. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/config/repack.adoc | 18 ++ Documentation/git-repack.adoc | 39 ++- builtin/repack.c | 49 ++- midx.c | 29 ++ midx.h | 3 + repack-geometry.c | 13 +- repack-midx.c | 5 + repack.c | 56 +++- repack.h | 10 +- t/meson.build | 1 + t/t7705-repack-incremental-midx.sh | 470 +++++++++++++++++++++++++++++ 11 files changed, 669 insertions(+), 24 deletions(-) create mode 100755 t/t7705-repack-incremental-midx.sh diff --git a/Documentation/config/repack.adoc b/Documentation/config/repack.adoc index e9e78dcb198292..4c22a499f6216c 100644 --- a/Documentation/config/repack.adoc +++ b/Documentation/config/repack.adoc @@ -46,3 +46,21 @@ repack.midxMustContainCruft:: `--write-midx`. When false, cruft packs are only included in the MIDX when necessary (e.g., because they might be required to form a reachability closure with MIDX bitmaps). Defaults to true. + +repack.midxSplitFactor:: + The factor used in the geometric merging condition when + compacting incremental MIDX layers during `git repack` when + invoked with the `--write-midx=incremental` option. ++ +Adjacent layers are merged when the accumulated object count of the +newer layer exceeds `1/<N>` of the object count of the next deeper +layer. Must be at least 2. Defaults to 2. + +repack.midxNewLayerThreshold:: + The minimum number of packs in the tip MIDX layer before those + packs are considered as candidates for geometric repacking + during `git repack --write-midx=incremental`. ++ +When the tip layer has fewer packs than this threshold, those packs are +excluded from the geometric repack entirely, and are thus left +unmodified. Must be at least 1. Defaults to 8. 
diff --git a/Documentation/git-repack.adoc b/Documentation/git-repack.adoc index 673ce91083720d..27a99cc46f4ada 100644 --- a/Documentation/git-repack.adoc +++ b/Documentation/git-repack.adoc @@ -11,7 +11,7 @@ SYNOPSIS [verse] 'git repack' [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m] [--window=<n>] [--depth=<n>] [--threads=<n>] [--keep-pack=<pack-name>] - [--write-midx] [--name-hash-version=<n>] [--path-walk] + [--write-midx[=<mode>]] [--name-hash-version=<n>] [--path-walk] DESCRIPTION ----------- @@ -250,9 +250,42 @@ pack as the preferred pack for object selection by the MIDX (see linkgit:git-multi-pack-index[1]). -m:: ---write-midx:: +--write-midx[=<mode>]:: Write a multi-pack index (see linkgit:git-multi-pack-index[1]) - containing the non-redundant packs. + containing the non-redundant packs. The following modes are + available: ++ +-- + `default`;; + Write a single MIDX covering all packs. This is the + default when `--write-midx` is given without an + explicit mode. + + `incremental`;; + Write an incremental MIDX chain instead of a single + flat MIDX. This mode requires `--geometric`. ++ +The incremental mode maintains a chain of MIDX layers that is compacted +over time using a geometric merging strategy. Each repack creates a new +tip layer containing the newly written pack(s). Adjacent layers are then +merged whenever the newer layer's object count exceeds +`1/repack.midxSplitFactor` of the next deeper layer's count. Layers +that do not meet this condition are retained as-is. ++ +The result is that newer (tip) layers tend to contain many small packs +with relatively few objects, while older (deeper) layers contain fewer, +larger packs covering more objects. Because compaction is driven by the +tip of the chain, newer layers are also rewritten more frequently than +older ones, which are only touched when enough objects have accumulated +to justify merging into them. This keeps the total number of layers +logarithmic relative to the total number of objects. 
++ +Only packs in the tip MIDX layer are considered as candidates for the +geometric repack; packs in deeper layers are left untouched. If the tip +layer contains fewer packs than `repack.midxNewLayerThreshold`, those +packs are excluded from the geometry entirely, and a new layer is +created for any new pack(s) without disturbing the existing chain. +-- --name-hash-version=:: Provide this argument to the underlying `git pack-objects` process. diff --git a/builtin/repack.c b/builtin/repack.c index 75c57736780efd..5ffa18e085e49a 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -33,7 +33,7 @@ static int midx_must_contain_cruft = 1; static const char *const git_repack_usage[] = { N_("git repack [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m]\n" "[--window=] [--depth=] [--threads=] [--keep-pack=]\n" - "[--write-midx] [--name-hash-version=] [--path-walk]"), + "[--write-midx[=]] [--name-hash-version=] [--path-walk]"), NULL }; @@ -48,6 +48,8 @@ static const char incremental_bitmap_conflict_error[] = N_( struct repack_config_ctx { struct pack_objects_args *po_args; struct pack_objects_args *cruft_po_args; + int midx_split_factor; + int midx_new_layer_threshold; }; static int repack_config(const char *var, const char *value, @@ -97,6 +99,16 @@ static int repack_config(const char *var, const char *value, midx_must_contain_cruft = git_config_bool(var, value); return 0; } + if (!strcmp(var, "repack.midxsplitfactor")) { + repack_ctx->midx_split_factor = git_config_int(var, value, + ctx->kvi); + return 0; + } + if (!strcmp(var, "repack.midxnewlayerthreshold")) { + repack_ctx->midx_new_layer_threshold = git_config_int(var, value, + ctx->kvi); + return 0; + } return git_default_config(var, value, ctx, cb); } @@ -112,6 +124,8 @@ static int option_parse_write_midx(const struct option *opt, const char *arg, if (!arg || !*arg) *cfg = REPACK_WRITE_MIDX_DEFAULT; + else if (!strcmp(arg, "incremental")) + *cfg = REPACK_WRITE_MIDX_INCREMENTAL; else return error(_("unknown value 
for %s: %s"), opt->long_name, arg); @@ -226,6 +240,8 @@ int cmd_repack(int argc, memset(&config_ctx, 0, sizeof(config_ctx)); config_ctx.po_args = &po_args; config_ctx.cruft_po_args = &cruft_po_args; + config_ctx.midx_split_factor = DEFAULT_MIDX_SPLIT_FACTOR; + config_ctx.midx_new_layer_threshold = DEFAULT_MIDX_NEW_LAYER_THRESHOLD; repo_config(repo, repack_config, &config_ctx); @@ -247,6 +263,9 @@ int cmd_repack(int argc, if (pack_everything & PACK_CRUFT) pack_everything |= ALL_INTO_ONE; + if (write_midx == REPACK_WRITE_MIDX_INCREMENTAL && !geometry.split_factor) + die(_("--write-midx=incremental requires --geometric")); + if (write_bitmaps < 0) { if (write_midx == REPACK_WRITE_MIDX_NONE && (!(pack_everything & ALL_INTO_ONE) || !is_bare_repository())) @@ -273,6 +292,13 @@ int cmd_repack(int argc, write_bitmaps = 0; } + if (config_ctx.midx_split_factor < 2) + die(_("invalid value for %s: %d"), "--midx-split-factor", + config_ctx.midx_split_factor); + if (config_ctx.midx_new_layer_threshold < 1) + die(_("invalid value for %s: %d"), "--midx-new-layer-threshold", + config_ctx.midx_new_layer_threshold); + if (write_midx != REPACK_WRITE_MIDX_NONE && write_bitmaps) { struct strbuf path = STRBUF_INIT; @@ -296,6 +322,10 @@ int cmd_repack(int argc, if (geometry.split_factor) { if (pack_everything) die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a"); + if (write_midx == REPACK_WRITE_MIDX_INCREMENTAL) { + geometry.midx_layer_threshold = config_ctx.midx_new_layer_threshold; + geometry.midx_layer_threshold_set = true; + } pack_geometry_init(&geometry, &existing, &po_args); pack_geometry_split(&geometry); } @@ -545,8 +575,11 @@ int cmd_repack(int argc, packtmp); /* End of pack replacement. 
*/ - if (delete_redundant && pack_everything & ALL_INTO_ONE) + if (delete_redundant && pack_everything & ALL_INTO_ONE) { + if (write_midx == REPACK_WRITE_MIDX_INCREMENTAL) + existing_packs_retain_midx_packs(&existing); existing_packs_mark_for_deletion(&existing, &names); + } if (write_midx != REPACK_WRITE_MIDX_NONE) { struct repack_write_midx_opts opts = { @@ -558,8 +591,8 @@ int cmd_repack(int argc, .show_progress = show_progress, .write_bitmaps = write_bitmaps > 0, .midx_must_contain_cruft = midx_must_contain_cruft, - .midx_split_factor = DEFAULT_MIDX_SPLIT_FACTOR, - .midx_new_layer_threshold = DEFAULT_MIDX_NEW_LAYER_THRESHOLD, + .midx_split_factor = config_ctx.midx_split_factor, + .midx_new_layer_threshold = config_ctx.midx_new_layer_threshold, .mode = write_midx, }; @@ -572,11 +605,15 @@ int cmd_repack(int argc, if (delete_redundant) { int opts = 0; - existing_packs_remove_redundant(&existing, packdir); + bool wrote_incremental_midx = write_midx == REPACK_WRITE_MIDX_INCREMENTAL; + + existing_packs_remove_redundant(&existing, packdir, + wrote_incremental_midx); if (geometry.split_factor) pack_geometry_remove_redundant(&geometry, &names, - &existing, packdir); + &existing, packdir, + wrote_incremental_midx); if (show_progress) opts |= PRUNE_PACKED_VERBOSE; prune_packed_objects(opts); diff --git a/midx.c b/midx.c index dc86c8e7fee879..efbfbb13f4106a 100644 --- a/midx.c +++ b/midx.c @@ -850,6 +850,35 @@ void clear_midx_file(struct repository *r) strbuf_release(&midx); } +void clear_incremental_midx_files(struct repository *r, + const struct strvec *keep_hashes) +{ + struct odb_source *source = r->objects->sources; + struct strbuf chain = STRBUF_INIT; + + get_midx_chain_filename(source, &chain); + + for (; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + if (files->packed->midx) + close_midx(files->packed->midx); + files->packed->midx = NULL; + } + + if (!keep_hashes && remove_path(chain.buf)) + die(_("failed 
to clear multi-pack-index chain at %s"), + chain.buf); + + clear_incremental_midx_files_ext(r->objects->sources, MIDX_EXT_BITMAP, + keep_hashes); + clear_incremental_midx_files_ext(r->objects->sources, MIDX_EXT_REV, + keep_hashes); + clear_incremental_midx_files_ext(r->objects->sources, MIDX_EXT_MIDX, + keep_hashes); + + strbuf_release(&chain); +} + static int verify_midx_error; __attribute__((format (printf, 1, 2))) diff --git a/midx.h b/midx.h index 3ee12dd08ec087..63853a03a47fd1 100644 --- a/midx.h +++ b/midx.h @@ -9,6 +9,7 @@ struct repository; struct bitmapped_pack; struct git_hash_algo; struct odb_source; +struct strvec; #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ #define MIDX_VERSION_V1 1 @@ -143,6 +144,8 @@ int write_midx_file_compact(struct odb_source *source, const char *incremental_base, unsigned flags); void clear_midx_file(struct repository *r); +void clear_incremental_midx_files(struct repository *r, + const struct strvec *keep_hashes); int verify_midx_file(struct odb_source *source, unsigned flags); int expire_midx_packs(struct odb_source *source, unsigned flags); int midx_repack(struct odb_source *source, size_t batch_size, unsigned flags); diff --git a/repack-geometry.c b/repack-geometry.c index 2408b8a3cc2023..2064683dcfe1e1 100644 --- a/repack-geometry.c +++ b/repack-geometry.c @@ -249,7 +249,8 @@ static void remove_redundant_packs(struct packed_git **pack, uint32_t pack_nr, struct string_list *names, struct existing_packs *existing, - const char *packdir) + const char *packdir, + bool wrote_incremental_midx) { const struct git_hash_algo *algop = existing->repo->hash_algo; struct strbuf buf = STRBUF_INIT; @@ -269,7 +270,8 @@ static void remove_redundant_packs(struct packed_git **pack, (string_list_has_string(&existing->kept_packs, buf.buf))) continue; - repack_remove_redundant_pack(existing->repo, packdir, buf.buf); + repack_remove_redundant_pack(existing->repo, packdir, buf.buf, + wrote_incremental_midx); } strbuf_release(&buf); @@ -278,12 
+280,13 @@ static void remove_redundant_packs(struct packed_git **pack, void pack_geometry_remove_redundant(struct pack_geometry *geometry, struct string_list *names, struct existing_packs *existing, - const char *packdir) + const char *packdir, + bool wrote_incremental_midx) { remove_redundant_packs(geometry->pack, geometry->split, - names, existing, packdir); + names, existing, packdir, wrote_incremental_midx); remove_redundant_packs(geometry->promisor_pack, geometry->promisor_split, - names, existing, packdir); + names, existing, packdir, wrote_incremental_midx); } void pack_geometry_release(struct pack_geometry *geometry) diff --git a/repack-midx.c b/repack-midx.c index f97331fb1b743a..4f5deeb97bf033 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -887,6 +887,7 @@ static int write_midx_incremental(struct repack_write_midx_opts *opts) struct midx_compaction_step *steps = NULL; struct strbuf lock_name = STRBUF_INIT; struct lock_file lf; + struct strvec keep_hashes = STRVEC_INIT; size_t steps_nr = 0; size_t i; int ret = 0; @@ -932,11 +933,15 @@ static int write_midx_incremental(struct repack_write_midx_opts *opts) BUG("missing result for compaction step %"PRIuMAX, (uintmax_t)i); fprintf(get_lock_file_fp(&lf), "%s\n", step->csum); + strvec_push(&keep_hashes, step->csum); } commit_lock_file(&lf); + clear_incremental_midx_files(opts->existing->repo, &keep_hashes); + done: + strvec_clear(&keep_hashes); strbuf_release(&lock_name); for (i = 0; i < steps_nr; i++) midx_compaction_step_release(&steps[i]); diff --git a/repack.c b/repack.c index 2ee6b51420aa54..571dabb665ee9b 100644 --- a/repack.c +++ b/repack.c @@ -55,14 +55,18 @@ void pack_objects_args_release(struct pack_objects_args *args) } void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, - const char *base_name) + const char *base_name, + bool wrote_incremental_midx) { struct strbuf buf = STRBUF_INIT; struct odb_source *source = repo->objects->sources; struct multi_pack_index *m = 
get_multi_pack_index(source); strbuf_addf(&buf, "%s.pack", base_name); - if (m && source->local && midx_contains_pack(m, buf.buf)) + if (m && source->local && midx_contains_pack(m, buf.buf)) { clear_midx_file(repo); + if (!wrote_incremental_midx) + clear_incremental_midx_files(repo, NULL); + } strbuf_insertf(&buf, 0, "%s/", dir_name); unlink_pack_path(buf.buf, 1); strbuf_release(&buf); @@ -250,25 +254,63 @@ void existing_packs_mark_for_deletion(struct existing_packs *existing, &existing->cruft_packs); } +/* + * Mark every pack that is referenced by the existing MIDX chain as + * retained, so that a subsequent call to + * existing_packs_mark_for_deletion() will not mark them for deletion. + * + * This is used when writing an incremental MIDX layer on top of an + * existing chain: retained layers continue to reference the same + * packs on disk, so those packs must not be unlinked even if the + * freshly-written pack supersedes them. + */ +void existing_packs_retain_midx_packs(struct existing_packs *existing) +{ + struct string_list_item *item; + struct strbuf buf = STRBUF_INIT; + + for_each_string_list_item(item, &existing->midx_packs) { + struct string_list_item *found; + + strbuf_reset(&buf); + strbuf_addstr(&buf, item->string); + strbuf_strip_suffix(&buf, ".pack"); + strbuf_strip_suffix(&buf, ".idx"); + + found = string_list_lookup(&existing->non_kept_packs, buf.buf); + if (found) + existing_packs_mark_retained(found); + + found = string_list_lookup(&existing->cruft_packs, buf.buf); + if (found) + existing_packs_mark_retained(found); + } + + strbuf_release(&buf); +} + static void remove_redundant_packs_1(struct repository *repo, struct string_list *packs, - const char *packdir) + const char *packdir, + bool wrote_incremental_midx) { struct string_list_item *item; for_each_string_list_item(item, packs) { if (!existing_pack_is_marked_for_deletion(item)) continue; - repack_remove_redundant_pack(repo, packdir, item->string); + repack_remove_redundant_pack(repo, 
packdir, item->string, + wrote_incremental_midx); } } void existing_packs_remove_redundant(struct existing_packs *existing, - const char *packdir) + const char *packdir, + bool wrote_incremental_midx) { remove_redundant_packs_1(existing->repo, &existing->non_kept_packs, - packdir); + packdir, wrote_incremental_midx); remove_redundant_packs_1(existing->repo, &existing->cruft_packs, - packdir); + packdir, wrote_incremental_midx); } void existing_packs_release(struct existing_packs *existing) diff --git a/repack.h b/repack.h index 831ccfb1c6ce77..f9fbc895f02940 100644 --- a/repack.h +++ b/repack.h @@ -34,7 +34,8 @@ void prepare_pack_objects(struct child_process *cmd, void pack_objects_args_release(struct pack_objects_args *args); void repack_remove_redundant_pack(struct repository *repo, const char *dir_name, - const char *base_name); + const char *base_name, + bool wrote_incremental_midx); struct write_pack_opts { struct pack_objects_args *po_args; @@ -83,8 +84,10 @@ void existing_packs_retain_cruft(struct existing_packs *existing, struct packed_git *cruft); void existing_packs_mark_for_deletion(struct existing_packs *existing, struct string_list *names); +void existing_packs_retain_midx_packs(struct existing_packs *existing); void existing_packs_remove_redundant(struct existing_packs *existing, - const char *packdir); + const char *packdir, + bool wrote_incremental_midx); void existing_packs_release(struct existing_packs *existing); struct generated_pack; @@ -129,7 +132,8 @@ struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry); void pack_geometry_remove_redundant(struct pack_geometry *geometry, struct string_list *names, struct existing_packs *existing, - const char *packdir); + const char *packdir, + bool wrote_incremental_midx); void pack_geometry_release(struct pack_geometry *geometry); struct tempfile; diff --git a/t/meson.build b/t/meson.build index 7528e5cda5fef0..25f0d823d8ebbd 100644 --- a/t/meson.build +++ b/t/meson.build @@ 
-951,6 +951,7 @@ integration_tests = [ 't7702-repack-cyclic-alternate.sh', 't7703-repack-geometric.sh', 't7704-repack-cruft.sh', + 't7705-repack-incremental-midx.sh', 't7800-difftool.sh', 't7810-grep.sh', 't7811-grep-open.sh', diff --git a/t/t7705-repack-incremental-midx.sh b/t/t7705-repack-incremental-midx.sh new file mode 100755 index 00000000000000..9e317ff6e8f2fb --- /dev/null +++ b/t/t7705-repack-incremental-midx.sh @@ -0,0 +1,470 @@ +#!/bin/sh + +test_description='git repack --write-midx=incremental' + +. ./test-lib.sh + +GIT_TEST_MULTI_PACK_INDEX=0 +GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 +GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL=0 + +objdir=.git/objects +packdir=$objdir/pack +midxdir=$packdir/multi-pack-index.d +midx_chain=$midxdir/multi-pack-index-chain + +# incrementally_repack N +# +# Make "N" new commits, each stored in their own pack, and then repacked +# with the --write-midx=incremental strategy. +incrementally_repack () { + for i in $(test_seq 1 "$1") + do + test_commit "$i" && + + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + git multi-pack-index verify || return 1 + done +} + +# Create packs with geometrically increasing sizes so that they +# satisfy the geometric progression and survive a --geometric=2 +# repack without being rolled up. Creates 3 packs containing 1, +# 2, and 6 commits (3, 6, and 18 objects) respectively. +create_geometric_packs () { + test_commit "small" && + git repack -d && + + test_commit_bulk --message="medium" 2 && + test_commit_bulk --message="large" 6 && + + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index +} + +# create_layer +# +# Creates a new MIDX layer with the contents of "test_commit_bulk $@". +create_layer () { + test_commit_bulk "$@" && + + git multi-pack-index write --incremental --bitmap +} + +# create_layers +# +# Reads lines of "<msg> <nr>" from stdin and creates a new MIDX +# layer for each line. See create_layer above for more. 
+create_layers () { + while read msg nr + do + create_layer --message="$msg" "$nr" || return 1 + done +} + +test_expect_success '--write-midx=incremental requires --geometric' ' + test_must_fail git repack --write-midx=incremental 2>err && + + test_grep -- "--write-midx=incremental requires --geometric" err +' + +test_expect_success 'below layer threshold, tip packs excluded' ' + git init below-layer-threshold-tip-packs-excluded && + ( + cd below-layer-threshold-tip-packs-excluded && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 4 && + git config repack.midxsplitfactor 2 && + + # Create 3 packs forming a geometric progression by + # object count such that they are unmodified by the + # initial repack. The MIDX chain thusly contains a + # single layer with three packs. + create_geometric_packs && + ls $packdir/pack-*.idx | sort >packs.before && + test_line_count = 1 $midx_chain && + cp $midx_chain $midx_chain.before && + + # Repack a new commit. Since the layer threshold is + # unmet, a new MIDX layer is added on top of the + # existing one. + test_commit extra && + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + git multi-pack-index verify && + + ls $packdir/pack-*.idx | sort >packs.after && + comm -13 packs.before packs.after >packs.new && + test_line_count = 1 packs.new && + + test_line_count = 2 "$midx_chain" && + head -n 1 "$midx_chain.before" >expect && + head -n 1 "$midx_chain" >actual && + test_cmp expect actual + ) +' + +test_expect_success 'above layer threshold, tip packs repacked' ' + git init above-layer-threshold-tip-packs-repacked && + ( + cd above-layer-threshold-tip-packs-repacked && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 2 && + git config repack.midxsplitfactor 2 && + + # Same setup, but with the layer threshold set to 2. + # Since the tip MIDX layer meets that threshold, its + # packs are considered repack candidates. 
+ create_geometric_packs && + cp $midx_chain $midx_chain.before && + + # Perturb the existing progression such that it is + # rolled up into a single new pack, invalidating the + # existing MIDX layer and replacing it with a new one. + test_commit extra && + git repack -d && + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + ! test_cmp $midx_chain.before $midx_chain && + test_line_count = 1 $midx_chain && + + git multi-pack-index verify + ) +' + +test_expect_success 'above layer threshold, tip layer preserved' ' + git init above-layer-threshold-tip-layer-preserved && + ( + cd above-layer-threshold-tip-layer-preserved && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 2 && + git config repack.midxsplitfactor 2 && + + test_commit_bulk --message="medium" 2 && + test_commit_bulk --message="large" 6 && + + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + test_line_count = 1 "$midx_chain" && + ls $packdir/pack-*.idx | sort >packs.before && + cp $midx_chain $midx_chain.before && + + # Create objects to form a pack satisfying the geometric + # progression (thus preserving the tip layer), but not + # so large that it meets the layer merging condition. 
+ test_commit_bulk --message="small" 1 && + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + ls $packdir/pack-*.idx | sort >packs.after && + comm -13 packs.before packs.after >packs.new && + + test_line_count = 1 packs.new && + test_line_count = 3 packs.after && + test_line_count = 2 "$midx_chain" && + head -n 1 "$midx_chain.before" >expect && + head -n 1 "$midx_chain" >actual && + test_cmp expect actual && + + git multi-pack-index verify + ) +' + +test_expect_success 'above layer threshold, tip packs preserved' ' + git init above-layer-threshold-tip-packs-preserved && + ( + cd above-layer-threshold-tip-packs-preserved && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 2 && + git config repack.midxsplitfactor 2 && + + create_geometric_packs && + ls $packdir/pack-*.idx | sort >packs.before && + cp $midx_chain $midx_chain.before && + + # Same setup as above, but this time the new objects do + # not satisfy the new layer merging condition, resulting + # in a new tip layer. + test_commit_bulk --message="huge" 18 && + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + ls $packdir/pack-*.idx | sort >packs.after && + comm -13 packs.before packs.after >packs.new && + + ! test_cmp $midx_chain.before $midx_chain && + test_line_count = 1 $midx_chain && + test_line_count = 1 packs.new && + + git multi-pack-index verify + ) +' + +test_expect_success 'new tip absorbs multiple layers' ' + git init new-tip-absorbs-multiple-layers && + ( + cd new-tip-absorbs-multiple-layers && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 1 && + git config repack.midxsplitfactor 2 && + + # Build a 4-layer chain where each layer is too small to + # absorb the one below it. 
The sizes must satisfy L(n) < + # L(n-1)/2 for each adjacent pair: + # + # L0 (oldest): 75 obj (25 commits) + # L1: 21 obj (7 commits, 21 < 75/2) + # L2: 9 obj (3 commits, 9 < 21/2) + # L3 (tip): 3 obj (1 commit, 3 < 9/2) + create_layers <<-\EOF && + L0 25 + L1 7 + L2 3 + L3 1 + EOF + + test_line_count = 4 "$midx_chain" && + cp $midx_chain $midx_chain.before && + + # Now add a new commit. The merging condition is + # satisfied between L3-L1, but violated at L0, which is + # too large relative to the accumulated size. + # + # As a result, the chain shrinks from 4 to 2 layers. + test_commit new && + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + ! test_cmp $midx_chain.before $midx_chain && + test_line_count = 2 "$midx_chain" && + git multi-pack-index verify + ) +' + +test_expect_success 'compaction of older layers' ' + git init compaction-of-older-layers && + ( + cd compaction-of-older-layers && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 1 && + git config repack.midxsplitfactor 2 && + + # Build a chain with two small layers at the bottom + # and a larger barrier layer on top, producing a + # chain that violates the compaction invariant, since + # the two small layers would normally have been merged. + create_layers <<-\EOF && + one 2 + two 4 + barrier 54 + EOF + + cp $midx_chain $midx_chain.before && + + # Running an incremental repack compacts the two + # small layers at the bottom of the chain as a + # separate step in the compaction plan. 
+ test_commit another && + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + test_line_count = 2 "$midx_chain" && + git multi-pack-index verify + ) +' + +test_expect_success 'geometric rollup with surviving tip packs' ' + git init geometric-rollup-with-surviving-tip-packs && + ( + cd geometric-rollup-with-surviving-tip-packs && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 1 && + git config repack.midxsplitfactor 2 && + + # Create a pack large enough to anchor the geometric + # progression when small packs are added alongside it. + create_layer --message="big" 5 && + + test_line_count = 1 "$midx_chain" && + cp $midx_chain $midx_chain.before && + + # Repack a small number of objects such that the + # progression is unbothered. Note that the existing pack + # is considered a repack candidate as the new layer + # threshold is set to 1. + test_commit small-1 && + git repack -d && + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + ! test_cmp $midx_chain.before $midx_chain && + cp $midx_chain $midx_chain.before + ) +' + +test_expect_success 'kept packs are excluded from repack' ' + git init kept-packs-excluded-from-repack && + ( + cd kept-packs-excluded-from-repack && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 1 && + git config repack.midxsplitfactor 2 && + + # Create two equal-sized packs, marking one as kept. + for i in A B + do + test_commit "$i" && git repack -d || return 1 + done && + + keep=$(ls $packdir/pack-*.idx | head -n 1) && + touch "${keep%.idx}.keep" && + + # The kept pack is excluded as a repacking candidate + # entirely, so no rollup occurs as there is only one + # non-kept pack. A new MIDX layer is written containing + # that pack. 
+ git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + test-tool read-midx $objdir >actual && + grep "^pack-.*\.idx$" actual >actual.packs && + test_line_count = 1 actual.packs && + test_grep ! "$keep" actual.packs && + + git multi-pack-index verify && + + # All objects (from both kept and non-kept packs) + # must still be accessible. + git fsck + ) +' + +test_expect_success 'incremental MIDX with --max-pack-size' ' + git init incremental-midx-with--max-pack-size && + ( + cd incremental-midx-with--max-pack-size && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 1 && + git config repack.midxsplitfactor 2 && + + create_layer --message="base" 1 && + + # Now add enough data that a small --max-pack-size will + # cause pack-objects to split its output. Create objects + # large enough to fill multiple packs. + test-tool genrandom foo 1M >big1 && + test-tool genrandom bar 1M >big2 && + git add big1 big2 && + test_tick && + git commit -a -m "big blobs" && + git repack -d && + + git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index --max-pack-size=1M && + + test_line_count = 1 "$midx_chain" && + test-tool read-midx $objdir >actual && + grep "^pack-.*\.idx$" actual >actual.packs && + test_line_count -gt 1 actual.packs && + + git multi-pack-index verify + ) +' + +test_expect_success 'noop repack preserves valid MIDX chain' ' + git init noop-repack-preserves-valid-midx-chain && + ( + cd noop-repack-preserves-valid-midx-chain && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 1 && + git config repack.midxsplitfactor 2 && + + create_layer --message="base" 1 && + + git multi-pack-index verify && + cp $midx_chain $midx_chain.before && + + # Running again with no new objects should not break + # the MIDX chain. It produces "Nothing new to pack." 
+ git repack --geometric=2 -d --write-midx=incremental \ + --write-bitmap-index && + + test_cmp $midx_chain.before $midx_chain && + + git multi-pack-index verify && + git fsck + ) +' + +test_expect_success 'repack -ad removes stale incremental chain' ' + git init repack--ad-removes-stale-incremental-chain && + ( + cd repack--ad-removes-stale-incremental-chain && + + git config maintenance.auto false && + git config repack.midxnewlayerthreshold 1 && + git config repack.midxsplitfactor 2 && + + create_layers <<-\EOF && + one 1 + two 1 + EOF + + test_path_is_file $midx_chain && + test_line_count = 2 $midx_chain && + + git repack -ad && + + test_path_is_missing $packdir/multi-pack-index && + test_dir_is_empty $midxdir + ) +' + +test_expect_success 'repack rejects invalid midxSplitFactor' ' + test_when_finished "rm -fr bad-split-factor" && + git init bad-split-factor && + ( + cd bad-split-factor && + test_commit base && + + for v in 0 1 -1 + do + test_must_fail git -c repack.midxSplitFactor=$v \ + repack -d --geometric=2 --write-midx=incremental 2>err && + test_grep "invalid value for --midx-split-factor" err || + return 1 + done + ) +' + +test_expect_success 'repack rejects invalid midxNewLayerThreshold' ' + test_when_finished "rm -fr bad-layer-threshold" && + git init bad-layer-threshold && + ( + cd bad-layer-threshold && + test_commit base && + + for v in 0 -1 + do + test_must_fail git -c repack.midxNewLayerThreshold=$v \ + repack -d --geometric=2 --write-midx=incremental 2>err && + test_grep "invalid value for --midx-new-layer-threshold" err || + return 1 + done + ) +' + +test_done From 06733a50eeec4205011d210d3932c5b708a665e9 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 19 May 2026 11:58:25 -0400 Subject: [PATCH 84/93] repack: allow `--write-midx=incremental` without `--geometric` Previously, `--write-midx=incremental` required `--geometric` and would die() without it. Relax this restriction so that incremental MIDX repacking can be used independently. 
Without `--geometric`, the behavior is append-only: a single new MIDX layer is created containing whatever packs were written by the repack and appended to the existing chain (or a new chain is started). Existing layers are preserved as-is with no compaction or merging. Implement this via a new repack_make_midx_append_plan() that builds a plan consisting of a WRITE step for the freshly written packs followed by COPY steps for every existing MIDX layer. The existing compaction plan (repack_make_midx_compaction_plan) is used only when `--geometric` is active. Update the documentation to describe the behavior with and without `--geometric`, and replace the test that enforced the old restriction with one exercising append-only incremental MIDX repacking. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/git-repack.adoc | 19 +++++---- builtin/repack.c | 3 -- repack-midx.c | 64 +++++++++++++++++++++++++++-- t/t7705-repack-incremental-midx.sh | 65 +++++++++++++++++++++++++++--- 4 files changed, 133 insertions(+), 18 deletions(-) diff --git a/Documentation/git-repack.adoc b/Documentation/git-repack.adoc index 27a99cc46f4ada..72c42015e23f94 100644 --- a/Documentation/git-repack.adoc +++ b/Documentation/git-repack.adoc @@ -263,14 +263,19 @@ linkgit:git-multi-pack-index[1]). `incremental`;; Write an incremental MIDX chain instead of a single - flat MIDX. This mode requires `--geometric`. + flat MIDX. + -The incremental mode maintains a chain of MIDX layers that is compacted -over time using a geometric merging strategy. Each repack creates a new -tip layer containing the newly written pack(s). Adjacent layers are then -merged whenever the newer layer's object count exceeds -`1/repack.midxSplitFactor` of the next deeper layer's count. Layers -that do not meet this condition are retained as-is. +Without `--geometric`, a new MIDX layer is appended to the existing +chain (or a new chain is started) containing whatever packs were written +by the repack. 
Existing layers are preserved as-is. ++ +When combined with `--geometric`, the incremental mode maintains a chain +of MIDX layers that is compacted over time using a geometric merging +strategy. Each repack creates a new tip layer containing the newly +written pack(s). Adjacent layers are then merged whenever the newer +layer's object count exceeds `1/repack.midxSplitFactor` of the next +deeper layer's count. Layers that do not meet this condition are +retained as-is. + The result is that newer (tip) layers tend to contain many small packs with relatively few objects, while older (deeper) layers contain fewer, diff --git a/builtin/repack.c b/builtin/repack.c index 5ffa18e085e49a..1524a9c13ad5b8 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -263,9 +263,6 @@ int cmd_repack(int argc, if (pack_everything & PACK_CRUFT) pack_everything |= ALL_INTO_ONE; - if (write_midx == REPACK_WRITE_MIDX_INCREMENTAL && !geometry.split_factor) - die(_("--write-midx=incremental requires --geometric")); - if (write_bitmaps < 0) { if (write_midx == REPACK_WRITE_MIDX_NONE && (!(pack_everything & ALL_INTO_ONE) || !is_bare_repository())) diff --git a/repack-midx.c b/repack-midx.c index 4f5deeb97bf033..b6b1de718058da 100644 --- a/repack-midx.c +++ b/repack-midx.c @@ -548,6 +548,60 @@ static void midx_compaction_step_release(struct midx_compaction_step *step) free(step->csum); } +/* + * Build an append-only MIDX plan: a single WRITE step for the freshly + * written packs, plus COPY steps for every existing layer. No + * compaction or merging is performed. 
+ */ +static void repack_make_midx_append_plan(struct repack_write_midx_opts *opts, + struct midx_compaction_step **steps_p, + size_t *steps_nr_p) +{ + struct multi_pack_index *m; + struct midx_compaction_step *steps = NULL; + struct midx_compaction_step *step; + size_t steps_nr = 0, steps_alloc = 0; + + odb_reprepare(opts->existing->repo->objects); + m = get_multi_pack_index(opts->existing->source); + + if (opts->names->nr) { + struct strbuf buf = STRBUF_INIT; + uint32_t i; + + ALLOC_GROW(steps, st_add(steps_nr, 1), steps_alloc); + + step = &steps[steps_nr++]; + memset(step, 0, sizeof(*step)); + + step->type = MIDX_COMPACTION_STEP_WRITE; + string_list_init_dup(&step->u.write); + + for (i = 0; i < opts->names->nr; i++) { + strbuf_reset(&buf); + strbuf_addf(&buf, "pack-%s.idx", + opts->names->items[i].string); + string_list_append(&step->u.write, buf.buf); + } + + strbuf_release(&buf); + } + + for (; m; m = m->base_midx) { + ALLOC_GROW(steps, st_add(steps_nr, 1), steps_alloc); + + step = &steps[steps_nr++]; + memset(step, 0, sizeof(*step)); + + step->type = MIDX_COMPACTION_STEP_COPY; + step->u.copy = m; + step->objects_nr = m->num_objects; + } + + *steps_p = steps; + *steps_nr_p = steps_nr; +} + static int repack_make_midx_compaction_plan(struct repack_write_midx_opts *opts, struct midx_compaction_step **steps_p, size_t *steps_nr_p) @@ -904,9 +958,13 @@ static int write_midx_incremental(struct repack_write_midx_opts *opts) goto done; } - if (repack_make_midx_compaction_plan(opts, &steps, &steps_nr) < 0) { - ret = error(_("unable to generate compaction plan")); - goto done; + if (opts->geometry->split_factor) { + if (repack_make_midx_compaction_plan(opts, &steps, &steps_nr) < 0) { + ret = error(_("unable to generate compaction plan")); + goto done; + } + } else { + repack_make_midx_append_plan(opts, &steps, &steps_nr); } for (i = 0; i < steps_nr; i++) { diff --git a/t/t7705-repack-incremental-midx.sh b/t/t7705-repack-incremental-midx.sh index 
9e317ff6e8f2fb..25a8c40e8ee5cc 100755 --- a/t/t7705-repack-incremental-midx.sh +++ b/t/t7705-repack-incremental-midx.sh @@ -63,10 +63,36 @@ create_layers () { done } -test_expect_success '--write-midx=incremental requires --geometric' ' - test_must_fail git repack --write-midx=incremental 2>err && +test_expect_success '--write-midx=incremental without --geometric' ' + git init incremental-without-geometric && + ( + cd incremental-without-geometric && + + git config maintenance.auto false && + + test_commit first && + git repack -d && - test_grep -- "--write-midx=incremental requires --geometric" err + test_commit second && + git repack --write-midx=incremental && + + git multi-pack-index verify && + test_line_count = 1 $midx_chain && + cp $midx_chain $midx_chain.before && + + # A second repack appends a new layer without + # disturbing the existing one. + test_commit third && + git repack --write-midx=incremental && + + git multi-pack-index verify && + test_line_count = 2 $midx_chain && + head -n 1 $midx_chain.before >expect && + head -n 1 $midx_chain >actual && + test_cmp expect actual && + + git fsck + ) ' test_expect_success 'below layer threshold, tip packs excluded' ' @@ -334,8 +360,7 @@ test_expect_success 'kept packs are excluded from repack' ' # entirely, so no rollup occurs as there is only one # non-kept pack. A new MIDX layer is written containing # that pack. - git repack --geometric=2 -d --write-midx=incremental \ - --write-bitmap-index && + git repack --geometric=2 -d --write-midx=incremental && test-tool read-midx $objdir >actual && grep "^pack-.*\.idx$" actual >actual.packs && @@ -433,6 +458,36 @@ test_expect_success 'repack -ad removes stale incremental chain' ' ) ' +test_expect_success 'repack -ad --write-midx=incremental is safe' ' + git init ad-incremental-midx && + ( + cd ad-incremental-midx && + + git config maintenance.auto false && + + # Build a MIDX chain with multiple layers referencing + # distinct packs. 
+ test_commit first && + git repack -d && + + test_commit second && + git repack -d --write-midx=incremental && + + git multi-pack-index verify && + test_line_count = 1 $midx_chain && + + # Now do a full -ad repack. The new pack contains all + # objects, but any retained MIDX layers still reference + # the now-deleted packs. + test_commit third && + git repack -ad --write-midx=incremental && + + git multi-pack-index verify && + git fsck && + git rev-list --all --objects >/dev/null + ) +' + test_expect_success 'repack rejects invalid midxSplitFactor' ' test_when_finished "rm -fr bad-split-factor" && git init bad-split-factor && From 4e5b2a37956f40acd016f078b1e9a883e97a9f27 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 19 May 2026 16:24:48 +0000 Subject: [PATCH 85/93] t5516: fix test order flakiness The 'fetch follows tags by default' test sorts using 'sort -k 4', but for-each-ref output only has 3 columns. This relies on sort treating records with fewer fields as having an empty fourth field, which may produce unstable results depending on locale. This appears to be an accident added in 3f763ddf28 (fetch: set remote/HEAD if it does not exist, 2024-11-22). Use 'sort -k 3' to match the actual number of columns in the output. 
Reviewed-by: Matthew John Cheetham Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t5516-fetch-push.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t5516-fetch-push.sh b/t/t5516-fetch-push.sh index 29e2f176081561..ac8447f21ed963 100755 --- a/t/t5516-fetch-push.sh +++ b/t/t5516-fetch-push.sh @@ -1349,7 +1349,7 @@ test_expect_success 'fetch follows tags by default' ' git for-each-ref >tmp1 && sed -n "p; s|refs/heads/main$|refs/remotes/origin/main|p" tmp1 | sed -n "p; s|refs/heads/main$|refs/remotes/origin/HEAD|p" | - sort -k 4 >../expect + sort -k 3 >../expect ) && test_when_finished "rm -rf dst" && git init dst && From 1a445fc60b84df95253d740f3a112343ab5ed8d2 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 19 May 2026 16:24:49 +0000 Subject: [PATCH 86/93] fetch: add --negotiation-restrict option The --negotiation-tip option to 'git fetch' and 'git pull' allows users to specify that they want to focus negotiation on a small set of references. This is a _restriction_ on the negotiation set, helping to focus the negotiation when the ref count is high. However, it doesn't allow for the ability to opportunistically select references beyond that list. This subtle detail that this is a 'maximum set' and not a 'minimum set' is not immediately clear from the option name. This makes it more complicated to add a new option that provides the complementary behavior of a minimum set. For now, create a new synonym option, --negotiation-restrict, that behaves identically to --negotiation-tip. Update the documentation to make it clear that this new name is the preferred option, but we keep the old name for compatibility. Mark --negotiation-tip as an alias of the new, preferred option. Update a few warning messages with the new option, but also make them translatable with the option name inserted by formatting. At least one of these messages will be reused later for a new option. 
Reviewed-by: Matthew John Cheetham Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/config/fetch.adoc | 2 +- Documentation/fetch-options.adoc | 6 +++++- builtin/fetch.c | 13 ++++++++----- builtin/pull.c | 3 ++- send-pack.c | 2 +- t/t5510-fetch.sh | 25 +++++++++++++++++++++++++ t/t5702-protocol-v2.sh | 4 ++-- transport-helper.c | 3 ++- 8 files changed, 46 insertions(+), 12 deletions(-) diff --git a/Documentation/config/fetch.adoc b/Documentation/config/fetch.adoc index cd40db0cad1c36..04ac90912d3a58 100644 --- a/Documentation/config/fetch.adoc +++ b/Documentation/config/fetch.adoc @@ -76,7 +76,7 @@ default is `skipping`. Unknown values will cause `git fetch` to error out. + -See also the `--negotiate-only` and `--negotiation-tip` options to +See also the `--negotiate-only` and `--negotiation-restrict` options to linkgit:git-fetch[1]. `fetch.showForcedUpdates`:: diff --git a/Documentation/fetch-options.adoc b/Documentation/fetch-options.adoc index 81a9d7f9bbc11d..d39cecb4468991 100644 --- a/Documentation/fetch-options.adoc +++ b/Documentation/fetch-options.adoc @@ -49,6 +49,7 @@ the current repository has the same history as the source repository. `.git/shallow`. This option updates `.git/shallow` and accepts such refs. +`--negotiation-restrict=(<commit>|<glob>)`:: `--negotiation-tip=(<commit>|<glob>)`:: By default, Git will report, to the server, commits reachable from all local refs to find common commits in an attempt to @@ -58,6 +59,9 @@ the current repository has the same history as the source repository. local ref is likely to have commits in common with the upstream ref being fetched. + +`--negotiation-restrict` is the preferred name for this option; +`--negotiation-tip` is accepted as a synonym. ++ This option may be specified more than once; if so, Git will report commits reachable from any of the given commits. 
+ @@ -71,7 +75,7 @@ configuration variables documented in linkgit:git-config[1], and the `--negotiate-only`:: Do not fetch anything from the server, and instead print the - ancestors of the provided `--negotiation-tip=` arguments, + ancestors of the provided `--negotiation-restrict=` arguments, which we have in common with the server. + This is incompatible with `--recurse-submodules=(yes|on-demand)`. diff --git a/builtin/fetch.c b/builtin/fetch.c index 4795b2a13c30e3..fc950fe35b5e73 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1558,8 +1558,8 @@ static void add_negotiation_tips(struct git_transport_options *smart_options) refs_for_each_ref_ext(get_main_ref_store(the_repository), add_oid, oids, &opts); if (old_nr == oids->nr) - warning("ignoring --negotiation-tip=%s because it does not match any refs", - s); + warning(_("ignoring %s=%s because it does not match any refs"), + "--negotiation-restrict", s); } smart_options->negotiation_tips = oids; } @@ -1599,7 +1599,8 @@ static struct transport *prepare_transport(struct remote *remote, int deepen, if (transport->smart_options) add_negotiation_tips(transport->smart_options); else - warning("ignoring --negotiation-tip because the protocol does not support it"); + warning(_("ignoring %s because the protocol does not support it"), + "--negotiation-restrict"); } return transport; } @@ -2565,8 +2566,9 @@ int cmd_fetch(int argc, N_("specify fetch refmap"), PARSE_OPT_NONEG, parse_refmap_arg), OPT_STRING_LIST('o', "server-option", &server_options, N_("server-specific"), N_("option to transmit")), OPT_IPVERSION(&family), - OPT_STRING_LIST(0, "negotiation-tip", &negotiation_tip, N_("revision"), + OPT_STRING_LIST(0, "negotiation-restrict", &negotiation_tip, N_("revision"), N_("report that we have only objects reachable from this object")), + OPT_ALIAS(0, "negotiation-tip", "negotiation-restrict"), OPT_BOOL(0, "negotiate-only", &negotiate_only, N_("do not fetch a packfile; instead, print ancestors of negotiation tips")), 
OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), @@ -2657,7 +2659,8 @@ int cmd_fetch(int argc, } if (negotiate_only && !negotiation_tip.nr) - die(_("--negotiate-only needs one or more --negotiation-tip=*")); + die(_("%s needs one or more %s"), "--negotiate-only", + "--negotiation-restrict=*"); if (deepen_relative) { if (deepen_relative < 0) diff --git a/builtin/pull.c b/builtin/pull.c index 7e67fdce97fd1d..cc6ce485fc4e70 100644 --- a/builtin/pull.c +++ b/builtin/pull.c @@ -996,9 +996,10 @@ int cmd_pull(int argc, OPT_PASSTHRU('6', "ipv6", &opt_ipv6, NULL, N_("use IPv6 addresses only"), PARSE_OPT_NOARG), - OPT_PASSTHRU_ARGV(0, "negotiation-tip", &opt_fetch, N_("revision"), + OPT_PASSTHRU_ARGV(0, "negotiation-restrict", &opt_fetch, N_("revision"), N_("report that we have only objects reachable from this object"), 0), + OPT_ALIAS(0, "negotiation-tip", "negotiation-restrict"), OPT_BOOL(0, "show-forced-updates", &opt_show_forced_updates, N_("check for forced-updates on all updated branches")), OPT_PASSTHRU(0, "set-upstream", &set_upstream, NULL, diff --git a/send-pack.c b/send-pack.c index 67d6987b1ccd7e..3d5d36ba3baa2e 100644 --- a/send-pack.c +++ b/send-pack.c @@ -447,7 +447,7 @@ static void get_commons_through_negotiation(struct repository *r, strvec_pushl(&child.args, "fetch", "--negotiate-only", NULL); for (ref = remote_refs; ref; ref = ref->next) { if (!is_null_oid(&ref->new_oid)) { - strvec_pushf(&child.args, "--negotiation-tip=%s", + strvec_pushf(&child.args, "--negotiation-restrict=%s", oid_to_hex(&ref->new_oid)); nr_negotiation_tip++; } diff --git a/t/t5510-fetch.sh b/t/t5510-fetch.sh index 5dcb4b51a47d88..dc3ce56d84c743 100755 --- a/t/t5510-fetch.sh +++ b/t/t5510-fetch.sh @@ -1460,6 +1460,31 @@ EOF test_cmp fatal-expect fatal-actual ' +test_expect_success '--negotiation-restrict limits "have" lines sent' ' + setup_negotiation_tip server server 0 && + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \ + --negotiation-restrict=alpha_1 
--negotiation-restrict=beta_1 \ + origin alpha_s beta_s && + check_negotiation_tip +' + +test_expect_success '--negotiation-restrict understands globs' ' + setup_negotiation_tip server server 0 && + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \ + --negotiation-restrict=*_1 \ + origin alpha_s beta_s && + check_negotiation_tip +' + +test_expect_success '--negotiation-restrict and --negotiation-tip can be mixed' ' + setup_negotiation_tip server server 0 && + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \ + --negotiation-restrict=alpha_1 \ + --negotiation-tip=beta_1 \ + origin alpha_s beta_s && + check_negotiation_tip +' + test_expect_success SYMLINKS 'clone does not get confused by a D/F conflict' ' git init df-conflict && ( diff --git a/t/t5702-protocol-v2.sh b/t/t5702-protocol-v2.sh index f826ac46a5be5a..9f6cf4142d5b83 100755 --- a/t/t5702-protocol-v2.sh +++ b/t/t5702-protocol-v2.sh @@ -869,14 +869,14 @@ setup_negotiate_only () { test_commit -C client three } -test_expect_success 'usage: --negotiate-only without --negotiation-tip' ' +test_expect_success 'usage: --negotiate-only without --negotiation-restrict' ' SERVER="server" && URI="file://$(pwd)/server" && setup_negotiate_only "$SERVER" "$URI" && cat >err.expect <<-\EOF && - fatal: --negotiate-only needs one or more --negotiation-tip=* + fatal: --negotiate-only needs one or more --negotiation-restrict=* EOF test_must_fail git -c protocol.version=2 -C client fetch \ diff --git a/transport-helper.c b/transport-helper.c index 4d95d84f9e4d05..dd78d406681f0e 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -755,7 +755,8 @@ static int fetch_refs(struct transport *transport, } if (data->transport_options.negotiation_tips) - warning("Ignoring --negotiation-tip because the protocol does not support it."); + warning(_("ignoring %s because the protocol does not support it."), + "--negotiation-restrict"); if (data->fetch) return fetch_with_fetch(transport, nr_heads, to_fetch); From 
4aef7dbb063cfd0923baae5a431913256edad667 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 19 May 2026 16:24:50 +0000 Subject: [PATCH 87/93] transport: rename negotiation_tips The previous change added the --negotiation-restrict synonym for the --negotiation-tip option for 'git fetch'. In anticipation of adding a new option that behaves similarly but with distinct changes to its behavior, rename the internal representation of this data from 'negotiation_tips' to 'negotiation_restrict_tips'. The 'tips' part is kept because this is an oid_array in the transport layer. This requires the builtin to handle parsing refs into collections of oids so the transport layer can handle this cleaner form of the data. Also update the string_list used to store the inputs from command-line options. Reviewed-by: Matthew John Cheetham Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/fetch.c | 18 +++++++++--------- fetch-pack.c | 18 +++++++++--------- fetch-pack.h | 4 ++-- transport-helper.c | 2 +- transport.c | 10 +++++----- transport.h | 4 ++-- 6 files changed, 28 insertions(+), 28 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index fc950fe35b5e73..2ba0051d520cdf 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -98,7 +98,7 @@ static struct transport *gtransport; static struct transport *gsecondary; static struct refspec refmap = REFSPEC_INIT_FETCH; static struct string_list server_options = STRING_LIST_INIT_DUP; -static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP; +static struct string_list negotiation_restrict = STRING_LIST_INIT_NODUP; struct fetch_config { enum display_format display_format; @@ -1534,13 +1534,13 @@ static int add_oid(const struct reference *ref, void *cb_data) return 0; } -static void add_negotiation_tips(struct git_transport_options *smart_options) +static void add_negotiation_restrict_tips(struct git_transport_options *smart_options) { struct oid_array *oids = xcalloc(1, sizeof(*oids)); int i; - for 
(i = 0; i < negotiation_tip.nr; i++) { - const char *s = negotiation_tip.items[i].string; + for (i = 0; i < negotiation_restrict.nr; i++) { + const char *s = negotiation_restrict.items[i].string; struct refs_for_each_ref_options opts = { .pattern = s, }; @@ -1561,7 +1561,7 @@ static void add_negotiation_tips(struct git_transport_options *smart_options) warning(_("ignoring %s=%s because it does not match any refs"), "--negotiation-restrict", s); } - smart_options->negotiation_tips = oids; + smart_options->negotiation_restrict_tips = oids; } static struct transport *prepare_transport(struct remote *remote, int deepen, @@ -1595,9 +1595,9 @@ static struct transport *prepare_transport(struct remote *remote, int deepen, set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER, spec); set_option(transport, TRANS_OPT_FROM_PROMISOR, "1"); } - if (negotiation_tip.nr) { + if (negotiation_restrict.nr) { if (transport->smart_options) - add_negotiation_tips(transport->smart_options); + add_negotiation_restrict_tips(transport->smart_options); else warning(_("ignoring %s because the protocol does not support it"), "--negotiation-restrict"); @@ -2566,7 +2566,7 @@ int cmd_fetch(int argc, N_("specify fetch refmap"), PARSE_OPT_NONEG, parse_refmap_arg), OPT_STRING_LIST('o', "server-option", &server_options, N_("server-specific"), N_("option to transmit")), OPT_IPVERSION(&family), - OPT_STRING_LIST(0, "negotiation-restrict", &negotiation_tip, N_("revision"), + OPT_STRING_LIST(0, "negotiation-restrict", &negotiation_restrict, N_("revision"), N_("report that we have only objects reachable from this object")), OPT_ALIAS(0, "negotiation-tip", "negotiation-restrict"), OPT_BOOL(0, "negotiate-only", &negotiate_only, @@ -2658,7 +2658,7 @@ int cmd_fetch(int argc, config.display_format = DISPLAY_FORMAT_PORCELAIN; } - if (negotiate_only && !negotiation_tip.nr) + if (negotiate_only && !negotiation_restrict.nr) die(_("%s needs one or more %s"), "--negotiate-only", "--negotiation-restrict=*"); diff --git 
a/fetch-pack.c b/fetch-pack.c index 6ecd468ef766a8..baf239adf98db3 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -291,21 +291,21 @@ static int next_flush(int stateless_rpc, int count) } static void mark_tips(struct fetch_negotiator *negotiator, - const struct oid_array *negotiation_tips) + const struct oid_array *negotiation_restrict_tips) { struct refs_for_each_ref_options opts = { .flags = REFS_FOR_EACH_INCLUDE_BROKEN, }; int i; - if (!negotiation_tips) { + if (!negotiation_restrict_tips) { refs_for_each_ref_ext(get_main_ref_store(the_repository), rev_list_insert_ref_oid, negotiator, &opts); return; } - for (i = 0; i < negotiation_tips->nr; i++) - rev_list_insert_ref(negotiator, &negotiation_tips->oid[i]); + for (i = 0; i < negotiation_restrict_tips->nr; i++) + rev_list_insert_ref(negotiator, &negotiation_restrict_tips->oid[i]); return; } @@ -355,7 +355,7 @@ static int find_common(struct fetch_negotiator *negotiator, PACKET_READ_CHOMP_NEWLINE | PACKET_READ_DIE_ON_ERR_PACKET); - mark_tips(negotiator, args->negotiation_tips); + mark_tips(negotiator, args->negotiation_restrict_tips); for_each_cached_alternate(negotiator, insert_one_alternate_object); fetching = 0; @@ -1728,7 +1728,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, else state = FETCH_SEND_REQUEST; - mark_tips(negotiator, args->negotiation_tips); + mark_tips(negotiator, args->negotiation_restrict_tips); for_each_cached_alternate(negotiator, insert_one_alternate_object); break; @@ -2177,7 +2177,7 @@ static void clear_common_flag(struct oidset *s) } } -void negotiate_using_fetch(const struct oid_array *negotiation_tips, +void negotiate_using_fetch(const struct oid_array *negotiation_restrict_tips, const struct string_list *server_options, int stateless_rpc, int fd[], @@ -2195,13 +2195,13 @@ void negotiate_using_fetch(const struct oid_array *negotiation_tips, timestamp_t min_generation = GENERATION_NUMBER_INFINITY; fetch_negotiator_init(the_repository, &negotiator); - 
mark_tips(&negotiator, negotiation_tips); + mark_tips(&negotiator, negotiation_restrict_tips); packet_reader_init(&reader, fd[0], NULL, 0, PACKET_READ_CHOMP_NEWLINE | PACKET_READ_DIE_ON_ERR_PACKET); - oid_array_for_each((struct oid_array *) negotiation_tips, + oid_array_for_each((struct oid_array *) negotiation_restrict_tips, add_to_object_array, &nt_object_array); diff --git a/fetch-pack.h b/fetch-pack.h index 9d3470366f85ec..6c70c942c2f001 100644 --- a/fetch-pack.h +++ b/fetch-pack.h @@ -21,7 +21,7 @@ struct fetch_pack_args { * If not NULL, during packfile negotiation, fetch-pack will send "have" * lines only with these tips and their ancestors. */ - const struct oid_array *negotiation_tips; + const struct oid_array *negotiation_restrict_tips; unsigned deepen_relative:1; unsigned quiet:1; @@ -89,7 +89,7 @@ struct ref *fetch_pack(struct fetch_pack_args *args, * In the capability advertisement that has happened prior to invoking this * function, the "wait-for-done" capability must be present. 
*/ -void negotiate_using_fetch(const struct oid_array *negotiation_tips, +void negotiate_using_fetch(const struct oid_array *negotiation_restrict_tips, const struct string_list *server_options, int stateless_rpc, int fd[], diff --git a/transport-helper.c b/transport-helper.c index dd78d406681f0e..f4388da7660d6d 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -754,7 +754,7 @@ static int fetch_refs(struct transport *transport, set_helper_option(transport, "filter", spec); } - if (data->transport_options.negotiation_tips) + if (data->transport_options.negotiation_restrict_tips) warning(_("ignoring %s because the protocol does not support it."), "--negotiation-restrict"); diff --git a/transport.c b/transport.c index 107f4fa5dce96a..a3051f6733633d 100644 --- a/transport.c +++ b/transport.c @@ -463,7 +463,7 @@ static int fetch_refs_via_pack(struct transport *transport, args.refetch = data->options.refetch; args.stateless_rpc = transport->stateless_rpc; args.server_options = transport->server_options; - args.negotiation_tips = data->options.negotiation_tips; + args.negotiation_restrict_tips = data->options.negotiation_restrict_tips; args.reject_shallow_remote = transport->smart_options->reject_shallow; if (!data->finished_handshake) { @@ -491,7 +491,7 @@ static int fetch_refs_via_pack(struct transport *transport, warning(_("server does not support wait-for-done")); ret = -1; } else { - negotiate_using_fetch(data->options.negotiation_tips, + negotiate_using_fetch(data->options.negotiation_restrict_tips, transport->server_options, transport->stateless_rpc, data->fd, @@ -979,9 +979,9 @@ static int disconnect_git(struct transport *transport) finish_connect(data->conn); } - if (data->options.negotiation_tips) { - oid_array_clear(data->options.negotiation_tips); - free(data->options.negotiation_tips); + if (data->options.negotiation_restrict_tips) { + oid_array_clear(data->options.negotiation_restrict_tips); + free(data->options.negotiation_restrict_tips); } 
list_objects_filter_release(&data->options.filter_options); oid_array_clear(&data->extra_have); diff --git a/transport.h b/transport.h index 892f19454a75d6..cdeb33c16f82f6 100644 --- a/transport.h +++ b/transport.h @@ -40,13 +40,13 @@ struct git_transport_options { /* * This is only used during fetch. See the documentation of - * negotiation_tips in struct fetch_pack_args. + * negotiation_restrict_tips in struct fetch_pack_args. * * This field is only supported by transports that support connect or * stateless_connect. Set this field directly instead of using * transport_set_option(). */ - struct oid_array *negotiation_tips; + struct oid_array *negotiation_restrict_tips; /* * If allocated, whenever transport_fetch_refs() is called, add known From 8bb252f86c30a3066ec64f99f94719c01a53743a Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 19 May 2026 16:24:51 +0000 Subject: [PATCH 88/93] remote: add remote.*.negotiationRestrict config In a previous change, the --negotiation-restrict command-line option of 'git fetch' was added as a synonym of --negotiation-tip. Both of these options restrict the set of 'haves' the client can send as part of negotiation. This was previously not available via a configuration option. Add a new 'remote..negotiationRestrict' multi-valued config option that updates 'git fetch ' to use these restrictions by default. If the user provides even one --negotiation-restrict argument, then the config is ignored. An empty value resets the value list to allow ignoring earlier config values, such as those that might be set in system or global config. 
Reviewed-by: Matthew John Cheetham Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/config/remote.adoc | 18 ++++++++++++++++++ builtin/fetch.c | 28 +++++++++++++++++++++------- remote.c | 5 +++++ remote.h | 1 + t/t5510-fetch.sh | 26 ++++++++++++++++++++++++++ 5 files changed, 71 insertions(+), 7 deletions(-) diff --git a/Documentation/config/remote.adoc b/Documentation/config/remote.adoc index 91e46f66f5dd1c..4dcf81fbce4ed3 100644 --- a/Documentation/config/remote.adoc +++ b/Documentation/config/remote.adoc @@ -107,6 +107,24 @@ priority configuration file (e.g. `.git/config` in a repository) to clear the values inherited from a lower priority configuration files (e.g. `$HOME/.gitconfig`). +remote.<name>.negotiationRestrict:: + When negotiating with this remote during `git fetch`, restrict the + commits advertised as "have" lines to only those reachable from refs + matching the given patterns. This multi-valued config option behaves + like `--negotiation-restrict` on the command line. ++ +Each value is either an exact ref name (e.g. `refs/heads/release`) or a +glob pattern (e.g. `refs/heads/release/*`). The pattern syntax is the +same as for `--negotiation-restrict`. ++ +These config values are used as defaults for the `--negotiation-restrict` +command-line option. If `--negotiation-restrict` (or its synonym +`--negotiation-tip`) is specified on the command line, then the config +values are not used. ++ +Blank values signal to ignore all previous values, allowing a reset of +the list from broader config scenarios. + remote.<name>.followRemoteHEAD:: How linkgit:git-fetch[1] should handle updates to `remotes/<name>/HEAD` when fetching using the configured refspecs of a remote. 
diff --git a/builtin/fetch.c b/builtin/fetch.c index 2ba0051d520cdf..a957739f37dbd9 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1601,6 +1601,19 @@ static struct transport *prepare_transport(struct remote *remote, int deepen, else warning(_("ignoring %s because the protocol does not support it"), "--negotiation-restrict"); + } else if (remote->negotiation_restrict.nr) { + struct string_list_item *item; + for_each_string_list_item(item, &remote->negotiation_restrict) + string_list_append(&negotiation_restrict, item->string); + if (transport->smart_options) + add_negotiation_restrict_tips(transport->smart_options); + else { + struct strbuf config_name = STRBUF_INIT; + strbuf_addf(&config_name, "remote.%s.negotiationRestrict", remote->name); + warning(_("ignoring %s because the protocol does not support it"), + config_name.buf); + strbuf_release(&config_name); + } } return transport; } @@ -2658,10 +2671,6 @@ int cmd_fetch(int argc, config.display_format = DISPLAY_FORMAT_PORCELAIN; } - if (negotiate_only && !negotiation_restrict.nr) - die(_("%s needs one or more %s"), "--negotiate-only", - "--negotiation-restrict=*"); - if (deepen_relative) { if (deepen_relative < 0) die(_("negative depth in --deepen is not supported")); @@ -2749,14 +2758,19 @@ int cmd_fetch(int argc, if (!remote) die(_("must supply remote when using --negotiate-only")); gtransport = prepare_transport(remote, 1, &filter_options); - if (gtransport->smart_options) { - gtransport->smart_options->acked_commits = &acked_commits; - } else { + + if (!gtransport->smart_options) { warning(_("protocol does not support --negotiate-only, exiting")); result = 1; trace2_region_leave("fetch", "negotiate-only", the_repository); goto cleanup; } + if (!gtransport->smart_options->negotiation_restrict_tips) + die(_("%s needs one or more %s"), "--negotiate-only", + "--negotiation-restrict=*"); + + gtransport->smart_options->acked_commits = &acked_commits; + if (server_options.nr) gtransport->server_options = 
&server_options; result = transport_fetch_refs(gtransport, NULL); diff --git a/remote.c b/remote.c index 7ca2a6501b4920..620086e16e279f 100644 --- a/remote.c +++ b/remote.c @@ -152,6 +152,7 @@ static struct remote *make_remote(struct remote_state *remote_state, refspec_init_push(&ret->push); refspec_init_fetch(&ret->fetch); string_list_init_dup(&ret->server_options); + string_list_init_dup(&ret->negotiation_restrict); ALLOC_GROW(remote_state->remotes, remote_state->remotes_nr + 1, remote_state->remotes_alloc); @@ -179,6 +180,7 @@ static void remote_clear(struct remote *remote) FREE_AND_NULL(remote->http_proxy); FREE_AND_NULL(remote->http_proxy_authmethod); string_list_clear(&remote->server_options, 0); + string_list_clear(&remote->negotiation_restrict, 0); } static void add_merge(struct branch *branch, const char *name) @@ -562,6 +564,9 @@ static int handle_config(const char *key, const char *value, } else if (!strcmp(subkey, "serveroption")) { return parse_transport_option(key, value, &remote->server_options); + } else if (!strcmp(subkey, "negotiationrestrict")) { + return parse_transport_option(key, value, + &remote->negotiation_restrict); } else if (!strcmp(subkey, "followremotehead")) { const char *no_warn_branch; if (!strcmp(value, "never")) diff --git a/remote.h b/remote.h index fc052945ee451d..e6ec37c3930355 100644 --- a/remote.h +++ b/remote.h @@ -117,6 +117,7 @@ struct remote { char *http_proxy_authmethod; struct string_list server_options; + struct string_list negotiation_restrict; enum follow_remote_head_settings follow_remote_head; const char *no_warn_branch; diff --git a/t/t5510-fetch.sh b/t/t5510-fetch.sh index dc3ce56d84c743..eff3ce8e2de89c 100755 --- a/t/t5510-fetch.sh +++ b/t/t5510-fetch.sh @@ -1485,6 +1485,32 @@ test_expect_success '--negotiation-restrict and --negotiation-tip can be mixed' check_negotiation_tip ' +test_expect_success 'remote..negotiationRestrict used as default' ' + setup_negotiation_tip server server 0 && + + # test the reset of 
the list on an empty value + git -C client config --add remote.origin.negotiationRestrict alpha_2 && + git -C client config --add remote.origin.negotiationRestrict "" && + git -C client config --add remote.origin.negotiationRestrict alpha_1 && + git -C client config --add remote.origin.negotiationRestrict beta_1 && + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \ + origin alpha_s beta_s && + check_negotiation_tip +' + +test_expect_success 'CLI --negotiation-restrict overrides remote config' ' + setup_negotiation_tip server server 0 && + git -C client config --add remote.origin.negotiationRestrict alpha_1 && + git -C client config --add remote.origin.negotiationRestrict beta_1 && + ALPHA_1=$(git -C client rev-parse alpha_1) && + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \ + --negotiation-restrict=alpha_1 \ + origin alpha_s beta_s && + test_grep "fetch> have $ALPHA_1" trace && + BETA_1=$(git -C client rev-parse beta_1) && + test_grep ! "fetch> have $BETA_1" trace +' + test_expect_success SYMLINKS 'clone does not get confused by a D/F conflict' ' git init df-conflict && ( From 22b2f3d2a319af32e9f3add0b3cc7732cbf4733b Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 19 May 2026 16:24:52 +0000 Subject: [PATCH 89/93] negotiator: add have_sent() interface In a future change, we will introduce a capability to choose specific commit OIDs as 'have's in fetch negotiation, with the ability to have the negotiator choose more 'have's to increase coverage beyond that required core set. The negotiator works to avoid emitting 'have's that can reach each other, but that logic is hidden beneath the negotiator's iterator function pointer ('next'). We need a way to communicate to the negotiator that we have picked a 'have' so it could incorporate that into its logic. Add a have_sent() method to the fetch_negotiator interface. 
This is the signal that allows the negotiator to track the commit as already shown and can perform the proper bookkeeping to avoid emitting those objects or anything they can reach. For our non-trivial negotiators, it is sufficient to mark these commits as common, so the implementation is quite simple. This logic will be exercised in the next change. Reviewed-by: Matthew John Cheetham Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- fetch-negotiator.h | 9 +++++++++ negotiator/default.c | 8 ++++++++ negotiator/noop.c | 7 +++++++ negotiator/skipping.c | 8 ++++++++ 4 files changed, 32 insertions(+) diff --git a/fetch-negotiator.h b/fetch-negotiator.h index e348905a1f0008..6ca422a064768c 100644 --- a/fetch-negotiator.h +++ b/fetch-negotiator.h @@ -47,6 +47,15 @@ struct fetch_negotiator { */ int (*ack)(struct fetch_negotiator *, struct commit *); + /* + * Inform the negotiator that this commit has already been sent as + * a "have" line outside of the negotiator's control. The negotiator + * should avoid outputting it from next() and may use it to optimize + * further negotiation (e.g., by treating it and its ancestors as + * common). 
+ */ + void (*have_sent)(struct fetch_negotiator *, struct commit *); + void (*release)(struct fetch_negotiator *); /* internal use */ diff --git a/negotiator/default.c b/negotiator/default.c index 116dedcf83035d..05ab616f399116 100644 --- a/negotiator/default.c +++ b/negotiator/default.c @@ -175,6 +175,13 @@ static int ack(struct fetch_negotiator *n, struct commit *c) return known_to_be_common; } +static void have_sent(struct fetch_negotiator *n, struct commit *c) +{ + if (repo_parse_commit(the_repository, c)) + return; + mark_common(n->data, c, 0, 0); +} + static void release(struct fetch_negotiator *n) { clear_prio_queue(&((struct negotiation_state *)n->data)->rev_list); @@ -188,6 +195,7 @@ void default_negotiator_init(struct fetch_negotiator *negotiator) negotiator->add_tip = add_tip; negotiator->next = next; negotiator->ack = ack; + negotiator->have_sent = have_sent; negotiator->release = release; negotiator->data = CALLOC_ARRAY(ns, 1); ns->rev_list.compare = compare_commits_by_commit_date; diff --git a/negotiator/noop.c b/negotiator/noop.c index 65e3c200084aa4..edf1b456f31204 100644 --- a/negotiator/noop.c +++ b/negotiator/noop.c @@ -29,6 +29,12 @@ static int ack(struct fetch_negotiator *n UNUSED, struct commit *c UNUSED) return 0; } +static void have_sent(struct fetch_negotiator *n UNUSED, + struct commit *c UNUSED) +{ + /* nothing to do */ +} + static void release(struct fetch_negotiator *n UNUSED) { /* nothing to release */ @@ -40,6 +46,7 @@ void noop_negotiator_init(struct fetch_negotiator *negotiator) negotiator->add_tip = add_tip; negotiator->next = next; negotiator->ack = ack; + negotiator->have_sent = have_sent; negotiator->release = release; negotiator->data = NULL; } diff --git a/negotiator/skipping.c b/negotiator/skipping.c index 0a272130fb1b6d..69472c58e18a7a 100644 --- a/negotiator/skipping.c +++ b/negotiator/skipping.c @@ -243,6 +243,13 @@ static int ack(struct fetch_negotiator *n, struct commit *c) return known_to_be_common; } +static void 
have_sent(struct fetch_negotiator *n, struct commit *c) +{ + if (repo_parse_commit(the_repository, c)) + return; + mark_common(n->data, c); +} + static void release(struct fetch_negotiator *n) { struct data *data = n->data; @@ -259,6 +266,7 @@ void skipping_negotiator_init(struct fetch_negotiator *negotiator) negotiator->add_tip = add_tip; negotiator->next = next; negotiator->ack = ack; + negotiator->have_sent = have_sent; negotiator->release = release; negotiator->data = CALLOC_ARRAY(data, 1); data->rev_list.compare = compare; From e2164742c9ceb60ac9ddd2114f49304fd73df1f3 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 19 May 2026 16:24:53 +0000 Subject: [PATCH 90/93] fetch: add --negotiation-include option for negotiation Add a new --negotiation-include option to 'git fetch', which ensures that certain ref tips are always sent as 'have' lines during fetch negotiation, regardless of what the negotiation algorithm selects. This is useful when the repository has a large number of references, so the normal negotiation algorithm truncates the list. This is especially important in repositories with long parallel commit histories. For example, a repo could have a 'dev' branch for development and a 'release' branch for released versions. If the 'dev' branch isn't selected for negotiation, then it's not a big deal because there are many in-progress development branches with a shared history. However, if 'release' is not selected for negotiation, then the server may think that this is the first time the client has asked for that reference, causing a full download of its parallel commit history (and any extra data that may be unique to that branch). This is based on a real example where certain fetches would grow to 60+ GB when a release branch updated. This option is a complement to --negotiation-restrict, which reduces the negotiation ref set to a specific list. 
In the earlier example, using --negotiation-restrict to focus the negotiation to 'dev' and 'release' would avoid those problematic downloads, but would still not allow advertising potentially-relevant user branches. In this way, the 'include' version solves the problem I mention while allowing negotiation to pick other references opportunistically. The two options can also be combined to allow the best of both worlds. The argument may be an exact ref name or a glob pattern. Non-existent refs are silently ignored. This behavior is also updated in the ref matching logic for the related --negotiation-restrict option to match. The implementation outputs the requested objects as haves before the negotiator performs its own algorithm to choose the next haves. Use the new have_sent() interface to signal these have commits were sent before engaging with the negotiator's next() iterator. Also add --negotiation-include to 'git pull' passthrough options. Reviewed-by: Matthew John Cheetham Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/fetch-options.adoc | 19 +++++++ builtin/fetch.c | 38 ++++++++++--- builtin/pull.c | 3 ++ fetch-pack.c | 81 +++++++++++++++++++++++++--- fetch-pack.h | 6 ++- t/t5510-fetch.sh | 91 ++++++++++++++++++++++++++++++++ transport.c | 8 ++- transport.h | 5 +- 8 files changed, 232 insertions(+), 19 deletions(-) diff --git a/Documentation/fetch-options.adoc b/Documentation/fetch-options.adoc index d39cecb4468991..7b897a720228d2 100644 --- a/Documentation/fetch-options.adoc +++ b/Documentation/fetch-options.adoc @@ -73,6 +73,25 @@ See also the `fetch.negotiationAlgorithm` and `push.negotiate` configuration variables documented in linkgit:git-config[1], and the `--negotiate-only` option below. +`--negotiation-include=(|)`:: + Ensure that the commits at the given tips are always sent as "have" + lines during fetch negotiation, regardless of what the negotiation + algorithm selects. 
This is useful to guarantee that common + history reachable from specific refs is always considered, even + when `--negotiation-restrict` restricts the set of tips or when + the negotiation algorithm would otherwise skip them. ++ +This option may be specified more than once; if so, each commit is sent +unconditionally. ++ +The argument may be an exact ref name (e.g. `refs/heads/release`), an +object hash, or a glob pattern (e.g. `refs/heads/release/{asterisk}`). +The pattern syntax is the same as for `--negotiation-restrict`. ++ +If `--negotiation-restrict` is used, the have set is first restricted by +that option and then increased to include the tips specified by +`--negotiation-include`. + `--negotiate-only`:: Do not fetch anything from the server, and instead print the ancestors of the provided `--negotiation-restrict=` arguments, diff --git a/builtin/fetch.c b/builtin/fetch.c index a957739f37dbd9..ba56e9022bebcf 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -99,6 +99,7 @@ static struct transport *gsecondary; static struct refspec refmap = REFSPEC_INIT_FETCH; static struct string_list server_options = STRING_LIST_INIT_DUP; static struct string_list negotiation_restrict = STRING_LIST_INIT_NODUP; +static struct string_list negotiation_include = STRING_LIST_INIT_NODUP; struct fetch_config { enum display_format display_format; @@ -1534,23 +1535,29 @@ static int add_oid(const struct reference *ref, void *cb_data) return 0; } -static void add_negotiation_restrict_tips(struct git_transport_options *smart_options) +static void add_negotiation_tips(struct string_list *input_list, + struct oid_array **output_list, + const char *argname) { struct oid_array *oids = xcalloc(1, sizeof(*oids)); int i; - for (i = 0; i < negotiation_restrict.nr; i++) { - const char *s = negotiation_restrict.items[i].string; + for (i = 0; i < input_list->nr; i++) { + const char *s = input_list->items[i].string; struct refs_for_each_ref_options opts = { .pattern = s, }; int old_nr; if 
(!has_glob_specials(s)) { struct object_id oid; + + /* Ignore missing reference. */ if (repo_get_oid(the_repository, s, &oid)) - die(_("%s is not a valid object"), s); + continue; + /* Fail on missing object pointed by ref. */ if (!odb_has_object(the_repository->objects, &oid, 0)) die(_("the object %s does not exist"), s); + oid_array_append(oids, &oid); continue; } @@ -1559,9 +1566,9 @@ static void add_negotiation_restrict_tips(struct git_transport_options *smart_op add_oid, oids, &opts); if (old_nr == oids->nr) warning(_("ignoring %s=%s because it does not match any refs"), - "--negotiation-restrict", s); + argname, s); } - smart_options->negotiation_restrict_tips = oids; + *output_list = oids; } static struct transport *prepare_transport(struct remote *remote, int deepen, @@ -1597,7 +1604,9 @@ static struct transport *prepare_transport(struct remote *remote, int deepen, } if (negotiation_restrict.nr) { if (transport->smart_options) - add_negotiation_restrict_tips(transport->smart_options); + add_negotiation_tips(&negotiation_restrict, + &transport->smart_options->negotiation_restrict_tips, + "--negotiation-restrict"); else warning(_("ignoring %s because the protocol does not support it"), "--negotiation-restrict"); @@ -1606,7 +1615,9 @@ static struct transport *prepare_transport(struct remote *remote, int deepen, for_each_string_list_item(item, &remote->negotiation_restrict) string_list_append(&negotiation_restrict, item->string); if (transport->smart_options) - add_negotiation_restrict_tips(transport->smart_options); + add_negotiation_tips(&negotiation_restrict, + &transport->smart_options->negotiation_restrict_tips, + "--negotiation-restrict"); else { struct strbuf config_name = STRBUF_INIT; strbuf_addf(&config_name, "remote.%s.negotiationRestrict", remote->name); @@ -1615,6 +1626,15 @@ static struct transport *prepare_transport(struct remote *remote, int deepen, strbuf_release(&config_name); } } + if (negotiation_include.nr) { + if (transport->smart_options) 
+ add_negotiation_tips(&negotiation_include, + &transport->smart_options->negotiation_include_tips, + "--negotiation-include"); + else + warning(_("ignoring %s because the protocol does not support it"), + "--negotiation-include"); + } return transport; } @@ -2582,6 +2602,8 @@ int cmd_fetch(int argc, OPT_STRING_LIST(0, "negotiation-restrict", &negotiation_restrict, N_("revision"), N_("report that we have only objects reachable from this object")), OPT_ALIAS(0, "negotiation-tip", "negotiation-restrict"), + OPT_STRING_LIST(0, "negotiation-include", &negotiation_include, N_("revision"), + N_("ensure this ref is always sent as a negotiation have")), OPT_BOOL(0, "negotiate-only", &negotiate_only, N_("do not fetch a packfile; instead, print ancestors of negotiation tips")), OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), diff --git a/builtin/pull.c b/builtin/pull.c index cc6ce485fc4e70..d49b09114ae7d8 100644 --- a/builtin/pull.c +++ b/builtin/pull.c @@ -1000,6 +1000,9 @@ int cmd_pull(int argc, N_("report that we have only objects reachable from this object"), 0), OPT_ALIAS(0, "negotiation-tip", "negotiation-restrict"), + OPT_PASSTHRU_ARGV(0, "negotiation-include", &opt_fetch, N_("revision"), + N_("ensure this ref is always sent as a negotiation have"), + 0), OPT_BOOL(0, "show-forced-updates", &opt_show_forced_updates, N_("check for forced-updates on all updated branches")), OPT_PASSTHRU(0, "set-upstream", &set_upstream, NULL, diff --git a/fetch-pack.c b/fetch-pack.c index baf239adf98db3..96071434b88fa5 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -25,6 +25,7 @@ #include "oidset.h" #include "packfile.h" #include "odb.h" +#include "object-name.h" #include "path.h" #include "connected.h" #include "fetch-negotiator.h" @@ -332,6 +333,21 @@ static void send_filter(struct fetch_pack_args *args, } } +static void add_oids_to_set(const struct oid_array *array, + struct oidset *set) +{ + if (!array) + return; + + for (size_t i = 0; i < array->nr; i++) { + struct object_id *oid = 
&array->oid[i]; + if (!odb_has_object(the_repository->objects, oid, 0)) + die(_("the object %s does not exist"), oid_to_hex(oid)); + + oidset_insert(set, oid); + } +} + static int find_common(struct fetch_negotiator *negotiator, struct fetch_pack_args *args, int fd[2], struct object_id *result_oid, @@ -347,6 +363,7 @@ static int find_common(struct fetch_negotiator *negotiator, struct strbuf req_buf = STRBUF_INIT; size_t state_len = 0; struct packet_reader reader; + struct oidset negotiation_include_oids = OIDSET_INIT; if (args->stateless_rpc && multi_ack == 1) die(_("the option '%s' requires '%s'"), "--stateless-rpc", "multi_ack_detailed"); @@ -474,6 +491,27 @@ static int find_common(struct fetch_negotiator *negotiator, trace2_region_enter("fetch-pack", "negotiation_v0_v1", the_repository); flushes = 0; retval = -1; + + /* Send unconditional haves from --negotiation-include */ + add_oids_to_set(args->negotiation_include_tips, + &negotiation_include_oids); + if (oidset_size(&negotiation_include_oids)) { + struct oidset_iter iter; + oidset_iter_init(&negotiation_include_oids, &iter); + + while ((oid = oidset_iter_next(&iter))) { + struct commit *commit; + packet_buf_write(&req_buf, "have %s\n", + oid_to_hex(oid)); + print_verbose(args, "have %s", oid_to_hex(oid)); + count++; + + commit = lookup_commit(the_repository, oid); + if (commit) + negotiator->have_sent(negotiator, commit); + } + } + while ((oid = negotiator->next(negotiator))) { packet_buf_write(&req_buf, "have %s\n", oid_to_hex(oid)); print_verbose(args, "have %s", oid_to_hex(oid)); @@ -584,6 +622,7 @@ static int find_common(struct fetch_negotiator *negotiator, flushes++; } strbuf_release(&req_buf); + oidset_clear(&negotiation_include_oids); if (!got_ready || !no_done) consume_shallow_list(args, &reader); @@ -1305,11 +1344,27 @@ static void add_common(struct strbuf *req_buf, struct oidset *common) static int add_haves(struct fetch_negotiator *negotiator, struct strbuf *req_buf, - int *haves_to_send) + int 
*haves_to_send, + struct oidset *negotiation_include_oids) { int haves_added = 0; const struct object_id *oid; + /* Send unconditional haves from --negotiation-include */ + if (negotiation_include_oids) { + struct oidset_iter iter; + oidset_iter_init(negotiation_include_oids, &iter); + + while ((oid = oidset_iter_next(&iter))) { + struct commit *commit = lookup_commit(the_repository, oid); + if (commit) { + packet_buf_write(req_buf, "have %s\n", + oid_to_hex(oid)); + negotiator->have_sent(negotiator, commit); + } + } + } + while ((oid = negotiator->next(negotiator))) { packet_buf_write(req_buf, "have %s\n", oid_to_hex(oid)); if (++haves_added >= *haves_to_send) @@ -1358,7 +1413,8 @@ static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out, struct fetch_pack_args *args, const struct ref *wants, struct oidset *common, int *haves_to_send, int *in_vain, - int sideband_all, int seen_ack) + int sideband_all, int seen_ack, + struct oidset *negotiation_include_oids) { int haves_added; int done_sent = 0; @@ -1413,7 +1469,8 @@ static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out, /* Add all of the common commits we've found in previous rounds */ add_common(&req_buf, common); - haves_added = add_haves(negotiator, &req_buf, haves_to_send); + haves_added = add_haves(negotiator, &req_buf, haves_to_send, + negotiation_include_oids); *in_vain += haves_added; trace2_data_intmax("negotiation_v2", the_repository, "haves_added", haves_added); trace2_data_intmax("negotiation_v2", the_repository, "in_vain", *in_vain); @@ -1657,6 +1714,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, struct ref *ref = copy_ref_list(orig_ref); enum fetch_state state = FETCH_CHECK_LOCAL; struct oidset common = OIDSET_INIT; + struct oidset negotiation_include_oids = OIDSET_INIT; struct packet_reader reader; int in_vain = 0, negotiation_started = 0; int negotiation_round = 0; @@ -1729,6 +1787,8 @@ static struct ref *do_fetch_pack_v2(struct 
fetch_pack_args *args, state = FETCH_SEND_REQUEST; mark_tips(negotiator, args->negotiation_restrict_tips); + add_oids_to_set(args->negotiation_include_tips, + &negotiation_include_oids); for_each_cached_alternate(negotiator, insert_one_alternate_object); break; @@ -1747,7 +1807,8 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, &common, &haves_to_send, &in_vain, reader.use_sideband, - seen_ack)) { + seen_ack, + &negotiation_include_oids)) { trace2_region_leave_printf("negotiation_v2", "round", the_repository, "%d", negotiation_round); @@ -1883,6 +1944,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, negotiator->release(negotiator); oidset_clear(&common); + oidset_clear(&negotiation_include_oids); return ref; } @@ -2181,12 +2243,14 @@ void negotiate_using_fetch(const struct oid_array *negotiation_restrict_tips, const struct string_list *server_options, int stateless_rpc, int fd[], - struct oidset *acked_commits) + struct oidset *acked_commits, + const struct oid_array *negotiation_include_tips) { struct fetch_negotiator negotiator; struct packet_reader reader; struct object_array nt_object_array = OBJECT_ARRAY_INIT; struct strbuf req_buf = STRBUF_INIT; + struct oidset negotiation_include_oids = OIDSET_INIT; int haves_to_send = INITIAL_FLUSH; int in_vain = 0; int seen_ack = 0; @@ -2197,6 +2261,9 @@ void negotiate_using_fetch(const struct oid_array *negotiation_restrict_tips, fetch_negotiator_init(the_repository, &negotiator); mark_tips(&negotiator, negotiation_restrict_tips); + add_oids_to_set(negotiation_include_tips, + &negotiation_include_oids); + packet_reader_init(&reader, fd[0], NULL, 0, PACKET_READ_CHOMP_NEWLINE | PACKET_READ_DIE_ON_ERR_PACKET); @@ -2221,7 +2288,8 @@ void negotiate_using_fetch(const struct oid_array *negotiation_restrict_tips, packet_buf_write(&req_buf, "wait-for-done"); - haves_added = add_haves(&negotiator, &req_buf, &haves_to_send); + haves_added = add_haves(&negotiator, &req_buf, &haves_to_send, + 
&negotiation_include_oids); in_vain += haves_added; if (!haves_added || (seen_ack && in_vain >= MAX_IN_VAIN)) last_iteration = 1; @@ -2273,6 +2341,7 @@ void negotiate_using_fetch(const struct oid_array *negotiation_restrict_tips, clear_common_flag(acked_commits); object_array_clear(&nt_object_array); + oidset_clear(&negotiation_include_oids); negotiator.release(&negotiator); strbuf_release(&req_buf); } diff --git a/fetch-pack.h b/fetch-pack.h index 6c70c942c2f001..6d0dec7f412fd8 100644 --- a/fetch-pack.h +++ b/fetch-pack.h @@ -19,9 +19,10 @@ struct fetch_pack_args { /* * If not NULL, during packfile negotiation, fetch-pack will send "have" - * lines only with these tips and their ancestors. + * lines for all _include_ tips and then a subset of the _restrict_ tips. */ const struct oid_array *negotiation_restrict_tips; + const struct oid_array *negotiation_include_tips; unsigned deepen_relative:1; unsigned quiet:1; @@ -93,7 +94,8 @@ void negotiate_using_fetch(const struct oid_array *negotiation_restrict_tips, const struct string_list *server_options, int stateless_rpc, int fd[], - struct oidset *acked_commits); + struct oidset *acked_commits, + const struct oid_array *negotiation_include_tips); /* * Print an appropriate error message for each sought ref that wasn't diff --git a/t/t5510-fetch.sh b/t/t5510-fetch.sh index eff3ce8e2de89c..bc2e2af9599d91 100755 --- a/t/t5510-fetch.sh +++ b/t/t5510-fetch.sh @@ -1460,6 +1460,16 @@ EOF test_cmp fatal-expect fatal-actual ' +test_expect_success '--negotiation-tip ignores missing refs and invalid hashes' ' + setup_negotiation_tip server server 0 && + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \ + --negotiation-tip=alpha_1 --negotiation-tip=beta_1 \ + --negotiation-tip=no-such-ref \ + --negotiation-tip=invalid-hash \ + origin alpha_s beta_s && + check_negotiation_tip +' + test_expect_success '--negotiation-restrict limits "have" lines sent' ' setup_negotiation_tip server server 0 && GIT_TRACE_PACKET="$(pwd)/trace" git 
-C client fetch \ @@ -1511,6 +1521,87 @@ test_expect_success 'CLI --negotiation-restrict overrides remote config' ' test_grep ! "fetch> have $BETA_1" trace ' +test_expect_success '--negotiation-include includes configured refs as haves' ' + test_when_finished rm -f trace && + setup_negotiation_tip server server 0 && + + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \ + --negotiation-restrict=alpha_1 \ + --negotiation-include=refs/tags/beta_1 \ + origin alpha_s beta_s && + + ALPHA_1=$(git -C client rev-parse alpha_1) && + test_grep "fetch> have $ALPHA_1" trace && + BETA_1=$(git -C client rev-parse beta_1) && + test_grep "fetch> have $BETA_1" trace +' + +test_expect_success '--negotiation-include works with glob patterns' ' + test_when_finished rm -f trace && + setup_negotiation_tip server server 0 && + + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \ + --negotiation-restrict=alpha_1 \ + --negotiation-include="refs/tags/beta_*" \ + origin alpha_s beta_s && + + BETA_1=$(git -C client rev-parse beta_1) && + test_grep "fetch> have $BETA_1" trace && + BETA_2=$(git -C client rev-parse beta_2) && + test_grep "fetch> have $BETA_2" trace +' + +test_expect_success '--negotiation-include is additive with negotiation' ' + test_when_finished rm -f trace && + setup_negotiation_tip server server 0 && + + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \ + --negotiation-include=refs/tags/beta_1 \ + origin alpha_s beta_s && + + BETA_1=$(git -C client rev-parse beta_1) && + test_grep "fetch> have $BETA_1" trace +' + +test_expect_success '--negotiation-include ignores non-existent refs silently' ' + setup_negotiation_tip server server 0 && + + git -C client fetch --quiet \ + --negotiation-restrict=alpha_1 \ + --negotiation-include=refs/tags/nonexistent \ + origin alpha_s beta_s 2>err && + test_must_be_empty err +' + +test_expect_success '--negotiation-include avoids duplicates with negotiator' ' + test_when_finished rm -f trace && + setup_negotiation_tip server server 
0 && + + ALPHA_1=$(git -C client rev-parse alpha_1) && + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \ + --negotiation-restrict=alpha_1 \ + --negotiation-include=refs/tags/alpha_1 \ + origin alpha_s beta_s && + + test_grep "fetch> have $ALPHA_1" trace >matches && + test_line_count = 1 matches +' + +test_expect_success '--negotiation-include avoids duplicates with v0' ' + test_when_finished rm -f trace && + setup_negotiation_tip server server 0 && + + ALPHA_1=$(git -C client rev-parse alpha_1) && + GIT_TRACE_PACKET="$(pwd)/trace" git -C client \ + -c protocol.version=0 fetch \ + --negotiation-restrict=alpha_1 \ + --negotiation-include=refs/tags/alpha_1 \ + origin alpha_s beta_s && + + test_grep "fetch> have $ALPHA_1" trace >matches && + test_line_count = 1 matches +' + test_expect_success SYMLINKS 'clone does not get confused by a D/F conflict' ' git init df-conflict && ( diff --git a/transport.c b/transport.c index a3051f6733633d..fa54928966e2f2 100644 --- a/transport.c +++ b/transport.c @@ -464,6 +464,7 @@ static int fetch_refs_via_pack(struct transport *transport, args.stateless_rpc = transport->stateless_rpc; args.server_options = transport->server_options; args.negotiation_restrict_tips = data->options.negotiation_restrict_tips; + args.negotiation_include_tips = data->options.negotiation_include_tips; args.reject_shallow_remote = transport->smart_options->reject_shallow; if (!data->finished_handshake) { @@ -495,7 +496,8 @@ static int fetch_refs_via_pack(struct transport *transport, transport->server_options, transport->stateless_rpc, data->fd, - data->options.acked_commits); + data->options.acked_commits, + data->options.negotiation_include_tips); ret = 0; } goto cleanup; @@ -983,6 +985,10 @@ static int disconnect_git(struct transport *transport) oid_array_clear(data->options.negotiation_restrict_tips); free(data->options.negotiation_restrict_tips); } + if (data->options.negotiation_include_tips) { + 
oid_array_clear(data->options.negotiation_include_tips); + free(data->options.negotiation_include_tips); + } list_objects_filter_release(&data->options.filter_options); oid_array_clear(&data->extra_have); oid_array_clear(&data->shallow); diff --git a/transport.h b/transport.h index cdeb33c16f82f6..97d905ecc03dc4 100644 --- a/transport.h +++ b/transport.h @@ -40,13 +40,14 @@ struct git_transport_options { /* * This is only used during fetch. See the documentation of - * negotiation_restrict_tips in struct fetch_pack_args. + * these member names in struct fetch_pack_args. * - * This field is only supported by transports that support connect or + * These fields are only supported by transports that support connect or * stateless_connect. Set this field directly instead of using * transport_set_option(). */ struct oid_array *negotiation_restrict_tips; + struct oid_array *negotiation_include_tips; /* * If allocated, whenever transport_fetch_refs() is called, add known From 6f37fecfed7633d47b2c0e16fde0a8ca89e45beb Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 19 May 2026 16:24:54 +0000 Subject: [PATCH 91/93] remote: add remote.*.negotiationInclude config Add a new 'remote..negotiationInclude' multi-valued config option that provides default values for --negotiation-include when no --negotiation-include arguments are specified over the command line. This is a mirror of how 'remote..negotiationRestrict' specifies defaults for the --negotiation-restrict arguments. Each value is either an exact ref name or a glob pattern whose tips should always be sent as 'have' lines during negotiation. The config values are resolved through the same resolve_negotiation_include() codepath as the CLI options. This option is additive with the normal negotiation process: the negotiation algorithm still runs and advertises its own selected commits, but the refs matching the config are sent unconditionally on top of those heuristically selected commits. 
Similar to the negotiationRestrict config, an empty value resets the value list to allow ignoring earlier config values, such as those that might be set in system or global config. Reviewed-by: Matthew John Cheetham Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/config/remote.adoc | 25 ++++++++++++++++ Documentation/fetch-options.adoc | 4 +++ builtin/fetch.c | 12 ++++++++ remote.c | 5 ++++ remote.h | 1 + t/t5510-fetch.sh | 49 ++++++++++++++++++++++++++++++++ 6 files changed, 96 insertions(+) diff --git a/Documentation/config/remote.adoc b/Documentation/config/remote.adoc index 4dcf81fbce4ed3..1951df154e565d 100644 --- a/Documentation/config/remote.adoc +++ b/Documentation/config/remote.adoc @@ -125,6 +125,31 @@ values are not used. Blank values signal to ignore all previous values, allowing a reset of the list from broader config scenarios. +remote..negotiationInclude:: + When negotiating with this remote during `git fetch`, the client + advertises a list of commits that exist locally. In repos with + many references, this list of "haves" can be truncated. Depending + on data shape, dropping certain references may be expensive. This + multi-valued config option specifies references, commit hashes, + or ref pattern globs whose tips should always be sent as "have" + commits during fetch negotiation with this remote. ++ +Each value is either an exact ref name (e.g. `refs/heads/release`), a +commit hash, or a glob pattern (e.g. `refs/heads/release/*`). The +pattern syntax is the same as for `--negotiation-include`. ++ +These config values are used as defaults for the `--negotiation-include` +command-line option. If `--negotiation-include` is specified on the +command line, then the config values are not used. 
++ +This option is additive with the normal negotiation process: the +negotiation algorithm still runs and advertises its own selected commits, +but the refs matching `remote..negotiationInclude` are sent +unconditionally on top of those heuristically selected commits. ++ +Blank values signal to ignore all previous values, allowing a reset of +the list from broader config scenarios. + remote..followRemoteHEAD:: How linkgit:git-fetch[1] should handle updates to `remotes//HEAD` when fetching using the configured refspecs of a remote. diff --git a/Documentation/fetch-options.adoc b/Documentation/fetch-options.adoc index 7b897a720228d2..8074004377c1ed 100644 --- a/Documentation/fetch-options.adoc +++ b/Documentation/fetch-options.adoc @@ -91,6 +91,10 @@ The pattern syntax is the same as for `--negotiation-restrict`. If `--negotiation-restrict` is used, the have set is first restricted by that option and then increased to include the tips specified by `--negotiation-include`. ++ +If this option is not specified on the command line, then any +`remote..negotiationInclude` config values for the current remote +are used instead. 
`--negotiate-only`:: Do not fetch anything from the server, and instead print the diff --git a/builtin/fetch.c b/builtin/fetch.c index ba56e9022bebcf..1af6500c1de506 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1634,6 +1634,18 @@ static struct transport *prepare_transport(struct remote *remote, int deepen, else warning(_("ignoring %s because the protocol does not support it"), "--negotiation-include"); + } else if (remote->negotiation_include.nr) { + if (transport->smart_options) { + add_negotiation_tips(&remote->negotiation_include, + &transport->smart_options->negotiation_include_tips, + "--negotiation-include"); + } else { + struct strbuf config_name = STRBUF_INIT; + strbuf_addf(&config_name, "remote.%s.negotiationInclude", remote->name); + warning(_("ignoring %s because the protocol does not support it"), + config_name.buf); + strbuf_release(&config_name); + } } return transport; } diff --git a/remote.c b/remote.c index 620086e16e279f..6fb5758820fabf 100644 --- a/remote.c +++ b/remote.c @@ -153,6 +153,7 @@ static struct remote *make_remote(struct remote_state *remote_state, refspec_init_fetch(&ret->fetch); string_list_init_dup(&ret->server_options); string_list_init_dup(&ret->negotiation_restrict); + string_list_init_dup(&ret->negotiation_include); ALLOC_GROW(remote_state->remotes, remote_state->remotes_nr + 1, remote_state->remotes_alloc); @@ -181,6 +182,7 @@ static void remote_clear(struct remote *remote) FREE_AND_NULL(remote->http_proxy_authmethod); string_list_clear(&remote->server_options, 0); string_list_clear(&remote->negotiation_restrict, 0); + string_list_clear(&remote->negotiation_include, 0); } static void add_merge(struct branch *branch, const char *name) @@ -567,6 +569,9 @@ static int handle_config(const char *key, const char *value, } else if (!strcmp(subkey, "negotiationrestrict")) { return parse_transport_option(key, value, &remote->negotiation_restrict); + } else if (!strcmp(subkey, "negotiationinclude")) { + return 
parse_transport_option(key, value, + &remote->negotiation_include); } else if (!strcmp(subkey, "followremotehead")) { const char *no_warn_branch; if (!strcmp(value, "never")) diff --git a/remote.h b/remote.h index e6ec37c3930355..d8809b6991a613 100644 --- a/remote.h +++ b/remote.h @@ -118,6 +118,7 @@ struct remote { struct string_list server_options; struct string_list negotiation_restrict; + struct string_list negotiation_include; enum follow_remote_head_settings follow_remote_head; const char *no_warn_branch; diff --git a/t/t5510-fetch.sh b/t/t5510-fetch.sh index bc2e2af9599d91..33f61ac12a7151 100755 --- a/t/t5510-fetch.sh +++ b/t/t5510-fetch.sh @@ -1587,6 +1587,55 @@ test_expect_success '--negotiation-include avoids duplicates with negotiator' ' test_line_count = 1 matches ' +test_expect_success 'remote..negotiationInclude used as default for --negotiation-include' ' + test_when_finished rm -f trace && + setup_negotiation_tip server server 0 && + + # test the reset of the list on an empty value + git -C client config --add remote.origin.negotiationInclude refs/tags/alpha_1 && + git -C client config --add remote.origin.negotiationInclude "" && + git -C client config --add remote.origin.negotiationInclude refs/tags/beta_1 && + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \ + --negotiation-restrict=beta_2 \ + origin alpha_s beta_s && + + ALPHA_1=$(git -C client rev-parse alpha_1) && + test_grep ! 
"fetch> have $ALPHA_1" trace && + BETA_1=$(git -C client rev-parse beta_1) && + test_grep "fetch> have $BETA_1" trace +' + +test_expect_success 'remote..negotiationInclude works with glob patterns' ' + test_when_finished rm -f trace && + setup_negotiation_tip server server 0 && + + git -C client config --add remote.origin.negotiationInclude "refs/tags/beta_*" && + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \ + --negotiation-restrict=alpha_1 \ + origin alpha_s beta_s && + + BETA_1=$(git -C client rev-parse beta_1) && + test_grep "fetch> have $BETA_1" trace && + BETA_2=$(git -C client rev-parse beta_2) && + test_grep "fetch> have $BETA_2" trace +' + +test_expect_success 'CLI --negotiation-include overrides remote..negotiationInclude' ' + test_when_finished rm -f trace && + setup_negotiation_tip server server 0 && + + git -C client config --add remote.origin.negotiationInclude refs/tags/beta_2 && + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \ + --negotiation-restrict=alpha_1 \ + --negotiation-include=refs/tags/beta_1 \ + origin alpha_s beta_s && + + BETA_1=$(git -C client rev-parse beta_1) && + test_grep "fetch> have $BETA_1" trace && + BETA_2=$(git -C client rev-parse beta_2) && + test_grep ! "fetch> have $BETA_2" trace +' + test_expect_success '--negotiation-include avoids duplicates with v0' ' test_when_finished rm -f trace && setup_negotiation_tip server server 0 && From a6d92c48e4426b88a427a75ed2c20d1daa5dc7f7 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 19 May 2026 16:24:55 +0000 Subject: [PATCH 92/93] send-pack: pass negotiation config in push When push.negotiate is enabled, 'git push' spawns a child 'git fetch --negotiate-only' process to find common commits. Pass --negotiation-include and --negotiation-restrict options from the 'remote..negotiationInclude' and 'remote..negotiationRestrict' config keys to this child process. 
When negotiationRestrict is configured, it replaces the default behavior of using all remote refs as negotiation tips. This allows the user to control which local refs are used for push negotiation. When negotiationInclude is configured, the specified ref patterns are passed as --negotiation-include to ensure their tips are always sent as 'have' lines during push negotiation. Reviewed-by: Matthew John Cheetham Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/config/remote.adoc | 6 ++++++ send-pack.c | 37 ++++++++++++++++++++++++++------ send-pack.h | 2 ++ t/t5516-fetch-push.sh | 30 ++++++++++++++++++++++++++ transport.c | 2 ++ 5 files changed, 70 insertions(+), 7 deletions(-) diff --git a/Documentation/config/remote.adoc b/Documentation/config/remote.adoc index 1951df154e565d..eb9c8a3c488448 100644 --- a/Documentation/config/remote.adoc +++ b/Documentation/config/remote.adoc @@ -122,6 +122,9 @@ command-line option. If `--negotiation-restrict` (or its synonym `--negotiation-tip`) is specified on the command line, then the config values are not used. + +These values also influence negotiation during `git push` if +`push.negotiate` is enabled. ++ Blank values signal to ignore all previous values, allowing a reset of the list from broader config scenarios. @@ -147,6 +150,9 @@ negotiation algorithm still runs and advertises its own selected commits, but the refs matching `remote..negotiationInclude` are sent unconditionally on top of those heuristically selected commits. + +These values also influence negotiation during `git push` if +`push.negotiate` is enabled. ++ Blank values signal to ignore all previous values, allowing a reset of the list from broader config scenarios. 
diff --git a/send-pack.c b/send-pack.c index 3d5d36ba3baa2e..d18e030ce8bbd9 100644 --- a/send-pack.c +++ b/send-pack.c @@ -433,28 +433,48 @@ static void reject_invalid_nonce(const char *nonce, int len) static void get_commons_through_negotiation(struct repository *r, const char *url, + const struct string_list *negotiation_include, + const struct string_list *negotiation_restrict, const struct ref *remote_refs, struct oid_array *commons) { struct child_process child = CHILD_PROCESS_INIT; const struct ref *ref; int len = r->hash_algo->hexsz + 1; /* hash + NL */ - int nr_negotiation_tip = 0; + int nr_negotiation = 0; child.git_cmd = 1; child.no_stdin = 1; child.out = -1; strvec_pushl(&child.args, "fetch", "--negotiate-only", NULL); - for (ref = remote_refs; ref; ref = ref->next) { - if (!is_null_oid(&ref->new_oid)) { + + if (negotiation_restrict && negotiation_restrict->nr) { + struct string_list_item *item; + for_each_string_list_item(item, negotiation_restrict) strvec_pushf(&child.args, "--negotiation-restrict=%s", - oid_to_hex(&ref->new_oid)); - nr_negotiation_tip++; + item->string); + nr_negotiation = negotiation_restrict->nr; + } else { + for (ref = remote_refs; ref; ref = ref->next) { + if (!is_null_oid(&ref->new_oid)) { + strvec_pushf(&child.args, "--negotiation-restrict=%s", + oid_to_hex(&ref->new_oid)); + nr_negotiation++; + } } } + + if (negotiation_include && negotiation_include->nr) { + struct string_list_item *item; + for_each_string_list_item(item, negotiation_include) + strvec_pushf(&child.args, "--negotiation-include=%s", + item->string); + nr_negotiation += negotiation_include->nr; + } + strvec_push(&child.args, url); - if (!nr_negotiation_tip) { + if (!nr_negotiation) { child_process_clear(&child); return; } @@ -528,7 +548,10 @@ int send_pack(struct repository *r, repo_config_get_bool(r, "push.negotiate", &push_negotiate); if (push_negotiate) { trace2_region_enter("send_pack", "push_negotiate", r); - get_commons_through_negotiation(r, args->url, 
remote_refs, &commons); + get_commons_through_negotiation(r, args->url, + args->negotiation_include, + args->negotiation_restrict, + remote_refs, &commons); trace2_region_leave("send_pack", "push_negotiate", r); } diff --git a/send-pack.h b/send-pack.h index c5ded2d2006f13..13850c98bb093a 100644 --- a/send-pack.h +++ b/send-pack.h @@ -18,6 +18,8 @@ struct repository; struct send_pack_args { const char *url; + const struct string_list *negotiation_include; + const struct string_list *negotiation_restrict; unsigned verbose:1, quiet:1, porcelain:1, diff --git a/t/t5516-fetch-push.sh b/t/t5516-fetch-push.sh index ac8447f21ed963..177cbc6c751fd2 100755 --- a/t/t5516-fetch-push.sh +++ b/t/t5516-fetch-push.sh @@ -254,6 +254,36 @@ test_expect_success 'push with negotiation does not attempt to fetch submodules' ! grep "Fetching submodule" err ' +test_expect_success 'push with negotiation and remote..negotiationInclude' ' + test_when_finished rm -rf negotiation_include && + mk_empty negotiation_include && + git push negotiation_include $the_first_commit:refs/remotes/origin/first_commit && + test_commit -C negotiation_include unrelated_commit && + git -C negotiation_include config receive.hideRefs refs/remotes/origin/first_commit && + test_when_finished "rm event" && + GIT_TRACE2_EVENT="$(pwd)/event" \ + git -c protocol.version=2 -c push.negotiate=1 \ + -c remote.negotiation_include.negotiationInclude=refs/heads/main \ + push negotiation_include refs/heads/main:refs/remotes/origin/main && + test_grep \"key\":\"total_rounds\" event && + grep_wrote 2 event # 1 commit, 1 tree +' + +test_expect_success 'push with negotiation and remote..negotiationRestrict' ' + test_when_finished rm -rf negotiation_restrict && + mk_empty negotiation_restrict && + git push negotiation_restrict $the_first_commit:refs/remotes/origin/first_commit && + test_commit -C negotiation_restrict unrelated_commit && + git -C negotiation_restrict config receive.hideRefs refs/remotes/origin/first_commit && + 
test_when_finished "rm event" && + GIT_TRACE2_EVENT="$(pwd)/event" \ + git -c protocol.version=2 -c push.negotiate=1 \ + -c remote.negotiation_restrict.negotiationRestrict=refs/heads/main \ + push negotiation_restrict refs/heads/main:refs/remotes/origin/main && + test_grep \"key\":\"total_rounds\" event && + grep_wrote 2 event # 1 commit, 1 tree +' + test_expect_success 'push without wildcard' ' mk_empty testrepo && diff --git a/transport.c b/transport.c index fa54928966e2f2..a2d8958cb82ab8 100644 --- a/transport.c +++ b/transport.c @@ -921,6 +921,8 @@ static int git_transport_push(struct transport *transport, struct ref *remote_re args.atomic = !!(flags & TRANSPORT_PUSH_ATOMIC); args.push_options = transport->push_options; args.url = transport->url; + args.negotiation_include = &transport->remote->negotiation_include; + args.negotiation_restrict = &transport->remote->negotiation_restrict; if (flags & TRANSPORT_PUSH_CERT_ALWAYS) args.push_cert = SEND_PACK_PUSH_CERT_ALWAYS; From c69baaf57ba26cf117c2b6793802877f19738b0d Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 27 May 2026 14:15:25 +0900 Subject: [PATCH 93/93] The 9th batch Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.55.0.adoc | 40 ++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/Documentation/RelNotes/2.55.0.adoc b/Documentation/RelNotes/2.55.0.adoc index 82577357a065d9..36e49d73803abf 100644 --- a/Documentation/RelNotes/2.55.0.adoc +++ b/Documentation/RelNotes/2.55.0.adoc @@ -64,6 +64,40 @@ Performance, Internal Implementation, Development Support etc. * "git merge-base" optimization. + * The limit_list() function that is one of the core parts of the + revision traversal infrastructure has been optimized by replacing + its use of a linear list with a priority queue. + + * In a lazy clone, "git cherry" and "git grep" often fetch necessary + blob objects one by one from promisor remotes. 
It has been corrected + to collect necessary object names and fetch them in bulk to gain + reasonable performance. + + * The logic to determine that branches in an octopus merge are + independent has been optimized. + + * The consistency checks for the files reference backend have been updated + to skip lock files earlier, avoiding unnecessary parsing of + intermediate files. + + * The negotiation tip options in "git fetch" have been reworked to + allow requiring certain refs to be sent as "have" lines, and to + restrict negotiation to a specific set of refs. + + * The repacking code has been refactored and compaction of MIDX layers + has been implemented, and an incremental strategy that does not require + all-into-one repacking has been introduced. + + * ODB transaction interface is being reworked to explicitly handle + object writes. + + * Add a new odb "in-memory" source that is meant to only hold + tentative objects (like the virtual blob object that represents the + working tree file used by "git blame"). + + * Many uses of the_repository have been updated to use a more + appropriate struct repository instance in setup.c codepath. + Fixes since v2.54 ----------------- @@ -184,3 +218,9 @@ Fixes since v2.54 (merge b635fd0725 kh/doc-log-decorate-list later to maint). (merge 65ea197dca jk/commit-sign-overflow-fix later to maint). (merge 3ccb16052a jk/apply-leakfix later to maint). + (merge 5e6e8dc786 tb/pseudo-merge-bugfixes later to maint). + (merge 6d09e798bc pb/doc-diff-format-updates later to maint). + (merge 34a891a2d3 rs/trailer-fold-optim later to maint). + (merge 499f9048e0 ps/t3903-cover-stash-include-untracked later to maint). + (merge b56ab270aa jk/sq-dequote-cleanup later to maint). + (merge 29d9fdcf10 rs/use-builtin-add-overflow-explicitly-on-clang later to maint).