diff --git a/Documentation/git-backfill.adoc b/Documentation/git-backfill.adoc index c0a3b80615e034..82d6a1969d0542 100644 --- a/Documentation/git-backfill.adoc +++ b/Documentation/git-backfill.adoc @@ -80,6 +80,10 @@ OPTIONS + You may also use commit-limiting options understood by linkgit:git-rev-list[1] such as `--first-parent`, `--since`, or pathspecs. ++ +Most `--filter=` options are incompatible with the purpose of +`git backfill`, but the `sparse:` filter is integrated to provide a +focused set of paths to download, distinct from the `--sparse` option. SEE ALSO -------- diff --git a/Documentation/git-pack-objects.adoc b/Documentation/git-pack-objects.adoc index b78175fbe1b97b..2fd79f13ad9407 100644 --- a/Documentation/git-pack-objects.adoc +++ b/Documentation/git-pack-objects.adoc @@ -402,9 +402,11 @@ will be automatically changed to version `1`. of filenames that cause collisions in Git's default name-hash algorithm. + -Incompatible with `--delta-islands`, `--shallow`, or `--filter`. The -`--use-bitmap-index` option will be ignored in the presence of -`--path-walk.` +Incompatible with `--delta-islands`. The `--use-bitmap-index` option is +ignored in the presence of `--path-walk`. The `--path-walk` option +supports the `--filter=` forms `blob:none`, `blob:limit=`, +`tree:0`, `object:type=`, and `sparse:`. These supported filter +types can be combined with the `combine:+` form. DELTA ISLANDS diff --git a/Documentation/technical/api-path-walk.adoc b/Documentation/technical/api-path-walk.adoc index a67de1b143ab5b..6e17b13d61b969 100644 --- a/Documentation/technical/api-path-walk.adoc +++ b/Documentation/technical/api-path-walk.adoc @@ -48,6 +48,13 @@ commits. applications could disable some options to make it simpler to walk the objects or to have fewer calls to `path_fn`. 
+ +Note that objects directly requested as pending objects (such as targets +of lightweight tags or other ref tips) are always emitted to `path_fn`, +even when the corresponding type flag is disabled. Only objects +discovered during the tree walk are subject to these type filters. This +ensures that objects specifically requested through the revision input +are never silently dropped. ++ While it is possible to walk only commits in this way, consumers would be better off using the revision walk API instead. diff --git a/builtin/backfill.c b/builtin/backfill.c index 7ffab2ea74f5cc..e71e0f4742c506 100644 --- a/builtin/backfill.c +++ b/builtin/backfill.c @@ -96,9 +96,10 @@ static void reject_unsupported_rev_list_options(struct rev_info *revs) if (revs->explicit_diff_merges) die(_("'%s' cannot be used with 'git backfill'"), "--diff-merges"); - if (revs->filter.choice) - die(_("'%s' cannot be used with 'git backfill'"), - "--filter"); + if (!path_walk_filter_compatible(&revs->filter)) + die(_("cannot backfill with these filter options")); + if (revs->filter.blob_limit_value) + die(_("cannot backfill with blob size limits")); } static int do_backfill(struct backfill_context *ctx) @@ -108,6 +109,7 @@ static int do_backfill(struct backfill_context *ctx) if (ctx->sparse) { CALLOC_ARRAY(info.pl, 1); + info.pl_sparse_trees = 1; if (get_sparse_checkout_patterns(info.pl)) { path_walk_info_clear(&info); return error(_("problem loading sparse-checkout")); diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index dd2480a73d2edf..b783dc62bc9b77 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -4754,7 +4754,7 @@ static int add_objects_by_path(const char *path, return 0; } -static void get_object_list_path_walk(struct rev_info *revs) +static int get_object_list_path_walk(struct rev_info *revs) { struct path_walk_info info = PATH_WALK_INFO_INIT; unsigned int processed = 0; @@ -4777,8 +4777,9 @@ static void get_object_list_path_walk(struct rev_info *revs) 
result = walk_objects_by_path(&info); trace2_region_leave("pack-objects", "path-walk", revs->repo); - if (result) - die(_("failed to pack objects via path-walk")); + path_walk_info_clear(&info); + + return result; } static void get_object_list(struct rev_info *revs, struct strvec *argv) @@ -4841,8 +4842,13 @@ static void get_object_list(struct rev_info *revs, struct strvec *argv) fn_show_object = show_object; if (path_walk) { - get_object_list_path_walk(revs); - } else { + if (get_object_list_path_walk(revs)) { + warning(_("failed to pack objects via path-walk")); + path_walk = 0; + } + } + + if (!path_walk) { if (prepare_revision_walk(revs)) die(_("revision walk setup failed")); mark_edges_uninteresting(revs, show_edge, sparse); @@ -5177,7 +5183,7 @@ int cmd_pack_objects(int argc, if (path_walk) { const char *option = NULL; - if (filter_options.choice) + if (!path_walk_filter_compatible(&filter_options)) option = "--filter"; else if (use_delta_islands) option = "--delta-islands"; @@ -5190,10 +5196,7 @@ int cmd_pack_objects(int argc, } if (path_walk) { strvec_push(&rp, "--boundary"); - /* - * We must disable the bitmaps because we are removing - * the --objects / --objects-edge[-aggressive] options. - */ + strvec_push(&rp, "--objects"); use_bitmap_index = 0; } else if (thin) { use_internal_rev_list = 1; diff --git a/path-walk.c b/path-walk.c index 6e426af4330893..3c67b359f97543 100644 --- a/path-walk.c +++ b/path-walk.c @@ -9,6 +9,9 @@ #include "hashmap.h" #include "hex.h" #include "list-objects.h" +#include "list-objects-filter-options.h" +#include "object-name.h" +#include "odb.h" #include "object.h" #include "oid-array.h" #include "path.h" @@ -178,11 +181,6 @@ static int add_tree_entries(struct path_walk_context *ctx, return -1; } - /* Skip this object if already seen. 
*/ - if (o->flags & SEEN) - continue; - o->flags |= SEEN; - strbuf_setlen(&path, base_len); strbuf_add(&path, entry.path, entry.pathlen); @@ -193,6 +191,40 @@ static int add_tree_entries(struct path_walk_context *ctx, if (type == OBJ_TREE) strbuf_addch(&path, '/'); + if (o->flags & SEEN) { + /* + * A tree with a shared OID may appear at multiple + * paths. Even though we already added this tree to + * the output at some other path, we still need to + * walk into it at this in-cone path to discover + * blobs that were not found at the earlier + * out-of-cone path. + * + * Only do this for paths not yet in our map, to + * avoid duplicate entries when the same tree OID + * appears at the same path across multiple commits. + */ + if (type == OBJ_TREE && ctx->info->pl && + ctx->info->pl->use_cone_patterns && + !ctx->info->pl_sparse_trees && + !strmap_contains(&ctx->paths_to_lists, path.buf)) { + int dtype; + enum pattern_match_result m; + m = path_matches_pattern_list(path.buf, path.len, + path.buf + base_len, + &dtype, + ctx->info->pl, + ctx->repo->index); + if (m != NOT_MATCHED) { + add_path_to_list(ctx, path.buf, type, + &entry.oid, + !(o->flags & UNINTERESTING)); + push_to_stack(ctx, path.buf); + } + } + continue; + } + if (ctx->info->pl) { int dtype; enum pattern_match_result match; @@ -202,7 +234,8 @@ static int add_tree_entries(struct path_walk_context *ctx, ctx->repo->index); if (ctx->info->pl->use_cone_patterns && - match == NOT_MATCHED) + match == NOT_MATCHED && + (type == OBJ_BLOB || ctx->info->pl_sparse_trees)) continue; else if (!ctx->info->pl->use_cone_patterns && type == OBJ_BLOB && @@ -237,6 +270,7 @@ static int add_tree_entries(struct path_walk_context *ctx, continue; } + o->flags |= SEEN; add_path_to_list(ctx, path.buf, type, &entry.oid, !(o->flags & UNINTERESTING)); @@ -248,6 +282,16 @@ static int add_tree_entries(struct path_walk_context *ctx, return 0; } +/* + * Paths starting with '/' (e.g., "/tags", "/tagged-blobs") hold objects that + * were 
directly requested by 'pending' objects rather than discovered during + * tree traversal. + */ +static int path_is_for_direct_objects(const char *path) +{ + return path[0] == '/'; +} + /* * For each path in paths_to_explore, walk the trees another level * and add any found blobs to the batch (but only if they exist and @@ -306,20 +350,47 @@ static int walk_path(struct path_walk_context *ctx, if (list->type == OBJ_BLOB && ctx->revs->prune_data.nr && + !path_is_for_direct_objects(path) && !match_pathspec(ctx->repo->index, &ctx->revs->prune_data, path, strlen(path), 0, NULL, 0)) return 0; - /* Evaluate function pointer on this data, if requested. */ - if ((list->type == OBJ_TREE && ctx->info->trees) || - (list->type == OBJ_BLOB && ctx->info->blobs) || - (list->type == OBJ_TAG && ctx->info->tags)) + /* + * Evaluate function pointer on this data, if requested. + * Ignore object type filters for tagged objects (path starts + * with `/`), first for blobs and then other types. + */ + if (list->type == OBJ_BLOB && + ctx->info->blob_limit && + !path_is_for_direct_objects(path)) { + struct oid_array filtered = OID_ARRAY_INIT; + + for (size_t i = 0; i < list->oids.nr; i++) { + unsigned long size; + + if (odb_read_object_info(ctx->repo->objects, + &list->oids.oid[i], + &size) != OBJ_BLOB || + size < ctx->info->blob_limit) + oid_array_append(&filtered, + &list->oids.oid[i]); + } + + if (filtered.nr) + ret = ctx->info->path_fn(path, &filtered, list->type, + ctx->info->path_fn_data); + oid_array_clear(&filtered); + } else if ((!ctx->info->strict_types && path_is_for_direct_objects(path)) || + (list->type == OBJ_TREE && ctx->info->trees) || + (list->type == OBJ_BLOB && ctx->info->blobs) || + (list->type == OBJ_TAG && ctx->info->tags)) { ret = ctx->info->path_fn(path, &list->oids, list->type, ctx->info->path_fn_data); + } - /* Expand data for children. */ - if (list->type == OBJ_TREE) { + /* Expand data for children, unless this is a direct-object path. 
*/ + if (list->type == OBJ_TREE && !path_is_for_direct_objects(path)) { for (size_t i = 0; i < list->oids.nr; i++) { ret |= add_tree_entries(ctx, path, @@ -370,14 +441,15 @@ static int setup_pending_objects(struct path_walk_info *info, { struct type_and_oid_list *tags = NULL; struct type_and_oid_list *tagged_blobs = NULL; + struct type_and_oid_list *tagged_trees = NULL; struct type_and_oid_list *root_tree_list = NULL; if (info->tags) CALLOC_ARRAY(tags, 1); - if (info->blobs) - CALLOC_ARRAY(tagged_blobs, 1); - if (info->trees) - root_tree_list = strmap_get(&ctx->paths_to_lists, root_path); + CALLOC_ARRAY(tagged_blobs, 1); + if (!info->trees) + CALLOC_ARRAY(tagged_trees, 1); + root_tree_list = strmap_get(&ctx->paths_to_lists, root_path); /* * Pending objects include: @@ -421,9 +493,15 @@ static int setup_pending_objects(struct path_walk_info *info, switch (obj->type) { case OBJ_TREE: - if (!info->trees) - continue; - if (pending->path) { + if (tagged_trees) { + /* + * Trees are disabled but pending trees + * should still be emitted. Collect them + * into a "/tagged-trees" list that + * bypasses the object type filter. + */ + oid_array_append(&tagged_trees->oids, &obj->oid); + } else if (pending->path) { char *path = *pending->path ? 
xstrfmt("%s/", pending->path) : xstrdup(""); add_path_to_list(ctx, path, OBJ_TREE, &obj->oid, 1); @@ -435,8 +513,6 @@ static int setup_pending_objects(struct path_walk_info *info, break; case OBJ_BLOB: - if (!info->blobs) - continue; if (pending->path) add_path_to_list(ctx, pending->path, OBJ_BLOB, &obj->oid, 1); else @@ -469,6 +545,18 @@ static int setup_pending_objects(struct path_walk_info *info, free(tagged_blobs); } } + if (tagged_trees) { + if (tagged_trees->oids.nr) { + const char *tagged_tree_path = "/tagged-trees"; + tagged_trees->type = OBJ_TREE; + tagged_trees->maybe_interesting = 1; + strmap_put(&ctx->paths_to_lists, tagged_tree_path, tagged_trees); + push_to_stack(ctx, tagged_tree_path); + } else { + oid_array_clear(&tagged_trees->oids); + free(tagged_trees); + } + } if (tags) { if (tags->oids.nr) { const char *tag_path = "/tags"; @@ -485,6 +573,123 @@ static int setup_pending_objects(struct path_walk_info *info, return 0; } +static int prepare_filters_one(struct path_walk_info *info, + struct list_objects_filter_options *options) +{ + switch (options->choice) { + case LOFC_DISABLED: + return 1; + + case LOFC_BLOB_NONE: + if (info) { + info->blobs = 0; + list_objects_filter_release(options); + } + return 1; + + case LOFC_BLOB_LIMIT: + if (info) { + if (!options->blob_limit_value) + info->blobs = 0; + else if (!info->blob_limit || + info->blob_limit > options->blob_limit_value) + info->blob_limit = options->blob_limit_value; + list_objects_filter_release(options); + } + return 1; + + case LOFC_TREE_DEPTH: + if (options->tree_exclude_depth) { + error(_("tree:%lu filter not supported by the path-walk API"), + options->tree_exclude_depth); + return 0; + } + if (info) { + info->trees = 0; + info->blobs = 0; + } + return 1; + + case LOFC_OBJECT_TYPE: + if (info) { + info->commits &= options->object_type == OBJ_COMMIT; + info->tags &= options->object_type == OBJ_TAG; + info->trees &= options->object_type == OBJ_TREE; + info->blobs &= options->object_type == 
OBJ_BLOB; + info->strict_types = 1; + list_objects_filter_release(options); + } + return 1; + + case LOFC_SPARSE_OID: + if (info) { + struct object_id sparse_oid; + struct repository *repo = info->revs->repo; + + if (info->pl) { + warning(_("sparse filter cannot be combined with existing sparse patterns")); + return 0; + } + + if (repo_get_oid_with_flags(repo, + options->sparse_oid_name, + &sparse_oid, + GET_OID_BLOB)) { + error(_("unable to access sparse blob in '%s'"), + options->sparse_oid_name); + return 0; + } + + CALLOC_ARRAY(info->pl, 1); + info->pl->use_cone_patterns = 1; + + if (add_patterns_from_blob_to_list(&sparse_oid, "", 0, + info->pl) < 0) { + clear_pattern_list(info->pl); + FREE_AND_NULL(info->pl); + error(_("unable to parse sparse filter data in '%s'"), + oid_to_hex(&sparse_oid)); + return 0; + } + + if (!info->pl->use_cone_patterns) { + clear_pattern_list(info->pl); + FREE_AND_NULL(info->pl); + warning(_("sparse filter is not cone-mode compatible")); + return 0; + } + } + return 1; + + case LOFC_COMBINE: + for (size_t i = 0; i < options->sub_nr; i++) { + if (!prepare_filters_one(info, &options->sub[i])) + return 0; + } + return 1; + + default: + error(_("object filter '%s' not supported by the path-walk API"), + list_objects_filter_spec(options)); + return 0; + } +} + +static int prepare_filters(struct path_walk_info *info, + struct list_objects_filter_options *options) +{ + if (!prepare_filters_one(info, options)) + return 0; + if (info) + list_objects_filter_release(options); + return 1; +} + +int path_walk_filter_compatible(struct list_objects_filter_options *options) +{ + return prepare_filters(NULL, options); +} + /** * Given the configuration of 'info', walk the commits based on 'info->revs' and * call 'info->path_fn' on each discovered path. 
@@ -512,6 +717,9 @@ int walk_objects_by_path(struct path_walk_info *info) trace2_region_enter("path-walk", "commit-walk", info->revs->repo); + if (!prepare_filters(info, &info->revs->filter)) + return -1; + CALLOC_ARRAY(commit_list, 1); commit_list->type = OBJ_COMMIT; @@ -532,15 +740,17 @@ int walk_objects_by_path(struct path_walk_info *info) push_to_stack(&ctx, root_path); /* - * Set these values before preparing the walk to catch - * lightweight tags pointing to non-commits and indexed objects. + * Ensure that prepare_revision_walk() keeps all pending objects + * even through an object type filter. */ - info->revs->blob_objects = info->blobs; - info->revs->tree_objects = info->trees; + info->revs->blob_objects = info->revs->tree_objects = 1; if (prepare_revision_walk(info->revs)) die(_("failed to setup revision walk")); + info->revs->blob_objects = info->blobs; + info->revs->tree_objects = info->trees; + /* * Walk trees to mark them as UNINTERESTING. * This is particularly important when 'edge_aggressive' is set. diff --git a/path-walk.h b/path-walk.h index 5ef5a8440e6b5e..a2652b2d465edf 100644 --- a/path-walk.h +++ b/path-walk.h @@ -36,12 +36,30 @@ struct path_walk_info { /** * Initialize which object types the path_fn should be called on. This * could also limit the walk to skip blobs if not set. + * + * Note: even when 'blobs' or 'trees' is disabled, objects that are + * directly requested as pending objects will still be emitted to + * path_fn. Only objects discovered during the tree walk are filtered by + * these flags. */ int commits; int trees; int blobs; int tags; + /** + * If 'strict_types' is nonzero, then direct object requests will no longer + * override the object type restrictions. + */ + int strict_types; + + /** + * If non-zero, specifies a maximum blob size. Blobs with a + * size equal to or greater than this limit will not be + * emitted unless included in 'pending'. 
+ */ + unsigned long blob_limit; + /** * When 'prune_all_uninteresting' is set and a path has all objects * marked as UNINTERESTING, then the path-walk will not visit those @@ -64,8 +82,14 @@ struct path_walk_info { * of the cone. If not in cone mode, then all tree paths will be * explored but the path_fn will only be called when the path matches * the sparse-checkout patterns. + * + * When 'pl_sparse_trees' is zero, the sparse patterns only restrict + * blobs and all trees are included in the walk output. This matches + * the behavior of the sparse:oid object filter. When nonzero, trees + * are also pruned by the sparse patterns (as used by backfill). */ struct pattern_list *pl; + int pl_sparse_trees; }; #define PATH_WALK_INFO_INIT { \ @@ -85,3 +109,10 @@ void path_walk_info_clear(struct path_walk_info *info); * Returns nonzero on an error. */ int walk_objects_by_path(struct path_walk_info *info); + +struct list_objects_filter_options; +/** + * Given a set of options for filtering objects, return 1 if the options + * are compatible with the path-walk API and 0 otherwise. 
+ */ +int path_walk_filter_compatible(struct list_objects_filter_options *options); diff --git a/t/helper/test-path-walk.c b/t/helper/test-path-walk.c index fe63002c2be27d..3f2b50a9aa16bd 100644 --- a/t/helper/test-path-walk.c +++ b/t/helper/test-path-walk.c @@ -4,6 +4,7 @@ #include "dir.h" #include "environment.h" #include "hex.h" +#include "list-objects-filter-options.h" #include "object-name.h" #include "object.h" #include "pretty.h" @@ -67,10 +68,12 @@ static int emit_block(const char *path, struct oid_array *oids, int cmd__path_walk(int argc, const char **argv) { - int res, stdin_pl = 0; + int res, stdin_pl = 0, pl_sparse_trees = -1; struct rev_info revs = REV_INFO_INIT; struct path_walk_info info = PATH_WALK_INFO_INIT; struct path_walk_test_data data = { 0 }; + struct list_objects_filter_options filter_options = + LIST_OBJECTS_FILTER_INIT; struct option options[] = { OPT_BOOL(0, "blobs", &info.blobs, N_("toggle inclusion of blob objects")), @@ -86,11 +89,14 @@ int cmd__path_walk(int argc, const char **argv) N_("toggle aggressive edge walk")), OPT_BOOL(0, "stdin-pl", &stdin_pl, N_("read a pattern list over stdin")), + OPT_BOOL(0, "pl-sparse-trees", &pl_sparse_trees, + N_("toggle pruning of trees by sparse patterns")), + OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), OPT_END(), }; setup_git_directory(); - revs.repo = the_repository; + repo_init_revisions(the_repository, &revs, NULL); argc = parse_options(argc, argv, NULL, options, path_walk_usage, @@ -101,6 +107,10 @@ int cmd__path_walk(int argc, const char **argv) else usage(path_walk_usage[0]); + /* Apply the filter after setup_revisions to avoid the --objects check. 
*/ + if (filter_options.choice) + list_objects_filter_copy(&revs.filter, &filter_options); + info.revs = &revs; info.path_fn = emit_block; info.path_fn_data = &data; @@ -108,6 +118,8 @@ int cmd__path_walk(int argc, const char **argv) if (stdin_pl) { struct strbuf in = STRBUF_INIT; CALLOC_ARRAY(info.pl, 1); + info.pl_sparse_trees = (pl_sparse_trees >= 0) ? + pl_sparse_trees : 1; info.pl->use_cone_patterns = 1; @@ -129,6 +141,7 @@ int cmd__path_walk(int argc, const char **argv) free(info.pl); } + list_objects_filter_release(&filter_options); release_revisions(&revs); return res; } diff --git a/t/perf/p5315-pack-objects-filter.sh b/t/perf/p5315-pack-objects-filter.sh new file mode 100755 index 00000000000000..b009039c8908fd --- /dev/null +++ b/t/perf/p5315-pack-objects-filter.sh @@ -0,0 +1,129 @@ +#!/bin/sh + +test_description='Tests pack-objects performance with filters and --path-walk' +. ./perf-lib.sh + +test_perf_large_repo + +test_expect_success 'setup filter inputs' ' + # Sample a few depth-2 directories from the test repo to build + # a cone-mode sparse-checkout definition. The sampling picks + # directories at evenly-spaced positions so the choice is stable + # and scales to repos of any shape. + + git ls-tree -d --name-only HEAD >top-dirs && + top_nr=$(wc -l depth2-dirs && + while read tdir + do + git ls-tree -d --name-only "HEAD:$tdir" 2>/dev/null | + sed "s|^|$tdir/|" >>depth2-dirs || return 1 + done sparse-patterns && + + git hash-object -w sparse-patterns >sparse-oid && + echo "Sparse cone: $first $mid" && + cat sparse-patterns && + test_set_prereq SPARSE_OID + elif test "$top_nr" -ge 1 + then + # Fallback: use a single top-level directory. 
+ first=$(sed -n "1p" top-dirs) && + { + echo "/*" && + echo "!/*/" && + echo "/$first/" + } >sparse-patterns && + + git hash-object -w sparse-patterns >sparse-oid && + echo "Sparse cone: $first" && + cat sparse-patterns && + test_set_prereq SPARSE_OID + fi +' + +test_perf 'repack (no filter)' ' + git pack-objects --stdout --no-reuse-delta --revs --all pk +' + +test_size 'repack size (no filter)' ' + test_file_size pk +' + +test_perf 'repack (no filter, --path-walk)' ' + git pack-objects --stdout --no-reuse-delta --revs --all --path-walk pk +' + +test_size 'repack size (no filter, --path-walk)' ' + test_file_size pk +' + +test_perf 'repack (blob:none)' ' + git pack-objects --stdout --no-reuse-delta --revs --all --filter=blob:none pk +' + +test_size 'repack size (blob:none)' ' + test_file_size pk +' + +test_perf 'repack (blob:none, --path-walk)' ' + git pack-objects --stdout --no-reuse-delta --revs --all --path-walk \ + --filter=blob:none pk +' + +test_size 'repack size (blob:none, --path-walk)' ' + test_file_size pk +' + +test_perf 'repack (sparse:oid)' \ + --prereq SPARSE_OID ' + git pack-objects --stdout --no-reuse-delta --revs --all \ + --filter=sparse:oid=$(cat sparse-oid) pk +' + +test_size 'repack size (sparse:oid)' \ + --prereq SPARSE_OID ' + test_file_size pk +' + +test_perf 'repack (sparse:oid, --path-walk)' \ + --prereq SPARSE_OID ' + git pack-objects --stdout --no-reuse-delta --revs --all --path-walk \ + --filter=sparse:oid=$(cat sparse-oid) pk +' + +test_size 'repack size (sparse:oid, --path-walk)' \ + --prereq SPARSE_OID ' + test_file_size pk +' + +test_done diff --git a/t/t5317-pack-objects-filter-objects.sh b/t/t5317-pack-objects-filter-objects.sh index 501d715b9a16b7..dddb79ba627036 100755 --- a/t/t5317-pack-objects-filter-objects.sh +++ b/t/t5317-pack-objects-filter-objects.sh @@ -478,4 +478,129 @@ test_expect_success 'verify pack-objects w/ --missing=allow-any' ' EOF ' +# Test that --path-walk produces the same object set as standard traversal +# 
when using sparse:oid filters with cone-mode patterns. +# +# The sparse:oid filter restricts only blobs, not trees. Both standard +# and path-walk should produce identical sets of blobs, commits, and trees. + +test_expect_success 'setup pw_sparse for path-walk comparison' ' + git init pw_sparse && + mkdir -p pw_sparse/inc/sub pw_sparse/exc/sub && + + for n in 1 2 + do + echo "inc $n" >pw_sparse/inc/file$n && + echo "inc sub $n" >pw_sparse/inc/sub/file$n && + echo "exc $n" >pw_sparse/exc/file$n && + echo "exc sub $n" >pw_sparse/exc/sub/file$n && + echo "root $n" >pw_sparse/root$n || return 1 + done && + + git -C pw_sparse add . && + git -C pw_sparse commit -m "first" && + + echo "inc 1 modified" >pw_sparse/inc/file1 && + echo "exc 1 modified" >pw_sparse/exc/file1 && + echo "root 1 modified" >pw_sparse/root1 && + git -C pw_sparse add . && + git -C pw_sparse commit -m "second" && + + # Cone-mode sparse pattern: include root + inc/ + printf "/*\n!/*/\n/inc/\n" | + git -C pw_sparse hash-object -w --stdin >sparse_oid +' + +test_expect_success 'sparse:oid with --path-walk produces same blobs' ' + oid=$(cat sparse_oid) && + + git -C pw_sparse pack-objects --revs --stdout \ + --filter=sparse:oid=$oid >standard.pack <<-EOF && + HEAD + EOF + git -C pw_sparse index-pack ../standard.pack && + git -C pw_sparse verify-pack -v ../standard.pack >standard_verify && + + git -C pw_sparse pack-objects --revs --stdout \ + --path-walk --filter=sparse:oid=$oid >pathwalk.pack <<-EOF && + HEAD + EOF + git -C pw_sparse index-pack ../pathwalk.pack && + git -C pw_sparse verify-pack -v ../pathwalk.pack >pathwalk_verify && + + # Blobs must match exactly + grep -E "^[0-9a-f]{40} blob" standard_verify | + awk "{print \$1}" | sort >standard_blobs && + grep -E "^[0-9a-f]{40} blob" pathwalk_verify | + awk "{print \$1}" | sort >pathwalk_blobs && + test_cmp standard_blobs pathwalk_blobs && + + # Commits must match exactly + grep -E "^[0-9a-f]{40} commit" standard_verify | + awk "{print \$1}" | sort 
>standard_commits && + grep -E "^[0-9a-f]{40} commit" pathwalk_verify | + awk "{print \$1}" | sort >pathwalk_commits && + test_cmp standard_commits pathwalk_commits +' + +test_expect_success 'sparse:oid with --path-walk includes all trees' ' + # The sparse:oid filter restricts only blobs, not trees. + # Both standard and path-walk should include the same trees. + grep -E "^[0-9a-f]{40} tree" standard_verify | + awk "{print \$1}" | sort >standard_trees && + grep -E "^[0-9a-f]{40} tree" pathwalk_verify | + awk "{print \$1}" | sort >pathwalk_trees && + + test_cmp standard_trees pathwalk_trees +' + +# Test the edge case where the same tree/blob OID appears at both an +# in-cone and out-of-cone path. When sibling directories have identical +# contents, they share a tree OID. The path-walk defers marking objects +# SEEN until after checking sparse patterns, so an object at an out-of-cone +# path can still be discovered at an in-cone path. + +test_expect_success 'setup pw_shared for shared OID across cone boundary' ' + git init pw_shared && + mkdir pw_shared/aaa pw_shared/zzz && + echo "shared content" >pw_shared/aaa/file && + echo "shared content" >pw_shared/zzz/file && + echo "root file" >pw_shared/rootfile && + git -C pw_shared add . 
&& + git -C pw_shared commit -m "aaa and zzz share tree OID" && + + # Verify they share a tree OID + aaa_tree=$(git -C pw_shared rev-parse HEAD:aaa) && + zzz_tree=$(git -C pw_shared rev-parse HEAD:zzz) && + test "$aaa_tree" = "$zzz_tree" && + + # Cone pattern: include root + zzz/ (not aaa/) + printf "/*\n!/*/\n/zzz/\n" | + git -C pw_shared hash-object -w --stdin >shared_sparse_oid +' + +test_expect_success 'shared tree OID: --path-walk blobs match standard' ' + oid=$(cat shared_sparse_oid) && + + git -C pw_shared pack-objects --revs --stdout \ + --filter=sparse:oid=$oid >shared_std.pack <<-EOF && + HEAD + EOF + git -C pw_shared index-pack ../shared_std.pack && + git -C pw_shared verify-pack -v ../shared_std.pack >shared_std_verify && + + git -C pw_shared pack-objects --revs --stdout \ + --path-walk --filter=sparse:oid=$oid >shared_pw.pack <<-EOF && + HEAD + EOF + git -C pw_shared index-pack ../shared_pw.pack && + git -C pw_shared verify-pack -v ../shared_pw.pack >shared_pw_verify && + + grep -E "^[0-9a-f]{40} blob" shared_std_verify | + awk "{print \$1}" | sort >shared_std_blobs && + grep -E "^[0-9a-f]{40} blob" shared_pw_verify | + awk "{print \$1}" | sort >shared_pw_blobs && + test_cmp shared_std_blobs shared_pw_blobs +' + test_done diff --git a/t/t5620-backfill.sh b/t/t5620-backfill.sh index 94f35ce1901671..d2ea68e065304d 100755 --- a/t/t5620-backfill.sh +++ b/t/t5620-backfill.sh @@ -15,6 +15,14 @@ test_expect_success 'backfill rejects unexpected arguments' ' test_grep "unrecognized argument: --unexpected-arg" err ' +test_expect_success 'backfill rejects incompatible filter options' ' + test_must_fail git backfill --objects --filter=tree:1 2>err && + test_grep "cannot backfill with these filter options" err && + + test_must_fail git backfill --objects --filter=blob:limit=10m 2>err && + test_grep "cannot backfill with blob size limits" err +' + # We create objects in the 'src' repo. 
test_expect_success 'setup repo for object creation' ' echo "{print \$1}" >print_1.awk && diff --git a/t/t6601-path-walk.sh b/t/t6601-path-walk.sh index 56bd1e3c5bec97..643d630d2a96ff 100755 --- a/t/t6601-path-walk.sh +++ b/t/t6601-path-walk.sh @@ -206,6 +206,43 @@ test_expect_success 'base & topic, sparse' ' test_cmp_sorted expect out ' +test_expect_success 'base & topic, sparse, no tree pruning' ' + cat >patterns <<-EOF && + /* + !/*/ + /left/ + EOF + + test-tool path-walk --stdin-pl --no-pl-sparse-trees \ + -- base topic out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tree::$(git rev-parse topic^{tree}) + 1:tree::$(git rev-parse base^{tree}) + 1:tree::$(git rev-parse base~1^{tree}) + 1:tree::$(git rev-parse base~2^{tree}) + 2:blob:a:$(git rev-parse base~2:a) + 3:tree:a/:$(git rev-parse base:a) + 4:tree:left/:$(git rev-parse base:left) + 4:tree:left/:$(git rev-parse base~2:left) + 5:blob:left/b:$(git rev-parse base~2:left/b) + 5:blob:left/b:$(git rev-parse base:left/b) + 6:tree:right/:$(git rev-parse topic:right) + 6:tree:right/:$(git rev-parse base~1:right) + 6:tree:right/:$(git rev-parse base~2:right) + blobs:3 + commits:4 + tags:0 + trees:10 + EOF + + test_cmp_sorted expect out +' + test_expect_success 'topic only' ' test-tool path-walk -- topic >out && @@ -415,4 +452,483 @@ test_expect_success 'trees are reported exactly once' ' test_line_count = 1 out-filtered ' +test_expect_success 'all, blob:none filter' ' + test-tool path-walk --filter=blob:none -- --all >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tag:/tags:$(git rev-parse refs/tags/first) + 1:tag:/tags:$(git rev-parse refs/tags/second.1) + 1:tag:/tags:$(git rev-parse refs/tags/second.2) + 1:tag:/tags:$(git rev-parse refs/tags/third) + 
1:tag:/tags:$(git rev-parse refs/tags/fourth) + 1:tag:/tags:$(git rev-parse refs/tags/tree-tag) + 1:tag:/tags:$(git rev-parse refs/tags/blob-tag) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{}) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{}) + 3:tree::$(git rev-parse topic^{tree}) + 3:tree::$(git rev-parse base^{tree}) + 3:tree::$(git rev-parse base~1^{tree}) + 3:tree::$(git rev-parse base~2^{tree}) + 3:tree::$(git rev-parse refs/tags/tree-tag^{}) + 3:tree::$(git rev-parse refs/tags/tree-tag2^{}) + 4:tree:a/:$(git rev-parse base:a) + 5:tree:child/:$(git rev-parse refs/tags/tree-tag:child) + 6:tree:left/:$(git rev-parse base:left) + 6:tree:left/:$(git rev-parse base~2:left) + 7:tree:right/:$(git rev-parse topic:right) + 7:tree:right/:$(git rev-parse base~1:right) + 7:tree:right/:$(git rev-parse base~2:right) + blobs:2 + commits:4 + tags:7 + trees:13 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'topic only, blob:none filter' ' + test-tool path-walk --filter=blob:none -- topic >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tree::$(git rev-parse topic^{tree}) + 1:tree::$(git rev-parse base~1^{tree}) + 1:tree::$(git rev-parse base~2^{tree}) + 2:tree:left/:$(git rev-parse base~2:left) + 3:tree:right/:$(git rev-parse topic:right) + 3:tree:right/:$(git rev-parse base~1:right) + 3:tree:right/:$(git rev-parse base~2:right) + blobs:0 + commits:3 + tags:0 + trees:7 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'all, blob:limit=0 filter' ' + test-tool path-walk --filter=blob:limit=0 -- --all >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tag:/tags:$(git rev-parse refs/tags/first) + 1:tag:/tags:$(git rev-parse refs/tags/second.1) + 1:tag:/tags:$(git rev-parse refs/tags/second.2) + 
1:tag:/tags:$(git rev-parse refs/tags/third) + 1:tag:/tags:$(git rev-parse refs/tags/fourth) + 1:tag:/tags:$(git rev-parse refs/tags/tree-tag) + 1:tag:/tags:$(git rev-parse refs/tags/blob-tag) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{}) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{}) + 3:tree::$(git rev-parse topic^{tree}) + 3:tree::$(git rev-parse base^{tree}) + 3:tree::$(git rev-parse base~1^{tree}) + 3:tree::$(git rev-parse base~2^{tree}) + 3:tree::$(git rev-parse refs/tags/tree-tag^{}) + 3:tree::$(git rev-parse refs/tags/tree-tag2^{}) + 4:tree:a/:$(git rev-parse base:a) + 5:tree:child/:$(git rev-parse refs/tags/tree-tag:child) + 6:tree:left/:$(git rev-parse base:left) + 6:tree:left/:$(git rev-parse base~2:left) + 7:tree:right/:$(git rev-parse topic:right) + 7:tree:right/:$(git rev-parse base~1:right) + 7:tree:right/:$(git rev-parse base~2:right) + blobs:2 + commits:4 + tags:7 + trees:13 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'all, blob:limit=3 filter' ' + test-tool path-walk --filter=blob:limit=3 -- --all >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tag:/tags:$(git rev-parse refs/tags/first) + 1:tag:/tags:$(git rev-parse refs/tags/second.1) + 1:tag:/tags:$(git rev-parse refs/tags/second.2) + 1:tag:/tags:$(git rev-parse refs/tags/third) + 1:tag:/tags:$(git rev-parse refs/tags/fourth) + 1:tag:/tags:$(git rev-parse refs/tags/tree-tag) + 1:tag:/tags:$(git rev-parse refs/tags/blob-tag) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{}) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{}) + 3:tree::$(git rev-parse topic^{tree}) + 3:tree::$(git rev-parse base^{tree}) + 3:tree::$(git rev-parse base~1^{tree}) + 3:tree::$(git rev-parse base~2^{tree}) + 3:tree::$(git rev-parse refs/tags/tree-tag^{}) + 3:tree::$(git rev-parse refs/tags/tree-tag2^{}) + 
4:blob:a:$(git rev-parse base~2:a) + 5:tree:a/:$(git rev-parse base:a) + 6:tree:child/:$(git rev-parse refs/tags/tree-tag:child) + 7:tree:left/:$(git rev-parse base:left) + 7:tree:left/:$(git rev-parse base~2:left) + 8:blob:left/b:$(git rev-parse base~2:left/b) + 9:tree:right/:$(git rev-parse topic:right) + 9:tree:right/:$(git rev-parse base~1:right) + 9:tree:right/:$(git rev-parse base~2:right) + 10:blob:right/c:$(git rev-parse base~2:right/c) + 11:blob:right/d:$(git rev-parse base~1:right/d) + blobs:6 + commits:4 + tags:7 + trees:13 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'all, tree:0 filter' ' + test-tool path-walk --filter=tree:0 -- --all >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tag:/tags:$(git rev-parse refs/tags/first) + 1:tag:/tags:$(git rev-parse refs/tags/second.1) + 1:tag:/tags:$(git rev-parse refs/tags/second.2) + 1:tag:/tags:$(git rev-parse refs/tags/third) + 1:tag:/tags:$(git rev-parse refs/tags/fourth) + 1:tag:/tags:$(git rev-parse refs/tags/tree-tag) + 1:tag:/tags:$(git rev-parse refs/tags/blob-tag) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{}) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{}) + 3:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag^{tree}) + 3:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag2) + blobs:2 + commits:4 + tags:7 + trees:2 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'topic only, tree:0 filter' ' + test-tool path-walk --filter=tree:0 -- topic >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + blobs:0 + commits:3 + tags:0 + trees:0 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'tree:1 filter is rejected' ' + test_must_fail test-tool path-walk --filter=tree:1 -- --all 2>err && + test_grep "tree:1 filter not 
supported by the path-walk API" err +' + +test_expect_success 'all, object:type=commit filter' ' + test-tool path-walk --filter=object:type=commit -- --all >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + blobs:0 + commits:4 + tags:0 + trees:0 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'all, object:type=tag filter' ' + test-tool path-walk --filter=object:type=tag -- --all >out && + + cat >expect <<-EOF && + 0:tag:/tags:$(git rev-parse refs/tags/first) + 0:tag:/tags:$(git rev-parse refs/tags/second.1) + 0:tag:/tags:$(git rev-parse refs/tags/second.2) + 0:tag:/tags:$(git rev-parse refs/tags/third) + 0:tag:/tags:$(git rev-parse refs/tags/fourth) + 0:tag:/tags:$(git rev-parse refs/tags/tree-tag) + 0:tag:/tags:$(git rev-parse refs/tags/blob-tag) + blobs:0 + commits:0 + tags:7 + trees:0 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'all, object:type=tree filter' ' + test-tool path-walk --filter=object:type=tree -- --all >out && + + cat >expect <<-EOF && + 0:tree::$(git rev-parse topic^{tree}) + 0:tree::$(git rev-parse base^{tree}) + 0:tree::$(git rev-parse base~1^{tree}) + 0:tree::$(git rev-parse base~2^{tree}) + 0:tree::$(git rev-parse refs/tags/tree-tag^{}) + 0:tree::$(git rev-parse refs/tags/tree-tag2^{}) + 1:tree:a/:$(git rev-parse base:a) + 2:tree:child/:$(git rev-parse refs/tags/tree-tag:child) + 3:tree:left/:$(git rev-parse base:left) + 3:tree:left/:$(git rev-parse base~2:left) + 4:tree:right/:$(git rev-parse topic:right) + 4:tree:right/:$(git rev-parse base~1:right) + 4:tree:right/:$(git rev-parse base~2:right) + blobs:0 + commits:0 + tags:0 + trees:13 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'all, object:type=blob filter' ' + test-tool path-walk --filter=object:type=blob -- --all >out && + + cat >expect <<-EOF && + 0:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{}) + 
0:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{}) + 1:blob:a:$(git rev-parse base~2:a) + 2:blob:left/b:$(git rev-parse base:left/b) + 2:blob:left/b:$(git rev-parse base~2:left/b) + 3:blob:right/c:$(git rev-parse base~2:right/c) + 3:blob:right/c:$(git rev-parse topic:right/c) + 4:blob:right/d:$(git rev-parse base~1:right/d) + blobs:8 + commits:0 + tags:0 + trees:0 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'all, combine:blob:none+tree:0 filter' ' + test-tool path-walk \ + --filter=combine:blob:none+tree:0 -- --all >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tag:/tags:$(git rev-parse refs/tags/first) + 1:tag:/tags:$(git rev-parse refs/tags/second.1) + 1:tag:/tags:$(git rev-parse refs/tags/second.2) + 1:tag:/tags:$(git rev-parse refs/tags/third) + 1:tag:/tags:$(git rev-parse refs/tags/fourth) + 1:tag:/tags:$(git rev-parse refs/tags/tree-tag) + 1:tag:/tags:$(git rev-parse refs/tags/blob-tag) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{}) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{}) + 3:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag^{tree}) + 3:tree:/tagged-trees:$(git rev-parse refs/tags/tree-tag2) + blobs:2 + commits:4 + tags:7 + trees:2 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'all, combine:object:type=blob+blob:limit=3 filter' ' + test-tool path-walk \ + --filter=combine:object:type=blob+blob:limit=3 \ + -- --all >out && + + cat >expect <<-EOF && + 0:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{}) + 0:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{}) + 1:blob:a:$(git rev-parse base~2:a) + 2:blob:left/b:$(git rev-parse base~2:left/b) + 3:blob:right/c:$(git rev-parse base~2:right/c) + 4:blob:right/d:$(git rev-parse base~1:right/d) + blobs:6 + commits:0 + tags:0 + trees:0 + EOF + + test_cmp_sorted expect out +' + 
+test_expect_success 'all, combine of disjoint object:types is empty' ' + test-tool path-walk \ + --filter=combine:object:type=blob+object:type=tree \ + -- --all >out && + + cat >expect <<-EOF && + blobs:0 + commits:0 + tags:0 + trees:0 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'combine: rejects unsupported subfilters' ' + test_must_fail test-tool path-walk \ + --filter=combine:tree:1+blob:none -- --all 2>err && + test_grep "tree:1 filter not supported by the path-walk API" err +' + +test_expect_success 'setup sparse filter blob' ' + # Cone-mode patterns: include root, exclude all dirs, include left/ + cat >patterns <<-\EOF && + /* + !/*/ + /left/ + EOF + sparse_oid=$(git hash-object -w -t blob patterns) +' + +test_expect_success 'all, sparse:oid filter' ' + test-tool path-walk --filter=sparse:oid=$sparse_oid -- --all >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tag:/tags:$(git rev-parse refs/tags/first) + 1:tag:/tags:$(git rev-parse refs/tags/second.1) + 1:tag:/tags:$(git rev-parse refs/tags/second.2) + 1:tag:/tags:$(git rev-parse refs/tags/third) + 1:tag:/tags:$(git rev-parse refs/tags/fourth) + 1:tag:/tags:$(git rev-parse refs/tags/tree-tag) + 1:tag:/tags:$(git rev-parse refs/tags/blob-tag) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{}) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{}) + 3:tree::$(git rev-parse topic^{tree}) + 3:tree::$(git rev-parse base^{tree}) + 3:tree::$(git rev-parse base~1^{tree}) + 3:tree::$(git rev-parse base~2^{tree}) + 3:tree::$(git rev-parse refs/tags/tree-tag^{}) + 3:tree::$(git rev-parse refs/tags/tree-tag2^{}) + 4:blob:a:$(git rev-parse base~2:a) + 5:blob:file2:$(git rev-parse refs/tags/tree-tag2^{}:file2) + 6:tree:a/:$(git rev-parse base:a) + 7:tree:child/:$(git rev-parse refs/tags/tree-tag:child) + 8:tree:left/:$(git rev-parse base:left) + 
8:tree:left/:$(git rev-parse base~2:left) + 9:blob:left/b:$(git rev-parse base~2:left/b) + 9:blob:left/b:$(git rev-parse base:left/b) + 10:tree:right/:$(git rev-parse topic:right) + 10:tree:right/:$(git rev-parse base~1:right) + 10:tree:right/:$(git rev-parse base~2:right) + blobs:6 + commits:4 + tags:7 + trees:13 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'topic only, sparse:oid filter' ' + test-tool path-walk --filter=sparse:oid=$sparse_oid -- topic >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tree::$(git rev-parse topic^{tree}) + 1:tree::$(git rev-parse base~1^{tree}) + 1:tree::$(git rev-parse base~2^{tree}) + 2:blob:a:$(git rev-parse base~2:a) + 3:tree:left/:$(git rev-parse base~2:left) + 4:blob:left/b:$(git rev-parse base~2:left/b) + 5:tree:right/:$(git rev-parse topic:right) + 5:tree:right/:$(git rev-parse base~1:right) + 5:tree:right/:$(git rev-parse base~2:right) + blobs:2 + commits:3 + tags:0 + trees:7 + EOF + + test_cmp_sorted expect out +' + +# Demonstrate the SEEN flag ordering issue: when the same tree/blob OID +# appears at two sibling paths where one is in-cone and the other is +# out-of-cone, the path-walk must still discover blobs at the in-cone +# path even when the shared tree OID was first encountered out-of-cone. +# Since sparse:oid includes all trees, the out-of-cone tree (aaa/) is +# walked first, and its blob is skipped. The path-walk then re-walks +# the same tree OID at the in-cone path (zzz/) to find the blob there. + +test_expect_success 'setup shared tree OID across cone boundary' ' + git checkout --orphan shared-tree && + git rm -rf . 
&& + mkdir aaa zzz && + echo "shared content" >aaa/file && + echo "shared content" >zzz/file && + echo "root file" >rootfile && + git add aaa zzz rootfile && + git commit -m "aaa and zzz have same tree OID" && + + # Verify they really share a tree OID + aaa_tree=$(git rev-parse HEAD:aaa) && + zzz_tree=$(git rev-parse HEAD:zzz) && + test "$aaa_tree" = "$zzz_tree" && + + # Cone pattern: include root + zzz/ (not aaa/) + cat >shared-patterns <<-\EOF && + /* + !/*/ + /zzz/ + EOF + shared_sparse_oid=$(git hash-object -w -t blob shared-patterns) +' + +test_expect_success 'sparse:oid with shared tree OID across cone boundary' ' + test-tool path-walk \ + --filter=sparse:oid=$shared_sparse_oid \ + -- shared-tree >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse shared-tree) + 1:tree::$(git rev-parse shared-tree^{tree}) + 2:blob:rootfile:$(git rev-parse shared-tree:rootfile) + 3:tree:aaa/:$(git rev-parse shared-tree:aaa) + 4:tree:zzz/:$(git rev-parse shared-tree:zzz) + 5:blob:zzz/file:$(git rev-parse shared-tree:zzz/file) + blobs:2 + commits:1 + tags:0 + trees:3 + EOF + + test_cmp_sorted expect out +' + test_done