Skip to content

make fs::walkdir_sorted_new() sort entries by paths literally (#1928) #1931

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion gix-features/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ parallel = ["dep:crossbeam-channel", "dep:parking_lot"]
once_cell = ["dep:once_cell"]
## Makes facilities of the `walkdir` crate partially available.
## In conjunction with the **parallel** feature, directory walking will be parallel instead behind a compatible interface.
walkdir = ["dep:walkdir", "dep:gix-utils"]
walkdir = ["dep:walkdir", "dep:gix-path", "dep:gix-utils"]
#* an in-memory unidirectional pipe using `bytes` as efficient transfer mechanism.
io-pipe = ["dep:bytes"]
## provide a proven and fast `crc32` implementation.
Expand Down Expand Up @@ -106,6 +106,7 @@ required-features = ["io-pipe"]
gix-trace = { version = "^0.1.12", path = "../gix-trace" }

# for walkdir
gix-path = { version = "^0.10.15", path = "../gix-path", optional = true }
gix-utils = { version = "^0.2.0", path = "../gix-utils", optional = true }

# 'parallel' feature
Expand Down
36 changes: 24 additions & 12 deletions gix-features/src/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
//! along with runtime costs for maintaining a global [`rayon`](https://docs.rs/rayon) thread pool.
//!
//! For information on how to use the [`WalkDir`] type, have a look at
//! * [`jwalk::WalkDir`](https://docs.rs/jwalk/0.5.1/jwalk/type.WalkDir.html) if `parallel` feature is enabled
//! * [walkdir::WalkDir](https://docs.rs/walkdir/2.3.1/walkdir/struct.WalkDir.html) otherwise
// TODO: Move all this to `gix-fs` in a breaking change.

#[cfg(feature = "walkdir")]
mod shared {
Expand Down Expand Up @@ -217,19 +216,32 @@ pub mod walkdir {
///
/// Use `precompose_unicode` to represent the `core.precomposeUnicode` configuration option.
pub fn walkdir_sorted_new(root: &Path, _: Parallelism, precompose_unicode: bool) -> WalkDir {
fn ft_to_number(ft: std::fs::FileType) -> usize {
if ft.is_file() {
1
} else {
2
}
}
WalkDir {
inner: WalkDirImpl::new(root)
.sort_by(|a, b| {
ft_to_number(a.file_type())
.cmp(&ft_to_number(b.file_type()))
.then_with(|| a.file_name().cmp(b.file_name()))
let storage_a;
let storage_b;
let a_name = match gix_path::os_str_into_bstr(a.file_name()) {
Ok(f) => f,
Err(_) => {
storage_a = a.file_name().to_string_lossy();
storage_a.as_ref().into()
}
};
let b_name = match gix_path::os_str_into_bstr(b.file_name()) {
Ok(f) => f,
Err(_) => {
storage_b = b.file_name().to_string_lossy();
storage_b.as_ref().into()
}
};
// "common." < "common/" < "common0"
let common = a_name.len().min(b_name.len());
a_name[..common].cmp(&b_name[..common]).then_with(|| {
let a = a_name.get(common).or_else(|| a.file_type().is_dir().then_some(&b'/'));
let b = b_name.get(common).or_else(|| b.file_type().is_dir().then_some(&b'/'));
a.cmp(&b)
})
})
.into(),
precompose_unicode,
Expand Down
Binary file not shown.
17 changes: 17 additions & 0 deletions gix-ref/tests/fixtures/make_repo_for_1928_repro.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/usr/bin/env bash
set -eu -o pipefail

git init -q

mkdir -p .git/refs/heads/a
cat <<EOF >.git/packed-refs
# pack-refs with: peeled fully-peeled sorted
1111111111111111111111111111111111111111 refs/heads/a-
2222222222222222222222222222222222222222 refs/heads/a/b
3333333333333333333333333333333333333333 refs/heads/a0
EOF

mkdir -p .git/refs/heads/a
echo aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa >.git/refs/heads/a-
echo bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb >.git/refs/heads/a/b
echo cccccccccccccccccccccccccccccccccccccccc >.git/refs/heads/a0
49 changes: 41 additions & 8 deletions gix-ref/tests/refs/file/store/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ mod with_namespace {
.map(|r: gix_ref::Reference| r.name)
.collect::<Vec<_>>();
let expected_namespaced_refs = vec![
"refs/namespaces/bar/refs/multi-link",
"refs/namespaces/bar/refs/heads/multi-link-target1",
"refs/namespaces/bar/refs/multi-link",
"refs/namespaces/bar/refs/remotes/origin/multi-link-target3",
"refs/namespaces/bar/refs/tags/multi-link-target2",
];
Expand All @@ -50,8 +50,8 @@ mod with_namespace {
.map(|r| r.name.into_inner())
.collect::<Vec<_>>(),
[
"refs/namespaces/bar/refs/multi-link",
"refs/namespaces/bar/refs/heads/multi-link-target1",
"refs/namespaces/bar/refs/multi-link",
"refs/namespaces/bar/refs/tags/multi-link-target2"
]
);
Expand Down Expand Up @@ -149,8 +149,8 @@ mod with_namespace {
let packed = ns_store.open_packed_buffer()?;

let expected_refs = vec![
"refs/multi-link",
"refs/heads/multi-link-target1",
"refs/multi-link",
"refs/remotes/origin/multi-link-target3",
"refs/tags/multi-link-target2",
];
Expand Down Expand Up @@ -198,8 +198,8 @@ mod with_namespace {
.map(|r| r.name.into_inner())
.collect::<Vec<_>>(),
[
"refs/multi-link",
"refs/heads/multi-link-target1",
"refs/multi-link",
"refs/tags/multi-link-target2",
],
"loose iterators have namespace support as well"
Expand All @@ -214,8 +214,8 @@ mod with_namespace {
.map(|r| r.name.into_inner())
.collect::<Vec<_>>(),
[
"refs/namespaces/bar/refs/multi-link",
"refs/namespaces/bar/refs/heads/multi-link-target1",
"refs/namespaces/bar/refs/multi-link",
"refs/namespaces/bar/refs/tags/multi-link-target2",
"refs/namespaces/foo/refs/remotes/origin/HEAD"
],
Expand Down Expand Up @@ -291,14 +291,14 @@ fn loose_iter_with_broken_refs() -> crate::Result {
ref_paths,
vec![
"d1",
"loop-a",
"loop-b",
"multi-link",
"heads/A",
"heads/d1",
"heads/dt1",
"heads/main",
"heads/multi-link-target1",
"loop-a",
"loop-b",
"multi-link",
"remotes/origin/HEAD",
"remotes/origin/main",
"remotes/origin/multi-link-target3",
Expand Down Expand Up @@ -464,6 +464,39 @@ fn overlay_iter_reproduce_1850() -> crate::Result {
Ok(())
}

#[test]
fn overlay_iter_reproduce_1928() -> crate::Result {
let store = store_at("make_repo_for_1928_repro.sh")?;
let ref_names = store
.iter()?
.all()?
.map(|r| r.map(|r| (r.name.as_bstr().to_owned(), r.target)))
.collect::<Result<Vec<_>, _>>()?;
insta::assert_debug_snapshot!(ref_names, @r#"
[
(
"refs/heads/a-",
Object(
Sha1(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa),
),
),
(
"refs/heads/a/b",
Object(
Sha1(bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb),
),
),
(
"refs/heads/a0",
Object(
Sha1(cccccccccccccccccccccccccccccccccccccccc),
),
),
]
"#);
Ok(())
}

#[test]
fn overlay_iter_with_prefix_wont_allow_absolute_paths() -> crate::Result {
let store = store_with_packed_refs()?;
Expand Down
4 changes: 2 additions & 2 deletions gix-submodule/tests/file/baseline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ fn common_values_and_names_by_path() -> crate::Result {
"recursive-clone/submodule/.gitmodules",
"relative-clone/.gitmodules",
"relative-clone/submodule/.gitmodules",
"super/.gitmodules",
"super/submodule/.gitmodules",
"super-clone/.gitmodules",
"super-clone/submodule/.gitmodules",
"super/.gitmodules",
"super/submodule/.gitmodules",
"top-only-clone/.gitmodules"
]
.into_iter()
Expand Down
2 changes: 1 addition & 1 deletion gix/tests/gix/repository/reference.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,10 @@ mod iter_references {
"refs/heads/d1",
"refs/heads/dt1",
"refs/heads/main",
"refs/heads/multi-link-target1",
"refs/loop-a",
"refs/loop-b",
"refs/multi-link",
"refs/heads/multi-link-target1",
"refs/remotes/origin/HEAD",
"refs/remotes/origin/main",
"refs/remotes/origin/multi-link-target3",
Expand Down
Loading