Skip to content

Commit ad50624

Browse files
committed
Turns out that using the resource cache is too costly
It's a bit unclear why that is, but let's try to just read objects from the ODB instead. Unfortunately, even without going through a resource cache, we still have 50% performance loss. Turns out the pack-cache has this much of an influence, and we changed the variable names to `GIX_*` instead of `GITOXIDE_*`.
1 parent 2206723 commit ad50624

File tree

3 files changed

+62
-94
lines changed

3 files changed

+62
-94
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,5 @@ baseline-atomic: ## run very slow tests that single-step through all commits
1111

1212
test: ## run all tests with cargo
1313
RUST_BACKTRACE=1 cargo test --test crates-index-diff
14-
GITOXIDE_PACK_CACHE_MEMORY=1g RUST_BACKTRACE=1 cargo test --test baseline --release --features max-performance
14+
GIX_PACK_CACHE_MEMORY=1g RUST_BACKTRACE=1 cargo test --test baseline --release --features max-performance
1515

src/index/diff/delegate.rs

Lines changed: 60 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -6,25 +6,14 @@ use hashbrown::raw::RawTable;
66
use std::hash::Hasher;
77
use std::ops::Deref;
88

9+
#[derive(Default)]
910
pub(crate) struct Delegate {
1011
changes: Vec<Change>,
11-
resource_cache: gix::diff::blob::Platform,
1212
/// All changes that happen within a file, along the line-number it happens in .
1313
per_file_changes: Vec<(usize, Change)>,
1414
err: Option<Error>,
1515
}
1616

17-
impl Delegate {
18-
pub(crate) fn new(resource_cache: gix::diff::blob::Platform) -> Self {
19-
Delegate {
20-
resource_cache,
21-
changes: Default::default(),
22-
per_file_changes: Default::default(),
23-
err: None,
24-
}
25-
}
26-
}
27-
2817
impl Delegate {
2918
pub fn handle(
3019
&mut self,
@@ -76,82 +65,73 @@ impl Delegate {
7665
});
7766
}
7867
}
79-
Modification { entry_mode, .. } => {
68+
Modification {
69+
entry_mode,
70+
previous_id,
71+
id,
72+
..
73+
} => {
8074
if entry_mode.is_blob() {
81-
self.resource_cache.clear_resource_cache();
82-
let platform = change.diff(&mut self.resource_cache)?;
83-
if let Some((old, new)) = platform.resource_cache.resources() {
84-
let mut old_lines = AHashSet::with_capacity(1024);
85-
let location = change.location;
86-
for (number, line) in old
87-
.data
88-
.as_slice()
89-
.expect("present in modification")
90-
.lines()
91-
.enumerate()
92-
{
93-
old_lines.insert(Line(number, line));
94-
}
75+
let old = previous_id.object()?.into_blob();
76+
let new = id.object()?.into_blob();
77+
let mut old_lines = AHashSet::with_capacity(1024);
78+
let location = change.location;
79+
for (number, line) in old.data.lines().enumerate() {
80+
old_lines.insert(Line(number, line));
81+
}
9582

96-
// A RawTable is used to represent a Checksum -> CrateVersion map
97-
// because the checksum is already stored in the CrateVersion
98-
// and we want to avoid storing the checksum twice for performance reasons
99-
let mut new_versions = RawTable::with_capacity(old_lines.len().min(1024));
100-
let hasher = RandomState::new();
83+
// A RawTable is used to represent a Checksum -> CrateVersion map
84+
// because the checksum is already stored in the CrateVersion
85+
// and we want to avoid storing the checksum twice for performance reasons
86+
let mut new_versions = RawTable::with_capacity(old_lines.len().min(1024));
87+
let hasher = RandomState::new();
10188

102-
for (number, line) in new
103-
.data
104-
.as_slice()
105-
.expect("present in modification")
106-
.lines()
107-
.enumerate()
108-
{
109-
// first quickly check if the exact same line is already present in this file in that case we don't need to do anything else
110-
if old_lines.remove(&Line(number, line)) {
111-
continue;
112-
}
113-
// no need to check if the checksum already exists in the hashmap
114-
// as each checksum appears only once
115-
let new_version = version_from_json_line(line, location)?;
116-
new_versions.insert(
117-
hasher.hash_one(new_version.checksum),
118-
(number, new_version),
119-
|rehashed| hasher.hash_one(rehashed.1.checksum),
120-
);
89+
for (number, line) in new.data.lines().enumerate() {
90+
// first quickly check if the exact same line is already present in this file in that case we don't need to do anything else
91+
if old_lines.remove(&Line(number, line)) {
92+
continue;
12193
}
94+
// no need to check if the checksum already exists in the hashmap
95+
// as each checksum appears only once
96+
let new_version = version_from_json_line(line, location)?;
97+
new_versions.insert(
98+
hasher.hash_one(new_version.checksum),
99+
(number, new_version),
100+
|rehashed| hasher.hash_one(rehashed.1.checksum),
101+
);
102+
}
122103

123-
for line in old_lines.drain() {
124-
let old_version = version_from_json_line(&line, location)?;
125-
let new_version = new_versions
126-
.remove_entry(hasher.hash_one(old_version.checksum), |version| {
127-
version.1.checksum == old_version.checksum
128-
});
129-
match new_version {
130-
Some((_, new_version)) => {
131-
let change = match (old_version.yanked, new_version.yanked) {
132-
(true, false) => Change::Unyanked(new_version),
133-
(false, true) => Change::Yanked(new_version),
134-
_ => continue,
135-
};
136-
self.per_file_changes.push((line.0, change))
137-
}
138-
None => self
139-
.per_file_changes
140-
.push((line.0, Change::VersionDeleted(old_version))),
104+
for line in old_lines.drain() {
105+
let old_version = version_from_json_line(&line, location)?;
106+
let new_version = new_versions
107+
.remove_entry(hasher.hash_one(old_version.checksum), |version| {
108+
version.1.checksum == old_version.checksum
109+
});
110+
match new_version {
111+
Some((_, new_version)) => {
112+
let change = match (old_version.yanked, new_version.yanked) {
113+
(true, false) => Change::Unyanked(new_version),
114+
(false, true) => Change::Yanked(new_version),
115+
_ => continue,
116+
};
117+
self.per_file_changes.push((line.0, change))
141118
}
119+
None => self
120+
.per_file_changes
121+
.push((line.0, Change::VersionDeleted(old_version))),
142122
}
143-
for (number, version) in new_versions.drain() {
144-
let change = if version.yanked {
145-
Change::AddedAndYanked(version)
146-
} else {
147-
Change::Added(version)
148-
};
149-
self.per_file_changes.push((number, change));
150-
}
151-
self.per_file_changes.sort_by_key(|t| t.0);
152-
self.changes
153-
.extend(self.per_file_changes.drain(..).map(|t| t.1));
154123
}
124+
for (number, version) in new_versions.drain() {
125+
let change = if version.yanked {
126+
Change::AddedAndYanked(version)
127+
} else {
128+
Change::Added(version)
129+
};
130+
self.per_file_changes.push((number, change));
131+
}
132+
self.per_file_changes.sort_by_key(|t| t.0);
133+
self.changes
134+
.extend(self.per_file_changes.drain(..).map(|t| t.1));
155135
}
156136
}
157137
}

src/index/diff/mod.rs

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,6 @@ pub enum Error {
3434
RevParse(#[from] gix::revision::spec::parse::Error),
3535
#[error(transparent)]
3636
DiffRewrites(#[from] gix::diff::new_rewrites::Error),
37-
#[error(transparent)]
38-
DiffResourceCache(#[from] gix::diff::resource_cache::Error),
3937
#[error("Couldn't find blob that showed up when diffing trees")]
4038
FindObject(#[from] gix::object::find::existing::Error),
4139
#[error("Couldn't get the tree of a commit for diffing purposes")]
@@ -240,24 +238,14 @@ impl Index {
240238
};
241239
let from = into_tree(from.into())?;
242240
let to = into_tree(to.into())?;
243-
let mut delegate = Delegate::new(self.resource_cache()?);
241+
let mut delegate = Delegate::default();
244242
from.changes()?
245243
.track_filename()
246244
.track_rewrites(None)
247245
.for_each_to_obtain_tree(&to, |change| delegate.handle(change))?;
248246
delegate.into_result()
249247
}
250248

251-
fn resource_cache(&self) -> Result<gix::diff::blob::Platform, Error> {
252-
Ok(gix::diff::resource_cache(
253-
&self.repo,
254-
&gix::index::State::new(self.repo.object_hash()),
255-
gix::diff::blob::pipeline::Mode::ToGit,
256-
gix::worktree::stack::state::attributes::Source::IdMapping,
257-
Default::default(),
258-
)?)
259-
}
260-
261249
/// Similar to [`Self::changes()`], but requires `ancestor_commit` and `current_commit` objects to be provided
262250
/// with `ancestor_commit` being in the ancestry of `current_commit`.
263251
///

0 commit comments

Comments
 (0)