diff --git a/compiler/rustc_query_impl/src/on_disk_cache.rs b/compiler/rustc_query_impl/src/on_disk_cache.rs index 8e4b3269402e..5a915933dc5b 100644 --- a/compiler/rustc_query_impl/src/on_disk_cache.rs +++ b/compiler/rustc_query_impl/src/on_disk_cache.rs @@ -713,7 +713,7 @@ impl<'a, 'tcx> Decodable> for Span { let len = BytePos::decode(decoder); let file_lo = decoder.file_index_to_file(file_lo_index); - let lo = file_lo.lines[line_lo - 1] + col_lo; + let lo = file_lo.lines(|lines| lines[line_lo - 1] + col_lo); let hi = lo + len; Span::new(lo, hi, ctxt, parent) diff --git a/compiler/rustc_query_system/src/ich/impls_syntax.rs b/compiler/rustc_query_system/src/ich/impls_syntax.rs index acf2990b6434..1fa085926767 100644 --- a/compiler/rustc_query_system/src/ich/impls_syntax.rs +++ b/compiler/rustc_query_system/src/ich/impls_syntax.rs @@ -69,7 +69,7 @@ impl<'a> HashStable> for SourceFile { external_src: _, start_pos, end_pos: _, - ref lines, + lines: _, ref multibyte_chars, ref non_narrow_chars, ref normalized_pos, @@ -79,11 +79,15 @@ impl<'a> HashStable> for SourceFile { src_hash.hash_stable(hcx, hasher); - // We only hash the relative position within this source_file - lines.len().hash_stable(hcx, hasher); - for &line in lines.iter() { - stable_byte_pos(line, start_pos).hash_stable(hcx, hasher); - } + // We are always in `Lines` form by the time we reach here. 
+ assert!(self.lines.borrow().is_lines()); + self.lines(|lines| { + // We only hash the relative position within this source_file + lines.len().hash_stable(hcx, hasher); + for &line in lines.iter() { + stable_byte_pos(line, start_pos).hash_stable(hcx, hasher); + } + }); // We only hash the relative position within this source_file multibyte_chars.len().hash_stable(hcx, hasher); diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs index adf5a7440480..6805d212f0b2 100644 --- a/compiler/rustc_span/src/lib.rs +++ b/compiler/rustc_span/src/lib.rs @@ -1222,6 +1222,52 @@ impl DebuggerVisualizerFile { } } +#[derive(Clone)] +pub enum SourceFileLines { + /// The source file lines, in decoded (random-access) form. + Lines(Vec), + + /// The source file lines, in undecoded difference list form. + Diffs(SourceFileDiffs), +} + +impl SourceFileLines { + pub fn is_lines(&self) -> bool { + matches!(self, SourceFileLines::Lines(_)) + } +} + +/// The source file lines in difference list form. This matches the form +/// used within metadata, which saves space by exploiting the fact that the +/// lines list is sorted and individual lines are usually not that long. +/// +/// We read it directly from metadata and only decode it into `Lines` form +/// when necessary. This is a significant performance win, especially for +/// small crates where very little of `std`'s metadata is used. +#[derive(Clone)] +pub struct SourceFileDiffs { + /// Position of the first line. Note that this is always encoded as a + /// `BytePos` because it is often much larger than any of the + /// differences. + line_start: BytePos, + + /// Always 1, 2, or 4. Always as small as possible, while being big + /// enough to hold the length of the longest line in the source file. + /// The 1 case is by far the most common. + bytes_per_diff: usize, + + /// The number of diffs encoded in `raw_diffs`. Always one less than + /// the number of lines in the source file. 
+ num_diffs: usize, + + /// The diffs in "raw" form. Each segment of `bytes_per_diff` length + /// encodes one little-endian diff. Note that they aren't LEB128 + /// encoded. This makes for much faster decoding. Besides, the + /// bytes_per_diff==1 case is by far the most common, and LEB128 + /// encoding has no effect on that case. + raw_diffs: Vec, +} + /// A single source in the [`SourceMap`]. #[derive(Clone)] pub struct SourceFile { @@ -1241,7 +1287,7 @@ pub struct SourceFile { /// The end position of this source in the `SourceMap`. pub end_pos: BytePos, /// Locations of lines beginnings in the source code. - pub lines: Vec, + pub lines: Lock, /// Locations of multi-byte characters in the source code. pub multibyte_chars: Vec, /// Width of characters that are not narrow in the source code. @@ -1262,64 +1308,66 @@ impl Encodable for SourceFile { s.emit_struct_field("start_pos", false, |s| self.start_pos.encode(s))?; s.emit_struct_field("end_pos", false, |s| self.end_pos.encode(s))?; s.emit_struct_field("lines", false, |s| { - let lines = &self.lines[..]; - // Store the length. - s.emit_u32(lines.len() as u32)?; - - if !lines.is_empty() { - // In order to preserve some space, we exploit the fact that - // the lines list is sorted and individual lines are - // probably not that long. Because of that we can store lines - // as a difference list, using as little space as possible - // for the differences. But note that the first line is - // always encoded as a `BytePos` because its position is - // often much larger than any of the differences. - let max_line_length = if lines.len() == 1 { - 0 - } else { - lines - .array_windows() - .map(|&[fst, snd]| snd - fst) - .map(|bp| bp.to_usize()) - .max() - .unwrap() - }; - - let bytes_per_diff: u8 = match max_line_length { - 0..=0xFF => 1, - 0x100..=0xFFFF => 2, - _ => 4, - }; - - // Encode the number of bytes used per diff. - bytes_per_diff.encode(s)?; - - // Encode the first element. 
- lines[0].encode(s)?; - - let diff_iter = lines.array_windows().map(|&[fst, snd]| snd - fst); - - match bytes_per_diff { - 1 => { - for diff in diff_iter { - (diff.0 as u8).encode(s)? + // We are always in `Lines` form by the time we reach here. + assert!(self.lines.borrow().is_lines()); + self.lines(|lines| { + // Store the length. + s.emit_u32(lines.len() as u32)?; + + // Compute and store the difference list. + if lines.len() != 0 { + let max_line_length = if lines.len() == 1 { + 0 + } else { + lines + .array_windows() + .map(|&[fst, snd]| snd - fst) + .map(|bp| bp.to_usize()) + .max() + .unwrap() + }; + + let bytes_per_diff: usize = match max_line_length { + 0..=0xFF => 1, + 0x100..=0xFFFF => 2, + _ => 4, + }; + + // Encode the number of bytes used per diff. + s.emit_u8(bytes_per_diff as u8)?; + + // Encode the first element. + lines[0].encode(s)?; + + // Encode the difference list. + let diff_iter = lines.array_windows().map(|&[fst, snd]| snd - fst); + let num_diffs = lines.len() - 1; + let mut raw_diffs; + match bytes_per_diff { + 1 => { + raw_diffs = Vec::with_capacity(num_diffs); + for diff in diff_iter { + raw_diffs.push(diff.0 as u8); + } } - } - 2 => { - for diff in diff_iter { - (diff.0 as u16).encode(s)? + 2 => { + raw_diffs = Vec::with_capacity(bytes_per_diff * num_diffs); + for diff in diff_iter { + raw_diffs.extend_from_slice(&(diff.0 as u16).to_le_bytes()); + } } - } - 4 => { - for diff in diff_iter { - diff.0.encode(s)? 
+ 4 => { + raw_diffs = Vec::with_capacity(bytes_per_diff * num_diffs); + for diff in diff_iter { + raw_diffs.extend_from_slice(&(diff.0 as u32).to_le_bytes()); + } } + _ => unreachable!(), } - _ => unreachable!(), + s.emit_raw_bytes(&raw_diffs)?; } - } - - Ok(()) + Ok(()) + }) })?; s.emit_struct_field("multibyte_chars", false, |s| self.multibyte_chars.encode(s))?; s.emit_struct_field("non_narrow_chars", false, |s| self.non_narrow_chars.encode(s))?; @@ -1336,36 +1384,27 @@ impl Decodable for SourceFile { let src_hash: SourceFileHash = Decodable::decode(d); let start_pos: BytePos = Decodable::decode(d); let end_pos: BytePos = Decodable::decode(d); - let lines: Vec = { + let lines = { let num_lines: u32 = Decodable::decode(d); - let mut lines = Vec::with_capacity(num_lines as usize); - if num_lines > 0 { // Read the number of bytes used per diff. - let bytes_per_diff: u8 = Decodable::decode(d); + let bytes_per_diff = d.read_u8() as usize; // Read the first element. - let mut line_start: BytePos = Decodable::decode(d); - lines.push(line_start); - - match bytes_per_diff { - 1 => lines.extend((1..num_lines).map(|_| { - line_start = line_start + BytePos(d.read_u8() as u32); - line_start - })), - 2 => lines.extend((1..num_lines).map(|_| { - line_start = line_start + BytePos(d.read_u16() as u32); - line_start - })), - 4 => lines.extend((1..num_lines).map(|_| { - line_start = line_start + BytePos(d.read_u32()); - line_start - })), - _ => unreachable!(), - } + let line_start: BytePos = Decodable::decode(d); + + // Read the difference list. 
+ let num_diffs = num_lines as usize - 1; + let raw_diffs = d.read_raw_bytes(bytes_per_diff * num_diffs).to_vec(); + SourceFileLines::Diffs(SourceFileDiffs { + line_start, + bytes_per_diff, + num_diffs, + raw_diffs, + }) + } else { + SourceFileLines::Lines(vec![]) } - - lines }; let multibyte_chars: Vec = Decodable::decode(d); let non_narrow_chars: Vec = Decodable::decode(d); @@ -1381,7 +1420,7 @@ impl Decodable for SourceFile { // Unused - the metadata decoder will construct // a new SourceFile, filling in `external_src` properly external_src: Lock::new(ExternalSource::Unneeded), - lines, + lines: Lock::new(lines), multibyte_chars, non_narrow_chars, normalized_pos, @@ -1426,7 +1465,7 @@ impl SourceFile { external_src: Lock::new(ExternalSource::Unneeded), start_pos, end_pos: Pos::from_usize(end_pos), - lines, + lines: Lock::new(SourceFileLines::Lines(lines)), multibyte_chars, non_narrow_chars, normalized_pos, @@ -1435,10 +1474,68 @@ impl SourceFile { } } + pub fn lines(&self, f: F) -> R + where + F: FnOnce(&[BytePos]) -> R, + { + let mut guard = self.lines.borrow_mut(); + match &*guard { + SourceFileLines::Lines(lines) => f(lines), + SourceFileLines::Diffs(SourceFileDiffs { + mut line_start, + bytes_per_diff, + num_diffs, + raw_diffs, + }) => { + // Convert from "diffs" form to "lines" form. 
+ let num_lines = num_diffs + 1; + let mut lines = Vec::with_capacity(num_lines); + lines.push(line_start); + + assert_eq!(*num_diffs, raw_diffs.len() / bytes_per_diff); + match bytes_per_diff { + 1 => { + lines.extend(raw_diffs.into_iter().map(|&diff| { + line_start = line_start + BytePos(diff as u32); + line_start + })); + } + 2 => { + lines.extend((0..*num_diffs).map(|i| { + let pos = bytes_per_diff * i; + let bytes = [raw_diffs[pos], raw_diffs[pos + 1]]; + let diff = u16::from_le_bytes(bytes); + line_start = line_start + BytePos(diff as u32); + line_start + })); + } + 4 => { + lines.extend((0..*num_diffs).map(|i| { + let pos = bytes_per_diff * i; + let bytes = [ + raw_diffs[pos], + raw_diffs[pos + 1], + raw_diffs[pos + 2], + raw_diffs[pos + 3], + ]; + let diff = u32::from_le_bytes(bytes); + line_start = line_start + BytePos(diff); + line_start + })); + } + _ => unreachable!(), + } + let res = f(&lines); + *guard = SourceFileLines::Lines(lines); + res + } + } + } + /// Returns the `BytePos` of the beginning of the current line. pub fn line_begin_pos(&self, pos: BytePos) -> BytePos { let line_index = self.lookup_line(pos).unwrap(); - self.lines[line_index] + self.lines(|lines| lines[line_index]) } /// Add externally loaded source. @@ -1495,8 +1592,8 @@ impl SourceFile { } let begin = { - let line = self.lines.get(line_number)?; - let begin: BytePos = *line - self.start_pos; + let line = self.lines(|lines| lines.get(line_number).copied())?; + let begin: BytePos = line - self.start_pos; begin.to_usize() }; @@ -1518,7 +1615,7 @@ impl SourceFile { } pub fn count_lines(&self) -> usize { - self.lines.len() + self.lines(|lines| lines.len()) } /// Finds the line containing the given position. The return value is the @@ -1526,11 +1623,11 @@ impl SourceFile { /// number. If the source_file is empty or the position is located before the /// first line, `None` is returned. 
pub fn lookup_line(&self, pos: BytePos) -> Option { - match self.lines.binary_search(&pos) { + self.lines(|lines| match lines.binary_search(&pos) { Ok(idx) => Some(idx), Err(0) => None, Err(idx) => Some(idx - 1), - } + }) } pub fn line_bounds(&self, line_index: usize) -> Range { @@ -1538,12 +1635,14 @@ impl SourceFile { return self.start_pos..self.end_pos; } - assert!(line_index < self.lines.len()); - if line_index == (self.lines.len() - 1) { - self.lines[line_index]..self.end_pos - } else { - self.lines[line_index]..self.lines[line_index + 1] - } + self.lines(|lines| { + assert!(line_index < lines.len()); + if line_index == (lines.len() - 1) { + lines[line_index]..self.end_pos + } else { + lines[line_index]..lines[line_index + 1] + } + }) } /// Returns whether or not the file contains the given `SourceMap` byte @@ -1605,7 +1704,7 @@ impl SourceFile { match self.lookup_line(pos) { Some(a) => { let line = a + 1; // Line numbers start at 1 - let linebpos = self.lines[a]; + let linebpos = self.lines(|lines| lines[a]); let linechpos = self.bytepos_to_file_charpos(linebpos); let col = chpos - linechpos; debug!("byte pos {:?} is on the line at byte pos {:?}", pos, linebpos); @@ -1624,7 +1723,7 @@ impl SourceFile { let (line, col_or_chpos) = self.lookup_file_pos(pos); if line > 0 { let col = col_or_chpos; - let linebpos = self.lines[line - 1]; + let linebpos = self.lines(|lines| lines[line - 1]); let col_display = { let start_width_idx = self .non_narrow_chars diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs index d60b4d3d021e..95ea70296170 100644 --- a/compiler/rustc_span/src/source_map.rs +++ b/compiler/rustc_span/src/source_map.rs @@ -331,7 +331,7 @@ impl SourceMap { name_hash: u128, source_len: usize, cnum: CrateNum, - mut file_local_lines: Vec, + file_local_lines: Lock, mut file_local_multibyte_chars: Vec, mut file_local_non_narrow_chars: Vec, mut file_local_normalized_pos: Vec, @@ -355,8 +355,15 @@ impl SourceMap { // form 
rather than pre-computing the offset into a local variable. The // compiler backend can optimize away the repeated computations in a // way that won't trigger overflow checks. - for pos in &mut file_local_lines { - *pos = (*pos - original_start_pos) + start_pos; + match &mut *file_local_lines.borrow_mut() { + SourceFileLines::Lines(lines) => { + for pos in lines { + *pos = (*pos - original_start_pos) + start_pos; + } + } + SourceFileLines::Diffs(SourceFileDiffs { line_start, .. }) => { + *line_start = (*line_start - original_start_pos) + start_pos; + } } for mbc in &mut file_local_multibyte_chars { mbc.pos = (mbc.pos - original_start_pos) + start_pos; diff --git a/compiler/rustc_span/src/tests.rs b/compiler/rustc_span/src/tests.rs index 11edcacc0d43..5b3915c33387 100644 --- a/compiler/rustc_span/src/tests.rs +++ b/compiler/rustc_span/src/tests.rs @@ -5,7 +5,7 @@ fn test_lookup_line() { let source = "abcdefghijklm\nabcdefghij\n...".to_owned(); let sf = SourceFile::new(FileName::Anon(0), source, BytePos(3), SourceFileHashAlgorithm::Sha256); - assert_eq!(sf.lines.as_slice(), &[BytePos(3), BytePos(17), BytePos(28)]); + sf.lines(|lines| assert_eq!(lines, &[BytePos(3), BytePos(17), BytePos(28)])); assert_eq!(sf.lookup_line(BytePos(0)), None); assert_eq!(sf.lookup_line(BytePos(3)), Some(0)); diff --git a/src/tools/clippy/clippy_lints/src/undocumented_unsafe_blocks.rs b/src/tools/clippy/clippy_lints/src/undocumented_unsafe_blocks.rs index 5a8677f90be4..025dd57e83aa 100644 --- a/src/tools/clippy/clippy_lints/src/undocumented_unsafe_blocks.rs +++ b/src/tools/clippy/clippy_lints/src/undocumented_unsafe_blocks.rs @@ -187,11 +187,13 @@ fn item_has_safety_comment(cx: &LateContext<'_>, item: &hir::Item<'_>) -> bool { && Lrc::ptr_eq(&unsafe_line.sf, &comment_start_line.sf) && let Some(src) = unsafe_line.sf.src.as_deref() { - comment_start_line.line < unsafe_line.line && text_has_safety_comment( - src, - &unsafe_line.sf.lines[comment_start_line.line + 1..=unsafe_line.line], - 
unsafe_line.sf.start_pos.to_usize(), - ) + unsafe_line.sf.lines(|lines| { + comment_start_line.line < unsafe_line.line && text_has_safety_comment( + src, + &lines[comment_start_line.line + 1..=unsafe_line.line], + unsafe_line.sf.start_pos.to_usize(), + ) + }) } else { // Problem getting source text. Pretend a comment was found. true @@ -249,11 +251,13 @@ fn span_from_macro_expansion_has_safety_comment(cx: &LateContext<'_>, span: Span && Lrc::ptr_eq(&unsafe_line.sf, &macro_line.sf) && let Some(src) = unsafe_line.sf.src.as_deref() { - macro_line.line < unsafe_line.line && text_has_safety_comment( - src, - &unsafe_line.sf.lines[macro_line.line + 1..=unsafe_line.line], - unsafe_line.sf.start_pos.to_usize(), - ) + unsafe_line.sf.lines(|lines| { + macro_line.line < unsafe_line.line && text_has_safety_comment( + src, + &lines[macro_line.line + 1..=unsafe_line.line], + unsafe_line.sf.start_pos.to_usize(), + ) + }) } else { // Problem getting source text. Pretend a comment was found. true @@ -276,11 +280,13 @@ fn span_in_body_has_safety_comment(cx: &LateContext<'_>, span: Span) -> bool { // Get the text from the start of function body to the unsafe block. // fn foo() { some_stuff; unsafe { stuff }; other_stuff; } // ^-------------^ - body_line.line < unsafe_line.line && text_has_safety_comment( - src, - &unsafe_line.sf.lines[body_line.line + 1..=unsafe_line.line], - unsafe_line.sf.start_pos.to_usize(), - ) + unsafe_line.sf.lines(|lines| { + body_line.line < unsafe_line.line && text_has_safety_comment( + src, + &lines[body_line.line + 1..=unsafe_line.line], + unsafe_line.sf.start_pos.to_usize(), + ) + }) } else { // Problem getting source text. Pretend a comment was found. 
true diff --git a/src/tools/clippy/clippy_utils/src/diagnostics.rs b/src/tools/clippy/clippy_utils/src/diagnostics.rs index 4e037d88494d..39595f589c70 100644 --- a/src/tools/clippy/clippy_utils/src/diagnostics.rs +++ b/src/tools/clippy/clippy_utils/src/diagnostics.rs @@ -283,10 +283,10 @@ pub fn span_lint_and_sugg_for_edges( { let split_idx = MAX_SUGGESTION_HIGHLIGHT_LINES / 2; let span_upper = sm.span_until_char( - sp.with_hi(line_upper.sf.lines[line_upper.line + split_idx]), + sp.with_hi(line_upper.sf.lines(|lines| lines[line_upper.line + split_idx])), '\n', ); - let span_bottom = sp.with_lo(line_bottom.sf.lines[line_bottom.line - split_idx]); + let span_bottom = sp.with_lo(line_bottom.sf.lines(|lines| lines[line_bottom.line - split_idx])); let sugg_lines_vec = sugg.lines().collect::>(); let sugg_upper = sugg_lines_vec[..split_idx].join("\n"); diff --git a/src/tools/clippy/clippy_utils/src/lib.rs b/src/tools/clippy/clippy_utils/src/lib.rs index adb37cc9d751..833f8cde63ab 100644 --- a/src/tools/clippy/clippy_utils/src/lib.rs +++ b/src/tools/clippy/clippy_utils/src/lib.rs @@ -1149,7 +1149,7 @@ fn line_span(cx: &T, span: Span) -> Span { let span = original_sp(span, DUMMY_SP); let source_map_and_line = cx.sess().source_map().lookup_line(span.lo()).unwrap(); let line_no = source_map_and_line.line; - let line_start = source_map_and_line.sf.lines[line_no]; + let line_start = source_map_and_line.sf.lines(|lines| lines[line_no]); span.with_lo(line_start) }