rust-lang · bors · Aug 29, 2021 · Jul 22, 2021 · Aug 21, 2021 · Aug 21, 2021
diff --git a/src/librustdoc/html/length_limit.rs b/src/librustdoc/html/length_limit.rs
@@ -0,0 +1,119 @@
+//! See [`HtmlWithLimit`].
+
+use std::fmt::Write;
+use std::ops::ControlFlow;
+
+use crate::html::escape::Escape;
+
+/// A buffer that allows generating HTML with a length limit.
+///
+/// This buffer ensures that:
+///
+/// * all tags are closed,
+/// * tags are closed in the reverse order of when they were opened (i.e., the correct HTML order),
+/// * no tags are left empty (e.g., `<em></em>`) due to the length limit being reached,
+/// * all text is escaped.
+#[derive(Debug)]
+pub(super) struct HtmlWithLimit {
+    buf: String, // the HTML generated so far (escaped text plus any written tags)
+    len: usize, // unescaped byte length of the text pushed so far; tags are not counted
+    limit: usize, // `push()` rejects text that would make `len` exceed this value
+    /// A list of tags that have been requested to be opened via [`Self::open_tag()`]
+    /// but have not actually been pushed to `buf` yet. This ensures that tags are not
+    /// left empty (e.g., `<em></em>`) due to the length limit being reached.
+    queued_tags: Vec<&'static str>,
+    /// A list of tags whose opening markup was written to `buf` and that are not yet closed.
+    unclosed_tags: Vec<&'static str>,
+}
+
+impl HtmlWithLimit {
+    /// Construct an empty buffer that accepts at most `length_limit` bytes of text.
+    pub(super) fn new(length_limit: usize) -> Self {
+        // Larger limits are not preallocated, to avoid reserving tons of memory up front.
+        const MAX_PREALLOCATION: usize = 1000;
+        // Otherwise the limit is a good heuristic for the initial allocation size:
+        // measurements showed that it ended up using less memory than `String::new`.
+        // See https://github.com/rust-lang/rust/pull/88173#discussion_r692531631 for more.
+        let storage = if length_limit <= MAX_PREALLOCATION {
+            String::with_capacity(length_limit)
+        } else {
+            String::new()
+        };
+        Self {
+            buf: storage,
+            len: 0,
+            limit: length_limit,
+            queued_tags: Vec::new(),
+            unclosed_tags: Vec::new(),
+        }
+    }
+
+    /// Close any tags that are still open, then consume the buffer and return its HTML.
+    pub(super) fn finish(mut self) -> String {
+        self.close_all_tags();
+        let Self { buf: html, .. } = self;
+        html
+    }
+
+    /// Append `text` to the buffer as escaped HTML, provided that it fits within the limit.
+    ///
+    /// If `text` does not fit, nothing is written and [`ControlFlow::Break`] is returned.
+    pub(super) fn push(&mut self, text: &str) -> ControlFlow<(), ()> {
+        let new_len = self.len + text.len();
+        if new_len <= self.limit {
+            // Text is about to be emitted, so the queued opening tags can be written first.
+            self.flush_queue();
+            write!(self.buf, "{}", Escape(text)).unwrap();
+            self.len = new_len;
+            ControlFlow::CONTINUE
+        } else {
+            ControlFlow::BREAK
+        }
+    }
+
+    /// Queue `tag_name` to be opened just before the next text that is written.
+    ///
+    /// **Note:** HTML attributes have not yet been implemented.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `tag_name` contains anything other than lowercase ASCII letters.
+    pub(super) fn open_tag(&mut self, tag_name: &'static str) {
+        let is_valid_name = tag_name.chars().all(|ch| ch.is_ascii_lowercase());
+        assert!(is_valid_name, "tag_name contained non-alphabetic chars: {:?}", tag_name);
+        self.queued_tags.push(tag_name);
+    }
+
+    /// Close the most recently opened HTML tag, dropping it if it was never written.
+    pub(super) fn close_tag(&mut self) {
+        // A still-queued tag is the most recently opened one and is not in `buf` yet:
+        // discard it rather than closing an older tag that this call does not match.
+        if self.queued_tags.pop().is_some() {
+            return;
+        }
+        // Having nothing to close is valid (e.g. `close_tag()` called more often than
+        // `open_tag()`), so the call is then a no-op instead of a panic.
+        if let Some(tag_name) = self.unclosed_tags.pop() {
+            write!(self.buf, "</{}>", tag_name).unwrap();
+        }
+    }
+
+    /// Write every queued tag to `buf`, oldest first, and move them all to `unclosed_tags`.
+    fn flush_queue(&mut self) {
+        let Self { buf, queued_tags, unclosed_tags, .. } = self;
+        for pending in queued_tags.iter().copied() {
+            write!(buf, "<{}>", pending).unwrap();
+        }
+        unclosed_tags.append(queued_tags);
+    }
+
+    /// Close every tag in `unclosed_tags`, most recently opened first.
+    fn close_all_tags(&mut self) {
+        while !self.unclosed_tags.is_empty() {
+            self.close_tag();
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests;
diff --git a/src/librustdoc/html/length_limit/tests.rs b/src/librustdoc/html/length_limit/tests.rs
@@ -0,0 +1,120 @@
+use super::*;
+
+#[test]
+fn empty() {
+    assert_eq!(HtmlWithLimit::new(0).finish(), ""); // nothing pushed, nothing emitted
+    assert_eq!(HtmlWithLimit::new(60).finish(), ""); // the limit itself adds no output
+}
+
+#[test]
+fn basic() {
+    let mut buf = HtmlWithLimit::new(60);
+    buf.push("Hello ");
+    buf.open_tag("em"); // only queued here; `<em>` is written by the next successful push
+    buf.push("world");
+    buf.close_tag();
+    buf.push("!");
+    assert_eq!(buf.finish(), "Hello <em>world</em>!");
+}
+
+#[test]
+fn no_tags() {
+    let mut buf = HtmlWithLimit::new(60);
+    buf.push("Hello");
+    buf.push(" world!"); // appended directly after the previous text
+    assert_eq!(buf.finish(), "Hello world!");
+}
+
+#[test]
+fn limit_0() {
+    let mut buf = HtmlWithLimit::new(0); // any non-empty text exceeds this limit
+    buf.push("Hello "); // rejected: 6 bytes do not fit
+    buf.open_tag("em"); // queued, but never written because no push succeeds
+    buf.push("world");
+    buf.close_tag();
+    buf.push("!");
+    assert_eq!(buf.finish(), "");
+}
+
+#[test]
+fn exactly_limit() {
+    let mut buf = HtmlWithLimit::new(12); // 6 + 5 + 1 bytes of text; tags are not counted
+    buf.push("Hello ");
+    buf.open_tag("em");
+    buf.push("world");
+    buf.close_tag();
+    buf.push("!"); // brings the total to exactly 12, which is still accepted
+    assert_eq!(buf.finish(), "Hello <em>world</em>!");
+}
+
+#[test]
+fn multiple_nested_tags() {
+    let mut buf = HtmlWithLimit::new(60);
+    buf.open_tag("p");
+    buf.push("This is a ");
+    buf.open_tag("em");
+    buf.push("paragraph");
+    buf.open_tag("strong");
+    buf.push("!");
+    buf.close_tag(); // </strong>
+    buf.close_tag(); // </em>
+    buf.close_tag(); // </p>
+    assert_eq!(buf.finish(), "<p>This is a <em>paragraph<strong>!</strong></em></p>");
+}
+
+#[test]
+fn forgot_to_close_tags() {
+    let mut buf = HtmlWithLimit::new(60);
+    buf.open_tag("p");
+    buf.push("This is a ");
+    buf.open_tag("em");
+    buf.push("paragraph");
+    buf.open_tag("strong");
+    buf.push("!"); // no `close_tag()` calls follow: `finish()` must close all three tags
+    assert_eq!(buf.finish(), "<p>This is a <em>paragraph<strong>!</strong></em></p>");
+}
+
+#[test]
+fn past_the_limit() {
+    let mut buf = HtmlWithLimit::new(20); // fits three 6-byte "word#N" texts (18 bytes)
+    buf.open_tag("p");
+    (0..10).try_for_each(|n| {
+        buf.open_tag("strong"); // the 4th one stays queued and is never written
+        buf.push("word#")?; // 4th iteration: 18 + 5 > 20, so this breaks out of the loop
+        buf.push(&n.to_string())?;
+        buf.close_tag();
+        ControlFlow::CONTINUE
+    });
+    buf.close_tag();
+    assert_eq!(
+        buf.finish(),
+        "<p>\
+             <strong>word#0</strong>\
+             <strong>word#1</strong>\
+             <strong>word#2</strong>\
+             </p>"
+    );
+}
+
+#[test]
+fn quickly_past_the_limit() {
+    let mut buf = HtmlWithLimit::new(6);
+    buf.open_tag("p");
+    buf.push("Hello"); // 5 bytes: fits within the limit of 6
+    buf.push(" World"); // 5 + 6 > 6: rejected, nothing is written
+    // intentionally not closing <p> before finishing
+    assert_eq!(buf.finish(), "<p>Hello</p>");
+}
+
+#[test]
+fn close_too_many() {
+    let mut buf = HtmlWithLimit::new(60);
+    buf.open_tag("p");
+    buf.push("Hello");
+    buf.close_tag(); // closes `<p>`
+    // This call does not panic because there are valid cases
+    // where `close_tag()` is called with no tags left to close.
+    // So `close_tag()` does nothing in this case.
+    buf.close_tag();
+    assert_eq!(buf.finish(), "<p>Hello</p>");
+}
diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs
@@ -23,19 +23,21 @@ use rustc_hir::HirId;
 use rustc_middle::ty::TyCtxt;
 use rustc_span::edition::Edition;
 use rustc_span::Span;
+
 use std::borrow::Cow;
 use std::cell::RefCell;
 use std::collections::VecDeque;
 use std::default::Default;
 use std::fmt::Write;
-use std::ops::Range;
+use std::ops::{ControlFlow, Range};
 use std::str;
 
 use crate::clean::RenderedLink;
 use crate::doctest;
 use crate::html::escape::Escape;
 use crate::html::format::Buffer;
 use crate::html::highlight;
+use crate::html::length_limit::HtmlWithLimit;
 use crate::html::toc::TocBuilder;
 
 use pulldown_cmark::{
@@ -1081,15 +1083,6 @@ fn markdown_summary_with_limit(
         return (String::new(), false);
     }
 
-    let mut s = String::with_capacity(md.len() * 3 / 2);
-    let mut text_length = 0;
-    let mut stopped_early = false;
-
-    fn push(s: &mut String, text_length: &mut usize, text: &str) {
-        write!(s, "{}", Escape(text)).unwrap();
-        *text_length += text.len();
-    }
-
     let mut replacer = |broken_link: BrokenLink<'_>| {
         if let Some(link) =
             link_names.iter().find(|link| &*link.original_text == broken_link.reference)
@@ -1101,56 +1094,48 @@ fn markdown_summary_with_limit(
     };
 
     let p = Parser::new_with_broken_link_callback(md, opts(), Some(&mut replacer));
-    let p = LinkReplacer::new(p, link_names);
+    let mut p = LinkReplacer::new(p, link_names);
 
-    'outer: for event in p {
+    let mut buf = HtmlWithLimit::new(length_limit);
+    let mut stopped_early = false;
+    p.try_for_each(|event| {
         match &event {
             Event::Text(text) => {
-                for word in text.split_inclusive(char::is_whitespace) {
-                    if text_length + word.len() >= length_limit {
-                        stopped_early = true;
-                        break 'outer;
-                    }
-
-                    push(&mut s, &mut text_length, word);
+                let r =
+                    text.split_inclusive(char::is_whitespace).try_for_each(|word| buf.push(word));
+                if r.is_break() {
+                    stopped_early = true;
                 }
+                return r;
             }
             Event::Code(code) => {
-                if text_length + code.len() >= length_limit {
+                buf.open_tag("code");
+                let r = buf.push(code);
+                if r.is_break() {
                     stopped_early = true;
-                    break;
+                } else {
+                    buf.close_tag();
                 }
-
-                s.push_str("<code>");
-                push(&mut s, &mut text_length, code);
-                s.push_str("</code>");
+                return r;
             }
             Event::Start(tag) => match tag {
-                Tag::Emphasis => s.push_str("<em>"),
-                Tag::Strong => s.push_str("<strong>"),
-                Tag::CodeBlock(..) => break,
+                Tag::Emphasis => buf.open_tag("em"),
+                Tag::Strong => buf.open_tag("strong"),
+                Tag::CodeBlock(..) => return ControlFlow::BREAK,
                 _ => {}
             },
             Event::End(tag) => match tag {
-                Tag::Emphasis => s.push_str("</em>"),
-                Tag::Strong => s.push_str("</strong>"),
-                Tag::Paragraph => break,
-                Tag::Heading(..) => break,
+                Tag::Emphasis | Tag::Strong => buf.close_tag(),
+                Tag::Paragraph | Tag::Heading(..) => return ControlFlow::BREAK,
                 _ => {}
             },
-            Event::HardBreak | Event::SoftBreak => {
-                if text_length + 1 >= length_limit {
-                    stopped_early = true;
-                    break;
-                }
-
-                push(&mut s, &mut text_length, " ");
-            }
+            Event::HardBreak | Event::SoftBreak => buf.push(" ")?,
             _ => {}
-        }
-    }
+        };
+        ControlFlow::CONTINUE
+    });
 
-    (s, stopped_early)
+    (buf.finish(), stopped_early)
 }
 
 /// Renders a shortened first paragraph of the given Markdown as a subset of Markdown,

diff --git a/src/librustdoc/html/markdown/tests.rs b/src/librustdoc/html/markdown/tests.rs
@@ -225,6 +225,7 @@ fn test_short_markdown_summary() {
         assert_eq!(output, expect, "original: {}", input);
     }
 
+    t("", "");
     t("hello [Rust](https://www.rust-lang.org) :)", "hello Rust :)");
     t("*italic*", "<em>italic</em>");
     t("**bold**", "<strong>bold</strong>");
@@ -264,6 +265,7 @@ fn test_plain_text_summary() {
         assert_eq!(output, expect, "original: {}", input);
     }
 
+    t("", "");
     t("hello [Rust](https://www.rust-lang.org) :)", "hello Rust :)");
     t("**bold**", "bold");
     t("Multi-line\nsummary", "Multi-line summary");

diff --git a/src/librustdoc/html/mod.rs b/src/librustdoc/html/mod.rs
@@ -2,6 +2,7 @@ crate mod escape;
 crate mod format;
 crate mod highlight;
 crate mod layout;
+mod length_limit;
 // used by the error-index generator, so it needs to be public
 pub mod markdown;
 crate mod render;

diff --git a/src/librustdoc/lib.rs b/src/librustdoc/lib.rs
@@ -5,6 +5,7 @@
 #![feature(rustc_private)]
 #![feature(array_methods)]
 #![feature(box_patterns)]
+#![feature(control_flow_enum)]
 #![feature(in_band_lifetimes)]
 #![feature(nll)]
 #![feature(test)]