Revert "Switch self profile to use HW counters instead of walltime" (#1700)

Kobzol · web-flow · commit a95bee114301 · 2023-08-12T13:49:41.000Z
diff --git a/collector/src/bin/rustc-fake.rs b/collector/src/bin/rustc-fake.rs
@@ -95,9 +95,8 @@ fn main() {
                 if wrapper == "PerfStatSelfProfile" {
                     cmd.arg(&format!(
                         "-Zself-profile={}",
-                        prof_out_dir.to_str().unwrap(),
+                        prof_out_dir.to_str().unwrap()
                     ));
-                    cmd.arg("-Zself-profile-counter=instructions:u");
                     let _ = fs::remove_dir_all(&prof_out_dir);
                     let _ = fs::create_dir_all(&prof_out_dir);
                 }
diff --git a/site/frontend/src/pages/detailed-query.ts b/site/frontend/src/pages/detailed-query.ts
@@ -2,8 +2,8 @@ import {createUrlWithAppendedParams, getUrlParams} from "../utils/navigation";
 import {postMsgpack} from "../utils/requests";
 import {SELF_PROFILE_DATA_URL} from "../urls";
 
-function normalize_value(value) {
-  return value;
+function to_seconds(time) {
+  return time / 1000000000;
 }
 
 function fmt_delta(to, delta, is_integral_delta) {
@@ -267,14 +267,14 @@ function populate_data(data, state: Selector) {
         t.setAttribute("title", "% of cpu-time stat");
       }
     }
-    td(row, normalize_value(cur.self_time));
+    td(row, to_seconds(cur.self_time).toFixed(3));
     if (delta) {
       td(
         row,
         fmt_delta(
-          normalize_value(cur.self_time),
-          normalize_value(delta.self_time),
-          true
+          to_seconds(cur.self_time),
+          to_seconds(delta.self_time),
+          false
         ),
         true
       );
@@ -291,14 +291,16 @@ function populate_data(data, state: Selector) {
     } else {
       td(row, "-", true);
     }
-    td(row, normalize_value(cur.incremental_load_time)).classList.add("incr");
+    td(row, to_seconds(cur.incremental_load_time).toFixed(3)).classList.add(
+      "incr"
+    );
     if (delta) {
       td(
         row,
         fmt_delta(
-          normalize_value(cur.incremental_load_time),
-          normalize_value(delta.incremental_load_time),
-          true
+          to_seconds(cur.incremental_load_time),
+          to_seconds(delta.incremental_load_time),
+          false
         ),
         true
       ).classList.add("incr");
diff --git a/site/frontend/templates/pages/detailed-query.html b/site/frontend/templates/pages/detailed-query.html
@@ -79,21 +79,20 @@ <h4>Artifact Size</h4>
             <tbody id="artifact-body">
             </tbody>
         </table>
-        <p>'Instructions (%)' is the percentage of instructions executed on this query.</p>
-        <p><b>Note: self-profile measurements have been <a href="https://github.com/rust-lang/rustc-perf/pull/1647">recently switched</a>
-          from wall-time to HW counters (instruction count). If comparing with an older artifact, the timings might not be directly comparable.</b></p>
+        <p>'Time (%)' is the percentage of the cpu-clock time spent on this query (we do not use
+            wall-time as we want to account for parallelism).</p>
         <p>Executions do not include cached executions.</p>
         <table>
             <thead>
                 <tr id="table-header">
                     <th data-sort-idx="1" data-default-sort-dir="1">Query/Function</th>
-                    <th data-sort-idx="10" data-default-sort-dir="-1">Instructions (%)</th>
-                    <th data-sort-idx="2" data-default-sort-dir="-1">Instructions</th>
-                    <th data-sort-idx="11" data-default-sort-dir="-1" class="delta">Instructions delta</th>
+                    <th data-sort-idx="10" data-default-sort-dir="-1">Time (%)</th>
+                    <th data-sort-idx="2" data-default-sort-dir="-1">Time (s)</th>
+                    <th data-sort-idx="11" data-default-sort-dir="-1" class="delta">Time delta</th>
                     <th data-sort-idx="5" data-default-sort-dir="-1">Executions</th>
                     <th data-sort-idx="12" data-default-sort-dir="-1" class="delta">Executions delta</th>
-                    <th class="incr" data-sort-idx="7" data-default-sort-dir="-1" title="Incremental loading instructions">
-                        Incremental loading instructions</th>
+                    <th class="incr" data-sort-idx="7" data-default-sort-dir="-1" title="Incremental loading time">
+                        Incremental loading (s)</th>
                     <th class="incr delta" data-sort-idx="13" data-default-sort-dir="-1">Incremental loading delta</th>
                 </tr>
             </thead>
diff --git a/site/src/api.rs b/site/src/api.rs
@@ -345,21 +345,18 @@ pub mod self_profile {
         pub artifact_sizes: Option<Vec<ArtifactSize>>,
     }
 
-    // Due to backwards compatibility, self profile event timing data is represented as durations,
-    // however since https://github.com/rust-lang/rustc-perf/pull/1647 it actually represents
-    // HW counter data (instruction counts).
     #[derive(Serialize, Deserialize, Clone, Debug)]
     pub struct QueryData {
         pub label: QueryLabel,
-        // Instruction count
+        // Nanoseconds
         pub self_time: u64,
         pub percent_total_time: f32,
         pub number_of_cache_misses: u32,
         pub number_of_cache_hits: u32,
         pub invocation_count: u32,
-        // Instruction count
+        // Nanoseconds
         pub blocked_time: u64,
-        // Instruction count
+        // Nanoseconds
         pub incremental_load_time: u64,
     }
 
diff --git a/site/src/request_handlers/self_profile.rs b/site/src/request_handlers/self_profile.rs
@@ -155,7 +155,7 @@ pub async fn handle_self_profile_processed_download(
 }
 
 fn get_self_profile_data(
-    total_instructions: Option<f64>,
+    cpu_clock: Option<f64>,
     profile: &analyzeme::AnalysisResults,
 ) -> ServerResult<self_profile::SelfProfile> {
     let total_time: Duration = profile.query_data.iter().map(|qd| qd.self_time).sum();
@@ -180,7 +180,7 @@ fn get_self_profile_data(
         label: "Totals".into(),
         self_time: total_time.as_nanos() as u64,
         // TODO: check against wall-time from perf stats
-        percent_total_time: total_instructions
+        percent_total_time: cpu_clock
             .map(|w| ((total_time.as_secs_f64() / w) * 100.0) as f32)
             // sentinel "we couldn't compute this time"
             .unwrap_or(-100.0),
@@ -587,7 +587,7 @@ pub async fn handle_self_profile(
         .benchmark(selector::Selector::One(bench_name.to_string()))
         .profile(selector::Selector::One(profile.parse().unwrap()))
         .scenario(selector::Selector::One(scenario))
-        .metric(selector::Selector::One(Metric::InstructionsUser));
+        .metric(selector::Selector::One(Metric::CpuClock));
 
     // Helper for finding an `ArtifactId` based on a commit sha
     let find_aid = |commit: &str| {
@@ -602,9 +602,9 @@ pub async fn handle_self_profile(
     }
     let commits = Arc::new(commits);
 
-    let mut instructions_responses = ctxt.statistic_series(query, commits.clone()).await?;
-    assert_eq!(instructions_responses.len(), 1, "all selectors are exact");
-    let mut instructions_response = instructions_responses.remove(0).series;
+    let mut cpu_responses = ctxt.statistic_series(query, commits.clone()).await?;
+    assert_eq!(cpu_responses.len(), 1, "all selectors are exact");
+    let mut cpu_response = cpu_responses.remove(0).series;
 
     let mut self_profile_data = Vec::new();
     let conn = ctxt.conn().await;
@@ -623,16 +623,12 @@ pub async fn handle_self_profile(
         }
     }
     let profiling_data = self_profile_data.remove(0).perform_analysis();
-    let mut profile =
-        get_self_profile_data(instructions_response.next().unwrap().1, &profiling_data)
-            .map_err(|e| format!("{}: {}", body.commit, e))?;
+    let mut profile = get_self_profile_data(cpu_response.next().unwrap().1, &profiling_data)
+        .map_err(|e| format!("{}: {}", body.commit, e))?;
     let (base_profile, base_raw_data) = if body.base_commit.is_some() {
         let base_profiling_data = self_profile_data.remove(0).perform_analysis();
-        let profile = get_self_profile_data(
-            instructions_response.next().unwrap().1,
-            &base_profiling_data,
-        )
-        .map_err(|e| format!("{}: {}", body.base_commit.as_ref().unwrap(), e))?;
+        let profile = get_self_profile_data(cpu_response.next().unwrap().1, &base_profiling_data)
+            .map_err(|e| format!("{}: {}", body.base_commit.as_ref().unwrap(), e))?;
         (Some(profile), Some(base_profiling_data))
     } else {
         (None, None)