From afeb47ec496876f378ff78cc81b5b8c5c8b680f3 Mon Sep 17 00:00:00 2001
From: Michael Droettboom
Date: Thu, 19 Jan 2023 09:04:29 -0500
Subject: [PATCH 1/2] Make mypy results more consistent

---
 benchmarks/bm_mypy/run_benchmark.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/benchmarks/bm_mypy/run_benchmark.py b/benchmarks/bm_mypy/run_benchmark.py
index d64aa74..2d449a8 100644
--- a/benchmarks/bm_mypy/run_benchmark.py
+++ b/benchmarks/bm_mypy/run_benchmark.py
@@ -41,8 +41,6 @@ def _bench_mypy(loops=20, *, legacy=False):
     times = []
     with open(os.devnull, "w") as devnull:
         for i in range(loops):
-            if legacy:
-                print(i)
             # This is a macro benchmark for a Python implementation
             # so "elapsed" covers more than just how long main() takes.
             t0 = pyperf.perf_counter()
@@ -52,8 +50,12 @@ def _bench_mypy(loops=20, *, legacy=False):
                 pass
 
             t1 = pyperf.perf_counter()
-            elapsed += t1 - t0
-            times.append(t0)
+            # Don't include results from the first run, since it loads the
+            # files from disk. Subsequent runs will use the file contents in an
+            # in-memory cache.
+            if i > 0:
+                elapsed += t1 - t0
+                times.append(t0)
     times.append(pyperf.perf_counter())
 
     return elapsed, times

From 41d08c8ef920993b0cb6ff058037a1786479a8ab Mon Sep 17 00:00:00 2001
From: Michael Droettboom
Date: Fri, 27 Jan 2023 11:39:51 -0500
Subject: [PATCH 2/2] Reduce the variability of the mypy benchmark

The first 2 runs of mypy read data from disk into a mypy-internal
in-memory cache. This change attempts to reduce the impact of that I/O,
which otherwise causes the results to have an excessively wide range.

Additionally, since this makes the benchmark read as significantly
"faster", this renames the benchmark so it would not be accidentally
compared against baselines from the old version.
There is more discussion here:
https://github.com/faster-cpython/ideas/issues/543
---
 benchmarks/MANIFEST                                  | 2 +-
 benchmarks/{bm_mypy => bm_mypy2}/data/mypy_target.py | 0
 benchmarks/{bm_mypy => bm_mypy2}/legacyutils.py      | 0
 benchmarks/{bm_mypy => bm_mypy2}/pyproject.toml      | 2 +-
 benchmarks/{bm_mypy => bm_mypy2}/requirements.txt    | 0
 benchmarks/{bm_mypy => bm_mypy2}/run_benchmark.py    | 4 ++--
 6 files changed, 4 insertions(+), 4 deletions(-)
 rename benchmarks/{bm_mypy => bm_mypy2}/data/mypy_target.py (100%)
 rename benchmarks/{bm_mypy => bm_mypy2}/legacyutils.py (100%)
 rename benchmarks/{bm_mypy => bm_mypy2}/pyproject.toml (84%)
 rename benchmarks/{bm_mypy => bm_mypy2}/requirements.txt (100%)
 rename benchmarks/{bm_mypy => bm_mypy2}/run_benchmark.py (96%)

diff --git a/benchmarks/MANIFEST b/benchmarks/MANIFEST
index a99f58a..5229652 100644
--- a/benchmarks/MANIFEST
+++ b/benchmarks/MANIFEST
@@ -8,7 +8,7 @@ gevent_hub
 gunicorn
 json
 kinto
-mypy
+mypy2
 pycparser
 pylint
 pytorch_alexnet_inference
diff --git a/benchmarks/bm_mypy/data/mypy_target.py b/benchmarks/bm_mypy2/data/mypy_target.py
similarity index 100%
rename from benchmarks/bm_mypy/data/mypy_target.py
rename to benchmarks/bm_mypy2/data/mypy_target.py
diff --git a/benchmarks/bm_mypy/legacyutils.py b/benchmarks/bm_mypy2/legacyutils.py
similarity index 100%
rename from benchmarks/bm_mypy/legacyutils.py
rename to benchmarks/bm_mypy2/legacyutils.py
diff --git a/benchmarks/bm_mypy/pyproject.toml b/benchmarks/bm_mypy2/pyproject.toml
similarity index 84%
rename from benchmarks/bm_mypy/pyproject.toml
rename to benchmarks/bm_mypy2/pyproject.toml
index 5da0cd8..7ac33ed 100644
--- a/benchmarks/bm_mypy/pyproject.toml
+++ b/benchmarks/bm_mypy2/pyproject.toml
@@ -1,5 +1,5 @@
 [project]
-name = "bm_mypy"
+name = "bm_mypy2"
 dependencies = [
     "mypy",
 ]
diff --git a/benchmarks/bm_mypy/requirements.txt b/benchmarks/bm_mypy2/requirements.txt
similarity index 100%
rename from benchmarks/bm_mypy/requirements.txt
rename to benchmarks/bm_mypy2/requirements.txt
diff --git a/benchmarks/bm_mypy/run_benchmark.py b/benchmarks/bm_mypy2/run_benchmark.py
similarity index 96%
rename from benchmarks/bm_mypy/run_benchmark.py
rename to benchmarks/bm_mypy2/run_benchmark.py
index 2d449a8..535b9af 100644
--- a/benchmarks/bm_mypy/run_benchmark.py
+++ b/benchmarks/bm_mypy2/run_benchmark.py
@@ -53,7 +53,7 @@ def _bench_mypy(loops=20, *, legacy=False):
             # Don't include results from the first run, since it loads the
             # files from disk. Subsequent runs will use the file contents in an
             # in-memory cache.
-            if i > 0:
+            if i > 1:
                 elapsed += t1 - t0
                 times.append(t0)
     times.append(pyperf.perf_counter())
@@ -69,4 +69,4 @@ def _bench_mypy(loops=20, *, legacy=False):
 
     runner = pyperf.Runner()
     runner.metadata['description'] = "Test the performance of mypy types"
-    runner.bench_time_func("mypy", bench_mypy)
+    runner.bench_time_func("mypy2", bench_mypy)
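
Note on the pattern both patches converge on: time every iteration, but drop
the cold-cache iterations from the reported total so one-time disk I/O does
not widen the distribution. Below is a minimal standalone sketch of that
pattern, not code from this repository: run_workload() is a hypothetical
stand-in for the benchmark's mypy invocation, and WARMUP = 2 mirrors the two
cache-priming runs the final patch skips.

    import pyperf

    # Assumed number of cache-priming iterations, mirroring "if i > 1"
    # in the final patch (skip runs 0 and 1).
    WARMUP = 2

    def run_workload():
        # Hypothetical stand-in for the real work (the benchmark runs
        # mypy's main() over a target file).
        sum(i * i for i in range(100_000))

    def bench(loops=20):
        elapsed = 0.0
        for i in range(loops):
            t0 = pyperf.perf_counter()
            run_workload()
            t1 = pyperf.perf_counter()
            # Discard the iterations that populate in-memory caches from
            # disk; only warm iterations count toward the total.
            if i >= WARMUP:
                elapsed += t1 - t0
        return elapsed

One consequence, and the reason for the rename: pyperf's bench_time_func
averages the returned total over all loops, while the total now covers only
the warm iterations, so the reported per-loop time drops and is no longer
comparable to baselines recorded under the old "mypy" name.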