Add benchcomp filter command (#3105)

karkhaz · web-flow · commit 506bfc4dd693 · 2024-03-26T16:22:49.000Z
This allows benchcomp to pass the list of results to an external program
for modification before the results are visualized. This can be used,
for example, to visualize only a relevant subset of results.

By submitting this pull request, I confirm that my contribution is made
under the terms of the Apache 2.0 and MIT licenses.
diff --git a/docs/src/benchcomp-conf.md b/docs/src/benchcomp-conf.md
@@ -24,6 +24,25 @@ variants:
 ```
 
 
+## Filters
+
+After benchcomp has finished parsing the results, it writes the results to `result.yaml` by default.
+Before visualizing the results (see below), benchcomp can *filter* the results by piping them into an external program.
+
+To filter results before visualizing them, add `filters` to the configuration file.
+
+```yaml
+filters:
+    - command_line: ./scripts/remove-redundant-results.py
+    - command_line: cat
+```
+
+The value of `filters` is a list of dicts.
+Currently the only legal key for each of the dicts is `command_line`.
+Benchcomp invokes each `command_line` in order, passing the results as JSON on stdin and interpreting the stdout as a YAML-formatted modified set of results; benchcomp exits with an error if a command fails, times out, or prints invalid YAML.
+Filter scripts can emit either YAML (which might be more readable while developing the script), or JSON (which benchcomp will parse as a subset of YAML).
+
+
 ## Built-in visualizations
 
 The following visualizations are available; these can be added to the `visualize` list of `benchcomp.yaml`.
diff --git a/tools/benchcomp/benchcomp/__init__.py b/tools/benchcomp/benchcomp/__init__.py
@@ -62,7 +62,16 @@ class ConfigFile(collections.UserDict):
               anyof:
                 - schema:
                     type: {}
-filter: {}
+filters:
+    type: list
+    default: []
+    schema:
+        type: dict
+        keysrules:
+            type: string
+            allowed: ["command_line"]
+        valuesrules:
+            type: string
 visualize: {}
 """)
 
diff --git a/tools/benchcomp/benchcomp/cmd_args.py b/tools/benchcomp/benchcomp/cmd_args.py
@@ -170,7 +170,13 @@ def _get_args_dict():
                 },
                 "filter": {
                     "help": "transform a result by piping it through a program",
-                    "args": [],
+                    "args": [{
+                        "flags": ["--result-file"],
+                        "metavar": "F",
+                        "default": pathlib.Path("result.yaml"),
+                        "type": pathlib.Path,
+                        "help": "read result from F instead of %(default)s."
+                    }],
                 },
                 "visualize": {
                     "help": "render a result in various formats",
@@ -180,7 +186,7 @@ def _get_args_dict():
                         "default": pathlib.Path("result.yaml"),
                         "type": pathlib.Path,
                         "help":
-                            "read result from F instead of %(default)s. "
+                            "read result from F instead of %(default)s."
                     }, {
                         "flags": ["--only"],
                         "nargs": "+",
@@ -234,6 +240,11 @@ def get():
 
     subparsers = ad["subparsers"].pop("parsers")
     subs = parser.add_subparsers(**ad["subparsers"])
+
+    # Add all subcommand-specific flags to the top-level argument parser,
+    # but only add them once.
+    flag_set = set()
+
     for subcommand, info in subparsers.items():
         args = info.pop("args")
         subparser = subs.add_parser(name=subcommand, **info)
@@ -246,7 +257,9 @@ def get():
         for arg in args:
             flags = arg.pop("flags")
             subparser.add_argument(*flags, **arg)
-            if arg not in global_args:
+            long_flag = flags[-1]
+            if arg not in global_args and long_flag not in flag_set:
+                flag_set.add(long_flag)
                 parser.add_argument(*flags, **arg)
 
     return parser.parse_args()
diff --git a/tools/benchcomp/benchcomp/entry/benchcomp.py b/tools/benchcomp/benchcomp/entry/benchcomp.py
@@ -16,4 +16,6 @@ def main(args):
     args.suites_dir = run_result.out_prefix / run_result.out_symlink
     results = benchcomp.entry.collate.main(args)
 
+    results = benchcomp.entry.filter.main(args)
+
     benchcomp.entry.visualize.main(args)
diff --git a/tools/benchcomp/benchcomp/entry/filter.py b/tools/benchcomp/benchcomp/entry/filter.py
@@ -4,5 +4,91 @@
 # Entrypoint for `benchcomp filter`
 
 
-def main(_):
-    raise NotImplementedError  # TODO
+import json
+import logging
+import pathlib
+import subprocess
+import sys
+import tempfile
+
+import yaml
+
+
+def main(args):
+    """Pipe the results file through every configured filter, in order.
+
+    Each filter gets the current results as JSON on stdin; its stdout is
+    parsed as YAML and written back to `args.result_file`. Returns the final
+    results. Exits with status 1 if a filter's stdout is not valid YAML.
+    """
+    with open(args.result_file) as handle:
+        results = yaml.safe_load(handle)
+
+    # An absent or empty `filters` list leaves the results untouched.
+    if "filters" not in args.config or not args.config["filters"]:
+        return results
+
+    tmp_root = pathlib.Path(tempfile.gettempdir()) / "benchcomp" / "filter"
+    tmp_root.mkdir(parents=True, exist_ok=True)
+    tmpdir = pathlib.Path(tempfile.mkdtemp(dir=str(tmp_root)))
+
+    for idx, filt in enumerate(args.config["filters"]):
+        in_file = tmpdir / f"{idx}.in.json"
+        out_file = tmpdir / f"{idx}.out.json"
+        cmd_out = _pipe(
+            filt["command_line"], json.dumps(results, indent=2),
+            in_file, out_file)
+        try:
+            results = yaml.safe_load(cmd_out)
+        except yaml.YAMLError as exc:
+            logging.exception(
+                "Filter command '%s' produced invalid YAML. Stdin of"
+                " the command is saved in %s, stdout is saved in %s.",
+                filt["command_line"], in_file, out_file)
+            if getattr(exc, "problem_mark", None) is not None:
+                logging.error(
+                    "Parse error location: line %d, column %d",
+                    exc.problem_mark.line+1, exc.problem_mark.column+1)
+            sys.exit(1)
+        with open(args.result_file, "w") as handle:
+            yaml.dump(results, handle, default_flow_style=False, indent=2)
+
+    return results
+
+
+def _pipe(shell_command, in_text, in_file, out_file):
+    """Pipe `in_text` into `shell_command` and return the output text
+
+    Save the in and out text into files for later inspection if necessary.
+    Exit with status 1 if the command times out or exits with non-zero code.
+    """
+    with open(in_file, "w") as handle:
+        print(in_text, file=handle)
+
+    logging.debug(
+        "Piping the contents of '%s' into '%s', saving into '%s'",
+        in_file, shell_command, out_file)
+
+    timeout = 60
+    with subprocess.Popen(
+            shell_command, shell=True, text=True, stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE) as proc:
+        try:
+            out, _ = proc.communicate(input=in_text, timeout=timeout)
+        except subprocess.TimeoutExpired:
+            proc.kill()  # else Popen.__exit__ blocks waiting for the child
+            logging.error(
+                "Filter command failed to terminate after %ds: '%s'",
+                timeout, shell_command)
+            sys.exit(1)
+
+    with open(out_file, "w") as handle:
+        print(out, file=handle)
+    if proc.returncode:
+        logging.error(
+            "Filter command '%s' exited with code %d. Stdin of"
+            " the command is saved in %s, stdout is saved in %s.",
+            shell_command, proc.returncode, in_file, out_file)
+        sys.exit(1)
+
+    return out
diff --git a/tools/benchcomp/test/test_regression.py b/tools/benchcomp/test/test_regression.py
@@ -662,6 +662,98 @@ def test_return_0_on_fail(self):
                 result = yaml.safe_load(handle)
 
 
+    def test_bad_filters(self):
+        """Ensure that a failing filter makes benchcomp exit with code 1"""
+
+        with tempfile.TemporaryDirectory() as tmp:
+            run_bc = Benchcomp({
+                "variants": {
+                    "variant-1": {
+                        "config": {
+                            "command_line": "true",
+                            "directory": tmp,
+                            "env": {},
+                        }
+                    },
+                },
+                "run": {
+                    "suites": {
+                        "suite_1": {
+                            "parser": {
+                                "command": textwrap.dedent("""\
+                                    echo '{
+                                        "benchmarks": { },
+                                        "metrics": { }
+                                    }'
+                                    """)
+                            },
+                            "variants": ["variant-1"]
+                        }
+                    }
+                },
+                "filters": [{
+                    "command_line": "false"  # always exits non-zero
+                }],
+                "visualize": [],
+            })
+            run_bc()
+            self.assertEqual(run_bc.proc.returncode, 1, msg=run_bc.stderr)
+
+
+    def test_two_filters(self):
+        """Ensure that benchcomp exits 0 when two filters are configured"""
+
+        with tempfile.TemporaryDirectory() as tmp:
+            run_bc = Benchcomp({
+                "variants": {
+                    "variant-1": {
+                        "config": {
+                            "command_line": "true",
+                            "directory": tmp,
+                            "env": {},
+                        }
+                    },
+                },
+                "run": {
+                    "suites": {
+                        "suite_1": {
+                            "parser": {
+                                "command": textwrap.dedent("""\
+                                    echo '{
+                                        "benchmarks": {
+                                            "bench-1": {
+                                                "variants": {
+                                                    "variant-1": {
+                                                        "metrics": {
+                                                            "runtime": 10,
+                                                            "memory": 5
+                                                        }
+                                                    }
+                                                }
+                                            }
+                                        },
+                                        "metrics": {
+                                            "runtime": {},
+                                            "memory": {},
+                                        }
+                                    }'
+                                    """)
+                            },
+                            "variants": ["variant-1"]
+                        }
+                    }
+                },
+                "filters": [{
+                    "command_line": "sed -e 's/10/20/;s/5/10/'"  # bump metrics
+                }, {
+                    "command_line": """grep '"runtime": 20'"""  # fails on 10
+                }],
+                "visualize": [],
+            })
+            run_bc()
+            self.assertEqual(run_bc.proc.returncode, 0, msg=run_bc.stderr)
+
+
     def test_env_expansion(self):
         """Ensure that config parser expands '${}' in env key"""