diff --git a/case_utils/case_sparql_select/__init__.py b/case_utils/case_sparql_select/__init__.py
index eaa98cb..2d8581c 100644
--- a/case_utils/case_sparql_select/__init__.py
+++ b/case_utils/case_sparql_select/__init__.py
@@ -49,74 +49,44 @@
_logger = logging.getLogger(os.path.basename(__file__))
-def main() -> None:
- parser = argparse.ArgumentParser()
-
- # Configure debug logging before running parse_args, because there could be an error raised before the construction of the argument parser.
- logging.basicConfig(
- level=logging.DEBUG
- if ("--debug" in sys.argv or "-d" in sys.argv)
- else logging.INFO
- )
-
- parser.add_argument("-d", "--debug", action="store_true")
- parser.add_argument(
- "--built-version",
- choices=tuple(built_version_choices_list),
- default="case-" + CURRENT_CASE_VERSION,
- help="Ontology version to use to supplement query, such as for subclass querying. Does not require networking to use. Default is most recent CASE release. Passing 'none' will mean no pre-built CASE ontology versions accompanying this tool will be included in the analysis.",
- )
- parser.add_argument(
- "--disallow-empty-results",
- action="store_true",
- help="Raise error if no results are returned for query.",
- )
- parser.add_argument(
- "--use-prefixes",
- action="store_true",
- help="Abbreviate node IDs according to graph's encoded prefixes. (This will use prefixes in the graph, not the query.)",
- )
- parser.add_argument(
- "out_table",
- help="Expected extensions are .html for HTML tables, .md for Markdown tables, .csv for comma-separated values, and .tsv for tab-separated values.",
- )
- parser.add_argument(
- "in_sparql",
- help="File containing a SPARQL SELECT query. Note that prefixes not mapped with a PREFIX statement will be mapped according to their first occurrence among input graphs.",
- )
- parser.add_argument("in_graph", nargs="+")
- args = parser.parse_args()
+def query_text_to_variables(select_query_text: str) -> typing.List[str]:
+    """
+    Extract the projected variable names from the SELECT line of a SPARQL query.
+
+    The first line that starts with "SELECT " is used, and a " DISTINCT" keyword is dropped.  Variables are returned as written in the query (e.g. "?name").
+
+    :param select_query_text: Text of a SPARQL SELECT query.
+    :returns: Variable tokens, in the order they appear on the SELECT line.
+    :raises IndexError: If no line of the query starts with "SELECT ".
+    """
+    # Build columns list from SELECT line.
+    select_query_text_lines = select_query_text.split("\n")
+    select_line = [
+        line for line in select_query_text_lines if line.startswith("SELECT ")
+    ][0]
+    # Split on runs of whitespace rather than on single spaces, so repeated spaces, a trailing space, tabs, or a trailing "\r" do not produce empty or polluted column names.
+    variables = select_line.replace(" DISTINCT", "").replace("SELECT ", "").split()
+    return variables
- graph = rdflib.Graph()
- for in_graph_filename in args.in_graph:
- graph.parse(in_graph_filename)
+def graph_and_query_to_data_frame(
+ graph: rdflib.Graph,
+ select_query_text: str,
+ *args: typing.Any,
+ built_version: str = "case-" + CURRENT_CASE_VERSION,
+ disallow_empty_results: bool = False,
+ use_prefixes: bool = False,
+ **kwargs: typing.Any,
+) -> pd.DataFrame:
# Inherit prefixes defined in input context dictionary.
nsdict = {k: v for (k, v) in graph.namespace_manager.namespaces()}
- select_query_text = None
- with open(args.in_sparql, "r") as in_fh:
- select_query_text = in_fh.read().strip()
- _logger.debug("select_query_text = %r." % select_query_text)
-
+ # Avoid side-effects on input parameter.
if "subClassOf" in select_query_text:
- case_utils.ontology.load_subclass_hierarchy(
- graph, built_version=args.built_version
- )
+ _graph = rdflib.Graph()
+ _graph += graph
+ case_utils.ontology.load_subclass_hierarchy(_graph, built_version=built_version)
+ else:
+ _graph = graph
- # Build columns list from SELECT line.
- select_query_text_lines = select_query_text.split("\n")
- select_line = [
- line for line in select_query_text_lines if line.startswith("SELECT ")
- ][0]
- variables = select_line.replace(" DISTINCT", "").replace("SELECT ", "").split(" ")
+ variables = query_text_to_variables(select_query_text)
tally = 0
records = []
select_query_object = rdflib.plugins.sparql.processor.prepareQuery(
select_query_text, initNs=nsdict
)
- for (row_no, row) in enumerate(graph.query(select_query_object)):
+ for (row_no, row) in enumerate(_graph.query(select_query_object)):
tally = row_no + 1
record = []
for (column_no, column) in enumerate(row):
@@ -131,7 +101,7 @@ def main() -> None:
# .decode() is because hexlify returns bytes.
column_value = binascii.hexlify(column.toPython()).decode()
elif isinstance(column, rdflib.URIRef):
- if args.use_prefixes:
+ if use_prefixes:
column_value = graph.namespace_manager.qname(column.toPython())
else:
column_value = column.toPython()
@@ -141,39 +111,192 @@ def main() -> None:
_logger.debug("row[0]column[%d] = %r." % (column_no, column_value))
record.append(column_value)
records.append(record)
+
if tally == 0:
- if args.disallow_empty_results:
+ if disallow_empty_results:
raise ValueError("Failed to return any results.")
df = pd.DataFrame(records, columns=variables)
+ return df
+
+def data_frame_to_table_text(
+ df: pd.DataFrame,
+ *args: typing.Any,
+ output_mode: str,
+ use_header: bool,
+ use_index: bool,
+ **kwargs: typing.Any,
+) -> str:
+ """
+ Render a DataFrame as table text in the format named by output_mode.
+
+ "csv" and "tsv" use DataFrame.to_csv (separator "," or tab), "html" uses DataFrame.to_html with the CSS classes for CASE website Bootstrap support, and "md" uses DataFrame.to_markdown with the "github" table format.
+
+ :param df: Table to render.
+ :param output_mode: One of "csv", "html", "md", or "tsv".
+ :param use_header: Whether to emit column labels.  For "md", False is passed as an empty 'headers' tuple instead of the 'header' keyword.
+ :param use_index: Passed through as the 'index' keyword of the pandas writer.
+ :returns: The rendered table text.
+ :raises NotImplementedError: If output_mode is not one of the modes listed above.
+
+ NOTE(review): *args and **kwargs are accepted but not referenced in this body.  main() maps a ".json" extension to output_mode "json", which has no branch here and so raises NotImplementedError - confirm whether that is intended.
+ """
table_text: typing.Optional[str] = None
- if args.out_table.endswith(".csv") or args.out_table.endswith(".tsv"):
- # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_csv.html
+
+ # Set up kwargs dicts. One kwarg behaves slightly differently for Markdown vs. other formats.
+ general_kwargs: typing.Dict[str, typing.Any] = dict()
+ md_kwargs: typing.Dict[str, typing.Any] = dict()
+
+ # Note some output modes will drop 'header' from general_kwargs, due to alternate support or lack of support.
+ if use_header:
+ general_kwargs["header"] = True
+ else:
+ general_kwargs["header"] = False
+ md_kwargs["headers"] = tuple()
+
+ general_kwargs["index"] = use_index
+
+ if output_mode in {"csv", "tsv"}:
sep: str
- if args.out_table.endswith(".csv"):
+ if output_mode == "csv":
sep = ","
- elif args.out_table.endswith(".tsv"):
+ elif output_mode == "tsv":
sep = "\t"
else:
raise NotImplementedError(
"Output extension not implemented in CSV-style output."
)
- table_text = df.to_csv(sep=sep)
- elif args.out_table.endswith(".html"):
+ table_text = df.to_csv(sep=sep, **general_kwargs)
+ elif output_mode == "html":
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_html.html
# Add CSS classes for CASE website Bootstrap support.
- table_text = df.to_html(classes=("table", "table-bordered", "table-condensed"))
- elif args.out_table.endswith(".md"):
+ table_text = df.to_html(
+ classes=("table", "table-bordered", "table-condensed"), **general_kwargs
+ )
+ elif output_mode == "md":
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_markdown.html
# https://pypi.org/project/tabulate/
# Assume Github-flavored Markdown.
- table_text = df.to_markdown(tablefmt="github")
- if table_text is None:
- raise NotImplementedError(
- "Unsupported output extension for output filename %r.", args.out_table
- )
+ # Drop unsupported kwarg.
+ del general_kwargs["header"]
+
+ table_text = df.to_markdown(tablefmt="github", **general_kwargs, **md_kwargs)
+ else:
+ if table_text is None:
+ raise NotImplementedError("Unimplemented output mode: %r." % output_mode)
+ assert table_text is not None
+
+ return table_text
+
+
+def main() -> None:
+ parser = argparse.ArgumentParser()
+
+ # Configure debug logging before running parse_args, because there could be an error raised before the construction of the argument parser.
+ logging.basicConfig(
+ level=logging.DEBUG
+ if ("--debug" in sys.argv or "-d" in sys.argv)
+ else logging.INFO
+ )
+
+ parser.add_argument("-d", "--debug", action="store_true")
+ parser.add_argument(
+ "--built-version",
+ choices=tuple(built_version_choices_list),
+ default="case-" + CURRENT_CASE_VERSION,
+ help="Ontology version to use to supplement query, such as for subclass querying. Does not require networking to use. Default is most recent CASE release. Passing 'none' will mean no pre-built CASE ontology versions accompanying this tool will be included in the analysis.",
+ )
+ parser.add_argument(
+ "--disallow-empty-results",
+ action="store_true",
+ help="Raise error if no results are returned for query.",
+ )
+ parser.add_argument(
+ "--use-prefixes",
+ action="store_true",
+ help="Abbreviate node IDs according to graph's encoded prefixes. (This will use prefixes in the graph, not the query.)",
+ )
+ parser.add_argument(
+ "out_table",
+ help="Expected extensions are .html for HTML tables, .md for Markdown tables, .csv for comma-separated values, and .tsv for tab-separated values.",
+ )
+ parser.add_argument(
+ "in_sparql",
+ help="File containing a SPARQL SELECT query. Note that prefixes not mapped with a PREFIX statement will be mapped according to their first occurrence among input graphs.",
+ )
+
+ parser_header_group = parser.add_mutually_exclusive_group(required=False)
+ parser_header_group.add_argument(
+ "--header",
+ action="store_true",
+ help="Print column labels. This is the default behavior.",
+ )
+ parser_header_group.add_argument(
+ "--no-header",
+ action="store_true",
+ help="Do not print column labels.",
+ )
+
+ parser_index_group = parser.add_mutually_exclusive_group(required=False)
+ parser_index_group.add_argument(
+ "--index",
+ action="store_true",
+ help="Print index (auto-incrementing row labels as left untitled column). This is the default behavior.",
+ )
+ parser_index_group.add_argument(
+ "--no-index",
+ action="store_true",
+ help="Do not print index.",
+ )
+
+ parser.add_argument("in_graph", nargs="+")
+ args = parser.parse_args()
+
+ output_mode: str
+ if args.out_table.endswith(".csv"):
+ output_mode = "csv"
+ elif args.out_table.endswith(".html"):
+ output_mode = "html"
+ elif args.out_table.endswith(".json"):
+ output_mode = "json"
+ elif args.out_table.endswith(".md"):
+ output_mode = "md"
+ elif args.out_table.endswith(".tsv"):
+ output_mode = "tsv"
+ else:
+ raise NotImplementedError("Output file extension not implemented.")
+
+ graph = rdflib.Graph()
+ for in_graph_filename in args.in_graph:
+ graph.parse(in_graph_filename)
+
+ select_query_text: typing.Optional[str] = None
+ with open(args.in_sparql, "r") as in_fh:
+ select_query_text = in_fh.read().strip()
+ if select_query_text is None:
+ raise ValueError("Failed to load query.")
+ _logger.debug("select_query_text = %r." % select_query_text)
+
+ # Process --header and --no-header.
+ use_header: bool
+ if args.header is True:
+ use_header = True
+ if args.no_header is True:
+ use_header = False
+ else:
+ use_header = True
+
+ # Process --index and --no-index.
+ use_index: bool
+ if args.index is True:
+ use_index = True
+ if args.no_index is True:
+ use_index = False
+ else:
+ use_index = True
+
+ df = graph_and_query_to_data_frame(
+ graph,
+ select_query_text,
+ built_version=args.built_version,
+ disallow_empty_results=args.disallow_empty_results is True,
+ use_prefixes=args.use_prefixes is True,
+ )
+
+ table_text = data_frame_to_table_text(
+ df,
+ output_mode=output_mode,
+ use_header=use_header,
+ use_index=use_index,
+ )
with open(args.out_table, "w") as out_fh:
out_fh.write(table_text)
if table_text[-1] != "\n":
diff --git a/tests/case_utils/Makefile b/tests/case_utils/Makefile
index e77c927..3c65a40 100644
--- a/tests/case_utils/Makefile
+++ b/tests/case_utils/Makefile
@@ -65,6 +65,7 @@ check: \
&& pytest \
--ignore case_file \
--ignore case_sparql_construct \
+ --ignore case_sparql_select \
--ignore case_validate \
--log-level=DEBUG
diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.csv b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.csv
new file mode 100644
index 0000000..063e950
--- /dev/null
+++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.csv
@@ -0,0 +1,3 @@
+,?name,?mbox
+0,Johnny Lee Outlaw,mailto:jlow@example.com
+1,Peter Goodguy,mailto:peter@example.org
diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.html b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.html
new file mode 100644
index 0000000..bee5944
--- /dev/null
+++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.html
@@ -0,0 +1,21 @@
+
+
+
+ |
+ ?name |
+ ?mbox |
+
+
+
+
+ 0 |
+ Johnny Lee Outlaw |
+ mailto:jlow@example.com |
+
+
+ 1 |
+ Peter Goodguy |
+ mailto:peter@example.org |
+
+
+
diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.md b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.md
new file mode 100644
index 0000000..af68b84
--- /dev/null
+++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.md
@@ -0,0 +1,4 @@
+| | ?name | ?mbox |
+|----|-------------------|--------------------------|
+| 0 | Johnny Lee Outlaw | mailto:jlow@example.com |
+| 1 | Peter Goodguy | mailto:peter@example.org |
diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.tsv b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.tsv
new file mode 100644
index 0000000..a4fdfca
--- /dev/null
+++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-with_index.tsv
@@ -0,0 +1,3 @@
+ ?name ?mbox
+0 Johnny Lee Outlaw mailto:jlow@example.com
+1 Peter Goodguy mailto:peter@example.org
diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.csv b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.csv
new file mode 100644
index 0000000..6bd60fb
--- /dev/null
+++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.csv
@@ -0,0 +1,3 @@
+?name,?mbox
+Johnny Lee Outlaw,mailto:jlow@example.com
+Peter Goodguy,mailto:peter@example.org
diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.html b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.html
new file mode 100644
index 0000000..041fd3b
--- /dev/null
+++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.html
@@ -0,0 +1,18 @@
+
+
+
+ ?name |
+ ?mbox |
+
+
+
+
+ Johnny Lee Outlaw |
+ mailto:jlow@example.com |
+
+
+ Peter Goodguy |
+ mailto:peter@example.org |
+
+
+
diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.md b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.md
new file mode 100644
index 0000000..3aa8a01
--- /dev/null
+++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.md
@@ -0,0 +1,4 @@
+| ?name | ?mbox |
+|-------------------|--------------------------|
+| Johnny Lee Outlaw | mailto:jlow@example.com |
+| Peter Goodguy | mailto:peter@example.org |
diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.tsv b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.tsv
new file mode 100644
index 0000000..dd1e81d
--- /dev/null
+++ b/tests/case_utils/case_sparql_select/.check-w3-output-with_header-without_index.tsv
@@ -0,0 +1,3 @@
+?name ?mbox
+Johnny Lee Outlaw mailto:jlow@example.com
+Peter Goodguy mailto:peter@example.org
diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.csv b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.csv
new file mode 100644
index 0000000..7933d39
--- /dev/null
+++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.csv
@@ -0,0 +1,2 @@
+0,Johnny Lee Outlaw,mailto:jlow@example.com
+1,Peter Goodguy,mailto:peter@example.org
diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.html b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.html
new file mode 100644
index 0000000..b6a842b
--- /dev/null
+++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.html
@@ -0,0 +1,14 @@
+
+
+
+ 0 |
+ Johnny Lee Outlaw |
+ mailto:jlow@example.com |
+
+
+ 1 |
+ Peter Goodguy |
+ mailto:peter@example.org |
+
+
+
diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.md b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.md
new file mode 100644
index 0000000..c5ee8c8
--- /dev/null
+++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.md
@@ -0,0 +1,3 @@
+|---|-------------------|--------------------------|
+| 0 | Johnny Lee Outlaw | mailto:jlow@example.com |
+| 1 | Peter Goodguy | mailto:peter@example.org |
diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.tsv b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.tsv
new file mode 100644
index 0000000..992efe2
--- /dev/null
+++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-with_index.tsv
@@ -0,0 +1,2 @@
+0 Johnny Lee Outlaw mailto:jlow@example.com
+1 Peter Goodguy mailto:peter@example.org
diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.csv b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.csv
new file mode 100644
index 0000000..a4c2c82
--- /dev/null
+++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.csv
@@ -0,0 +1,2 @@
+Johnny Lee Outlaw,mailto:jlow@example.com
+Peter Goodguy,mailto:peter@example.org
diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.html b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.html
new file mode 100644
index 0000000..6dbc7c3
--- /dev/null
+++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.html
@@ -0,0 +1,12 @@
+
+
+
+ Johnny Lee Outlaw |
+ mailto:jlow@example.com |
+
+
+ Peter Goodguy |
+ mailto:peter@example.org |
+
+
+
diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.md b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.md
new file mode 100644
index 0000000..6ad505c
--- /dev/null
+++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.md
@@ -0,0 +1,3 @@
+|-------------------|--------------------------|
+| Johnny Lee Outlaw | mailto:jlow@example.com |
+| Peter Goodguy | mailto:peter@example.org |
diff --git a/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.tsv b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.tsv
new file mode 100644
index 0000000..833da47
--- /dev/null
+++ b/tests/case_utils/case_sparql_select/.check-w3-output-without_header-without_index.tsv
@@ -0,0 +1,2 @@
+Johnny Lee Outlaw mailto:jlow@example.com
+Peter Goodguy mailto:peter@example.org
diff --git a/tests/case_utils/case_sparql_select/Makefile b/tests/case_utils/case_sparql_select/Makefile
index 68f11ec..0523c8a 100644
--- a/tests/case_utils/case_sparql_select/Makefile
+++ b/tests/case_utils/case_sparql_select/Makefile
@@ -50,6 +50,9 @@ check: \
check-w3-tsv \
check-prefixed_results \
check-subclass
+ source $(tests_srcdir)/venv/bin/activate \
+ && pytest \
+ --log-level=DEBUG
check-prefixed_results: \
check-prefixed_results-csv \
diff --git a/tests/case_utils/case_sparql_select/test_data_frame_to_table_text_json.py b/tests/case_utils/case_sparql_select/test_data_frame_to_table_text_json.py
new file mode 100644
index 0000000..00d20c3
--- /dev/null
+++ b/tests/case_utils/case_sparql_select/test_data_frame_to_table_text_json.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python3
+
+# This software was developed at the National Institute of Standards
+# and Technology by employees of the Federal Government in the course
+# of their official duties. Pursuant to title 17 Section 105 of the
+# United States Code this software is not subject to copyright
+# protection and is in the public domain. NIST assumes no
+# responsibility whatsoever for its use by other parties, and makes
+# no guarantees, expressed or implied, about its quality,
+# reliability, or any other characteristic.
+#
+# We would appreciate acknowledgement if the software is used.
+
+import pathlib
+import typing
+
+import pytest
+import rdflib
+
+import case_utils.case_sparql_select
+
+# Directory containing this test script; the input files are read from here, and the rendered tables are written here.
+SRCDIR = pathlib.Path(__file__).parent
+
+# Parse both sample input files into a single graph once, at import time.
+GRAPH = rdflib.Graph()
+GRAPH.parse(str(SRCDIR / "w3-input-2.ttl"))
+GRAPH.parse(str(SRCDIR / "w3-input-3.json"))
+assert len(GRAPH) > 0
+
+# Read the sample SELECT query, stripped of surrounding whitespace.
+SELECT_QUERY_TEXT: typing.Optional[str] = None
+with (SRCDIR / "w3-input-1.sparql").open("r") as _fh:
+ SELECT_QUERY_TEXT = _fh.read().strip()
+assert SELECT_QUERY_TEXT is not None
+
+# Run the query once with default keyword options; each parametrized case renders this same data frame.
+DATA_FRAME = case_utils.case_sparql_select.graph_and_query_to_data_frame(
+ GRAPH, SELECT_QUERY_TEXT
+)
+
+
+def make_data_frame_to_json_table_text_parameters() -> typing.Iterator[
+    typing.Tuple[str, bool, bool]
+]:
+    """
+    Yield every (output_mode, use_header, use_index) combination to test.
+
+    The output mode varies fastest, then use_index, then use_header; each flag takes False before True.
+    """
+    flag_values = [False, True]
+    output_modes = ["csv", "html", "md", "tsv"]
+    yield from (
+        (output_mode, use_header, use_index)
+        for use_header in flag_values
+        for use_index in flag_values
+        for output_mode in output_modes
+    )
+
+
+@pytest.mark.parametrize(
+ "output_mode, use_header, use_index",
+ make_data_frame_to_json_table_text_parameters(),
+)
+def test_data_frame_to_table_text_json(
+ output_mode: str,
+ use_header: bool,
+ use_index: bool,
+) -> None:
+ """
+ Render DATA_FRAME for one (output_mode, use_header, use_index) combination and write the text to the matching ".check-w3-output-..." file beside this script.
+
+ NOTE(review): This function makes no assertions; it only (re)writes the file, appending a newline when the text lacks one.  Presumably regressions are detected by comparing the rewritten files against the tracked copies - confirm.  Despite the "json" in the name, the parameters cover csv, html, md, and tsv.
+ """
+ table_text = case_utils.case_sparql_select.data_frame_to_table_text(
+ DATA_FRAME,
+ output_mode=output_mode,
+ use_header=use_header,
+ use_index=use_index,
+ )
+
+ # File name encodes the header/index choice and ends with the output mode as its extension.
+ output_filename_template = ".check-w3-output-%s_header-%s_index.%s"
+ header_part = "with" if use_header else "without"
+ index_part = "with" if use_index else "without"
+ output_filename = output_filename_template % (
+ header_part,
+ index_part,
+ output_mode,
+ )
+ with (SRCDIR / output_filename).open("w") as out_fh:
+ out_fh.write(table_text)
+ if table_text[-1] != "\n":
+ out_fh.write("\n")