From 711717711297fc1ec3039d6010c0e106ad14064a Mon Sep 17 00:00:00 2001
From: Zdenek Kasner <kasnezde@fel.cvut.cz>
Date: Thu, 28 Apr 2022 15:42:37 +0200
Subject: [PATCH 1/5] Add templates for schema_guided_dstc8 response generation

---
 .../dialogues/templates.yaml                  | 122 ++++++++++++++++++
 1 file changed, 122 insertions(+)
 create mode 100644 promptsource/templates/schema_guided_dstc8/dialogues/templates.yaml

diff --git a/promptsource/templates/schema_guided_dstc8/dialogues/templates.yaml b/promptsource/templates/schema_guided_dstc8/dialogues/templates.yaml
new file mode 100644
index 000000000..0b1913db1
--- /dev/null
+++ b/promptsource/templates/schema_guided_dstc8/dialogues/templates.yaml
@@ -0,0 +1,122 @@
+dataset: schema_guided_dstc8
+subset: dialogues
+templates:
+  23666a79-c235-4c63-a3c1-b5712c7086b0: !Template
+    answer_choices: null
+    id: 23666a79-c235-4c63-a3c1-b5712c7086b0
+    jinja: '{% set stop = range(2, turns["utterance"]|length, 2) | random() - 1 %}
+
+      {% for utterance in turns["utterance"][:stop] %}
+
+      {{"Chatbot" if turns["speaker"][loop.index] == 0 else "Human"}}: {{utterance}}
+
+      {% endfor %}
+
+      Chatbot:
+
+      |||
+
+      {{turns["utterance"][stop]}}
+
+      '
+    metadata: !TemplateMetadata
+      choices_in_prompt: false
+      metrics:
+      - BLEU
+      - ROUGE
+      original_task: false
+    name: predict_response_random_human_chatbot
+    reference: ''
+  2abe77de-9b9e-49d5-b17f-b9e28352baf3: !Template
+    answer_choices: null
+    id: 2abe77de-9b9e-49d5-b17f-b9e28352baf3
+    jinja: '{% set stop = range(2, turns["utterance"]|length, 2) | random() - 1 %}
+
+      {% for utterance in turns["utterance"][:stop] %}
+
+      {{"System" if turns["speaker"][loop.index] == 0 else "User"}}: {{utterance}}
+
+      {% endfor %}
+
+      System:
+
+      |||
+
+      {{turns["utterance"][stop]}}
+
+      '
+    metadata: !TemplateMetadata
+      choices_in_prompt: false
+      metrics:
+      - BLEU
+      - ROUGE
+      original_task: false
+    name: predict_response_random_user_system
+    reference: ''
+  8a31e809-9bc7-4d95-898d-d5dad73b4d6e: !Template
+    answer_choices: null
+    id: 8a31e809-9bc7-4d95-898d-d5dad73b4d6e
+    jinja: 'How would you continue the following dialogue?
+
+      {% set stop = range(2, turns["utterance"]|length, 2) | random() - 1 %}
+
+      {% for utterance in turns["utterance"][:stop] %}
+
+      "{{utterance}}"
+
+      {% endfor %}
+
+
+      |||
+
+      "{{turns["utterance"][stop]}}"
+
+      '
+    metadata: !TemplateMetadata
+      choices_in_prompt: false
+      metrics:
+      - BLEU
+      - ROUGE
+      original_task: false
+    name: predict_response_random_continue_dialogue
+    reference: ''
+  9af4e56b-ee5f-47e7-9681-16bbdf11ef5a: !Template
+    answer_choices: null
+    id: 9af4e56b-ee5f-47e7-9681-16bbdf11ef5a
+    jinja: "{% set stop = range(2, turns[\"utterance\"]|length, 2) | random() - 1\
+      \ %}\n{% for utterance in turns[\"utterance\"][:stop] %}\n- {{utterance}}\n\
+      {% endfor %}\n- \n|||\n{{turns[\"utterance\"][stop]}}"
+    metadata: !TemplateMetadata
+      choices_in_prompt: false
+      metrics:
+      - ROUGE
+      - BLEU
+      original_task: false
+    name: predict_response_random_simple
+    reference: ''
+  e828a273-00eb-4fa1-9b2a-4331682980ca: !Template
+    answer_choices: null
+    id: e828a273-00eb-4fa1-9b2a-4331682980ca
+    jinja: '{% set stop = range(2, turns["utterance"]|length, 2) | random() - 1 %}
+
+      {% for utterance in turns["utterance"][:stop] %}
+
+      {{"B" if turns["speaker"][loop.index] == 0 else "A"}}: {{utterance}}
+
+      {% endfor %}
+
+      B:
+
+      |||
+
+      {{turns["utterance"][stop]}}
+
+      '
+    metadata: !TemplateMetadata
+      choices_in_prompt: false
+      metrics:
+      - BLEU
+      - ROUGE
+      original_task: false
+    name: predict_response_random_a_b
+    reference: ''

From 120a78ffb2958f29a4f99f09abfb03084ce84030 Mon Sep 17 00:00:00 2001
From: Zdenek Kasner <kasnezde@fel.cvut.cz>
Date: Wed, 18 May 2022 14:57:23 +0200
Subject: [PATCH 2/5] Remove extra newlines at the end of targets for
 schema_guided_dstc8

---
 .../schema_guided_dstc8/dialogues/templates.yaml | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/promptsource/templates/schema_guided_dstc8/dialogues/templates.yaml b/promptsource/templates/schema_guided_dstc8/dialogues/templates.yaml
index 0b1913db1..0191b4006 100644
--- a/promptsource/templates/schema_guided_dstc8/dialogues/templates.yaml
+++ b/promptsource/templates/schema_guided_dstc8/dialogues/templates.yaml
@@ -16,9 +16,7 @@ templates:
 
       |||
 
-      {{turns["utterance"][stop]}}
-
-      '
+      {{turns["utterance"][stop]}}'
     metadata: !TemplateMetadata
       choices_in_prompt: false
       metrics:
@@ -42,9 +40,7 @@ templates:
 
       |||
 
-      {{turns["utterance"][stop]}}
-
-      '
+      {{turns["utterance"][stop]}}'
     metadata: !TemplateMetadata
       choices_in_prompt: false
       metrics:
@@ -69,9 +65,7 @@ templates:
 
       |||
 
-      "{{turns["utterance"][stop]}}"
-
-      '
+      "{{turns["utterance"][stop]}}"'
     metadata: !TemplateMetadata
       choices_in_prompt: false
       metrics:
@@ -109,9 +103,7 @@ templates:
 
       |||
 
-      {{turns["utterance"][stop]}}
-
-      '
+      {{turns["utterance"][stop]}}'
     metadata: !TemplateMetadata
       choices_in_prompt: false
       metrics:

From d1f16cfa786ee496588de7117abe5ab6c5326f67 Mon Sep 17 00:00:00 2001
From: Victor SANH <victorsanh@gmail.com>
Date: Sun, 22 May 2022 22:36:21 +0100
Subject: [PATCH 3/5] Accelerate `get_infos` by caching the `DatasetInfosDict`s
 (#778)

* accelerate `get_infos` by caching the `DatasetInfosDict`s

* quality

* consistency
---
 promptsource/__init__.py |  5 ++++-
 promptsource/app.py      | 32 +++++++++++++++++++++++++-------
 2 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/promptsource/__init__.py b/promptsource/__init__.py
index d4dacf2cb..21eaa37ed 100644
--- a/promptsource/__init__.py
+++ b/promptsource/__init__.py
@@ -1 +1,4 @@
-DEFAULT_PROMPTSOURCE_CACHE_HOME = "~/.cache/promptsource"
+from pathlib import Path
+
+
+DEFAULT_PROMPTSOURCE_CACHE_HOME = str(Path("~/.cache/promptsource").expanduser())
diff --git a/promptsource/app.py b/promptsource/app.py
index ed1bc7076..44e712b84 100644
--- a/promptsource/app.py
+++ b/promptsource/app.py
@@ -1,20 +1,23 @@
 import argparse
 import functools
 import multiprocessing
+import os
 import textwrap
+from hashlib import sha256
 from multiprocessing import Manager, Pool
 
 import pandas as pd
 import plotly.express as px
 import streamlit as st
 from datasets import get_dataset_infos
+from datasets.info import DatasetInfosDict
 from pygments import highlight
 from pygments.formatters import HtmlFormatter
 from pygments.lexers import DjangoLexer
-from templates import INCLUDED_USERS
 
+from promptsource import DEFAULT_PROMPTSOURCE_CACHE_HOME
 from promptsource.session import _get_state
-from promptsource.templates import DatasetTemplates, Template, TemplateCollection
+from promptsource.templates import INCLUDED_USERS, DatasetTemplates, Template, TemplateCollection
 from promptsource.utils import (
     get_dataset,
     get_dataset_confs,
@@ -25,6 +28,9 @@
 )
 
 
+DATASET_INFOS_CACHE_DIR = os.path.join(DEFAULT_PROMPTSOURCE_CACHE_HOME, "DATASET_INFOS")
+os.makedirs(DATASET_INFOS_CACHE_DIR, exist_ok=True)
+
 # Python 3.8 switched the default start method from fork to spawn. OS X also has
 # some issues related to fork, eee, e.g., https://github.com/bigscience-workshop/promptsource/issues/572
 # so we make sure we always use spawn for consistency
@@ -38,7 +44,17 @@ def get_infos(all_infos, d_name):
     :param all_infos: multiprocess-safe dictionary
     :param d_name: dataset name
     """
-    all_infos[d_name] = get_dataset_infos(d_name)
+    d_name_bytes = d_name.encode("utf-8")
+    d_name_hash = sha256(d_name_bytes)
+    foldername = os.path.join(DATASET_INFOS_CACHE_DIR, d_name_hash.hexdigest())
+    if os.path.isdir(foldername):
+        infos_dict = DatasetInfosDict.from_directory(foldername)
+    else:
+        infos = get_dataset_infos(d_name)
+        infos_dict = DatasetInfosDict(infos)
+        os.makedirs(foldername)
+        infos_dict.write_to_directory(foldername)
+    all_infos[d_name] = infos_dict
 
 
 # add an argument for read-only
@@ -181,11 +197,13 @@ def show_text(t, width=WIDTH, with_markdown=False):
                 else:
                     subset_infos = infos[subset_name]
 
-                split_sizes = {k: v.num_examples for k, v in subset_infos.splits.items()}
+                try:
+                    split_sizes = {k: v.num_examples for k, v in subset_infos.splits.items()}
+                except Exception:
+                    # Fixing bug in some community datasets.
+                    # For simplicity, just filling `split_sizes` with nothing, so the displayed split sizes will be 0.
+                    split_sizes = {}
             else:
-                # Zaid/coqa_expanded and Zaid/quac_expanded don't have dataset_infos.json
-                # so infos is an empty dic, and `infos[list(infos.keys())[0]]` raises an error
-                # For simplicity, just filling `split_sizes` with nothing, so the displayed split sizes will be 0.
                 split_sizes = {}
 
             # Collect template counts, original task counts and names

From bcc5abd874f4b4c53e72e345020dd23a5bb32124 Mon Sep 17 00:00:00 2001
From: Stephen Bach <stephenhbach@gmail.com>
Date: Tue, 24 May 2022 09:28:44 -0400
Subject: [PATCH 4/5] Revert changes to app.py.

---
 promptsource/app.py | 34 ++++++++--------------------------
 1 file changed, 8 insertions(+), 26 deletions(-)

diff --git a/promptsource/app.py b/promptsource/app.py
index 4ebc6f9ff..d76d0564f 100644
--- a/promptsource/app.py
+++ b/promptsource/app.py
@@ -1,23 +1,20 @@
 import argparse
 import functools
 import multiprocessing
-import os
 import textwrap
-from hashlib import sha256
 from multiprocessing import Manager, Pool
 
 import pandas as pd
 import plotly.express as px
 import streamlit as st
 from datasets import get_dataset_infos
-from datasets.info import DatasetInfosDict
 from pygments import highlight
 from pygments.formatters import HtmlFormatter
 from pygments.lexers import DjangoLexer
+from templates import INCLUDED_USERS
 
-from promptsource import DEFAULT_PROMPTSOURCE_CACHE_HOME
 from promptsource.session import _get_state
-from promptsource.templates import INCLUDED_USERS, DatasetTemplates, Template, TemplateCollection
+from promptsource.templates import DatasetTemplates, Template, TemplateCollection
 from promptsource.utils import (
     get_dataset,
     get_dataset_confs,
@@ -28,9 +25,6 @@
 )
 
 
-DATASET_INFOS_CACHE_DIR = os.path.join(DEFAULT_PROMPTSOURCE_CACHE_HOME, "DATASET_INFOS")
-os.makedirs(DATASET_INFOS_CACHE_DIR, exist_ok=True)
-
 # Python 3.8 switched the default start method from fork to spawn. OS X also has
 # some issues related to fork, eee, e.g., https://github.com/bigscience-workshop/promptsource/issues/572
 # so we make sure we always use spawn for consistency
@@ -44,17 +38,7 @@ def get_infos(all_infos, d_name):
     :param all_infos: multiprocess-safe dictionary
     :param d_name: dataset name
     """
-    d_name_bytes = d_name.encode("utf-8")
-    d_name_hash = sha256(d_name_bytes)
-    foldername = os.path.join(DATASET_INFOS_CACHE_DIR, d_name_hash.hexdigest())
-    if os.path.isdir(foldername):
-        infos_dict = DatasetInfosDict.from_directory(foldername)
-    else:
-        infos = get_dataset_infos(d_name)
-        infos_dict = DatasetInfosDict(infos)
-        os.makedirs(foldername)
-        infos_dict.write_to_directory(foldername)
-    all_infos[d_name] = infos_dict
+    all_infos[d_name] = get_dataset_infos(d_name)
 
 
 # add an argument for read-only
@@ -197,13 +181,11 @@ def show_text(t, width=WIDTH, with_markdown=False):
                 else:
                     subset_infos = infos[subset_name]
 
-                try:
-                    split_sizes = {k: v.num_examples for k, v in subset_infos.splits.items()}
-                except Exception:
-                    # Fixing bug in some community datasets.
-                    # For simplicity, just filling `split_sizes` with nothing, so the displayed split sizes will be 0.
-                    split_sizes = {}
+                split_sizes = {k: v.num_examples for k, v in subset_infos.splits.items()}
             else:
+                # Zaid/coqa_expanded and Zaid/quac_expanded don't have dataset_infos.json
+                # so infos is an empty dic, and `infos[list(infos.keys())[0]]` raises an error
+                # For simplicity, just filling `split_sizes` with nothing, so the displayed split sizes will be 0.
                 split_sizes = {}
 
             # Collect template counts, original task counts and names
@@ -658,4 +640,4 @@ def show_text(t, width=WIDTH, with_markdown=False):
 
 
 if __name__ == "__main__":
-    run_app()
+    run_app()
\ No newline at end of file

From 399ee3b7db4c545354dc9ed48e2ba68704a1e17d Mon Sep 17 00:00:00 2001
From: Stephen Bach <stephenhbach@gmail.com>
Date: Tue, 24 May 2022 09:31:14 -0400
Subject: [PATCH 5/5] Restore trailing newline at end of promptsource/app.py

---
 promptsource/app.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/promptsource/app.py b/promptsource/app.py
index d76d0564f..cc19e6189 100644
--- a/promptsource/app.py
+++ b/promptsource/app.py
@@ -640,4 +640,4 @@ def show_text(t, width=WIDTH, with_markdown=False):
 
 
 if __name__ == "__main__":
-    run_app()
\ No newline at end of file
+    run_app()