Spaces:

carbonnnnn
/

ChatLiterature

Sleeping

App Files Files Community

carbonnnnn committed on Aug 4, 2023

Commit

4823e70

1 Parent(s): 8832a9e

Upload 12 files

Browse files

Files changed (12) hide show

hay/__pycache__/model.cpython-310.pyc +0 -0
hay/__pycache__/pipeline.cpython-310.pyc +0 -0
hay/__pycache__/retriever.cpython-310.pyc +0 -0
hay/model.py +19 -6
hay/pipeline.py +45 -5
hay/retriever.py +9 -20
outputs/docs-data/data-00000-of-00001.arrow +3 -0
outputs/docs-data/dataset_info.json +60 -0
outputs/docs-data/state.json +13 -0
outputs/docs-data2/data-00000-of-00001.arrow +3 -0
outputs/docs-data2/dataset_info.json +60 -0
outputs/docs-data2/state.json +13 -0

hay/__pycache__/model.cpython-310.pyc CHANGED Viewed

Binary files a/hay/__pycache__/model.cpython-310.pyc and b/hay/__pycache__/model.cpython-310.pyc differ

hay/__pycache__/pipeline.cpython-310.pyc CHANGED Viewed

Binary files a/hay/__pycache__/pipeline.cpython-310.pyc and b/hay/__pycache__/pipeline.cpython-310.pyc differ

hay/__pycache__/retriever.cpython-310.pyc CHANGED Viewed

Binary files a/hay/__pycache__/retriever.cpython-310.pyc and b/hay/__pycache__/retriever.cpython-310.pyc differ

hay/model.py CHANGED Viewed

@@ -1,22 +1,35 @@
 from haystack.nodes import PromptNode, PromptTemplate
 from haystack.nodes import AnswerParser
 from haystack.nodes import TransformersSummarizer
-from haystack import Document
 def prompting_model():
     '''
     Define a prompt node in haystack pipeline
     '''
-    prompt_node = PromptNode(model_name_or_path="facebook/galactica-125m", default_prompt_template="deepset/question-answering-per-document")
-    # prompt_node = PromptNode(model_name_or_path="facebook/opt-350m", default_prompt_template=lfqa_prompt)
     return prompt_node
 def summarize():

 from haystack.nodes import PromptNode, PromptTemplate
 from haystack.nodes import AnswerParser
 from haystack.nodes import TransformersSummarizer
 def prompting_model():
     '''
     Define a prompt node in haystack pipeline
     '''
+    # prompt_node = PromptNode(model_name_or_path="facebook/galactica-125m", default_prompt_template="deepset/question-answering-per-document")
+    prompt_node = PromptNode(model_name_or_path="facebook/galactica-125m")
     return prompt_node
+def prompting_model_2():
+    '''
+    Define a prompt node in haystack pipeline, with detailed prompt
+    '''
+    custom_prompt = PromptTemplate(prompt = """ You are a helpful and knowledgeable agent. To achieve your goal of answering complex questions,
+                                                 you have access to the following paragraph :
+                                                {join(documents)}
+                                                Your output should be a detailed summary of the paragraph
+                                                 """)
+    summarization_template = PromptTemplate("deepset/summarization")
+    prompt_node = PromptNode(model_name_or_path="facebook/galactica-125m", default_prompt_template=custom_prompt)
+    return prompt_node
 def summarize():

hay/pipeline.py CHANGED Viewed

@@ -1,14 +1,18 @@
 from hay.model import prompting_model, summarize
 from haystack.pipelines import Pipeline, SearchSummarizationPipeline
 from hay.retriever import retriever1
-def rg_pipeline(question):
     '''
     Defines a pipeline of retriever and generator and generates output for the given question
     '''
     prompt_node = prompting_model()
-    retriever = retriever1()
     pipe = Pipeline()
     pipe.add_node(component=retriever, name="retriever", inputs=["Query"])
@@ -23,17 +27,17 @@ def rg_pipeline(question):
     return None
-def rs_pipeline(question):
     '''
     Defines a pipeline of retriever and summarizer and generates output for the given question
     '''
-    retriever = retriever1()
     summarizer = summarize()
     # Get top 10 results from the retriever and summarize them
     pipeline = SearchSummarizationPipeline(summarizer=summarizer, retriever=retriever)
-    result = pipeline.run(query=question, params={"Retriever": {"top_k": 5}})
     output = ''
     for i in range(len(result['documents'])):
@@ -43,6 +47,42 @@ def rs_pipeline(question):
     return output

 from hay.model import prompting_model, summarize
+from hay.model import prompting_model_2
 from haystack.pipelines import Pipeline, SearchSummarizationPipeline
+from haystack.agents.memory import ConversationSummaryMemory
+# from haystack.agents.conversational import
+from haystack import Document
 from hay.retriever import retriever1
+def rg_pipeline(question, d):
     '''
     Defines a pipeline of retriever and generator and generates output for the given question
     '''
     prompt_node = prompting_model()
+    retriever = retriever1(d)
     pipe = Pipeline()
     pipe.add_node(component=retriever, name="retriever", inputs=["Query"])
     return None
+def rs_pipeline(question, d):
     '''
     Defines a pipeline of retriever and summarizer and generates output for the given question
     '''
+    retriever = retriever1(d)
     summarizer = summarize()
     # Get top 10 results from the retriever and summarize them
     pipeline = SearchSummarizationPipeline(summarizer=summarizer, retriever=retriever)
+    result = pipeline.run(query=question, params={"Retriever": {"top_k": 2}})
     output = ''
     for i in range(len(result['documents'])):
     return output
+# Try this later
+def conv_agent(question="How to reduce carbon emissions?"):
+    # '''
+    # Defines a pipeline using the conversational agent class
+    # '''
+    # prompt_node = prompting_model()
+    # summary_memory = ConversationSummaryMemory(prompt_node=prompt_node)
+    # conversational_agent = ConversationalAgent(prompt_node=prompt_node, memory=summary_memory)
+    output = None
+    return output
+def rsg_pipeline(question, d):
+    '''
+    Defines a pipeline using the summarization pipeline with an additional prompt node
+    '''
+    # retriever = retriever1(d)
+    # summarizer = summarize()
+    # pipeline = SearchSummarizationPipeline(summarizer=summarizer, retriever=retriever)
+    # result = pipeline.run(query=question, params={"Retriever": {"top_k": 5}})
+    # output = ''
+    # for i in range(len(result['documents'])):
+    #     output += result['documents'][i].meta['summary']
+    output = "In recent years, a number of papers have examined the impact of incentives on firms to reduce carbon emissions.The European Union's (EU) Emissions Trading Scheme (ETS) aims to reduce greenhouse gas (GHG) emissions by trading carbon dioxide emissions from major emitters.The aim of this article is to provide a reference for managers to improve the attractiveness of their stores to consumers and for the gov- ernment to design carbon policy.In this paper, we discuss how the number of retail stores in a market affects the carbon emissions in the supply chain, and present our mathematical models to illustrate how retail store density can affect the carbon emissions in the supply chain, accounting for consumers’ emissions and transportation cost.KeyTakeaways:"
+    node = prompting_model_2()
+    pipe = Pipeline()
+    pipe.add_node(component=node, name="prompt_node", inputs = ["Query"])
+    f_output = pipe.run(query=question, documents=[Document(output)])
+    # op = [a.answer for a in f_output["answers"]]
+    return f_output

hay/retriever.py CHANGED Viewed

@@ -18,13 +18,14 @@ from haystack.nodes import TfidfRetriever
 import warnings
 warnings.filterwarnings('ignore')
-def generate_docs(overlap, length):
     '''
     Takes in split length and split overlap
     Saves the docs in a pandas dataframe
     '''
-    all_docs = convert_files_to_docs(dir_path='data')
     preprocessor = PreProcessor(
         clean_empty_lines=True,
@@ -42,35 +43,23 @@ def generate_docs(overlap, length):
     df = pd.DataFrame(docs)
     dataset = Dataset(pa.Table.from_pandas(df))
-    dataset.save_to_disk('outputs/docs-dataset')
     return None
-def retriever1():
     '''
     Use BM25 Retriever to retrieve data
     '''
-    dataset = load_from_disk('outputs/docs-dataset')
     # BM25Retriever with InMemoryDocumentStore
     document_store = InMemoryDocumentStore(use_bm25=True)
     document_store.write_documents(dataset)
-    retriever = BM25Retriever(document_store=document_store, top_k=5)
     return retriever
-# def retriever2():
-#     document_store = FAISSDocumentStore(similarity="dot_product")
-#     retriever = DensePassageRetriever(
-#         document_store=document_store,
-#         query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
-#         passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base"
-#     )
-#     document_store.update_embeddings(retriever)
-#     return retriever
-# generate_docs(20, 250)
-# ret = retriever2()

 import warnings
 warnings.filterwarnings('ignore')
+def generate_docs(overlap, length, d='data'):
     '''
     Takes in split length and split overlap
     Saves the docs in a pandas dataframe
     '''
+    all_docs = convert_files_to_docs(dir_path=d)
     preprocessor = PreProcessor(
         clean_empty_lines=True,
     df = pd.DataFrame(docs)
     dataset = Dataset(pa.Table.from_pandas(df))
+    # dataset.save_to_disk('outputs/docs-dataset')
+    dataset.save_to_disk('outputs/docs-'+d)
     return None
+def retriever1(d):
     '''
     Use BM25 Retriever to retrieve data
     '''
+    # dataset = load_from_disk('outputs/docs-dataset')
+    dataset = load_from_disk('outputs/docs-'+d)
     # BM25Retriever with InMemoryDocumentStore
     document_store = InMemoryDocumentStore(use_bm25=True)
     document_store.write_documents(dataset)
+    retriever = BM25Retriever(document_store=document_store, top_k=10)
     return retriever

outputs/docs-data/data-00000-of-00001.arrow ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6d48ece13d37530a44060ad19d05e96a34d048d5de9403947fd50235c9b6a254
+size 3583736

outputs/docs-data/dataset_info.json ADDED Viewed

	@@ -0,0 +1,60 @@

+{
+  "citation": "",
+  "description": "",
+  "features": {
+    "id": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "content": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "content_type": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "meta": {
+      "_split_id": {
+        "dtype": "int64",
+        "_type": "Value"
+      },
+      "_split_overlap": [
+        {
+          "doc_id": {
+            "dtype": "string",
+            "_type": "Value"
+          },
+          "range": {
+            "feature": {
+              "dtype": "int64",
+              "_type": "Value"
+            },
+            "_type": "Sequence"
+          }
+        }
+      ],
+      "name": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "id_hash_keys": {
+      "feature": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "score": {
+      "dtype": "null",
+      "_type": "Value"
+    },
+    "embedding": {
+      "dtype": "null",
+      "_type": "Value"
+    }
+  },
+  "homepage": "",
+  "license": ""
+}

outputs/docs-data/state.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "737b5afa18cfd1c6",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": null
+}

outputs/docs-data2/data-00000-of-00001.arrow ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0dc7265bdf1990c6ee2e9cfdb4a11583e5d8e4d8149239d0f4b32412f6677beb
+size 6516160

outputs/docs-data2/dataset_info.json ADDED Viewed

	@@ -0,0 +1,60 @@

+{
+  "citation": "",
+  "description": "",
+  "features": {
+    "id": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "content": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "content_type": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "meta": {
+      "_split_id": {
+        "dtype": "int64",
+        "_type": "Value"
+      },
+      "_split_overlap": [
+        {
+          "doc_id": {
+            "dtype": "string",
+            "_type": "Value"
+          },
+          "range": {
+            "feature": {
+              "dtype": "int64",
+              "_type": "Value"
+            },
+            "_type": "Sequence"
+          }
+        }
+      ],
+      "name": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "id_hash_keys": {
+      "feature": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "score": {
+      "dtype": "null",
+      "_type": "Value"
+    },
+    "embedding": {
+      "dtype": "null",
+      "_type": "Value"
+    }
+  },
+  "homepage": "",
+  "license": ""
+}

outputs/docs-data2/state.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "87b3b11c03e41dc7",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": null
+}