robertselvam committed on
Commit
37bde1e
·
verified ·
1 Parent(s): e95ed68

Create dspy_qa.py

Browse files
Files changed (1) hide show
  1. dspy_qa.py +78 -0
dspy_qa.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import dspy
4
+ from dsp.utils import deduplicate
5
+ from dspy.retrieve.faiss_rm import FaissRM
6
+ from langchain_community.document_loaders import PyPDFLoader
7
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
8
+
9
+ # os.environ["AZURE_OPENAI_API_KEY"] = ""
10
+
11
class GenerateSearchQuery(dspy.Signature):
    """Write a simple search query that will help answer a complex question."""

    # The docstring above doubles as the task instructions DSPy hands to the
    # LM, so its wording is part of the program's behavior and is kept as-is.

    # Inputs: supporting text (described to the LM as "may contain relevant
    # content") and the question being asked.
    context = dspy.InputField(desc="may contain relevant content")
    question = dspy.InputField()

    # Output: the search query produced by the LM.
    query = dspy.OutputField()
18
+
19
class GenerateAnswer(dspy.Signature):
    """give me a answer for user question based on context"""

    # The docstring above doubles as the task instructions DSPy hands to the
    # LM, so its wording is part of the program's behavior and is kept as-is.

    # Inputs: supporting text (described to the LM as "may contain relevant
    # content") and the user's question.
    context = dspy.InputField(desc="may contain relevant content")
    question = dspy.InputField()

    # Output: the answer text produced by the LM.
    answer = dspy.OutputField()
26
+
27
+
28
+
29
class DocQA(dspy.Module):
    """Answer questions about one PDF using FAISS retrieval and an Azure OpenAI LM.

    On construction the PDF at ``file_path`` is loaded, split into text
    chunks and indexed in a ``FaissRM`` retriever.  ``run`` (or calling the
    module directly) retrieves passages for a question and asks the LM to
    answer from them.
    """

    def __init__(self, file_path, passages_per_hop=3, max_hops=2):
        """Create the LM client, the DSPy sub-modules and the PDF index.

        Args:
            file_path: Path of the PDF to index; passed to ``PyPDFLoader``.
            passages_per_hop: Number of passages requested from the retriever
                (the ``k`` of ``dspy.Retrieve``).
            max_hops: Number of ``GenerateSearchQuery`` modules to create.
                NOTE(review): answering performs a single retrieval with the
                raw question, so these modules and ``max_hops`` are not used
                by ``forward``/``run`` -- confirm whether multi-hop retrieval
                was intended.

        Raises:
            ValueError: If no text chunks could be extracted from the PDF.
        """
        super().__init__()
        self.cache = "cache.json"  # not read anywhere in this class

        # NOTE(review): no api_key is passed here; presumably it is taken from
        # the AZURE_OPENAI_API_KEY environment variable (see the commented-out
        # assignment near the imports) -- confirm.
        self.llm = dspy.AzureOpenAI(
            api_base="https://azureadople.openai.azure.com/",
            api_version="2023-09-15-preview",
            model="GPT-3",
        )

        self.generate_query = [
            dspy.ChainOfThought(GenerateSearchQuery) for _ in range(max_hops)
        ]
        self.retrieve = dspy.Retrieve(k=passages_per_hop)
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)
        self.max_hops = max_hops

        self.knowledge_base = self.create_knowledge_base(file_path)

    def load_documents(self, file_path):
        """Load the PDF at ``file_path`` and return what ``PyPDFLoader.load`` yields."""
        print("file_path", file_path)
        return PyPDFLoader(file_path).load()

    def split_documents(self, documents):
        """Split loaded documents into chunks and return their text as a list of strings.

        Chunks are capped at 6000 characters (measured with ``len``) and do
        not overlap.
        """
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=6000,
            chunk_overlap=0,
            length_function=len,
            is_separator_regex=False,
        )

        document_chunks = [
            chunk.page_content
            for chunk in text_splitter.split_documents(documents)
        ]
        print("input context Ready")
        return document_chunks

    def create_knowledge_base(self, file_path):
        """Load, split and index the PDF, returning the ``FaissRM`` retriever.

        Raises:
            ValueError: If the PDF produced no text chunks to index.
        """
        # load_documents already reports the path, so it is not printed again.
        documents = self.load_documents(file_path)
        document_chunks = self.split_documents(documents)
        if not document_chunks:
            # Fail with a clear message instead of handing the retriever an
            # empty corpus (e.g. a PDF with no extractable text).
            raise ValueError(
                f"No text could be extracted from {file_path!r}; "
                "cannot build a retrieval index."
            )
        return FaissRM(document_chunks)

    def forward(self, question):
        """Retrieve passages for ``question`` and generate an answer from them.

        Returns:
            dspy.Prediction with ``context`` (the de-duplicated retrieved
            passages) and ``answer`` (the LM's answer).
        """
        # The Retrieve / ChainOfThought sub-modules were built without an LM or
        # retriever of their own, so this instance's pair is registered as the
        # DSPy defaults before they are invoked.
        dspy.settings.configure(lm=self.llm, rm=self.knowledge_base)

        passages = self.retrieve(question).passages
        context = deduplicate(passages)

        pred = self.generate_answer(context=context, question=question)
        return dspy.Prediction(context=context, answer=pred.answer)

    def run(self, question):
        """Return only the answer text for ``question``."""
        return self(question).answer