Spaces:
Sleeping
Sleeping
File size: 2,707 Bytes
37bde1e 1c283c8 37bde1e 1c283c8 37bde1e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
import os
import time
import dspy
from dsp.utils import deduplicate
from dspy.retrieve.faiss_rm import FaissRM
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import CSVLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
# os.environ["AZURE_OPENAI_API_KEY"] = ""
class GenerateSearchQuery(dspy.Signature):
    """Write a simple search query that will help answer a complex question."""

    # NOTE(review): DSPy presumably uses the docstring above as the prompt
    # instruction, so treat it as runtime text rather than documentation.
    #
    # DocQA.__init__ wraps this signature in dspy.ChainOfThought, once per hop.

    # Inputs: the context gathered so far and the user's original question.
    context = dspy.InputField(desc="may contain relevant content")
    question = dspy.InputField()
    # Output: the search query to write.
    query = dspy.OutputField()
class GenerateAnswer(dspy.Signature):
    """give me a answer for user question based on context"""

    # NOTE(review): DSPy presumably uses the docstring above as the prompt
    # instruction, so its wording is runtime text; it is left unchanged here.
    #
    # DocQA.__init__ wraps this signature in dspy.ChainOfThought, and DocQA.run
    # calls it with `context` and `question`, then reads `.answer`.

    # Inputs: the retrieved passages and the user's question.
    context = dspy.InputField(desc="may contain relevant content")
    question = dspy.InputField()
    # Output: the answer text returned to the caller of DocQA.run.
    answer = dspy.OutputField()
class DocQA(dspy.Module):
    """Answer questions about a single document file with DSPy and FAISS.

    On construction the file is loaded, split into text chunks and indexed in
    a ``FaissRM`` retriever. ``run`` then retrieves the top passages for a
    question and asks the language model to answer from them.
    """

    def __init__(self, file_path, passages_per_hop=3, max_hops=2):
        """Create the language-model client, the predictors and the index.

        Args:
            file_path: Path of the document to index (see ``load_documents``).
            passages_per_hop: Number of passages ``dspy.Retrieve`` asks for.
            max_hops: Number of query-generation predictors to create.

        Raises:
            ValueError: If no text chunks could be extracted from the file.
        """
        super().__init__()
        self.cache = "cache.json"
        # No api_key is passed explicitly.
        # NOTE(review): credentials presumably come from the
        # AZURE_OPENAI_API_KEY environment variable -- confirm.
        self.llm = dspy.AzureOpenAI(
            api_base="https://azureadople.openai.azure.com/",
            api_version="2023-09-15-preview",
            model="GPT-3",
        )
        # One query writer per hop. ``run`` currently does a single retrieval
        # and does not call these predictors; they are kept so the attribute
        # remains available to existing callers.
        self.generate_query = [
            dspy.ChainOfThought(GenerateSearchQuery) for _ in range(max_hops)
        ]
        self.retrieve = dspy.Retrieve(k=passages_per_hop)
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)
        self.max_hops = max_hops
        self.knowledge_base = self.create_knowledge_base(file_path)

    def load_documents(self, file_path):
        """Load ``file_path`` into a list of LangChain documents.

        Paths ending in ``.pdf`` (case-insensitive) are read with
        ``PyPDFLoader``; every other path is read with ``CSVLoader``, which
        was previously the only supported format.
        """
        print("file_path", file_path)
        # str() keeps this working for both plain strings and path objects.
        is_pdf = str(file_path).lower().endswith(".pdf")
        loader_cls = PyPDFLoader if is_pdf else CSVLoader
        return loader_cls(file_path).load()

    def split_documents(self, documents):
        """Split documents into chunks and return their text as strings.

        Chunks are at most 6000 characters long (measured with ``len``) and
        do not overlap.
        """
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=6000,
            chunk_overlap=0,
            length_function=len,
            is_separator_regex=False,
        )
        pieces = text_splitter.split_documents(documents)
        # The retriever is built from raw strings, not Document objects.
        document_chunks = [piece.page_content for piece in pieces]
        print("input context Ready")
        return document_chunks

    def create_knowledge_base(self, file_path):
        """Load, split and index ``file_path``; return the ``FaissRM`` index.

        Raises:
            ValueError: If the file produced no text chunks, since an index
                over nothing cannot answer any question.
        """
        print("file_path", file_path)
        chunks = self.split_documents(self.load_documents(file_path))
        if not chunks:
            # Fail early with a clear message instead of handing an empty
            # list to the retriever.
            raise ValueError(
                f"No text could be extracted from {file_path!r}; "
                "cannot build a knowledge base."
            )
        return FaissRM(chunks)

    def run(self, question):
        """Answer ``question`` from the indexed document.

        Configures DSPy's global settings with this instance's language model
        and retriever, retrieves passages for the question, removes duplicate
        passages and returns the generated answer.
        """
        dspy.settings.configure(lm=self.llm, rm=self.knowledge_base)
        passages = self.retrieve(question).passages
        context = deduplicate(passages)
        pred = self.generate_answer(context=context, question=question)
        return pred.answer
|