Spaces:

EmbeddedAndrew
/

examin8

Sleeping

App Files Files Community

examin8 / chain.py

EmbeddedAndrew

init

607a79c almost 2 years ago

raw

history blame

4.68 kB

	import json
	import os
	import pathlib
	from typing import Dict, List, Tuple

	import weaviate
	from langchain import OpenAI, PromptTemplate
	from langchain.chains import LLMChain
	from langchain.chains.base import Chain
	from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
	from langchain.chains.conversation.memory import ConversationBufferMemory
	from langchain.chains.question_answering import load_qa_chain
	from langchain.embeddings import OpenAIEmbeddings
	from langchain.prompts import FewShotPromptTemplate, PromptTemplate
	from langchain.prompts.example_selector import \
	SemanticSimilarityExampleSelector
	from langchain.vectorstores import FAISS, Weaviate
	from pydantic import BaseModel


	class CustomChain(Chain, BaseModel):
	    """Answer a question from documents retrieved out of a vector store.

	    When a non-empty chat history is supplied, the incoming question is first
	    rewritten by ``key_word_extractor`` using that history. The rewritten
	    question is then used both for the similarity search and as the question
	    handed to the answer-generating ``chain``.
	    """

	    # Store queried through ``similarity_search`` for supporting documents.
	    vstore: Weaviate
	    # Chain whose ``combine_docs`` turns the retrieved documents into an answer.
	    chain: BaseCombineDocumentsChain
	    # Chain run with ``question`` and ``chat_history`` to rewrite the question.
	    key_word_extractor: Chain

	    @property
	    def input_keys(self) -> List[str]:
	        """Return the required input keys (``chat_history`` is optional)."""
	        return ["question"]

	    @property
	    def output_keys(self) -> List[str]:
	        """Return the single key under which ``_call`` reports its result."""
	        return ["answer"]

	    def _call(self, inputs: Dict[str, str]) -> Dict[str, str]:
	        """Retrieve documents for the question and produce an answer.

	        Args:
	            inputs: Must contain ``"question"``. May contain
	                ``"chat_history"``, an iterable of ``(human, ai)`` string
	                pairs; a missing or ``None`` history is treated as empty.

	        Returns:
	            A dict with the generated text under ``"answer"``.
	        """
	        question = inputs["question"]
	        # ``chat_history`` is not part of ``input_keys``, so a caller may
	        # legitimately omit it; fall back to an empty history instead of
	        # failing with KeyError.
	        chat_history_str = _get_chat_history(inputs.get("chat_history") or [])
	        if chat_history_str:
	            new_question = self.key_word_extractor.run(
	                question=question, chat_history=chat_history_str
	            )
	        else:
	            # Nothing to resolve against: use the question as asked.
	            new_question = question
	        # Trace of the query actually sent to the vector store.
	        print(new_question)
	        docs = self.vstore.similarity_search(new_question, k=4)
	        # Copy so the caller's dict is not mutated by the overrides below.
	        new_inputs = inputs.copy()
	        new_inputs["question"] = new_question
	        new_inputs["chat_history"] = chat_history_str
	        # ``combine_docs`` returns a pair; only its first element is used.
	        answer, _ = self.chain.combine_docs(docs, **new_inputs)
	        return {"answer": answer}


	def get_new_chain1(vectorstore) -> Chain:
	    """Assemble the two-stage marine-biology question-answering chain.

	    Stage one is a few-shot prompt that rewrites a follow-up question into a
	    standalone one, with examples selected from a Weaviate-backed store.
	    Stage two is a "stuff" QA chain that answers from retrieved documents.

	    Reads ``WEAVIATE_URL`` and ``OPENAI_API_KEY`` from the environment; a
	    missing variable raises ``KeyError``.

	    Args:
	        vectorstore: Store the returned chain searches for documents.

	    Returns:
	        A ``CustomChain`` wired with the rephrasing chain, the QA chain and
	        ``vectorstore``.
	    """
	    weaviate_client = weaviate.Client(
	        url=os.environ["WEAVIATE_URL"],
	        additional_headers={"X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"]},
	    )

	    # ---- Stage 1: rewrite the follow-up into a standalone question ----
	    # Layout of one worked example shown to the model.
	    example_template = """## Example:

	Chat History:
	{chat_history}
	Follow Up Input: {question}
	Standalone question: {answer}"""
	    example_prompt = PromptTemplate(
	        input_variables=["chat_history", "question", "answer"],
	        template=example_template,
	    )

	    instructions = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question. You should assume that the question is related to marine biology."""
	    # Same layout as an example, but left open for the model to complete.
	    open_example = """## Example:

	Chat History:
	{chat_history}
	Follow Up Input: {question}
	Standalone question:"""
	    # "Rephrase" and "content" are positional arguments to the Weaviate
	    # wrapper (presumably index name and text key; confirm against the
	    # installed langchain version).
	    rephrase_examples = Weaviate(
	        weaviate_client,
	        "Rephrase",
	        "content",
	        attributes=["question", "answer", "chat_history"],
	    )
	    rephrase_prompt = FewShotPromptTemplate(
	        prefix=instructions,
	        suffix=open_example,
	        example_selector=SemanticSimilarityExampleSelector(
	            vectorstore=rephrase_examples, k=4
	        ),
	        example_prompt=example_prompt,
	        input_variables=["question", "chat_history"],
	    )
	    key_word_extractor = LLMChain(
	        llm=OpenAI(temperature=0, model_name="text-davinci-003"),
	        prompt=rephrase_prompt,
	    )

	    # ---- Stage 2: answer from the retrieved documents ----
	    # How each retrieved document is rendered inside ``{context}``.
	    document_prompt = PromptTemplate(
	        input_variables=["page_content", "source"],
	        template=">Example:\nContent:\n---------\n{page_content}\n----------\nSource: {source}",
	    )
	    answer_template = """You are an AI assistant for Wikipedia information about marine biology.
	You are given the following extracted parts of a long document and a question. Provide a conversational answer with a hyperlink to the wikipedia page.
	You should only use hyperlinks that are explicitly listed as a source in the context. Do NOT make up a hyperlink that is not listed.
	If you don't know the answer, just say "Hmm, I'm not sure." Don't try to make up an answer.
	If the question is not about marine biology, the oceans, or biology, politely inform them that you are tuned to only answer questions about marine biology.
	Question: {question}
	=========
	{context}
	=========
	Answer in Markdown:"""
	    answer_prompt = PromptTemplate(
	        input_variables=["question", "context"], template=answer_template
	    )
	    answer_llm = OpenAI(temperature=0, model_name="text-davinci-003", max_tokens=-1)
	    doc_chain = load_qa_chain(
	        answer_llm,
	        chain_type="stuff",
	        prompt=answer_prompt,
	        document_prompt=document_prompt,
	    )

	    return CustomChain(
	        chain=doc_chain,
	        vstore=vectorstore,
	        key_word_extractor=key_word_extractor,
	    )


	def _get_chat_history(chat_history: List[Tuple[str, str]]):
	buffer = ""
	for human_s, ai_s in chat_history:
	human = f"Human: " + human_s
	ai = f"Assistant: " + ai_s
	buffer += "\n" + "\n".join([human, ai])
	return buffer