# model-pick/langchain_pipeline.py
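"""RAG pipeline for reviewing a bank disclosure against stored law context.

Extracts the disclosure text from an uploaded PDF, retrieves related law
passages from an Astra DB vector collection, and asks the selected chat model
(Claude 3 Sonnet or Gemini Pro) to propose edits to the disclosure as a git
diff, together with the legal reasoning behind each change.
"""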
import io
import os
from pdfminer import high_level
from langchain_astradb import AstraDBVectorStore
from langchain_core.prompts import PromptTemplate
from langchain_openai import OpenAIEmbeddings
from langchain_anthropic import ChatAnthropic
from langchain_google_genai import ChatGoogleGenerativeAI

# Service credentials, read from the environment (all are required).
ASTRA_DB_API_ENDPOINT = os.environ["ASTRA_DB_API_ENDPOINT"]
ASTRA_DB_APPLICATION_TOKEN = os.environ["ASTRA_DB_APPLICATION_TOKEN"]
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
ANTHROPIC_API_KEY = os.environ["ANTHROPIC_API_KEY"]
GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]

# Astra DB collection holding the law context, queried with OpenAI ada-002 embeddings.
collection_name = "ilj_test"
embedding = OpenAIEmbeddings(model="text-embedding-ada-002")

# Chat models offered to callers, keyed by the names shown to the user.
models = {
    "claude-3": ChatAnthropic(model="claude-3-sonnet-20240229"),
    "gemini-pro": ChatGoogleGenerativeAI(model="gemini-pro"),
}


def model_names():
    return models.keys()


def pipeline(pdf_bytes, model_name):
    # Extract the disclosure text from the uploaded PDF; the raw bytes are
    # wrapped in BytesIO so pdfminer can read them like a file.
    disclosure_text = high_level.extract_text(io.BytesIO(pdf_bytes))

    # Look up the law passages most similar to the disclosure in Astra DB.
    astra = AstraDBVectorStore(
        api_endpoint=ASTRA_DB_API_ENDPOINT,
        token=ASTRA_DB_APPLICATION_TOKEN,
        collection_name=collection_name,
        embedding=embedding,
    )
    related_docs = astra.search(disclosure_text, search_type="similarity")
    # Ask the selected model to review the disclosure against the retrieved law context.
    prompt = PromptTemplate.from_template(
        """
        law context:
        {context}
        end of law context
        =====
        disclosure:
        {disclosure}
        end of disclosure
        ===
        Given the context above, how would a good regulatory attorney edit the disclosure above? Ignore the model form A-9 and focus only on the laws.
        Provide changes to the text, in git diff format, only where they contradict any laws or rules, fail to provide enough clarity based on the spirit of the law, or give you pause.
        If there is a company name or general information about the bank, there is no need to correct that sentence unless there is a legal contradiction.
        In your response, provide the git diff and, underneath it, all of the reasons for the changes, referencing the law.
        Be as thorough as possible.
        """
    )
    # Pass only the retrieved passages' text into the prompt, not full Document objects.
    context = "\n\n".join(doc.page_content for doc in related_docs)
    val = prompt.format(context=context, disclosure=disclosure_text)
    chat_response = models[model_name].invoke(val)
    return chat_response.content
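

if __name__ == "__main__":
    # Illustrative local run only: the PDF path and model name below are
    # placeholders, and the Astra DB / OpenAI / Anthropic / Google credentials
    # must already be set in the environment for the module to import at all.
    with open("sample_disclosure.pdf", "rb") as f:
        print(pipeline(f.read(), "gemini-pro"))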