# model-pick/langchain_pipeline.py
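"""RAG pipeline for reviewing a bank disclosure against stored law context.

Extracts the disclosure text from an uploaded PDF, retrieves related law
passages from an Astra DB vector collection, and asks the selected chat model
(Claude 3 Sonnet or Gemini Pro) to propose edits to the disclosure as a git
diff, together with the legal reasoning behind each change.
"""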
import io
import os
from pdfminer import high_level
from langchain_astradb import AstraDBVectorStore
from langchain_core.prompts import PromptTemplate
from langchain_openai import OpenAIEmbeddings
from langchain_anthropic import ChatAnthropic
from langchain_google_genai import ChatGoogleGenerativeAI

# Service credentials, read from the environment (all are required).
ASTRA_DB_API_ENDPOINT = os.environ["ASTRA_DB_API_ENDPOINT"]
ASTRA_DB_APPLICATION_TOKEN = os.environ["ASTRA_DB_APPLICATION_TOKEN"]
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
ANTHROPIC_API_KEY = os.environ["ANTHROPIC_API_KEY"]
GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]

# Astra DB collection holding the law context, queried with OpenAI ada-002 embeddings.
collection_name = "ilj_test"
embedding = OpenAIEmbeddings(model="text-embedding-ada-002")

# Chat models offered to callers, keyed by the names shown to the user.
models = {
    "claude-3": ChatAnthropic(model="claude-3-sonnet-20240229"),
    "gemini-pro": ChatGoogleGenerativeAI(model="gemini-pro"),
}


def model_names():
    return models.keys()


def pipeline(pdf_bytes, model_name):
    # Extract the disclosure text from the uploaded PDF; the raw bytes are
    # wrapped in BytesIO so pdfminer can read them like a file.
    disclosure_text = high_level.extract_text(io.BytesIO(pdf_bytes))

    # Look up the law passages most similar to the disclosure in Astra DB.
    astra = AstraDBVectorStore(
        api_endpoint=ASTRA_DB_API_ENDPOINT,
        token=ASTRA_DB_APPLICATION_TOKEN,
        collection_name=collection_name,
        embedding=embedding,
    )
    related_docs = astra.search(disclosure_text, search_type="similarity")
    # Ask the selected model to review the disclosure against the retrieved law context.
    prompt = PromptTemplate.from_template(
        """
        law context:
        {context}
        end of law context
        =====
        disclosure:
        {disclosure}
        end of disclosure
        ===
        Given the context above, how would a good regulatory attorney edit the disclosure above? Ignore the model form A-9 and focus only on the laws.
        Provide changes to the text, in git diff format, only where they contradict any laws or rules, fail to provide enough clarity based on the spirit of the law, or give you pause.
        If there is a company name or general information about the bank, there is no need to correct that sentence unless there is a legal contradiction.
        In your response, provide the git diff and, underneath it, all of the reasons for the changes, referencing the law.
        Be as thorough as possible.
        """
    )
    # Pass only the retrieved passages' text into the prompt, not full Document objects.
    context = "\n\n".join(doc.page_content for doc in related_docs)
    val = prompt.format(context=context, disclosure=disclosure_text)
    chat_response = models[model_name].invoke(val)
    return chat_response.content
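

if __name__ == "__main__":
    # Illustrative local run only: the PDF path and model name below are
    # placeholders, and the Astra DB / OpenAI / Anthropic / Google credentials
    # must already be set in the environment for the module to import at all.
    with open("sample_disclosure.pdf", "rb") as f:
        print(pipeline(f.read(), "gemini-pro"))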