# Spaces: Sleeping -- appears to be a status banner from the hosting page captured with the source; not program text.
import os | |
from pdfminer import high_level | |
from langchain_astradb import AstraDBVectorStore | |
from langchain_core.prompts import PromptTemplate | |
from langchain_openai import OpenAIEmbeddings | |
from langchain_anthropic import ChatAnthropic | |
from langchain_google_genai import ChatGoogleGenerativeAI | |
# Required configuration. Each value is read with os.environ[...], so a
# missing variable raises KeyError while this module is being imported.
ASTRA_DB_API_ENDPOINT = os.environ["ASTRA_DB_API_ENDPOINT"]
ASTRA_DB_APPLICATION_TOKEN = os.environ["ASTRA_DB_APPLICATION_TOKEN"]

# The three provider keys below are not referenced again in the visible code;
# presumably the client libraries read the same variables themselves and these
# lookups only act as an early presence check -- TODO confirm.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
ANTHROPIC_API_KEY = os.environ["ANTHROPIC_API_KEY"]
GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]

# Name of the Astra DB collection that pipeline() searches.
collection_name = "ilj_test"

# Embedding model handed to the vector store in pipeline().
embedding = OpenAIEmbeddings(model="text-embedding-ada-002")

# Chat models selectable by name; the keys are the values accepted as
# pipeline()'s model_name argument and returned by model_names().
models = {
    "claude-3": ChatAnthropic(model="claude-3-sonnet-20240229"),
    "gemini-pro": ChatGoogleGenerativeAI(model="gemini-pro"),
}
def model_names():
    """Return the names of the chat models registered in ``models``.

    Returns:
        The keys view of the module-level ``models`` dictionary.
    """
    available = models.keys()
    return available
def pipeline(bytes, model_name):
    """Ask a chat model how a PDF disclosure should be edited given related law text.

    The PDF is converted to plain text, that text is used as a similarity
    query against the Astra DB collection, and the search hits together with
    the disclosure are rendered into a prompt for the selected chat model.

    Args:
        bytes: PDF input passed straight to ``high_level.extract_text``
            (presumably a path or a binary file object -- TODO confirm with
            the caller). The name shadows the builtin; it is kept so that
            existing keyword callers continue to work.
        model_name: Key of the chat model to use in ``models``.

    Returns:
        The ``content`` attribute of the chat model's response.

    Raises:
        KeyError: If ``model_name`` is not a key of ``models``.
    """
    # Resolve the model first so an unknown name fails before the PDF is
    # parsed and before any remote search or LLM call is made.
    chat_model = models[model_name]

    disclosure_text = high_level.extract_text(bytes)

    astra = AstraDBVectorStore(
        api_endpoint=ASTRA_DB_API_ENDPOINT,
        token=ASTRA_DB_APPLICATION_TOKEN,
        collection_name=collection_name,
        embedding=embedding,
    )
    related_docs = astra.search(disclosure_text, search_type="similarity")

    prompt = PromptTemplate.from_template(
        """
law context:
{context}
end of law context
=====
disclosure:
{disclosure}
end of disclosure
===
Given the context above, how would a good regulatory attorney edit the disclosure above? Ignore the model form a-9 and focus only on laws.
Please provide only changes to the text in git diff format if they contradict any laws or rules, do not provide enough clarity based on the spirit of the law, or give you pause.
If there is a company name or general information about the bank, there is no need to correct that sentence unless there is a legal contradiction.
In your response, provide the git diff and, underneath it, all of the reasons for the changes referencing the law.
Be as thorough as possible.
""",
    )

    # Pass the text itself. Wrapping it in braces built a one-element set, so
    # the prompt received the set's repr (braces, quotes, escaped newlines)
    # instead of the readable disclosure.
    # NOTE(review): related_docs is still rendered through its default string
    # conversion, which keeps whatever the search results carry alongside
    # their text; switch to an explicit format if that proves too noisy.
    val = prompt.format(context=related_docs, disclosure=disclosure_text)

    chat_response = chat_model.invoke(input=val)
    return chat_response.content