|
|
|
import os

import gradio as gr
from pymongo.mongo_client import MongoClient
from langchain_nomic.embeddings import NomicEmbeddings
from llama_index.core.vector_stores.types import VectorStoreQuery
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
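# Gradio RAG chatbot: embeds the user question with Nomic embeddings, retrieves
# context from MongoDB Atlas Vector Search via LlamaIndex, and generates the
# answer with a local Llama 3.2 model served by Ollama.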
|
|
|
# Pull the local model via Ollama (assumes the Ollama daemon is already running).
os.system("ollama pull llama3.2:3b-instruct-fp16")

from langchain_ollama import ChatOllama

local_llm = "llama3.2:3b-instruct-fp16"
llm = ChatOllama(model=local_llm, temperature=0)
# JSON-mode client; defined here but not used in the pipeline below.
llm_json_mode = ChatOllama(model=local_llm, temperature=0, format="json")

# Local Nomic embedding model; inference_mode="local" runs the embedder on-device.
embed_model = NomicEmbeddings(model="nomic-embed-text-v1.5", inference_mode="local")

# MongoDB Atlas connection settings, read from the environment.
MONGO_URI = os.getenv("MONGO_URI")
os.environ["MONGODB_URI"] = MONGO_URI
DB_NAME = os.getenv("DB_NAME")
COLLECTION_NAME = os.getenv("COLLECTION_NAME")

mongo_client = MongoClient(MONGO_URI)
collection = mongo_client[DB_NAME][COLLECTION_NAME]

# Atlas Vector Search store over the collection; "default" must match the name of
# the vector search index configured on the collection in Atlas.
vector_store = MongoDBAtlasVectorSearch(
    mongo_client,
    db_name=DB_NAME,
    collection_name=COLLECTION_NAME,
    vector_index_name="default",
)
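
# For reference, a minimal sketch of the Atlas Vector Search index definition this
# code assumes exists under the name "default" (the field path and dimensions are
# assumptions; nomic-embed-text-v1.5 produces 768-dimensional embeddings):
#
#   {
#     "fields": [
#       {"type": "vector", "path": "embedding", "numDimensions": 768, "similarity": "cosine"}
#     ]
#   }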
|
|
|
|
|
|
|
|
|
|
|
from langchain_core.messages import HumanMessage

# RAG prompt (Portuguese): answer the question using only the retrieved context.
rag_prompt = """Você é um assistente multilíngue para tarefas de resposta a perguntas.

Aqui está o contexto a ser usado para responder à pergunta:

{context}

Pense cuidadosamente sobre o contexto acima.

Agora, revise a pergunta do usuário:

{question}

Forneça uma resposta à pergunta usando apenas o contexto acima.

Mantenha sua resposta formal e concisa.

Resposta:"""
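# English translation of the prompt above, for reference:
# "You are a multilingual assistant for question-answering tasks. Here is the
# context to use when answering the question: {context}. Think carefully about
# the context above. Now, review the user's question: {question}. Provide an
# answer to the question using only the context above. Keep your answer formal
# and concise. Answer:"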
|
|
|
|
|
def format_docs(nodes):
    """Concatenate the text of the retrieved nodes into a single context string."""
    return "\n\n".join(doc.text for doc in nodes)

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    top_k,
):
    # Rebuild the conversation as OpenAI-style message dicts. Note that this
    # history is not sent to the LLM below; each turn is answered from retrieval only.
    messages = [{"role": "system", "content": system_message}]
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})

    question = message

    # Embed the question and retrieve the top-k most similar nodes from Atlas.
    query_embedding = embed_model.embed_query(question)
    vector_store_query = VectorStoreQuery(
        query_embedding=query_embedding, similarity_top_k=int(top_k)
    )
    query_results = vector_store.query(vector_store_query)

    # Build the context block and fill in the RAG prompt.
    docs = query_results.nodes
    docs_txt = format_docs(docs)
    rag_prompt_formatted = rag_prompt.format(context=docs_txt, question=question)

    # Generate the answer with the local Llama model.
    generation = llm.invoke([HumanMessage(content=rag_prompt_formatted)])
    return generation.content
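
# Hypothetical direct call for quick local testing, bypassing the Gradio UI:
#   print(respond("Qual é a capital do Brasil?", history=[], system_message="", top_k=3))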
|
|
|
""" |
|
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="Qual é sua pergunta?", label="System message"),
        gr.Slider(minimum=1, maximum=10, value=3, step=1, label="top-k"),
    ],
)

if __name__ == "__main__":
    demo.launch()