# neuroRAG / app.py
# version for gradio
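# Gradio chat app: answers questions over a MongoDB Atlas vector index using
# a local Ollama LLM (llama3.2) and Nomic text embeddings.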
import gradio as gr
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
from pymongo.mongo_client import MongoClient
from llama_index.core.vector_stores.types import VectorStoreQuery
from langchain_nomic.embeddings import NomicEmbeddings
import os
###### load LLM
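# Pull the model weights into the local Ollama server before creating the chat clients.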
os.system("ollama pull llama3.2:3b-instruct-fp16")
# LLM
from langchain_ollama import ChatOllama
local_llm = "llama3.2:3b-instruct-fp16"
llm = ChatOllama(model=local_llm, temperature=0)
llm_json_mode = ChatOllama(model=local_llm, temperature=0, format="json")
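# `llm` does free-form generation; `llm_json_mode` asks Ollama for JSON output
# and is kept for structured tasks (it is not used in the chat flow below).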
# Load the embedding model (Nomic text embeddings, run locally)
embed_model = NomicEmbeddings(model="nomic-embed-text-v1.5", inference_mode="local")
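# Query embeddings must come from the same model that was used to embed the
# documents stored in the Atlas collection.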
# Load vector database
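# MONGO_URI, DB_NAME and COLLECTION_NAME are read from the environment
# (e.g. configured as Space secrets); MONGO_URI must be set or the line below raises.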
MONGO_URI = os.getenv("MONGO_URI")
os.environ["MONGODB_URI"] = MONGO_URI
DB_NAME = os.getenv("DB_NAME")
COLLECTION_NAME = os.getenv("COLLECTION_NAME")
# Connect to your Atlas deployment
mongo_client = MongoClient(MONGO_URI)
collection = mongo_client[DB_NAME][COLLECTION_NAME]
#
vector_store = MongoDBAtlasVectorSearch(mongo_client, db_name=DB_NAME, collection_name=COLLECTION_NAME, vector_index_name="default")
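# The vector store wraps an existing Atlas Vector Search index (named "default")
# over documents that were embedded ahead of time.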
# COMPONENTS
### Generate
from langchain_core.messages import HumanMessage
# Prompt
rag_prompt = """Você é um assistente multilíngue para tarefas de resposta a perguntas.
Aqui está o contexto a ser usado para responder à pergunta:
{context}
Pense cuidadosamente sobre o contexto acima.
Agora, revise a pergunta do usuário:
{question}
Forneça uma resposta a essa pergunta usando apenas o contexto acima.
Mantenha sua resposta formal e concisa.
Resposta:"""
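# {context} and {question} are filled in via str.format() inside respond().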
# Post-processing
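# Concatenate the text of the retrieved nodes into a single context string.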
def format_docs(nodes):
    return "\n\n".join(doc.text for doc in nodes)
########### FOR CHAT
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    top_k,
):
    # Build a chat-style message list from the system message and history.
    # Note: these messages are assembled but not passed to the LLM; only the
    # RAG prompt below is sent.
    messages = [{"role": "system", "content": system_message}]
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
    # Embed the question and retrieve the most similar chunks from the Atlas index.
    question = message
    query_embedding = embed_model.embed_query(question)
    vector_store_query = VectorStoreQuery(query_embedding=query_embedding, similarity_top_k=int(top_k))
    query_results = vector_store.query(vector_store_query)
    docs = query_results.nodes
    docs_txt = format_docs(docs)
    # Fill the RAG prompt with the retrieved context and the user question, then generate.
    rag_prompt_formatted = rag_prompt.format(context=docs_txt, question=question)
    generation = llm.invoke([HumanMessage(content=rag_prompt_formatted)])
    return generation.content
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
respond,
additional_inputs=[
gr.Textbox(value="Qual é sua pergunta?", label="System message"),
gr.Slider(minimum=1, maximum=10, value=3, step=1, label="top-k"),
],
)
if __name__ == "__main__":
demo.launch()