# version for gradio
import os

import gradio as gr
from pymongo.mongo_client import MongoClient
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
from llama_index.core.vector_stores.types import VectorStoreQuery
from langchain_nomic.embeddings import NomicEmbeddings
from langchain_ollama import ChatOllama
from langchain_core.messages import HumanMessage

###### load LLM
# Make sure the Ollama model is available locally before instantiating it.
os.system("ollama pull llama3.2:3b-instruct-fp16")

local_llm = "llama3.2:3b-instruct-fp16"
llm = ChatOllama(model=local_llm, temperature=0)
# JSON-mode variant (kept from the original code; not used in this app).
llm_json_mode = ChatOllama(model=local_llm, temperature=0, format="json")

# Load embedding model: Nomic embeddings, run locally.
embed_model = NomicEmbeddings(model="nomic-embed-text-v1.5", inference_mode="local")

# Load vector database.
# Required environment variables: MONGO_URI, DB_NAME, COLLECTION_NAME.
MONGO_URI = os.getenv("MONGO_URI")
os.environ["MONGODB_URI"] = MONGO_URI
DB_NAME = os.getenv("DB_NAME")
COLLECTION_NAME = os.getenv("COLLECTION_NAME")

# Connect to your Atlas deployment.
mongo_client = MongoClient(MONGO_URI)
collection = mongo_client[DB_NAME][COLLECTION_NAME]

# Wrap the collection in a LlamaIndex vector store backed by the Atlas
# Vector Search index named "default".
vector_store = MongoDBAtlasVectorSearch(
    mongo_client,
    db_name=DB_NAME,
    collection_name=COLLECTION_NAME,
    vector_index_name="default",
)

### Generate

# RAG prompt (in Portuguese): multilingual question answering grounded only
# in the retrieved context.
rag_prompt = """Você é um assistente multilíngue para tarefas de resposta a perguntas.

Aqui está o contexto a ser usado para responder à pergunta:

{context}

Pense cuidadosamente sobre o contexto acima.

Agora, revise a pergunta do usuário:

{question}

Forneça uma resposta a essa pergunta usando apenas o contexto acima.

Mantenha sua resposta formal e concisa.

Resposta:"""


# Post-processing: join the retrieved node texts into a single context block.
def format_docs(nodes):
    return "\n\n".join(doc.text for doc in nodes)


########### FOR CHAT
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    top_k,
):
    # Collect the chat history in OpenAI-style message format. Note that the
    # RAG prompt below only uses the latest question, so this list is not
    # currently passed to the model.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Build the query: embed the question and retrieve the top-k nodes.
    question = message
    query_embedding = embed_model.embed_query(question)
    vector_store_query = VectorStoreQuery(
        query_embedding=query_embedding, similarity_top_k=top_k
    )
    query_results = vector_store.query(vector_store_query)
    docs_txt = format_docs(query_results.nodes)

    # Generate the answer from the retrieved context.
    rag_prompt_formatted = rag_prompt.format(context=docs_txt, question=question)
    generation = llm.invoke([HumanMessage(content=rag_prompt_formatted)])
    return generation.content


"""
For information on how to customize the ChatInterface, peruse the gradio docs:
https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="Qual é sua pergunta?", label="System message"),
        gr.Slider(minimum=1, maximum=10, value=3, step=1, label="top-k"),
    ],
)

if __name__ == "__main__":
    demo.launch()
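
# ---------------------------------------------------------------------------
# Optional helper (illustrative sketch, not part of the original app): the code
# above assumes an Atlas Vector Search index named "default" already exists on
# the collection. The function below shows one way to create it with PyMongo.
# The field path "embedding" (the default embedding key used by
# MongoDBAtlasVectorSearch) and the 768 dimensions of nomic-embed-text-v1.5 are
# assumptions; adjust them to match how the collection was ingested. It is
# never called by the app; run it manually once if the index is missing.
# ---------------------------------------------------------------------------
def create_vector_index(dimensions: int = 768, path: str = "embedding"):
    from pymongo.operations import SearchIndexModel  # requires a recent pymongo

    index_model = SearchIndexModel(
        definition={
            "fields": [
                {
                    "type": "vector",
                    "path": path,
                    "numDimensions": dimensions,
                    "similarity": "cosine",
                }
            ]
        },
        name="default",
        type="vectorSearch",
    )
    # Index creation is asynchronous on the Atlas side; the index may take a
    # short while to become queryable after this call returns.
    collection.create_search_index(model=index_model)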
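
# ---------------------------------------------------------------------------
# Quick sanity check (illustrative, not part of the original app): exercise the
# retrieval + generation path once without launching the Gradio UI. The sample
# question is hypothetical; uncomment to run after the Atlas collection and
# vector index are in place.
# ---------------------------------------------------------------------------
# answer = respond(
#     "Quais documentos estão disponíveis?",  # hypothetical question
#     history=[],
#     system_message="Qual é sua pergunta?",
#     top_k=3,
# )
# print(answer)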