# Gradio version of the local RAG chat app (Ollama LLM + MongoDB Atlas vector search + Nomic embeddings)
import os

import gradio as gr
from pymongo.mongo_client import MongoClient
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
from llama_index.core.vector_stores.types import VectorStoreQuery
from langchain_nomic.embeddings import NomicEmbeddings
###### Load the LLM
# Pull the model into the local Ollama server before first use (requires a running Ollama daemon)
os.system("ollama pull llama3.2:3b-instruct-fp16")
# LLM
from langchain_ollama import ChatOllama

local_llm = "llama3.2:3b-instruct-fp16"
llm = ChatOllama(model=local_llm, temperature=0)
# JSON-mode variant, kept from the routing/grading version of this app; unused in this chat-only version
llm_json_mode = ChatOllama(model=local_llm, temperature=0, format="json")
# Load the embedding model (Nomic, local inference); it must be the same model
# that produced the vectors stored in the Atlas index
embed_model = NomicEmbeddings(model="nomic-embed-text-v1.5", inference_mode="local")
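# Optional sanity check (assumption: nomic-embed-text-v1.5 returns 768-dim vectors by default,
# which must match the dimension declared in the Atlas index):
# print(len(embed_model.embed_query("ping")))  # expect 768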
# Load vector database settings from the environment
MONGO_URI = os.getenv("MONGO_URI")
assert MONGO_URI, "MONGO_URI environment variable must be set"
os.environ["MONGODB_URI"] = MONGO_URI
DB_NAME = os.getenv("DB_NAME")
COLLECTION_NAME = os.getenv("COLLECTION_NAME")

# Connect to the Atlas deployment
mongo_client = MongoClient(MONGO_URI)
collection = mongo_client[DB_NAME][COLLECTION_NAME]
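# Assumption about the pre-built index: the "default" Atlas Vector Search index covers the
# field llama_index writes embeddings to ("embedding" by default), defined roughly as:
# {"fields": [{"type": "vector", "path": "embedding", "numDimensions": 768, "similarity": "cosine"}]}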
vector_store = MongoDBAtlasVectorSearch(
    mongo_client,
    db_name=DB_NAME,
    collection_name=COLLECTION_NAME,
    vector_index_name="default",
)
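# Optional smoke test (a minimal sketch; the sample question is a placeholder):
# query_embedding = embed_model.embed_query("Qual é o tema dos documentos?")
# query_results = vector_store.query(VectorStoreQuery(query_embedding=query_embedding, similarity_top_k=1))
# print(query_results.nodes[0].text)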
# COMPONENTS
### Generate
from langchain_core.messages import HumanMessage

# RAG prompt (Portuguese). English gist: "You are a multilingual question-answering
# assistant. Answer using only the context below; keep the answer formal and concise."
rag_prompt = """Você é um assistente multilíngue para tarefas de resposta a perguntas.
Aquí está o contexto a ser usado para responder à pergunta:
{context}
Pense cuidadosamente acerca do contexto de acima.
Agora, revise a pergunta do usuario:
{question}
Forneça uma resposta a essas perguntas usando apenas o contexto acima.
Mantenha sua resposta formal e concisa.
Resposta:"""
# Post-processing: join the retrieved nodes' text into one context string
def format_docs(nodes):
    return "\n\n".join(node.text for node in nodes)
########### CHAT CALLBACK
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    top_k,
):
    # Rebuild the conversation so the system message and prior turns are passed to the model
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    # Retrieve: embed the question and query the Atlas vector index
    question = message
    query_embedding = embed_model.embed_query(question)
    vector_store_query = VectorStoreQuery(query_embedding=query_embedding, similarity_top_k=top_k)
    query_results = vector_store.query(vector_store_query)

    # Generate: answer grounded in the retrieved context
    docs_txt = format_docs(query_results.nodes)
    rag_prompt_formatted = rag_prompt.format(context=docs_txt, question=question)
    messages.append(HumanMessage(content=rag_prompt_formatted))
    generation = llm.invoke(messages)
    return generation.content
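
# Direct usage sketch (outside the UI; assumes the collection is already populated,
# and the sample question and system message are placeholders):
# print(respond("Qual é o tema dos documentos?", [], "Você é um assistente formal.", 3))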

# For details on customizing the ChatInterface, see the Gradio docs:
# https://www.gradio.app/docs/chatinterface
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="Você é um assistente formal e conciso.", label="System message"),
        gr.Slider(minimum=1, maximum=10, value=3, step=1, label="Top-k (retrieved chunks)"),
    ],
)
if __name__ == "__main__":
    demo.launch()