# Gradio version of the app
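# RAG pipeline: Nomic embeddings -> MongoDB Atlas Vector Search (via llama_index)
# for retrieval, an Ollama-served Llama 3.2 model for generation, wrapped in a
# Gradio ChatInterface.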
import gradio as gr
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
from llama_index.core.vector_stores.types import VectorStoreQuery
from pymongo.mongo_client import MongoClient
from langchain_nomic.embeddings import NomicEmbeddings
import os
###### load LLM
os.system("ollama pull llama3.2:3b-instruct-fp16")
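# NOTE: this assumes the Ollama CLI and server are available in the runtime
# environment so the model can be pulled before the app starts.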
# LLM
from langchain_ollama import ChatOllama

local_llm = "llama3.2:3b-instruct-fp16"
llm = ChatOllama(model=local_llm, temperature=0)
llm_json_mode = ChatOllama(model=local_llm, temperature=0, format="json")

# Load the embedding model (Nomic, run locally)
embed_model = NomicEmbeddings(model="nomic-embed-text-v1.5", inference_mode="local")

# Load vector database
MONGO_URI = os.getenv("MONGO_URI")
os.environ["MONGODB_URI"] = MONGO_URI
DB_NAME = os.getenv("DB_NAME")
COLLECTION_NAME = os.getenv("COLLECTION_NAME")

# Connect to the Atlas deployment
mongo_client = MongoClient(MONGO_URI)

vector_store = MongoDBAtlasVectorSearch(
    mongo_client,
    db_name=DB_NAME,
    collection_name=COLLECTION_NAME,
    vector_index_name="default",
)
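# NOTE: the vector index named "default" is assumed to already exist on the
# collection, with dimensions matching the embedding model (768 for
# nomic-embed-text-v1.5 by default) and the default llama_index field name
# "embedding". A rough Atlas Vector Search index definition would look like:
# {
#   "fields": [
#     {"type": "vector", "path": "embedding", "numDimensions": 768, "similarity": "cosine"}
#   ]
# }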

# COMPONENTS
### Generate
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage

# Prompt
rag_prompt = """Você é um assistente multilíngue para tarefas de resposta a perguntas.

Aqui está o contexto a ser usado para responder à pergunta:

{context}

Pense cuidadosamente sobre o contexto acima.

Agora, revise a pergunta do usuário:

{question}

Forneça uma resposta a essa pergunta usando apenas o contexto acima.

Mantenha sua resposta formal e concisa.

Resposta:"""
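
# English gloss of the prompt above: "You are a multilingual assistant for
# question-answering tasks. Here is the context to use when answering the
# question: {context}. Think carefully about the context above. Now review the
# user's question: {question}. Answer it using only the context above. Keep
# your answer formal and concise. Answer:"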

# Post-processing: join the retrieved node texts into a single context string
def format_docs(nodes):
    return "\n\n".join(node.text for node in nodes)

########### FOR CHAT
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    top_k,
):
    # Start the conversation with the system message, then replay the chat history
    messages = [SystemMessage(content=system_message)]

    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append(HumanMessage(content=user_msg))
        if assistant_msg:
            messages.append(AIMessage(content=assistant_msg))

    # Embed the question and retrieve the top-k most similar nodes from Atlas
    question = message
    query_embedding = embed_model.embed_query(question)
    vector_store_query = VectorStoreQuery(
        query_embedding=query_embedding, similarity_top_k=int(top_k)
    )
    query_results = vector_store.query(vector_store_query)
    docs_txt = format_docs(query_results.nodes)

    # Format the RAG prompt and generate, forwarding the prior conversation
    rag_prompt_formatted = rag_prompt.format(context=docs_txt, question=question)
    messages.append(HumanMessage(content=rag_prompt_formatted))
    generation = llm.invoke(messages)
    return generation.content

"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="Qual é sua pergunta?", label="System message"),
        gr.Slider(minimum=1, maximum=10, value=3, step=1, label="top-k"),
    ],
)


if __name__ == "__main__":
    demo.launch()