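# RAG chatbot over a local FAISS index: Amazon Bedrock provides both the LLM
# (Llama 3.1 70B Instruct) and the embeddings (Cohere multilingual), and the
# chat UI is served with Gradio.
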
import os
import sys
import time

import boto3
import gradio as gr

from langchain_aws import BedrockLLM, BedrockEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

# Make the parent directory importable (for any shared helper modules).
module_path = ".."
sys.path.append(os.path.abspath(module_path))

# Bedrock runtime client; falls back to us-west-2 when no region is configured.
bedrock_client = boto3.client(
    'bedrock-runtime',
    region_name=os.environ.get("AWS_DEFAULT_REGION", "us-west-2"),
)

modelId = 'meta.llama3-1-70b-instruct-v1:0'

# Text-completion model used for answer generation.
llm = BedrockLLM(
    model_id=modelId,
    client=bedrock_client,
)
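
# Generation parameters are left at their defaults above; they could be tuned
# via model_kwargs (hypothetical values, not part of the original script):
#   llm = BedrockLLM(model_id=modelId, client=bedrock_client,
#                    model_kwargs={"temperature": 0.2, "max_gen_len": 512})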

# Embedding model; it must match the one used to build the FAISS index.
br_embeddings = BedrockEmbeddings(model_id="cohere.embed-multilingual-v3", client=bedrock_client)

# allow_dangerous_deserialization is required for pickle-based FAISS indexes;
# only enable it for index files you trust.
db = FAISS.load_local('faiss_index', embeddings=br_embeddings, allow_dangerous_deserialization=True)
retriever = db.as_retriever(search_kwargs={"k": 5})
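
# The 'faiss_index' directory is assumed to have been built beforehand with
# the same embedding model, e.g. (hypothetical build step, not in this script):
#   FAISS.from_documents(docs, br_embeddings).save_local('faiss_index')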

# The retrieved context goes in the system message; the user turn carries the
# question, so it is no longer duplicated in both messages.
prompt = ChatPromptTemplate.from_messages([
    ('system',
     "Answer the question using the provided context. Do not include phrases "
     "such as 'based on the context' or 'based on the documents' in your answer. "
     "If you do not know or cannot find the information needed, say so."
     "\nContext: {context}"),
    ('user', "{question}")
])
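
# Note: BedrockLLM is a text-completion interface, so LangChain renders this
# chat prompt to a single string (roughly "System: ...\nHuman: ...") before
# sending it to the model.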


def format_docs(docs):
    # Join the retrieved Document objects into a single context string.
    return "\n\n".join(doc.page_content for doc in docs)

# LCEL pipeline: the dict branch retrieves and formats the context while
# RunnablePassthrough forwards the raw question; the prompt, model, and
# output parser then run in sequence.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# One-off smoke test to confirm the chain works end to end before the UI starts.
response = rag_chain.invoke("Who are the board of directors in KCE company?")
print(response)


def chat_gen(message, history):
    # Run the full chain, then replay the finished answer character by
    # character so the UI appears to stream.
    response = rag_chain.invoke(message)
    partial_message = ""
    for char in response:
        partial_message += char
        time.sleep(0.05)
        yield partial_message
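
# For true token streaming (rather than replaying the completed answer), the
# chain's Runnable.stream() API could be used instead; a minimal sketch,
# assuming the Bedrock model supports streaming:
#
#   def chat_gen(message, history):
#       partial = ""
#       for chunk in rag_chain.stream(message):
#           partial += chunk
#           yield partial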


initial_msg = "Hello! I am the KCE assistant. You can ask me anything about KCE. I am happy to assist you."
chatbot = gr.Chatbot(value=[[None, initial_msg]])
demo = gr.ChatInterface(chat_gen, chatbot=chatbot).queue()
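# Note: .queue() enables request queuing, which some Gradio versions require
# for generator-based (streaming) handlers like chat_gen.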

try:
    demo.launch(debug=True, share=False, show_api=False)
    demo.close()
except Exception as e:
    # Make sure the server is shut down before re-raising.
    demo.close()
    print(e)
    raise