arjunanand13 committed on
Commit
0545ca0
1 Parent(s): e21e9b2

Create app.py

Files changed (1)
  1. app.py +127 -0
app.py ADDED
@@ -0,0 +1,127 @@
import torch
from torch import cuda, bfloat16
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig, StoppingCriteria, StoppingCriteriaList
from langchain.llms import HuggingFacePipeline
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
import gradio as gr
from langchain.embeddings import HuggingFaceEmbeddings
import os


class Chatbot:
    def __init__(self):
        self.HF_TOKEN = os.environ.get("HF_TOKEN", None)
        self.model_id = "mistralai/Mistral-7B-Instruct-v0.2"
        self.device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

        # Load the model with 4-bit NF4 quantization so the 7B model fits in GPU memory.
        self.bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type='nf4',
            bnb_4bit_use_double_quant=True,
            bnb_4bit_compute_dtype=bfloat16
        )
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, token=self.HF_TOKEN)
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_id, device_map="auto", token=self.HF_TOKEN, quantization_config=self.bnb_config
        )

        # Sequences that should terminate generation early.
        self.stop_list = ['\nHuman:', '\n```\n']
        self.stop_token_ids = [self.tokenizer(x, add_special_tokens=False)['input_ids'] for x in self.stop_list]
        self.stop_token_ids = [torch.LongTensor(x).to(self.device) for x in self.stop_token_ids]
        self.stopping_criteria = StoppingCriteriaList([self.StopOnTokens(self.stop_token_ids)])

        self.generate_text = pipeline(
            model=self.model,
            tokenizer=self.tokenizer,
            return_full_text=True,
            task='text-generation',
            temperature=0.1,
            max_new_tokens=2048,
            stopping_criteria=self.stopping_criteria,
        )
        self.llm = HuggingFacePipeline(pipeline=self.generate_text)

        # Load the prebuilt FAISS index with the same embedding model used to create it.
        try:
            self.vectorstore = FAISS.load_local(
                'faiss_index',
                HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cuda"})
            )
            print("Loaded embedding successfully")
        except ImportError as e:
            print("FAISS could not be imported. Make sure FAISS is installed correctly.")
            raise e

        self.chain = ConversationalRetrievalChain.from_llm(self.llm, self.vectorstore.as_retriever(), return_source_documents=True)
        self.chat_history = []

    class StopOnTokens(StoppingCriteria):
        """Stops generation as soon as one of the configured stop sequences is produced."""

        def __init__(self, stop_token_ids):
            super().__init__()
            self.stop_token_ids = stop_token_ids

        def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
            for stop_ids in self.stop_token_ids:
                if torch.eq(input_ids[0][-len(stop_ids):], stop_ids).all():
                    return True
            return False

    def format_prompt(self, query):
        prompt = f"""
You are a knowledgeable assistant with access to a comprehensive database.
I need you to answer my question and provide related information in a specific format.
I have provided four related JSON files; choose the most suitable chunks for answering the query.
Here's what I need:
Include a final answer without additional comments, sign-offs, or extra phrases. Be direct and to the point.

Here's my question:
{query}
Solution==>
Example1
Query: "How to use IPU1_0 instead of A15_0 to process NDK in TDA2x-EVM",
Solution: "To use IPU1_0 instead of A15_0 to process NDK in TDA2x-EVM, you need to modify the configuration file of the NDK application. Specifically, change the processor reference from 'A15_0' to 'IPU1_0'.",

Example2
Query: "Can BQ25896 support I2C interface?",
Solution: "Yes, the BQ25896 charger supports the I2C interface for communication.",
"""
        return prompt

    def qa_infer(self, query):
        content = ""
        formatted_prompt = self.format_prompt(query)
        result = self.chain({"question": formatted_prompt, "chat_history": self.chat_history})

        # Collect the retrieved source chunks so they can be shown next to the answer.
        for doc in result['source_documents']:
            content += "-" * 50 + "\n"
            content += doc.page_content + "\n"
        print(content)
        print("#" * 100)
        print(result['answer'])

        # Write the query, answer, and source chunks to a downloadable text file.
        output_file = "output.txt"
        with open(output_file, "w") as f:
            f.write("Query:\n")
            f.write(query + "\n\n")
            f.write("Answer:\n")
            f.write(result['answer'] + "\n\n")
            f.write("Source Documents:\n")
            f.write(content + "\n")

        download_link = f'<a href="file/{output_file}" download>Download Output File</a>'
        return result['answer'], content, download_link

    def launch_interface(self):
        css_code = """
        .gradio-container {
            background-color: #daccdb;
        }

        /* Button styling for all buttons */
        button {
            background-color: #927fc7; /* Default color for all other buttons */
            color: black;
            border: 1px solid black;
            padding: 10px;
            margin-right: 10px;
            font-size: 16px; /* Increase font size */
            font-weight: bold; /* Make text bold */
        }
        """
        EXAMPLES = [
            "TDA4 product planning and datasheet release progress?",
            "I'm using Code Composer Studio 5.4.0.00091 and enabled FPv4SPD16 floating point support for CortexM4 in TDA2. However, after building the project, the .asm file shows --float_support=vfplib instead of FPv4SPD16. Why is this happening?",
            "Master core in TDA2XX is A15 and in TDA3XX it is M4, so we have to shift all modules that are being used by A15 in TDA2XX to M4 in TDA3XX."
        ]

        demo = gr.Interface(
            fn=self.qa_infer,
            inputs=[gr.Textbox(label="QUERY", placeholder="Enter your query here")],
            allow_flagging='never',
            examples=EXAMPLES,
            cache_examples=False,
            outputs=[gr.Textbox(label="SOLUTION"), gr.Textbox(label="RELATED QUERIES"), gr.HTML()],
            css=css_code
        )
        demo.launch()


# Instantiate and launch the chatbot
chatbot = Chatbot()
chatbot.launch_interface()
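
Note: app.py loads a prebuilt faiss_index directory via FAISS.load_local(...), but this commit does not include the code that builds it. Below is a minimal sketch of how such an index could be created with the same embedding model; the docs/ folder, glob pattern, and chunk sizes are illustrative assumptions, not part of this commit.

# Illustrative sketch only: builds the 'faiss_index' folder that app.py expects.
# The docs/ source folder, glob pattern, and chunk sizes are assumptions.
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Load raw documents and split them into retrievable chunks.
loader = DirectoryLoader("docs", glob="**/*.txt", loader_cls=TextLoader)
chunks = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100).split_documents(loader.load())

# Use the same embedding model as app.py so query and index vectors are comparable.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# Build the FAISS index and persist it where app.py's FAISS.load_local('faiss_index', ...) will find it.
FAISS.from_documents(chunks, embeddings).save_local("faiss_index")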