arjunanand13 committed
Commit c5dd85b
1 Parent(s): 736f969

Create app.py

Files changed (1)
  1. app.py +191 -0
app.py ADDED
@@ -0,0 +1,191 @@
+ import torch
+ from torch import cuda, bfloat16
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig, StoppingCriteria, StoppingCriteriaList
+ from langchain.llms import HuggingFacePipeline
+ from langchain.vectorstores import FAISS
+ from langchain.chains import ConversationalRetrievalChain
+ import gradio as gr
+ from langchain.embeddings import HuggingFaceEmbeddings
+ import os
+
+ # Load the Hugging Face token from the environment
+ HF_TOKEN = os.environ.get("HF_TOKEN", None)
+
+ # Custom stopping criteria: stop generation once any configured stop sequence appears
+ class StopOnTokens(StoppingCriteria):
+     def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
+         for stop_ids in stop_token_ids:
+             if torch.eq(input_ids[0][-len(stop_ids):], stop_ids).all():
+                 return True
+         return False
+
+ # Select the instruct model (earlier LLaMA candidates kept for reference)
+ # model_id = 'meta-llama/Meta-Llama-3-8B-Instruct'
+ # model_id = "meta-llama/Llama-2-7b-chat-hf"
+ model_id = "mistralai/Mistral-7B-Instruct-v0.2"
+ device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
+
+ # Set 4-bit NF4 quantization configuration
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_quant_type='nf4',
+     bnb_4bit_use_double_quant=True,
+     bnb_4bit_compute_dtype=bfloat16
+ )
+
+ tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
+ model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", token=HF_TOKEN, quantization_config=bnb_config)
+
+ # Build stop-token id tensors used by the custom stopping criteria
+ stop_list = ['\nHuman:', '\n```\n']
+ stop_token_ids = [tokenizer(x)['input_ids'] for x in stop_list]
+ stop_token_ids = [torch.LongTensor(x).to(device) for x in stop_token_ids]
+ stopping_criteria = StoppingCriteriaList([StopOnTokens()])
+
+ # Create text generation pipeline
+ generate_text = pipeline(
+     model=model,
+     tokenizer=tokenizer,
+     return_full_text=True,
+     task='text-generation',
+     # stopping_criteria=stopping_criteria,
+     temperature=0.1,
+     max_new_tokens=2048,
+     # repetition_penalty=1.1
+ )
+
+ llm = HuggingFacePipeline(pipeline=generate_text)
+
+ # Load the stored FAISS index
+ try:
+     vectorstore = FAISS.load_local('faiss_index', HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cuda"}))
+     print("Loaded FAISS index successfully")
+ except ImportError as e:
+     print("FAISS could not be imported. Make sure FAISS is installed correctly.")
+     raise e
+
+ # Set up the Conversational Retrieval Chain
+ chain = ConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever(), return_source_documents=True)
+
+ chat_history = []
+
+ def format_prompt(query):
+     prompt = f"""
+     You are a knowledgeable assistant with access to a comprehensive database.
+     I need you to answer my question and provide related information in a specific format.
+     I have provided four related JSON chunks; choose the most suitable chunks for answering the query.
+     Here's what I need:
+     A brief, general response to my question based on the related JSON chunks retrieved.
+     Include a brief final answer without additional comments, sign-offs, or extra phrases. Be direct and to the point.
+
+     Here's my question:
+     {query}
+
+     The format I want the answer in:
+     user_query ==> query
+     response ==>
+     """
+     # prompt = f"""
+     # You are a knowledgeable assistant with access to a comprehensive database.
+     # I need you to answer my question and provide related information in a specific format.
+     # Here's what I need:
+     # A brief, general response to my question based on related answers retrieved.
+     # Include a brief final answer without additional comments, sign-offs, or extra phrases. Be direct and to the point.
+
+     # A JSON-formatted output containing: ALL SOURCE DOCUMENTS
+     # - "question": The ticketName
+     # - "answer": The Responses
+     # Here's my question:
+     # {query}
+     # """
+
+     # - "related_questions": A list of related questions and their answers, each as a dictionary with the keys. Consider all source documents:
+     #   - "question": The related question.
+     #   - "answer": The related answer.
+
+
+
+     # Example 1:
+     # {{
+     #   "question": "How to use IPU1_0 instead of A15_0 to process NDK in TDA2x-EVM",
+     #   "answer": "To use IPU1_0 instead of A15_0 to process NDK in TDA2x-EVM, you need to modify the configuration file of the NDK application. Specifically, change the processor reference from 'A15_0' to 'IPU1_0'.",
+     #   "related_questions": [
+     #     {{
+     #       "question": "Can you provide MLBP documentation on TDA2?",
+     #       "answer": "MLB is documented for DRA devices in the TRM book, chapter 24.12."
+     #     }},
+     #     {{
+     #       "question": "Hi, could you share me the TDA2x documents about Security(SPRUHS7) and Cryptographic(SPRUHS8) addendums?",
+     #       "answer": "Most of TDA2 documents are on ti.com under the product folder."
+     #     }},
+     #     {{
+     #       "question": "Is any one can provide us a way to access CDDS for nessary docs?",
+     #       "answer": "Which document are you looking for?"
+     #     }},
+     #     {{
+     #       "question": "What can you tell me about the TDA2 and TDA3 processors? Can they / do they run Linux?",
+     #       "answer": "We have moved your post to the appropriate forum."
+     #     }}
+     #   ]
+     # }}
+
+     # Final Answer: To use IPU1_0 instead of A15_0 to process NDK in TDA2x-EVM, you need to modify the configuration file of the NDK application. Specifically, change the processor reference from 'A15_0' to 'IPU1_0'.
+
+     # Example 2:
+     # {{
+     #   "question": "Can BQ25896 support I2C interface?",
+     #   "answer": "Yes, the BQ25896 charger supports the I2C interface for communication.",
+     #   "related_questions": [
+     #     {{
+     #       "question": "What are the main features of BQ25896?",
+     #       "answer": "The BQ25896 features include high-efficiency, fast charging capability, and a wide input voltage range."
+     #     }},
+     #     {{
+     #       "question": "How to configure the BQ25896 for USB charging?",
+     #       "answer": "To configure the BQ25896 for USB charging, set the input current limit and the charging current via I2C registers."
+     #     }}
+     #   ]
+     # }}
+
+     # Final Answer: Yes, the BQ25896 charger supports the I2C interface for communication.
+
+     # """
+
+
+     return prompt
+
+
+ def qa_infer(query):
+     content = ""
+     formatted_prompt = format_prompt(query)
+     result = chain({"question": formatted_prompt, "chat_history": chat_history})
+     for doc in result['source_documents']:
+         content += "-" * 50 + "\n"
+         content += doc.page_content + "\n"
+     print(content)
+     print("#" * 100)
+     print(result['answer'])
+     # return content, result['answer']
+
+
+     # Save the output to a file
+     output_file = "output.txt"
+     with open(output_file, "w") as f:
+         f.write("Query:\n")
+         f.write(query + "\n\n")
+         f.write("Answer:\n")
+         f.write(result['answer'] + "\n\n")
+         f.write("Source Documents:\n")
+         f.write(content + "\n")
+
+     # Return the content and answer along with the download link
+     download_link = f'<a href="file/{output_file}" download>Download Output File</a>'
+     return content, result['answer'], download_link
+
+ EXAMPLES = ["How to use IPU1_0 instead of A15_0 to process NDK in TDA2x-EVM",
+             "I'm using Code Composer Studio 5.4.0.00091 and enabled FPv4SPD16 floating point support for CortexM4 in TDA2. However, after building the project, the .asm file shows --float_support=vfplib instead of FPv4SPD16. Why is this happening?",
+             "Master core in TDA2XX is a15 and in TDA3XX it is m4, so we have to shift all modules that are being used by a15 in TDA2XX to m4 in TDA3xx."]
+
+ demo = gr.Interface(fn=qa_infer, inputs="text", allow_flagging='never', examples=EXAMPLES, cache_examples=False, outputs=[gr.Textbox(label="RELATED QUERIES"), gr.Textbox(label="SOLUTION"), gr.HTML()])  # ,outputs="text"
+ demo.launch()
+
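Note: app.py loads a prebuilt index from the local faiss_index directory via FAISS.load_local, but this commit does not include the code that builds that index. Below is a minimal sketch of how such an index could be created with the same embedding model; the build_index.py name, the docs.json source file, and its record layout are illustrative assumptions, not part of this commit.

# build_index.py (hypothetical helper, not included in this commit)
# Builds the faiss_index directory that app.py later loads with FAISS.load_local().
import json

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Assumption: the source Q&A data sits in a local docs.json file, one JSON object per ticket.
with open("docs.json") as f:
    records = json.load(f)

# Serialize each record into a single text chunk so the retriever returns whole JSON chunks,
# matching the JSON-chunk wording used in format_prompt.
texts = [json.dumps(record) for record in records]

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
    model_kwargs={"device": "cuda"},
)

vectorstore = FAISS.from_texts(texts, embeddings)
vectorstore.save_local("faiss_index")

Because FAISS only stores vectors, load_local must be given the same embedding wrapper used at build time so query-time embeddings stay consistent, which is what app.py does when it passes the all-mpnet-base-v2 HuggingFaceEmbeddings instance.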