Update app.py
app.py CHANGED
@@ -1,64 +1,101 @@
-import
… (old lines 2–16 not recovered) …
-):
… (old lines 18–46 not recovered) …
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
+import os
+
+os.environ["GROQ_API_KEY"] = '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'
+from langchain.text_splitter import CharacterTextSplitter
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain.memory import ConversationBufferMemory
+from langchain.prompts import PromptTemplate
+from langchain.chains import RetrievalQA
+from langchain_community.document_loaders import PyPDFLoader
+from langchain_groq import ChatGroq
+from langchain_groq.chat_models import ChatGroq
+
+pdf_folder_path = "acpc_data"
+documents = []
+for file in os.listdir(pdf_folder_path):
+    if file.endswith('.pdf'):
+        pdf_path = os.path.join(pdf_folder_path, file)
+        loader = PyPDFLoader(pdf_path)
+        documents.extend(loader.load())
+
+embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={'device': 'cpu'},
+                                   encode_kwargs={'normalize_embeddings': True})
+
+text_splitter = CharacterTextSplitter(
+    separator="\n",
+    chunk_size=1200,
+    chunk_overlap=500,
+    length_function=len)
+text_chunks = text_splitter.split_documents(documents)
+
+db1 = FAISS.from_documents(text_chunks, embeddings)
+
+retriever1 = db1.as_retriever(search_type="similarity", search_kwargs={"k": 1})
+
+memory = ConversationBufferMemory(memory_key="history", input_key="question")
+
+llm = ChatGroq(
+    # model="mixtral-8x7b-32768",
+    # model ='llama3-8b-8192',
+    model="llama-3.1-8b-instant",
+    temperature=0.655,
+    max_tokens=None,
+    timeout=None,
+    max_retries=2,
+    # other params...
 )
 
-
-
-
+template = """You are a smart and helpful assistant for the ACPC counseling process. You guide students and solve their queries related to ACPC, MYSY scholarship, admission, etc. You will be given the student's query and the history of the chat, and you need to answer the query to the best of your knowledge. If the query is completely different from the context then tell the student that you are not made to answer this query in a polite language. If the student has included any type of content related to violence, sex, drugs or used abusive language then tell the student that you can not answer that query and request them not to use such content.
+
+Also make sure to reply in the same language as used by the student in the current query.
+
+NOTE that your answer should be accurate. Explain the answer such that a student with no idea about the ACPC can understand well.
+
+For example,
+
+Example 1
+
+Chat history:
+The student named Priya says hello.
+
+Question:
+What is the maximum size of passport size photo allowed?
+
+Answer:
+The maximum size of passport size photo allowed is 200 KB.
+
+
+
+{context}
+
+------
+Chat history :
+{history}
+
+------
+
+
+Question: {question}
+Answer:
+"""
+
+QA_CHAIN_PROMPT = PromptTemplate(input_variables=["history", "context", "question"], template=template, )
+qa_chain = RetrievalQA.from_chain_type(llm,
+                                       retriever=db1.as_retriever(),
+                                       chain_type='stuff',
+                                       verbose=True,
+                                       chain_type_kwargs={"verbose": True, "prompt": QA_CHAIN_PROMPT,
+                                                          "memory": ConversationBufferMemory(memory_key="history",
+                                                                                             input_key="question"), })
+
+print("Hi! How can I help you today?")
+while True:
+    question = input("User: ")
+    if question.lower() == "quit":
+        print("Thank you for chatting. Goodbye!")
+        break
+
+    result1 = qa_chain({"query": question})
+    print(result1["result"])
+    print("-----------------------------")
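
Note on the key handling above: the commit hardcodes a live Groq API key in app.py, so anyone who can read the file (or this diff) can use it. A safer pattern is to keep the key out of the source and read it at startup, e.g. from a Space secret. A minimal sketch, assuming the secret is exposed to the process as an environment variable named GROQ_API_KEY:

    import os

    # Key injected by the hosting environment (e.g. a Space secret named
    # GROQ_API_KEY); fail fast with a clear message if it is missing.
    if not os.environ.get("GROQ_API_KEY"):
        raise RuntimeError("GROQ_API_KEY is not set; add it as a secret before starting the app.")

    # langchain_groq.ChatGroq picks GROQ_API_KEY up from the environment on
    # its own, so no assignment into os.environ is needed in source.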
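
At query time the pipeline runs in two steps: the FAISS store finds the chunks most similar to the question, and RetrievalQA stuffs them into {context} of the prompt before calling the model. The two steps can be exercised separately for debugging. A sketch, assuming the db1 and qa_chain objects built above and the sample question from the prompt template:

    # Retrieval step alone: k=1 mirrors the retriever1 configuration above
    # and returns the single closest chunk from the indexed PDFs.
    docs = db1.similarity_search("What is the maximum size of passport size photo allowed?", k=1)
    print(docs[0].page_content[:200])

    # Full retrieval + generation round trip, same call style as the loop above.
    result = qa_chain({"query": "What is the maximum size of passport size photo allowed?"})
    print(result["result"])

One detail worth noticing: qa_chain is built with a fresh db1.as_retriever(), which defaults to k=4, so the k=1 retriever1 defined earlier is never actually used by the chain.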
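
Two smaller points on the new code: ChatGroq is imported twice (langchain_groq re-exports the class from langchain_groq.chat_models, so the second import is redundant), and calling the chain directly with qa_chain({...}) is the older LangChain call style, deprecated in favor of invoke(). A sketch of the loop body in the newer style, assuming a LangChain release (0.1 or later) that provides Chain.invoke:

    # Same round trip via invoke(); avoids the Chain.__call__ deprecation
    # warning on langchain >= 0.1.
    result1 = qa_chain.invoke({"query": question})
    print(result1["result"])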