Update app.py
app.py CHANGED
@@ -11,70 +11,52 @@ from langchain.prompts import PromptTemplate
 from dotenv import load_dotenv
 from st_audiorec import st_audiorec
 import whisper
+from txtai.pipeline import Translation
 
 genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
 
-
 def get_pdf_text(pdf_docs):
-    text=""
+    text = ""
     for pdf in pdf_docs:
-        pdf_reader= PdfReader(pdf)
+        pdf_reader = PdfReader(pdf)
         for page in pdf_reader.pages:
-            text+= page.extract_text()
-    return
-
-
+            text += page.extract_text()
+    return text
 
 def get_text_chunks(text):
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
     chunks = text_splitter.split_text(text)
     return chunks
 
-
 def get_vector_store(text_chunks):
-    embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")
+    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
     vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
     vector_store.save_local("faiss_index")
 
-
 def get_conversational_chain():
-
     prompt_template = """
-    Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
-    provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
+    Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
     Context:\n {context}?\n
     Question: \n{question}\n
-    Answer:
-    """
-
-    model = ChatGoogleGenerativeAI(model="gemini-pro",
-                                   temperature=0.1)
-
-    prompt = PromptTemplate(template = prompt_template, input_variables = ["context", "question"])
+    Answer: """
+    model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.1)
+    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
     chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
-
     return chain
 
-
-
 def user_input(user_question):
-    embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")
-
-    new_db = FAISS.load_local("faiss_index", embeddings,allow_dangerous_deserialization= True)
+    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
+    new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
     docs = new_db.similarity_search(user_question)
-
     chain = get_conversational_chain()
-
-
-    response = chain(
-        {"input_documents":docs, "question": user_question}
-        , return_only_outputs=True)
-
+    response = chain({"input_documents": docs, "question": user_question}, return_only_outputs=True)
     print(response)
-    st.write("Reply: ", response["output_text"])
-
 
+    # Create translation model
+    translate = Translation()
+    translation = translate(response["output_text"], "fa")
 
+    st.write("Reply: ", translation)
 
 def main():
     st.set_page_config("Chat PDF")
@@ -82,11 +64,9 @@ def main():
 
     # Audio recording
     wav_audio_data = st_audiorec()
-
    if wav_audio_data is not None:
        with open("query.wav", "wb") as f:
            f.write(wav_audio_data)
-
        model = whisper.load_model("large")
        result = model.transcribe("query.wav", language="en", fp16=False)
        user_question = result["text"]
@@ -103,6 +83,5 @@ def main():
                get_vector_store(text_chunks)
                st.success("Done")
 
-
 if __name__ == "__main__":
     main()
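
The main functional change in this commit is the txtai Translation pipeline, which translates the QA chain's English answer into Persian ("fa") before it is written to the Streamlit page. Below is a minimal sketch of that translation step in isolation, assuming txtai with its pipeline extras is installed (pip install "txtai[pipeline]"); the english_reply variable is a hypothetical stand-in for response["output_text"] from the chain.

    # Minimal sketch of the translation step introduced in this commit (not the full app)
    from txtai.pipeline import Translation

    # Create the translation pipeline; the underlying model is loaded lazily
    translate = Translation()

    # Stand-in for the QA chain output (response["output_text"])
    english_reply = "The answer is not available in the context."

    # Translate the English reply to Persian; "fa" is the target language code
    persian_reply = translate(english_reply, "fa")
    print(persian_reply)

Note that the pipeline fetches a translation model from the Hugging Face Hub on first use, so the first reply after the Space wakes up may be noticeably slower than later ones.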