Spaces:
Sleeping
Sleeping
barghavani
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -11,34 +11,19 @@ from langchain.prompts import PromptTemplate
|
|
11 |
from dotenv import load_dotenv
|
12 |
import whisper
|
13 |
|
|
|
|
|
14 |
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
|
15 |
|
16 |
-
model = whisper.load_model("small")
|
17 |
-
def transcribe(audio):
|
18 |
-
# Load audio and pad/trim it to fit 30 seconds
|
19 |
-
audio = whisper.load_audio(audio)
|
20 |
-
audio = whisper.pad_or_trim(audio)
|
21 |
|
22 |
-
|
|
|
|
|
|
|
23 |
mel = whisper.log_mel_spectrogram(audio).to(model.device)
|
|
|
|
|
24 |
|
25 |
-
# Detect the spoken language
|
26 |
-
_, probs = model.detect_language(mel)
|
27 |
-
detected_language = max(probs, key=probs.get)
|
28 |
-
print(f"Detected language: {detected_language}")
|
29 |
-
|
30 |
-
# Decode the audio
|
31 |
-
options = whisper.DecodingOptions(fp16=False)
|
32 |
-
result = whisper.decode(model, mel, options)
|
33 |
-
|
34 |
-
# Check if the detected language is English; if not, translate the text
|
35 |
-
if detected_language != "en":
|
36 |
-
# Initialize the translation model; specify source and target languages as needed
|
37 |
-
translator = pipeline("translation_xx_to_yy", model="Helsinki-NLP/opus-mt-xx-en")
|
38 |
-
translated_text = translator(result.text, max_length=512)[0]['translation_text']
|
39 |
-
return translated_text
|
40 |
-
|
41 |
-
return result.text
|
42 |
|
43 |
def get_pdf_text(pdf_docs):
|
44 |
text=""
|
@@ -73,7 +58,7 @@ def get_conversational_chain():
|
|
73 |
"""
|
74 |
|
75 |
model = ChatGoogleGenerativeAI(model="gemini-pro",
|
76 |
-
temperature=0.
|
77 |
|
78 |
prompt = PromptTemplate(template = prompt_template, input_variables = ["context", "question"])
|
79 |
chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
|
@@ -85,7 +70,7 @@ def get_conversational_chain():
|
|
85 |
def user_input(user_question):
|
86 |
embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")
|
87 |
|
88 |
-
new_db = FAISS.load_local("faiss_index", embeddings
|
89 |
docs = new_db.similarity_search(user_question)
|
90 |
|
91 |
chain = get_conversational_chain()
|
@@ -102,29 +87,23 @@ def user_input(user_question):
|
|
102 |
|
103 |
|
104 |
def main():
|
105 |
-
st.set_page_config(
|
106 |
-
st.header("
|
|
|
|
|
|
|
|
|
|
|
107 |
|
108 |
with st.sidebar:
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
get_vector_store(text_chunks)
|
118 |
-
st.success("PDFs processed and ready for questions.")
|
119 |
-
|
120 |
-
# Voice recording for question
|
121 |
-
audio_file = st.file_uploader("Record your question as audio (WAV format):", type=["wav"])
|
122 |
-
if audio_file:
|
123 |
-
with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
|
124 |
-
tmp_file.write(audio_file.getvalue())
|
125 |
-
transcribed_text = transcribe(tmp_file.name)
|
126 |
-
st.text_input("Transcribed question:", transcribed_text)
|
127 |
-
user_input(transcribed_text)
|
128 |
|
129 |
|
130 |
|
|
|
11 |
from dotenv import load_dotenv
|
12 |
import whisper
|
13 |
|
14 |
+
# Load variables from a local .env file into the process environment so that
# GOOGLE_API_KEY can be read below.
load_dotenv()

# Configure the Google Generative AI client with the key from the environment.
# os.getenv returns None when the variable is unset; in that case later API
# calls are expected to fail — NOTE(review): consider failing fast here.
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
|
17 |
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
+
def transcribe_audio(audio_file):
    """Transcribe an English audio recording to text with Whisper.

    Args:
        audio_file: Path to the audio file to transcribe.

    Returns:
        The transcribed text (the ``"text"`` entry of Whisper's result dict).
    """
    # NOTE(review): the model is loaded on every call, which is slow; cache it
    # (e.g. via the app framework's resource cache) if this runs repeatedly.
    model = whisper.load_model("small")

    # ``model.transcribe`` expects a file path or a raw waveform and computes
    # the log-mel spectrogram itself, so the audio is passed through directly
    # instead of a precomputed mel tensor. This also lets Whisper process the
    # whole recording rather than only a 30-second padded/trimmed window.
    prediction = model.transcribe(audio_file, language="en", fp16=False)
    return prediction["text"]
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
def get_pdf_text(pdf_docs):
|
29 |
text=""
|
|
|
58 |
"""
|
59 |
|
60 |
model = ChatGoogleGenerativeAI(model="gemini-pro",
|
61 |
+
temperature=0.3)
|
62 |
|
63 |
prompt = PromptTemplate(template = prompt_template, input_variables = ["context", "question"])
|
64 |
chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
|
|
|
70 |
def user_input(user_question):
|
71 |
embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")
|
72 |
|
73 |
+
new_db = FAISS.load_local("faiss_index", embeddings)
|
74 |
docs = new_db.similarity_search(user_question)
|
75 |
|
76 |
chain = get_conversational_chain()
|
|
|
87 |
|
88 |
|
89 |
def main():
    """Render the Streamlit "Chat PDF" page.

    The main area shows a text box; a non-empty question is forwarded to
    ``user_input``. The sidebar offers a multi-file uploader and a
    "Submit & Process" button that extracts text from the uploaded files,
    splits it into chunks and passes the chunks to ``get_vector_store``.
    """
    st.set_page_config("Chat PDF")
    st.header("Chat with PDF using Gemini💁")

    user_question = st.text_input("Ask a Question from the PDF Files")
    if user_question:
        user_input(user_question)

    with st.sidebar:
        st.title("Menu:")
        pdf_docs = st.file_uploader(
            "Upload your PDF Files and Click on the Submit & Process Button",
            accept_multiple_files=True,
        )
        if st.button("Submit & Process"):
            if not pdf_docs:
                # Nothing was uploaded: skip the pipeline instead of running
                # it on an empty list of files.
                st.warning("Please upload at least one PDF file before processing.")
            else:
                with st.spinner("Processing..."):
                    raw_text = get_pdf_text(pdf_docs)
                    text_chunks = get_text_chunks(raw_text)
                    get_vector_store(text_chunks)
                    st.success("Done")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
|
108 |
|
109 |
|