barghavani committed on
Commit
d9bdbe2
·
verified ·
1 Parent(s): e815db6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -46
app.py CHANGED
@@ -11,34 +11,19 @@ from langchain.prompts import PromptTemplate
11
  from dotenv import load_dotenv
12
  import whisper
13
 
 
 
14
  genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
15
 
16
- model = whisper.load_model("small")
17
- def transcribe(audio):
18
- # Load audio and pad/trim it to fit 30 seconds
19
- audio = whisper.load_audio(audio)
20
- audio = whisper.pad_or_trim(audio)
21
 
22
- # Make log-Mel spectrogram and move to the same device as the model
 
 
 
23
  mel = whisper.log_mel_spectrogram(audio).to(model.device)
 
 
24
 
25
- # Detect the spoken language
26
- _, probs = model.detect_language(mel)
27
- detected_language = max(probs, key=probs.get)
28
- print(f"Detected language: {detected_language}")
29
-
30
- # Decode the audio
31
- options = whisper.DecodingOptions(fp16=False)
32
- result = whisper.decode(model, mel, options)
33
-
34
- # Check if the detected language is English; if not, translate the text
35
- if detected_language != "en":
36
- # Initialize the translation model; specify source and target languages as needed
37
- translator = pipeline("translation_xx_to_yy", model="Helsinki-NLP/opus-mt-xx-en")
38
- translated_text = translator(result.text, max_length=512)[0]['translation_text']
39
- return translated_text
40
-
41
- return result.text
42
 
43
  def get_pdf_text(pdf_docs):
44
  text=""
@@ -73,7 +58,7 @@ def get_conversational_chain():
73
  """
74
 
75
  model = ChatGoogleGenerativeAI(model="gemini-pro",
76
- temperature=0.1)
77
 
78
  prompt = PromptTemplate(template = prompt_template, input_variables = ["context", "question"])
79
  chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
@@ -85,7 +70,7 @@ def get_conversational_chain():
85
  def user_input(user_question):
86
  embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")
87
 
88
- new_db = FAISS.load_local("faiss_index", embeddings,allow_dangerous_deserialization= True)
89
  docs = new_db.similarity_search(user_question)
90
 
91
  chain = get_conversational_chain()
@@ -102,29 +87,23 @@ def user_input(user_question):
102
 
103
 
104
  def main():
105
- st.set_page_config(page_title="Voice-enabled PDF QnA")
106
- st.header("Ask questions by voice or text from PDFs")
 
 
 
 
 
107
 
108
  with st.sidebar:
109
- pdf_docs = st.file_uploader("Upload PDFs:", accept_multiple_files=True)
110
- submit_button = st.button("Process PDFs")
111
-
112
- # Handling PDF processing
113
- if submit_button and pdf_docs:
114
- with st.spinner("Extracting text from PDFs..."):
115
- raw_text = get_pdf_text(pdf_docs)
116
- text_chunks = get_text_chunks(raw_text)
117
- get_vector_store(text_chunks)
118
- st.success("PDFs processed and ready for questions.")
119
-
120
- # Voice recording for question
121
- audio_file = st.file_uploader("Record your question as audio (WAV format):", type=["wav"])
122
- if audio_file:
123
- with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
124
- tmp_file.write(audio_file.getvalue())
125
- transcribed_text = transcribe(tmp_file.name)
126
- st.text_input("Transcribed question:", transcribed_text)
127
- user_input(transcribed_text)
128
 
129
 
130
 
 
11
  from dotenv import load_dotenv
12
  import whisper
13
 
14
+ load_dotenv()
15
+ os.getenv("GOOGLE_API_KEY")
16
  genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
17
 
 
 
 
 
 
18
 
19
+ def transcribe_audio(audio_file):
20
+ model = whisper.load_model("small")
21
+ audio = whisper.load_audio(audio_file)
22
+ audio = whisper.pad_or_trim(audio)
23
  mel = whisper.log_mel_spectrogram(audio).to(model.device)
24
+ prediction = model.transcribe(mel, language="en", fp16=False)
25
+ return prediction['text']
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  def get_pdf_text(pdf_docs):
29
  text=""
 
58
  """
59
 
60
  model = ChatGoogleGenerativeAI(model="gemini-pro",
61
+ temperature=0.3)
62
 
63
  prompt = PromptTemplate(template = prompt_template, input_variables = ["context", "question"])
64
  chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
 
70
  def user_input(user_question):
71
  embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")
72
 
73
+ new_db = FAISS.load_local("faiss_index", embeddings)
74
  docs = new_db.similarity_search(user_question)
75
 
76
  chain = get_conversational_chain()
 
87
 
88
 
89
  def main():
90
+ st.set_page_config("Chat PDF")
91
+ st.header("Chat with PDF using Gemini💁")
92
+
93
+ user_question = st.text_input("Ask a Question from the PDF Files")
94
+
95
+ if user_question:
96
+ user_input(user_question)
97
 
98
  with st.sidebar:
99
+ st.title("Menu:")
100
+ pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True)
101
+ if st.button("Submit & Process"):
102
+ with st.spinner("Processing..."):
103
+ raw_text = get_pdf_text(pdf_docs)
104
+ text_chunks = get_text_chunks(raw_text)
105
+ get_vector_store(text_chunks)
106
+ st.success("Done")
 
 
 
 
 
 
 
 
 
 
 
107
 
108
 
109