Added Llama 3.1 Instruct Q&A functionality for testing
app.py
CHANGED
@@ -16,6 +16,25 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
 
+
+
+def load_qa_model():
+    logger.info("Loading Q&A model...")
+    try:
+        model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
+        qa_pipeline = pipeline(
+            "text-generation",
+            model=model_id,
+            model_kwargs={"torch_dtype": torch.bfloat16},
+            device_map="auto",
+        )
+        logger.info(f"Q&A model loaded successfully")
+        return qa_pipeline
+    except Exception as e:
+        logger.warning(f"Failed to load Q&A model. Error: {str(e)}")
+        return None
+
+
 def load_summarization_model():
     logger.info("Loading summarization model...")
     try:
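Note on the new loader: load_qa_model() builds a fresh transformers text-generation pipeline on every call, and answer_question() further down invokes it per click, so the 8B checkpoint is reloaded for every question. A minimal caching sketch, assuming the load_qa_model() defined in this commit (the _qa_pipeline global and get_qa_pipeline() helper are illustrative, not part of the change):

    _qa_pipeline = None  # hypothetical module-level cache, not in this commit

    def get_qa_pipeline():
        # Load Meta-Llama-3.1-8B-Instruct once and reuse it across requests.
        global _qa_pipeline
        if _qa_pipeline is None:
            _qa_pipeline = load_qa_model()
        return _qa_pipeline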
@@ -29,6 +48,7 @@ def load_summarization_model():
         logger.info("Summarization model loaded successfully on CPU")
         return summarizer
 
+
 def process_with_fallback(func, *args, **kwargs):
     try:
         return func(*args, **kwargs)
@@ -42,6 +62,7 @@ def process_with_fallback(func, *args, **kwargs):
         else:
             raise
 
+
 @spaces.GPU(duration=60)
 def transcribe_audio(audio_file, translate, model_size):
     logger.info(f"Starting transcription: translate={translate}, model_size={model_size}")
@@ -53,6 +74,7 @@ def transcribe_audio(audio_file, translate, model_size):
         logger.error(f"Transcription failed: {str(e)}")
         raise gr.Error(f"Transcription failed: {str(e)}")
 
+
 @spaces.GPU(duration=60)
 def summarize_text(text):
     logger.info("Starting text summarization")
@@ -98,20 +120,50 @@ def process_and_summarize(audio_file, translate, model_size, do_summarize=True):
         logger.error(traceback.format_exc())
         raise gr.Error(f"Processing failed: {str(e)}")
 
+
+@spaces.GPU(duration=60)
+def answer_question(context, question):
+    logger.info("Starting Q&A process")
+    try:
+        qa_pipeline = load_qa_model()
+        if qa_pipeline is None:
+            return "Error: Q&A model could not be loaded."
+
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant who can answer questions based on the given context."},
+            {"role": "user", "content": f"Context: {context}\n\nQuestion: {question}"},
+        ]
+
+        outputs = qa_pipeline(messages, max_new_tokens=256)
+        answer = outputs[0]["generated_text"]
+
+        # Extract the answer from the generated text
+        answer = answer.split("assistant:")[-1].strip()
+
+        logger.info("Q&A process completed successfully")
+        return answer
+    except Exception as e:
+        logger.error(f"Q&A process failed: {str(e)}")
+        logger.error(traceback.format_exc())
+        return "Error occurred during Q&A process. Please try again."
+
+
 # Main interface
 with gr.Blocks() as iface:
-    gr.Markdown("# WhisperX Audio Transcription, Translation, and Summarization (with ZeroGPU support)")
+    gr.Markdown("# WhisperX Audio Transcription, Translation, Summarization, and Q&A (with ZeroGPU support)")
 
     audio_input = gr.Audio(type="filepath")
     translate_checkbox = gr.Checkbox(label="Enable Translation")
     summarize_checkbox = gr.Checkbox(label="Enable Summarization", interactive=False)
-    # diarization_checkbox = gr.Checkbox(label="Enable Speaker Diarization")
     model_dropdown = gr.Dropdown(choices=["tiny", "base", "small", "medium", "large", "large-v2", "large-v3"], label="Whisper Model Size", value="small")
     process_button = gr.Button("Process Audio")
     transcription_output = gr.Textbox(label="Transcription/Translation")
-    full_text_output = gr.Textbox(label="
+    full_text_output = gr.Textbox(label="Full Text")
     summary_output = gr.Textbox(label="Summary")
 
+    question_input = gr.Textbox(label="Ask a question about the transcription")
+    answer_button = gr.Button("Get Answer")
+    answer_output = gr.Textbox(label="Answer")
 
     def update_summarize_checkbox(translate):
         return gr.Checkbox(interactive=translate)
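Note on the answer extraction above: when a transformers text-generation pipeline is given a list of chat messages, recent releases return the whole conversation under "generated_text" as a list of role/content dicts rather than a flat string, in which case the str.split("assistant:") call would raise. A version-tolerant sketch (illustrative only, not part of this commit):

    generated = outputs[0]["generated_text"]
    if isinstance(generated, str):
        # flat-string behaviour: split off the assistant's turn
        answer = generated.split("assistant:")[-1].strip()
    else:
        # chat behaviour: the last message is the assistant's reply
        answer = generated[-1]["content"].strip()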
@@ -123,6 +175,12 @@ with gr.Blocks() as iface:
         inputs=[audio_input, translate_checkbox, model_dropdown, summarize_checkbox],
         outputs=[transcription_output, full_text_output, summary_output]
     )
+
+    answer_button.click(
+        answer_question,
+        inputs=[full_text_output, question_input],
+        outputs=[answer_output]
+    )
 
     gr.Markdown(
         f"""
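Note on the wiring: answer_button.click passes the current values of full_text_output and question_input positionally to answer_question and writes the return value into answer_output. Since full_text_output is only populated after "Process Audio" runs, clicking "Get Answer" first sends an empty context to the model. A small guard sketch (hypothetical wrapper, not in this commit):

    def answer_question_guarded(context, question):
        # Skip the GPU call when there is no transcript to answer from.
        if not (context and context.strip()):
            return "Please process an audio file first."
        return answer_question(context, question)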