Spaces:

TARGAUS
/

Audio_Transcription_Summarization_QandA

Sleeping

App Files Files Community

Kr08 committed on Sep 9, 2024

Commit

a3f7705

verified ·

1 Parent(s): 8dac0cd

Update app.py

Browse files

Files changed (1) hide show

app.py +135 -59

app.py CHANGED Viewed

@@ -1,63 +1,139 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
 )
-if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
+from audio_processing import process_audio
+from transformers import pipeline
+import spaces
+import torch
+import logging
+import traceback
+import sys
+logging.basicConfig(  # configure the root logger once, at import time
+    level=logging.INFO,  # emit INFO and more severe records
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.StreamHandler(sys.stdout)  # send records to stdout rather than the default stderr
+    ]
 )
+logger = logging.getLogger(__name__)  # module-level logger shared by the functions in this file
+def load_summarization_model():
+    """Load the ``sshleifer/distilbart-cnn-12-6`` summarization pipeline.
+
+    When ``torch.cuda.is_available()`` is true the pipeline is first created
+    on device 0; if that attempt raises, a warning is logged and the pipeline
+    is created on CPU instead. Without CUDA the pipeline is created on CPU
+    directly, so a failure is not retried with identical arguments or
+    misreported as a GPU failure.
+
+    Returns:
+        The pipeline object returned by ``transformers.pipeline``.
+
+    Raises:
+        Exception: Whatever ``pipeline`` raises when the CPU load fails.
+    """
+    model_name = "sshleifer/distilbart-cnn-12-6"
+    logger.info("Loading summarization model...")
+    if torch.cuda.is_available():
+        try:
+            summarizer = pipeline("summarization", model=model_name, device=0)
+            logger.info("Summarization model loaded successfully on GPU")
+            return summarizer
+        except Exception as e:
+            # Only a GPU attempt has a meaningful CPU fallback.
+            logger.warning(f"Failed to load summarization model on GPU. Falling back to CPU. Error: {str(e)}")
+    summarizer = pipeline("summarization", model=model_name, device=-1)
+    logger.info("Summarization model loaded successfully on CPU")
+    return summarizer
+def process_with_fallback(func, *args, **kwargs):
+    """Call ``func(*args, **kwargs)`` and retry once on a CUDA/GPU error.
+
+    Any exception is logged together with its traceback. If the exception
+    text contains "CUDA" or "GPU", ``func`` is called a second time with
+    ``use_gpu=False`` added to the keyword arguments; every other exception
+    is re-raised unchanged.
+
+    NOTE(review): the retry assumes ``func`` accepts a ``use_gpu`` keyword;
+    confirm against the callables passed in.
+
+    Returns:
+        Whatever ``func`` returns, from the first or the retried call.
+    """
+    try:
+        return func(*args, **kwargs)
+    except Exception as e:
+        error_text = str(e)
+        logger.error(f"Error during processing: {error_text}")
+        logger.error(traceback.format_exc())
+        # Errors unrelated to the GPU are not retried.
+        if "CUDA" not in error_text and "GPU" not in error_text:
+            raise
+        logger.info("Falling back to CPU processing...")
+        kwargs['use_gpu'] = False
+        return func(*args, **kwargs)
+@spaces.GPU(duration=60)
+def transcribe_audio(audio_file, translate, model_size):
+    """Run ``process_audio`` on ``audio_file`` through ``process_with_fallback``.
+
+    Args:
+        audio_file: Audio input forwarded to ``process_audio``.
+        translate: Forwarded as the ``translate`` keyword.
+        model_size: Forwarded as the ``model_size`` keyword.
+
+    Returns:
+        The value returned by ``process_audio``; the caller in this file
+        unpacks it as ``(language_segments, final_segments)``.
+
+    Raises:
+        gr.Error: If processing raises; the message carries the original error text.
+    """
+    logger.info(f"Starting transcription: translate={translate}, model_size={model_size}")
+    try:
+        # Speaker diarization is not forwarded as an option here.
+        outcome = process_with_fallback(process_audio, audio_file, translate=translate, model_size=model_size)
+    except Exception as e:
+        logger.error(f"Transcription failed: {str(e)}")
+        raise gr.Error(f"Transcription failed: {str(e)}")
+    else:
+        logger.info("Transcription completed successfully")
+        return outcome
+@spaces.GPU(duration=60)
+def summarize_text(text):
+    """Summarize ``text`` with the pipeline from ``load_summarization_model``.
+
+    Args:
+        text: The text to summarize.
+
+    Returns:
+        The generated summary string. An empty string is returned when
+        ``text`` is empty or whitespace-only. If loading or running the
+        model raises, the error is logged and a fixed error message is
+        returned instead of raising.
+    """
+    logger.info("Starting text summarization")
+    # Nothing to summarize: skip the model load rather than feed it an empty prompt.
+    if not text or not text.strip():
+        logger.info("No text to summarize; returning empty summary")
+        return ""
+    try:
+        summarizer = load_summarization_model()
+        # truncation=True clips over-long input to the model's maximum input
+        # length, so a long transcript is summarized from its leading part
+        # instead of failing inside the model.
+        summary = summarizer(
+            text,
+            max_length=150,
+            min_length=50,
+            do_sample=False,
+            truncation=True,
+        )[0]['summary_text']
+        logger.info("Summarization completed successfully")
+        return summary
+    except Exception as e:
+        logger.error(f"Summarization failed: {str(e)}")
+        logger.error(traceback.format_exc())
+        return "Error occurred during summarization. Please try again."
+@spaces.GPU(duration=60)
+def process_and_summarize(audio_file, translate, model_size, do_summarize=True):
+    """Transcribe an audio file, format the result and optionally summarize it.
+
+    Args:
+        audio_file: Audio input forwarded to ``transcribe_audio``.
+        translate: When truthy, each segment's ``'translated'`` text is added
+            to the report and used (instead of ``'text'``) for the full text.
+        model_size: Model size forwarded to ``transcribe_audio`` and named in
+            the report header.
+        do_summarize: When truthy, the full text is passed to ``summarize_text``.
+
+    Returns:
+        A ``(transcription, full_text, summary)`` tuple of strings: the
+        formatted per-segment report, the segment texts joined with a space
+        after each one, and the summary (``""`` when ``do_summarize`` is falsy).
+
+    Raises:
+        gr.Error: If any step raises; the message carries the original error text.
+    """
+    logger.info(f"Starting process_and_summarize: translate={translate}, model_size={model_size}, do_summarize={do_summarize}")
+    try:
+        language_segments, final_segments = transcribe_audio(audio_file, translate, model_size)
+        # Collect report pieces and join once at the end.
+        report_parts = [
+            f"Language: {lang_seg['language']}\n"
+            f"Time: {lang_seg['start']:.2f}s - {lang_seg['end']:.2f}s\n\n"
+            for lang_seg in language_segments
+        ]
+        report_parts.append(
+            f"Transcription with language detection and speaker diarization (using {model_size} model):\n\n"
+        )
+        text_parts = []
+        for seg in final_segments:
+            entry = f"[{seg['start']:.2f}s - {seg['end']:.2f}s] ({seg['language']}) {seg['speaker']}:\n"
+            entry += f"Original: {seg['text']}\n"
+            if translate:
+                entry += f"Translated: {seg['translated']}\n"
+                text_parts.append(seg['translated'] + " ")
+            else:
+                text_parts.append(seg['text'] + " ")
+            report_parts.append(entry + "\n")
+        transcription = "".join(report_parts)
+        full_text = "".join(text_parts)
+        if do_summarize:
+            summary = summarize_text(full_text)
+        else:
+            summary = ""
+        logger.info("Process and summarize completed successfully")
+        return transcription, full_text, summary
+    except Exception as e:
+        logger.error(f"Process and summarize failed: {str(e)}\n")
+        logger.error(traceback.format_exc())
+        raise gr.Error(f"Processing failed: {str(e)}")
+# Main interface
+# Builds the Gradio Blocks UI (inputs, outputs, event wiring, a static
+# system-information panel) and launches it when the module is executed.
+with gr.Blocks() as iface:
+    gr.Markdown("# WhisperX Audio Transcription, Translation, and Summarization (with ZeroGPU support)")
+    # The audio component is configured with type="filepath".
+    audio_input = gr.Audio(type="filepath")
+    translate_checkbox = gr.Checkbox(label="Enable Translation")
+    # Starts non-interactive; the change handler below unlocks it while
+    # translation is enabled.
+    summarize_checkbox = gr.Checkbox(label="Enable Summarization", interactive=False)
+    # diarization_checkbox = gr.Checkbox(label="Enable Speaker Diarization")
+    model_dropdown = gr.Dropdown(choices=["tiny", "base", "small", "medium", "large", "large-v2", "large-v3"], label="Whisper Model Size", value="small")
+    process_button = gr.Button("Process Audio")
+    # Outputs, in the order process_and_summarize returns them: the formatted
+    # per-segment report, the plain concatenated text, and the summary.
+    transcription_output = gr.Textbox(label="Transcription/Translation")
+    # Distinct label so the plain text box is not confused with the report above.
+    full_text_output = gr.Textbox(label="Full Text")
+    summary_output = gr.Textbox(label="Summary")
+    def update_summarize_checkbox(translate):
+        """Return a checkbox update that is interactive only when ``translate`` is truthy."""
+        return gr.Checkbox(interactive=translate)
+    translate_checkbox.change(update_summarize_checkbox, inputs=[translate_checkbox], outputs=[summarize_checkbox])
+    process_button.click(
+        process_and_summarize,
+        inputs=[audio_input, translate_checkbox, model_dropdown, summarize_checkbox],
+        outputs=[transcription_output, full_text_output, summary_output]
+    )
+    # The f-string is evaluated once while the UI is built, so the panel
+    # shows the CUDA state at that moment.
+    gr.Markdown(
+        f"""
+        ## System Information
+        - Device: {"CUDA" if torch.cuda.is_available() else "CPU"}
+        - CUDA Available: {"Yes" if torch.cuda.is_available() else "No"}
+        ## ZeroGPU Support
+        This application supports ZeroGPU for Hugging Face Spaces pro users.
+        GPU-intensive tasks are automatically optimized for better performance when available.
+        """
+    )
+iface.launch()