# NOTE(review): removed scraped Hugging Face Spaces page residue (build-status
# text, file size, commit hashes, and the line-number gutter) that was pasted
# above the actual source and is not part of the program.
import gradio as gr
from audio_processing import process_audio
from transformers import pipeline
import spaces
import torch
import logging
import traceback
import sys
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
handlers=[
logging.StreamHandler(sys.stdout)
]
)
logger = logging.getLogger(__name__)
def load_summarization_model(model_name="sshleifer/distilbart-cnn-12-6"):
    """Load a Hugging Face summarization pipeline, preferring GPU.

    Args:
        model_name: Hub id of the summarization checkpoint to load.
            Defaults to the distilled BART CNN model this app has always
            used, so existing callers are unaffected.

    Returns:
        A ``transformers`` summarization pipeline, on GPU (device 0) when
        CUDA is available, otherwise on CPU (device -1).
    """
    logger.info("Loading summarization model...")
    try:
        cuda_available = torch.cuda.is_available()
        device = 0 if cuda_available else -1
        summarizer = pipeline("summarization", model=model_name, device=device)
        logger.info("Summarization model loaded successfully on %s",
                    "GPU" if cuda_available else "CPU")
        return summarizer
    except Exception as e:
        # Best-effort fallback: a GPU-side failure (e.g. OOM) may still
        # succeed on CPU. A genuine download/model error will simply fail
        # again here and propagate.
        logger.warning("Failed to load summarization model on GPU. "
                       "Falling back to CPU. Error: %s", e)
        summarizer = pipeline("summarization", model=model_name, device=-1)
        logger.info("Summarization model loaded successfully on CPU")
        return summarizer
def process_with_fallback(func, *args, **kwargs):
    """Call ``func(*args, **kwargs)``, retrying once on CPU for GPU failures.

    Any exception whose message mentions CUDA or GPU (matched
    case-insensitively, so messages like "cuda out of memory" are also
    caught) triggers a single retry with ``use_gpu=False`` added to the
    keyword arguments. Every other exception is logged and re-raised
    unchanged.

    NOTE(review): the retry assumes *func* accepts a ``use_gpu`` keyword
    argument — confirm this holds for every callable passed in (e.g.
    ``process_audio``).

    Args:
        func: The callable to invoke.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns.

    Raises:
        Exception: Re-raises any non-GPU-related error from ``func``.
    """
    log = logging.getLogger(__name__)  # same object as the module logger
    try:
        return func(*args, **kwargs)
    except Exception as e:
        log.error("Error during processing: %s", e)
        log.error(traceback.format_exc())
        # Case-insensitive match: the original check missed lower-case
        # variants of CUDA/GPU error messages.
        message = str(e).upper()
        if "CUDA" not in message and "GPU" not in message:
            raise
        log.info("Falling back to CPU processing...")
        kwargs['use_gpu'] = False
        return func(*args, **kwargs)
@spaces.GPU(duration=60)
def transcribe_audio(audio_file, translate, model_size, use_diarization):
    """Run the audio pipeline: transcription with optional translation/diarization.

    Args:
        audio_file: Path to the uploaded audio file.
        translate: Whether to translate each segment.
        model_size: Whisper model size name (e.g. "small", "large-v2").
        use_diarization: Whether to run speaker diarization.

    Returns:
        Whatever ``process_audio`` returns — per the caller below, a
        ``(language_segments, final_segments)`` pair.

    Raises:
        gr.Error: Wrapping any underlying failure; chained with ``from`` so
            the original traceback is preserved for debugging.
    """
    logger.info(f"Starting transcription: translate={translate}, model_size={model_size}, use_diarization={use_diarization}")
    try:
        result = process_with_fallback(process_audio, audio_file, translate=translate, model_size=model_size, use_diarization=use_diarization)
        logger.info("Transcription completed successfully")
        return result
    except Exception as e:
        logger.error(f"Transcription failed: {str(e)}")
        # Chain the cause so the real error isn't lost behind gr.Error.
        raise gr.Error(f"Transcription failed: {str(e)}") from e
@spaces.GPU(duration=60)
def summarize_text(text):
    """Summarize *text* with the distilbart pipeline.

    Best-effort contract for the UI: returns the summary string on success,
    or a fixed error message (never raises) when summarization fails.
    """
    logger.info("Starting text summarization")
    try:
        model = load_summarization_model()
        outputs = model(text, max_length=150, min_length=50, do_sample=False)
        summary = outputs[0]['summary_text']
        logger.info("Summarization completed successfully")
    except Exception as e:
        logger.error(f"Summarization failed: {str(e)}")
        logger.error(traceback.format_exc())
        return "Error occurred during summarization. Please try again."
    return summary
@spaces.GPU(duration=60)
def process_and_summarize(audio_file, translate, model_size, use_diarization, do_summarize):
    """Transcribe an audio file and optionally summarize the transcript.

    Args:
        audio_file: Path to the uploaded audio file.
        translate: Whether to translate each segment (summaries then use the
            translated text).
        model_size: Whisper model size name.
        use_diarization: Whether to run speaker diarization.
        do_summarize: Whether to produce a summary of the transcript.

    Returns:
        A ``(transcription, summary)`` tuple of strings; ``summary`` is ""
        when ``do_summarize`` is false.

    Raises:
        gr.Error: Wrapping any underlying failure, chained via ``from``.
    """
    logger.info(f"Starting process_and_summarize: translate={translate}, model_size={model_size}, use_diarization={use_diarization}, do_summarize={do_summarize}")
    try:
        language_segments, final_segments = transcribe_audio(audio_file, translate, model_size, use_diarization)

        # Accumulate report pieces in lists and join once, instead of the
        # original quadratic string "+=" concatenation. Output is identical.
        parts = ["Detected language changes:\n\n"]
        for segment in language_segments:
            parts.append(f"Language: {segment['language']}\n")
            parts.append(f"Time: {segment['start']:.2f}s - {segment['end']:.2f}s\n\n")
        parts.append(f"Transcription with language detection and speaker diarization (using {model_size} model):\n\n")

        text_pieces = []
        for segment in final_segments:
            parts.append(f"[{segment['start']:.2f}s - {segment['end']:.2f}s] ({segment['language']}) {segment['speaker']}:\n")
            parts.append(f"Original: {segment['text']}\n")
            if translate:
                parts.append(f"Translated: {segment['translated']}\n")
                text_pieces.append(segment['translated'])
            else:
                text_pieces.append(segment['text'])
            parts.append("\n")

        transcription = "".join(parts)
        # Matches the original "piece + ' '" accumulation (trailing space kept).
        full_text = "".join(piece + " " for piece in text_pieces)

        summary = summarize_text(full_text) if do_summarize else ""
        logger.info("Process and summarize completed successfully")
        return transcription, summary
    except Exception as e:
        logger.error(f"Process and summarize failed: {str(e)}")
        logger.error(traceback.format_exc())
        # Chain the cause so the real error isn't lost behind gr.Error.
        raise gr.Error(f"Processing failed: {str(e)}") from e
# Main interface: wires the audio input and option checkboxes to the
# processing pipeline and renders the results.
with gr.Blocks() as iface:
    gr.Markdown("# WhisperX Audio Transcription, Translation, and Summarization (with ZeroGPU support)")

    # Inputs
    audio_input = gr.Audio(type="filepath")
    translate_checkbox = gr.Checkbox(label="Enable Translation")
    # Disabled until translation is enabled — see update_summarize_checkbox.
    summarize_checkbox = gr.Checkbox(label="Enable Summarization", interactive=False)
    diarization_checkbox = gr.Checkbox(label="Enable Speaker Diarization")
    model_dropdown = gr.Dropdown(choices=["tiny", "base", "small", "medium", "large", "large-v2", "large-v3"], label="Whisper Model Size", value="small")
    process_button = gr.Button("Process Audio")

    # Outputs
    transcription_output = gr.Textbox(label="Transcription/Translation")
    summary_output = gr.Textbox(label="Summary")

    def update_summarize_checkbox(translate):
        # Keep the summarization checkbox's enabled state in sync with the
        # translation checkbox.
        return gr.Checkbox(interactive=translate)

    translate_checkbox.change(update_summarize_checkbox, inputs=[translate_checkbox], outputs=[summarize_checkbox])
    process_button.click(
        process_and_summarize,
        inputs=[audio_input, translate_checkbox, model_dropdown, diarization_checkbox, summarize_checkbox],
        outputs=[transcription_output, summary_output]
    )

    gr.Markdown(
        f"""
## System Information
- Device: {"CUDA" if torch.cuda.is_available() else "CPU"}
- CUDA Available: {"Yes" if torch.cuda.is_available() else "No"}
## ZeroGPU Support
This application supports ZeroGPU for Hugging Face Spaces pro users.
GPU-intensive tasks are automatically optimized for better performance when available.
"""
    )

# Fix: the scraped original ended with "iface.launch() |" — the trailing
# " |" page artifact is a SyntaxError and has been removed.
iface.launch()