import gradio as gr
import librosa
from transformers import WhisperProcessor, WhisperForConditionalGeneration, AutoModelForSeq2SeqLM, AutoTokenizer

# Initialize the Whisper processor and model
whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-base")
whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")

# Initialize the summarization model and tokenizer.
# Note: Llama-2 is a decoder-only model and cannot be loaded with
# AutoModelForSeq2SeqLM; t5-base is an encoder-decoder model that
# supports the "summarize:" prefix used below.
summarization_model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
summarization_tokenizer = AutoTokenizer.from_pretrained("t5-base")

# Function to transcribe audio
def transcribe_audio(audio_file):
    # Load the audio file and resample to the 16 kHz rate Whisper expects
    # (Whisper processes up to ~30 seconds of audio per call)
    speech, _ = librosa.load(audio_file, sr=16000)
    input_features = whisper_processor(speech, sampling_rate=16000, return_tensors="pt").input_features
    # Generate transcription token IDs and decode them to text
    transcription_ids = whisper_model.generate(input_features)
    transcription = whisper_processor.batch_decode(transcription_ids, skip_special_tokens=True)[0]
    return transcription

# Function to summarize text
def summarize_text(text):
    inputs = summarization_tokenizer.encode("summarize: " + text, return_tensors="pt", max_length=512, truncation=True)
    summary_ids = summarization_model.generate(inputs, max_length=150, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True)
    summary = summarization_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary

# Full pipeline: transcribe the audio, then summarize the transcript
def process_audio(audio_file):
    transcription = transcribe_audio(audio_file)
    summary = summarize_text(transcription)
    return transcription, summary

# Gradio UI
iface = gr.Interface(
    fn=process_audio,
    # Gradio 4.x API; older 3.x versions use source="upload" instead.
    # type="filepath" passes the uploaded file's path to process_audio.
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Summary")
    ],
    title="Audio Transcription and Summarization",
    description="Upload an audio file to transcribe and summarize the conversation."
)

# Launch the app
iface.launch()
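
# --- Optional smoke test (a minimal sketch, not part of the app itself) ---
# Verifies the pipeline end to end without the web UI. "sample.wav" is an
# illustrative placeholder; substitute any local audio file. Comment out
# iface.launch() above (it blocks) before running this directly:
#
#     transcription, summary = process_audio("sample.wav")
#     print("Transcription:", transcription)
#     print("Summary:", summary)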