"""Transcribe an audio file with Whisper, chunk by chunk, behind a Gradio UI."""

import math
import multiprocessing
import os
from typing import List

import gradio as gr
import whisper
from pydub import AudioSegment

# Loaded once at import time. Worker processes read this module-level name in
# process_segment(): forked workers inherit it, spawned workers re-import the
# module and load their own copy.
model = whisper.load_model("base")


def convert_to_text(audio_path: str) -> str:
    """Transcribe the audio file at *audio_path* and return the text.

    The audio is split into fixed-length chunks, each chunk is transcribed in
    a worker process, and the per-chunk texts are joined with single spaces
    in chunk order.

    Args:
        audio_path: Path to an audio file readable by pydub. A falsy value
            (for example ``None`` when the UI is submitted without audio)
            yields an empty string.

    Returns:
        The combined transcript, or ``""`` when there is nothing to
        transcribe.
    """
    if not audio_path:
        return ""

    chunk_size = 30  # Length of each segment in seconds
    audio_segments = split_audio(audio_path, chunk_size)
    if not audio_segments:
        return ""

    print("Starting the processes....")
    # The context manager releases the workers even if a chunk fails;
    # map() blocks until every chunk has been transcribed.
    with multiprocessing.Pool() as pool:
        results = pool.map(process_segment, audio_segments)

    return ' '.join(results)


def split_audio(audio_path: str, chunk_size: int) -> List[str]:
    """Split an audio file into WAV chunks of *chunk_size* seconds.

    Chunks are written to the ``segmented_audio`` directory (created if
    missing) as ``chunk_<i>.wav``. The final chunk holds whatever audio
    remains and may therefore be shorter than *chunk_size*.

    Args:
        audio_path: Path to the audio file to split.
        chunk_size: Length of each chunk in seconds.

    Returns:
        Paths of the exported chunk files, in playback order.
    """
    output_dir = "segmented_audio"
    os.makedirs(output_dir, exist_ok=True)

    audio = AudioSegment.from_file(audio_path)

    # len() of a pydub segment is its length in milliseconds.
    duration = len(audio) / 1000  # Convert to seconds
    # Round up so the trailing partial chunk is kept instead of dropped.
    num_chunks = math.ceil(duration / chunk_size)
    print(f"Chunk : Duration : {duration} : Number : {num_chunks}")

    chunk_ms = chunk_size * 1000
    audio_segments = []
    for i in range(num_chunks):
        start_time = i * chunk_ms
        end_time = start_time + chunk_ms
        # Slicing past the end of the audio simply yields a shorter chunk.
        chunk = audio[start_time:end_time]

        chunk_path = os.path.join(output_dir, f"chunk_{i}.wav")
        chunk.export(chunk_path, format="wav")
        print(f"Chunk number {i} path : {chunk_path}")
        audio_segments.append(chunk_path)

    print(f"Audio split into : {len(audio_segments)}")
    return audio_segments


def process_segment(segment_path: str) -> str:
    """Transcribe one audio chunk with the module-level Whisper model.

    Args:
        segment_path: Path to the chunk file to transcribe.

    Returns:
        The ``"text"`` entry of the transcription result.
    """
    print(f"Processing segment : {segment_path}")
    result = model.transcribe(segment_path)
    text = result["text"]
    print(text)
    return text


def get_results(path):
    """Transcribe the file at *path*, print the transcript, and return "complete".

    convert_to_text() already runs the per-chunk worker processes, so no
    further process is started here.
    """
    print(convert_to_text(path))
    return "complete"


ad = gr.components.Audio(type='filepath')
iface = gr.Interface(fn=convert_to_text, inputs=ad, outputs="text")

# Launch only when run as a script: under the "spawn" start method each worker
# re-imports this module and must not start another UI server.
if __name__ == "__main__":
    iface.launch()