# demo/app.py
import math
import multiprocessing
import os
from typing import List

import gradio as gr
import whisper
from pydub import AudioSegment

# Load the Whisper model once at module import so every call reuses it.
model = whisper.load_model("base")
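# Note (general multiprocessing behaviour, not specific to this app): with the
# default "fork" start method on Linux, worker processes inherit this loaded
# model; with "spawn" (the macOS/Windows default), each worker re-imports this
# module and loads its own copy, which costs extra startup time and memory.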

def convert_to_text(audio_path: str) -> str:
    # The model is already loaded at module level, so it is not reloaded here.
    # Split the audio into fixed-length chunks.
    chunk_size = 30  # Length of each chunk in seconds
    audio_segments = split_audio(audio_path, chunk_size)
    # Transcribe the chunks in parallel using a process pool.
    pool = multiprocessing.Pool()
    print("Starting the processes....")
    results = pool.map(process_segment, audio_segments)
    pool.close()
    pool.join()
    # pool.map preserves input order, so the transcripts join back in sequence.
    text = ' '.join(results)
    return text
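
# Usage sketch (hypothetical file name, not shipped with this repo):
#   transcript = convert_to_text("interview.mp3")
#   print(transcript[:200])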

def split_audio(audio_path: str, chunk_size: int) -> List[str]:
    # Create a directory to store the segmented audio files.
    output_dir = "segmented_audio"
    os.makedirs(output_dir, exist_ok=True)
    # Open the audio file using pydub.
    audio = AudioSegment.from_file(audio_path)
    # Calculate the number of chunks, rounding up so the trailing partial
    # chunk is not dropped.
    duration = len(audio) / 1000  # Convert milliseconds to seconds
    num_chunks = math.ceil(duration / chunk_size)
    print(f"Duration: {duration} s, number of chunks: {num_chunks}")
    # Split the audio into chunks and export each one as a WAV file.
    audio_segments = []
    for i in range(num_chunks):
        start_time = i * chunk_size * 1000  # Convert seconds to milliseconds
        end_time = (i + 1) * chunk_size * 1000
        # Extract the chunk (pydub clips the slice at the end of the audio).
        chunk = audio[start_time:end_time]
        # Write the chunk to a file that the worker processes can read.
        chunk_path = os.path.join(output_dir, f"chunk_{i}.wav")
        chunk.export(chunk_path, format="wav")
        print(f"Chunk {i} path: {chunk_path}")
        audio_segments.append(chunk_path)
    print(f"Audio split into {len(audio_segments)} chunks")
    return audio_segments
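
# The exported chunk files are left on disk after transcription. A small
# cleanup helper (a sketch, not part of the original app) could remove them:
def cleanup_segments(output_dir: str = "segmented_audio") -> None:
    # Delete the exported chunk files and the directory itself, if present.
    import shutil
    shutil.rmtree(output_dir, ignore_errors=True)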

def process_segment(segment_path: str) -> str:
    # Each worker uses the module-level model (inherited on fork, reloaded on
    # spawn because the module is re-imported in the child).
    print(f"Processing segment: {segment_path}")
    # Transcribe the chunk and return its text.
    result = model.transcribe(segment_path)
    print(result["text"])
    return result["text"]

def _transcribe_in_subprocess(path, q):
    # Helper added here because the original code passed an already-transcribed
    # string (and a queue) to process_segment, which expects a single chunk path.
    q.put(convert_to_text(path))

def get_results(path):
    # Example path: '/The genius of Satya Nadella Sam Altman and Lex Fridman.mp3'
    # Run the full transcription in a child process and collect the transcript
    # through a queue.
    q = multiprocessing.Queue()
    p = multiprocessing.Process(target=_transcribe_in_subprocess, args=(path, q))
    p.start()
    text = q.get()  # Read before join() so a full queue cannot deadlock
    p.join()
    print(text)
    return text

audio_input = gr.components.Audio(type='filepath')
iface = gr.Interface(fn=convert_to_text, inputs=audio_input, outputs="text")

if __name__ == "__main__":
    iface.launch()