|
import whisper |
|
import multiprocessing |
|
import os |
|
from pydub import AudioSegment |
|
from typing import List |
|
import gradio as gr |
|
|
|
|
|
# Load the Whisper "base" model once at import time; process_segment() reads
# this module-level object for every chunk it transcribes.
model = whisper.load_model("base")
|
def convert_to_text(audio_path: str) -> str:
    """Transcribe an audio file by processing fixed-length chunks in parallel.

    The file is cut into 30-second chunks by ``split_audio``; each chunk is
    transcribed by ``process_segment`` in a ``multiprocessing.Pool`` worker,
    and the per-chunk texts are joined with single spaces in chunk order.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The space-joined transcript, or ``""`` when no path is given or the
        split produces no chunks.
    """
    # Nothing was supplied, so there is nothing to split or transcribe.
    if not audio_path:
        return ""

    chunk_size = 30  # chunk length in seconds
    audio_segments = split_audio(audio_path, chunk_size)
    if not audio_segments:
        # Avoid spinning up worker processes when there is no work for them.
        return ""

    print("Starting the processes....")
    # The workers use the module-level model via process_segment, so no model
    # is loaded here. pool.map blocks until every chunk is done, which makes
    # the terminate() issued on leaving the `with` block safe, and the pool is
    # released even if a worker raises.
    with multiprocessing.Pool() as pool:
        results = pool.map(process_segment, audio_segments)

    return ' '.join(results)
|
|
|
import os |
|
from pydub import AudioSegment |
|
|
|
def split_audio(audio_path: str, chunk_size: int) -> List[str]:
    """Split an audio file into consecutive WAV chunks on disk.

    Chunks are written to the ``segmented_audio`` directory (created if
    missing) as ``chunk_<index>.wav``. Every chunk is ``chunk_size`` seconds
    long except possibly the last one, which holds whatever audio remains, so
    the tail of the recording is not dropped and a recording shorter than one
    chunk still produces a single chunk.

    Args:
        audio_path: Path of the audio file to split.
        chunk_size: Chunk length in seconds; must be positive.

    Returns:
        Paths of the exported chunk files, in playback order.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    # Validate before touching the filesystem or decoding any audio.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")

    output_dir = "segmented_audio"
    os.makedirs(output_dir, exist_ok=True)

    audio = AudioSegment.from_file(audio_path)

    total_ms = len(audio)  # pydub reports the segment length in milliseconds
    chunk_ms = int(chunk_size * 1000)
    duration = total_ms / 1000

    # Walk the audio by start offset instead of by a truncated chunk count, so
    # the final partial chunk is included.
    chunk_starts = range(0, total_ms, chunk_ms)
    num_chunks = len(chunk_starts)

    print(f"Chunk : Duration : {duration} : Number : {num_chunks}")

    # NOTE(review): file names are fixed per index, so two calls running at
    # the same time would overwrite each other's chunks -- confirm whether
    # concurrent use is expected.
    audio_segments = []
    for i, start_time in enumerate(chunk_starts):
        # Slicing past the end is clamped by pydub, which yields the shorter
        # final chunk.
        chunk = audio[start_time:start_time + chunk_ms]

        chunk_path = os.path.join(output_dir, f"chunk_{i}.wav")
        chunk.export(chunk_path, format="wav")

        print(f"Chunk number {i} path : {chunk_path}")
        audio_segments.append(chunk_path)

    print(f"Audio split into : {len(audio_segments)}")

    return audio_segments
|
|
|
|
|
def process_segment(segment_path: str) -> str:
    """Transcribe one audio chunk with the module-level model.

    Logs the chunk path before transcription and the recognised text after.

    Args:
        segment_path: Path of the audio chunk to transcribe.

    Returns:
        The ``"text"`` entry of the transcription result.
    """
    print(f"Processing segment : {segment_path}")

    transcript = model.transcribe(segment_path)["text"]
    print(transcript)
    return transcript
|
|
|
def get_results(path: str) -> str:
    """Transcribe the audio at ``path``, print the transcript, and report completion.

    ``convert_to_text`` already runs the per-chunk transcription in worker
    processes and returns the finished text, so no additional process or queue
    is started here. (Handing that text and a queue to ``process_segment``, as
    was done before, fails in the child because the function takes a single
    path argument, and leaves the parent blocked forever waiting on the queue.)

    Args:
        path: Path of the audio file to transcribe.

    Returns:
        The literal string ``"complete"``.
    """
    transcript = convert_to_text(path)
    print(transcript)
    return "complete"
|
|
|
# Gradio UI: an audio input configured with type='filepath', so the handler is
# given a path to the audio file, wired to convert_to_text with a plain-text
# output.
ad = gr.components.Audio(type='filepath')
iface = gr.Interface(fn=convert_to_text, inputs=ad, outputs="text")

# Launch only when run as a script. convert_to_text uses multiprocessing, and
# under the "spawn" start method each worker re-imports this module; without
# the guard every worker would try to launch the web server again.
if __name__ == "__main__":
    iface.launch()
|
|