# Spaces: Runtime error
# (Page-status text captured alongside the source; kept as a comment so the module parses.)
import os
import tempfile

import whisper
from deep_translator import GoogleTranslator
from pydub import AudioSegment
# @title Audio into chunks
def audio_into_chunks_transcribe_translate(audio_file, lang):
    """Split a FLAC file into fixed-length chunks, then transcribe and translate each.

    The audio is cut into consecutive 11-second chunks. Each chunk is exported
    as a WAV file, transcribed with the Whisper "medium" model, and the
    transcript is translated into ``lang`` with ``GoogleTranslator`` (source
    language set to ``'auto'``). The translated and original text of every
    chunk are printed as they are produced.

    Args:
        audio_file: Path to the input audio file; it is decoded as FLAC.
        lang: Target language, passed to ``GoogleTranslator(target=...)``.

    Returns:
        A list with one translated string per chunk, in playback order.

    Raises:
        ValueError: If ``audio_file`` does not exist.
    """
    chunk_length_seconds = 11
    output_format = "wav"

    # Fail fast, before the (expensive) model load, when the input is missing.
    if not os.path.exists(audio_file):
        raise ValueError(f"FLAC file not found: {audio_file}")

    audio_segment = AudioSegment.from_file(audio_file, format="flac")
    model = whisper.load_model("medium")
    # The translator does not depend on the chunk, so build it once.
    translator = GoogleTranslator(source="auto", target=lang)

    # len() is the duration in whole milliseconds; using an integer avoids a
    # spurious sub-millisecond trailing chunk caused by float rounding.
    total_duration_ms = len(audio_segment)
    chunk_duration_ms = chunk_length_seconds * 1000
    base_name = os.path.splitext(os.path.basename(audio_file))[0]

    translated_chunks = []
    # Chunk files are only an intermediate for Whisper, so keep them in a
    # temporary directory that is removed when processing finishes or fails.
    with tempfile.TemporaryDirectory() as chunk_dir:
        chunk_starts = range(0, total_duration_ms, chunk_duration_ms)
        for chunk_num, start_time in enumerate(chunk_starts, start=1):
            end_time = min(start_time + chunk_duration_ms, total_duration_ms)
            chunk = audio_segment[start_time:end_time]

            output_filename = os.path.join(
                chunk_dir, f"{base_name}_chunk_{chunk_num}.{output_format}"
            )
            # export() hands back an open file object; close it so the data
            # is fully on disk and the temporary directory can be deleted.
            chunk.export(output_filename, format=output_format).close()

            result = model.transcribe(output_filename)
            text = result["text"]

            # Silent chunks can yield an empty transcript; skip the translation
            # call for those (some deep_translator versions reject empty input)
            # while keeping one list entry per chunk.
            data_trans = translator.translate(text) if text.strip() else ""
            translated_chunks.append(data_trans)
            print(data_trans)
            print(text)

    print(
        f"FLAC file '{audio_file}' successfully split into "
        f"{len(translated_chunks)} chunks."
    )
    return translated_chunks