# -*- coding: utf-8 -*-
"""Gradio app: Kurmanji Kurdish speech-to-text with a word-by-word SRT export.

The audio is transcribed with a Wav2Vec2 CTC model. Word timestamps are NOT
taken from the model; they are estimated by spreading the clip duration over
the words in proportion to their character counts.
"""
import os
import tempfile
from datetime import timedelta

import gradio as gr
import librosa
import numpy as np
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor


def format_time(seconds):
    """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    Args:
        seconds: Duration in seconds (int or float). Negative values are
            clamped to zero.

    Returns:
        The timestamp string. The hours field is at least two digits wide
        and keeps growing for durations of 24 hours or more instead of
        wrapping back to zero.
    """
    td = timedelta(seconds=max(seconds, 0))
    # Work from the total millisecond count: ``td.seconds`` alone drops the
    # ``td.days`` component, which made long durations wrap around.
    total_ms = td // timedelta(milliseconds=1)
    hours, remainder_ms = divmod(total_ms, 3_600_000)
    minutes, remainder_ms = divmod(remainder_ms, 60_000)
    secs, milliseconds = divmod(remainder_ms, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"


def estimate_word_timings(transcription, total_duration):
    """Estimate per-word start/end times from character counts.

    Each word receives a share of ``total_duration`` proportional to its
    length in characters; words are laid out back to back starting at 0.

    Args:
        transcription: Text to split on whitespace.
        total_duration: Total duration, in seconds, to distribute.

    Returns:
        A list of ``(word, start_time, end_time)`` tuples. Empty when the
        transcription contains no words.
    """
    words = transcription.split()
    if not words:
        # Nothing to time; also avoids dividing by a zero character count.
        return []

    total_chars = sum(len(word) for word in words)
    char_duration = total_duration / total_chars

    word_timings = []
    current_time = 0
    for word in words:
        end_time = current_time + len(word) * char_duration
        word_timings.append((word, current_time, end_time))
        current_time = end_time
    return word_timings


model_name = "Akashpb13/xlsr_kurmanji_kurdish"
model = Wav2Vec2ForCTC.from_pretrained(model_name)
processor = Wav2Vec2Processor.from_pretrained(model_name)


def _build_srt(word_timings):
    """Render ``(word, start, end)`` tuples as SRT text, one cue per word."""
    return "".join(
        f"{index}\n{format_time(start)} --> {format_time(end)}\n{word}\n\n"
        for index, (word, start, end) in enumerate(word_timings, start=1)
    )


def transcribe_audio(file):
    """Transcribe an audio file and write a word-by-word SRT file.

    Args:
        file: Path to the audio file, as supplied by the ``gr.Audio``
            input (``type="filepath"``).

    Returns:
        A ``(transcription, srt_path)`` tuple: the decoded text and the
        path of the generated ``.srt`` file.

    Raises:
        gr.Error: If no audio file was supplied.
    """
    if not file:
        raise gr.Error("No audio file was provided.")

    # The audio is resampled to 16 kHz on load and that rate is passed on
    # to the processor.
    speech, rate = librosa.load(file, sr=16000)
    input_values = processor(
        speech, return_tensors="pt", sampling_rate=rate
    ).input_values

    with torch.no_grad():
        logits = model(input_values).logits

    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)[0]

    total_duration = len(speech) / rate
    word_timings = estimate_word_timings(transcription, total_duration)
    srt_content = _build_srt(word_timings)

    # Write into a fresh per-request directory so that concurrent requests
    # do not overwrite each other's output, while keeping the file name.
    output_dir = tempfile.mkdtemp(prefix="srt_")
    output_filename = os.path.join(output_dir, "output_word_by_word.srt")
    with open(output_filename, "w", encoding="utf-8") as f:
        f.write(srt_content)

    return transcription, output_filename


interface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(type="filepath"),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.File(label="Download SRT File"),
    ],
    title="Deng --- Nivîsandin ::: Kurdî-Kurmancî",
    description="Dengê xwe ji me re rêke û li Submit bixe ... û bila bêhna te fireh be .",
    article="By Derax Elî",
)

if __name__ == "__main__":
    interface.launch()