File size: 2,885 Bytes
fac06d0 1936f1e de84263 e4297a8 de84263 4b85b27 fac06d0 fb970e3 fac06d0 292ce47 1936f1e de84263 1936f1e de84263 4b85b27 292ce47 4b85b27 de84263 1936f1e 292ce47 1936f1e c031f24 292ce47 c031f24 1936f1e 292ce47 1936f1e 292ce47 1936f1e 165b80a 1936f1e be37f4c 292ce47 fac06d0 534a7d7 de84263 292ce47 306a78c 536f3e9 306a78c 292ce47 534a7d7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
from transformers import pipeline
import gradio as gr
import time
from video_downloader import download_video, download_video1
from moviepy.editor import AudioFileClip
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
import datetime
import os
from pydub import AudioSegment
from pydub.silence import split_on_silence
# Hugging Face ASR pipeline (Romanian fine-tune of Whisper).
# NOTE(review): the model is loaded at import time — fine for a standalone
# Gradio app, but it slows down any import of this module.
pipe = pipeline("automatic-speech-recognition", model="Artanis1551/whisper_romanian3")
def process_video1(from_date, to_date):
    """Download a video for the given date range and transcribe its audio.

    Parameters
    ----------
    from_date, to_date : str
        Date strings forwarded to ``download_video1`` (format defined there;
        presumably YYYY-MM-DD, per the commented-out interface labels below
        — TODO confirm).

    Returns
    -------
    tuple[str, str]
        Path to the downloaded video and the transcription text
        (one chunk per line).
    """
    video_path = download_video1(from_date, to_date)

    # Extract the full audio track to a temporary WAV file.
    audio_path = f"audio_{from_date}_{to_date}.wav"
    AudioFileClip(video_path).write_audiofile(audio_path)

    transcription = ""
    try:
        # Split on silence so each chunk is short enough for the ASR model.
        segment = AudioSegment.from_wav(audio_path)
        chunks = split_on_silence(segment, min_silence_len=500, silence_thresh=-40)

        # Transcribe each chunk; use a distinct name for the raw bytes so the
        # AudioSegment is not shadowed inside the loop.
        for i, chunk in enumerate(chunks):
            chunk_path = f"chunk{i}.wav"
            chunk.export(chunk_path, format="wav")
            try:
                with open(chunk_path, "rb") as audio_file:
                    chunk_bytes = audio_file.read()
                transcription += pipe(chunk_bytes)["text"] + "\n "
            finally:
                # Remove the chunk file even if transcription fails.
                os.remove(chunk_path)
    finally:
        # Always clean up the extracted audio file.
        os.remove(audio_path)
    return video_path, transcription
def process_video(date):
    """Download the video for *date* and transcribe its first 30 seconds.

    Parameters
    ----------
    date : str
        Date string forwarded to ``download_video`` (format YYYYMMDD, per
        the Gradio input label below).

    Returns
    -------
    tuple[str, str]
        Path to the 30-second clip and the space-separated transcription.
    """
    video_path = download_video(date)

    # Keep only the first 30 seconds to bound transcription time.
    short_video_path = f"short_{date}.mp4"
    ffmpeg_extract_subclip(video_path, 0, 30, targetname=short_video_path)

    # Extract the clip's audio track to a temporary WAV file.
    audio_path = f"audio_{date}.wav"
    AudioFileClip(short_video_path).write_audiofile(audio_path)

    transcription = ""
    try:
        # Split on silence so each chunk is short enough for the ASR model.
        segment = AudioSegment.from_wav(audio_path)
        chunks = split_on_silence(segment, min_silence_len=500, silence_thresh=-40)

        # Transcribe each chunk; use a distinct name for the raw bytes so the
        # AudioSegment is not shadowed inside the loop.
        for i, chunk in enumerate(chunks):
            chunk_path = f"chunk{i}.wav"
            chunk.export(chunk_path, format="wav")
            try:
                with open(chunk_path, "rb") as audio_file:
                    chunk_bytes = audio_file.read()
                transcription += pipe(chunk_bytes)["text"] + " "
            finally:
                # Remove the chunk file even if transcription fails.
                os.remove(chunk_path)
    finally:
        # Always clean up the extracted audio file.
        os.remove(audio_path)
    return short_video_path, transcription
# iface = gr.Interface(
# fn=process_video1,
# inputs=[
# gr.inputs.Textbox(label="From date with format YYYY-MM-DD"),
# gr.inputs.Textbox(label="Date with format YYYY-MM-DD"),
# ],
# outputs=[
# gr.outputs.Video(),
# gr.Textbox(lines=1000, max_lines=1000, interactive=True),
# ],
# title="Swedish Transcription Test",
# )
# Gradio UI: a single date textbox in, (video, transcription text) out.
# NOTE(review): this mixes the legacy Gradio API (gr.inputs.Textbox,
# gr.outputs.Video — removed in Gradio 3.x) with the modern gr.Textbox on
# the output side; it only works on specific Gradio versions — confirm
# against the pinned requirements before upgrading.
iface = gr.Interface(
    fn=process_video,
    inputs=gr.inputs.Textbox(label="Date with format YYYYMMDD"),
    outputs=[
        gr.outputs.Video(),
        gr.Textbox(lines=1000, max_lines=1000, interactive=True),
    ],
    title="Romanian Transcription Test",
)
# Start the web app (blocks until the server is stopped).
iface.launch()
|