Spaces:

ID2223-labs
/

romanian_parliament_transcription

Sleeping

File size: 2,028 Bytes

fac06d0
 
 
43e6a3b
20fa434
e4297a8
de84263
 
4b85b27
 
fac06d0
8dffbd8
fac06d0
 
8dffbd8
 
de84263
20fa434
 
 
 
 
 
 
 
 
1936f1e
8dffbd8
1936f1e
de84263
4b85b27
 
 
 
 
 
 
 
 
 
292ce47
4b85b27
de84263
 
 
1936f1e
 
 
 
8dffbd8
 
 
 
 
 
 
eecd1a3
8dffbd8
 
eecd1a3
 
 
8dffbd8
 
534a7d7

import datetime
import os
import tempfile
import time

import gradio as gr
from moviepy.editor import AudioFileClip, VideoFileClip
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
from pydub import AudioSegment
from pydub.silence import split_on_silence
from transformers import pipeline

from video_downloader import download_video1

# Speech-recognition pipeline, created once at import time and shared by
# every call to the transcription function below.
pipe = pipeline(
    task="automatic-speech-recognition",
    model="Artanis1551/whisper_swedish",
)


def process_video1(date):
    """Download the video for ``date`` and transcribe at most 30 seconds of it.

    The video is fetched with ``download_video1``. When it is longer than
    30 seconds, its first 30 seconds are cut into ``short_{date}.mp4`` and
    that clip is used from then on. The audio track is written to a WAV
    file, split on silence, and every chunk is run through the module-level
    ``pipe`` speech-recognition pipeline.

    Args:
        date: Value forwarded to ``download_video1`` and embedded in the
            name of the trimmed clip (the UI asks for ``YYYY-MM-DD``).

    Returns:
        A ``(video_path, transcription)`` tuple: the path of the video that
        was transcribed (the trimmed clip if trimming happened, otherwise
        the downloaded file) and the concatenated chunk texts, each one
        followed by a newline and a space.
    """
    max_seconds = 30
    source_path = download_video1(date)

    # Only the duration is needed; close the clip straight away so the
    # reader it holds on the file is released.
    probe = VideoFileClip(source_path)
    try:
        duration = probe.duration
    finally:
        probe.close()

    video_path = source_path
    if duration > max_seconds:
        # Read from the downloaded file and write to a *separate* target.
        # Reusing one variable for both made ffmpeg read and write the same
        # not-yet-existing file.
        # NOTE(review): `date` is user input and ends up in this file name;
        # confirm download_video1 rejects values that are not plain dates.
        video_path = f"short_{date}.mp4"
        ffmpeg_extract_subclip(
            source_path, 0, max_seconds, targetname=video_path
        )

    # Keep all intermediate audio in a private temporary directory: it is
    # unique per call (no clashes between concurrent requests) and is
    # removed even when transcription raises.
    with tempfile.TemporaryDirectory() as work_dir:
        audio_path = os.path.join(work_dir, "audio.wav")
        audio_clip = AudioFileClip(video_path)
        try:
            audio_clip.write_audiofile(audio_path)
        finally:
            audio_clip.close()

        # Split the audio into chunks at pauses of at least 500 ms.
        recording = AudioSegment.from_wav(audio_path)
        chunks = split_on_silence(
            recording, min_silence_len=500, silence_thresh=-40
        )

        # Transcribe each chunk; the pipeline is fed the raw WAV bytes.
        chunk_path = os.path.join(work_dir, "chunk.wav")
        pieces = []
        for chunk in chunks:
            # export() hands back the open output file; close it so the
            # data is flushed before it is read back.
            chunk.export(chunk_path, format="wav").close()
            with open(chunk_path, "rb") as chunk_file:
                wav_bytes = chunk_file.read()
            pieces.append(pipe(wav_bytes)["text"] + "\n ")

    return video_path, "".join(pieces)


# Web UI: one text box for the date in, the transcribed video and its
# transcription out. Components come from the top-level `gr` namespace
# throughout; the legacy `gr.inputs` / `gr.outputs` modules are deprecated
# and absent from newer Gradio releases.
iface = gr.Interface(
    fn=process_video1,
    inputs=[
        gr.Textbox(label="Date with format YYYY-MM-DD"),
    ],
    outputs=[
        gr.Video(),
        # NOTE(review): lines=1000 exceeds max_lines=100 -- confirm which
        # height is intended for the transcription box.
        gr.Textbox(lines=1000, max_lines=100, interactive=True),
    ],
    title="Transcribe Swedish Parliament Decisions",
    description=(
        "This app transcribes the top Swedish Parliament decision"
        " video from the given date. Only the first 30 seconds of the "
        "video will be used if it is longer than that."
    ),
)

# Start the Gradio server for the interface defined above.
iface.launch()