Spaces:

ID2223-labs
/

romanian_parliament_transcription

Sleeping

File size: 5,062 Bytes

from transformers import pipeline
import gradio as gr
import time
from video_downloader import download_video, download_video1, download_youtube_video
from moviepy.editor import AudioFileClip, VideoFileClip
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
import datetime
import os
from pydub import AudioSegment
from pydub.silence import split_on_silence
import re

# Build the speech-recognition pipeline once at import time; the handlers
# below reuse this single instance for every request.
pipe = pipeline(
    task="automatic-speech-recognition",
    model="Artanis1551/whisper_romanian",
)


def process_video1(date):
    """Download the video for ``date`` and transcribe up to its first 30 seconds.

    The video is fetched with ``download_video1``, trimmed to 30 seconds when
    it is longer, converted to WAV, split on silence, and every chunk is run
    through the module-level ``pipe`` speech-recognition pipeline.

    Args:
        date: Date string in ``YYYY-MM-DD`` format. Surrounding whitespace is
            ignored.

    Returns:
        A ``(video_path, transcription)`` tuple. On malformed input or on any
        processing failure, ``video_path`` is the placeholder clip returned by
        ``download_youtube_video`` and ``transcription`` is an explanatory
        message.
    """
    fallback_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"

    # Validate the *whole* string: `date` is interpolated into file names
    # below, so nothing but the date itself may get through.
    date = date.strip() if isinstance(date, str) else ""
    if not re.fullmatch(r"\d{4}-\d{2}-\d{2}", date):
        return (
            download_youtube_video(fallback_url),
            "Please enter a date in the format YYYY-MM-DD.",
        )

    audio_path = f"audio_{date}.wav"
    try:
        video_path = download_video1(date)

        # Only the duration is needed; close the clip so its reader is released.
        video = VideoFileClip(video_path)
        try:
            duration = video.duration
        finally:
            video.close()

        # If the video is longer than 30 seconds, only keep the first 30.
        # The source path must stay intact until the subclip has been written,
        # so the output path is only assigned to `video_path` afterwards.
        if duration > 30:
            short_video_path = f"short_{date}.mp4"
            ffmpeg_extract_subclip(video_path, 0, 30, targetname=short_video_path)
            video_path = short_video_path

        # Extract the audio track to a temporary WAV file.
        audio_clip = AudioFileClip(video_path)
        try:
            audio_clip.write_audiofile(audio_path)
        finally:
            audio_clip.close()

        # Split the audio on silence and transcribe chunk by chunk.
        segments = split_on_silence(
            AudioSegment.from_wav(audio_path),
            min_silence_len=500,
            silence_thresh=-40,
        )
        pieces = []
        for i, segment in enumerate(segments):
            chunk_path = f"chunk{i}.wav"
            try:
                segment.export(chunk_path, format="wav")
                with open(chunk_path, "rb") as chunk_file:
                    chunk_bytes = chunk_file.read()
                pieces.append(pipe(chunk_bytes)["text"] + "\n ")
            finally:
                # Never leave chunk files behind, even if transcription fails.
                if os.path.exists(chunk_path):
                    os.remove(chunk_path)
        transcription = "".join(pieces)
    except Exception as exc:
        # Best-effort UI: any failure falls back to the placeholder clip, but
        # the cause is written to the log instead of being silently dropped.
        print(f"process_video1 failed for {date!r}: {exc!r}")
        video_path = download_youtube_video(fallback_url)
        transcription = "No decision was made on this date."
    finally:
        # Remove the temporary audio file on both success and failure.
        if os.path.exists(audio_path):
            os.remove(audio_path)

    return video_path, transcription


def process_video(date):
    """Transcribe the first 30 seconds of the video downloaded for ``date``.

    The video is fetched with ``download_video``, cut to its first 30 seconds,
    converted to WAV and passed as a whole to the module-level ``pipe``
    speech-recognition pipeline.

    Args:
        date: Date string made of exactly eight digits (``YYYYMMDD``).
            Surrounding whitespace is ignored.

    Returns:
        A ``(video_path, transcription)`` tuple. On success ``video_path`` is
        the 30-second clip. On malformed input or on any processing failure it
        is the placeholder clip returned by ``download_youtube_video`` and
        ``transcription`` is an explanatory message.
    """
    fallback_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"

    # Validate the *whole* string: `date` is interpolated into file names
    # below, so nothing but the eight digits may get through.
    date = date.strip() if isinstance(date, str) else ""
    if not re.fullmatch(r"\d{4}\d{2}\d{2}", date):
        # The message names the format this function actually accepts.
        return (
            download_youtube_video(fallback_url),
            "Please enter a date in the format YYYYMMDD.",
        )

    audio_path = f"audio_{date}.wav"
    try:
        # Download the video.
        source_path = download_video(date)

        # Extract the first 30 seconds of the video.
        video_path = f"short_{date}.mp4"
        ffmpeg_extract_subclip(source_path, 0, 30, targetname=video_path)

        # Extract the audio of the short video to a temporary WAV file.
        audio_clip = AudioFileClip(video_path)
        try:
            audio_clip.write_audiofile(audio_path)
        finally:
            audio_clip.close()

        # Transcribe the whole clip in a single pipeline call.
        with open(audio_path, "rb") as audio_file:
            audio_bytes = audio_file.read()
        transcription = pipe(audio_bytes)["text"]
    except Exception as exc:
        # Best-effort UI: any failure falls back to the placeholder clip, but
        # the cause is written to the log instead of being silently dropped.
        print(f"process_video failed for {date!r}: {exc!r}")
        video_path = download_youtube_video(fallback_url)
        transcription = "No decision was made on this date."
    finally:
        # Remove the temporary audio file on both success and failure.
        if os.path.exists(audio_path):
            os.remove(audio_path)

    return video_path, transcription


# Gradio UI: a single text box for the date goes in; the video and its
# transcription come out.
iface = gr.Interface(
    fn=process_video,
    # Use the top-level component classes throughout; the `gr.inputs` /
    # `gr.outputs` namespaces are deprecated and the transcription box below
    # already uses `gr.Textbox`.
    inputs=gr.Textbox(label="Date with format YYYYMMDD"),
    outputs=[
        gr.Video(),
        gr.Textbox(lines=100, max_lines=100, interactive=True),
    ],
    title="Romanian Transcription Test",
    description=(
        "This app transcribes videos from the Romanian Parliament"
        " on a given date. Only the first 30 seconds of the "
        "video will be used if it is longer than that."
    ),
)


# iface = gr.Interface(
#     fn=process_video1,
#     inputs=[
#         gr.inputs.Textbox(label="Date with format YYYY-MM-DD"),
#     ],
#     outputs=[
#         gr.outputs.Video(),
#         gr.Textbox(lines=100, max_lines=100, interactive=True),
#     ],
#     title="Transcribe Swedish Parliament Decisions",
# description="This app transcribes the top Swedish Parliament decision"
# + " video from the given date. Only the first 30 seconds of the "
# + "video will be used if it is longer than that.",
# )

# Launch the Gradio interface; this runs at import time, as the last
# top-level statement of the module.
iface.launch()