Spaces:

ID2223-labs
/

romanian_parliament_transcription

Sleeping

App Files Files Community

FarhadMadadzade committed on Dec 9, 2023

Commit

20fa434

1 Parent(s): 306506f

added so that it only uses 30 seconds if the video is longer than that

Browse files

Files changed (2) hide show

app.py +10 -43
video_downloader.py +0 -22

app.py CHANGED Viewed

@@ -2,7 +2,7 @@ from transformers import pipeline
 import gradio as gr
 import time
 from video_downloader import download_video, download_video1
-from moviepy.editor import AudioFileClip
 from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
 import datetime
 import os
@@ -15,6 +15,15 @@ pipe = pipeline("automatic-speech-recognition", model="Artanis1551/whisper_swedi
 def process_video1(date):
     video_path = download_video1(date)
     # Extract audio from the video
     audio_path = f"audio_{date}.wav"
     AudioFileClip(video_path).write_audiofile(audio_path)
@@ -51,46 +60,4 @@ iface = gr.Interface(
     desription="This app transcribes the top Swedish Parliament decision video from the given date.",
 )
-def process_video(date):
-    # Download the video
-    video_path = download_video(date)
-    # Extract the first 30 seconds of the video
-    short_video_path = f"short_{date}.mp4"
-    ffmpeg_extract_subclip(video_path, 0, 30, targetname=short_video_path)
-    # Extract audio from the short video
-    audio_path = f"audio_{date}.wav"
-    AudioFileClip(short_video_path).write_audiofile(audio_path)
-    # Split the audio into chunks
-    audio = AudioSegment.from_wav(audio_path)
-    chunks = split_on_silence(audio, min_silence_len=500, silence_thresh=-40)
-    # Transcribe each chunk
-    transcription = ""
-    for i, chunk in enumerate(chunks):
-        chunk.export(f"chunk{i}.wav", format="wav")
-        with open(f"chunk{i}.wav", "rb") as audio_file:
-            audio = audio_file.read()
-        transcription += pipe(audio)["text"] + " "
-        os.remove(f"chunk{i}.wav")
-    # Remove the audio file
-    os.remove(audio_path)
-    return short_video_path, transcription
-# iface = gr.Interface(
-#     fn=process_video,
-#     inputs=gr.inputs.Textbox(label="Date with format YYYYMMDD"),
-#     outputs=[
-#         gr.outputs.Video(),
-#         gr.Textbox(lines=1000, max_lines=1000, interactive=True),
-#     ],
-#     title="Romanian Transcription Test",
-# )
 iface.launch()

 import gradio as gr
 import time
 from video_downloader import download_video, download_video1
+from moviepy.editor import AudioFileClip, VideoFileClip
 from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
 import datetime
 import os
 def process_video1(date):
     video_path = download_video1(date)
+    # Get the duration of the video
+    video = VideoFileClip(video_path)
+    duration = video.duration
+    # If the video is longer than 30 seconds, only take the first 30 seconds
+    if duration > 30:
+        video_path = f"short_{date}.mp4"
+        ffmpeg_extract_subclip(video_path, 0, 30, targetname=video_path)
     # Extract audio from the video
     audio_path = f"audio_{date}.wav"
     AudioFileClip(video_path).write_audiofile(audio_path)
     desription="This app transcribes the top Swedish Parliament decision video from the given date.",
 )
 iface.launch()

video_downloader.py CHANGED Viewed

@@ -1,30 +1,8 @@
 import urllib.request
-import os
-import glob
 import requests
 from bs4 import BeautifulSoup
-def download_video(date):
-    # Delete any existing .mp4 files
-    for mp4_file in glob.glob("*.mp4"):
-        os.remove(mp4_file)
-    year = date[:4]
-    url = f"https://www.cdep.ro/u02/comisii/{year}/cp46_{date}.mp4"
-    try:
-        urllib.request.urlretrieve(url, f"video_{date}.mp4")
-        print("Video downloaded successfully.")
-        return f"video_{date}.mp4"
-    except urllib.error.HTTPError as e:
-        if e.code == 404:
-            print("No video exists for the given date.")
-        else:
-            print(f"An error occurred while downloading the video: {e}")
-    except Exception as e:
-        print(f"An unexpected error occurred: {e}")
 def get_response(url):
     try:
         response = requests.get(url)

 import urllib.request
 import requests
 from bs4 import BeautifulSoup
 def get_response(url):
     try:
         response = requests.get(url)