Spaces:

ID2223-labs
/

romanian_parliament_transcription

Sleeping

FarhadMadadzade committed on Dec 9, 2023

Commit

8dffbd8

1 Parent(s): fb970e3

swedish model

Files changed (2) hide show

app.py CHANGED Viewed

@@ -9,14 +9,14 @@ import os
 from pydub import AudioSegment
 from pydub.silence import split_on_silence
-pipe = pipeline("automatic-speech-recognition", model="Artanis1551/whisper_romanian3")
-def process_video1(from_date, to_date):
-    video_path = download_video1(from_date, to_date)
     # Extract audio from the video
-    audio_path = f"audio_{from_date}_{to_date}.wav"
     AudioFileClip(video_path).write_audiofile(audio_path)
     # Split the audio into chunks
@@ -38,6 +38,20 @@ def process_video1(from_date, to_date):
     return video_path, transcription
 def process_video(date):
     # Download the video
     video_path = download_video(date)
@@ -70,26 +84,13 @@ def process_video(date):
 # iface = gr.Interface(
-#     fn=process_video1,
-#     inputs=[
-#         gr.inputs.Textbox(label="From date with format YYYY-MM-DD"),
-#         gr.inputs.Textbox(label="Date with format YYYY-MM-DD"),
-#     ],
 #     outputs=[
 #         gr.outputs.Video(),
 #         gr.Textbox(lines=1000, max_lines=1000, interactive=True),
 #     ],
-#     title="Swedish Transcription Test",
 # )
-iface = gr.Interface(
-    fn=process_video,
-    inputs=gr.inputs.Textbox(label="Date with format YYYYMMDD"),
-    outputs=[
-        gr.outputs.Video(),
-        gr.Textbox(lines=1000, max_lines=1000, interactive=True),
-    ],
-    title="Romanian Transcription Test",
-)
 iface.launch()

 from pydub import AudioSegment
 from pydub.silence import split_on_silence
+pipe = pipeline("automatic-speech-recognition", model="Artanis1551/whisper_swedish")
+def process_video1(date):
+    video_path = download_video1(date)
     # Extract audio from the video
+    audio_path = f"audio_{date}.wav"
     AudioFileClip(video_path).write_audiofile(audio_path)
     # Split the audio into chunks
     return video_path, transcription
+# Gradio UI for the Swedish model: takes one date string, calls process_video1,
+# and shows the returned video alongside its transcription text.
+iface = gr.Interface(
+    fn=process_video1,
+    inputs=[
+        gr.inputs.Textbox(label="Date with format YYYY-MM-DD"),
+    ],
+    outputs=[
+        gr.outputs.Video(),
+        gr.Textbox(lines=1000, max_lines=1000, interactive=True),
+    ],
+    title="Transcribe Swedish Parliament Decisions",
+    # Keyword was misspelled "desription", so the text never reached Gradio's
+    # `description` parameter and was not rendered under the title.
+    description="This app transcribes the top Swedish Parliament decision video from the given date.",
+)
 def process_video(date):
     # Download the video
     video_path = download_video(date)
 # iface = gr.Interface(
+#     fn=process_video,
+#     inputs=gr.inputs.Textbox(label="Date with format YYYYMMDD"),
 #     outputs=[
 #         gr.outputs.Video(),
 #         gr.Textbox(lines=1000, max_lines=1000, interactive=True),
 #     ],
+#     title="Romanian Transcription Test",
 # )
 iface.launch()

video_downloader.py CHANGED Viewed

@@ -44,17 +44,18 @@ def get_response(url):
     return soup
-def download_video1(from_date, to_date):
     # Get the webpage
-    url = f"https://www.riksdagen.se/sv/sok/?avd=webbtv&from={from_date}&tom={to_date}&doktyp=kam-vo"
     soup = get_response(url)
     # Find the download link
     try:
         video_page = [
             a["href"]
             for a in soup.find_all("a", href=True)
-            if a.get("aria-label") and a["aria-label"].startswith("Beslut")
         ][0]
         # go to video_page and get all links
         soup = get_response(video_page)
@@ -69,7 +70,7 @@ def download_video1(from_date, to_date):
         return None
     # Download the video
-    video_path = f"video_{from_date}_{to_date}.mp4"
     try:
         urllib.request.urlretrieve(video_link, video_path)
         return video_path

     return soup
+def download_video1(date):
     # Get the webpage
+    url = f"https://www.riksdagen.se/sv/sok/?avd=webbtv&from={date}&tom={date}&doktyp=kam-vo"
     soup = get_response(url)
     # Find the download link
     try:
+        dateparse = date.replace("-", "")
         video_page = [
             a["href"]
             for a in soup.find_all("a", href=True)
+            if a.get("aria-label") and dateparse in a["href"]
         ][0]
         # go to video_page and get all links
         soup = get_response(video_page)
         return None
     # Download the video
+    video_path = f"video_{date}.mp4"
     try:
         urllib.request.urlretrieve(video_link, video_path)
         return video_path