Spaces:

ID2223-labs
/

romanian_parliament_transcription

Running

FarhadMadadzade committed on Dec 9, 2023

Commit

4c56b36

•

1 Parent(s): 43e6a3b

added error handling

Files changed (1) hide show

app.py CHANGED Viewed

@@ -13,36 +13,40 @@ pipe = pipeline("automatic-speech-recognition", model="Artanis1551/whisper_swedi
 def process_video1(date):
     """Download the video for ``date``, trim it to 30 seconds and transcribe it.

     Steps: fetch the video with ``download_video1``, keep only the first
     30 seconds when it is longer than that, extract the audio track to a
     WAV file, split the audio on silence and run every chunk through the
     module-level ``pipe`` speech-recognition pipeline.

     Args:
         date: Value forwarded to ``download_video1`` and interpolated into
             the temporary file names (``short_{date}.mp4``,
             ``audio_{date}.wav``).

     Returns:
         A ``(video_path, transcription)`` tuple. ``video_path`` is the
         trimmed clip when trimming happened, otherwise the downloaded file.
         ``transcription`` holds each chunk's text followed by ``"\n "``.
         If any step fails, the tuple is
         ``("", "No decision was made on this date.")``.
     """
     # Local import: the file's import block is outside the visible section.
     import logging

     audio_path = f"audio_{date}.wav"
     chunk_path = None
     try:
         source_path = download_video1(date)

         # Only the duration is needed here; release the reader right away.
         video = VideoFileClip(source_path)
         try:
             duration = video.duration
         finally:
             video.close()

         # If the video is longer than 30 seconds, only take the first 30
         # seconds. The input must stay the downloaded file: the previous code
         # overwrote the path first and asked ffmpeg to read and write the
         # same, not-yet-existing file.
         video_path = source_path
         if duration > 30:
             video_path = f"short_{date}.mp4"
             ffmpeg_extract_subclip(source_path, 0, 30, targetname=video_path)

         # Extract audio from the video
         audio_clip = AudioFileClip(video_path)
         try:
             audio_clip.write_audiofile(audio_path)
         finally:
             audio_clip.close()

         # Split the audio into chunks
         audio = AudioSegment.from_wav(audio_path)
         chunks = split_on_silence(audio, min_silence_len=500, silence_thresh=-40)

         # Transcribe each chunk
         transcription = ""
         for i, chunk in enumerate(chunks):
             chunk_path = f"chunk{i}.wav"
             # export() hands back an open file handle; close it so the file
             # can be reopened and removed on every platform.
             chunk.export(chunk_path, format="wav").close()
             with open(chunk_path, "rb") as audio_file:
                 audio_bytes = audio_file.read()
             transcription += pipe(audio_bytes)["text"] + "\n "
             os.remove(chunk_path)
             chunk_path = None
     except Exception:
         # Keep the caller-visible fallback, but record the real cause instead
         # of silently swallowing it (and let KeyboardInterrupt/SystemExit
         # propagate, which a bare ``except:`` would not).
         logging.getLogger(__name__).exception(
             "process_video1 failed for date %r", date
         )
         video_path = ""
         transcription = "No decision was made on this date."
     finally:
         # Remove temporary audio files on both the success and failure paths.
         for leftover in (chunk_path, audio_path):
             if leftover and os.path.exists(leftover):
                 os.remove(leftover)
     return video_path, transcription