Spaces:

ID2223-labs
/

romanian_parliament_transcription

Sleeping

App Files Files Community

FarhadMadadzade committed on Dec 8, 2023

Commit

292ce47

1 Parent(s): 23d6d67

trying base model with romanian

Browse files

Files changed (1) hide show

app.py +38 -29

app.py CHANGED Viewed

@@ -9,10 +9,10 @@ import os
 from pydub import AudioSegment
 from pydub.silence import split_on_silence
-pipe = pipeline("automatic-speech-recognition", model="Sleepyp00/whisper-small-Swedish")
-def process_video(from_date, to_date):
     video_path = download_video1(from_date, to_date)
     # Extract audio from the video
@@ -29,7 +29,7 @@ def process_video(from_date, to_date):
         chunk.export(f"chunk{i}.wav", format="wav")
         with open(f"chunk{i}.wav", "rb") as audio_file:
             audio = audio_file.read()
-        transcription += pipe(audio)["text"] + "\n\n"
         os.remove(f"chunk{i}.wav")
     # Remove the audio file
@@ -38,45 +38,54 @@ def process_video(from_date, to_date):
     return video_path, transcription
-# def process_video(date):
-#     # Download the video
-#     video_path = download_video(date)
-#     # Extract audio from the video
-#     audio_path = f"audio_{date}.wav"
-#     AudioFileClip(video_path).write_audiofile(audio_path)
-#     # Split the audio into chunks
-#     audio = AudioSegment.from_wav(audio_path)
-#     chunks = split_on_silence(audio, min_silence_len=500, silence_thresh=-40)
-#     # Transcribe each chunk
-#     transcription = ""
-#     for i, chunk in enumerate(chunks):
-#         chunk.export(f"chunk{i}.wav", format="wav")
-#         with open(f"chunk{i}.wav", "rb") as audio_file:
-#             audio = audio_file.read()
-#         transcription += pipe(audio)["text"] + " "
-#         os.remove(f"chunk{i}.wav")
-#     # Remove the audio file
-#     os.remove(audio_path)
-#     return video_path, transcription
 iface = gr.Interface(
     fn=process_video,
-    # inputs=gr.inputs.Textbox(label="Date with format YYYYMMDD"),
-    inputs=[
-        gr.inputs.Textbox(label="From date with format YYYY-MM-DD"),
-        gr.inputs.Textbox(label="Date with format YYYY-MM-DD"),
-    ],
     outputs=[
         gr.outputs.Video(),
         gr.Textbox(lines=1000, max_lines=1000, interactive=True),
     ],
-    title="Swedish Transcription Test",
 )
 iface.launch()

 from pydub import AudioSegment
 from pydub.silence import split_on_silence
+pipe = pipeline("automatic-speech-recognition", model="openai/whisper-small")
+def process_video1(from_date, to_date):
     video_path = download_video1(from_date, to_date)
     # Extract audio from the video
         chunk.export(f"chunk{i}.wav", format="wav")
         with open(f"chunk{i}.wav", "rb") as audio_file:
             audio = audio_file.read()
+        transcription += pipe(audio)["text"] + "\n "
         os.remove(f"chunk{i}.wav")
     # Remove the audio file
     return video_path, transcription
+def process_video(date):
+    """Download the video for *date* and transcribe its audio track.
+
+    The audio is extracted to ``audio_{date}.wav``, split on silence, and
+    every chunk is fed to the module-level ``pipe`` speech-recognition
+    pipeline. All intermediate ``.wav`` files are removed before returning,
+    including when an error interrupts processing.
+
+    Args:
+        date: Value identifying the video; forwarded to ``download_video``
+            and embedded in the intermediate audio file name.
+
+    Returns:
+        A ``(video_path, transcription)`` tuple. ``transcription`` is the
+        text of each chunk followed by a single space, in chunk order.
+    """
+    # Download the video
+    video_path = download_video(date)
+    audio_path = f"audio_{date}.wav"
+    texts = []
+    try:
+        # Extract audio from the video; close the clip so its underlying
+        # reader is released instead of lingering until garbage collection.
+        clip = AudioFileClip(video_path)
+        try:
+            clip.write_audiofile(audio_path)
+        finally:
+            clip.close()
+        # Split the audio into chunks
+        segment = AudioSegment.from_wav(audio_path)
+        chunks = split_on_silence(segment, min_silence_len=500, silence_thresh=-40)
+        # Transcribe each chunk
+        for i, chunk in enumerate(chunks):
+            chunk_path = f"chunk{i}.wav"
+            try:
+                # export() hands back the open output file; close it so the
+                # handle is not leaked and the file can be removed below.
+                chunk.export(chunk_path, format="wav").close()
+                with open(chunk_path, "rb") as audio_file:
+                    chunk_bytes = audio_file.read()
+                texts.append(pipe(chunk_bytes)["text"])
+            finally:
+                # Clean up the chunk even if export or transcription failed.
+                if os.path.exists(chunk_path):
+                    os.remove(chunk_path)
+    finally:
+        # Remove the audio file
+        if os.path.exists(audio_path):
+            os.remove(audio_path)
+    # Same output shape as before: every chunk text is followed by one space.
+    transcription = "".join(text + " " for text in texts)
+    return video_path, transcription
+# iface = gr.Interface(
+#     fn=process_video1,
+#     inputs=[
+#         gr.inputs.Textbox(label="From date with format YYYY-MM-DD"),
+#         gr.inputs.Textbox(label="Date with format YYYY-MM-DD"),
+#     ],
+#     outputs=[
+#         gr.outputs.Video(),
+#         gr.Textbox(lines=1000, max_lines=1000, interactive=True),
+#     ],
+#     title="Swedish Transcription Test",
+# )
 # Gradio UI: one text input (the date string) is passed to process_video,
 # whose (video_path, transcription) return value fills the two outputs.
 iface = gr.Interface(
     fn=process_video,
+    inputs=gr.inputs.Textbox(label="Date with format YYYYMMDD"),
     outputs=[
         # NOTE(review): gr.inputs / gr.outputs are mixed with the top-level
         # gr.Textbox component here; presumably the former are the legacy
         # namespaces - confirm against the installed Gradio version.
         gr.outputs.Video(),
         gr.Textbox(lines=1000, max_lines=1000, interactive=True),
     ],
+    title="Romanian Transcription Test",
 )
 # Starts the app at import time (no __main__ guard in the visible code).
 iface.launch()