FarhadMadadzade
committed on
Commit
•
de84263
1
Parent(s):
534a7d7
fix
Browse files- README.md +1 -0
- app.py +27 -13
- requirements.txt +3 -1
- video_downloader.py +23 -0
README.md
CHANGED
@@ -8,6 +8,7 @@ sdk_version: 3.5
|
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: apache-2.0
|
|
|
11 |
---
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: apache-2.0
|
11 |
+
python_version: 3.8
|
12 |
---
|
13 |
|
14 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
CHANGED
@@ -1,27 +1,41 @@
|
|
1 |
from transformers import pipeline
|
2 |
import gradio as gr
|
3 |
import time
|
|
|
|
|
|
|
|
|
4 |
|
5 |
pipe = pipeline("automatic-speech-recognition", model="Artanis1551/whisper_romanian3")
|
6 |
|
7 |
|
8 |
-
def
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
|
18 |
iface = gr.Interface(
|
19 |
-
fn=
|
20 |
-
inputs=
|
21 |
-
outputs=["
|
22 |
title="Romanian Transcription Test",
|
23 |
-
live=True,
|
24 |
)
|
25 |
|
26 |
-
|
27 |
iface.launch()
|
|
|
1 |
from transformers import pipeline
|
2 |
import gradio as gr
|
3 |
import time
|
4 |
+
from video_downloader import download_video
|
5 |
+
from moviepy.editor import AudioFileClip
|
6 |
+
import datetime
|
7 |
+
import os
|
8 |
|
9 |
# Speech-recognition pipeline, created once at module import and called by
# process_video() for each transcription.
pipe = pipeline("automatic-speech-recognition", model="Artanis1551/whisper_romanian3")
|
10 |
|
11 |
|
12 |
+
def process_video(date):
    """Download the video for *date*, extract its audio and transcribe it.

    Args:
        date: Date string in ``YYYY-MM-DD`` format.

    Returns:
        A ``(video_path, transcription)`` tuple: the path of the downloaded
        video file and the text produced by the speech-recognition pipeline.

    Raises:
        ValueError: If *date* does not match ``YYYY-MM-DD``, or if no video
            could be downloaded for that date.
    """
    # Parse the date to the format yyyymmdd
    date = datetime.datetime.strptime(date, "%Y-%m-%d").strftime("%Y%m%d")

    # Download the video; download_video() reports failures by returning
    # None, so stop here with a clear message instead of handing None to
    # AudioFileClip.
    video_path = download_video(date)
    if video_path is None:
        raise ValueError(f"No video could be downloaded for {date}.")

    audio_path = f"audio_{date}.wav"
    clip = AudioFileClip(video_path)
    try:
        # Extract audio from the video
        clip.write_audiofile(audio_path)

        # Transcribe the audio
        # NOTE(review): no chunking options are passed to the pipeline;
        # confirm that long recordings are transcribed in full.
        with open(audio_path, "rb") as audio_file:
            audio = audio_file.read()
        transcription = pipe(audio)["text"]
    finally:
        # Release the clip's reader and remove the temporary audio file even
        # when extraction or transcription fails.
        clip.close()
        if os.path.exists(audio_path):
            os.remove(audio_path)

    return video_path, transcription
|
32 |
|
33 |
|
34 |
# Gradio UI: takes a date, shows the downloaded video and its transcription.
iface = gr.Interface(
    title="Romanian Transcription Test",
    fn=process_video,
    inputs="date",
    outputs=["video", "text"],
)

# Start serving the interface.
iface.launch()
|
requirements.txt
CHANGED
@@ -4,4 +4,6 @@ scikit-learn==1.1.1
|
|
4 |
httpx==0.24.1
|
5 |
gradio
|
6 |
transformers
|
7 |
-
torch
|
|
|
|
|
|
4 |
httpx==0.24.1
|
5 |
gradio
|
6 |
transformers
|
7 |
+
torch
|
8 |
+
urllib3
|
9 |
+
moviepy
|
video_downloader.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import urllib.request
|
2 |
+
import os
|
3 |
+
import glob
|
4 |
+
|
5 |
+
|
6 |
+
def download_video(date):
    """Download the recording for *date* into the current working directory.

    Any ``*.mp4`` file already present in the working directory is deleted
    first, so at most one downloaded video is kept at a time.

    Args:
        date: Date string in ``yyyymmdd`` format; its first four characters
            are used as the year component of the download URL.

    Returns:
        The local file name ``video_<date>.mp4`` on success, or ``None`` when
        the download fails (the reason is printed).
    """
    # Imported explicitly rather than relying on urllib.request pulling in
    # urllib.error as a side effect.
    from urllib.error import HTTPError

    # Delete any existing .mp4 files
    for mp4_file in glob.glob("*.mp4"):
        os.remove(mp4_file)

    year = date[:4]
    url = f"https://www.cdep.ro/u02/comisii/{year}/cp46_{date}.mp4"
    video_path = f"video_{date}.mp4"
    try:
        urllib.request.urlretrieve(url, video_path)
    except HTTPError as e:
        if e.code == 404:
            print("No video exists for the given date.")
        else:
            print(f"An error occurred while downloading the video: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
    else:
        print("Video downloaded successfully.")
        return video_path

    # The download failed: drop any partially written file so a truncated
    # video is never mistaken for a complete one.
    if os.path.exists(video_path):
        os.remove(video_path)
    return None
|