FarhadMadadzade committed on
Commit
8dffbd8
1 Parent(s): fb970e3

swedish model

Browse files
Files changed (2) hide show
  1. app.py +21 -20
  2. video_downloader.py +5 -4
app.py CHANGED
@@ -9,14 +9,14 @@ import os
9
  from pydub import AudioSegment
10
  from pydub.silence import split_on_silence
11
 
12
- pipe = pipeline("automatic-speech-recognition", model="Artanis1551/whisper_romanian3")
13
 
14
 
15
- def process_video1(from_date, to_date):
16
- video_path = download_video1(from_date, to_date)
17
 
18
  # Extract audio from the video
19
- audio_path = f"audio_{from_date}_{to_date}.wav"
20
  AudioFileClip(video_path).write_audiofile(audio_path)
21
 
22
  # Split the audio into chunks
@@ -38,6 +38,20 @@ def process_video1(from_date, to_date):
38
  return video_path, transcription
39
 
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  def process_video(date):
42
  # Download the video
43
  video_path = download_video(date)
@@ -70,26 +84,13 @@ def process_video(date):
70
 
71
 
72
  # iface = gr.Interface(
73
- # fn=process_video1,
74
- # inputs=[
75
- # gr.inputs.Textbox(label="From date with format YYYY-MM-DD"),
76
- # gr.inputs.Textbox(label="Date with format YYYY-MM-DD"),
77
- # ],
78
  # outputs=[
79
  # gr.outputs.Video(),
80
  # gr.Textbox(lines=1000, max_lines=1000, interactive=True),
81
  # ],
82
- # title="Swedish Transcription Test",
83
  # )
84
 
85
- iface = gr.Interface(
86
- fn=process_video,
87
- inputs=gr.inputs.Textbox(label="Date with format YYYYMMDD"),
88
- outputs=[
89
- gr.outputs.Video(),
90
- gr.Textbox(lines=1000, max_lines=1000, interactive=True),
91
- ],
92
- title="Romanian Transcription Test",
93
- )
94
-
95
  iface.launch()
 
9
  from pydub import AudioSegment
10
  from pydub.silence import split_on_silence
11
 
12
+ pipe = pipeline("automatic-speech-recognition", model="Artanis1551/whisper_swedish")
13
 
14
 
15
+ def process_video1(date):
16
+ video_path = download_video1(date)
17
 
18
  # Extract audio from the video
19
+ audio_path = f"audio_{date}.wav"
20
  AudioFileClip(video_path).write_audiofile(audio_path)
21
 
22
  # Split the audio into chunks
 
38
  return video_path, transcription
39
 
40
 
41
+ iface = gr.Interface(
42
+ fn=process_video1,
43
+ inputs=[
44
+ gr.inputs.Textbox(label="Date with format YYYY-MM-DD"),
45
+ ],
46
+ outputs=[
47
+ gr.outputs.Video(),
48
+ gr.Textbox(lines=1000, max_lines=1000, interactive=True),
49
+ ],
50
+ title="Transcribe Swedish Parliament Decisions",
51
+ description="This app transcribes the top Swedish Parliament decision video from the given date.",
52
+ )
53
+
54
+
55
  def process_video(date):
56
  # Download the video
57
  video_path = download_video(date)
 
84
 
85
 
86
  # iface = gr.Interface(
87
+ # fn=process_video,
88
+ # inputs=gr.inputs.Textbox(label="Date with format YYYYMMDD"),
 
 
 
89
  # outputs=[
90
  # gr.outputs.Video(),
91
  # gr.Textbox(lines=1000, max_lines=1000, interactive=True),
92
  # ],
93
+ # title="Romanian Transcription Test",
94
  # )
95
 
 
 
 
 
 
 
 
 
 
 
96
  iface.launch()
video_downloader.py CHANGED
@@ -44,17 +44,18 @@ def get_response(url):
44
  return soup
45
 
46
 
47
- def download_video1(from_date, to_date):
48
  # Get the webpage
49
- url = f"https://www.riksdagen.se/sv/sok/?avd=webbtv&from={from_date}&tom={to_date}&doktyp=kam-vo"
50
 
51
  soup = get_response(url)
52
  # Find the download link
53
  try:
 
54
  video_page = [
55
  a["href"]
56
  for a in soup.find_all("a", href=True)
57
- if a.get("aria-label") and a["aria-label"].startswith("Beslut")
58
  ][0]
59
  # go to video_page and get all links
60
  soup = get_response(video_page)
@@ -69,7 +70,7 @@ def download_video1(from_date, to_date):
69
  return None
70
 
71
  # Download the video
72
- video_path = f"video_{from_date}_{to_date}.mp4"
73
  try:
74
  urllib.request.urlretrieve(video_link, video_path)
75
  return video_path
 
44
  return soup
45
 
46
 
47
+ def download_video1(date):
48
  # Get the webpage
49
+ url = f"https://www.riksdagen.se/sv/sok/?avd=webbtv&from={date}&tom={date}&doktyp=kam-vo"
50
 
51
  soup = get_response(url)
52
  # Find the download link
53
  try:
54
+ dateparse = date.replace("-", "")
55
  video_page = [
56
  a["href"]
57
  for a in soup.find_all("a", href=True)
58
+ if a.get("aria-label") and dateparse in a["href"]
59
  ][0]
60
  # go to video_page and get all links
61
  soup = get_response(video_page)
 
70
  return None
71
 
72
  # Download the video
73
+ video_path = f"video_{date}.mp4"
74
  try:
75
  urllib.request.urlretrieve(video_link, video_path)
76
  return video_path