aadnk committed on
Commit
8d120bf
1 Parent(s): 3fadc6e

Add support for downloading files from YouTube using yt-dlp

Browse files
Files changed (3) hide show
  1. app.py +29 -10
  2. download.py +38 -0
  3. requirements.txt +2 -1
app.py CHANGED
@@ -1,14 +1,19 @@
 
 
1
  from io import StringIO
2
  import os
 
3
  import tempfile
4
 
5
- from typing import Iterator
 
 
 
 
6
  import gradio as gr
 
7
 
8
  from utils import slugify, write_srt, write_vtt
9
- import whisper
10
-
11
- import ffmpeg
12
 
13
  #import os
14
  #os.system("pip install git+https://github.com/openai/whisper.git")
@@ -42,9 +47,8 @@ class UI:
42
  def __init__(self, inputAudioMaxDuration):
43
  self.inputAudioMaxDuration = inputAudioMaxDuration
44
 
45
- def transcribeFile(self, modelName, languageName, uploadFile, microphoneData, task):
46
- source = uploadFile if uploadFile is not None else microphoneData
47
- sourceName = os.path.basename(source)
48
 
49
  selectedLanguage = languageName.lower() if len(languageName) > 0 else None
50
  selectedModel = modelName if modelName is not None else "base"
@@ -78,7 +82,20 @@ class UI:
78
  download.append(createFile(vtt, downloadDirectory, filePrefix + "-subs.vtt"));
79
  download.append(createFile(text, downloadDirectory, filePrefix + "-transcript.txt"));
80
 
81
- return text, vtt, download
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
  def createFile(text: str, directory: str, fileName: str) -> str:
84
  # Write the text to a file
@@ -99,6 +116,7 @@ def getSubs(segments: Iterator[dict], format: str) -> str:
99
 
100
  segmentStream.seek(0)
101
  return segmentStream.read()
 
102
 
103
  def createUi(inputAudioMaxDuration, share=False):
104
  ui = UI(inputAudioMaxDuration)
@@ -113,13 +131,14 @@ def createUi(inputAudioMaxDuration, share=False):
113
  demo = gr.Interface(fn=ui.transcribeFile, description=ui_description, inputs=[
114
  gr.Dropdown(choices=["tiny", "base", "small", "medium", "large"], value="medium", label="Model"),
115
  gr.Dropdown(choices=sorted(LANGUAGES), label="Language"),
 
116
  gr.Audio(source="upload", type="filepath", label="Upload Audio"),
117
  gr.Audio(source="microphone", type="filepath", label="Microphone Input"),
118
  gr.Dropdown(choices=["transcribe", "translate"], label="Task"),
119
  ], outputs=[
 
120
  gr.Text(label="Transcription"),
121
- gr.Text(label="Segments"),
122
- gr.File(label="Download")
123
  ])
124
 
125
  demo.launch(share=share)
 
1
+ from typing import Iterator
2
+
3
  from io import StringIO
4
  import os
5
+ import pathlib
6
  import tempfile
7
 
8
+ # External programs
9
+ import whisper
10
+ import ffmpeg
11
+
12
+ # UI
13
  import gradio as gr
14
+ from download import downloadUrl
15
 
16
  from utils import slugify, write_srt, write_vtt
 
 
 
17
 
18
  #import os
19
  #os.system("pip install git+https://github.com/openai/whisper.git")
 
47
  def __init__(self, inputAudioMaxDuration):
48
  self.inputAudioMaxDuration = inputAudioMaxDuration
49
 
50
+ def transcribeFile(self, modelName, languageName, urlData, uploadFile, microphoneData, task):
51
+ source, sourceName = getSource(urlData, uploadFile, microphoneData)
 
52
 
53
  selectedLanguage = languageName.lower() if len(languageName) > 0 else None
54
  selectedModel = modelName if modelName is not None else "base"
 
82
  download.append(createFile(vtt, downloadDirectory, filePrefix + "-subs.vtt"));
83
  download.append(createFile(text, downloadDirectory, filePrefix + "-transcript.txt"));
84
 
85
+ return download, text, vtt
86
+
87
+ def getSource(urlData, uploadFile, microphoneData):
88
+ if urlData:
89
+ # Download from YouTube
90
+ source = downloadUrl(urlData)
91
+ else:
92
+ # File input
93
+ source = uploadFile if uploadFile is not None else microphoneData
94
+
95
+ file_path = pathlib.Path(source)
96
+ sourceName = file_path.stem[:18] + file_path.suffix
97
+
98
+ return source, sourceName
99
 
100
  def createFile(text: str, directory: str, fileName: str) -> str:
101
  # Write the text to a file
 
116
 
117
  segmentStream.seek(0)
118
  return segmentStream.read()
119
+
120
 
121
  def createUi(inputAudioMaxDuration, share=False):
122
  ui = UI(inputAudioMaxDuration)
 
131
  demo = gr.Interface(fn=ui.transcribeFile, description=ui_description, inputs=[
132
  gr.Dropdown(choices=["tiny", "base", "small", "medium", "large"], value="medium", label="Model"),
133
  gr.Dropdown(choices=sorted(LANGUAGES), label="Language"),
134
+ gr.Text(label="URL (YouTube, etc.)"),
135
  gr.Audio(source="upload", type="filepath", label="Upload Audio"),
136
  gr.Audio(source="microphone", type="filepath", label="Microphone Input"),
137
  gr.Dropdown(choices=["transcribe", "translate"], label="Task"),
138
  ], outputs=[
139
+ gr.File(label="Download"),
140
  gr.Text(label="Transcription"),
141
+ gr.Text(label="Segments")
 
142
  ])
143
 
144
  demo.launch(share=share)
download.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ from tempfile import mkdtemp
4
+ from yt_dlp import YoutubeDL
5
+ from yt_dlp.postprocessor import PostProcessor
6
+
7
+ class FilenameCollectorPP(PostProcessor):
8
+ def __init__(self):
9
+ super(FilenameCollectorPP, self).__init__(None)
10
+ self.filenames = []
11
+
12
+ def run(self, information):
13
+ self.filenames.append(information["filepath"])
14
+ return [], information
15
+
16
+ def downloadUrl(url: str):
17
+ destinationDirectory = mkdtemp()
18
+
19
+ ydl_opts = {
20
+ "format": "bestaudio/best",
21
+ 'playlist_items': '1',
22
+ 'paths': {
23
+ 'home': destinationDirectory
24
+ }
25
+ }
26
+ filename_collector = FilenameCollectorPP()
27
+
28
+ with YoutubeDL(ydl_opts) as ydl:
29
+ ydl.add_post_processor(filename_collector)
30
+ ydl.download([url])
31
+
32
+ if len(filename_collector.filenames) <= 0:
33
+ raise Exception("Cannot download " + url)
34
+
35
+ result = filename_collector.filenames[0]
36
+ print("Downloaded " + result)
37
+
38
+ return result
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  git+https://github.com/openai/whisper.git
2
  transformers
3
  ffmpeg-python==0.2.0
4
- gradio
 
 
1
  git+https://github.com/openai/whisper.git
2
  transformers
3
  ffmpeg-python==0.2.0
4
+ gradio
5
+ yt-dlp