Spaces:
Running
Running
Add support for downloading files from YouTube using yt-dlp
Browse files
- app.py +29 -10
- download.py +38 -0
- requirements.txt +2 -1
app.py
CHANGED
@@ -1,14 +1,19 @@
|
|
|
|
|
|
1 |
from io import StringIO
|
2 |
import os
|
|
|
3 |
import tempfile
|
4 |
|
5 |
-
|
|
|
|
|
|
|
|
|
6 |
import gradio as gr
|
|
|
7 |
|
8 |
from utils import slugify, write_srt, write_vtt
|
9 |
-
import whisper
|
10 |
-
|
11 |
-
import ffmpeg
|
12 |
|
13 |
#import os
|
14 |
#os.system("pip install git+https://github.com/openai/whisper.git")
|
@@ -42,9 +47,8 @@ class UI:
|
|
42 |
def __init__(self, inputAudioMaxDuration):
|
43 |
self.inputAudioMaxDuration = inputAudioMaxDuration
|
44 |
|
45 |
-
def transcribeFile(self, modelName, languageName, uploadFile, microphoneData, task):
|
46 |
-
source =
|
47 |
-
sourceName = os.path.basename(source)
|
48 |
|
49 |
selectedLanguage = languageName.lower() if len(languageName) > 0 else None
|
50 |
selectedModel = modelName if modelName is not None else "base"
|
@@ -78,7 +82,20 @@ class UI:
|
|
78 |
download.append(createFile(vtt, downloadDirectory, filePrefix + "-subs.vtt"));
|
79 |
download.append(createFile(text, downloadDirectory, filePrefix + "-transcript.txt"));
|
80 |
|
81 |
-
return text, vtt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
|
83 |
def createFile(text: str, directory: str, fileName: str) -> str:
|
84 |
# Write the text to a file
|
@@ -99,6 +116,7 @@ def getSubs(segments: Iterator[dict], format: str) -> str:
|
|
99 |
|
100 |
segmentStream.seek(0)
|
101 |
return segmentStream.read()
|
|
|
102 |
|
103 |
def createUi(inputAudioMaxDuration, share=False):
|
104 |
ui = UI(inputAudioMaxDuration)
|
@@ -113,13 +131,14 @@ def createUi(inputAudioMaxDuration, share=False):
|
|
113 |
demo = gr.Interface(fn=ui.transcribeFile, description=ui_description, inputs=[
|
114 |
gr.Dropdown(choices=["tiny", "base", "small", "medium", "large"], value="medium", label="Model"),
|
115 |
gr.Dropdown(choices=sorted(LANGUAGES), label="Language"),
|
|
|
116 |
gr.Audio(source="upload", type="filepath", label="Upload Audio"),
|
117 |
gr.Audio(source="microphone", type="filepath", label="Microphone Input"),
|
118 |
gr.Dropdown(choices=["transcribe", "translate"], label="Task"),
|
119 |
], outputs=[
|
|
|
120 |
gr.Text(label="Transcription"),
|
121 |
-
gr.Text(label="Segments")
|
122 |
-
gr.File(label="Download")
|
123 |
])
|
124 |
|
125 |
demo.launch(share=share)
|
|
|
1 |
+
from typing import Iterator
|
2 |
+
|
3 |
from io import StringIO
|
4 |
import os
|
5 |
+
import pathlib
|
6 |
import tempfile
|
7 |
|
8 |
+
# External programs
|
9 |
+
import whisper
|
10 |
+
import ffmpeg
|
11 |
+
|
12 |
+
# UI
|
13 |
import gradio as gr
|
14 |
+
from download import downloadUrl
|
15 |
|
16 |
from utils import slugify, write_srt, write_vtt
|
|
|
|
|
|
|
17 |
|
18 |
#import os
|
19 |
#os.system("pip install git+https://github.com/openai/whisper.git")
|
|
|
47 |
def __init__(self, inputAudioMaxDuration):
|
48 |
self.inputAudioMaxDuration = inputAudioMaxDuration
|
49 |
|
50 |
+
def transcribeFile(self, modelName, languageName, urlData, uploadFile, microphoneData, task):
|
51 |
+
source, sourceName = getSource(urlData, uploadFile, microphoneData)
|
|
|
52 |
|
53 |
selectedLanguage = languageName.lower() if len(languageName) > 0 else None
|
54 |
selectedModel = modelName if modelName is not None else "base"
|
|
|
82 |
download.append(createFile(vtt, downloadDirectory, filePrefix + "-subs.vtt"));
|
83 |
download.append(createFile(text, downloadDirectory, filePrefix + "-transcript.txt"));
|
84 |
|
85 |
+
return download, text, vtt
|
86 |
+
|
87 |
+
def getSource(urlData, uploadFile, microphoneData):
    """Pick the audio source to process and derive a short display name for it.

    Inputs are considered in priority order: a non-blank URL (fetched with
    ``downloadUrl``), then the uploaded file, then the microphone recording.

    Args:
        urlData: URL text entered by the user; ``None``, empty, or
            whitespace-only values are treated as "no URL".
        uploadFile: Path of the uploaded file, or ``None``.
        microphoneData: Path of the microphone recording, or ``None``.

    Returns:
        A ``(source, sourceName)`` tuple: the path of the chosen file and a
        name built from the first 18 characters of its stem plus its suffix.

    Raises:
        ValueError: If no URL, upload, or microphone recording was provided.
    """
    # A whitespace-only string is truthy; strip it so it is not sent to the downloader.
    url = urlData.strip() if isinstance(urlData, str) else urlData

    if url:
        # Download from YouTube
        source = downloadUrl(url)
    else:
        # File input
        source = uploadFile if uploadFile is not None else microphoneData

    if source is None:
        # Without this guard pathlib.Path(None) fails with an opaque TypeError.
        raise ValueError(
            "No input provided: enter a URL, upload a file, or record from the microphone"
        )

    file_path = pathlib.Path(source)
    # Keep the name short: truncate the stem but preserve the extension.
    sourceName = file_path.stem[:18] + file_path.suffix

    return source, sourceName
|
99 |
|
100 |
def createFile(text: str, directory: str, fileName: str) -> str:
|
101 |
# Write the text to a file
|
|
|
116 |
|
117 |
segmentStream.seek(0)
|
118 |
return segmentStream.read()
|
119 |
+
|
120 |
|
121 |
def createUi(inputAudioMaxDuration, share=False):
|
122 |
ui = UI(inputAudioMaxDuration)
|
|
|
131 |
demo = gr.Interface(fn=ui.transcribeFile, description=ui_description, inputs=[
|
132 |
gr.Dropdown(choices=["tiny", "base", "small", "medium", "large"], value="medium", label="Model"),
|
133 |
gr.Dropdown(choices=sorted(LANGUAGES), label="Language"),
|
134 |
+
gr.Text(label="URL (YouTube, etc.)"),
|
135 |
gr.Audio(source="upload", type="filepath", label="Upload Audio"),
|
136 |
gr.Audio(source="microphone", type="filepath", label="Microphone Input"),
|
137 |
gr.Dropdown(choices=["transcribe", "translate"], label="Task"),
|
138 |
], outputs=[
|
139 |
+
gr.File(label="Download"),
|
140 |
gr.Text(label="Transcription"),
|
141 |
+
gr.Text(label="Segments")
|
|
|
142 |
])
|
143 |
|
144 |
demo.launch(share=share)
|
download.py
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
from tempfile import mkdtemp
|
4 |
+
from yt_dlp import YoutubeDL
|
5 |
+
from yt_dlp.postprocessor import PostProcessor
|
6 |
+
|
7 |
+
class FilenameCollectorPP(PostProcessor):
    """yt-dlp post-processor that remembers the path of each file it is run on.

    The collected paths are available, in processing order, in ``filenames``.
    """

    def __init__(self):
        # No downloader is attached at construction time.
        super().__init__(None)
        self.filenames = []

    def run(self, information):
        """Record ``information["filepath"]`` and pass the info dict through.

        Returns an empty list together with the unmodified ``information``
        dict (presumably yt-dlp's "files to delete, info" pair -- confirm
        against the PostProcessor.run contract).
        """
        path = information["filepath"]
        self.filenames.append(path)
        return [], information
|
15 |
+
|
16 |
+
def downloadUrl(url: str):
    """Download the media behind *url* with yt-dlp and return the local file path.

    The file is written into a fresh temporary directory created with
    ``mkdtemp``; this function does not remove that directory afterwards.

    Args:
        url: Address of the media to fetch.

    Returns:
        Path of the first file reported by the filename-collecting
        post-processor.

    Raises:
        Exception: If the post-processor collected no file for *url*.
    """
    target_dir = mkdtemp()
    collector = FilenameCollectorPP()

    # Option values are passed to yt-dlp unchanged; see its documentation
    # for the exact meaning of "format" and "playlist_items".
    options = dict(
        format="bestaudio/best",
        playlist_items="1",
        paths={"home": target_dir},
    )

    with YoutubeDL(options) as downloader:
        downloader.add_post_processor(collector)
        downloader.download([url])

    if not collector.filenames:
        raise Exception("Cannot download " + url)

    first_file = collector.filenames[0]
    print("Downloaded " + first_file)

    return first_file
|
requirements.txt
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
git+https://github.com/openai/whisper.git
|
2 |
transformers
|
3 |
ffmpeg-python==0.2.0
|
4 |
-
gradio
|
|
|
|
1 |
git+https://github.com/openai/whisper.git
|
2 |
transformers
|
3 |
ffmpeg-python==0.2.0
|
4 |
+
gradio
|
5 |
+
yt-dlp
|