Spaces:
Running
on
T4
Running
on
T4
To handle videos longer than one hour and to transcribe them in segments, we need to make several modifications to the yt_transcribe function.
#15
by
Illia56
- opened
app.py
CHANGED
@@ -71,21 +71,42 @@ def download_yt_audio(yt_url, filename):
|
|
71 |
raise gr.Error(str(err))
|
72 |
|
73 |
|
74 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
html_embed_str = _return_yt_html_embed(yt_url)
|
76 |
|
77 |
with tempfile.TemporaryDirectory() as tmpdirname:
|
78 |
filepath = os.path.join(tmpdirname, "video.mp4")
|
79 |
download_yt_audio(yt_url, filepath)
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
|
88 |
-
return html_embed_str, text
|
89 |
|
90 |
|
91 |
demo = gr.Blocks()
|
|
|
71 |
raise gr.Error(str(err))
|
72 |
|
73 |
|
74 |
+
def ffmpeg_read(file_path, sampling_rate):
    """Decode an audio file into a mono float32 waveform with the FFmpeg CLI.

    Args:
        file_path: Path of the audio (or video) file to decode.
        sampling_rate: Target sampling rate in Hz; FFmpeg resamples to it.

    Returns:
        A one-dimensional ``numpy.float32`` array holding the decoded samples.

    Raises:
        FileNotFoundError: If ``file_path`` does not point to an existing file.
        ValueError: If ``sampling_rate`` is not positive, the ``ffmpeg``
            executable cannot be launched, FFmpeg exits with an error, or no
            audio samples were produced.
    """
    import os
    import subprocess

    import numpy as np

    rate = int(sampling_rate)
    if rate <= 0:
        raise ValueError(f"sampling_rate must be positive, got {sampling_rate!r}")

    # Fail early with a precise error instead of an opaque FFmpeg message.
    if not os.path.isfile(file_path):
        raise FileNotFoundError(f"Audio file not found: {file_path}")

    # Argument list (no shell) so the path is never interpreted by a shell.
    command = [
        "ffmpeg",
        "-nostdin",
        "-hide_banner",
        "-loglevel", "error",
        "-i", os.fspath(file_path),
        "-vn",            # ignore any video stream
        "-ac", "1",       # downmix to mono
        "-ar", str(rate), # resample to the requested rate
        "-f", "f32le",    # raw little-endian 32-bit float samples
        "pipe:1",
    ]

    try:
        completed = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=False,
        )
    except FileNotFoundError as err:
        # Raised by subprocess when the ffmpeg binary itself is missing.
        raise ValueError(
            "ffmpeg was not found but is required to decode audio files"
        ) from err

    if completed.returncode != 0:
        details = completed.stderr.decode("utf-8", errors="replace").strip()
        raise ValueError(f"ffmpeg failed to decode {file_path}: {details}")

    # "<f4" matches the f32le stream regardless of host byte order; astype
    # yields a native, writable float32 copy.
    waveform = np.frombuffer(completed.stdout, dtype="<f4").astype(np.float32)
    if waveform.size == 0:
        raise ValueError(f"ffmpeg produced no audio samples for {file_path}")
    return waveform
|
79 |
+
|
80 |
+
def yt_transcribe(yt_url, task, max_filesize=75.0, segment_length=30*1000):
    """Download a YouTube video's audio and transcribe it segment by segment.

    The audio is cut into consecutive fixed-length slices, each slice is
    decoded with ``ffmpeg_read`` and passed to ``pipe``, and the per-slice
    texts are joined with single spaces. Slices are cut at fixed offsets, so
    a word that straddles a boundary may be split between two slices.

    Args:
        yt_url: URL of the video; passed to ``_return_yt_html_embed`` and
            ``download_yt_audio``.
        task: Value forwarded to the pipeline as ``generate_kwargs["task"]``.
        max_filesize: Not used by this function; kept so existing callers
            that pass it keep working.
        segment_length: Length of each slice, used as the slice step on the
            loaded audio (pydub slices in milliseconds, so the default is
            30 seconds).

    Returns:
        A ``(html_embed_str, transcription)`` tuple.

    Raises:
        ValueError: If ``segment_length`` is not positive.
    """
    # Validate before downloading anything: a zero step would crash range()
    # and a negative one would silently yield an empty transcription.
    if segment_length <= 0:
        raise ValueError(
            f"segment_length must be a positive number of milliseconds, got {segment_length!r}"
        )

    html_embed_str = _return_yt_html_embed(yt_url)
    # Loop-invariant, so look it up once.
    sampling_rate = pipe.feature_extractor.sampling_rate
    transcriptions = []

    with tempfile.TemporaryDirectory() as tmpdirname:
        filepath = os.path.join(tmpdirname, "video.mp4")
        download_yt_audio(yt_url, filepath)

        # NOTE(review): AudioSegment is presumably pydub's class imported at
        # module level - confirm the import exists in this file.
        audio = AudioSegment.from_file(filepath, format="mp4")

        # One scratch file inside the temporary directory is reused for every
        # slice; it is removed together with the directory.
        segment_path = os.path.join(tmpdirname, "segment.wav")

        # Slice lazily instead of materialising every segment up front, which
        # would hold a second full copy of a long recording in memory.
        for start in range(0, len(audio), segment_length):
            segment = audio[start:start + segment_length]
            # export() returns the output file handle; close it so the file
            # can be safely overwritten on the next iteration.
            segment.export(segment_path, format="wav").close()

            segment_data = ffmpeg_read(segment_path, sampling_rate)
            inputs = {"array": segment_data, "sampling_rate": sampling_rate}

            text = pipe(
                inputs,
                batch_size=BATCH_SIZE,
                generate_kwargs={"task": task},
                return_timestamps=True,
            )["text"].strip()
            # Skip empty results so the join never produces doubled spaces.
            if text:
                transcriptions.append(text)

    full_transcription = " ".join(transcriptions)
    return html_embed_str, full_transcription
|
109 |
|
|
|
110 |
|
111 |
|
112 |
demo = gr.Blocks()
|