Spaces:
Runtime error
Runtime error
fix the bug in the transcribe_yt_vid_api function
Browse files
app.py
CHANGED
@@ -6,7 +6,6 @@ if not os.path.exists(save_dir):
|
|
6 |
transcription_model_id = "openai/whisper-large"
|
7 |
llm_model_id = "tiiuae/falcon-7b-instruct"
|
8 |
HF_TOKEN = os.environ.get("HF_TOKEN", None)
|
9 |
-
# HF_TOKEN = f"Bearer {HF_TOKEN}"
|
10 |
|
11 |
from youtube_transcript_api import YouTubeTranscriptApi
|
12 |
import pytube
|
@@ -65,7 +64,7 @@ def transcribe_yt_vid_api(url,api_token):
|
|
65 |
# download YouTube video's audio
|
66 |
yt = YouTube(str(url))
|
67 |
audio = yt.streams.filter(only_audio = True).first()
|
68 |
-
out_file = audio.download(filename="audio.
|
69 |
output_path = save_dir)
|
70 |
|
71 |
# Initialize client for the Whisper model
|
@@ -76,13 +75,13 @@ def transcribe_yt_vid_api(url,api_token):
|
|
76 |
import soundfile as sf
|
77 |
|
78 |
text = ''
|
79 |
-
t=
|
80 |
x, sr = librosa.load(out_file, sr=None)
|
81 |
# This gives x as audio file in numpy array and sr as original sampling rate
|
82 |
# The audio needs to be split in 20 second chunks since the API call truncates the response
|
83 |
-
for i in range(0, len(x)//(t * sr)):
|
84 |
y = x[t * sr * i: t * sr *(i+1)]
|
85 |
-
split_path = save_dir
|
86 |
sf.write(split_path, y, sr)
|
87 |
text += client.automatic_speech_recognition(split_path)
|
88 |
|
@@ -316,19 +315,22 @@ def get_video(url):
|
|
316 |
embed_html = '<iframe width="100%" height="315" src="https://www.youtube.com/embed/{}" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'.format(vid_id)
|
317 |
return embed_html
|
318 |
|
319 |
-
def summarize_youtube_video(url,force_transcribe,
|
320 |
-
temperature=1,words=
|
321 |
-
|
322 |
-
|
|
|
|
|
|
|
323 |
return summary, text, transcript_source, summary_source
|
324 |
|
325 |
html = '<iframe width="100%" height="315" src="https://www.youtube.com/embed/" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'
|
326 |
|
327 |
-
def change_transcribe_api(vis):
|
328 |
-
|
329 |
|
330 |
-
def change_api_token(vis):
|
331 |
-
|
332 |
|
333 |
def update_source(source):
|
334 |
return gr.Textbox(info=source)
|
@@ -345,21 +347,21 @@ with gr.Blocks() as demo:
|
|
345 |
with gr.Column(scale=4):
|
346 |
url = gr.Textbox(label="Enter YouTube video URL here:",placeholder="https://www.youtube.com/watch?v=")
|
347 |
with gr.Column(scale=1):
|
348 |
-
api_token = gr.Textbox(label="Paste your Hugging Face API token here
|
349 |
with gr.Column(scale=1):
|
350 |
sum_btn = gr.Button("Summarize!")
|
351 |
|
352 |
with gr.Accordion("Transcription Settings",open=False):
|
353 |
with gr.Row():
|
354 |
force_transcribe = gr.Checkbox(label="Transcribe even if transcription is available.", info='If unchecked, the app attempts to download the transcript from YouTube first. Check this if the transcript does not seem accurate.')
|
355 |
-
use_transcribe_api = gr.Checkbox(label="Transcribe using the HuggingFaceHub API.",
|
356 |
|
357 |
with gr.Accordion("Summarization Settings",open=False):
|
358 |
with gr.Row():
|
359 |
-
use_llm_api = gr.Checkbox(label="Summarize using the HuggingFaceHub API.",
|
360 |
do_sample = gr.Checkbox(label="Set the Temperature",value=True,visible=True)
|
361 |
temperature = gr.Slider(minimum=0,maximum=1,value=1.0,label="Generation temperature",visible=True)
|
362 |
-
words = gr.Slider(minimum=100,maximum=500,value=
|
363 |
|
364 |
gr.Markdown("# Results")
|
365 |
|
@@ -372,8 +374,8 @@ with gr.Blocks() as demo:
|
|
372 |
|
373 |
with gr.Row():
|
374 |
with gr.Group():
|
375 |
-
transcript_source = gr.Textbox(visible=False)
|
376 |
transcript = gr.Textbox(label="Full Transcript",placeholder="transcript...",show_label=True)
|
|
|
377 |
|
378 |
with gr.Accordion("Notes",open=False):
|
379 |
gr.Markdown("""
|
@@ -383,18 +385,18 @@ with gr.Blocks() as demo:
|
|
383 |
""")
|
384 |
|
385 |
# Defining the interactivity of the UI elements
|
386 |
-
force_transcribe.change(fn=change_transcribe_api,inputs=force_transcribe,outputs=use_transcribe_api)
|
387 |
-
use_transcribe_api.change(fn=change_api_token,inputs=use_transcribe_api,outputs=api_token)
|
388 |
-
use_llm_api.change(fn=change_api_token,inputs=use_llm_api,outputs=api_token)
|
389 |
transcript_source.change(fn=update_source,inputs=transcript_source,outputs=transcript)
|
390 |
summary_source.change(fn=update_source,inputs=summary_source,outputs=summary)
|
391 |
do_sample.change(fn=show_temp,inputs=do_sample,outputs=temperature)
|
392 |
|
393 |
# Defining the functions to call on clicking the button
|
394 |
sum_btn.click(fn=get_youtube_title, inputs=url, outputs=title, api_name="get_youtube_title", queue=False)
|
395 |
-
sum_btn.click(fn=summarize_youtube_video, inputs=[url,force_transcribe,
|
396 |
outputs=[summary,transcript, transcript_source, summary_source], api_name="summarize_youtube_video", queue=True)
|
397 |
sum_btn.click(fn=get_video, inputs=url, outputs=video, api_name="get_youtube_video", queue=False)
|
398 |
|
399 |
demo.queue()
|
400 |
-
demo.launch(share=False)
|
|
|
6 |
transcription_model_id = "openai/whisper-large"
|
7 |
llm_model_id = "tiiuae/falcon-7b-instruct"
|
8 |
HF_TOKEN = os.environ.get("HF_TOKEN", None)
|
|
|
9 |
|
10 |
from youtube_transcript_api import YouTubeTranscriptApi
|
11 |
import pytube
|
|
|
64 |
# download YouTube video's audio
|
65 |
yt = YouTube(str(url))
|
66 |
audio = yt.streams.filter(only_audio = True).first()
|
67 |
+
out_file = audio.download(filename="audio.wav",
|
68 |
output_path = save_dir)
|
69 |
|
70 |
# Initialize client for the Whisper model
|
|
|
75 |
import soundfile as sf
|
76 |
|
77 |
text = ''
|
78 |
+
t=25 # audio chunk length in seconds
|
79 |
x, sr = librosa.load(out_file, sr=None)
|
80 |
# librosa.load returns x, the audio samples as a NumPy array, and sr, the file's original sampling rate
|
81 |
# The audio needs to be split into t-second chunks (currently 25 s) since the API call truncates the response
|
82 |
+
for _,i in enumerate(range(0, (len(x)//(t * sr)) +1)):
|
83 |
y = x[t * sr * i: t * sr *(i+1)]
|
84 |
+
split_path = os.path.join(save_dir,"audio_split.wav")
|
85 |
sf.write(split_path, y, sr)
|
86 |
text += client.automatic_speech_recognition(split_path)
|
87 |
|
|
|
315 |
embed_html = '<iframe width="100%" height="315" src="https://www.youtube.com/embed/{}" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'.format(vid_id)
|
316 |
return embed_html
|
317 |
|
318 |
+
def summarize_youtube_video(url,force_transcribe,api_token="",
|
319 |
+
temperature=1.0,words=150,do_sample=True):
|
320 |
+
if api_token == "":
|
321 |
+
api_token = HF_TOKEN
|
322 |
+
title,text,transcript_source = transcribe_youtube_video(url,force_transcribe,True,api_token)
|
323 |
+
summary, summary_source = summarize_text(title,text,temperature,words,True,api_token,do_sample)
|
324 |
+
print(text)
|
325 |
return summary, text, transcript_source, summary_source
|
326 |
|
327 |
html = '<iframe width="100%" height="315" src="https://www.youtube.com/embed/" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'
|
328 |
|
329 |
+
# def change_transcribe_api(vis):
|
330 |
+
# return gr.Checkbox(value=False, visible=vis)
|
331 |
|
332 |
+
# def change_api_token(vis):
|
333 |
+
# return gr.Textbox(visible=vis)
|
334 |
|
335 |
def update_source(source):
|
336 |
return gr.Textbox(info=source)
|
|
|
347 |
with gr.Column(scale=4):
|
348 |
url = gr.Textbox(label="Enter YouTube video URL here:",placeholder="https://www.youtube.com/watch?v=")
|
349 |
with gr.Column(scale=1):
|
350 |
+
api_token = gr.Textbox(label="Paste your Hugging Face API token here:",placeholder="hf_...",visible=True,show_label=True,info='The API token passed via this field is not stored. It is only passed through the Hugging Face Hub API for inference.')
|
351 |
with gr.Column(scale=1):
|
352 |
sum_btn = gr.Button("Summarize!")
|
353 |
|
354 |
with gr.Accordion("Transcription Settings",open=False):
|
355 |
with gr.Row():
|
356 |
force_transcribe = gr.Checkbox(label="Transcribe even if transcription is available.", info='If unchecked, the app attempts to download the transcript from YouTube first. Check this if the transcript does not seem accurate.')
|
357 |
+
# use_transcribe_api = gr.Checkbox(label="Transcribe using the HuggingFaceHub API.",visible=False)
|
358 |
|
359 |
with gr.Accordion("Summarization Settings",open=False):
|
360 |
with gr.Row():
|
361 |
+
# use_llm_api = gr.Checkbox(label="Summarize using the HuggingFaceHub API.",visible=True)
|
362 |
do_sample = gr.Checkbox(label="Set the Temperature",value=True,visible=True)
|
363 |
temperature = gr.Slider(minimum=0,maximum=1,value=1.0,label="Generation temperature",visible=True)
|
364 |
+
words = gr.Slider(minimum=100,maximum=500,value=100,label="Length of the summary")
|
365 |
|
366 |
gr.Markdown("# Results")
|
367 |
|
|
|
374 |
|
375 |
with gr.Row():
|
376 |
with gr.Group():
|
|
|
377 |
transcript = gr.Textbox(label="Full Transcript",placeholder="transcript...",show_label=True)
|
378 |
+
transcript_source = gr.Textbox(visible=False)
|
379 |
|
380 |
with gr.Accordion("Notes",open=False):
|
381 |
gr.Markdown("""
|
|
|
385 |
""")
|
386 |
|
387 |
# Defining the interactivity of the UI elements
|
388 |
+
# force_transcribe.change(fn=change_transcribe_api,inputs=force_transcribe,outputs=use_transcribe_api)
|
389 |
+
# use_transcribe_api.change(fn=change_api_token,inputs=use_transcribe_api,outputs=api_token)
|
390 |
+
# use_llm_api.change(fn=change_api_token,inputs=use_llm_api,outputs=api_token)
|
391 |
transcript_source.change(fn=update_source,inputs=transcript_source,outputs=transcript)
|
392 |
summary_source.change(fn=update_source,inputs=summary_source,outputs=summary)
|
393 |
do_sample.change(fn=show_temp,inputs=do_sample,outputs=temperature)
|
394 |
|
395 |
# Defining the functions to call on clicking the button
|
396 |
sum_btn.click(fn=get_youtube_title, inputs=url, outputs=title, api_name="get_youtube_title", queue=False)
|
397 |
+
sum_btn.click(fn=summarize_youtube_video, inputs=[url,force_transcribe,api_token,temperature,words,do_sample],
|
398 |
outputs=[summary,transcript, transcript_source, summary_source], api_name="summarize_youtube_video", queue=True)
|
399 |
sum_btn.click(fn=get_video, inputs=url, outputs=video, api_name="get_youtube_video", queue=False)
|
400 |
|
401 |
demo.queue()
|
402 |
+
demo.launch(share=False)
|