Spaces:
Runtime error
Runtime error
Jonathan Li
committed on
Commit
·
5de9db4
1
Parent(s):
1ebc0dd
Fix url problem
Browse files
app.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import gradio as gr
|
2 |
import requests
|
|
|
3 |
from transformers import AutoTokenizer, pipeline
|
4 |
from youtube_transcript_api._transcripts import TranscriptListFetcher
|
5 |
|
@@ -12,6 +13,14 @@ tokenizer = AutoTokenizer.from_pretrained("./checkpoint-6000")
|
|
12 |
max_size = 512
|
13 |
classes = [False, True]
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
def process(obj):
|
16 |
o = obj["events"]
|
17 |
new_l = []
|
@@ -65,8 +74,12 @@ def get_transcript(video_id, session):
|
|
65 |
p = process(obj.json())
|
66 |
return p
|
67 |
|
68 |
-
def transcript(
|
69 |
-
|
|
|
|
|
|
|
|
|
70 |
|
71 |
def inference(transcript):
|
72 |
tokens = tokenizer(transcript.split(" "))["input_ids"]
|
@@ -119,9 +132,9 @@ def predict(transcript):
|
|
119 |
with gr.Blocks() as demo:
|
120 |
with gr.Row():
|
121 |
with gr.Column():
|
122 |
-
inp = gr.Textbox(label="Video
|
123 |
btn = gr.Button("Fetch Transcript")
|
124 |
-
gr.Examples(["xsLJZyih3Ac"], [inp])
|
125 |
text = gr.Textbox(label="Transcript", placeholder="<generated transcript>")
|
126 |
btn.click(fn=transcript, inputs=inp, outputs=text)
|
127 |
with gr.Column():
|
|
|
1 |
import gradio as gr
|
2 |
import requests
|
3 |
+
import re
|
4 |
from transformers import AutoTokenizer, pipeline
|
5 |
from youtube_transcript_api._transcripts import TranscriptListFetcher
|
6 |
|
|
|
13 |
max_size = 512
|
14 |
classes = [False, True]
|
15 |
|
16 |
+
# Matches the common YouTube URL shapes (youtube.com, youtu.be,
# youtube-nocookie.com; watch/embed/v paths) and captures the 11-character
# video ID.  Compiled case-insensitively: the subdomain class is written with
# upper-case letters only, so without the flag ordinary lower-case hosts such
# as "www.youtube.com" would never match.
pattern = re.compile(
    r"(?:https?:\/\/)?"  # optional scheme
    r"(?:[0-9A-Z-]+\.)?"  # optional subdomain (www., m., ...)
    r"(?:youtube|youtu|youtube-nocookie)\.(?:com|be)\/"  # host and first slash
    r"(?:watch\?v=|watch\?.+&v=|embed\/|v\/|.+\?v=)?"  # optional path prefix
    r"([^&=\n%\?]{11})",  # group 1: the video ID
    re.IGNORECASE,
)


def video_id(url):
    """Extract the 11-character YouTube video ID from *url*.

    Args:
        url: A YouTube link, with or without scheme/subdomain
            (e.g. ``"youtu.be/xsLJZyih3Ac"``).  Surrounding whitespace is
            ignored.

    Returns:
        The video ID as a string, or ``None`` when *url* is empty or does not
        look like a YouTube link.
    """
    if not url:
        return None
    # The pattern is anchored at the start (``match``), so strip whitespace
    # that commonly sneaks in when a link is pasted into a text box.
    found = pattern.match(url.strip())
    return found.group(1) if found else None
|
23 |
+
|
24 |
def process(obj):
|
25 |
o = obj["events"]
|
26 |
new_l = []
|
|
|
74 |
p = process(obj.json())
|
75 |
return p
|
76 |
|
77 |
+
def transcript(url):
    """Fetch the transcript text for the YouTube video that *url* points to.

    Args:
        url: A YouTube link; the video ID is extracted with ``video_id``.

    Returns:
        The ``"w"`` field of every item returned by ``get_transcript``,
        stripped and joined with single spaces; or an ``"ERROR: ..."``
        message string when no video ID can be extracted from *url*.
    """
    vid = video_id(url)
    if not vid:
        return "ERROR: Failed to load transcript (is the link a valid youtube url?)..."
    # Use the session as a context manager so its pooled connections are
    # released once the transcript has been fetched.  The join happens inside
    # the block in case get_transcript yields its items lazily.
    with requests.Session() as session:
        segments = get_transcript(vid, session)
        return " ".join(segment["w"].strip() for segment in segments)
|
83 |
|
84 |
def inference(transcript):
|
85 |
tokens = tokenizer(transcript.split(" "))["input_ids"]
|
|
|
132 |
with gr.Blocks() as demo:
|
133 |
with gr.Row():
|
134 |
with gr.Column():
|
135 |
+
inp = gr.Textbox(label="Video URL", placeholder="Video URL", lines=1, max_lines=1)
|
136 |
btn = gr.Button("Fetch Transcript")
|
137 |
+
gr.Examples(["youtu.be/xsLJZyih3Ac"], [inp])
|
138 |
text = gr.Textbox(label="Transcript", placeholder="<generated transcript>")
|
139 |
btn.click(fn=transcript, inputs=inp, outputs=text)
|
140 |
with gr.Column():
|