tkottke committed on
Commit
15b519b
1 Parent(s): 89a88dd

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.txt +13 -0
  2. app.py +30 -0
  3. requirements.txt +4 -0
  4. summarize.py +43 -0
README.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: 📺NLP Video Summary📝
3
+ emoji: 📺📝
4
+ colorFrom: red
5
+ colorTo: green
6
+ sdk: gradio
7
+ sdk_version: 2.9.1
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
app.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from summarize import Summarizer
3
+
4
# Input widgets: a two-line text box for the video link and a radio
# selector whose chosen value is passed on as the model name.
link_box = gr.inputs.Textbox(
    lines=2,
    placeholder="Enter your link...",
    label='YouTube Video Link',
)
model_picker = gr.inputs.Radio(["mT5", "BART"], type="value", label='Model')

# Output widget that displays the value returned by Summarizer.
summary_box = gr.outputs.Textbox(label="Summary")

# Example rows offered in the UI, each one [video link, model name].
example_rows = [
    ['https://www.youtube.com/watch?v=cdiD-9MMpb0', 'BART'],
    ['https://www.youtube.com/watch?v=p3lsYlod5OU&t=5202s', 'BART'],
    ['https://www.youtube.com/watch?v=Gfr50f6ZBvo&t=1493s', 'BART'],
    ['https://www.youtube.com/watch?v=4oDZyOf6CW4&t=3149s', 'BART'],
    ['https://www.youtube.com/watch?v=lvh3g7eszVQ&t=291s', 'mT5'],
    ['https://www.youtube.com/watch?v=OaeYUm06in0', 'mT5'],
    ['https://www.youtube.com/watch?v=ZecQ64l-gKM&t=545s', 'mT5'],
    ['https://www.youtube.com/watch?v=5zOHSysMmH0&t=5798s', 'mT5'],
    ['https://www.youtube.com/watch?v=X0-SXS6zdEQ&t=23s', 'mT5'],
    ['https://www.youtube.com/watch?v=gFEE3w7F0ww&t=18s', 'mT5'],
    ['https://www.youtube.com/watch?v=Z1KwkpTUbkg&t=30s', 'mT5'],
    ['https://www.youtube.com/watch?v=rIpUf-Vy2JA&t=3542s', 'mT5'],
    ['https://www.youtube.com/watch?v=bgNzUxyS-kQ&t=3631s', 'mT5'],
]

# Wire the widgets to the Summarizer callable imported from summarize.py.
interface = gr.Interface(
    fn=Summarizer,
    inputs=[link_box, model_picker],
    outputs=[summary_box],
    title="Video Summary Generator",
    examples=example_rows,
    enable_queue=True,
)

interface.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ sentencepiece
4
+ youtube-transcript-api
summarize.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import traceback
2
+ import sys
3
+
4
+ from youtube_transcript_api import YouTubeTranscriptApi
5
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
6
+
7
# Hugging Face checkpoint to load for each supported model name.
_CHECKPOINTS = {
    "Pegasus": "google/pegasus-large",
    "mT5": "csebuetnlp/mT5_multilingual_XLSum",
    "BART": "sshleifer/distilbart-cnn-12-6",
}


def _extract_video_id(link):
    """Return the YouTube video ID found in *link*.

    Handles watch URLs (``...watch?v=<id>``, with or without further query
    parameters such as ``&t=30s``) and ``youtu.be/<id>`` short links.

    Raises:
        ValueError: if no video ID can be found in *link*.
    """
    # Imported locally so this block does not depend on the file's import
    # header being edited.
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(link.strip())

    # Splitting the raw link on "=" would keep trailing parameters glued to
    # the ID ("<id>&t"); parsing the query string isolates the "v" value.
    ids = parse_qs(parsed.query).get("v")
    if ids:
        return ids[0]

    if (parsed.hostname or "").lower() == "youtu.be":
        short_id = parsed.path.strip("/")
        if short_id:
            return short_id

    raise ValueError(f"Could not find a YouTube video ID in {link!r}")


def Summarizer(link, model):
    """Summarize the transcript of a YouTube video.

    Args:
        link: URL of the YouTube video.
        model: Name of the summarization model; one of the keys of
            ``_CHECKPOINTS`` ("Pegasus", "mT5" or "BART").

    Returns:
        The generated summary as a string, or ``None`` when any step fails
        (unusable link, unknown model, transcript or model error). The
        traceback of the failure is printed to stdout.
    """
    try:
        # Validate both inputs before any transcript or model loading so
        # bad requests fail fast.
        video_id = _extract_video_id(link)
        checkpoint = _CHECKPOINTS.get(model)
        if checkpoint is None:
            raise ValueError(
                f"Unknown model {model!r}; expected one of {sorted(_CHECKPOINTS)}"
            )

        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        full_text = ' '.join(entry['text'] for entry in transcript)

        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        # Kept under its own name so the `model` argument is not rebound.
        seq2seq = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

        # The transcript is truncated to at most 1024 tokens before
        # generation.
        inputs = tokenizer(
            full_text,
            max_length=1024,
            truncation=True,
            return_tensors="pt",
        )

        summary_ids = seq2seq.generate(inputs["input_ids"])
        summary = tokenizer.batch_decode(
            summary_ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
        return summary[0]

    except Exception:
        # Best-effort boundary for the UI callback: log the failure and
        # return no summary instead of propagating the exception.
        print(traceback.format_exc())
        return None