storresbusquets committed on
Commit
16034fb
·
0 Parent(s):

Duplicate from storresbusquets/demo1

Browse files
Files changed (4) hide show
  1. .gitattributes +35 -0
  2. README.md +14 -0
  3. app.py +156 -0
  4. requirements.txt +6 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Demo1
3
+ emoji: 🚀
4
+ colorFrom: blue
5
+ colorTo: indigo
6
+ sdk: gradio
7
+ sdk_version: 3.42.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ duplicated_from: storresbusquets/demo1
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import whisper
3
+ from pytube import YouTube
4
+ from transformers import pipeline, T5Tokenizer, T5ForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM
5
+
6
class GradioInference:
    """Callable backend for the YouTube Insights Gradio app.

    The constructor loads a Whisper speech-to-text model and three Hugging
    Face models (summarization, keyword generation, text classification).
    The instance is then used as an event handler: calling it with a YouTube
    link, or calling ``from_audio_input`` with a local audio file, returns the
    transcription, a summary, a list of keywords and a classification label.
    """

    # Prefix prepended to the transcription before keyword generation.
    KEYWORD_TASK_PREFIX = "Keywords: "
    # Upper bound on tokens fed to the keyword model.
    # NOTE(review): 512 is the usual T5-base input length; confirm against the
    # Voicelab/vlt5-base-keywords model card.
    KEYWORD_MAX_TOKENS = 512

    def __init__(self):
        """Load every model once so requests only pay for inference."""
        # Public accessor for the checkpoint names (avoids the private
        # ``whisper._MODELS`` mapping).
        self.sizes = whisper.available_models()
        self.langs = ["none"] + sorted(whisper.tokenizer.LANGUAGES.values())
        self.current_size = "base"
        self.loaded_model = whisper.load_model(self.current_size)
        # Most recently resolved video and the link it was created from.
        self.yt = None
        self._yt_link = None
        self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

        # VoiceLabT5 model and tokenizer used for keyword generation.
        self.keyword_model = T5ForConditionalGeneration.from_pretrained("Voicelab/vlt5-base-keywords")
        self.keyword_tokenizer = T5Tokenizer.from_pretrained("Voicelab/vlt5-base-keywords")

        # Sentiment classifier (library-default text-classification model).
        self.classifier = pipeline("text-classification")

    def __call__(self, link, lang, size):
        """Download the audio of a YouTube video and analyze it.

        Args:
            link: URL of the YouTube video.
            lang: Language name for Whisper, or ``"none"`` to auto-detect.
            size: Whisper model size to transcribe with.

        Returns:
            A tuple ``(transcription, summary, keywords, label)`` where
            ``keywords`` is a list of strings.

        Raises:
            gr.Error: If the video exposes no audio-only stream.
        """
        import tempfile

        # Reuse the cached video only when it was built for this exact link;
        # otherwise a stale object would make us transcribe the wrong video.
        if self.yt is None or getattr(self, "_yt_link", None) != link:
            self.yt = YouTube(link)
            self._yt_link = link

        stream = self.yt.streams.filter(only_audio=True).first()
        if stream is None:
            raise gr.Error("No audio stream is available for this video.")

        # A per-request temporary directory keeps concurrent requests from
        # overwriting each other's download and removes the file afterwards.
        with tempfile.TemporaryDirectory() as workdir:
            path = stream.download(output_path=workdir, filename="tmp.mp4")
            text = self._transcribe(path, lang, size)

        return self._analyze_text(text)

    def populate_metadata(self, link):
        """Return ``(thumbnail_url, title)`` for the video behind ``link``.

        An empty link (for example after the Clear button resets the textbox)
        yields ``(None, None)`` and forgets the cached video instead of
        raising.
        """
        if not link or not link.strip():
            self.yt = None
            self._yt_link = None
            return None, None

        self.yt = YouTube(link)
        self._yt_link = link
        return self.yt.thumbnail_url, self.yt.title

    def from_audio_input(self, lang, size, audio_file):
        """Analyze a local audio file.

        Args:
            lang: Language name for Whisper, or ``"none"`` to auto-detect.
            size: Whisper model size to transcribe with.
            audio_file: Path of the audio file to transcribe.

        Returns:
            A tuple ``(transcription, summary, keywords, label)`` where
            ``keywords`` is a list of strings.

        Raises:
            gr.Error: If no audio file was provided.
        """
        if not audio_file:
            raise gr.Error("Please upload or record an audio file first.")

        return self._analyze_text(self._transcribe(audio_file, lang, size))

    def _ensure_model(self, size):
        """Load the Whisper model for ``size`` unless it is already loaded."""
        if size != self.current_size:
            self.loaded_model = whisper.load_model(size)
            self.current_size = size

    def _transcribe(self, audio_path, lang, size):
        """Transcribe ``audio_path`` and return the recognized text."""
        # The UI uses the sentinel "none" to request language auto-detection.
        language = None if lang == "none" else lang
        self._ensure_model(size)
        results = self.loaded_model.transcribe(audio_path, language=language)
        return results["text"]

    def _analyze_text(self, text):
        """Summarize ``text``, extract keywords and classify it.

        Returns ``(text, summary, keywords, label)``.
        """
        # Transcriptions can be far longer than the models' maximum input
        # length; truncation=True lets the tokenizers cut the input instead of
        # failing on over-long sequences.
        summary = self.summarizer(
            text,
            max_length=130,
            min_length=30,
            do_sample=False,
            truncation=True,
        )

        input_ids = self.keyword_tokenizer(
            self.KEYWORD_TASK_PREFIX + text,
            return_tensors="pt",
            truncation=True,
            max_length=self.KEYWORD_MAX_TOKENS,
        ).input_ids
        output = self.keyword_model.generate(input_ids, no_repeat_ngram_size=3, num_beams=4)
        predicted = self.keyword_tokenizer.decode(output[0], skip_special_tokens=True)
        keywords = self._split_keywords(predicted)

        label = self.classifier(text, truncation=True)[0]["label"]

        return text, summary[0]["summary_text"], keywords, label

    @staticmethod
    def _split_keywords(predicted):
        """Split a comma-separated keyword string, dropping blank entries."""
        return [part.strip() for part in predicted.split(",") if part.strip()]
80
+
81
+
82
# Build the inference backend once; every UI event handler below shares it.
gio = GradioInference()
title = "Youtube Insights"
description = "Your AI-powered video analytics tool"

# NOTE(review): the nesting of the layout below was reconstructed from a
# flattened diff view; confirm it against the running Space.
block = gr.Blocks(title=title)
with block as demo:
    gr.HTML(
        f"""
        <div style="text-align: center; max-width: 500px; margin: 0 auto;">
        <div>
        <h1>Youtube <span style="color: red;">Insights</span> 📹</h1>
        </div>
        <p style="margin-bottom: 10px; font-size: 94%">
        {description}
        </p>
        </div>
        """
    )
    with gr.Group():
        with gr.Tab("From YouTube"):
            with gr.Box():
                # Layout options are passed to the constructors; the
                # ``.style()`` helper is deprecated in Gradio 3.42.
                with gr.Row(equal_height=True):
                    size = gr.Dropdown(label="Model Size", choices=gio.sizes, value="base")
                    lang = gr.Dropdown(label="Language (Optional)", choices=gio.langs, value="none")
                link = gr.Textbox(label="YouTube Link", placeholder="Enter YouTube link...")
                # Named video_title so it does not shadow the page title above.
                video_title = gr.Label(label="Video Title")
                with gr.Row(equal_height=True):
                    img = gr.Image(label="Thumbnail")
                    text = gr.Textbox(
                        label="Transcription",
                        placeholder="Transcription Output...",
                        lines=10,
                        show_copy_button=True,
                        container=True,
                    )
                with gr.Row(equal_height=True):
                    summary = gr.Textbox(
                        label="Summary",
                        placeholder="Summary Output...",
                        lines=5,
                        show_copy_button=True,
                        container=True,
                    )
                    keywords = gr.Textbox(
                        label="Keywords",
                        placeholder="Keywords Output...",
                        lines=5,
                        show_copy_button=True,
                        container=True,
                    )
                    label = gr.Label(label="Sentiment Analysis")
                with gr.Row(equal_height=True):
                    clear = gr.ClearButton(
                        [link, video_title, img, text, summary, keywords, label],
                        scale=1,
                    )
                    btn = gr.Button("Get video insights", variant="primary", scale=1)
                btn.click(gio, inputs=[link, lang, size], outputs=[text, summary, keywords, label])
                # Refresh thumbnail and title whenever the link changes.
                link.change(gio.populate_metadata, inputs=[link], outputs=[img, video_title])

        with gr.Tab("From Audio file"):
            with gr.Box():
                with gr.Row(equal_height=True):
                    audio_size = gr.Dropdown(label="Model Size", choices=gio.sizes, value="base")
                    audio_lang = gr.Dropdown(label="Language (Optional)", choices=gio.langs, value="none")
                audio_file = gr.Audio(type="filepath")
                with gr.Row(equal_height=True):
                    audio_text = gr.Textbox(
                        label="Transcription",
                        placeholder="Transcription Output...",
                        lines=10,
                        show_copy_button=True,
                        container=False,
                    )
                with gr.Row(equal_height=True):
                    audio_summary = gr.Textbox(label="Summary", placeholder="Summary Output", lines=5)
                    audio_keywords = gr.Textbox(label="Keywords", placeholder="Keywords Output", lines=5)
                    audio_label = gr.Label(label="Sentiment Analysis")
                with gr.Row(equal_height=True):
                    # Reset the input and every output, matching the YouTube tab.
                    audio_clear = gr.ClearButton(
                        [audio_file, audio_text, audio_summary, audio_keywords, audio_label],
                        scale=1,
                    )
                    audio_btn = gr.Button("Get video insights", variant="primary", scale=1)
                audio_btn.click(
                    gio.from_audio_input,
                    inputs=[audio_lang, audio_size, audio_file],
                    outputs=[audio_text, audio_summary, audio_keywords, audio_label],
                )


with block:
    gr.Markdown("About the app:")

    with gr.Accordion("What is YouTube Insights?", open=False):
        gr.Markdown("YouTube Insights is a tool developed with academic purposes only, that creates summaries, keywords and sentiments analysis based on YouTube videos or user audio files.")

    with gr.Accordion("How does it work?", open=False):
        # Text corrected to name the summarization model that is actually loaded.
        gr.Markdown("Works by using OpenAI's Whisper for transcription, BART (facebook/bart-large-cnn) for summarization, VoiceLabT5 for keyword extraction and a Transformers text-classification pipeline for sentiment analysis.")

    gr.HTML("""
    <div style="text-align: center; max-width: 500px; margin: 0 auto;">
    <p style="margin-bottom: 10px; font-size: 96%">
    2023 Master in Big Data & Data Science - Universidad Complutense de Madrid
    </p>
    </div>
    """)

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ openai-whisper
2
+ transformers
3
+ torch
4
+ yake
5
+ pytube
6
+ sentencepiece