KarthickAdopleAI committed on
Commit
91af6e1
·
verified ·
1 Parent(s): b677d97

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +297 -0
app.py ADDED
@@ -0,0 +1,297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from openai import AzureOpenAI
2
+ import os
3
+ import ffmpeg
4
+ from typing import List
5
+ from moviepy.editor import VideoFileClip
6
+ import nltk
7
+ from sklearn.feature_extraction.text import TfidfVectorizer
8
+ import gradio as gr
9
+ from pytube import YouTube
10
+ import requests
11
+ import logging
12
+
13
# Fetch the NLTK data packages at import time. 'punkt' is the tokenizer
# data used by nltk.sent_tokenize; 'stopwords' is downloaded as well
# (NOTE(review): nothing visible in this file reads it - confirm it is needed).
for nltk_resource in ('punkt', 'stopwords'):
    nltk.download(nltk_resource)
15
+
16
+
17
class VideoAnalytics:
    """
    Class for performing analytics on videos including transcription, summarization, topic generation,
    and extraction of important sentences.

    Transcription is requested from the Hugging Face hosted Whisper endpoint, the summary and
    topics come from an Azure OpenAI chat deployment, and important sentences are picked
    locally by TF-IDF score.
    """

    def __init__(self):
        """
        Initialize the VideoAnalytics object.

        The Hugging Face API token is read from the ``HF_TOKEN`` environment variable.
        ``AzureOpenAI`` is constructed without arguments, so its configuration has to be
        supplied through the environment as well.
        """
        # Configure logging settings first so the warning below is formatted consistently
        logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

        # Initialize AzureOpenAI client
        self.client = AzureOpenAI()

        # Initialize transcribed text variable
        self.transcribed_text = ""

        # API URL for accessing the Hugging Face model
        self.API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"

        # os.environ.get returns None (instead of raising) when the variable is unset
        hf_token = os.environ.get("HF_TOKEN")
        if not hf_token:
            logging.warning("HF_TOKEN is not set; transcription requests will be sent without a token")

        # Set headers for API requests; omit the Authorization header rather than send "Bearer None"
        self.headers = {"Authorization": f"Bearer {hf_token}"} if hf_token else {}

    def transcribe_video(self, vid: str) -> str:
        """
        Transcribe the audio of the video.

        The audio track is written to ``output_audio.mp3`` in the working directory and posted
        to ``self.API_URL``. On success the transcript is also stored in ``self.transcribed_text``.

        Args:
            vid (str): Path to the video file.

        Returns:
            str: Transcribed text, or an empty string if anything fails.
        """
        # Drop the previous transcript so a failure cannot leave stale text behind
        # for the summary/topic/sentence steps to pick up.
        self.transcribed_text = ""
        video = None
        try:
            # Load the video file and extract audio
            video = VideoFileClip(vid)
            audio = video.audio
            if audio is None:
                logging.error("Error transcribing video: the video has no audio track")
                return ""

            # Write audio to a temporary file
            audio.write_audiofile("output_audio.mp3")

            # Send audio data to the Hugging Face model; the context manager closes the file handle
            with open("output_audio.mp3", "rb") as audio_file:
                response = requests.post(self.API_URL, headers=self.headers, data=audio_file)
            output = response.json()

            # Anything without a "text" field is logged as-is so the API's own
            # error payload is visible instead of a bare KeyError.
            if not isinstance(output, dict) or "text" not in output:
                logging.error(f"Error transcribing video: unexpected API response: {output}")
                return ""

            # Update the transcribed_text attribute with the transcription result
            self.transcribed_text = output["text"]
            return self.transcribed_text

        except Exception as e:
            logging.error(f"Error transcribing video: {e}")
            return ""
        finally:
            # Release the clip's underlying reader resources
            if video is not None:
                video.close()

    def _chat(self, system_prompt: str, user_prompt: str) -> str:
        """Send one system/user exchange to the "ChatGPT" deployment and return the reply text."""
        response = self.client.chat.completions.create(
            model="ChatGPT",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0,
            max_tokens=1000
        )
        # content may be None; callers expect a string
        return response.choices[0].message.content or ""

    def generate_video_summary(self) -> str:
        """
        Generate a summary of the transcribed video.

        Returns:
            str: Generated summary, or an empty string if the request fails.
        """
        try:
            prompt = f"""summarize the following text delimited by triple backticks.
            In two format of Outputs given below:
            Abstractive Summary:
            Extractive Summary:
            ```{self.transcribed_text}```
            """
            return self._chat("You are a Summarizer", prompt)
        except Exception as e:
            logging.error(f"Error generating video summary: {e}")
            return ""

    def generate_topics(self) -> str:
        """
        Generate topics from the transcribed video.

        Returns:
            str: Generated topics, or an empty string if the request fails.
        """
        try:
            prompt = f"""generate single Topics from the following text don't make sentence for topic generation,delimited by triple backticks.
            list out the topics:
            Topics:
            ```{self.transcribed_text}```
            """
            return self._chat("You are a Topic Generator", prompt)
        except Exception as e:
            logging.error(f"Error generating topics: {e}")
            return ""

    def extract_video_important_sentence(self, threshold: float = 2) -> str:
        """
        Extract important sentences from the transcribed video.

        Each sentence is scored by the sum of its TF-IDF term weights; sentences scoring at
        least ``threshold`` are returned in descending score order.

        Args:
            threshold (float): Minimum score a sentence needs to be selected.

        Returns:
            str: Selected sentences separated by blank lines, or an empty string if there is
            no transcript or an error occurs.
        """
        # Nothing to rank; avoids the vectorizer raising on an empty corpus
        if not self.transcribed_text.strip():
            return ""
        try:
            # Local import: numpy is not imported at file level
            import numpy as np

            # Tokenize the sentences
            sentences = nltk.sent_tokenize(self.transcribed_text)

            # Fit the TF-IDF vectorizer on the sentences
            tfidf_matrix = TfidfVectorizer().fit_transform(sentences)

            # Row sums come back as a 2-D (n, 1) result; flatten to plain floats so the
            # ranking compares numbers rather than 1x1 matrices.
            scores = np.asarray(tfidf_matrix.sum(axis=1)).ravel().tolist()

            # Sort (score, sentence) pairs by score in descending order
            ranked = sorted(zip(scores, sentences), reverse=True)

            # Select sentences with scores at or above the threshold
            selected_sentences = [sentence for score, sentence in ranked if score >= threshold]

            # Join selected sentences to form the summary
            return '\n\n'.join(selected_sentences)

        except Exception as e:
            logging.error(f"Error extracting important sentences: {e}")
            return ""

    def write_text_files(self, text: str, filename: str) -> None:
        """
        Write text to a file.

        Args:
            text (str): Text to be written to the file.
            filename (str): Name of the file without extension; ".txt" is appended.
        """
        try:
            file_path = f"{filename}.txt"
            # Explicit UTF-8 so non-ASCII transcripts do not depend on the locale encoding
            with open(file_path, 'w', encoding='utf-8') as file:
                file.write(text)
        except Exception as e:
            logging.error(f"Error writing text to file: {e}")

    def Download(self, link: str) -> str:
        """
        Download a video from YouTube.

        Args:
            link (str): YouTube video link.

        Returns:
            str: Path to the downloaded video file, or an empty string if the download fails.
        """
        try:
            # Get the highest resolution stream for the provided link
            stream = YouTube(link).streams.get_highest_resolution()
            if stream is None:
                logging.error("Error downloading video: no downloadable stream found")
                return ""

            file_name = stream.download()
        except Exception as e:
            # Covers both resolving the link and the download itself
            logging.error(f"Error downloading video: {e}")
            return ""

        # Only reached when the download actually succeeded
        logging.info("Download is completed successfully")
        return file_name

    def main(self, video: str = None, input_path: str = None) -> tuple:
        """
        Perform video analytics.

        A YouTube link (``input_path``) takes precedence over an uploaded ``video``. The three
        results are also written to ``Summary.txt``, ``Important_Sentence.txt`` and ``Topics.txt``.

        Args:
            video (str): Path to the video file.
            input_path (str): YouTube link of the video to download.

        Returns:
            tuple: Summary, important sentences, and topics. Three empty strings are returned
            when no input is given or when download/transcription fails.
        """
        try:
            # Download the video if input_path is provided, otherwise use the provided video path
            if input_path:
                source = self.Download(input_path)
            elif video:
                source = video
            else:
                logging.warning("No video file or YouTube link was provided")
                return "", "", ""

            if not source:
                # Download already logged the reason
                return "", "", ""

            if not self.transcribe_video(source):
                logging.warning("Transcription is empty; skipping analytics")
                return "", "", ""

            # Generate summary, important sentences, and topics
            summary = self.generate_video_summary()
            self.write_text_files(summary, "Summary")
            important_sentences = self.extract_video_important_sentence()
            self.write_text_files(important_sentences, "Important_Sentence")
            topics = self.generate_topics()
            self.write_text_files(topics, "Topics")

            # Return the generated summary, important sentences, and topics
            return summary, important_sentences, topics

        except Exception as e:
            # Log any errors that occur during video analytics
            logging.error(f"Error in main function: {e}")
            return "", "", ""

    def gradio_interface(self):
        """
        Build and launch the Gradio UI.

        The submit button calls ``main`` with the uploaded video and the YouTube link (in that
        order) and fills the three result text boxes. The download buttons point at the text
        files that ``main`` writes.
        """
        with gr.Blocks(css="style.css",theme=gr.themes.Soft()) as demo:
            gr.HTML("""<center><h1>Video Analytics</h1></center>""")
            with gr.Row():
                yt_link = gr.Textbox(label= "Youtube Link",placeholder="https://www.youtube.com/watch?v=")
            with gr.Row():
                video = gr.Video(sources="upload",height=200,width=300)
            with gr.Row():
                submit_btn = gr.Button(value="Submit")
            with gr.Tab("Summary"):
                with gr.Row():
                    summary = gr.Textbox(show_label=False,lines=10)
                with gr.Row():
                    summary_download = gr.DownloadButton(label="Download",value="Summary.txt",visible=True,size='lg',elem_classes="download_button")
            with gr.Tab("Important Sentences"):
                with gr.Row():
                    Important_Sentences = gr.Textbox(show_label=False,lines=10)
                with gr.Row():
                    sentence_download = gr.DownloadButton(label="Download",value="Important_Sentence.txt",visible=True,size='lg',elem_classes="download_button")
            with gr.Tab("Topics"):
                with gr.Row():
                    Topics = gr.Textbox(show_label=False,lines=10)
                with gr.Row():
                    topics_download = gr.DownloadButton(label="Download",value="Topics.txt",visible=True,size='lg',elem_classes="download_button")
            # Inputs are positional: (video, yt_link) -> main(video, input_path)
            submit_btn.click(self.main,[video,yt_link],[summary,Important_Sentences,Topics])
        demo.launch()