VideoAnalytics

Runtime error

App Files Files Community

KarthickAdopleAI committed on Mar 25, 2024

Commit

91af6e1

verified ·

1 Parent(s): b677d97

Create app.py

Browse files

Files changed (1) hide show

app.py +297 -0

app.py ADDED Viewed

	@@ -0,0 +1,297 @@

+from openai import AzureOpenAI
+import os
+import ffmpeg
+from typing import List
+from moviepy.editor import VideoFileClip
+import nltk
+from sklearn.feature_extraction.text import TfidfVectorizer
+import gradio as gr
+from pytube import YouTube
+import requests
+import logging
+nltk.download('punkt')
+nltk.download('stopwords')
+class VideoAnalytics:
+    """
+    Class for performing analytics on videos including transcription, summarization, topic generation,
+    and extraction of important sentences.
+    """
+    def __init__(self):
+      """
+      Initialize the VideoAnalytics object.
+      Args:
+          hf_token (str): Hugging Face API token.
+      """
+      # Initialize AzureOpenAI client
+      self.client = AzureOpenAI()
+      # Initialize transcribed text variable
+      self.transcribed_text = ""
+      # API URL for accessing the Hugging Face model
+      self.API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
+      # Placeholder for Hugging Face API token
+      hf_token = os.get_environ("HF_TOKEN")  # Replace this with the actual Hugging Face API token
+      # Set headers for API requests with Hugging Face token
+      self.headers = {"Authorization": f"Bearer {hf_token}"}
+      # Configure logging settings
+      logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+    def transcribe_video(self, vid: str) -> str:
+      """
+      Transcribe the audio of the video.
+      Args:
+          vid (str): Path to the video file.
+      Returns:
+          str: Transcribed text.
+      """
+      try:
+          # Load the video file and extract audio
+          video = VideoFileClip(vid)
+          audio = video.audio
+          # Write audio to a temporary file
+          audio.write_audiofile("output_audio.mp3")
+          audio_file = open("output_audio.mp3", "rb")
+          # Define a helper function to query the Hugging Face model
+          def query(data):
+              response = requests.post(self.API_URL, headers=self.headers, data=data)
+              return response.json()
+          # Send audio data to the Hugging Face model for transcription
+          output = query(audio_file)
+          # Update the transcribed_text attribute with the transcription result
+          self.transcribed_text = output["text"]
+          # Return the transcribed text
+          return output["text"]
+      except Exception as e:
+          logging.error(f"Error transcribing video: {e}")
+          return ""
+    def generate_video_summary(self) -> str:
+        """
+        Generate a summary of the transcribed video.
+        Returns:
+            str: Generated summary.
+        """
+        try:
+          # Define a conversation between system and user
+          conversation = [
+            {"role": "system", "content": "You are a Summarizer"},
+            {"role": "user", "content": f"""summarize the following text delimited by triple backticks.
+                      In two format of Outputs given below:
+                      Abstractive Summary:
+                      Extractive Summary:
+                      ```{self.transcribed_text}```
+              """}
+              ]
+          # Generate completion using ChatGPT model
+          response = self.client.chat.completions.create(
+              model="ChatGPT",
+              messages=conversation,
+              temperature=0,
+              max_tokens=1000
+          )
+          # Get the generated summary message
+          message = response.choices[0].message.content
+          return message
+        except Exception as e:
+            logging.error(f"Error generating video summary: {e}")
+            return ""
+    def generate_topics(self) -> str:
+        """
+        Generate topics from the transcribed video.
+        Returns:
+            str: Generated topics.
+        """
+        try:
+          # Define a conversation between system and user
+          conversation = [
+            {"role": "system", "content": "You are a Topic Generator"},
+            {"role": "user", "content": f"""generate single Topics from the following text don't make sentence for topic generation,delimited by triple backticks.
+                      list out the topics:
+                      Topics:
+                      ```{self.transcribed_text}```
+              """}
+              ]
+          # Generate completion using ChatGPT model
+          response = self.client.chat.completions.create(
+              model="ChatGPT",
+              messages=conversation,
+              temperature=0,
+              max_tokens=1000
+          )
+          # Get the generated topics message
+          message = response.choices[0].message.content
+          return message
+        except Exception as e:
+            logging.error(f"Error generating topics: {e}")
+            return ""
+    def extract_video_important_sentence(self) -> str:
+        """
+        Extract important sentences from the transcribed video.
+        Returns:
+            str: Extracted important sentences.
+        """
+        try:
+          # Tokenize the sentences
+          sentences = nltk.sent_tokenize(self.transcribed_text)
+          # Initialize TF-IDF vectorizer
+          tfidf_vectorizer = TfidfVectorizer()
+          # Fit the vectorizer on the summary sentences
+          tfidf_matrix = tfidf_vectorizer.fit_transform(sentences)
+          # Calculate sentence scores based on TF-IDF values
+          sentence_scores = tfidf_matrix.sum(axis=1)
+          # Create a list of (score, sentence) tuples
+          sentence_rankings = [(score, sentence) for score, sentence in zip(sentence_scores, sentences)]
+          # Sort sentences by score in descending order
+          sentence_rankings.sort(reverse=True)
+          # Set a threshold for selecting sentences
+          threshold = 2 # Adjust as needed
+          # Select sentences with scores above the threshold
+          selected_sentences = [sentence for score, sentence in sentence_rankings if score >= threshold]
+          # Join selected sentences to form the summary
+          summary = '\n\n'.join(selected_sentences)
+          return summary
+        except Exception as e:
+            logging.error(f"Error extracting important sentences: {e}")
+            return ""
+    def write_text_files(self, text: str, filename: str) -> None:
+        """
+        Write text to a file.
+        Args:
+            text (str): Text to be written to the file.
+            filename (str): Name of the file.
+        """
+        try:
+          file_path = f"{filename}.txt"
+          with open(file_path, 'w') as file:
+              # Write content to the file
+              file.write(text)
+        except Exception as e:
+            logging.error(f"Error writing text to file: {e}")
+    def Download(self, link: str) -> str:
+        """
+        Download a video from YouTube.
+        Args:
+            link (str): YouTube video link.
+        Returns:
+            str: Path to the downloaded video file.
+        """
+        try:
+          # Initialize YouTube object with the provided link
+          youtubeObject = YouTube(link)
+          # Get the highest resolution stream
+          youtubeObject = youtubeObject.streams.get_highest_resolution()
+          try:
+              # Attempt to download the video
+              file_name = youtubeObject.download()
+              return file_name
+          except:
+              # Log any errors that occur during video download
+              logging.info("An error has occurred")
+          logging.info("Download is completed successfully")
+        except Exception as e:
+            # Log any errors that occur during initialization of YouTube object
+            logging.error(f"Error downloading video: {e}")
+            return ""
+    def main(self, video: str = None, input_path: str = None) -> tuple:
+        """
+        Perform video analytics.
+        Args:
+            video (str): Path to the video file.
+            input_path (str): Input path for the video.
+        Returns:
+            tuple: Summary, important sentences, and topics.
+        """
+        try:
+          # Download the video if input_path is provided, otherwise use the provided video path
+          if input_path:
+            input_path = self.Download(input_path)
+            text = self.transcribe_video(input_path)
+          elif video:
+            text = self.transcribe_video(video)
+            input_path = video
+          # Generate summary, important sentences, and topics
+          summary = self.generate_video_summary()
+          self.write_text_files(summary,"Summary")
+          important_sentences = self.extract_video_important_sentence()
+          self.write_text_files(important_sentences,"Important_Sentence")
+          topics = self.generate_topics()
+          self.write_text_files(topics,"Topics")
+          # Return the generated summary, important sentences, and topics
+          return summary,important_sentences,topics
+        except Exception as e:
+            # Log any errors that occur during video analytics
+            logging.error(f"Error in main function: {e}")
+            return "", "", ""
+    def gradio_interface(self):
+        """
+        Build and launch the Gradio user interface.
+        The layout has a YouTube link text box, a video upload, a submit button and
+        three tabs (Summary, Important Sentences, Topics), each with a text box and a
+        download button. Clicking submit calls ``self.main`` and fills the three text boxes.
+        """
+        # NOTE(review): "style.css" is not visible here — presumably a stylesheet shipped next to the app; confirm
+        with gr.Blocks(css="style.css",theme=gr.themes.Soft()) as demo:
+            gr.HTML("""<center><h1>Video Analytics</h1></center>""")
+            # Inputs: a YouTube link or an uploaded video
+            with gr.Row():
+              yt_link = gr.Textbox(label= "Youtube Link",placeholder="https://www.youtube.com/watch?v=")
+            with gr.Row():
+              video = gr.Video(sources="upload",height=200,width=300)
+            with gr.Row():
+              submit_btn = gr.Button(value="Submit")
+            # Outputs: one tab per result. The download buttons point at fixed file names
+            # and are not wired to any event in the code visible here.
+            with gr.Tab("Summary"):
+              with gr.Row():
+                  summary = gr.Textbox(show_label=False,lines=10)
+              with gr.Row():
+                  summary_download = gr.DownloadButton(label="Download",value="Summary.txt",visible=True,size='lg',elem_classes="download_button")
+            with gr.Tab("Important Sentences"):
+              with gr.Row():
+                  Important_Sentences = gr.Textbox(show_label=False,lines=10)
+              with gr.Row():
+                  sentence_download = gr.DownloadButton(label="Download",value="Important_Sentence.txt",visible=True,size='lg',elem_classes="download_button")
+            with gr.Tab("Topics"):
+              with gr.Row():
+                  Topics = gr.Textbox(show_label=False,lines=10)
+              with gr.Row():
+                  topics_download = gr.DownloadButton(label="Download",value="Topics.txt",visible=True,size='lg',elem_classes="download_button")
+              # Input order matches main(video, input_path): the uploaded video first, then the link.
+              # This call sits inside the "Topics" tab block.
+              submit_btn.click(self.main,[video,yt_link],[summary,Important_Sentences,Topics])
+        # Start the app once the layout is defined
+        demo.launch()