tomiwa1a committed
Commit b80a146
1 Parent(s): efe5d70

add summarizer to handler

Files changed (1)
handler.py  +28 -1
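For context, the summarizer this commit wires into the handler is a plain transformers summarization pipeline. Below is a minimal sketch of what that pipeline returns on its own, assuming the same model name the commit adds to handler.py; the sample text and the CPU/GPU index handling are illustrative and not part of the commit.

from transformers import pipeline
import torch

# Same model name the commit adds to handler.py; device index -1 = CPU, 0 = first GPU.
device_number = 0 if torch.cuda.is_available() else -1
summarizer = pipeline("summarization",
                      model="philschmid/bart-large-cnn-samsum",
                      device=device_number)

text = (
    "In this lecture we introduce gradient descent. Starting from an initial guess, "
    "we compute the gradient of the loss and repeatedly step in the opposite "
    "direction until the updates become small."
)

# The pipeline returns a list with one dict per input; the handler below
# reads the 'summary_text' field of the first entry.
result = summarizer(text)
print(result[0]["summary_text"])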
handler.py CHANGED
@@ -1,9 +1,12 @@
+"""
+https://huggingface.co/tomiwa1a/video-search
+"""
 from typing import Dict
 
 from sentence_transformers import SentenceTransformer
 from tqdm import tqdm
 import whisper
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 import torch
 import pytube
 import time
@@ -14,7 +17,9 @@ class EndpointHandler():
     WHISPER_MODEL_NAME = "tiny.en"
     SENTENCE_TRANSFORMER_MODEL_NAME = "multi-qa-mpnet-base-dot-v1"
     QUESTION_ANSWER_MODEL_NAME = "vblagoje/bart_lfqa"
+    SUMMARIZER_MODEL_NAME = "philschmid/bart-large-cnn-samsum"
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    device_number = 0 if torch.cuda.is_available() else -1
 
     def __init__(self, path=""):
 
@@ -34,6 +39,13 @@ class EndpointHandler():
 
         total = t1 - t0
         print(f'Finished loading sentence_transformer_model in {total} seconds')
+
+        t0 = time.time()
+        self.summarizer = pipeline("summarization", model=self.SUMMARIZER_MODEL_NAME, device=device)
+        t1 = time.time()
+
+        total = t1 - t0
+        print(f'Finished loading summarizer in {total} seconds')
 
         self.question_answer_tokenizer = AutoTokenizer.from_pretrained(self.QUESTION_ANSWER_MODEL_NAME)
         t0 = time.time()
@@ -59,6 +71,7 @@
         video_url = data.pop("video_url", None)
         query = data.pop("query", None)
         long_form_answer = data.pop("long_form_answer", None)
+        summarize = data.pop("summarize", False)
         encoded_segments = {}
         if video_url:
             video_with_transcript = self.transcribe_video(video_url)
@@ -73,6 +86,9 @@
                 **video_with_transcript,
                 **encoded_segments
             }
+        elif summarize:
+            summary = self.summarize_video(data["segments"])
+            return {"summary": summary}
         elif query:
             if long_form_answer:
                 context = data.pop("context", None)
@@ -167,6 +183,17 @@
 
         return all_batches
 
+    def summarize_video(self, segments):
+        for index, segment in enumerate(segments):
+            segment['summary'] = self.summarizer(segment['text'])
+            segment['summary'] = segment['summary'][0]['summary_text']
+            print('index', index)
+            print('length', segment['length'])
+            print('text', segment['text'])
+            print('summary', segment['summary'])
+
+        return segments
+
     def generate_answer(self, query, documents):
 
         # concatenate question and support documents into BART input
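A hedged sketch, not part of the repo, of exercising the new summarize branch locally. The payload shape, a "summarize" flag plus a "segments" list whose items carry "text" and "length" keys, is inferred from the diff above and may need adjusting to the real endpoint contract.

from handler import EndpointHandler

# Loads Whisper, the sentence transformer, the LFQA model and the new summarizer,
# so construction takes a while on first run.
handler = EndpointHandler()

payload = {
    "summarize": True,
    "segments": [
        {"text": "First chunk of the video transcript...", "length": 120},
        {"text": "Second chunk of the video transcript...", "length": 95},
    ],
}

# The summarize branch returns {"summary": segments}, where each segment now
# also carries a 'summary' field produced by the summarization pipeline.
response = handler(payload)
print(response["summary"][0]["summary"])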