from utils.sentence_embedding import *
from utils.clustering import *
from models.summarizers import *
from nltk.tokenize import sent_tokenize, word_tokenize
import math
import numpy as np
from time import perf_counter
import time


def get_summary(model_name, article, max_length, min_length, increment):
    """Summarize `article`, writing rough progress (0-100) into increment[0]."""
    start_time = perf_counter()
    summarization_model, summarization_tokenizer = load_summarizer(model_name)
    summarizer_token_limit = summarization_tokenizer.model_max_length
    print("Summarizer token limit:", summarizer_token_limit)

    input_word_toks = word_tokenize(article)
    num_words = len(input_word_toks)

    if num_words <= summarizer_token_limit and model_name == "t5":
        # Input fits within the model's context window: summarize it directly.
        pred_summary = summarize_input(article, summarization_model, summarization_tokenizer)
    else:
        # Input exceeds the token limit: cluster sentences by embedding,
        # summarize each cluster, then summarize the concatenated summaries.
        input_sent_toks = sent_tokenize(article)
        embeddings = make_embeddings(input_sent_toks, mean_pooling)
        embeddings = embeddings.numpy()
        increment[0] = 20

        n_clusters_estimate = math.ceil(num_words / summarizer_token_limit)
        clemb = ClusterEmbeddings(
            cluster_estimate=n_clusters_estimate,
            cluster_fn="agglo",  # agglomerative clustering works much better here
            embeddings=embeddings,
            sentences=np.array(input_sent_toks),
            words=np.array(input_word_toks),
        )
        increment[0] = 50

        sentence_clusters = clemb.get_sentence_clusters()
        n = len(sentence_clusters)
        summs = ""
        for cluster in sentence_clusters:
            cluster_summary = summarize_input(
                cluster,
                summarization_model,
                summarization_tokenizer,
                max_length=250,
                min_length=50,
            )
            if isinstance(cluster_summary, list):
                cluster_summary = cluster_summary[0]
            summs += cluster_summary + " "
            increment[0] += 40 / n

        # Final pass: condense the per-cluster summaries into one summary.
        pred_summary = summarize_input(
            summs,
            summarization_model,
            summarization_tokenizer,
            max_length=max_length,
            min_length=min_length,
        )
        increment[0] = 100  # done

    end_time = perf_counter()
    time_taken = end_time - start_time
    return pred_summary, time_taken


def test():
    article = """Recent text-to-image matching models apply contrastive learning to large corpora of uncurated pairs of images and sentences. While such models can provide a powerful score for matching and subsequent zero-shot tasks, they are not capable of generating caption given an image. In this work, we repurpose such models to generate a descriptive text given an image at inference time, without any further training or tuning step. This is done by combining the visual-semantic model with a large language model, benefiting from the knowledge in both web-scale models. The resulting captions are much less restrictive than those obtained by supervised captioning methods. Moreover, as a zero-shot learning method, it is extremely flexible and we demonstrate its ability to perform image arithmetic in which the inputs can be either images or text and the output is a sentence."""
    model_name = "BART"
    summ, time_taken = get_summary(model_name, article, 250, 150, [0])
    print(summ)
    print(time_taken)
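
# Usage sketch (hypothetical caller, not part of the original module): run
# get_summary in a worker thread and poll the shared `increment` list to
# report rough progress. Assumes the utils.* and models.summarizers helpers
# imported above are importable from the repository root; names and defaults
# below are illustrative.
import threading


def summarize_with_progress(article, model_name="BART", max_length=250, min_length=150):
    increment = [0]  # shared progress value, updated inside get_summary
    result = {}

    def worker():
        result["summary"], result["time"] = get_summary(
            model_name, article, max_length, min_length, increment
        )

    thread = threading.Thread(target=worker)
    thread.start()
    while thread.is_alive():
        print(f"progress: {increment[0]:.0f}%")
        time.sleep(1)
    thread.join()
    return result["summary"], result["time"]


if __name__ == "__main__":
    test()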