A model for mapping abstract sentence descriptions to sentences that fit those descriptions. Trained on PubMed sentences. Use load_finetuned_model()
to load the tokenizer together with the query and sentence encoders, and encode_batch()
to encode a batch of sentences with one of the encoders.
from transformers import AutoTokenizer, AutoModel
import torch
def load_finetuned_model():
    """Load the tokenizer and the two pretrained encoders.

    Both encoders are fetched with ``AutoModel.from_pretrained`` at pinned
    revisions. The tokenizer is fetched from the sentence-encoder repository
    and, unlike the encoders, is requested without a pinned revision.

    Returns:
        A ``(tokenizer, query_encoder, sentence_encoder)`` tuple.
    """
    sentence_repo = "biu-nlp/abstract-sim-sentence-pubmed"
    query_repo = "biu-nlp/abstract-sim-query-pubmed"

    sentence_encoder = AutoModel.from_pretrained(
        sentence_repo,
        revision="71f4539120e29024adc618173a1ed5fd230ac249",
    )
    query_encoder = AutoModel.from_pretrained(
        query_repo,
        revision="8d34676d80a39bcbc5a1d2eec13e6f8078496215",
    )
    tokenizer = AutoTokenizer.from_pretrained(sentence_repo)

    return tokenizer, query_encoder, sentence_encoder
def encode_batch(model, tokenizer, sentences, device):
    """Encode sentences as the attention-masked mean of the model's output.

    Args:
        model: Callable invoked as ``model(**batch)``; element ``0`` of its
            result is pooled over dimension 1.
        tokenizer: Callable that tokenizes ``sentences`` (padded, truncated
            to 128 tokens, returned as PyTorch tensors) and whose result
            supports ``.to(device)`` and lookup of ``"attention_mask"``.
        sentences: The text passed to ``tokenizer``.
        device: Device the tokenized batch is moved to before the forward
            pass. The model itself is not moved here.

    Returns:
        One pooled vector per input row: the sum over dimension 1 of the
        masked output, divided by the number of unmasked positions.
    """
    batch = tokenizer(
        sentences,
        padding=True,
        max_length=128,
        truncation=True,
        return_tensors="pt",
        add_special_tokens=True,
    ).to(device)

    token_states = model(**batch)[0]
    mask = batch["attention_mask"]

    # Zero out padded positions before summing so they do not contribute.
    masked_sum = torch.sum(token_states * mask.unsqueeze(-1), dim=1)
    # Clamp the per-row token count so the division can never be by zero.
    token_counts = torch.clamp(torch.sum(mask, dim=1, keepdim=True), min=1e-9)

    return masked_sum / token_counts
- Downloads last month: 170
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social
visibility and check back later, or deploy to Inference Endpoints (dedicated)
instead.