---
license: mit
---

# Model usage

```python
import torch
import torch.nn.functional as F
from torch import nn
from transformers import AutoConfig, AutoTokenizer, BertModel


# First, define your custom model class again
class HFCustomBertModel(nn.Module):
    """BERT encoder followed by an additional Linear + Tanh pooling head."""

    def __init__(self, config):
        """Build the encoder and the pooling head from a model config.

        Args:
            config: Configuration object; `config.hidden_size` sets both the
                input and output width of the pooling head.
        """
        super().__init__()
        self.bert = BertModel(config)
        self.pooler = nn.Sequential(
            nn.Linear(config.hidden_size, config.hidden_size),
            nn.Tanh(),
        )

    def forward(self, input_ids, attention_mask=None, token_type_ids=None):
        """Encode the inputs and return the output of the pooling head.

        The head is applied on top of the encoder's own `pooler_output`.
        """
        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
        return self.pooler(outputs.pooler_output)


def load_custom_model_and_tokenizer(model_path):
    """Create the custom model and its tokenizer for `model_path`.

    NOTE(review): the model is constructed from the configuration only, so
    no trained weights are loaded here. If the repository ships a trained
    checkpoint, load it afterwards (e.g. with `model.load_state_dict(...)`);
    confirm the checkpoint layout before relying on the scores.

    Args:
        model_path: Hub repository id or local directory.

    Returns:
        A `(model, tokenizer)` tuple.
    """
    # Load the config
    config = AutoConfig.from_pretrained(model_path)

    # Initialize the custom model with the config
    model = HFCustomBertModel(config)

    # Load the tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    return model, tokenizer


# Usage
model_path = "Imran1/embadding"
model, tokenizer = load_custom_model_and_tokenizer(model_path)

queries = ["how much protein should a female eat"]
documents = [
    "As a general guideline, the CDC's average requirement of protein for "
    "women ages 19 to 70 is 46 grams per day. But, as you can see from this "
    "chart, you'll need to increase that if you're expecting or training for "
    "a marathon. "
    "Check out the chart below to see how much protein you should be eating "
    "each day."
]

model.eval()  # Set the model to evaluation mode

with torch.no_grad():
    # Tokenize and encode the queries and documents
    query_inputs = tokenizer(
        queries, padding=True, truncation=True, return_tensors="pt"
    )
    document_inputs = tokenizer(
        documents, padding=True, truncation=True, return_tensors="pt"
    )

    # Get embeddings
    query_embeddings = model(**query_inputs)
    document_embeddings = model(**document_inputs)

    # Normalize embeddings
    query_embeddings = F.normalize(query_embeddings, p=2, dim=1)
    document_embeddings = F.normalize(document_embeddings, p=2, dim=1)

    # Calculate cosine similarity
    scores = torch.matmul(query_embeddings, document_embeddings.transpose(0, 1))

# `.item()` is valid here because there is exactly one query and one document.
print(f"Similarity score: {scores.item():.4f}")
```

Example output:

```text
Similarity score: 0.9605
```