Imran1 committed on
Commit
52f0805
1 Parent(s): 924bda0

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +65 -3
README.md CHANGED
@@ -1,3 +1,65 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ ---
4
+ # Model usage
5
+
6
+ ```python
7
+ from transformers import AutoConfig, AutoTokenizer, BertModel
8
+ from torch import nn
9
+ import torch.nn.functional as F
10
+ import torch
11
+ # First, define your custom model class again
12
+ class HFCustomBertModel(nn.Module):
13
+ def __init__(self, config):
14
+ super().__init__()
15
+ self.bert = BertModel(config)
16
+ self.pooler = nn.Sequential(
17
+ nn.Linear(config.hidden_size, config.hidden_size),
18
+ nn.Tanh()
19
+ )
20
+
21
+ def forward(self, input_ids, attention_mask=None, token_type_ids=None):
22
+ outputs = self.bert(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
23
+ pooled_output = self.pooler(outputs.pooler_output)
24
+ return pooled_output
25
+ def load_custom_model_and_tokenizer(model_path):
26
+ # Load the config
27
+ config = AutoConfig.from_pretrained(model_path)
28
+
29
+ # Initialize the custom model from the config (note: this only builds the architecture; no trained weights are loaded in this snippet)
30
+ model = HFCustomBertModel(config)
31
+ # Load the tokenizer
32
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
33
+
34
+ return model, tokenizer
35
+
36
+ # Usage
37
+ model_path = "Imran1/embadding"
38
+ model, tokenizer = load_custom_model_and_tokenizer(model_path)
39
+
40
+
41
+
42
+ queries = ["how much protein should a female eat"]
43
+ documents = ["As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or training for a marathon. Check out the chart below to see how much protein you should be eating each day."]
44
+
45
+ model.eval() # Set the model to evaluation mode
46
+
47
+ with torch.no_grad():
48
+ # Tokenize and encode the queries and documents
49
+ query_inputs = tokenizer(queries, padding=True, truncation=True, return_tensors="pt")
50
+ document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors="pt")
51
+
52
+ # Get embeddings
53
+ query_embeddings = model(**query_inputs)
54
+ document_embeddings = model(**document_inputs)
55
+
56
+ # Normalize embeddings
57
+ query_embeddings = F.normalize(query_embeddings, p=2, dim=1)
58
+ document_embeddings = F.normalize(document_embeddings, p=2, dim=1)
59
+
60
+ # Calculate cosine similarity
61
+ scores = torch.matmul(query_embeddings, document_embeddings.transpose(0, 1))
62
+
63
+ print(f"Similarity score: {scores.item():.4f}")
64
+ # Example output: Similarity score: 0.9605
65
+ ```