ozanoktay committed on
Commit
a0adb7d
1 Parent(s): 371998e

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +19 -12
README.md CHANGED
@@ -68,24 +68,31 @@ Here is how to use this model to extract radiological sentence embeddings and ob
68
  ```python
69
  import torch
70
  from transformers import AutoModel, AutoTokenizer
 
71
  # Load the model and tokenizer
72
  url = "microsoft/BiomedVLP-BioViL-T"
73
  tokenizer = AutoTokenizer.from_pretrained(url, trust_remote_code=True)
74
  model = AutoModel.from_pretrained(url, trust_remote_code=True)
75
- # Input text prompts (e.g., reference, synonym, contradiction)
76
- text_prompts = ["There is no pneumothorax or pleural effusion",
77
- "No pleural effusion or pneumothorax is seen",
78
- "The extent of the pleural effusion is constant."
79
- "Interval enlargement of moderate pleural effusion"]
 
 
 
 
80
  # Tokenize and compute the sentence embeddings
81
- tokenizer_output = tokenizer.batch_encode_plus(batch_text_or_text_pairs=text_prompts,
82
- add_special_tokens=True,
83
- padding='longest',
84
- return_tensors='pt')
85
- embeddings = model.get_projected_text_embeddings(input_ids=tokenizer_output.input_ids,
 
86
  attention_mask=tokenizer_output.attention_mask)
87
- # Compute the cosine similarity of sentence embeddings obtained from input text prompts.
88
- sim = torch.mm(embeddings, embeddings.t())
 
89
  ```
90
 
91
  ## Data
 
68
  ```python
69
  import torch
70
  from transformers import AutoModel, AutoTokenizer
71
+
72
  # Load the model and tokenizer
73
  url = "microsoft/BiomedVLP-BioViL-T"
74
  tokenizer = AutoTokenizer.from_pretrained(url, trust_remote_code=True)
75
  model = AutoModel.from_pretrained(url, trust_remote_code=True)
76
+
77
+ # Input text prompts describing findings.
78
+ # The order of prompts is adjusted to capture the spectrum from absence of a finding to its temporal progression.
79
+ text_prompts = ["No pleural effusion or pneumothorax is seen",
80
+ "There is no pneumothorax or pleural effusion",
81
+ "The extent of the pleural effusion is reduced.",
82
+ "The extent of the pleural effusion remains constant.",
83
+ "Interval enlargement of pleural effusion"]
84
+
85
  # Tokenize and compute the sentence embeddings
86
+ with torch.no_grad():
87
+ tokenizer_output = tokenizer.batch_encode_plus(batch_text_or_text_pairs=text_prompts,
88
+ add_special_tokens=True,
89
+ padding='longest',
90
+ return_tensors='pt')
91
+ embeddings = model.get_projected_text_embeddings(input_ids=tokenizer_output.input_ids,
92
  attention_mask=tokenizer_output.attention_mask)
93
+
94
+ # Compute the cosine similarity of sentence embeddings obtained from input text prompts.
95
+ sim = torch.mm(embeddings, embeddings.t())
96
  ```
97
 
98
  ## Data