Tonic committed on
Commit ace4204
1 Parent(s): 964b92e

Update app.py

Files changed (1)
  1. app.py +56 -59
app.py CHANGED
@@ -44,75 +44,72 @@ def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
     sequence_lengths = attention_mask.sum(dim=1) - 1
     batch_size = last_hidden_states.shape[0]
     return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
+
 def clear_cuda_cache():
     torch.cuda.empty_cache()
 
 def free_memory(*args):
     for arg in args:
         del arg
-
-class EmbeddingModel:
-    def __init__(self):
-        self.tokenizer = AutoTokenizer.from_pretrained('intfloat/e5-mistral-7b-instruct')
-        self.model = AutoModel.from_pretrained('intfloat/e5-mistral-7b-instruct', torch_dtype=torch.float16, device_map=device)
-
-    def _compute_cosine_similarity(self, emb1, emb2):
-        tensor1 = torch.tensor(emb1).to(device).half()
-        tensor2 = torch.tensor(emb2).to(device).half()
-        similarity = F.cosine_similarity(tensor1, tensor2).item()
-        free_memory(tensor1, tensor2)
-        return similarity
-
-    def compute_embeddings(self, selected_task, input_text):
-        try:
-            task_description = tasks[selected_task]
-        except KeyError:
-            print(f"Selected task not found: {selected_task}")
-            return f"Error: Task '{selected_task}' not found. Please select a valid task."
-        max_length = 2042
-        processed_texts = [f'Instruct: {task_description}\nQuery: {input_text}']
 
-        batch_dict = self.tokenizer(processed_texts, max_length=max_length - 1, return_attention_mask=False, padding=False, truncation=True)
-        batch_dict['input_ids'] = [input_ids + [self.tokenizer.eos_token_id] for input_ids in batch_dict['input_ids']]
-        batch_dict = self.tokenizer.pad(batch_dict, padding=True, return_attention_mask=True, return_tensors='pt')
-        batch_dict = {k: v.to(device) for k, v in batch_dict.items()}
-        outputs = self.model(**batch_dict)
-        embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
-        embeddings = F.normalize(embeddings, p=2, dim=1)
-        embeddings_list = embeddings.detach().cpu().numpy().tolist()
-        return embeddings_list
-
-    def compute_similarity(self, selected_task, sentence1, sentence2, extra_sentence1, extra_sentence2):
-        try:
-            task_description = tasks[selected_task]
-        except KeyError:
-            print(f"Selected task not found: {selected_task}")
-            return f"Error: Task '{selected_task}' not found. Please select a valid task."
-        # Compute embeddings for each sentence
-        embeddings1 = self.compute_embeddings(self.selected_task, sentence1)
-        embeddings2 = self.compute_embeddings(self.selected_task, sentence2)
-        embeddings3 = self.compute_embeddings(self.selected_task, extra_sentence1)
-        embeddings4 = self.compute_embeddings(self.selected_task, extra_sentence2)
 
-        # Convert embeddings to tensors
-        embeddings_tensor1 = torch.tensor(embeddings1).to(device).half()
-        embeddings_tensor2 = torch.tensor(embeddings2).to(device).half()
-        embeddings_tensor3 = torch.tensor(embeddings3).to(device).half()
-        embeddings_tensor4 = torch.tensor(embeddings4).to(device).half()
 
-        # Compute cosine similarity
-        similarity1 = self._compute_cosine_similarity(embeddings1, embeddings2)
-        similarity2 = self._compute_cosine_similarity(embeddings1, embeddings3)
-        similarity3 = self._compute_cosine_similarity(embeddings1, embeddings4)
 
-        # Free memory
-        free_memory(embeddings1, embeddings2, embeddings3, embeddings4)
 
-        return similarity1, similarity2, similarity3
 
-
 
+# @spaces.GPU
+def compute_embeddings(selected_task, input_text):
+    try:
+        task_description = tasks[selected_task]
+    except KeyError:
+        print(f"Selected task not found: {selected_task}")
+        return f"Error: Task '{selected_task}' not found. Please select a valid task."
+    max_length = 2042
+    processed_texts = [f'Instruct: {task_description}\nQuery: {input_text}']
 
+    batch_dict = self.tokenizer(processed_texts, max_length=max_length - 1, return_attention_mask=False, padding=False, truncation=True)
+    batch_dict['input_ids'] = [input_ids + [self.tokenizer.eos_token_id] for input_ids in batch_dict['input_ids']]
+    batch_dict = self.tokenizer.pad(batch_dict, padding=True, return_attention_mask=True, return_tensors='pt')
+    batch_dict = {k: v.to(device) for k, v in batch_dict.items()}
+    outputs = self.model(**batch_dict)
+    embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
+    embeddings = F.normalize(embeddings, p=2, dim=1)
+    embeddings_list = embeddings.detach().cpu().numpy().tolist()
+    return embeddings_list
+
+# @spaces.GPU
+def compute_similarity(selected_task, sentence1, sentence2, extra_sentence1, extra_sentence2):
+    try:
+        task_description = tasks[selected_task]
+    except KeyError:
+        print(f"Selected task not found: {selected_task}")
+        return f"Error: Task '{selected_task}' not found. Please select a valid task."
+    # Compute embeddings for each sentence
+    embeddings1 = self.compute_embeddings(self.selected_task, sentence1)
+    embeddings2 = self.compute_embeddings(self.selected_task, sentence2)
+    embeddings3 = self.compute_embeddings(self.selected_task, extra_sentence1)
+    embeddings4 = self.compute_embeddings(self.selected_task, extra_sentence2)
 
+    # Convert embeddings to tensors
+    embeddings_tensor1 = torch.tensor(embeddings1).to(device).half()
+    embeddings_tensor2 = torch.tensor(embeddings2).to(device).half()
+    embeddings_tensor3 = torch.tensor(embeddings3).to(device).half()
+    embeddings_tensor4 = torch.tensor(embeddings4).to(device).half()
+
+    # Compute cosine similarity
+    similarity1 = self._compute_cosine_similarity(embeddings1, embeddings2)
+    similarity2 = self._compute_cosine_similarity(embeddings1, embeddings3)
+    similarity3 = self._compute_cosine_similarity(embeddings1, embeddings4)
 
+    # Free memory
+    free_memory(embeddings1, embeddings2, embeddings3, embeddings4)
 
+    return similarity1, similarity2, similarity3
+
+# @spaces.GPU
+def _compute_cosine_similarity(emb1, emb2):
+    tensor1 = torch.tensor(emb1).to(device).half()
+    tensor2 = torch.tensor(emb2).to(device).half()
+    similarity = F.cosine_similarity(tensor1, tensor2).item()
+    free_memory(tensor1, tensor2)
+    return similarity
 
 def app_interface():
-    embedding_model = EmbeddingModel()
     with gr.Blocks() as demo:
         gr.Markdown(title)
         gr.Markdown(description)
@@ -124,7 +121,7 @@ def app_interface():
         compute_button = gr.Button("Try🐣🛌🏻e5")
         output_display = gr.Textbox(label="🐣e5-mistral🛌🏻 Embeddings")
        compute_button.click(
-            fn=embedding_model.compute_embeddings,
+            fn=compute_embeddings,
             inputs=[task_dropdown, input_text_box],
             outputs=output_display
         )
@@ -137,8 +134,8 @@ def app_interface():
         similarity_button = gr.Button("Compute Similarity")
         similarity_output = gr.Label(label="🐣e5-mistral🛌🏻 Similarity Scores")
         similarity_button.click(
-            fn=embedding_model.compute_similarity,
-            inputs=[task_dropdown, sentence1_box, sentence2_box],
+            fn=compute_similarity,
+            inputs=[task_dropdown, sentence1_box, sentence2_box, extra_sentence1_box, extra_sentence2_box],
             outputs=similarity_output
         )
 
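
Reviewer note: the refactor moves compute_embeddings, compute_similarity, and _compute_cosine_similarity out of the deleted EmbeddingModel class, but their new module-level bodies still reference self.tokenizer, self.model, self.compute_embeddings, self._compute_cosine_similarity, and self.selected_task, names that no longer exist once the class is gone. Below is a minimal sketch of the module-level setup the refactored functions appear to assume, mirroring the removed __init__. It is illustrative only and not part of this commit; app.py is assumed to define its own device and tasks elsewhere.

# Illustrative sketch only -- not part of this commit.
import torch
from transformers import AutoTokenizer, AutoModel

device = "cuda" if torch.cuda.is_available() else "cpu"  # app.py is assumed to define its own `device`

# Module-level equivalents of the deleted EmbeddingModel.__init__
tokenizer = AutoTokenizer.from_pretrained('intfloat/e5-mistral-7b-instruct')
model = AutoModel.from_pretrained(
    'intfloat/e5-mistral-7b-instruct',
    torch_dtype=torch.float16,
    device_map=device,
)

# With these in place, the leftover `self.` prefixes in the new functions would be dropped, e.g.:
#   batch_dict = tokenizer(processed_texts, ...)
#   outputs = model(**batch_dict)
#   embeddings1 = compute_embeddings(selected_task, sentence1)
#   similarity1 = _compute_cosine_similarity(embeddings1, embeddings2)

Loading the tokenizer and model once at module import would keep the behaviour of the old design, where the class was instantiated a single time inside app_interface().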