Tonic committed on
Commit
af9af6d
·
unverified ·
1 Parent(s): 56352f5

add loadimg from @not-lain

Browse files
Files changed (2) hide show
  1. app.py +19 -8
  2. requirements.txt +2 -1
app.py CHANGED
@@ -15,6 +15,9 @@ import math
15
  from typing import List, Optional, Tuple
16
  import gc
17
  from contextlib import contextmanager
 
 
 
18
 
19
  title = "# **WIP / DEMO** 🙋🏻‍♂️Welcome to Tonic's Pixtral Model Demo"
20
  description = """
@@ -25,8 +28,8 @@ This demo showcases two capabilities of the Pixtral model:
25
  ### Join us :
26
  🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/qdfnvSPcqP) On 🤗Huggingface:[MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [Build Tonic](https://git.tonic-ai.com/contribute)🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
27
  """
28
-
29
- model_path = snapshot_download(repo_id="mistral-community/pixtral-12b-240910")
30
 
31
  with open(f'{model_path}/params.json', 'r') as f:
32
  params = json.load(f)
@@ -186,9 +189,12 @@ tokenizer = MistralTokenizer.from_model("pixtral")
186
  def preprocess_image(image):
187
  if image is None:
188
  raise ValueError("No image provided")
189
- image = image.convert('RGB')
190
- image = image.resize((params['vision_encoder']['image_size'], params['vision_encoder']['image_size']))
191
- image_tensor = torch.tensor(np.array(image)).permute(2, 0, 1).unsqueeze(0).float() / 255.0
 
 
 
192
  return image_tensor
193
 
194
  @contextmanager
@@ -206,7 +212,9 @@ def generate_text(image, prompt, max_tokens):
206
  try:
207
  with gpu_memory_manager():
208
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
209
- image_tensor = preprocess_image(image).to(device)
 
 
210
  model.to(device)
211
 
212
  tokenized = tokenizer.encode_chat_completion(
@@ -242,8 +250,11 @@ def calculate_similarity(image1, image2):
242
  try:
243
  with gpu_memory_manager():
244
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
245
- tensor1 = preprocess_image(image1).to(device)
246
- tensor2 = preprocess_image(image2).to(device)
 
 
 
247
  model.to(device)
248
 
249
  with torch.no_grad():
 
15
  from typing import List, Optional, Tuple
16
  import gc
17
  from contextlib import contextmanager
18
+ import os
19
+ from loadimg import load_img
20
+ # Add this near the top of the file
21
 
22
  title = "# **WIP / DEMO** 🙋🏻‍♂️Welcome to Tonic's Pixtral Model Demo"
23
  description = """
 
28
  ### Join us :
29
  🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/qdfnvSPcqP) On 🤗Huggingface:[MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [Build Tonic](https://git.tonic-ai.com/contribute)🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
30
  """
31
+ HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
32
+ model_path = snapshot_download(repo_id="mistralai/Pixtral-12B-2409", token=HUGGINGFACE_TOKEN)
33
 
34
  with open(f'{model_path}/params.json', 'r') as f:
35
  params = json.load(f)
 
189
  def preprocess_image(image):
190
  if image is None:
191
  raise ValueError("No image provided")
192
+
193
+ pil_image = load_img(image, output_type="pil", input_type="auto")
194
+
195
+ pil_image = pil_image.convert('RGB')
196
+ pil_image = pil_image.resize((params['vision_encoder']['image_size'], params['vision_encoder']['image_size']))
197
+ image_tensor = torch.tensor(np.array(pil_image)).permute(2, 0, 1).unsqueeze(0).float() / 255.0
198
  return image_tensor
199
 
200
  @contextmanager
 
212
  try:
213
  with gpu_memory_manager():
214
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
215
+ # Use load_img here
216
+ image_pil = load_img(image, output_type="pil", input_type="auto")
217
+ image_tensor = preprocess_image(image_pil).to(device)
218
  model.to(device)
219
 
220
  tokenized = tokenizer.encode_chat_completion(
 
250
  try:
251
  with gpu_memory_manager():
252
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
253
+ # Use load_img for both images
254
+ pil_image1 = load_img(image1, output_type="pil", input_type="auto")
255
+ pil_image2 = load_img(image2, output_type="pil", input_type="auto")
256
+ tensor1 = preprocess_image(pil_image1).to(device)
257
+ tensor2 = preprocess_image(pil_image2).to(device)
258
  model.to(device)
259
 
260
  with torch.no_grad():
requirements.txt CHANGED
@@ -3,4 +3,5 @@ safetensors>=0.3.1
3
  gradio>=3.32.0
4
  Pillow>=9.0.0
5
  numpy>=1.21.0
6
- mistral_common
 
 
3
  gradio>=3.32.0
4
  Pillow>=9.0.0
5
  numpy>=1.21.0
6
+ mistral_common
7
+ loadimg