Spaces:
Paused
Paused
add loadimg from @not-lain
Browse files- app.py +19 -8
- requirements.txt +2 -1
app.py
CHANGED
@@ -15,6 +15,9 @@ import math
|
|
15 |
from typing import List, Optional, Tuple
|
16 |
import gc
|
17 |
from contextlib import contextmanager
|
|
|
|
|
|
|
18 |
|
19 |
title = "# **WIP / DEMO** 🙋🏻‍♂️Welcome to Tonic's Pixtral Model Demo"
|
20 |
description = """
|
@@ -25,8 +28,8 @@ This demo showcases two capabilities of the Pixtral model:
|
|
25 |
### Join us :
|
26 |
🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/qdfnvSPcqP) On 🤗Huggingface:[MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [Build Tonic](https://git.tonic-ai.com/contribute)🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
|
27 |
"""
|
28 |
-
|
29 |
-
model_path = snapshot_download(repo_id="mistralai/Pixtral-12B-2409")
|
30 |
|
31 |
with open(f'{model_path}/params.json', 'r') as f:
|
32 |
params = json.load(f)
|
@@ -186,9 +189,12 @@ tokenizer = MistralTokenizer.from_model("pixtral")
|
|
186 |
def preprocess_image(image):
|
187 |
if image is None:
|
188 |
raise ValueError("No image provided")
|
189 |
-
|
190 |
-
|
191 |
-
|
|
|
|
|
|
|
192 |
return image_tensor
|
193 |
|
194 |
@contextmanager
|
@@ -206,7 +212,9 @@ def generate_text(image, prompt, max_tokens):
|
|
206 |
try:
|
207 |
with gpu_memory_manager():
|
208 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
209 |
-
|
|
|
|
|
210 |
model.to(device)
|
211 |
|
212 |
tokenized = tokenizer.encode_chat_completion(
|
@@ -242,8 +250,11 @@ def calculate_similarity(image1, image2):
|
|
242 |
try:
|
243 |
with gpu_memory_manager():
|
244 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
245 |
-
|
246 |
-
|
|
|
|
|
|
|
247 |
model.to(device)
|
248 |
|
249 |
with torch.no_grad():
|
|
|
15 |
from typing import List, Optional, Tuple
|
16 |
import gc
|
17 |
from contextlib import contextmanager
|
18 |
+
import os
|
19 |
+
from loadimg import load_img
|
20 |
+
# Add this near the top of the file
|
21 |
|
22 |
title = "# **WIP / DEMO** 🙋🏻‍♂️Welcome to Tonic's Pixtral Model Demo"
|
23 |
description = """
|
|
|
28 |
### Join us :
|
29 |
🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/qdfnvSPcqP) On 🤗Huggingface:[MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [Build Tonic](https://git.tonic-ai.com/contribute)🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
|
30 |
"""
|
31 |
+
HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
|
32 |
+
model_path = snapshot_download(repo_id="mistralai/Pixtral-12B-2409", token=HUGGINGFACE_TOKEN)
|
33 |
|
34 |
with open(f'{model_path}/params.json', 'r') as f:
|
35 |
params = json.load(f)
|
|
|
189 |
def preprocess_image(image):
    """Turn an arbitrary image input into a normalized (1, 3, H, W) float tensor.

    Accepts anything ``loadimg`` understands (file path, URL, numpy array,
    PIL image, ...), forces RGB, resizes to the vision encoder's square
    input size, and scales pixel values into [0, 1].

    Raises:
        ValueError: if *image* is None.
    """
    if image is None:
        raise ValueError("No image provided")

    # loadimg normalizes every supported input kind to a PIL image.
    pil_image = load_img(image, output_type="pil", input_type="auto")
    pil_image = pil_image.convert('RGB')

    # The encoder expects a fixed square resolution taken from params.json.
    side = params['vision_encoder']['image_size']
    pil_image = pil_image.resize((side, side))

    # HWC uint8 -> CHW float in [0, 1], plus a leading batch dimension.
    pixels = np.array(pil_image)
    image_tensor = torch.tensor(pixels).permute(2, 0, 1).unsqueeze(0).float() / 255.0
    return image_tensor
|
199 |
|
200 |
@contextmanager
|
|
|
212 |
try:
|
213 |
with gpu_memory_manager():
|
214 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
215 |
+
# Use load_img here
|
216 |
+
image_pil = load_img(image, output_type="pil", input_type="auto")
|
217 |
+
image_tensor = preprocess_image(image_pil).to(device)
|
218 |
model.to(device)
|
219 |
|
220 |
tokenized = tokenizer.encode_chat_completion(
|
|
|
250 |
try:
|
251 |
with gpu_memory_manager():
|
252 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
253 |
+
# Use load_img for both images
|
254 |
+
pil_image1 = load_img(image1, output_type="pil", input_type="auto")
|
255 |
+
pil_image2 = load_img(image2, output_type="pil", input_type="auto")
|
256 |
+
tensor1 = preprocess_image(pil_image1).to(device)
|
257 |
+
tensor2 = preprocess_image(pil_image2).to(device)
|
258 |
model.to(device)
|
259 |
|
260 |
with torch.no_grad():
|
requirements.txt
CHANGED
@@ -3,4 +3,5 @@ safetensors>=0.3.1
|
|
3 |
gradio>=3.32.0
|
4 |
Pillow>=9.0.0
|
5 |
numpy>=1.21.0
|
6 |
-
mistral_common
|
|
|
|
3 |
gradio>=3.32.0
|
4 |
Pillow>=9.0.0
|
5 |
numpy>=1.21.0
|
6 |
+
mistral_common
|
7 |
+
loadimg
|