Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -27,14 +27,19 @@ def unzip_images(zip_file):
|
|
27 |
|
28 |
return image_paths, image_data, session_dir
|
29 |
|
30 |
-
@spaces.GPU(duration=
|
31 |
def generate_caption(image_path, prompt):
|
32 |
try:
|
33 |
-
# Load processor and model
|
34 |
-
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True, torch_dtype=
|
35 |
-
model = AutoModelForCausalLM.from_pretrained(
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
-
#
|
38 |
model.to('cuda')
|
39 |
|
40 |
image = Image.open(image_path)
|
@@ -43,10 +48,10 @@ def generate_caption(image_path, prompt):
|
|
43 |
text=prompt,
|
44 |
)
|
45 |
|
46 |
-
# Move inputs to GPU
|
47 |
-
inputs = {k: v.to('cuda').unsqueeze(0) for k, v in inputs.items()}
|
48 |
|
49 |
-
with torch.autocast(device_type="cuda", enabled=True):
|
50 |
output = model.generate_from_batch(
|
51 |
inputs,
|
52 |
GenerationConfig(max_new_tokens=200, stop_strings="<|endoftext|>"),
|
@@ -60,12 +65,11 @@ def generate_caption(image_path, prompt):
|
|
60 |
del model
|
61 |
del inputs
|
62 |
del output
|
63 |
-
torch.cuda.empty_cache()
|
64 |
|
65 |
return generated_text
|
66 |
|
67 |
except Exception as e:
|
68 |
-
# Clean up on error
|
69 |
torch.cuda.empty_cache()
|
70 |
raise e
|
71 |
|
|
|
27 |
|
28 |
return image_paths, image_data, session_dir
|
29 |
|
30 |
+
@spaces.GPU(duration=180) # Keep increased timeout
|
31 |
def generate_caption(image_path, prompt):
|
32 |
try:
|
33 |
+
# Load processor and model in FP16
|
34 |
+
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True, torch_dtype=torch.float16)
|
35 |
+
model = AutoModelForCausalLM.from_pretrained(
|
36 |
+
model_id,
|
37 |
+
trust_remote_code=True,
|
38 |
+
torch_dtype=torch.float16, # Cast model to FP16
|
39 |
+
device_map='auto'
|
40 |
+
)
|
41 |
|
42 |
+
# Move model to GPU
|
43 |
model.to('cuda')
|
44 |
|
45 |
image = Image.open(image_path)
|
|
|
48 |
text=prompt,
|
49 |
)
|
50 |
|
51 |
+
# Move and cast inputs to FP16 on GPU
|
52 |
+
inputs = {k: v.to('cuda', dtype=torch.float16).unsqueeze(0) for k, v in inputs.items()}
|
53 |
|
54 |
+
with torch.autocast(device_type="cuda", dtype=torch.float16, enabled=True):
|
55 |
output = model.generate_from_batch(
|
56 |
inputs,
|
57 |
GenerationConfig(max_new_tokens=200, stop_strings="<|endoftext|>"),
|
|
|
65 |
del model
|
66 |
del inputs
|
67 |
del output
|
68 |
+
torch.cuda.empty_cache()
|
69 |
|
70 |
return generated_text
|
71 |
|
72 |
except Exception as e:
|
|
|
73 |
torch.cuda.empty_cache()
|
74 |
raise e
|
75 |
|