quarterturn committed
Commit 63f27a5 · verified · 1 parent: 285c7ab

Update app.py

Files changed (1)
  1. app.py +14 -10
app.py CHANGED
@@ -27,14 +27,19 @@ def unzip_images(zip_file):
 
     return image_paths, image_data, session_dir
 
-@spaces.GPU(duration=86)
+@spaces.GPU(duration=180)  # Keep increased timeout
 def generate_caption(image_path, prompt):
     try:
-        # Load processor and model
-        processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True, torch_dtype='auto')
-        model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True, torch_dtype='auto', device_map='auto')
+        # Load processor and model in FP16
+        processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True, torch_dtype=torch.float16)
+        model = AutoModelForCausalLM.from_pretrained(
+            model_id,
+            trust_remote_code=True,
+            torch_dtype=torch.float16,  # Cast model to FP16
+            device_map='auto'
+        )
 
-        # Explicitly move to GPU
+        # Move model to GPU
         model.to('cuda')
 
         image = Image.open(image_path)
@@ -43,10 +48,10 @@ def generate_caption(image_path, prompt):
             text=prompt,
         )
 
-        # Move inputs to GPU
-        inputs = {k: v.to('cuda').unsqueeze(0) for k, v in inputs.items()}
+        # Move and cast inputs to FP16 on GPU
+        inputs = {k: v.to('cuda', dtype=torch.float16).unsqueeze(0) for k, v in inputs.items()}
 
-        with torch.autocast(device_type="cuda", enabled=True):
+        with torch.autocast(device_type="cuda", dtype=torch.float16, enabled=True):
             output = model.generate_from_batch(
                 inputs,
                 GenerationConfig(max_new_tokens=200, stop_strings="<|endoftext|>"),
@@ -60,12 +65,11 @@ def generate_caption(image_path, prompt):
         del model
        del inputs
        del output
-        torch.cuda.empty_cache()  # Clear GPU memory
+        torch.cuda.empty_cache()
 
         return generated_text
 
     except Exception as e:
-        # Clean up on error
         torch.cuda.empty_cache()
         raise e
 
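
One caveat when reusing the FP16 pattern above: the dict comprehension casts every tensor returned by the processor to float16, including integer tensors such as input_ids, which token-embedding lookup requires to stay integral. A minimal dtype-aware sketch, not part of the commit (the helper name move_inputs_fp16 is hypothetical):

import torch

# Hypothetical helper: move processor outputs to the GPU, add a batch
# dimension, and down-cast only floating-point tensors (e.g. image tensors)
# to FP16, leaving integer tensors such as input_ids untouched.
def move_inputs_fp16(inputs, device="cuda"):
    batched = {}
    for name, tensor in inputs.items():
        tensor = tensor.to(device).unsqueeze(0)
        if tensor.is_floating_point():
            tensor = tensor.to(torch.float16)
        batched[name] = tensor
    return batched

Under torch.autocast the manual down-cast is usually unnecessary anyway, since autocast selects the compute dtype per operation.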