sagar007 committed
Commit 44f3097 · verified · 1 Parent(s): 9023c9b

Update app.py

Files changed (1)
  1. app.py +77 -48
app.py CHANGED
@@ -1,38 +1,39 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer, AutoProcessor
 import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, AutoProcessor
 from PIL import Image
 import os
+import spaces

-# Check if CUDA is available, otherwise use CPU
-device = "cuda" if torch.cuda.is_available() else "cpu"
-print(f"Using device: {device}")
+# Initial setup without loading model to device
+print("Setting up the application...")

-# Load model and tokenizer with optimizations for CPU deployment
-def load_model():
-    print("Loading model and tokenizer...")
-    model = AutoModelForCausalLM.from_pretrained(
-        "sagar007/Lava_phi",
-        torch_dtype=torch.float32 if device == "cpu" else torch.bfloat16,
-        low_cpu_mem_usage=True,
-    )
-    model = model.to(device)
-
-    tokenizer = AutoTokenizer.from_pretrained("sagar007/Lava_phi")
-    processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
-
-    print("Model and tokenizer loaded successfully!")
-    return model, tokenizer, processor
+# We'll load the model in the GPU functions to avoid CPU memory issues
+model = None
+tokenizer = AutoTokenizer.from_pretrained("sagar007/Lava_phi")
+processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")

-# Load models
-model, tokenizer, processor = load_model()
+print("Tokenizer and processor loaded successfully!")

-# For text-only generation
+# For text-only generation with GPU on demand
+@spaces.GPU
 def generate_text(prompt, max_length=128):
     try:
-        inputs = tokenizer(f"human: {prompt}\ngpt:", return_tensors="pt").to(device)
+        global model

-        # Generate with low memory footprint settings
+        # Load model if not already loaded
+        if model is None:
+            print("Loading model on first request...")
+            model = AutoModelForCausalLM.from_pretrained(
+                "sagar007/Lava_phi",
+                torch_dtype=torch.float16, # Use float16 on GPU
+                device_map="auto" # This will put the model on GPU automatically
+            )
+            print("Model loaded successfully!")
+
+        inputs = tokenizer(f"human: {prompt}\ngpt:", return_tensors="pt").to(model.device)
+
+        # Generate with GPU
         with torch.no_grad():
             outputs = model.generate(
                 **inputs,
@@ -50,21 +51,35 @@ def generate_text(prompt, max_length=128):

         return generated_text
     except Exception as e:
+        # Capture and return any errors
         return f"Error generating text: {str(e)}"

-# For image and text processing
+# For image and text processing with GPU on demand
+@spaces.GPU
 def process_image_and_prompt(image, prompt, max_length=128):
     try:
         if image is None:
             return "No image provided. Please upload an image."

+        global model
+
+        # Load model if not already loaded
+        if model is None:
+            print("Loading model on first request...")
+            model = AutoModelForCausalLM.from_pretrained(
+                "sagar007/Lava_phi",
+                torch_dtype=torch.float16, # Use float16 on GPU
+                device_map="auto" # This will put the model on GPU automatically
+            )
+            print("Model loaded successfully!")
+
         # Process image
-        image_tensor = processor(images=image, return_tensors="pt").pixel_values.to(device)
+        image_tensor = processor(images=image, return_tensors="pt").pixel_values.to(model.device)

         # Tokenize input with image token
-        inputs = tokenizer(f"human: <image>\n{prompt}\ngpt:", return_tensors="pt").to(device)
+        inputs = tokenizer(f"human: <image>\n{prompt}\ngpt:", return_tensors="pt").to(model.device)

-        # Generate with memory optimizations
+        # Generate with GPU
         with torch.no_grad():
             outputs = model.generate(
                 input_ids=inputs["input_ids"],
@@ -84,12 +99,13 @@ def process_image_and_prompt(image, prompt, max_length=128):

         return generated_text
     except Exception as e:
+        # Capture and return any errors
         return f"Error processing image: {str(e)}"

 # Create Gradio Interface
 with gr.Blocks(title="LLaVA-Phi: Vision-Language Model") as demo:
     gr.Markdown("# LLaVA-Phi: Vision-Language Model")
-    gr.Markdown("This model can generate text responses from text prompts or analyze images with text prompts.")
+    gr.Markdown("This model uses ZeroGPU technology - GPU resources are allocated only when generating responses and released afterward.")

     with gr.Tab("Text Generation"):
         with gr.Row():
@@ -98,10 +114,22 @@ with gr.Blocks(title="LLaVA-Phi: Vision-Language Model") as demo:
                 text_max_length = gr.Slider(minimum=16, maximum=512, value=128, step=8, label="Maximum response length")
                 text_button = gr.Button("Generate")

-            text_output = gr.Textbox(label="Generated response", lines=8)
+            with gr.Column():
+                text_output = gr.Textbox(label="Generated response", lines=8)
+                text_status = gr.Markdown("*Status: Ready*")
+
+            def text_fn(prompt, max_length):
+                text_status.update("*Status: Generating response...*")
+                try:
+                    response = generate_text(prompt, max_length)
+                    text_status.update("*Status: Complete*")
+                    return response
+                except Exception as e:
+                    text_status.update("*Status: Error*")
+                    return f"Error: {str(e)}"

         text_button.click(
-            fn=generate_text,
+            fn=text_fn,
             inputs=[text_input, text_max_length],
             outputs=text_output
         )
@@ -116,10 +144,22 @@ with gr.Blocks(title="LLaVA-Phi: Vision-Language Model") as demo:
                 image_max_length = gr.Slider(minimum=16, maximum=512, value=128, step=8, label="Maximum response length")
                 image_button = gr.Button("Analyze")

-            image_output = gr.Textbox(label="Model response", lines=8)
+            with gr.Column():
+                image_output = gr.Textbox(label="Model response", lines=8)
+                image_status = gr.Markdown("*Status: Ready*")
+
+            def image_fn(image, prompt, max_length):
+                image_status.update("*Status: Analyzing image...*")
+                try:
+                    response = process_image_and_prompt(image, prompt, max_length)
+                    image_status.update("*Status: Complete*")
+                    return response
+                except Exception as e:
+                    image_status.update("*Status: Error*")
+                    return f"Error: {str(e)}"

         image_button.click(
-            fn=process_image_and_prompt,
+            fn=image_fn,
             inputs=[image_input, image_text_input, image_max_length],
             outputs=image_output
         )
@@ -132,24 +172,13 @@ with gr.Blocks(title="LLaVA-Phi: Vision-Language Model") as demo:
         inputs=text_input
     )

-    # Add examples for image tab if you have example images
-    # gr.Examples(
-    #     examples=[["example1.jpg", "What's in this image?"]],
-    #     inputs=[image_input, image_text_input]
-    # )
+    # Status indicator
+    with gr.Row():
+        gr.Markdown("*Note: When you click Generate or Analyze, a GPU will be temporarily allocated to process your request and then released. The first request may take longer as the model needs to be loaded.*")

-# Launch the app with memory optimizations
+# Launch the app
 if __name__ == "__main__":
-    # Memory cleanup before launch
-    torch.cuda.empty_cache() if torch.cuda.is_available() else None
-
-    # Set low CPU thread usage to reduce memory
-    os.environ["OMP_NUM_THREADS"] = "4"
-
-    # Launch with minimal resource usage
     demo.launch(
-        share=True, # Set to False in production
         enable_queue=True,
-        max_threads=4,
         show_error=True
     )
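
For reference, the change above boils down to the ZeroGPU pattern: only the tokenizer and processor stay resident, and the model is loaded lazily inside a @spaces.GPU-decorated handler so a GPU is attached only while a request runs. A minimal standalone sketch of that pattern, assuming the Hugging Face `spaces` package available on ZeroGPU Spaces (the `MODEL_ID` constant, `generate` function, Gradio components, and `max_new_tokens` choice are illustrative, not taken from the commit):

import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "sagar007/Lava_phi"  # model repo used by the Space above

# Lightweight objects stay resident; the model itself is loaded lazily.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = None

@spaces.GPU  # a GPU is attached only for the duration of each call
def generate(prompt, max_length=128):
    global model
    if model is None:
        # First request: load weights in half precision onto the allocated GPU.
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID, torch_dtype=torch.float16, device_map="auto"
        )
    inputs = tokenizer(f"human: {prompt}\ngpt:", return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=max_length)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

demo = gr.Interface(
    fn=generate,
    inputs=[gr.Textbox(label="Prompt"), gr.Slider(16, 512, value=128, step=8, label="Max length")],
    outputs=gr.Textbox(label="Response"),
)

if __name__ == "__main__":
    demo.launch()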