Spaces:

sagar007
/

Lava_phi_model

Running on Zero

App Files Files Community

sagar007 committed 19 days ago

Commit

44f3097

verified ·

1 Parent(s): 9023c9b

Update app.py

Browse files

Files changed (1) hide show

app.py +77 -48

app.py CHANGED Viewed

@@ -1,38 +1,39 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer, AutoProcessor
 import torch
 from PIL import Image
 import os
-# Check if CUDA is available, otherwise use CPU
-device = "cuda" if torch.cuda.is_available() else "cpu"
-print(f"Using device: {device}")
-# Load model and tokenizer with optimizations for CPU deployment
-def load_model():
-    print("Loading model and tokenizer...")
-    model = AutoModelForCausalLM.from_pretrained(
-        "sagar007/Lava_phi",
-        torch_dtype=torch.float32 if device == "cpu" else torch.bfloat16,
-        low_cpu_mem_usage=True,
-    )
-    model = model.to(device)
-    tokenizer = AutoTokenizer.from_pretrained("sagar007/Lava_phi")
-    processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
-    print("Model and tokenizer loaded successfully!")
-    return model, tokenizer, processor
-# Load models
-model, tokenizer, processor = load_model()
-# For text-only generation
 def generate_text(prompt, max_length=128):
     try:
-        inputs = tokenizer(f"human: {prompt}\ngpt:", return_tensors="pt").to(device)
-        # Generate with low memory footprint settings
         with torch.no_grad():
             outputs = model.generate(
                 **inputs,
@@ -50,21 +51,35 @@ def generate_text(prompt, max_length=128):
         return generated_text
     except Exception as e:
         return f"Error generating text: {str(e)}"
-# For image and text processing
 def process_image_and_prompt(image, prompt, max_length=128):
     try:
         if image is None:
             return "No image provided. Please upload an image."
         # Process image
-        image_tensor = processor(images=image, return_tensors="pt").pixel_values.to(device)
         # Tokenize input with image token
-        inputs = tokenizer(f"human: <image>\n{prompt}\ngpt:", return_tensors="pt").to(device)
-        # Generate with memory optimizations
         with torch.no_grad():
             outputs = model.generate(
                 input_ids=inputs["input_ids"],
@@ -84,12 +99,13 @@ def process_image_and_prompt(image, prompt, max_length=128):
         return generated_text
     except Exception as e:
         return f"Error processing image: {str(e)}"
 # Create Gradio Interface
 with gr.Blocks(title="LLaVA-Phi: Vision-Language Model") as demo:
     gr.Markdown("# LLaVA-Phi: Vision-Language Model")
-    gr.Markdown("This model can generate text responses from text prompts or analyze images with text prompts.")
     with gr.Tab("Text Generation"):
         with gr.Row():
@@ -98,10 +114,22 @@ with gr.Blocks(title="LLaVA-Phi: Vision-Language Model") as demo:
                 text_max_length = gr.Slider(minimum=16, maximum=512, value=128, step=8, label="Maximum response length")
                 text_button = gr.Button("Generate")
-            text_output = gr.Textbox(label="Generated response", lines=8)
         text_button.click(
-            fn=generate_text,
             inputs=[text_input, text_max_length],
             outputs=text_output
         )
@@ -116,10 +144,22 @@ with gr.Blocks(title="LLaVA-Phi: Vision-Language Model") as demo:
                 image_max_length = gr.Slider(minimum=16, maximum=512, value=128, step=8, label="Maximum response length")
                 image_button = gr.Button("Analyze")
-            image_output = gr.Textbox(label="Model response", lines=8)
         image_button.click(
-            fn=process_image_and_prompt,
             inputs=[image_input, image_text_input, image_max_length],
             outputs=image_output
         )
@@ -132,24 +172,13 @@ with gr.Blocks(title="LLaVA-Phi: Vision-Language Model") as demo:
         inputs=text_input
     )
-    # Add examples for image tab if you have example images
-    # gr.Examples(
-    #     examples=[["example1.jpg", "What's in this image?"]],
-    #     inputs=[image_input, image_text_input]
-    # )
-# Launch the app with memory optimizations
 if __name__ == "__main__":
-    # Memory cleanup before launch
-    torch.cuda.empty_cache() if torch.cuda.is_available() else None
-    # Set low CPU thread usage to reduce memory
-    os.environ["OMP_NUM_THREADS"] = "4"
-    # Launch with minimal resource usage
     demo.launch(
-        share=True,  # Set to False in production
         enable_queue=True,
-        max_threads=4,
         show_error=True
     )

 import gradio as gr
 import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, AutoProcessor
 from PIL import Image
 import os
+import spaces
+# Initial setup without loading model to device
+print("Setting up the application...")
+# We'll load the model in the GPU functions to avoid CPU memory issues
+model = None
+tokenizer = AutoTokenizer.from_pretrained("sagar007/Lava_phi")
+processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
+print("Tokenizer and processor loaded successfully!")
+# For text-only generation with GPU on demand
+@spaces.GPU
 def generate_text(prompt, max_length=128):
     try:
+        global model
+        # Load model if not already loaded
+        if model is None:
+            print("Loading model on first request...")
+            model = AutoModelForCausalLM.from_pretrained(
+                "sagar007/Lava_phi",
+                torch_dtype=torch.float16,  # Use float16 on GPU
+                device_map="auto"  # This will put the model on GPU automatically
+            )
+            print("Model loaded successfully!")
+        inputs = tokenizer(f"human: {prompt}\ngpt:", return_tensors="pt").to(model.device)
+        # Generate with GPU
         with torch.no_grad():
             outputs = model.generate(
                 **inputs,
         return generated_text
     except Exception as e:
+        # Capture and return any errors
         return f"Error generating text: {str(e)}"
+# For image and text processing with GPU on demand
+@spaces.GPU
 def process_image_and_prompt(image, prompt, max_length=128):
     try:
         if image is None:
             return "No image provided. Please upload an image."
+        global model
+        # Load model if not already loaded
+        if model is None:
+            print("Loading model on first request...")
+            model = AutoModelForCausalLM.from_pretrained(
+                "sagar007/Lava_phi",
+                torch_dtype=torch.float16,  # Use float16 on GPU
+                device_map="auto"  # This will put the model on GPU automatically
+            )
+            print("Model loaded successfully!")
         # Process image
+        image_tensor = processor(images=image, return_tensors="pt").pixel_values.to(model.device)
         # Tokenize input with image token
+        inputs = tokenizer(f"human: <image>\n{prompt}\ngpt:", return_tensors="pt").to(model.device)
+        # Generate with GPU
         with torch.no_grad():
             outputs = model.generate(
                 input_ids=inputs["input_ids"],
         return generated_text
     except Exception as e:
+        # Capture and return any errors
         return f"Error processing image: {str(e)}"
 # Create Gradio Interface
 with gr.Blocks(title="LLaVA-Phi: Vision-Language Model") as demo:
     gr.Markdown("# LLaVA-Phi: Vision-Language Model")
+    gr.Markdown("This model uses ZeroGPU technology - GPU resources are allocated only when generating responses and released afterward.")
     with gr.Tab("Text Generation"):
         with gr.Row():
                 text_max_length = gr.Slider(minimum=16, maximum=512, value=128, step=8, label="Maximum response length")
                 text_button = gr.Button("Generate")
+            with gr.Column():
+                text_output = gr.Textbox(label="Generated response", lines=8)
+                text_status = gr.Markdown("*Status: Ready*")
+        def text_fn(prompt, max_length):
+            """Yield (response, status) pairs for the text-generation tab.
+
+            Written as a generator so the status line can change before the
+            slow generate_text call finishes. Calling text_status.update(...)
+            inside the handler never reaches the browser: a component only
+            changes when the handler yields or returns a value for it and the
+            component is listed in the event's outputs.
+            """
+            # gr.update() with no arguments leaves the previous response untouched.
+            yield gr.update(), "*Status: Generating response...*"
+            try:
+                response = generate_text(prompt, max_length)
+            except Exception as e:
+                # Only exceptions that escape generate_text land here; it
+                # reports its own failures as a returned string.
+                yield f"Error: {str(e)}", "*Status: Error*"
+            else:
+                yield response, "*Status: Complete*"
         text_button.click(
+            fn=text_fn,
             inputs=[text_input, text_max_length],
+            # text_status must be an output for the yielded status to render.
+            outputs=[text_output, text_status]
         )
                 image_max_length = gr.Slider(minimum=16, maximum=512, value=128, step=8, label="Maximum response length")
                 image_button = gr.Button("Analyze")
+            with gr.Column():
+                image_output = gr.Textbox(label="Model response", lines=8)
+                image_status = gr.Markdown("*Status: Ready*")
+        def image_fn(image, prompt, max_length):
+            """Yield (response, status) pairs for the image-analysis tab.
+
+            Written as a generator so the status line can change before the
+            slow process_image_and_prompt call finishes. Calling
+            image_status.update(...) inside the handler never reaches the
+            browser: a component only changes when the handler yields or
+            returns a value for it and the component is listed in the event's
+            outputs.
+            """
+            # gr.update() with no arguments leaves the previous response untouched.
+            yield gr.update(), "*Status: Analyzing image...*"
+            try:
+                response = process_image_and_prompt(image, prompt, max_length)
+            except Exception as e:
+                # Only exceptions that escape process_image_and_prompt land
+                # here; it reports its own failures as a returned string.
+                yield f"Error: {str(e)}", "*Status: Error*"
+            else:
+                yield response, "*Status: Complete*"
         image_button.click(
+            fn=image_fn,
             inputs=[image_input, image_text_input, image_max_length],
+            # image_status must be an output for the yielded status to render.
+            outputs=[image_output, image_status]
         )
         inputs=text_input
     )
+    # Usage note: GPU is allocated per request; the first request also loads the model
+    with gr.Row():
+        gr.Markdown("*Note: When you click Generate or Analyze, a GPU will be temporarily allocated to process your request and then released. The first request may take longer as the model needs to be loaded.*")
+# Launch the app
 if __name__ == "__main__":
+    # enable_queue is a deprecated launch() keyword that newer Gradio releases
+    # reject; queueing is configured through Blocks.queue() instead.
+    demo.queue()
+    # show_error surfaces handler exceptions in the browser UI.
+    demo.launch(show_error=True)