sksstudio committed
Commit 5401975 · 1 Parent(s): 1be012e
Files changed (2)
  1. .gitignore +1 -0
  2. app.py +71 -30
.gitignore ADDED
@@ -0,0 +1 @@
+test.py
app.py CHANGED
@@ -1,59 +1,100 @@
-from fastapi import FastAPI, HTTPException
+# app.py
+from fastapi import FastAPI, HTTPException, UploadFile, File
 from pydantic import BaseModel
 from llama_cpp import Llama
 from typing import Optional
 import uvicorn
 import huggingface_hub
 import os
+from PIL import Image
+import io
+import base64
 
 app = FastAPI(
-    title="OmniVLM API",
-    description="API for text generation using OmniVLM model",
-    version="1.0.0"
+    title="OmniVLM API",
+    description="API for text and image processing using OmniVLM model",
+    version="1.0.0"
 )
 
 # Download the model from Hugging Face Hub
 model_path = huggingface_hub.hf_hub_download(
-    repo_id="NexaAIDev/OmniVLM-968M",
-    filename="omnivision-text-optimized-llm-Q8_0.gguf"
+    repo_id="NexaAIDev/OmniVLM-968M",
+    filename="omnivision-text-optimized-llm-Q8_0.gguf"
 )
 
 # Initialize the model with the downloaded file
 llm = Llama(
-    model_path=model_path,
-    n_ctx=2048,    # Context window
-    n_threads=4,   # Number of CPU threads to use
-    n_batch=512,   # Number of tokens to process in parallel
-    verbose=True   # Enable verbose logging for debugging
+    model_path=model_path,
+    n_ctx=2048,
+    n_threads=4,
+    n_batch=512,
+    verbose=True
 )
 
 class GenerationRequest(BaseModel):
-    prompt: str
-    max_tokens: Optional[int] = 100
-    temperature: Optional[float] = 0.7
-    top_p: Optional[float] = 0.9
+    prompt: str
+    max_tokens: Optional[int] = 100
+    temperature: Optional[float] = 0.7
+    top_p: Optional[float] = 0.9
+
+class ImageRequest(BaseModel):
+    prompt: Optional[str] = "Describe this image in detail"
+    max_tokens: Optional[int] = 200
+    temperature: Optional[float] = 0.7
 
 class GenerationResponse(BaseModel):
-    generated_text: str
+    generated_text: str
 
 @app.post("/generate", response_model=GenerationResponse)
 async def generate_text(request: GenerationRequest):
-    try:
-        output = llm(
-            request.prompt,
-            max_tokens=request.max_tokens,
-            temperature=request.temperature,
-            top_p=request.top_p
-        )
-
-        return GenerationResponse(generated_text=output["choices"][0]["text"])
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
+    try:
+        output = llm(
+            request.prompt,
+            max_tokens=request.max_tokens,
+            temperature=request.temperature,
+            top_p=request.top_p
+        )
+
+        return GenerationResponse(generated_text=output["choices"][0]["text"])
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.post("/process-image", response_model=GenerationResponse)
+async def process_image(
+    file: UploadFile = File(...),
+    request: ImageRequest = None
+):
+    try:
+        # Read and validate the image
+        image_data = await file.read()
+        image = Image.open(io.BytesIO(image_data))
+
+        # Convert image to base64
+        buffered = io.BytesIO()
+        image.save(buffered, format=image.format or "JPEG")
+        img_str = base64.b64encode(buffered.getvalue()).decode()
+
+        # Create prompt with image
+        prompt = f"""
+        <image>data:image/jpeg;base64,{img_str}</image>
+        {request.prompt if request else "Describe this image in detail"}
+        """
+
+        # Generate description
+        output = llm(
+            prompt,
+            max_tokens=request.max_tokens if request else 200,
+            temperature=request.temperature if request else 0.7
+        )
+
+        return GenerationResponse(generated_text=output["choices"][0]["text"])
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
 
 @app.get("/health")
 async def health_check():
-    return {"status": "healthy"}
+    return {"status": "healthy"}
 
 if __name__ == "__main__":
-    port = int(os.environ.get("PORT", 7860))  # Hugging Face Spaces uses port 7860 by default
-    uvicorn.run(app, host="0.0.0.0", port=port)
+    port = int(os.environ.get("PORT", 7860))
+    uvicorn.run(app, host="0.0.0.0", port=port)
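
For a quick check of the two endpoints this commit exposes, the sketch below is a minimal client, not part of the commit. It assumes the app is running locally on the default Hugging Face Spaces port 7860; the base URL and the sample.jpg filename are placeholders. The /process-image call uploads only the file, so the handler's optional ImageRequest body stays None and the in-code defaults ("Describe this image in detail", max_tokens=200) apply.

# client_example.py (illustrative sketch; not part of this commit)
import requests

BASE_URL = "http://localhost:7860"  # assumed local run; adjust for a deployed Space

# Text generation via POST /generate
resp = requests.post(
    f"{BASE_URL}/generate",
    json={"prompt": "Describe the OmniVLM model in one sentence.", "max_tokens": 64},
)
resp.raise_for_status()
print(resp.json()["generated_text"])

# Image description via POST /process-image (multipart upload; sample.jpg is a placeholder)
with open("sample.jpg", "rb") as f:
    resp = requests.post(
        f"{BASE_URL}/process-image",
        files={"file": ("sample.jpg", f, "image/jpeg")},
    )
resp.raise_for_status()
print(resp.json()["generated_text"])

# Liveness probe via GET /health
print(requests.get(f"{BASE_URL}/health").json())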