Spaces:

Utiric
/

omniVLM

Running

omniVLM / app.py

sksstudio

Add application file twoo

ca0012e about 2 months ago

1.39 kB

	import logging
	import threading
	from typing import Optional

	import huggingface_hub
	import uvicorn
	from fastapi import FastAPI, HTTPException
	from fastapi.concurrency import run_in_threadpool
	from llama_cpp import Llama
	from pydantic import BaseModel

	# Application object that the route decorators further down register on.
	# The descriptive metadata is collected in one mapping and handed to the
	# FastAPI constructor as keyword arguments.
	_APP_METADATA = {
	    "title": "OmniVLM API",
	    "description": "API for text generation using OmniVLM model",
	    "version": "1.0.0",
	}
	app = FastAPI(**_APP_METADATA)

	# Fetch the quantized GGUF weights from the Hugging Face Hub at import time.
	# The value returned by the download call is what gets passed to Llama as
	# its model path below.
	_MODEL_REPO_ID = "NexaAIDev/OmniVLM-968M"
	_MODEL_FILENAME = "omnivision-text-optimized-llm-Q8_0.gguf"
	model_path = huggingface_hub.hf_hub_download(
	    repo_id=_MODEL_REPO_ID,
	    filename=_MODEL_FILENAME,
	)

	# Build the single, module-wide model instance from the downloaded file.
	# Every request handled by this process goes through this one object.
	_CONTEXT_WINDOW = 2048  # value given to Llama as n_ctx
	_CPU_THREADS = 4  # value given to Llama as n_threads
	llm = Llama(
	    model_path=model_path,
	    n_ctx=_CONTEXT_WINDOW,
	    n_threads=_CPU_THREADS,
	)

	# Request body schema for POST /generate.
	# Every field is forwarded unchanged to the model call made by that handler.
	class GenerationRequest(BaseModel):
	    # Prompt text; handed to the model as its positional argument. Required.
	    prompt: str
	    # Forwarded as ``max_tokens``; 100 when the client omits it.
	    max_tokens: Optional[int] = 100
	    # Forwarded as ``temperature``; 0.7 when the client omits it.
	    temperature: Optional[float] = 0.7
	    # Forwarded as ``top_p``; 0.9 when the client omits it.
	    top_p: Optional[float] = 0.9
	    # NOTE(review): the Optional[...] annotations mean an explicit JSON null
	    # is accepted here and forwarded as None - confirm the model call
	    # tolerates None for each of these, or tighten the types.

	# Response body schema for POST /generate (declared as its response_model).
	class GenerationResponse(BaseModel):
	    # Text taken from the first entry of the model output's "choices" list.
	    generated_text: str

	# Guards the shared ``llm`` object. Inference runs on worker threads, so
	# without this lock two requests could drive the same model concurrently.
	# The original code never did that (its blocking call held the event loop
	# for the whole generation), so the one-at-a-time behaviour is preserved.
	_llm_lock = threading.Lock()
	_logger = logging.getLogger(__name__)


	def _run_completion(request: GenerationRequest):
	    """Run one blocking completion on the shared model, one caller at a time."""
	    with _llm_lock:
	        return llm(
	            request.prompt,
	            max_tokens=request.max_tokens,
	            temperature=request.temperature,
	            top_p=request.top_p,
	        )


	@app.post("/generate", response_model=GenerationResponse)
	async def generate_text(request: GenerationRequest):
	    """Generate text for the given prompt.

	    Args:
	        request: Validated request body; its prompt, max_tokens, temperature
	            and top_p are forwarded to the model call.

	    Returns:
	        GenerationResponse whose ``generated_text`` is the ``"text"`` of the
	        first entry in the model output's ``"choices"`` list.

	    Raises:
	        HTTPException: Status 500, with the error message as ``detail``, if
	            the model call or the extraction of its output fails.
	    """
	    try:
	        # The model call is synchronous. Awaiting it in the thread pool keeps
	        # the event loop free, so other routes (e.g. /health) stay responsive
	        # while a generation is in progress.
	        output = await run_in_threadpool(_run_completion, request)
	        return GenerationResponse(generated_text=output["choices"][0]["text"])
	    except Exception as e:
	        # Keep the full traceback server-side; the client only sees str(e).
	        _logger.exception("Text generation failed")
	        raise HTTPException(status_code=500, detail=str(e)) from e

	@app.get("/health")
	async def health_check():
	    """Report a fixed status payload; performs no checks of its own."""
	    status_payload = {"status": "healthy"}
	    return status_payload

	if __name__ == "__main__":
	    # Script entry point: serve the app on every interface, port 8000.
	    bind_host, bind_port = "0.0.0.0", 8000
	    uvicorn.run(app, host=bind_host, port=bind_port)