sksstudio committed
Commit bf190b6 · 1 Parent(s): 7af1a4d

Add application file

Files changed (2):
  1. app.py +47 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,47 @@
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ from llama_cpp import Llama
+ from typing import Optional
+ import uvicorn
+
+ app = FastAPI(
+     title="OmniVLM API",
+     description="API for text generation using OmniVLM model",
+     version="1.0.0"
+ )
+
+ # Initialize the model
+ llm = Llama.from_pretrained(
+     repo_id="NexaAIDev/OmniVLM-968M",
+     filename="omnivision-text-optimized-llm-Q8_0.gguf",
+ )
+
+ class GenerationRequest(BaseModel):
+     prompt: str
+     max_tokens: Optional[int] = 100
+     temperature: Optional[float] = 0.7
+     top_p: Optional[float] = 0.9
+
+ class GenerationResponse(BaseModel):
+     generated_text: str
+
+ @app.post("/generate", response_model=GenerationResponse)
+ async def generate_text(request: GenerationRequest):
+     try:
+         output = llm(
+             request.prompt,
+             max_tokens=request.max_tokens,
+             temperature=request.temperature,
+             top_p=request.top_p
+         )
+
+         return GenerationResponse(generated_text=output["choices"][0]["text"])
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @app.get("/health")
+ async def health_check():
+     return {"status": "healthy"}
+
+ if __name__ == "__main__":
+     uvicorn.run(app, host="0.0.0.0", port=8000)
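
For reference, a minimal client for the /generate endpoint defined above, assuming the server is running on localhost:8000 as in the __main__ block; the requests package and the example prompt are illustrative extras, not part of this commit:

    import requests

    # Smoke test against the running API; the JSON fields mirror the
    # GenerationRequest model defined in app.py.
    resp = requests.post(
        "http://localhost:8000/generate",
        json={"prompt": "Describe the OmniVLM model in one sentence.", "max_tokens": 64},
        timeout=300,  # the first call can be slow while the model loads
    )
    resp.raise_for_status()
    print(resp.json()["generated_text"])
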
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ fastapi==0.104.1
+ uvicorn==0.24.0
+ pydantic==2.4.2
+ llama-cpp-python>=0.2.0  # Llama.from_pretrained is a 0.2.x API; the original 0.1.76 pin predates it
+ huggingface-hub  # required at runtime by Llama.from_pretrained to fetch the GGUF from the Hub
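
To exercise both endpoints without binding a port, FastAPI's TestClient can drive the app in-process. This is a sketch, not part of the commit; it needs httpx installed (TestClient's HTTP backend), and importing app triggers the model download on first run:

    from fastapi.testclient import TestClient

    from app import app  # importing app.py downloads and loads the GGUF model

    client = TestClient(app)

    # The /health route answers immediately, independent of model inference.
    assert client.get("/health").json() == {"status": "healthy"}
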