sksstudio committed
Commit 5401975 · 1 Parent(s): 1be012e
Files changed (2)
  1. .gitignore +1 -0
  2. app.py +71 -30
.gitignore ADDED
@@ -0,0 +1 @@
+test.py
app.py CHANGED
@@ -1,59 +1,100 @@
-from fastapi import FastAPI, HTTPException
+# app.py
+from fastapi import FastAPI, HTTPException, UploadFile, File
 from pydantic import BaseModel
 from llama_cpp import Llama
 from typing import Optional
 import uvicorn
 import huggingface_hub
 import os
+from PIL import Image
+import io
+import base64
 
 app = FastAPI(
-    title="OmniVLM API",
-    description="API for text generation using OmniVLM model",
-    version="1.0.0"
+    title="OmniVLM API",
+    description="API for text and image processing using OmniVLM model",
+    version="1.0.0"
 )
 
 # Download the model from Hugging Face Hub
 model_path = huggingface_hub.hf_hub_download(
-    repo_id="NexaAIDev/OmniVLM-968M",
-    filename="omnivision-text-optimized-llm-Q8_0.gguf"
+    repo_id="NexaAIDev/OmniVLM-968M",
+    filename="omnivision-text-optimized-llm-Q8_0.gguf"
 )
 
 # Initialize the model with the downloaded file
 llm = Llama(
-    model_path=model_path,
-    n_ctx=2048,    # Context window
-    n_threads=4,   # Number of CPU threads to use
-    n_batch=512,   # Number of tokens to process in parallel
-    verbose=True   # Enable verbose logging for debugging
+    model_path=model_path,
+    n_ctx=2048,
+    n_threads=4,
+    n_batch=512,
+    verbose=True
 )
 
 class GenerationRequest(BaseModel):
-    prompt: str
-    max_tokens: Optional[int] = 100
-    temperature: Optional[float] = 0.7
-    top_p: Optional[float] = 0.9
+    prompt: str
+    max_tokens: Optional[int] = 100
+    temperature: Optional[float] = 0.7
+    top_p: Optional[float] = 0.9
+
+class ImageRequest(BaseModel):
+    prompt: Optional[str] = "Describe this image in detail"
+    max_tokens: Optional[int] = 200
+    temperature: Optional[float] = 0.7
 
 class GenerationResponse(BaseModel):
-    generated_text: str
+    generated_text: str
 
 @app.post("/generate", response_model=GenerationResponse)
 async def generate_text(request: GenerationRequest):
-    try:
-        output = llm(
-            request.prompt,
-            max_tokens=request.max_tokens,
-            temperature=request.temperature,
-            top_p=request.top_p
-        )
-
-        return GenerationResponse(generated_text=output["choices"][0]["text"])
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
+    try:
+        output = llm(
+            request.prompt,
+            max_tokens=request.max_tokens,
+            temperature=request.temperature,
+            top_p=request.top_p
+        )
+
+        return GenerationResponse(generated_text=output["choices"][0]["text"])
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.post("/process-image", response_model=GenerationResponse)
+async def process_image(
+    file: UploadFile = File(...),
+    request: ImageRequest = None
+):
+    try:
+        # Read and validate the image
+        image_data = await file.read()
+        image = Image.open(io.BytesIO(image_data))
+
+        # Convert image to base64
+        buffered = io.BytesIO()
+        image.save(buffered, format=image.format or "JPEG")
+        img_str = base64.b64encode(buffered.getvalue()).decode()
+
+        # Create prompt with image
+        prompt = f"""
+        <image>data:image/jpeg;base64,{img_str}</image>
+        {request.prompt if request else "Describe this image in detail"}
+        """
+
+        # Generate description
+        output = llm(
+            prompt,
+            max_tokens=request.max_tokens if request else 200,
+            temperature=request.temperature if request else 0.7
+        )
+
+        return GenerationResponse(generated_text=output["choices"][0]["text"])
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
 
 @app.get("/health")
 async def health_check():
-    return {"status": "healthy"}
+    return {"status": "healthy"}
 
 if __name__ == "__main__":
-    port = int(os.environ.get("PORT", 7860))  # Hugging Face Spaces uses port 7860 by default
-    uvicorn.run(app, host="0.0.0.0", port=port)
+    port = int(os.environ.get("PORT", 7860))
+    uvicorn.run(app, host="0.0.0.0", port=port)
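
For a quick check of the two endpoints this commit exposes, the sketch below is a minimal client, not part of the commit. It assumes the app is running locally on the default Hugging Face Spaces port 7860; the base URL and the sample.jpg filename are placeholders. The /process-image call uploads only the file, so the handler's optional ImageRequest body stays None and the in-code defaults ("Describe this image in detail", max_tokens=200) apply.

# client_example.py (illustrative sketch; not part of this commit)
import requests

BASE_URL = "http://localhost:7860"  # assumed local run; adjust for a deployed Space

# Text generation via POST /generate
resp = requests.post(
    f"{BASE_URL}/generate",
    json={"prompt": "Describe the OmniVLM model in one sentence.", "max_tokens": 64},
)
resp.raise_for_status()
print(resp.json()["generated_text"])

# Image description via POST /process-image (multipart upload; sample.jpg is a placeholder)
with open("sample.jpg", "rb") as f:
    resp = requests.post(
        f"{BASE_URL}/process-image",
        files={"file": ("sample.jpg", f, "image/jpeg")},
    )
resp.raise_for_status()
print(resp.json()["generated_text"])

# Liveness probe via GET /health
print(requests.get(f"{BASE_URL}/health").json())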