sksstudio committed · Commit e0172c2 · Parent(s): bf190b6

Add application file

Files changed:
- app.py (+12 -3)
- requirements.txt (+2 -1)
app.py
CHANGED
@@ -3,6 +3,8 @@ from pydantic import BaseModel
 from llama_cpp import Llama
 from typing import Optional
 import uvicorn
+import os
+import huggingface_hub
 
 app = FastAPI(
     title="OmniVLM API",
@@ -10,10 +12,17 @@ app = FastAPI(
     version="1.0.0"
 )
 
-#
-llm = Llama.from_pretrained(
+# Download the model from Hugging Face Hub
+model_path = huggingface_hub.hf_hub_download(
     repo_id="NexaAIDev/OmniVLM-968M",
-    filename="omnivision-text-optimized-llm-Q8_0.gguf"
+    filename="omnivision-text-optimized-llm-Q8_0.gguf"
+)
+
+# Initialize the model with the downloaded file
+llm = Llama(
+    model_path=model_path,
+    n_ctx=2048,  # Context window
+    n_threads=4  # Number of CPU threads to use
 )
 
 class GenerationRequest(BaseModel):
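For context, the download-then-load pattern this commit introduces can be run on its own: fetch the GGUF weights with huggingface_hub, then point llama-cpp-python at the returned local path. The sketch below assumes network access to the Hub on first run; the final completion call and its prompt are illustrative additions, not part of app.py.

# Minimal standalone sketch of the pattern from this commit.
import huggingface_hub
from llama_cpp import Llama

# hf_hub_download caches the file (by default under ~/.cache/huggingface)
# and returns its local path, so repeated startups skip the download.
model_path = huggingface_hub.hf_hub_download(
    repo_id="NexaAIDev/OmniVLM-968M",
    filename="omnivision-text-optimized-llm-Q8_0.gguf",
)

llm = Llama(
    model_path=model_path,  # local path returned above
    n_ctx=2048,             # context window, as in the commit
    n_threads=4,            # CPU threads, as in the commit
)

# Illustrative only: llama-cpp-python's __call__ runs a plain text completion.
result = llm("Q: What does OmniVLM do? A:", max_tokens=64)
print(result["choices"][0]["text"])

Because hf_hub_download returns a cached local path, the app also starts offline once the weights have been fetched, which is the practical advantage over resolving the repo inside the model constructor.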
requirements.txt
CHANGED
@@ -1,4 +1,5 @@
 fastapi==0.104.1
 uvicorn==0.24.0
 pydantic==2.4.2
-llama-cpp-python==0.1.76
+llama-cpp-python==0.1.76
+huggingface-hub>=0.19.0
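After `pip install -r requirements.txt`, one quick way to confirm that the newly added huggingface-hub dependency (and the existing pins) resolved is to read the installed versions from package metadata. This check is a sketch, not part of the commit:

# Sanity-check sketch: print the installed version of each pinned dependency.
from importlib.metadata import version

for dist in ("fastapi", "uvicorn", "pydantic", "llama-cpp-python", "huggingface-hub"):
    print(f"{dist}=={version(dist)}")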