sksstudio committed on
Commit
e0172c2
·
1 Parent(s): bf190b6

Add application file

Browse files
Files changed (2) hide show
  1. app.py +12 -3
  2. requirements.txt +2 -1
app.py CHANGED
@@ -3,6 +3,8 @@ from pydantic import BaseModel
3
  from llama_cpp import Llama
4
  from typing import Optional
5
  import uvicorn
 
 
6
 
7
  app = FastAPI(
8
  title="OmniVLM API",
@@ -10,10 +12,17 @@ app = FastAPI(
10
  version="1.0.0"
11
  )
12
 
13
- # Initialize the model
14
- llm = Llama.from_pretrained(
15
  repo_id="NexaAIDev/OmniVLM-968M",
16
- filename="omnivision-text-optimized-llm-Q8_0.gguf",
 
 
 
 
 
 
 
17
  )
18
 
19
  class GenerationRequest(BaseModel):
 
3
  from llama_cpp import Llama
4
  from typing import Optional
5
  import uvicorn
6
+ import os
7
+ import huggingface_hub
8
 
9
  app = FastAPI(
10
  title="OmniVLM API",
 
12
  version="1.0.0"
13
  )
14
 
15
+ # Download the model from Hugging Face Hub
16
+ model_path = huggingface_hub.hf_hub_download(
17
  repo_id="NexaAIDev/OmniVLM-968M",
18
+ filename="omnivision-text-optimized-llm-Q8_0.gguf"
19
+ )
20
+
21
+ # Initialize the model with the downloaded file
22
+ llm = Llama(
23
+ model_path=model_path,
24
+ n_ctx=2048, # Context window
25
+ n_threads=4 # Number of CPU threads to use
26
  )
27
 
28
  class GenerationRequest(BaseModel):
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  fastapi==0.104.1
2
  uvicorn==0.24.0
3
  pydantic==2.4.2
4
- llama-cpp-python==0.1.76
 
 
1
  fastapi==0.104.1
2
  uvicorn==0.24.0
3
  pydantic==2.4.2
4
+ llama-cpp-python==0.1.76
5
+ huggingface-hub>=0.19.0