Spaces:

Utiric
/

omniVLM

Running

sksstudio committed on Jan 28

Commit

59ee418

1 Parent(s): ca0012e

Add application file twoo5

Files changed (2) hide show

app.py CHANGED Viewed

@@ -20,8 +20,10 @@ model_path = huggingface_hub.hf_hub_download(
 # Initialize the model with the downloaded file
 llm = Llama(
 	model_path=model_path,
-	n_ctx=2048,  # Context window
-	n_threads=4   # Number of CPU threads to use
 )
 class GenerationRequest(BaseModel):

 # Initialize the model with the downloaded file
 llm = Llama(
 	model_path=model_path,
+	n_ctx=2048,        # Context window
+	n_threads=4,       # Number of CPU threads to use
+	n_batch=512,       # Number of tokens to process in parallel
+	verbose=True       # Enable verbose logging for debugging
 )
 class GenerationRequest(BaseModel):

requirements.txt CHANGED Viewed

@@ -1,5 +1,5 @@
 fastapi==0.104.1
 uvicorn==0.24.0
 pydantic==2.4.2
-llama-cpp-python==0.1.76
 huggingface-hub>=0.19.0

 fastapi==0.104.1
 uvicorn==0.24.0
 pydantic==2.4.2
+llama-cpp-python>=0.2.20
 huggingface-hub>=0.19.0