sksstudio
committed on
Commit
·
59ee418
1
Parent(s):
ca0012e
Add application file twoo5
Browse files
- app.py +4 -2
- requirements.txt +1 -1
app.py
CHANGED
@@ -20,8 +20,10 @@ model_path = huggingface_hub.hf_hub_download(
 # Initialize the model with the downloaded file
 llm = Llama(
     model_path=model_path,
-    n_ctx=2048,
-    n_threads=4
+    n_ctx=2048, # Context window
+    n_threads=4, # Number of CPU threads to use
+    n_batch=512, # Number of tokens to process in parallel
+    verbose=True # Enable verbose logging for debugging
 )
 
 class GenerationRequest(BaseModel):
requirements.txt
CHANGED
@@ -1,5 +1,5 @@
 fastapi==0.104.1
 uvicorn==0.24.0
 pydantic==2.4.2
-llama-cpp-python
+llama-cpp-python>=0.2.20
 huggingface-hub>=0.19.0