Yash Sachdeva committed
Commit 37111b2 · 1 Parent: e39bef5

download llama
- Dockerfile +2 -0
- question_paper.py +1 -6
Dockerfile
CHANGED
@@ -18,6 +18,8 @@ RUN pip install accelerate
 
 # Install hugging face hub to download llama2 model
 RUN pip install --upgrade huggingface_hub
+RUN huggingface-cli download TheBloke/Llama-2-7b-Chat-GGUF llama-2-7b.Q4_K_M.gguf --local-dir . --local-dir-use-symlinks False
+
 
 RUN CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" pip install 'llama-cpp-python[server]' --upgrade --force-reinstall --no-cache-dir
 # Install requirements.txt
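In effect, this commit moves the model download from request time into the image build: the new huggingface-cli step bakes llama-2-7b.Q4_K_M.gguf into the image, so the app no longer needs to fetch weights on startup. A minimal sketch of the equivalent download done from Python, assuming the standard hf_hub_download API (this helper script is hypothetical, not part of the repo):

# download_model.py — hypothetical helper; the Dockerfile does this
# with `huggingface-cli download` instead.
from huggingface_hub import hf_hub_download

# Fetch the quantized GGUF weights into the working directory, under the
# exact file name that question_paper.py expects.
hf_hub_download(
    repo_id="TheBloke/Llama-2-7b-Chat-GGUF",
    filename="llama-2-7b.Q4_K_M.gguf",
    local_dir=".",
)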
question_paper.py
CHANGED
@@ -10,16 +10,11 @@ from llama_cpp import Llama
 
 from huggingface_hub import hf_hub_download
 
-# Load the model
-
-hf_hub_download(repo_id="bevangelista/Llama-2-7b-chat-hf-GGUF-Q4_K_M", filename="Llama-2-7b-chat-hf-GGUF-Q4_K_M.gguf")
-
-
 app = FastAPI()
 @app.get("/")
 def llama():
     llm = Llama(
-        model_path="./
+        model_path="./llama-2-7b.Q4_K_M.gguf",
     # n_gpu_layers=-1, # Uncomment to use GPU acceleration
     # seed=1337, # Uncomment to set a specific seed
     # n_ctx=2048, # Uncomment to increase the context window