Yash Sachdeva committed
Commit 37111b2 · 1 Parent(s): e39bef5

download llama

Files changed (2)
  1. Dockerfile +2 -0
  2. question_paper.py +1 -6
Dockerfile CHANGED
@@ -18,6 +18,8 @@ RUN pip install accelerate
 
 # Install hugging face hub to download llama2 model
 RUN pip install --upgrade huggingface_hub
+RUN huggingface-cli download TheBloke/Llama-2-7b-Chat-GGUF llama-2-7b.Q4_K_M.gguf --local-dir . --local-dir-use-symlinks False
+
 
 RUN CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" pip install 'llama-cpp-python[server]' --upgrade --force-reinstall --no-cache-dir
 # Install requirements.txt
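
The new RUN step bakes the GGUF weights into the image at build time instead of fetching them on each request. A minimal sanity check, assuming llama-cpp-python is installed and the file sits in the working directory where the huggingface-cli step placed it:

from llama_cpp import Llama

# Load the GGUF file downloaded during the image build; an error
# here points at the huggingface-cli download step.
llm = Llama(model_path="./llama-2-7b.Q4_K_M.gguf")

# One short completion confirms the weights are actually usable.
out = llm("Q: What is 2 + 2? A:", max_tokens=8)
print(out["choices"][0]["text"])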
question_paper.py CHANGED
@@ -10,16 +10,11 @@ from llama_cpp import Llama
 
 from huggingface_hub import hf_hub_download
 
-# Load the model
-
-hf_hub_download(repo_id="bevangelista/Llama-2-7b-chat-hf-GGUF-Q4_K_M", filename="Llama-2-7b-chat-hf-GGUF-Q4_K_M.gguf")
-
-
 app = FastAPI()
 @app.get("/")
 def llama():
     llm = Llama(
-        model_path="./Llama-2-7b-chat-hf-GGUF-Q4_K_M.gguf",
+        model_path="./llama-2-7b.Q4_K_M.gguf",
         # n_gpu_layers=-1,  # Uncomment to use GPU acceleration
         # seed=1337,  # Uncomment to set a specific seed
         # n_ctx=2048,  # Uncomment to increase the context window
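
With the runtime hf_hub_download call removed, the app now loads the model baked into the image by the Dockerfile step above. A quick smoke test once the container is up, assuming the server is exposed on localhost:8000 (the actual port depends on how the container is run):

import requests

# Call the root route served by the FastAPI app; port 8000 is an
# assumption -- substitute whatever port the container maps.
resp = requests.get("http://localhost:8000/")
print(resp.status_code)
print(resp.text)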