Yash Sachdeva committed
Commit 37111b2 · 1 Parent(s): e39bef5

download llama

Files changed (2)
  1. Dockerfile +2 -0
  2. question_paper.py +1 -6
Dockerfile CHANGED
@@ -18,6 +18,8 @@ RUN pip install accelerate
 
 # Install hugging face hub to download llama2 model
 RUN pip install --upgrade huggingface_hub
+RUN huggingface-cli download TheBloke/Llama-2-7b-Chat-GGUF llama-2-7b.Q4_K_M.gguf --local-dir . --local-dir-use-symlinks False
+
 
 RUN CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" pip install 'llama-cpp-python[server]' --upgrade --force-reinstall --no-cache-dir
 # Install requirements.txt
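
The new RUN step bakes the GGUF weights into the image at build time instead of fetching them on each request. A minimal sanity check, assuming llama-cpp-python is installed and the file sits in the working directory where the huggingface-cli step placed it:

from llama_cpp import Llama

# Load the GGUF file downloaded during the image build; an error
# here points at the huggingface-cli download step.
llm = Llama(model_path="./llama-2-7b.Q4_K_M.gguf")

# One short completion confirms the weights are actually usable.
out = llm("Q: What is 2 + 2? A:", max_tokens=8)
print(out["choices"][0]["text"])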
question_paper.py CHANGED
@@ -10,16 +10,11 @@ from llama_cpp import Llama
 
 from huggingface_hub import hf_hub_download
 
-# Load the model
-
-hf_hub_download(repo_id="bevangelista/Llama-2-7b-chat-hf-GGUF-Q4_K_M", filename="Llama-2-7b-chat-hf-GGUF-Q4_K_M.gguf")
-
-
 app = FastAPI()
 @app.get("/")
 def llama():
     llm = Llama(
-        model_path="./Llama-2-7b-chat-hf-GGUF-Q4_K_M.gguf",
+        model_path="./llama-2-7b.Q4_K_M.gguf",
         # n_gpu_layers=-1,  # Uncomment to use GPU acceleration
         # seed=1337,  # Uncomment to set a specific seed
         # n_ctx=2048,  # Uncomment to increase the context window
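
With the runtime hf_hub_download call removed, the app now loads the model baked into the image by the Dockerfile step above. A quick smoke test once the container is up, assuming the server is exposed on localhost:8000 (the actual port depends on how the container is run):

import requests

# Call the root route served by the FastAPI app; port 8000 is an
# assumption -- substitute whatever port the container maps.
resp = requests.get("http://localhost:8000/")
print(resp.status_code)
print(resp.text)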