ldhldh committed
Commit 123c1d1
1 Parent(s): 48267a7

Update app.py

Files changed (1)
  1. app.py +5 -3
app.py CHANGED
@@ -1,3 +1,5 @@
+!CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install --upgrade --force-reinstall llama-cpp-python --no-cache-dir
+
 from threading import Thread
 from llama_cpp import Llama
 import torch
@@ -9,13 +11,13 @@ print("Running on device:", torch_device)
 print("CPU threads:", torch.get_num_threads())
 
 llm = Llama(model_path = 'Llama-2-ko-7B-chat-gguf-q4_0.bin',
-            n_ctx=100,
+            n_ctx=50,
             n_threads = 8,
-            n_batch = 100
+            n_batch = 5
             )
 
 def gen(x, max_new_tokens):
-    output = llm(f"Q: {x} A: ", max_tokens=max_new_tokens, stop=["Q:", "\n"], echo=True)
+    output = llm(f"### 명령어:\n{x}\n\n### 응답:", max_tokens=max_new_tokens, stop=["###", "\n", ":"], echo=True)
 
     return output['choices'][0]['text'].replace('▁',' ')
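
For reference, a minimal, self-contained sketch of the code path after this commit. The model file name, the Llama() parameters, the instruction-style prompt template, and the stop tokens are taken from the diff above; the sample question and the __main__ guard are illustrative additions, not part of app.py. Note that with echo=True the returned text includes the prompt, and prompt plus completion must fit inside the reduced n_ctx=50 context window.

# Sketch only: exercises gen() as defined in this commit; sample call is hypothetical.
from llama_cpp import Llama

llm = Llama(
    model_path='Llama-2-ko-7B-chat-gguf-q4_0.bin',  # model file named in the diff
    n_ctx=50,     # context window set by this commit; prompt + completion share these 50 tokens
    n_threads=8,
    n_batch=5,    # prompt-processing batch size set by this commit
)

def gen(x, max_new_tokens):
    # Instruction-style prompt: "### 명령어:" (instruction) / "### 응답:" (response)
    output = llm(
        f"### 명령어:\n{x}\n\n### 응답:",
        max_tokens=max_new_tokens,
        stop=["###", "\n", ":"],
        echo=True,  # returned text includes the prompt itself
    )
    return output['choices'][0]['text'].replace('▁', ' ')

if __name__ == '__main__':
    # Illustrative question; any short Korean prompt would do given the small context window.
    print(gen("안녕하세요?", max_new_tokens=16))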