BlinkDL committed on
Commit
2d6fa0c
·
1 Parent(s): 61b9ff7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -6,7 +6,7 @@ from pynvml import *
6
  nvmlInit()
7
  gpu_h = nvmlDeviceGetHandleByIndex(0)
8
 
9
- title = "RWKV-4 14B fp16 ctx1024"
10
  desc = '''Links:
11
  <a href='https://github.com/BlinkDL/ChatRWKV' target="_blank" style="margin:0 1em">ChatRWKV</a>
12
  <a href='https://github.com/BlinkDL/RWKV-LM' target="_blank" style="margin:0 1em">RWKV-LM</a>
@@ -17,10 +17,10 @@ os.environ["RWKV_JIT_ON"] = '1'
17
  os.environ["RWKV_CUDA_ON"] = '1' # if '1' then use CUDA kernel for seq mode (much faster)
18
 
19
  from rwkv.model import RWKV
20
- # model_path = hf_hub_download(repo_id="BlinkDL/rwkv-4-pile-14b", filename="RWKV-4-Pile-14B-20230213-8019.pth")
21
- # model = RWKV(model=model_path, strategy='cuda fp16 *34 -> cpu fp32')
22
- model_path = hf_hub_download(repo_id="BlinkDL/rwkv-4-pile-169m", filename="RWKV-4-Pile-169M-20220807-8023.pth")
23
- model = RWKV(model=model_path, strategy='cuda fp16')
24
  from rwkv.utils import PIPELINE, PIPELINE_ARGS
25
  pipeline = PIPELINE(model, "20B_tokenizer.json")
26
 
@@ -53,7 +53,7 @@ def infer(
53
  occurrence = {}
54
  state = None
55
  for i in range(int(token_count)):
56
- out, state = model.forward(pipeline.encode(ctx)[:1024] if i == 0 else [token], state)
57
  for n in args.token_ban:
58
  out[n] = -float('inf')
59
  for n in occurrence:
 
6
  nvmlInit()
7
  gpu_h = nvmlDeviceGetHandleByIndex(0)
8
 
9
+ title = "RWKV-4 14B fp16 (DEMO, limited to ctxlen 824)"
10
  desc = '''Links:
11
  <a href='https://github.com/BlinkDL/ChatRWKV' target="_blank" style="margin:0 1em">ChatRWKV</a>
12
  <a href='https://github.com/BlinkDL/RWKV-LM' target="_blank" style="margin:0 1em">RWKV-LM</a>
 
17
  os.environ["RWKV_CUDA_ON"] = '1' # if '1' then use CUDA kernel for seq mode (much faster)
18
 
19
  from rwkv.model import RWKV
20
+ model_path = hf_hub_download(repo_id="BlinkDL/rwkv-4-pile-14b", filename="RWKV-4-Pile-14B-20230213-8019.pth")
21
+ model = RWKV(model=model_path, strategy='cuda fp16 *34 -> cpu fp32')
22
+ # model_path = hf_hub_download(repo_id="BlinkDL/rwkv-4-pile-169m", filename="RWKV-4-Pile-169M-20220807-8023.pth")
23
+ # model = RWKV(model=model_path, strategy='cuda fp16')
24
  from rwkv.utils import PIPELINE, PIPELINE_ARGS
25
  pipeline = PIPELINE(model, "20B_tokenizer.json")
26
 
 
53
  occurrence = {}
54
  state = None
55
  for i in range(int(token_count)):
56
+ out, state = model.forward(pipeline.encode(ctx)[:824] if i == 0 else [token], state)
57
  for n in args.token_ban:
58
  out[n] = -float('inf')
59
  for n in occurrence: