BlinkDL committed
Commit e2b275b
1 Parent(s): cc93644

Update app.py

Files changed (1): app.py +13 -23
app.py CHANGED
@@ -6,40 +6,30 @@ from pynvml import *
 nvmlInit()
 gpu_h = nvmlDeviceGetHandleByIndex(0)
 ctx_limit = 1536
-title = "RWKV-4-Raven-7B-v12-Eng98%-Other2%-20230521-ctx8192"
+title = "RWKV-4-World-7B-v1-OnlyForTest_84%_trained-20230618-ctx4096"
 
 os.environ["RWKV_JIT_ON"] = '1'
 os.environ["RWKV_CUDA_ON"] = '1' # if '1' then use CUDA kernel for seq mode (much faster)
 
 from rwkv.model import RWKV
-model_path = hf_hub_download(repo_id="BlinkDL/rwkv-4-raven", filename=f"{title}.pth")
+model_path = hf_hub_download(repo_id="BlinkDL/rwkv-4-world", filename=f"{title}.pth")
 model = RWKV(model=model_path, strategy='cuda fp16i8 *8 -> cuda fp16')
-# model = RWKV(model='D:/ChatRWKV/RWKV-4-Raven-7B-v9-Eng99%-Other1%-20230412-ctx8192.pth', strategy='cuda fp16i8 *10 -> cuda fp16')
 from rwkv.utils import PIPELINE, PIPELINE_ARGS
 pipeline = PIPELINE(model, "20B_tokenizer.json")
 
 def generate_prompt(instruction, input=None):
-    instruction = instruction.strip().replace('\r\n','\n').replace('\n\n','\n')
-    input = input.strip().replace('\r\n','\n').replace('\n\n','\n')
+    instruction = instruction.strip().replace('\r\n','\n').replace('\n\n','\n').replace('\n\n','\n')
+    input = input.strip().replace('\r\n','\n').replace('\n\n','\n').replace('\n\n','\n')
     if input:
-        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
-
-# Instruction:
-{instruction}
+        return f"""Instruction: {instruction}
 
-# Input:
-{input}
+Input: {input}
 
-# Response:
-"""
+Response:"""
     else:
-        return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
-
-# Instruction:
-{instruction}
+        return f"""Question: {instruction}
 
-# Response:
-"""
+Answer:"""
 
 def evaluate(
     instruction,
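For reference, a quick sketch of the two prompt shapes the new generate_prompt emits, with the output layout read off the f-strings in the hunk above. Note the function calls input.strip() unconditionally, so passing "" is safer than relying on the input=None default:

# Sketch: prompt shapes produced by the new generate_prompt (from the hunk above).
# Pass "" for a bare instruction -- input=None would raise AttributeError on .strip().
print(generate_prompt("Translate to French.", "Hello"))
# Instruction: Translate to French.
#
# Input: Hello
#
# Response:

print(generate_prompt("Tell me about ravens.", ""))
# Question: Tell me about ravens.
#
# Answer: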
@@ -56,8 +46,8 @@ def evaluate(
         token_ban = [], # ban the generation of some tokens
         token_stop = [0]) # stop generation whenever you see any token here
 
-    instruction = instruction.strip().replace('\r\n','\n').replace('\n\n','\n')
-    input = input.strip().replace('\r\n','\n').replace('\n\n','\n')
+    instruction = instruction.strip().replace('\r\n','\n').replace('\n\n','\n').replace('\n\n','\n')
+    input = input.strip().replace('\r\n','\n').replace('\n\n','\n').replace('\n\n','\n')
     ctx = generate_prompt(instruction, input)
 
     all_tokens = []
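A note on the doubled .replace('\n\n','\n'): each pass only halves a run of newlines, so two passes collapse runs of up to four newlines into one, while longer runs still leave a blank line behind. A small illustration; the re.sub variant is shown only as an aside for comparison, not something this commit uses:

import re

s = 'a' + '\n' * 5 + 'b'
chained = s.replace('\n\n', '\n').replace('\n\n', '\n')  # what evaluate() now does
print(repr(chained))                  # 'a\n\nb' -- one blank line survives
print(repr(re.sub(r'\n+', '\n', s)))  # 'a\nb'   -- any run collapses in one pass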
@@ -243,9 +233,9 @@ def chat(
 ##########################################################################
 
 with gr.Blocks(title=title) as demo:
-    gr.HTML(f"<div style=\"text-align: center;\">\n<h1>🐦Raven - {title}</h1>\n</div>")
+    gr.HTML(f"<div style=\"text-align: center;\">\n<h1>🌍World - {title}</h1>\n</div>")
     with gr.Tab("Instruct mode"):
-        gr.Markdown(f"Raven is [RWKV 7B](https://github.com/BlinkDL/ChatRWKV) 100% RNN [RWKV-LM](https://github.com/BlinkDL/RWKV-LM) finetuned to follow instructions. *** Please try examples first (bottom of page) *** (edit them to use your question). Demo limited to ctxlen {ctx_limit}. Finetuned on alpaca, gpt4all, codealpaca and more. For best results, *** keep you prompt short and clear ***. <b>UPDATE: now with Chat (see above, as a tab) ==> turn off as of now due to VRAM leak caused by buggy code.</b>.")
+        gr.Markdown(f"World is [RWKV 7B](https://github.com/BlinkDL/ChatRWKV) 100% RNN [RWKV-LM](https://github.com/BlinkDL/RWKV-LM) trained on 100+ world languages. *** Please try examples first (bottom of page) *** (edit them to use your question). Demo limited to ctxlen {ctx_limit}. Finetuned on alpaca, gpt4all, codealpaca and more. For best results, *** keep you prompt short and clear ***.</b>.") # <b>UPDATE: now with Chat (see above, as a tab) ==> turn off as of now due to VRAM leak caused by buggy code.
     with gr.Row():
         with gr.Column():
             instruction = gr.Textbox(lines=2, label="Instruction", value="Tell me about ravens.")
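The commented-out note above mentions a VRAM leak, and the app keeps a pynvml handle (gpu_h) from startup. A minimal sketch of reading memory usage through such a handle; the print format here is illustrative, not part of the commit:

from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo

nvmlInit()
gpu_h = nvmlDeviceGetHandleByIndex(0)   # same handle app.py creates at import time
info = nvmlDeviceGetMemoryInfo(gpu_h)   # .total / .used / .free, in bytes
print(f'vram used {info.used/1e9:.2f} GB of {info.total/1e9:.2f} GB')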
 