govisi committed on
Commit
804e6d8
·
1 Parent(s): dd5a827

adding rwkv

Browse files
Files changed (4) hide show
  1. app.py +32 -1
  2. chatbot.py +21 -0
  3. requirements copy.txt +8 -0
  4. requirements.txt +9 -3
app.py CHANGED
@@ -4,12 +4,23 @@ import discord
4
  from discord.ext import commands
5
  from dotenv import load_dotenv
6
  from threading import Thread
7
-
 
 
8
 
9
  load_dotenv()
10
  bot = commands.Bot("", intents=discord.Intents.all())
11
 
12
  # model_pipe = pickle.load(open('pipe.pkl', 'rb'))
 
 
 
 
 
 
 
 
 
13
 
14
  @bot.event
15
  async def on_ready():
@@ -20,6 +31,26 @@ async def on_message_event(message):
20
  if message.author == bot.user:
21
  return
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  if message.content:
24
  print(message.content)
25
  # await message.channel.send(model_pipe.predict([message.content]))
 
4
  from discord.ext import commands
5
  from dotenv import load_dotenv
6
  from threading import Thread
7
+ from rwkvstic.load import RWKV
8
+ from rwkvstic.agnostic.backends import TORCH
9
+ import torch
10
 
11
  load_dotenv()
12
  bot = commands.Bot("", intents=discord.Intents.all())
13
 
14
  # model_pipe = pickle.load(open('pipe.pkl', 'rb'))
15
+ # this is the dtype used for trivial operations, such as vector->vector operations and is the dtype that will determine the accuracy of the model
16
+ runtimedtype = torch.float32 # torch.float64, torch.bfloat16
17
+
18
+ # this is the dtype used for matrix-vector operations, and is the dtype that will determine the performance and memory usage of the model
19
+ dtype = torch.bfloat16 # torch.float32, torch.float64, torch.bfloat16
20
+
21
+ useGPU = False # False
22
+
23
+ model = RWKV("RWKV-4-Pile-3B-Instruct-test2-20230209.pth", mode=TORCH, useGPU=useGPU, runtimedtype=runtimedtype, dtype=dtype)
24
 
25
  @bot.event
26
  async def on_ready():
 
31
  if message.author == bot.user:
32
  return
33
 
34
+ if message.content.startswith("!rwkv "):
35
+ mess = await message.channel.send("Loading...")
36
+ model.resetState()
37
+ model.loadContext(
38
+ newctx=f"\n\nQuestion: {message.content[6:]}\n\nExpert Long Detailed Response: ")
39
+ tex = ""
40
+ for i in range(10):
41
+ print(i)
42
+ curr = model.forward(number=10)[
43
+ "output"]
44
+ tex = tex + curr
45
+ print(curr)
46
+
47
+ if ("<|endoftext|>" in curr):
48
+ break
49
+ mess = await mess.edit(content=tex)
50
+
51
+ await asyncio.sleep(1)
52
+ await mess.edit(content=tex)
53
+
54
  if message.content:
55
  print(message.content)
56
  # await message.channel.send(model_pipe.predict([message.content]))
chatbot.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from rwkvstic.load import RWKV
from rwkvstic.agnostic.backends import TORCH
import torch
import os

# Enable rwkvstic's torch JIT compilation.
os.environ["RWKV_JIT_ON"] = '1'
# NOTE(review): the CUDA seq-mode kernel is enabled here even though
# useGPU is False below — confirm whether this flag has any effect on CPU.
os.environ["RWKV_CUDA_ON"] = '1'  # if '1' then use CUDA kernel for seq mode (much faster)

# this is the dtype used for trivial operations, such as vector->vector
# operations and is the dtype that will determine the accuracy of the model
runtimedtype = torch.float32  # torch.float64, torch.bfloat16

# this is the dtype used for matrix-vector operations, and is the dtype that
# will determine the performance and memory usage of the model
dtype = torch.bfloat16  # torch.float32, torch.float64, torch.bfloat16

useGPU = False  # True

# Load the 3B-parameter instruct-tuned RWKV checkpoint from the working
# directory (the .pth file must be present alongside this script).
model = RWKV("RWKV-4-Pile-3B-Instruct-test2-20230209.pth", mode=TORCH,
             useGPU=useGPU, runtimedtype=runtimedtype, dtype=dtype)

# Prime the model with a Q/A prompt, then sample 100 tokens and print the
# generated continuation.  (Was an f-string with no placeholders; plain
# string literal is equivalent.)
model.loadContext(newctx="Q: How many hours are there in a day?\n\nA:")
output = model.forward(number=100)["output"]

print(output)
requirements copy.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ discord.py==2.1.1
2
+ python-dotenv==1.0.0
3
+ rwkvstic==0.7.0
4
+ torch==1.13.1
5
+ inquirer==3.1.2
6
+ scipy==1.10.1
7
+ nvidia-cuda-runtime-cu12==12.0.146
8
+ --extra-index-url https://pypi.ngc.nvidia.com
requirements.txt CHANGED
@@ -1,4 +1,10 @@
 
 
1
  discord==2.1.0
2
- requests
3
- asyncio==3.4.3
4
- python-dotenv==1.0.0
 
 
 
 
 
1
+ # Automatically generated by https://github.com/damnever/pigar.
2
+
3
  discord==2.1.0
4
+ python-dotenv==0.21.1
5
+ rwkvstic==0.7.0
6
+ torch==1.13.1+cu116
7
+ inquirer==3.1.2
8
+ scipy==1.10.1
9
+ nvidia-cuda-runtime-cu12==12.0.146
10
+ --extra-index-url https://pypi.ngc.nvidia.com