EagleX-7B-2.25T-Gradio-Demo

Runtime error

App Files Files Community

BlinkDL committed on May 2, 2023

Commit

ab56f98

•

1 Parent(s): b454fd9

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -38

app.py CHANGED Viewed

@@ -87,7 +87,8 @@ def evaluate(
     gpu_info = nvmlDeviceGetMemoryInfo(gpu_h)
     print(f'vram {gpu_info.total} used {gpu_info.used} free {gpu_info.free}')
     gc.collect()
     torch.cuda.empty_cache()
     yield out_str.strip()
@@ -244,7 +245,7 @@ def chat(
 with gr.Blocks(title=title) as demo:
     gr.HTML(f"<div style=\"text-align: center;\">\n<h1>🐦Raven - {title}</h1>\n</div>")
     with gr.Tab("Instruct mode"):
-        gr.Markdown(f"Raven is [RWKV 7B](https://github.com/BlinkDL/ChatRWKV) 100% RNN [RWKV-LM](https://github.com/BlinkDL/RWKV-LM) finetuned to follow instructions. *** Please try examples first (bottom of page) *** (edit them to use your question). Demo limited to ctxlen {ctx_limit}. Finetuned on alpaca, gpt4all, codealpaca and more. For best results, *** keep you prompt short and clear ***. <b>UPDATE: now with Chat (see above, as a tab)</b>.")
         with gr.Row():
             with gr.Column():
                 instruction = gr.Textbox(lines=2, label="Instruction", value="Tell me about ravens.")
@@ -264,42 +265,42 @@ with gr.Blocks(title=title) as demo:
         clear.click(lambda: None, [], [output])
         data.click(lambda x: x, [data], [instruction, input, token_count, temperature, top_p, presence_penalty, count_penalty])
-    with gr.Tab("Chat (Experimental - Might be buggy - use ChatRWKV for reference)"):
-        gr.Markdown(f'''<b>*** The length of response is restricted in this demo. Use ChatRWKV for longer generations. ***</b> Say "go on" or "continue" can sometimes continue the response. If you'd like to edit the scenario, make sure to follow the exact same format: empty lines between (and only between) different speakers. Changes only take effect after you press [Clear]. <b>The default "Bob" & "Alice" names work the best.</b>''', label="Description")
-        with gr.Row():
-            with gr.Column():
-                chatbot = gr.Chatbot()
-                state = gr.State()
-                message = gr.Textbox(label="Message", value="Write me a python code to land on moon.")
-                with gr.Row():
-                    send = gr.Button("Send", variant="primary")
-                    alt = gr.Button("Alternative", variant="secondary")
-                    clear = gr.Button("Clear", variant="secondary")
-            with gr.Column():
-                with gr.Row():
-                    user_name = gr.Textbox(lines=1, max_lines=1, label="User Name", value="Bob")
-                    bot_name = gr.Textbox(lines=1, max_lines=1, label="Bot Name", value="Alice")
-                prompt = gr.Textbox(lines=10, max_lines=50, label="Scenario", value=chat_intro)
-                temperature = gr.Slider(0.2, 2.0, label="Temperature", step=0.1, value=1.2)
-                top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.5)
-                presence_penalty = gr.Slider(0.0, 1.0, label="Presence Penalty", step=0.1, value=0.4)
-                count_penalty = gr.Slider(0.0, 1.0, label="Count Penalty", step=0.1, value=0.4)
-        chat_inputs = [
-            prompt,
-            user_name,
-            bot_name,
-            chatbot,
-            state,
-            temperature,
-            top_p,
-            presence_penalty,
-            count_penalty
-        ]
-        chat_outputs = [chatbot, state]
-        message.submit(user, [message, chatbot], [message, chatbot], queue=False).then(chat, chat_inputs, chat_outputs)
-        send.click(user, [message, chatbot], [message, chatbot], queue=False).then(chat, chat_inputs, chat_outputs)
-        alt.click(alternative, [chatbot, state], [chatbot, state], queue=False).then(chat, chat_inputs, chat_outputs)
-        clear.click(lambda: ([], None, ""), [], [chatbot, state, message], queue=False)
 demo.queue(concurrency_count=1, max_size=10)
 demo.launch(share=False)

     gpu_info = nvmlDeviceGetMemoryInfo(gpu_h)
     print(f'vram {gpu_info.total} used {gpu_info.used} free {gpu_info.free}')
+    del out
+    del state
     gc.collect()
     torch.cuda.empty_cache()
     yield out_str.strip()
 with gr.Blocks(title=title) as demo:
     gr.HTML(f"<div style=\"text-align: center;\">\n<h1>🐦Raven - {title}</h1>\n</div>")
     with gr.Tab("Instruct mode"):
+        gr.Markdown(f"Raven is [RWKV 7B](https://github.com/BlinkDL/ChatRWKV) 100% RNN [RWKV-LM](https://github.com/BlinkDL/RWKV-LM) finetuned to follow instructions. *** Please try examples first (bottom of page) *** (edit them to use your question). Demo limited to ctxlen {ctx_limit}. Finetuned on alpaca, gpt4all, codealpaca and more. For best results, *** keep you prompt short and clear ***. <b>UPDATE: now with Chat (see above, as a tab) ==> turn off as of now due to VRAM leak caused by buggy code.</b>.")
         with gr.Row():
             with gr.Column():
                 instruction = gr.Textbox(lines=2, label="Instruction", value="Tell me about ravens.")
         clear.click(lambda: None, [], [output])
         data.click(lambda x: x, [data], [instruction, input, token_count, temperature, top_p, presence_penalty, count_penalty])
+    # with gr.Tab("Chat (Experimental - Might be buggy - use ChatRWKV for reference)"):
+    #     gr.Markdown(f'''<b>*** The length of response is restricted in this demo. Use ChatRWKV for longer generations. ***</b> Say "go on" or "continue" can sometimes continue the response. If you'd like to edit the scenario, make sure to follow the exact same format: empty lines between (and only between) different speakers. Changes only take effect after you press [Clear]. <b>The default "Bob" & "Alice" names work the best.</b>''', label="Description")
+    #     with gr.Row():
+    #         with gr.Column():
+    #             chatbot = gr.Chatbot()
+    #             state = gr.State()
+    #             message = gr.Textbox(label="Message", value="Write me a python code to land on moon.")
+    #             with gr.Row():
+    #                 send = gr.Button("Send", variant="primary")
+    #                 alt = gr.Button("Alternative", variant="secondary")
+    #                 clear = gr.Button("Clear", variant="secondary")
+    #         with gr.Column():
+    #             with gr.Row():
+    #                 user_name = gr.Textbox(lines=1, max_lines=1, label="User Name", value="Bob")
+    #                 bot_name = gr.Textbox(lines=1, max_lines=1, label="Bot Name", value="Alice")
+    #             prompt = gr.Textbox(lines=10, max_lines=50, label="Scenario", value=chat_intro)
+    #             temperature = gr.Slider(0.2, 2.0, label="Temperature", step=0.1, value=1.2)
+    #             top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.5)
+    #             presence_penalty = gr.Slider(0.0, 1.0, label="Presence Penalty", step=0.1, value=0.4)
+    #             count_penalty = gr.Slider(0.0, 1.0, label="Count Penalty", step=0.1, value=0.4)
+    #     chat_inputs = [
+    #         prompt,
+    #         user_name,
+    #         bot_name,
+    #         chatbot,
+    #         state,
+    #         temperature,
+    #         top_p,
+    #         presence_penalty,
+    #         count_penalty
+    #     ]
+    #     chat_outputs = [chatbot, state]
+    #     message.submit(user, [message, chatbot], [message, chatbot], queue=False).then(chat, chat_inputs, chat_outputs)
+    #     send.click(user, [message, chatbot], [message, chatbot], queue=False).then(chat, chat_inputs, chat_outputs)
+    #     alt.click(alternative, [chatbot, state], [chatbot, state], queue=False).then(chat, chat_inputs, chat_outputs)
+    #     clear.click(lambda: ([], None, ""), [], [chatbot, state, message], queue=False)
 demo.queue(concurrency_count=1, max_size=10)
 demo.launch(share=False)