Limit input length

app.py CHANGED
--- a/app.py
+++ b/app.py
@@ -3,7 +3,7 @@ from typing import Iterator
 import gradio as gr
 import torch
 
-from model import run
+from model import get_prompt, run
 
 DEFAULT_SYSTEM_PROMPT = """\
 You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
@@ -12,6 +12,7 @@ If a question does not make any sense, or is not factually coherent, explain why
 """
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
+MAX_WORD_NUM = 3000
 
 DESCRIPTION = """
 # Llama-2 7B Chat
@@ -67,8 +68,7 @@ def generate(
         raise ValueError
 
     history = history_with_input[:-1]
-    generator = run(message, history, system_prompt, max_new_tokens,
-                    temperature, top_p, top_k)
+    generator = run(message, history, system_prompt, max_new_tokens, temperature, top_p, top_k)
     try:
         first_response = next(generator)
         yield history + [(message, first_response)]
@@ -79,13 +79,18 @@ def generate(
 
 
 def process_example(message: str) -> tuple[str, list[tuple[str, str]]]:
-    generator = generate(message, [], DEFAULT_SYSTEM_PROMPT, 1024, 0.95, 1,
-                         1000)
+    generator = generate(message, [], DEFAULT_SYSTEM_PROMPT, 1024, 0.95, 1, 50)
     for x in generator:
        pass
     return '', x
 
 
+def check_prompt_length(message: str, chat_history: list[tuple[str, str]], system_prompt: str) -> None:
+    prompt = get_prompt(message, chat_history, system_prompt)
+    if len(prompt.split()) > MAX_WORD_NUM:
+        raise gr.Error('The accumulated input is too long. Clear your chat history and try again.')
+
+
 with gr.Blocks(css='style.css') as demo:
     gr.Markdown(DESCRIPTION)
     gr.DuplicateButton(value='Duplicate Space for private use',
@@ -173,6 +178,11 @@ with gr.Blocks(css='style.css') as demo:
         api_name=False,
         queue=False,
     ).then(
+        fn=check_prompt_length,
+        inputs=[saved_input, chatbot, system_prompt],
+        api_name=False,
+        queue=False,
+    ).success(
         fn=generate,
         inputs=[
             saved_input,
@@ -200,6 +210,11 @@ with gr.Blocks(css='style.css') as demo:
         api_name=False,
         queue=False,
     ).then(
+        fn=check_prompt_length,
+        inputs=[saved_input, chatbot, system_prompt],
+        api_name=False,
+        queue=False,
+    ).success(
         fn=generate,
         inputs=[
             saved_input,
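
Note that check_prompt_length measures length in whitespace-separated words of the fully templated prompt, a cheap proxy for the token count that actually bounds Llama-2's roughly 4,096-token context; MAX_WORD_NUM = 3000 leaves headroom for words that the tokenizer expands into multiple tokens. The get_prompt helper lives in model.py, which is not part of this diff; the sketch below is a hypothetical reconstruction, assuming the standard Llama-2 chat template with [INST] and <<SYS>> markers:

def get_prompt(message: str, chat_history: list[tuple[str, str]],
               system_prompt: str) -> str:
    # Hypothetical reconstruction -- model.py is not shown in this commit.
    # Llama-2 chat wraps the system prompt in <<SYS>> tags and each turn
    # in an [INST] ... [/INST] block, so the string grows with the history.
    texts = [f'<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n']
    for user_input, response in chat_history:
        texts.append(f'{user_input} [/INST] {response} </s><s>[INST] ')
    texts.append(f'{message} [/INST]')
    return ''.join(texts)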
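
The event wiring change is what makes the check enforceable: both chains now attach check_prompt_length with .then(...) and attach generate with .success(...). In Gradio, a .success() listener only fires when the preceding event finishes without raising, so a gr.Error from the length check both surfaces its message in the UI and prevents generate from running. A minimal standalone sketch of that pattern (the component names here are illustrative, not taken from app.py):

import gradio as gr

MAX_WORD_NUM = 3000  # same cap the commit introduces


def check_length(text: str) -> None:
    # Raising gr.Error aborts the event chain and shows this message in the UI.
    if len(text.split()) > MAX_WORD_NUM:
        raise gr.Error('The accumulated input is too long. Clear your chat history and try again.')


def echo(text: str) -> str:
    return text


with gr.Blocks() as demo:
    textbox = gr.Textbox()
    output = gr.Textbox()
    button = gr.Button('Submit')
    # echo runs only if check_length returned without raising.
    button.click(fn=check_length, inputs=textbox).success(
        fn=echo, inputs=textbox, outputs=output)

demo.launch()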