update code for real streaming
Browse files
app.py
CHANGED
@@ -7,7 +7,7 @@ import requests
|
|
7 |
#Streaming endpoint
|
8 |
API_URL = os.getenv("API_URL") + "/generate_stream"
|
9 |
|
10 |
-
def
|
11 |
if not inputs.startswith("User: "):
|
12 |
inputs = "User: " + inputs + "\n"
|
13 |
payload = {
|
@@ -48,6 +48,50 @@ def predict(inputs, top_p, temperature, top_k, repetition_penalty, history=[]):
|
|
48 |
|
49 |
yield chat, history #resembles {chatbot: chat, state: history}
|
50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
title = """<h1 align="center">Streaming your Chatbot output with Gradio</h1>"""
|
52 |
description = """Language models can be conditioned to act like dialogue agents through a conversational prompt that typically takes the form:
|
53 |
```
|
|
|
7 |
#Streaming endpoint
|
8 |
API_URL = os.getenv("API_URL") + "/generate_stream"
|
9 |
|
10 |
+
def predict_old(inputs, top_p, temperature, top_k, repetition_penalty, history=[]):
|
11 |
if not inputs.startswith("User: "):
|
12 |
inputs = "User: " + inputs + "\n"
|
13 |
payload = {
|
|
|
48 |
|
49 |
yield chat, history #resembles {chatbot: chat, state: history}
|
50 |
|
51 |
+
|
52 |
+
def predict(inputs, top_p, temperature, top_k, repetition_penalty, history=None):
    """Stream tokens from the text-generation SSE endpoint, yielding chat updates.

    Posts the prompt to the streaming endpoint and, for every server-sent
    event received, yields the accumulated conversation so the UI can
    re-render incrementally (Gradio generator convention).

    Parameters
    ----------
    inputs : str
        User message; prefixed with ``"User: "`` (plus a trailing newline)
        if not already.
    top_p, temperature, top_k, repetition_penalty :
        Sampling parameters forwarded verbatim to the endpoint
        (typical values: 0.95, 0.5, 10, 1.03).
    history : list[str] | None
        Flat list of alternating user/bot turns, mutated in place.
        Defaults to a fresh list per call. (The original used a mutable
        default argument ``history=[]``, which silently shared — and
        grew — a single list across every call that omitted it.)

    Yields
    ------
    tuple[list[tuple[str, str]], list[str]]
        ``(chat, history)`` — ``chat`` pairs history entries as
        (user, bot) tuples for the Chatbot component; ``history`` is the
        raw flat state list.
    """
    if history is None:
        history = []  # fix: fresh state per call instead of a shared default

    if not inputs.startswith("User: "):
        inputs = "User: " + inputs + "\n"

    payload = {
        "inputs": inputs,
        "parameters": {
            "details": True,
            "do_sample": True,
            "max_new_tokens": 100,
            "repetition_penalty": repetition_penalty,
            "seed": 0,
            "temperature": temperature,
            "top_k": top_k,
            "top_p": top_p,
        },
    }

    headers = {
        'accept': 'text/event-stream',
        'Content-Type': 'application/json'
    }

    history.append(inputs)
    # NOTE(review): API_URL2 is not defined in this view (only API_URL is);
    # presumably assigned elsewhere in the module — confirm, or this raises
    # NameError at call time.
    response = requests.post(API_URL2, headers=headers, json=payload, stream=True)

    token_counter = 0
    partial_words = ""
    for chunk in response.iter_lines():
        if not chunk:
            continue  # skip SSE keep-alive / blank lines
        # Each event line looks like b"data:{...json...}"; strip the
        # 5-character "data:" prefix before parsing the JSON payload.
        partial_words = partial_words + json.loads(chunk.decode()[5:])['token']['text']
        time.sleep(0.05)  # small throttle so the UI updates smoothly
        if token_counter == 0:
            history.append(" " + partial_words)  # open the bot turn
        else:
            history[-1] = partial_words  # grow the bot turn in place
        # Pair the flat history into (user, bot) tuples for the Chatbot widget.
        chat = [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)]
        token_counter += 1
        yield chat, history  # resembles {chatbot: chat, state: history}
|
93 |
+
|
94 |
+
|
95 |
title = """<h1 align="center">Streaming your Chatbot output with Gradio</h1>"""
|
96 |
description = """Language models can be conditioned to act like dialogue agents through a conversational prompt that typically takes the form:
|
97 |
```
|