update code for real streaming
Browse files
app.py
CHANGED
@@ -7,7 +7,7 @@ import requests
|
|
7 |
#Streaming endpoint
|
8 |
API_URL = os.getenv("API_URL") + "/generate_stream"
|
9 |
|
10 |
-
def
|
11 |
if not inputs.startswith("User: "):
|
12 |
inputs = "User: " + inputs + "\n"
|
13 |
payload = {
|
@@ -48,6 +48,50 @@ def predict(inputs, top_p, temperature, top_k, repetition_penalty, history=[]):
|
|
48 |
|
49 |
yield chat, history #resembles {chatbot: chat, state: history}
|
50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
title = """<h1 align="center">Streaming your Chatbot output with Gradio</h1>"""
|
52 |
description = """Language models can be conditioned to act like dialogue agents through a conversational prompt that typically takes the form:
|
53 |
```
|
|
|
7 |
#Streaming endpoint
|
8 |
API_URL = os.getenv("API_URL") + "/generate_stream"
|
9 |
|
10 |
+
def predict_old(inputs, top_p, temperature, top_k, repetition_penalty, history=[]):
|
11 |
if not inputs.startswith("User: "):
|
12 |
inputs = "User: " + inputs + "\n"
|
13 |
payload = {
|
|
|
48 |
|
49 |
yield chat, history #resembles {chatbot: chat, state: history}
|
50 |
|
51 |
+
|
52 |
+
def predict(inputs, top_p, temperature, top_k, repetition_penalty, history=None):
    """Stream tokens from the text-generation SSE endpoint, yielding chat updates.

    Posts the prompt to the streaming endpoint and, for every server-sent
    event received, yields the accumulated conversation so the UI can
    re-render incrementally (Gradio generator convention).

    Parameters
    ----------
    inputs : str
        User message; prefixed with ``"User: "`` (plus a trailing newline)
        if not already.
    top_p, temperature, top_k, repetition_penalty :
        Sampling parameters forwarded verbatim to the endpoint
        (typical values: 0.95, 0.5, 10, 1.03).
    history : list[str] | None
        Flat list of alternating user/bot turns, mutated in place.
        Defaults to a fresh list per call. (The original used a mutable
        default argument ``history=[]``, which silently shared — and
        grew — a single list across every call that omitted it.)

    Yields
    ------
    tuple[list[tuple[str, str]], list[str]]
        ``(chat, history)`` — ``chat`` pairs history entries as
        (user, bot) tuples for the Chatbot component; ``history`` is the
        raw flat state list.
    """
    if history is None:
        history = []  # fix: fresh state per call instead of a shared default

    if not inputs.startswith("User: "):
        inputs = "User: " + inputs + "\n"

    payload = {
        "inputs": inputs,
        "parameters": {
            "details": True,
            "do_sample": True,
            "max_new_tokens": 100,
            "repetition_penalty": repetition_penalty,
            "seed": 0,
            "temperature": temperature,
            "top_k": top_k,
            "top_p": top_p,
        },
    }

    headers = {
        'accept': 'text/event-stream',
        'Content-Type': 'application/json'
    }

    history.append(inputs)
    # NOTE(review): API_URL2 is not defined in this view (only API_URL is);
    # presumably assigned elsewhere in the module — confirm, or this raises
    # NameError at call time.
    response = requests.post(API_URL2, headers=headers, json=payload, stream=True)

    token_counter = 0
    partial_words = ""
    for chunk in response.iter_lines():
        if not chunk:
            continue  # skip SSE keep-alive / blank lines
        # Each event line looks like b"data:{...json...}"; strip the
        # 5-character "data:" prefix before parsing the JSON payload.
        partial_words = partial_words + json.loads(chunk.decode()[5:])['token']['text']
        time.sleep(0.05)  # small throttle so the UI updates smoothly
        if token_counter == 0:
            history.append(" " + partial_words)  # open the bot turn
        else:
            history[-1] = partial_words  # grow the bot turn in place
        # Pair the flat history into (user, bot) tuples for the Chatbot widget.
        chat = [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)]
        token_counter += 1
        yield chat, history  # resembles {chatbot: chat, state: history}
|
93 |
+
|
94 |
+
|
95 |
title = """<h1 align="center">Streaming your Chatbot output with Gradio</h1>"""
|
96 |
description = """Language models can be conditioned to act like dialogue agents through a conversational prompt that typically takes the form:
|
97 |
```
|