Update app.py
app.py
CHANGED
@@ -35,7 +35,7 @@ model = AutoModelForCausalLM.from_pretrained(
 tokenizer = AutoTokenizer.from_pretrained("THUDM/LongWriter-glm4-9b",trust_remote_code=True)
 
 
-@spaces.GPU
+@spaces.GPU()
 def stream_chat(message: str, history: list, temperature: float, max_length: int):
     print(f'message is - {message}')
     print(f'history is - {history}')
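On ZeroGPU Spaces, `spaces.GPU` marks the function that needs a GPU so one can be attached for the duration of each call. Both the bare `@spaces.GPU` and the called `@spaces.GPU()` forms are accepted by the `spaces` package; the called form is the one that takes optional arguments (e.g. a `duration` budget for long generations), which is presumably why the commit switches to it.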
@@ -61,13 +61,12 @@ def stream_chat(message: str, history: list, temperature: float, max_length: int):
     )
     gen_kwargs = {**input_ids, **generate_kwargs}
 
-
-
-
-
-
-
-    yield buffer
+    thread = Thread(target=model.generate, kwargs=gen_kwargs)
+    thread.start()
+    buffer = ""
+    for new_text in streamer:
+        buffer += new_text
+        yield buffer
 
 chatbot = gr.Chatbot(height=600, placeholder = PLACEHOLDER)
 
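The added lines are the standard transformers threaded-streaming pattern: `model.generate` blocks until generation finishes, so it runs on a worker thread while the generator function drains the streamer and yields the growing reply for Gradio to render. Below is a minimal self-contained sketch of that pattern; it assumes `streamer` is a `transformers.TextIteratorStreamer` built from the same tokenizer (its construction sits outside this diff), and the generation parameters are illustrative, not the Space's actual values.

from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Model and tokenizer ids copied from the diff; everything else is a sketch.
tokenizer = AutoTokenizer.from_pretrained("THUDM/LongWriter-glm4-9b", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("THUDM/LongWriter-glm4-9b", trust_remote_code=True)

def stream_chat(message: str):
    input_ids = tokenizer(message, return_tensors="pt")
    # skip_prompt=True keeps the echoed prompt out of the streamed text
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    gen_kwargs = {**input_ids, "streamer": streamer, "max_new_tokens": 512}

    # generate() blocks, so run it on a worker thread and consume the
    # streamer from this generator as tokens arrive
    thread = Thread(target=model.generate, kwargs=gen_kwargs)
    thread.start()

    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer  # Gradio redraws the partial reply on every yield

Yielding the accumulated `buffer` rather than each `new_text` chunk is deliberate: `gr.Chatbot` replaces the pending message with whatever is yielded, so each yield must carry the full reply so far.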