kwabs22
committed on
Commit
·
ee032a8
1
Parent(s):
d9e0520
After the bufsize=1 change, exploring word-level or token-level streaming
Browse files
app.py
CHANGED
@@ -46,6 +46,7 @@ def generate_response_token_by_token(user_message):
|
|
46 |
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, bufsize=1)
|
47 |
|
48 |
start_time = time.time()
|
|
|
49 |
token_buffer = ''
|
50 |
while True:
|
51 |
# Read one character at a time
|
@@ -56,7 +57,8 @@ def generate_response_token_by_token(user_message):
|
|
56 |
token_buffer += char
|
57 |
if char == ' ' or char == '\n': # Token delimiters
|
58 |
elapsed_time = time.time() - start_time # Calculate elapsed time
|
59 |
-
|
|
|
60 |
token_buffer = '' # Reset token buffer
|
61 |
|
62 |
# Yield the last token if there is any
|
|
|
46 |
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, bufsize=1)
|
47 |
|
48 |
start_time = time.time()
|
49 |
+
alltokens = ""
|
50 |
token_buffer = ''
|
51 |
while True:
|
52 |
# Read one character at a time
|
|
|
57 |
token_buffer += char
|
58 |
if char == ' ' or char == '\n': # Token delimiters
|
59 |
elapsed_time = time.time() - start_time # Calculate elapsed time
|
60 |
+
alltokens += token_buffer
|
61 |
+
yield f"{alltokens} [Inference time: {elapsed_time:.2f} seconds]"
|
62 |
token_buffer = '' # Reset token buffer
|
63 |
|
64 |
# Yield the last token if there is any
|