fix stop tokens to match new prompt formatting, stream instruct response, add comments about concurrency to config
- config.yml +3 -2
- tabbed.py +6 -3
config.yml
CHANGED
@@ -10,7 +10,8 @@ chat:
   stop:
     - "</s>"
     - "<unk>"
-    - "###
+    - "### USER:"
+    - "USER:"
 queue:
   max_size: 16
-  concurrency_count: 1
+  concurrency_count: 1 # leave this at 1, llama-cpp-python doesn't handle concurrent requests and will crash the entire app
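For reference, a minimal sketch (not the Space's actual loading code) of how a config.yml like this is typically consumed: the chat section, including the stop strings above, is loaded from YAML and unpacked into a llama-cpp-python call, which is why the stop entries have to match the new prompt's role markers. The model path and example prompt below are placeholders, not taken from the diff.

# Sketch only: assumes config.yml sits next to the script and that the chat
# section also enables streaming (e.g. stream: true), which isn't visible in
# this hunk.
import yaml
from llama_cpp import Llama

with open("config.yml") as f:
    config = yaml.safe_load(f)

llm = Llama(model_path="path/to/model.gguf")  # placeholder path

prompt = "### Instruction:\nSay hello.\n\n### Response:\n"
for output in llm(prompt, echo=False, **config["chat"]):
    # each chunk carries the next piece of text; the stop strings end
    # generation as soon as the model starts writing a new user turn
    print(output["choices"][0]["text"], end="", flush=True)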
tabbed.py
CHANGED
@@ -49,7 +49,7 @@ def chat(history, system_message, max_tokens, temperature, top_p, top_k, repeat_
     ):
         answer = output['choices'][0]['text']
         history[-1][1] += answer
-
+        # stream the response
         yield history, history
 
 
@@ -66,8 +66,11 @@ start_message = """
 
 
 def generate_text_instruct(input_text):
-
-
+    response = ""
+    for output in llm(f"### Instruction:\n{input_text}\n\n### Response:\n", echo=False, **config['chat']):
+        answer = output['choices'][0]['text']
+        response += answer
+        yield response
 
 
 instruct_interface = gr.Interface(
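A rough sketch of how the streaming generator and the queue settings from config.yml typically fit together: only generate_text_instruct, the config keys, and the concurrency_count=1 requirement come from this commit; the interface widgets and the Gradio 3.x queue call are assumptions.

# Sketch only (Gradio 3.x API): gr.Interface streams each value yielded by
# generate_text_instruct, and concurrency_count=1 keeps requests serialized so
# the single llama-cpp-python instance never runs two generations at once.
import gradio as gr

instruct_interface = gr.Interface(
    fn=generate_text_instruct,               # generator from tabbed.py
    inputs=gr.Textbox(label="Instruction"),  # assumed widgets, not shown in the diff
    outputs=gr.Textbox(label="Response"),
)

instruct_interface.queue(
    max_size=config["queue"]["max_size"],                    # 16 in config.yml
    concurrency_count=config["queue"]["concurrency_count"],  # 1, per the new comment
).launch()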