⚡️ Limit the number of tokens sent to the backend (#93)
Browse files
.env
CHANGED
@@ -6,6 +6,7 @@ MONGODB_DB_NAME=chat-ui
|
|
6 |
HF_TOKEN=#your huggingface token here
|
7 |
COOKIE_NAME=hf-chat
|
8 |
|
|
|
9 |
PUBLIC_ORIGIN=#https://hf.co
|
10 |
PUBLIC_MODEL_ENDPOINT=https://api-inference.huggingface.co/models/OpenAssistant/oasst-sft-6-llama-30b
|
11 |
PUBLIC_MODEL_NAME=OpenAssistant/oasst-sft-6-llama-30b # public facing link
|
|
|
6 |
HF_TOKEN=#your huggingface token here
|
7 |
COOKIE_NAME=hf-chat
|
8 |
|
9 |
+
PUBLIC_MAX_INPUT_TOKENS=1024
|
10 |
PUBLIC_ORIGIN=#https://hf.co
|
11 |
PUBLIC_MODEL_ENDPOINT=https://api-inference.huggingface.co/models/OpenAssistant/oasst-sft-6-llama-30b
|
12 |
PUBLIC_MODEL_NAME=OpenAssistant/oasst-sft-6-llama-30b # public facing link
|
src/lib/buildPrompt.ts
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import {
|
2 |
PUBLIC_ASSISTANT_MESSAGE_TOKEN,
|
|
|
3 |
PUBLIC_SEP_TOKEN,
|
4 |
PUBLIC_USER_MESSAGE_TOKEN,
|
5 |
} from "$env/static/public";
|
@@ -11,7 +12,7 @@ import type { Message } from "./types/Message";
|
|
11 |
* <|assistant|>hi<|endoftext|><|prompter|>hello<|endoftext|><|assistant|>
|
12 |
*/
|
13 |
export function buildPrompt(messages: Message[]): string {
|
14 |
-
|
15 |
messages
|
16 |
.map(
|
17 |
(m) =>
|
@@ -20,6 +21,8 @@ export function buildPrompt(messages: Message[]): string {
|
|
20 |
: PUBLIC_ASSISTANT_MESSAGE_TOKEN + m.content) +
|
21 |
(m.content.endsWith(PUBLIC_SEP_TOKEN) ? "" : PUBLIC_SEP_TOKEN)
|
22 |
)
|
23 |
-
.join("") + PUBLIC_ASSISTANT_MESSAGE_TOKEN
|
24 |
-
|
|
|
|
|
25 |
}
|
|
|
1 |
import {
|
2 |
PUBLIC_ASSISTANT_MESSAGE_TOKEN,
|
3 |
+
PUBLIC_MAX_INPUT_TOKENS,
|
4 |
PUBLIC_SEP_TOKEN,
|
5 |
PUBLIC_USER_MESSAGE_TOKEN,
|
6 |
} from "$env/static/public";
|
|
|
12 |
* <|assistant|>hi<|endoftext|><|prompter|>hello<|endoftext|><|assistant|>
|
13 |
*/
|
14 |
export function buildPrompt(messages: Message[]): string {
|
15 |
+
const prompt =
|
16 |
messages
|
17 |
.map(
|
18 |
(m) =>
|
|
|
21 |
: PUBLIC_ASSISTANT_MESSAGE_TOKEN + m.content) +
|
22 |
(m.content.endsWith(PUBLIC_SEP_TOKEN) ? "" : PUBLIC_SEP_TOKEN)
|
23 |
)
|
24 |
+
.join("") + PUBLIC_ASSISTANT_MESSAGE_TOKEN;
|
25 |
+
|
26 |
+
// Rough cut: keeps the last N space-separated words, which only approximates the token count; the model's backend truncates anyway
|
27 |
+
return prompt.split(" ").slice(-parseInt(PUBLIC_MAX_INPUT_TOKENS)).join(" ");
|
28 |
}
|
src/routes/conversation/[id]/+page.svelte
CHANGED
@@ -8,7 +8,7 @@
|
|
8 |
import { invalidate } from "$app/navigation";
|
9 |
import { base } from "$app/paths";
|
10 |
import { trimSuffix } from "$lib/utils/trimSuffix";
|
11 |
-
import { PUBLIC_SEP_TOKEN } from "$env/static/public";
|
12 |
import { trimPrefix } from "$lib/utils/trimPrefix";
|
13 |
import { shareConversation } from "$lib/shareConversation";
|
14 |
import { UrlDependency } from "$lib/types/UrlDependency";
|
@@ -41,7 +41,7 @@
|
|
41 |
repetition_penalty: 1.2,
|
42 |
top_k: 50,
|
43 |
// @ts-ignore
|
44 |
-
truncate:
|
45 |
watermark: false,
|
46 |
max_new_tokens: 1024,
|
47 |
stop: ["<|endoftext|>"],
|
|
|
8 |
import { invalidate } from "$app/navigation";
|
9 |
import { base } from "$app/paths";
|
10 |
import { trimSuffix } from "$lib/utils/trimSuffix";
|
11 |
+
import { PUBLIC_SEP_TOKEN, PUBLIC_MAX_INPUT_TOKENS } from "$env/static/public";
|
12 |
import { trimPrefix } from "$lib/utils/trimPrefix";
|
13 |
import { shareConversation } from "$lib/shareConversation";
|
14 |
import { UrlDependency } from "$lib/types/UrlDependency";
|
|
|
41 |
repetition_penalty: 1.2,
|
42 |
top_k: 50,
|
43 |
// @ts-ignore
|
44 |
+
truncate: parseInt(PUBLIC_MAX_INPUT_TOKENS),
|
45 |
watermark: false,
|
46 |
max_new_tokens: 1024,
|
47 |
stop: ["<|endoftext|>"],
|
src/routes/conversation/[id]/summarize/+server.ts
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import { HF_TOKEN } from "$env/static/private";
|
2 |
-
import { PUBLIC_MODEL_ENDPOINT } from "$env/static/public";
|
3 |
import { buildPrompt } from "$lib/buildPrompt";
|
4 |
import { collections } from "$lib/server/database.js";
|
5 |
import { textGeneration } from "@huggingface/inference";
|
@@ -33,6 +33,7 @@ export async function POST({ params, locals, fetch }) {
|
|
33 |
top_k: 50,
|
34 |
watermark: false,
|
35 |
max_new_tokens: 1024,
|
|
|
36 |
stop: ["<|endoftext|>"],
|
37 |
return_full_text: false,
|
38 |
};
|
|
|
1 |
import { HF_TOKEN } from "$env/static/private";
|
2 |
+
import { PUBLIC_MAX_INPUT_TOKENS, PUBLIC_MODEL_ENDPOINT } from "$env/static/public";
|
3 |
import { buildPrompt } from "$lib/buildPrompt";
|
4 |
import { collections } from "$lib/server/database.js";
|
5 |
import { textGeneration } from "@huggingface/inference";
|
|
|
33 |
top_k: 50,
|
34 |
watermark: false,
|
35 |
max_new_tokens: 1024,
|
36 |
+
truncate: parseInt(PUBLIC_MAX_INPUT_TOKENS),
|
37 |
stop: ["<|endoftext|>"],
|
38 |
return_full_text: false,
|
39 |
};
|