Add gemma model to prod config (#854)
* Add `google/gemma-7b-it` to production config
* Add gemma prompt format to PROMPTS.md
* Make sampling parameters optional
- .env.template +28 -1
- PROMPTS.md +6 -0
- src/lib/server/models.ts +2 -2
.env.template
CHANGED
```diff
@@ -31,7 +31,34 @@ MODELS=`[
       }
     ]
   },
-  {
+  {
+    "name" : "google/gemma-7b-it",
+    "description": "Gemma 7B is from a family of lightweight models from Google built from the same research and technology used to create the Gemini models.",
+    "websiteUrl" : "https://add-link-to-their-comms-here.com/",
+    "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/google-logo.png",
+    "modelUrl": "https://huggingface.co/google/gemma-7b-it",
+    "preprompt": "",
+    "chatPromptTemplate" : "{{#each messages}}{{#ifUser}}<start_of_turn>user\n{{#if @first}}{{#if @root.preprompt}}{{@root.preprompt}}\n{{/if}}{{/if}}{{content}}<end_of_turn>\n<start_of_turn>model\n{{/ifUser}}{{#ifAssistant}}{{content}}<end_of_turn>\n{{/ifAssistant}}{{/each}}",
+    "promptExamples": [
+      {
+        "title": "Write an email from bullet list",
+        "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)"
+      }, {
+        "title": "Code a snake game",
+        "prompt": "Code a basic snake game in python, give explanations for each step."
+      }, {
+        "title": "Assist in a task",
+        "prompt": "How do I make a delicious lemon cheesecake?"
+      }
+    ],
+    "parameters": {
+      "do_sample": true,
+      "truncate": 7168,
+      "max_new_tokens": 1024,
+      "stop" : ["<end_of_turn>"]
+    }
+  },
+  {
     "name": "meta-llama/Llama-2-70b-chat-hf",
     "description": "The latest and biggest model from Meta, fine-tuned for chat.",
     "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/meta-logo.png",
```
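A note on the numbers above: `truncate` caps the prompt at 7168 tokens and `max_new_tokens` reserves 1024 for the reply, which together fill Gemma's 8192-token context window exactly, and the `stop` sequence matches the `<end_of_turn>` delimiter the chat template emits. A minimal TypeScript sketch of that budget check (the object below simply retypes the `parameters` block for illustration; it is not chat-ui code):

```ts
// The gemma "parameters" block from the diff above, retyped for illustration.
const gemmaParameters = {
	do_sample: true,
	truncate: 7168, // prompt budget: inputs longer than this are truncated
	max_new_tokens: 1024, // generation budget for the model's reply
	stop: ["<end_of_turn>"], // matches the turn delimiter in chatPromptTemplate
};

// Gemma 7B has an 8192-token context window; the two budgets fill it exactly.
console.assert(gemmaParameters.truncate + gemmaParameters.max_new_tokens === 8192);
```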
PROMPTS.md
CHANGED
````diff
@@ -61,3 +61,9 @@ System: {{preprompt}}\nUser:{{#each messages}}{{#ifUser}}{{content}}\nFalcon:{{/
 ```env
 <s>{{#if @root.preprompt}}Source: system\n\n {{@root.preprompt}} <step> {{/if}}{{#each messages}}{{#ifUser}}Source: user\n\n {{content}} <step> {{/ifUser}}{{#ifAssistant}}Source: assistant\n\n {{content}} <step> {{/ifAssistant}}{{/each}}Source: assistant\nDestination: user\n\n ``
 ```
+
+## Gemma
+
+```env
+{{#each messages}}{{#ifUser}}<start_of_turn>user\n{{#if @first}}{{#if @root.preprompt}}{{@root.preprompt}}\n{{/if}}{{/if}}{{content}}<end_of_turn>\n<start_of_turn>model\n{{/ifUser}}{{#ifAssistant}}{{content}}<end_of_turn>\n{{/ifAssistant}}{{/each}}
+```
````
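To make the template concrete, here is a sketch of the logic it encodes (chat-ui renders the real template with its Handlebars-style `ifUser`/`ifAssistant` helpers; the `Message` shape here is an assumption for illustration):

```ts
type Message = { from: "user" | "assistant"; content: string };

// Mirrors the Gemma template: the preprompt is folded into the first user
// turn, and every user turn opens a "model" turn for the assistant to fill.
function buildGemmaPrompt(messages: Message[], preprompt = ""): string {
	let out = "";
	messages.forEach((msg, i) => {
		if (msg.from === "user") {
			const sys = i === 0 && preprompt ? `${preprompt}\n` : "";
			out += `<start_of_turn>user\n${sys}${msg.content}<end_of_turn>\n<start_of_turn>model\n`;
		} else {
			out += `${msg.content}<end_of_turn>\n`;
		}
	});
	return out;
}
```

For a single user message `"Hi"` with no preprompt this yields `<start_of_turn>user\nHi<end_of_turn>\n<start_of_turn>model\n`, which is why the config also stops generation at `<end_of_turn>`.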
src/lib/server/models.ts
CHANGED
```diff
@@ -58,9 +58,9 @@ const modelConfig = z.object({
 	endpoints: z.array(endpointSchema).optional(),
 	parameters: z
 		.object({
-			temperature: z.number().min(0).max(1),
+			temperature: z.number().min(0).max(1).optional(),
 			truncate: z.number().int().positive().optional(),
-			max_new_tokens: z.number().int().positive(),
+			max_new_tokens: z.number().int().positive().optional(),
 			stop: z.array(z.string()).optional(),
 			top_p: z.number().positive().optional(),
 			top_k: z.number().positive().optional(),
```
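The practical effect of the two new `.optional()` calls: a `parameters` block that omits `temperature` or `max_new_tokens` (the gemma entry above sets no `temperature`) now passes validation instead of being rejected. A minimal sketch with zod, reduced to the `temperature` field:

```ts
import { z } from "zod";

const before = z.object({ temperature: z.number().min(0).max(1) });
const after = z.object({ temperature: z.number().min(0).max(1).optional() });

// The gemma entry's parameters, which set no temperature.
const gemmaParams = { do_sample: true, truncate: 7168, max_new_tokens: 1024 };

console.log(before.safeParse(gemmaParams).success); // false: temperature was required
console.log(after.safeParse(gemmaParams).success); // true: temperature may be omitted
```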