Spaces:

huggingfacejs
/

inference-widgets

Running on CPU Upgrade

App Files Files Community

machineuser committed on Apr 30, 2024

Commit

ac33c34

1 Parent(s): 507971e

Sync widgets demo

Browse files

Files changed (16) hide show

packages/jinja/test/e2e.test.js +2 -2
packages/tasks/package.json +7 -2
packages/tasks/pnpm-lock.yaml +52 -0
packages/tasks/scripts/inference-codegen.ts +1 -1
packages/tasks/scripts/inference-tgi-import.ts +115 -0
packages/tasks/src/tasks/chat-completion/inference.ts +204 -85
packages/tasks/src/tasks/chat-completion/spec/input.json +198 -34
packages/tasks/src/tasks/chat-completion/spec/output.json +178 -40
packages/tasks/src/tasks/chat-completion/spec/output_stream.json +0 -48
packages/tasks/src/tasks/chat-completion/spec/stream_output.json +170 -0
packages/tasks/src/tasks/index.ts +7 -8
packages/tasks/src/tasks/text-generation/inference.ts +58 -170
packages/tasks/src/tasks/text-generation/spec/input.json +130 -29
packages/tasks/src/tasks/text-generation/spec/output.json +104 -90
packages/tasks/src/tasks/text-generation/spec/output_stream.json +0 -47
packages/tasks/src/tasks/text-generation/spec/stream_output.json +97 -0

packages/jinja/test/e2e.test.js CHANGED Viewed

@@ -192,7 +192,7 @@ const TEST_CUSTOM_TEMPLATES = Object.freeze({
 		},
 		target: `<bos><|im_start|>user\nHello, how are you?<|im_end|>\n<|im_start|>assistant\nI'm doing great. How can I help you today?<|im_end|>\n<|im_start|>user\nI'd like to show off how chat templating works!<|im_end|>\n`,
 	},
-	"mistralai/Mistral-7B-Instruct-v0.1": {
 		chat_template: `{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}`,
 		data: {
 			messages: EXAMPLE_CHAT,
@@ -440,7 +440,7 @@ describe("End-to-end tests", () => {
 	});
 	it("should parse a chat template from the Hugging Face Hub", async () => {
-		const repo = "mistralai/Mistral-7B-Instruct-v0.1";
 		const tokenizerConfig = await (
 			await downloadFile({
 				repo,

 		},
 		target: `<bos><|im_start|>user\nHello, how are you?<|im_end|>\n<|im_start|>assistant\nI'm doing great. How can I help you today?<|im_end|>\n<|im_start|>user\nI'd like to show off how chat templating works!<|im_end|>\n`,
 	},
+	"TheBloke/Mistral-7B-Instruct-v0.1-GPTQ": {
 		chat_template: `{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}`,
 		data: {
 			messages: EXAMPLE_CHAT,
 	});
 	it("should parse a chat template from the Hugging Face Hub", async () => {
+		const repo = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ";
 		const tokenizerConfig = await (
 			await downloadFile({
 				repo,

packages/tasks/package.json CHANGED Viewed

@@ -27,7 +27,8 @@
 		"build": "tsup src/index.ts --format cjs,esm --clean --dts && pnpm run inference-codegen",
 		"prepare": "pnpm run build",
 		"check": "tsc",
-		"inference-codegen": "tsx scripts/inference-codegen.ts && prettier --write src/tasks/*/inference.ts"
 	},
 	"type": "module",
 	"files": [
@@ -44,6 +45,10 @@
 	"license": "MIT",
 	"devDependencies": {
 		"@types/node": "^20.11.5",
-		"quicktype-core": "https://github.com/huggingface/quicktype/raw/pack-18.0.17/packages/quicktype-core/quicktype-core-18.0.17.tgz"
 	}
 }

 		"build": "tsup src/index.ts --format cjs,esm --clean --dts && pnpm run inference-codegen",
 		"prepare": "pnpm run build",
 		"check": "tsc",
+		"inference-codegen": "tsx scripts/inference-codegen.ts && prettier --write src/tasks/*/inference.ts",
+		"inference-tgi-import": "tsx scripts/inference-tgi-import.ts && prettier --write src/tasks/text-generation/spec/*.json && prettier --write src/tasks/chat-completion/spec/*.json"
 	},
 	"type": "module",
 	"files": [
 	"license": "MIT",
 	"devDependencies": {
 		"@types/node": "^20.11.5",
+		"quicktype-core": "https://github.com/huggingface/quicktype/raw/pack-18.0.17/packages/quicktype-core/quicktype-core-18.0.17.tgz",
+		"type-fest": "^3.13.1"
+	},
+	"dependencies": {
+		"node-fetch": "^3.3.2"
 	}
 }

packages/tasks/pnpm-lock.yaml CHANGED Viewed

@@ -4,6 +4,11 @@ settings:
   autoInstallPeers: true
   excludeLinksFromLockfile: false
 devDependencies:
   '@types/node':
     specifier: ^20.11.5
@@ -11,6 +16,9 @@ devDependencies:
   quicktype-core:
     specifier: https://github.com/huggingface/quicktype/raw/pack-18.0.17/packages/quicktype-core/quicktype-core-18.0.17.tgz
     version: '@github.com/huggingface/quicktype/raw/pack-18.0.17/packages/quicktype-core/quicktype-core-18.0.17.tgz'
 packages:
@@ -62,6 +70,11 @@ packages:
       - encoding
     dev: true
   /event-target-shim@5.0.1:
     resolution: {integrity: sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==}
     engines: {node: '>=6'}
@@ -72,6 +85,21 @@ packages:
     engines: {node: '>=0.8.x'}
     dev: true
   /ieee754@1.2.1:
     resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==}
     dev: true
@@ -88,6 +116,11 @@ packages:
     resolution: {integrity: sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==}
     dev: true
   /node-fetch@2.7.0:
     resolution: {integrity: sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==}
     engines: {node: 4.x || >=6.0.0}
@@ -100,6 +133,15 @@ packages:
       whatwg-url: 5.0.0
     dev: true
   /pako@0.2.9:
     resolution: {integrity: sha512-NUcwaKxUxWrZLpDG+z/xZaCgQITkA/Dv4V/T6bw7VON6l1Xz/VnrBqrYjZQ12TamKHzITTfOEIYUj48y2KXImA==}
     dev: true
@@ -147,6 +189,11 @@ packages:
     resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==}
     dev: true
   /undici-types@5.26.5:
     resolution: {integrity: sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==}
     dev: true
@@ -169,6 +216,11 @@ packages:
     resolution: {integrity: sha512-HXgFDgDommxn5/bIv0cnQZsPhHDA90NPHD6+c/v21U5+Sx5hoP8+dP9IZXBU1gIfvdRfhG8cel9QNPeionfcCQ==}
     dev: true
   /webidl-conversions@3.0.1:
     resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==}
     dev: true

   autoInstallPeers: true
   excludeLinksFromLockfile: false
+dependencies:
+  node-fetch:
+    specifier: ^3.3.2
+    version: 3.3.2
 devDependencies:
   '@types/node':
     specifier: ^20.11.5
   quicktype-core:
     specifier: https://github.com/huggingface/quicktype/raw/pack-18.0.17/packages/quicktype-core/quicktype-core-18.0.17.tgz
     version: '@github.com/huggingface/quicktype/raw/pack-18.0.17/packages/quicktype-core/quicktype-core-18.0.17.tgz'
+  type-fest:
+    specifier: ^3.13.1
+    version: 3.13.1
 packages:
       - encoding
     dev: true
+  /data-uri-to-buffer@4.0.1:
+    resolution: {integrity: sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==}
+    engines: {node: '>= 12'}
+    dev: false
   /event-target-shim@5.0.1:
     resolution: {integrity: sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==}
     engines: {node: '>=6'}
     engines: {node: '>=0.8.x'}
     dev: true
+  /fetch-blob@3.2.0:
+    resolution: {integrity: sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==}
+    engines: {node: ^12.20 || >= 14.13}
+    dependencies:
+      node-domexception: 1.0.0
+      web-streams-polyfill: 3.3.3
+    dev: false
+  /formdata-polyfill@4.0.10:
+    resolution: {integrity: sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==}
+    engines: {node: '>=12.20.0'}
+    dependencies:
+      fetch-blob: 3.2.0
+    dev: false
   /ieee754@1.2.1:
     resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==}
     dev: true
     resolution: {integrity: sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==}
     dev: true
+  /node-domexception@1.0.0:
+    resolution: {integrity: sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==}
+    engines: {node: '>=10.5.0'}
+    dev: false
   /node-fetch@2.7.0:
     resolution: {integrity: sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==}
     engines: {node: 4.x || >=6.0.0}
       whatwg-url: 5.0.0
     dev: true
+  /node-fetch@3.3.2:
+    resolution: {integrity: sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==}
+    engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0}
+    dependencies:
+      data-uri-to-buffer: 4.0.1
+      fetch-blob: 3.2.0
+      formdata-polyfill: 4.0.10
+    dev: false
   /pako@0.2.9:
     resolution: {integrity: sha512-NUcwaKxUxWrZLpDG+z/xZaCgQITkA/Dv4V/T6bw7VON6l1Xz/VnrBqrYjZQ12TamKHzITTfOEIYUj48y2KXImA==}
     dev: true
     resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==}
     dev: true
+  /type-fest@3.13.1:
+    resolution: {integrity: sha512-tLq3bSNx+xSpwvAJnzrK0Ep5CLNWjvFTOp71URMaAEWBfRb9nnJiBoUe0tF8bI4ZFO3omgBR6NvnbzVUT3Ly4g==}
+    engines: {node: '>=14.16'}
+    dev: true
   /undici-types@5.26.5:
     resolution: {integrity: sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==}
     dev: true
     resolution: {integrity: sha512-HXgFDgDommxn5/bIv0cnQZsPhHDA90NPHD6+c/v21U5+Sx5hoP8+dP9IZXBU1gIfvdRfhG8cel9QNPeionfcCQ==}
     dev: true
+  /web-streams-polyfill@3.3.3:
+    resolution: {integrity: sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==}
+    engines: {node: '>= 8'}
+    dev: false
   /webidl-conversions@3.0.1:
     resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==}
     dev: true

packages/tasks/scripts/inference-codegen.ts CHANGED Viewed

@@ -57,7 +57,7 @@ async function buildInputData(taskId: string, taskSpecDir: string, allSpecFiles:
 	if (taskId === "text-generation" || taskId === "chat-completion") {
 		await schema.addSource({
 			name: `${taskId}-stream-output`,
-			schema: await fs.readFile(`${taskSpecDir}/output_stream.json`, { encoding: "utf-8" }),
 		});
 	}
 	const inputData = new InputData();

 	if (taskId === "text-generation" || taskId === "chat-completion") {
 		await schema.addSource({
 			name: `${taskId}-stream-output`,
+			schema: await fs.readFile(`${taskSpecDir}/stream_output.json`, { encoding: "utf-8" }),
 		});
 	}
 	const inputData = new InputData();

packages/tasks/scripts/inference-tgi-import.ts ADDED Viewed

	@@ -0,0 +1,115 @@

+/*
+ * Fetches TGI specs and generates JSON schemas for input, output and stream_output of
+ * text-generation and chat-completion tasks.
+ * See https://huggingface.github.io/text-generation-inference/
+ */
+import fs from "fs/promises";
+import fetch from "node-fetch";
+import * as path from "node:path/posix";
+import { existsSync as pathExists } from "node:fs";
+import type { JsonObject, JsonValue } from "type-fest";
+const URL = "https://huggingface.github.io/text-generation-inference/openapi.json";
+const rootDirFinder = function (): string {
+	let currentPath = path.normalize(import.meta.url);
+	while (currentPath !== "/") {
+		if (pathExists(path.join(currentPath, "package.json"))) {
+			return currentPath;
+		}
+		currentPath = path.normalize(path.join(currentPath, ".."));
+	}
+	return "/";
+};
+const rootDir = rootDirFinder();
+const tasksDir = path.join(rootDir, "src", "tasks");
+function toCamelCase(str: string, joiner = "") {
+	return str
+		.split(/[-_]/)
+		.map((part) => part.charAt(0).toUpperCase() + part.slice(1))
+		.join(joiner);
+}
+async function _extractAndAdapt(task: string, mainComponentName: string, type: "input" | "output" | "stream_output") {
+	console.debug(`✨ Importing`, task, type);
+	console.debug("   📥 Fetching TGI specs");
+	const response = await fetch(URL);
+	// eslint-disable-next-line @typescript-eslint/no-explicit-any
+	const openapi = (await response.json()) as any;
+	// eslint-disable-next-line @typescript-eslint/no-explicit-any
+	const components: Record<string, any> = openapi["components"]["schemas"];
+	// e.g. TextGeneration
+	const camelName = toCamelCase(task);
+	// e.g. TextGenerationInput
+	const camelFullName = camelName + toCamelCase(type);
+	const mainComponent = components[mainComponentName];
+	const filteredComponents: Record<string, JsonObject> = {};
+	function _scan(data: JsonValue) {
+		if (Array.isArray(data) || data instanceof Array) {
+			for (const item of data) {
+				_scan(item);
+			}
+		} else if (data && typeof data === "object") {
+			for (const key of Object.keys(data)) {
+				if (key === "$ref" && typeof data[key] === "string") {
+					// Verify reference exists
+					const ref = (data[key] as string).split("/").pop() ?? "";
+					if (!components[ref]) {
+						throw new Error(`Reference not found in components: ${data[key]}`);
+					}
+					// Add reference to components to export (and scan it too)
+					const newRef = camelFullName + ref.replace(camelName, "");
+					if (!filteredComponents[newRef]) {
+						components[ref]["title"] = newRef; // Rename title to avoid conflicts
+						filteredComponents[newRef] = components[ref];
+						_scan(components[ref]);
+					}
+					// Updating the reference to new format
+					data[key] = `#/$defs/${newRef}`;
+				} else {
+					_scan(data[key]);
+				}
+			}
+		}
+	}
+	console.debug("   📦 Packaging jsonschema");
+	_scan(mainComponent);
+	const prettyName = toCamelCase(task, " ") + " " + toCamelCase(type, " ");
+	const inputSchema = {
+		$id: `/inference/schemas/${task}/${type}.json`,
+		$schema: "http://json-schema.org/draft-06/schema#",
+		description:
+			prettyName +
+			".\n\nAuto-generated from TGI specs." +
+			"\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.",
+		title: camelFullName,
+		type: "object",
+		required: mainComponent["required"],
+		properties: mainComponent["properties"],
+		$defs: filteredComponents,
+	};
+	const specPath = path.join(tasksDir, task, "spec", `${type}.json`);
+	console.debug("   📂 Exporting", specPath);
+	await fs.writeFile(specPath, JSON.stringify(inputSchema, null, 4));
+}
+await _extractAndAdapt("text-generation", "CompatGenerateRequest", "input");
+await _extractAndAdapt("text-generation", "GenerateResponse", "output");
+await _extractAndAdapt("text-generation", "StreamResponse", "stream_output");
+await _extractAndAdapt("chat-completion", "ChatRequest", "input");
+await _extractAndAdapt("chat-completion", "ChatCompletion", "output");
+await _extractAndAdapt("chat-completion", "ChatCompletionChunk", "stream_output");
+console.debug("✅ All done!");

packages/tasks/src/tasks/chat-completion/inference.ts CHANGED Viewed

@@ -5,154 +5,273 @@
  */
 /**
- * Inputs for ChatCompletion inference
  */
 export interface ChatCompletionInput {
 	/**
 	 * Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing
-	 * frequency in the text so far, decreasing the model's likelihood to repeat the same line
-	 * verbatim.
 	 */
 	frequency_penalty?: number;
 	/**
 	 * The maximum number of tokens that can be generated in the chat completion.
 	 */
 	max_tokens?: number;
 	messages: ChatCompletionInputMessage[];
 	/**
-	 * The random sampling seed.
 	 */
-	seed?: number;
 	/**
-	 * Stop generating tokens if a stop token is generated.
 	 */
-	stop?: ChatCompletionInputStopReason;
 	/**
-	 * If set, partial message deltas will be sent.
 	 */
 	stream?: boolean;
 	/**
-	 * The value used to modulate the logits distribution.
 	 */
 	temperature?: number;
 	/**
-	 * If set to < 1, only the smallest set of most probable tokens with probabilities that add
-	 * up to `top_p` or higher are kept for generation.
 	 */
 	top_p?: number;
 	[property: string]: unknown;
 }
 export interface ChatCompletionInputMessage {
-	/**
-	 * The content of the message.
-	 */
-	content: string;
-	role: ChatCompletionMessageRole;
 	[property: string]: unknown;
 }
-/**
- * The role of the message author.
- */
-export type ChatCompletionMessageRole = "assistant" | "system" | "user";
-/**
- * Stop generating tokens if a stop token is generated.
- */
-export type ChatCompletionInputStopReason = string[] | string;
 /**
- * Outputs for Chat Completion inference
  */
 export interface ChatCompletionOutput {
-	/**
-	 * A list of chat completion choices.
-	 */
-	choices: ChatCompletionOutputChoice[];
-	/**
-	 * The Unix timestamp (in seconds) of when the chat completion was created.
-	 */
 	created: number;
 	[property: string]: unknown;
 }
-export interface ChatCompletionOutputChoice {
-	/**
-	 * The reason why the generation was stopped.
-	 */
-	finish_reason: ChatCompletionFinishReason;
-	/**
-	 * The index of the choice in the list of choices.
-	 */
 	index: number;
-	message: ChatCompletionOutputChoiceMessage;
 	[property: string]: unknown;
 }
-/**
- * The reason why the generation was stopped.
- *
- * The generated sequence reached the maximum allowed length
- *
- * The model generated an end-of-sentence (EOS) token
- *
- * One of the sequence in stop_sequences was generated
- */
-export type ChatCompletionFinishReason = "length" | "eos_token" | "stop_sequence";
-export interface ChatCompletionOutputChoiceMessage {
-	/**
-	 * The content of the chat completion message.
-	 */
-	content: string;
-	role: ChatCompletionMessageRole;
 	[property: string]: unknown;
 }
 /**
- * Chat Completion Stream Output
  */
 export interface ChatCompletionStreamOutput {
-	/**
-	 * A list of chat completion choices.
-	 */
 	choices: ChatCompletionStreamOutputChoice[];
-	/**
-	 * The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has
-	 * the same timestamp.
-	 */
 	created: number;
 	[property: string]: unknown;
 }
 export interface ChatCompletionStreamOutputChoice {
-	/**
-	 * A chat completion delta generated by streamed model responses.
-	 */
 	delta: ChatCompletionStreamOutputDelta;
-	/**
-	 * The reason why the generation was stopped.
-	 */
-	finish_reason?: ChatCompletionFinishReason;
-	/**
-	 * The index of the choice in the list of choices.
-	 */
 	index: number;
 	[property: string]: unknown;
 }
-/**
- * A chat completion delta generated by streamed model responses.
- */
 export interface ChatCompletionStreamOutputDelta {
-	/**
-	 * The contents of the chunk message.
-	 */
 	content?: string;
-	/**
-	 * The role of the author of this message.
-	 */
-	role?: string;
 	[property: string]: unknown;
 }

  */
 /**
+ * Chat Completion Input.
+ *
+ * Auto-generated from TGI specs.
+ * For more details, check out
+ * https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
  */
 export interface ChatCompletionInput {
 	/**
 	 * Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing
+	 * frequency in the text so far,
+	 * decreasing the model's likelihood to repeat the same line verbatim.
 	 */
 	frequency_penalty?: number;
+	/**
+	 * UNUSED
+	 * Modify the likelihood of specified tokens appearing in the completion. Accepts a JSON
+	 * object that maps tokens
+	 * (specified by their token ID in the tokenizer) to an associated bias value from -100 to
+	 * 100. Mathematically,
+	 * the bias is added to the logits generated by the model prior to sampling. The exact
+	 * effect will vary per model,
+	 * but values between -1 and 1 should decrease or increase likelihood of selection; values
+	 * like -100 or 100 should
+	 * result in a ban or exclusive selection of the relevant token.
+	 */
+	logit_bias?: number[];
+	/**
+	 * Whether to return log probabilities of the output tokens or not. If true, returns the log
+	 * probabilities of each
+	 * output token returned in the content of message.
+	 */
+	logprobs?: boolean;
 	/**
 	 * The maximum number of tokens that can be generated in the chat completion.
 	 */
 	max_tokens?: number;
+	/**
+	 * A list of messages comprising the conversation so far.
+	 */
 	messages: ChatCompletionInputMessage[];
 	/**
+	 * [UNUSED] ID of the model to use. See the model endpoint compatibility table for details
+	 * on which models work with the Chat API.
 	 */
+	model: string;
+	/**
+	 * UNUSED
+	 * How many chat completion choices to generate for each input message. Note that you will
+	 * be charged based on the
+	 * number of generated tokens across all of the choices. Keep n as 1 to minimize costs.
+	 */
+	n?: number;
 	/**
+	 * Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they
+	 * appear in the text so far,
+	 * increasing the model's likelihood to talk about new topics
 	 */
+	presence_penalty?: number;
+	seed?: number;
 	/**
+	 * Up to 4 sequences where the API will stop generating further tokens.
 	 */
+	stop?: string[];
 	stream?: boolean;
 	/**
+	 * What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the
+	 * output more random, while
+	 * lower values like 0.2 will make it more focused and deterministic.
+	 *
+	 * We generally recommend altering this or `top_p` but not both.
 	 */
 	temperature?: number;
+	tool_choice?: ChatCompletionInputToolType;
+	/**
+	 * A prompt to be appended before the tools
+	 */
+	tool_prompt?: string;
 	/**
+	 * A list of tools the model may call. Currently, only functions are supported as a tool.
+	 * Use this to provide a list of
+	 * functions the model may generate JSON inputs for.
+	 */
+	tools?: ChatCompletionInputTool[];
+	/**
+	 * An integer between 0 and 5 specifying the number of most likely tokens to return at each
+	 * token position, each with
+	 * an associated log probability. logprobs must be set to true if this parameter is used.
+	 */
+	top_logprobs?: number;
+	/**
+	 * An alternative to sampling with temperature, called nucleus sampling, where the model
+	 * considers the results of the
+	 * tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10%
+	 * probability mass are considered.
 	 */
 	top_p?: number;
 	[property: string]: unknown;
 }
 export interface ChatCompletionInputMessage {
+	content?: string;
+	name?: string;
+	role: string;
+	tool_calls?: ChatCompletionInputToolCall[];
 	[property: string]: unknown;
 }
+export interface ChatCompletionInputToolCall {
+	function: ChatCompletionInputFunctionDefinition;
+	id: number;
+	type: string;
+	[property: string]: unknown;
+}
+export interface ChatCompletionInputFunctionDefinition {
+	arguments: unknown;
+	description?: string;
+	name: string;
+	[property: string]: unknown;
+}
+export type ChatCompletionInputToolType = "OneOf" | ChatCompletionInputToolTypeObject;
+export interface ChatCompletionInputToolTypeObject {
+	FunctionName: string;
+	[property: string]: unknown;
+}
+export interface ChatCompletionInputTool {
+	function: ChatCompletionInputFunctionDefinition;
+	type: string;
+	[property: string]: unknown;
+}
 /**
+ * Chat Completion Output.
+ *
+ * Auto-generated from TGI specs.
+ * For more details, check out
+ * https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
  */
 export interface ChatCompletionOutput {
+	choices: ChatCompletionOutputComplete[];
 	created: number;
+	id: string;
+	model: string;
+	object: string;
+	system_fingerprint: string;
+	usage: ChatCompletionOutputUsage;
 	[property: string]: unknown;
 }
+export interface ChatCompletionOutputComplete {
+	finish_reason: string;
 	index: number;
+	logprobs?: ChatCompletionOutputLogprobs;
+	message: ChatCompletionOutputMessage;
 	[property: string]: unknown;
 }
+export interface ChatCompletionOutputLogprobs {
+	content: ChatCompletionOutputLogprob[];
+	[property: string]: unknown;
+}
+export interface ChatCompletionOutputLogprob {
+	logprob: number;
+	token: string;
+	top_logprobs: ChatCompletionOutputTopLogprob[];
+	[property: string]: unknown;
+}
+export interface ChatCompletionOutputTopLogprob {
+	logprob: number;
+	token: string;
+	[property: string]: unknown;
+}
+export interface ChatCompletionOutputMessage {
+	content?: string;
+	name?: string;
+	role: string;
+	tool_calls?: ChatCompletionOutputToolCall[];
+	[property: string]: unknown;
+}
+export interface ChatCompletionOutputToolCall {
+	function: ChatCompletionOutputFunctionDefinition;
+	id: number;
+	type: string;
+	[property: string]: unknown;
+}
+export interface ChatCompletionOutputFunctionDefinition {
+	arguments: unknown;
+	description?: string;
+	name: string;
+	[property: string]: unknown;
+}
+export interface ChatCompletionOutputUsage {
+	completion_tokens: number;
+	prompt_tokens: number;
+	total_tokens: number;
 	[property: string]: unknown;
 }
 /**
+ * Chat Completion Stream Output.
+ *
+ * Auto-generated from TGI specs.
+ * For more details, check out
+ * https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
  */
 export interface ChatCompletionStreamOutput {
 	choices: ChatCompletionStreamOutputChoice[];
 	created: number;
+	id: string;
+	model: string;
+	object: string;
+	system_fingerprint: string;
 	[property: string]: unknown;
 }
 export interface ChatCompletionStreamOutputChoice {
 	delta: ChatCompletionStreamOutputDelta;
+	finish_reason?: string;
 	index: number;
+	logprobs?: ChatCompletionStreamOutputLogprobs;
 	[property: string]: unknown;
 }
 export interface ChatCompletionStreamOutputDelta {
 	content?: string;
+	role: string;
+	tool_calls?: ChatCompletionStreamOutputDeltaToolCall;
+	[property: string]: unknown;
+}
+export interface ChatCompletionStreamOutputDeltaToolCall {
+	function: ChatCompletionStreamOutputFunction;
+	id: string;
+	index: number;
+	type: string;
+	[property: string]: unknown;
+}
+export interface ChatCompletionStreamOutputFunction {
+	arguments: string;
+	name?: string;
+	[property: string]: unknown;
+}
+export interface ChatCompletionStreamOutputLogprobs {
+	content: ChatCompletionStreamOutputLogprob[];
+	[property: string]: unknown;
+}
+export interface ChatCompletionStreamOutputLogprob {
+	logprob: number;
+	token: string;
+	top_logprobs: ChatCompletionStreamOutputTopLogprob[];
+	[property: string]: unknown;
+}
+export interface ChatCompletionStreamOutputTopLogprob {
+	logprob: number;
+	token: string;
 	[property: string]: unknown;
 }

packages/tasks/src/tasks/chat-completion/spec/input.json CHANGED Viewed

@@ -1,63 +1,227 @@
 {
-	"title": "ChatCompletionInput",
 	"$id": "/inference/schemas/chat-completion/input.json",
 	"$schema": "http://json-schema.org/draft-06/schema#",
-	"description": "Inputs for ChatCompletion inference",
 	"type": "object",
 	"properties": {
-		"messages": {
 			"type": "array",
-			"title": "ChatCompletionInputMessage",
 			"items": {
-				"type": "object",
-				"properties": {
-					"role": {
-						"$ref": "#/definitions/Role"
-					},
-					"content": {
-						"type": "string",
-						"description": "The content of the message."
-					}
-				},
-				"required": ["role", "content"]
-			}
 		},
-		"frequency_penalty": {
-			"type": "number",
-			"description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim."
 		},
 		"max_tokens": {
 			"type": "integer",
-			"description": "The maximum number of tokens that can be generated in the chat completion."
 		},
 		"seed": {
 			"type": "integer",
-			"description": "The random sampling seed."
 		},
 		"stop": {
-			"oneOf": [{ "type": "string" }, { "type": "array", "items": { "type": "string" } }],
-			"title": "ChatCompletionInputStopReason",
-			"description": "Stop generating tokens if a stop token is generated."
 		},
 		"stream": {
-			"type": "boolean",
-			"description": "If set, partial message deltas will be sent."
 		},
 		"temperature": {
 			"type": "number",
-			"description": "The value used to modulate the logits distribution."
 		},
 		"top_p": {
 			"type": "number",
-			"description": "If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or higher are kept for generation."
 		}
 	},
-	"required": ["messages"],
-	"definitions": {
-		"Role": {
-			"oneOf": [{ "const": "assistant" }, { "const": "system" }, { "const": "user" }],
-			"title": "ChatCompletionMessageRole",
-			"description": "The role of the message author."
 		}
 	}
 }

 {
 	"$id": "/inference/schemas/chat-completion/input.json",
 	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Chat Completion Input.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.",
+	"title": "ChatCompletionInput",
 	"type": "object",
+	"required": ["model", "messages"],
 	"properties": {
+		"frequency_penalty": {
+			"type": "number",
+			"format": "float",
+			"description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far,\ndecreasing the model's likelihood to repeat the same line verbatim.",
+			"example": "1.0",
+			"nullable": true
+		},
+		"logit_bias": {
 			"type": "array",
 			"items": {
+				"type": "number",
+				"format": "float"
+			},
+			"description": "UNUSED\nModify the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens\n(specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically,\nthe bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model,\nbut values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should\nresult in a ban or exclusive selection of the relevant token.",
+			"nullable": true
 		},
+		"logprobs": {
+			"type": "boolean",
+			"description": "Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each\noutput token returned in the content of message.",
+			"example": "false",
+			"nullable": true
 		},
 		"max_tokens": {
 			"type": "integer",
+			"format": "int32",
+			"description": "The maximum number of tokens that can be generated in the chat completion.",
+			"example": "32",
+			"nullable": true,
+			"minimum": 0
+		},
+		"messages": {
+			"type": "array",
+			"items": {
+				"$ref": "#/$defs/ChatCompletionInputMessage"
+			},
+			"description": "A list of messages comprising the conversation so far.",
+			"example": "[{\"role\": \"user\", \"content\": \"What is Deep Learning?\"}]"
+		},
+		"model": {
+			"type": "string",
+			"description": "[UNUSED] ID of the model to use. See the model endpoint compatibility table for details on which models work with the Chat API.",
+			"example": "mistralai/Mistral-7B-Instruct-v0.2"
+		},
+		"n": {
+			"type": "integer",
+			"format": "int32",
+			"description": "UNUSED\nHow many chat completion choices to generate for each input message. Note that you will be charged based on the\nnumber of generated tokens across all of the choices. Keep n as 1 to minimize costs.",
+			"example": "2",
+			"nullable": true,
+			"minimum": 0
+		},
+		"presence_penalty": {
+			"type": "number",
+			"format": "float",
+			"description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far,\nincreasing the model's likelihood to talk about new topics",
+			"example": 0.1,
+			"nullable": true
 		},
 		"seed": {
 			"type": "integer",
+			"format": "int64",
+			"example": 42,
+			"nullable": true,
+			"minimum": 0
 		},
 		"stop": {
+			"type": "array",
+			"items": {
+				"type": "string"
+			},
+			"description": "Up to 4 sequences where the API will stop generating further tokens.",
+			"example": "null",
+			"nullable": true
 		},
 		"stream": {
+			"type": "boolean"
 		},
 		"temperature": {
 			"type": "number",
+			"format": "float",
+			"description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while\nlower values like 0.2 will make it more focused and deterministic.\n\nWe generally recommend altering this or `top_p` but not both.",
+			"example": 1,
+			"nullable": true
+		},
+		"tool_choice": {
+			"allOf": [
+				{
+					"$ref": "#/$defs/ChatCompletionInputToolType"
+				}
+			],
+			"nullable": true
+		},
+		"tool_prompt": {
+			"type": "string",
+			"description": "A prompt to be appended before the tools",
+			"example": "\"You will be presented with a JSON schema representing a set of tools.\nIf the user request lacks of sufficient information to make a precise tool selection: Do not invent any tool's properties, instead notify with an error message.\n\nJSON Schema:\n\"",
+			"nullable": true
+		},
+		"tools": {
+			"type": "array",
+			"items": {
+				"$ref": "#/$defs/ChatCompletionInputTool"
+			},
+			"description": "A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of\nfunctions the model may generate JSON inputs for.",
+			"example": "null",
+			"nullable": true
+		},
+		"top_logprobs": {
+			"type": "integer",
+			"format": "int32",
+			"description": "An integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with\nan associated log probability. logprobs must be set to true if this parameter is used.",
+			"example": "5",
+			"nullable": true,
+			"minimum": 0
 		},
 		"top_p": {
 			"type": "number",
+			"format": "float",
+			"description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the\ntokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.",
+			"example": 0.95,
+			"nullable": true
 		}
 	},
+	"$defs": {
+		"ChatCompletionInputMessage": {
+			"type": "object",
+			"required": ["role"],
+			"properties": {
+				"content": {
+					"type": "string",
+					"example": "My name is David and I",
+					"nullable": true
+				},
+				"name": {
+					"type": "string",
+					"example": "\"David\"",
+					"nullable": true
+				},
+				"role": {
+					"type": "string",
+					"example": "user"
+				},
+				"tool_calls": {
+					"type": "array",
+					"items": {
+						"$ref": "#/$defs/ChatCompletionInputToolCall"
+					},
+					"nullable": true
+				}
+			},
+			"title": "ChatCompletionInputMessage"
+		},
+		"ChatCompletionInputToolCall": {
+			"type": "object",
+			"required": ["id", "type", "function"],
+			"properties": {
+				"function": {
+					"$ref": "#/$defs/ChatCompletionInputFunctionDefinition"
+				},
+				"id": {
+					"type": "integer",
+					"format": "int32",
+					"minimum": 0
+				},
+				"type": {
+					"type": "string"
+				}
+			},
+			"title": "ChatCompletionInputToolCall"
+		},
+		"ChatCompletionInputFunctionDefinition": {
+			"type": "object",
+			"required": ["name", "arguments"],
+			"properties": {
+				"arguments": {},
+				"description": {
+					"type": "string",
+					"nullable": true
+				},
+				"name": {
+					"type": "string"
+				}
+			},
+			"title": "ChatCompletionInputFunctionDefinition"
+		},
+		"ChatCompletionInputToolType": {
+			"oneOf": [
+				{
+					"type": "object",
+					"required": ["FunctionName"],
+					"properties": {
+						"FunctionName": {
+							"type": "string"
+						}
+					}
+				},
+				{
+					"type": "string",
+					"enum": ["OneOf"]
+				}
+			],
+			"title": "ChatCompletionInputToolType"
+		},
+		"ChatCompletionInputTool": {
+			"type": "object",
+			"required": ["type", "function"],
+			"properties": {
+				"function": {
+					"$ref": "#/$defs/ChatCompletionInputFunctionDefinition"
+				},
+				"type": {
+					"type": "string",
+					"example": "function"
+				}
+			},
+			"title": "ChatCompletionInputTool"
 		}
 	}
 }

packages/tasks/src/tasks/chat-completion/spec/output.json CHANGED Viewed

@@ -1,58 +1,196 @@
 {
 	"$id": "/inference/schemas/chat-completion/output.json",
 	"$schema": "http://json-schema.org/draft-06/schema#",
-	"description": "Outputs for Chat Completion inference",
 	"title": "ChatCompletionOutput",
 	"type": "object",
 	"properties": {
 		"choices": {
 			"type": "array",
-			"description": "A list of chat completion choices.",
-			"title": "ChatCompletionOutputChoice",
 			"items": {
-				"type": "object",
-				"properties": {
-					"finish_reason": {
-						"$ref": "#/definitions/FinishReason",
-						"description": "The reason why the generation was stopped."
-					},
-					"index": {
-						"type": "integer",
-						"description": "The index of the choice in the list of choices."
-					},
-					"message": {
-						"type": "object",
-						"properties": {
-							"role": {
-								"$ref": "/inference/schemas/chat-completion/input.json#/definitions/Role"
-							},
-							"content": {
-								"type": "string",
-								"description": "The content of the chat completion message."
-							}
-						},
-						"title": "ChatCompletionOutputChoiceMessage",
-						"required": ["content", "role"]
-					}
-				},
-				"required": ["finish_reason", "index", "message"]
 			}
 		},
 		"created": {
 			"type": "integer",
-			"description": "The Unix timestamp (in seconds) of when the chat completion was created."
 		}
 	},
-	"required": ["choices", "created"],
-	"definitions": {
-		"FinishReason": {
-			"type": "string",
-			"title": "ChatCompletionFinishReason",
-			"oneOf": [
-				{ "const": "length", "description": "The generated sequence reached the maximum allowed length" },
-				{ "const": "eos_token", "description": "The model generated an end-of-sentence (EOS) token" },
-				{ "const": "stop_sequence", "description": "One of the sequence in stop_sequences was generated" }
-			]
 		}
 	}
 }

 {
 	"$id": "/inference/schemas/chat-completion/output.json",
 	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Chat Completion Output.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.",
 	"title": "ChatCompletionOutput",
 	"type": "object",
+	"required": ["id", "object", "created", "model", "system_fingerprint", "choices", "usage"],
 	"properties": {
 		"choices": {
 			"type": "array",
 			"items": {
+				"$ref": "#/$defs/ChatCompletionOutputComplete"
 			}
 		},
 		"created": {
 			"type": "integer",
+			"format": "int64",
+			"example": "1706270835",
+			"minimum": 0
+		},
+		"id": {
+			"type": "string"
+		},
+		"model": {
+			"type": "string",
+			"example": "mistralai/Mistral-7B-Instruct-v0.2"
+		},
+		"object": {
+			"type": "string"
+		},
+		"system_fingerprint": {
+			"type": "string"
+		},
+		"usage": {
+			"$ref": "#/$defs/ChatCompletionOutputUsage"
 		}
 	},
+	"$defs": {
+		"ChatCompletionOutputComplete": {
+			"type": "object",
+			"required": ["index", "message", "finish_reason"],
+			"properties": {
+				"finish_reason": {
+					"type": "string"
+				},
+				"index": {
+					"type": "integer",
+					"format": "int32",
+					"minimum": 0
+				},
+				"logprobs": {
+					"allOf": [
+						{
+							"$ref": "#/$defs/ChatCompletionOutputLogprobs"
+						}
+					],
+					"nullable": true
+				},
+				"message": {
+					"$ref": "#/$defs/ChatCompletionOutputMessage"
+				}
+			},
+			"title": "ChatCompletionOutputComplete"
+		},
+		"ChatCompletionOutputLogprobs": {
+			"type": "object",
+			"required": ["content"],
+			"properties": {
+				"content": {
+					"type": "array",
+					"items": {
+						"$ref": "#/$defs/ChatCompletionOutputLogprob"
+					}
+				}
+			},
+			"title": "ChatCompletionOutputLogprobs"
+		},
+		"ChatCompletionOutputLogprob": {
+			"type": "object",
+			"required": ["token", "logprob", "top_logprobs"],
+			"properties": {
+				"logprob": {
+					"type": "number",
+					"format": "float"
+				},
+				"token": {
+					"type": "string"
+				},
+				"top_logprobs": {
+					"type": "array",
+					"items": {
+						"$ref": "#/$defs/ChatCompletionOutputTopLogprob"
+					}
+				}
+			},
+			"title": "ChatCompletionOutputLogprob"
+		},
+		"ChatCompletionOutputTopLogprob": {
+			"type": "object",
+			"required": ["token", "logprob"],
+			"properties": {
+				"logprob": {
+					"type": "number",
+					"format": "float"
+				},
+				"token": {
+					"type": "string"
+				}
+			},
+			"title": "ChatCompletionOutputTopLogprob"
+		},
+		"ChatCompletionOutputMessage": {
+			"type": "object",
+			"required": ["role"],
+			"properties": {
+				"content": {
+					"type": "string",
+					"example": "My name is David and I",
+					"nullable": true
+				},
+				"name": {
+					"type": "string",
+					"example": "\"David\"",
+					"nullable": true
+				},
+				"role": {
+					"type": "string",
+					"example": "user"
+				},
+				"tool_calls": {
+					"type": "array",
+					"items": {
+						"$ref": "#/$defs/ChatCompletionOutputToolCall"
+					},
+					"nullable": true
+				}
+			},
+			"title": "ChatCompletionOutputMessage"
+		},
+		"ChatCompletionOutputToolCall": {
+			"type": "object",
+			"required": ["id", "type", "function"],
+			"properties": {
+				"function": {
+					"$ref": "#/$defs/ChatCompletionOutputFunctionDefinition"
+				},
+				"id": {
+					"type": "integer",
+					"format": "int32",
+					"minimum": 0
+				},
+				"type": {
+					"type": "string"
+				}
+			},
+			"title": "ChatCompletionOutputToolCall"
+		},
+		"ChatCompletionOutputFunctionDefinition": {
+			"type": "object",
+			"required": ["name", "arguments"],
+			"properties": {
+				"arguments": {},
+				"description": {
+					"type": "string",
+					"nullable": true
+				},
+				"name": {
+					"type": "string"
+				}
+			},
+			"title": "ChatCompletionOutputFunctionDefinition"
+		},
+		"ChatCompletionOutputUsage": {
+			"type": "object",
+			"required": ["prompt_tokens", "completion_tokens", "total_tokens"],
+			"properties": {
+				"completion_tokens": {
+					"type": "integer",
+					"format": "int32",
+					"minimum": 0
+				},
+				"prompt_tokens": {
+					"type": "integer",
+					"format": "int32",
+					"minimum": 0
+				},
+				"total_tokens": {
+					"type": "integer",
+					"format": "int32",
+					"minimum": 0
+				}
+			},
+			"title": "ChatCompletionOutputUsage"
 		}
 	}
 }

packages/tasks/src/tasks/chat-completion/spec/output_stream.json DELETED Viewed

@@ -1,48 +0,0 @@
-{
-	"$id": "/inference/schemas/chat-completion/output_stream.json",
-	"$schema": "http://json-schema.org/draft-06/schema#",
-	"description": "Chat Completion Stream Output",
-	"title": "ChatCompletionStreamOutput",
-	"type": "object",
-	"properties": {
-		"choices": {
-			"type": "array",
-			"title": "ChatCompletionStreamOutputChoice",
-			"description": "A list of chat completion choices.",
-			"items": {
-				"type": "object",
-				"properties": {
-					"delta": {
-						"type": "object",
-						"title": "ChatCompletionStreamOutputDelta",
-						"description": "A chat completion delta generated by streamed model responses.",
-						"properties": {
-							"content": {
-								"type": "string",
-								"description": "The contents of the chunk message."
-							},
-							"role": {
-								"type": "string",
-								"description": "The role of the author of this message."
-							}
-						}
-					},
-					"finish_reason": {
-						"$ref": "/inference/schemas/chat-completion/output.json#/definitions/FinishReason",
-						"description": "The reason why the generation was stopped."
-					},
-					"index": {
-						"type": "integer",
-						"description": "The index of the choice in the list of choices."
-					}
-				},
-				"required": ["delta", "index"]
-			}
-		},
-		"created": {
-			"type": "integer",
-			"description": "The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has the same timestamp."
-		}
-	},
-	"required": ["choices", "created"]
-}

packages/tasks/src/tasks/chat-completion/spec/stream_output.json ADDED Viewed

	@@ -0,0 +1,170 @@

+{
+	"$id": "/inference/schemas/chat-completion/stream_output.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Chat Completion Stream Output.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.",
+	"title": "ChatCompletionStreamOutput",
+	"type": "object",
+	"required": ["id", "object", "created", "model", "system_fingerprint", "choices"],
+	"properties": {
+		"choices": {
+			"type": "array",
+			"items": {
+				"$ref": "#/$defs/ChatCompletionStreamOutputChoice"
+			}
+		},
+		"created": {
+			"type": "integer",
+			"format": "int64",
+			"example": "1706270978",
+			"minimum": 0
+		},
+		"id": {
+			"type": "string"
+		},
+		"model": {
+			"type": "string",
+			"example": "mistralai/Mistral-7B-Instruct-v0.2"
+		},
+		"object": {
+			"type": "string"
+		},
+		"system_fingerprint": {
+			"type": "string"
+		}
+	},
+	"$defs": {
+		"ChatCompletionStreamOutputChoice": {
+			"type": "object",
+			"required": ["index", "delta"],
+			"properties": {
+				"delta": {
+					"$ref": "#/$defs/ChatCompletionStreamOutputDelta"
+				},
+				"finish_reason": {
+					"type": "string",
+					"nullable": true
+				},
+				"index": {
+					"type": "integer",
+					"format": "int32",
+					"minimum": 0
+				},
+				"logprobs": {
+					"allOf": [
+						{
+							"$ref": "#/$defs/ChatCompletionStreamOutputLogprobs"
+						}
+					],
+					"nullable": true
+				}
+			},
+			"title": "ChatCompletionStreamOutputChoice"
+		},
+		"ChatCompletionStreamOutputDelta": {
+			"type": "object",
+			"required": ["role"],
+			"properties": {
+				"content": {
+					"type": "string",
+					"example": "What is Deep Learning?",
+					"nullable": true
+				},
+				"role": {
+					"type": "string",
+					"example": "user"
+				},
+				"tool_calls": {
+					"allOf": [
+						{
+							"$ref": "#/$defs/ChatCompletionStreamOutputDeltaToolCall"
+						}
+					],
+					"nullable": true
+				}
+			},
+			"title": "ChatCompletionStreamOutputDelta"
+		},
+		"ChatCompletionStreamOutputDeltaToolCall": {
+			"type": "object",
+			"required": ["index", "id", "type", "function"],
+			"properties": {
+				"function": {
+					"$ref": "#/$defs/ChatCompletionStreamOutputFunction"
+				},
+				"id": {
+					"type": "string"
+				},
+				"index": {
+					"type": "integer",
+					"format": "int32",
+					"minimum": 0
+				},
+				"type": {
+					"type": "string"
+				}
+			},
+			"title": "ChatCompletionStreamOutputDeltaToolCall"
+		},
+		"ChatCompletionStreamOutputFunction": {
+			"type": "object",
+			"required": ["arguments"],
+			"properties": {
+				"arguments": {
+					"type": "string"
+				},
+				"name": {
+					"type": "string",
+					"nullable": true
+				}
+			},
+			"title": "ChatCompletionStreamOutputFunction"
+		},
+		"ChatCompletionStreamOutputLogprobs": {
+			"type": "object",
+			"required": ["content"],
+			"properties": {
+				"content": {
+					"type": "array",
+					"items": {
+						"$ref": "#/$defs/ChatCompletionStreamOutputLogprob"
+					}
+				}
+			},
+			"title": "ChatCompletionStreamOutputLogprobs"
+		},
+		"ChatCompletionStreamOutputLogprob": {
+			"type": "object",
+			"required": ["token", "logprob", "top_logprobs"],
+			"properties": {
+				"logprob": {
+					"type": "number",
+					"format": "float"
+				},
+				"token": {
+					"type": "string"
+				},
+				"top_logprobs": {
+					"type": "array",
+					"items": {
+						"$ref": "#/$defs/ChatCompletionStreamOutputTopLogprob"
+					}
+				}
+			},
+			"title": "ChatCompletionStreamOutputLogprob"
+		},
+		"ChatCompletionStreamOutputTopLogprob": {
+			"type": "object",
+			"required": ["token", "logprob"],
+			"properties": {
+				"logprob": {
+					"type": "number",
+					"format": "float"
+				},
+				"token": {
+					"type": "string"
+				}
+			},
+			"title": "ChatCompletionStreamOutputTopLogprob"
+		}
+	}
+}

packages/tasks/src/tasks/index.ts CHANGED Viewed

@@ -43,9 +43,8 @@ export type {
 	ChatCompletionInput,
 	ChatCompletionInputMessage,
 	ChatCompletionOutput,
-	ChatCompletionOutputChoice,
-	ChatCompletionFinishReason,
-	ChatCompletionOutputChoiceMessage,
 	ChatCompletionStreamOutput,
 	ChatCompletionStreamOutputChoice,
 	ChatCompletionStreamOutputDelta,
@@ -85,15 +84,15 @@ export type {
 	TextClassificationParameters,
 } from "./text-classification/inference";
 export type {
-	TextGenerationFinishReason,
-	TextGenerationPrefillToken,
 	TextGenerationInput,
 	TextGenerationOutput,
 	TextGenerationOutputDetails,
-	TextGenerationParameters,
-	TextGenerationOutputSequenceDetails,
 	TextGenerationOutputToken,
-	TextGenerationStreamDetails,
 	TextGenerationStreamOutput,
 } from "./text-generation/inference";
 export type * from "./video-classification/inference";

 	ChatCompletionInput,
 	ChatCompletionInputMessage,
 	ChatCompletionOutput,
+	ChatCompletionOutputComplete,
+	ChatCompletionOutputMessage,
 	ChatCompletionStreamOutput,
 	ChatCompletionStreamOutputChoice,
 	ChatCompletionStreamOutputDelta,
 	TextClassificationParameters,
 } from "./text-classification/inference";
 export type {
+	TextGenerationOutputFinishReason,
+	TextGenerationOutputPrefillToken,
 	TextGenerationInput,
 	TextGenerationOutput,
 	TextGenerationOutputDetails,
+	TextGenerationInputGenerateParameters,
+	TextGenerationOutputBestOfSequence,
 	TextGenerationOutputToken,
+	TextGenerationStreamOutputStreamDetails,
 	TextGenerationStreamOutput,
 } from "./text-generation/inference";
 export type * from "./video-classification/inference";

packages/tasks/src/tasks/text-generation/inference.ts CHANGED Viewed

@@ -5,246 +5,134 @@
  */
 /**
- * Inputs for Text Generation inference
  */
 export interface TextGenerationInput {
-	/**
-	 * The text to initialize generation with
-	 */
 	inputs: string;
-	/**
-	 * Additional inference parameters
-	 */
-	parameters?: TextGenerationParameters;
-	/**
-	 * Whether to stream output tokens
-	 */
 	stream?: boolean;
 	[property: string]: unknown;
 }
-/**
- * Additional inference parameters
- *
- * Additional inference parameters for Text Generation
- */
-export interface TextGenerationParameters {
-	/**
-	 * The number of sampling queries to run. Only the best one (in terms of total logprob) will
-	 * be returned.
-	 */
 	best_of?: number;
-	/**
-	 * Whether or not to output decoder input details
-	 */
 	decoder_input_details?: boolean;
-	/**
-	 * Whether or not to output details
-	 */
 	details?: boolean;
-	/**
-	 * Whether to use logits sampling instead of greedy decoding when generating new tokens.
-	 */
 	do_sample?: boolean;
-	/**
-	 * The maximum number of tokens to generate.
-	 */
 	max_new_tokens?: number;
-	/**
-	 * The parameter for repetition penalty. A value of 1.0 means no penalty. See [this
-	 * paper](https://hf.co/papers/1909.05858) for more details.
-	 */
 	repetition_penalty?: number;
-	/**
-	 * Whether to prepend the prompt to the generated text.
-	 */
 	return_full_text?: boolean;
-	/**
-	 * The random sampling seed.
-	 */
 	seed?: number;
-	/**
-	 * Stop generating tokens if a member of `stop_sequences` is generated.
-	 */
-	stop_sequences?: string[];
-	/**
-	 * The value used to modulate the logits distribution.
-	 */
 	temperature?: number;
-	/**
-	 * The number of highest probability vocabulary tokens to keep for top-k-filtering.
-	 */
 	top_k?: number;
-	/**
-	 * If set to < 1, only the smallest set of most probable tokens with probabilities that add
-	 * up to `top_p` or higher are kept for generation.
-	 */
 	top_p?: number;
-	/**
-	 * Truncate input tokens to the given size.
-	 */
 	truncate?: number;
-	/**
-	 * Typical Decoding mass. See [Typical Decoding for Natural Language
-	 * Generation](https://hf.co/papers/2202.00666) for more information
-	 */
 	typical_p?: number;
 	/**
-	 * Watermarking with [A Watermark for Large Language Models](https://hf.co/papers/2301.10226)
 	 */
-	watermark?: boolean;
 	[property: string]: unknown;
 }
 /**
- * Outputs for Text Generation inference
  */
 export interface TextGenerationOutput {
-	/**
-	 * When enabled, details about the generation
-	 */
 	details?: TextGenerationOutputDetails;
-	/**
-	 * The generated text
-	 */
 	generated_text: string;
 	[property: string]: unknown;
 }
-/**
- * When enabled, details about the generation
- */
 export interface TextGenerationOutputDetails {
-	/**
-	 * Details about additional sequences when best_of is provided
-	 */
-	best_of_sequences?: TextGenerationOutputSequenceDetails[];
-	/**
-	 * The reason why the generation was stopped.
-	 */
-	finish_reason: TextGenerationFinishReason;
-	/**
-	 * The number of generated tokens
-	 */
 	generated_tokens: number;
-	prefill: TextGenerationPrefillToken[];
-	/**
-	 * The random seed used for generation
-	 */
 	seed?: number;
-	/**
-	 * The generated tokens and associated details
-	 */
 	tokens: TextGenerationOutputToken[];
-	/**
-	 * Most likely tokens
-	 */
 	top_tokens?: Array<TextGenerationOutputToken[]>;
 	[property: string]: unknown;
 }
-export interface TextGenerationOutputSequenceDetails {
-	finish_reason: TextGenerationFinishReason;
-	/**
-	 * The generated text
-	 */
 	generated_text: string;
-	/**
-	 * The number of generated tokens
-	 */
 	generated_tokens: number;
-	prefill: TextGenerationPrefillToken[];
-	/**
-	 * The random seed used for generation
-	 */
 	seed?: number;
-	/**
-	 * The generated tokens and associated details
-	 */
 	tokens: TextGenerationOutputToken[];
-	/**
-	 * Most likely tokens
-	 */
 	top_tokens?: Array<TextGenerationOutputToken[]>;
 	[property: string]: unknown;
 }
-/**
- * The reason why the generation was stopped.
- *
- * length: The generated sequence reached the maximum allowed length
- *
- * eos_token: The model generated an end-of-sentence (EOS) token
- *
- * stop_sequence: One of the sequence in stop_sequences was generated
- */
-export type TextGenerationFinishReason = "length" | "eos_token" | "stop_sequence";
-export interface TextGenerationPrefillToken {
 	id: number;
 	logprob: number;
-	/**
-	 * The text associated with that token
-	 */
 	text: string;
 	[property: string]: unknown;
 }
-/**
- * Generated token.
- */
 export interface TextGenerationOutputToken {
 	id: number;
-	logprob?: number;
-	/**
-	 * Whether or not that token is a special one
-	 */
 	special: boolean;
-	/**
-	 * The text associated with that token
-	 */
 	text: string;
 	[property: string]: unknown;
 }
 /**
- * Text Generation Stream Output
  */
 export interface TextGenerationStreamOutput {
-	/**
-	 * Generation details. Only available when the generation is finished.
-	 */
-	details?: TextGenerationStreamDetails;
-	/**
-	 * The complete generated text. Only available when the generation is finished.
-	 */
 	generated_text?: string;
-	/**
-	 * The token index within the stream. Optional to support older clients that omit it.
-	 */
-	index?: number;
-	/**
-	 * Generated token.
-	 */
-	token: TextGenerationOutputToken;
 	[property: string]: unknown;
 }
-/**
- * Generation details. Only available when the generation is finished.
- */
-export interface TextGenerationStreamDetails {
-	/**
-	 * The reason why the generation was stopped.
-	 */
-	finish_reason: TextGenerationFinishReason;
-	/**
-	 * The number of generated tokens
-	 */
 	generated_tokens: number;
-	/**
-	 * The random seed used for generation
-	 */
-	seed: number;
 	[property: string]: unknown;
 }

  */
 /**
+ * Text Generation Input.
+ *
+ * Auto-generated from TGI specs.
+ * For more details, check out
+ * https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
  */
 export interface TextGenerationInput {
 	inputs: string;
+	parameters?: TextGenerationInputGenerateParameters;
 	stream?: boolean;
 	[property: string]: unknown;
 }
+export interface TextGenerationInputGenerateParameters {
 	best_of?: number;
 	decoder_input_details?: boolean;
 	details?: boolean;
 	do_sample?: boolean;
+	frequency_penalty?: number;
+	grammar?: TextGenerationInputGrammarType;
 	max_new_tokens?: number;
 	repetition_penalty?: number;
 	return_full_text?: boolean;
 	seed?: number;
+	stop?: string[];
 	temperature?: number;
 	top_k?: number;
+	top_n_tokens?: number;
 	top_p?: number;
 	truncate?: number;
 	typical_p?: number;
+	watermark?: boolean;
+	[property: string]: unknown;
+}
+export interface TextGenerationInputGrammarType {
+	type: Type;
 	/**
+	 * A string that represents a [JSON Schema](https://json-schema.org/).
+	 *
+	 * JSON Schema is a declarative language that allows to annotate JSON documents
+	 * with types and descriptions.
 	 */
+	value: unknown;
 	[property: string]: unknown;
 }
+export type Type = "json" | "regex";
 /**
+ * Text Generation Output.
+ *
+ * Auto-generated from TGI specs.
+ * For more details, check out
+ * https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
  */
 export interface TextGenerationOutput {
 	details?: TextGenerationOutputDetails;
 	generated_text: string;
 	[property: string]: unknown;
 }
 export interface TextGenerationOutputDetails {
+	best_of_sequences?: TextGenerationOutputBestOfSequence[];
+	finish_reason: TextGenerationOutputFinishReason;
 	generated_tokens: number;
+	prefill: TextGenerationOutputPrefillToken[];
 	seed?: number;
 	tokens: TextGenerationOutputToken[];
 	top_tokens?: Array<TextGenerationOutputToken[]>;
 	[property: string]: unknown;
 }
+export interface TextGenerationOutputBestOfSequence {
+	finish_reason: TextGenerationOutputFinishReason;
 	generated_text: string;
 	generated_tokens: number;
+	prefill: TextGenerationOutputPrefillToken[];
 	seed?: number;
 	tokens: TextGenerationOutputToken[];
 	top_tokens?: Array<TextGenerationOutputToken[]>;
 	[property: string]: unknown;
 }
+export type TextGenerationOutputFinishReason = "length" | "eos_token" | "stop_sequence";
+export interface TextGenerationOutputPrefillToken {
 	id: number;
 	logprob: number;
 	text: string;
 	[property: string]: unknown;
 }
 export interface TextGenerationOutputToken {
 	id: number;
+	logprob: number;
 	special: boolean;
 	text: string;
 	[property: string]: unknown;
 }
 /**
+ * Text Generation Stream Output.
+ *
+ * Auto-generated from TGI specs.
+ * For more details, check out
+ * https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
  */
 export interface TextGenerationStreamOutput {
+	details?: TextGenerationStreamOutputStreamDetails;
 	generated_text?: string;
+	index: number;
+	token: TextGenerationStreamOutputToken;
+	top_tokens?: TextGenerationStreamOutputToken[];
 	[property: string]: unknown;
 }
+export interface TextGenerationStreamOutputStreamDetails {
+	finish_reason: TextGenerationOutputFinishReason;
 	generated_tokens: number;
+	seed?: number;
+	[property: string]: unknown;
+}
+export interface TextGenerationStreamOutputToken {
+	id: number;
+	logprob: number;
+	special: boolean;
+	text: string;
 	[property: string]: unknown;
 }

packages/tasks/src/tasks/text-generation/spec/input.json CHANGED Viewed

@@ -1,94 +1,195 @@
 {
 	"$id": "/inference/schemas/text-generation/input.json",
 	"$schema": "http://json-schema.org/draft-06/schema#",
-	"description": "Inputs for Text Generation inference",
 	"title": "TextGenerationInput",
 	"type": "object",
 	"properties": {
 		"inputs": {
-			"description": "The text to initialize generation with",
-			"type": "string"
 		},
 		"parameters": {
-			"description": "Additional inference parameters",
-			"$ref": "#/$defs/TextGenerationParameters"
 		},
 		"stream": {
-			"description": "Whether to stream output tokens",
-			"type": "boolean"
 		}
 	},
 	"$defs": {
-		"TextGenerationParameters": {
-			"title": "TextGenerationParameters",
-			"description": "Additional inference parameters for Text Generation",
 			"type": "object",
 			"properties": {
 				"best_of": {
 					"type": "integer",
-					"description": "The number of sampling queries to run. Only the best one (in terms of total logprob) will be returned."
 				},
 				"decoder_input_details": {
 					"type": "boolean",
-					"description": "Whether or not to output decoder input details"
 				},
 				"details": {
 					"type": "boolean",
-					"description": "Whether or not to output details"
 				},
 				"do_sample": {
 					"type": "boolean",
-					"description": "Whether to use logits sampling instead of greedy decoding when generating new tokens."
 				},
 				"max_new_tokens": {
 					"type": "integer",
-					"description": "The maximum number of tokens to generate."
 				},
 				"repetition_penalty": {
 					"type": "number",
-					"description": "The parameter for repetition penalty. A value of 1.0 means no penalty. See [this paper](https://hf.co/papers/1909.05858) for more details."
 				},
 				"return_full_text": {
 					"type": "boolean",
-					"description": "Whether to prepend the prompt to the generated text."
 				},
 				"seed": {
 					"type": "integer",
-					"description": "The random sampling seed."
 				},
-				"stop_sequences": {
 					"type": "array",
 					"items": {
 						"type": "string"
 					},
-					"description": "Stop generating tokens if a member of `stop_sequences` is generated."
 				},
 				"temperature": {
 					"type": "number",
-					"description": "The value used to modulate the logits distribution."
 				},
 				"top_k": {
 					"type": "integer",
-					"description": "The number of highest probability vocabulary tokens to keep for top-k-filtering."
 				},
 				"top_p": {
 					"type": "number",
-					"description": "If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or higher are kept for generation."
 				},
 				"truncate": {
 					"type": "integer",
-					"description": "Truncate input tokens to the given size."
 				},
 				"typical_p": {
 					"type": "number",
-					"description": "Typical Decoding mass. See [Typical Decoding for Natural Language Generation](https://hf.co/papers/2202.00666) for more information"
 				},
 				"watermark": {
 					"type": "boolean",
-					"description": "Watermarking with [A Watermark for Large Language Models](https://hf.co/papers/2301.10226)"
 				}
-			}
 		}
-	},
-	"required": ["inputs"]
 }

 {
 	"$id": "/inference/schemas/text-generation/input.json",
 	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Text Generation Input.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.",
 	"title": "TextGenerationInput",
 	"type": "object",
+	"required": ["inputs"],
 	"properties": {
 		"inputs": {
+			"type": "string",
+			"example": "My name is Olivier and I"
 		},
 		"parameters": {
+			"$ref": "#/$defs/TextGenerationInputGenerateParameters"
 		},
 		"stream": {
+			"type": "boolean",
+			"default": "false"
 		}
 	},
 	"$defs": {
+		"TextGenerationInputGenerateParameters": {
 			"type": "object",
 			"properties": {
 				"best_of": {
 					"type": "integer",
+					"default": "null",
+					"example": 1,
+					"nullable": true,
+					"minimum": 0,
+					"exclusiveMinimum": 0
 				},
 				"decoder_input_details": {
 					"type": "boolean",
+					"default": "false"
 				},
 				"details": {
 					"type": "boolean",
+					"default": "true"
 				},
 				"do_sample": {
 					"type": "boolean",
+					"default": "false",
+					"example": true
+				},
+				"frequency_penalty": {
+					"type": "number",
+					"format": "float",
+					"default": "null",
+					"example": 0.1,
+					"nullable": true,
+					"exclusiveMinimum": -2
+				},
+				"grammar": {
+					"allOf": [
+						{
+							"$ref": "#/$defs/TextGenerationInputGrammarType"
+						}
+					],
+					"default": "null",
+					"nullable": true
 				},
 				"max_new_tokens": {
 					"type": "integer",
+					"format": "int32",
+					"default": "100",
+					"example": "20",
+					"nullable": true,
+					"minimum": 0
 				},
 				"repetition_penalty": {
 					"type": "number",
+					"format": "float",
+					"default": "null",
+					"example": 1.03,
+					"nullable": true,
+					"exclusiveMinimum": 0
 				},
 				"return_full_text": {
 					"type": "boolean",
+					"default": "null",
+					"example": false,
+					"nullable": true
 				},
 				"seed": {
 					"type": "integer",
+					"format": "int64",
+					"default": "null",
+					"example": "null",
+					"nullable": true,
+					"minimum": 0,
+					"exclusiveMinimum": 0
 				},
+				"stop": {
 					"type": "array",
 					"items": {
 						"type": "string"
 					},
+					"example": ["photographer"],
+					"maxItems": 4
 				},
 				"temperature": {
 					"type": "number",
+					"format": "float",
+					"default": "null",
+					"example": 0.5,
+					"nullable": true,
+					"exclusiveMinimum": 0
 				},
 				"top_k": {
 					"type": "integer",
+					"format": "int32",
+					"default": "null",
+					"example": 10,
+					"nullable": true,
+					"exclusiveMinimum": 0
+				},
+				"top_n_tokens": {
+					"type": "integer",
+					"format": "int32",
+					"default": "null",
+					"example": 5,
+					"nullable": true,
+					"minimum": 0,
+					"exclusiveMinimum": 0
 				},
 				"top_p": {
 					"type": "number",
+					"format": "float",
+					"default": "null",
+					"example": 0.95,
+					"nullable": true,
+					"maximum": 1,
+					"exclusiveMinimum": 0
 				},
 				"truncate": {
 					"type": "integer",
+					"default": "null",
+					"example": "null",
+					"nullable": true,
+					"minimum": 0
 				},
 				"typical_p": {
 					"type": "number",
+					"format": "float",
+					"default": "null",
+					"example": 0.95,
+					"nullable": true,
+					"maximum": 1,
+					"exclusiveMinimum": 0
 				},
 				"watermark": {
 					"type": "boolean",
+					"default": "false",
+					"example": true
 				}
+			},
+			"title": "TextGenerationInputGenerateParameters"
+		},
+		"TextGenerationInputGrammarType": {
+			"oneOf": [
+				{
+					"type": "object",
+					"required": ["type", "value"],
+					"properties": {
+						"type": {
+							"type": "string",
+							"enum": ["json"]
+						},
+						"value": {
+							"description": "A string that represents a [JSON Schema](https://json-schema.org/).\n\nJSON Schema is a declarative language that allows you to annotate JSON documents\nwith types and descriptions."
+						}
+					}
+				},
+				{
+					"type": "object",
+					"required": ["type", "value"],
+					"properties": {
+						"type": {
+							"type": "string",
+							"enum": ["regex"]
+						},
+						"value": {
+							"type": "string"
+						}
+					}
+				}
+			],
+			"discriminator": {
+				"propertyName": "type"
+			},
+			"title": "TextGenerationInputGrammarType"
 		}
+	}
 }

packages/tasks/src/tasks/text-generation/spec/output.json CHANGED Viewed

@@ -1,165 +1,179 @@
 {
 	"$id": "/inference/schemas/text-generation/output.json",
 	"$schema": "http://json-schema.org/draft-06/schema#",
-	"description": "Outputs for Text Generation inference",
 	"title": "TextGenerationOutput",
 	"type": "object",
 	"properties": {
 		"generated_text": {
 			"type": "string",
-			"description": "The generated text"
-		},
-		"details": {
-			"$ref": "#/$defs/Details",
-			"description": "When enabled, details about the generation"
 		}
 	},
-	"required": ["generated_text"],
 	"$defs": {
-		"FinishReason": {
-			"type": "string",
-			"title": "TextGenerationFinishReason",
-			"description": "The reason why the generation was stopped.",
-			"oneOf": [
-				{ "const": "length", "description": "length: The generated sequence reached the maximum allowed length" },
-				{ "const": "eos_token", "description": "eos_token: The model generated an end-of-sentence (EOS) token" },
-				{
-					"const": "stop_sequence",
-					"description": "stop_sequence: One of the sequences in stop_sequences was generated"
-				}
-			]
-		},
-		"PrefillToken": {
-			"title": "TextGenerationPrefillToken",
 			"type": "object",
 			"properties": {
-				"id": {
-					"type": "integer"
-				},
-				"logprob": {
-					"type": "number"
-				},
-				"text": {
-					"type": "string",
-					"description": "The text associated with that token"
-				}
-			},
-			"required": ["id", "logprob", "text"]
-		},
-		"Token": {
-			"type": "object",
-			"title": "TextGenerationOutputToken",
-			"properties": {
-				"id": {
-					"type": "integer"
-				},
-				"logprob": {
-					"type": "number"
-				},
-				"special": {
-					"type": "boolean",
-					"description": "Whether or not that token is a special one"
 				},
-				"text": {
-					"type": "string",
-					"description": "The text associated with that token"
-				}
-			},
-			"required": ["id", "special", "text"]
-		},
-		"Details": {
-			"type": "object",
-			"title": "TextGenerationOutputDetails",
-			"properties": {
 				"finish_reason": {
-					"$ref": "#/$defs/FinishReason",
-					"description": "The reason why the generation was stopped."
 				},
 				"generated_tokens": {
 					"type": "integer",
-					"description": "The number of generated tokens"
 				},
 				"prefill": {
 					"type": "array",
 					"items": {
-						"$ref": "#/$defs/PrefillToken"
 					}
 				},
 				"seed": {
 					"type": "integer",
-					"description": "The random seed used for generation"
 				},
 				"tokens": {
 					"type": "array",
-					"description": "The generated tokens and associated details",
 					"items": {
-						"$ref": "#/$defs/Token"
 					}
 				},
 				"top_tokens": {
 					"type": "array",
-					"description": "Most likely tokens",
 					"items": {
 						"type": "array",
 						"items": {
-							"$ref": "#/$defs/Token"
 						}
 					}
-				},
-				"best_of_sequences": {
-					"type": "array",
-					"description": "Details about additional sequences when best_of is provided",
-					"items": {
-						"$ref": "#/$defs/SequenceDetails"
-					}
 				}
 			},
-			"required": ["finish_reason", "generated_tokens", "prefill", "tokens"]
 		},
-		"SequenceDetails": {
 			"type": "object",
-			"title": "TextGenerationOutputSequenceDetails",
 			"properties": {
 				"generated_text": {
 					"type": "string",
-					"description": "The generated text"
-				},
-				"finish_reason": {
-					"$ref": "#/$defs/FinishReason"
 				},
 				"generated_tokens": {
 					"type": "integer",
-					"description": "The number of generated tokens"
 				},
 				"prefill": {
 					"type": "array",
 					"items": {
-						"$ref": "#/$defs/PrefillToken"
 					}
 				},
 				"seed": {
 					"type": "integer",
-					"description": "The random seed used for generation"
 				},
 				"tokens": {
 					"type": "array",
-					"description": "The generated tokens and associated details",
 					"items": {
-						"$ref": "#/$defs/Token"
 					}
 				},
 				"top_tokens": {
 					"type": "array",
-					"description": "Most likely tokens",
 					"items": {
 						"type": "array",
 						"items": {
-							"$ref": "#/$defs/Token"
 						}
 					}
 				}
 			},
-			"required": ["generated_text", "finish_reason", "generated_tokens", "prefill", "tokens"]
 		}
 	}
 }

 {
 	"$id": "/inference/schemas/text-generation/output.json",
 	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Text Generation Output.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.",
 	"title": "TextGenerationOutput",
 	"type": "object",
+	"required": ["generated_text"],
 	"properties": {
+		"details": {
+			"allOf": [
+				{
+					"$ref": "#/$defs/TextGenerationOutputDetails"
+				}
+			],
+			"nullable": true
+		},
 		"generated_text": {
 			"type": "string",
+			"example": "test"
 		}
 	},
 	"$defs": {
+		"TextGenerationOutputDetails": {
 			"type": "object",
+			"required": ["finish_reason", "generated_tokens", "prefill", "tokens"],
 			"properties": {
+				"best_of_sequences": {
+					"type": "array",
+					"items": {
+						"$ref": "#/$defs/TextGenerationOutputBestOfSequence"
+					},
+					"nullable": true
 				},
 				"finish_reason": {
+					"$ref": "#/$defs/TextGenerationOutputFinishReason"
 				},
 				"generated_tokens": {
 					"type": "integer",
+					"format": "int32",
+					"example": 1,
+					"minimum": 0
 				},
 				"prefill": {
 					"type": "array",
 					"items": {
+						"$ref": "#/$defs/TextGenerationOutputPrefillToken"
 					}
 				},
 				"seed": {
 					"type": "integer",
+					"format": "int64",
+					"example": 42,
+					"nullable": true,
+					"minimum": 0
 				},
 				"tokens": {
 					"type": "array",
 					"items": {
+						"$ref": "#/$defs/TextGenerationOutputToken"
 					}
 				},
 				"top_tokens": {
 					"type": "array",
 					"items": {
 						"type": "array",
 						"items": {
+							"$ref": "#/$defs/TextGenerationOutputToken"
 						}
 					}
 				}
 			},
+			"title": "TextGenerationOutputDetails"
 		},
+		"TextGenerationOutputBestOfSequence": {
 			"type": "object",
+			"required": ["generated_text", "finish_reason", "generated_tokens", "prefill", "tokens"],
 			"properties": {
+				"finish_reason": {
+					"$ref": "#/$defs/TextGenerationOutputFinishReason"
+				},
 				"generated_text": {
 					"type": "string",
+					"example": "test"
 				},
 				"generated_tokens": {
 					"type": "integer",
+					"format": "int32",
+					"example": 1,
+					"minimum": 0
 				},
 				"prefill": {
 					"type": "array",
 					"items": {
+						"$ref": "#/$defs/TextGenerationOutputPrefillToken"
 					}
 				},
 				"seed": {
 					"type": "integer",
+					"format": "int64",
+					"example": 42,
+					"nullable": true,
+					"minimum": 0
 				},
 				"tokens": {
 					"type": "array",
 					"items": {
+						"$ref": "#/$defs/TextGenerationOutputToken"
 					}
 				},
 				"top_tokens": {
 					"type": "array",
 					"items": {
 						"type": "array",
 						"items": {
+							"$ref": "#/$defs/TextGenerationOutputToken"
 						}
 					}
 				}
 			},
+			"title": "TextGenerationOutputBestOfSequence"
+		},
+		"TextGenerationOutputFinishReason": {
+			"type": "string",
+			"enum": ["length", "eos_token", "stop_sequence"],
+			"example": "length",
+			"title": "TextGenerationOutputFinishReason"
+		},
+		"TextGenerationOutputPrefillToken": {
+			"type": "object",
+			"required": ["id", "text", "logprob"],
+			"properties": {
+				"id": {
+					"type": "integer",
+					"format": "int32",
+					"example": 0,
+					"minimum": 0
+				},
+				"logprob": {
+					"type": "number",
+					"format": "float",
+					"example": -0.34,
+					"nullable": true
+				},
+				"text": {
+					"type": "string",
+					"example": "test"
+				}
+			},
+			"title": "TextGenerationOutputPrefillToken"
+		},
+		"TextGenerationOutputToken": {
+			"type": "object",
+			"required": ["id", "text", "logprob", "special"],
+			"properties": {
+				"id": {
+					"type": "integer",
+					"format": "int32",
+					"example": 0,
+					"minimum": 0
+				},
+				"logprob": {
+					"type": "number",
+					"format": "float",
+					"example": -0.34,
+					"nullable": true
+				},
+				"special": {
+					"type": "boolean",
+					"example": "false"
+				},
+				"text": {
+					"type": "string",
+					"example": "test"
+				}
+			},
+			"title": "TextGenerationOutputToken"
 		}
 	}
 }

packages/tasks/src/tasks/text-generation/spec/output_stream.json DELETED Viewed

@@ -1,47 +0,0 @@
-{
-	"$id": "/inference/schemas/text-generation/output.json",
-	"$schema": "http://json-schema.org/draft-06/schema#",
-	"description": "Text Generation Stream Output",
-	"title": "TextGenerationStreamOutput",
-	"type": "object",
-	"properties": {
-		"token": {
-			"$ref": "#/$defs/Token",
-			"description": "Generated token."
-		},
-		"index": {
-			"type": "integer",
-			"description": "The token index within the stream. Optional to support older clients that omit it."
-		},
-		"generated_text": {
-			"type": "string",
-			"description": "The complete generated text. Only available when the generation is finished."
-		},
-		"details": {
-			"$ref": "#/$defs/StreamDetails",
-			"description": "Generation details. Only available when the generation is finished."
-		}
-	},
-	"required": ["token"],
-	"$defs": {
-		"StreamDetails": {
-			"type": "object",
-			"title": "TextGenerationStreamDetails",
-			"properties": {
-				"finish_reason": {
-					"$ref": "#/$defs/FinishReason",
-					"description": "The reason why the generation was stopped."
-				},
-				"generated_tokens": {
-					"type": "integer",
-					"description": "The number of generated tokens"
-				},
-				"seed": {
-					"type": "integer",
-					"description": "The random seed used for generation"
-				}
-			},
-			"required": ["finish_reason", "generated_tokens", "seed"]
-		}
-	}
-}

packages/tasks/src/tasks/text-generation/spec/stream_output.json ADDED Viewed

	@@ -0,0 +1,97 @@

+{
+	"$id": "/inference/schemas/text-generation/stream_output.json",
+	"$schema": "http://json-schema.org/draft-06/schema#",
+	"description": "Text Generation Stream Output.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.",
+	"title": "TextGenerationStreamOutput",
+	"type": "object",
+	"required": ["index", "token"],
+	"properties": {
+		"details": {
+			"allOf": [
+				{
+					"$ref": "#/$defs/TextGenerationStreamOutputStreamDetails"
+				}
+			],
+			"default": "null",
+			"nullable": true
+		},
+		"generated_text": {
+			"type": "string",
+			"default": "null",
+			"example": "test",
+			"nullable": true
+		},
+		"index": {
+			"type": "integer",
+			"format": "int32",
+			"minimum": 0
+		},
+		"token": {
+			"$ref": "#/$defs/TextGenerationStreamOutputToken"
+		},
+		"top_tokens": {
+			"type": "array",
+			"items": {
+				"$ref": "#/$defs/TextGenerationStreamOutputToken"
+			}
+		}
+	},
+	"$defs": {
+		"TextGenerationStreamOutputStreamDetails": {
+			"type": "object",
+			"required": ["finish_reason", "generated_tokens"],
+			"properties": {
+				"finish_reason": {
+					"$ref": "#/$defs/TextGenerationStreamOutputFinishReason"
+				},
+				"generated_tokens": {
+					"type": "integer",
+					"format": "int32",
+					"example": 1,
+					"minimum": 0
+				},
+				"seed": {
+					"type": "integer",
+					"format": "int64",
+					"example": 42,
+					"nullable": true,
+					"minimum": 0
+				}
+			},
+			"title": "TextGenerationStreamOutputStreamDetails"
+		},
+		"TextGenerationStreamOutputFinishReason": {
+			"type": "string",
+			"enum": ["length", "eos_token", "stop_sequence"],
+			"example": "length",
+			"title": "TextGenerationStreamOutputFinishReason"
+		},
+		"TextGenerationStreamOutputToken": {
+			"type": "object",
+			"required": ["id", "text", "logprob", "special"],
+			"properties": {
+				"id": {
+					"type": "integer",
+					"format": "int32",
+					"example": 0,
+					"minimum": 0
+				},
+				"logprob": {
+					"type": "number",
+					"format": "float",
+					"example": -0.34,
+					"nullable": true
+				},
+				"special": {
+					"type": "boolean",
+					"example": "false"
+				},
+				"text": {
+					"type": "string",
+					"example": "test"
+				}
+			},
+			"title": "TextGenerationStreamOutputToken"
+		}
+	}
+}