diff --git a/packages/tasks/scripts/inference-codegen.ts b/packages/tasks/scripts/inference-codegen.ts
index a041821689aa3c7922a9976ff95bdfc3f16c79d3..37b389efbaa35ce6dc7c471b43d88b60f370f90f 100644
--- a/packages/tasks/scripts/inference-codegen.ts
+++ b/packages/tasks/scripts/inference-codegen.ts
@@ -57,7 +57,7 @@ async function generateTypescript(inputData: InputData): Promise<SerializedRenderResult> {
 		lang: "typescript",
 		rendererOptions: {
 			"just-types": "true",
-			"nice-property-names": "true",
+			"nice-property-names": "false",
 			"prefer-unions": "true",
 			"prefer-const-values": "true",
 		},
diff --git a/packages/tasks/src/tasks/document-question-answering/inference.ts b/packages/tasks/src/tasks/document-question-answering/inference.ts
--- a/packages/tasks/src/tasks/document-question-answering/inference.ts
+++ b/packages/tasks/src/tasks/document-question-answering/inference.ts
 	/**
 	 * A list of words and bounding boxes (normalized 0->1000). If provided, the inference will
 	 * skip the OCR step and use the provided bounding boxes instead.
 	 */
-	wordBoxes?: WordBox[];
+	word_boxes?: WordBox[];
 	[property: string]: unknown;
 }
 export type WordBox = number[] | string;
@@ -88,11 +88,19 @@ export interface DocumentQuestionAnsweringOutputElement {
 	/**
 	 * The answer to the question.
 	 */
 	answer: string;
+	/**
+	 * The end word index of the answer (in the OCR’d version of the input or provided word
+	 * boxes).
+	 */
 	end: number;
 	/**
 	 * The probability associated to the answer.
 	 */
 	score: number;
+	/**
+	 * The start word index of the answer (in the OCR’d version of the input or provided word
+	 * boxes).
+	 */
 	start: number;
 	/**
 	 * The index of each word/box pair that is in the answer
diff --git a/packages/tasks/src/tasks/document-question-answering/spec/input.json b/packages/tasks/src/tasks/document-question-answering/spec/input.json
index 12d38ec81a820dc925fc65b12cf8c61b7540653f..b017ce469be82c2f587da76b162a5494423bd468 100644
--- a/packages/tasks/src/tasks/document-question-answering/spec/input.json
+++ b/packages/tasks/src/tasks/document-question-answering/spec/input.json
@@ -31,11 +31,11 @@
 		"description": "Additional inference parameters for Document Question Answering",
 		"type": "object",
 		"properties": {
-			"docStride": {
+			"doc_stride": {
 				"type": "integer",
 				"description": "If the words in the document are too long to fit with the question for the model, it will be split in several chunks with some overlap. This argument controls the size of that overlap."
 			},
-			"handleImpossibleAnswer": {
+			"handle_impossible_answer": {
 				"type": "boolean",
 				"description": "Whether to accept impossible as an answer"
 			},
@@ -43,23 +43,23 @@
 				"type": "string",
 				"description": "Language to use while running OCR. Defaults to english."
 			},
-			"maxAnswerLen": {
+			"max_answer_len": {
 				"type": "integer",
 				"description": "The maximum length of predicted answers (e.g., only answers with a shorter length are considered)."
 			},
-			"maxSeqLen": {
+			"max_seq_len": {
 				"type": "integer",
 				"description": "The maximum length of the total sentence (context + question) in tokens of each chunk passed to the model. The context will be split in several chunks (using doc_stride as overlap) if needed."
 			},
-			"maxQuestionLen": {
+			"max_question_len": {
 				"type": "integer",
 				"description": "The maximum length of the question after tokenization. It will be truncated if needed."
 			},
-			"topK": {
+			"top_k": {
 				"type": "integer",
 				"description": "The number of answers to return (will be chosen by order of likelihood). Can return less than top_k answers if there are not enough options available within the context."
 			},
-			"wordBoxes": {
+			"word_boxes": {
 				"type": "array",
 				"description": "A list of words and bounding boxes (normalized 0->1000). If provided, the inference will skip the OCR step and use the provided bounding boxes instead.",
 				"items": {
diff --git a/packages/tasks/src/tasks/document-question-answering/spec/output.json b/packages/tasks/src/tasks/document-question-answering/spec/output.json
index 9f69584ae89696ca2d94b55ae60029ae868b8fb6..4fda3771a6c7fee0e09eff8dab47e3df6a6da823 100644
--- a/packages/tasks/src/tasks/document-question-answering/spec/output.json
+++ b/packages/tasks/src/tasks/document-question-answering/spec/output.json
@@ -17,11 +17,11 @@
 		},
 		"start": {
 			"type": "integer",
-			"descrtiption": "The start word index of the answer (in the OCR’d version of the input or provided word boxes)."
+			"description": "The start word index of the answer (in the OCR\u2019d version of the input or provided word boxes)."
 		},
 		"end": {
 			"type": "integer",
-			"descrtiption": "The end word index of the answer (in the OCR’d version of the input or provided word boxes)."
+			"description": "The end word index of the answer (in the OCR\u2019d version of the input or provided word boxes)."
 		},
 		"words": {
 			"type": "array",
diff --git a/packages/tasks/src/tasks/fill-mask/inference.ts b/packages/tasks/src/tasks/fill-mask/inference.ts
index 8784e979637aa00e022e928d1bf851c4b8835797..4d78ecd814ee6d4a46c29996f1cb5e15f6da119c 100644
--- a/packages/tasks/src/tasks/fill-mask/inference.ts
+++ b/packages/tasks/src/tasks/fill-mask/inference.ts
@@ -33,7 +33,7 @@ export interface FillMaskParameters {
 	/**
 	 * When passed, overrides the number of predictions to return.
 	 */
-	topK?: number;
+	top_k?: number;
 	[property: string]: unknown;
 }
 export type FillMaskOutput = FillMaskOutputElement[];
@@ -53,9 +53,9 @@ export interface FillMaskOutputElement {
 	 * The predicted token id (to replace the masked one).
 	 */
 	token: number;
 	/**
 	 * The predicted token (to replace the masked one).
 	 */
-	tokenStr: string;
+	token_str: string;
 	[property: string]: unknown;
 }
diff --git a/packages/tasks/src/tasks/fill-mask/spec/input.json b/packages/tasks/src/tasks/fill-mask/spec/input.json
index 009baf364b5462a3fc49d1302711aa06008073e9..cd3271e4a35d910c12cfcba80f380c2a84a80a8c 100644
--- a/packages/tasks/src/tasks/fill-mask/spec/input.json
+++ b/packages/tasks/src/tasks/fill-mask/spec/input.json
@@ -20,7 +20,7 @@
 		"description": "Additional inference parameters for Fill Mask",
 		"type": "object",
 		"properties": {
-			"topK": {
+			"top_k": {
 				"type": "integer",
 				"description": "When passed, overrides the number of predictions to return."
 			},
diff --git a/packages/tasks/src/tasks/fill-mask/spec/output.json b/packages/tasks/src/tasks/fill-mask/spec/output.json
index f8e91aeeaa0871f7f498fd627608ecc80f687f68..0b613382e781cf0405c76df5c1f9f5091da6b196 100644
--- a/packages/tasks/src/tasks/fill-mask/spec/output.json
+++ b/packages/tasks/src/tasks/fill-mask/spec/output.json
@@ -19,7 +19,7 @@
 				"type": "integer",
 				"description": "The predicted token id (to replace the masked one)."
 			},
-			"tokenStr": {
+			"token_str": {
 				"type": "string",
 				"description": "The predicted token (to replace the masked one)."
 			}
diff --git a/packages/tasks/src/tasks/image-classification/inference.ts b/packages/tasks/src/tasks/image-classification/inference.ts
index 92ca03e0d81456ca7a7ef585d27c1f0572ec0890..e0689d887fd9248237845eac5aaa6658dd3f4019 100644
--- a/packages/tasks/src/tasks/image-classification/inference.ts
+++ b/packages/tasks/src/tasks/image-classification/inference.ts
@@ -23,11 +23,11 @@ export interface ImageClassificationInput {
  * Additional inference parameters for Image Classification
  */
 export interface ImageClassificationParameters {
-	functionToApply?: ClassificationOutputTransform;
+	function_to_apply?: ClassificationOutputTransform;
 	/**
 	 * When specified, limits the output to the top K most probable classes.
 	 */
-	topK?: number;
+	top_k?: number;
 	[property: string]: unknown;
 }
 /**
diff --git a/packages/tasks/src/tasks/image-classification/spec/input.json b/packages/tasks/src/tasks/image-classification/spec/input.json
index 8c2c2fcc709a851dcf2401e3a93a5742e6281816..a8cd4273cc8c311b12857d9104d2814f7cf4179e 100644
--- a/packages/tasks/src/tasks/image-classification/spec/input.json
+++ b/packages/tasks/src/tasks/image-classification/spec/input.json
@@ -19,11 +19,11 @@
 		"description": "Additional inference parameters for Image Classification",
 		"type": "object",
 		"properties": {
-			"functionToApply": {
+			"function_to_apply": {
 				"title": "ImageClassificationOutputTransform",
 				"$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutputTransform"
 			},
-			"topK": {
+			"top_k": {
 				"type": "integer",
 				"description": "When specified, limits the output to the top K most probable classes."
 			}
diff --git a/packages/tasks/src/tasks/image-segmentation/inference.ts b/packages/tasks/src/tasks/image-segmentation/inference.ts
index 7d552b66e5ed0c188c59f1190d853e4b366fc6a0..02db5cb90f115e8c6b198eac0ac1aa616539344b 100644
--- a/packages/tasks/src/tasks/image-segmentation/inference.ts
+++ b/packages/tasks/src/tasks/image-segmentation/inference.ts
@@ -26,11 +26,11 @@ export interface ImageSegmentationParameters {
 	/**
 	 * Threshold to use when turning the predicted masks into binary values.
 	 */
-	maskThreshold?: number;
+	mask_threshold?: number;
 	/**
 	 * Mask overlap threshold to eliminate small, disconnected segments.
 	 */
-	overlapMaskAreaThreshold?: number;
+	overlap_mask_area_threshold?: number;
 	/**
 	 * Segmentation task to be performed, depending on model capabilities.
 	 */
diff --git a/packages/tasks/src/tasks/image-segmentation/spec/input.json b/packages/tasks/src/tasks/image-segmentation/spec/input.json
index 169036406c40b22af16a779b3a94d58fd026f85c..500793554146810f1aa1e30adf221a5d10506b50 100644
--- a/packages/tasks/src/tasks/image-segmentation/spec/input.json
+++ b/packages/tasks/src/tasks/image-segmentation/spec/input.json
@@ -19,11 +19,11 @@
 		"description": "Additional inference parameters for Image Segmentation",
 		"type": "object",
 		"properties": {
-			"maskThreshold": {
+			"mask_threshold": {
 				"type": "number",
 				"description": "Threshold to use when turning the predicted masks into binary values."
 			},
-			"overlapMaskAreaThreshold": {
+			"overlap_mask_area_threshold": {
 				"type": "number",
 				"description": "Mask overlap threshold to eliminate small, disconnected segments."
 			},
diff --git a/packages/tasks/src/tasks/image-to-image/inference.ts b/packages/tasks/src/tasks/image-to-image/inference.ts
index f2d3064c800ff7ea79b19e02d4fcd0c985c644eb..bf732e07018c5ab5d1e9bd0eb3f1212e7943dd36 100644
--- a/packages/tasks/src/tasks/image-to-image/inference.ts
+++ b/packages/tasks/src/tasks/image-to-image/inference.ts
@@ -29,20 +29,20 @@ export interface ImageToImageParameters {
 	/**
 	 * For diffusion models. A higher guidance scale value encourages the model to generate
 	 * images closely linked to the text prompt at the expense of lower image quality.
 	 */
-	guidanceScale?: number;
+	guidance_scale?: number;
 	/**
 	 * One or several prompt to guide what NOT to include in image generation.
 	 */
-	negativePrompt?: string[];
+	negative_prompt?: string[];
 	/**
 	 * For diffusion models. The number of denoising steps. More denoising steps usually lead to
 	 * a higher quality image at the expense of slower inference.
 	 */
-	numInferenceSteps?: number;
+	num_inference_steps?: number;
 	/**
 	 * The size in pixel of the output image
 	 */
-	targetSize?: TargetSize;
+	target_size?: TargetSize;
 	[property: string]: unknown;
 }
diff --git a/packages/tasks/src/tasks/image-to-image/spec/input.json b/packages/tasks/src/tasks/image-to-image/spec/input.json
index ac6d7aed65d43b1cbaa75188c565740928e85fcf..873e1f20d956f5cb40802589be3d2a8972bd2abc 100644
--- a/packages/tasks/src/tasks/image-to-image/spec/input.json
+++ b/packages/tasks/src/tasks/image-to-image/spec/input.json
@@ -19,22 +19,22 @@
 		"description": "Additional inference parameters for Image To Image",
 		"type": "object",
 		"properties": {
-			"guidanceScale": {
+			"guidance_scale": {
 				"type": "number",
 				"description": "For diffusion models. A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality."
 			},
-			"negativePrompt": {
+			"negative_prompt": {
 				"type": "array",
 				"items": { "type": "string" },
 				"description": "One or several prompt to guide what NOT to include in image generation."
 			},
-			"numInferenceSteps": {
+			"num_inference_steps": {
 				"type": "integer",
 				"description": "For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference."
 			},
-			"targetSize": {
+			"target_size": {
 				"type": "object",
 				"description": "The size in pixel of the output image",
 				"properties": {
diff --git a/packages/tasks/src/tasks/image-to-text/inference.ts b/packages/tasks/src/tasks/image-to-text/inference.ts
index 84dc7b80f0a1d2959dfb8d2918125e712d789b1e..7cace215832b47d1727b0db82d85e7322f5cbf03 100644
--- a/packages/tasks/src/tasks/image-to-text/inference.ts
+++ b/packages/tasks/src/tasks/image-to-text/inference.ts
@@ -3,6 +3,7 @@
  *
  * Using src/scripts/inference-codegen
  */
+
 /**
  * Inputs for Image To Text inference
  */
@@ -17,6 +18,7 @@ export interface ImageToTextInput {
 	parameters?: ImageToTextParameters;
 	[property: string]: unknown;
 }
+
 /**
  * Additional inference parameters
 *
@@ -30,9 +32,10 @@ export interface ImageToTextParameters {
 	/**
 	 * The amount of maximum tokens to generate.
 	 */
-	maxNewTokens?: number;
+	max_new_tokens?: number;
 	[property: string]: unknown;
 }
+
 /**
  * Parametrization of the text generation process
 *
@@ -42,18 +45,18 @@ export interface GenerationParameters {
 	/**
 	 * Whether to use sampling instead of greedy decoding when generating new tokens.
 	 */
-	doSample?: boolean;
+	do_sample?: boolean;
 	/**
 	 * Controls the stopping condition for beam-based methods.
 	 */
-	earlyStopping?: EarlyStoppingUnion;
+	early_stopping?: EarlyStoppingUnion;
 	/**
 	 * If set to float strictly between 0 and 1, only tokens with a conditional probability
 	 * greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
 	 * 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
 	 * Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
 	 */
-	epsilonCutoff?: number;
+	epsilon_cutoff?: number;
 	/**
 	 * Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
 	 * float strictly between 0 and 1, a token is only considered if it is greater than either
@@ -63,37 +66,37 @@ export interface GenerationParameters {
 	 * See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
 	 * for more details.
 	 */
-	etaCutoff?: number;
+	eta_cutoff?: number;
 	/**
 	 * The maximum length (in tokens) of the generated text, including the input.
 	 */
-	maxLength?: number;
+	max_length?: number;
 	/**
 	 * The maximum number of tokens to generate. Takes precedence over maxLength.
 	 */
-	maxNewTokens?: number;
+	max_new_tokens?: number;
 	/**
 	 * The minimum length (in tokens) of the generated text, including the input.
 	 */
-	minLength?: number;
+	min_length?: number;
 	/**
 	 * The minimum number of tokens to generate. Takes precedence over maxLength.
 	 */
-	minNewTokens?: number;
+	min_new_tokens?: number;
 	/**
 	 * Number of groups to divide num_beams into in order to ensure diversity among different
 	 * groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
 	 */
-	numBeamGroups?: number;
+	num_beam_groups?: number;
 	/**
 	 * Number of beams to use for beam search.
 	 */
-	numBeams?: number;
+	num_beams?: number;
 	/**
 	 * The value balances the model confidence and the degeneration penalty in contrastive
 	 * search decoding.
 	 */
-	penaltyAlpha?: number;
+	penalty_alpha?: number;
 	/**
 	 * The value used to modulate the next token probabilities.
 	 */
@@ -101,12 +104,12 @@
 	/**
 	 * The number of highest probability vocabulary tokens to keep for top-k-filtering.
 	 */
-	topK?: number;
+	top_k?: number;
 	/**
 	 * If set to float < 1, only the smallest set of most probable tokens with probabilities
 	 * that add up to top_p or higher are kept for generation.
 	 */
-	topP?: number;
+	top_p?: number;
 	/**
 	 * Local typicality measures how similar the conditional probability of predicting a target
 	 * token next is to the expected conditional probability of predicting a random token next,
@@ -114,25 +117,26 @@
 	 * most locally typical tokens with probabilities that add up to typical_p or higher are
 	 * kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
 	 */
-	typicalP?: number;
+	typical_p?: number;
 	/**
 	 * Whether the model should use the past last key/values attentions to speed up decoding
 	 */
-	useCache?: boolean;
+	use_cache?: boolean;
 	[property: string]: unknown;
 }
+
 /**
 * Controls the stopping condition for beam-based methods.
 */
 export type EarlyStoppingUnion = boolean | "never";
-export type ImageToTextOutput = ImageToTextOutputElement[];
+
 /**
 * Outputs of inference for the Image To Text task
 */
-export interface ImageToTextOutputElement {
+export interface ImageToTextOutput {
 	/**
 	 * The generated text.
 	 */
-	generatedText: string;
+	generated_text: string;
 	[property: string]: unknown;
 }
diff --git a/packages/tasks/src/tasks/image-to-text/spec/input.json b/packages/tasks/src/tasks/image-to-text/spec/input.json
index d3c367b951beaa5b4c54a22af874b7a8b51bbc52..dec832a48f604d66b3af6541a37f130101078bf9 100644
--- a/packages/tasks/src/tasks/image-to-text/spec/input.json
+++ b/packages/tasks/src/tasks/image-to-text/spec/input.json
@@ -19,7 +19,7 @@
 		"description": "Additional inference parameters for Image To Text",
 		"type": "object",
 		"properties": {
-			"maxNewTokens": {
+			"max_new_tokens": {
 				"type": "integer",
 				"description": "The amount of maximum tokens to generate."
 			},
diff --git a/packages/tasks/src/tasks/image-to-text/spec/output.json b/packages/tasks/src/tasks/image-to-text/spec/output.json
index e3283e34f7c71a3165e4bce52e9c5d51ccf7f810..388c3456f4e7f50b0c7b133725a2d951f152cb01 100644
--- a/packages/tasks/src/tasks/image-to-text/spec/output.json
+++ b/packages/tasks/src/tasks/image-to-text/spec/output.json
@@ -3,15 +3,12 @@
 	"$schema": "http://json-schema.org/draft-06/schema#",
 	"description": "Outputs of inference for the Image To Text task",
 	"title": "ImageToTextOutput",
-	"type": "array",
-	"items": {
-		"type": "object",
-		"properties": {
-			"generatedText": {
-				"type": "string",
-				"description": "The generated text."
-			}
-		},
-		"required": ["generatedText"]
-	}
+	"type": "object",
+	"properties": {
+		"generated_text": {
+			"type": "string",
+			"description": "The generated text."
+		}
+	},
+	"required": ["generated_text"]
 }
diff --git a/packages/tasks/src/tasks/placeholder/spec/input.json b/packages/tasks/src/tasks/placeholder/spec/input.json
index aded2e46a6d155eaed9f222cd57cef96a55742a6..d31f4aac619900220d154523a6c5abc4b37f10c1 100644
--- a/packages/tasks/src/tasks/placeholder/spec/input.json
+++ b/packages/tasks/src/tasks/placeholder/spec/input.json
@@ -20,11 +20,11 @@
 		"description": "TODO: describe additional parameters here.",
 		"type": "object",
 		"properties": {
-			"dummyParameterName": {
+			"dummy_parameter_name": {
 				"type": "boolean",
 				"description": "TODO: describe the parameter here"
 			},
-			"dummyParameterName2": {
+			"dummy_parameter_name2": {
 				"type": "integer",
 				"description": "TODO: describe the parameter here"
 			}
diff --git a/packages/tasks/src/tasks/placeholder/spec/output.json b/packages/tasks/src/tasks/placeholder/spec/output.json
index 8e3e132941936718b08c0cbcd961fcc277e57a38..697c6e2672a45f10abc4ba5554e38e7352bb807d 100644
--- a/packages/tasks/src/tasks/placeholder/spec/output.json
+++ b/packages/tasks/src/tasks/placeholder/spec/output.json
@@ -7,7 +7,7 @@
 	"items": {
 		"type": "object",
 		"properties": {
-			"meaningfulOutputName": {
+			"meaningful_output_name": {
 				"type": "string",
 				"description": "TODO: Describe what is outputed by the inference here"
 			}
diff --git a/packages/tasks/src/tasks/question-answering/inference.ts b/packages/tasks/src/tasks/question-answering/inference.ts
index e2aa0088a743e03344aff2be07902a752b1bc96f..eaef8dfe3170ec4f790bdcdaaa0a07dd8aae7d76 100644
--- a/packages/tasks/src/tasks/question-answering/inference.ts
+++ b/packages/tasks/src/tasks/question-answering/inference.ts
@@ -41,37 +41,37 @@ export interface QuestionAnsweringParameters {
 	 * Attempts to align the answer to real words. Improves quality on space separated
 	 * languages. Might hurt on non-space-separated languages (like Japanese or Chinese)
 	 */
-	alignToWords?: boolean;
+	align_to_words?: boolean;
 	/**
 	 * If the context is too long to fit with the question for the model, it will be split in
 	 * several chunks with some overlap. This argument controls the size of that overlap.
 	 */
-	docStride?: number;
+	doc_stride?: number;
 	/**
 	 * Whether to accept impossible as an answer.
 	 */
-	handleImpossibleAnswer?: boolean;
+	handle_impossible_answer?: boolean;
 	/**
 	 * The maximum length of predicted answers (e.g., only answers with a shorter length are
 	 * considered).
 	 */
-	maxAnswerLen?: number;
+	max_answer_len?: number;
 	/**
 	 * The maximum length of the question after tokenization. It will be truncated if needed.
 	 */
-	maxQuestionLen?: number;
+	max_question_len?: number;
 	/**
 	 * The maximum length of the total sentence (context + question) in tokens of each chunk
 	 * passed to the model. The context will be split in several chunks (using docStride as
 	 * overlap) if needed.
 	 */
-	maxSeqLen?: number;
+	max_seq_len?: number;
 	/**
 	 * The number of answers to return (will be chosen by order of likelihood). Note that we
 	 * return less than topk answers if there are not enough options available within the
 	 * context.
 	 */
-	topK?: number;
+	top_k?: number;
 	[property: string]: unknown;
 }
 export type QuestionAnsweringOutput = QuestionAnsweringOutputElement[];
diff --git a/packages/tasks/src/tasks/question-answering/spec/input.json b/packages/tasks/src/tasks/question-answering/spec/input.json
index 62f36ebc99d6a6a11e661b379c23db130337bcc3..70d5607cffcb93e728a987ca0da384c2c813dc21 100644
--- a/packages/tasks/src/tasks/question-answering/spec/input.json
+++ b/packages/tasks/src/tasks/question-answering/spec/input.json
@@ -32,31 +32,31 @@
 		"description": "Additional inference parameters for Question Answering",
 		"type": "object",
 		"properties": {
-			"topK": {
+			"top_k": {
 				"type": "integer",
 				"description": "The number of answers to return (will be chosen by order of likelihood). Note that we return less than topk answers if there are not enough options available within the context."
 			},
-			"docStride": {
+			"doc_stride": {
 				"type": "integer",
 				"description": "If the context is too long to fit with the question for the model, it will be split in several chunks with some overlap. This argument controls the size of that overlap."
 			},
-			"maxAnswerLen": {
+			"max_answer_len": {
 				"type": "integer",
 				"description": "The maximum length of predicted answers (e.g., only answers with a shorter length are considered)."
 			},
-			"maxSeqLen": {
+			"max_seq_len": {
 				"type": "integer",
 				"description": "The maximum length of the total sentence (context + question) in tokens of each chunk passed to the model. The context will be split in several chunks (using docStride as overlap) if needed."
 			},
-			"maxQuestionLen": {
+			"max_question_len": {
 				"type": "integer",
 				"description": "The maximum length of the question after tokenization. It will be truncated if needed."
 			},
-			"handleImpossibleAnswer": {
+			"handle_impossible_answer": {
 				"type": "boolean",
 				"description": "Whether to accept impossible as an answer."
 			},
-			"alignToWords": {
+			"align_to_words": {
 				"type": "boolean",
 				"description": "Attempts to align the answer to real words. Improves quality on space separated languages. Might hurt on non-space-separated languages (like Japanese or Chinese)"
 			}
diff --git a/packages/tasks/src/tasks/summarization/inference.ts b/packages/tasks/src/tasks/summarization/inference.ts
index 5cc2bb0d2b750ce4fd1e2ebbd24fe2e3e8da9f75..a73a7098572b836aef9194a1d87c5393c5805249 100644
--- a/packages/tasks/src/tasks/summarization/inference.ts
+++ b/packages/tasks/src/tasks/summarization/inference.ts
@@ -30,11 +30,11 @@ export interface Text2TextGenerationParameters {
 	/**
 	 * Whether to clean up the potential extra spaces in the text output.
 	 */
-	cleanUpTokenizationSpaces?: boolean;
+	clean_up_tokenization_spaces?: boolean;
 	/**
 	 * Additional parametrization of the text generation algorithm
 	 */
-	generateParameters?: { [key: string]: unknown };
+	generate_parameters?: { [key: string]: unknown };
 	/**
 	 * The truncation strategy to use
 	 */
@@ -50,9 +50,9 @@ export type Text2TextGenerationTruncationStrategy = "do_not_truncate" | "longest
 * Outputs of inference for the Text2text Generation task
 */
 export interface SummarizationOutput {
 	/**
 	 * The generated text.
 	 */
-	generatedText: string;
+	generated_text: string;
 	[property: string]: unknown;
 }
diff --git a/packages/tasks/src/tasks/text-classification/inference.ts b/packages/tasks/src/tasks/text-classification/inference.ts
index 2272d903b95c8d84dd36d5b0851196d6bb9ef857..dc913690203f4f3b64d1606f4d11aaa254b8013d 100644
--- a/packages/tasks/src/tasks/text-classification/inference.ts
+++ b/packages/tasks/src/tasks/text-classification/inference.ts
@@ -23,11 +23,11 @@ export interface TextClassificationInput {
  * Additional inference parameters for Text Classification
  */
 export interface TextClassificationParameters {
-	functionToApply?: ClassificationOutputTransform;
+	function_to_apply?: ClassificationOutputTransform;
 	/**
 	 * When specified, limits the output to the top K most probable classes.
 	 */
-	topK?: number;
+	top_k?: number;
 	[property: string]: unknown;
 }
 /**
diff --git a/packages/tasks/src/tasks/text-classification/spec/input.json b/packages/tasks/src/tasks/text-classification/spec/input.json
index 10b98cbba3af88698a7979b5890925d0261390e9..3bfdeaf6b905d957e5241674b7ac3d3eb1c6438a 100644
--- a/packages/tasks/src/tasks/text-classification/spec/input.json
+++ b/packages/tasks/src/tasks/text-classification/spec/input.json
@@ -20,11 +20,11 @@
 		"description": "Additional inference parameters for Text Classification",
 		"type": "object",
 		"properties": {
-			"functionToApply": {
+			"function_to_apply": {
 				"title": "TextClassificationOutputTransform",
 				"$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutputTransform"
 			},
-			"topK": {
+			"top_k": {
 				"type": "integer",
 				"description": "When specified, limits the output to the top K most probable classes."
 			}
diff --git a/packages/tasks/src/tasks/text-generation/inference.ts b/packages/tasks/src/tasks/text-generation/inference.ts
index 52adc0ace30af542a30f323cf764127c34071fb2..0f0d1e8d754644ac170abe168a6498130f6ac10f 100644
--- a/packages/tasks/src/tasks/text-generation/inference.ts
+++ b/packages/tasks/src/tasks/text-generation/inference.ts
@@ -3,6 +3,7 @@
  *
  * Using src/scripts/inference-codegen
  */
+
 /**
  * Inputs for Text Generation inference
  */
@@ -17,6 +18,7 @@ export interface TextGenerationInput {
 	parameters?: TextGenerationParameters;
 	[property: string]: unknown;
 }
+
 /**
  * Additional inference parameters
 *
@@ -26,24 +28,24 @@ export interface TextGenerationParameters {
 	/**
 	 * Whether to use logit sampling (true) or greedy search (false).
 	 */
-	doSample?: boolean;
+	do_sample?: boolean;
 	/**
 	 * Maximum number of generated tokens.
 	 */
-	maxNewTokens?: number;
+	max_new_tokens?: number;
 	/**
 	 * The parameter for repetition penalty. A value of 1.0 means no penalty. See [this
 	 * paper](https://hf.co/papers/1909.05858) for more details.
 	 */
-	repetitionPenalty?: number;
+	repetition_penalty?: number;
 	/**
 	 * Whether to prepend the prompt to the generated text.
 	 */
-	returnFullText?: boolean;
+	return_full_text?: boolean;
 	/**
 	 * Stop generating tokens if a member of `stop_sequences` is generated.
 	 */
-	stopSequences?: string[];
+	stop_sequences?: string[];
 	/**
 	 * The value used to modulate the logits distribution.
 	 */
@@ -51,12 +53,12 @@ export interface TextGenerationParameters {
 	/**
 	 * The number of highest probability vocabulary tokens to keep for top-k-filtering.
 	 */
-	topK?: number;
+	top_k?: number;
 	/**
 	 * If set to < 1, only the smallest set of most probable tokens with probabilities that add
 	 * up to `top_p` or higher are kept for generation.
 	 */
-	topP?: number;
+	top_p?: number;
 	/**
 	 * Truncate input tokens to the given size.
 	 */
@@ -65,21 +67,21 @@ export interface TextGenerationParameters {
 	 * Typical Decoding mass. See [Typical Decoding for Natural Language
 	 * Generation](https://hf.co/papers/2202.00666) for more information
 	 */
-	typicalP?: number;
+	typical_p?: number;
 	/**
 	 * Watermarking with [A Watermark for Large Language Models](https://hf.co/papers/2301.10226)
 	 */
 	watermark?: boolean;
 	[property: string]: unknown;
 }
-export type TextGenerationOutput = TextGenerationOutputElement[];
+
 /**
 * Outputs for Text Generation inference
 */
-export interface TextGenerationOutputElement {
+export interface TextGenerationOutput {
 	/**
 	 * The generated text
 	 */
-	generatedText: string;
+	generated_text: string;
 	[property: string]: unknown;
 }
diff --git a/packages/tasks/src/tasks/text-generation/spec/input.json b/packages/tasks/src/tasks/text-generation/spec/input.json
index c4756edd3fa55a04bd1a7ce5e54e10e7d720e376..26fe24c8a20d1e46c7ad76a9511f9c23cd9f4a8e 100644
--- a/packages/tasks/src/tasks/text-generation/spec/input.json
+++ b/packages/tasks/src/tasks/text-generation/spec/input.json
@@ -20,23 +20,23 @@
 		"description": "Additional inference parameters for Text Generation",
 		"type": "object",
 		"properties": {
-			"doSample": {
+			"do_sample": {
 				"type": "boolean",
 				"description": "Whether to use logit sampling (true) or greedy search (false)."
 			},
-			"maxNewTokens": {
+			"max_new_tokens": {
 				"type": "integer",
 				"description": "Maximum number of generated tokens."
 			},
-			"repetitionPenalty": {
+			"repetition_penalty": {
 				"type": "number",
 				"description": "The parameter for repetition penalty. A value of 1.0 means no penalty. See [this paper](https://hf.co/papers/1909.05858) for more details."
 			},
-			"returnFullText": {
+			"return_full_text": {
 				"type": "boolean",
 				"description": "Whether to prepend the prompt to the generated text."
 			},
-			"stopSequences": {
+			"stop_sequences": {
 				"type": "array",
 				"items": {
 					"type": "string"
@@ -47,11 +47,11 @@
 				"type": "number",
 				"description": "The value used to modulate the logits distribution."
 			},
-			"topK": {
+			"top_k": {
 				"type": "integer",
 				"description": "The number of highest probability vocabulary tokens to keep for top-k-filtering."
 			},
-			"topP": {
+			"top_p": {
 				"type": "number",
 				"description": "If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or higher are kept for generation."
 			},
@@ -59,7 +59,7 @@
 				"type": "integer",
 				"description": "Truncate input tokens to the given size."
 			},
-			"typicalP": {
+			"typical_p": {
 				"type": "number",
 				"description": "Typical Decoding mass. See [Typical Decoding for Natural Language Generation](https://hf.co/papers/2202.00666) for more information"
 			},
diff --git a/packages/tasks/src/tasks/text-generation/spec/output.json b/packages/tasks/src/tasks/text-generation/spec/output.json
index eacb907e2c75f02a866b9b963b6a2bbfefe18d8d..b38bc8be305be78ca5f4d575eed7a5d910af0266 100644
--- a/packages/tasks/src/tasks/text-generation/spec/output.json
+++ b/packages/tasks/src/tasks/text-generation/spec/output.json
@@ -3,15 +3,12 @@
 	"$schema": "http://json-schema.org/draft-06/schema#",
 	"description": "Outputs for Text Generation inference",
 	"title": "TextGenerationOutput",
-	"type": "array",
-	"items": {
-		"type": "object",
-		"properties": {
-			"generatedText": {
-				"type": "string",
-				"description": "The generated text"
-			}
-		},
-		"required": ["generatedText"]
-	}
+	"type": "object",
+	"properties": {
+		"generated_text": {
+			"type": "string",
+			"description": "The generated text"
+		}
+	},
+	"required": ["generated_text"]
 }
diff --git a/packages/tasks/src/tasks/text-to-audio/inference.ts b/packages/tasks/src/tasks/text-to-audio/inference.ts
index 2ac6b3c08e1462ce22c0c64f952fa2e5a1164215..276ecce652394bdc98b8708c6ac19fba46a8da48 100644
--- a/packages/tasks/src/tasks/text-to-audio/inference.ts
+++ b/packages/tasks/src/tasks/text-to-audio/inference.ts
@@ -3,6 +3,7 @@
  *
  * Using src/scripts/inference-codegen
  */
+
 /**
  * Inputs for Text To Audio inference
  */
@@ -17,6 +18,7 @@ export interface TextToAudioInput {
 	parameters?: TextToAudioParameters;
 	[property: string]: unknown;
 }
+
 /**
  * Additional inference parameters
 *
@@ -29,6 +31,7 @@ export interface TextToAudioParameters {
 	generate?: GenerationParameters;
 	[property: string]: unknown;
 }
+
 /**
  * Parametrization of the text generation process
 *
@@ -38,18 +41,18 @@ export interface GenerationParameters {
 	/**
 	 * Whether to use sampling instead of greedy decoding when generating new tokens.
 	 */
-	doSample?: boolean;
+	do_sample?: boolean;
 	/**
 	 * Controls the stopping condition for beam-based methods.
 	 */
-	earlyStopping?: EarlyStoppingUnion;
+	early_stopping?: EarlyStoppingUnion;
 	/**
 	 * If set to float strictly between 0 and 1, only tokens with a conditional probability
 	 * greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
 	 * 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
 	 * Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
 	 */
-	epsilonCutoff?: number;
+	epsilon_cutoff?: number;
 	/**
 	 * Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
 	 * float strictly between 0 and 1, a token is only considered if it is greater than either
@@ -59,37 +62,37 @@ export interface GenerationParameters {
 	 * See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
 	 * for more details.
 	 */
-	etaCutoff?: number;
+	eta_cutoff?: number;
 	/**
 	 * The maximum length (in tokens) of the generated text, including the input.
 	 */
-	maxLength?: number;
+	max_length?: number;
 	/**
 	 * The maximum number of tokens to generate. Takes precedence over maxLength.
 	 */
-	maxNewTokens?: number;
+	max_new_tokens?: number;
 	/**
 	 * The minimum length (in tokens) of the generated text, including the input.
 	 */
-	minLength?: number;
+	min_length?: number;
 	/**
 	 * The minimum number of tokens to generate. Takes precedence over maxLength.
 	 */
-	minNewTokens?: number;
+	min_new_tokens?: number;
 	/**
 	 * Number of groups to divide num_beams into in order to ensure diversity among different
 	 * groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
 	 */
-	numBeamGroups?: number;
+	num_beam_groups?: number;
 	/**
 	 * Number of beams to use for beam search.
 	 */
-	numBeams?: number;
+	num_beams?: number;
 	/**
 	 * The value balances the model confidence and the degeneration penalty in contrastive
 	 * search decoding.
 	 */
-	penaltyAlpha?: number;
+	penalty_alpha?: number;
 	/**
 	 * The value used to modulate the next token probabilities.
 	 */
@@ -97,12 +100,12 @@
 	/**
 	 * The number of highest probability vocabulary tokens to keep for top-k-filtering.
 	 */
-	topK?: number;
+	top_k?: number;
 	/**
 	 * If set to float < 1, only the smallest set of most probable tokens with probabilities
 	 * that add up to top_p or higher are kept for generation.
 	 */
-	topP?: number;
+	top_p?: number;
 	/**
 	 * Local typicality measures how similar the conditional probability of predicting a target
 	 * token next is to the expected conditional probability of predicting a random token next,
@@ -110,29 +113,30 @@
 	 * most locally typical tokens with probabilities that add up to typical_p or higher are
 	 * kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
 	 */
-	typicalP?: number;
+	typical_p?: number;
 	/**
 	 * Whether the model should use the past last key/values attentions to speed up decoding
 	 */
-	useCache?: boolean;
+	use_cache?: boolean;
 	[property: string]: unknown;
 }
+
 /**
 * Controls the stopping condition for beam-based methods.
 */
 export type EarlyStoppingUnion = boolean | "never";
-export type TextToAudioOutput = TextToAudioOutputElement[];
+
 /**
 * Outputs of inference for the Text To Audio task
 */
-export interface TextToAudioOutputElement {
+export interface TextToAudioOutput {
 	/**
 	 * The generated audio waveform.
 	 */
 	audio: unknown;
 	/**
 	 * The sampling rate of the generated audio waveform.
 	 */
-	samplingRate: number;
+	sampling_rate: number;
 	[property: string]: unknown;
 }
diff --git a/packages/tasks/src/tasks/text-to-audio/spec/output.json b/packages/tasks/src/tasks/text-to-audio/spec/output.json
index b0a25bd9ad4bcdb2e1f55a1fa65b7e2d9d8cf832..c171d62bffbed21b423f91a807ed525d285f3445 100644
--- a/packages/tasks/src/tasks/text-to-audio/spec/output.json
+++ b/packages/tasks/src/tasks/text-to-audio/spec/output.json
@@ -3,18 +3,15 @@
 	"$schema": "http://json-schema.org/draft-06/schema#",
 	"description": "Outputs of inference for the Text To Audio task",
 	"title": "TextToAudioOutput",
-	"type": "array",
-	"items": {
-		"type": "object",
-		"properties": {
-			"audio": {
-				"description": "The generated audio waveform."
-			},
-			"samplingRate": {
-				"type": "number",
-				"description": "The sampling rate of the generated audio waveform."
-			}
+	"type": "object",
+	"properties": {
+		"audio": {
+			"description": "The generated audio waveform."
 		},
-		"required": ["audio", "samplingRate"]
-	}
+		"sampling_rate": {
+			"type": "number",
+			"description": "The sampling rate of the generated audio waveform."
+		}
+	},
+	"required": ["audio", "sampling_rate"]
 }
diff --git a/packages/tasks/src/tasks/text-to-image/inference.ts b/packages/tasks/src/tasks/text-to-image/inference.ts
index 14237ebda2775336390b2cd6125bd346f4bff287..4997165b8c1351c37356ecc6ec613555b6d871b3 100644
--- a/packages/tasks/src/tasks/text-to-image/inference.ts
+++ b/packages/tasks/src/tasks/text-to-image/inference.ts
@@ -29,16 +29,16 @@ export interface TextToImageParameters {
 	/**
 	 * For diffusion models. A higher guidance scale value encourages the model to generate
 	 * images closely linked to the text prompt at the expense of lower image quality.
 	 */
-	guidanceScale?: number;
+	guidance_scale?: number;
 	/**
 	 * One or several prompt to guide what NOT to include in image generation.
 	 */
-	negativePrompt?: string[];
+	negative_prompt?: string[];
 	/**
 	 * For diffusion models. The number of denoising steps. More denoising steps usually lead to
 	 * a higher quality image at the expense of slower inference.
 	 */
-	numInferenceSteps?: number;
+	num_inference_steps?: number;
 	/**
 	 * For diffusion models. Override the scheduler with a compatible one
 	 */
@@ -46,7 +46,7 @@ export interface TextToImageParameters {
 	/**
 	 * The size in pixel of the output image
 	 */
-	targetSize?: TargetSize;
+	target_size?: TargetSize;
 	[property: string]: unknown;
 }
@@ -62,9 +62,7 @@ export interface TargetSize {
 /**
 * Outputs of inference for the Text To Image task
 */
-export type TextToImageOutput = unknown[] | boolean | number | number | null | TextToImageOutputObject | string;
-
-export interface TextToImageOutputObject {
+export interface TextToImageOutput {
 	/**
 	 * The generated image
 	 */
diff --git a/packages/tasks/src/tasks/text-to-image/spec/input.json b/packages/tasks/src/tasks/text-to-image/spec/input.json
index 130678fc91cb0bbd8709d42ec4f4956ac7e78427..49acc7ed3af74cc3293f6f8b250d715586a9085c 100644
--- a/packages/tasks/src/tasks/text-to-image/spec/input.json
+++ b/packages/tasks/src/tasks/text-to-image/spec/input.json
@@ -20,22 +20,22 @@
 		"description": "Additional inference parameters for Text To Image",
 		"type": "object",
 		"properties": {
-			"guidanceScale": {
+			"guidance_scale": {
 				"type": "number",
 				"description": "For diffusion models. A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality."
 			},
-			"negativePrompt": {
+			"negative_prompt": {
 				"type": "array",
 				"items": { "type": "string" },
 				"description": "One or several prompt to guide what NOT to include in image generation."
 			},
-			"numInferenceSteps": {
+			"num_inference_steps": {
 				"type": "integer",
 				"description": "For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference."
}, - "targetSize": { + "target_size": { "type": "object", "description": "The size in pixel of the output image", "properties": { diff --git a/packages/tasks/src/tasks/text-to-image/spec/output.json b/packages/tasks/src/tasks/text-to-image/spec/output.json index 5ab3ee7879b9833b97774a4db37254c3a76c2dbf..ff952a3a36dd7cdc4e1c6209ec9bce3aaf594999 100644 --- a/packages/tasks/src/tasks/text-to-image/spec/output.json +++ b/packages/tasks/src/tasks/text-to-image/spec/output.json @@ -3,13 +3,11 @@ "$schema": "http://json-schema.org/draft-06/schema#", "description": "Outputs of inference for the Text To Image task", "title": "TextToImageOutput", - "type": "array", - "items": { - "properties": { - "image": { - "description": "The generated image" - } - }, - "required": ["image"] - } + "type": "object", + "properties": { + "image": { + "description": "The generated image" + } + }, + "required": ["image"] } diff --git a/packages/tasks/src/tasks/text-to-speech/inference.ts b/packages/tasks/src/tasks/text-to-speech/inference.ts index d23b3e76a53424eb277bd15a131c2f19343ed254..cdf778438337af9ec63f2dd0123d8f5723c62d35 100644 --- a/packages/tasks/src/tasks/text-to-speech/inference.ts +++ b/packages/tasks/src/tasks/text-to-speech/inference.ts @@ -43,18 +43,18 @@ export interface GenerationParameters { /** * Whether to use sampling instead of greedy decoding when generating new tokens. */ - doSample?: boolean; + do_sample?: boolean; /** * Controls the stopping condition for beam-based methods. */ - earlyStopping?: EarlyStoppingUnion; + early_stopping?: EarlyStoppingUnion; /** * If set to float strictly between 0 and 1, only tokens with a conditional probability * greater than epsilon_cutoff will be sampled. In the paper, suggested values range from * 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language * Model Desmoothing](https://hf.co/papers/2210.15191) for more details. */ - epsilonCutoff?: number; + epsilon_cutoff?: number; /** * Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to * float strictly between 0 and 1, a token is only considered if it is greater than either @@ -64,37 +64,37 @@ export interface GenerationParameters { * See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191) * for more details. */ - etaCutoff?: number; + eta_cutoff?: number; /** * The maximum length (in tokens) of the generated text, including the input. */ - maxLength?: number; + max_length?: number; /** * The maximum number of tokens to generate. Takes precedence over maxLength. */ - maxNewTokens?: number; + max_new_tokens?: number; /** * The minimum length (in tokens) of the generated text, including the input. */ - minLength?: number; + min_length?: number; /** * The minimum number of tokens to generate. Takes precedence over maxLength. */ - minNewTokens?: number; + min_new_tokens?: number; /** * Number of groups to divide num_beams into in order to ensure diversity among different * groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details. */ - numBeamGroups?: number; + num_beam_groups?: number; /** * Number of beams to use for beam search. */ - numBeams?: number; + num_beams?: number; /** * The value balances the model confidence and the degeneration penalty in contrastive * search decoding. */ - penaltyAlpha?: number; + penalty_alpha?: number; /** * The value used to modulate the next token probabilities. 
 	 */
@@ -102,12 +102,12 @@
 	/**
 	 * The number of highest probability vocabulary tokens to keep for top-k-filtering.
 	 */
-	topK?: number;
+	top_k?: number;
 	/**
 	 * If set to float < 1, only the smallest set of most probable tokens with probabilities
 	 * that add up to top_p or higher are kept for generation.
 	 */
-	topP?: number;
+	top_p?: number;
 	/**
 	 * Local typicality measures how similar the conditional probability of predicting a target
 	 * token next is to the expected conditional probability of predicting a random token next,
@@ -115,11 +115,11 @@
 	 * most locally typical tokens with probabilities that add up to typical_p or higher are
 	 * kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
 	 */
-	typicalP?: number;
+	typical_p?: number;
 	/**
 	 * Whether the model should use the past last key/values attentions to speed up decoding
 	 */
-	useCache?: boolean;
+	use_cache?: boolean;
 	[property: string]: unknown;
 }
@@ -138,9 +138,9 @@ export interface TextToSpeechOutput {
 	 * The generated audio waveform.
 	 */
 	audio: unknown;
 	/**
 	 * The sampling rate of the generated audio waveform.
 	 */
-	samplingRate: number;
+	sampling_rate: number;
 	[property: string]: unknown;
 }
diff --git a/packages/tasks/src/tasks/text2text-generation/inference.ts b/packages/tasks/src/tasks/text2text-generation/inference.ts
index e2649dd4109c17e7530f691a76e2af3d5d93dfac..3fb690b702a87cea401f213ffbc038d0fb076def 100644
--- a/packages/tasks/src/tasks/text2text-generation/inference.ts
+++ b/packages/tasks/src/tasks/text2text-generation/inference.ts
@@ -3,6 +3,7 @@
  *
  * Using src/scripts/inference-codegen
  */
+
 /**
  * Inputs for Text2text Generation inference
  */
@@ -17,6 +18,7 @@ export interface Text2TextGenerationInput {
 	parameters?: Text2TextGenerationParameters;
 	[property: string]: unknown;
 }
+
 /**
  * Additional inference parameters
 *
@@ -26,28 +28,27 @@ export interface Text2TextGenerationParameters {
 	/**
 	 * Whether to clean up the potential extra spaces in the text output.
 	 */
-	cleanUpTokenizationSpaces?: boolean;
+	clean_up_tokenization_spaces?: boolean;
 	/**
 	 * Additional parametrization of the text generation algorithm
 	 */
-	generateParameters?: {
-		[key: string]: unknown;
-	};
+	generate_parameters?: { [key: string]: unknown };
 	/**
 	 * The truncation strategy to use
 	 */
 	truncation?: Text2TextGenerationTruncationStrategy;
 	[property: string]: unknown;
 }
+
 export type Text2TextGenerationTruncationStrategy = "do_not_truncate" | "longest_first" | "only_first" | "only_second";
-export type Text2TextGenerationOutput = Text2TextGenerationOutputElement[];
+
 /**
 * Outputs of inference for the Text2text Generation task
 */
-export interface Text2TextGenerationOutputElement {
+export interface Text2TextGenerationOutput {
 	/**
 	 * The generated text.
 	 */
-	generatedText: string;
+	generated_text: string;
 	[property: string]: unknown;
 }
diff --git a/packages/tasks/src/tasks/text2text-generation/spec/input.json b/packages/tasks/src/tasks/text2text-generation/spec/input.json
index da818bc044b236ede94d8992d515a0d0e4aee4c8..0310d74787a56ae5dd306732487646ccf82cf907 100644
--- a/packages/tasks/src/tasks/text2text-generation/spec/input.json
+++ b/packages/tasks/src/tasks/text2text-generation/spec/input.json
@@ -20,7 +20,7 @@
 		"description": "Additional inference parameters for Text2text Generation",
 		"type": "object",
 		"properties": {
-			"cleanUpTokenizationSpaces": {
+			"clean_up_tokenization_spaces": {
 				"type": "boolean",
 				"description": "Whether to clean up the potential extra spaces in the text output."
 			},
@@ -43,7 +43,7 @@
 					}
 				]
 			},
-			"generateParameters": {
+			"generate_parameters": {
 				"title": "generateParameters",
 				"type": "object",
 				"description": "Additional parametrization of the text generation algorithm"
diff --git a/packages/tasks/src/tasks/text2text-generation/spec/output.json b/packages/tasks/src/tasks/text2text-generation/spec/output.json
index f60ba8933eecead6e159ca07e03edc5f1fb93284..0da61f103d4cb27c3f61c2c5d782f44906ca2120 100644
--- a/packages/tasks/src/tasks/text2text-generation/spec/output.json
+++ b/packages/tasks/src/tasks/text2text-generation/spec/output.json
@@ -3,15 +3,12 @@
 	"$schema": "http://json-schema.org/draft-06/schema#",
 	"description": "Outputs of inference for the Text2text Generation task",
 	"title": "Text2TextGenerationOutput",
-	"type": "array",
-	"items": {
-		"type": "object",
-		"properties": {
-			"generatedText": {
-				"type": "string",
-				"description": "The generated text."
-			}
-		},
-		"required": ["generatedText"]
-	}
+	"type": "object",
+	"properties": {
+		"generated_text": {
+			"type": "string",
+			"description": "The generated text."
+		}
+	},
+	"required": ["generated_text"]
 }
diff --git a/packages/tasks/src/tasks/token-classification/inference.ts b/packages/tasks/src/tasks/token-classification/inference.ts
index 17f0d798e23e56c7c7c05373c02ee4b123f6e2b2..c89bf4e70e634c16400c766bbad761c0fdc53424 100644
--- a/packages/tasks/src/tasks/token-classification/inference.ts
+++ b/packages/tasks/src/tasks/token-classification/inference.ts
@@ -26,11 +26,11 @@ export interface TokenClassificationParameters {
 	/**
 	 * The strategy used to fuse tokens based on model predictions
 	 */
-	aggregationStrategy?: TokenClassificationAggregationStrategy;
+	aggregation_strategy?: TokenClassificationAggregationStrategy;
 	/**
 	 * A list of labels to ignore
 	 */
-	ignoreLabels?: string[];
+	ignore_labels?: string[];
 	/**
 	 * The number of overlapping tokens between chunks when splitting the input text.
 	 */
@@ -64,7 +64,7 @@ export interface TokenClassificationOutputElement {
 	/**
 	 * The predicted label for that group of tokens
 	 */
-	entityGroup?: string;
+	entity_group?: string;
 	label: unknown;
 	/**
 	 * The associated score / probability
diff --git a/packages/tasks/src/tasks/token-classification/spec/input.json b/packages/tasks/src/tasks/token-classification/spec/input.json
index 0b29d0ab13dea645e2163390367beaed593fa2e9..30d6153d2ac99f11c79d378a2352dc85c1be3fb9 100644
--- a/packages/tasks/src/tasks/token-classification/spec/input.json
+++ b/packages/tasks/src/tasks/token-classification/spec/input.json
@@ -20,7 +20,7 @@
 		"description": "Additional inference parameters for Token Classification",
 		"type": "object",
 		"properties": {
-			"ignoreLabels": {
+			"ignore_labels": {
 				"type": "array",
 				"items": {
 					"type": "string"
@@ -31,7 +31,7 @@
 				"type": "integer",
 				"description": "The number of overlapping tokens between chunks when splitting the input text."
 			},
-			"aggregationStrategy": {
+			"aggregation_strategy": {
 				"title": "TokenClassificationAggregationStrategy",
 				"type": "string",
 				"description": "The strategy used to fuse tokens based on model predictions",
diff --git a/packages/tasks/src/tasks/token-classification/spec/output.json b/packages/tasks/src/tasks/token-classification/spec/output.json
index 8522d972a283821244e40b8c5f9e1107750464a9..95bdc06f531faec57d01f2bfcfb565ea6560f731 100644
--- a/packages/tasks/src/tasks/token-classification/spec/output.json
+++ b/packages/tasks/src/tasks/token-classification/spec/output.json
@@ -7,7 +7,7 @@
 	"items": {
 		"type": "object",
 		"properties": {
-			"entityGroup": {
+			"entity_group": {
 				"type": "string",
 				"description": "The predicted label for that group of tokens"
 			},
diff --git a/packages/tasks/src/tasks/translation/inference.ts b/packages/tasks/src/tasks/translation/inference.ts
index b4d6bd7162b4c5fbe4d713d7210126f9decc94be..9ee4994b4a72272363383bb43e852fdde4e6addc 100644
--- a/packages/tasks/src/tasks/translation/inference.ts
+++ b/packages/tasks/src/tasks/translation/inference.ts
@@ -30,11 +30,11 @@ export interface Text2TextGenerationParameters {
 	/**
 	 * Whether to clean up the potential extra spaces in the text output.
 	 */
-	cleanUpTokenizationSpaces?: boolean;
+	clean_up_tokenization_spaces?: boolean;
 	/**
 	 * Additional parametrization of the text generation algorithm
 	 */
-	generateParameters?: { [key: string]: unknown };
+	generate_parameters?: { [key: string]: unknown };
 	/**
 	 * The truncation strategy to use
 	 */
@@ -50,9 +50,9 @@ export type Text2TextGenerationTruncationStrategy = "do_not_truncate" | "longest
 * Outputs of inference for the Text2text Generation task
 */
 export interface TranslationOutput {
 	/**
 	 * The generated text.
 	 */
-	generatedText: string;
+	generated_text: string;
 	[property: string]: unknown;
 }
diff --git a/packages/tasks/src/tasks/video-classification/inference.ts b/packages/tasks/src/tasks/video-classification/inference.ts
index 0366d38db4df15206adf078f31e2888eceeb06f6..6615b8ddcbd0df5a4a7ebe67d89c93743ffa7d2c 100644
--- a/packages/tasks/src/tasks/video-classification/inference.ts
+++ b/packages/tasks/src/tasks/video-classification/inference.ts
@@ -26,16 +26,16 @@ export interface VideoClassificationParameters {
 	/**
 	 * The sampling rate used to select frames from the video.
 	 */
-	frameSamplingRate?: number;
-	functionToApply?: ClassificationOutputTransform;
+	frame_sampling_rate?: number;
+	function_to_apply?: ClassificationOutputTransform;
 	/**
 	 * The number of sampled frames to consider for classification.
 	 */
-	numFrames?: number;
+	num_frames?: number;
 	/**
 	 * When specified, limits the output to the top K most probable classes.
 	 */
-	topK?: number;
+	top_k?: number;
 	[property: string]: unknown;
 }
 /**
diff --git a/packages/tasks/src/tasks/video-classification/spec/input.json b/packages/tasks/src/tasks/video-classification/spec/input.json
index 11861209319afc388d4bb5d5bda1261ca7c6823a..1fb58e278364bda22840da44d3aedd295a6aa331 100644
--- a/packages/tasks/src/tasks/video-classification/spec/input.json
+++ b/packages/tasks/src/tasks/video-classification/spec/input.json
@@ -19,19 +19,19 @@
 		"description": "Additional inference parameters for Video Classification",
 		"type": "object",
 		"properties": {
-			"functionToApply": {
+			"function_to_apply": {
 				"title": "TextClassificationOutputTransform",
 				"$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutputTransform"
 			},
-			"numFrames": {
+			"num_frames": {
 				"type": "integer",
 				"description": "The number of sampled frames to consider for classification."
 			},
-			"frameSamplingRate": {
+			"frame_sampling_rate": {
 				"type": "integer",
 				"description": "The sampling rate used to select frames from the video."
 			},
-			"topK": {
+			"top_k": {
 				"type": "integer",
 				"description": "When specified, limits the output to the top K most probable classes."
 			}
diff --git a/packages/tasks/src/tasks/visual-question-answering/inference.ts b/packages/tasks/src/tasks/visual-question-answering/inference.ts
index 8df826bd8f32abfdf33396d6b486e626e024f1ff..7adc07ae02ab0993a6f40b8ecab7bceeb7be441e 100644
--- a/packages/tasks/src/tasks/visual-question-answering/inference.ts
+++ b/packages/tasks/src/tasks/visual-question-answering/inference.ts
@@ -42,7 +42,7 @@ export interface VisualQuestionAnsweringParameters {
 	 * return less than topk answers if there are not enough options available within the
 	 * context.
 	 */
-	topK?: number;
+	top_k?: number;
 	[property: string]: unknown;
 }
 export type VisualQuestionAnsweringOutput = VisualQuestionAnsweringOutputElement[];
diff --git a/packages/tasks/src/tasks/visual-question-answering/spec/input.json b/packages/tasks/src/tasks/visual-question-answering/spec/input.json
index 17d6cda2d34b7ae6111d386e6fae00eef352a80d..9f9dab121ca0f9d2290173b4cc9bf1f20de7bf15 100644
--- a/packages/tasks/src/tasks/visual-question-answering/spec/input.json
+++ b/packages/tasks/src/tasks/visual-question-answering/spec/input.json
@@ -30,7 +30,7 @@
 		"description": "Additional inference parameters for Visual Question Answering",
 		"type": "object",
 		"properties": {
-			"topK": {
+			"top_k": {
 				"type": "integer",
 				"description": "The number of answers to return (will be chosen by order of likelihood). Note that we return less than topk answers if there are not enough options available within the context."
 			}
diff --git a/packages/tasks/src/tasks/zero-shot-classification/inference.ts b/packages/tasks/src/tasks/zero-shot-classification/inference.ts
index d5ecfd72839b3b7a72b1a9203b4162b0159baad9..20e0d369a2cfdd1b4903e4817f611159ae8f8d57 100644
--- a/packages/tasks/src/tasks/zero-shot-classification/inference.ts
+++ b/packages/tasks/src/tasks/zero-shot-classification/inference.ts
@@ -41,13 +41,13 @@ export interface ZeroShotClassificationParameters {
 	 * The sentence used in conjunction with candidateLabels to attempt the text classification
 	 * by replacing the placeholder with the candidate labels.
 	 */
-	hypothesisTemplate?: string;
+	hypothesis_template?: string;
 	/**
 	 * Whether multiple candidate labels can be true. If false, the scores are normalized such
 	 * that the sum of the label likelihoods for each sequence is 1. If true, the labels are
 	 * considered independent and probabilities are normalized for each candidate.
 	 */
-	multiLabel?: boolean;
+	multi_label?: boolean;
 	[property: string]: unknown;
 }
 export type ZeroShotClassificationOutput = ZeroShotClassificationOutputElement[];
diff --git a/packages/tasks/src/tasks/zero-shot-classification/spec/input.json b/packages/tasks/src/tasks/zero-shot-classification/spec/input.json
index d9d0c61aa07d49e7bd683b07ad24a0bdd6dbbcf6..c955f2769f4c44c34dcb2e021fd99010c036cc45 100644
--- a/packages/tasks/src/tasks/zero-shot-classification/spec/input.json
+++ b/packages/tasks/src/tasks/zero-shot-classification/spec/input.json
@@ -35,11 +35,11 @@
 		"description": "Additional inference parameters for Zero Shot Classification",
 		"type": "object",
 		"properties": {
-			"hypothesisTemplate": {
+			"hypothesis_template": {
 				"type": "string",
 				"description": "The sentence used in conjunction with candidateLabels to attempt the text classification by replacing the placeholder with the candidate labels."
 			},
-			"multiLabel": {
+			"multi_label": {
 				"type": "boolean",
 				"description": "Whether multiple candidate labels can be true. If false, the scores are normalized such that the sum of the label likelihoods for each sequence is 1. If true, the labels are considered independent and probabilities are normalized for each candidate."
 			}
diff --git a/packages/tasks/src/tasks/zero-shot-image-classification/inference.ts b/packages/tasks/src/tasks/zero-shot-image-classification/inference.ts
index 66f6eb43151003a574e188f5ab0e6276934157d9..44ce76173503e6403626b0ae1244e2121b0be2b1 100644
--- a/packages/tasks/src/tasks/zero-shot-image-classification/inference.ts
+++ b/packages/tasks/src/tasks/zero-shot-image-classification/inference.ts
@@ -41,7 +41,7 @@ export interface ZeroShotImageClassificationParameters {
 	 * The sentence used in conjunction with candidateLabels to attempt the text classification
 	 * by replacing the placeholder with the candidate labels.
 	 */
-	hypothesisTemplate?: string;
+	hypothesis_template?: string;
 	[property: string]: unknown;
 }
 export type ZeroShotImageClassificationOutput = ZeroShotImageClassificationOutputElement[];
diff --git a/packages/tasks/src/tasks/zero-shot-image-classification/spec/input.json b/packages/tasks/src/tasks/zero-shot-image-classification/spec/input.json
index 7d66a51df17a9b2ef9962b224eaea311864468fd..dfdababc7018e9a46354813f77a839f6d48400c4 100644
--- a/packages/tasks/src/tasks/zero-shot-image-classification/spec/input.json
+++ b/packages/tasks/src/tasks/zero-shot-image-classification/spec/input.json
@@ -34,7 +34,7 @@
 		"description": "Additional inference parameters for Zero Shot Image Classification",
 		"type": "object",
 		"properties": {
-			"hypothesisTemplate": {
+			"hypothesis_template": {
 				"type": "string",
 				"description": "The sentence used in conjunction with candidateLabels to attempt the text classification by replacing the placeholder with the candidate labels."
 			}
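
Note: after this change, client payloads use snake_case parameter names end to end. As a minimal illustrative sketch — not part of this patch; the model id (gpt2), the hosted Inference API endpoint, and the HF_TOKEN environment variable are placeholder assumptions — a text-generation request with the renamed parameters would look like:

// Sketch only: demonstrates the snake_case parameter names from the updated specs.
// Requires Node 18+ for built-in fetch; HF_TOKEN is assumed to be set by the caller.
async function generateText(prompt: string): Promise<string> {
	const response = await fetch("https://api-inference.huggingface.co/models/gpt2", {
		method: "POST",
		headers: {
			Authorization: `Bearer ${process.env.HF_TOKEN}`,
			"Content-Type": "application/json",
		},
		body: JSON.stringify({
			inputs: prompt,
			parameters: {
				do_sample: true, // was doSample
				max_new_tokens: 50, // was maxNewTokens
				top_k: 40, // was topK
				top_p: 0.95, // was topP
			},
		}),
	});
	const output = await response.json();
	// The deployed API has historically returned an array of { generated_text } objects,
	// while the updated spec above models a single object — handle both defensively.
	return Array.isArray(output) ? output[0].generated_text : output.generated_text;
}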