diff --git a/packages/tasks/scripts/inference-codegen.ts b/packages/tasks/scripts/inference-codegen.ts
index a041821689aa3c7922a9976ff95bdfc3f16c79d3..37b389efbaa35ce6dc7c471b43d88b60f370f90f 100644
--- a/packages/tasks/scripts/inference-codegen.ts
+++ b/packages/tasks/scripts/inference-codegen.ts
@@ -57,7 +57,7 @@ async function generateTypescript(inputData: InputData): Promise<SerializedRenderResult> {
 		lang: "typescript",
 		rendererOptions: {
 			"just-types": "true",
-			"nice-property-names": "true",
+			"nice-property-names": "false",
 			"prefer-unions": "true",
 			"prefer-const-values": "true",
 		},
diff --git a/packages/tasks/src/tasks/document-question-answering/inference.ts b/packages/tasks/src/tasks/document-question-answering/inference.ts
--- a/packages/tasks/src/tasks/document-question-answering/inference.ts
+++ b/packages/tasks/src/tasks/document-question-answering/inference.ts
 	/**
 	 * A list of words and bounding boxes (normalized 0->1000). If provided, the inference will
 	 * skip the OCR step and use the provided bounding boxes instead.
 	 */
-	wordBoxes?: WordBox[];
+	word_boxes?: WordBox[];
 	[property: string]: unknown;
 }
 export type WordBox = number[] | string;
@@ -88,11 +88,19 @@ export interface DocumentQuestionAnsweringOutputElement {
 	/**
 	 * The answer to the question.
 	 */
 	answer: string;
+	/**
+	 * The end word index of the answer (in the OCR’d version of the input or provided word
+	 * boxes).
+	 */
 	end: number;
 	/**
 	 * The probability associated to the answer.
 	 */
 	score: number;
+	/**
+	 * The start word index of the answer (in the OCR’d version of the input or provided word
+	 * boxes).
+	 */
 	start: number;
 	/**
 	 * The index of each word/box pair that is in the answer
diff --git a/packages/tasks/src/tasks/document-question-answering/spec/input.json b/packages/tasks/src/tasks/document-question-answering/spec/input.json
index 12d38ec81a820dc925fc65b12cf8c61b7540653f..b017ce469be82c2f587da76b162a5494423bd468 100644
--- a/packages/tasks/src/tasks/document-question-answering/spec/input.json
+++ b/packages/tasks/src/tasks/document-question-answering/spec/input.json
@@ -31,11 +31,11 @@
 		"description": "Additional inference parameters for Document Question Answering",
 		"type": "object",
 		"properties": {
-			"docStride": {
+			"doc_stride": {
 				"type": "integer",
 				"description": "If the words in the document are too long to fit with the question for the model, it will be split in several chunks with some overlap. This argument controls the size of that overlap."
 			},
-			"handleImpossibleAnswer": {
+			"handle_impossible_answer": {
 				"type": "boolean",
 				"description": "Whether to accept impossible as an answer"
 			},
@@ -43,23 +43,23 @@
 				"type": "string",
 				"description": "Language to use while running OCR. Defaults to english."
 			},
-			"maxAnswerLen": {
+			"max_answer_len": {
 				"type": "integer",
 				"description": "The maximum length of predicted answers (e.g., only answers with a shorter length are considered)."
 			},
-			"maxSeqLen": {
+			"max_seq_len": {
 				"type": "integer",
 				"description": "The maximum length of the total sentence (context + question) in tokens of each chunk passed to the model. The context will be split in several chunks (using doc_stride as overlap) if needed."
 			},
-			"maxQuestionLen": {
+			"max_question_len": {
 				"type": "integer",
 				"description": "The maximum length of the question after tokenization. It will be truncated if needed."
 			},
-			"topK": {
+			"top_k": {
 				"type": "integer",
 				"description": "The number of answers to return (will be chosen by order of likelihood). Can return less than top_k answers if there are not enough options available within the context."
 			},
-			"wordBoxes": {
+			"word_boxes": {
 				"type": "array",
 				"description": "A list of words and bounding boxes (normalized 0->1000). If provided, the inference will skip the OCR step and use the provided bounding boxes instead.",
 				"items": {
diff --git a/packages/tasks/src/tasks/document-question-answering/spec/output.json b/packages/tasks/src/tasks/document-question-answering/spec/output.json
index 9f69584ae89696ca2d94b55ae60029ae868b8fb6..4fda3771a6c7fee0e09eff8dab47e3df6a6da823 100644
--- a/packages/tasks/src/tasks/document-question-answering/spec/output.json
+++ b/packages/tasks/src/tasks/document-question-answering/spec/output.json
@@ -17,11 +17,11 @@
 		},
 		"start": {
 			"type": "integer",
-			"descrtiption": "The start word index of the answer (in the OCR’d version of the input or provided word boxes)."
+			"description": "The start word index of the answer (in the OCR\u2019d version of the input or provided word boxes)."
 		},
 		"end": {
 			"type": "integer",
-			"descrtiption": "The end word index of the answer (in the OCR’d version of the input or provided word boxes)."
+			"description": "The end word index of the answer (in the OCR\u2019d version of the input or provided word boxes)."
 		},
 		"words": {
 			"type": "array",
diff --git a/packages/tasks/src/tasks/fill-mask/inference.ts b/packages/tasks/src/tasks/fill-mask/inference.ts
index 8784e979637aa00e022e928d1bf851c4b8835797..4d78ecd814ee6d4a46c29996f1cb5e15f6da119c 100644
--- a/packages/tasks/src/tasks/fill-mask/inference.ts
+++ b/packages/tasks/src/tasks/fill-mask/inference.ts
@@ -33,7 +33,7 @@ export interface FillMaskParameters {
 	/**
 	 * When passed, overrides the number of predictions to return.
 	 */
-	topK?: number;
+	top_k?: number;
 	[property: string]: unknown;
 }
 export type FillMaskOutput = FillMaskOutputElement[];
@@ -53,9 +53,9 @@ export interface FillMaskOutputElement {
 	 * The predicted token id (to replace the masked one).
 	 */
 	token: number;
 	/**
 	 * The predicted token (to replace the masked one).
 	 */
-	tokenStr: string;
+	token_str: string;
 	[property: string]: unknown;
 }
diff --git a/packages/tasks/src/tasks/fill-mask/spec/input.json b/packages/tasks/src/tasks/fill-mask/spec/input.json
index 009baf364b5462a3fc49d1302711aa06008073e9..cd3271e4a35d910c12cfcba80f380c2a84a80a8c 100644
--- a/packages/tasks/src/tasks/fill-mask/spec/input.json
+++ b/packages/tasks/src/tasks/fill-mask/spec/input.json
@@ -20,7 +20,7 @@
 		"description": "Additional inference parameters for Fill Mask",
 		"type": "object",
 		"properties": {
-			"topK": {
+			"top_k": {
 				"type": "integer",
 				"description": "When passed, overrides the number of predictions to return."
 			},
diff --git a/packages/tasks/src/tasks/fill-mask/spec/output.json b/packages/tasks/src/tasks/fill-mask/spec/output.json
index f8e91aeeaa0871f7f498fd627608ecc80f687f68..0b613382e781cf0405c76df5c1f9f5091da6b196 100644
--- a/packages/tasks/src/tasks/fill-mask/spec/output.json
+++ b/packages/tasks/src/tasks/fill-mask/spec/output.json
@@ -19,7 +19,7 @@
 				"type": "integer",
 				"description": "The predicted token id (to replace the masked one)."
 			},
-			"tokenStr": {
+			"token_str": {
 				"type": "string",
 				"description": "The predicted token (to replace the masked one)."
 			}
diff --git a/packages/tasks/src/tasks/image-classification/inference.ts b/packages/tasks/src/tasks/image-classification/inference.ts
index 92ca03e0d81456ca7a7ef585d27c1f0572ec0890..e0689d887fd9248237845eac5aaa6658dd3f4019 100644
--- a/packages/tasks/src/tasks/image-classification/inference.ts
+++ b/packages/tasks/src/tasks/image-classification/inference.ts
@@ -23,11 +23,11 @@ export interface ImageClassificationInput {
  * Additional inference parameters for Image Classification
  */
 export interface ImageClassificationParameters {
-	functionToApply?: ClassificationOutputTransform;
+	function_to_apply?: ClassificationOutputTransform;
 	/**
 	 * When specified, limits the output to the top K most probable classes.
 	 */
-	topK?: number;
+	top_k?: number;
 	[property: string]: unknown;
 }
 /**
diff --git a/packages/tasks/src/tasks/image-classification/spec/input.json b/packages/tasks/src/tasks/image-classification/spec/input.json
index 8c2c2fcc709a851dcf2401e3a93a5742e6281816..a8cd4273cc8c311b12857d9104d2814f7cf4179e 100644
--- a/packages/tasks/src/tasks/image-classification/spec/input.json
+++ b/packages/tasks/src/tasks/image-classification/spec/input.json
@@ -19,11 +19,11 @@
 		"description": "Additional inference parameters for Image Classification",
 		"type": "object",
 		"properties": {
-			"functionToApply": {
+			"function_to_apply": {
 				"title": "ImageClassificationOutputTransform",
 				"$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutputTransform"
 			},
-			"topK": {
+			"top_k": {
 				"type": "integer",
 				"description": "When specified, limits the output to the top K most probable classes."
 			}
diff --git a/packages/tasks/src/tasks/image-segmentation/inference.ts b/packages/tasks/src/tasks/image-segmentation/inference.ts
index 7d552b66e5ed0c188c59f1190d853e4b366fc6a0..02db5cb90f115e8c6b198eac0ac1aa616539344b 100644
--- a/packages/tasks/src/tasks/image-segmentation/inference.ts
+++ b/packages/tasks/src/tasks/image-segmentation/inference.ts
@@ -26,11 +26,11 @@ export interface ImageSegmentationParameters {
 	/**
 	 * Threshold to use when turning the predicted masks into binary values.
 	 */
-	maskThreshold?: number;
+	mask_threshold?: number;
 	/**
 	 * Mask overlap threshold to eliminate small, disconnected segments.
 	 */
-	overlapMaskAreaThreshold?: number;
+	overlap_mask_area_threshold?: number;
 	/**
 	 * Segmentation task to be performed, depending on model capabilities.
 	 */
diff --git a/packages/tasks/src/tasks/image-segmentation/spec/input.json b/packages/tasks/src/tasks/image-segmentation/spec/input.json
index 169036406c40b22af16a779b3a94d58fd026f85c..500793554146810f1aa1e30adf221a5d10506b50 100644
--- a/packages/tasks/src/tasks/image-segmentation/spec/input.json
+++ b/packages/tasks/src/tasks/image-segmentation/spec/input.json
@@ -19,11 +19,11 @@
 		"description": "Additional inference parameters for Image Segmentation",
 		"type": "object",
 		"properties": {
-			"maskThreshold": {
+			"mask_threshold": {
 				"type": "number",
 				"description": "Threshold to use when turning the predicted masks into binary values."
 			},
-			"overlapMaskAreaThreshold": {
+			"overlap_mask_area_threshold": {
 				"type": "number",
 				"description": "Mask overlap threshold to eliminate small, disconnected segments."
 			},
diff --git a/packages/tasks/src/tasks/image-to-image/inference.ts b/packages/tasks/src/tasks/image-to-image/inference.ts
index f2d3064c800ff7ea79b19e02d4fcd0c985c644eb..bf732e07018c5ab5d1e9bd0eb3f1212e7943dd36 100644
--- a/packages/tasks/src/tasks/image-to-image/inference.ts
+++ b/packages/tasks/src/tasks/image-to-image/inference.ts
@@ -29,20 +29,20 @@ export interface ImageToImageParameters {
 	/**
 	 * For diffusion models. A higher guidance scale value encourages the model to generate
 	 * images closely linked to the text prompt at the expense of lower image quality.
 	 */
-	guidanceScale?: number;
+	guidance_scale?: number;
 	/**
 	 * One or several prompt to guide what NOT to include in image generation.
 	 */
-	negativePrompt?: string[];
+	negative_prompt?: string[];
 	/**
 	 * For diffusion models. The number of denoising steps. More denoising steps usually lead to
 	 * a higher quality image at the expense of slower inference.
 	 */
-	numInferenceSteps?: number;
+	num_inference_steps?: number;
 	/**
 	 * The size in pixel of the output image
 	 */
-	targetSize?: TargetSize;
+	target_size?: TargetSize;
 	[property: string]: unknown;
 }
diff --git a/packages/tasks/src/tasks/image-to-image/spec/input.json b/packages/tasks/src/tasks/image-to-image/spec/input.json
index ac6d7aed65d43b1cbaa75188c565740928e85fcf..873e1f20d956f5cb40802589be3d2a8972bd2abc 100644
--- a/packages/tasks/src/tasks/image-to-image/spec/input.json
+++ b/packages/tasks/src/tasks/image-to-image/spec/input.json
@@ -19,22 +19,22 @@
 		"description": "Additional inference parameters for Image To Image",
 		"type": "object",
 		"properties": {
-			"guidanceScale": {
+			"guidance_scale": {
 				"type": "number",
 				"description": "For diffusion models. A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality."
 			},
-			"negativePrompt": {
+			"negative_prompt": {
 				"type": "array",
 				"items": { "type": "string" },
 				"description": "One or several prompt to guide what NOT to include in image generation."
 			},
-			"numInferenceSteps": {
+			"num_inference_steps": {
 				"type": "integer",
 				"description": "For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference."
 			},
-			"targetSize": {
+			"target_size": {
 				"type": "object",
 				"description": "The size in pixel of the output image",
 				"properties": {
diff --git a/packages/tasks/src/tasks/image-to-text/inference.ts b/packages/tasks/src/tasks/image-to-text/inference.ts
index 84dc7b80f0a1d2959dfb8d2918125e712d789b1e..7cace215832b47d1727b0db82d85e7322f5cbf03 100644
--- a/packages/tasks/src/tasks/image-to-text/inference.ts
+++ b/packages/tasks/src/tasks/image-to-text/inference.ts
@@ -3,6 +3,7 @@
  *
  * Using src/scripts/inference-codegen
  */
+
 /**
  * Inputs for Image To Text inference
  */
@@ -17,6 +18,7 @@ export interface ImageToTextInput {
 	parameters?: ImageToTextParameters;
 	[property: string]: unknown;
 }
+
 /**
  * Additional inference parameters
 *
@@ -30,9 +32,10 @@ export interface ImageToTextParameters {
 	/**
 	 * The amount of maximum tokens to generate.
 	 */
-	maxNewTokens?: number;
+	max_new_tokens?: number;
 	[property: string]: unknown;
 }
+
 /**
  * Parametrization of the text generation process
 *
@@ -42,18 +45,18 @@ export interface GenerationParameters {
 	/**
 	 * Whether to use sampling instead of greedy decoding when generating new tokens.
 	 */
-	doSample?: boolean;
+	do_sample?: boolean;
 	/**
 	 * Controls the stopping condition for beam-based methods.
 	 */
-	earlyStopping?: EarlyStoppingUnion;
+	early_stopping?: EarlyStoppingUnion;
 	/**
 	 * If set to float strictly between 0 and 1, only tokens with a conditional probability
 	 * greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
 	 * 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
 	 * Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
 	 */
-	epsilonCutoff?: number;
+	epsilon_cutoff?: number;
 	/**
 	 * Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
 	 * float strictly between 0 and 1, a token is only considered if it is greater than either
@@ -63,37 +66,37 @@ export interface GenerationParameters {
 	 * See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
 	 * for more details.
 	 */
-	etaCutoff?: number;
+	eta_cutoff?: number;
 	/**
 	 * The maximum length (in tokens) of the generated text, including the input.
 	 */
-	maxLength?: number;
+	max_length?: number;
 	/**
 	 * The maximum number of tokens to generate. Takes precedence over maxLength.
 	 */
-	maxNewTokens?: number;
+	max_new_tokens?: number;
 	/**
 	 * The minimum length (in tokens) of the generated text, including the input.
 	 */
-	minLength?: number;
+	min_length?: number;
 	/**
 	 * The minimum number of tokens to generate. Takes precedence over maxLength.
 	 */
-	minNewTokens?: number;
+	min_new_tokens?: number;
 	/**
 	 * Number of groups to divide num_beams into in order to ensure diversity among different
 	 * groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
 	 */
-	numBeamGroups?: number;
+	num_beam_groups?: number;
 	/**
 	 * Number of beams to use for beam search.
 	 */
-	numBeams?: number;
+	num_beams?: number;
 	/**
 	 * The value balances the model confidence and the degeneration penalty in contrastive
 	 * search decoding.
 	 */
-	penaltyAlpha?: number;
+	penalty_alpha?: number;
 	/**
 	 * The value used to modulate the next token probabilities.
 	 */
@@ -101,12 +104,12 @@
 	/**
 	 * The number of highest probability vocabulary tokens to keep for top-k-filtering.
 	 */
-	topK?: number;
+	top_k?: number;
 	/**
 	 * If set to float < 1, only the smallest set of most probable tokens with probabilities
 	 * that add up to top_p or higher are kept for generation.
 	 */
-	topP?: number;
+	top_p?: number;
 	/**
 	 * Local typicality measures how similar the conditional probability of predicting a target
 	 * token next is to the expected conditional probability of predicting a random token next,
@@ -114,25 +117,26 @@
 	 * most locally typical tokens with probabilities that add up to typical_p or higher are
 	 * kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
 	 */
-	typicalP?: number;
+	typical_p?: number;
 	/**
 	 * Whether the model should use the past last key/values attentions to speed up decoding
 	 */
-	useCache?: boolean;
+	use_cache?: boolean;
 	[property: string]: unknown;
 }
+
 /**
 * Controls the stopping condition for beam-based methods.
 */
 export type EarlyStoppingUnion = boolean | "never";
-export type ImageToTextOutput = ImageToTextOutputElement[];
+
 /**
 * Outputs of inference for the Image To Text task
 */
-export interface ImageToTextOutputElement {
+export interface ImageToTextOutput {
 	/**
 	 * The generated text.
 	 */
-	generatedText: string;
+	generated_text: string;
 	[property: string]: unknown;
 }
diff --git a/packages/tasks/src/tasks/image-to-text/spec/input.json b/packages/tasks/src/tasks/image-to-text/spec/input.json
index d3c367b951beaa5b4c54a22af874b7a8b51bbc52..dec832a48f604d66b3af6541a37f130101078bf9 100644
--- a/packages/tasks/src/tasks/image-to-text/spec/input.json
+++ b/packages/tasks/src/tasks/image-to-text/spec/input.json
@@ -19,7 +19,7 @@
 		"description": "Additional inference parameters for Image To Text",
 		"type": "object",
 		"properties": {
-			"maxNewTokens": {
+			"max_new_tokens": {
 				"type": "integer",
 				"description": "The amount of maximum tokens to generate."
 			},
diff --git a/packages/tasks/src/tasks/image-to-text/spec/output.json b/packages/tasks/src/tasks/image-to-text/spec/output.json
index e3283e34f7c71a3165e4bce52e9c5d51ccf7f810..388c3456f4e7f50b0c7b133725a2d951f152cb01 100644
--- a/packages/tasks/src/tasks/image-to-text/spec/output.json
+++ b/packages/tasks/src/tasks/image-to-text/spec/output.json
@@ -3,15 +3,12 @@
 	"$schema": "http://json-schema.org/draft-06/schema#",
 	"description": "Outputs of inference for the Image To Text task",
 	"title": "ImageToTextOutput",
-	"type": "array",
-	"items": {
-		"type": "object",
-		"properties": {
-			"generatedText": {
-				"type": "string",
-				"description": "The generated text."
-			}
-		},
-		"required": ["generatedText"]
-	}
+	"type": "object",
+	"properties": {
+		"generated_text": {
+			"type": "string",
+			"description": "The generated text."
+		}
+	},
+	"required": ["generated_text"]
 }
diff --git a/packages/tasks/src/tasks/placeholder/spec/input.json b/packages/tasks/src/tasks/placeholder/spec/input.json
index aded2e46a6d155eaed9f222cd57cef96a55742a6..d31f4aac619900220d154523a6c5abc4b37f10c1 100644
--- a/packages/tasks/src/tasks/placeholder/spec/input.json
+++ b/packages/tasks/src/tasks/placeholder/spec/input.json
@@ -20,11 +20,11 @@
 		"description": "TODO: describe additional parameters here.",
 		"type": "object",
 		"properties": {
-			"dummyParameterName": {
+			"dummy_parameter_name": {
 				"type": "boolean",
 				"description": "TODO: describe the parameter here"
 			},
-			"dummyParameterName2": {
+			"dummy_parameter_name2": {
 				"type": "integer",
 				"description": "TODO: describe the parameter here"
 			}
diff --git a/packages/tasks/src/tasks/placeholder/spec/output.json b/packages/tasks/src/tasks/placeholder/spec/output.json
index 8e3e132941936718b08c0cbcd961fcc277e57a38..697c6e2672a45f10abc4ba5554e38e7352bb807d 100644
--- a/packages/tasks/src/tasks/placeholder/spec/output.json
+++ b/packages/tasks/src/tasks/placeholder/spec/output.json
@@ -7,7 +7,7 @@
 	"items": {
 		"type": "object",
 		"properties": {
-			"meaningfulOutputName": {
+			"meaningful_output_name": {
 				"type": "string",
 				"description": "TODO: Describe what is outputed by the inference here"
 			}
diff --git a/packages/tasks/src/tasks/question-answering/inference.ts b/packages/tasks/src/tasks/question-answering/inference.ts
index e2aa0088a743e03344aff2be07902a752b1bc96f..eaef8dfe3170ec4f790bdcdaaa0a07dd8aae7d76 100644
--- a/packages/tasks/src/tasks/question-answering/inference.ts
+++ b/packages/tasks/src/tasks/question-answering/inference.ts
@@ -41,37 +41,37 @@ export interface QuestionAnsweringParameters {
 	 * Attempts to align the answer to real words. Improves quality on space separated
 	 * languages. Might hurt on non-space-separated languages (like Japanese or Chinese)
 	 */
-	alignToWords?: boolean;
+	align_to_words?: boolean;
 	/**
 	 * If the context is too long to fit with the question for the model, it will be split in
 	 * several chunks with some overlap. This argument controls the size of that overlap.
 	 */
-	docStride?: number;
+	doc_stride?: number;
 	/**
 	 * Whether to accept impossible as an answer.
 	 */
-	handleImpossibleAnswer?: boolean;
+	handle_impossible_answer?: boolean;
 	/**
 	 * The maximum length of predicted answers (e.g., only answers with a shorter length are
 	 * considered).
 	 */
-	maxAnswerLen?: number;
+	max_answer_len?: number;
 	/**
 	 * The maximum length of the question after tokenization. It will be truncated if needed.
 	 */
-	maxQuestionLen?: number;
+	max_question_len?: number;
 	/**
 	 * The maximum length of the total sentence (context + question) in tokens of each chunk
 	 * passed to the model. The context will be split in several chunks (using docStride as
 	 * overlap) if needed.
 	 */
-	maxSeqLen?: number;
+	max_seq_len?: number;
 	/**
 	 * The number of answers to return (will be chosen by order of likelihood). Note that we
 	 * return less than topk answers if there are not enough options available within the
 	 * context.
 	 */
-	topK?: number;
+	top_k?: number;
 	[property: string]: unknown;
 }
 export type QuestionAnsweringOutput = QuestionAnsweringOutputElement[];
diff --git a/packages/tasks/src/tasks/question-answering/spec/input.json b/packages/tasks/src/tasks/question-answering/spec/input.json
index 62f36ebc99d6a6a11e661b379c23db130337bcc3..70d5607cffcb93e728a987ca0da384c2c813dc21 100644
--- a/packages/tasks/src/tasks/question-answering/spec/input.json
+++ b/packages/tasks/src/tasks/question-answering/spec/input.json
@@ -32,31 +32,31 @@
 		"description": "Additional inference parameters for Question Answering",
 		"type": "object",
 		"properties": {
-			"topK": {
+			"top_k": {
 				"type": "integer",
 				"description": "The number of answers to return (will be chosen by order of likelihood). Note that we return less than topk answers if there are not enough options available within the context."
 			},
-			"docStride": {
+			"doc_stride": {
 				"type": "integer",
 				"description": "If the context is too long to fit with the question for the model, it will be split in several chunks with some overlap. This argument controls the size of that overlap."
 			},
-			"maxAnswerLen": {
+			"max_answer_len": {
 				"type": "integer",
 				"description": "The maximum length of predicted answers (e.g., only answers with a shorter length are considered)."
 			},
-			"maxSeqLen": {
+			"max_seq_len": {
 				"type": "integer",
 				"description": "The maximum length of the total sentence (context + question) in tokens of each chunk passed to the model. The context will be split in several chunks (using docStride as overlap) if needed."
 			},
-			"maxQuestionLen": {
+			"max_question_len": {
 				"type": "integer",
 				"description": "The maximum length of the question after tokenization. It will be truncated if needed."
 			},
-			"handleImpossibleAnswer": {
+			"handle_impossible_answer": {
 				"type": "boolean",
 				"description": "Whether to accept impossible as an answer."
 			},
-			"alignToWords": {
+			"align_to_words": {
 				"type": "boolean",
 				"description": "Attempts to align the answer to real words. Improves quality on space separated languages. Might hurt on non-space-separated languages (like Japanese or Chinese)"
 			}
diff --git a/packages/tasks/src/tasks/summarization/inference.ts b/packages/tasks/src/tasks/summarization/inference.ts
index 5cc2bb0d2b750ce4fd1e2ebbd24fe2e3e8da9f75..a73a7098572b836aef9194a1d87c5393c5805249 100644
--- a/packages/tasks/src/tasks/summarization/inference.ts
+++ b/packages/tasks/src/tasks/summarization/inference.ts
@@ -30,11 +30,11 @@ export interface Text2TextGenerationParameters {
 	/**
 	 * Whether to clean up the potential extra spaces in the text output.
 	 */
-	cleanUpTokenizationSpaces?: boolean;
+	clean_up_tokenization_spaces?: boolean;
 	/**
 	 * Additional parametrization of the text generation algorithm
 	 */
-	generateParameters?: { [key: string]: unknown };
+	generate_parameters?: { [key: string]: unknown };
 	/**
 	 * The truncation strategy to use
 	 */
@@ -50,9 +50,9 @@ export type Text2TextGenerationTruncationStrategy = "do_not_truncate" | "longest
 * Outputs of inference for the Text2text Generation task
 */
 export interface SummarizationOutput {
 	/**
 	 * The generated text.
 	 */
-	generatedText: string;
+	generated_text: string;
 	[property: string]: unknown;
 }
diff --git a/packages/tasks/src/tasks/text-classification/inference.ts b/packages/tasks/src/tasks/text-classification/inference.ts
index 2272d903b95c8d84dd36d5b0851196d6bb9ef857..dc913690203f4f3b64d1606f4d11aaa254b8013d 100644
--- a/packages/tasks/src/tasks/text-classification/inference.ts
+++ b/packages/tasks/src/tasks/text-classification/inference.ts
@@ -23,11 +23,11 @@ export interface TextClassificationInput {
  * Additional inference parameters for Text Classification
  */
 export interface TextClassificationParameters {
-	functionToApply?: ClassificationOutputTransform;
+	function_to_apply?: ClassificationOutputTransform;
 	/**
 	 * When specified, limits the output to the top K most probable classes.
 	 */
-	topK?: number;
+	top_k?: number;
 	[property: string]: unknown;
 }
 /**
diff --git a/packages/tasks/src/tasks/text-classification/spec/input.json b/packages/tasks/src/tasks/text-classification/spec/input.json
index 10b98cbba3af88698a7979b5890925d0261390e9..3bfdeaf6b905d957e5241674b7ac3d3eb1c6438a 100644
--- a/packages/tasks/src/tasks/text-classification/spec/input.json
+++ b/packages/tasks/src/tasks/text-classification/spec/input.json
@@ -20,11 +20,11 @@
 		"description": "Additional inference parameters for Text Classification",
 		"type": "object",
 		"properties": {
-			"functionToApply": {
+			"function_to_apply": {
 				"title": "TextClassificationOutputTransform",
 				"$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutputTransform"
 			},
-			"topK": {
+			"top_k": {
 				"type": "integer",
 				"description": "When specified, limits the output to the top K most probable classes."
 			}
diff --git a/packages/tasks/src/tasks/text-generation/inference.ts b/packages/tasks/src/tasks/text-generation/inference.ts
index 52adc0ace30af542a30f323cf764127c34071fb2..0f0d1e8d754644ac170abe168a6498130f6ac10f 100644
--- a/packages/tasks/src/tasks/text-generation/inference.ts
+++ b/packages/tasks/src/tasks/text-generation/inference.ts
@@ -3,6 +3,7 @@
  *
  * Using src/scripts/inference-codegen
  */
+
 /**
  * Inputs for Text Generation inference
  */
@@ -17,6 +18,7 @@ export interface TextGenerationInput {
 	parameters?: TextGenerationParameters;
 	[property: string]: unknown;
 }
+
 /**
  * Additional inference parameters
 *
@@ -26,24 +28,24 @@ export interface TextGenerationParameters {
 	/**
 	 * Whether to use logit sampling (true) or greedy search (false).
 	 */
-	doSample?: boolean;
+	do_sample?: boolean;
 	/**
 	 * Maximum number of generated tokens.
 	 */
-	maxNewTokens?: number;
+	max_new_tokens?: number;
 	/**
 	 * The parameter for repetition penalty. A value of 1.0 means no penalty. See [this
 	 * paper](https://hf.co/papers/1909.05858) for more details.
 	 */
-	repetitionPenalty?: number;
+	repetition_penalty?: number;
 	/**
 	 * Whether to prepend the prompt to the generated text.
 	 */
-	returnFullText?: boolean;
+	return_full_text?: boolean;
 	/**
 	 * Stop generating tokens if a member of `stop_sequences` is generated.
 	 */
-	stopSequences?: string[];
+	stop_sequences?: string[];
 	/**
 	 * The value used to modulate the logits distribution.
 	 */
@@ -51,12 +53,12 @@ export interface TextGenerationParameters {
 	/**
 	 * The number of highest probability vocabulary tokens to keep for top-k-filtering.
 	 */
-	topK?: number;
+	top_k?: number;
 	/**
 	 * If set to < 1, only the smallest set of most probable tokens with probabilities that add
 	 * up to `top_p` or higher are kept for generation.
 	 */
-	topP?: number;
+	top_p?: number;
 	/**
 	 * Truncate input tokens to the given size.
 	 */
@@ -65,21 +67,21 @@ export interface TextGenerationParameters {
 	 * Typical Decoding mass. See [Typical Decoding for Natural Language
 	 * Generation](https://hf.co/papers/2202.00666) for more information
 	 */
-	typicalP?: number;
+	typical_p?: number;
 	/**
 	 * Watermarking with [A Watermark for Large Language Models](https://hf.co/papers/2301.10226)
 	 */
 	watermark?: boolean;
 	[property: string]: unknown;
 }
-export type TextGenerationOutput = TextGenerationOutputElement[];
+
 /**
 * Outputs for Text Generation inference
 */
-export interface TextGenerationOutputElement {
+export interface TextGenerationOutput {
 	/**
 	 * The generated text
 	 */
-	generatedText: string;
+	generated_text: string;
 	[property: string]: unknown;
 }
diff --git a/packages/tasks/src/tasks/text-generation/spec/input.json b/packages/tasks/src/tasks/text-generation/spec/input.json
index c4756edd3fa55a04bd1a7ce5e54e10e7d720e376..26fe24c8a20d1e46c7ad76a9511f9c23cd9f4a8e 100644
--- a/packages/tasks/src/tasks/text-generation/spec/input.json
+++ b/packages/tasks/src/tasks/text-generation/spec/input.json
@@ -20,23 +20,23 @@
 		"description": "Additional inference parameters for Text Generation",
 		"type": "object",
 		"properties": {
-			"doSample": {
+			"do_sample": {
 				"type": "boolean",
 				"description": "Whether to use logit sampling (true) or greedy search (false)."
 			},
-			"maxNewTokens": {
+			"max_new_tokens": {
 				"type": "integer",
 				"description": "Maximum number of generated tokens."
 			},
-			"repetitionPenalty": {
+			"repetition_penalty": {
 				"type": "number",
 				"description": "The parameter for repetition penalty. A value of 1.0 means no penalty. See [this paper](https://hf.co/papers/1909.05858) for more details."
 			},
-			"returnFullText": {
+			"return_full_text": {
 				"type": "boolean",
 				"description": "Whether to prepend the prompt to the generated text."
 			},
-			"stopSequences": {
+			"stop_sequences": {
 				"type": "array",
 				"items": {
 					"type": "string"
@@ -47,11 +47,11 @@
 				"type": "number",
 				"description": "The value used to modulate the logits distribution."
 			},
-			"topK": {
+			"top_k": {
 				"type": "integer",
 				"description": "The number of highest probability vocabulary tokens to keep for top-k-filtering."
 			},
-			"topP": {
+			"top_p": {
 				"type": "number",
 				"description": "If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or higher are kept for generation."
 			},
@@ -59,7 +59,7 @@
 				"type": "integer",
 				"description": "Truncate input tokens to the given size."
 			},
-			"typicalP": {
+			"typical_p": {
 				"type": "number",
 				"description": "Typical Decoding mass. See [Typical Decoding for Natural Language Generation](https://hf.co/papers/2202.00666) for more information"
 			},
diff --git a/packages/tasks/src/tasks/text-generation/spec/output.json b/packages/tasks/src/tasks/text-generation/spec/output.json
index eacb907e2c75f02a866b9b963b6a2bbfefe18d8d..b38bc8be305be78ca5f4d575eed7a5d910af0266 100644
--- a/packages/tasks/src/tasks/text-generation/spec/output.json
+++ b/packages/tasks/src/tasks/text-generation/spec/output.json
@@ -3,15 +3,12 @@
 	"$schema": "http://json-schema.org/draft-06/schema#",
 	"description": "Outputs for Text Generation inference",
 	"title": "TextGenerationOutput",
-	"type": "array",
-	"items": {
-		"type": "object",
-		"properties": {
-			"generatedText": {
-				"type": "string",
-				"description": "The generated text"
-			}
-		},
-		"required": ["generatedText"]
-	}
+	"type": "object",
+	"properties": {
+		"generated_text": {
+			"type": "string",
+			"description": "The generated text"
+		}
+	},
+	"required": ["generated_text"]
 }
diff --git a/packages/tasks/src/tasks/text-to-audio/inference.ts b/packages/tasks/src/tasks/text-to-audio/inference.ts
index 2ac6b3c08e1462ce22c0c64f952fa2e5a1164215..276ecce652394bdc98b8708c6ac19fba46a8da48 100644
--- a/packages/tasks/src/tasks/text-to-audio/inference.ts
+++ b/packages/tasks/src/tasks/text-to-audio/inference.ts
@@ -3,6 +3,7 @@
  *
  * Using src/scripts/inference-codegen
  */
+
 /**
  * Inputs for Text To Audio inference
  */
@@ -17,6 +18,7 @@ export interface TextToAudioInput {
 	parameters?: TextToAudioParameters;
 	[property: string]: unknown;
 }
+
 /**
  * Additional inference parameters
 *
@@ -29,6 +31,7 @@ export interface TextToAudioParameters {
 	generate?: GenerationParameters;
 	[property: string]: unknown;
 }
+
 /**
  * Parametrization of the text generation process
 *
@@ -38,18 +41,18 @@ export interface GenerationParameters {
 	/**
 	 * Whether to use sampling instead of greedy decoding when generating new tokens.
 	 */
-	doSample?: boolean;
+	do_sample?: boolean;
 	/**
 	 * Controls the stopping condition for beam-based methods.
 	 */
-	earlyStopping?: EarlyStoppingUnion;
+	early_stopping?: EarlyStoppingUnion;
 	/**
 	 * If set to float strictly between 0 and 1, only tokens with a conditional probability
 	 * greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
 	 * 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
 	 * Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
 	 */
-	epsilonCutoff?: number;
+	epsilon_cutoff?: number;
 	/**
 	 * Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
 	 * float strictly between 0 and 1, a token is only considered if it is greater than either
@@ -59,37 +62,37 @@ export interface GenerationParameters {
 	 * See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
 	 * for more details.
 	 */
-	etaCutoff?: number;
+	eta_cutoff?: number;
 	/**
 	 * The maximum length (in tokens) of the generated text, including the input.
 	 */
-	maxLength?: number;
+	max_length?: number;
 	/**
 	 * The maximum number of tokens to generate. Takes precedence over maxLength.
 	 */
-	maxNewTokens?: number;
+	max_new_tokens?: number;
 	/**
 	 * The minimum length (in tokens) of the generated text, including the input.
 	 */
-	minLength?: number;
+	min_length?: number;
 	/**
 	 * The minimum number of tokens to generate. Takes precedence over maxLength.
 	 */
-	minNewTokens?: number;
+	min_new_tokens?: number;
 	/**
 	 * Number of groups to divide num_beams into in order to ensure diversity among different
 	 * groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
 	 */
-	numBeamGroups?: number;
+	num_beam_groups?: number;
 	/**
 	 * Number of beams to use for beam search.
 	 */
-	numBeams?: number;
+	num_beams?: number;
 	/**
 	 * The value balances the model confidence and the degeneration penalty in contrastive
 	 * search decoding.
 	 */
-	penaltyAlpha?: number;
+	penalty_alpha?: number;
 	/**
 	 * The value used to modulate the next token probabilities.
 	 */
@@ -97,12 +100,12 @@
 	/**
 	 * The number of highest probability vocabulary tokens to keep for top-k-filtering.
 	 */
-	topK?: number;
+	top_k?: number;
 	/**
 	 * If set to float < 1, only the smallest set of most probable tokens with probabilities
 	 * that add up to top_p or higher are kept for generation.
 	 */
-	topP?: number;
+	top_p?: number;
 	/**
 	 * Local typicality measures how similar the conditional probability of predicting a target
 	 * token next is to the expected conditional probability of predicting a random token next,
@@ -110,29 +113,30 @@
 	 * most locally typical tokens with probabilities that add up to typical_p or higher are
 	 * kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
 	 */
-	typicalP?: number;
+	typical_p?: number;
 	/**
 	 * Whether the model should use the past last key/values attentions to speed up decoding
 	 */
-	useCache?: boolean;
+	use_cache?: boolean;
 	[property: string]: unknown;
 }
+
 /**
 * Controls the stopping condition for beam-based methods.
 */
 export type EarlyStoppingUnion = boolean | "never";
-export type TextToAudioOutput = TextToAudioOutputElement[];
+
 /**
 * Outputs of inference for the Text To Audio task
 */
-export interface TextToAudioOutputElement {
+export interface TextToAudioOutput {
 	/**
 	 * The generated audio waveform.
 	 */
 	audio: unknown;
 	/**
 	 * The sampling rate of the generated audio waveform.
 	 */
-	samplingRate: number;
+	sampling_rate: number;
 	[property: string]: unknown;
 }
diff --git a/packages/tasks/src/tasks/text-to-audio/spec/output.json b/packages/tasks/src/tasks/text-to-audio/spec/output.json
index b0a25bd9ad4bcdb2e1f55a1fa65b7e2d9d8cf832..c171d62bffbed21b423f91a807ed525d285f3445 100644
--- a/packages/tasks/src/tasks/text-to-audio/spec/output.json
+++ b/packages/tasks/src/tasks/text-to-audio/spec/output.json
@@ -3,18 +3,15 @@
 	"$schema": "http://json-schema.org/draft-06/schema#",
 	"description": "Outputs of inference for the Text To Audio task",
 	"title": "TextToAudioOutput",
-	"type": "array",
-	"items": {
-		"type": "object",
-		"properties": {
-			"audio": {
-				"description": "The generated audio waveform."
-			},
-			"samplingRate": {
-				"type": "number",
-				"description": "The sampling rate of the generated audio waveform."
-			}
+	"type": "object",
+	"properties": {
+		"audio": {
+			"description": "The generated audio waveform."
 		},
-		"required": ["audio", "samplingRate"]
-	}
+		"sampling_rate": {
+			"type": "number",
+			"description": "The sampling rate of the generated audio waveform."
+		}
+	},
+	"required": ["audio", "sampling_rate"]
 }
diff --git a/packages/tasks/src/tasks/text-to-image/inference.ts b/packages/tasks/src/tasks/text-to-image/inference.ts
index 14237ebda2775336390b2cd6125bd346f4bff287..4997165b8c1351c37356ecc6ec613555b6d871b3 100644
--- a/packages/tasks/src/tasks/text-to-image/inference.ts
+++ b/packages/tasks/src/tasks/text-to-image/inference.ts
@@ -29,16 +29,16 @@ export interface TextToImageParameters {
 	/**
 	 * For diffusion models. A higher guidance scale value encourages the model to generate
 	 * images closely linked to the text prompt at the expense of lower image quality.
 	 */
-	guidanceScale?: number;
+	guidance_scale?: number;
 	/**
 	 * One or several prompt to guide what NOT to include in image generation.
 	 */
-	negativePrompt?: string[];
+	negative_prompt?: string[];
 	/**
 	 * For diffusion models. The number of denoising steps. More denoising steps usually lead to
 	 * a higher quality image at the expense of slower inference.
 	 */
-	numInferenceSteps?: number;
+	num_inference_steps?: number;
 	/**
 	 * For diffusion models. Override the scheduler with a compatible one
 	 */
@@ -46,7 +46,7 @@ export interface TextToImageParameters {
 	/**
 	 * The size in pixel of the output image
 	 */
-	targetSize?: TargetSize;
+	target_size?: TargetSize;
 	[property: string]: unknown;
 }
@@ -62,9 +62,7 @@ export interface TargetSize {
 /**
 * Outputs of inference for the Text To Image task
 */
-export type TextToImageOutput = unknown[] | boolean | number | number | null | TextToImageOutputObject | string;
-
-export interface TextToImageOutputObject {
+export interface TextToImageOutput {
 	/**
 	 * The generated image
 	 */
diff --git a/packages/tasks/src/tasks/text-to-image/spec/input.json b/packages/tasks/src/tasks/text-to-image/spec/input.json
index 130678fc91cb0bbd8709d42ec4f4956ac7e78427..49acc7ed3af74cc3293f6f8b250d715586a9085c 100644
--- a/packages/tasks/src/tasks/text-to-image/spec/input.json
+++ b/packages/tasks/src/tasks/text-to-image/spec/input.json
@@ -20,22 +20,22 @@
 		"description": "Additional inference parameters for Text To Image",
 		"type": "object",
 		"properties": {
-			"guidanceScale": {
+			"guidance_scale": {
 				"type": "number",
 				"description": "For diffusion models. A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality."
 			},
-			"negativePrompt": {
+			"negative_prompt": {
 				"type": "array",
 				"items": { "type": "string" },
 				"description": "One or several prompt to guide what NOT to include in image generation."
 			},
-			"numInferenceSteps": {
+			"num_inference_steps": {
 				"type": "integer",
 				"description": "For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference."
}, - "targetSize": { + "target_size": { "type": "object", "description": "The size in pixel of the output image", "properties": { diff --git a/packages/tasks/src/tasks/text-to-image/spec/output.json b/packages/tasks/src/tasks/text-to-image/spec/output.json index 5ab3ee7879b9833b97774a4db37254c3a76c2dbf..ff952a3a36dd7cdc4e1c6209ec9bce3aaf594999 100644 --- a/packages/tasks/src/tasks/text-to-image/spec/output.json +++ b/packages/tasks/src/tasks/text-to-image/spec/output.json @@ -3,13 +3,11 @@ "$schema": "http://json-schema.org/draft-06/schema#", "description": "Outputs of inference for the Text To Image task", "title": "TextToImageOutput", - "type": "array", - "items": { - "properties": { - "image": { - "description": "The generated image" - } - }, - "required": ["image"] - } + "type": "object", + "properties": { + "image": { + "description": "The generated image" + } + }, + "required": ["image"] } diff --git a/packages/tasks/src/tasks/text-to-speech/inference.ts b/packages/tasks/src/tasks/text-to-speech/inference.ts index d23b3e76a53424eb277bd15a131c2f19343ed254..cdf778438337af9ec63f2dd0123d8f5723c62d35 100644 --- a/packages/tasks/src/tasks/text-to-speech/inference.ts +++ b/packages/tasks/src/tasks/text-to-speech/inference.ts @@ -43,18 +43,18 @@ export interface GenerationParameters { /** * Whether to use sampling instead of greedy decoding when generating new tokens. */ - doSample?: boolean; + do_sample?: boolean; /** * Controls the stopping condition for beam-based methods. */ - earlyStopping?: EarlyStoppingUnion; + early_stopping?: EarlyStoppingUnion; /** * If set to float strictly between 0 and 1, only tokens with a conditional probability * greater than epsilon_cutoff will be sampled. In the paper, suggested values range from * 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language * Model Desmoothing](https://hf.co/papers/2210.15191) for more details. */ - epsilonCutoff?: number; + epsilon_cutoff?: number; /** * Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to * float strictly between 0 and 1, a token is only considered if it is greater than either @@ -64,37 +64,37 @@ export interface GenerationParameters { * See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191) * for more details. */ - etaCutoff?: number; + eta_cutoff?: number; /** * The maximum length (in tokens) of the generated text, including the input. */ - maxLength?: number; + max_length?: number; /** * The maximum number of tokens to generate. Takes precedence over maxLength. */ - maxNewTokens?: number; + max_new_tokens?: number; /** * The minimum length (in tokens) of the generated text, including the input. */ - minLength?: number; + min_length?: number; /** * The minimum number of tokens to generate. Takes precedence over maxLength. */ - minNewTokens?: number; + min_new_tokens?: number; /** * Number of groups to divide num_beams into in order to ensure diversity among different * groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details. */ - numBeamGroups?: number; + num_beam_groups?: number; /** * Number of beams to use for beam search. */ - numBeams?: number; + num_beams?: number; /** * The value balances the model confidence and the degeneration penalty in contrastive * search decoding. */ - penaltyAlpha?: number; + penalty_alpha?: number; /** * The value used to modulate the next token probabilities. 
 	 */
@@ -102,12 +102,12 @@
 	/**
 	 * The number of highest probability vocabulary tokens to keep for top-k-filtering.
 	 */
-	topK?: number;
+	top_k?: number;
 	/**
 	 * If set to float < 1, only the smallest set of most probable tokens with probabilities
 	 * that add up to top_p or higher are kept for generation.
 	 */
-	topP?: number;
+	top_p?: number;
 	/**
 	 * Local typicality measures how similar the conditional probability of predicting a target
 	 * token next is to the expected conditional probability of predicting a random token next,
@@ -115,11 +115,11 @@
 	 * most locally typical tokens with probabilities that add up to typical_p or higher are
 	 * kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
 	 */
-	typicalP?: number;
+	typical_p?: number;
 	/**
 	 * Whether the model should use the past last key/values attentions to speed up decoding
 	 */
-	useCache?: boolean;
+	use_cache?: boolean;
 	[property: string]: unknown;
 }
@@ -138,9 +138,9 @@ export interface TextToSpeechOutput {
 	 * The generated audio waveform.
 	 */
 	audio: unknown;
 	/**
 	 * The sampling rate of the generated audio waveform.
 	 */
-	samplingRate: number;
+	sampling_rate: number;
 	[property: string]: unknown;
 }
diff --git a/packages/tasks/src/tasks/text2text-generation/inference.ts b/packages/tasks/src/tasks/text2text-generation/inference.ts
index e2649dd4109c17e7530f691a76e2af3d5d93dfac..3fb690b702a87cea401f213ffbc038d0fb076def 100644
--- a/packages/tasks/src/tasks/text2text-generation/inference.ts
+++ b/packages/tasks/src/tasks/text2text-generation/inference.ts
@@ -3,6 +3,7 @@
  *
  * Using src/scripts/inference-codegen
  */
+
 /**
  * Inputs for Text2text Generation inference
  */
@@ -17,6 +18,7 @@ export interface Text2TextGenerationInput {
 	parameters?: Text2TextGenerationParameters;
 	[property: string]: unknown;
 }
+
 /**
  * Additional inference parameters
 *
@@ -26,28 +28,27 @@ export interface Text2TextGenerationParameters {
 	/**
 	 * Whether to clean up the potential extra spaces in the text output.
 	 */
-	cleanUpTokenizationSpaces?: boolean;
+	clean_up_tokenization_spaces?: boolean;
 	/**
 	 * Additional parametrization of the text generation algorithm
 	 */
-	generateParameters?: {
-		[key: string]: unknown;
-	};
+	generate_parameters?: { [key: string]: unknown };
 	/**
 	 * The truncation strategy to use
 	 */
 	truncation?: Text2TextGenerationTruncationStrategy;
 	[property: string]: unknown;
 }
+
 export type Text2TextGenerationTruncationStrategy = "do_not_truncate" | "longest_first" | "only_first" | "only_second";
-export type Text2TextGenerationOutput = Text2TextGenerationOutputElement[];
+
 /**
 * Outputs of inference for the Text2text Generation task
 */
-export interface Text2TextGenerationOutputElement {
+export interface Text2TextGenerationOutput {
 	/**
 	 * The generated text.
 	 */
-	generatedText: string;
+	generated_text: string;
 	[property: string]: unknown;
 }
diff --git a/packages/tasks/src/tasks/text2text-generation/spec/input.json b/packages/tasks/src/tasks/text2text-generation/spec/input.json
index da818bc044b236ede94d8992d515a0d0e4aee4c8..0310d74787a56ae5dd306732487646ccf82cf907 100644
--- a/packages/tasks/src/tasks/text2text-generation/spec/input.json
+++ b/packages/tasks/src/tasks/text2text-generation/spec/input.json
@@ -20,7 +20,7 @@
 		"description": "Additional inference parameters for Text2text Generation",
 		"type": "object",
 		"properties": {
-			"cleanUpTokenizationSpaces": {
+			"clean_up_tokenization_spaces": {
 				"type": "boolean",
 				"description": "Whether to clean up the potential extra spaces in the text output."
 			},
@@ -43,7 +43,7 @@
 					}
 				]
 			},
-			"generateParameters": {
+			"generate_parameters": {
 				"title": "generateParameters",
 				"type": "object",
 				"description": "Additional parametrization of the text generation algorithm"
diff --git a/packages/tasks/src/tasks/text2text-generation/spec/output.json b/packages/tasks/src/tasks/text2text-generation/spec/output.json
index f60ba8933eecead6e159ca07e03edc5f1fb93284..0da61f103d4cb27c3f61c2c5d782f44906ca2120 100644
--- a/packages/tasks/src/tasks/text2text-generation/spec/output.json
+++ b/packages/tasks/src/tasks/text2text-generation/spec/output.json
@@ -3,15 +3,12 @@
 	"$schema": "http://json-schema.org/draft-06/schema#",
 	"description": "Outputs of inference for the Text2text Generation task",
 	"title": "Text2TextGenerationOutput",
-	"type": "array",
-	"items": {
-		"type": "object",
-		"properties": {
-			"generatedText": {
-				"type": "string",
-				"description": "The generated text."
-			}
-		},
-		"required": ["generatedText"]
-	}
+	"type": "object",
+	"properties": {
+		"generated_text": {
+			"type": "string",
+			"description": "The generated text."
+		}
+	},
+	"required": ["generated_text"]
 }
diff --git a/packages/tasks/src/tasks/token-classification/inference.ts b/packages/tasks/src/tasks/token-classification/inference.ts
index 17f0d798e23e56c7c7c05373c02ee4b123f6e2b2..c89bf4e70e634c16400c766bbad761c0fdc53424 100644
--- a/packages/tasks/src/tasks/token-classification/inference.ts
+++ b/packages/tasks/src/tasks/token-classification/inference.ts
@@ -26,11 +26,11 @@ export interface TokenClassificationParameters {
 	/**
 	 * The strategy used to fuse tokens based on model predictions
 	 */
-	aggregationStrategy?: TokenClassificationAggregationStrategy;
+	aggregation_strategy?: TokenClassificationAggregationStrategy;
 	/**
 	 * A list of labels to ignore
 	 */
-	ignoreLabels?: string[];
+	ignore_labels?: string[];
 	/**
 	 * The number of overlapping tokens between chunks when splitting the input text.
 	 */
@@ -64,7 +64,7 @@ export interface TokenClassificationOutputElement {
 	/**
 	 * The predicted label for that group of tokens
 	 */
-	entityGroup?: string;
+	entity_group?: string;
 	label: unknown;
 	/**
 	 * The associated score / probability
diff --git a/packages/tasks/src/tasks/token-classification/spec/input.json b/packages/tasks/src/tasks/token-classification/spec/input.json
index 0b29d0ab13dea645e2163390367beaed593fa2e9..30d6153d2ac99f11c79d378a2352dc85c1be3fb9 100644
--- a/packages/tasks/src/tasks/token-classification/spec/input.json
+++ b/packages/tasks/src/tasks/token-classification/spec/input.json
@@ -20,7 +20,7 @@
 		"description": "Additional inference parameters for Token Classification",
 		"type": "object",
 		"properties": {
-			"ignoreLabels": {
+			"ignore_labels": {
 				"type": "array",
 				"items": {
 					"type": "string"
@@ -31,7 +31,7 @@
 				"type": "integer",
 				"description": "The number of overlapping tokens between chunks when splitting the input text."
 			},
-			"aggregationStrategy": {
+			"aggregation_strategy": {
 				"title": "TokenClassificationAggregationStrategy",
 				"type": "string",
 				"description": "The strategy used to fuse tokens based on model predictions",
diff --git a/packages/tasks/src/tasks/token-classification/spec/output.json b/packages/tasks/src/tasks/token-classification/spec/output.json
index 8522d972a283821244e40b8c5f9e1107750464a9..95bdc06f531faec57d01f2bfcfb565ea6560f731 100644
--- a/packages/tasks/src/tasks/token-classification/spec/output.json
+++ b/packages/tasks/src/tasks/token-classification/spec/output.json
@@ -7,7 +7,7 @@
 	"items": {
 		"type": "object",
 		"properties": {
-			"entityGroup": {
+			"entity_group": {
 				"type": "string",
 				"description": "The predicted label for that group of tokens"
 			},
diff --git a/packages/tasks/src/tasks/translation/inference.ts b/packages/tasks/src/tasks/translation/inference.ts
index b4d6bd7162b4c5fbe4d713d7210126f9decc94be..9ee4994b4a72272363383bb43e852fdde4e6addc 100644
--- a/packages/tasks/src/tasks/translation/inference.ts
+++ b/packages/tasks/src/tasks/translation/inference.ts
@@ -30,11 +30,11 @@ export interface Text2TextGenerationParameters {
 	/**
 	 * Whether to clean up the potential extra spaces in the text output.
 	 */
-	cleanUpTokenizationSpaces?: boolean;
+	clean_up_tokenization_spaces?: boolean;
 	/**
 	 * Additional parametrization of the text generation algorithm
 	 */
-	generateParameters?: { [key: string]: unknown };
+	generate_parameters?: { [key: string]: unknown };
 	/**
 	 * The truncation strategy to use
 	 */
@@ -50,9 +50,9 @@ export type Text2TextGenerationTruncationStrategy = "do_not_truncate" | "longest
 * Outputs of inference for the Text2text Generation task
 */
 export interface TranslationOutput {
 	/**
 	 * The generated text.
 	 */
-	generatedText: string;
+	generated_text: string;
 	[property: string]: unknown;
 }
diff --git a/packages/tasks/src/tasks/video-classification/inference.ts b/packages/tasks/src/tasks/video-classification/inference.ts
index 0366d38db4df15206adf078f31e2888eceeb06f6..6615b8ddcbd0df5a4a7ebe67d89c93743ffa7d2c 100644
--- a/packages/tasks/src/tasks/video-classification/inference.ts
+++ b/packages/tasks/src/tasks/video-classification/inference.ts
@@ -26,16 +26,16 @@ export interface VideoClassificationParameters {
 	/**
 	 * The sampling rate used to select frames from the video.
 	 */
-	frameSamplingRate?: number;
-	functionToApply?: ClassificationOutputTransform;
+	frame_sampling_rate?: number;
+	function_to_apply?: ClassificationOutputTransform;
 	/**
 	 * The number of sampled frames to consider for classification.
 	 */
-	numFrames?: number;
+	num_frames?: number;
 	/**
 	 * When specified, limits the output to the top K most probable classes.
 	 */
-	topK?: number;
+	top_k?: number;
 	[property: string]: unknown;
 }
 /**
diff --git a/packages/tasks/src/tasks/video-classification/spec/input.json b/packages/tasks/src/tasks/video-classification/spec/input.json
index 11861209319afc388d4bb5d5bda1261ca7c6823a..1fb58e278364bda22840da44d3aedd295a6aa331 100644
--- a/packages/tasks/src/tasks/video-classification/spec/input.json
+++ b/packages/tasks/src/tasks/video-classification/spec/input.json
@@ -19,19 +19,19 @@
 		"description": "Additional inference parameters for Video Classification",
 		"type": "object",
 		"properties": {
-			"functionToApply": {
+			"function_to_apply": {
 				"title": "TextClassificationOutputTransform",
 				"$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutputTransform"
 			},
-			"numFrames": {
+			"num_frames": {
 				"type": "integer",
 				"description": "The number of sampled frames to consider for classification."
 			},
-			"frameSamplingRate": {
+			"frame_sampling_rate": {
 				"type": "integer",
 				"description": "The sampling rate used to select frames from the video."
 			},
-			"topK": {
+			"top_k": {
 				"type": "integer",
 				"description": "When specified, limits the output to the top K most probable classes."
 			}
diff --git a/packages/tasks/src/tasks/visual-question-answering/inference.ts b/packages/tasks/src/tasks/visual-question-answering/inference.ts
index 8df826bd8f32abfdf33396d6b486e626e024f1ff..7adc07ae02ab0993a6f40b8ecab7bceeb7be441e 100644
--- a/packages/tasks/src/tasks/visual-question-answering/inference.ts
+++ b/packages/tasks/src/tasks/visual-question-answering/inference.ts
@@ -42,7 +42,7 @@ export interface VisualQuestionAnsweringParameters {
 	 * return less than topk answers if there are not enough options available within the
 	 * context.
 	 */
-	topK?: number;
+	top_k?: number;
 	[property: string]: unknown;
 }
 export type VisualQuestionAnsweringOutput = VisualQuestionAnsweringOutputElement[];
diff --git a/packages/tasks/src/tasks/visual-question-answering/spec/input.json b/packages/tasks/src/tasks/visual-question-answering/spec/input.json
index 17d6cda2d34b7ae6111d386e6fae00eef352a80d..9f9dab121ca0f9d2290173b4cc9bf1f20de7bf15 100644
--- a/packages/tasks/src/tasks/visual-question-answering/spec/input.json
+++ b/packages/tasks/src/tasks/visual-question-answering/spec/input.json
@@ -30,7 +30,7 @@
 		"description": "Additional inference parameters for Visual Question Answering",
 		"type": "object",
 		"properties": {
-			"topK": {
+			"top_k": {
 				"type": "integer",
 				"description": "The number of answers to return (will be chosen by order of likelihood). Note that we return less than topk answers if there are not enough options available within the context."
 			}
diff --git a/packages/tasks/src/tasks/zero-shot-classification/inference.ts b/packages/tasks/src/tasks/zero-shot-classification/inference.ts
index d5ecfd72839b3b7a72b1a9203b4162b0159baad9..20e0d369a2cfdd1b4903e4817f611159ae8f8d57 100644
--- a/packages/tasks/src/tasks/zero-shot-classification/inference.ts
+++ b/packages/tasks/src/tasks/zero-shot-classification/inference.ts
@@ -41,13 +41,13 @@ export interface ZeroShotClassificationParameters {
 	 * The sentence used in conjunction with candidateLabels to attempt the text classification
 	 * by replacing the placeholder with the candidate labels.
 	 */
-	hypothesisTemplate?: string;
+	hypothesis_template?: string;
 	/**
 	 * Whether multiple candidate labels can be true. If false, the scores are normalized such
 	 * that the sum of the label likelihoods for each sequence is 1. If true, the labels are
 	 * considered independent and probabilities are normalized for each candidate.
 	 */
-	multiLabel?: boolean;
+	multi_label?: boolean;
 	[property: string]: unknown;
 }
 export type ZeroShotClassificationOutput = ZeroShotClassificationOutputElement[];
diff --git a/packages/tasks/src/tasks/zero-shot-classification/spec/input.json b/packages/tasks/src/tasks/zero-shot-classification/spec/input.json
index d9d0c61aa07d49e7bd683b07ad24a0bdd6dbbcf6..c955f2769f4c44c34dcb2e021fd99010c036cc45 100644
--- a/packages/tasks/src/tasks/zero-shot-classification/spec/input.json
+++ b/packages/tasks/src/tasks/zero-shot-classification/spec/input.json
@@ -35,11 +35,11 @@
 		"description": "Additional inference parameters for Zero Shot Classification",
 		"type": "object",
 		"properties": {
-			"hypothesisTemplate": {
+			"hypothesis_template": {
 				"type": "string",
 				"description": "The sentence used in conjunction with candidateLabels to attempt the text classification by replacing the placeholder with the candidate labels."
 			},
-			"multiLabel": {
+			"multi_label": {
 				"type": "boolean",
 				"description": "Whether multiple candidate labels can be true. If false, the scores are normalized such that the sum of the label likelihoods for each sequence is 1. If true, the labels are considered independent and probabilities are normalized for each candidate."
 			}
diff --git a/packages/tasks/src/tasks/zero-shot-image-classification/inference.ts b/packages/tasks/src/tasks/zero-shot-image-classification/inference.ts
index 66f6eb43151003a574e188f5ab0e6276934157d9..44ce76173503e6403626b0ae1244e2121b0be2b1 100644
--- a/packages/tasks/src/tasks/zero-shot-image-classification/inference.ts
+++ b/packages/tasks/src/tasks/zero-shot-image-classification/inference.ts
@@ -41,7 +41,7 @@ export interface ZeroShotImageClassificationParameters {
 	 * The sentence used in conjunction with candidateLabels to attempt the text classification
 	 * by replacing the placeholder with the candidate labels.
 	 */
-	hypothesisTemplate?: string;
+	hypothesis_template?: string;
 	[property: string]: unknown;
 }
 export type ZeroShotImageClassificationOutput = ZeroShotImageClassificationOutputElement[];
diff --git a/packages/tasks/src/tasks/zero-shot-image-classification/spec/input.json b/packages/tasks/src/tasks/zero-shot-image-classification/spec/input.json
index 7d66a51df17a9b2ef9962b224eaea311864468fd..dfdababc7018e9a46354813f77a839f6d48400c4 100644
--- a/packages/tasks/src/tasks/zero-shot-image-classification/spec/input.json
+++ b/packages/tasks/src/tasks/zero-shot-image-classification/spec/input.json
@@ -34,7 +34,7 @@
 		"description": "Additional inference parameters for Zero Shot Image Classification",
 		"type": "object",
 		"properties": {
-			"hypothesisTemplate": {
+			"hypothesis_template": {
 				"type": "string",
 				"description": "The sentence used in conjunction with candidateLabels to attempt the text classification by replacing the placeholder with the candidate labels."
 			}
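
Note: after this change, client payloads use snake_case parameter names end to end. As a minimal illustrative sketch — not part of this patch; the model id (gpt2), the hosted Inference API endpoint, and the HF_TOKEN environment variable are placeholder assumptions — a text-generation request with the renamed parameters would look like:

// Sketch only: demonstrates the snake_case parameter names from the updated specs.
// Requires Node 18+ for built-in fetch; HF_TOKEN is assumed to be set by the caller.
async function generateText(prompt: string): Promise<string> {
	const response = await fetch("https://api-inference.huggingface.co/models/gpt2", {
		method: "POST",
		headers: {
			Authorization: `Bearer ${process.env.HF_TOKEN}`,
			"Content-Type": "application/json",
		},
		body: JSON.stringify({
			inputs: prompt,
			parameters: {
				do_sample: true, // was doSample
				max_new_tokens: 50, // was maxNewTokens
				top_k: 40, // was topK
				top_p: 0.95, // was topP
			},
		}),
	});
	const output = await response.json();
	// The deployed API has historically returned an array of { generated_text } objects,
	// while the updated spec above models a single object — handle both defensively.
	return Array.isArray(output) ? output[0].generated_text : output.generated_text;
}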