# Copyright 2022 The HuggingFace Team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """ Doc utilities: Utilities related to documentation """ import functools import re import types def add_start_docstrings(*docstr): def docstring_decorator(fn): fn.__doc__ = "".join(docstr) + (fn.__doc__ if fn.__doc__ is not None else "") return fn return docstring_decorator def add_start_docstrings_to_model_forward(*docstr): def docstring_decorator(fn): docstring = "".join(docstr) + (fn.__doc__ if fn.__doc__ is not None else "") class_name = f"[`{fn.__qualname__.split('.')[0]}`]" intro = f" The {class_name} forward method, overrides the `__call__` special method." note = r""" Although the recipe for forward pass needs to be defined within this function, one should call the [`Module`] instance afterwards instead of this since the former takes care of running the pre and post processing steps while the latter silently ignores them. """ fn.__doc__ = intro + note + docstring return fn return docstring_decorator def add_end_docstrings(*docstr): def docstring_decorator(fn): fn.__doc__ = (fn.__doc__ if fn.__doc__ is not None else "") + "".join(docstr) return fn return docstring_decorator PT_RETURN_INTRODUCTION = r""" Returns: [`{full_output_type}`] or `tuple(torch.FloatTensor)`: A [`{full_output_type}`] or a tuple of `torch.FloatTensor` (if `return_dict=False` is passed or when `config.return_dict=False`) comprising various elements depending on the configuration ([`{config_class}`]) and inputs. """ TF_RETURN_INTRODUCTION = r""" Returns: [`{full_output_type}`] or `tuple(tf.Tensor)`: A [`{full_output_type}`] or a tuple of `tf.Tensor` (if `return_dict=False` is passed or when `config.return_dict=False`) comprising various elements depending on the configuration ([`{config_class}`]) and inputs. """ def _get_indent(t): """Returns the indentation in the first line of t""" search = re.search(r"^(\s*)\S", t) return "" if search is None else search.groups()[0] def _convert_output_args_doc(output_args_doc): """Convert output_args_doc to display properly.""" # Split output_arg_doc in blocks argument/description indent = _get_indent(output_args_doc) blocks = [] current_block = "" for line in output_args_doc.split("\n"): # If the indent is the same as the beginning, the line is the name of new arg. if _get_indent(line) == indent: if len(current_block) > 0: blocks.append(current_block[:-1]) current_block = f"{line}\n" else: # Otherwise it's part of the description of the current arg. # We need to remove 2 spaces to the indentation. current_block += f"{line[2:]}\n" blocks.append(current_block[:-1]) # Format each block for proper rendering for i in range(len(blocks)): blocks[i] = re.sub(r"^(\s+)(\S+)(\s+)", r"\1- **\2**\3", blocks[i]) blocks[i] = re.sub(r":\s*\n\s*(\S)", r" -- \1", blocks[i]) return "\n".join(blocks) def _prepare_output_docstrings(output_type, config_class, min_indent=None): """ Prepares the return part of the docstring using `output_type`. """ output_docstring = output_type.__doc__ # Remove the head of the docstring to keep the list of args only lines = output_docstring.split("\n") i = 0 while i < len(lines) and re.search(r"^\s*(Args|Parameters):\s*$", lines[i]) is None: i += 1 if i < len(lines): params_docstring = "\n".join(lines[(i + 1) :]) params_docstring = _convert_output_args_doc(params_docstring) else: raise ValueError( f"No `Args` or `Parameters` section is found in the docstring of `{output_type.__name__}`. Make sure it has" "docstring and contain either `Args` or `Parameters`." ) # Add the return introduction full_output_type = f"{output_type.__module__}.{output_type.__name__}" intro = TF_RETURN_INTRODUCTION if output_type.__name__.startswith("TF") else PT_RETURN_INTRODUCTION intro = intro.format(full_output_type=full_output_type, config_class=config_class) result = intro + params_docstring # Apply minimum indent if necessary if min_indent is not None: lines = result.split("\n") # Find the indent of the first nonempty line i = 0 while len(lines[i]) == 0: i += 1 indent = len(_get_indent(lines[i])) # If too small, add indentation to all nonempty lines if indent < min_indent: to_add = " " * (min_indent - indent) lines = [(f"{to_add}{line}" if len(line) > 0 else line) for line in lines] result = "\n".join(lines) return result FAKE_MODEL_DISCLAIMER = """ This example uses a random model as the real ones are all very big. To get proper results, you should use {real_checkpoint} instead of {fake_checkpoint}. If you get out-of-memory when loading that checkpoint, you can try adding `device_map="auto"` in the `from_pretrained` call. """ PT_TOKEN_CLASSIFICATION_SAMPLE = r""" Example: ```python >>> from transformers import AutoTokenizer, {model_class} >>> import torch >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> inputs = tokenizer( ... "HuggingFace is a company based in Paris and New York", add_special_tokens=False, return_tensors="pt" ... ) >>> with torch.no_grad(): ... logits = model(**inputs).logits >>> predicted_token_class_ids = logits.argmax(-1) >>> # Note that tokens are classified rather then input words which means that >>> # there might be more predicted token classes than words. >>> # Multiple token classes might account for the same word >>> predicted_tokens_classes = [model.config.id2label[t.item()] for t in predicted_token_class_ids[0]] >>> predicted_tokens_classes {expected_output} >>> labels = predicted_token_class_ids >>> loss = model(**inputs, labels=labels).loss >>> round(loss.item(), 2) {expected_loss} ``` """ PT_QUESTION_ANSWERING_SAMPLE = r""" Example: ```python >>> from transformers import AutoTokenizer, {model_class} >>> import torch >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet" >>> inputs = tokenizer(question, text, return_tensors="pt") >>> with torch.no_grad(): ... outputs = model(**inputs) >>> answer_start_index = outputs.start_logits.argmax() >>> answer_end_index = outputs.end_logits.argmax() >>> predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1] >>> tokenizer.decode(predict_answer_tokens, skip_special_tokens=True) {expected_output} >>> # target is "nice puppet" >>> target_start_index = torch.tensor([{qa_target_start_index}]) >>> target_end_index = torch.tensor([{qa_target_end_index}]) >>> outputs = model(**inputs, start_positions=target_start_index, end_positions=target_end_index) >>> loss = outputs.loss >>> round(loss.item(), 2) {expected_loss} ``` """ PT_SEQUENCE_CLASSIFICATION_SAMPLE = r""" Example of single-label classification: ```python >>> import torch >>> from transformers import AutoTokenizer, {model_class} >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") >>> with torch.no_grad(): ... logits = model(**inputs).logits >>> predicted_class_id = logits.argmax().item() >>> model.config.id2label[predicted_class_id] {expected_output} >>> # To train a model on `num_labels` classes, you can pass `num_labels=num_labels` to `.from_pretrained(...)` >>> num_labels = len(model.config.id2label) >>> model = {model_class}.from_pretrained("{checkpoint}", num_labels=num_labels) >>> labels = torch.tensor([1]) >>> loss = model(**inputs, labels=labels).loss >>> round(loss.item(), 2) {expected_loss} ``` Example of multi-label classification: ```python >>> import torch >>> from transformers import AutoTokenizer, {model_class} >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}", problem_type="multi_label_classification") >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") >>> with torch.no_grad(): ... logits = model(**inputs).logits >>> predicted_class_ids = torch.arange(0, logits.shape[-1])[torch.sigmoid(logits).squeeze(dim=0) > 0.5] >>> # To train a model on `num_labels` classes, you can pass `num_labels=num_labels` to `.from_pretrained(...)` >>> num_labels = len(model.config.id2label) >>> model = {model_class}.from_pretrained( ... "{checkpoint}", num_labels=num_labels, problem_type="multi_label_classification" ... ) >>> labels = torch.sum( ... torch.nn.functional.one_hot(predicted_class_ids[None, :].clone(), num_classes=num_labels), dim=1 ... ).to(torch.float) >>> loss = model(**inputs, labels=labels).loss ``` """ PT_MASKED_LM_SAMPLE = r""" Example: ```python >>> from transformers import AutoTokenizer, {model_class} >>> import torch >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> inputs = tokenizer("The capital of France is {mask}.", return_tensors="pt") >>> with torch.no_grad(): ... logits = model(**inputs).logits >>> # retrieve index of {mask} >>> mask_token_index = (inputs.input_ids == tokenizer.mask_token_id)[0].nonzero(as_tuple=True)[0] >>> predicted_token_id = logits[0, mask_token_index].argmax(axis=-1) >>> tokenizer.decode(predicted_token_id) {expected_output} >>> labels = tokenizer("The capital of France is Paris.", return_tensors="pt")["input_ids"] >>> # mask labels of non-{mask} tokens >>> labels = torch.where(inputs.input_ids == tokenizer.mask_token_id, labels, -100) >>> outputs = model(**inputs, labels=labels) >>> round(outputs.loss.item(), 2) {expected_loss} ``` """ PT_BASE_MODEL_SAMPLE = r""" Example: ```python >>> from transformers import AutoTokenizer, {model_class} >>> import torch >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") >>> outputs = model(**inputs) >>> last_hidden_states = outputs.last_hidden_state ``` """ PT_MULTIPLE_CHOICE_SAMPLE = r""" Example: ```python >>> from transformers import AutoTokenizer, {model_class} >>> import torch >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced." >>> choice0 = "It is eaten with a fork and a knife." >>> choice1 = "It is eaten while held in the hand." >>> labels = torch.tensor(0).unsqueeze(0) # choice0 is correct (according to Wikipedia ;)), batch size 1 >>> encoding = tokenizer([prompt, prompt], [choice0, choice1], return_tensors="pt", padding=True) >>> outputs = model(**{{k: v.unsqueeze(0) for k, v in encoding.items()}}, labels=labels) # batch size is 1 >>> # the linear classifier still needs to be trained >>> loss = outputs.loss >>> logits = outputs.logits ``` """ PT_CAUSAL_LM_SAMPLE = r""" Example: ```python >>> import torch >>> from transformers import AutoTokenizer, {model_class} >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") >>> outputs = model(**inputs, labels=inputs["input_ids"]) >>> loss = outputs.loss >>> logits = outputs.logits ``` """ PT_SPEECH_BASE_MODEL_SAMPLE = r""" Example: ```python >>> from transformers import AutoProcessor, {model_class} >>> import torch >>> from datasets import load_dataset >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation") >>> dataset = dataset.sort("id") >>> sampling_rate = dataset.features["audio"].sampling_rate >>> processor = AutoProcessor.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> # audio file is decoded on the fly >>> inputs = processor(dataset[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="pt") >>> with torch.no_grad(): ... outputs = model(**inputs) >>> last_hidden_states = outputs.last_hidden_state >>> list(last_hidden_states.shape) {expected_output} ``` """ PT_SPEECH_CTC_SAMPLE = r""" Example: ```python >>> from transformers import AutoProcessor, {model_class} >>> from datasets import load_dataset >>> import torch >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation") >>> dataset = dataset.sort("id") >>> sampling_rate = dataset.features["audio"].sampling_rate >>> processor = AutoProcessor.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> # audio file is decoded on the fly >>> inputs = processor(dataset[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="pt") >>> with torch.no_grad(): ... logits = model(**inputs).logits >>> predicted_ids = torch.argmax(logits, dim=-1) >>> # transcribe speech >>> transcription = processor.batch_decode(predicted_ids) >>> transcription[0] {expected_output} >>> inputs["labels"] = processor(text=dataset[0]["text"], return_tensors="pt").input_ids >>> # compute loss >>> loss = model(**inputs).loss >>> round(loss.item(), 2) {expected_loss} ``` """ PT_SPEECH_SEQ_CLASS_SAMPLE = r""" Example: ```python >>> from transformers import AutoFeatureExtractor, {model_class} >>> from datasets import load_dataset >>> import torch >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation") >>> dataset = dataset.sort("id") >>> sampling_rate = dataset.features["audio"].sampling_rate >>> feature_extractor = AutoFeatureExtractor.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> # audio file is decoded on the fly >>> inputs = feature_extractor(dataset[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="pt") >>> with torch.no_grad(): ... logits = model(**inputs).logits >>> predicted_class_ids = torch.argmax(logits, dim=-1).item() >>> predicted_label = model.config.id2label[predicted_class_ids] >>> predicted_label {expected_output} >>> # compute loss - target_label is e.g. "down" >>> target_label = model.config.id2label[0] >>> inputs["labels"] = torch.tensor([model.config.label2id[target_label]]) >>> loss = model(**inputs).loss >>> round(loss.item(), 2) {expected_loss} ``` """ PT_SPEECH_FRAME_CLASS_SAMPLE = r""" Example: ```python >>> from transformers import AutoFeatureExtractor, {model_class} >>> from datasets import load_dataset >>> import torch >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation") >>> dataset = dataset.sort("id") >>> sampling_rate = dataset.features["audio"].sampling_rate >>> feature_extractor = AutoFeatureExtractor.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> # audio file is decoded on the fly >>> inputs = feature_extractor(dataset[0]["audio"]["array"], return_tensors="pt", sampling_rate=sampling_rate) >>> with torch.no_grad(): ... logits = model(**inputs).logits >>> probabilities = torch.sigmoid(logits[0]) >>> # labels is a one-hot array of shape (num_frames, num_speakers) >>> labels = (probabilities > 0.5).long() >>> labels[0].tolist() {expected_output} ``` """ PT_SPEECH_XVECTOR_SAMPLE = r""" Example: ```python >>> from transformers import AutoFeatureExtractor, {model_class} >>> from datasets import load_dataset >>> import torch >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation") >>> dataset = dataset.sort("id") >>> sampling_rate = dataset.features["audio"].sampling_rate >>> feature_extractor = AutoFeatureExtractor.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> # audio file is decoded on the fly >>> inputs = feature_extractor( ... [d["array"] for d in dataset[:2]["audio"]], sampling_rate=sampling_rate, return_tensors="pt", padding=True ... ) >>> with torch.no_grad(): ... embeddings = model(**inputs).embeddings >>> embeddings = torch.nn.functional.normalize(embeddings, dim=-1).cpu() >>> # the resulting embeddings can be used for cosine similarity-based retrieval >>> cosine_sim = torch.nn.CosineSimilarity(dim=-1) >>> similarity = cosine_sim(embeddings[0], embeddings[1]) >>> threshold = 0.7 # the optimal threshold is dataset-dependent >>> if similarity < threshold: ... print("Speakers are not the same!") >>> round(similarity.item(), 2) {expected_output} ``` """ PT_VISION_BASE_MODEL_SAMPLE = r""" Example: ```python >>> from transformers import AutoImageProcessor, {model_class} >>> import torch >>> from datasets import load_dataset >>> dataset = load_dataset("huggingface/cats-image") >>> image = dataset["test"]["image"][0] >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> inputs = image_processor(image, return_tensors="pt") >>> with torch.no_grad(): ... outputs = model(**inputs) >>> last_hidden_states = outputs.last_hidden_state >>> list(last_hidden_states.shape) {expected_output} ``` """ PT_VISION_SEQ_CLASS_SAMPLE = r""" Example: ```python >>> from transformers import AutoImageProcessor, {model_class} >>> import torch >>> from datasets import load_dataset >>> dataset = load_dataset("huggingface/cats-image") >>> image = dataset["test"]["image"][0] >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> inputs = image_processor(image, return_tensors="pt") >>> with torch.no_grad(): ... logits = model(**inputs).logits >>> # model predicts one of the 1000 ImageNet classes >>> predicted_label = logits.argmax(-1).item() >>> print(model.config.id2label[predicted_label]) {expected_output} ``` """ PT_SAMPLE_DOCSTRINGS = { "SequenceClassification": PT_SEQUENCE_CLASSIFICATION_SAMPLE, "QuestionAnswering": PT_QUESTION_ANSWERING_SAMPLE, "TokenClassification": PT_TOKEN_CLASSIFICATION_SAMPLE, "MultipleChoice": PT_MULTIPLE_CHOICE_SAMPLE, "MaskedLM": PT_MASKED_LM_SAMPLE, "LMHead": PT_CAUSAL_LM_SAMPLE, "BaseModel": PT_BASE_MODEL_SAMPLE, "SpeechBaseModel": PT_SPEECH_BASE_MODEL_SAMPLE, "CTC": PT_SPEECH_CTC_SAMPLE, "AudioClassification": PT_SPEECH_SEQ_CLASS_SAMPLE, "AudioFrameClassification": PT_SPEECH_FRAME_CLASS_SAMPLE, "AudioXVector": PT_SPEECH_XVECTOR_SAMPLE, "VisionBaseModel": PT_VISION_BASE_MODEL_SAMPLE, "ImageClassification": PT_VISION_SEQ_CLASS_SAMPLE, } TF_TOKEN_CLASSIFICATION_SAMPLE = r""" Example: ```python >>> from transformers import AutoTokenizer, {model_class} >>> import tensorflow as tf >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> inputs = tokenizer( ... "HuggingFace is a company based in Paris and New York", add_special_tokens=False, return_tensors="tf" ... ) >>> logits = model(**inputs).logits >>> predicted_token_class_ids = tf.math.argmax(logits, axis=-1) >>> # Note that tokens are classified rather then input words which means that >>> # there might be more predicted token classes than words. >>> # Multiple token classes might account for the same word >>> predicted_tokens_classes = [model.config.id2label[t] for t in predicted_token_class_ids[0].numpy().tolist()] >>> predicted_tokens_classes {expected_output} ``` ```python >>> labels = predicted_token_class_ids >>> loss = tf.math.reduce_mean(model(**inputs, labels=labels).loss) >>> round(float(loss), 2) {expected_loss} ``` """ TF_QUESTION_ANSWERING_SAMPLE = r""" Example: ```python >>> from transformers import AutoTokenizer, {model_class} >>> import tensorflow as tf >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet" >>> inputs = tokenizer(question, text, return_tensors="tf") >>> outputs = model(**inputs) >>> answer_start_index = int(tf.math.argmax(outputs.start_logits, axis=-1)[0]) >>> answer_end_index = int(tf.math.argmax(outputs.end_logits, axis=-1)[0]) >>> predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1] >>> tokenizer.decode(predict_answer_tokens) {expected_output} ``` ```python >>> # target is "nice puppet" >>> target_start_index = tf.constant([{qa_target_start_index}]) >>> target_end_index = tf.constant([{qa_target_end_index}]) >>> outputs = model(**inputs, start_positions=target_start_index, end_positions=target_end_index) >>> loss = tf.math.reduce_mean(outputs.loss) >>> round(float(loss), 2) {expected_loss} ``` """ TF_SEQUENCE_CLASSIFICATION_SAMPLE = r""" Example: ```python >>> from transformers import AutoTokenizer, {model_class} >>> import tensorflow as tf >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf") >>> logits = model(**inputs).logits >>> predicted_class_id = int(tf.math.argmax(logits, axis=-1)[0]) >>> model.config.id2label[predicted_class_id] {expected_output} ``` ```python >>> # To train a model on `num_labels` classes, you can pass `num_labels=num_labels` to `.from_pretrained(...)` >>> num_labels = len(model.config.id2label) >>> model = {model_class}.from_pretrained("{checkpoint}", num_labels=num_labels) >>> labels = tf.constant(1) >>> loss = model(**inputs, labels=labels).loss >>> round(float(loss), 2) {expected_loss} ``` """ TF_MASKED_LM_SAMPLE = r""" Example: ```python >>> from transformers import AutoTokenizer, {model_class} >>> import tensorflow as tf >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> inputs = tokenizer("The capital of France is {mask}.", return_tensors="tf") >>> logits = model(**inputs).logits >>> # retrieve index of {mask} >>> mask_token_index = tf.where((inputs.input_ids == tokenizer.mask_token_id)[0]) >>> selected_logits = tf.gather_nd(logits[0], indices=mask_token_index) >>> predicted_token_id = tf.math.argmax(selected_logits, axis=-1) >>> tokenizer.decode(predicted_token_id) {expected_output} ``` ```python >>> labels = tokenizer("The capital of France is Paris.", return_tensors="tf")["input_ids"] >>> # mask labels of non-{mask} tokens >>> labels = tf.where(inputs.input_ids == tokenizer.mask_token_id, labels, -100) >>> outputs = model(**inputs, labels=labels) >>> round(float(outputs.loss), 2) {expected_loss} ``` """ TF_BASE_MODEL_SAMPLE = r""" Example: ```python >>> from transformers import AutoTokenizer, {model_class} >>> import tensorflow as tf >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf") >>> outputs = model(inputs) >>> last_hidden_states = outputs.last_hidden_state ``` """ TF_MULTIPLE_CHOICE_SAMPLE = r""" Example: ```python >>> from transformers import AutoTokenizer, {model_class} >>> import tensorflow as tf >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced." >>> choice0 = "It is eaten with a fork and a knife." >>> choice1 = "It is eaten while held in the hand." >>> encoding = tokenizer([prompt, prompt], [choice0, choice1], return_tensors="tf", padding=True) >>> inputs = {{k: tf.expand_dims(v, 0) for k, v in encoding.items()}} >>> outputs = model(inputs) # batch size is 1 >>> # the linear classifier still needs to be trained >>> logits = outputs.logits ``` """ TF_CAUSAL_LM_SAMPLE = r""" Example: ```python >>> from transformers import AutoTokenizer, {model_class} >>> import tensorflow as tf >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf") >>> outputs = model(inputs) >>> logits = outputs.logits ``` """ TF_SPEECH_BASE_MODEL_SAMPLE = r""" Example: ```python >>> from transformers import AutoProcessor, {model_class} >>> from datasets import load_dataset >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation") >>> dataset = dataset.sort("id") >>> sampling_rate = dataset.features["audio"].sampling_rate >>> processor = AutoProcessor.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> # audio file is decoded on the fly >>> inputs = processor(dataset[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="tf") >>> outputs = model(**inputs) >>> last_hidden_states = outputs.last_hidden_state >>> list(last_hidden_states.shape) {expected_output} ``` """ TF_SPEECH_CTC_SAMPLE = r""" Example: ```python >>> from transformers import AutoProcessor, {model_class} >>> from datasets import load_dataset >>> import tensorflow as tf >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation") >>> dataset = dataset.sort("id") >>> sampling_rate = dataset.features["audio"].sampling_rate >>> processor = AutoProcessor.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> # audio file is decoded on the fly >>> inputs = processor(dataset[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="tf") >>> logits = model(**inputs).logits >>> predicted_ids = tf.math.argmax(logits, axis=-1) >>> # transcribe speech >>> transcription = processor.batch_decode(predicted_ids) >>> transcription[0] {expected_output} ``` ```python >>> inputs["labels"] = processor(text=dataset[0]["text"], return_tensors="tf").input_ids >>> # compute loss >>> loss = model(**inputs).loss >>> round(float(loss), 2) {expected_loss} ``` """ TF_VISION_BASE_MODEL_SAMPLE = r""" Example: ```python >>> from transformers import AutoImageProcessor, {model_class} >>> from datasets import load_dataset >>> dataset = load_dataset("huggingface/cats-image") >>> image = dataset["test"]["image"][0] >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> inputs = image_processor(image, return_tensors="tf") >>> outputs = model(**inputs) >>> last_hidden_states = outputs.last_hidden_state >>> list(last_hidden_states.shape) {expected_output} ``` """ TF_VISION_SEQ_CLASS_SAMPLE = r""" Example: ```python >>> from transformers import AutoImageProcessor, {model_class} >>> import tensorflow as tf >>> from datasets import load_dataset >>> dataset = load_dataset("huggingface/cats-image") >>> image = dataset["test"]["image"][0] >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> inputs = image_processor(image, return_tensors="tf") >>> logits = model(**inputs).logits >>> # model predicts one of the 1000 ImageNet classes >>> predicted_label = int(tf.math.argmax(logits, axis=-1)) >>> print(model.config.id2label[predicted_label]) {expected_output} ``` """ TF_SAMPLE_DOCSTRINGS = { "SequenceClassification": TF_SEQUENCE_CLASSIFICATION_SAMPLE, "QuestionAnswering": TF_QUESTION_ANSWERING_SAMPLE, "TokenClassification": TF_TOKEN_CLASSIFICATION_SAMPLE, "MultipleChoice": TF_MULTIPLE_CHOICE_SAMPLE, "MaskedLM": TF_MASKED_LM_SAMPLE, "LMHead": TF_CAUSAL_LM_SAMPLE, "BaseModel": TF_BASE_MODEL_SAMPLE, "SpeechBaseModel": TF_SPEECH_BASE_MODEL_SAMPLE, "CTC": TF_SPEECH_CTC_SAMPLE, "VisionBaseModel": TF_VISION_BASE_MODEL_SAMPLE, "ImageClassification": TF_VISION_SEQ_CLASS_SAMPLE, } FLAX_TOKEN_CLASSIFICATION_SAMPLE = r""" Example: ```python >>> from transformers import AutoTokenizer, {model_class} >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="jax") >>> outputs = model(**inputs) >>> logits = outputs.logits ``` """ FLAX_QUESTION_ANSWERING_SAMPLE = r""" Example: ```python >>> from transformers import AutoTokenizer, {model_class} >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet" >>> inputs = tokenizer(question, text, return_tensors="jax") >>> outputs = model(**inputs) >>> start_scores = outputs.start_logits >>> end_scores = outputs.end_logits ``` """ FLAX_SEQUENCE_CLASSIFICATION_SAMPLE = r""" Example: ```python >>> from transformers import AutoTokenizer, {model_class} >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="jax") >>> outputs = model(**inputs) >>> logits = outputs.logits ``` """ FLAX_MASKED_LM_SAMPLE = r""" Example: ```python >>> from transformers import AutoTokenizer, {model_class} >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> inputs = tokenizer("The capital of France is {mask}.", return_tensors="jax") >>> outputs = model(**inputs) >>> logits = outputs.logits ``` """ FLAX_BASE_MODEL_SAMPLE = r""" Example: ```python >>> from transformers import AutoTokenizer, {model_class} >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="jax") >>> outputs = model(**inputs) >>> last_hidden_states = outputs.last_hidden_state ``` """ FLAX_MULTIPLE_CHOICE_SAMPLE = r""" Example: ```python >>> from transformers import AutoTokenizer, {model_class} >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced." >>> choice0 = "It is eaten with a fork and a knife." >>> choice1 = "It is eaten while held in the hand." >>> encoding = tokenizer([prompt, prompt], [choice0, choice1], return_tensors="jax", padding=True) >>> outputs = model(**{{k: v[None, :] for k, v in encoding.items()}}) >>> logits = outputs.logits ``` """ FLAX_CAUSAL_LM_SAMPLE = r""" Example: ```python >>> from transformers import AutoTokenizer, {model_class} >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}") >>> model = {model_class}.from_pretrained("{checkpoint}") >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="np") >>> outputs = model(**inputs) >>> # retrieve logts for next token >>> next_token_logits = outputs.logits[:, -1] ``` """ FLAX_SAMPLE_DOCSTRINGS = { "SequenceClassification": FLAX_SEQUENCE_CLASSIFICATION_SAMPLE, "QuestionAnswering": FLAX_QUESTION_ANSWERING_SAMPLE, "TokenClassification": FLAX_TOKEN_CLASSIFICATION_SAMPLE, "MultipleChoice": FLAX_MULTIPLE_CHOICE_SAMPLE, "MaskedLM": FLAX_MASKED_LM_SAMPLE, "BaseModel": FLAX_BASE_MODEL_SAMPLE, "LMHead": FLAX_CAUSAL_LM_SAMPLE, } def filter_outputs_from_example(docstring, **kwargs): """ Removes the lines testing an output with the doctest syntax in a code sample when it's set to `None`. """ for key, value in kwargs.items(): if value is not None: continue doc_key = "{" + key + "}" docstring = re.sub(rf"\n([^\n]+)\n\s+{doc_key}\n", "\n", docstring) return docstring def add_code_sample_docstrings( *docstr, processor_class=None, checkpoint=None, output_type=None, config_class=None, mask="[MASK]", qa_target_start_index=14, qa_target_end_index=15, model_cls=None, modality=None, expected_output=None, expected_loss=None, real_checkpoint=None, ): def docstring_decorator(fn): # model_class defaults to function's class if not specified otherwise model_class = fn.__qualname__.split(".")[0] if model_cls is None else model_cls if model_class[:2] == "TF": sample_docstrings = TF_SAMPLE_DOCSTRINGS elif model_class[:4] == "Flax": sample_docstrings = FLAX_SAMPLE_DOCSTRINGS else: sample_docstrings = PT_SAMPLE_DOCSTRINGS # putting all kwargs for docstrings in a dict to be used # with the `.format(**doc_kwargs)`. Note that string might # be formatted with non-existing keys, which is fine. doc_kwargs = { "model_class": model_class, "processor_class": processor_class, "checkpoint": checkpoint, "mask": mask, "qa_target_start_index": qa_target_start_index, "qa_target_end_index": qa_target_end_index, "expected_output": expected_output, "expected_loss": expected_loss, "real_checkpoint": real_checkpoint, "fake_checkpoint": checkpoint, "true": "{true}", # For syntax that conflicts with formatting. } if ("SequenceClassification" in model_class or "AudioClassification" in model_class) and modality == "audio": code_sample = sample_docstrings["AudioClassification"] elif "SequenceClassification" in model_class: code_sample = sample_docstrings["SequenceClassification"] elif "QuestionAnswering" in model_class: code_sample = sample_docstrings["QuestionAnswering"] elif "TokenClassification" in model_class: code_sample = sample_docstrings["TokenClassification"] elif "MultipleChoice" in model_class: code_sample = sample_docstrings["MultipleChoice"] elif "MaskedLM" in model_class or model_class in ["FlaubertWithLMHeadModel", "XLMWithLMHeadModel"]: code_sample = sample_docstrings["MaskedLM"] elif "LMHead" in model_class or "CausalLM" in model_class: code_sample = sample_docstrings["LMHead"] elif "CTC" in model_class: code_sample = sample_docstrings["CTC"] elif "AudioFrameClassification" in model_class: code_sample = sample_docstrings["AudioFrameClassification"] elif "XVector" in model_class and modality == "audio": code_sample = sample_docstrings["AudioXVector"] elif "Model" in model_class and modality == "audio": code_sample = sample_docstrings["SpeechBaseModel"] elif "Model" in model_class and modality == "vision": code_sample = sample_docstrings["VisionBaseModel"] elif "Model" in model_class or "Encoder" in model_class: code_sample = sample_docstrings["BaseModel"] elif "ImageClassification" in model_class: code_sample = sample_docstrings["ImageClassification"] else: raise ValueError(f"Docstring can't be built for model {model_class}") code_sample = filter_outputs_from_example( code_sample, expected_output=expected_output, expected_loss=expected_loss ) if real_checkpoint is not None: code_sample = FAKE_MODEL_DISCLAIMER + code_sample func_doc = (fn.__doc__ or "") + "".join(docstr) output_doc = "" if output_type is None else _prepare_output_docstrings(output_type, config_class) built_doc = code_sample.format(**doc_kwargs) fn.__doc__ = func_doc + output_doc + built_doc return fn return docstring_decorator def replace_return_docstrings(output_type=None, config_class=None): def docstring_decorator(fn): func_doc = fn.__doc__ lines = func_doc.split("\n") i = 0 while i < len(lines) and re.search(r"^\s*Returns?:\s*$", lines[i]) is None: i += 1 if i < len(lines): indent = len(_get_indent(lines[i])) lines[i] = _prepare_output_docstrings(output_type, config_class, min_indent=indent) func_doc = "\n".join(lines) else: raise ValueError( f"The function {fn} should have an empty 'Return:' or 'Returns:' in its docstring as placeholder, " f"current docstring is:\n{func_doc}" ) fn.__doc__ = func_doc return fn return docstring_decorator def copy_func(f): """Returns a copy of a function f.""" # Based on http://stackoverflow.com/a/6528148/190597 (Glenn Maynard) g = types.FunctionType(f.__code__, f.__globals__, name=f.__name__, argdefs=f.__defaults__, closure=f.__closure__) g = functools.update_wrapper(g, f) g.__kwdefaults__ = f.__kwdefaults__ return g