from __future__ import annotations

import os
import tempfile
import unittest

import numpy as np
from huggingface_hub import hf_hub_download

from transformers import is_tensorflow_text_available, is_tf_available
from transformers.testing_utils import require_tensorflow_text, require_tf, slow

from ..test_modeling_tf_common import floats_tensor
from .test_framework_agnostic import GenerationIntegrationTestsMixin
|
|
if is_tf_available():
    import tensorflow as tf

    from transformers import (
        AutoTokenizer,
        TFAutoModelForCausalLM,
        TFAutoModelForSeq2SeqLM,
        TFAutoModelForSpeechSeq2Seq,
        TFAutoModelForVision2Seq,
        TFBartForConditionalGeneration,
        TFLogitsProcessorList,
        TFMinLengthLogitsProcessor,
    )
    from transformers.modeling_tf_utils import keras

if is_tensorflow_text_available():
    import tensorflow_text as text
|
|
@require_tf
class TFGenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMixin):
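    # `framework_dependent_parameters` references TF-only names, so it has to be gated on TF availability just like
    # the imports above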
    if is_tf_available():
        framework_dependent_parameters = {
            "AutoModelForCausalLM": TFAutoModelForCausalLM,
            "AutoModelForSpeechSeq2Seq": TFAutoModelForSpeechSeq2Seq,
            "AutoModelForSeq2SeqLM": TFAutoModelForSeq2SeqLM,
            "AutoModelForVision2Seq": TFAutoModelForVision2Seq,
            "LogitsProcessorList": TFLogitsProcessorList,
            "MinLengthLogitsProcessor": TFMinLengthLogitsProcessor,
            "create_tensor_fn": tf.convert_to_tensor,
            "floats_tensor": floats_tensor,
            "return_tensors": "tf",
        }
|
    @slow
    def test_generate_tf_function_export_fixed_input_length(self):
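        # TF-only test: checks that generate() can be traced with tf.function and exported through tf.saved_model
        # with a fixed input length and a dynamic batch size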
        test_model = TFAutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")
        input_length = 2
        max_new_tokens = 2

        class DummyModel(tf.Module):
            def __init__(self, model):
                super().__init__()
                self.model = model

            @tf.function(
                input_signature=(
                    tf.TensorSpec((None, input_length), tf.int32, name="input_ids"),
                    tf.TensorSpec((None, input_length), tf.int32, name="attention_mask"),
                ),
                jit_compile=True,
            )
            def serving(self, input_ids, attention_mask):
                outputs = self.model.generate(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    max_new_tokens=max_new_tokens,
                    return_dict_in_generate=True,
                )
                return {"sequences": outputs["sequences"]}

        dummy_input_ids = [[2, 0], [102, 103]]
        dummy_attention_masks = [[1, 0], [1, 1]]
        dummy_model = DummyModel(model=test_model)
        with tempfile.TemporaryDirectory() as tmp_dir:
            tf.saved_model.save(dummy_model, tmp_dir, signatures={"serving_default": dummy_model.serving})
            serving_func = tf.saved_model.load(tmp_dir).signatures["serving_default"]
            for batch_size in range(1, len(dummy_input_ids) + 1):
                inputs = {
                    "input_ids": tf.constant(dummy_input_ids[:batch_size]),
                    "attention_mask": tf.constant(dummy_attention_masks[:batch_size]),
                }
                tf_func_outputs = serving_func(**inputs)["sequences"]
                tf_model_outputs = test_model.generate(**inputs, max_new_tokens=max_new_tokens)
                tf.debugging.assert_equal(tf_func_outputs, tf_model_outputs)
|
    @slow
    def test_generate_tf_function_export_fixed_batch_size(self):
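        # TF-only test: checks that generate() can be traced with tf.function and exported through tf.saved_model
        # with a fixed batch size and a dynamic input length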
        test_model = TFAutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")
        batch_size = 1
        max_new_tokens = 2

        class DummyModel(tf.Module):
            def __init__(self, model):
                super().__init__()
                self.model = model

            @tf.function(
                input_signature=(
                    tf.TensorSpec((batch_size, None), tf.int32, name="input_ids"),
                    tf.TensorSpec((batch_size, None), tf.int32, name="attention_mask"),
                ),
                jit_compile=True,
            )
            def serving(self, input_ids, attention_mask):
                outputs = self.model.generate(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    max_new_tokens=max_new_tokens,
                    return_dict_in_generate=True,
                )
                return {"sequences": outputs["sequences"]}

        dummy_input_ids = [[2], [102, 103]]
        dummy_attention_masks = [[1], [1, 1]]
        dummy_model = DummyModel(model=test_model)
        with tempfile.TemporaryDirectory() as tmp_dir:
            tf.saved_model.save(dummy_model, tmp_dir, signatures={"serving_default": dummy_model.serving})
            serving_func = tf.saved_model.load(tmp_dir).signatures["serving_default"]
            for input_row in range(len(dummy_input_ids)):
                inputs = {
                    "input_ids": tf.constant([dummy_input_ids[input_row]]),
                    "attention_mask": tf.constant([dummy_attention_masks[input_row]]),
                }
                tf_func_outputs = serving_func(**inputs)["sequences"]
                tf_model_outputs = test_model.generate(**inputs, max_new_tokens=max_new_tokens)
                tf.debugging.assert_equal(tf_func_outputs, tf_model_outputs)
|
    @slow
    @require_tensorflow_text
    def test_generate_tf_function_export_with_tf_tokenizer(self):
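        # TF-only test: checks that an in-graph SentencePiece tokenizer, generate(), and detokenization can be
        # wrapped together in a Keras model and saved with `keras_model.save()`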
        with tempfile.TemporaryDirectory() as tmp_dir:
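            # file needed to load the in-graph SentencePiece tokenizer below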
            hf_hub_download(repo_id="google/flan-t5-small", filename="spiece.model", local_dir=tmp_dir)

            class CompleteSentenceTransformer(keras.layers.Layer):
                def __init__(self):
                    super().__init__()
                    self.tokenizer = text.SentencepieceTokenizer(
                        model=tf.io.gfile.GFile(os.path.join(tmp_dir, "spiece.model"), "rb").read()
                    )
                    self.model = TFAutoModelForSeq2SeqLM.from_pretrained("hf-internal-testing/tiny-random-t5")

                def call(self, inputs, *args, **kwargs):
                    tokens = self.tokenizer.tokenize(inputs)
                    input_ids, attention_mask = text.pad_model_inputs(
                        tokens, max_seq_length=64, pad_value=self.model.config.pad_token_id
                    )
                    outputs = self.model.generate(input_ids=input_ids, attention_mask=attention_mask)
                    return self.tokenizer.detokenize(outputs)

            complete_model = CompleteSentenceTransformer()
            inputs = keras.layers.Input(shape=(1,), dtype=tf.string, name="inputs")
            outputs = complete_model(inputs)
            keras_model = keras.Model(inputs, outputs)
            keras_model.save(tmp_dir)
|
    def test_eos_token_id_int_and_list_top_k_top_sampling(self):
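        # checks that `eos_token_id`, passed either as an int or as a list, stops sampling at the expected length
        # when the seed is fixed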
        generation_kwargs = {
            "do_sample": True,
            "num_beams": 1,
            "top_p": 0.7,
            "top_k": 10,
            "temperature": 0.7,
        }
        expectation = 14

        tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2")
        text = """Hello, my dog is cute and"""
        tokens = tokenizer(text, return_tensors="tf")
        model = TFAutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")

        eos_token_id = 638
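        # forces the generation to run on CPU with a fixed seed, so that the sampled output is reproducible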
        with tf.device("/CPU:0"):
            tf.random.set_seed(0)
            generated_tokens = model.generate(**tokens, eos_token_id=eos_token_id, **generation_kwargs)
        self.assertTrue(expectation == len(generated_tokens[0]))

        eos_token_id = [638, 198]
        with tf.device("/CPU:0"):
            tf.random.set_seed(0)
            generated_tokens = model.generate(**tokens, eos_token_id=eos_token_id, **generation_kwargs)
        self.assertTrue(expectation == len(generated_tokens[0]))
|
    def test_model_kwarg_encoder_signature_filtering(self):
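        # checks that model kwargs that are not part of the encoder's signature are filtered out before generate()
        # calls the encoder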
        bart_tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-bart")
        article = """Hugging Face is a technology company based in New York and Paris."""
        input_ids = bart_tokenizer(article, return_tensors="tf").input_ids
        bart_model = TFBartForConditionalGeneration.from_pretrained("hf-internal-testing/tiny-random-bart")
        output = bart_model.generate(input_ids).numpy()
|
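        # A fake model whose call() accepts an extra "foo" argument. "foo" is not part of the encoder's signature, so
        # generate() filters it out before calling the encoder and the output matches the original model's output.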
        class FakeBart(TFBartForConditionalGeneration):
            def call(self, input_ids, foo=None, **kwargs):
                return super().call(input_ids, **kwargs)

        bart_model = FakeBart.from_pretrained("hf-internal-testing/tiny-random-bart")
        fake_output = bart_model.generate(input_ids, foo="bar").numpy()
        self.assertTrue(np.array_equal(output, fake_output))
|
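        # Signature filtering only kicks in when the encoder does not accept wildcard **kwargs. With an encoder that
        # does accept them, "foo" is passed through, which makes generation fail below.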
        class FakeEncoder(bart_model.model.encoder.__class__):
            def call(self, input_ids, **kwargs):
                return super().call(input_ids, **kwargs)

        fake_encoder = FakeEncoder(bart_model.config, bart_model.model.shared)
        bart_model.model.encoder = fake_encoder
|
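        # Plain generation still works (the output differs because the new encoder has freshly initialized weights)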
        fake_output = bart_model.generate(input_ids).numpy()
        with self.assertRaises(ValueError):
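            # "foo" is no longer filtered out, so the unexpected argument raises a ValueError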
            bart_model.generate(input_ids, foo="bar")