Why does batch size > 1 not increase model speed?
#41
by
zokica
- opened
If I increase the batch size, the model speed stays the same, or even gets slower. Why?
timea = time.time()
final_text = ""
batch_size = 5
def process_batch(batch):
    """Format, tokenize and generate continuations for a batch of inputs.

    Args:
        batch: iterable of raw items accepted by ``formatting_func``
            (presumably sentences/prompts — TODO confirm against caller).

    Returns:
        list[str]: the batch-decoded generations, one per input. Note the
        decoded strings include the prompt, since ``generate`` returns the
        full sequence and nothing is sliced off here.
    """
    batch_prompts = [formatting_func(sent) for sent in batch]

    # FIX: decoder-only (causal LM) models must be LEFT-padded for batched
    # generation — with the default right padding, generation continues after
    # the pad tokens, producing wrong outputs and wasting the batched compute
    # (which is why batch_size > 1 appeared to give no speedup).
    # Save/restore so we don't leave a lasting global side effect.
    prev_padding_side = tokenizer.padding_side
    tokenizer.padding_side = "left"
    try:
        model_inputs = tokenizer(
            batch_prompts, return_tensors="pt", padding=True, truncation=True
        ).to("cuda:0")
    finally:
        tokenizer.padding_side = prev_padding_side

    print("model_inputs", model_inputs)

    timeb = time.time()
    # no_grad: inference only — skip autograd bookkeeping to save memory/time.
    with torch.no_grad():
        outputs = ft_model.generate(
            **model_inputs,
            max_new_tokens=60,
            do_sample=True,
            top_k=120,
            pad_token_id=tokenizer.pad_token_id,
            use_cache=True,
        )
    # Elapsed generation time for this batch.
    print(-timeb + time.time())

    timeb = time.time()
    decoded_texts = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    # Elapsed decode time for this batch.
    print(-timeb + time.time())
    return decoded_texts
# Walk over the inputs in fixed-size windows and run each window through
# the model.
# NOTE(review): batch_texts is overwritten on every iteration — presumably
# the results should be accumulated (e.g. into final_text); verify intent.
for start in range(0, len(sents), batch_size):
    current_batch = sents[start:start + batch_size]
    batch_texts = process_batch(current_batch)