# Text-generation settings.
# Written in block style, one key per line: nested "key: value" pairs cannot
# share a single line in YAML, so each option gets its own indented entry.
# NOTE(review): the key names look like Hugging Face `GenerationConfig`
# options; confirm against the code that loads this file.
generation_config:
  temperature: 0.95
  top_p: 0.9
  top_k: 50
  # NOTE(review): presumably 1 means no beam search; verify with the consumer.
  num_beams: 1
  use_cache: true
  repetition_penalty: 1.2
  max_new_tokens: 4096
  # Canonical lowercase boolean (was `True`).
  do_sample: true