import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr

# Load the tokenizer and model
model_path = 'nvidia/Minitron-4B-Base'
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map='cpu',
    torch_dtype=torch.float32
)

def generate_text(prompt, max_length=100):
    # Encode the input text
    inputs = tokenizer.encode(prompt, return_tensors='pt')

    # Generate the output; Gradio sliders can return floats, so cast to int
    outputs = model.generate(
        inputs,
        max_length=int(max_length),
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        do_sample=True,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id
    )

    # Decode and return the output
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return generated_text

# Create Gradio interface
demo = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Enter your prompt", placeholder="Type your prompt here..."),
        gr.Slider(minimum=20, maximum=200, value=100, step=10, label="Max Length")
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="Text Generation with Minitron-4B",
    description="Enter a prompt and get AI-generated text completion.",
    # Each example must supply a value for every input component
    examples=[
        ["Complete the paragraph: our solar system is", 100],
        ["Write a short story about", 100],
        ["Explain the concept of", 100]
    ]
)

# Launch the application
if __name__ == "__main__":
    demo.launch(share=False)
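# Usage sketch (assumes the dependencies are installed, e.g. via
# `pip install torch transformers gradio`, and that the script is
# saved under a hypothetical name such as app.py):
#   python app.py
# By default, Gradio serves the interface locally at http://127.0.0.1:7860.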