""" import gradio as gr from transformers import AutoModelForCausalLM, AutoTokenizer # Load your fine-tuned model and tokenizer model_name = "crystal99/my-fine-tuned-model" model = AutoModelForCausalLM.from_pretrained(model_name) tokenizer = AutoTokenizer.from_pretrained(model_name) # Define the text generation function def generate_text(prompt): inputs = tokenizer(prompt, return_tensors="pt") outputs = model.generate(inputs['input_ids'], max_length=100, num_return_sequences=1) generated_text = tokenizer.decode(outputs[0], skip_special_tokens=False) return generated_text # Set up the Gradio interface iface = gr.Interface(fn=generate_text, inputs="text", outputs="text", title="Text Generator using Fine-Tuned Model") # Launch the Gradio interface iface.launch() """ import gradio as gr from transformers import AutoModelForCausalLM, AutoTokenizer import torch # Load your fine-tuned model and tokenizer model_name = "crystal99/my-fine-tuned-model" model = AutoModelForCausalLM.from_pretrained(model_name) tokenizer = AutoTokenizer.from_pretrained(model_name) # Move model to GPU if available and enable fp16 for faster inference device = "cuda" if torch.cuda.is_available() else "cpu" model.to(device) # Define the text generation function def generate_text(prompt): # Prevent gradient calculation to speed up inference with torch.no_grad(): inputs = tokenizer(f"<|STARTOFTEXT|> <|USER|> {prompt} <|BOT|>", return_tensors="pt").to(device) outputs = model.generate(inputs['input_ids'], max_length=100, num_return_sequences=1, do_sample=False) generated_text = tokenizer.decode(outputs[0], skip_special_tokens=False) result2 = generated_text.split("<|ENDOFTEXT|>") finalRes = result2[0].split("<|BOT|>") print(generated_text) return finalRes[-1] # Set up the Gradio interface iface = gr.Interface(fn=generate_text, inputs="text", outputs="text", title="Text Generator using Fine-Tuned Model") # Launch the Gradio interface iface.launch()