import torch
from transformers import AutoTokenizer
from peft import AutoPeftModelForCausalLM
import gradio as gr

# Load the fine-tuned model and tokenizer
model_path = "BoburAmirov/test-llama-uz"  # Adjust this to the path where your fine-tuned model is saved
model = AutoPeftModelForCausalLM.from_pretrained(model_path, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Ensure the tokenizer settings match those used during training
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Set the model to evaluation mode
model.eval()


def generate_text(input_prompt):
    # Tokenize the input and move it to the same device as the model
    inputs = tokenizer(input_prompt, return_tensors="pt").to(model.device)

    # Generate text
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_length=400,          # Adjust max_length as needed
            num_return_sequences=1,
            do_sample=True,          # Enable sampling so temperature/top_p/top_k take effect
            temperature=0.7,         # Control randomness
            top_p=0.9,               # Control diversity
            top_k=50,                # Control diversity
        )

    # Decode the generated text
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return generated_text


# Create a Gradio interface
iface = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
    outputs="text",
    title="Text Generation with LLaMA",
    description="Generate text using a fine-tuned LLaMA model.",
)

if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)
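

# --- Optional: query the running app programmatically ---
# A minimal sketch, assuming the app above is running on the default host/port
# and that the `gradio_client` package is installed (`pip install gradio_client`).
# "/predict" is the default api_name exposed by a single gr.Interface; adjust if
# your deployment names the endpoint differently.
#
# from gradio_client import Client
#
# client = Client("http://localhost:7860")
# result = client.predict("Enter your prompt here...", api_name="/predict")
# print(result)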