import torch
from transformers import AutoModelForCausalLM, AutoTokenizer  # Auto classes; Llama 3.x uses a fast tokenizer that LlamaTokenizer cannot load
import gradio as gr
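
# Dependencies: torch, transformers, accelerate (required for device_map="auto"),
# and gradio. On a Space, list them in requirements.txt.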
# Load the model and tokenizer
MODEL_NAME = "meta-llama/Llama-3.1-8B"  # matches the Llama 3.1 8B demo below; update if using a custom model
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,  # float16 for lower memory use and faster GPU inference
    device_map="auto",          # automatically place the weights on the available GPU(s)
)
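
# Note: the official meta-llama checkpoints are gated on the Hugging Face Hub. If loading
# fails with an authorization error, request access on the model page and pass a token,
# e.g. from_pretrained(MODEL_NAME, token=os.environ["HF_TOKEN"]), with `import os` added
# above and HF_TOKEN configured as a Space secret.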
# Define a function for generating responses
def generate_response(prompt):
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(DEVICE)
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=512,   # cap on newly generated tokens (prompt length excluded)
            do_sample=True,       # required for temperature/top_p to take effect
            temperature=0.7,      # adjust creativity level
            top_p=0.95,           # nucleus (top-p) sampling
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,  # Llama has no pad token; reuse EOS to avoid warnings
        )
    # Decode only the newly generated tokens so the prompt is not echoed back
    response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
    return response
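
# Example usage (uncomment for a quick smoke test without the UI):
# print(generate_response("Explain what a tokenizer does in one sentence."))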
# Gradio UI
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=3, placeholder="Enter your prompt here..."),
    outputs=gr.Textbox(label="LLaMA Response"),
    title="LLaMA 3.1 8B Chatbot",
    description="An interactive demo of the LLaMA 3.1 8B model using Hugging Face Spaces.",
)
# Launch the app
if __name__ == "__main__":
    iface.launch()
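    # Note: launch() serves the app on a local URL; Hugging Face Spaces picks this up
    # automatically, and launch(share=True) can create a temporary public link when
    # running locally.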