import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Use the GPU when available; CPU inference for a model this size will be slow.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cpu":
    print("Warning: CUDA is not available. Running on CPU, which may be slow.")

# Load the tokenizer and the model in 8-bit precision. device_map="auto" already places
# the quantized weights on the available device, so no extra .to(device) call is needed
# (moving an 8-bit model with .to() raises an error).
model_name = "ruslanmv/ai-medical-model-32bit"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    load_in_8bit=True,
)
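
# Note: recent transformers releases deprecate passing load_in_8bit directly and expect a
# BitsAndBytesConfig instead. A minimal sketch of that variant (assuming bitsandbytes is
# installed) would be:
#
#     from transformers import BitsAndBytesConfig
#     model = AutoModelForCausalLM.from_pretrained(
#         model_name,
#         device_map="auto",
#         quantization_config=BitsAndBytesConfig(load_in_8bit=True),
#     )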


def ask_medical_question(question):
    # Build a Llama-3-style prompt: a system instruction followed by the user's question.
    prompt = f"<|start_header_id|>system<|end_header_id|> You are a Medical AI chatbot assistant. <|eot_id|><|start_header_id|>User: <|end_header_id|>This is the question: {question}<|eot_id|>"

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
        temperature=0.7,
        do_sample=True,
        top_p=0.9,
        top_k=30,
    )
    # Decode only the newly generated tokens so the prompt is not echoed in the answer.
    response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
    )
    return response


# Expose the chatbot through a simple text-in / text-out Gradio interface.
iface = gr.Interface(fn=ask_medical_question, inputs="text", outputs="text")
iface.launch()
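# Usage note: iface.launch(share=True) would additionally create a temporary public link,
# which is convenient for testing the demo from another machine.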