File size: 1,758 Bytes
0de21d0 d90ce49 252265d d90ce49 b519b92 d90ce49 b519b92 d90ce49 a270145 b519b92 a270145 b519b92 a270145 b519b92 a270145 b519b92 a270145 b519b92 a270145 b519b92 a270145 d90ce49 a270145 b519b92 a270145 d90ce49 a270145 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Hub checkpoint that provides both the tokenizer and the causal-LM weights.
model_name = "skkjodhpur/Gemma-Code-Instruct-Finetune-by-skk"

# Inference is pinned to the CPU; the same value is reused when moving inputs.
device = "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
def generate_text(prompt):
    """Generate a model reply for a single user question.

    The question is wrapped in the ``<s>[INST] ... [/INST]`` template,
    encoded with the module-level ``tokenizer`` and passed to the
    module-level ``model`` on ``device`` with sampling disabled.

    Args:
        prompt: The user's question. ``None`` and blank or whitespace-only
            strings are treated as missing input.

    Returns:
        The decoded text of ``output[0]`` with special tokens removed, a
        fixed hint when no question was supplied, or an
        ``"An error occurred: ..."`` message if tokenization or generation
        raises.
    """
    # Check for None before calling .strip() so the guard itself cannot raise.
    if prompt is None or not prompt.strip():
        return "Please enter a valid question."

    try:
        # Tokenize the templated prompt and move it to the model's device.
        input_ids = tokenizer.encode(
            f"<s>[INST] {prompt} [/INST]", return_tensors="pt"
        ).to(device)

        # Gradients are not needed for inference.
        with torch.no_grad():
            output = model.generate(
                input_ids,
                # Limit only the newly generated tokens. `max_length` also
                # counts the prompt, so a long question would leave little
                # or no room for an answer.
                max_new_tokens=100,
                num_return_sequences=1,
                do_sample=False,  # Deterministic decoding for faster response
            )

        return tokenizer.decode(output[0], skip_special_tokens=True)
    except Exception as e:
        # UI boundary: report the failure as text instead of crashing the app.
        return f"An error occurred: {e}"
# Gradio interface
def chatbot_response(user_input):
    """Gradio callback: pass ``user_input`` to ``generate_text`` and return its reply."""
    reply = generate_text(user_input)
    return reply
# Single text-in / text-out interface wired to the chatbot callback.
iface = gr.Interface(
    title="Doctors-Patient Chatbot",
    description=(
        "Ask me any question related to patient concerns. "
        "This model is designed for educational and informational purposes only. "
        "Please do not use it for medical diagnosis or treatment. "
        "Always consult a qualified healthcare provider for medical advice."
    ),
    fn=chatbot_response,
    inputs="text",
    outputs="text",
    allow_flagging="never",  # Flagging is disabled for this app
)

# Start the web server and request a public share link.
iface.launch(share=True)
|