Spaces:

rahul2001
/

doctor_chat_demo

Runtime error

File size: 1,295 Bytes

6947113
 
 
f07af65
4737a02
fc1f240
4737a02
507bdd7
6947113
 
 
1dabe5d
8c29dfd
1dabe5d
 
 
 
 
 
 
 
 
 
 
6947113
 
 
 
 
 
1dabe5d
 
 
6947113

import random
import time

import accelerate
import bitsandbytes
import gradio as gr
import peft
import torch
# Use a pipeline as a high-level helper
from transformers import pipeline
from transformers import BitsAndBytesConfig
from transformers import AutoTokenizer, AutoModelForCausalLM


# Hub repository that provides both the tokenizer files and the model weights.
MODEL_ID = "llSourcell/medllama2_7b"

# 4-bit NF4 quantization with double quantization; matmuls are computed in
# bfloat16. This configuration applies to the model weights only.
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# A tokenizer has no weights to quantize, so it is loaded without
# `quantization_config` (the original passed it here by mistake).
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=nf4_config,
)
# Upper bound on the number of tokens generated per reply. This limits only the
# newly generated text, so a long user message cannot use up the whole budget.
MAX_NEW_TOKENS = 128

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, chat_history):
        """Generate a model reply to ``message`` and append it to the chat.

        Args:
            message: Text submitted through the textbox.
            chat_history: List of ``(user_message, bot_message)`` pairs shown
                in the chatbot component; may be ``None`` or empty.

        Returns:
            A ``(textbox_value, chat_history)`` tuple: an empty string to clear
            the textbox, and the history including the new exchange.
        """
        chat_history = chat_history or []

        # Nothing to answer: leave the conversation unchanged.
        if not message or not message.strip():
            return "", chat_history

        # Tensors must live on the same device as the model weights.
        inputs = tokenizer(message, return_tensors="pt").to(model.device)
        prompt_length = inputs["input_ids"].shape[1]

        output_ids = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=MAX_NEW_TOKENS,
        )

        # A causal LM returns prompt + continuation; keep only the continuation
        # so the bot does not echo the user's message back.
        reply_ids = output_ids[0, prompt_length:]
        bot_message = tokenizer.decode(
            reply_ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        ).strip()

        chat_history.append((message, bot_message))
        return "", chat_history

    # Pressing Enter in the textbox sends the message and refreshes the chat.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])

demo.launch()