from transformers import AutoConfig, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, MistralForCausalLM
from peft import PeftModel, PeftConfig
import torch
import gradio as gr
import random
import textwrap
# Functions to Wrap the Prompt Correctly
def wrap_text(text, width=90):
    lines = text.split('\n')
    wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
    wrapped_text = '\n'.join(wrapped_lines)
    return wrapped_text
def multimodal_prompt(user_input, system_prompt):
    """
    Generates text using a large language model, given a user input and a system prompt.
    Args:
        user_input: The user's input text to generate a response for.
        system_prompt: Optional system prompt.
    Returns:
        A string containing the generated text in the Falcon-like format.
    """
    # Combine user input and system prompt
    formatted_input = f"{{{{ {system_prompt} }}}}\nUser: {user_input}\nFalcon:"

    # Encode the input text
    encodeds = tokenizer(formatted_input, return_tensors="pt", add_special_tokens=False)
    model_inputs = encodeds.to(device)

    # Generate a response using the model
    output = peft_model.generate(
        **model_inputs,
        max_length=500,
        use_cache=True,
        early_stopping=False,
        bos_token_id=peft_model.config.bos_token_id,
        eos_token_id=peft_model.config.eos_token_id,
        pad_token_id=peft_model.config.eos_token_id,
        temperature=0.4,
        do_sample=True,
    )

    # Decode the response
    response_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return response_text
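
# Usage sketch for multimodal_prompt (run only after the tokenizer and model below are
# loaded); the question is the same one used in the Gradio `examples` list further down:
# reply = multimodal_prompt("What is the proper treatment for buccal herpes?",
#                           "You are an expert medical analyst:")
# print(wrap_text(reply))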
# Define the device
device = "cuda" if torch.cuda.is_available() else "cpu"
# Use the base model's ID
base_model_id = "tiiuae/falcon-7b-instruct"
model_directory = "Tonic/GaiaMiniMed"
# Instantiate the Tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True, padding_side="left")
# tokenizer = AutoTokenizer.from_pretrained("Tonic/mistralmed", trust_remote_code=True, padding_side="left")
# tokenizer.pad_token = tokenizer.eos_token
# tokenizer.padding_side = 'left'
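# Note: this tokenizer does not define a pad token, so the generate() calls below pass
# pad_token_id=eos_token_id; left padding keeps prompt tokens adjacent to the newly
# generated tokens for this decoder-only model.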
# Load the base Falcon model with the specified configuration,
# then apply the GaiaMiniMed PEFT adapter from model_directory on top of it
model_config = AutoConfig.from_pretrained(base_model_id)
peft_model = AutoModelForCausalLM.from_pretrained(base_model_id, config=model_config, trust_remote_code=True)
peft_model = PeftModel.from_pretrained(peft_model, model_directory)
peft_model = peft_model.to(device)
# Load the PEFT model
# peft_config = PeftConfig.from_pretrained("Tonic/mistralmed")
# peft_model = MistralForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1", trust_remote_code=True)
# peft_model = PeftModel.from_pretrained(peft_model, "Tonic/mistralmed")
class ChatBot:
    def __init__(self, system_prompt="You are an expert medical analyst:"):
        self.system_prompt = system_prompt
        self.history = []

    def predict(self, user_input, system_prompt):
        # Use the system prompt passed from the interface, falling back to the default
        system_prompt = system_prompt or self.system_prompt
        # Combine the user's input with the system prompt in Falcon format
        formatted_input = f"{{{{ {system_prompt} }}}}\nUser: {user_input}\nFalcon:"
        # Encode the formatted input using the tokenizer and move it to the model's device
        input_ids = tokenizer.encode(formatted_input, return_tensors="pt", add_special_tokens=False).to(device)
        # Generate a response using the model
        response = peft_model.generate(
            input_ids=input_ids,
            max_length=500,
            use_cache=False,
            early_stopping=False,
            bos_token_id=peft_model.config.bos_token_id,
            eos_token_id=peft_model.config.eos_token_id,
            pad_token_id=peft_model.config.eos_token_id,
            temperature=0.4,
            do_sample=True,
        )
        # Decode the generated response to text
        response_text = tokenizer.decode(response[0], skip_special_tokens=True)
        # Append the Falcon-like conversation to the history
        self.history.append(formatted_input)
        self.history.append(response_text)
        return response_text
bot = ChatBot()
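# Quick local check (sketch) without launching the UI; unlike multimodal_prompt above,
# this path also records the exchange in bot.history:
# print(bot.predict("What is the proper treatment for buccal herpes?",
#                   "You are an expert medical analyst:"))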
title = "👋🏻Welcome to Tonic's GaiaMiniMed Chat🚀"
description = "You can use this Space to test out the current model [(Tonic/GaiaMiniMed)](https://huggingface.co/Tonic/GaiaMiniMed) or duplicate this Space and use it locally or on 🤗HuggingFace. [Join me on Discord to build together](https://discord.gg/VqTxc76K3u)."
examples = [["What is the proper treatment for buccal herpes?", "You are a medicine and public health expert, you will receive a question, answer the question, and provide a complete answer"]]
iface = gr.Interface(
    fn=bot.predict,
    title=title,
    description=description,
    examples=examples,
    inputs=["text", "text"],  # Take user input and system prompt separately
    outputs="text",
    theme="ParityError/Anime",
)

iface.launch()