File size: 5,056 Bytes
6ebcdab
 
 
 
55db529
 
3dc4061
3202d1b
3dc4061
 
 
 
 
 
c013315
3dc4061
3202d1b
3dc4061
3202d1b
3dc4061
 
b1cc2f7
3dc4061
3202d1b
b1cc2f7
3dc4061
 
 
 
 
 
2469625
3dc4061
b1cc2f7
3dc4061
7da94d2
afd1412
 
 
b1cc2f7
3dc4061
 
 
 
 
 
 
 
 
 
 
8867e8a
6b13747
054ea8d
c30f436
3dc4061
e5d09cd
7d0b03f
e5d09cd
 
cdc3662
054ea8d
0caf514
d52d3a1
8d2dad8
d52d3a1
3eb6511
8dd59d8
bd0b549
bdec0c5
bf9669d
bdec0c5
 
bf9669d
bdec0c5
81395fc
030a756
054ea8d
030a756
bf07a1e
 
b1cc2f7
 
237d9d2
1b29238
83aa350
b1cc2f7
 
1b29238
bf07a1e
b1cc2f7
1b29238
b1cc2f7
2be2050
1b29238
bf07a1e
1b29238
b1cc2f7
 
 
 
 
 
1b29238
237d9d2
1b29238
a3eaa33
 
 
b01335d
1b29238
b01335d
 
 
 
3202d1b
b01335d
 
 
 
1b29238
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
from transformers import AutoConfig, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, MistralForCausalLM
from peft import PeftModel, PeftConfig
import torch
import gradio as gr
import random
from textwrap import wrap

# Functions to Wrap the Prompt Correctly
def wrap_text(text, width=90):
    """Wrap every line of *text* to at most *width* columns.

    The text is split on line breaks, each line is wrapped on its own with
    ``textwrap.fill``, and the wrapped lines are joined back together with
    line breaks, so the original line structure is kept.

    Args:
        text: The text to wrap.
        width: Maximum line width handed to ``textwrap.fill``.

    Returns:
        The wrapped text as a single string.
    """
    # The file-level import is `from textwrap import wrap`, which does not
    # bind the module name `textwrap`; import it here so `textwrap.fill`
    # resolves instead of raising NameError.
    import textwrap

    return '\n'.join(
        textwrap.fill(line, width=width) for line in text.split('\n')
    )

def multimodal_prompt(user_input, system_prompt):
    """
    Build a Falcon-style prompt, run it through the PEFT model and decode the result.
    Args:
        user_input: The user's message, placed after ``User:`` in the prompt.
        system_prompt: System prompt, placed inside the leading ``{{ ... }}`` section.
    Returns:
        The decoded text of the first generated sequence, with special tokens skipped.
    """
    # Falcon-like layout: system section, user turn, then the cue for the reply
    prompt = f"{{{{ {system_prompt} }}}}\nUser: {user_input}\nFalcon:"

    # Tokenize and move the tensors to the module-level device
    batch = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(device)

    # Sampling settings; the end-of-sequence token doubles as the padding token
    cfg = peft_model.config
    generation_options = {
        "max_length": 500,
        "use_cache": True,
        "early_stopping": False,
        "bos_token_id": cfg.bos_token_id,
        "eos_token_id": cfg.eos_token_id,
        "pad_token_id": cfg.eos_token_id,
        "temperature": 0.4,
        "do_sample": True,
    }
    sequences = peft_model.generate(**batch, **generation_options)

    # Only the first returned sequence is decoded
    return tokenizer.decode(sequences[0], skip_special_tokens=True)

# Pick the GPU when one is available, otherwise fall back to the CPU.
# NOTE(review): nothing in this section moves `peft_model` to `device`; only
# `multimodal_prompt` sends its inputs there. Confirm that the model and its
# inputs end up on the same device when CUDA is available.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Hub IDs: the Falcon model used for the tokenizer and config, and the
# GaiaMiniMed repo used for the weights.
base_model_id = "tiiuae/falcon-7b-instruct"
model_directory = "Tonic/GaiaMiniMed"

# Instantiate the tokenizer from the base model, padding on the left.
tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True, padding_side="left")
# Earlier Mistral-based variant, kept for reference:
# tokenizer = AutoTokenizer.from_pretrained("Tonic/mistralmed", trust_remote_code=True, padding_side="left")
# tokenizer.pad_token = tokenizer.eos_token
# tokenizer.padding_side = 'left'

# Load the GaiaMiniMed model.
# The configuration comes from the base model ...
model_config = AutoConfig.from_pretrained(base_model_id)
# ... while the causal-LM weights are requested from `model_directory`, and the
# result is then wrapped as a PEFT model from that same repo.
# NOTE(review): the commented-out variant further down loads the base weights
# from `base_model_id` instead — confirm which repo is meant to supply them.
peft_model = AutoModelForCausalLM.from_pretrained(model_directory, config=model_config)
peft_model = PeftModel.from_pretrained(peft_model, model_directory)

# Alternative kept for reference: take both config and weights from the base model.
#model_config = AutoConfig.from_pretrained(base_model_id)

#peft_model = AutoModelForCausalLM.from_pretrained(base_model_id, config=model_config)

# Earlier Mistral-based loading path, kept for reference:
# peft_config = PeftConfig.from_pretrained("Tonic/mistralmed")
# peft_model = MistralForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1", trust_remote_code=True)
# peft_model = PeftModel.from_pretrained(peft_model, "Tonic/mistralmed")

class ChatBot:
    """Chat front-end that sends Falcon-formatted prompts to the PEFT model.

    Uses the module-level ``tokenizer`` and ``peft_model``. Each call to
    ``predict`` is independent: the recorded history is stored but is not fed
    back into later prompts.
    """

    def __init__(self, system_prompt="You are an expert medical analyst:"):
        """Create a bot.

        Args:
            system_prompt: Default system prompt, used whenever a call does
                not supply a non-blank one of its own.
        """
        self.system_prompt = system_prompt
        # Alternating prompt / response strings, in call order.
        self.history = []

    def format_prompt(self, user_input, system_prompt=None):
        """Return the Falcon-style prompt for one user turn.

        Args:
            user_input: The user's message.
            system_prompt: Per-call system prompt. ``None`` or a blank string
                selects the instance default.

        Returns:
            The prompt string: system section, user turn, reply cue.
        """
        # An empty text box arrives as "", so treat blank like missing.
        if system_prompt is None or not system_prompt.strip():
            system_prompt = self.system_prompt
        return f"{{{{ {system_prompt} }}}}\nUser: {user_input}\nFalcon:"

    def predict(self, user_input, system_prompt=None):
        """Generate a response for *user_input*.

        Args:
            user_input: The user's message.
            system_prompt: System prompt for this call. Previously this
                argument was accepted but ignored; it is now honoured, with
                the instance default as the fallback when it is missing or
                blank.

        Returns:
            The decoded text of the first generated sequence, with special
            tokens skipped.
        """
        formatted_input = self.format_prompt(user_input, system_prompt)

        # Encode the formatted input using the tokenizer
        input_ids = tokenizer.encode(formatted_input, return_tensors="pt", add_special_tokens=False)
        # Keep the inputs on whatever device the model's weights live on, so
        # generation works whether or not the model has been moved to a GPU.
        input_ids = input_ids.to(next(peft_model.parameters()).device)

        # Generate a response using the model; the end-of-sequence token
        # doubles as the padding token.
        response = peft_model.generate(
            input_ids=input_ids,
            max_length=500,
            use_cache=False,
            early_stopping=False,
            bos_token_id=peft_model.config.bos_token_id,
            eos_token_id=peft_model.config.eos_token_id,
            pad_token_id=peft_model.config.eos_token_id,
            temperature=0.4,
            do_sample=True,
        )

        # Decode the generated response to text
        response_text = tokenizer.decode(response[0], skip_special_tokens=True)

        # Append the Falcon-like conversation to the history
        self.history.append(formatted_input)
        self.history.append(response_text)

        return response_text

# One shared bot instance backs the Gradio callback.
bot = ChatBot()

# Static text shown in the UI.
title = "👋🏻Welcome to Tonic's GaiaMiniMed Chat🚀"
description = "You can use this Space to test out the current model [(Tonic/GaiaMiniMed)](https://huggingface.co/Tonic/GaiaMiniMed) or duplicate this Space and use it locally or on 🤗HuggingFace. [Join me on Discord to build together](https://discord.gg/VqTxc76K3u)."

# Each example row supplies one value per input: the question, then the system prompt.
examples = [
    [
        "What is the proper treatment for buccal herpes?",
        "You are a medicine and public health expert, you will receive a question, answer the question, and provide a complete answer",
    ],
]

# Two free-text inputs (user message, system prompt) map to one text output.
iface = gr.Interface(
    fn=bot.predict,
    inputs=["text", "text"],
    outputs="text",
    examples=examples,
    title=title,
    description=description,
    theme="ParityError/Anime",
)

iface.launch()