import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from datetime import datetime
import spaces

# Markdown blurb introducing the model; rendered by gr.Markdown in the first
# column of the header row of the UI.
description = """
[🦎Salamandra-7b-instruct](https://huggingface.co/BSC-LT/salamandra-7b-instruct) is a Transformer-based decoder-only language model that has been pre-trained on 7.8 trillion tokens of highly curated data. 
The pre-training corpus contains text in 35 European languages and code. This model has been instruction-tuned and can be used as a general-purpose assistant.
"""

# Markdown community/credits panel; rendered by gr.Markdown in the second
# column of the header row of the UI.
join_us = """
## Join us:
🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 
[![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/qdfnvSPcqP) 
On 🤗Huggingface: [MultiTransformer](https://huggingface.co/MultiTransformer) 
On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [Build Tonic](https://git.tonic-ai.com/contribute)
🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
"""

# Hugging Face Hub identifier of the checkpoint served by this demo.
model_id = "BSC-LT/salamandra-7b-instruct"

# Device string used for input tensors: CUDA when torch reports it, else CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# Tokenizer and model are loaded once, at import time, and shared by all requests.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,  # load the weights in bfloat16
    device_map="auto",  # let the loader decide weight placement
)

@spaces.GPU()
def generate_text(system_prompt, prompt, temperature, max_new_tokens, top_p, repetition_penalty, do_sample):
    """Generate a single assistant reply for a system/user prompt pair.

    Args:
        system_prompt: Text sent as the "system" message of the conversation.
        prompt: Text sent as the "user" message of the conversation.
        temperature: Sampling temperature. Only forwarded when sampling is
            active; a value <= 0 falls back to greedy decoding.
        max_new_tokens: Maximum number of tokens to generate (coerced to int).
        top_p: Nucleus-sampling threshold. Only forwarded when sampling is active.
        repetition_penalty: Repetition penalty forwarded to ``model.generate``.
        do_sample: Whether to sample; when falsy, decoding is greedy.

    Returns:
        The decoded reply only (prompt tokens removed), stripped of
        surrounding whitespace.
    """
    date_string = datetime.today().strftime('%Y-%m-%d')
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt}
    ]

    chat_prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        date_string=date_string
    )

    # The prompt text is already fully templated, so it is encoded without
    # letting the tokenizer add special tokens a second time (this mirrors the
    # reference usage on the model card). Tensors go to the device the model
    # actually lives on rather than a separately computed device string,
    # because the model is placed with device_map="auto".
    inputs = tokenizer(
        chat_prompt,
        add_special_tokens=False,
        return_tensors="pt"
    ).to(model.device)

    # UI sliders may deliver whole numbers as ints; transformers validates
    # several of these settings as floats and expects an int token budget.
    generation_kwargs = {
        "max_new_tokens": int(max_new_tokens),
        "repetition_penalty": float(repetition_penalty),
        "pad_token_id": tokenizer.eos_token_id,
    }

    # A non-positive temperature is rejected by transformers when sampling,
    # so it is treated as a request for deterministic (greedy) decoding.
    use_sampling = bool(do_sample) and float(temperature) > 0
    generation_kwargs["do_sample"] = use_sampling
    if use_sampling:
        # Sampling-only knobs are forwarded only when they take effect, which
        # avoids "unused generation flag" warnings in greedy mode.
        generation_kwargs["temperature"] = float(temperature)
        generation_kwargs["top_p"] = float(top_p)

    outputs = model.generate(**inputs, **generation_kwargs)

    # Keep only the newly generated tokens. Splitting the decoded text on a
    # marker such as "assistant\n" would cut off any reply that happens to
    # contain that text itself.
    prompt_length = inputs["input_ids"].shape[-1]
    reply_ids = outputs[0][prompt_length:]
    return tokenizer.decode(reply_ids, skip_special_tokens=True).strip()

def update_output(system_prompt, prompt, temperature, max_new_tokens, top_p, repetition_penalty, do_sample):
    """Click handler for the generate button: forward every UI value to ``generate_text``.

    The arguments are passed through unchanged and in the same order, and the
    text returned by ``generate_text`` is returned as-is.
    """
    generation_args = (
        system_prompt,
        prompt,
        temperature,
        max_new_tokens,
        top_p,
        repetition_penalty,
        do_sample,
    )
    return generate_text(*generation_args)

with gr.Blocks() as demo:
    # Page title.
    gr.Markdown(value="# 🦎 Welcome to Tonic's Salamandra-7b-instruct Demo")

    # Header row: model description on the left, community links on the right.
    with gr.Row():
        with gr.Column(scale=1):
            with gr.Group():
                gr.Markdown(value=description)
        with gr.Column(scale=1):
            with gr.Group():
                gr.Markdown(value=join_us)

    # Pre-filled text for the system prompt box.
    default_system_prompt = "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."

    # Main row: prompt inputs and settings on the left, model output on the right.
    with gr.Row():
        with gr.Column(scale=1):
            system_prompt = gr.Textbox(
                value=default_system_prompt,
                label="🖥️ System Prompt",
                lines=3,
            )
            prompt = gr.Textbox(label="🙋‍♂️ User Prompt", lines=5)
            generate_button = gr.Button(value="Generate with 🦎 Salamandra-7b-instruct")

            # Checkbox that reveals the settings column below; the column starts hidden.
            advanced_checkbox = gr.Checkbox(value=False, label="🧪 Advanced Settings")
            with gr.Column(visible=False) as advanced_settings:
                temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, label="🌡️ Temperature")
                max_new_tokens = gr.Slider(minimum=1, maximum=2250, value=750, step=1, label="🔢 Max New Tokens")
                top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.95, label="⚛️ Top P")
                repetition_penalty = gr.Slider(minimum=1.0, maximum=2.0, value=1.2, label="🔁 Repetition Penalty")

        with gr.Column(scale=1):
            output = gr.Textbox(label="🦎 Salamandra-7b-instruct Output", lines=10)

    # Values handed to update_output, in its parameter order.
    # NOTE: the last entry is the Advanced Settings checkbox, so its state is
    # what update_output receives as `do_sample`.
    generation_inputs = [
        system_prompt,
        prompt,
        temperature,
        max_new_tokens,
        top_p,
        repetition_penalty,
        advanced_checkbox,
    ]
    generate_button.click(
        fn=update_output,
        inputs=generation_inputs,
        outputs=output,
    )

    # Show or hide the settings column whenever the checkbox changes.
    advanced_checkbox.change(
        fn=lambda x: gr.update(visible=x),
        inputs=[advanced_checkbox],
        outputs=[advanced_settings],
    )

    # Sample prompts; each example is a one-element row for the user prompt box.
    example_prompts = [
        [text]
        for text in (
            "At what temperature does water boil?",
            "Explain the concept of artificial intelligence in simple terms.",
            "Write a short poem about the beauty of nature.",
            "What are the main differences between Python and JavaScript?",
            "Describe the process of photosynthesis in plants.",
        )
    ]
    gr.Examples(
        examples=example_prompts,
        inputs=prompt,
        outputs=prompt,
        label="Example Prompts",
    )

if __name__ == "__main__":
    # Start the app only when this file is executed as a script,
    # launching with ssr_mode turned off.
    demo.launch(ssr_mode=False)