import gradio as gr
import torch
from fuse.data.tokenizers.modular_tokenizer.op import ModularTokenizerOp
from mammal.keys import (
    CLS_PRED,
    ENCODER_INPUTS_ATTENTION_MASK,
    ENCODER_INPUTS_STR,
    ENCODER_INPUTS_TOKENS,
    SCORES,
)
from mammal.model import Mammal

from mammal_demo.demo_framework import MammalObjectBroker, MammalTask


class PpiTask(MammalTask):
    """Demo task: ask a Mammal model whether two protein sequences interact.

    The task builds a text prompt from two amino-acid sequences, tokenizes it,
    runs the model's ``generate`` and reports the decoded output together with
    the score assigned to the positive ("interacting") token.
    """

    def __init__(self, model_dict):
        """Set up the task name, description, example sequences and intro text.

        Args:
            model_dict: mapping from model name to the object holding that
                model and its tokenizer (looked up in ``create_and_run_prompt``).
        """
        super().__init__(name="Protein-Protein Interaction", model_dict=model_dict)
        self.description = "Protein-Protein Interaction (PPI)"
        # Default values shown in the two sequence text boxes of the demo.
        self.examples = {
            "protein_calmodulin": "MADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMISELDQDGFIDKEDLHDGDGKISFEEFLNLVNKEMTADVDGDGQVNYEEFVTMMTSK",
            "protein_calcineurin": "MSSKLLLAGLDIERVLAEKNFYKEWDTWIIEAMNVGDEEVDRIKEFKEDEIFEEAKTLGTAEMQEYKKQKLEEAIEGAFDIFDKDGNGYISAAELRHVMTNLGEKLTDEEVDEMIRQMWDQNGDWDRIKELKFGEIKKLSAKDTRGTIFIKVFENLGTGVDSEYEDVSKYMLKHQ",
        }
        self.markup_text = f"""
    # Mammal based {self.description} demonstration

    Given two protein sequences, estimate if the proteins interact or not."""

    @staticmethod
    def _clean_sequence(sequence) -> str:
        """Return *sequence* as text with every whitespace character removed.

        The demo reads sequences from multi-line text boxes, so pasted input
        may carry line breaks or spaces that are not part of the sequence.
        """
        return "".join(str(sequence).split())

    def generate_prompt(self, protein_seq_1, protein_seq_2) -> str:
        """Format the prompt to match the pre-training syntax.

        Whitespace (spaces, tabs, line breaks) is stripped from both sequences
        before they are embedded in the prompt.

        Args:
            protein_seq_1 (str): sequence of protein number 1
            protein_seq_2 (str): sequence of protein number 2

        Returns:
            str: prompt
        """
        protein_seq_1 = self._clean_sequence(protein_seq_1)
        protein_seq_2 = self._clean_sequence(protein_seq_2)
        prompt = (
            "<@TOKENIZER-TYPE=AA><BINDING_AFFINITY_CLASS><SENTINEL_ID_0>"
            + "<MOLECULAR_ENTITY><MOLECULAR_ENTITY_GENERAL_PROTEIN>"
            + f"<SEQUENCE_NATURAL_START>{protein_seq_1}<SEQUENCE_NATURAL_END>"
            + "<MOLECULAR_ENTITY><MOLECULAR_ENTITY_GENERAL_PROTEIN>"
            + f"<SEQUENCE_NATURAL_START>{protein_seq_2}<SEQUENCE_NATURAL_END><EOS>"
        )
        return prompt

    # NOTE(review): the method name is misspelled ("crate"); it is kept as-is
    # because callers outside this file may rely on it.
    def crate_sample_dict(
        self, sample_inputs: dict, model_holder: MammalObjectBroker
    ) -> dict:
        """Build the tokenized sample for one pair of sequences.

        Args:
            sample_inputs: keyword arguments for ``generate_prompt``
                (``protein_seq_1`` and ``protein_seq_2``).
            model_holder: object whose ``tokenizer_op`` tokenizes the prompt.

        Returns:
            dict: sample holding the prompt string plus the token ids and the
            attention mask, both converted to ``torch`` tensors.
        """
        # Create and load sample
        prompt = self.generate_prompt(**sample_inputs)
        sample_dict = {ENCODER_INPUTS_STR: prompt}

        # Tokenize
        sample_dict = model_holder.tokenizer_op(
            sample_dict=sample_dict,
            key_in=ENCODER_INPUTS_STR,
            key_out_tokens_ids=ENCODER_INPUTS_TOKENS,
            key_out_attention_mask=ENCODER_INPUTS_ATTENTION_MASK,
        )
        # The tokenizer outputs are wrapped into tensors before being handed
        # to the model.
        sample_dict[ENCODER_INPUTS_TOKENS] = torch.tensor(
            sample_dict[ENCODER_INPUTS_TOKENS]
        )
        sample_dict[ENCODER_INPUTS_ATTENTION_MASK] = torch.tensor(
            sample_dict[ENCODER_INPUTS_ATTENTION_MASK]
        )

        return sample_dict

    def run_model(self, sample_dict, model: Mammal):
        """Run generation on a single sample and return the model's output.

        Args:
            sample_dict: tokenized sample as built by ``crate_sample_dict``.
            model: the Mammal model to run.

        Returns:
            The result of ``model.generate`` for a one-element batch, requested
            with scores and limited to 5 new tokens.
        """
        # Generate Prediction
        batch_dict = model.generate(
            [sample_dict],
            output_scores=True,
            return_dict_in_generate=True,
            max_new_tokens=5,
        )
        return batch_dict

    def decode_output(self, batch_dict, tokenizer_op: ModularTokenizerOp) -> list:
        """Turn the raw generation output into displayable values.

        Args:
            batch_dict: output of ``run_model``.
            tokenizer_op: tokenizer used to decode the predicted token ids.

        Returns:
            list: ``[decoded_text, score]`` for the first (only) sample.
        """
        # Get output
        generated_output = tokenizer_op._tokenizer.decode(batch_dict[CLS_PRED][0])
        # `positive_token_id` is not defined in this class - presumably it is
        # inherited from MammalTask. Index 1 selects the second entry of the
        # first sample's scores - presumably the position where the class token
        # is generated; TODO confirm against the model's output layout.
        score = batch_dict[SCORES][0][1][self.positive_token_id(tokenizer_op)].item()

        ans = [generated_output, score]
        return ans

    def create_and_run_prompt(self, model_name, protein_seq_1, protein_seq_2):
        """Full pipeline for the demo button: prompt, inference and decoding.

        Args:
            model_name: key of the model to use in ``self.model_dict``.
            protein_seq_1 (str): sequence of protein number 1
            protein_seq_2 (str): sequence of protein number 2

        Returns:
            tuple: ``(prompt, decoded_text, score)``, in the order of the
            output widgets wired up in ``create_demo``.
        """
        model_holder = self.model_dict[model_name]
        sample_inputs = {"protein_seq_1": protein_seq_1, "protein_seq_2": protein_seq_2}
        sample_dict = self.crate_sample_dict(
            sample_inputs=sample_inputs, model_holder=model_holder
        )
        prompt = sample_dict[ENCODER_INPUTS_STR]
        batch_dict = self.run_model(sample_dict=sample_dict, model=model_holder.model)
        res = prompt, *self.decode_output(
            batch_dict, tokenizer_op=model_holder.tokenizer_op
        )
        return res

    def create_demo(self, model_name_widget: gr.components.Component):
        """Build the Gradio widgets for this task.

        Args:
            model_name_widget: component whose value (the selected model name)
                is passed as first input to ``create_and_run_prompt``.

        Returns:
            gr.Group: the group containing all widgets of the task; its
            ``visible`` attribute is set to ``False`` before it is returned.
        """
        with gr.Group() as demo:
            gr.Markdown(self.markup_text)
            with gr.Row():
                protein_seq_1 = gr.Textbox(
                    label="Protein 1 sequence",
                    interactive=True,
                    lines=3,
                    value=self.examples["protein_calmodulin"],
                )
                protein_seq_2 = gr.Textbox(
                    label="Protein 2 sequence",
                    interactive=True,
                    lines=3,
                    value=self.examples["protein_calcineurin"],
                )
            with gr.Row():
                run_mammal: gr.Button = gr.Button(
                    "Run Mammal prompt for Protein-Protein Interaction",
                    variant="primary",
                )
            with gr.Row():
                prompt_box = gr.Textbox(label="Mammal prompt", lines=5)
            with gr.Row():
                decoded = gr.Textbox(label="Mammal output")
                score_box = gr.Number(label="PPI score")
                # Outputs are listed in the order returned by
                # create_and_run_prompt: prompt, decoded text, score.
                run_mammal.click(
                    fn=self.create_and_run_prompt,
                    inputs=[model_name_widget, protein_seq_1, protein_seq_2],
                    outputs=[prompt_box, decoded, score_box],
                )
            with gr.Row():
                gr.Markdown(
                    "```<SENTINEL_ID_0>``` contains the binding affinity class, which is ```<1>``` for interacting and ```<0>``` for non-interacting"
                )
            # The group starts hidden - presumably the surrounding demo
            # framework makes the selected task visible.
            demo.visible = False
            return demo