# MonsterAPIClient.py

"""
Monster API Python client to connect to LLM models on monsterapi.

Base URL: https://api.monsterapi.ai/v1/generate/{model}

Available models:
-----------------
LLMs:
1. falcon-7b-instruct
2. falcon-40b-instruct
3. mpt-30B-instruct
4. mpt-7b-instruct
5. openllama-13b-base
6. llama2-7b-chat

Text to Image:
1. stable-diffusion v1.5 (txt2img)
2. stable-diffusion XL v1.0 (sdxl-base)
"""
import os
import time
import logging

import requests
from requests_toolbelt.multipart.encoder import MultipartEncoder

from typing import Optional, Literal
from pydantic import BaseModel, Field

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class LLMInputModel1(BaseModel):
    """
    Supports the following models: falcon-7b-instruct, falcon-40b-instruct,
    openllama-13b-base, llama2-7b-chat

    prompt: string
        Textual instruction for the model to produce an output. Required.

    top_k: integer
        Top-k sampling improves quality by removing the low-probability tail,
        making the model less likely to go off topic. Optional (Default: 40)

    top_p: float
        Top-p sampling generates more diverse and creative text by considering
        a broader range of tokens. Allowed range: 0 - 1. Optional (Default: 0.9)

    temp: float
        The temperature influences the randomness of next-token predictions.
        Optional (Default: 0.98)

    max_length: integer
        The maximum length of the generated text. Optional (Default: 256)

    repetition_penalty: float
        The model uses this penalty to discourage the repetition of tokens
        in the output. Optional (Default: 1.2)

    beam_size: integer
        The beam size for beam search. A larger beam size yields better
        quality output at the cost of slower generation. Optional (Default: 1)
    """
    prompt: str
    top_k: int = 40
    top_p: float = Field(0.9, ge=0., le=1.)
    temp: float = Field(0.98, ge=0., le=1.)
    max_length: int = 256
    repetition_penalty: float = 1.2
    beam_size: int = 1


class LLMInputModel2(BaseModel):
    """
    Supports the following models: mpt-30B-instruct, mpt-7b-instruct

    prompt: string
        Textual instruction for the model to produce an output. Required.

    top_k: integer
        Top-k sampling improves quality by removing the low-probability tail,
        making the model less likely to go off topic. Optional (Default: 40)

    top_p: float
        Top-p sampling generates more diverse and creative text by considering
        a broader range of tokens. Allowed range: 0 - 1. Optional (Default: 0.9)

    temp: float
        The temperature controls the randomness of the model's output: the
        higher the temperature, the more random the output. Optional (Default: 0.98)

    max_length: integer
        The maximum length of the generated output. Optional (Default: 256)
    """
    prompt: str
    top_k: int = 40
    top_p: float = Field(0.9, ge=0., le=1.)
    temp: float = Field(0.98, ge=0., le=1.)
    max_length: int = 256
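
# A minimal sketch of how these pydantic input models behave (illustrative
# only, not called by the client): defaults are filled in automatically and
# the Field bounds declared above are enforced at construction time.
#
#     payload = LLMInputModel1(prompt="Hello").dict()
#     # -> {'prompt': 'Hello', 'top_k': 40, 'top_p': 0.9, 'temp': 0.98,
#     #     'max_length': 256, 'repetition_penalty': 1.2, 'beam_size': 1}
#
#     LLMInputModel1(prompt="Hello", top_p=1.5)   # raises ValidationError
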
class SDInputModel(BaseModel):
    """
    Supports the following models: txt2img, sdxl-base

    prompt: string
        Your input text prompt. Required.

    negprompt: string
        Negative text prompt. Optional.

    samples: integer
        Number of images to generate. Allowed range: 1-4. Optional (Default: 1)

    steps: integer
        Sampling steps per image. Allowed range: 30-500. Optional (Default: 30)

    aspect_ratio: string
        Allowed values: square, landscape, portrait. Optional (Default: square)

    guidance_scale: float
        Prompt guidance scale. Optional (Default: 7.5)

    seed: integer
        Random number used to initialize the image generation.
        Optional (Default: random)
    """
    prompt: str
    negprompt: Optional[str] = ""
    samples: Optional[int] = Field(1, ge=1, le=4)
    steps: Optional[int] = Field(30, ge=30, le=500)
    aspect_ratio: Optional[Literal['square', 'landscape', 'portrait']] = 'square'
    guidance_scale: Optional[float] = 7.5
    seed: Optional[int] = None


MODELS_TO_DATAMODEL = {
    'falcon-7b-instruct': LLMInputModel1,
    'falcon-40b-instruct': LLMInputModel1,
    'mpt-30B-instruct': LLMInputModel2,
    'mpt-7b-instruct': LLMInputModel2,
    'openllama-13b-base': LLMInputModel1,
    'llama2-7b-chat': LLMInputModel1,
    'sdxl-base': SDInputModel,
    'txt2img': SDInputModel
}


class MClient():

    def __init__(self):
        self.boundary = '---011000010111000001101001'
        self.auth_token = os.environ.get('MONSTER_API_KEY')
        if not self.auth_token:
            raise ValueError("MONSTER_API_KEY environment variable is not set!")
        self.headers = {
            "accept": "application/json",
            "content-type": f"multipart/form-data; boundary={self.boundary}",
            "Authorization": f"Bearer {self.auth_token}"
        }
        self.base_url = 'https://api.monsterapi.ai/v1'
        self.models_to_data_model = MODELS_TO_DATAMODEL
        self.mock = os.environ.get('MOCK_Runner', "False").lower() == "true"

    def get_response(self,
                     model: Literal['falcon-7b-instruct', 'falcon-40b-instruct',
                                    'mpt-30B-instruct', 'mpt-7b-instruct',
                                    'openllama-13b-base', 'llama2-7b-chat',
                                    'sdxl-base', 'txt2img'],
                     data: dict):
        if model not in self.models_to_data_model:
            raise ValueError(f"Invalid model: {model}!")

        # Validate the payload against the model's input schema.
        dataModel = self.models_to_data_model[model](**data)
        url = f"{self.base_url}/generate/{model}"
        data = dataModel.dict()
        logger.info(f"Calling Monster API with url: {url}, with payload: {data}")

        # Multipart form fields must be strings.
        for key, value in data.items():
            data[key] = str(value)

        multipart_data = MultipartEncoder(fields=data, boundary=self.boundary)
        response = requests.post(url, headers=self.headers, data=multipart_data)
        response.raise_for_status()
        return response.json()

    def get_status(self, process_id):
        # GET /v1/status/{process_id}
        url = f"{self.base_url}/status/{process_id}"
        response = requests.get(url, headers=self.headers)
        response.raise_for_status()
        return response.json()

    def wait_and_get_result(self, process_id, timeout=100):
        start_time = time.time()
        while True:
            elapsed_time = time.time() - start_time
            if elapsed_time >= timeout:
                raise TimeoutError(f"Process {process_id} timed out after {timeout} seconds.")

            status = self.get_status(process_id)
            if status['status'].lower() == 'completed':
                return status['result']
            elif status['status'].lower() == 'failed':
                raise RuntimeError(f"Process {process_id} failed! {status}")
            else:
                if self.mock:
                    return 100 * "Mock Output!"
                logger.info(f"Process {process_id} is still running, status is {status['status']}. Waiting ...")
                # Brief pause between status polls.
                time.sleep(0.01)
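

# Usage sketch (illustrative only): assumes MONSTER_API_KEY is set and that
# the asynchronous /generate endpoint returns a JSON body containing a
# "process_id" field to poll with wait_and_get_result, as the methods above
# imply.
if __name__ == '__main__':
    client = MClient()
    response = client.get_response('falcon-7b-instruct',
                                   {'prompt': 'Write a haiku about the sea.',
                                    'max_length': 64})
    result = client.wait_and_get_result(response['process_id'])
    print(result)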