from typing import Any, Dict, List

from transformers import AutoModel, AutoTokenizer, pipeline


class EndpointHandler:
    def __init__(self, path=""):

        # Load the pipeline for the model
        model = AutoModel.from_pretrained(
            "openbmb/MiniCPM-Llama3-V-2_5-int4",
            trust_remote_code=True,
        )
        tokenizer = AutoTokenizer.from_pretrained(
            "openbmb/MiniCPM-Llama3-V-2_5-int4", trust_remote_code=True
        )
        self.pipeline = pipeline(model=model, tokenizer=tokenizer)

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        # Get the image and question from the request
        image = data.get("image")
        question = data.get("question")

        # Perform the VQA
        return self.pipeline(image, question)


# if __name__ == "__main__":
#     handler = EndpointHandler()
#     data = {
#         "image": "https://pwm.im-cdn.it/image/1524723057/xxl.jpg",
#         "question": "Describe the image:",
#     }
#     print(handler(data))