from typing import Any, Dict, List

from transformers import AutoModel, AutoTokenizer, pipeline


class EndpointHandler:
    """Callable request handler that runs an image + question through a pipeline.

    The model and tokenizer are loaded once in ``__init__``; each call to the
    instance forwards one (image, question) pair to the resulting pipeline.
    """

    # Checkpoint id used for both the model and the tokenizer.
    MODEL_ID = "openbmb/MiniCPM-Llama3-V-2_5-int4"

    def __init__(self, path: str = ""):
        """Load the model and tokenizer and wrap them in a pipeline.

        Args:
            path: Accepted but not used by this handler; the checkpoint is
                always loaded from ``MODEL_ID``.
        """
        # trust_remote_code=True allows transformers to execute the modelling
        # code that ships with the checkpoint repository.
        model = AutoModel.from_pretrained(self.MODEL_ID, trust_remote_code=True)
        tokenizer = AutoTokenizer.from_pretrained(
            self.MODEL_ID, trust_remote_code=True
        )
        # NOTE(review): no ``task`` is passed. transformers appears to infer the
        # task only from a string model id, so building a pipeline from a model
        # *object* without ``task`` likely raises at construction time -- confirm
        # against the installed transformers version. The checkpoint's model
        # card documents a ``model.chat(...)`` entry point instead; switching to
        # it needs an image-loading dependency this file does not import.
        self.pipeline = pipeline(model=model, tokenizer=tokenizer)

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Answer ``question`` about ``image`` using the loaded pipeline.

        Args:
            data: Request payload. ``"image"`` and ``"question"`` are read
                either from the top level or, when present, from a nested
                ``"inputs"`` mapping.

        Returns:
            Whatever the pipeline returns for ``(image, question)``; annotated
            as a list of dicts.

        Raises:
            ValueError: If ``"image"`` or ``"question"`` is missing or ``None``.
        """
        # Accept both the flat payload and the {"inputs": {...}} envelope.
        payload = data.get("inputs", data)
        if not isinstance(payload, dict):
            payload = data

        image = payload.get("image")
        question = payload.get("question")

        # Fail early with a clear message instead of handing None to the model.
        missing = [
            name
            for name, value in (("image", image), ("question", question))
            if value is None
        ]
        if missing:
            raise ValueError(f"Missing required field(s): {', '.join(missing)}")

        # Perform the VQA
        return self.pipeline(image, question)


# Example local invocation (kept commented out so importing this module does
# not trigger a model download):
#
# if __name__ == "__main__":
#     handler = EndpointHandler()
#     data = {
#         "image": "https://pwm.im-cdn.it/image/1524723057/xxl.jpg",
#         "question": "Describe the image:",
#     }
#     print(handler(data))