import time
import config
import json
import base64
from lmdeploy.vl.utils import load_image_from_base64

from lmdeploy import pipeline, TurbomindEngineConfig, ChatTemplateConfig,GenerationConfig, VisionConfig
from lmdeploy.vl import load_image
from lmdeploy.vl import templates


# Model identifier or path, read from the project's ``config`` module.
model = config.model_id_or_path
# System prompt (in Chinese): introduces the assistant as 书生·万象 / InternVL,
# a multimodal large language model developed by Shanghai AI Laboratory and
# partner institutions.
system_prompt = '我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。'
# NOTE(review): chat_template_config is constructed and given the system
# prompt here, but it is not passed to pipeline() below, so nothing visible in
# this file applies it. Confirm whether it should be supplied via
# pipeline(..., chat_template_config=chat_template_config) or removed; the
# right answer depends on which model config.model_id_or_path points to.
chat_template_config = ChatTemplateConfig('internvl-zh-hermes2')
chat_template_config.meta_instruction = system_prompt
# The pipeline is created once, as a module-level statement, so it is built
# when this module is imported. The Turbomind backend is configured with
# session_len=3067.
pipe = pipeline(model, backend_config=TurbomindEngineConfig(session_len=3067))

# Generation settings passed to ``pipe`` by internvl_main: nucleus/top-k
# sampling parameters, temperature, and a cap of 1024 newly generated tokens.
gen_config = GenerationConfig(top_p=0.8,
                              top_k=8,
                              temperature=0.6,
                              max_new_tokens=1024)


def internvl_main(image, prompt):
    """Answer a text prompt about a base64-encoded image using ``pipe``.

    The image is decoded with ``load_image_from_base64``, a fixed Chinese
    suffix asking for an answer within 150 characters is appended to the
    prompt, and the pair is sent to the module-level ``pipe`` with the
    module-level ``gen_config``. The raw reply and the elapsed time are
    printed to stdout.

    Args:
        image: Base64-encoded image data, in whatever form
            ``load_image_from_base64`` accepts.
        prompt: The user's question as a string; the length-limit suffix is
            concatenated directly after it.

    Returns:
        The model's reply text with every ``'json\\n'`` occurrence, every
        newline, and every triple-backtick sequence removed (in that order).
    """
    # perf_counter is monotonic, so the reported duration cannot be skewed by
    # system clock adjustments the way a time.time() difference can.
    started = time.perf_counter()

    # Appended to every prompt: "keep the answer within 150 characters".
    length_hint = "。回答内容字数控制在150字以内"

    # Keep the decoded image in its own name rather than rebinding the
    # ``image`` parameter to a value of a different type.
    decoded_image = load_image_from_base64(image)
    response = pipe((prompt + length_hint, decoded_image),
                    gen_config=gen_config).text

    print("结果为：", response)
    print("耗时为：", time.perf_counter() - started)

    # Drop markdown code-fence residue and flatten the reply onto one line.
    # The replacement order matters: 'json\n' must be removed before the
    # newlines are, otherwise the language tag would be left behind.
    return response.replace('json\n', '').replace('\n', '').replace('```', '')