import gradio as gr
import spaces
import json
import re
from gradio_client import Client


def get_caption_from_kosmos(image_in):
    """Return a detailed caption for an image using the hosted Kosmos-2 Space.

    Args:
        image_in: File path or URL of the image, forwarded unchanged to the
            Space's 'Test Image' input.

    Returns:
        The caption with the leading "Describe this image in detail:"
        instruction removed. When the response does not start with that
        instruction, the full reconstructed sentence is returned instead.
    """
    kosmos2_client = Client("https://ydshieh-kosmos-2.hf.space/")

    kosmos2_result = kosmos2_client.predict(
        image_in,  # str (filepath or URL to image) in 'Test Image' Image component
        "Detailed",  # str in 'Description Type' Radio component
        fn_index=4,
    )

    print(f"KOSMOS2 RETURNS: {kosmos2_result}")

    # The second element of the result is a path to a JSON file containing a
    # list of entries; the first item of each entry is a piece of the caption.
    with open(kosmos2_result[1], "r", encoding="utf-8") as f:
        data = json.load(f)

    full_sentence = " ".join(sublist[0] for sublist in data)

    # Drop the instruction prefix ("Describe this image in detail:" plus any
    # following whitespace) and keep only the description that follows it.
    pattern = r'^Describe this image in detail:\s*(.*)$'
    match = re.search(pattern, full_sentence)
    if match:
        description = match.group(1)
        print(description)
    else:
        # Without this fallback `description` would be unbound and the
        # function would raise UnboundLocalError on return.
        print("Unable to locate valid description.")
        description = full_sentence

    return description

def get_caption_from_MD(image_in):
    """Ask the hosted moondream1 Space to describe the image as a fictional character.

    Args:
        image_in: File path of the image, passed to the Space's 'image' input.

    Returns:
        The value returned by the Space's "/answer_question" endpoint
        (it is also printed).
    """
    md_client = Client("https://vikhyatk-moondream1.hf.space/")
    question = "Describe character like if it was fictional"  # 'Question' Textbox input
    answer = md_client.predict(image_in, question, api_name="/answer_question")
    print(answer)
    return answer


import re
import torch
from transformers import pipeline

# Text-generation pipeline for Zephyr-7B, created once at import time and
# used by get_llm_idea(); weights are loaded as bfloat16 with device_map="auto".
pipe = pipeline(
    "text-generation",
    model="HuggingFaceH4/zephyr-7b-beta",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

@spaces.GPU(enable_queue=True)
def get_llm_idea(user_prompt):
    """Run the Zephyr text-generation pipeline on a chat-formatted prompt.

    The prompt is laid out in Zephyr's chat format: a system turn, a user
    turn holding `user_prompt`, and an opening assistant tag so that the
    model continues with its answer.

    Args:
        user_prompt: Text placed in the user turn.

    Returns:
        The raw output of the module-level `pipe` call (sampling enabled,
        at most 256 new tokens).
    """
    # NOTE(review): the system prompt is blank (newlines only); presumably the
    # actual instructions are meant to be filled in here — confirm.
    agent_maker_sys = """

"""

    instruction = f"""
<|system|>
{agent_maker_sys}</s>
<|user|>
"""

    # Close the user turn and open the assistant turn explicitly. Without the
    # trailing tag the model is not cued to answer, and code that strips
    # everything up to "<|assistant|>" from the generated text only works if
    # the model happens to emit that tag itself.
    prompt = f"{instruction.strip()}\n{user_prompt}</s>\n<|assistant|>\n"
    outputs = pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
    )
    return outputs


def infer(image_in, cap_type):
    """Caption the image, then ask the LLM for a suggestion based on the caption.

    Args:
        image_in: File path of the reference image.
        cap_type: "Fictional" to caption with get_caption_from_MD, or
            "Literal" to caption with get_caption_from_kosmos.

    Returns:
        A `(caption, suggestion)` tuple: the caption that was sent to the LLM
        and the generated text with the echoed prompt removed.

    Raises:
        gr.Error: If no image was provided or `cap_type` is not one of the
            two supported values.
    """
    # Fail early with a user-visible message instead of sending an empty
    # input to the remote captioning service.
    if not image_in:
        raise gr.Error("Please provide an image first.")

    gr.Info("Getting image description...")
    if cap_type == "Fictional":
        user_prompt = get_caption_from_MD(image_in)
    elif cap_type == "Literal":
        user_prompt = get_caption_from_kosmos(image_in)
    else:
        # Previously an unexpected value left `user_prompt` unbound and the
        # next statement raised UnboundLocalError.
        raise gr.Error(f"Unknown caption type: {cap_type!r}")

    gr.Info("Building a system according to the image caption ...")
    outputs = get_llm_idea(user_prompt)

    # The generated text contains the prompt as well; remove everything from
    # the system tag up to (and including) the first assistant tag.
    pattern = r'\<\|system\|\>(.*?)\<\|assistant\|\>'
    cleaned_text = re.sub(pattern, '', outputs[0]["generated_text"], flags=re.DOTALL)

    print(f"SUGGESTED LLM: {cleaned_text}")

    return user_prompt, cleaned_text.lstrip("\n")

# Page metadata. `title` must be a plain string: a trailing comma after the
# literal would silently turn it into a one-element tuple.
# `description` is interpolated into the HTML header of the page.
title = "Magic Card Generator"
description = ""

# Custom stylesheet handed to gr.Blocks(css=...): centres the element with id
# "col-container" (max width 780px, left-aligned text) and lets gallery images
# size their own width.
# NOTE(review): "#component-14" is presumably an auto-generated Gradio element
# id — confirm it still matches an element in the current layout.
css = """
#col-container{
    margin: 0 auto;
    max-width: 780px;
    text-align: left;
}
/* fix examples gallery width on mobile */
div#component-14 > .gallery > .gallery-item > .container > img {
    width: auto!important;
}
"""

# UI definition: the first column of the row holds the inputs (image, caption
# type, submit button); the second column holds the two output text boxes.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(f"""
        <h2 style="text-align: center;">LLM Agent from a Picture</h2>
        <p style="text-align: center;">{description}</p>
        """)

        with gr.Row():
            with gr.Column():
                image_in = gr.Image(
                    label="Image reference",
                    type="filepath",
                    elem_id="image-in",
                )
                cap_type = gr.Radio(
                    label="Caption type",
                    choices=["Literal", "Fictional"],
                    value="Fictional",
                )
                submit_btn = gr.Button("Make LLM system from my pic !")
            with gr.Column():
                caption = gr.Textbox(label="Image caption", elem_id="image-caption")
                result = gr.Textbox(
                    label="Suggested System",
                    lines=6,
                    max_lines=30,
                    elem_id="suggested-system-prompt",
                )

    # Clicking the button runs infer() on the image and caption type and
    # writes its two return values into the caption and result boxes.
    submit_btn.click(
        fn=infer,
        inputs=[image_in, cap_type],
        outputs=[caption, result],
    )

demo.queue().launch(show_api=False, show_error=True)