import gradio as gr
from PIL import Image
import base64
from io import BytesIO
from gtts import gTTS
import gdown

# Google Drive direct-download URLs, one per story image
image_urls = [
    "https://drive.google.com/uc?id=19zhvDXwloycllKv0q8fN4AkNWJ3WC74I",
    "https://drive.google.com/uc?id=1_9MzEhc_YfwYFGVLUUlFW3YpSuZk6FSV",
    "https://drive.google.com/uc?id=1v1JRBsza_pnpLGv_vgORpeYDo9DLmMAD",
    "https://drive.google.com/uc?id=1aunq6D-MuES5BKkkxPFYH4R5h6W6WlDE"
]

# Local file names the images are saved under (paired with image_urls by position)
image_files = [
    "image1.png",
    "image2.png",
    "image3.png",
    "image4.png"
]

# Download each image to its local file name
for url, file in zip(image_urls, image_files):
    # NOTE: depending on the gdown release, a failed download may return None
    # instead of raising. Stop here with a clear message rather than letting
    # a later step fail on a missing file.
    if gdown.download(url, file, quiet=False) is None:
        raise RuntimeError(f"Failed to download {url} to {file}")

# Narration text for each scene, in the same order as the downloaded images
descriptions = [
    "Alex, Mia, and Sam eagerly plan their treasure hunt in a room filled with maps and exploration tools.",
    "The friends hike through a dense forest, guided by their ancient map.",
    "Deep in Whispering Hollow, they discover an ancient chest filled with historical artifacts.",
    "The village celebrates as Alex, Mia, and Sam present their find to the local museum."
]

# One MP3 file name per description: audio_1.mp3, audio_2.mp3, ...
audio_files = [f"audio_{i+1}.mp3" for i, _ in enumerate(descriptions)]

# Synthesize the speech for every description and write it to its file
for narration, mp3_path in zip(descriptions, audio_files):
    gTTS(narration).save(mp3_path)

# Load the images from the local files listed in image_files, so the
# file names are defined in exactly one place
images = [Image.open(path) for path in image_files]

# On-screen label for each image, positionally matched to images,
# descriptions and audio_files
labels = ['B', 'D', 'A', 'C']
label_image_description_audio = list(zip(labels, images, descriptions, audio_files))

# Ensure the display order is A -> B -> C -> D
display_order = ['A', 'B', 'C', 'D']
label_image_description_audio_sorted = sorted(
    label_image_description_audio,
    key=lambda entry: display_order.index(entry[0]),  # entry[0] is the label
)

# Function to check the order
def check_order(a, b, c, d):
    """Check whether the four submitted labels match the story order.

    Args:
        a: Label entered for the first part of the story.
        b: Label entered for the second part of the story.
        c: Label entered for the third part of the story.
        d: Label entered for the fourth part of the story.

    Returns:
        ``"Correct!"`` when the normalized answers are B, D, A, C in that
        order, otherwise ``"Try again."``.
    """
    correct_order = ['B', 'D', 'A', 'C']
    # Normalize each answer: ignore surrounding whitespace and letter case,
    # and treat a missing value (None) as an empty answer instead of
    # crashing on .upper().
    user_order = [(answer or "").strip().upper() for answer in (a, b, c, d)]
    return "Correct!" if user_order == correct_order else "Try again."

# Helper function to convert image to base64 string
def img_to_base64(img):
    """Return *img* serialized as PNG and encoded as a base64 text string.

    ``img`` only needs a ``save(fp, format=...)`` method that writes bytes
    to a file-like object, as PIL images provide.
    """
    with BytesIO() as png_stream:
        img.save(png_stream, format="PNG")
        png_bytes = png_stream.getvalue()
    return base64.b64encode(png_bytes).decode()

# Build the quiz page with gr.Blocks so rows and columns can be laid out explicitly
SMALL_AUDIO_CSS = ".small-audio { height: 180px !important; width: 230px !important; }"
INSTRUCTIONS = "The images are displayed below with labels A, B, C, and D. Enter the correct sequence according to the story."

with gr.Blocks(css=SMALL_AUDIO_CSS) as iface:
    gr.Markdown(INSTRUCTIONS)

    # One column per scene: the labelled picture, its caption, then the narration player
    with gr.Row():
        for label, picture, desc, narration in label_image_description_audio_sorted:
            with gr.Column():
                gr.Image(value=picture, label=label, width=300)
                gr.Markdown(f"**{label}**: {desc}")
                gr.Audio(value=narration, elem_classes="small-audio")  # sized by the CSS class above

    # Four answer boxes, one for each position in the story
    with gr.Row():
        a = gr.Textbox(label="Enter label for the first part of story")
        b = gr.Textbox(label="Enter label for the second part of story")
        c = gr.Textbox(label="Enter label for the third part of story")
        d = gr.Textbox(label="Enter label for the fourth part of story")

    btn = gr.Button("Check Order")
    output = gr.Textbox(label="Result")

    # Pass the four answers to check_order and show its verdict in the result box
    btn.click(fn=check_order, inputs=[a, b, c, d], outputs=output)

iface.launch()