import base64
from io import BytesIO

import gdown
import gradio as gr
from gtts import gTTS
from PIL import Image

# Define the URLs of the images
image_urls = [
    "https://drive.google.com/uc?id=19zhvDXwloycllKv0q8fN4AkNWJ3WC74I",
    "https://drive.google.com/uc?id=1_9MzEhc_YfwYFGVLUUlFW3YpSuZk6FSV",
    "https://drive.google.com/uc?id=1v1JRBsza_pnpLGv_vgORpeYDo9DLmMAD",
    "https://drive.google.com/uc?id=1aunq6D-MuES5BKkkxPFYH4R5h6W6WlDE",
]

# Define the local file names (paired positionally with image_urls)
image_files = [
    "image1.png",
    "image2.png",
    "image3.png",
    "image4.png",
]

# Download the images
for url, file in zip(image_urls, image_files):
    gdown.download(url, file, quiet=False)

# Descriptions for each scene (paired positionally with image_files)
descriptions = [
    "Alex, Mia, and Sam eagerly plan their treasure hunt in a room filled with maps and exploration tools.",
    "The friends hike through a dense forest, guided by their ancient map.",
    "Deep in Whispering Hollow, they discover an ancient chest filled with historical artifacts.",
    "The village celebrates as Alex, Mia, and Sam present their find to the local museum.",
]

# Generate audio files for each description
audio_files = []
for i, desc in enumerate(descriptions):
    tts = gTTS(desc)
    audio_file = f"audio_{i+1}.mp3"
    tts.save(audio_file)
    audio_files.append(audio_file)

# Load images from the files downloaded above, in the same order
images = [Image.open(path) for path in image_files]

# Assign labels to images: labels[i] is the letter shown for the i-th scene
labels = ['B', 'D', 'A', 'C']
label_image_description_audio = list(zip(labels, images, descriptions, audio_files))

# Ensure the display order is A -> B -> C -> D
display_order = ['A', 'B', 'C', 'D']
label_image_description_audio_sorted = sorted(
    label_image_description_audio,
    key=lambda x: display_order.index(x[0]),
)


# Function to check the order
def check_order(a, b, c, d):
    """Compare the four submitted labels against the expected sequence.

    Args:
        a: Label entered for the first part of the story.
        b: Label entered for the second part of the story.
        c: Label entered for the third part of the story.
        d: Label entered for the fourth part of the story.

    Returns:
        "Correct!" when the labels match ``labels`` position by position,
        otherwise "Try again.".
    """
    # Normalise case and surrounding whitespace so "b " or " B" still counts;
    # a missing value (None) is treated as an empty answer.
    user_order = [(value or "").strip().upper() for value in (a, b, c, d)]
    # ``labels`` is the answer key: it lists the letter given to each scene in
    # the order the scenes are defined, i.e. ['B', 'D', 'A', 'C'].
    correct_order = list(labels)
    return "Correct!" if user_order == correct_order else "Try again."
# Helper function to convert image to base64 string
def img_to_base64(img):
    """Return *img* encoded as a base64 string of its PNG bytes.

    Args:
        img: An object with a ``save(fp, format=...)`` method; it is written
            to an in-memory buffer as PNG.

    Returns:
        The base64 text of the PNG data as a ``str``.
    """
    buffered = BytesIO()
    img.save(buffered, format="PNG")
    img_str = base64.b64encode(buffered.getvalue()).decode()
    return img_str


# Create Gradio interface using gr.Blocks for layout control
with gr.Blocks(css=".small-audio { height: 180px !important; width: 230px !important; }") as iface:
    gr.Markdown("The images are displayed below with labels A, B, C, and D. Enter the correct sequence according to the story.")

    # One column per scene, in the pre-sorted A -> B -> C -> D display order.
    with gr.Row():
        for label, img, desc, audio in label_image_description_audio_sorted:
            with gr.Column():
                gr.Image(value=img, label=label, width=300)  # Display image with label
                gr.Markdown(f"**{label}**: {desc}")  # Display description below the image
                gr.Audio(value=audio, elem_classes="small-audio")  # Display TTS audio below the description with smaller size

    # One textbox per story position; values are passed to check_order in order.
    with gr.Row():
        a = gr.Textbox(label="Enter label for the first part of story")
        b = gr.Textbox(label="Enter label for the second part of story")
        c = gr.Textbox(label="Enter label for the third part of story")
        d = gr.Textbox(label="Enter label for the fourth part of story")

    btn = gr.Button("Check Order")
    output = gr.Textbox(label="Result")

    # The click handler is registered while the Blocks context is still open.
    btn.click(fn=check_order, inputs=[a, b, c, d], outputs=output)

iface.launch()