|
import gradio as gr |
|
from datasets import load_dataset |
|
import json |
|
import random |
|
from datetime import datetime |
|
import os |
|
from PIL import Image |
|
import io |
|
import numpy as np |
|
|
|
|
|
# Hugging Face auth token handed to the dataset download below; it is None
# when the HUGGINGFACE_TOKEN environment variable is not set.
access_token = os.getenv("HUGGINGFACE_TOKEN")
|
|
|
class DatasetViewer:
    """Serve random samples from the PhotoshopRequest dataset for display.

    The ``train`` split is loaded once when the viewer is constructed.
    ``get_next_samples`` then draws random posts and returns, per post, a
    Markdown description plus the source and edited images scaled down to
    fit ``max_display_size``.
    """

    def __init__(self):
        """Initialise the viewer state and load the dataset immediately."""
        self.dataset = None
        self.dataset_size = 0
        self.last_refresh_time = None
        # (max width, max height) in pixels for images handed to the UI.
        self.max_display_size = (800, 600)
        self.load_dataset()

    def resize_image(self, image):
        """Return ``image`` as a numpy array, shrunk to fit ``max_display_size``.

        Args:
            image: A numpy array, encoded image bytes, or an object exposing
                ``width``, ``height`` and ``resize`` (e.g. a PIL image).

        Returns:
            numpy.ndarray: The image data. The aspect ratio is preserved and
            images that already fit are never enlarged.
        """
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)
        elif isinstance(image, bytes):
            image = Image.open(io.BytesIO(image))

        # A zero-sized image cannot be scaled (and would divide by zero
        # below), so hand it back unchanged.
        if image.width <= 0 or image.height <= 0:
            return np.array(image)

        max_width, max_height = self.max_display_size
        scale_factor = min(max_width / image.width, max_height / image.height)

        # Only ever scale down.
        if scale_factor < 1:
            # int() truncates, so very elongated images could otherwise end
            # up with a 0-pixel side; keep each side at least 1 pixel.
            new_width = max(1, int(image.width * scale_factor))
            new_height = max(1, int(image.height * scale_factor))
            image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)

        return np.array(image)

    def load_dataset(self):
        """Load the complete dataset into memory and record the refresh time."""
        # Function-scope import: the module header only imports ``datetime``.
        from datetime import timezone

        self.dataset = load_dataset(
            "taesiri/PhotoshopRequest-DailyDump-January-2025-RandomSample",
            split="train",
            token=access_token,
        )

        self.dataset_size = len(self.dataset)
        # Timezone-aware UTC so the "UTC" label printed by get_info is true.
        self.last_refresh_time = datetime.now(timezone.utc)

    def get_next_samples(self, num_samples=5):
        """Get random samples from the dataset.

        Args:
            num_samples: Number of posts to draw (without replacement).

        Returns:
            tuple: A flat tuple of ``3 * num_samples`` items; for each post,
            in order: a Markdown string (title, selftext, link to the post),
            the resized source image and the resized edited image. When the
            dataset holds fewer than ``num_samples`` rows, the remaining
            slots are filled with ``""``, ``None``, ``None`` so that a caller
            binding a fixed number of output components still receives one
            value per component.
        """
        count = min(num_samples, self.dataset_size)
        indices = random.sample(range(self.dataset_size), count)

        results = []
        for idx in indices:
            sample = self.dataset[idx]

            post_id = sample["post_id"]
            title = sample["title"]
            reddit_url = f"https://www.reddit.com/r/PhotoshopRequest/comments/{post_id}"

            # The selftext is optional: a missing/invalid JSON payload or a
            # missing key simply yields an empty description.
            selftext = ""
            try:
                # ``or ""`` keeps a JSON null from rendering as "None".
                selftext = json.loads(sample["json_data"])["post"]["selftext"] or ""
            except (KeyError, TypeError, ValueError):
                # ValueError covers json.JSONDecodeError.
                print(f"No selftext found for post {post_id}")

            markdown_text = f"# {title}\n\n{selftext}\n\n[View post on r/PhotoshopRequest]({reddit_url})"

            results.append(markdown_text)
            results.append(self.resize_image(sample["source_image"]))
            results.append(self.resize_image(sample["edited_image"]))

        # Pad the slots that could not be filled from the dataset.
        for _ in range(num_samples - count):
            results.extend(("", None, None))

        return tuple(results)

    def get_info(self):
        """Return an HTML snippet with the dataset size and last refresh time."""
        if self.last_refresh_time is None:
            # The dataset has not been loaded on this instance yet.
            refreshed = "never"
        else:
            refreshed = self.last_refresh_time.strftime('%Y-%m-%d %H:%M:%S UTC')
        return f"""
        <div style="text-align: center;">
            <hr>
            Dataset Size: {self.dataset_size} items<br>
            Last Refreshed: {refreshed}
        </div>
        """
|
|
|
def create_interface():
    """Assemble the Gradio Blocks app for browsing dataset samples.

    The page holds five sample slots (a Markdown description above a row
    with the source and edited images), a button that refills every slot
    with random samples, and a footer that is updated with dataset
    information after each refill.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    dataset_viewer = DatasetViewer()

    with gr.Blocks() as demo:
        gr.Markdown("# PhotoshopRequest Dataset Viewer")

        gr.Markdown("""
        This is a viewer for the PhotoshopRequest dataset. Each sample shows a Photoshop editing request post.
        Click the 'Show New Samples' button to see **5 random samples** from the dataset.

        **Layout**: For each sample, you'll see:
        1. The post title and description
        2. The source image (left) and edited result (right)
        """)

        # Flat list ordered as (markdown, source, edited) per slot, matching
        # the order of the values returned by get_next_samples.
        slot_components = []
        for slot_number in range(1, 6):
            slot_components.append(gr.Markdown())

            with gr.Row():
                slot_components += [
                    gr.Image(label=f"Source Image {slot_number}"),
                    gr.Image(label=f"Edited Image {slot_number}"),
                ]

        refresh_button = gr.Button("Show New Samples")
        footer_md = gr.Markdown()

        # First refill the sample slots, then refresh the footer.
        refill_event = refresh_button.click(
            dataset_viewer.get_next_samples,
            outputs=slot_components,
        )
        refill_event.then(dataset_viewer.get_info, outputs=[footer_md])

    return demo
|
|
|
if __name__ == "__main__":
    # Script entry point: build the app (constructing the viewer loads the
    # dataset) and start serving it with Gradio's default launch settings.
    demo = create_interface()
    demo.launch()