Spaces:

taesiri
/

PhotoshopRequests-Preview

Running

App Files Files Community

PhotoshopRequests-Preview / app.py

taesiri

Update app.py

798ee13 verified 26 days ago

raw

history blame contribute delete

5.04 kB

	import gradio as gr
	from datasets import load_dataset
	import json
	import random
	from datetime import datetime
	import os
	from PIL import Image
	import io
	import numpy as np

	# Hugging Face access token, read once from the environment (None when the variable is unset)
	access_token = os.getenv("HUGGINGFACE_TOKEN")

	class DatasetViewer:
	    """Hold the PhotoshopRequest dataset and serve random samples for display.

	    The dataset is loaded once when the instance is constructed. Samples are
	    returned as a flat tuple of (markdown, source image, edited image)
	    triples, and images are downscaled to fit ``max_display_size``.
	    """

	    def __init__(self):
	        """Initialise the viewer state and load the dataset immediately."""
	        self.dataset = None
	        self.dataset_size = 0
	        self.last_refresh_time = None
	        # Maximum (width, height) in pixels for displayed images
	        self.max_display_size = (800, 600)
	        self.load_dataset()

	    def resize_image(self, image):
	        """Downscale an image to fit ``max_display_size``, keeping aspect ratio.

	        Args:
	            image: A numpy array, raw encoded image bytes, an object exposing
	                ``width``/``height``/``resize`` (e.g. a PIL image), or None.

	        Returns:
	            The image as a numpy array. Images already within the limits are
	            not upscaled. Returns None when ``image`` is None.
	        """
	        if image is None:
	            # A sample without an image should not crash the whole batch
	            return None

	        if isinstance(image, np.ndarray):
	            # Convert numpy array to PIL Image
	            image = Image.fromarray(image)
	        elif isinstance(image, bytes):
	            # Convert encoded bytes to PIL Image
	            image = Image.open(io.BytesIO(image))

	        # Scaling factor that makes the image fit within both max dimensions
	        max_width, max_height = self.max_display_size
	        scale_factor = min(max_width / image.width, max_height / image.height)

	        # Only resize if the image is larger than the max dimensions
	        if scale_factor < 1:
	            # int() truncates, so clamp to 1px: a very elongated image must
	            # not end up with a zero-sized target dimension
	            new_width = max(1, int(image.width * scale_factor))
	            new_height = max(1, int(image.height * scale_factor))
	            image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)

	        # Convert back to numpy array for gradio
	        return np.array(image)

	    def load_dataset(self):
	        """Load the full (non-streaming) train split and record its size and load time."""
	        # Local import: the module-level import only brings in `datetime`
	        from datetime import timezone

	        self.dataset = load_dataset(
	            "taesiri/PhotoshopRequest-DailyDump-January-2025-RandomSample",
	            split="train",
	            token=access_token,
	        )

	        self.dataset_size = len(self.dataset)
	        # get_info() prints this value with a literal "UTC" suffix, so it has
	        # to be an actual UTC time rather than naive local time
	        self.last_refresh_time = datetime.now(timezone.utc)

	    def get_next_samples(self, num_samples=5):
	        """Return random samples flattened into one tuple.

	        Args:
	            num_samples: Number of (markdown, source, edited) triples to return.

	        Returns:
	            A tuple of exactly ``3 * num_samples`` items. Each sample
	            contributes a markdown string, the resized source image and the
	            resized edited image, in that order. When the dataset holds fewer
	            than ``num_samples`` rows, the remaining triples are
	            ``("", None, None)`` so the result length stays constant for a
	            caller that binds it to a fixed list of outputs.
	        """
	        # Sample without replacement; cannot draw more rows than exist
	        available = min(num_samples, self.dataset_size)
	        indices = random.sample(range(self.dataset_size), available)

	        results = []
	        for idx in indices:
	            sample = self.dataset[idx]

	            # Get post information
	            post_id = sample["post_id"]
	            title = sample["title"]
	            reddit_url = f"https://www.reddit.com/r/PhotoshopRequest/comments/{post_id}"

	            # Extract selftext if available; a missing key, a non-string or
	            # malformed payload (JSONDecodeError is a ValueError) all mean
	            # "no selftext" rather than a failure
	            selftext = ""
	            try:
	                selftext = json.loads(sample["json_data"])["post"]["selftext"]
	            except (KeyError, TypeError, ValueError):
	                print(f"No selftext found for post {post_id}")

	            # Create markdown text
	            markdown_text = f"# {title}\n\n{selftext}\n\n[View post on r/PhotoshopRequest]({reddit_url})"

	            # Append the triple (post_info, source_image, edited_image),
	            # resizing images before adding them to the results
	            results.append(markdown_text)
	            results.append(self.resize_image(sample["source_image"]))
	            results.append(self.resize_image(sample["edited_image"]))

	        # Pad with blank triples when the dataset is smaller than requested
	        results.extend(["", None, None] * (num_samples - available))

	        return tuple(results)

	    def get_info(self):
	        """Return an HTML snippet with the dataset size and last refresh time."""
	        return f"""
	<div style="text-align: center;">
	<hr>
	Dataset Size: {self.dataset_size} items<br>
	Last Refreshed: {self.last_refresh_time.strftime('%Y-%m-%d %H:%M:%S UTC')}
	</div>
	"""

	def create_interface():
	    """Build the Gradio Blocks UI for browsing random dataset samples.

	    Constructs a DatasetViewer, lays out five (post text, source image,
	    edited image) groups plus a button and an info area, and wires the
	    button to refresh the samples and then the info text.

	    Returns:
	        The assembled ``gr.Blocks`` app (not yet launched).
	    """
	    viewer = DatasetViewer()

	    intro_text = """
	This is a viewer for the PhotoshopRequest dataset. Each sample shows a Photoshop editing request post.
	Click the 'Show New Samples' button to see 5 random samples from the dataset.

	Layout: For each sample, you'll see:
	1. The post title and description
	2. The source image (left) and edited result (right)
	"""

	    with gr.Blocks() as demo:
	        gr.Markdown("# PhotoshopRequest Dataset Viewer")
	        gr.Markdown(intro_text)

	        # Flat component list in the order get_next_samples returns values:
	        # markdown, source image, edited image - repeated for each of 5 slots
	        components = []
	        for slot in range(5):
	            components.append(gr.Markdown())

	            # Source and edited images sit side by side
	            with gr.Row():
	                components += [
	                    gr.Image(label=f"Source Image {slot+1}"),
	                    gr.Image(label=f"Edited Image {slot+1}"),
	                ]

	        sample_button = gr.Button("Show New Samples")
	        info_md = gr.Markdown()

	        # First fill the sample slots, then update the dataset info text
	        refresh_event = sample_button.click(
	            viewer.get_next_samples,
	            outputs=components,
	        )
	        refresh_event.then(viewer.get_info, outputs=[info_md])

	    return demo

	if __name__ == "__main__":
	    # Script entry point: build the UI, then start serving it
	    demo = create_interface()
	    demo.launch()