Spaces:

HuggingFaceFV
/

FineVideo-Explorer

Running on CPU Upgrade

App Files Files Community

FineVideo-Explorer / app.py

mfarre HF staff

adding instructions

0d1d9a1 3 months ago

raw

history blame

26.9 kB

	import html
	import json
	import logging
	import os
	from itertools import groupby
	from typing import Any, Dict, List

	import gradio as gr

	# Configure logging: INFO-level basic configuration plus the module-level
	# logger used by the helper functions in this file.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Relative directory that load_video_list() scans for '.mp4' files.
	video_folder = 'video/'
	# Relative directory holding one '<video_id>.json' metadata file per video.
	metadata_folder = 'metadata/'

	def load_video_list() -> List[Dict[str, str]]:
	    """Build the list of videos offered in the selector.

	    Scans ``video_folder`` for ``.mp4`` files and keeps those that have a
	    readable ``<video_id>.json`` file in ``metadata_folder``. The title is
	    read from the ``content_metadata`` object of that file; a missing or
	    empty title becomes ``'Untitled'``.

	    Videos without a metadata file are skipped silently. Videos whose
	    metadata cannot be read or parsed are skipped with an error log, so one
	    bad file does not prevent the application from starting.

	    Returns:
	        A list of ``{"video_id": ..., "title": ...}`` dicts. The IDs listed
	        in ``custom_order`` come first, in that order; every other video
	        follows, sorted by title.
	    """
	    # Define the custom order for the first five videos
	    custom_order = ['7BhJmDPB7RU', 'PrAwsi3Ldzo', '3rhsSPxQ39c', 'P7WnJZ55sgc', 'g9GtUQs7XUM']
	    featured_rank = {video_id: rank for rank, video_id in enumerate(custom_order)}
	    # Place non-specified videos after the custom ordered ones
	    fallback_rank = len(custom_order) + 1

	    video_list = []
	    for filename in os.listdir(video_folder):
	        if not filename.endswith('.mp4'):
	            continue
	        video_id = os.path.splitext(filename)[0]
	        metadata_path = os.path.join(metadata_folder, f"{video_id}.json")
	        try:
	            with open(metadata_path, 'r', encoding='utf-8') as f:
	                content = json.load(f)['content_metadata']
	            title = content.get('title') or 'Untitled'
	        except FileNotFoundError:
	            # No metadata for this video: it is simply not listed.
	            continue
	        except (OSError, ValueError, KeyError, TypeError, AttributeError) as exc:
	            # ValueError covers both invalid JSON and undecodable bytes.
	            logger.error("Skipping video %s: unusable metadata (%s)", video_id, exc)
	            continue
	        video_list.append({"video_id": video_id, "title": str(title)})

	    video_list.sort(
	        key=lambda item: (featured_rank.get(item['video_id'], fallback_rank), item['title'])
	    )
	    return video_list


	def score_to_emoji(score):
	    """Map a numeric score to one of five emoji.

	    Args:
	        score: A number; ``None`` is treated as 0, matching the default the
	            callers use for a missing score.

	    Returns:
	        "😴" below 0.2, "🙂" below 0.4, "😊" below 0.6, "😃" below 0.8 and
	        "🤩" for everything else.
	    """
	    if score is None:
	        score = 0
	    # (exclusive upper bound, emoji) pairs, checked in ascending order.
	    bands = (
	        (0.2, "😴"),
	        (0.4, "🙂"),
	        (0.6, "😊"),
	        (0.8, "😃"),
	    )
	    for upper_bound, emoji in bands:
	        if score < upper_bound:
	            return emoji
	    return "🤩"
	def load_metadata(video_id: str) -> Dict[str, Any]:
	    """Load the ``content_metadata`` object for one video.

	    Args:
	        video_id: Base name of the metadata file; the file read is
	            ``<metadata_folder>/<video_id>.json``.

	    Returns:
	        The value stored under the top-level ``content_metadata`` key.

	    Raises:
	        FileNotFoundError: The metadata file does not exist.
	        json.JSONDecodeError: The file is not valid JSON.
	        KeyError: The document has no ``content_metadata`` key.

	    Each of these failures is logged before being re-raised.
	    """
	    metadata_path = os.path.join(metadata_folder, f"{video_id}.json")
	    try:
	        # Read as UTF-8 explicitly instead of relying on the locale default.
	        with open(metadata_path, 'r', encoding='utf-8') as f:
	            document = json.load(f)
	    except FileNotFoundError:
	        logger.error(f"Metadata file not found for video ID: {video_id}")
	        raise
	    except json.JSONDecodeError:
	        logger.error(f"Invalid JSON in metadata file for video ID: {video_id}")
	        raise

	    try:
	        return document['content_metadata']
	    except KeyError:
	        logger.error(f"Missing 'content_metadata' in metadata file for video ID: {video_id}")
	        raise

	def timestamp_to_seconds(timestamp: str) -> float:
	    """Convert a timestamp string to a number of seconds.

	    Accepts ``HH:MM:SS[.fff]``, ``MM:SS[.fff]`` and plain seconds
	    (``SS[.fff]``). Hours and minutes must be integers; seconds may carry a
	    fractional part.

	    Args:
	        timestamp: The timestamp text.

	    Returns:
	        The total number of seconds, or ``0.0`` (after logging an error)
	        when the input is not a string or cannot be parsed.
	    """
	    try:
	        *leading, seconds = timestamp.split(':')
	        if len(leading) > 2:
	            raise ValueError("too many ':'-separated fields")
	        # Pair the fields right-to-left with their unit: minutes, then hours.
	        whole = sum(
	            int(field) * unit
	            for field, unit in zip(reversed(leading), (60, 3600))
	        )
	        return whole + float(seconds)
	    except (ValueError, AttributeError):
	        logger.error(f"Invalid timestamp format: {timestamp}")
	        return 0.0

	def format_timestamp(timestamp: str) -> str:
	    """Format an ``HH:MM:SS[.fff]`` timestamp for display.

	    Args:
	        timestamp: The timestamp text; may be empty when an event has no
	            timestamp.

	    Returns:
	        ``"MM:SS"`` when the hour field is zero, ``"HH:MM:SS"`` otherwise
	        (so timestamps past the first hour stay unambiguous). Fractional
	        seconds are truncated. An empty input returns ``""`` without
	        logging; any other unparsable input returns ``""`` after logging an
	        error.
	    """
	    if not timestamp:
	        # A missing timestamp is an expected case, not an error.
	        return ""
	    try:
	        h, m, s = timestamp.split(':')
	        hours, minutes, seconds = int(h), int(m), int(float(s))
	    except (ValueError, AttributeError, TypeError, OverflowError):
	        logger.error(f"Invalid timestamp format: {timestamp}")
	        return ""
	    if hours:
	        return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
	    return f"{minutes:02d}:{seconds:02d}"

	def _escape_text(value: Any) -> str:
	    """Return ``value`` as text that is safe in HTML content and attributes."""
	    return html.escape(str(value))


	def _timestamp_span(item: Dict[str, Any], key: str) -> tuple:
	    """Return the ``(start, end)`` timestamps of the dict stored at ``item[key]``."""
	    span = item.get(key)
	    if not isinstance(span, dict):
	        return '', ''
	    return span.get('start_timestamp', ''), span.get('end_timestamp', '')


	def _timestamp_link(raw_timestamp: Any) -> str:
	    """Return a seek anchor for ``raw_timestamp``, or ``''`` if it has no label."""
	    if not raw_timestamp:
	        return ''
	    label = format_timestamp(raw_timestamp)
	    if not label:
	        return ''
	    return (
	        f'<a href="#" class="timestamp-link" '
	        f'data-timestamp="{_escape_text(raw_timestamp)}">{label}</a>'
	    )


	def _collect_scene_events(scene: Dict[str, Any]) -> List[Dict[str, Any]]:
	    """Flatten the per-scene metadata lists into rows sorted by start timestamp."""
	    # Collect all scene data
	    data_types = [
	        ('Activities', scene.get('activities') or []),
	        ('Props', scene.get('props') or []),
	        ('Mood', [scene.get('mood', {})]),
	        ('Narrative Progression', scene.get('narrativeProgression') or []),
	        ('Video Editing Details', scene.get('videoEditingDetails') or []),
	        ('Thematic Elements', [{'description': scene.get('thematicElements', '')}]),
	        ('Contextual Relevance', [{'description': scene.get('contextualRelevance', '')}]),
	        ('Character Interaction', scene.get('characterInteraction') or []),
	    ]

	    events = []

	    def add_event(event_type, description, start='', end=''):
	        events.append({
	            'timestamp_start': start,
	            'timestamp_end': end,
	            'type': event_type,
	            'description': description,
	        })

	    for data_type, data_list in data_types:
	        for item in data_list:
	            if isinstance(item, str):
	                add_event(data_type, item)
	                continue
	            if not isinstance(item, dict):
	                continue

	            start_time = end_time = ''
	            description = item.get('description', '')

	            if data_type == 'Activities':
	                start_time, end_time = _timestamp_span(item, 'timestamp')
	            elif data_type == 'Props':
	                start_time, end_time = _timestamp_span(item, 'timestamp')
	                description = item.get('name', '')
	            elif data_type == 'Video Editing Details':
	                # This list uses the plural key 'timestamps'.
	                start_time, end_time = _timestamp_span(item, 'timestamps')
	            elif data_type == 'Mood':
	                # Handle mood changes: each key moment becomes its own row,
	                # added before the row for the mood itself.
	                for mood_change in item.get('keyMoments') or []:
	                    if isinstance(mood_change, dict):
	                        add_event(
	                            'Mood Change',
	                            mood_change.get('changeDescription', ''),
	                            start=mood_change.get('timestamp', ''),
	                        )
	            elif data_type == 'Character Interaction':
	                characters = ', '.join(map(str, item.get('characters') or []))
	                description = f"{characters}: {description}"
	            else:
	                start_time = item.get('timestamp', '')

	            add_event(data_type, description, start_time, end_time)

	    # Sort events by timestamp; rows without one sort first. The sort is
	    # stable, so rows with equal timestamps keep their collection order.
	    events.sort(key=lambda event: event['timestamp_start'] or '')
	    return events


	def create_scene_table(scene: Dict[str, Any]) -> str:
	    """Render one scene as an HTML block with a table of its events.

	    The block has a heading (scene id and title), a line with the dynamism
	    and audio-visual correlation emoji and the cast, and one table row per
	    event gathered from the scene's activities, props, mood, narrative
	    progression, editing details, thematic elements, contextual relevance
	    and character interactions. Timestamps are rendered as
	    ``timestamp-link`` anchors carrying the raw value in ``data-timestamp``.

	    All metadata text is HTML-escaped before being inserted.

	    Args:
	        scene: The scene metadata dict.

	    Returns:
	        The HTML fragment for the scene.
	    """
	    dynamism_score = scene.get('dynamismScore') or 0
	    av_correlation = scene.get('audioVisualCorrelation') or 0
	    cast = ", ".join(_escape_text(member) for member in scene.get('cast') or [])
	    scene_id = _escape_text(scene.get('sceneId', 'Unknown'))
	    scene_title = _escape_text(scene.get('title', ''))

	    # Fragments are collected in a list and joined once at the end.
	    parts = [f"""
	<div class="scene-container">
	<h3>Scene {scene_id}: {scene_title}</h3>
	<p>Dynamism: {score_to_emoji(dynamism_score)} Audio-visual correlation: {score_to_emoji(av_correlation)} Cast: {cast}</p>
	<table class="metadata-table">
	<tr>
	<th>Timestamp</th>
	<th>Type</th>
	<th>Description</th>
	</tr>
	"""]

	    for event in _collect_scene_events(scene):
	        start_link = _timestamp_link(event['timestamp_start'])
	        end_link = _timestamp_link(event['timestamp_end'])
	        if end_link:
	            end_link = f' - {end_link}'

	        parts.append(f"""
	<tr>
	<td>{start_link}{end_link}</td>
	<td>{_escape_text(event['type'])}</td>
	<td>{_escape_text(event['description'])}</td>
	</tr>
	""")

	    parts.append("""
	</table>
	</div>
	""")
	    return "".join(parts)

	def create_storylines_table(storylines: Dict[str, Any]) -> str:
	    """Render the storylines metadata as a one-row HTML table.

	    Args:
	        storylines: Dict with an optional ``description`` and an optional
	            ``scenes`` list; ``None`` is treated as an empty dict.

	    Returns:
	        An HTML fragment with the description in the first cell and the
	        comma-separated scene list in the second. Both values are
	        HTML-escaped.
	    """
	    storylines = storylines or {}
	    description = html.escape(str(storylines.get('description', 'No description available')))
	    scenes = html.escape(', '.join(map(str, storylines.get('scenes') or [])))

	    header = """
	<div class="storylines-container">
	<h3>Storylines</h3>
	<table class="metadata-table">
	<tr>
	<th>Storyline</th>
	<th>Scenes Involved</th>
	</tr>
	"""
	    row = f"""
	<tr>
	<td>{description}</td>
	<td>{scenes}</td>
	</tr>
	"""
	    footer = """
	</table>
	</div>
	"""
	    return header + row + footer

	def create_qa_section(qa_list: List[Dict[str, str]]) -> str:
	    """Render the question/answer pairs as an HTML discussion block.

	    Args:
	        qa_list: Dicts with optional ``question`` and ``answer`` entries;
	            ``None`` is treated as an empty list.

	    Returns:
	        An HTML fragment with one ``question`` div and one ``answer`` div
	        per entry. Question and answer text is HTML-escaped.
	    """
	    parts = ["""
	<div class="qa-container">
	<h3>Q&A</h3>
	<div class="chat-discussion">
	"""]

	    for qa in qa_list or []:
	        question = html.escape(str(qa.get('question', '')))
	        answer = html.escape(str(qa.get('answer', '')))
	        parts.append(f"""
	<div class="question">{question}</div>
	<div class="answer">{answer}</div>
	""")

	    parts.append("""
	</div>
	</div>
	""")
	    return "".join(parts)

	def create_trimming_suggestions(suggestions: List[Dict[str, Any]]) -> str:
	    """Render the trimming suggestions as an HTML table.

	    Args:
	        suggestions: Dicts with an optional ``timestamps`` dict
	            (``start_timestamp`` / ``end_timestamp``) and an optional
	            ``description``; ``None`` is treated as an empty list.

	    Returns:
	        An HTML fragment with one row per suggestion. Each timestamp that
	        can be formatted is rendered as a ``timestamp-link`` anchor carrying
	        the raw value in ``data-timestamp``; a missing timestamp produces no
	        anchor. Descriptions and attribute values are HTML-escaped.
	    """
	    def timestamp_link(raw_timestamp):
	        # No anchor at all for a missing or unformattable timestamp.
	        if not raw_timestamp:
	            return ''
	        label = format_timestamp(raw_timestamp)
	        if not label:
	            return ''
	        return (
	            f'<a href="#" class="timestamp-link" '
	            f'data-timestamp="{html.escape(str(raw_timestamp))}">{label}</a>'
	        )

	    parts = ["""
	<div class="trimming-suggestions-container">
	<h3>Trimming Suggestions</h3>
	<table class="metadata-table">
	<tr>
	<th>Timestamp</th>
	<th>Description</th>
	</tr>
	"""]

	    for suggestion in suggestions or []:
	        timestamps = suggestion.get('timestamps')
	        if not isinstance(timestamps, dict):
	            timestamps = {}
	        start_link = timestamp_link(timestamps.get('start_timestamp', ''))
	        end_link = timestamp_link(timestamps.get('end_timestamp', ''))
	        if end_link:
	            end_link = f' - {end_link}'
	        description = html.escape(str(suggestion.get('description', '')))

	        parts.append(f"""
	<tr>
	<td>
	{start_link}
	{end_link}
	</td>
	<td>{description}</td>
	</tr>
	""")

	    parts.append("""
	</table>
	</div>
	""")
	    return "".join(parts)

	def create_filmstrip(scenes: List[Dict[str, Any]], video_duration: float) -> str:
	    """Render the scene timeline ("filmstrip") shown under the player.

	    Each scene becomes an absolutely positioned ``scene-marker`` div whose
	    ``left`` and ``width`` are percentages of ``video_duration``. A
	    ``scrubbing-needle`` div is appended after the markers.

	    Args:
	        scenes: Scene dicts with an optional ``timestamps`` dict
	            (``start_timestamp`` / ``end_timestamp``) and optional ``title``.
	        video_duration: Total duration in seconds; also written to the
	            ``data-duration`` attribute of the container.

	    Returns:
	        The HTML fragment for the filmstrip. A scene without an end
	        timestamp extends to ``video_duration``. When ``video_duration`` is
	        not positive, all markers get 0% position and width instead of
	        raising ``ZeroDivisionError``. Titles are HTML-escaped.
	    """
	    def percent(seconds):
	        # Guard against a zero or negative duration.
	        if video_duration > 0:
	            return (seconds / video_duration) * 100
	        return 0.0

	    parts = [f"""
	<div id="filmstrip-inner" style="position: relative; width: 100%; height: 100%;" data-duration="{video_duration}">
	"""]

	    for scene in scenes:
	        timestamps = scene.get('timestamps')
	        if not isinstance(timestamps, dict):
	            timestamps = {}
	        start_time = timestamp_to_seconds(timestamps.get('start_timestamp', '0:00:00'))
	        end_raw = timestamps.get('end_timestamp')
	        # Fall back to the numeric duration directly rather than parsing
	        # its string form as a timestamp.
	        end_time = timestamp_to_seconds(end_raw) if end_raw else video_duration
	        left_pos = percent(start_time)
	        width = percent(end_time - start_time)
	        title = html.escape(str(scene.get('title', '')))
	        parts.append(f'''
	<div class="scene-marker" style="position: absolute; left: {left_pos}%; width: {width}%; height: 100%; background-color: rgba(0, 0, 255, 0.2); border-right: 1px solid blue; overflow: hidden;">
	<div class="scene-title" style="font-size: 10px; word-wrap: break-word; padding: 2px;">{title}</div>
	</div>
	''')

	    parts.append("""
	<div id="scrubbing-needle" style="position: absolute; width: 2px; height: 100%; background-color: red; top: 0; left: 0; pointer-events: none;"></div>
	</div>
	""")
	    return "".join(parts)

	def process_video(video_id: str):
	    """Build the three HTML fragments shown for one video.

	    Loads the metadata with ``load_metadata`` and renders the player, the
	    filmstrip, and the metadata panel (title, description, character table,
	    scene tables, storylines, Q&A and trimming suggestions). The video
	    duration is taken from the end timestamp of the last scene; a video
	    without scenes gets a duration of 0 and an empty filmstrip.

	    Metadata text is HTML-escaped before being inserted.

	    Args:
	        video_id: ID of the video; used for the metadata lookup and for the
	            video URL.

	    Returns:
	        A tuple ``(video_html, filmstrip_html, metadata_html)``. If anything
	        fails, the exception is logged and ``(None, "", error_message)`` is
	        returned instead of raising.
	    """
	    try:
	        logger.info(f"Processing video with ID: {video_id}")
	        metadata = load_metadata(video_id)

	        # The player streams the copy hosted in the Space repository rather
	        # than reading the local video file.
	        video_url = f"https://huggingface.co/spaces/HuggingFaceFV/FineVideo-Explorer/resolve/main/video/{video_id}.mp4"

	        # Create HTML for video player
	        video_html = f"""
	<div id="video-wrapper">
	<video id="video-player" controls>
	<source src="{html.escape(video_url)}" type="video/mp4">
	Your browser does not support the video tag.
	</video>
	</div>
	"""

	        # Character List Table
	        character_parts = ["""
	<h3>Characters</h3>
	<table class="metadata-table">
	<tr>
	<th>Character</th>
	<th>Description</th>
	</tr>
	"""]
	        for character in metadata.get('characterList') or []:
	            character_parts.append(f"""
	<tr>
	<td>{html.escape(str(character.get('name', '')))}</td>
	<td>{html.escape(str(character.get('description', '')))}</td>
	</tr>
	""")
	        character_parts.append("</table>")
	        character_table = "".join(character_parts)

	        title = html.escape(str(metadata.get('title', 'Untitled')))
	        description = html.escape(str(metadata.get('description', 'No description available')))
	        additional_data = f"""
	<div class="video-info">
	<h2>{title}</h2>
	<p><strong>Description:</strong> {description}</p>
	</div>
	{character_table}
	"""

	        scenes = metadata.get('scenes') or []
	        scenes_output = "".join(create_scene_table(scene) for scene in scenes)

	        storylines_output = create_storylines_table(metadata.get('storylines', {}))
	        qa_output = create_qa_section(metadata.get('qAndA', []))
	        trimming_suggestions_output = create_trimming_suggestions(metadata.get('trimmingSuggestions', []))

	        # Generate filmstrip HTML; the end of the last scene is used as the
	        # duration of the whole video.
	        if scenes:
	            last_timestamps = scenes[-1].get('timestamps') or {}
	            video_duration = timestamp_to_seconds(last_timestamps.get('end_timestamp', '0:00:00'))
	        else:
	            video_duration = 0.0
	        filmstrip_html = create_filmstrip(scenes, video_duration)

	        logger.info("Video processing completed successfully")
	        return video_html, filmstrip_html, additional_data + scenes_output + storylines_output + qa_output + trimming_suggestions_output
	    except Exception as e:
	        # UI boundary: report the failure in the metadata panel instead of
	        # letting the exception propagate to the caller.
	        logger.exception(f"Error processing video: {str(e)}")
	        return None, "", f"Error processing video: {html.escape(str(e))}"

	# Stylesheet passed to gr.Blocks(css=...). The "#..." selectors match the
	# elem_id values given to the Gradio components and the ids used in the
	# generated HTML; the class selectors match the classes emitted by the
	# create_* helpers and the overlay created by the page script.
	css = """
	body {
	margin: 0;
	padding: 0;
	font-family: Arial, sans-serif;
	overflow: hidden;
	}
	.container {
	display: flex;
	flex-direction: column;
	height: 100vh;
	}
	#header {
	display: flex;
	align-items: center;
	padding: 10px;
	background-color: white;
	}
	#logo {
	width: auto;
	height: 150px;
	box-shadow: none !important;
	border: none !important;
	background: none !important;
	object-fit: contain;
	}
	#header-content {
	flex-grow: 1;
	display: flex;
	justify-content: space-between;
	align-items: center;
	}
	#header-content h1 {
	margin: 0;
	font-size: 36px;
	font-weight: bold;
	}
	#header-content a {
	font-size: 18px;
	color: #0066cc;
	text-decoration: none;
	}
	#header-content a:hover {
	text-decoration: underline;
	}
	#top-panel {
	height: 33vh;
	display: flex;
	padding: 10px;
	box-shadow: 0 2px 5px rgba(0,0,0,0.1);
	overflow: hidden;
	}
	#video-list-column {

	max-height: 80vh; /* Adjust as needed */
	overflow-y: auto;
	}
	#video-column {
	width: 70%;
	display: flex;
	flex-direction: column;
	}
	#video-wrapper {
	flex-grow: 1;
	display: flex;
	justify-content: center;
	align-items: center;
	overflow: hidden;
	}
	#video-player {
	width: 100%;
	max-height: calc(33vh - 120px) !important;
	}
	#filmstrip-container {
	width: 100%;
	height: 80px !important;
	background-color: #f0f0f0;
	position: relative;
	overflow: hidden;
	cursor: pointer;
	}
	#filmstrip-container > div,
	#filmstrip-container > div > div,
	#filmstrip-container > div > div > div {
	height: 100% !important;
	}
	#scrollable-content {
	height: 67vh;
	overflow-y: auto;
	padding: 20px;
	}
	#metadata-container {
	margin-top: 20px;
	}
	.content-samples {
	display: flex;
	flex-direction: column;
	overflow-y: auto;
	max-height: 100%;
	}
	.content-samples > .wrap {
	display: flex;
	flex-direction: column;
	}
	.content-samples .hidden {
	display: none !important;
	}
	.content-samples > .wrap > .wrap {
	display: flex !important;
	flex-direction: column !important;
	}
	.content-samples label {
	display: block;
	padding: 10px;
	cursor: pointer;
	border-bottom: 1px solid #ddd;
	white-space: nowrap;
	overflow: hidden;
	text-overflow: ellipsis;
	}
	.content-samples label:hover {
	background-color: #f0f0f0;
	}
	.video-info {
	margin-bottom: 20px;
	}
	.scene-container {
	margin-bottom: 30px;
	}
	.metadata-table {
	width: 100%;
	border-collapse: collapse;
	margin-bottom: 20px;
	}
	.metadata-table th, .metadata-table td {
	border: 1px solid #ddd;
	padding: 8px;
	text-align: left;
	}
	.metadata-table th {
	background-color: #f2f2f2;
	}
	.metadata-table tr:nth-child(even) {
	background-color: #f9f9f9;
	}
	.timestamp-link {
	color: #0066cc;
	text-decoration: none;
	cursor: pointer;
	}
	.timestamp-link:hover {
	text-decoration: underline;
	}
	.chat-discussion {
	background-color: #f0f0f0;
	border-radius: 10px;
	padding: 15px;
	margin-bottom: 20px;
	}
	.question {
	font-weight: bold;
	margin-bottom: 5px;
	}
	.answer {
	margin-bottom: 15px;
	padding-left: 15px;
	}
	.correlation-scores {
	font-size: 18px;
	margin-bottom: 20px;
	}
	#reinitialization-overlay {
	position: fixed;
	top: 0;
	left: 0;
	width: 100%;
	height: 100%;
	background-color: rgba(0, 0, 0, 0.5);
	display: flex;
	justify-content: center;
	align-items: center;
	z-index: 9999;
	color: white;
	font-size: 24px;
	font-weight: bold;
	}

	@media (max-width: 768px) {
	#header {
	flex-direction: column;
	align-items: flex-start;
	}
	#header-content h1 {
	font-size: 24px;
	}
	#header-content p {
	font-size: 14px;
	}
	#logo {
	align-self: flex-end;
	margin-top: 10px;
	}
	#top-panel {
	flex-direction: column;
	}
	#video-list-column, #video-column {
	width: 100%;
	}
	}
	.icon-buttons button {
	display: none !important;
	}

	/* Ensure one element per row in Gradio list */
	#video-list-column .wrap {
	display: flex;
	flex-direction: column;
	}

	#video-list-column .wrap > .wrap {
	display: flex !important;
	flex-direction: column !important;
	}

	#video-list-column label {
	display: block;
	width: 100%;
	}
	"""

	# Client-side script passed to gr.Blocks(head=...). It moves the filmstrip
	# needle as the video plays, seeks the player when the filmstrip or a
	# ".timestamp-link" anchor is clicked, and re-runs that wiring when the
	# selected video or the rendered HTML containers change.
	# The logical-OR operators are written as plain "||": a backslash-escaped
	# form is not valid JavaScript and would stop the whole script from parsing.
	js = """
	<script>
	// Wrap everything in an IIFE to avoid polluting the global scope
	(function() {
	function safeLog(message) {
	if (typeof console !== 'undefined' && console.log) {
	console.log(message);
	}
	}

	function findFilmstripInner(container) {
	if (container.id === 'filmstrip-inner') {
	return container;
	}
	for (let child of container.children) {
	let result = findFilmstripInner(child);
	if (result) {
	return result;
	}
	}
	return null;
	}

	function initializeFilmstrip() {
	//safeLog("Initializing filmstrip...");

	var videoElement = document.querySelector('video');
	var filmstripContainer = document.getElementById('filmstrip-container');
	var filmstripInner = findFilmstripInner(filmstripContainer);
	var scrubbingNeedle = document.getElementById('scrubbing-needle');

	if (!videoElement || !filmstripContainer || !filmstripInner || !scrubbingNeedle) {
	//safeLog("Required elements not found for filmstrip");
	return;
	}

	var videoDuration = parseFloat(filmstripInner.getAttribute('data-duration') || videoElement.duration);

	videoElement.addEventListener('timeupdate', function() {
	var progress = videoElement.currentTime / videoDuration;
	scrubbingNeedle.style.left = (progress * 100) + '%';
	});

	filmstripContainer.addEventListener('click', function(event) {
	var rect = filmstripContainer.getBoundingClientRect();
	var clickPosition = (event.clientX - rect.left) / rect.width;
	videoElement.currentTime = clickPosition * videoDuration;
	});

	//safeLog("Filmstrip initialization complete");
	}

	function initializeTimestampLinks() {
	//safeLog("Initializing timestamp links...");
	var videoElement = document.querySelector('video');
	var links = document.querySelectorAll('.timestamp-link');

	if (!videoElement) {
	//safeLog("Video element not found for timestamp links");
	return;
	}

	if (links.length === 0) {
	//safeLog("No timestamp links found");
	return;
	}

	links.forEach(function(link) {
	link.addEventListener('click', function(e) {
	e.preventDefault();
	var timestamp = this.getAttribute('data-timestamp');
	//safeLog("Timestamp link clicked: " + timestamp);
	var parts = timestamp.split(':');
	var seconds = parseInt(parts[0], 10) * 3600 + parseInt(parts[1], 10) * 60 + parseFloat(parts[2]);
	videoElement.currentTime = seconds;
	});
	});
	//safeLog("Timestamp links initialization complete");
	}

	let isReinitializing = false;

	function showOverlay() {
	let overlay = document.getElementById('reinitialization-overlay');
	if (!overlay) {
	overlay = document.createElement('div');
	overlay.id = 'reinitialization-overlay';
	overlay.style.position = 'fixed';
	overlay.style.top = '0';
	overlay.style.left = '0';
	overlay.style.width = '100%';
	overlay.style.height = '100%';
	overlay.style.backgroundColor = 'rgba(0, 0, 0, 0.5)';
	overlay.style.display = 'flex';
	overlay.style.justifyContent = 'center';
	overlay.style.alignItems = 'center';
	overlay.style.zIndex = '9999';

	const message = document.createElement('div');
	message.textContent = 'Loading assets...';
	message.style.color = 'white';
	message.style.fontSize = '24px';
	message.style.fontWeight = 'bold';

	overlay.appendChild(message);
	document.body.appendChild(overlay);
	}
	overlay.style.display = 'flex';
	}

	function hideOverlay() {
	const overlay = document.getElementById('reinitialization-overlay');
	if (overlay) {
	overlay.style.display = 'none';
	}
	}

	function initializeEverything() {
	if (isReinitializing) {
	//safeLog("Already reinitializing, skipping...");
	return;
	}

	isReinitializing = true;
	showOverlay();
	//safeLog("Initializing everything...");
	try {
	initializeFilmstrip();
	initializeTimestampLinks();
	//safeLog("Initialization complete");
	} catch (error) {
	//safeLog("Error during initialization: " + error.message);
	} finally {
	isReinitializing = false;
	hideOverlay();
	}
	}

	let lastVideoSelection = null;

	function checkVideoSelection() {
	const videoList = document.getElementById('video-list');
	if (videoList) {
	const currentSelection = videoList.querySelector('input:checked');
	if (currentSelection && currentSelection.value !== lastVideoSelection) {
	//safeLog("Video selection changed, reinitializing...");
	lastVideoSelection = currentSelection.value;
	showOverlay();
	setTimeout(() => {
	initializeEverything();
	hideOverlay();
	}, 1000); // Delay to ensure new video is loaded
	}
	}
	}

	// Set up a MutationObserver to watch for changes in the entire document
	const contentObserver = new MutationObserver((mutations) => {
	mutations.forEach((mutation) => {
	if (mutation.type === 'childList' || mutation.type === 'attributes') {
	checkVideoSelection();
	if (mutation.target.id === 'video-container' ||
	mutation.target.id === 'filmstrip-container' ||
	mutation.target.id === 'metadata-container') {
	//safeLog("Relevant content updated, reinitializing...");
	setTimeout(initializeEverything, 100); // Small delay to ensure elements are ready
	}
	}
	});
	});

	contentObserver.observe(document.body, {
	childList: true,
	subtree: true,
	attributes: true,
	attributeFilter: ['value', 'checked']
	});

	// Function to set up Gradio event listeners
	function setupGradioEventListeners() {
	if (typeof gradio !== 'undefined') {
	//safeLog("Setting up Gradio event listeners...");
	gradio('#video-list').change(function(evt) {
	//safeLog("Gradio detected video selection change, reinitializing...");
	setTimeout(initializeEverything, 1000);
	});
	} /*else {
	safeLog("Gradio not found, using fallback method");
	}*/
	}

	// Periodically check for video selection changes
	setInterval(checkVideoSelection, 1000);

	// Initialize everything when the DOM is ready
	document.addEventListener('DOMContentLoaded', function() {
	initializeEverything();
	setupGradioEventListeners();
	checkVideoSelection();
	});

	// Also try to initialize after a short delay, in case DOMContentLoaded has already fired
	setTimeout(function() {
	initializeEverything();
	setupGradioEventListeners();
	checkVideoSelection();
	}, 1000);
	})();
	</script>
	"""

	# Page layout: header row, top panel (video selector plus player and
	# filmstrip) and a scrollable row with the rendered metadata.
	# NOTE(review): the nesting below is reconstructed from the statement order;
	# confirm the placement of the "Click a title" Markdown and of the handler.
	with gr.Blocks(css=css, head=js) as iface:
	    with gr.Row(elem_id="header"):
	        with gr.Column(scale=1):
	            gr.Image("logo.png", elem_id="logo", show_label=False, interactive=False)
	            gr.Markdown("### Click a title to dive into the data:")
	        with gr.Column(elem_id="header-content", scale=10):
	            gr.Markdown("""
	# Exploration page
	## [🔗 Dataset](https://huggingface.co/mfarre/Video-LLaVA-7B-hf-CinePile)
	""")
	    with gr.Row(elem_id="top-panel"):
	        with gr.Column(scale=3, elem_id="video-list-column"):
	            video_list_data = load_video_list()
	            video_list = gr.Radio(
	                label="Content Samples",
	                choices=[video["title"] for video in video_list_data],
	                elem_id="video-list",
	                value=None,
	                container=False
	            )

	        with gr.Column(scale=7, elem_id="video-column"):
	            video_output = gr.HTML(elem_id="video-container")
	            filmstrip_output = gr.HTML(elem_id="filmstrip-container")

	    with gr.Row(elem_id="scrollable-content"):
	        metadata_output = gr.HTML(elem_id="metadata-container")

	    def wrapped_process_video(title: str) -> tuple:
	        """Resolve the selected title to a video ID and render that video.

	        Returns the ``(video_html, filmstrip_html, metadata_html)`` tuple of
	        ``process_video``, or three empty strings when nothing is selected
	        or the title matches no listed video.
	        """
	        if not title:
	            return "", "", ""
	        # The first entry with a matching title wins; a default avoids an
	        # uncaught StopIteration when no entry matches.
	        video_id = next(
	            (video["video_id"] for video in video_list_data if video["title"] == title),
	            None,
	        )
	        if video_id is None:
	            logger.error(f"No video found for title: {title}")
	            return "", "", ""
	        logger.info(f"Processing video with ID: {video_id}")
	        video_html, filmstrip_html, metadata_html = process_video(video_id)
	        return video_html, filmstrip_html, metadata_html

	    video_list.change(
	        fn=wrapped_process_video,
	        inputs=[video_list],
	        outputs=[video_output, filmstrip_output, metadata_output]
	    )

	if __name__ == "__main__":
	    iface.launch()