mfarre's picture
mfarre HF staff
adding instructions
0d1d9a1
raw
history blame
26.9 kB
import gradio as gr
import logging
import json
import os
from typing import Dict, Any, List
from itertools import groupby
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
video_folder = 'video/'
metadata_folder = 'metadata/'
def load_video_list() -> List[Dict[str, str]]:
video_list = []
for filename in os.listdir(video_folder):
if filename.endswith('.mp4'):
video_id = os.path.splitext(filename)[0]
metadata_path = os.path.join(metadata_folder, f"{video_id}.json")
if os.path.exists(metadata_path):
with open(metadata_path, 'r') as f:
metadata = json.load(f)
metadata = metadata['content_metadata']
title = metadata.get('title', 'Untitled')
video_list.append({"video_id": video_id, "title": title})
# Define the custom order for the first five videos
custom_order = ['7BhJmDPB7RU', 'PrAwsi3Ldzo', '3rhsSPxQ39c', 'P7WnJZ55sgc', 'g9GtUQs7XUM']
# Custom sorting function
def custom_sort(item):
try:
return custom_order.index(item['video_id'])
except ValueError:
return len(custom_order) + 1 # Place non-specified videos after the custom ordered ones
# Sort the video list
video_list.sort(key=lambda x: (custom_sort(x), x['title']))
return video_list
def score_to_emoji(score):
if score < 0.2:
return "😴"
elif score < 0.4:
return "🙂"
elif score < 0.6:
return "😊"
elif score < 0.8:
return "😃"
else:
return "🤩"
def load_metadata(video_id: str) -> Dict[str, Any]:
metadata_path = os.path.join(metadata_folder, f"{video_id}.json")
try:
with open(metadata_path, 'r') as f:
asd =json.load(f)
return asd['content_metadata']
except FileNotFoundError:
logger.error(f"Metadata file not found for video ID: {video_id}")
raise
except json.JSONDecodeError:
logger.error(f"Invalid JSON in metadata file for video ID: {video_id}")
raise
def timestamp_to_seconds(timestamp: str) -> float:
try:
h, m, s = timestamp.split(':')
return int(h) * 3600 + int(m) * 60 + float(s)
except ValueError:
logger.error(f"Invalid timestamp format: {timestamp}")
return 0.0
def format_timestamp(timestamp: str) -> str:
try:
h, m, s = timestamp.split(':')
return f"{int(m):02d}:{int(float(s)):02d}"
except Exception as e:
logger.error(f"Invalid timestamp format: {timestamp}")
return ""
def create_scene_table(scene: Dict[str, Any]) -> str:
dynamism_score = scene.get('dynamismScore', 0)
av_correlation = scene.get('audioVisualCorrelation', 0)
cast = ", ".join([cast_member for cast_member in scene.get('cast', [])])
output = f"""
<div class="scene-container">
<h3>Scene {scene.get('sceneId', 'Unknown')}: {scene.get('title', '')}</h3>
<p>Dynamism: {score_to_emoji(dynamism_score)} Audio-visual correlation: {score_to_emoji(av_correlation)} Cast: {cast}</p>
<table class="metadata-table">
<tr>
<th>Timestamp</th>
<th>Type</th>
<th>Description</th>
</tr>
"""
scene_events = []
# Collect all scene data
data_types = [
('Activities', scene.get('activities', [])),
('Props', scene.get('props', [])),
('Mood', [scene.get('mood', {})]),
('Narrative Progression', scene.get('narrativeProgression', [])),
('Video Editing Details', scene.get('videoEditingDetails', [])),
('Thematic Elements', [{'description': scene.get('thematicElements', '')}]),
('Contextual Relevance', [{'description': scene.get('contextualRelevance', '')}]),
('Character Interaction', scene.get('characterInteraction', []))
]
for data_type, data_list in data_types:
for item in data_list:
if isinstance(item, dict):
start_time = ''
end_time = ''
description = ''
if data_type == 'Activities':
start_time = item.get('timestamp', {}).get('start_timestamp', '')
end_time = item.get('timestamp', {}).get('end_timestamp', '')
description = item.get('description', '')
elif data_type == 'Props':
start_time = item.get('timestamp', {}).get('start_timestamp', '')
end_time = item.get('timestamp', {}).get('end_timestamp', '')
description = item.get('name', '')
elif data_type == 'Video Editing Details':
start_time = item.get('timestamps', {}).get('start_timestamp', '')
end_time = item.get('timestamps', {}).get('end_timestamp', '')
description = item.get('description', '')
elif data_type == 'Mood':
description = item.get('description', '')
# Handle mood changes
for mood_change in item.get('keyMoments', []):
if isinstance(mood_change, dict):
scene_events.append({
'timestamp_start': mood_change.get('timestamp', ''),
'timestamp_end': '',
'type': 'Mood Change',
'description': mood_change.get('changeDescription', '')
})
elif data_type == 'Character Interaction':
characters = ', '.join(item.get('characters', []))
description = f"{characters}: {item.get('description', '')}"
else:
start_time = item.get('timestamp', '')
description = item.get('description', '')
scene_events.append({
'timestamp_start': start_time,
'timestamp_end': end_time,
'type': data_type,
'description': description
})
elif isinstance(item, str):
scene_events.append({
'timestamp_start': '',
'timestamp_end': '',
'type': data_type,
'description': item
})
# Sort events by timestamp
scene_events.sort(key=lambda x: x['timestamp_start'] if x['timestamp_start'] else '')
for event in scene_events:
start_time = format_timestamp(event['timestamp_start'])
end_time = format_timestamp(event['timestamp_end'])
start_link = f'<a href="#" class="timestamp-link" data-timestamp="{event["timestamp_start"]}">{start_time}</a>' if start_time else ''
end_link = f' - <a href="#" class="timestamp-link" data-timestamp="{event["timestamp_end"]}">{end_time}</a>' if end_time else ''
output += f"""
<tr>
<td>{start_link}{end_link}</td>
<td>{event['type']}</td>
<td>{event['description']}</td>
</tr>
"""
output += """
</table>
</div>
"""
return output
def create_storylines_table(storylines: Dict[str, Any]) -> str:
output = """
<div class="storylines-container">
<h3>Storylines</h3>
<table class="metadata-table">
<tr>
<th>Storyline</th>
<th>Scenes Involved</th>
</tr>
"""
output += f"""
<tr>
<td>{storylines.get('description', 'No description available')}</td>
<td>{', '.join(map(str, storylines.get('scenes', [])))}</td>
</tr>
"""
output += """
</table>
</div>
"""
return output
def create_qa_section(qa_list: List[Dict[str, str]]) -> str:
output = """
<div class="qa-container">
<h3>Q&A</h3>
<div class="chat-discussion">
"""
for qa in qa_list:
output += f"""
<div class="question">{qa.get('question', '')}</div>
<div class="answer">{qa.get('answer', '')}</div>
"""
output += """
</div>
</div>
"""
return output
def create_trimming_suggestions(suggestions: List[Dict[str, Any]]) -> str:
output = """
<div class="trimming-suggestions-container">
<h3>Trimming Suggestions</h3>
<table class="metadata-table">
<tr>
<th>Timestamp</th>
<th>Description</th>
</tr>
"""
for suggestion in suggestions:
start_time = suggestion.get('timestamps', {}).get('start_timestamp', '')
end_time = suggestion.get('timestamps', {}).get('end_timestamp', '')
start_formatted = format_timestamp(start_time)
end_formatted = format_timestamp(end_time)
output += f"""
<tr>
<td>
<a href="#" class="timestamp-link" data-timestamp="{start_time}">{start_formatted}</a>
{f' - <a href="#" class="timestamp-link" data-timestamp="{end_time}">{end_formatted}</a>' if end_time else ''}
</td>
<td>{suggestion.get('description', '')}</td>
</tr>
"""
output += """
</table>
</div>
"""
return output
def create_filmstrip(scenes: List[Dict[str, Any]], video_duration: float) -> str:
filmstrip_html = f"""
<div id="filmstrip-inner" style="position: relative; width: 100%; height: 100%;" data-duration="{video_duration}">
"""
for scene in scenes:
start_time = timestamp_to_seconds(scene['timestamps'].get('start_timestamp', '0:00:00'))
end_time = timestamp_to_seconds(scene['timestamps'].get('end_timestamp', str(video_duration)))
left_pos = (start_time / video_duration) * 100
width = ((end_time - start_time) / video_duration) * 100
title = scene.get('title', '')
filmstrip_html += f'''
<div class="scene-marker" style="position: absolute; left: {left_pos}%; width: {width}%; height: 100%; background-color: rgba(0, 0, 255, 0.2); border-right: 1px solid blue; overflow: hidden;">
<div class="scene-title" style="font-size: 10px; word-wrap: break-word; padding: 2px;">{title}</div>
</div>
'''
filmstrip_html += """
<div id="scrubbing-needle" style="position: absolute; width: 2px; height: 100%; background-color: red; top: 0; left: 0; pointer-events: none;"></div>
</div>
"""
return filmstrip_html
def process_video(video_id: str):
try:
logger.info(f"Processing video with ID: {video_id}")
metadata = load_metadata(video_id)
# Always use the test URL instead of the actual video file
video_url = f"https://huggingface.co/spaces/HuggingFaceFV/FineVideo-Explorer/resolve/main/video/{video_id}.mp4"
# Create HTML for video player
video_html = f"""
<div id="video-wrapper">
<video id="video-player" controls>
<source src="{video_url}" type="video/mp4">
Your browser does not support the video tag.
</video>
</div>
"""
# Character List Table
character_table = """
<h3>Characters</h3>
<table class="metadata-table">
<tr>
<th>Character</th>
<th>Description</th>
</tr>
"""
for character in metadata.get('characterList', []):
character_table += f"""
<tr>
<td>{character.get('name', '')}</td>
<td>{character.get('description', '')}</td>
</tr>
"""
character_table += "</table>"
additional_data = f"""
<div class="video-info">
<h2>{metadata.get('title', 'Untitled')}</h2>
<p><strong>Description:</strong> {metadata.get('description', 'No description available')}</p>
</div>
{character_table}
"""
scenes_output = ""
for scene in metadata.get('scenes', []):
scenes_output += create_scene_table(scene)
storylines_output = create_storylines_table(metadata.get('storylines', {}))
qa_output = create_qa_section(metadata.get('qAndA', []))
trimming_suggestions_output = create_trimming_suggestions(metadata.get('trimmingSuggestions', []))
# Generate filmstrip HTML
last_scene = metadata['scenes'][-1]
video_duration = timestamp_to_seconds(last_scene['timestamps'].get('end_timestamp', '0:00:00'))
filmstrip_html = create_filmstrip(metadata['scenes'], video_duration)
logger.info("Video processing completed successfully")
return video_html, filmstrip_html, additional_data + scenes_output + storylines_output + qa_output + trimming_suggestions_output
except Exception as e:
logger.exception(f"Error processing video: {str(e)}")
return None, "", f"Error processing video: {str(e)}"
css = """
body {
margin: 0;
padding: 0;
font-family: Arial, sans-serif;
overflow: hidden;
}
.container {
display: flex;
flex-direction: column;
height: 100vh;
}
#header {
display: flex;
align-items: center;
padding: 10px;
background-color: white;
}
#logo {
width: auto;
height: 150px;
box-shadow: none !important;
border: none !important;
background: none !important;
object-fit: contain;
}
#header-content {
flex-grow: 1;
display: flex;
justify-content: space-between;
align-items: center;
}
#header-content h1 {
margin: 0;
font-size: 36px;
font-weight: bold;
}
#header-content a {
font-size: 18px;
color: #0066cc;
text-decoration: none;
}
#header-content a:hover {
text-decoration: underline;
}
#top-panel {
height: 33vh;
display: flex;
padding: 10px;
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
overflow: hidden;
}
#video-list-column {
max-height: 80vh; /* Adjust as needed */
overflow-y: auto;
}
#video-column {
width: 70%;
display: flex;
flex-direction: column;
}
#video-wrapper {
flex-grow: 1;
display: flex;
justify-content: center;
align-items: center;
overflow: hidden;
}
#video-player {
width: 100%;
max-height: calc(33vh - 120px) !important;
}
#filmstrip-container {
width: 100%;
height: 80px !important;
background-color: #f0f0f0;
position: relative;
overflow: hidden;
cursor: pointer;
}
#filmstrip-container > div,
#filmstrip-container > div > div,
#filmstrip-container > div > div > div {
height: 100% !important;
}
#scrollable-content {
height: 67vh;
overflow-y: auto;
padding: 20px;
}
#metadata-container {
margin-top: 20px;
}
.content-samples {
display: flex;
flex-direction: column;
overflow-y: auto;
max-height: 100%;
}
.content-samples > .wrap {
display: flex;
flex-direction: column;
}
.content-samples .hidden {
display: none !important;
}
.content-samples > .wrap > .wrap {
display: flex !important;
flex-direction: column !important;
}
.content-samples label {
display: block;
padding: 10px;
cursor: pointer;
border-bottom: 1px solid #ddd;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
.content-samples label:hover {
background-color: #f0f0f0;
}
.video-info {
margin-bottom: 20px;
}
.scene-container {
margin-bottom: 30px;
}
.metadata-table {
width: 100%;
border-collapse: collapse;
margin-bottom: 20px;
}
.metadata-table th, .metadata-table td {
border: 1px solid #ddd;
padding: 8px;
text-align: left;
}
.metadata-table th {
background-color: #f2f2f2;
}
.metadata-table tr:nth-child(even) {
background-color: #f9f9f9;
}
.timestamp-link {
color: #0066cc;
text-decoration: none;
cursor: pointer;
}
.timestamp-link:hover {
text-decoration: underline;
}
.chat-discussion {
background-color: #f0f0f0;
border-radius: 10px;
padding: 15px;
margin-bottom: 20px;
}
.question {
font-weight: bold;
margin-bottom: 5px;
}
.answer {
margin-bottom: 15px;
padding-left: 15px;
}
.correlation-scores {
font-size: 18px;
margin-bottom: 20px;
}
#reinitialization-overlay {
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 100%;
background-color: rgba(0, 0, 0, 0.5);
display: flex;
justify-content: center;
align-items: center;
z-index: 9999;
color: white;
font-size: 24px;
font-weight: bold;
}
@media (max-width: 768px) {
#header {
flex-direction: column;
align-items: flex-start;
}
#header-content h1 {
font-size: 24px;
}
#header-content p {
font-size: 14px;
}
#logo {
align-self: flex-end;
margin-top: 10px;
}
#top-panel {
flex-direction: column;
}
#video-list-column, #video-column {
width: 100%;
}
}
.icon-buttons button {
display: none !important;
}
/* Ensure one element per row in Gradio list */
#video-list-column .wrap {
display: flex;
flex-direction: column;
}
#video-list-column .wrap > .wrap {
display: flex !important;
flex-direction: column !important;
}
#video-list-column label {
display: block;
width: 100%;
}
"""
js = """
<script>
// Wrap everything in an IIFE to avoid polluting the global scope
(function() {
function safeLog(message) {
if (typeof console !== 'undefined' && console.log) {
console.log(message);
}
}
function findFilmstripInner(container) {
if (container.id === 'filmstrip-inner') {
return container;
}
for (let child of container.children) {
let result = findFilmstripInner(child);
if (result) {
return result;
}
}
return null;
}
function initializeFilmstrip() {
//safeLog("Initializing filmstrip...");
var videoElement = document.querySelector('video');
var filmstripContainer = document.getElementById('filmstrip-container');
var filmstripInner = findFilmstripInner(filmstripContainer);
var scrubbingNeedle = document.getElementById('scrubbing-needle');
if (!videoElement || !filmstripContainer || !filmstripInner || !scrubbingNeedle) {
//safeLog("Required elements not found for filmstrip");
return;
}
var videoDuration = parseFloat(filmstripInner.getAttribute('data-duration') || videoElement.duration);
videoElement.addEventListener('timeupdate', function() {
var progress = videoElement.currentTime / videoDuration;
scrubbingNeedle.style.left = (progress * 100) + '%';
});
filmstripContainer.addEventListener('click', function(event) {
var rect = filmstripContainer.getBoundingClientRect();
var clickPosition = (event.clientX - rect.left) / rect.width;
videoElement.currentTime = clickPosition * videoDuration;
});
//safeLog("Filmstrip initialization complete");
}
function initializeTimestampLinks() {
//safeLog("Initializing timestamp links...");
var videoElement = document.querySelector('video');
var links = document.querySelectorAll('.timestamp-link');
if (!videoElement) {
//safeLog("Video element not found for timestamp links");
return;
}
if (links.length === 0) {
//safeLog("No timestamp links found");
return;
}
links.forEach(function(link) {
link.addEventListener('click', function(e) {
e.preventDefault();
var timestamp = this.getAttribute('data-timestamp');
//safeLog("Timestamp link clicked: " + timestamp);
var parts = timestamp.split(':');
var seconds = parseInt(parts[0], 10) * 3600 + parseInt(parts[1], 10) * 60 + parseFloat(parts[2]);
videoElement.currentTime = seconds;
});
});
//safeLog("Timestamp links initialization complete");
}
let isReinitializing = false;
function showOverlay() {
let overlay = document.getElementById('reinitialization-overlay');
if (!overlay) {
overlay = document.createElement('div');
overlay.id = 'reinitialization-overlay';
overlay.style.position = 'fixed';
overlay.style.top = '0';
overlay.style.left = '0';
overlay.style.width = '100%';
overlay.style.height = '100%';
overlay.style.backgroundColor = 'rgba(0, 0, 0, 0.5)';
overlay.style.display = 'flex';
overlay.style.justifyContent = 'center';
overlay.style.alignItems = 'center';
overlay.style.zIndex = '9999';
const message = document.createElement('div');
message.textContent = 'Loading assets...';
message.style.color = 'white';
message.style.fontSize = '24px';
message.style.fontWeight = 'bold';
overlay.appendChild(message);
document.body.appendChild(overlay);
}
overlay.style.display = 'flex';
}
function hideOverlay() {
const overlay = document.getElementById('reinitialization-overlay');
if (overlay) {
overlay.style.display = 'none';
}
}
function initializeEverything() {
if (isReinitializing) {
//safeLog("Already reinitializing, skipping...");
return;
}
isReinitializing = true;
showOverlay();
//safeLog("Initializing everything...");
try {
initializeFilmstrip();
initializeTimestampLinks();
//safeLog("Initialization complete");
} catch (error) {
//safeLog("Error during initialization: " + error.message);
} finally {
isReinitializing = false;
hideOverlay();
}
}
let lastVideoSelection = null;
function checkVideoSelection() {
const videoList = document.getElementById('video-list');
if (videoList) {
const currentSelection = videoList.querySelector('input:checked');
if (currentSelection && currentSelection.value !== lastVideoSelection) {
//safeLog("Video selection changed, reinitializing...");
lastVideoSelection = currentSelection.value;
showOverlay();
setTimeout(() => {
initializeEverything();
hideOverlay();
}, 1000); // Delay to ensure new video is loaded
}
}
}
// Set up a MutationObserver to watch for changes in the entire document
const contentObserver = new MutationObserver((mutations) => {
mutations.forEach((mutation) => {
if (mutation.type === 'childList' || mutation.type === 'attributes') {
checkVideoSelection();
if (mutation.target.id === 'video-container' ||
mutation.target.id === 'filmstrip-container' ||
mutation.target.id === 'metadata-container') {
//safeLog("Relevant content updated, reinitializing...");
setTimeout(initializeEverything, 100); // Small delay to ensure elements are ready
}
}
});
});
contentObserver.observe(document.body, {
childList: true,
subtree: true,
attributes: true,
attributeFilter: ['value', 'checked']
});
// Function to set up Gradio event listeners
function setupGradioEventListeners() {
if (typeof gradio !== 'undefined') {
//safeLog("Setting up Gradio event listeners...");
gradio('#video-list').change(function(evt) {
//safeLog("Gradio detected video selection change, reinitializing...");
setTimeout(initializeEverything, 1000);
});
} /*else {
safeLog("Gradio not found, using fallback method");
}*/
}
// Periodically check for video selection changes
setInterval(checkVideoSelection, 1000);
// Initialize everything when the DOM is ready
document.addEventListener('DOMContentLoaded', function() {
initializeEverything();
setupGradioEventListeners();
checkVideoSelection();
});
// Also try to initialize after a short delay, in case DOMContentLoaded has already fired
setTimeout(function() {
initializeEverything();
setupGradioEventListeners();
checkVideoSelection();
}, 1000);
})();
</script>
"""
with gr.Blocks(css=css, head=js) as iface:
with gr.Row(elem_id="header"):
with gr.Column(scale=1):
gr.Image("logo.png", elem_id="logo", show_label=False, interactive=False)
gr.Markdown("### Click a title to dive into the data:")
with gr.Column(elem_id="header-content", scale=10):
gr.Markdown("""
# Exploration page
## [🔗 Dataset](https://huggingface.co/mfarre/Video-LLaVA-7B-hf-CinePile)
""")
with gr.Row(elem_id="top-panel"):
with gr.Column(scale=3, elem_id="video-list-column"):
video_list_data = load_video_list()
video_list = gr.Radio(
label="Content Samples",
choices=[video["title"] for video in video_list_data],
elem_id="video-list",
value=None,
container=False
)
with gr.Column(scale=7, elem_id="video-column"):
video_output = gr.HTML(elem_id="video-container")
filmstrip_output = gr.HTML(elem_id="filmstrip-container")
with gr.Row(elem_id="scrollable-content"):
metadata_output = gr.HTML(elem_id="metadata-container")
def wrapped_process_video(title: str) -> tuple:
if not title:
return "", "", ""
video_id = next(video["video_id"] for video in video_list_data if video["title"] == title)
logging.info(f"Processing video with ID: {video_id}")
video_html, filmstrip_html, metadata_html = process_video(video_id)
return video_html, filmstrip_html, metadata_html
video_list.change(
fn=wrapped_process_video,
inputs=[video_list],
outputs=[video_output, filmstrip_output, metadata_output]
)
if __name__ == "__main__":
iface.launch()