Spaces:

fffiloni
/

miniGPT4-Video-Zero

Runtime error

App Files Files Community

miniGPT4-Video-Zero / minigpt4_video_inference.py

fffiloni

Upload 164 files

2ada650 verified 2 months ago

raw history blame contribute delete

No virus

3.33 kB

	import json
	from tqdm import tqdm
	from pytubefix import YouTube

	import xml.etree.ElementTree as ET
	import os

	# Load the annotation file; `data` is what the download loop iterates over.
	with open('VideoInstruct100K.json', 'r') as f:
	    data = json.load(f)

	# Record which video ids are already on disk so a rerun can skip them.
	# The directory is created first: os.listdir raises FileNotFoundError when
	# 'videos' does not exist yet, which is the case on a first run.
	os.makedirs('videos', exist_ok=True)
	existed_video_id = {}
	for video_name in os.listdir('videos'):
	    # Keep the part of the file name before the first dot as the id.
	    video_id = video_name.split('.')[0]
	    existed_video_id[video_id] = True



	def download_video_with_subtitles(video_id):
	    """Download a YouTube video and its English caption tracks.

	    The video is saved as ``videos/<video_id>.mp4``. Every caption track
	    whose language code contains ``'en'`` is then downloaded in XML form
	    (``srt=False``) into ``subtitles_xml`` using ``video_id`` as the title.

	    Args:
	        video_id: Value of the ``v=`` query parameter of the watch URL.

	    Returns:
	        A ``(video_downloaded, caption_downloaded)`` tuple of booleans.
	        ``(False, False)`` is returned as soon as the video download fails;
	        captions are only attempted after the video was downloaded.
	    """
	    video_filename = f"{video_id}.mp4"
	    try:
	        # The YouTube object is built inside the try so that a failure
	        # there is reported like any other failed download instead of
	        # propagating and aborting the caller's batch loop.
	        yt = YouTube(f'https://www.youtube.com/watch?v={video_id}')
	        # Get the video stream with the highest resolution and download it.
	        stream = yt.streams.get_highest_resolution()
	        stream.download(output_path='videos', filename=video_filename)
	    except Exception as e:
	        print(f"Error downloading video {video_id}: {str(e)}")
	        return False, False

	    # Captions are best-effort too: an error while listing or fetching
	    # them must not discard the video that was just downloaded.
	    caption_downloaded = False
	    try:
	        for caption in yt.captions.all():
	            # Select only English captions.
	            if 'en' in caption.code:
	                caption.download(title=f"{video_id}", output_path='subtitles_xml', srt=False)
	                caption_downloaded = True
	    except Exception as e:
	        print(f"Error downloading captions for {video_id}: {str(e)}")
	    return True, caption_downloaded
	def convert_xml_vtt(xml_path, vtt_path):
	    """Convert a caption XML file into a WebVTT file.

	    ``<p>`` elements are consumed in pairs, in document order: the first
	    of each pair supplies the cue start time (its ``t`` attribute, in
	    milliseconds) and the cue text (the stripped text of its ``<s>``
	    descendants joined by single spaces); the second supplies only the
	    cue end time through its own ``t`` attribute. A trailing unpaired
	    ``<p>`` produces no cue.

	    Args:
	        xml_path: Path of the XML caption file to read.
	        vtt_path: Path of the WebVTT file to write (UTF-8).

	    Raises:
	        xml.etree.ElementTree.ParseError: If the XML cannot be parsed.
	        TypeError: If a ``<p>`` element has no ``t`` attribute.
	    """
	    root = ET.parse(xml_path).getroot()

	    def ms_to_vtt_time(milliseconds):
	        """Format a millisecond offset as a WebVTT timestamp."""
	        seconds, milliseconds = divmod(milliseconds, 1000)
	        minutes, seconds = divmod(seconds, 60)
	        hours, minutes = divmod(minutes, 60)
	        # WebVTT limits the minutes field to 00-59, so offsets of one hour
	        # or more need the hh:mm:ss.ttt form; shorter ones keep mm:ss.ttt.
	        if hours:
	            return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{milliseconds:03d}"
	        return f"{minutes:02d}:{seconds:02d}.{milliseconds:03d}"

	    vtt_subtitle = []
	    toggle = True  # True: next <p> opens a cue; False: next <p> closes it.
	    for p in root.findall(".//p"):
	        if toggle:
	            start_time = int(p.get("t"))
	            # An empty <s/> element has text None; treat it as "".
	            subtitle_text = " ".join((s.text or "").strip() for s in p.findall(".//s"))
	        else:
	            end_time = int(p.get("t"))
	            # Format and append the VTT entry to the list.
	            vtt_subtitle.append(f"{ms_to_vtt_time(start_time)} --> {ms_to_vtt_time(end_time)}\n{subtitle_text}\n")
	        toggle = not toggle

	    # Join the VTT entries into a single string.
	    vtt_content = "WEBVTT\n\n" + "\n".join(vtt_subtitle)

	    # Save the VTT content to a file.
	    with open(vtt_path, "w", encoding="utf-8") as vtt_file:
	        vtt_file.write(vtt_content)
	import os
	# Make sure every output directory exists before anything is written.
	os.makedirs('videos', exist_ok=True)
	os.makedirs('subtitles_vtt', exist_ok=True)
	os.makedirs('subtitles_xml', exist_ok=True)
	for video_path in tqdm(data, desc='Downloading videos'):
	    # The id is the last path component up to its first dot.
	    video_id = video_path.split('/')[-1].split('.')[0]
	    if existed_video_id.get(video_id, False):
	        continue
	    video_downloaded, caption_downloaded = download_video_with_subtitles(video_id)
	    if not caption_downloaded:
	        continue
	    # Convert xml to vtt. caption_downloaded is set for any caption code
	    # containing 'en', so the '(a.en)' file is not guaranteed to exist;
	    # skip the conversion instead of letting a missing file end the batch.
	    xml_file_path = f'subtitles_xml/{video_id} (a.en).xml'
	    if not os.path.isfile(xml_file_path):
	        print(f"No '(a.en)' caption file for {video_id}; skipping VTT conversion")
	        continue
	    convert_xml_vtt(xml_file_path, f'subtitles_vtt/{video_id}.vtt')