sHORTgpt

Running

App Files Files Community

sHORTgpt / shortGPT /audio /audio_utils.py

wiydarrr

Upload folder using huggingface_hub

5f685fd verified 5 months ago

raw

history blame

3.42 kB

	import os
	import subprocess

	import yt_dlp

	from shortGPT.audio.audio_duration import get_asset_duration

	CONST_CHARS_PER_SEC = 20.5 # Arrived to this result after whispering a ton of shorts and calculating the average number of characters per second of speech.

	WHISPER_MODEL = None


	def downloadYoutubeAudio(url, outputFile):
	ydl_opts = {
	"quiet": True,
	"no_warnings": True,
	"no_color": True,
	"no_call_home": True,
	"no_check_certificate": True,
	"format": "bestaudio/best",
	"outtmpl": outputFile
	}
	try:
	with yt_dlp.YoutubeDL(ydl_opts) as ydl:
	dictMeta = ydl.extract_info(
	url,
	download=True)
	if (not os.path.exists(outputFile)):
	raise Exception("Audio Download Failed")
	return outputFile, dictMeta['duration']
	except Exception as e:
	print("Failed downloading audio from the following video/url", e.args[0])
	return None


	def speedUpAudio(tempAudioPath, outputFile, expected_duration=None): # Speeding up the audio to make it under 60secs, otherwise the output video is not considered as a short.
	tempAudioPath, duration = get_asset_duration(tempAudioPath, False)
	if not expected_duration:
	if (duration > 57):
	subprocess.run(['ffmpeg', '-i', tempAudioPath, '-af', f'atempo={(duration/57):.5f}', outputFile])
	else:
	subprocess.run(['ffmpeg', '-i', tempAudioPath, outputFile])
	else:
	subprocess.run(['ffmpeg', '-i', tempAudioPath, '-af', f'atempo={(duration/expected_duration):.5f}', outputFile])
	if (os.path.exists(outputFile)):
	return outputFile


	def ChunkForAudio(alltext, chunk_size=2500):
	alltext_list = alltext.split('.')
	chunks = []
	curr_chunk = ''
	for text in alltext_list:
	if len(curr_chunk) + len(text) <= chunk_size:
	curr_chunk += text + '.'
	else:
	chunks.append(curr_chunk)
	curr_chunk = text + '.'
	if curr_chunk:
	chunks.append(curr_chunk)
	return chunks


	def audioToText(filename, model_size="base"):
	from whisper_timestamped import load_model, transcribe_timestamped
	global WHISPER_MODEL
	if (WHISPER_MODEL == None):
	WHISPER_MODEL = load_model(model_size)
	gen = transcribe_timestamped(WHISPER_MODEL, filename, verbose=False, fp16=False)
	return gen


	def getWordsPerSec(filename):
	a = audioToText(filename)
	return len(a['text'].split()) / a['segments'][-1]['end']


	def getCharactersPerSec(filename):
	a = audioToText(filename)
	return len(a['text']) / a['segments'][-1]['end']

	def run_background_audio_split(sound_file_path):
	try:
	# Run spleeter command
	# Get absolute path of sound file
	output_dir = os.path.dirname(sound_file_path)
	command = f"spleeter separate -p spleeter:2stems -o '{output_dir}' '{sound_file_path}'"

	process = subprocess.run(command, shell=True, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

	# If spleeter runs successfully, return the path to the background music file
	if process.returncode == 0:
	return os.path.join(output_dir, sound_file_path.split("/")[-1].split(".")[0], "accompaniment.wav")
	else:
	return None
	except Exception:
	# If spleeter crashes, return None
	return None