ebook2audiobookXTTS

Running

App Files Files Community

ebook2audiobookXTTS / ebook2audiobookXTTS /gradio_gui_with_email_and_que.py

drewThomasson

Upload 11 files

306e52a verified about 2 months ago

raw

history blame

25.9 kB

	print("starting...")
	import ebooklib
	from ebooklib import epub

	import os
	import subprocess
	import ebooklib
	from ebooklib import epub
	from bs4 import BeautifulSoup
	import re
	import csv
	import nltk

	import os
	import subprocess
	import sys
	import torchaudio

	import os
	import torch
	from TTS.api import TTS
	from nltk.tokenize import sent_tokenize
	from pydub import AudioSegment

	from tqdm import tqdm



	import os
	import subprocess
	import ebooklib
	from ebooklib import epub
	from bs4 import BeautifulSoup
	import re
	import csv
	import nltk

	from bs4 import BeautifulSoup
	import os
	import shutil
	import subprocess
	import re
	from pydub import AudioSegment
	import tempfile
	import urllib.request
	import zipfile
	import requests
	from tqdm import tqdm
	import nltk
	from nltk.tokenize import sent_tokenize
	import torch
	import torchaudio
	import gradio as gr
	from threading import Lock, Thread
	from queue import Queue
	import smtplib
	from email.mime.text import MIMEText


	import os
	import shutil
	import subprocess
	import re
	from pydub import AudioSegment
	import tempfile
	from pydub import AudioSegment
	import os
	import nltk
	from nltk.tokenize import sent_tokenize
	import sys
	import torch
	from TTS.api import TTS
	from TTS.tts.configs.xtts_config import XttsConfig
	from TTS.tts.models.xtts import Xtts
	from tqdm import tqdm
	import gradio as gr
	from gradio import Progress
	import urllib.request
	import zipfile


	# Fallback reference voice used when a caller supplies no target voice file.
	default_target_voice_path = "default_voice.wav" # Ensure this is a valid path
	# Fallback language code used when a caller supplies no language.
	default_language_code = "en"


	# Use the GPU when CUDA is available, otherwise fall back to the CPU.
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	print(f"Device selected is: {device}")

	nltk.download('punkt') # Ensure necessary models are downloaded

	# Global variables for queue management
	# `queue` carries request tuples from enqueue_request to the handle_queue worker;
	# `queue_lock` is held by enqueue_request while it enqueues and reads the queue size.
	queue = Queue()
	queue_lock = Lock()

	# Function to send an email with the download link
	def send_email(to_address, download_link):
	    """Email the audiobook download link to ``to_address``.

	    SMTP settings are read from the environment variables ``SMTP_FROM``,
	    ``SMTP_PASSWORD``, ``SMTP_HOST`` and ``SMTP_PORT``; each falls back to the
	    placeholder value that used to be hard-coded here, so existing setups
	    behave as before until the variables are set.

	    Args:
	        to_address: Recipient address (comes from the user-facing form).
	        download_link: URL placed in the message body.

	    Returns:
	        True if the message was handed to the SMTP server, False otherwise.
	        Failures are printed, never raised.
	    """
	    # The address is user input: CR/LF in it would allow injecting extra headers.
	    if not to_address or '\r' in to_address or '\n' in to_address:
	        print(f"Failed to send email: invalid recipient address {to_address!r}")
	        return False

	    from_address = os.environ.get("SMTP_FROM", "your_email@example.com")
	    password = os.environ.get("SMTP_PASSWORD", "your_password")
	    smtp_host = os.environ.get("SMTP_HOST", "smtp.example.com")

	    subject = "Your Audiobook is Ready"
	    body = f"Your audiobook has been processed. You can download it from the following link: {download_link}"

	    msg = MIMEText(body)
	    msg['Subject'] = subject
	    msg['From'] = from_address
	    msg['To'] = to_address

	    try:
	        smtp_port = int(os.environ.get("SMTP_PORT", "587"))
	        # The timeout keeps an unreachable server from blocking the worker forever.
	        with smtplib.SMTP(smtp_host, smtp_port, timeout=30) as server:
	            server.starttls()
	            server.login(from_address, password)
	            server.sendmail(from_address, [to_address], msg.as_string())
	    except Exception as e:
	        # Best effort: a mail failure must not abort the conversion pipeline.
	        print(f"Failed to send email: {e}")
	        return False

	    print(f"Email sent to {to_address}")
	    return True

	# Function to download and extract the custom model
	def download_and_extract_zip(url, extract_to='.'):
	    """Download a model zip from ``url`` and unpack it flat into ``extract_to``.

	    Every file in the archive is moved directly into ``extract_to`` (archive
	    sub-directories are flattened and then removed), the zip itself is
	    deleted, and the presence of ``model.pth``, ``config.json`` and
	    ``vocab.json_`` is reported.

	    Args:
	        url: http(s) URL of the zip archive (user supplied).
	        extract_to: Destination directory; created if missing.

	    Returns:
	        True when all three required files are present afterwards, False on
	        any failure. Errors are printed, never raised.
	    """
	    from urllib.parse import urlparse

	    # The URL is user input and urlretrieve would also honour file:// and
	    # similar schemes, so only plain web downloads are allowed.
	    scheme = urlparse(str(url)).scheme.lower()
	    if scheme not in ('http', 'https'):
	        print(f"Failed to download or extract zip file: unsupported URL scheme {scheme!r}")
	        return False

	    try:
	        os.makedirs(extract_to, exist_ok=True)
	        zip_path = os.path.join(extract_to, 'model.zip')

	        with tqdm(unit='B', unit_scale=True, miniters=1, desc="Downloading Model") as t:
	            def reporthook(blocknum, blocksize, totalsize):
	                # totalsize is not positive when the server sends no length.
	                if totalsize > 0:
	                    t.total = totalsize
	                t.update(blocknum * blocksize - t.n)
	            urllib.request.urlretrieve(url, zip_path, reporthook=reporthook)
	        print(f"Downloaded zip file to {zip_path}")

	        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
	            # Directory entries are skipped up front so the bar total is exact.
	            members = [name for name in zip_ref.namelist() if not name.endswith('/')]
	            with tqdm(total=len(members), unit="file", desc="Extracting Files") as t:
	                for member in members:
	                    extracted_path = zip_ref.extract(member, extract_to)
	                    base_file_path = os.path.join(extract_to, os.path.basename(member))
	                    # os.replace overwrites an existing target on every platform.
	                    os.replace(extracted_path, base_file_path)
	                    t.update(1)

	        os.remove(zip_path)
	        # Bottom-up walk so nested, now-empty archive folders can be removed.
	        for root, dirs, _ in os.walk(extract_to, topdown=False):
	            for name in dirs:
	                try:
	                    os.rmdir(os.path.join(root, name))
	                except OSError:
	                    # Not empty (pre-existing content): leave it alone.
	                    pass
	        print(f"Extracted files to {extract_to}")

	        required_files = ['model.pth', 'config.json', 'vocab.json_']
	        missing_files = [file for file in required_files if not os.path.exists(os.path.join(extract_to, file))]

	        if not missing_files:
	            print("All required files (model.pth, config.json, vocab.json_) found.")
	        else:
	            print(f"Missing files: {', '.join(missing_files)}")
	        return not missing_files

	    except Exception as e:
	        print(f"Failed to download or extract zip file: {e}")
	        return False

	# Function to check if a folder is empty
	def is_folder_empty(folder_path):
	    """Tell whether ``folder_path`` is a directory without any entries.

	    Returns True or False for an existing directory. For anything else a
	    message is printed and None is returned.
	    """
	    # isdir() is already False for a missing path, so one check covers both cases.
	    if not os.path.isdir(folder_path):
	        print(f"The path {folder_path} is not a valid folder.")
	        return None
	    entries = os.listdir(folder_path)
	    return len(entries) == 0

	# Function to remove a folder and its contents
	def remove_folder_with_contents(folder_path):
	    """Delete ``folder_path`` recursively, printing the outcome instead of raising."""
	    try:
	        shutil.rmtree(folder_path)
	    except Exception as error:
	        print(f"Error removing {folder_path}: {error}")
	    else:
	        print(f"Successfully removed {folder_path} and all of its contents.")

	# Function to wipe the contents of a folder
	def wipe_folder(folder_path):
	    """Remove everything inside ``folder_path`` while keeping the folder itself.

	    A missing folder is reported with a message and otherwise ignored.
	    """
	    if not os.path.exists(folder_path):
	        print(f"The folder {folder_path} does not exist.")
	        return

	    for entry_name in os.listdir(folder_path):
	        entry_path = os.path.join(folder_path, entry_name)
	        if os.path.isfile(entry_path):
	            os.remove(entry_path)
	            print(f"Removed file: {entry_path}")
	            continue
	        if os.path.isdir(entry_path):
	            shutil.rmtree(entry_path)
	            print(f"Removed directory and its contents: {entry_path}")

	    print(f"All contents wiped from {folder_path}.")

	# Function to create M4B from chapters
	def create_m4b_from_chapters(input_dir, ebook_file, output_dir):
	    """Assemble the chapter WAV files in ``input_dir`` into one chaptered M4B.

	    The ``.wav`` files are ordered by the first number in their file name,
	    concatenated, and encoded with ffmpeg to
	    ``<output_dir>/<ebook base name>.m4b`` with one chapter marker per input
	    file. A cover image is embedded when ``ebook-meta`` can extract one.

	    Args:
	        input_dir: Directory holding the per-chapter ``.wav`` files.
	        ebook_file: Path of the source eBook (used for the cover and the
	            output file name).
	        output_dir: Directory that receives the ``.m4b`` file.

	    Raises:
	        FileNotFoundError: If ``input_dir`` contains no ``.wav`` files.
	        subprocess.CalledProcessError: If ffmpeg fails.
	    """
	    def sort_key(chapter_file):
	        # Look at the file name only, so digits in directory names cannot
	        # influence the chapter order.
	        numbers = re.findall(r'\d+', os.path.basename(chapter_file))
	        return int(numbers[0]) if numbers else 0

	    def extract_metadata_and_cover(ebook_path):
	        """Return the path of the extracted cover image, or None."""
	        try:
	            cover_path = ebook_path.rsplit('.', 1)[0] + '.jpg'
	            subprocess.run(['ebook-meta', ebook_path, '--get-cover', cover_path], check=True)
	            if os.path.exists(cover_path):
	                return cover_path
	        except Exception as e:
	            print(f"Error extracting eBook metadata or cover: {e}")
	        return None

	    def combine_wav_files(chapter_files, output_path):
	        """Concatenate the chapters and return each chapter's length in ms."""
	        combined_audio = AudioSegment.empty()
	        durations_ms = []
	        for chapter_file in chapter_files:
	            audio_segment = AudioSegment.from_wav(chapter_file)
	            # Remember the length now so the files are not decoded twice.
	            durations_ms.append(len(audio_segment))
	            combined_audio += audio_segment
	        combined_audio.export(output_path, format='wav')
	        print(f"Combined audio saved to {output_path}")
	        return durations_ms

	    def generate_ffmpeg_metadata(durations_ms, metadata_file):
	        """Write an FFMETADATA file with one chapter per duration."""
	        with open(metadata_file, 'w') as file:
	            file.write(';FFMETADATA1\n')
	            start_time = 0
	            for index, duration_ms in enumerate(durations_ms):
	                file.write(f'[CHAPTER]\nTIMEBASE=1/1000\nSTART={start_time}\n')
	                file.write(f'END={start_time + duration_ms}\ntitle=Chapter {index + 1}\n')
	                start_time += duration_ms

	    def create_m4b(combined_wav, metadata_file, cover_image, output_m4b):
	        target_dir = os.path.dirname(output_m4b)
	        if target_dir:
	            os.makedirs(target_dir, exist_ok=True)

	        # -y: overwrite an existing audiobook instead of stopping at
	        # ffmpeg's interactive "overwrite?" prompt.
	        ffmpeg_cmd = ['ffmpeg', '-y', '-i', combined_wav, '-i', metadata_file]
	        if cover_image:
	            ffmpeg_cmd += ['-i', cover_image, '-map', '0:a', '-map', '2:v']
	        else:
	            ffmpeg_cmd += ['-map', '0:a']

	        ffmpeg_cmd += ['-map_metadata', '1', '-c:a', 'aac', '-b:a', '192k']
	        if cover_image:
	            ffmpeg_cmd += ['-c:v', 'png', '-disposition:v', 'attached_pic']
	        ffmpeg_cmd += [output_m4b]

	        subprocess.run(ffmpeg_cmd, check=True)

	    chapter_files = sorted(
	        (os.path.join(input_dir, f) for f in os.listdir(input_dir) if f.endswith('.wav')),
	        key=sort_key,
	    )
	    if not chapter_files:
	        raise FileNotFoundError(f"No chapter .wav files found in {input_dir}")

	    output_m4b = os.path.join(output_dir, os.path.splitext(os.path.basename(ebook_file))[0] + '.m4b')
	    cover_image = extract_metadata_and_cover(ebook_file)

	    try:
	        # A private temp directory avoids name clashes in the shared temp
	        # folder and is removed even when ffmpeg fails.
	        with tempfile.TemporaryDirectory() as temp_dir:
	            temp_combined_wav = os.path.join(temp_dir, 'combined.wav')
	            metadata_file = os.path.join(temp_dir, 'metadata.txt')

	            durations_ms = combine_wav_files(chapter_files, temp_combined_wav)
	            generate_ffmpeg_metadata(durations_ms, metadata_file)
	            create_m4b(temp_combined_wav, metadata_file, cover_image, output_m4b)
	    finally:
	        if cover_image and os.path.exists(cover_image):
	            os.remove(cover_image)

	# Function to create chapter-labeled book
	def create_chapter_labeled_book(ebook_file_path):
	    """Turn an eBook into per-chapter text files, a sentence CSV and one combined text.

	    All output goes below ``./Working_files``:

	    * ``temp.epub`` - the input converted with ``ebook-convert``.
	    * ``temp_ebook/chapter_N.txt`` - one file per non-empty document item;
	      an item shorter than 2300 characters is appended to the previous file.
	    * ``Book/Other_book.csv`` - one row per sentence of every chapter.
	    * ``Book/Chapter_Book.txt`` - all chapters joined by ``NEWCHAPTERABC``.

	    Args:
	        ebook_file_path: Path of the eBook to process.

	    Raises:
	        RuntimeError: If ``ebook-convert`` fails, so that stale or missing
	            chapter files are never processed as if they were this book.
	    """
	    working_dir = os.path.join(".", "Working_files")
	    book_dir = os.path.join(working_dir, "Book")
	    chapters_dir = os.path.join(working_dir, "temp_ebook")
	    chapter_name_pattern = re.compile(r'chapter_(\d+)\.txt')

	    def ensure_directory(directory_path):
	        if not os.path.exists(directory_path):
	            os.makedirs(directory_path)
	            print(f"Created directory: {directory_path}")

	    def chapter_number(filename):
	        """Return N for a 'chapter_N.txt' file name, or None for anything else."""
	        match = chapter_name_pattern.fullmatch(filename)
	        return int(match.group(1)) if match else None

	    def convert_to_epub(input_path, output_path):
	        try:
	            subprocess.run(['ebook-convert', input_path, output_path], check=True)
	        except subprocess.CalledProcessError as e:
	            print(f"An error occurred while converting the eBook: {e}")
	            return False
	        return True

	    def save_chapters_as_text(epub_path):
	        ensure_directory(chapters_dir)

	        book = epub.read_epub(epub_path)

	        previous_filename = ''
	        chapter_counter = 0

	        for item in book.get_items():
	            if item.get_type() != ebooklib.ITEM_DOCUMENT:
	                continue
	            soup = BeautifulSoup(item.get_content(), 'html.parser')
	            text = soup.get_text()
	            if not text.strip():
	                continue

	            if len(text) < 2300 and previous_filename:
	                # Short items are folded into the chapter before them.
	                with open(previous_filename, 'a', encoding='utf-8') as file:
	                    file.write('\n' + text)
	            else:
	                previous_filename = os.path.join(chapters_dir, f"chapter_{chapter_counter}.txt")
	                chapter_counter += 1
	                with open(previous_filename, 'w', encoding='utf-8') as file:
	                    file.write(text)
	                print(f"Saved chapter: {previous_filename}")

	    def process_chapter_files(folder_path, output_csv):
	        with open(output_csv, 'w', newline='', encoding='utf-8') as csvfile:
	            writer = csv.writer(csvfile)
	            writer.writerow(['Text', 'Start Location', 'End Location', 'Is Quote', 'Speaker', 'Chapter'])

	            # Filter before sorting: the numeric sort key only exists for
	            # chapter_N.txt names, any other file would make it fail.
	            chapter_files = sorted(
	                (f for f in os.listdir(folder_path) if chapter_number(f) is not None),
	                key=chapter_number,
	            )
	            for filename in chapter_files:
	                file_path = os.path.join(folder_path, filename)
	                try:
	                    with open(file_path, 'r', encoding='utf-8') as file:
	                        text = file.read()
	                    if not text:
	                        continue
	                    text = "NEWCHAPTERABC" + text
	                    search_from = 0
	                    for sentence in nltk.tokenize.sent_tokenize(text):
	                        # Search forward from the previous sentence so a
	                        # repeated sentence gets its own position, not the
	                        # position of its first occurrence.
	                        start_location = text.find(sentence, search_from)
	                        if start_location == -1:
	                            start_location = text.find(sentence)
	                        end_location = start_location + len(sentence)
	                        if start_location != -1:
	                            search_from = end_location
	                        writer.writerow([sentence, start_location, end_location, 'True', 'Narrator', chapter_number(filename)])
	                except Exception as e:
	                    print(f"Error processing file {filename}: {e}")

	    def sort_key(filename):
	        match = re.search(r'chapter_(\d+)\.txt', filename)
	        return int(match.group(1)) if match else 0

	    def combine_chapters(input_folder, output_file):
	        os.makedirs(os.path.dirname(output_file), exist_ok=True)

	        files = [f for f in os.listdir(input_folder) if f.endswith('.txt')]
	        sorted_files = sorted(files, key=sort_key)

	        with open(output_file, 'w', encoding='utf-8') as outfile:
	            for i, filename in enumerate(sorted_files):
	                with open(os.path.join(input_folder, filename), 'r', encoding='utf-8') as infile:
	                    outfile.write(infile.read())
	                if i < len(sorted_files) - 1:
	                    outfile.write("\nNEWCHAPTERABC\n")

	    ensure_directory(book_dir)

	    output_epub = os.path.join(working_dir, "temp.epub")
	    if os.path.exists(output_epub):
	        os.remove(output_epub)
	        print(f"File {output_epub} has been removed.")
	    else:
	        print(f"The file {output_epub} does not exist.")

	    if not convert_to_epub(ebook_file_path, output_epub):
	        raise RuntimeError(f"Could not convert {ebook_file_path} to EPUB")
	    save_chapters_as_text(output_epub)

	    nltk.download('punkt')

	    process_chapter_files(chapters_dir, os.path.join(book_dir, "Other_book.csv"))
	    combine_chapters(chapters_dir, os.path.join(book_dir, "Chapter_Book.txt"))

	# Function to combine WAV files
	def combine_wav_files(input_directory, output_directory, file_name):
	    """Concatenate the numbered WAV fragments of a directory into one WAV file.

	    Fragments are ordered by the number formed from the digits of their file
	    name and written, joined, to ``<output_directory>/<file_name>``.

	    Args:
	        input_directory: Directory containing the ``.wav`` fragments.
	        output_directory: Destination directory; created if missing.
	        file_name: Name of the combined file.
	    """
	    def fragment_index(path):
	        # Use the file name only: digits in the directory path must not leak
	        # into the key, and a name without digits must not crash int('').
	        digits = ''.join(filter(str.isdigit, os.path.basename(path)))
	        return int(digits) if digits else 0

	    os.makedirs(output_directory, exist_ok=True)
	    output_file_path = os.path.join(output_directory, file_name)
	    combined_audio = AudioSegment.empty()
	    input_file_paths = sorted(
	        (os.path.join(input_directory, f) for f in os.listdir(input_directory) if f.endswith(".wav")),
	        key=fragment_index,
	    )
	    for input_file_path in input_file_paths:
	        audio_segment = AudioSegment.from_wav(input_file_path)
	        combined_audio += audio_segment
	    combined_audio.export(output_file_path, format='wav')
	    print(f"Combined audio saved to {output_file_path}")

	# Function to split long sentences
	def split_long_sentence(sentence, max_length=249, max_pauses=10):
	    """Split ``sentence`` into fragments that respect a length and a pause limit.

	    A fragment is cut after a pause mark (``,``, ``;`` or ``.``) whenever
	    possible. Each fragment is at most ``max_length`` characters long and
	    holds at most ``max_pauses`` pause marks (at least one mark is always
	    allowed, otherwise no cut would be possible). When no pause mark lies
	    within reach, the cut falls on the last whitespace before the limit,
	    and only as a last resort in the middle of a word.

	    Args:
	        sentence: Text to split.
	        max_length: Maximum number of characters per fragment (>= 1).
	        max_pauses: Maximum number of pause marks per fragment.

	    Returns:
	        List of stripped, non-empty fragments. A sentence that needs no
	        splitting is returned unchanged as the single element.

	    Raises:
	        ValueError: If ``max_length`` is smaller than 1.
	    """
	    if max_length < 1:
	        raise ValueError("max_length must be at least 1")

	    pause_chars = ',;.'
	    pause_budget = max(max_pauses, 1)
	    parts = []
	    remaining = sentence

	    while remaining:
	        pause_positions = [i for i, char in enumerate(remaining) if char in pause_chars]
	        if len(remaining) <= max_length and len(pause_positions) <= max_pauses:
	            break

	        # Pause marks that keep the fragment inside both limits.
	        candidates = [i for i in pause_positions if i < max_length][:pause_budget]
	        if candidates:
	            split_at = candidates[-1] + 1
	        else:
	            # No pause mark in reach (only possible for over-long text):
	            # prefer a word boundary over cutting a word in half.
	            window = min(max_length, len(remaining))
	            split_at = next(
	                (i for i in range(window - 1, 0, -1) if remaining[i].isspace()),
	                max_length,
	            )

	        piece = remaining[:split_at].strip()
	        if piece:
	            parts.append(piece)
	        remaining = remaining[split_at:].strip()

	    # Keep the untouched input (even an empty one) when nothing was split.
	    if remaining or not parts:
	        parts.append(remaining)
	    return parts

	# Function to convert chapters to audio using custom model
	def convert_chapters_to_audio_custom_model(chapters_dir, output_audio_dir, target_voice_path=None, language=None, custom_model=None):
	    """Synthesize every ``chapter_N.txt`` in ``chapters_dir`` to ``audio_chapter_N.wav``.

	    Each chapter is split into sentences and then into short fragments; each
	    fragment is rendered to ``./Working_files/temp/<k>.wav``, the fragments
	    are joined into the chapter file in ``output_audio_dir`` and the temp
	    folder is wiped.

	    Args:
	        chapters_dir: Directory with the chapter text files.
	        output_audio_dir: Destination directory; created if missing.
	        target_voice_path: Reference voice WAV; ``default_target_voice_path``
	            when empty or None.
	        language: Language code; ``default_language_code`` when empty or None.
	        custom_model: Dict with the keys ``'config'``, ``'model'`` and
	            ``'vocab'`` (file paths) for a fine-tuned XTTS model. When falsy
	            the stock ``xtts_v2`` model is used instead.
	    """
	    # Resolve the defaults once so the tokenizer, the fragment length limit
	    # and the synthesis call all agree on voice and language.
	    speaker_wav_path = target_voice_path if target_voice_path else default_target_voice_path
	    language_code = language if language else default_language_code

	    if custom_model:
	        print("Loading custom model...")
	        config = XttsConfig()
	        config.load_json(custom_model['config'])
	        model = Xtts.init_from_config(config)
	        model.load_checkpoint(config, checkpoint_path=custom_model['model'], vocab_path=custom_model['vocab'], use_deepspeed=False)
	        # Move the model to the selected device (the bare `model.device`
	        # expression that stood here had no effect).
	        model.to(device)
	        print("Computing speaker latents...")
	        gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=[speaker_wav_path])
	    else:
	        selected_tts_model = "tts_models/multilingual/multi-dataset/xtts_v2"
	        tts = TTS(selected_tts_model, progress_bar=False).to(device)

	    if not os.path.exists(output_audio_dir):
	        os.makedirs(output_audio_dir)

	    tokenizer_language = 'italian' if language_code == 'it' else 'english'
	    fragment_max_length = 249 if language_code == "en" else 213
	    temp_audio_directory = os.path.join(".", "Working_files", "temp")

	    for chapter_file in sorted(os.listdir(chapters_dir)):
	        if not chapter_file.endswith('.txt'):
	            continue
	        match = re.search(r"chapter_(\d+)\.txt", chapter_file)
	        if not match:
	            print(f"Skipping file {chapter_file} as it does not match the expected format.")
	            continue
	        chapter_num = int(match.group(1))

	        chapter_path = os.path.join(chapters_dir, chapter_file)
	        output_file_name = f"audio_chapter_{chapter_num}.wav"
	        os.makedirs(temp_audio_directory, exist_ok=True)
	        temp_count = 0

	        with open(chapter_path, 'r', encoding='utf-8') as file:
	            chapter_text = file.read()

	        sentences = sent_tokenize(chapter_text, language=tokenizer_language)
	        for sentence in tqdm(sentences, desc=f"Chapter {chapter_num}"):
	            fragments = split_long_sentence(sentence, max_length=fragment_max_length, max_pauses=10)
	            for fragment in fragments:
	                if fragment == "":
	                    continue
	                print(f"Generating fragment: {fragment}...")
	                fragment_file_path = os.path.join(temp_audio_directory, f"{temp_count}.wav")
	                if custom_model:
	                    out = model.inference(fragment, language_code, gpt_cond_latent, speaker_embedding, temperature=0.7)
	                    torchaudio.save(fragment_file_path, torch.tensor(out["wav"]).unsqueeze(0), 24000)
	                else:
	                    tts.tts_to_file(text=fragment, file_path=fragment_file_path, speaker_wav=speaker_wav_path, language=language_code)
	                temp_count += 1

	        combine_wav_files(temp_audio_directory, output_audio_dir, output_file_name)
	        wipe_folder(temp_audio_directory)
	        print(f"Converted chapter {chapter_num} to audio.")

	# Function to convert chapters to audio using standard model
	def convert_chapters_to_audio_standard_model(chapters_dir, output_audio_dir, target_voice_path=None, language=None):
	    """Synthesize every chapter text file with the stock ``xtts_v2`` model.

	    This is the no-custom-model case of
	    ``convert_chapters_to_audio_custom_model`` and delegates to it instead
	    of keeping a second copy of the same loop.

	    Args:
	        chapters_dir: Directory with the ``chapter_N.txt`` files.
	        output_audio_dir: Destination for the ``audio_chapter_N.wav`` files.
	        target_voice_path: Reference voice WAV; the module default when
	            empty or None.
	        language: Language code; ``default_language_code`` when empty or None.
	    """
	    # Resolve the default before delegating, so the per-language fragment
	    # length limit matches the language that is actually synthesized.
	    language_code = language if language else default_language_code
	    convert_chapters_to_audio_custom_model(
	        chapters_dir,
	        output_audio_dir,
	        target_voice_path=target_voice_path,
	        language=language_code,
	        custom_model=None,
	    )

	# Function to handle the processing of an eBook to an audiobook
	def process_request(ebook_file, target_voice, language, email, use_custom_model, custom_model):
	    """Run the whole eBook-to-audiobook pipeline for one queued request.

	    Clears ``./Working_files`` and ``./Chapter_wav_files``, extracts the
	    chapters, synthesizes them, builds the ``.m4b`` in ``./Audiobooks``,
	    uploads it to file.io and, when an address was given, emails the link.

	    Args:
	        ebook_file: Uploaded file object; its ``.name`` is the eBook path.
	        target_voice: Path of the reference voice WAV, or None.
	        language: Language code for synthesis.
	        email: Address that receives the download link (may be empty).
	        use_custom_model: Whether to synthesize with ``custom_model``.
	        custom_model: Dict of custom model file paths, or None.

	    Returns:
	        The download link, or ``''`` when the upload failed (the audiobook
	        file itself stays in ``./Audiobooks``).
	    """
	    full_folder_working_files = os.path.join(".", "Working_files")
	    chapters_directory = os.path.join(".", "Working_files", "temp_ebook")
	    output_audio_directory = os.path.join(".", 'Chapter_wav_files')
	    remove_folder_with_contents(full_folder_working_files)
	    remove_folder_with_contents(output_audio_directory)

	    create_chapter_labeled_book(ebook_file.name)
	    audiobook_output_path = os.path.join(".", "Audiobooks")

	    if use_custom_model:
	        convert_chapters_to_audio_custom_model(chapters_directory, output_audio_directory, target_voice, language, custom_model)
	    else:
	        convert_chapters_to_audio_standard_model(chapters_directory, output_audio_directory, target_voice, language)

	    create_m4b_from_chapters(output_audio_directory, ebook_file.name, audiobook_output_path)

	    m4b_filepath = os.path.join(audiobook_output_path, os.path.splitext(os.path.basename(ebook_file.name))[0] + '.m4b')

	    # Upload the final audiobook to file.io
	    download_link = ''
	    try:
	        with open(m4b_filepath, 'rb') as f:
	            # Without a timeout a stalled connection would block the single
	            # worker forever; (connect, read) seconds.
	            response = requests.post('https://file.io', files={'file': f}, timeout=(10, 300))
	        response.raise_for_status()
	        download_link = response.json().get('link', '')
	    except (requests.RequestException, ValueError) as e:
	        # ValueError covers a response body that is not valid JSON.
	        print(f"Failed to upload audiobook {m4b_filepath}: {e}")

	    # Send the download link to the user's email
	    if email and download_link:
	        send_email(email, download_link)

	    return download_link

	# Function to manage the queue and process each request sequentially
	def handle_queue():
	    """Process queued requests one at a time, forever.

	    Runs in the worker thread. A failing request is reported and skipped so
	    that it cannot terminate the worker and strand every later request.
	    """
	    while True:
	        request = queue.get()
	        try:
	            ebook_file, target_voice, language, email, use_custom_model, custom_model = request
	            process_request(ebook_file, target_voice, language, email, use_custom_model, custom_model)
	        except Exception as e:
	            print(f"Failed to process request: {e}")
	        finally:
	            # Always balance the get(), otherwise queue.join() could never return.
	            queue.task_done()

	# Start the queue handler thread
	# A single worker runs handle_queue, so requests are processed one after
	# another; daemon=True keeps the thread from blocking interpreter exit.
	thread = Thread(target=handle_queue, daemon=True)
	thread.start()

	# Gradio function to add a request to the queue
	def enqueue_request(ebook_file, target_voice_file, language, email, use_custom_model, custom_model_file, custom_config_file, custom_vocab_file, custom_model_url=None):
	    """Validate a conversion request from the UI and add it to the work queue.

	    Args:
	        ebook_file: Uploaded eBook file object, or None.
	        target_voice_file: Uploaded reference voice file object, or None.
	        language: Selected language code.
	        email: Address that should receive the download link.
	        use_custom_model: Whether a custom model was requested.
	        custom_model_file: Uploaded model checkpoint, or None.
	        custom_config_file: Uploaded model config, or None.
	        custom_vocab_file: Uploaded model vocabulary, or None.
	        custom_model_url: Optional URL of a zip holding the three model
	            files; takes precedence over the uploaded files when it can be
	            downloaded.

	    Returns:
	        A status message for the user: the queue position, or the reason
	        the request was not queued.
	    """
	    if ebook_file is None:
	        # Queuing this would only fail later inside the worker thread.
	        return "Please upload an eBook file before converting."

	    target_voice = target_voice_file.name if target_voice_file else None
	    custom_model = None

	    if use_custom_model and custom_model_file and custom_config_file and custom_vocab_file:
	        custom_model = {
	            'model': custom_model_file.name,
	            'config': custom_config_file.name,
	            'vocab': custom_vocab_file.name
	        }
	    if use_custom_model and custom_model_url:
	        # process_request deletes ./Working_files before converting, so the
	        # download must live elsewhere; a private temp directory also keeps
	        # concurrent requests from overwriting each other's model.
	        # NOTE(review): nothing removes this directory after a successful run.
	        download_dir = tempfile.mkdtemp(prefix="custom_model_")
	        download_and_extract_zip(custom_model_url, download_dir)
	        downloaded_model = {
	            'model': os.path.join(download_dir, 'model.pth'),
	            'config': os.path.join(download_dir, 'config.json'),
	            'vocab': os.path.join(download_dir, 'vocab.json_')
	        }
	        if all(os.path.exists(path) for path in downloaded_model.values()):
	            custom_model = downloaded_model
	        else:
	            shutil.rmtree(download_dir, ignore_errors=True)
	            if custom_model is None:
	                return "The custom model could not be downloaded from the given URL. Your request was not added to the queue."

	    # Add request to the queue
	    with queue_lock:
	        queue.put((ebook_file, target_voice, language, email, use_custom_model, custom_model))
	        position = queue.qsize()
	    return f"Your request has been added to the queue. You are number {position} in line."

	# Gradio UI setup
	# Language codes offered in the "Language" dropdown below.
	language_options = [
	"en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl", "cs", "ar", "zh-cn", "ja", "hu", "ko"
	]

	# Soft theme with blue for the primary, secondary and neutral hues.
	theme = gr.themes.Soft(
	primary_hue="blue",
	secondary_hue="blue",
	neutral_hue="blue",
	text_size=gr.themes.sizes.text_md,
	)

	# Page layout: a heading, the request inputs, the custom-model inputs, and
	# the convert button with its status box.
	with gr.Blocks(theme=theme) as demo:
	gr.Markdown(
	"""
	# eBook to Audiobook Converter

	Transform your eBooks into immersive audiobooks with optional custom TTS models.
	"""
	)

	with gr.Row():
	with gr.Column(scale=3):
	ebook_file = gr.File(label="eBook File")
	target_voice_file = gr.File(label="Target Voice File (Optional)")
	language = gr.Dropdown(label="Language", choices=language_options, value="en")
	email = gr.Textbox(label="Email Address")

	with gr.Column(scale=3):
	use_custom_model = gr.Checkbox(label="Use Custom Model")
	# The four custom-model inputs start hidden; the checkbox handler below toggles them.
	custom_model_file = gr.File(label="Custom Model File (Optional)", visible=False)
	custom_config_file = gr.File(label="Custom Config File (Optional)", visible=False)
	custom_vocab_file = gr.File(label="Custom Vocab File (Optional)", visible=False)
	custom_model_url = gr.Textbox(label="Custom Model Zip URL (Optional)", visible=False)

	convert_btn = gr.Button("Convert to Audiobook", variant="primary")
	queue_status = gr.Textbox(label="Queue Status")

	# Clicking the button calls enqueue_request with the nine inputs (in its
	# parameter order) and shows the returned message in "Queue Status".
	convert_btn.click(
	enqueue_request,
	inputs=[ebook_file, target_voice_file, language, email, use_custom_model, custom_model_file, custom_config_file, custom_vocab_file, custom_model_url],
	outputs=[queue_status]
	)

	# Set the visibility of the four custom-model inputs to the checkbox value.
	use_custom_model.change(
	lambda x: [gr.update(visible=x)] * 4,
	inputs=[use_custom_model],
	outputs=[custom_model_file, custom_config_file, custom_vocab_file, custom_model_url]
	)

	# NOTE(review): share=True presumably requests a public Gradio share link - confirm
	# this is intended, since the app accepts uploads and URLs from any visitor.
	demo.launch(share=True)