"""Gradio app that lists the documents of an uploaded EPUB and displays one on request."""

import re

import ebooklib
import gradio as gr
from bs4 import BeautifulSoup
from ebooklib import epub


def extract_text_from_html(html_content):
    """Return the text of *html_content* with all markup removed."""
    soup = BeautifulSoup(html_content, 'html.parser')
    return soup.get_text()


def _chapter_label(title, length):
    """Format one line of the chapter listing shown to the user."""
    return f"{title} ({length} characters)"


def get_chapters(epub_file):
    """Read an EPUB and return its document items as chapters.

    Args:
        epub_file: The uploaded file. Either an object whose ``name``
            attribute is the path on disk, a plain path string, or ``None``
            when nothing has been uploaded.

    Returns:
        A list of ``(title, text_length, text)`` tuples, one per
        ``ITEM_DOCUMENT`` item, in the order the book yields them. Empty
        when *epub_file* is ``None``.
    """
    if epub_file is None:
        return []

    # Fall back to the value itself when it has no ``.name`` (plain path).
    path = getattr(epub_file, "name", epub_file)
    book = epub.read_epub(path)

    chapters = []
    for item in book.get_items():
        if item.get_type() != ebooklib.ITEM_DOCUMENT:
            continue

        # errors="replace" keeps one badly encoded document from aborting
        # the whole listing.
        content = item.get_content().decode('utf-8', errors='replace')

        # Parse once and reuse the tree for both the title and the text.
        soup = BeautifulSoup(content, 'html.parser')

        title_tag = soup.find('title')
        # ``.string`` is None for an empty <title> or one with nested tags;
        # treat that (and whitespace-only titles) like a missing title.
        raw_title = title_tag.string if title_tag is not None else None
        title = str(raw_title).strip() if raw_title is not None else ""
        if not title:
            title = f"Chapter {len(chapters) + 1}"

        text = soup.get_text()
        chapters.append((title, len(text), text))
    return chapters


def display_chapters(epub_file):
    """Return a newline-separated listing of chapter titles and their lengths."""
    return "\n".join(
        _chapter_label(title, length)
        for title, length, _ in get_chapters(epub_file)
    )


def get_chapter_content(epub_file, selected_chapter):
    """Return the text of the chapter the user asked for.

    Matching is attempted in this order:

    1. The query equals a chapter title, or equals the full listing line
       (``"<title> (<n> characters)"``) copied from the chapter list.
    2. Otherwise, the longest chapter title contained in the query wins, so
       "Chapter 10" is not mistaken for "Chapter 1".

    Args:
        epub_file: The uploaded file, as accepted by ``get_chapters``.
        selected_chapter: Text typed by the user.

    Returns:
        The chapter text, or a short message when no file was uploaded or
        no chapter matches.
    """
    if epub_file is None:
        return "Please upload an EPUB file first."

    query = (selected_chapter or "").strip()
    if not query:
        return "Chapter not found"

    chapters = get_chapters(epub_file)

    for title, length, content in chapters:
        if query in (title, _chapter_label(title, length)):
            return content

    # Looser fallback: a title embedded somewhere in the query. Prefer the
    # longest title so a more specific chapter beats its own prefix.
    contained = [chapter for chapter in chapters if chapter[0] in query]
    if contained:
        return max(contained, key=lambda chapter: len(chapter[0]))[2]

    return "Chapter not found"


def create_interface():
    """Build the Gradio Blocks UI and wire its events; return it unlaunched."""
    with gr.Blocks() as interface:
        gr.Markdown("# EPUB Chapter Extractor")

        with gr.Row():
            epub_input = gr.File(label="Upload EPUB File")
            chapters_output = gr.Textbox(label="Available Chapters", interactive=False)

        # Refresh the chapter listing whenever a new file is uploaded.
        epub_input.upload(display_chapters, epub_input, chapters_output)

        with gr.Row():
            chapter_select = gr.Textbox(label="Enter the chapter title you want to read")
            read_button = gr.Button("Read Chapter")

        chapter_content = gr.Textbox(label="Chapter Content", interactive=False)

        read_button.click(
            get_chapter_content,
            inputs=[epub_input, chapter_select],
            outputs=chapter_content,
        )
    return interface


if __name__ == "__main__":
    app = create_interface()
    app.launch()