dwarkesh committed on
Commit
07c0793
·
verified ·
1 Parent(s): 6813fb9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -0
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import ebooklib
3
+ from ebooklib import epub
4
+ from bs4 import BeautifulSoup
5
+ import re
6
+
7
def extract_text_from_html(html_content):
    """Return the text of an HTML document with the markup removed.

    Args:
        html_content: HTML markup to parse.

    Returns:
        The document's text as produced by ``BeautifulSoup.get_text()``
        using the built-in ``html.parser`` backend.
    """
    return BeautifulSoup(html_content, 'html.parser').get_text()
10
+
11
def get_chapters(epub_file):
    """Read an EPUB and return one entry per document item.

    Args:
        epub_file: Either a filesystem path string or an object exposing the
            path as ``.name`` (for example an uploaded temp-file wrapper).

    Returns:
        A list of ``(title, length, text)`` tuples, in the order the book
        yields its items. ``title`` is the whitespace-stripped content of the
        document's ``<title>`` element, or ``"Chapter N"`` (1-based position
        in the returned list) when that element is missing or empty.
        ``length`` is ``len(text)`` and ``text`` is the document's plain text.
    """
    # Accept both a plain path and a file-like object carrying ``.name``.
    epub_path = getattr(epub_file, "name", epub_file)
    book = epub.read_epub(epub_path)
    chapters = []

    for item in book.get_items():
        if item.get_type() != ebooklib.ITEM_DOCUMENT:
            continue

        # Parse once, straight from the raw bytes, so BeautifulSoup can
        # detect the encoding instead of assuming UTF-8.
        soup = BeautifulSoup(item.get_content(), 'html.parser')

        # ``Tag.string`` is None for an empty <title> or one with nested
        # markup; get_text() always yields a str, so the title is never None.
        title_tag = soup.find('title')
        title = title_tag.get_text(strip=True) if title_tag else ""
        if not title:
            title = f"Chapter {len(chapters) + 1}"

        text = soup.get_text()
        chapters.append((title, len(text), text))

    return chapters
24
+
25
def display_chapters(epub_file):
    """Build a newline-separated summary of the chapters in an EPUB.

    Args:
        epub_file: The uploaded EPUB, forwarded unchanged to ``get_chapters``.

    Returns:
        One line per chapter in the form ``"<title> (<length> characters)"``,
        joined with ``"\\n"``.
    """
    summary_lines = []
    for name, size, _text in get_chapters(epub_file):
        summary_lines.append(f"{name} ({size} characters)")
    return "\n".join(summary_lines)
28
+
29
def get_chapter_content(epub_file, selected_chapter):
    """Return the text of the chapter the user asked for.

    Args:
        epub_file: The uploaded EPUB, forwarded to ``get_chapters``. ``None``
            means nothing has been uploaded yet.
        selected_chapter: What the user typed: either a chapter title or a
            whole summary line of the form ``"<title> (<N> characters)"``.

    Returns:
        The chapter text, ``"No EPUB file uploaded"`` when ``epub_file`` is
        ``None``, or ``"Chapter not found"`` when nothing matches.
    """
    if epub_file is None:
        return "No EPUB file uploaded"

    query = (selected_chapter or "").strip()
    if not query:
        return "Chapter not found"

    chapters = get_chapters(epub_file)

    # Pass 1: exact match on the title or on the full summary line, so that
    # asking for "Chapter 10" can never be answered with "Chapter 1".
    for title, length, content in chapters:
        if not title:
            continue
        clean_title = title.strip()
        if query in (clean_title, f"{clean_title} ({length} characters)",
                     f"{title} ({length} characters)"):
            return content

    # Pass 2: fall back to the lenient "title appears in the query" rule.
    # Empty or missing titles are skipped: "" is a substring of every
    # string and None cannot be used with ``in`` at all.
    for title, _, content in chapters:
        if title and title in query:
            return content

    return "Chapter not found"
35
+
36
def create_interface():
    """Assemble the Gradio UI for browsing the chapters of an EPUB.

    The layout is a heading, a row with the file upload next to a read-only
    chapter listing, a row with the chapter-title entry box next to the
    "Read Chapter" button, and a read-only box for the chapter text.

    Returns:
        The ``gr.Blocks`` object, not yet launched.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# EPUB Chapter Extractor")

        with gr.Row():
            epub_input = gr.File(label="Upload EPUB File")
            chapters_output = gr.Textbox(
                label="Available Chapters",
                interactive=False,
            )

        # Refresh the chapter listing when a file is uploaded.
        epub_input.upload(
            fn=display_chapters,
            inputs=epub_input,
            outputs=chapters_output,
        )

        with gr.Row():
            chapter_select = gr.Textbox(
                label="Enter the chapter title you want to read",
            )
            read_button = gr.Button("Read Chapter")

        chapter_content = gr.Textbox(label="Chapter Content", interactive=False)

        # Look the requested chapter up in the uploaded file on click.
        read_button.click(
            fn=get_chapter_content,
            inputs=[epub_input, chapter_select],
            outputs=chapter_content,
        )

    return demo
54
+
55
if __name__ == "__main__":
    # Build the UI and start serving it when the file is run as a script.
    create_interface().launch()