oceansweep committed on
Commit
c313b25
1 Parent(s): 45e1f81

Upload 11 files

Browse files
App_Function_Libraries/Gradio_UI/Book_Ingestion_tab.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Book_Ingestion_tab.py
2
+ # Functionality to import epubs/ebooks into the system.
3
+ ####################
4
+ # Function List
5
+ #
6
+ # 1. create_import_book_tab()
7
+ # 2. import_epub(epub_file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key)
8
+ #
9
+ ####################
10
+ # Imports
11
+ import tempfile
12
+ import os
13
+ import zipfile
14
+ #
15
+ # External Imports
16
+ import gradio as gr
17
+ #
18
+ # Local Imports
19
+ from App_Function_Libraries.Gradio_UI.Import_Functionality import import_data
20
+ from App_Function_Libraries.Books.Book_Ingestion_Lib import epub_to_markdown
21
+ #
22
+ ########################################################################################################################
23
+ #
24
+ # Functions:
25
+
26
def import_epub(epub_file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key):
    """Convert an uploaded EPUB to Markdown and ingest it via ``import_data``.

    Args:
        epub_file: A path (str/PathLike) or an upload object exposing ``.name``
            or ``.path`` (the shapes Gradio hands back for file uploads).
        title, author, keywords: Metadata forwarded to ``import_data``.
        system_prompt, user_prompt: Prompts forwarded for optional summarization.
        auto_summarize: Whether to summarize on ingest.
        api_name, api_key: Which API (if any) to use for summarization.

    Returns:
        Whatever ``import_data`` returns on success, or a human-readable
        ``"Error processing EPUB: ..."`` string on any failure (this function
        never raises — errors are reported as strings for the UI textbox).
    """
    try:
        # Resolve a filesystem path from the various upload object types.
        if isinstance(epub_file, (str, os.PathLike)):
            epub_path = epub_file
        elif hasattr(epub_file, 'name'):
            epub_path = epub_file.name
        elif hasattr(epub_file, 'path'):
            epub_path = epub_file.path
        else:
            raise ValueError("Unsupported file object type")

        # Convert EPUB to Markdown. Fix: the previous version wrote the
        # markdown to a temp file and immediately read it back — a pointless
        # write/read round trip. The converted text is used directly instead.
        content = epub_to_markdown(epub_path)

        # Process the content exactly as a plain-text import would be.
        return import_data(content, title, author, keywords, system_prompt,
                           user_prompt, auto_summarize, api_name, api_key)
    except Exception as e:
        return f"Error processing EPUB: {str(e)}"
58
+
59
+
60
def process_zip_file(zip_file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key):
    """Extract a ZIP archive and import every ``.epub`` found at its top level.

    Args:
        zip_file: A path (str/PathLike) or an upload object exposing ``.name``
            or ``.path``.
        Remaining arguments are forwarded unchanged to ``import_epub`` for
        every EPUB in the archive (so the same title/author metadata applies
        to each file).

    Returns:
        A newline-joined status report, one ``"File: <name> - <result>"`` line
        per imported EPUB; empty string when the archive holds no EPUBs.

    Raises:
        ValueError: If ``zip_file`` is an unsupported object type.
    """
    results = []
    with tempfile.TemporaryDirectory() as temp_dir:
        # Resolve a path — same handling as import_epub, for consistency
        # (previously a plain string path would have raised ValueError here).
        if isinstance(zip_file, (str, os.PathLike)):
            zip_path = zip_file
        elif hasattr(zip_file, 'name'):
            zip_path = zip_file.name
        elif hasattr(zip_file, 'path'):
            zip_path = zip_file.path
        else:
            raise ValueError("Unsupported zip file object type")

        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(temp_dir)

        # sorted() makes the report order deterministic across platforms.
        for filename in sorted(os.listdir(temp_dir)):
            if filename.lower().endswith('.epub'):
                file_path = os.path.join(temp_dir, filename)
                result = import_epub(file_path, title, author, keywords, system_prompt,
                                     user_prompt, auto_summarize, api_name, api_key)
                # Fix: report the actual filename; the status line previously
                # contained a hard-coded "(unknown)" placeholder.
                results.append(f"File: {filename} - {result}")

    return "\n".join(results)
81
+
82
+
83
def create_import_book_tab():
    """Build the "Ebook(epub) Files" Gradio tab.

    Lays out the upload form (file picker, metadata textboxes, prompts,
    summarization API selection) and wires the Import button to a handler
    that dispatches to import_epub for a single .epub or process_zip_file
    for a .zip of EPUBs.

    Returns:
        The tab's components in a fixed order:
        (import_file, title_input, author_input, keywords_input,
         system_prompt_input, custom_prompt_input, auto_summarize_checkbox,
         api_name_input, api_key_input, import_button, import_output)
    """
    with gr.TabItem("Ebook(epub) Files"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("# Import .epub files")
                gr.Markdown("Upload a single .epub file or a .zip file containing multiple .epub files")
                gr.Markdown(
                    "How to remove DRM from your ebooks: https://www.reddit.com/r/Calibre/comments/1ck4w8e/2024_guide_on_removing_drm_from_kobo_kindle_ebooks/")
                import_file = gr.File(label="Upload file for import", file_types=[".epub", ".zip"])
                title_input = gr.Textbox(label="Title", placeholder="Enter the title of the content (for single files)")
                author_input = gr.Textbox(label="Author", placeholder="Enter the author's name (for single files)")
                keywords_input = gr.Textbox(label="Keywords (like genre or publish year)",
                                            placeholder="Enter keywords, comma-separated")
                # Default system prompt: bulleted-notes instruction template.
                # NOTE(review): value=""""..." opens a triple-quoted string whose
                # first character is a stray double quote — presumably a typo,
                # but it is part of the shipped prompt text; confirm before
                # changing it.
                system_prompt_input = gr.Textbox(label="System Prompt", lines=3,
                                                 value=""""
<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
**Bulleted Note Creation Guidelines**

**Headings**:
- Based on referenced topics, not categories like quotes or terms
- Surrounded by **bold** formatting
- Not listed as bullet points
- No space between headings and list items underneath

**Emphasis**:
- **Important terms** set in bold font
- **Text ending in a colon**: also bolded

**Review**:
- Ensure adherence to specified format
- Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]
""", )
                custom_prompt_input = gr.Textbox(label="Custom User Prompt",
                                                 placeholder="Enter a custom user prompt for summarization (optional)")
                auto_summarize_checkbox = gr.Checkbox(label="Auto-summarize", value=False)
                # None as first choice means "no auto-summarization API selected".
                api_name_input = gr.Dropdown(
                    choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
                             "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"],
                    label="API for Auto-summarization"
                )
                api_key_input = gr.Textbox(label="API Key", type="password")
                import_button = gr.Button("Import eBook(s)")
            with gr.Column():
                with gr.Row():
                    import_output = gr.Textbox(label="Import Status")

        def import_file_handler(file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key):
            # Dispatch on the uploaded file's extension.
            # NOTE(review): `file` is None when nothing was uploaded, which
            # makes `file.name` raise AttributeError — confirm whether the
            # button should guard against an empty upload.
            if file.name.lower().endswith('.epub'):
                return import_epub(file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key)
            elif file.name.lower().endswith('.zip'):
                return process_zip_file(file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key)
            else:
                return "Unsupported file type. Please upload an .epub file or a .zip file containing .epub files."

        import_button.click(
            fn=import_file_handler,
            inputs=[import_file, title_input, author_input, keywords_input, system_prompt_input,
                    custom_prompt_input, auto_summarize_checkbox, api_name_input, api_key_input],
            outputs=import_output
        )

    return import_file, title_input, author_input, keywords_input, system_prompt_input, custom_prompt_input, auto_summarize_checkbox, api_name_input, api_key_input, import_button, import_output
145
+
146
+ #
147
+ # End of File
148
+ ########################################################################################################################
App_Function_Libraries/Gradio_UI/Character_Interaction_tab.py CHANGED
@@ -147,10 +147,10 @@ def create_character_card_interaction_tab():
147
  character_dropdown = gr.Dropdown(label="Select Character", choices=get_character_names())
148
  user_name_input = gr.Textbox(label="Your Name", placeholder="Enter your name here")
149
  api_name_input = gr.Dropdown(
150
- choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
151
  "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace",
152
  "Custom-OpenAI-API"],
153
- value=None,
154
  # FIXME - make it so the user cant' click `Send Message` without first setting an API + Chatbot
155
  label="API for Interaction(Mandatory)"
156
  )
@@ -591,8 +591,12 @@ def create_multiple_character_chat_tab():
591
  range(4)]
592
 
593
  api_endpoint = gr.Dropdown(label="API Endpoint",
594
- choices=["OpenAI", "Anthropic", "Local-LLM", "Cohere", "Groq", "DeepSeek",
595
- "Mistral", "OpenRouter"])
 
 
 
 
596
  api_key = gr.Textbox(label="API Key (if required)", type="password")
597
  temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.1, value=0.7)
598
  scenario = gr.Textbox(label="Scenario (optional)", lines=3)
@@ -722,8 +726,10 @@ def create_narrator_controlled_conversation_tab():
722
  with gr.Column(scale=1):
723
  api_endpoint = gr.Dropdown(
724
  label="API Endpoint",
725
- choices=["OpenAI", "Anthropic", "Local-LLM", "Cohere", "Groq", "DeepSeek", "Mistral", "OpenRouter"],
726
- value="OpenAI"
 
 
727
  )
728
  api_key = gr.Textbox(label="API Key (if required)", type="password")
729
  temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.1, value=0.7)
@@ -827,5 +833,5 @@ def create_narrator_controlled_conversation_tab():
827
  return api_endpoint, api_key, temperature, narrator_input, conversation_display, user_input, generate_btn, reset_btn, error_box
828
 
829
  #
830
- # End of Multi-Character chat tab
831
  ########################################################################################################################
 
147
  character_dropdown = gr.Dropdown(label="Select Character", choices=get_character_names())
148
  user_name_input = gr.Textbox(label="Your Name", placeholder="Enter your name here")
149
  api_name_input = gr.Dropdown(
150
+ choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
151
  "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace",
152
  "Custom-OpenAI-API"],
153
+ value="HuggingFace",
154
  # FIXME - make it so the user cant' click `Send Message` without first setting an API + Chatbot
155
  label="API for Interaction(Mandatory)"
156
  )
 
591
  range(4)]
592
 
593
  api_endpoint = gr.Dropdown(label="API Endpoint",
594
+ choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek",
595
+ "Mistral",
596
+ "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM",
597
+ "ollama", "HuggingFace",
598
+ "Custom-OpenAI-API"],
599
+ value="HuggingFace")
600
  api_key = gr.Textbox(label="API Key (if required)", type="password")
601
  temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.1, value=0.7)
602
  scenario = gr.Textbox(label="Scenario (optional)", lines=3)
 
726
  with gr.Column(scale=1):
727
  api_endpoint = gr.Dropdown(
728
  label="API Endpoint",
729
+ choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
730
+ "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace",
731
+ "Custom-OpenAI-API"],
732
+ value="HuggingFace"
733
  )
734
  api_key = gr.Textbox(label="API Key (if required)", type="password")
735
  temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.1, value=0.7)
 
833
  return api_endpoint, api_key, temperature, narrator_input, conversation_display, user_input, generate_btn, reset_btn, error_box
834
 
835
  #
836
+ # End of Narrator-Controlled Conversation tab
837
  ########################################################################################################################
App_Function_Libraries/Gradio_UI/Chat_Workflows.py CHANGED
@@ -38,9 +38,10 @@ def chat_workflows_tab():
38
  workflow_selector = gr.Dropdown(label="Select Workflow", choices=[wf['name'] for wf in workflows])
39
  api_selector = gr.Dropdown(
40
  label="Select API Endpoint",
41
- choices=["OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral", "OpenRouter",
42
- "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"],
43
- value="OpenAI"
 
44
  )
45
  api_key_input = gr.Textbox(label="API Key (optional)", type="password")
46
  temperature = gr.Slider(label="Temperature", minimum=0.00, maximum=1.0, step=0.05, value=0.7)
@@ -172,148 +173,6 @@ def chat_workflows_tab():
172
  )
173
 
174
  return workflow_selector, api_selector, api_key_input, context_input, chatbot, msg, submit_btn, clear_btn, save_btn
175
- # def chat_workflows_tab():
176
- # with gr.TabItem("Chat Workflows"):
177
- # gr.Markdown("# Workflows using LLMs")
178
- # chat_history = gr.State([])
179
- # media_content = gr.State({})
180
- # selected_parts = gr.State([])
181
- # conversation_id = gr.State(None)
182
- # workflow_state = gr.State({"current_step": 0, "max_steps": 0, "conversation_id": None})
183
- #
184
- # with gr.Row():
185
- # workflow_selector = gr.Dropdown(label="Select Workflow", choices=[wf['name'] for wf in workflows])
186
- # api_selector = gr.Dropdown(
187
- # label="Select API Endpoint",
188
- # choices=["OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral", "OpenRouter",
189
- # "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"],
190
- # value="OpenAI"
191
- # )
192
- # api_key_input = gr.Textbox(label="API Key (optional)", type="password")
193
- #
194
- # context_input = gr.Textbox(label="Initial Context (optional)", lines=5)
195
- #
196
- # with gr.Row():
197
- # temperature = gr.Slider(label="Temperature", minimum=0.00, maximum=1.0, step=0.05, value=0.7)
198
- # save_conversation = gr.Checkbox(label="Save Conversation", value=False)
199
- #
200
- # chatbot = gr.Chatbot(label="Workflow Chat")
201
- # msg = gr.Textbox(label="Your Input")
202
- # submit_btn = gr.Button("Submit")
203
- # clear_btn = gr.Button("Clear Chat")
204
- # save_btn = gr.Button("Save Chat to Database")
205
- #
206
- # with gr.Row():
207
- # conversation_search = gr.Textbox(label="Search Conversations")
208
- # search_conversations_btn = gr.Button("Search Conversations")
209
- # previous_conversations = gr.Dropdown(label="Select Conversation", choices=[], interactive=True)
210
- # load_conversations_btn = gr.Button("Load Selected Conversation")
211
- #
212
- # def update_workflow_ui(workflow_name):
213
- # if not workflow_name:
214
- # return {"current_step": 0, "max_steps": 0, "conversation_id": None}
215
- # selected_workflow = next((wf for wf in workflows if wf['name'] == workflow_name), None)
216
- # if selected_workflow:
217
- # num_prompts = len(selected_workflow['prompts'])
218
- # logging.info(f"Initializing workflow: {workflow_name} with {num_prompts} steps")
219
- # return {"current_step": 0, "max_steps": num_prompts, "conversation_id": None}
220
- # else:
221
- # logging.error(f"Selected workflow not found: {workflow_name}")
222
- # return {"current_step": 0, "max_steps": 0, "conversation_id": None}
223
- #
224
- # def process_workflow_step(message, history, context, workflow_name, api_endpoint, api_key, workflow_state,
225
- # save_conv, temp):
226
- # logging.info(f"Process workflow step called with message: {message}")
227
- # logging.info(f"Current workflow state: {workflow_state}")
228
- # try:
229
- # selected_workflow = next((wf for wf in workflows if wf['name'] == workflow_name), None)
230
- # if not selected_workflow:
231
- # logging.error(f"Selected workflow not found: {workflow_name}")
232
- # return history, workflow_state, gr.update(interactive=True)
233
- #
234
- # current_step = workflow_state["current_step"]
235
- # max_steps = workflow_state["max_steps"]
236
- #
237
- # logging.info(f"Current step: {current_step}, Max steps: {max_steps}")
238
- #
239
- # if current_step >= max_steps:
240
- # logging.info("Workflow completed, disabling input")
241
- # return history, workflow_state, gr.update(interactive=False)
242
- #
243
- # prompt = selected_workflow['prompts'][current_step]
244
- # full_message = f"{context}\n\nStep {current_step + 1}: {prompt}\nUser: {message}"
245
- #
246
- # logging.info(f"Calling chat_wrapper with full_message: {full_message[:100]}...")
247
- # bot_message, new_history, new_conversation_id = chat_wrapper(
248
- # full_message, history, media_content.value, selected_parts.value,
249
- # api_endpoint, api_key, "", workflow_state["conversation_id"],
250
- # save_conv, temp, "You are a helpful assistant guiding through a workflow."
251
- # )
252
- #
253
- # logging.info(f"Received bot_message: {bot_message[:100]}...")
254
- #
255
- # next_step = current_step + 1
256
- # new_workflow_state = {
257
- # "current_step": next_step,
258
- # "max_steps": max_steps,
259
- # "conversation_id": new_conversation_id
260
- # }
261
- #
262
- # if next_step >= max_steps:
263
- # logging.info("Workflow completed after this step")
264
- # return new_history, new_workflow_state, gr.update(interactive=False)
265
- # else:
266
- # next_prompt = selected_workflow['prompts'][next_step]
267
- # new_history.append((None, f"Step {next_step + 1}: {next_prompt}"))
268
- # logging.info(f"Moving to next step: {next_step}")
269
- # return new_history, new_workflow_state, gr.update(interactive=True)
270
- # except Exception as e:
271
- # logging.error(f"Error in process_workflow_step: {str(e)}")
272
- # return history, workflow_state, gr.update(interactive=True)
273
- #
274
- # workflow_selector.change(
275
- # update_workflow_ui,
276
- # inputs=[workflow_selector],
277
- # outputs=[workflow_state]
278
- # )
279
- #
280
- # submit_btn.click(
281
- # process_workflow_step,
282
- # inputs=[msg, chatbot, context_input, workflow_selector, api_selector, api_key_input, workflow_state,
283
- # save_conversation, temperature],
284
- # outputs=[chatbot, workflow_state, msg]
285
- # ).then(
286
- # lambda: gr.update(value=""),
287
- # outputs=[msg]
288
- # )
289
- #
290
- # clear_btn.click(
291
- # lambda: ([], {"current_step": 0, "max_steps": 0, "conversation_id": None}),
292
- # outputs=[chatbot, workflow_state]
293
- # )
294
- #
295
- # save_btn.click(
296
- # save_chat_history_to_db_wrapper,
297
- # inputs=[chatbot, conversation_id, media_content],
298
- # outputs=[conversation_id, gr.Textbox(label="Save Status")]
299
- # )
300
- #
301
- # search_conversations_btn.click(
302
- # search_conversations,
303
- # inputs=[conversation_search],
304
- # outputs=[previous_conversations]
305
- # )
306
- #
307
- # load_conversations_btn.click(
308
- # lambda: ([], {"current_step": 0, "max_steps": 0, "conversation_id": None}),
309
- # outputs=[chatbot, workflow_state]
310
- # ).then(
311
- # load_conversation,
312
- # inputs=[previous_conversations],
313
- # outputs=[chatbot, conversation_id]
314
- # )
315
- #
316
- # return workflow_selector, api_selector, api_key_input, context_input, chatbot, msg, submit_btn, clear_btn, save_btn
317
 
318
  #
319
  # End of script
 
38
  workflow_selector = gr.Dropdown(label="Select Workflow", choices=[wf['name'] for wf in workflows])
39
  api_selector = gr.Dropdown(
40
  label="Select API Endpoint",
41
+ choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
42
+ "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace",
43
+ "Custom-OpenAI-API"],
44
+ value="HuggingFace"
45
  )
46
  api_key_input = gr.Textbox(label="API Key (optional)", type="password")
47
  temperature = gr.Slider(label="Temperature", minimum=0.00, maximum=1.0, step=0.05, value=0.7)
 
173
  )
174
 
175
  return workflow_selector, api_selector, api_key_input, context_input, chatbot, msg, submit_btn, clear_btn, save_btn
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
 
177
  #
178
  # End of script
App_Function_Libraries/Gradio_UI/Explain_summarize_tab.py CHANGED
@@ -11,10 +11,10 @@ from App_Function_Libraries.DB.DB_Manager import load_preset_prompts
11
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_user_prompt
12
  #
13
  # Local Imports
14
- from App_Function_Libraries.Local_Summarization_Lib import summarize_with_llama, summarize_with_kobold, \
15
  summarize_with_oobabooga, summarize_with_tabbyapi, summarize_with_vllm, summarize_with_local_llm, \
16
  summarize_with_ollama
17
- from App_Function_Libraries.Summarization_General_Lib import summarize_with_openai, summarize_with_anthropic, \
18
  summarize_with_cohere, summarize_with_groq, summarize_with_openrouter, summarize_with_deepseek, \
19
  summarize_with_huggingface
20
  #
@@ -24,8 +24,8 @@ from App_Function_Libraries.Summarization_General_Lib import summarize_with_open
24
  # Functions:
25
 
26
  def create_summarize_explain_tab():
27
- with gr.TabItem("Explain/Summarize Text"):
28
- gr.Markdown("# Explain or Summarize Text without ingesting it into the DB")
29
  with gr.Row():
30
  with gr.Column():
31
  with gr.Row():
 
11
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_user_prompt
12
  #
13
  # Local Imports
14
+ from App_Function_Libraries.Summarization.Local_Summarization_Lib import summarize_with_llama, summarize_with_kobold, \
15
  summarize_with_oobabooga, summarize_with_tabbyapi, summarize_with_vllm, summarize_with_local_llm, \
16
  summarize_with_ollama
17
+ from App_Function_Libraries.Summarization.Summarization_General_Lib import summarize_with_openai, summarize_with_anthropic, \
18
  summarize_with_cohere, summarize_with_groq, summarize_with_openrouter, summarize_with_deepseek, \
19
  summarize_with_huggingface
20
  #
 
24
  # Functions:
25
 
26
  def create_summarize_explain_tab():
27
+ with gr.TabItem("Analyze Text"):
28
+ gr.Markdown("# Analyze / Explain / Summarize Text without ingesting it into the DB")
29
  with gr.Row():
30
  with gr.Column():
31
  with gr.Row():
App_Function_Libraries/Gradio_UI/Import_Functionality.py CHANGED
@@ -19,7 +19,7 @@ import pypandoc
19
  from App_Function_Libraries.DB.DB_Manager import insert_prompt_to_db, load_preset_prompts, import_obsidian_note_to_db, \
20
  add_media_to_database
21
  from App_Function_Libraries.Prompt_Handling import import_prompt_from_file, import_prompts_from_zip#
22
- from App_Function_Libraries.Summarization_General_Lib import perform_summarization
23
 
24
  ###################################################################################################################
25
  #
@@ -361,87 +361,6 @@ def create_import_obsidian_vault_tab():
361
  )
362
 
363
 
364
-
365
- # Using pypandoc to convert EPUB to Markdown
366
- def create_import_book_tab():
367
- with gr.TabItem("Import .epub/ebook Files"):
368
- with gr.Row():
369
- with gr.Column():
370
- gr.Markdown("# Ingest an .epub file using pypandoc")
371
- gr.Markdown("...and have it tagged + summarized")
372
- gr.Markdown(
373
- "How to remove DRM from your ebooks: https://www.reddit.com/r/Calibre/comments/1ck4w8e/2024_guide_on_removing_drm_from_kobo_kindle_ebooks/")
374
- import_file = gr.File(label="Upload file for import", file_types=[".epub"])
375
- title_input = gr.Textbox(label="Title", placeholder="Enter the title of the content")
376
- author_input = gr.Textbox(label="Author", placeholder="Enter the author's name")
377
- keywords_input = gr.Textbox(label="Keywords(like genre or publish year)",
378
- placeholder="Enter keywords, comma-separated")
379
- system_prompt_input = gr.Textbox(label="System Prompt",
380
- lines=3,
381
- value=""""
382
- <s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
383
- **Bulleted Note Creation Guidelines**
384
-
385
- **Headings**:
386
- - Based on referenced topics, not categories like quotes or terms
387
- - Surrounded by **bold** formatting
388
- - Not listed as bullet points
389
- - No space between headings and list items underneath
390
-
391
- **Emphasis**:
392
- - **Important terms** set in bold font
393
- - **Text ending in a colon**: also bolded
394
-
395
- **Review**:
396
- - Ensure adherence to specified format
397
- - Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]
398
- """, )
399
- custom_prompt_input = gr.Textbox(label="Custom User Prompt",
400
- placeholder="Enter a custom user prompt for summarization (optional)")
401
- auto_summarize_checkbox = gr.Checkbox(label="Auto-summarize", value=False)
402
- api_name_input = gr.Dropdown(
403
- choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
404
- "OpenRouter",
405
- "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"],
406
- label="API for Auto-summarization"
407
- )
408
- api_key_input = gr.Textbox(label="API Key", type="password")
409
- import_button = gr.Button("Import eBook")
410
- with gr.Column():
411
- with gr.Row():
412
- import_output = gr.Textbox(label="Import Status")
413
-
414
- def import_epub(epub_file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name,
415
- api_key):
416
- try:
417
- # Create a temporary directory to store the converted file
418
- with tempfile.TemporaryDirectory() as temp_dir:
419
- epub_path = epub_file.name
420
- md_path = os.path.join(temp_dir, "converted.md")
421
-
422
- # Use pypandoc to convert EPUB to Markdown
423
- output = pypandoc.convert_file(epub_path, 'md', outputfile=md_path)
424
-
425
- if output != "":
426
- return f"Error converting EPUB: {output}"
427
-
428
- # Read the converted markdown content
429
- with open(md_path, "r", encoding="utf-8") as md_file:
430
- content = md_file.read()
431
-
432
- # Now process the content as you would with a text file
433
- return import_data(content, title, author, keywords, system_prompt,
434
- user_prompt, auto_summarize, api_name, api_key)
435
- except Exception as e:
436
- return f"Error processing EPUB: {str(e)}"
437
-
438
- import_button.click(
439
- fn=import_epub,
440
- inputs=[import_file, title_input, author_input, keywords_input, system_prompt_input,
441
- custom_prompt_input, auto_summarize_checkbox, api_name_input, api_key_input],
442
- outputs=import_output
443
- )
444
-
445
  def import_obsidian_vault(vault_path, progress=gr.Progress()):
446
  try:
447
  from App_Function_Libraries.Gradio_UI.Export_Functionality import scan_obsidian_vault
 
19
  from App_Function_Libraries.DB.DB_Manager import insert_prompt_to_db, load_preset_prompts, import_obsidian_note_to_db, \
20
  add_media_to_database
21
  from App_Function_Libraries.Prompt_Handling import import_prompt_from_file, import_prompts_from_zip#
22
+ from App_Function_Libraries.Summarization.Summarization_General_Lib import perform_summarization
23
 
24
  ###################################################################################################################
25
  #
 
361
  )
362
 
363
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
364
  def import_obsidian_vault(vault_path, progress=gr.Progress()):
365
  try:
366
  from App_Function_Libraries.Gradio_UI.Export_Functionality import scan_obsidian_vault
App_Function_Libraries/Gradio_UI/MMLU_Pro_tab.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MMLU_Pro_tab.py
2
+ # is a library that contains the Gradio UI code for the MMLU-Pro benchmarking tool.
3
+ #
4
+ ##############################################################################################################
5
+ # Imports
6
+ import os
7
+
8
+ import gradio as gr
9
+ import logging
10
+ #
11
+ # External Imports
12
+ from tqdm import tqdm
13
+ # Local Imports
14
+ from App_Function_Libraries.Benchmarks_Evaluations.MMLU_Pro.MMLU_Pro_rewritten import (
15
+ load_mmlu_pro, run_mmlu_pro_benchmark, mmlu_pro_main, load_mmlu_pro_config
16
+ )
17
+ #
18
+ ##############################################################################################################
19
+ #
20
+ # Functions:
21
+
22
+ # Set up logging
23
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
def get_categories():
    """Return the category names present in the MMLU-Pro test dataset.

    On any failure to load the dataset, logs the error and returns a
    single-element placeholder list so callers always get a list of strings.
    """
    try:
        dataset, _validation = load_mmlu_pro()
        # The test split is keyed by category name.
        return [name for name in dataset.keys()]
    except Exception as exc:
        logger.error(f"Failed to load categories: {exc}")
        return ["Error loading categories"]
35
+
36
+
37
def load_categories():
    """Produce a dropdown update populated with the available categories.

    Selects the first category by default; falls back to an error
    placeholder choice when no categories could be fetched.
    """
    available = get_categories()
    if not available:
        fallback = "Error loading categories"
        return gr.update(choices=[fallback], value=fallback)
    return gr.update(choices=available, value=available[0])
44
+
45
+
46
def run_benchmark_from_ui(url, api_key, model, timeout, category, parallel, verbosity, log_prompt):
    """Run the MMLU-Pro benchmark with parameters supplied by the Gradio UI.

    Args:
        url: Inference server URL.
        api_key: API key for the server (may be empty).
        model: Model identifier; slashes are escaped when locating the report.
        timeout: Per-request timeout in seconds.
        category: Single category to benchmark, or falsy for all categories.
        parallel: Number of parallel requests.
        verbosity: Logging verbosity level.
        log_prompt: Whether to log the full prompt.

    Returns:
        The contents of the generated final report, or an error message
        string if anything fails (never raises).
    """
    # Override config with the UI parameters.
    # NOTE(review): mmlu_pro_main() below takes no arguments, so this call
    # presumably installs the config as module-level state inside
    # MMLU_Pro_rewritten — confirm; otherwise the UI overrides are ignored.
    config = load_mmlu_pro_config(
        url=url,
        api_key=api_key,
        model=model,
        timeout=timeout,
        categories=[category] if category else None,
        parallel=parallel,
        verbosity=verbosity,
        log_prompt=log_prompt
    )

    # Run the benchmarking process
    try:
        # Call the main benchmarking function; it writes results to disk.
        mmlu_pro_main()

        # The final report is generated under the "eval_results" folder,
        # in a subfolder named after the model (slashes replaced with dashes).
        report_path = os.path.join("eval_results", config["server"]["model"].replace("/", "-"), "final_report.txt")

        # Fix: read with an explicit encoding so the report decodes the same
        # way on every platform (default encoding is locale-dependent).
        with open(report_path, "r", encoding="utf-8") as f:
            report = f.read()

        return report
    except Exception as e:
        logger.error(f"An error occurred during benchmark execution: {e}")
        return f"An error occurred during benchmark execution. Please check the logs for more information. Error: {str(e)}"
77
+
78
+
79
def create_mmlu_pro_tab():
    """Assemble the MMLU-Pro Benchmark tab and return its components.

    Left column collects the benchmark configuration; right column holds the
    run button and the results display. Returns the components as a list in
    a fixed order for callers that wire them up elsewhere.
    """
    with gr.Tab("MMLU-Pro Benchmark"):
        gr.Markdown("## Run MMLU-Pro Benchmark")

        with gr.Row():
            with gr.Column():
                # Benchmark configuration inputs.
                server_url = gr.Textbox(label="Server URL")
                key_box = gr.Textbox(label="API Key", type="password")
                model_box = gr.Textbox(label="Model Name")
                timeout_box = gr.Number(label="Timeout (seconds)", value=30)
                category_dd = gr.Dropdown(label="Category", choices=["Load categories..."])
                load_btn = gr.Button("Load Categories")
                parallel_slider = gr.Slider(label="Parallel Requests", minimum=1, maximum=10, step=1, value=1)
                verbosity_slider = gr.Slider(label="Verbosity Level", minimum=0, maximum=2, step=1, value=1)
                log_prompt_cb = gr.Checkbox(label="Log Prompt")

            with gr.Column():
                # Execution controls and output display.
                start_btn = gr.Button("Run Benchmark")
                results_box = gr.Textbox(label="Benchmark Results", lines=20)

        # Populate the category dropdown on demand.
        load_btn.click(
            load_categories,
            outputs=category_dd
        )

        # Kick off the benchmark with the configured parameters.
        start_btn.click(
            run_benchmark_from_ui,
            inputs=[server_url, key_box, model_box, timeout_box, category_dd,
                    parallel_slider, verbosity_slider, log_prompt_cb],
            outputs=results_box
        )

    return [server_url, key_box, model_box, timeout_box, category_dd,
            parallel_slider, verbosity_slider, log_prompt_cb, start_btn, results_box]
App_Function_Libraries/Gradio_UI/Plaintext_tab_import.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Plaintext_tab_import.py
2
+ # Contains the code for the "Import Plain Text Files" tab in the Gradio UI.
3
+ # This tab allows users to upload plain text files (Markdown, Text, RTF) or a zip file containing multiple files.
4
+ # The user can provide a title, author, keywords, system prompt, custom user prompt, and select an API for auto-summarization.
5
+ #
6
+ #######################################################################################################################
7
+ #
8
+ # Import necessary libraries
9
+ import os
10
+ import tempfile
11
+ import zipfile
12
+ #
13
+ # Import Non-Local
14
+ import gradio as gr
15
+ from docx2txt import docx2txt
16
+ from pypandoc import convert_file
17
+ #
18
+ # Import Local libraries
19
+ from App_Function_Libraries.Gradio_UI.Import_Functionality import import_data
20
+ #
21
+ #######################################################################################################################
22
+ #
23
+ # Functions:
24
+
25
def create_plain_text_import_tab():
    """Build the Gradio tab for importing plain-text-like documents.

    Accepts a single ``.md``/``.txt``/``.rtf``/``.docx`` file or a ``.zip``
    containing any mix of those, optionally auto-summarizing each imported
    document via the selected API. Returns all created components so callers
    can wire additional behavior.
    """
    with gr.TabItem("Import Plain text & .docx Files"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("# Import Markdown(`.md`)/Text(`.txt`)/rtf & `.docx` Files")
                gr.Markdown("Upload a single file or a zip file containing multiple files")
                import_file = gr.File(label="Upload file for import", file_types=[".md", ".txt", ".rtf", ".docx", ".zip"])
                title_input = gr.Textbox(label="Title", placeholder="Enter the title of the content (for single files)")
                author_input = gr.Textbox(label="Author", placeholder="Enter the author's name (for single files)")
                keywords_input = gr.Textbox(label="Keywords", placeholder="Enter keywords, comma-separated")
                system_prompt_input = gr.Textbox(label="System Prompt (for Summarization)", lines=3,
                                                 value="""<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
**Bulleted Note Creation Guidelines**

**Headings**:
- Based on referenced topics, not categories like quotes or terms
- Surrounded by **bold** formatting
- Not listed as bullet points
- No space between headings and list items underneath

**Emphasis**:
- **Important terms** set in bold font
- **Text ending in a colon**: also bolded

**Review**:
- Ensure adherence to specified format
- Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]""",
                                                 )
                custom_prompt_input = gr.Textbox(label="Custom User Prompt", placeholder="Enter a custom user prompt for summarization (optional)")
                auto_summarize_checkbox = gr.Checkbox(label="Auto-summarize", value=False)
                api_name_input = gr.Dropdown(
                    choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
                             "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"],
                    label="API for Auto-summarization"
                )
                api_key_input = gr.Textbox(label="API Key", type="password")
                import_button = gr.Button("Import File(s)")
            with gr.Column():
                import_output = gr.Textbox(label="Import Status")

        def import_plain_text_file(file_path, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key):
            """Convert one file to text (if needed) and hand it to import_data.

            Returns the import-status string, or an error message on failure.
            """
            try:
                file_extension = os.path.splitext(file_path)[1].lower()
                if file_extension == '.rtf':
                    # Convert RTF -> Markdown via pandoc, then read the result.
                    # (BUGFIX: previously the converted file was never read,
                    # leaving `content` unbound for .rtf inputs.)
                    with tempfile.NamedTemporaryFile(suffix='.md', delete=False) as temp_file:
                        convert_file(file_path, 'md', outputfile=temp_file.name)
                        file_path = temp_file.name
                    with open(file_path, 'r', encoding='utf-8') as file:
                        content = file.read()
                elif file_extension == '.docx':
                    content = docx2txt.process(file_path)
                else:
                    # .md / .txt are already plain text.
                    with open(file_path, 'r', encoding='utf-8') as file:
                        content = file.read()

                # Hand the extracted text off for ingestion (and optional summarization).
                return import_data(content, title, author, keywords, system_prompt,
                                   user_prompt, auto_summarize, api_name, api_key)
            except Exception as e:
                return f"Error processing file: {str(e)}"

        def process_plain_text_zip_file(zip_file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key):
            """Extract a zip and import every supported file inside it.

            Returns one status line per processed file, newline-joined.
            """
            results = []
            with tempfile.TemporaryDirectory() as temp_dir:
                with zipfile.ZipFile(zip_file.name, 'r') as zip_ref:
                    zip_ref.extractall(temp_dir)

                for filename in os.listdir(temp_dir):
                    if filename.lower().endswith(('.md', '.txt', '.rtf', '.docx')):
                        file_path = os.path.join(temp_dir, filename)
                        result = import_plain_text_file(file_path, title, author, keywords, system_prompt,
                                                        user_prompt, auto_summarize, api_name, api_key)
                        # Report the actual filename with each per-file result.
                        results.append(f"File: {filename} - {result}")

            return "\n".join(results)

        def import_file_handler(file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key):
            """Dispatch the uploaded file to the single-file or zip importer."""
            if file.name.lower().endswith(('.md', '.txt', '.rtf', '.docx')):
                return import_plain_text_file(file.name, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key)
            elif file.name.lower().endswith('.zip'):
                return process_plain_text_zip_file(file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key)
            else:
                return "Unsupported file type. Please upload a .md, .txt, .rtf, .docx file or a .zip file containing these file types."

        import_button.click(
            fn=import_file_handler,
            inputs=[import_file, title_input, author_input, keywords_input, system_prompt_input,
                    custom_prompt_input, auto_summarize_checkbox, api_name_input, api_key_input],
            outputs=import_output
        )

    return import_file, title_input, author_input, keywords_input, system_prompt_input, custom_prompt_input, auto_summarize_checkbox, api_name_input, api_key_input, import_button, import_output
App_Function_Libraries/Gradio_UI/RAG_QA_Chat_tab.py CHANGED
@@ -2,26 +2,30 @@
2
  # Description: Gradio UI for RAG QA Chat
3
  #
4
  # Imports
 
 
 
 
 
5
  #
6
  # External Imports
7
- import logging
8
-
9
  import gradio as gr
10
-
11
- from App_Function_Libraries.DB.DB_Manager import DatabaseError, get_paginated_files
12
- from App_Function_Libraries.RAG.RAG_QA_Chat import search_database, load_chat_history, \
13
- save_chat_history, rag_qa_chat
14
-
15
-
16
- #
17
  # Local Imports
 
 
 
 
 
 
 
18
  #
19
  ########################################################################################################################
20
  #
21
  # Functions:
22
 
23
  def create_rag_qa_chat_tab():
24
- with gr.TabItem("RAG QA Chat (WIP)"):
25
  gr.Markdown("# RAG QA Chat")
26
 
27
  with gr.Row():
@@ -41,24 +45,25 @@ def create_rag_qa_chat_tab():
41
  search_query = gr.Textbox(label="Search Query", visible=False)
42
  search_button = gr.Button("Search", visible=False)
43
  search_results = gr.Dropdown(label="Search Results", choices=[], visible=False)
44
- file_upload = gr.File(label="Upload File", visible=False)
 
 
 
 
 
 
45
 
46
  api_choice = gr.Dropdown(
47
  choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral", "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"],
48
  label="Select API for RAG",
49
  value="OpenAI"
50
  )
51
- chat_file = gr.File(label="Chat File")
52
- load_chat = gr.Button("Load Chat")
53
- clear = gr.Button("Clear Current Chat")
54
 
55
  with gr.Column(scale=2):
56
  chatbot = gr.Chatbot(height=500)
57
  msg = gr.Textbox(label="Enter your message")
58
- submit = gr.Button("Submit")
59
-
60
- save_chat = gr.Button("Save Chat")
61
-
62
 
63
  loading_indicator = gr.HTML(visible=False)
64
 
@@ -82,12 +87,14 @@ def create_rag_qa_chat_tab():
82
  search_query: gr.update(visible=choice == "Search Database"),
83
  search_button: gr.update(visible=choice == "Search Database"),
84
  search_results: gr.update(visible=choice == "Search Database"),
85
- file_upload: gr.update(visible=choice == "Upload File")
 
 
86
  }
87
 
88
  context_source.change(update_context_source, context_source,
89
  [existing_file, prev_page_btn, next_page_btn, page_info, search_query, search_button,
90
- search_results, file_upload])
91
 
92
  next_page_btn.click(next_page_fn, inputs=[file_page], outputs=[existing_file, page_info, file_page])
93
  prev_page_btn.click(prev_page_fn, inputs=[file_page], outputs=[existing_file, page_info, file_page])
@@ -98,53 +105,124 @@ def create_rag_qa_chat_tab():
98
  loading_indicator = gr.HTML(visible=False)
99
 
100
  def rag_qa_chat_wrapper(message, history, context_source, existing_file, search_results, file_upload,
101
- api_choice):
102
  try:
 
 
 
 
103
  # Show loading indicator
104
  yield history, "", gr.update(visible=True)
105
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  if context_source == "Existing File":
107
  context = f"media_id:{existing_file.split('(ID: ')[1][:-1]}"
 
108
  elif context_source == "Search Database":
109
  context = f"media_id:{search_results.split('(ID: ')[1][:-1]}"
 
110
  else: # Upload File
 
111
  if file_upload is None:
112
  raise ValueError("No file uploaded")
113
- context = file_upload
114
 
115
- new_history, response = rag_qa_chat(message, history, context, api_choice)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  gr.Info("Response generated successfully")
 
117
  yield new_history, "", gr.update(visible=False)
118
  except ValueError as e:
 
119
  gr.Error(f"Input error: {str(e)}")
120
  yield history, "", gr.update(visible=False)
121
  except DatabaseError as e:
 
122
  gr.Error(f"Database error: {str(e)}")
123
  yield history, "", gr.update(visible=False)
124
  except Exception as e:
125
- logging.error(f"Unexpected error in rag_qa_chat_wrapper: {e}")
126
  gr.Error("An unexpected error occurred. Please try again later.")
127
  yield history, "", gr.update(visible=False)
128
 
129
- def save_chat_history_wrapper(history):
130
- try:
131
- file_path = save_chat_history(history)
132
- gr.Info("Chat history saved successfully")
133
- return gr.update(value=file_path)
134
- except Exception as e:
135
- gr.Error(f"Error saving chat history: {str(e)}")
136
- return gr.update(value=None)
137
 
138
- def load_chat_history_wrapper(file):
139
- try:
140
- if file is not None:
141
- history = load_chat_history(file)
142
- gr.Info("Chat history loaded successfully")
143
- return history
144
- return []
145
- except Exception as e:
146
- gr.Error(f"Error loading chat history: {str(e)}")
147
- return []
 
148
 
149
  def perform_search(query):
150
  try:
@@ -154,20 +232,57 @@ def create_rag_qa_chat_tab():
154
  gr.Error(f"Error performing search: {str(e)}")
155
  return gr.update(choices=[])
156
 
157
- save_chat.click(save_chat_history_wrapper, inputs=[chatbot], outputs=[chat_file])
158
- load_chat.click(load_chat_history_wrapper, inputs=[chat_file], outputs=[chatbot])
159
 
160
  search_button.click(perform_search, inputs=[search_query], outputs=[search_results])
161
 
162
  submit.click(
163
  rag_qa_chat_wrapper,
164
- inputs=[msg, chatbot, context_source, existing_file, search_results, file_upload, api_choice],
 
165
  outputs=[chatbot, msg, loading_indicator]
166
  )
167
 
168
- clear.click(lambda: ([], None), outputs=[chatbot, chat_file])
169
-
170
- return context_source, existing_file, search_query, search_button, search_results, file_upload, api_choice, chatbot, msg, submit, clear, save_chat, load_chat, chat_file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
  #
173
  # End of RAG_QA_Chat_tab.py
 
2
  # Description: Gradio UI for RAG QA Chat
3
  #
4
  # Imports
5
+ import csv
6
+ import logging
7
+ import json
8
+ import os
9
+ from datetime import datetime
10
  #
11
  # External Imports
12
+ import docx2txt
 
13
  import gradio as gr
 
 
 
 
 
 
 
14
  # Local Imports
15
+ from App_Function_Libraries.Books.Book_Ingestion_Lib import read_epub
16
+ from App_Function_Libraries.DB.DB_Manager import DatabaseError, get_paginated_files, add_media_with_keywords
17
+ from App_Function_Libraries.PDF.PDF_Ingestion_Lib import extract_text_and_format_from_pdf
18
+ from App_Function_Libraries.RAG.RAG_Libary_2 import generate_answer
19
+ from App_Function_Libraries.RAG.RAG_QA_Chat import search_database, rag_qa_chat
20
+ # Eventually... FIXME
21
+ from App_Function_Libraries.RAG.RAG_QA_Chat import load_chat_history, save_chat_history
22
  #
23
  ########################################################################################################################
24
  #
25
  # Functions:
26
 
27
  def create_rag_qa_chat_tab():
28
+ with gr.TabItem("RAG QA Chat"):
29
  gr.Markdown("# RAG QA Chat")
30
 
31
  with gr.Row():
 
45
  search_query = gr.Textbox(label="Search Query", visible=False)
46
  search_button = gr.Button("Search", visible=False)
47
  search_results = gr.Dropdown(label="Search Results", choices=[], visible=False)
48
+ file_upload = gr.File(
49
+ label="Upload File",
50
+ visible=False,
51
+ file_types=["txt", "pdf", "epub", "md", "rtf", "json", "csv"]
52
+ )
53
+ convert_to_text = gr.Checkbox(label="Convert to plain text", visible=False)
54
+ keywords = gr.Textbox(label="Keywords (comma-separated)", visible=False)
55
 
56
  api_choice = gr.Dropdown(
57
  choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral", "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"],
58
  label="Select API for RAG",
59
  value="OpenAI"
60
  )
 
 
 
61
 
62
  with gr.Column(scale=2):
63
  chatbot = gr.Chatbot(height=500)
64
  msg = gr.Textbox(label="Enter your message")
65
+ submit = gr.Button("Submit (Might take a few seconds/turns blue while processing...)")
66
+ clear_chat = gr.Button("Clear Chat History")
 
 
67
 
68
  loading_indicator = gr.HTML(visible=False)
69
 
 
87
  search_query: gr.update(visible=choice == "Search Database"),
88
  search_button: gr.update(visible=choice == "Search Database"),
89
  search_results: gr.update(visible=choice == "Search Database"),
90
+ file_upload: gr.update(visible=choice == "Upload File"),
91
+ convert_to_text: gr.update(visible=choice == "Upload File"),
92
+ keywords: gr.update(visible=choice == "Upload File")
93
  }
94
 
95
  context_source.change(update_context_source, context_source,
96
  [existing_file, prev_page_btn, next_page_btn, page_info, search_query, search_button,
97
+ search_results, file_upload, convert_to_text, keywords])
98
 
99
  next_page_btn.click(next_page_fn, inputs=[file_page], outputs=[existing_file, page_info, file_page])
100
  prev_page_btn.click(prev_page_fn, inputs=[file_page], outputs=[existing_file, page_info, file_page])
 
105
  loading_indicator = gr.HTML(visible=False)
106
 
107
  def rag_qa_chat_wrapper(message, history, context_source, existing_file, search_results, file_upload,
108
+ convert_to_text, keywords, api_choice):
109
  try:
110
+ logging.info(f"Starting rag_qa_chat_wrapper with message: {message}")
111
+ logging.info(f"Context source: {context_source}")
112
+ logging.info(f"API choice: {api_choice}")
113
+
114
  # Show loading indicator
115
  yield history, "", gr.update(visible=True)
116
 
117
+ # Ensure api_choice is a string
118
+ api_choice = api_choice.value if isinstance(api_choice, gr.components.Dropdown) else api_choice
119
+ logging.info(f"Resolved API choice: {api_choice}")
120
+
121
+ # Only rephrase the question if it's not the first query
122
+ if len(history) > 0:
123
+ rephrased_question = rephrase_question(history, message, api_choice)
124
+ logging.info(f"Original question: {message}")
125
+ logging.info(f"Rephrased question: {rephrased_question}")
126
+ else:
127
+ rephrased_question = message
128
+ logging.info(f"First question, no rephrasing: {message}")
129
+
130
  if context_source == "Existing File":
131
  context = f"media_id:{existing_file.split('(ID: ')[1][:-1]}"
132
+ logging.info(f"Using existing file with context: {context}")
133
  elif context_source == "Search Database":
134
  context = f"media_id:{search_results.split('(ID: ')[1][:-1]}"
135
+ logging.info(f"Using search result with context: {context}")
136
  else: # Upload File
137
+ logging.info("Processing uploaded file")
138
  if file_upload is None:
139
  raise ValueError("No file uploaded")
 
140
 
141
+ # Process the uploaded file
142
+ file_path = file_upload.name
143
+ file_name = os.path.basename(file_path)
144
+ logging.info(f"Uploaded file: {file_name}")
145
+
146
+ if convert_to_text:
147
+ logging.info("Converting file to plain text")
148
+ content = convert_file_to_text(file_path)
149
+ else:
150
+ logging.info("Reading file content")
151
+ with open(file_path, 'r', encoding='utf-8') as f:
152
+ content = f.read()
153
+
154
+ logging.info(f"File content length: {len(content)} characters")
155
+
156
+ # Process keywords
157
+ if not keywords:
158
+ keywords = "default,rag-file-upload"
159
+ logging.info(f"Keywords: {keywords}")
160
+
161
+ # Add the content to the database and get the media_id
162
+ logging.info("Adding content to database")
163
+ result = add_media_with_keywords(
164
+ url=file_name,
165
+ title=file_name,
166
+ media_type='document',
167
+ content=content,
168
+ keywords=keywords,
169
+ prompt='No prompt for uploaded files',
170
+ summary='No summary for uploaded files',
171
+ transcription_model='None',
172
+ author='Unknown',
173
+ ingestion_date=datetime.now().strftime('%Y-%m-%d')
174
+ )
175
+
176
+ logging.info(f"Result from add_media_with_keywords: {result}")
177
+ if isinstance(result, tuple):
178
+ media_id, _ = result
179
+ else:
180
+ media_id = result
181
+
182
+ context = f"media_id:{media_id}"
183
+ logging.info(f"Context for uploaded file: {context}")
184
+
185
+ logging.info("Calling rag_qa_chat function")
186
+ new_history, response = rag_qa_chat(rephrased_question, history, context, api_choice)
187
+ # Log first 100 chars of response
188
+ logging.info(
189
+ f"Response received from rag_qa_chat: {response[:100]}...")
190
+
191
+ # Add the original question to the history
192
+ new_history[-1] = (message, new_history[-1][1])
193
+
194
  gr.Info("Response generated successfully")
195
+ logging.info("rag_qa_chat_wrapper completed successfully")
196
  yield new_history, "", gr.update(visible=False)
197
  except ValueError as e:
198
+ logging.error(f"Input error in rag_qa_chat_wrapper: {str(e)}")
199
  gr.Error(f"Input error: {str(e)}")
200
  yield history, "", gr.update(visible=False)
201
  except DatabaseError as e:
202
+ logging.error(f"Database error in rag_qa_chat_wrapper: {str(e)}")
203
  gr.Error(f"Database error: {str(e)}")
204
  yield history, "", gr.update(visible=False)
205
  except Exception as e:
206
+ logging.error(f"Unexpected error in rag_qa_chat_wrapper: {e}", exc_info=True)
207
  gr.Error("An unexpected error occurred. Please try again later.")
208
  yield history, "", gr.update(visible=False)
209
 
210
+ def rephrase_question(history, latest_question, api_choice):
211
+ # Thank you https://www.reddit.com/r/LocalLLaMA/comments/1fi1kex/multi_turn_conversation_and_rag/
212
+ conversation_history = "\n".join([f"User: {h[0]}\nAssistant: {h[1]}" for h in history[:-1]])
213
+ prompt = f"""You are a helpful assistant. Given the conversation history and the latest question, resolve any ambiguous references in the latest question.
 
 
 
 
214
 
215
+ Conversation History:
216
+ {conversation_history}
217
+
218
+ Latest Question:
219
+ {latest_question}
220
+
221
+ Rewritten Question:"""
222
+
223
+ # Use the selected API to generate the rephrased question
224
+ rephrased_question = generate_answer(api_choice, prompt, "")
225
+ return rephrased_question.strip()
226
 
227
  def perform_search(query):
228
  try:
 
232
  gr.Error(f"Error performing search: {str(e)}")
233
  return gr.update(choices=[])
234
 
235
+ def clear_chat_history():
236
+ return [], ""
237
 
238
  search_button.click(perform_search, inputs=[search_query], outputs=[search_results])
239
 
240
  submit.click(
241
  rag_qa_chat_wrapper,
242
+ inputs=[msg, chatbot, context_source, existing_file, search_results, file_upload,
243
+ convert_to_text, keywords, api_choice],
244
  outputs=[chatbot, msg, loading_indicator]
245
  )
246
 
247
+ clear_chat.click(clear_chat_history, outputs=[chatbot, msg])
248
+
249
+ return (context_source, existing_file, search_query, search_button, search_results, file_upload,
250
+ convert_to_text, keywords, api_choice, chatbot, msg, submit, clear_chat)
251
+
252
def convert_file_to_text(file_path):
    """Convert a supported document into plain text.

    Dispatches on the file extension: PDFs and EPUBs go through their
    dedicated extractors, JSON/CSV through the structured-file reader,
    ``.docx`` through docx2txt, and plain-text formats are read directly.

    Raises:
        ValueError: if the extension is not one of the supported types.
    """
    file_extension = os.path.splitext(file_path)[1].lower()

    if file_extension == '.pdf':
        return extract_text_and_format_from_pdf(file_path)
    if file_extension == '.epub':
        return read_epub(file_path)
    if file_extension in ('.json', '.csv'):
        return read_structured_file(file_path)
    if file_extension == '.docx':
        return docx2txt.process(file_path)
    if file_extension in ('.txt', '.md', '.rtf'):
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read()
    raise ValueError(f"Unsupported file type: {file_extension}")
269
+
270
def read_structured_file(file_path):
    """Read a JSON or CSV file and return its contents as text.

    JSON files are re-serialized with 2-space indentation; CSV files are
    re-joined row-by-row with commas and newlines.

    Args:
        file_path: Path to a ``.json`` or ``.csv`` file.

    Returns:
        The file contents rendered as a plain-text string.

    Raises:
        ValueError: if the extension is neither ``.json`` nor ``.csv``.
    """
    file_extension = os.path.splitext(file_path)[1].lower()

    if file_extension == '.json':
        # Explicit UTF-8: don't depend on the platform's default encoding.
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
        return json.dumps(data, indent=2)

    elif file_extension == '.csv':
        # newline='' lets the csv module handle line endings correctly.
        with open(file_path, 'r', newline='', encoding='utf-8') as file:
            csv_reader = csv.reader(file)
            return '\n'.join([','.join(row) for row in csv_reader])

    else:
        raise ValueError(f"Unsupported file type: {file_extension}")
286
 
287
  #
288
  # End of RAG_QA_Chat_tab.py
App_Function_Libraries/Gradio_UI/Search_Tab.py CHANGED
@@ -10,10 +10,11 @@ import sqlite3
10
  import gradio as gr
11
  #
12
  # Local Imports
13
- from App_Function_Libraries.DB.DB_Manager import view_database, search_and_display_items
14
- from App_Function_Libraries.DB.SQLite_DB import search_prompts
 
15
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_dropdown, update_detailed_view
16
- from App_Function_Libraries.Utils.Utils import get_database_path
17
  #
18
  ###################################################################################################
19
  #
@@ -22,30 +23,95 @@ from App_Function_Libraries.Utils.Utils import get_database_path
22
  logger = logging.getLogger()
23
 
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  def create_search_tab():
26
  with gr.TabItem("Search / Detailed View"):
27
  with gr.Row():
28
- with gr.Column():
29
  gr.Markdown("# Search across all ingested items in the Database")
30
- gr.Markdown(" by Title / URL / Keyword / or Content via SQLite Full-Text-Search")
31
  search_query_input = gr.Textbox(label="Search Query", placeholder="Enter your search query here...")
32
- search_type_input = gr.Radio(choices=["Title", "URL", "Keyword", "Content"], value="Title", label="Search By")
 
33
  search_button = gr.Button("Search")
34
  items_output = gr.Dropdown(label="Select Item", choices=[])
35
  item_mapping = gr.State({})
36
- prompt_summary_output = gr.HTML(label="Prompt & Summary", visible=True)
37
 
38
  search_button.click(
39
  fn=update_dropdown,
40
  inputs=[search_query_input, search_type_input],
41
  outputs=[items_output, item_mapping]
42
  )
43
- with gr.Column():
44
- content_output = gr.Markdown(label="Content", visible=True)
 
 
 
 
45
  items_output.change(
46
- fn=update_detailed_view,
47
  inputs=[items_output, item_mapping],
48
- outputs=[prompt_summary_output, content_output]
 
 
 
 
 
 
49
  )
50
 
51
 
@@ -81,53 +147,6 @@ def display_search_results(query):
81
  return "No results found."
82
 
83
 
84
- def create_viewing_tab():
85
- with gr.TabItem("View Database"):
86
- gr.Markdown("# View Database Entries")
87
- with gr.Row():
88
- with gr.Column():
89
- entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
90
- page_number = gr.Number(value=1, label="Page Number", precision=0)
91
- view_button = gr.Button("View Page")
92
- next_page_button = gr.Button("Next Page")
93
- previous_page_button = gr.Button("Previous Page")
94
- with gr.Column():
95
- results_display = gr.HTML()
96
- pagination_info = gr.Textbox(label="Pagination Info", interactive=False)
97
-
98
- def update_page(page, entries_per_page):
99
- results, pagination, total_pages = view_database(page, entries_per_page)
100
- next_disabled = page >= total_pages
101
- prev_disabled = page <= 1
102
- return results, pagination, page, gr.update(interactive=not next_disabled), gr.update(interactive=not prev_disabled)
103
-
104
- def go_to_next_page(current_page, entries_per_page):
105
- next_page = current_page + 1
106
- return update_page(next_page, entries_per_page)
107
-
108
- def go_to_previous_page(current_page, entries_per_page):
109
- previous_page = max(1, current_page - 1)
110
- return update_page(previous_page, entries_per_page)
111
-
112
- view_button.click(
113
- fn=update_page,
114
- inputs=[page_number, entries_per_page],
115
- outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
116
- )
117
-
118
- next_page_button.click(
119
- fn=go_to_next_page,
120
- inputs=[page_number, entries_per_page],
121
- outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
122
- )
123
-
124
- previous_page_button.click(
125
- fn=go_to_previous_page,
126
- inputs=[page_number, entries_per_page],
127
- outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
128
- )
129
-
130
-
131
  def create_search_summaries_tab():
132
  with gr.TabItem("Search/View Title+Summary "):
133
  gr.Markdown("# Search across all ingested items in the Database and review their summaries")
@@ -185,112 +204,6 @@ def create_search_summaries_tab():
185
  )
186
 
187
 
188
-
189
- def create_prompt_view_tab():
190
- with gr.TabItem("View Prompt Database"):
191
- gr.Markdown("# View Prompt Database Entries")
192
- with gr.Row():
193
- with gr.Column():
194
- entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
195
- page_number = gr.Number(value=1, label="Page Number", precision=0)
196
- view_button = gr.Button("View Page")
197
- next_page_button = gr.Button("Next Page")
198
- previous_page_button = gr.Button("Previous Page")
199
- with gr.Column():
200
- pagination_info = gr.Textbox(label="Pagination Info", interactive=False)
201
- results_display = gr.HTML()
202
-
203
- # FIXME - SQL functions to be moved to DB_Manager
204
- def view_database(page, entries_per_page):
205
- offset = (page - 1) * entries_per_page
206
- try:
207
- with sqlite3.connect(get_database_path('prompts.db')) as conn:
208
- cursor = conn.cursor()
209
- cursor.execute('''
210
- SELECT p.name, p.details, p.system, p.user, GROUP_CONCAT(k.keyword, ', ') as keywords
211
- FROM Prompts p
212
- LEFT JOIN PromptKeywords pk ON p.id = pk.prompt_id
213
- LEFT JOIN Keywords k ON pk.keyword_id = k.id
214
- GROUP BY p.id
215
- ORDER BY p.name
216
- LIMIT ? OFFSET ?
217
- ''', (entries_per_page, offset))
218
- prompts = cursor.fetchall()
219
-
220
- cursor.execute('SELECT COUNT(*) FROM Prompts')
221
- total_prompts = cursor.fetchone()[0]
222
-
223
- results = ""
224
- for prompt in prompts:
225
- # Escape HTML special characters and replace newlines with <br> tags
226
- title = html.escape(prompt[0]).replace('\n', '<br>')
227
- details = html.escape(prompt[1] or '').replace('\n', '<br>')
228
- system_prompt = html.escape(prompt[2] or '')
229
- user_prompt = html.escape(prompt[3] or '')
230
- keywords = html.escape(prompt[4] or '').replace('\n', '<br>')
231
-
232
- results += f"""
233
- <div style="border: 1px solid #ddd; padding: 10px; margin-bottom: 20px;">
234
- <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px;">
235
- <div><strong>Title:</strong> {title}</div>
236
- <div><strong>Details:</strong> {details}</div>
237
- </div>
238
- <div style="margin-top: 10px;">
239
- <strong>User Prompt:</strong>
240
- <pre style="white-space: pre-wrap; word-wrap: break-word;">{user_prompt}</pre>
241
- </div>
242
- <div style="margin-top: 10px;">
243
- <strong>System Prompt:</strong>
244
- <pre style="white-space: pre-wrap; word-wrap: break-word;">{system_prompt}</pre>
245
- </div>
246
- <div style="margin-top: 10px;">
247
- <strong>Keywords:</strong> {keywords}
248
- </div>
249
- </div>
250
- """
251
-
252
- total_pages = (total_prompts + entries_per_page - 1) // entries_per_page
253
- pagination = f"Page {page} of {total_pages} (Total prompts: {total_prompts})"
254
-
255
- return results, pagination, total_pages
256
- except sqlite3.Error as e:
257
- return f"<p>Error fetching prompts: {e}</p>", "Error", 0
258
-
259
- def update_page(page, entries_per_page):
260
- results, pagination, total_pages = view_database(page, entries_per_page)
261
- next_disabled = page >= total_pages
262
- prev_disabled = page <= 1
263
- return results, pagination, page, gr.update(interactive=not next_disabled), gr.update(
264
- interactive=not prev_disabled)
265
-
266
- def go_to_next_page(current_page, entries_per_page):
267
- next_page = current_page + 1
268
- return update_page(next_page, entries_per_page)
269
-
270
- def go_to_previous_page(current_page, entries_per_page):
271
- previous_page = max(1, current_page - 1)
272
- return update_page(previous_page, entries_per_page)
273
-
274
- view_button.click(
275
- fn=update_page,
276
- inputs=[page_number, entries_per_page],
277
- outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
278
- )
279
-
280
- next_page_button.click(
281
- fn=go_to_next_page,
282
- inputs=[page_number, entries_per_page],
283
- outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
284
- )
285
-
286
- previous_page_button.click(
287
- fn=go_to_previous_page,
288
- inputs=[page_number, entries_per_page],
289
- outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
290
- )
291
-
292
-
293
-
294
  def create_prompt_search_tab():
295
  with gr.TabItem("Search Prompts"):
296
  gr.Markdown("# Search and View Prompt Details")
@@ -402,3 +315,7 @@ def create_prompt_search_tab():
402
  inputs=[search_query_input, page_number, entries_per_page],
403
  outputs=[search_results_output, pagination_info, page_number, next_page_button, previous_page_button]
404
  )
 
 
 
 
 
10
  import gradio as gr
11
  #
12
  # Local Imports
13
+ from App_Function_Libraries.DB.DB_Manager import view_database, search_and_display_items, get_all_document_versions, \
14
+ fetch_item_details_single, fetch_paginated_data, fetch_item_details, get_latest_transcription
15
+ from App_Function_Libraries.DB.SQLite_DB import search_prompts, get_document_version
16
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_dropdown, update_detailed_view
17
+ from App_Function_Libraries.Utils.Utils import get_database_path, format_text_with_line_breaks
18
  #
19
  ###################################################################################################
20
  #
 
23
  logger = logging.getLogger()
24
 
25
 
26
def update_detailed_view_with_versions(selected_item, item_mapping):
    """Show the prompt/summary/transcription for the chosen item and populate its version list.

    Returns (prompt, summary_html, transcription_html, version-dropdown update);
    clears the panes and hides the dropdown when nothing valid is selected.
    """
    if not (selected_item and item_mapping and selected_item in item_mapping):
        # Nothing valid selected: blank the panes and hide the version picker.
        return "", "", "", gr.update(choices=[], visible=False)

    media_id = item_mapping[selected_item]
    prompt, summary, transcription = fetch_item_details(media_id)

    # Build "Version N (timestamp)" labels for every stored document version.
    version_labels = [
        f"Version {version['version_number']} ({version['created_at']})"
        for version in get_all_document_versions(media_id)
    ]

    return (
        prompt,
        format_as_html(summary, "Summary"),
        format_as_html(transcription, "Transcription"),
        gr.update(choices=version_labels, visible=True),
    )
40
+
41
+
42
def extract_prompt_and_summary(content: str):
    """Split versioned document *content* into a (prompt, summary) pair.

    Placeholder parsing (see original note): the first blank-line-separated
    section is the prompt and the second is the summary; missing sections
    fall back to descriptive default strings.
    """
    sections = content.split('\n\n', 2)
    prompt = sections[0] if sections else "No prompt available."
    summary = sections[1] if len(sections) >= 2 else "No summary available."
    return prompt, summary
49
+
50
+
51
def update_content_for_version(selected_item, item_mapping, selected_version):
    """Reload the prompt/summary/transcription panes for one document version.

    Returns empty strings for all three panes when the selection is invalid
    or the version lookup reports an error.
    """
    if not (selected_item and item_mapping and selected_item in item_mapping):
        return "", "", ""

    media_id = item_mapping[selected_item]
    # Dropdown labels look like "Version 3 (2024-01-01 ...)"; extract the integer.
    version_number = int(selected_version.split()[1].split('(')[0])

    version_data = get_document_version(media_id, version_number)
    if 'error' in version_data:
        return "", "", ""

    prompt, summary = extract_prompt_and_summary(version_data['content'])
    transcription = get_latest_transcription(media_id)

    return (
        prompt,
        format_as_html(summary, "Summary"),
        format_as_html(transcription, "Transcription"),
    )
67
+
68
def format_as_html(content, title):
    """Wrap *content* in a titled, scrollable HTML panel, escaping any markup."""
    # Escape first so user-provided text cannot inject HTML, then keep the
    # visual line breaks by converting newlines to <br>.
    body = html.escape(content).replace('\n', '<br>')
    return f"""
    <div style="border: 1px solid #ddd; padding: 10px; margin-bottom: 10px;">
        <h3>{title}</h3>
        <div style="max-height: 300px; overflow-y: auto;">
            {body}
        </div>
    </div>
    """
79
+
80
  def create_search_tab():
81
  with gr.TabItem("Search / Detailed View"):
82
  with gr.Row():
83
+ with gr.Column(scale=1):
84
  gr.Markdown("# Search across all ingested items in the Database")
85
+ gr.Markdown("by Title / URL / Keyword / or Content via SQLite Full-Text-Search")
86
  search_query_input = gr.Textbox(label="Search Query", placeholder="Enter your search query here...")
87
+ search_type_input = gr.Radio(choices=["Title", "URL", "Keyword", "Content"], value="Title",
88
+ label="Search By")
89
  search_button = gr.Button("Search")
90
  items_output = gr.Dropdown(label="Select Item", choices=[])
91
  item_mapping = gr.State({})
92
+ version_dropdown = gr.Dropdown(label="Select Version", choices=[], visible=False)
93
 
94
  search_button.click(
95
  fn=update_dropdown,
96
  inputs=[search_query_input, search_type_input],
97
  outputs=[items_output, item_mapping]
98
  )
99
+
100
+ with gr.Column(scale=2):
101
+ prompt_output = gr.Textbox(label="Prompt Used", visible=True)
102
+ summary_output = gr.Markdown(label="Summary", visible=True)
103
+ transcription_output = gr.Markdown(label="Transcription", visible=True)
104
+
105
  items_output.change(
106
+ fn=update_detailed_view_with_versions,
107
  inputs=[items_output, item_mapping],
108
+ outputs=[prompt_output, summary_output, transcription_output, version_dropdown]
109
+ )
110
+
111
+ version_dropdown.change(
112
+ fn=update_content_for_version,
113
+ inputs=[items_output, item_mapping, version_dropdown],
114
+ outputs=[prompt_output, summary_output, transcription_output]
115
  )
116
 
117
 
 
147
  return "No results found."
148
 
149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  def create_search_summaries_tab():
151
  with gr.TabItem("Search/View Title+Summary "):
152
  gr.Markdown("# Search across all ingested items in the Database and review their summaries")
 
204
  )
205
 
206
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
  def create_prompt_search_tab():
208
  with gr.TabItem("Search Prompts"):
209
  gr.Markdown("# Search and View Prompt Details")
 
315
  inputs=[search_query_input, page_number, entries_per_page],
316
  outputs=[search_results_output, pagination_info, page_number, next_page_button, previous_page_button]
317
  )
318
+
319
+
320
+
321
+
App_Function_Libraries/Gradio_UI/Video_transcription_tab.py CHANGED
@@ -5,13 +5,16 @@
5
  import json
6
  import logging
7
  import os
 
 
8
  #
9
  # External Imports
10
  import gradio as gr
11
  import yt_dlp
12
  #
13
  # Local Imports
14
- from App_Function_Libraries.DB.DB_Manager import load_preset_prompts, add_media_to_database
 
15
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import whisper_models, update_user_prompt
16
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import error_handler
17
  from App_Function_Libraries.Summarization.Summarization_General_Lib import perform_transcription, perform_summarization, \
@@ -26,7 +29,7 @@ from App_Function_Libraries.Benchmarks_Evaluations.ms_g_eval import run_geval
26
  # Functions:
27
 
28
  def create_video_transcription_tab():
29
- with (gr.TabItem("Video Transcription + Summarization")):
30
  gr.Markdown("# Transcribe & Summarize Videos from URLs")
31
  with gr.Row():
32
  gr.Markdown("""Follow this project at [tldw - GitHub](https://github.com/rmusser01/tldw)""")
@@ -124,6 +127,7 @@ def create_video_transcription_tab():
124
  use_cookies_input = gr.Checkbox(label="Use cookies for authenticated download", value=False)
125
  use_time_input = gr.Checkbox(label="Use Start and End Time", value=False)
126
  confab_checkbox = gr.Checkbox(label="Perform Confabulation Check of Summary", value=False)
 
127
  with gr.Row(visible=False) as time_input_box:
128
  gr.Markdown("### Start and End time")
129
  with gr.Column():
@@ -187,9 +191,10 @@ def create_video_transcription_tab():
187
  chunk_method, max_chunk_size, chunk_overlap, use_adaptive_chunking,
188
  use_multi_level_chunking, chunk_language, api_name,
189
  api_key, keywords, use_cookies, cookies, batch_size,
190
- timestamp_option, keep_original_video, summarize_recursively,
191
  progress: gr.Progress = gr.Progress()) -> tuple:
192
  try:
 
193
  logging.info("Entering process_videos_with_error_handling")
194
  logging.info(f"Received inputs: {inputs}")
195
 
@@ -292,6 +297,7 @@ def create_video_transcription_tab():
292
  """)
293
 
294
  logging.debug("Gradio_Related.py: process_url_with_metadata being called")
 
295
  result = process_url_with_metadata(
296
  input_item, 2, whisper_model,
297
  custom_prompt,
@@ -304,6 +310,7 @@ def create_video_transcription_tab():
304
  chunk_options=chunk_options,
305
  keep_original_video=keep_original_video,
306
  current_whisper_model=whisper_model,
 
307
  )
308
 
309
  if result[0] is None:
@@ -424,7 +431,7 @@ def create_video_transcription_tab():
424
  chunk_method, max_chunk_size, chunk_overlap, use_adaptive_chunking,
425
  use_multi_level_chunking, chunk_language, summarize_recursively, api_name,
426
  api_key, keywords, use_cookies, cookies, batch_size,
427
- timestamp_option, keep_original_video, confab_checkbox):
428
  global result
429
  try:
430
  logging.info("process_videos_wrapper(): process_videos_wrapper called")
@@ -459,7 +466,7 @@ def create_video_transcription_tab():
459
  chunk_method, max_chunk_size, chunk_overlap, use_adaptive_chunking,
460
  use_multi_level_chunking, chunk_language, api_name,
461
  api_key, keywords, use_cookies, cookies, batch_size,
462
- timestamp_option, keep_original_video, summarize_recursively
463
  )
464
 
465
  confabulation_result = None
@@ -496,7 +503,7 @@ def create_video_transcription_tab():
496
  rolling_summarization,
497
  detail_level, question_box, keywords, local_file_path, diarize, end_time=None,
498
  include_timestamps=True, metadata=None, use_chunking=False,
499
- chunk_options=None, keep_original_video=False, current_whisper_model="Blank"):
500
 
501
  try:
502
  logging.info(f"Starting process_url_metadata for URL: {input_item}")
@@ -559,27 +566,69 @@ def create_video_transcription_tab():
559
  logging.error("Failed to extract video information")
560
  return None, None, None, None, None, None
561
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
562
  # Download video/audio
563
  logging.info("Downloading video/audio...")
564
  video_file_path = download_video(input_item, download_path, full_info, download_video_flag,
565
  current_whisper_model=current_whisper_model)
566
  if video_file_path is None:
567
  logging.info(
568
- f"Download skipped for {input_item}. Media might already exist or be processed.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
569
  return input_item, None, None, None, None, info_dict
 
 
 
 
570
 
571
- logging.info(f"Processing file: {video_file_path}")
572
 
573
  # Perform transcription
574
- logging.info("Starting transcription...")
575
  audio_file_path, segments = perform_transcription(video_file_path, offset, whisper_model,
576
  vad_filter, diarize)
577
 
578
  if audio_file_path is None or segments is None:
579
- logging.error("Transcription failed or segments not available.")
580
  return None, None, None, None, None, None
581
 
582
- logging.info(f"Transcription completed. Number of segments: {len(segments)}")
583
 
584
  # Add metadata to segments
585
  segments_with_metadata = {
@@ -598,9 +647,9 @@ def create_video_transcription_tab():
598
  if file_path and os.path.exists(file_path):
599
  try:
600
  os.remove(file_path)
601
- logging.info(f"Successfully deleted file: {file_path}")
602
  except Exception as e:
603
- logging.warning(f"Failed to delete file {file_path}: {str(e)}")
604
 
605
  # Delete the mp4 file after successful transcription if not keeping original audio
606
  # Modify the file deletion logic to respect keep_original_video
@@ -610,12 +659,12 @@ def create_video_transcription_tab():
610
  if file_path and os.path.exists(file_path):
611
  try:
612
  os.remove(file_path)
613
- logging.info(f"Successfully deleted file: {file_path}")
614
  except Exception as e:
615
- logging.warning(f"Failed to delete file {file_path}: {str(e)}")
616
  else:
617
- logging.info(f"Keeping original video file: {video_file_path}")
618
- logging.info(f"Keeping original audio file: {audio_file_path}")
619
 
620
  # Process segments based on the timestamp option
621
  if not include_timestamps:
@@ -627,34 +676,34 @@ def create_video_transcription_tab():
627
  transcription_text = extract_text_from_segments(segments)
628
 
629
  if transcription_text.startswith("Error:"):
630
- logging.error(f"Failed to extract transcription: {transcription_text}")
631
  return None, None, None, None, None, None
632
 
633
  # Use transcription_text instead of segments for further processing
634
  full_text_with_metadata = f"{json.dumps(info_dict, indent=2)}\n\n{transcription_text}"
635
 
636
- logging.debug(f"Full text with metadata extracted: {full_text_with_metadata[:100]}...")
637
 
638
  # Perform summarization if API is provided
639
  summary_text = None
640
  if api_name:
641
  # API key resolution handled at base of function if none provided
642
  api_key = api_key if api_key else None
643
- logging.info(f"Starting summarization with {api_name}...")
644
  summary_text = perform_summarization(api_name, full_text_with_metadata, custom_prompt, api_key)
645
  if summary_text is None:
646
  logging.error("Summarization failed.")
647
  return None, None, None, None, None, None
648
- logging.debug(f"Summarization completed: {summary_text[:100]}...")
649
 
650
  # Save transcription and summary
651
- logging.info("Saving transcription and summary...")
652
  download_path = create_download_directory("Audio_Processing")
653
  json_file_path, summary_file_path = save_transcription_and_summary(full_text_with_metadata,
654
  summary_text,
655
  download_path, info_dict)
656
- logging.info(f"Transcription saved to: {json_file_path}")
657
- logging.info(f"Summary saved to: {summary_file_path}")
658
 
659
  # Prepare keywords for database
660
  if isinstance(keywords, str):
@@ -663,13 +712,22 @@ def create_video_transcription_tab():
663
  keywords_list = keywords
664
  else:
665
  keywords_list = []
666
- logging.info(f"Keywords prepared: {keywords_list}")
 
 
667
 
668
- # Add to database
669
- logging.info("Adding to database...")
670
- add_media_to_database(info_dict['webpage_url'], info_dict, full_text_with_metadata, summary_text,
671
- keywords_list, custom_prompt, whisper_model)
672
- logging.info(f"Media added to database: {info_dict['webpage_url']}")
 
 
 
 
 
 
 
673
 
674
  return info_dict[
675
  'webpage_url'], full_text_with_metadata, summary_text, json_file_path, summary_file_path, info_dict
@@ -694,7 +752,7 @@ def create_video_transcription_tab():
694
  chunk_method, max_chunk_size, chunk_overlap, use_adaptive_chunking,
695
  use_multi_level_chunking, chunk_language, summarize_recursively, api_name_input, api_key_input,
696
  keywords_input, use_cookies_input, cookies_input, batch_size_input,
697
- timestamp_option, keep_original_video, confab_checkbox
698
  ],
699
  outputs=[progress_output, error_output, results_output, download_transcription, download_summary, confabulation_output]
700
  )
 
5
  import json
6
  import logging
7
  import os
8
+ from typing import Dict, Any
9
+
10
  #
11
  # External Imports
12
  import gradio as gr
13
  import yt_dlp
14
  #
15
  # Local Imports
16
+ from App_Function_Libraries.DB.DB_Manager import load_preset_prompts, add_media_to_database, \
17
+ check_media_and_whisper_model, check_existing_media, update_media_content_with_version
18
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import whisper_models, update_user_prompt
19
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import error_handler
20
  from App_Function_Libraries.Summarization.Summarization_General_Lib import perform_transcription, perform_summarization, \
 
29
  # Functions:
30
 
31
  def create_video_transcription_tab():
32
+ with ((gr.TabItem("Video Transcription + Summarization"))):
33
  gr.Markdown("# Transcribe & Summarize Videos from URLs")
34
  with gr.Row():
35
  gr.Markdown("""Follow this project at [tldw - GitHub](https://github.com/rmusser01/tldw)""")
 
127
  use_cookies_input = gr.Checkbox(label="Use cookies for authenticated download", value=False)
128
  use_time_input = gr.Checkbox(label="Use Start and End Time", value=False)
129
  confab_checkbox = gr.Checkbox(label="Perform Confabulation Check of Summary", value=False)
130
+ overwrite_checkbox = gr.Checkbox(label="Overwrite Existing Media", value=False)
131
  with gr.Row(visible=False) as time_input_box:
132
  gr.Markdown("### Start and End time")
133
  with gr.Column():
 
191
  chunk_method, max_chunk_size, chunk_overlap, use_adaptive_chunking,
192
  use_multi_level_chunking, chunk_language, api_name,
193
  api_key, keywords, use_cookies, cookies, batch_size,
194
+ timestamp_option, keep_original_video, summarize_recursively, overwrite_existing=False,
195
  progress: gr.Progress = gr.Progress()) -> tuple:
196
  try:
197
+ # FIXME - summarize_recursively is not being used...
198
  logging.info("Entering process_videos_with_error_handling")
199
  logging.info(f"Received inputs: {inputs}")
200
 
 
297
  """)
298
 
299
  logging.debug("Gradio_Related.py: process_url_with_metadata being called")
300
+ # FIXME - Would assume this is where the multi-processing for recursive summarization would occur
301
  result = process_url_with_metadata(
302
  input_item, 2, whisper_model,
303
  custom_prompt,
 
310
  chunk_options=chunk_options,
311
  keep_original_video=keep_original_video,
312
  current_whisper_model=whisper_model,
313
+ overwrite_existing=overwrite_existing
314
  )
315
 
316
  if result[0] is None:
 
431
  chunk_method, max_chunk_size, chunk_overlap, use_adaptive_chunking,
432
  use_multi_level_chunking, chunk_language, summarize_recursively, api_name,
433
  api_key, keywords, use_cookies, cookies, batch_size,
434
+ timestamp_option, keep_original_video, confab_checkbox, overwrite_existing=False):
435
  global result
436
  try:
437
  logging.info("process_videos_wrapper(): process_videos_wrapper called")
 
466
  chunk_method, max_chunk_size, chunk_overlap, use_adaptive_chunking,
467
  use_multi_level_chunking, chunk_language, api_name,
468
  api_key, keywords, use_cookies, cookies, batch_size,
469
+ timestamp_option, keep_original_video, summarize_recursively, overwrite_existing
470
  )
471
 
472
  confabulation_result = None
 
503
  rolling_summarization,
504
  detail_level, question_box, keywords, local_file_path, diarize, end_time=None,
505
  include_timestamps=True, metadata=None, use_chunking=False,
506
+ chunk_options=None, keep_original_video=False, current_whisper_model="Blank", overwrite_existing=False):
507
 
508
  try:
509
  logging.info(f"Starting process_url_metadata for URL: {input_item}")
 
566
  logging.error("Failed to extract video information")
567
  return None, None, None, None, None, None
568
 
569
+ # FIXME - MAKE SURE THIS WORKS WITH LOCAL FILES
570
+ # FIXME - Add a toggle to force processing even if media exists
571
+ # Check if media already exists in the database
572
+ logging.info("Checking if media already exists in the database...")
573
+ media_exists, reason = check_media_and_whisper_model(
574
+ title=info_dict.get('title'),
575
+ url=info_dict.get('webpage_url'),
576
+ current_whisper_model=current_whisper_model
577
+ )
578
+
579
+ if not media_exists:
580
+ logging.info(
581
+ f"process_url_with_metadata: Media does not exist in the database. Reason: {reason}")
582
+ else:
583
+ if "same whisper model" in reason:
584
+ logging.info(
585
+ f"process_url_with_metadata: Skipping download and processing as media exists and uses the same Whisper model. Reason: {reason}")
586
+ return input_item, None, None, None, None, info_dict
587
+ else:
588
+ logging.info(
589
+ f"process_url_with_metadata: Media found, but with a different Whisper model. Reason: {reason}")
590
+
591
  # Download video/audio
592
  logging.info("Downloading video/audio...")
593
  video_file_path = download_video(input_item, download_path, full_info, download_video_flag,
594
  current_whisper_model=current_whisper_model)
595
  if video_file_path is None:
596
  logging.info(
597
+ f"process_url_with_metadata: Download skipped for {input_item}. Media might already exist or be processed.")
598
+ return input_item, None, None, None, None, info_dict
599
+
600
+ # FIXME - add check for existing media with different whisper model for local files
601
+ # FIXME Check to make sure this works
602
+ media_exists, reason = check_media_and_whisper_model(
603
+ title=info_dict.get('title'),
604
+ url=info_dict.get('webpage_url'),
605
+ current_whisper_model=current_whisper_model
606
+ )
607
+ if not media_exists:
608
+ logging.info(
609
+ f"process_url_with_metadata: Media does not exist in the database. Reason: {reason}")
610
+ else:
611
+ if "same whisper model" in reason:
612
+ logging.info(
613
+ f"process_url_with_metadata: Skipping download and processing as media exists and uses the same Whisper model. Reason: {reason}")
614
  return input_item, None, None, None, None, info_dict
615
+ else:
616
+ same_whisper_model = True
617
+ logging.info(
618
+ f"process_url_with_metadata: Media found, but with a different Whisper model. Reason: {reason}")
619
 
620
+ logging.info(f"process_url_with_metadata: Processing file: {video_file_path}")
621
 
622
  # Perform transcription
623
+ logging.info("process_url_with_metadata: Starting transcription...")
624
  audio_file_path, segments = perform_transcription(video_file_path, offset, whisper_model,
625
  vad_filter, diarize)
626
 
627
  if audio_file_path is None or segments is None:
628
+ logging.error("process_url_with_metadata: Transcription failed or segments not available.")
629
  return None, None, None, None, None, None
630
 
631
+ logging.info(f"process_url_with_metadata: Transcription completed. Number of segments: {len(segments)}")
632
 
633
  # Add metadata to segments
634
  segments_with_metadata = {
 
647
  if file_path and os.path.exists(file_path):
648
  try:
649
  os.remove(file_path)
650
+ logging.info(f"process_url_with_metadata: Successfully deleted file: {file_path}")
651
  except Exception as e:
652
+ logging.warning(f"process_url_with_metadata: Failed to delete file {file_path}: {str(e)}")
653
 
654
  # Delete the mp4 file after successful transcription if not keeping original audio
655
  # Modify the file deletion logic to respect keep_original_video
 
659
  if file_path and os.path.exists(file_path):
660
  try:
661
  os.remove(file_path)
662
+ logging.info(f"process_url_with_metadata: Successfully deleted file: {file_path}")
663
  except Exception as e:
664
+ logging.warning(f"process_url_with_metadata: Failed to delete file {file_path}: {str(e)}")
665
  else:
666
+ logging.info(f"process_url_with_metadata: Keeping original video file: {video_file_path}")
667
+ logging.info(f"process_url_with_metadata: Keeping original audio file: {audio_file_path}")
668
 
669
  # Process segments based on the timestamp option
670
  if not include_timestamps:
 
676
  transcription_text = extract_text_from_segments(segments)
677
 
678
  if transcription_text.startswith("Error:"):
679
+ logging.error(f"process_url_with_metadata: Failed to extract transcription: {transcription_text}")
680
  return None, None, None, None, None, None
681
 
682
  # Use transcription_text instead of segments for further processing
683
  full_text_with_metadata = f"{json.dumps(info_dict, indent=2)}\n\n{transcription_text}"
684
 
685
+ logging.debug(f"process_url_with_metadata: Full text with metadata extracted: {full_text_with_metadata[:100]}...")
686
 
687
  # Perform summarization if API is provided
688
  summary_text = None
689
  if api_name:
690
  # API key resolution handled at base of function if none provided
691
  api_key = api_key if api_key else None
692
+ logging.info(f"process_url_with_metadata: Starting summarization with {api_name}...")
693
  summary_text = perform_summarization(api_name, full_text_with_metadata, custom_prompt, api_key)
694
  if summary_text is None:
695
  logging.error("Summarization failed.")
696
  return None, None, None, None, None, None
697
+ logging.debug(f"process_url_with_metadata: Summarization completed: {summary_text[:100]}...")
698
 
699
  # Save transcription and summary
700
+ logging.info("process_url_with_metadata: Saving transcription and summary...")
701
  download_path = create_download_directory("Audio_Processing")
702
  json_file_path, summary_file_path = save_transcription_and_summary(full_text_with_metadata,
703
  summary_text,
704
  download_path, info_dict)
705
+ logging.info(f"process_url_with_metadata: Transcription saved to: {json_file_path}")
706
+ logging.info(f"process_url_with_metadata: Summary saved to: {summary_file_path}")
707
 
708
  # Prepare keywords for database
709
  if isinstance(keywords, str):
 
712
  keywords_list = keywords
713
  else:
714
  keywords_list = []
715
+ logging.info(f"process_url_with_metadata: Keywords prepared: {keywords_list}")
716
+
717
+ existing_media = check_existing_media(info_dict['webpage_url'])
718
 
719
+ if existing_media:
720
+ # Update existing media with new version
721
+ media_id = existing_media['id']
722
+ update_result = update_media_content_with_version(media_id, info_dict, full_text_with_metadata,
723
+ custom_prompt, summary_text, whisper_model)
724
+ logging.info(f"process_url_with_metadata: {update_result}")
725
+ else:
726
+ # Add new media to database
727
+ add_result = add_media_to_database(info_dict['webpage_url'], info_dict, full_text_with_metadata,
728
+ summary_text,
729
+ keywords_list, custom_prompt, whisper_model)
730
+ logging.info(f"process_url_with_metadata: {add_result}")
731
 
732
  return info_dict[
733
  'webpage_url'], full_text_with_metadata, summary_text, json_file_path, summary_file_path, info_dict
 
752
  chunk_method, max_chunk_size, chunk_overlap, use_adaptive_chunking,
753
  use_multi_level_chunking, chunk_language, summarize_recursively, api_name_input, api_key_input,
754
  keywords_input, use_cookies_input, cookies_input, batch_size_input,
755
+ timestamp_option, keep_original_video, confab_checkbox, overwrite_checkbox
756
  ],
757
  outputs=[progress_output, error_output, results_output, download_transcription, download_summary, confabulation_output]
758
  )
App_Function_Libraries/Gradio_UI/View_DB_Items_tab.py ADDED
@@ -0,0 +1,290 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # View_DB_Items_tab.py
2
+ # Description: This file contains the code for the search tab in the Gradio UI
3
+ #
4
+ # Imports
5
+ import html
6
+ import sqlite3
7
+ #
8
+ # External Imports
9
+ import gradio as gr
10
+ #
11
+ # Local Imports
12
+ from App_Function_Libraries.DB.DB_Manager import view_database, get_all_document_versions, \
13
+ fetch_item_details_single, fetch_paginated_data
14
+ from App_Function_Libraries.DB.SQLite_DB import get_document_version
15
+ from App_Function_Libraries.Utils.Utils import get_database_path, format_text_with_line_breaks
16
+ #
17
+ #
18
+ ####################################################################################################
19
+ #
20
+ # Functions
21
+
22
def create_prompt_view_tab():
    """Build the "View Prompt Database" tab: a paginated HTML listing of stored prompts."""
    with gr.TabItem("View Prompt Database"):
        gr.Markdown("# View Prompt Database Entries")
        with gr.Row():
            with gr.Column():
                entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
                page_number = gr.Number(value=1, label="Page Number", precision=0)
                view_button = gr.Button("View Page")
                next_page_button = gr.Button("Next Page")
                previous_page_button = gr.Button("Previous Page")
                pagination_info = gr.Textbox(label="Pagination Info", interactive=False)
            with gr.Column():
                results_display = gr.HTML()

        # FIXME - SQL functions to be moved to DB_Manager

        # NOTE(review): renamed from `view_database` — the original inner name
        # shadowed the `view_database` imported from DB_Manager at module level.
        def fetch_prompts_page(page, entries_per_page):
            """Return (results_html, pagination_text, total_pages) for one page of prompts."""
            offset = (page - 1) * entries_per_page
            try:
                with sqlite3.connect(get_database_path('prompts.db')) as conn:
                    cursor = conn.cursor()
                    # One row per prompt with its keywords aggregated into a CSV string.
                    cursor.execute('''
                        SELECT p.name, p.details, p.system, p.user, GROUP_CONCAT(k.keyword, ', ') as keywords
                        FROM Prompts p
                        LEFT JOIN PromptKeywords pk ON p.id = pk.prompt_id
                        LEFT JOIN Keywords k ON pk.keyword_id = k.id
                        GROUP BY p.id
                        ORDER BY p.name
                        LIMIT ? OFFSET ?
                    ''', (entries_per_page, offset))
                    prompts = cursor.fetchall()

                    cursor.execute('SELECT COUNT(*) FROM Prompts')
                    total_prompts = cursor.fetchone()[0]

                results = ""
                for prompt in prompts:
                    # Escape HTML special characters and replace newlines with <br> tags
                    title = html.escape(prompt[0]).replace('\n', '<br>')
                    details = html.escape(prompt[1] or '').replace('\n', '<br>')
                    system_prompt = html.escape(prompt[2] or '')
                    user_prompt = html.escape(prompt[3] or '')
                    keywords = html.escape(prompt[4] or '').replace('\n', '<br>')

                    results += f"""
                    <div style="border: 1px solid #ddd; padding: 10px; margin-bottom: 20px;">
                        <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px;">
                            <div><strong>Title:</strong> {title}</div>
                            <div><strong>Details:</strong> {details}</div>
                        </div>
                        <div style="margin-top: 10px;">
                            <strong>User Prompt:</strong>
                            <pre style="white-space: pre-wrap; word-wrap: break-word;">{user_prompt}</pre>
                        </div>
                        <div style="margin-top: 10px;">
                            <strong>System Prompt:</strong>
                            <pre style="white-space: pre-wrap; word-wrap: break-word;">{system_prompt}</pre>
                        </div>
                        <div style="margin-top: 10px;">
                            <strong>Keywords:</strong> {keywords}
                        </div>
                    </div>
                    """

                # Ceiling division for the page count.
                total_pages = (total_prompts + entries_per_page - 1) // entries_per_page
                pagination = f"Page {page} of {total_pages} (Total prompts: {total_prompts})"

                return results, pagination, total_pages
            except sqlite3.Error as e:
                return f"<p>Error fetching prompts: {e}</p>", "Error", 0

        def update_page(page, entries_per_page):
            """Fetch a page and compute pager-button interactivity."""
            results, pagination, total_pages = fetch_prompts_page(page, entries_per_page)
            next_disabled = page >= total_pages
            prev_disabled = page <= 1
            return results, pagination, page, gr.update(interactive=not next_disabled), gr.update(
                interactive=not prev_disabled)

        def go_to_next_page(current_page, entries_per_page):
            return update_page(current_page + 1, entries_per_page)

        def go_to_previous_page(current_page, entries_per_page):
            # Clamp at page 1 so the pager never goes below the first page.
            return update_page(max(1, current_page - 1), entries_per_page)

        view_button.click(
            fn=update_page,
            inputs=[page_number, entries_per_page],
            outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
        )

        next_page_button.click(
            fn=go_to_next_page,
            inputs=[page_number, entries_per_page],
            outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
        )

        previous_page_button.click(
            fn=go_to_previous_page,
            inputs=[page_number, entries_per_page],
            outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
        )
125
+
126
+
127
def create_view_all_with_versions_tab():
    """Build the 'View All Items' tab: a paginated item browser with per-item
    document-version selection.

    Layout: pagination controls plus an item dropdown on top; pagination info
    and an HTML details pane below. Selecting an item reveals a version
    dropdown populated from that item's stored document versions.
    """
    with gr.TabItem("View All Items"):
        gr.Markdown("# View All Database Entries with Version Selection")
        with gr.Row():
            with gr.Column(scale=1):
                entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
                page_number = gr.Number(value=1, label="Page Number", precision=0)
                view_button = gr.Button("View Page")
                next_page_button = gr.Button("Next Page")
                previous_page_button = gr.Button("Previous Page")
            with gr.Column(scale=2):
                items_output = gr.Dropdown(label="Select Item to View Details", choices=[])
                version_dropdown = gr.Dropdown(label="Select Version", choices=[], visible=False)
        with gr.Row():
            with gr.Column(scale=1):
                pagination_info = gr.Textbox(label="Pagination Info", interactive=False)
            with gr.Column(scale=2):
                details_display = gr.HTML(label="Item Details")

        # Maps dropdown label -> media id; kept in Gradio state so callbacks can
        # resolve the selected label back to a database id after re-renders.
        item_mapping = gr.State({})

        def update_page(page, entries_per_page):
            """Fetch one page of items and refresh every pagination-related output.

            Returns updates for: item dropdown, pagination text, page number,
            next/previous button interactivity, version dropdown (hidden),
            details pane (cleared), and the refreshed label->id mapping.
            """
            results, total_entries = fetch_paginated_data(page, entries_per_page)
            # Ceiling division: partial last page still counts as a page.
            total_pages = (total_entries + entries_per_page - 1) // entries_per_page
            pagination = f"Page {page} of {total_pages} (Total items: {total_entries})"

            choices = [f"{item[1]} (ID: {item[0]})" for item in results]
            new_item_mapping = {f"{item[1]} (ID: {item[0]})": item[0] for item in results}

            next_disabled = page >= total_pages
            prev_disabled = page <= 1

            return (gr.update(choices=choices, value=None),
                    pagination,
                    page,
                    gr.update(interactive=not next_disabled),
                    gr.update(interactive=not prev_disabled),
                    gr.update(visible=False, choices=[]),
                    "",
                    new_item_mapping)

        def display_item_details(selected_item, item_mapping):
            """Show a prompt/summary/content preview for the selected item and
            populate the version dropdown with its stored versions."""
            # .get() guards against a stale selection after the page (and thus
            # the mapping) changed underneath the dropdown; plain indexing
            # raised KeyError in that case.
            media_id = item_mapping.get(selected_item) if (selected_item and item_mapping) else None
            if media_id is not None:
                prompt, summary, content = fetch_item_details_single(media_id)
                versions = get_all_document_versions(media_id)
                version_choices = [f"Version {v['version_number']} ({v['created_at']})" for v in versions]

                formatted_prompt = format_text_with_line_breaks(prompt)
                formatted_summary = format_text_with_line_breaks(summary)
                # Only preview the first 500 characters; full text is shown per version.
                formatted_content = format_text_with_line_breaks(content[:500])

                details_html = f"""
                <h3>{selected_item}</h3>
                <strong>Prompt:</strong><br>{formatted_prompt}<br><br>
                <strong>Summary:</strong><br>{formatted_summary}<br><br>
                <strong>Content (first 500 characters):</strong><br>{formatted_content}...
                """

                return (
                    gr.update(visible=True, choices=version_choices,
                              value=version_choices[0] if version_choices else None),
                    details_html)
            return gr.update(visible=False, choices=[]), ""

        def update_version_content(selected_item, item_mapping, selected_version):
            """Render the full content of one stored version of the selected item."""
            if selected_item and item_mapping and selected_version:
                # Guard against a stale selection (see display_item_details).
                media_id = item_mapping.get(selected_item)
                if media_id is None:
                    return ""
                # Labels look like "Version N (timestamp)"; extract N.
                version_number = int(selected_version.split()[1].split('(')[0])
                version_data = get_document_version(media_id, version_number)

                if 'error' not in version_data:
                    formatted_content = format_text_with_line_breaks(version_data['content'])
                    details_html = f"""
                    <h3>{selected_item}</h3>
                    <strong>Version:</strong> {version_number}<br>
                    <strong>Created at:</strong> {version_data['created_at']}<br><br>
                    <strong>Content:</strong><br>{formatted_content}
                    """
                    return details_html
            return ""

        view_button.click(
            fn=update_page,
            inputs=[page_number, entries_per_page],
            outputs=[items_output, pagination_info, page_number, next_page_button, previous_page_button,
                     version_dropdown, details_display, item_mapping]
        )

        # Next/previous reuse update_page; "previous" is clamped at page 1,
        # "next" relies on the button being disabled at the last page.
        next_page_button.click(
            fn=lambda page, entries: update_page(page + 1, entries),
            inputs=[page_number, entries_per_page],
            outputs=[items_output, pagination_info, page_number, next_page_button, previous_page_button,
                     version_dropdown, details_display, item_mapping]
        )

        previous_page_button.click(
            fn=lambda page, entries: update_page(max(1, page - 1), entries),
            inputs=[page_number, entries_per_page],
            outputs=[items_output, pagination_info, page_number, next_page_button, previous_page_button,
                     version_dropdown, details_display, item_mapping]
        )

        items_output.change(
            fn=display_item_details,
            inputs=[items_output, item_mapping],
            outputs=[version_dropdown, details_display]
        )

        version_dropdown.change(
            fn=update_version_content,
            inputs=[items_output, item_mapping, version_dropdown],
            outputs=[details_display]
        )
240
+
241
+
242
def create_viewing_tab():
    """Build the 'View Database Entries' tab: a simple paginated HTML listing
    of database entries driven by view_database()."""
    with gr.TabItem("View Database Entries"):
        gr.Markdown("# View Database Entries")
        with gr.Row():
            with gr.Column():
                entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
                page_number = gr.Number(value=1, label="Page Number", precision=0)
                view_button = gr.Button("View Page")
                next_page_button = gr.Button("Next Page")
                previous_page_button = gr.Button("Previous Page")
                pagination_info = gr.Textbox(label="Pagination Info", interactive=False)
            with gr.Column():
                results_display = gr.HTML()

        def update_page(page, entries_per_page):
            """Render one page of entries and toggle the navigation buttons
            at the first/last page boundaries."""
            results, pagination, total_pages = view_database(page, entries_per_page)
            at_last_page = page >= total_pages
            at_first_page = page <= 1
            return (results,
                    pagination,
                    page,
                    gr.update(interactive=not at_last_page),
                    gr.update(interactive=not at_first_page))

        # All three buttons funnel through update_page and refresh the same
        # set of components; next/previous just shift the current page
        # (previous is clamped at page 1).
        shared_outputs = [results_display, pagination_info, page_number,
                          next_page_button, previous_page_button]

        view_button.click(
            fn=update_page,
            inputs=[page_number, entries_per_page],
            outputs=shared_outputs,
        )

        next_page_button.click(
            fn=lambda current, per_page: update_page(current + 1, per_page),
            inputs=[page_number, entries_per_page],
            outputs=shared_outputs,
        )

        previous_page_button.click(
            fn=lambda current, per_page: update_page(max(1, current - 1), per_page),
            inputs=[page_number, entries_per_page],
            outputs=shared_outputs,
        )
288
+
289
+ #
290
+ ####################################################################################################