oceansweep committed on
Commit
c313b25
1 Parent(s): 45e1f81

Upload 11 files

Browse files
App_Function_Libraries/Gradio_UI/Book_Ingestion_tab.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Book_Ingestion_tab.py
2
+ # Functionality to import epubs/ebooks into the system.
3
+ ####################
4
+ # Function List
5
+ #
6
+ # 1. create_import_book_tab()
7
+ # 2. import_epub(epub_file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key)
8
+ #
9
+ ####################
10
+ # Imports
11
+ import tempfile
12
+ import os
13
+ import zipfile
14
+ #
15
+ # External Imports
16
+ import gradio as gr
17
+ #
18
+ # Local Imports
19
+ from App_Function_Libraries.Gradio_UI.Import_Functionality import import_data
20
+ from App_Function_Libraries.Books.Book_Ingestion_Lib import epub_to_markdown
21
+ #
22
+ ########################################################################################################################
23
+ #
24
+ # Functions:
25
+
26
def import_epub(epub_file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key):
    """Convert an uploaded EPUB to Markdown and ingest it via ``import_data``.

    Args:
        epub_file: A path (str/PathLike) or an upload object exposing ``.name``
            or ``.path`` (the shapes Gradio hands back for file uploads).
        title, author, keywords: Metadata forwarded to ``import_data``.
        system_prompt, user_prompt: Prompts forwarded for optional summarization.
        auto_summarize: Whether to summarize on ingest.
        api_name, api_key: Which API (if any) to use for summarization.

    Returns:
        Whatever ``import_data`` returns on success, or a human-readable
        ``"Error processing EPUB: ..."`` string on any failure (this function
        never raises — errors are reported as strings for the UI textbox).
    """
    try:
        # Resolve a filesystem path from the various upload object types.
        if isinstance(epub_file, (str, os.PathLike)):
            epub_path = epub_file
        elif hasattr(epub_file, 'name'):
            epub_path = epub_file.name
        elif hasattr(epub_file, 'path'):
            epub_path = epub_file.path
        else:
            raise ValueError("Unsupported file object type")

        # Convert EPUB to Markdown. Fix: the previous version wrote the
        # markdown to a temp file and immediately read it back — a pointless
        # write/read round trip. The converted text is used directly instead.
        content = epub_to_markdown(epub_path)

        # Process the content exactly as a plain-text import would be.
        return import_data(content, title, author, keywords, system_prompt,
                           user_prompt, auto_summarize, api_name, api_key)
    except Exception as e:
        return f"Error processing EPUB: {str(e)}"
58
+
59
+
60
def process_zip_file(zip_file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key):
    """Extract a ZIP archive and import every ``.epub`` found at its top level.

    Args:
        zip_file: A path (str/PathLike) or an upload object exposing ``.name``
            or ``.path``.
        Remaining arguments are forwarded unchanged to ``import_epub`` for
        every EPUB in the archive (so the same title/author metadata applies
        to each file).

    Returns:
        A newline-joined status report, one ``"File: <name> - <result>"`` line
        per imported EPUB; empty string when the archive holds no EPUBs.

    Raises:
        ValueError: If ``zip_file`` is an unsupported object type.
    """
    results = []
    with tempfile.TemporaryDirectory() as temp_dir:
        # Resolve a path — same handling as import_epub, for consistency
        # (previously a plain string path would have raised ValueError here).
        if isinstance(zip_file, (str, os.PathLike)):
            zip_path = zip_file
        elif hasattr(zip_file, 'name'):
            zip_path = zip_file.name
        elif hasattr(zip_file, 'path'):
            zip_path = zip_file.path
        else:
            raise ValueError("Unsupported zip file object type")

        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(temp_dir)

        # sorted() makes the report order deterministic across platforms.
        for filename in sorted(os.listdir(temp_dir)):
            if filename.lower().endswith('.epub'):
                file_path = os.path.join(temp_dir, filename)
                result = import_epub(file_path, title, author, keywords, system_prompt,
                                     user_prompt, auto_summarize, api_name, api_key)
                # Fix: report the actual filename; the status line previously
                # contained a hard-coded "(unknown)" placeholder.
                results.append(f"File: {filename} - {result}")

    return "\n".join(results)
81
+
82
+
83
def create_import_book_tab():
    """Build the "Ebook(epub) Files" Gradio tab.

    Lays out the upload form (file picker, metadata textboxes, prompts,
    summarization API selection) and wires the Import button to a handler
    that dispatches to import_epub for a single .epub or process_zip_file
    for a .zip of EPUBs.

    Returns:
        The tab's components in a fixed order:
        (import_file, title_input, author_input, keywords_input,
         system_prompt_input, custom_prompt_input, auto_summarize_checkbox,
         api_name_input, api_key_input, import_button, import_output)
    """
    with gr.TabItem("Ebook(epub) Files"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("# Import .epub files")
                gr.Markdown("Upload a single .epub file or a .zip file containing multiple .epub files")
                gr.Markdown(
                    "How to remove DRM from your ebooks: https://www.reddit.com/r/Calibre/comments/1ck4w8e/2024_guide_on_removing_drm_from_kobo_kindle_ebooks/")
                import_file = gr.File(label="Upload file for import", file_types=[".epub", ".zip"])
                title_input = gr.Textbox(label="Title", placeholder="Enter the title of the content (for single files)")
                author_input = gr.Textbox(label="Author", placeholder="Enter the author's name (for single files)")
                keywords_input = gr.Textbox(label="Keywords (like genre or publish year)",
                                            placeholder="Enter keywords, comma-separated")
                # Default system prompt: bulleted-notes instruction template.
                # NOTE(review): value=""""..." opens a triple-quoted string whose
                # first character is a stray double quote — presumably a typo,
                # but it is part of the shipped prompt text; confirm before
                # changing it.
                system_prompt_input = gr.Textbox(label="System Prompt", lines=3,
                                                 value=""""
<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
**Bulleted Note Creation Guidelines**

**Headings**:
- Based on referenced topics, not categories like quotes or terms
- Surrounded by **bold** formatting
- Not listed as bullet points
- No space between headings and list items underneath

**Emphasis**:
- **Important terms** set in bold font
- **Text ending in a colon**: also bolded

**Review**:
- Ensure adherence to specified format
- Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]
""", )
                custom_prompt_input = gr.Textbox(label="Custom User Prompt",
                                                 placeholder="Enter a custom user prompt for summarization (optional)")
                auto_summarize_checkbox = gr.Checkbox(label="Auto-summarize", value=False)
                # None as first choice means "no auto-summarization API selected".
                api_name_input = gr.Dropdown(
                    choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
                             "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"],
                    label="API for Auto-summarization"
                )
                api_key_input = gr.Textbox(label="API Key", type="password")
                import_button = gr.Button("Import eBook(s)")
            with gr.Column():
                with gr.Row():
                    import_output = gr.Textbox(label="Import Status")

        def import_file_handler(file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key):
            # Dispatch on the uploaded file's extension.
            # NOTE(review): `file` is None when nothing was uploaded, which
            # makes `file.name` raise AttributeError — confirm whether the
            # button should guard against an empty upload.
            if file.name.lower().endswith('.epub'):
                return import_epub(file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key)
            elif file.name.lower().endswith('.zip'):
                return process_zip_file(file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key)
            else:
                return "Unsupported file type. Please upload an .epub file or a .zip file containing .epub files."

        import_button.click(
            fn=import_file_handler,
            inputs=[import_file, title_input, author_input, keywords_input, system_prompt_input,
                    custom_prompt_input, auto_summarize_checkbox, api_name_input, api_key_input],
            outputs=import_output
        )

    return import_file, title_input, author_input, keywords_input, system_prompt_input, custom_prompt_input, auto_summarize_checkbox, api_name_input, api_key_input, import_button, import_output
145
+
146
+ #
147
+ # End of File
148
+ ########################################################################################################################
App_Function_Libraries/Gradio_UI/Character_Interaction_tab.py CHANGED
@@ -147,10 +147,10 @@ def create_character_card_interaction_tab():
147
  character_dropdown = gr.Dropdown(label="Select Character", choices=get_character_names())
148
  user_name_input = gr.Textbox(label="Your Name", placeholder="Enter your name here")
149
  api_name_input = gr.Dropdown(
150
- choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
151
  "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace",
152
  "Custom-OpenAI-API"],
153
- value=None,
154
  # FIXME - make it so the user cant' click `Send Message` without first setting an API + Chatbot
155
  label="API for Interaction(Mandatory)"
156
  )
@@ -591,8 +591,12 @@ def create_multiple_character_chat_tab():
591
  range(4)]
592
 
593
  api_endpoint = gr.Dropdown(label="API Endpoint",
594
- choices=["OpenAI", "Anthropic", "Local-LLM", "Cohere", "Groq", "DeepSeek",
595
- "Mistral", "OpenRouter"])
 
 
 
 
596
  api_key = gr.Textbox(label="API Key (if required)", type="password")
597
  temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.1, value=0.7)
598
  scenario = gr.Textbox(label="Scenario (optional)", lines=3)
@@ -722,8 +726,10 @@ def create_narrator_controlled_conversation_tab():
722
  with gr.Column(scale=1):
723
  api_endpoint = gr.Dropdown(
724
  label="API Endpoint",
725
- choices=["OpenAI", "Anthropic", "Local-LLM", "Cohere", "Groq", "DeepSeek", "Mistral", "OpenRouter"],
726
- value="OpenAI"
 
 
727
  )
728
  api_key = gr.Textbox(label="API Key (if required)", type="password")
729
  temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.1, value=0.7)
@@ -827,5 +833,5 @@ def create_narrator_controlled_conversation_tab():
827
  return api_endpoint, api_key, temperature, narrator_input, conversation_display, user_input, generate_btn, reset_btn, error_box
828
 
829
  #
830
- # End of Multi-Character chat tab
831
  ########################################################################################################################
 
147
  character_dropdown = gr.Dropdown(label="Select Character", choices=get_character_names())
148
  user_name_input = gr.Textbox(label="Your Name", placeholder="Enter your name here")
149
  api_name_input = gr.Dropdown(
150
+ choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
151
  "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace",
152
  "Custom-OpenAI-API"],
153
+ value="HuggingFace",
154
  # FIXME - make it so the user cant' click `Send Message` without first setting an API + Chatbot
155
  label="API for Interaction(Mandatory)"
156
  )
 
591
  range(4)]
592
 
593
  api_endpoint = gr.Dropdown(label="API Endpoint",
594
+ choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek",
595
+ "Mistral",
596
+ "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM",
597
+ "ollama", "HuggingFace",
598
+ "Custom-OpenAI-API"],
599
+ value="HuggingFace")
600
  api_key = gr.Textbox(label="API Key (if required)", type="password")
601
  temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.1, value=0.7)
602
  scenario = gr.Textbox(label="Scenario (optional)", lines=3)
 
726
  with gr.Column(scale=1):
727
  api_endpoint = gr.Dropdown(
728
  label="API Endpoint",
729
+ choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
730
+ "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace",
731
+ "Custom-OpenAI-API"],
732
+ value="HuggingFace"
733
  )
734
  api_key = gr.Textbox(label="API Key (if required)", type="password")
735
  temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.1, value=0.7)
 
833
  return api_endpoint, api_key, temperature, narrator_input, conversation_display, user_input, generate_btn, reset_btn, error_box
834
 
835
  #
836
+ # End of Narrator-Controlled Conversation tab
837
  ########################################################################################################################
App_Function_Libraries/Gradio_UI/Chat_Workflows.py CHANGED
@@ -38,9 +38,10 @@ def chat_workflows_tab():
38
  workflow_selector = gr.Dropdown(label="Select Workflow", choices=[wf['name'] for wf in workflows])
39
  api_selector = gr.Dropdown(
40
  label="Select API Endpoint",
41
- choices=["OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral", "OpenRouter",
42
- "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"],
43
- value="OpenAI"
 
44
  )
45
  api_key_input = gr.Textbox(label="API Key (optional)", type="password")
46
  temperature = gr.Slider(label="Temperature", minimum=0.00, maximum=1.0, step=0.05, value=0.7)
@@ -172,148 +173,6 @@ def chat_workflows_tab():
172
  )
173
 
174
  return workflow_selector, api_selector, api_key_input, context_input, chatbot, msg, submit_btn, clear_btn, save_btn
175
- # def chat_workflows_tab():
176
- # with gr.TabItem("Chat Workflows"):
177
- # gr.Markdown("# Workflows using LLMs")
178
- # chat_history = gr.State([])
179
- # media_content = gr.State({})
180
- # selected_parts = gr.State([])
181
- # conversation_id = gr.State(None)
182
- # workflow_state = gr.State({"current_step": 0, "max_steps": 0, "conversation_id": None})
183
- #
184
- # with gr.Row():
185
- # workflow_selector = gr.Dropdown(label="Select Workflow", choices=[wf['name'] for wf in workflows])
186
- # api_selector = gr.Dropdown(
187
- # label="Select API Endpoint",
188
- # choices=["OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral", "OpenRouter",
189
- # "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"],
190
- # value="OpenAI"
191
- # )
192
- # api_key_input = gr.Textbox(label="API Key (optional)", type="password")
193
- #
194
- # context_input = gr.Textbox(label="Initial Context (optional)", lines=5)
195
- #
196
- # with gr.Row():
197
- # temperature = gr.Slider(label="Temperature", minimum=0.00, maximum=1.0, step=0.05, value=0.7)
198
- # save_conversation = gr.Checkbox(label="Save Conversation", value=False)
199
- #
200
- # chatbot = gr.Chatbot(label="Workflow Chat")
201
- # msg = gr.Textbox(label="Your Input")
202
- # submit_btn = gr.Button("Submit")
203
- # clear_btn = gr.Button("Clear Chat")
204
- # save_btn = gr.Button("Save Chat to Database")
205
- #
206
- # with gr.Row():
207
- # conversation_search = gr.Textbox(label="Search Conversations")
208
- # search_conversations_btn = gr.Button("Search Conversations")
209
- # previous_conversations = gr.Dropdown(label="Select Conversation", choices=[], interactive=True)
210
- # load_conversations_btn = gr.Button("Load Selected Conversation")
211
- #
212
- # def update_workflow_ui(workflow_name):
213
- # if not workflow_name:
214
- # return {"current_step": 0, "max_steps": 0, "conversation_id": None}
215
- # selected_workflow = next((wf for wf in workflows if wf['name'] == workflow_name), None)
216
- # if selected_workflow:
217
- # num_prompts = len(selected_workflow['prompts'])
218
- # logging.info(f"Initializing workflow: {workflow_name} with {num_prompts} steps")
219
- # return {"current_step": 0, "max_steps": num_prompts, "conversation_id": None}
220
- # else:
221
- # logging.error(f"Selected workflow not found: {workflow_name}")
222
- # return {"current_step": 0, "max_steps": 0, "conversation_id": None}
223
- #
224
- # def process_workflow_step(message, history, context, workflow_name, api_endpoint, api_key, workflow_state,
225
- # save_conv, temp):
226
- # logging.info(f"Process workflow step called with message: {message}")
227
- # logging.info(f"Current workflow state: {workflow_state}")
228
- # try:
229
- # selected_workflow = next((wf for wf in workflows if wf['name'] == workflow_name), None)
230
- # if not selected_workflow:
231
- # logging.error(f"Selected workflow not found: {workflow_name}")
232
- # return history, workflow_state, gr.update(interactive=True)
233
- #
234
- # current_step = workflow_state["current_step"]
235
- # max_steps = workflow_state["max_steps"]
236
- #
237
- # logging.info(f"Current step: {current_step}, Max steps: {max_steps}")
238
- #
239
- # if current_step >= max_steps:
240
- # logging.info("Workflow completed, disabling input")
241
- # return history, workflow_state, gr.update(interactive=False)
242
- #
243
- # prompt = selected_workflow['prompts'][current_step]
244
- # full_message = f"{context}\n\nStep {current_step + 1}: {prompt}\nUser: {message}"
245
- #
246
- # logging.info(f"Calling chat_wrapper with full_message: {full_message[:100]}...")
247
- # bot_message, new_history, new_conversation_id = chat_wrapper(
248
- # full_message, history, media_content.value, selected_parts.value,
249
- # api_endpoint, api_key, "", workflow_state["conversation_id"],
250
- # save_conv, temp, "You are a helpful assistant guiding through a workflow."
251
- # )
252
- #
253
- # logging.info(f"Received bot_message: {bot_message[:100]}...")
254
- #
255
- # next_step = current_step + 1
256
- # new_workflow_state = {
257
- # "current_step": next_step,
258
- # "max_steps": max_steps,
259
- # "conversation_id": new_conversation_id
260
- # }
261
- #
262
- # if next_step >= max_steps:
263
- # logging.info("Workflow completed after this step")
264
- # return new_history, new_workflow_state, gr.update(interactive=False)
265
- # else:
266
- # next_prompt = selected_workflow['prompts'][next_step]
267
- # new_history.append((None, f"Step {next_step + 1}: {next_prompt}"))
268
- # logging.info(f"Moving to next step: {next_step}")
269
- # return new_history, new_workflow_state, gr.update(interactive=True)
270
- # except Exception as e:
271
- # logging.error(f"Error in process_workflow_step: {str(e)}")
272
- # return history, workflow_state, gr.update(interactive=True)
273
- #
274
- # workflow_selector.change(
275
- # update_workflow_ui,
276
- # inputs=[workflow_selector],
277
- # outputs=[workflow_state]
278
- # )
279
- #
280
- # submit_btn.click(
281
- # process_workflow_step,
282
- # inputs=[msg, chatbot, context_input, workflow_selector, api_selector, api_key_input, workflow_state,
283
- # save_conversation, temperature],
284
- # outputs=[chatbot, workflow_state, msg]
285
- # ).then(
286
- # lambda: gr.update(value=""),
287
- # outputs=[msg]
288
- # )
289
- #
290
- # clear_btn.click(
291
- # lambda: ([], {"current_step": 0, "max_steps": 0, "conversation_id": None}),
292
- # outputs=[chatbot, workflow_state]
293
- # )
294
- #
295
- # save_btn.click(
296
- # save_chat_history_to_db_wrapper,
297
- # inputs=[chatbot, conversation_id, media_content],
298
- # outputs=[conversation_id, gr.Textbox(label="Save Status")]
299
- # )
300
- #
301
- # search_conversations_btn.click(
302
- # search_conversations,
303
- # inputs=[conversation_search],
304
- # outputs=[previous_conversations]
305
- # )
306
- #
307
- # load_conversations_btn.click(
308
- # lambda: ([], {"current_step": 0, "max_steps": 0, "conversation_id": None}),
309
- # outputs=[chatbot, workflow_state]
310
- # ).then(
311
- # load_conversation,
312
- # inputs=[previous_conversations],
313
- # outputs=[chatbot, conversation_id]
314
- # )
315
- #
316
- # return workflow_selector, api_selector, api_key_input, context_input, chatbot, msg, submit_btn, clear_btn, save_btn
317
 
318
  #
319
  # End of script
 
38
  workflow_selector = gr.Dropdown(label="Select Workflow", choices=[wf['name'] for wf in workflows])
39
  api_selector = gr.Dropdown(
40
  label="Select API Endpoint",
41
+ choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
42
+ "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace",
43
+ "Custom-OpenAI-API"],
44
+ value="HuggingFace"
45
  )
46
  api_key_input = gr.Textbox(label="API Key (optional)", type="password")
47
  temperature = gr.Slider(label="Temperature", minimum=0.00, maximum=1.0, step=0.05, value=0.7)
 
173
  )
174
 
175
  return workflow_selector, api_selector, api_key_input, context_input, chatbot, msg, submit_btn, clear_btn, save_btn
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
 
177
  #
178
  # End of script
App_Function_Libraries/Gradio_UI/Explain_summarize_tab.py CHANGED
@@ -11,10 +11,10 @@ from App_Function_Libraries.DB.DB_Manager import load_preset_prompts
11
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_user_prompt
12
  #
13
  # Local Imports
14
- from App_Function_Libraries.Local_Summarization_Lib import summarize_with_llama, summarize_with_kobold, \
15
  summarize_with_oobabooga, summarize_with_tabbyapi, summarize_with_vllm, summarize_with_local_llm, \
16
  summarize_with_ollama
17
- from App_Function_Libraries.Summarization_General_Lib import summarize_with_openai, summarize_with_anthropic, \
18
  summarize_with_cohere, summarize_with_groq, summarize_with_openrouter, summarize_with_deepseek, \
19
  summarize_with_huggingface
20
  #
@@ -24,8 +24,8 @@ from App_Function_Libraries.Summarization_General_Lib import summarize_with_open
24
  # Functions:
25
 
26
  def create_summarize_explain_tab():
27
- with gr.TabItem("Explain/Summarize Text"):
28
- gr.Markdown("# Explain or Summarize Text without ingesting it into the DB")
29
  with gr.Row():
30
  with gr.Column():
31
  with gr.Row():
 
11
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_user_prompt
12
  #
13
  # Local Imports
14
+ from App_Function_Libraries.Summarization.Local_Summarization_Lib import summarize_with_llama, summarize_with_kobold, \
15
  summarize_with_oobabooga, summarize_with_tabbyapi, summarize_with_vllm, summarize_with_local_llm, \
16
  summarize_with_ollama
17
+ from App_Function_Libraries.Summarization.Summarization_General_Lib import summarize_with_openai, summarize_with_anthropic, \
18
  summarize_with_cohere, summarize_with_groq, summarize_with_openrouter, summarize_with_deepseek, \
19
  summarize_with_huggingface
20
  #
 
24
  # Functions:
25
 
26
  def create_summarize_explain_tab():
27
+ with gr.TabItem("Analyze Text"):
28
+ gr.Markdown("# Analyze / Explain / Summarize Text without ingesting it into the DB")
29
  with gr.Row():
30
  with gr.Column():
31
  with gr.Row():
App_Function_Libraries/Gradio_UI/Import_Functionality.py CHANGED
@@ -19,7 +19,7 @@ import pypandoc
19
  from App_Function_Libraries.DB.DB_Manager import insert_prompt_to_db, load_preset_prompts, import_obsidian_note_to_db, \
20
  add_media_to_database
21
  from App_Function_Libraries.Prompt_Handling import import_prompt_from_file, import_prompts_from_zip#
22
- from App_Function_Libraries.Summarization_General_Lib import perform_summarization
23
 
24
  ###################################################################################################################
25
  #
@@ -361,87 +361,6 @@ def create_import_obsidian_vault_tab():
361
  )
362
 
363
 
364
-
365
- # Using pypandoc to convert EPUB to Markdown
366
- def create_import_book_tab():
367
- with gr.TabItem("Import .epub/ebook Files"):
368
- with gr.Row():
369
- with gr.Column():
370
- gr.Markdown("# Ingest an .epub file using pypandoc")
371
- gr.Markdown("...and have it tagged + summarized")
372
- gr.Markdown(
373
- "How to remove DRM from your ebooks: https://www.reddit.com/r/Calibre/comments/1ck4w8e/2024_guide_on_removing_drm_from_kobo_kindle_ebooks/")
374
- import_file = gr.File(label="Upload file for import", file_types=[".epub"])
375
- title_input = gr.Textbox(label="Title", placeholder="Enter the title of the content")
376
- author_input = gr.Textbox(label="Author", placeholder="Enter the author's name")
377
- keywords_input = gr.Textbox(label="Keywords(like genre or publish year)",
378
- placeholder="Enter keywords, comma-separated")
379
- system_prompt_input = gr.Textbox(label="System Prompt",
380
- lines=3,
381
- value=""""
382
- <s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
383
- **Bulleted Note Creation Guidelines**
384
-
385
- **Headings**:
386
- - Based on referenced topics, not categories like quotes or terms
387
- - Surrounded by **bold** formatting
388
- - Not listed as bullet points
389
- - No space between headings and list items underneath
390
-
391
- **Emphasis**:
392
- - **Important terms** set in bold font
393
- - **Text ending in a colon**: also bolded
394
-
395
- **Review**:
396
- - Ensure adherence to specified format
397
- - Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]
398
- """, )
399
- custom_prompt_input = gr.Textbox(label="Custom User Prompt",
400
- placeholder="Enter a custom user prompt for summarization (optional)")
401
- auto_summarize_checkbox = gr.Checkbox(label="Auto-summarize", value=False)
402
- api_name_input = gr.Dropdown(
403
- choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
404
- "OpenRouter",
405
- "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"],
406
- label="API for Auto-summarization"
407
- )
408
- api_key_input = gr.Textbox(label="API Key", type="password")
409
- import_button = gr.Button("Import eBook")
410
- with gr.Column():
411
- with gr.Row():
412
- import_output = gr.Textbox(label="Import Status")
413
-
414
- def import_epub(epub_file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name,
415
- api_key):
416
- try:
417
- # Create a temporary directory to store the converted file
418
- with tempfile.TemporaryDirectory() as temp_dir:
419
- epub_path = epub_file.name
420
- md_path = os.path.join(temp_dir, "converted.md")
421
-
422
- # Use pypandoc to convert EPUB to Markdown
423
- output = pypandoc.convert_file(epub_path, 'md', outputfile=md_path)
424
-
425
- if output != "":
426
- return f"Error converting EPUB: {output}"
427
-
428
- # Read the converted markdown content
429
- with open(md_path, "r", encoding="utf-8") as md_file:
430
- content = md_file.read()
431
-
432
- # Now process the content as you would with a text file
433
- return import_data(content, title, author, keywords, system_prompt,
434
- user_prompt, auto_summarize, api_name, api_key)
435
- except Exception as e:
436
- return f"Error processing EPUB: {str(e)}"
437
-
438
- import_button.click(
439
- fn=import_epub,
440
- inputs=[import_file, title_input, author_input, keywords_input, system_prompt_input,
441
- custom_prompt_input, auto_summarize_checkbox, api_name_input, api_key_input],
442
- outputs=import_output
443
- )
444
-
445
  def import_obsidian_vault(vault_path, progress=gr.Progress()):
446
  try:
447
  from App_Function_Libraries.Gradio_UI.Export_Functionality import scan_obsidian_vault
 
19
  from App_Function_Libraries.DB.DB_Manager import insert_prompt_to_db, load_preset_prompts, import_obsidian_note_to_db, \
20
  add_media_to_database
21
  from App_Function_Libraries.Prompt_Handling import import_prompt_from_file, import_prompts_from_zip#
22
+ from App_Function_Libraries.Summarization.Summarization_General_Lib import perform_summarization
23
 
24
  ###################################################################################################################
25
  #
 
361
  )
362
 
363
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
364
  def import_obsidian_vault(vault_path, progress=gr.Progress()):
365
  try:
366
  from App_Function_Libraries.Gradio_UI.Export_Functionality import scan_obsidian_vault
App_Function_Libraries/Gradio_UI/MMLU_Pro_tab.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MMLU_Pro_tab.py
2
+ # is a library that contains the Gradio UI code for the MMLU-Pro benchmarking tool.
3
+ #
4
+ ##############################################################################################################
5
+ # Imports
6
+ import os
7
+
8
+ import gradio as gr
9
+ import logging
10
+ #
11
+ # External Imports
12
+ from tqdm import tqdm
13
+ # Local Imports
14
+ from App_Function_Libraries.Benchmarks_Evaluations.MMLU_Pro.MMLU_Pro_rewritten import (
15
+ load_mmlu_pro, run_mmlu_pro_benchmark, mmlu_pro_main, load_mmlu_pro_config
16
+ )
17
+ #
18
+ ##############################################################################################################
19
+ #
20
+ # Functions:
21
+
22
+ # Set up logging
23
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
def get_categories():
    """Return the category names present in the MMLU-Pro test dataset.

    On any failure to load the dataset, logs the error and returns a
    single-element placeholder list so callers always get a list of strings.
    """
    try:
        dataset, _validation = load_mmlu_pro()
        # The test split is keyed by category name.
        return [name for name in dataset.keys()]
    except Exception as exc:
        logger.error(f"Failed to load categories: {exc}")
        return ["Error loading categories"]
35
+
36
+
37
def load_categories():
    """Produce a dropdown update populated with the available categories.

    Selects the first category by default; falls back to an error
    placeholder choice when no categories could be fetched.
    """
    available = get_categories()
    if not available:
        fallback = "Error loading categories"
        return gr.update(choices=[fallback], value=fallback)
    return gr.update(choices=available, value=available[0])
44
+
45
+
46
def run_benchmark_from_ui(url, api_key, model, timeout, category, parallel, verbosity, log_prompt):
    """Run the MMLU-Pro benchmark with parameters supplied by the Gradio UI.

    Args:
        url: Inference server URL.
        api_key: API key for the server (may be empty).
        model: Model identifier; slashes are escaped when locating the report.
        timeout: Per-request timeout in seconds.
        category: Single category to benchmark, or falsy for all categories.
        parallel: Number of parallel requests.
        verbosity: Logging verbosity level.
        log_prompt: Whether to log the full prompt.

    Returns:
        The contents of the generated final report, or an error message
        string if anything fails (never raises).
    """
    # Override config with the UI parameters.
    # NOTE(review): mmlu_pro_main() below takes no arguments, so this call
    # presumably installs the config as module-level state inside
    # MMLU_Pro_rewritten — confirm; otherwise the UI overrides are ignored.
    config = load_mmlu_pro_config(
        url=url,
        api_key=api_key,
        model=model,
        timeout=timeout,
        categories=[category] if category else None,
        parallel=parallel,
        verbosity=verbosity,
        log_prompt=log_prompt
    )

    # Run the benchmarking process
    try:
        # Call the main benchmarking function; it writes results to disk.
        mmlu_pro_main()

        # The final report is generated under the "eval_results" folder,
        # in a subfolder named after the model (slashes replaced with dashes).
        report_path = os.path.join("eval_results", config["server"]["model"].replace("/", "-"), "final_report.txt")

        # Fix: read with an explicit encoding so the report decodes the same
        # way on every platform (default encoding is locale-dependent).
        with open(report_path, "r", encoding="utf-8") as f:
            report = f.read()

        return report
    except Exception as e:
        logger.error(f"An error occurred during benchmark execution: {e}")
        return f"An error occurred during benchmark execution. Please check the logs for more information. Error: {str(e)}"
77
+
78
+
79
def create_mmlu_pro_tab():
    """Assemble the MMLU-Pro Benchmark tab and return its components.

    Left column collects the benchmark configuration; right column holds the
    run button and the results display. Returns the components as a list in
    a fixed order for callers that wire them up elsewhere.
    """
    with gr.Tab("MMLU-Pro Benchmark"):
        gr.Markdown("## Run MMLU-Pro Benchmark")

        with gr.Row():
            with gr.Column():
                # Benchmark configuration inputs.
                server_url = gr.Textbox(label="Server URL")
                key_box = gr.Textbox(label="API Key", type="password")
                model_box = gr.Textbox(label="Model Name")
                timeout_box = gr.Number(label="Timeout (seconds)", value=30)
                category_dd = gr.Dropdown(label="Category", choices=["Load categories..."])
                load_btn = gr.Button("Load Categories")
                parallel_slider = gr.Slider(label="Parallel Requests", minimum=1, maximum=10, step=1, value=1)
                verbosity_slider = gr.Slider(label="Verbosity Level", minimum=0, maximum=2, step=1, value=1)
                log_prompt_cb = gr.Checkbox(label="Log Prompt")

            with gr.Column():
                # Execution controls and output display.
                start_btn = gr.Button("Run Benchmark")
                results_box = gr.Textbox(label="Benchmark Results", lines=20)

        # Populate the category dropdown on demand.
        load_btn.click(
            load_categories,
            outputs=category_dd
        )

        # Kick off the benchmark with the configured parameters.
        start_btn.click(
            run_benchmark_from_ui,
            inputs=[server_url, key_box, model_box, timeout_box, category_dd,
                    parallel_slider, verbosity_slider, log_prompt_cb],
            outputs=results_box
        )

    return [server_url, key_box, model_box, timeout_box, category_dd,
            parallel_slider, verbosity_slider, log_prompt_cb, start_btn, results_box]
App_Function_Libraries/Gradio_UI/Plaintext_tab_import.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Plaintext_tab_import.py
2
+ # Contains the code for the "Import Plain Text Files" tab in the Gradio UI.
3
+ # This tab allows users to upload plain text files (Markdown, Text, RTF) or a zip file containing multiple files.
4
+ # The user can provide a title, author, keywords, system prompt, custom user prompt, and select an API for auto-summarization.
5
+ #
6
+ #######################################################################################################################
7
+ #
8
+ # Import necessary libraries
9
+ import os
10
+ import tempfile
11
+ import zipfile
12
+ #
13
+ # Import Non-Local
14
+ import gradio as gr
15
+ from docx2txt import docx2txt
16
+ from pypandoc import convert_file
17
+ #
18
+ # Import Local libraries
19
+ from App_Function_Libraries.Gradio_UI.Import_Functionality import import_data
20
+ #
21
+ #######################################################################################################################
22
+ #
23
+ # Functions:
24
+
25
def create_plain_text_import_tab():
    """Build the Gradio tab for importing plain-text-like documents.

    Accepts a single ``.md``/``.txt``/``.rtf``/``.docx`` file or a ``.zip``
    containing any mix of those, optionally auto-summarizing each imported
    document via the selected API. Returns all created components so callers
    can wire additional behavior.
    """
    with gr.TabItem("Import Plain text & .docx Files"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("# Import Markdown(`.md`)/Text(`.txt`)/rtf & `.docx` Files")
                gr.Markdown("Upload a single file or a zip file containing multiple files")
                import_file = gr.File(label="Upload file for import", file_types=[".md", ".txt", ".rtf", ".docx", ".zip"])
                title_input = gr.Textbox(label="Title", placeholder="Enter the title of the content (for single files)")
                author_input = gr.Textbox(label="Author", placeholder="Enter the author's name (for single files)")
                keywords_input = gr.Textbox(label="Keywords", placeholder="Enter keywords, comma-separated")
                system_prompt_input = gr.Textbox(label="System Prompt (for Summarization)", lines=3,
                                                 value="""<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
**Bulleted Note Creation Guidelines**

**Headings**:
- Based on referenced topics, not categories like quotes or terms
- Surrounded by **bold** formatting
- Not listed as bullet points
- No space between headings and list items underneath

**Emphasis**:
- **Important terms** set in bold font
- **Text ending in a colon**: also bolded

**Review**:
- Ensure adherence to specified format
- Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]""",
                                                 )
                custom_prompt_input = gr.Textbox(label="Custom User Prompt", placeholder="Enter a custom user prompt for summarization (optional)")
                auto_summarize_checkbox = gr.Checkbox(label="Auto-summarize", value=False)
                api_name_input = gr.Dropdown(
                    choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
                             "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"],
                    label="API for Auto-summarization"
                )
                api_key_input = gr.Textbox(label="API Key", type="password")
                import_button = gr.Button("Import File(s)")
            with gr.Column():
                import_output = gr.Textbox(label="Import Status")

        def import_plain_text_file(file_path, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key):
            """Convert one file to text (if needed) and hand it to import_data.

            Returns the import-status string, or an error message on failure.
            """
            try:
                file_extension = os.path.splitext(file_path)[1].lower()
                if file_extension == '.rtf':
                    # Convert RTF -> Markdown via pandoc, then read the result.
                    # (BUGFIX: previously the converted file was never read,
                    # leaving `content` unbound for .rtf inputs.)
                    with tempfile.NamedTemporaryFile(suffix='.md', delete=False) as temp_file:
                        convert_file(file_path, 'md', outputfile=temp_file.name)
                        file_path = temp_file.name
                    with open(file_path, 'r', encoding='utf-8') as file:
                        content = file.read()
                elif file_extension == '.docx':
                    content = docx2txt.process(file_path)
                else:
                    # .md / .txt are already plain text.
                    with open(file_path, 'r', encoding='utf-8') as file:
                        content = file.read()

                # Hand the extracted text off for ingestion (and optional summarization).
                return import_data(content, title, author, keywords, system_prompt,
                                   user_prompt, auto_summarize, api_name, api_key)
            except Exception as e:
                return f"Error processing file: {str(e)}"

        def process_plain_text_zip_file(zip_file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key):
            """Extract a zip and import every supported file inside it.

            Returns one status line per processed file, newline-joined.
            """
            results = []
            with tempfile.TemporaryDirectory() as temp_dir:
                with zipfile.ZipFile(zip_file.name, 'r') as zip_ref:
                    zip_ref.extractall(temp_dir)

                for filename in os.listdir(temp_dir):
                    if filename.lower().endswith(('.md', '.txt', '.rtf', '.docx')):
                        file_path = os.path.join(temp_dir, filename)
                        result = import_plain_text_file(file_path, title, author, keywords, system_prompt,
                                                        user_prompt, auto_summarize, api_name, api_key)
                        # Report the actual filename with each per-file result.
                        results.append(f"File: {filename} - {result}")

            return "\n".join(results)

        def import_file_handler(file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key):
            """Dispatch the uploaded file to the single-file or zip importer."""
            if file.name.lower().endswith(('.md', '.txt', '.rtf', '.docx')):
                return import_plain_text_file(file.name, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key)
            elif file.name.lower().endswith('.zip'):
                return process_plain_text_zip_file(file, title, author, keywords, system_prompt, user_prompt, auto_summarize, api_name, api_key)
            else:
                return "Unsupported file type. Please upload a .md, .txt, .rtf, .docx file or a .zip file containing these file types."

        import_button.click(
            fn=import_file_handler,
            inputs=[import_file, title_input, author_input, keywords_input, system_prompt_input,
                    custom_prompt_input, auto_summarize_checkbox, api_name_input, api_key_input],
            outputs=import_output
        )

    return import_file, title_input, author_input, keywords_input, system_prompt_input, custom_prompt_input, auto_summarize_checkbox, api_name_input, api_key_input, import_button, import_output
App_Function_Libraries/Gradio_UI/RAG_QA_Chat_tab.py CHANGED
@@ -2,26 +2,30 @@
2
  # Description: Gradio UI for RAG QA Chat
3
  #
4
  # Imports
 
 
 
 
 
5
  #
6
  # External Imports
7
- import logging
8
-
9
  import gradio as gr
10
-
11
- from App_Function_Libraries.DB.DB_Manager import DatabaseError, get_paginated_files
12
- from App_Function_Libraries.RAG.RAG_QA_Chat import search_database, load_chat_history, \
13
- save_chat_history, rag_qa_chat
14
-
15
-
16
- #
17
  # Local Imports
 
 
 
 
 
 
 
18
  #
19
  ########################################################################################################################
20
  #
21
  # Functions:
22
 
23
  def create_rag_qa_chat_tab():
24
- with gr.TabItem("RAG QA Chat (WIP)"):
25
  gr.Markdown("# RAG QA Chat")
26
 
27
  with gr.Row():
@@ -41,24 +45,25 @@ def create_rag_qa_chat_tab():
41
  search_query = gr.Textbox(label="Search Query", visible=False)
42
  search_button = gr.Button("Search", visible=False)
43
  search_results = gr.Dropdown(label="Search Results", choices=[], visible=False)
44
- file_upload = gr.File(label="Upload File", visible=False)
 
 
 
 
 
 
45
 
46
  api_choice = gr.Dropdown(
47
  choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral", "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"],
48
  label="Select API for RAG",
49
  value="OpenAI"
50
  )
51
- chat_file = gr.File(label="Chat File")
52
- load_chat = gr.Button("Load Chat")
53
- clear = gr.Button("Clear Current Chat")
54
 
55
  with gr.Column(scale=2):
56
  chatbot = gr.Chatbot(height=500)
57
  msg = gr.Textbox(label="Enter your message")
58
- submit = gr.Button("Submit")
59
-
60
- save_chat = gr.Button("Save Chat")
61
-
62
 
63
  loading_indicator = gr.HTML(visible=False)
64
 
@@ -82,12 +87,14 @@ def create_rag_qa_chat_tab():
82
  search_query: gr.update(visible=choice == "Search Database"),
83
  search_button: gr.update(visible=choice == "Search Database"),
84
  search_results: gr.update(visible=choice == "Search Database"),
85
- file_upload: gr.update(visible=choice == "Upload File")
 
 
86
  }
87
 
88
  context_source.change(update_context_source, context_source,
89
  [existing_file, prev_page_btn, next_page_btn, page_info, search_query, search_button,
90
- search_results, file_upload])
91
 
92
  next_page_btn.click(next_page_fn, inputs=[file_page], outputs=[existing_file, page_info, file_page])
93
  prev_page_btn.click(prev_page_fn, inputs=[file_page], outputs=[existing_file, page_info, file_page])
@@ -98,53 +105,124 @@ def create_rag_qa_chat_tab():
98
  loading_indicator = gr.HTML(visible=False)
99
 
100
  def rag_qa_chat_wrapper(message, history, context_source, existing_file, search_results, file_upload,
101
- api_choice):
102
  try:
 
 
 
 
103
  # Show loading indicator
104
  yield history, "", gr.update(visible=True)
105
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  if context_source == "Existing File":
107
  context = f"media_id:{existing_file.split('(ID: ')[1][:-1]}"
 
108
  elif context_source == "Search Database":
109
  context = f"media_id:{search_results.split('(ID: ')[1][:-1]}"
 
110
  else: # Upload File
 
111
  if file_upload is None:
112
  raise ValueError("No file uploaded")
113
- context = file_upload
114
 
115
- new_history, response = rag_qa_chat(message, history, context, api_choice)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  gr.Info("Response generated successfully")
 
117
  yield new_history, "", gr.update(visible=False)
118
  except ValueError as e:
 
119
  gr.Error(f"Input error: {str(e)}")
120
  yield history, "", gr.update(visible=False)
121
  except DatabaseError as e:
 
122
  gr.Error(f"Database error: {str(e)}")
123
  yield history, "", gr.update(visible=False)
124
  except Exception as e:
125
- logging.error(f"Unexpected error in rag_qa_chat_wrapper: {e}")
126
  gr.Error("An unexpected error occurred. Please try again later.")
127
  yield history, "", gr.update(visible=False)
128
 
129
- def save_chat_history_wrapper(history):
130
- try:
131
- file_path = save_chat_history(history)
132
- gr.Info("Chat history saved successfully")
133
- return gr.update(value=file_path)
134
- except Exception as e:
135
- gr.Error(f"Error saving chat history: {str(e)}")
136
- return gr.update(value=None)
137
 
138
- def load_chat_history_wrapper(file):
139
- try:
140
- if file is not None:
141
- history = load_chat_history(file)
142
- gr.Info("Chat history loaded successfully")
143
- return history
144
- return []
145
- except Exception as e:
146
- gr.Error(f"Error loading chat history: {str(e)}")
147
- return []
 
148
 
149
  def perform_search(query):
150
  try:
@@ -154,20 +232,57 @@ def create_rag_qa_chat_tab():
154
  gr.Error(f"Error performing search: {str(e)}")
155
  return gr.update(choices=[])
156
 
157
- save_chat.click(save_chat_history_wrapper, inputs=[chatbot], outputs=[chat_file])
158
- load_chat.click(load_chat_history_wrapper, inputs=[chat_file], outputs=[chatbot])
159
 
160
  search_button.click(perform_search, inputs=[search_query], outputs=[search_results])
161
 
162
  submit.click(
163
  rag_qa_chat_wrapper,
164
- inputs=[msg, chatbot, context_source, existing_file, search_results, file_upload, api_choice],
 
165
  outputs=[chatbot, msg, loading_indicator]
166
  )
167
 
168
- clear.click(lambda: ([], None), outputs=[chatbot, chat_file])
169
-
170
- return context_source, existing_file, search_query, search_button, search_results, file_upload, api_choice, chatbot, msg, submit, clear, save_chat, load_chat, chat_file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
  #
173
  # End of RAG_QA_Chat_tab.py
 
2
  # Description: Gradio UI for RAG QA Chat
3
  #
4
  # Imports
5
+ import csv
6
+ import logging
7
+ import json
8
+ import os
9
+ from datetime import datetime
10
  #
11
  # External Imports
12
+ import docx2txt
 
13
  import gradio as gr
 
 
 
 
 
 
 
14
  # Local Imports
15
+ from App_Function_Libraries.Books.Book_Ingestion_Lib import read_epub
16
+ from App_Function_Libraries.DB.DB_Manager import DatabaseError, get_paginated_files, add_media_with_keywords
17
+ from App_Function_Libraries.PDF.PDF_Ingestion_Lib import extract_text_and_format_from_pdf
18
+ from App_Function_Libraries.RAG.RAG_Libary_2 import generate_answer
19
+ from App_Function_Libraries.RAG.RAG_QA_Chat import search_database, rag_qa_chat
20
+ # Eventually... FIXME
21
+ from App_Function_Libraries.RAG.RAG_QA_Chat import load_chat_history, save_chat_history
22
  #
23
  ########################################################################################################################
24
  #
25
  # Functions:
26
 
27
  def create_rag_qa_chat_tab():
28
+ with gr.TabItem("RAG QA Chat"):
29
  gr.Markdown("# RAG QA Chat")
30
 
31
  with gr.Row():
 
45
  search_query = gr.Textbox(label="Search Query", visible=False)
46
  search_button = gr.Button("Search", visible=False)
47
  search_results = gr.Dropdown(label="Search Results", choices=[], visible=False)
48
+ file_upload = gr.File(
49
+ label="Upload File",
50
+ visible=False,
51
+ file_types=["txt", "pdf", "epub", "md", "rtf", "json", "csv"]
52
+ )
53
+ convert_to_text = gr.Checkbox(label="Convert to plain text", visible=False)
54
+ keywords = gr.Textbox(label="Keywords (comma-separated)", visible=False)
55
 
56
  api_choice = gr.Dropdown(
57
  choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral", "OpenRouter", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"],
58
  label="Select API for RAG",
59
  value="OpenAI"
60
  )
 
 
 
61
 
62
  with gr.Column(scale=2):
63
  chatbot = gr.Chatbot(height=500)
64
  msg = gr.Textbox(label="Enter your message")
65
+ submit = gr.Button("Submit (Might take a few seconds/turns blue while processing...)")
66
+ clear_chat = gr.Button("Clear Chat History")
 
 
67
 
68
  loading_indicator = gr.HTML(visible=False)
69
 
 
87
  search_query: gr.update(visible=choice == "Search Database"),
88
  search_button: gr.update(visible=choice == "Search Database"),
89
  search_results: gr.update(visible=choice == "Search Database"),
90
+ file_upload: gr.update(visible=choice == "Upload File"),
91
+ convert_to_text: gr.update(visible=choice == "Upload File"),
92
+ keywords: gr.update(visible=choice == "Upload File")
93
  }
94
 
95
  context_source.change(update_context_source, context_source,
96
  [existing_file, prev_page_btn, next_page_btn, page_info, search_query, search_button,
97
+ search_results, file_upload, convert_to_text, keywords])
98
 
99
  next_page_btn.click(next_page_fn, inputs=[file_page], outputs=[existing_file, page_info, file_page])
100
  prev_page_btn.click(prev_page_fn, inputs=[file_page], outputs=[existing_file, page_info, file_page])
 
105
  loading_indicator = gr.HTML(visible=False)
106
 
107
  def rag_qa_chat_wrapper(message, history, context_source, existing_file, search_results, file_upload,
108
+ convert_to_text, keywords, api_choice):
109
  try:
110
+ logging.info(f"Starting rag_qa_chat_wrapper with message: {message}")
111
+ logging.info(f"Context source: {context_source}")
112
+ logging.info(f"API choice: {api_choice}")
113
+
114
  # Show loading indicator
115
  yield history, "", gr.update(visible=True)
116
 
117
+ # Ensure api_choice is a string
118
+ api_choice = api_choice.value if isinstance(api_choice, gr.components.Dropdown) else api_choice
119
+ logging.info(f"Resolved API choice: {api_choice}")
120
+
121
+ # Only rephrase the question if it's not the first query
122
+ if len(history) > 0:
123
+ rephrased_question = rephrase_question(history, message, api_choice)
124
+ logging.info(f"Original question: {message}")
125
+ logging.info(f"Rephrased question: {rephrased_question}")
126
+ else:
127
+ rephrased_question = message
128
+ logging.info(f"First question, no rephrasing: {message}")
129
+
130
  if context_source == "Existing File":
131
  context = f"media_id:{existing_file.split('(ID: ')[1][:-1]}"
132
+ logging.info(f"Using existing file with context: {context}")
133
  elif context_source == "Search Database":
134
  context = f"media_id:{search_results.split('(ID: ')[1][:-1]}"
135
+ logging.info(f"Using search result with context: {context}")
136
  else: # Upload File
137
+ logging.info("Processing uploaded file")
138
  if file_upload is None:
139
  raise ValueError("No file uploaded")
 
140
 
141
+ # Process the uploaded file
142
+ file_path = file_upload.name
143
+ file_name = os.path.basename(file_path)
144
+ logging.info(f"Uploaded file: {file_name}")
145
+
146
+ if convert_to_text:
147
+ logging.info("Converting file to plain text")
148
+ content = convert_file_to_text(file_path)
149
+ else:
150
+ logging.info("Reading file content")
151
+ with open(file_path, 'r', encoding='utf-8') as f:
152
+ content = f.read()
153
+
154
+ logging.info(f"File content length: {len(content)} characters")
155
+
156
+ # Process keywords
157
+ if not keywords:
158
+ keywords = "default,rag-file-upload"
159
+ logging.info(f"Keywords: {keywords}")
160
+
161
+ # Add the content to the database and get the media_id
162
+ logging.info("Adding content to database")
163
+ result = add_media_with_keywords(
164
+ url=file_name,
165
+ title=file_name,
166
+ media_type='document',
167
+ content=content,
168
+ keywords=keywords,
169
+ prompt='No prompt for uploaded files',
170
+ summary='No summary for uploaded files',
171
+ transcription_model='None',
172
+ author='Unknown',
173
+ ingestion_date=datetime.now().strftime('%Y-%m-%d')
174
+ )
175
+
176
+ logging.info(f"Result from add_media_with_keywords: {result}")
177
+ if isinstance(result, tuple):
178
+ media_id, _ = result
179
+ else:
180
+ media_id = result
181
+
182
+ context = f"media_id:{media_id}"
183
+ logging.info(f"Context for uploaded file: {context}")
184
+
185
+ logging.info("Calling rag_qa_chat function")
186
+ new_history, response = rag_qa_chat(rephrased_question, history, context, api_choice)
187
+ # Log first 100 chars of response
188
+ logging.info(
189
+ f"Response received from rag_qa_chat: {response[:100]}...")
190
+
191
+ # Add the original question to the history
192
+ new_history[-1] = (message, new_history[-1][1])
193
+
194
  gr.Info("Response generated successfully")
195
+ logging.info("rag_qa_chat_wrapper completed successfully")
196
  yield new_history, "", gr.update(visible=False)
197
  except ValueError as e:
198
+ logging.error(f"Input error in rag_qa_chat_wrapper: {str(e)}")
199
  gr.Error(f"Input error: {str(e)}")
200
  yield history, "", gr.update(visible=False)
201
  except DatabaseError as e:
202
+ logging.error(f"Database error in rag_qa_chat_wrapper: {str(e)}")
203
  gr.Error(f"Database error: {str(e)}")
204
  yield history, "", gr.update(visible=False)
205
  except Exception as e:
206
+ logging.error(f"Unexpected error in rag_qa_chat_wrapper: {e}", exc_info=True)
207
  gr.Error("An unexpected error occurred. Please try again later.")
208
  yield history, "", gr.update(visible=False)
209
 
210
+ def rephrase_question(history, latest_question, api_choice):
211
+ # Thank you https://www.reddit.com/r/LocalLLaMA/comments/1fi1kex/multi_turn_conversation_and_rag/
212
+ conversation_history = "\n".join([f"User: {h[0]}\nAssistant: {h[1]}" for h in history[:-1]])
213
+ prompt = f"""You are a helpful assistant. Given the conversation history and the latest question, resolve any ambiguous references in the latest question.
 
 
 
 
214
 
215
+ Conversation History:
216
+ {conversation_history}
217
+
218
+ Latest Question:
219
+ {latest_question}
220
+
221
+ Rewritten Question:"""
222
+
223
+ # Use the selected API to generate the rephrased question
224
+ rephrased_question = generate_answer(api_choice, prompt, "")
225
+ return rephrased_question.strip()
226
 
227
  def perform_search(query):
228
  try:
 
232
  gr.Error(f"Error performing search: {str(e)}")
233
  return gr.update(choices=[])
234
 
235
+ def clear_chat_history():
236
+ return [], ""
237
 
238
  search_button.click(perform_search, inputs=[search_query], outputs=[search_results])
239
 
240
  submit.click(
241
  rag_qa_chat_wrapper,
242
+ inputs=[msg, chatbot, context_source, existing_file, search_results, file_upload,
243
+ convert_to_text, keywords, api_choice],
244
  outputs=[chatbot, msg, loading_indicator]
245
  )
246
 
247
+ clear_chat.click(clear_chat_history, outputs=[chatbot, msg])
248
+
249
+ return (context_source, existing_file, search_query, search_button, search_results, file_upload,
250
+ convert_to_text, keywords, api_choice, chatbot, msg, submit, clear_chat)
251
+
252
def convert_file_to_text(file_path):
    """Convert a supported document into plain text.

    Dispatches on the file extension: PDFs and EPUBs go through their
    dedicated extractors, JSON/CSV through the structured-file reader,
    ``.docx`` through docx2txt, and plain-text formats are read directly.

    Raises:
        ValueError: if the extension is not one of the supported types.
    """
    file_extension = os.path.splitext(file_path)[1].lower()

    if file_extension == '.pdf':
        return extract_text_and_format_from_pdf(file_path)
    if file_extension == '.epub':
        return read_epub(file_path)
    if file_extension in ('.json', '.csv'):
        return read_structured_file(file_path)
    if file_extension == '.docx':
        return docx2txt.process(file_path)
    if file_extension in ('.txt', '.md', '.rtf'):
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read()
    raise ValueError(f"Unsupported file type: {file_extension}")
269
+
270
def read_structured_file(file_path):
    """Read a JSON or CSV file and return its contents as text.

    JSON files are re-serialized with 2-space indentation; CSV files are
    re-joined row-by-row with commas and newlines.

    Args:
        file_path: Path to a ``.json`` or ``.csv`` file.

    Returns:
        The file contents rendered as a plain-text string.

    Raises:
        ValueError: if the extension is neither ``.json`` nor ``.csv``.
    """
    file_extension = os.path.splitext(file_path)[1].lower()

    if file_extension == '.json':
        # Explicit UTF-8: don't depend on the platform's default encoding.
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
        return json.dumps(data, indent=2)

    elif file_extension == '.csv':
        # newline='' lets the csv module handle line endings correctly.
        with open(file_path, 'r', newline='', encoding='utf-8') as file:
            csv_reader = csv.reader(file)
            return '\n'.join([','.join(row) for row in csv_reader])

    else:
        raise ValueError(f"Unsupported file type: {file_extension}")
286
 
287
  #
288
  # End of RAG_QA_Chat_tab.py
App_Function_Libraries/Gradio_UI/Search_Tab.py CHANGED
@@ -10,10 +10,11 @@ import sqlite3
10
  import gradio as gr
11
  #
12
  # Local Imports
13
- from App_Function_Libraries.DB.DB_Manager import view_database, search_and_display_items
14
- from App_Function_Libraries.DB.SQLite_DB import search_prompts
 
15
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_dropdown, update_detailed_view
16
- from App_Function_Libraries.Utils.Utils import get_database_path
17
  #
18
  ###################################################################################################
19
  #
@@ -22,30 +23,95 @@ from App_Function_Libraries.Utils.Utils import get_database_path
22
  logger = logging.getLogger()
23
 
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  def create_search_tab():
26
  with gr.TabItem("Search / Detailed View"):
27
  with gr.Row():
28
- with gr.Column():
29
  gr.Markdown("# Search across all ingested items in the Database")
30
- gr.Markdown(" by Title / URL / Keyword / or Content via SQLite Full-Text-Search")
31
  search_query_input = gr.Textbox(label="Search Query", placeholder="Enter your search query here...")
32
- search_type_input = gr.Radio(choices=["Title", "URL", "Keyword", "Content"], value="Title", label="Search By")
 
33
  search_button = gr.Button("Search")
34
  items_output = gr.Dropdown(label="Select Item", choices=[])
35
  item_mapping = gr.State({})
36
- prompt_summary_output = gr.HTML(label="Prompt & Summary", visible=True)
37
 
38
  search_button.click(
39
  fn=update_dropdown,
40
  inputs=[search_query_input, search_type_input],
41
  outputs=[items_output, item_mapping]
42
  )
43
- with gr.Column():
44
- content_output = gr.Markdown(label="Content", visible=True)
 
 
 
 
45
  items_output.change(
46
- fn=update_detailed_view,
47
  inputs=[items_output, item_mapping],
48
- outputs=[prompt_summary_output, content_output]
 
 
 
 
 
 
49
  )
50
 
51
 
@@ -81,53 +147,6 @@ def display_search_results(query):
81
  return "No results found."
82
 
83
 
84
- def create_viewing_tab():
85
- with gr.TabItem("View Database"):
86
- gr.Markdown("# View Database Entries")
87
- with gr.Row():
88
- with gr.Column():
89
- entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
90
- page_number = gr.Number(value=1, label="Page Number", precision=0)
91
- view_button = gr.Button("View Page")
92
- next_page_button = gr.Button("Next Page")
93
- previous_page_button = gr.Button("Previous Page")
94
- with gr.Column():
95
- results_display = gr.HTML()
96
- pagination_info = gr.Textbox(label="Pagination Info", interactive=False)
97
-
98
- def update_page(page, entries_per_page):
99
- results, pagination, total_pages = view_database(page, entries_per_page)
100
- next_disabled = page >= total_pages
101
- prev_disabled = page <= 1
102
- return results, pagination, page, gr.update(interactive=not next_disabled), gr.update(interactive=not prev_disabled)
103
-
104
- def go_to_next_page(current_page, entries_per_page):
105
- next_page = current_page + 1
106
- return update_page(next_page, entries_per_page)
107
-
108
- def go_to_previous_page(current_page, entries_per_page):
109
- previous_page = max(1, current_page - 1)
110
- return update_page(previous_page, entries_per_page)
111
-
112
- view_button.click(
113
- fn=update_page,
114
- inputs=[page_number, entries_per_page],
115
- outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
116
- )
117
-
118
- next_page_button.click(
119
- fn=go_to_next_page,
120
- inputs=[page_number, entries_per_page],
121
- outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
122
- )
123
-
124
- previous_page_button.click(
125
- fn=go_to_previous_page,
126
- inputs=[page_number, entries_per_page],
127
- outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
128
- )
129
-
130
-
131
  def create_search_summaries_tab():
132
  with gr.TabItem("Search/View Title+Summary "):
133
  gr.Markdown("# Search across all ingested items in the Database and review their summaries")
@@ -185,112 +204,6 @@ def create_search_summaries_tab():
185
  )
186
 
187
 
188
-
189
- def create_prompt_view_tab():
190
- with gr.TabItem("View Prompt Database"):
191
- gr.Markdown("# View Prompt Database Entries")
192
- with gr.Row():
193
- with gr.Column():
194
- entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
195
- page_number = gr.Number(value=1, label="Page Number", precision=0)
196
- view_button = gr.Button("View Page")
197
- next_page_button = gr.Button("Next Page")
198
- previous_page_button = gr.Button("Previous Page")
199
- with gr.Column():
200
- pagination_info = gr.Textbox(label="Pagination Info", interactive=False)
201
- results_display = gr.HTML()
202
-
203
- # FIXME - SQL functions to be moved to DB_Manager
204
- def view_database(page, entries_per_page):
205
- offset = (page - 1) * entries_per_page
206
- try:
207
- with sqlite3.connect(get_database_path('prompts.db')) as conn:
208
- cursor = conn.cursor()
209
- cursor.execute('''
210
- SELECT p.name, p.details, p.system, p.user, GROUP_CONCAT(k.keyword, ', ') as keywords
211
- FROM Prompts p
212
- LEFT JOIN PromptKeywords pk ON p.id = pk.prompt_id
213
- LEFT JOIN Keywords k ON pk.keyword_id = k.id
214
- GROUP BY p.id
215
- ORDER BY p.name
216
- LIMIT ? OFFSET ?
217
- ''', (entries_per_page, offset))
218
- prompts = cursor.fetchall()
219
-
220
- cursor.execute('SELECT COUNT(*) FROM Prompts')
221
- total_prompts = cursor.fetchone()[0]
222
-
223
- results = ""
224
- for prompt in prompts:
225
- # Escape HTML special characters and replace newlines with <br> tags
226
- title = html.escape(prompt[0]).replace('\n', '<br>')
227
- details = html.escape(prompt[1] or '').replace('\n', '<br>')
228
- system_prompt = html.escape(prompt[2] or '')
229
- user_prompt = html.escape(prompt[3] or '')
230
- keywords = html.escape(prompt[4] or '').replace('\n', '<br>')
231
-
232
- results += f"""
233
- <div style="border: 1px solid #ddd; padding: 10px; margin-bottom: 20px;">
234
- <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px;">
235
- <div><strong>Title:</strong> {title}</div>
236
- <div><strong>Details:</strong> {details}</div>
237
- </div>
238
- <div style="margin-top: 10px;">
239
- <strong>User Prompt:</strong>
240
- <pre style="white-space: pre-wrap; word-wrap: break-word;">{user_prompt}</pre>
241
- </div>
242
- <div style="margin-top: 10px;">
243
- <strong>System Prompt:</strong>
244
- <pre style="white-space: pre-wrap; word-wrap: break-word;">{system_prompt}</pre>
245
- </div>
246
- <div style="margin-top: 10px;">
247
- <strong>Keywords:</strong> {keywords}
248
- </div>
249
- </div>
250
- """
251
-
252
- total_pages = (total_prompts + entries_per_page - 1) // entries_per_page
253
- pagination = f"Page {page} of {total_pages} (Total prompts: {total_prompts})"
254
-
255
- return results, pagination, total_pages
256
- except sqlite3.Error as e:
257
- return f"<p>Error fetching prompts: {e}</p>", "Error", 0
258
-
259
- def update_page(page, entries_per_page):
260
- results, pagination, total_pages = view_database(page, entries_per_page)
261
- next_disabled = page >= total_pages
262
- prev_disabled = page <= 1
263
- return results, pagination, page, gr.update(interactive=not next_disabled), gr.update(
264
- interactive=not prev_disabled)
265
-
266
- def go_to_next_page(current_page, entries_per_page):
267
- next_page = current_page + 1
268
- return update_page(next_page, entries_per_page)
269
-
270
- def go_to_previous_page(current_page, entries_per_page):
271
- previous_page = max(1, current_page - 1)
272
- return update_page(previous_page, entries_per_page)
273
-
274
- view_button.click(
275
- fn=update_page,
276
- inputs=[page_number, entries_per_page],
277
- outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
278
- )
279
-
280
- next_page_button.click(
281
- fn=go_to_next_page,
282
- inputs=[page_number, entries_per_page],
283
- outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
284
- )
285
-
286
- previous_page_button.click(
287
- fn=go_to_previous_page,
288
- inputs=[page_number, entries_per_page],
289
- outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
290
- )
291
-
292
-
293
-
294
  def create_prompt_search_tab():
295
  with gr.TabItem("Search Prompts"):
296
  gr.Markdown("# Search and View Prompt Details")
@@ -402,3 +315,7 @@ def create_prompt_search_tab():
402
  inputs=[search_query_input, page_number, entries_per_page],
403
  outputs=[search_results_output, pagination_info, page_number, next_page_button, previous_page_button]
404
  )
 
 
 
 
 
10
  import gradio as gr
11
  #
12
  # Local Imports
13
+ from App_Function_Libraries.DB.DB_Manager import view_database, search_and_display_items, get_all_document_versions, \
14
+ fetch_item_details_single, fetch_paginated_data, fetch_item_details, get_latest_transcription
15
+ from App_Function_Libraries.DB.SQLite_DB import search_prompts, get_document_version
16
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_dropdown, update_detailed_view
17
+ from App_Function_Libraries.Utils.Utils import get_database_path, format_text_with_line_breaks
18
  #
19
  ###################################################################################################
20
  #
 
23
  logger = logging.getLogger()
24
 
25
 
26
def update_detailed_view_with_versions(selected_item, item_mapping):
    """Show the prompt/summary/transcription for the chosen item and populate its version list.

    Returns (prompt, summary_html, transcription_html, version-dropdown update);
    clears the panes and hides the dropdown when nothing valid is selected.
    """
    if not (selected_item and item_mapping and selected_item in item_mapping):
        # Nothing valid selected: blank the panes and hide the version picker.
        return "", "", "", gr.update(choices=[], visible=False)

    media_id = item_mapping[selected_item]
    prompt, summary, transcription = fetch_item_details(media_id)

    # Build "Version N (timestamp)" labels for every stored document version.
    version_labels = [
        f"Version {version['version_number']} ({version['created_at']})"
        for version in get_all_document_versions(media_id)
    ]

    return (
        prompt,
        format_as_html(summary, "Summary"),
        format_as_html(transcription, "Transcription"),
        gr.update(choices=version_labels, visible=True),
    )
40
+
41
+
42
def extract_prompt_and_summary(content: str):
    """Split versioned document *content* into a (prompt, summary) pair.

    Placeholder parsing (see original note): the first blank-line-separated
    section is the prompt and the second is the summary; missing sections
    fall back to descriptive default strings.
    """
    sections = content.split('\n\n', 2)
    prompt = sections[0] if sections else "No prompt available."
    summary = sections[1] if len(sections) >= 2 else "No summary available."
    return prompt, summary
49
+
50
+
51
def update_content_for_version(selected_item, item_mapping, selected_version):
    """Reload the prompt/summary/transcription panes for one document version.

    Returns empty strings for all three panes when the selection is invalid
    or the version lookup reports an error.
    """
    if not (selected_item and item_mapping and selected_item in item_mapping):
        return "", "", ""

    media_id = item_mapping[selected_item]
    # Dropdown labels look like "Version 3 (2024-01-01 ...)"; extract the integer.
    version_number = int(selected_version.split()[1].split('(')[0])

    version_data = get_document_version(media_id, version_number)
    if 'error' in version_data:
        return "", "", ""

    prompt, summary = extract_prompt_and_summary(version_data['content'])
    transcription = get_latest_transcription(media_id)

    return (
        prompt,
        format_as_html(summary, "Summary"),
        format_as_html(transcription, "Transcription"),
    )
67
+
68
def format_as_html(content, title):
    """Wrap *content* in a titled, scrollable HTML panel, escaping any markup."""
    # Escape first so user-provided text cannot inject HTML, then keep the
    # visual line breaks by converting newlines to <br>.
    body = html.escape(content).replace('\n', '<br>')
    return f"""
    <div style="border: 1px solid #ddd; padding: 10px; margin-bottom: 10px;">
        <h3>{title}</h3>
        <div style="max-height: 300px; overflow-y: auto;">
            {body}
        </div>
    </div>
    """
79
+
80
  def create_search_tab():
81
  with gr.TabItem("Search / Detailed View"):
82
  with gr.Row():
83
+ with gr.Column(scale=1):
84
  gr.Markdown("# Search across all ingested items in the Database")
85
+ gr.Markdown("by Title / URL / Keyword / or Content via SQLite Full-Text-Search")
86
  search_query_input = gr.Textbox(label="Search Query", placeholder="Enter your search query here...")
87
+ search_type_input = gr.Radio(choices=["Title", "URL", "Keyword", "Content"], value="Title",
88
+ label="Search By")
89
  search_button = gr.Button("Search")
90
  items_output = gr.Dropdown(label="Select Item", choices=[])
91
  item_mapping = gr.State({})
92
+ version_dropdown = gr.Dropdown(label="Select Version", choices=[], visible=False)
93
 
94
  search_button.click(
95
  fn=update_dropdown,
96
  inputs=[search_query_input, search_type_input],
97
  outputs=[items_output, item_mapping]
98
  )
99
+
100
+ with gr.Column(scale=2):
101
+ prompt_output = gr.Textbox(label="Prompt Used", visible=True)
102
+ summary_output = gr.Markdown(label="Summary", visible=True)
103
+ transcription_output = gr.Markdown(label="Transcription", visible=True)
104
+
105
  items_output.change(
106
+ fn=update_detailed_view_with_versions,
107
  inputs=[items_output, item_mapping],
108
+ outputs=[prompt_output, summary_output, transcription_output, version_dropdown]
109
+ )
110
+
111
+ version_dropdown.change(
112
+ fn=update_content_for_version,
113
+ inputs=[items_output, item_mapping, version_dropdown],
114
+ outputs=[prompt_output, summary_output, transcription_output]
115
  )
116
 
117
 
 
147
  return "No results found."
148
 
149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  def create_search_summaries_tab():
151
  with gr.TabItem("Search/View Title+Summary "):
152
  gr.Markdown("# Search across all ingested items in the Database and review their summaries")
 
204
  )
205
 
206
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
  def create_prompt_search_tab():
208
  with gr.TabItem("Search Prompts"):
209
  gr.Markdown("# Search and View Prompt Details")
 
315
  inputs=[search_query_input, page_number, entries_per_page],
316
  outputs=[search_results_output, pagination_info, page_number, next_page_button, previous_page_button]
317
  )
318
+
319
+
320
+
321
+
App_Function_Libraries/Gradio_UI/Video_transcription_tab.py CHANGED
@@ -5,13 +5,16 @@
5
  import json
6
  import logging
7
  import os
 
 
8
  #
9
  # External Imports
10
  import gradio as gr
11
  import yt_dlp
12
  #
13
  # Local Imports
14
- from App_Function_Libraries.DB.DB_Manager import load_preset_prompts, add_media_to_database
 
15
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import whisper_models, update_user_prompt
16
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import error_handler
17
  from App_Function_Libraries.Summarization.Summarization_General_Lib import perform_transcription, perform_summarization, \
@@ -26,7 +29,7 @@ from App_Function_Libraries.Benchmarks_Evaluations.ms_g_eval import run_geval
26
  # Functions:
27
 
28
  def create_video_transcription_tab():
29
- with (gr.TabItem("Video Transcription + Summarization")):
30
  gr.Markdown("# Transcribe & Summarize Videos from URLs")
31
  with gr.Row():
32
  gr.Markdown("""Follow this project at [tldw - GitHub](https://github.com/rmusser01/tldw)""")
@@ -124,6 +127,7 @@ def create_video_transcription_tab():
124
  use_cookies_input = gr.Checkbox(label="Use cookies for authenticated download", value=False)
125
  use_time_input = gr.Checkbox(label="Use Start and End Time", value=False)
126
  confab_checkbox = gr.Checkbox(label="Perform Confabulation Check of Summary", value=False)
 
127
  with gr.Row(visible=False) as time_input_box:
128
  gr.Markdown("### Start and End time")
129
  with gr.Column():
@@ -187,9 +191,10 @@ def create_video_transcription_tab():
187
  chunk_method, max_chunk_size, chunk_overlap, use_adaptive_chunking,
188
  use_multi_level_chunking, chunk_language, api_name,
189
  api_key, keywords, use_cookies, cookies, batch_size,
190
- timestamp_option, keep_original_video, summarize_recursively,
191
  progress: gr.Progress = gr.Progress()) -> tuple:
192
  try:
 
193
  logging.info("Entering process_videos_with_error_handling")
194
  logging.info(f"Received inputs: {inputs}")
195
 
@@ -292,6 +297,7 @@ def create_video_transcription_tab():
292
  """)
293
 
294
  logging.debug("Gradio_Related.py: process_url_with_metadata being called")
 
295
  result = process_url_with_metadata(
296
  input_item, 2, whisper_model,
297
  custom_prompt,
@@ -304,6 +310,7 @@ def create_video_transcription_tab():
304
  chunk_options=chunk_options,
305
  keep_original_video=keep_original_video,
306
  current_whisper_model=whisper_model,
 
307
  )
308
 
309
  if result[0] is None:
@@ -424,7 +431,7 @@ def create_video_transcription_tab():
424
  chunk_method, max_chunk_size, chunk_overlap, use_adaptive_chunking,
425
  use_multi_level_chunking, chunk_language, summarize_recursively, api_name,
426
  api_key, keywords, use_cookies, cookies, batch_size,
427
- timestamp_option, keep_original_video, confab_checkbox):
428
  global result
429
  try:
430
  logging.info("process_videos_wrapper(): process_videos_wrapper called")
@@ -459,7 +466,7 @@ def create_video_transcription_tab():
459
  chunk_method, max_chunk_size, chunk_overlap, use_adaptive_chunking,
460
  use_multi_level_chunking, chunk_language, api_name,
461
  api_key, keywords, use_cookies, cookies, batch_size,
462
- timestamp_option, keep_original_video, summarize_recursively
463
  )
464
 
465
  confabulation_result = None
@@ -496,7 +503,7 @@ def create_video_transcription_tab():
496
  rolling_summarization,
497
  detail_level, question_box, keywords, local_file_path, diarize, end_time=None,
498
  include_timestamps=True, metadata=None, use_chunking=False,
499
- chunk_options=None, keep_original_video=False, current_whisper_model="Blank"):
500
 
501
  try:
502
  logging.info(f"Starting process_url_metadata for URL: {input_item}")
@@ -559,27 +566,69 @@ def create_video_transcription_tab():
559
  logging.error("Failed to extract video information")
560
  return None, None, None, None, None, None
561
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
562
  # Download video/audio
563
  logging.info("Downloading video/audio...")
564
  video_file_path = download_video(input_item, download_path, full_info, download_video_flag,
565
  current_whisper_model=current_whisper_model)
566
  if video_file_path is None:
567
  logging.info(
568
- f"Download skipped for {input_item}. Media might already exist or be processed.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
569
  return input_item, None, None, None, None, info_dict
 
 
 
 
570
 
571
- logging.info(f"Processing file: {video_file_path}")
572
 
573
  # Perform transcription
574
- logging.info("Starting transcription...")
575
  audio_file_path, segments = perform_transcription(video_file_path, offset, whisper_model,
576
  vad_filter, diarize)
577
 
578
  if audio_file_path is None or segments is None:
579
- logging.error("Transcription failed or segments not available.")
580
  return None, None, None, None, None, None
581
 
582
- logging.info(f"Transcription completed. Number of segments: {len(segments)}")
583
 
584
  # Add metadata to segments
585
  segments_with_metadata = {
@@ -598,9 +647,9 @@ def create_video_transcription_tab():
598
  if file_path and os.path.exists(file_path):
599
  try:
600
  os.remove(file_path)
601
- logging.info(f"Successfully deleted file: {file_path}")
602
  except Exception as e:
603
- logging.warning(f"Failed to delete file {file_path}: {str(e)}")
604
 
605
  # Delete the mp4 file after successful transcription if not keeping original audio
606
  # Modify the file deletion logic to respect keep_original_video
@@ -610,12 +659,12 @@ def create_video_transcription_tab():
610
  if file_path and os.path.exists(file_path):
611
  try:
612
  os.remove(file_path)
613
- logging.info(f"Successfully deleted file: {file_path}")
614
  except Exception as e:
615
- logging.warning(f"Failed to delete file {file_path}: {str(e)}")
616
  else:
617
- logging.info(f"Keeping original video file: {video_file_path}")
618
- logging.info(f"Keeping original audio file: {audio_file_path}")
619
 
620
  # Process segments based on the timestamp option
621
  if not include_timestamps:
@@ -627,34 +676,34 @@ def create_video_transcription_tab():
627
  transcription_text = extract_text_from_segments(segments)
628
 
629
  if transcription_text.startswith("Error:"):
630
- logging.error(f"Failed to extract transcription: {transcription_text}")
631
  return None, None, None, None, None, None
632
 
633
  # Use transcription_text instead of segments for further processing
634
  full_text_with_metadata = f"{json.dumps(info_dict, indent=2)}\n\n{transcription_text}"
635
 
636
- logging.debug(f"Full text with metadata extracted: {full_text_with_metadata[:100]}...")
637
 
638
  # Perform summarization if API is provided
639
  summary_text = None
640
  if api_name:
641
  # API key resolution handled at base of function if none provided
642
  api_key = api_key if api_key else None
643
- logging.info(f"Starting summarization with {api_name}...")
644
  summary_text = perform_summarization(api_name, full_text_with_metadata, custom_prompt, api_key)
645
  if summary_text is None:
646
  logging.error("Summarization failed.")
647
  return None, None, None, None, None, None
648
- logging.debug(f"Summarization completed: {summary_text[:100]}...")
649
 
650
  # Save transcription and summary
651
- logging.info("Saving transcription and summary...")
652
  download_path = create_download_directory("Audio_Processing")
653
  json_file_path, summary_file_path = save_transcription_and_summary(full_text_with_metadata,
654
  summary_text,
655
  download_path, info_dict)
656
- logging.info(f"Transcription saved to: {json_file_path}")
657
- logging.info(f"Summary saved to: {summary_file_path}")
658
 
659
  # Prepare keywords for database
660
  if isinstance(keywords, str):
@@ -663,13 +712,22 @@ def create_video_transcription_tab():
663
  keywords_list = keywords
664
  else:
665
  keywords_list = []
666
- logging.info(f"Keywords prepared: {keywords_list}")
 
 
667
 
668
- # Add to database
669
- logging.info("Adding to database...")
670
- add_media_to_database(info_dict['webpage_url'], info_dict, full_text_with_metadata, summary_text,
671
- keywords_list, custom_prompt, whisper_model)
672
- logging.info(f"Media added to database: {info_dict['webpage_url']}")
 
 
 
 
 
 
 
673
 
674
  return info_dict[
675
  'webpage_url'], full_text_with_metadata, summary_text, json_file_path, summary_file_path, info_dict
@@ -694,7 +752,7 @@ def create_video_transcription_tab():
694
  chunk_method, max_chunk_size, chunk_overlap, use_adaptive_chunking,
695
  use_multi_level_chunking, chunk_language, summarize_recursively, api_name_input, api_key_input,
696
  keywords_input, use_cookies_input, cookies_input, batch_size_input,
697
- timestamp_option, keep_original_video, confab_checkbox
698
  ],
699
  outputs=[progress_output, error_output, results_output, download_transcription, download_summary, confabulation_output]
700
  )
 
5
  import json
6
  import logging
7
  import os
8
+ from typing import Dict, Any
9
+
10
  #
11
  # External Imports
12
  import gradio as gr
13
  import yt_dlp
14
  #
15
  # Local Imports
16
+ from App_Function_Libraries.DB.DB_Manager import load_preset_prompts, add_media_to_database, \
17
+ check_media_and_whisper_model, check_existing_media, update_media_content_with_version
18
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import whisper_models, update_user_prompt
19
  from App_Function_Libraries.Gradio_UI.Gradio_Shared import error_handler
20
  from App_Function_Libraries.Summarization.Summarization_General_Lib import perform_transcription, perform_summarization, \
 
29
  # Functions:
30
 
31
  def create_video_transcription_tab():
32
+ with ((gr.TabItem("Video Transcription + Summarization"))):
33
  gr.Markdown("# Transcribe & Summarize Videos from URLs")
34
  with gr.Row():
35
  gr.Markdown("""Follow this project at [tldw - GitHub](https://github.com/rmusser01/tldw)""")
 
127
  use_cookies_input = gr.Checkbox(label="Use cookies for authenticated download", value=False)
128
  use_time_input = gr.Checkbox(label="Use Start and End Time", value=False)
129
  confab_checkbox = gr.Checkbox(label="Perform Confabulation Check of Summary", value=False)
130
+ overwrite_checkbox = gr.Checkbox(label="Overwrite Existing Media", value=False)
131
  with gr.Row(visible=False) as time_input_box:
132
  gr.Markdown("### Start and End time")
133
  with gr.Column():
 
191
  chunk_method, max_chunk_size, chunk_overlap, use_adaptive_chunking,
192
  use_multi_level_chunking, chunk_language, api_name,
193
  api_key, keywords, use_cookies, cookies, batch_size,
194
+ timestamp_option, keep_original_video, summarize_recursively, overwrite_existing=False,
195
  progress: gr.Progress = gr.Progress()) -> tuple:
196
  try:
197
+ # FIXME - summarize_recursively is not being used...
198
  logging.info("Entering process_videos_with_error_handling")
199
  logging.info(f"Received inputs: {inputs}")
200
 
 
297
  """)
298
 
299
  logging.debug("Gradio_Related.py: process_url_with_metadata being called")
300
+ # FIXME - Would assume this is where the multi-processing for recursive summarization would occur
301
  result = process_url_with_metadata(
302
  input_item, 2, whisper_model,
303
  custom_prompt,
 
310
  chunk_options=chunk_options,
311
  keep_original_video=keep_original_video,
312
  current_whisper_model=whisper_model,
313
+ overwrite_existing=overwrite_existing
314
  )
315
 
316
  if result[0] is None:
 
431
  chunk_method, max_chunk_size, chunk_overlap, use_adaptive_chunking,
432
  use_multi_level_chunking, chunk_language, summarize_recursively, api_name,
433
  api_key, keywords, use_cookies, cookies, batch_size,
434
+ timestamp_option, keep_original_video, confab_checkbox, overwrite_existing=False):
435
  global result
436
  try:
437
  logging.info("process_videos_wrapper(): process_videos_wrapper called")
 
466
  chunk_method, max_chunk_size, chunk_overlap, use_adaptive_chunking,
467
  use_multi_level_chunking, chunk_language, api_name,
468
  api_key, keywords, use_cookies, cookies, batch_size,
469
+ timestamp_option, keep_original_video, summarize_recursively, overwrite_existing
470
  )
471
 
472
  confabulation_result = None
 
503
  rolling_summarization,
504
  detail_level, question_box, keywords, local_file_path, diarize, end_time=None,
505
  include_timestamps=True, metadata=None, use_chunking=False,
506
+ chunk_options=None, keep_original_video=False, current_whisper_model="Blank", overwrite_existing=False):
507
 
508
  try:
509
  logging.info(f"Starting process_url_metadata for URL: {input_item}")
 
566
  logging.error("Failed to extract video information")
567
  return None, None, None, None, None, None
568
 
569
+ # FIXME - MAKE SURE THIS WORKS WITH LOCAL FILES
570
+ # FIXME - Add a toggle to force processing even if media exists
571
+ # Check if media already exists in the database
572
+ logging.info("Checking if media already exists in the database...")
573
+ media_exists, reason = check_media_and_whisper_model(
574
+ title=info_dict.get('title'),
575
+ url=info_dict.get('webpage_url'),
576
+ current_whisper_model=current_whisper_model
577
+ )
578
+
579
+ if not media_exists:
580
+ logging.info(
581
+ f"process_url_with_metadata: Media does not exist in the database. Reason: {reason}")
582
+ else:
583
+ if "same whisper model" in reason:
584
+ logging.info(
585
+ f"process_url_with_metadata: Skipping download and processing as media exists and uses the same Whisper model. Reason: {reason}")
586
+ return input_item, None, None, None, None, info_dict
587
+ else:
588
+ logging.info(
589
+ f"process_url_with_metadata: Media found, but with a different Whisper model. Reason: {reason}")
590
+
591
  # Download video/audio
592
  logging.info("Downloading video/audio...")
593
  video_file_path = download_video(input_item, download_path, full_info, download_video_flag,
594
  current_whisper_model=current_whisper_model)
595
  if video_file_path is None:
596
  logging.info(
597
+ f"process_url_with_metadata: Download skipped for {input_item}. Media might already exist or be processed.")
598
+ return input_item, None, None, None, None, info_dict
599
+
600
+ # FIXME - add check for existing media with different whisper model for local files
601
+ # FIXME Check to make sure this works
602
+ media_exists, reason = check_media_and_whisper_model(
603
+ title=info_dict.get('title'),
604
+ url=info_dict.get('webpage_url'),
605
+ current_whisper_model=current_whisper_model
606
+ )
607
+ if not media_exists:
608
+ logging.info(
609
+ f"process_url_with_metadata: Media does not exist in the database. Reason: {reason}")
610
+ else:
611
+ if "same whisper model" in reason:
612
+ logging.info(
613
+ f"process_url_with_metadata: Skipping download and processing as media exists and uses the same Whisper model. Reason: {reason}")
614
  return input_item, None, None, None, None, info_dict
615
+ else:
616
+ same_whisper_model = True
617
+ logging.info(
618
+ f"process_url_with_metadata: Media found, but with a different Whisper model. Reason: {reason}")
619
 
620
+ logging.info(f"process_url_with_metadata: Processing file: {video_file_path}")
621
 
622
  # Perform transcription
623
+ logging.info("process_url_with_metadata: Starting transcription...")
624
  audio_file_path, segments = perform_transcription(video_file_path, offset, whisper_model,
625
  vad_filter, diarize)
626
 
627
  if audio_file_path is None or segments is None:
628
+ logging.error("process_url_with_metadata: Transcription failed or segments not available.")
629
  return None, None, None, None, None, None
630
 
631
+ logging.info(f"process_url_with_metadata: Transcription completed. Number of segments: {len(segments)}")
632
 
633
  # Add metadata to segments
634
  segments_with_metadata = {
 
647
  if file_path and os.path.exists(file_path):
648
  try:
649
  os.remove(file_path)
650
+ logging.info(f"process_url_with_metadata: Successfully deleted file: {file_path}")
651
  except Exception as e:
652
+ logging.warning(f"process_url_with_metadata: Failed to delete file {file_path}: {str(e)}")
653
 
654
  # Delete the mp4 file after successful transcription if not keeping original audio
655
  # Modify the file deletion logic to respect keep_original_video
 
659
  if file_path and os.path.exists(file_path):
660
  try:
661
  os.remove(file_path)
662
+ logging.info(f"process_url_with_metadata: Successfully deleted file: {file_path}")
663
  except Exception as e:
664
+ logging.warning(f"process_url_with_metadata: Failed to delete file {file_path}: {str(e)}")
665
  else:
666
+ logging.info(f"process_url_with_metadata: Keeping original video file: {video_file_path}")
667
+ logging.info(f"process_url_with_metadata: Keeping original audio file: {audio_file_path}")
668
 
669
  # Process segments based on the timestamp option
670
  if not include_timestamps:
 
676
  transcription_text = extract_text_from_segments(segments)
677
 
678
  if transcription_text.startswith("Error:"):
679
+ logging.error(f"process_url_with_metadata: Failed to extract transcription: {transcription_text}")
680
  return None, None, None, None, None, None
681
 
682
  # Use transcription_text instead of segments for further processing
683
  full_text_with_metadata = f"{json.dumps(info_dict, indent=2)}\n\n{transcription_text}"
684
 
685
+ logging.debug(f"process_url_with_metadata: Full text with metadata extracted: {full_text_with_metadata[:100]}...")
686
 
687
  # Perform summarization if API is provided
688
  summary_text = None
689
  if api_name:
690
  # API key resolution handled at base of function if none provided
691
  api_key = api_key if api_key else None
692
+ logging.info(f"process_url_with_metadata: Starting summarization with {api_name}...")
693
  summary_text = perform_summarization(api_name, full_text_with_metadata, custom_prompt, api_key)
694
  if summary_text is None:
695
  logging.error("Summarization failed.")
696
  return None, None, None, None, None, None
697
+ logging.debug(f"process_url_with_metadata: Summarization completed: {summary_text[:100]}...")
698
 
699
  # Save transcription and summary
700
+ logging.info("process_url_with_metadata: Saving transcription and summary...")
701
  download_path = create_download_directory("Audio_Processing")
702
  json_file_path, summary_file_path = save_transcription_and_summary(full_text_with_metadata,
703
  summary_text,
704
  download_path, info_dict)
705
+ logging.info(f"process_url_with_metadata: Transcription saved to: {json_file_path}")
706
+ logging.info(f"process_url_with_metadata: Summary saved to: {summary_file_path}")
707
 
708
  # Prepare keywords for database
709
  if isinstance(keywords, str):
 
712
  keywords_list = keywords
713
  else:
714
  keywords_list = []
715
+ logging.info(f"process_url_with_metadata: Keywords prepared: {keywords_list}")
716
+
717
+ existing_media = check_existing_media(info_dict['webpage_url'])
718
 
719
+ if existing_media:
720
+ # Update existing media with new version
721
+ media_id = existing_media['id']
722
+ update_result = update_media_content_with_version(media_id, info_dict, full_text_with_metadata,
723
+ custom_prompt, summary_text, whisper_model)
724
+ logging.info(f"process_url_with_metadata: {update_result}")
725
+ else:
726
+ # Add new media to database
727
+ add_result = add_media_to_database(info_dict['webpage_url'], info_dict, full_text_with_metadata,
728
+ summary_text,
729
+ keywords_list, custom_prompt, whisper_model)
730
+ logging.info(f"process_url_with_metadata: {add_result}")
731
 
732
  return info_dict[
733
  'webpage_url'], full_text_with_metadata, summary_text, json_file_path, summary_file_path, info_dict
 
752
  chunk_method, max_chunk_size, chunk_overlap, use_adaptive_chunking,
753
  use_multi_level_chunking, chunk_language, summarize_recursively, api_name_input, api_key_input,
754
  keywords_input, use_cookies_input, cookies_input, batch_size_input,
755
+ timestamp_option, keep_original_video, confab_checkbox, overwrite_checkbox
756
  ],
757
  outputs=[progress_output, error_output, results_output, download_transcription, download_summary, confabulation_output]
758
  )
App_Function_Libraries/Gradio_UI/View_DB_Items_tab.py ADDED
@@ -0,0 +1,290 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # View_DB_Items_tab.py
2
+ # Description: This file contains the code for the search tab in the Gradio UI
3
+ #
4
+ # Imports
5
+ import html
6
+ import sqlite3
7
+ #
8
+ # External Imports
9
+ import gradio as gr
10
+ #
11
+ # Local Imports
12
+ from App_Function_Libraries.DB.DB_Manager import view_database, get_all_document_versions, \
13
+ fetch_item_details_single, fetch_paginated_data
14
+ from App_Function_Libraries.DB.SQLite_DB import get_document_version
15
+ from App_Function_Libraries.Utils.Utils import get_database_path, format_text_with_line_breaks
16
+ #
17
+ #
18
+ ####################################################################################################
19
+ #
20
+ # Functions
21
+
22
def create_prompt_view_tab():
    """Build the "View Prompt Database" tab: a paginated HTML listing of stored prompts."""
    with gr.TabItem("View Prompt Database"):
        gr.Markdown("# View Prompt Database Entries")
        with gr.Row():
            with gr.Column():
                entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
                page_number = gr.Number(value=1, label="Page Number", precision=0)
                view_button = gr.Button("View Page")
                next_page_button = gr.Button("Next Page")
                previous_page_button = gr.Button("Previous Page")
                pagination_info = gr.Textbox(label="Pagination Info", interactive=False)
            with gr.Column():
                results_display = gr.HTML()

        # FIXME - SQL functions to be moved to DB_Manager

        # NOTE(review): renamed from `view_database` — the original inner name
        # shadowed the `view_database` imported from DB_Manager at module level.
        def fetch_prompts_page(page, entries_per_page):
            """Return (results_html, pagination_text, total_pages) for one page of prompts."""
            offset = (page - 1) * entries_per_page
            try:
                with sqlite3.connect(get_database_path('prompts.db')) as conn:
                    cursor = conn.cursor()
                    # One row per prompt with its keywords aggregated into a CSV string.
                    cursor.execute('''
                        SELECT p.name, p.details, p.system, p.user, GROUP_CONCAT(k.keyword, ', ') as keywords
                        FROM Prompts p
                        LEFT JOIN PromptKeywords pk ON p.id = pk.prompt_id
                        LEFT JOIN Keywords k ON pk.keyword_id = k.id
                        GROUP BY p.id
                        ORDER BY p.name
                        LIMIT ? OFFSET ?
                    ''', (entries_per_page, offset))
                    prompts = cursor.fetchall()

                    cursor.execute('SELECT COUNT(*) FROM Prompts')
                    total_prompts = cursor.fetchone()[0]

                results = ""
                for prompt in prompts:
                    # Escape HTML special characters and replace newlines with <br> tags
                    title = html.escape(prompt[0]).replace('\n', '<br>')
                    details = html.escape(prompt[1] or '').replace('\n', '<br>')
                    system_prompt = html.escape(prompt[2] or '')
                    user_prompt = html.escape(prompt[3] or '')
                    keywords = html.escape(prompt[4] or '').replace('\n', '<br>')

                    results += f"""
                    <div style="border: 1px solid #ddd; padding: 10px; margin-bottom: 20px;">
                        <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px;">
                            <div><strong>Title:</strong> {title}</div>
                            <div><strong>Details:</strong> {details}</div>
                        </div>
                        <div style="margin-top: 10px;">
                            <strong>User Prompt:</strong>
                            <pre style="white-space: pre-wrap; word-wrap: break-word;">{user_prompt}</pre>
                        </div>
                        <div style="margin-top: 10px;">
                            <strong>System Prompt:</strong>
                            <pre style="white-space: pre-wrap; word-wrap: break-word;">{system_prompt}</pre>
                        </div>
                        <div style="margin-top: 10px;">
                            <strong>Keywords:</strong> {keywords}
                        </div>
                    </div>
                    """

                # Ceiling division for the page count.
                total_pages = (total_prompts + entries_per_page - 1) // entries_per_page
                pagination = f"Page {page} of {total_pages} (Total prompts: {total_prompts})"

                return results, pagination, total_pages
            except sqlite3.Error as e:
                return f"<p>Error fetching prompts: {e}</p>", "Error", 0

        def update_page(page, entries_per_page):
            """Fetch a page and compute pager-button interactivity."""
            results, pagination, total_pages = fetch_prompts_page(page, entries_per_page)
            next_disabled = page >= total_pages
            prev_disabled = page <= 1
            return results, pagination, page, gr.update(interactive=not next_disabled), gr.update(
                interactive=not prev_disabled)

        def go_to_next_page(current_page, entries_per_page):
            return update_page(current_page + 1, entries_per_page)

        def go_to_previous_page(current_page, entries_per_page):
            # Clamp at page 1 so the pager never goes below the first page.
            return update_page(max(1, current_page - 1), entries_per_page)

        view_button.click(
            fn=update_page,
            inputs=[page_number, entries_per_page],
            outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
        )

        next_page_button.click(
            fn=go_to_next_page,
            inputs=[page_number, entries_per_page],
            outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
        )

        previous_page_button.click(
            fn=go_to_previous_page,
            inputs=[page_number, entries_per_page],
            outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
        )
125
+
126
+
127
def create_view_all_with_versions_tab():
    """Build the 'View All Items' tab: a paginated item browser with per-item
    document-version selection.

    Layout: pagination controls plus an item dropdown on top; pagination info
    and an HTML details pane below. Selecting an item reveals a version
    dropdown populated from that item's stored document versions.
    """
    with gr.TabItem("View All Items"):
        gr.Markdown("# View All Database Entries with Version Selection")
        with gr.Row():
            with gr.Column(scale=1):
                entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
                page_number = gr.Number(value=1, label="Page Number", precision=0)
                view_button = gr.Button("View Page")
                next_page_button = gr.Button("Next Page")
                previous_page_button = gr.Button("Previous Page")
            with gr.Column(scale=2):
                items_output = gr.Dropdown(label="Select Item to View Details", choices=[])
                version_dropdown = gr.Dropdown(label="Select Version", choices=[], visible=False)
        with gr.Row():
            with gr.Column(scale=1):
                pagination_info = gr.Textbox(label="Pagination Info", interactive=False)
            with gr.Column(scale=2):
                details_display = gr.HTML(label="Item Details")

        # Maps dropdown label -> media id; kept in Gradio state so callbacks can
        # resolve the selected label back to a database id after re-renders.
        item_mapping = gr.State({})

        def update_page(page, entries_per_page):
            """Fetch one page of items and refresh every pagination-related output.

            Returns updates for: item dropdown, pagination text, page number,
            next/previous button interactivity, version dropdown (hidden),
            details pane (cleared), and the refreshed label->id mapping.
            """
            results, total_entries = fetch_paginated_data(page, entries_per_page)
            # Ceiling division: partial last page still counts as a page.
            total_pages = (total_entries + entries_per_page - 1) // entries_per_page
            pagination = f"Page {page} of {total_pages} (Total items: {total_entries})"

            choices = [f"{item[1]} (ID: {item[0]})" for item in results]
            new_item_mapping = {f"{item[1]} (ID: {item[0]})": item[0] for item in results}

            next_disabled = page >= total_pages
            prev_disabled = page <= 1

            return (gr.update(choices=choices, value=None),
                    pagination,
                    page,
                    gr.update(interactive=not next_disabled),
                    gr.update(interactive=not prev_disabled),
                    gr.update(visible=False, choices=[]),
                    "",
                    new_item_mapping)

        def display_item_details(selected_item, item_mapping):
            """Show a prompt/summary/content preview for the selected item and
            populate the version dropdown with its stored versions."""
            # .get() guards against a stale selection after the page (and thus
            # the mapping) changed underneath the dropdown; plain indexing
            # raised KeyError in that case.
            media_id = item_mapping.get(selected_item) if (selected_item and item_mapping) else None
            if media_id is not None:
                prompt, summary, content = fetch_item_details_single(media_id)
                versions = get_all_document_versions(media_id)
                version_choices = [f"Version {v['version_number']} ({v['created_at']})" for v in versions]

                formatted_prompt = format_text_with_line_breaks(prompt)
                formatted_summary = format_text_with_line_breaks(summary)
                # Only preview the first 500 characters; full text is shown per version.
                formatted_content = format_text_with_line_breaks(content[:500])

                details_html = f"""
                <h3>{selected_item}</h3>
                <strong>Prompt:</strong><br>{formatted_prompt}<br><br>
                <strong>Summary:</strong><br>{formatted_summary}<br><br>
                <strong>Content (first 500 characters):</strong><br>{formatted_content}...
                """

                return (
                    gr.update(visible=True, choices=version_choices,
                              value=version_choices[0] if version_choices else None),
                    details_html)
            return gr.update(visible=False, choices=[]), ""

        def update_version_content(selected_item, item_mapping, selected_version):
            """Render the full content of one stored version of the selected item."""
            if selected_item and item_mapping and selected_version:
                # Guard against a stale selection (see display_item_details).
                media_id = item_mapping.get(selected_item)
                if media_id is None:
                    return ""
                # Labels look like "Version N (timestamp)"; extract N.
                version_number = int(selected_version.split()[1].split('(')[0])
                version_data = get_document_version(media_id, version_number)

                if 'error' not in version_data:
                    formatted_content = format_text_with_line_breaks(version_data['content'])
                    details_html = f"""
                    <h3>{selected_item}</h3>
                    <strong>Version:</strong> {version_number}<br>
                    <strong>Created at:</strong> {version_data['created_at']}<br><br>
                    <strong>Content:</strong><br>{formatted_content}
                    """
                    return details_html
            return ""

        view_button.click(
            fn=update_page,
            inputs=[page_number, entries_per_page],
            outputs=[items_output, pagination_info, page_number, next_page_button, previous_page_button,
                     version_dropdown, details_display, item_mapping]
        )

        # Next/previous reuse update_page; "previous" is clamped at page 1,
        # "next" relies on the button being disabled at the last page.
        next_page_button.click(
            fn=lambda page, entries: update_page(page + 1, entries),
            inputs=[page_number, entries_per_page],
            outputs=[items_output, pagination_info, page_number, next_page_button, previous_page_button,
                     version_dropdown, details_display, item_mapping]
        )

        previous_page_button.click(
            fn=lambda page, entries: update_page(max(1, page - 1), entries),
            inputs=[page_number, entries_per_page],
            outputs=[items_output, pagination_info, page_number, next_page_button, previous_page_button,
                     version_dropdown, details_display, item_mapping]
        )

        items_output.change(
            fn=display_item_details,
            inputs=[items_output, item_mapping],
            outputs=[version_dropdown, details_display]
        )

        version_dropdown.change(
            fn=update_version_content,
            inputs=[items_output, item_mapping, version_dropdown],
            outputs=[details_display]
        )
240
+
241
+
242
def create_viewing_tab():
    """Build the 'View Database Entries' tab: a simple paginated HTML listing
    of database entries driven by view_database()."""
    with gr.TabItem("View Database Entries"):
        gr.Markdown("# View Database Entries")
        with gr.Row():
            with gr.Column():
                entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
                page_number = gr.Number(value=1, label="Page Number", precision=0)
                view_button = gr.Button("View Page")
                next_page_button = gr.Button("Next Page")
                previous_page_button = gr.Button("Previous Page")
                pagination_info = gr.Textbox(label="Pagination Info", interactive=False)
            with gr.Column():
                results_display = gr.HTML()

        def update_page(page, entries_per_page):
            """Render one page of entries and toggle the navigation buttons
            at the first/last page boundaries."""
            results, pagination, total_pages = view_database(page, entries_per_page)
            at_last_page = page >= total_pages
            at_first_page = page <= 1
            return (results,
                    pagination,
                    page,
                    gr.update(interactive=not at_last_page),
                    gr.update(interactive=not at_first_page))

        # All three buttons funnel through update_page and refresh the same
        # set of components; next/previous just shift the current page
        # (previous is clamped at page 1).
        shared_outputs = [results_display, pagination_info, page_number,
                          next_page_button, previous_page_button]

        view_button.click(
            fn=update_page,
            inputs=[page_number, entries_per_page],
            outputs=shared_outputs,
        )

        next_page_button.click(
            fn=lambda current, per_page: update_page(current + 1, per_page),
            inputs=[page_number, entries_per_page],
            outputs=shared_outputs,
        )

        previous_page_button.click(
            fn=lambda current, per_page: update_page(max(1, current - 1), per_page),
            inputs=[page_number, entries_per_page],
            outputs=shared_outputs,
        )
288
+
289
+ #
290
+ ####################################################################################################