Spaces:
Sleeping
Sleeping
oceansweep
commited on
Commit
•
83c8d2b
1
Parent(s):
6ed8460
Upload 29 files
Browse files- App_Function_Libraries/Gradio_UI/Audio_ingestion_tab.py +1 -1
- App_Function_Libraries/Gradio_UI/Backup_Functionality.py +71 -0
- App_Function_Libraries/Gradio_UI/Chat_Workflows.py +189 -0
- App_Function_Libraries/Gradio_UI/Chat_ui.py +41 -141
- App_Function_Libraries/Gradio_UI/Evaluations_Benchmarks_tab.py +60 -0
- App_Function_Libraries/Gradio_UI/Explain_summarize_tab.py +3 -2
- App_Function_Libraries/Gradio_UI/Export_Functionality.py +1 -49
- App_Function_Libraries/Gradio_UI/Import_Functionality.py +1 -1
- App_Function_Libraries/Gradio_UI/Media_edit.py +33 -7
- App_Function_Libraries/Gradio_UI/Media_wiki_tab.py +202 -4
- App_Function_Libraries/Gradio_UI/Podcast_tab.py +1 -1
- App_Function_Libraries/Gradio_UI/Search_Tab.py +404 -426
- App_Function_Libraries/Gradio_UI/Trash.py +98 -93
- App_Function_Libraries/Gradio_UI/Video_transcription_tab.py +4 -5
- App_Function_Libraries/Gradio_UI/View_tab.py +170 -0
- App_Function_Libraries/Gradio_UI/Website_scraping_tab.py +504 -77
App_Function_Libraries/Gradio_UI/Audio_ingestion_tab.py
CHANGED
@@ -104,7 +104,7 @@ def create_audio_processing_tab():
|
|
104 |
|
105 |
api_name_input = gr.Dropdown(
|
106 |
choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral", "OpenRouter",
|
107 |
-
"Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM","ollama", "HuggingFace"],
|
108 |
value=None,
|
109 |
label="API for Summarization (Optional)"
|
110 |
)
|
|
|
104 |
|
105 |
api_name_input = gr.Dropdown(
|
106 |
choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral", "OpenRouter",
|
107 |
+
"Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM","ollama", "HuggingFace", "Custom-OpenAI-API"],
|
108 |
value=None,
|
109 |
label="API for Summarization (Optional)"
|
110 |
)
|
App_Function_Libraries/Gradio_UI/Backup_Functionality.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Backup_Functionality.py
|
2 |
+
# Functionality for exporting items as markdown files
|
3 |
+
#
|
4 |
+
# Imports:
|
5 |
+
import os
|
6 |
+
import shutil
|
7 |
+
import gradio as gr
|
8 |
+
#
|
9 |
+
# Local Imports:
|
10 |
+
from App_Function_Libraries.DB.DB_Manager import create_automated_backup, db_path, backup_dir
|
11 |
+
#
|
12 |
+
# End of Imports
|
13 |
+
#######################################################################################################################
|
14 |
+
#
|
15 |
+
# Functions:
|
16 |
+
|
17 |
+
def create_backup():
|
18 |
+
backup_file = create_automated_backup(db_path, backup_dir)
|
19 |
+
return f"Backup created: {backup_file}"
|
20 |
+
|
21 |
+
|
22 |
+
def list_backups():
|
23 |
+
backups = [f for f in os.listdir(backup_dir) if f.endswith('.db')]
|
24 |
+
return "\n".join(backups)
|
25 |
+
|
26 |
+
|
27 |
+
def restore_backup(backup_name: str) -> str:
|
28 |
+
backup_path_location: str = os.path.join(str(backup_dir), backup_name)
|
29 |
+
if os.path.exists(backup_path_location):
|
30 |
+
shutil.copy2(str(backup_path_location), str(db_path))
|
31 |
+
return f"Database restored from {backup_name}"
|
32 |
+
else:
|
33 |
+
return "Backup file not found"
|
34 |
+
|
35 |
+
|
36 |
+
def create_backup_tab():
|
37 |
+
with gr.Tab("Create Backup"):
|
38 |
+
gr.Markdown("# Create a backup of the database")
|
39 |
+
gr.Markdown("This will create a backup of the database in the backup directory(the default backup directory is `/tldw_DB_Backups/')")
|
40 |
+
with gr.Row():
|
41 |
+
with gr.Column():
|
42 |
+
create_button = gr.Button("Create Backup")
|
43 |
+
create_output = gr.Textbox(label="Result")
|
44 |
+
with gr.Column():
|
45 |
+
create_button.click(create_backup, inputs=[], outputs=create_output)
|
46 |
+
|
47 |
+
|
48 |
+
def create_view_backups_tab():
|
49 |
+
with gr.TabItem("View Backups"):
|
50 |
+
gr.Markdown("# Browse available backups")
|
51 |
+
with gr.Row():
|
52 |
+
with gr.Column():
|
53 |
+
view_button = gr.Button("View Backups")
|
54 |
+
with gr.Column():
|
55 |
+
backup_list = gr.Textbox(label="Available Backups")
|
56 |
+
view_button.click(list_backups, inputs=[], outputs=backup_list)
|
57 |
+
|
58 |
+
|
59 |
+
def create_restore_backup_tab():
|
60 |
+
with gr.TabItem("Restore Backup"):
|
61 |
+
gr.Markdown("# Restore a backup of the database")
|
62 |
+
with gr.Column():
|
63 |
+
backup_input = gr.Textbox(label="Backup Filename")
|
64 |
+
restore_button = gr.Button("Restore")
|
65 |
+
with gr.Column():
|
66 |
+
restore_output = gr.Textbox(label="Result")
|
67 |
+
restore_button.click(restore_backup, inputs=[backup_input], outputs=restore_output)
|
68 |
+
|
69 |
+
#
|
70 |
+
# End of Functions
|
71 |
+
#######################################################################################################################
|
App_Function_Libraries/Gradio_UI/Chat_Workflows.py
ADDED
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Chat_Workflows.py
|
2 |
+
# Description: UI for Chat Workflows
|
3 |
+
#
|
4 |
+
# Imports
|
5 |
+
import json
|
6 |
+
import logging
|
7 |
+
from pathlib import Path
|
8 |
+
#
|
9 |
+
# External Imports
|
10 |
+
import gradio as gr
|
11 |
+
#
|
12 |
+
from App_Function_Libraries.Gradio_UI.Chat_ui import process_with_llm
|
13 |
+
#
|
14 |
+
############################################################################################################
|
15 |
+
#
|
16 |
+
# Functions:
|
17 |
+
|
18 |
+
# Load workflows from a JSON file
|
19 |
+
json_path = Path('./Helper_Scripts/Workflows/Workflows.json')
|
20 |
+
with json_path.open('r') as f:
|
21 |
+
workflows = json.load(f)
|
22 |
+
|
23 |
+
|
24 |
+
# FIXME - broken Completely. Doesn't work.
|
25 |
+
def chat_workflows_tab():
|
26 |
+
with gr.TabItem("Chat Workflows"):
|
27 |
+
gr.Markdown("# Workflows using LLMs")
|
28 |
+
|
29 |
+
with gr.Row():
|
30 |
+
workflow_selector = gr.Dropdown(label="Select Workflow", choices=[wf['name'] for wf in workflows])
|
31 |
+
api_selector = gr.Dropdown(
|
32 |
+
label="Select API Endpoint",
|
33 |
+
choices=["OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral", "OpenRouter",
|
34 |
+
"Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"],
|
35 |
+
value="OpenAI"
|
36 |
+
)
|
37 |
+
api_key_input = gr.Textbox(label="API Key (optional)", type="password")
|
38 |
+
|
39 |
+
context_input = gr.Textbox(label="Initial Context (optional)", lines=5)
|
40 |
+
|
41 |
+
# Create a container for dynamic components
|
42 |
+
with gr.Column() as dynamic_components:
|
43 |
+
prompt_displays = []
|
44 |
+
user_inputs = []
|
45 |
+
output_boxes = []
|
46 |
+
process_buttons = []
|
47 |
+
regenerate_buttons = []
|
48 |
+
|
49 |
+
# Create the maximum number of components needed
|
50 |
+
max_steps = max(len(wf['prompts']) for wf in workflows)
|
51 |
+
for i in range(max_steps):
|
52 |
+
prompt_displays.append(gr.Markdown(visible=False))
|
53 |
+
user_inputs.append(gr.Textbox(label=f"Your Input", lines=2, visible=False))
|
54 |
+
output_boxes.append(gr.Textbox(label=f"AI Output", lines=5, visible=False))
|
55 |
+
with gr.Row():
|
56 |
+
process_buttons.append(gr.Button(f"Process Step {i + 1}", visible=False))
|
57 |
+
regenerate_buttons.append(gr.Button(f"🔄 Regenerate", visible=False))
|
58 |
+
|
59 |
+
def update_workflow_ui(workflow_name):
|
60 |
+
selected_workflow = next(wf for wf in workflows if wf['name'] == workflow_name)
|
61 |
+
num_prompts = len(selected_workflow['prompts'])
|
62 |
+
|
63 |
+
prompt_updates = []
|
64 |
+
input_updates = []
|
65 |
+
output_updates = []
|
66 |
+
button_updates = []
|
67 |
+
regenerate_updates = []
|
68 |
+
|
69 |
+
for i in range(max_steps):
|
70 |
+
if i < num_prompts:
|
71 |
+
prompt_updates.append(
|
72 |
+
gr.update(value=f"**Step {i + 1}:** {selected_workflow['prompts'][i]}", visible=True))
|
73 |
+
input_updates.append(gr.update(value="", visible=True, interactive=(i == 0)))
|
74 |
+
output_updates.append(gr.update(value="", visible=True))
|
75 |
+
button_updates.append(gr.update(visible=(i == 0)))
|
76 |
+
regenerate_updates.append(gr.update(visible=False))
|
77 |
+
else:
|
78 |
+
prompt_updates.append(gr.update(visible=False))
|
79 |
+
input_updates.append(gr.update(visible=False))
|
80 |
+
output_updates.append(gr.update(visible=False))
|
81 |
+
button_updates.append(gr.update(visible=False))
|
82 |
+
regenerate_updates.append(gr.update(visible=False))
|
83 |
+
|
84 |
+
return prompt_updates + input_updates + output_updates + button_updates + regenerate_updates
|
85 |
+
|
86 |
+
def process(context, workflow_name, api_endpoint, api_key, step, *user_inputs):
|
87 |
+
try:
|
88 |
+
selected_workflow = next(wf for wf in workflows if wf['name'] == workflow_name)
|
89 |
+
except StopIteration:
|
90 |
+
# Handle the case where no matching workflow is found
|
91 |
+
error_message = f"No workflow found with name: {workflow_name}"
|
92 |
+
logging.error(error_message)
|
93 |
+
return [gr.update(value=error_message)] * (
|
94 |
+
len(prompt_displays) + len(user_inputs) + len(output_boxes) + len(process_buttons) + len(
|
95 |
+
regenerate_buttons))
|
96 |
+
|
97 |
+
# Ensure we don't go out of bounds
|
98 |
+
if step >= len(selected_workflow['prompts']):
|
99 |
+
error_message = f"Step {step} is out of range for workflow: {workflow_name}"
|
100 |
+
logging.error(error_message)
|
101 |
+
return [gr.update(value=error_message)] * (
|
102 |
+
len(prompt_displays) + len(user_inputs) + len(output_boxes) + len(process_buttons) + len(
|
103 |
+
regenerate_buttons))
|
104 |
+
|
105 |
+
# Build up the context from previous steps
|
106 |
+
full_context = context + "\n\n"
|
107 |
+
for i in range(step + 1):
|
108 |
+
full_context += f"Question: {selected_workflow['prompts'][i]}\n"
|
109 |
+
full_context += f"Answer: {user_inputs[i]}\n"
|
110 |
+
if i < step:
|
111 |
+
full_context += f"AI Output: {output_boxes[i].value}\n\n"
|
112 |
+
|
113 |
+
try:
|
114 |
+
result = process_with_llm(workflow_name, full_context, selected_workflow['prompts'][step], api_endpoint,
|
115 |
+
api_key)
|
116 |
+
except Exception as e:
|
117 |
+
error_message = f"Error processing with LLM: {str(e)}"
|
118 |
+
logging.error(error_message)
|
119 |
+
result = error_message
|
120 |
+
|
121 |
+
updates = []
|
122 |
+
for i in range(max_steps):
|
123 |
+
if i == step:
|
124 |
+
updates.extend([
|
125 |
+
gr.update(), # Markdown (prompt_displays)
|
126 |
+
gr.update(interactive=False), # Textbox (user_inputs)
|
127 |
+
gr.update(value=result), # Textbox (output_boxes)
|
128 |
+
gr.update(visible=False), # Button (process_buttons)
|
129 |
+
gr.update(visible=True) # Button (regenerate_buttons)
|
130 |
+
])
|
131 |
+
elif i == step + 1:
|
132 |
+
updates.extend([
|
133 |
+
gr.update(), # Markdown (prompt_displays)
|
134 |
+
gr.update(interactive=True), # Textbox (user_inputs)
|
135 |
+
gr.update(), # Textbox (output_boxes)
|
136 |
+
gr.update(visible=True), # Button (process_buttons)
|
137 |
+
gr.update(visible=False) # Button (regenerate_buttons)
|
138 |
+
])
|
139 |
+
elif i > step + 1:
|
140 |
+
updates.extend([
|
141 |
+
gr.update(), # Markdown (prompt_displays)
|
142 |
+
gr.update(interactive=False), # Textbox (user_inputs)
|
143 |
+
gr.update(), # Textbox (output_boxes)
|
144 |
+
gr.update(visible=False), # Button (process_buttons)
|
145 |
+
gr.update(visible=False) # Button (regenerate_buttons)
|
146 |
+
])
|
147 |
+
else:
|
148 |
+
updates.extend([
|
149 |
+
gr.update(), # Markdown (prompt_displays)
|
150 |
+
gr.update(interactive=False), # Textbox (user_inputs)
|
151 |
+
gr.update(), # Textbox (output_boxes)
|
152 |
+
gr.update(visible=False), # Button (process_buttons)
|
153 |
+
gr.update(visible=True) # Button (regenerate_buttons)
|
154 |
+
])
|
155 |
+
|
156 |
+
return updates
|
157 |
+
|
158 |
+
# Set up event handlers
|
159 |
+
workflow_selector.change(
|
160 |
+
update_workflow_ui,
|
161 |
+
inputs=[workflow_selector],
|
162 |
+
outputs=prompt_displays + user_inputs + output_boxes + process_buttons + regenerate_buttons
|
163 |
+
)
|
164 |
+
|
165 |
+
# Set up process button click events
|
166 |
+
for i, button in enumerate(process_buttons):
|
167 |
+
button.click(
|
168 |
+
fn=lambda context, wf_name, api_endpoint, api_key, *inputs, step=i: process(context, wf_name,
|
169 |
+
api_endpoint, api_key, step,
|
170 |
+
*inputs),
|
171 |
+
inputs=[context_input, workflow_selector, api_selector, api_key_input] + user_inputs,
|
172 |
+
outputs=prompt_displays + user_inputs + output_boxes + process_buttons + regenerate_buttons
|
173 |
+
).then(lambda: gr.update(value=""), outputs=[user_inputs[i]])
|
174 |
+
|
175 |
+
# Set up regenerate button click events
|
176 |
+
for i, button in enumerate(regenerate_buttons):
|
177 |
+
button.click(
|
178 |
+
fn=lambda context, wf_name, api_endpoint, api_key, *inputs, step=i: process(context, wf_name,
|
179 |
+
api_endpoint, api_key, step,
|
180 |
+
*inputs),
|
181 |
+
inputs=[context_input, workflow_selector, api_selector, api_key_input] + user_inputs,
|
182 |
+
outputs=prompt_displays + user_inputs + output_boxes + process_buttons + regenerate_buttons
|
183 |
+
)
|
184 |
+
|
185 |
+
return workflow_selector, api_selector, api_key_input, context_input, dynamic_components
|
186 |
+
|
187 |
+
#
|
188 |
+
# End of script
|
189 |
+
############################################################################################################
|
App_Function_Libraries/Gradio_UI/Chat_ui.py
CHANGED
@@ -8,7 +8,6 @@ import logging
|
|
8 |
import os
|
9 |
import sqlite3
|
10 |
from datetime import datetime
|
11 |
-
from pathlib import Path
|
12 |
#
|
13 |
# External Imports
|
14 |
import gradio as gr
|
@@ -96,7 +95,7 @@ def chat_wrapper(message, history, media_content, selected_parts, api_endpoint,
|
|
96 |
# Generate bot response
|
97 |
bot_message = chat(full_message, history, media_content, selected_parts, api_endpoint, api_key, custom_prompt,
|
98 |
temperature, system_prompt)
|
99 |
-
|
100 |
if save_conversation:
|
101 |
# Add assistant message to the database
|
102 |
add_chat_message(conversation_id, "assistant", bot_message)
|
@@ -281,11 +280,10 @@ def create_chat_interface():
|
|
281 |
inputs=[preset_prompt_checkbox],
|
282 |
outputs=[preset_prompt]
|
283 |
)
|
284 |
-
|
285 |
submit.click(
|
286 |
chat_wrapper,
|
287 |
-
inputs=[msg, chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt,
|
288 |
-
|
289 |
outputs=[msg, chatbot, conversation_id]
|
290 |
).then( # Clear the message box after submission
|
291 |
lambda x: gr.update(value=""),
|
@@ -777,6 +775,7 @@ def create_chat_management_tab():
|
|
777 |
with gr.TabItem("Edit"):
|
778 |
chat_content = gr.TextArea(label="Chat Content (JSON)", lines=20, max_lines=50)
|
779 |
save_button = gr.Button("Save Changes")
|
|
|
780 |
|
781 |
with gr.TabItem("Preview"):
|
782 |
chat_preview = gr.HTML(label="Chat Preview")
|
@@ -898,6 +897,38 @@ def create_chat_management_tab():
|
|
898 |
logging.error(f"Unexpected error in save_conversation: {e}")
|
899 |
return f"Unexpected error: {str(e)}", "<p>Unexpected error occurred</p>"
|
900 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
901 |
def parse_formatted_content(formatted_content):
|
902 |
lines = formatted_content.split('\n')
|
903 |
conversation_id = int(lines[0].split(': ')[1])
|
@@ -938,145 +969,14 @@ def create_chat_management_tab():
|
|
938 |
outputs=[result_message, chat_preview]
|
939 |
)
|
940 |
|
941 |
-
|
942 |
-
|
943 |
-
|
944 |
-
|
945 |
-
json_path = Path('./Helper_Scripts/Workflows/Workflows.json')
|
946 |
-
with json_path.open('r') as f:
|
947 |
-
workflows = json.load(f)
|
948 |
-
|
949 |
-
|
950 |
-
# FIXME - broken Completely. Doesn't work.
|
951 |
-
def chat_workflows_tab():
|
952 |
-
with gr.TabItem("Chat Workflows"):
|
953 |
-
gr.Markdown("# Workflows using LLMs")
|
954 |
-
|
955 |
-
with gr.Row():
|
956 |
-
workflow_selector = gr.Dropdown(label="Select Workflow", choices=[wf['name'] for wf in workflows])
|
957 |
-
api_selector = gr.Dropdown(
|
958 |
-
label="Select API Endpoint",
|
959 |
-
choices=["OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral", "OpenRouter",
|
960 |
-
"Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"],
|
961 |
-
value="OpenAI"
|
962 |
-
)
|
963 |
-
api_key_input = gr.Textbox(label="API Key (optional)", type="password")
|
964 |
-
|
965 |
-
context_input = gr.Textbox(label="Initial Context (optional)", lines=5)
|
966 |
-
|
967 |
-
# Create a container for dynamic components
|
968 |
-
with gr.Column() as dynamic_components:
|
969 |
-
prompt_displays = []
|
970 |
-
user_inputs = []
|
971 |
-
output_boxes = []
|
972 |
-
process_buttons = []
|
973 |
-
regenerate_buttons = []
|
974 |
-
|
975 |
-
# Create the maximum number of components needed
|
976 |
-
max_steps = max(len(wf['prompts']) for wf in workflows)
|
977 |
-
for i in range(max_steps):
|
978 |
-
prompt_displays.append(gr.Markdown(visible=False))
|
979 |
-
user_inputs.append(gr.Textbox(label=f"Your Response", lines=2, visible=False))
|
980 |
-
output_boxes.append(gr.Textbox(label=f"AI Output", lines=5, visible=False))
|
981 |
-
with gr.Row():
|
982 |
-
process_buttons.append(gr.Button(f"Process Step {i + 1}", visible=False))
|
983 |
-
regenerate_buttons.append(gr.Button(f"🔄 Regenerate", visible=False))
|
984 |
-
|
985 |
-
def update_workflow_ui(workflow_name):
|
986 |
-
selected_workflow = next(wf for wf in workflows if wf['name'] == workflow_name)
|
987 |
-
num_prompts = len(selected_workflow['prompts'])
|
988 |
-
|
989 |
-
prompt_updates = []
|
990 |
-
input_updates = []
|
991 |
-
output_updates = []
|
992 |
-
button_updates = []
|
993 |
-
regenerate_updates = []
|
994 |
-
|
995 |
-
for i in range(max_steps):
|
996 |
-
if i < num_prompts:
|
997 |
-
prompt_updates.append(
|
998 |
-
gr.update(value=f"**Step {i + 1}:** {selected_workflow['prompts'][i]}", visible=True))
|
999 |
-
input_updates.append(gr.update(value="", visible=True, interactive=(i == 0)))
|
1000 |
-
output_updates.append(gr.update(value="", visible=True))
|
1001 |
-
button_updates.append(gr.update(visible=(i == 0)))
|
1002 |
-
regenerate_updates.append(gr.update(visible=False))
|
1003 |
-
else:
|
1004 |
-
prompt_updates.append(gr.update(visible=False))
|
1005 |
-
input_updates.append(gr.update(visible=False))
|
1006 |
-
output_updates.append(gr.update(visible=False))
|
1007 |
-
button_updates.append(gr.update(visible=False))
|
1008 |
-
regenerate_updates.append(gr.update(visible=False))
|
1009 |
-
|
1010 |
-
return prompt_updates + input_updates + output_updates + button_updates + regenerate_updates
|
1011 |
-
|
1012 |
-
def process(context, user_inputs, workflow_name, api_endpoint, api_key, step):
|
1013 |
-
selected_workflow = next(wf for wf in workflows if wf['name'] == workflow_name)
|
1014 |
-
|
1015 |
-
# Build up the context from previous steps
|
1016 |
-
full_context = context + "\n\n"
|
1017 |
-
for i in range(step + 1):
|
1018 |
-
full_context += f"Question: {selected_workflow['prompts'][i]}\n"
|
1019 |
-
full_context += f"Answer: {user_inputs[i]}\n"
|
1020 |
-
if i < step:
|
1021 |
-
full_context += f"AI Output: {output_boxes[i].value}\n\n"
|
1022 |
-
|
1023 |
-
result = process_with_llm(workflow_name, full_context, selected_workflow['prompts'][step], api_endpoint,
|
1024 |
-
api_key)
|
1025 |
-
|
1026 |
-
prompt_updates = [gr.update() for _ in range(max_steps)]
|
1027 |
-
input_updates = []
|
1028 |
-
output_updates = [gr.update() for _ in range(max_steps)]
|
1029 |
-
button_updates = []
|
1030 |
-
regenerate_updates = []
|
1031 |
-
|
1032 |
-
for i in range(len(selected_workflow['prompts'])):
|
1033 |
-
if i == step:
|
1034 |
-
regenerate_updates.append(gr.update(visible=True))
|
1035 |
-
elif i == step + 1:
|
1036 |
-
input_updates.append(gr.update(interactive=True))
|
1037 |
-
button_updates.append(gr.update(visible=True))
|
1038 |
-
regenerate_updates.append(gr.update(visible=False))
|
1039 |
-
elif i > step + 1:
|
1040 |
-
input_updates.append(gr.update(interactive=False))
|
1041 |
-
button_updates.append(gr.update(visible=False))
|
1042 |
-
regenerate_updates.append(gr.update(visible=False))
|
1043 |
-
else:
|
1044 |
-
input_updates.append(gr.update(interactive=False))
|
1045 |
-
button_updates.append(gr.update(visible=False))
|
1046 |
-
regenerate_updates.append(gr.update(visible=True))
|
1047 |
-
|
1048 |
-
return [result] + prompt_updates + input_updates + output_updates + button_updates + regenerate_updates
|
1049 |
-
|
1050 |
-
# Set up event handlers
|
1051 |
-
workflow_selector.change(
|
1052 |
-
update_workflow_ui,
|
1053 |
-
inputs=[workflow_selector],
|
1054 |
-
outputs=prompt_displays + user_inputs + output_boxes + process_buttons + regenerate_buttons
|
1055 |
)
|
1056 |
|
1057 |
-
|
1058 |
-
for i, button in enumerate(process_buttons):
|
1059 |
-
button.click(
|
1060 |
-
fn=lambda context, *user_inputs, wf_name, api_endpoint, api_key, step=i: process(context, user_inputs,
|
1061 |
-
wf_name, api_endpoint,
|
1062 |
-
api_key, step),
|
1063 |
-
inputs=[context_input] + user_inputs + [workflow_selector, api_selector, api_key_input],
|
1064 |
-
outputs=[output_boxes[
|
1065 |
-
i]] + prompt_displays + user_inputs + output_boxes + process_buttons + regenerate_buttons
|
1066 |
-
)
|
1067 |
-
|
1068 |
-
# Set up regenerate button click events
|
1069 |
-
for i, button in enumerate(regenerate_buttons):
|
1070 |
-
button.click(
|
1071 |
-
fn=lambda context, *user_inputs, wf_name, api_endpoint, api_key, step=i: process(context, user_inputs,
|
1072 |
-
wf_name, api_endpoint,
|
1073 |
-
api_key, step),
|
1074 |
-
inputs=[context_input] + user_inputs + [workflow_selector, api_selector, api_key_input],
|
1075 |
-
outputs=[output_boxes[
|
1076 |
-
i]] + prompt_displays + user_inputs + output_boxes + process_buttons + regenerate_buttons
|
1077 |
-
)
|
1078 |
|
1079 |
-
return workflow_selector, api_selector, api_key_input, context_input, dynamic_components
|
1080 |
|
1081 |
|
1082 |
# Mock function to simulate LLM processing
|
|
|
8 |
import os
|
9 |
import sqlite3
|
10 |
from datetime import datetime
|
|
|
11 |
#
|
12 |
# External Imports
|
13 |
import gradio as gr
|
|
|
95 |
# Generate bot response
|
96 |
bot_message = chat(full_message, history, media_content, selected_parts, api_endpoint, api_key, custom_prompt,
|
97 |
temperature, system_prompt)
|
98 |
+
logging.debug(f"Bot message being returned: {bot_message}")
|
99 |
if save_conversation:
|
100 |
# Add assistant message to the database
|
101 |
add_chat_message(conversation_id, "assistant", bot_message)
|
|
|
280 |
inputs=[preset_prompt_checkbox],
|
281 |
outputs=[preset_prompt]
|
282 |
)
|
|
|
283 |
submit.click(
|
284 |
chat_wrapper,
|
285 |
+
inputs=[msg, chatbot, media_content, selected_parts, api_endpoint, api_key, user_prompt, conversation_id,
|
286 |
+
save_conversation, temperature, system_prompt_input],
|
287 |
outputs=[msg, chatbot, conversation_id]
|
288 |
).then( # Clear the message box after submission
|
289 |
lambda x: gr.update(value=""),
|
|
|
775 |
with gr.TabItem("Edit"):
|
776 |
chat_content = gr.TextArea(label="Chat Content (JSON)", lines=20, max_lines=50)
|
777 |
save_button = gr.Button("Save Changes")
|
778 |
+
delete_button = gr.Button("Delete Conversation", variant="stop")
|
779 |
|
780 |
with gr.TabItem("Preview"):
|
781 |
chat_preview = gr.HTML(label="Chat Preview")
|
|
|
897 |
logging.error(f"Unexpected error in save_conversation: {e}")
|
898 |
return f"Unexpected error: {str(e)}", "<p>Unexpected error occurred</p>"
|
899 |
|
900 |
+
def delete_conversation(selected, conversation_mapping):
|
901 |
+
if not selected or selected not in conversation_mapping:
|
902 |
+
return "Please select a conversation before deleting.", "<p>No changes made</p>", gr.update(choices=[])
|
903 |
+
|
904 |
+
conversation_id = conversation_mapping[selected]
|
905 |
+
|
906 |
+
try:
|
907 |
+
with db.get_connection() as conn:
|
908 |
+
cursor = conn.cursor()
|
909 |
+
|
910 |
+
# Delete messages associated with the conversation
|
911 |
+
cursor.execute("DELETE FROM ChatMessages WHERE conversation_id = ?", (conversation_id,))
|
912 |
+
|
913 |
+
# Delete the conversation itself
|
914 |
+
cursor.execute("DELETE FROM ChatConversations WHERE id = ?", (conversation_id,))
|
915 |
+
|
916 |
+
conn.commit()
|
917 |
+
|
918 |
+
# Update the conversation list
|
919 |
+
remaining_conversations = [choice for choice in conversation_mapping.keys() if choice != selected]
|
920 |
+
updated_mapping = {choice: conversation_mapping[choice] for choice in remaining_conversations}
|
921 |
+
|
922 |
+
return "Conversation deleted successfully.", "<p>Conversation deleted</p>", gr.update(choices=remaining_conversations)
|
923 |
+
except sqlite3.Error as e:
|
924 |
+
conn.rollback()
|
925 |
+
logging.error(f"Database error in delete_conversation: {e}")
|
926 |
+
return f"Error deleting conversation: {str(e)}", "<p>Error occurred while deleting</p>", gr.update()
|
927 |
+
except Exception as e:
|
928 |
+
conn.rollback()
|
929 |
+
logging.error(f"Unexpected error in delete_conversation: {e}")
|
930 |
+
return f"Unexpected error: {str(e)}", "<p>Unexpected error occurred</p>", gr.update()
|
931 |
+
|
932 |
def parse_formatted_content(formatted_content):
|
933 |
lines = formatted_content.split('\n')
|
934 |
conversation_id = int(lines[0].split(': ')[1])
|
|
|
969 |
outputs=[result_message, chat_preview]
|
970 |
)
|
971 |
|
972 |
+
delete_button.click(
|
973 |
+
delete_conversation,
|
974 |
+
inputs=[conversation_list, conversation_mapping],
|
975 |
+
outputs=[result_message, chat_preview, conversation_list]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
976 |
)
|
977 |
|
978 |
+
return search_query, search_button, conversation_list, conversation_mapping, chat_content, save_button, delete_button, result_message, chat_preview
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
979 |
|
|
|
980 |
|
981 |
|
982 |
# Mock function to simulate LLM processing
|
App_Function_Libraries/Gradio_UI/Evaluations_Benchmarks_tab.py
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
###################################################################################################
|
2 |
+
# Evaluations_Benchmarks_tab.py - Gradio code for G-Eval testing
|
3 |
+
# We will use the G-Eval API to evaluate the quality of the generated summaries.
|
4 |
+
|
5 |
+
import gradio as gr
|
6 |
+
from App_Function_Libraries.Benchmarks_Evaluations.ms_g_eval import run_geval
|
7 |
+
|
8 |
+
def create_geval_tab():
|
9 |
+
with gr.Tab("G-Eval"):
|
10 |
+
gr.Markdown("# G-Eval Summarization Evaluation")
|
11 |
+
with gr.Row():
|
12 |
+
with gr.Column():
|
13 |
+
document_input = gr.Textbox(label="Source Document", lines=10)
|
14 |
+
summary_input = gr.Textbox(label="Summary", lines=5)
|
15 |
+
api_name_input = gr.Dropdown(
|
16 |
+
choices=["OpenAI", "Anthropic", "Cohere", "Groq", "OpenRouter", "DeepSeek", "HuggingFace", "Mistral", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "Local-LLM", "Ollama"],
|
17 |
+
label="Select API"
|
18 |
+
)
|
19 |
+
api_key_input = gr.Textbox(label="API Key (if required)", type="password")
|
20 |
+
evaluate_button = gr.Button("Evaluate Summary")
|
21 |
+
with gr.Column():
|
22 |
+
output = gr.Textbox(label="Evaluation Results", lines=10)
|
23 |
+
|
24 |
+
evaluate_button.click(
|
25 |
+
fn=run_geval,
|
26 |
+
inputs=[document_input, summary_input, api_name_input, api_key_input],
|
27 |
+
outputs=output
|
28 |
+
)
|
29 |
+
|
30 |
+
return document_input, summary_input, api_name_input, api_key_input, evaluate_button, output
|
31 |
+
|
32 |
+
|
33 |
+
def create_infinite_bench_tab():
|
34 |
+
with gr.Tab("Infinite Bench"):
|
35 |
+
gr.Markdown("# Infinite Bench Evaluation (Coming Soon)")
|
36 |
+
with gr.Row():
|
37 |
+
with gr.Column():
|
38 |
+
api_name_input = gr.Dropdown(
|
39 |
+
choices=["OpenAI", "Anthropic", "Cohere", "Groq", "OpenRouter", "DeepSeek", "HuggingFace", "Mistral", "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "Local-LLM", "Ollama"],
|
40 |
+
label="Select API"
|
41 |
+
)
|
42 |
+
api_key_input = gr.Textbox(label="API Key (if required)", type="password")
|
43 |
+
evaluate_button = gr.Button("Evaluate Summary")
|
44 |
+
with gr.Column():
|
45 |
+
output = gr.Textbox(label="Evaluation Results", lines=10)
|
46 |
+
|
47 |
+
# evaluate_button.click(
|
48 |
+
# fn=run_geval,
|
49 |
+
# inputs=[api_name_input, api_key_input],
|
50 |
+
# outputs=output
|
51 |
+
# )
|
52 |
+
|
53 |
+
return api_name_input, api_key_input, evaluate_button, output
|
54 |
+
|
55 |
+
|
56 |
+
# If you want to run this as a standalone Gradio app
|
57 |
+
if __name__ == "__main__":
|
58 |
+
with gr.Blocks() as demo:
|
59 |
+
create_geval_tab()
|
60 |
+
demo.launch()
|
App_Function_Libraries/Gradio_UI/Explain_summarize_tab.py
CHANGED
@@ -75,10 +75,11 @@ def create_summarize_explain_tab():
|
|
75 |
api_endpoint = gr.Dropdown(
|
76 |
choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
|
77 |
"OpenRouter",
|
78 |
-
"Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"],
|
79 |
value=None,
|
80 |
-
label="API for
|
81 |
)
|
|
|
82 |
api_key_input = gr.Textbox(label="API Key (if required)", placeholder="Enter your API key here",
|
83 |
type="password")
|
84 |
with gr.Row():
|
|
|
75 |
api_endpoint = gr.Dropdown(
|
76 |
choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
|
77 |
"OpenRouter",
|
78 |
+
"Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace", "Custom-OpenAI-API"],
|
79 |
value=None,
|
80 |
+
label="API to be used for request (Mandatory)"
|
81 |
)
|
82 |
+
with gr.Row():
|
83 |
api_key_input = gr.Textbox(label="API Key (if required)", placeholder="Enter your API key here",
|
84 |
type="password")
|
85 |
with gr.Row():
|
App_Function_Libraries/Gradio_UI/Export_Functionality.py
CHANGED
@@ -8,7 +8,7 @@ import shutil
|
|
8 |
import tempfile
|
9 |
from typing import List, Dict, Optional, Tuple
|
10 |
import gradio as gr
|
11 |
-
from App_Function_Libraries.DB.DB_Manager import DatabaseError
|
12 |
from App_Function_Libraries.Gradio_UI.Gradio_Shared import fetch_item_details, fetch_items_by_keyword, browse_items
|
13 |
|
14 |
logger = logging.getLogger(__name__)
|
@@ -264,51 +264,3 @@ def create_export_tab():
|
|
264 |
)
|
265 |
|
266 |
|
267 |
-
|
268 |
-
def create_backup():
|
269 |
-
backup_file = create_automated_backup(db_path, backup_dir)
|
270 |
-
return f"Backup created: {backup_file}"
|
271 |
-
|
272 |
-
def list_backups():
|
273 |
-
backups = [f for f in os.listdir(backup_dir) if f.endswith('.db')]
|
274 |
-
return "\n".join(backups)
|
275 |
-
|
276 |
-
def restore_backup(backup_name):
|
277 |
-
backup_path = os.path.join(backup_dir, backup_name)
|
278 |
-
if os.path.exists(backup_path):
|
279 |
-
shutil.copy2(backup_path, db_path)
|
280 |
-
return f"Database restored from {backup_name}"
|
281 |
-
else:
|
282 |
-
return "Backup file not found"
|
283 |
-
|
284 |
-
|
285 |
-
def create_backup_tab():
|
286 |
-
with gr.Tab("Create Backup"):
|
287 |
-
gr.Markdown("# Create a backup of the database")
|
288 |
-
with gr.Row():
|
289 |
-
with gr.Column():
|
290 |
-
create_button = gr.Button("Create Backup")
|
291 |
-
create_output = gr.Textbox(label="Result")
|
292 |
-
with gr.Column():
|
293 |
-
create_button.click(create_backup, inputs=[], outputs=create_output)
|
294 |
-
|
295 |
-
def create_view_backups_tab():
|
296 |
-
with gr.TabItem("View Backups"):
|
297 |
-
gr.Markdown("# Browse available backups")
|
298 |
-
with gr.Row():
|
299 |
-
with gr.Column():
|
300 |
-
view_button = gr.Button("View Backups")
|
301 |
-
with gr.Column():
|
302 |
-
backup_list = gr.Textbox(label="Available Backups")
|
303 |
-
view_button.click(list_backups, inputs=[], outputs=backup_list)
|
304 |
-
|
305 |
-
|
306 |
-
def create_restore_backup_tab():
|
307 |
-
with gr.TabItem("Restore Backup"):
|
308 |
-
gr.Markdown("# Restore a backup of the database")
|
309 |
-
with gr.Column():
|
310 |
-
backup_input = gr.Textbox(label="Backup Filename")
|
311 |
-
restore_button = gr.Button("Restore")
|
312 |
-
with gr.Column():
|
313 |
-
restore_output = gr.Textbox(label="Result")
|
314 |
-
restore_button.click(restore_backup, inputs=[backup_input], outputs=restore_output)
|
|
|
8 |
import tempfile
|
9 |
from typing import List, Dict, Optional, Tuple
|
10 |
import gradio as gr
|
11 |
+
from App_Function_Libraries.DB.DB_Manager import DatabaseError
|
12 |
from App_Function_Libraries.Gradio_UI.Gradio_Shared import fetch_item_details, fetch_items_by_keyword, browse_items
|
13 |
|
14 |
logger = logging.getLogger(__name__)
|
|
|
264 |
)
|
265 |
|
266 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
App_Function_Libraries/Gradio_UI/Import_Functionality.py
CHANGED
@@ -231,7 +231,7 @@ def create_import_item_tab():
|
|
231 |
auto_summarize_checkbox = gr.Checkbox(label="Auto-summarize", value=False)
|
232 |
api_name_input = gr.Dropdown(
|
233 |
choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral", "OpenRouter",
|
234 |
-
"Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM","ollama", "HuggingFace"],
|
235 |
label="API for Auto-summarization"
|
236 |
)
|
237 |
api_key_input = gr.Textbox(label="API Key", type="password")
|
|
|
231 |
auto_summarize_checkbox = gr.Checkbox(label="Auto-summarize", value=False)
|
232 |
api_name_input = gr.Dropdown(
|
233 |
choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral", "OpenRouter",
|
234 |
+
"Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM","ollama", "HuggingFace", "Custom-OpenAI-API"],
|
235 |
label="API for Auto-summarization"
|
236 |
)
|
237 |
api_key_input = gr.Textbox(label="API Key", type="password")
|
App_Function_Libraries/Gradio_UI/Media_edit.py
CHANGED
@@ -10,7 +10,7 @@ import gradio as gr
|
|
10 |
#
|
11 |
# Local Imports
|
12 |
from App_Function_Libraries.DB.DB_Manager import add_prompt, update_media_content, db, add_or_update_prompt, \
|
13 |
-
load_prompt_details
|
14 |
from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_dropdown, update_prompt_dropdown
|
15 |
from App_Function_Libraries.DB.SQLite_DB import fetch_item_details
|
16 |
|
@@ -32,32 +32,58 @@ def create_media_edit_tab():
|
|
32 |
prompt_input = gr.Textbox(label="Edit Prompt", lines=3)
|
33 |
summary_input = gr.Textbox(label="Edit Summary", lines=5)
|
34 |
|
|
|
|
|
|
|
35 |
update_button = gr.Button("Update Media Content")
|
36 |
status_message = gr.Textbox(label="Status", interactive=False)
|
37 |
|
|
|
38 |
search_button.click(
|
39 |
fn=update_dropdown,
|
40 |
inputs=[search_query_input, search_type_input],
|
41 |
outputs=[items_output, item_mapping]
|
42 |
)
|
43 |
|
|
|
44 |
def load_selected_media_content(selected_item, item_mapping):
|
45 |
if selected_item and item_mapping and selected_item in item_mapping:
|
46 |
media_id = item_mapping[selected_item]
|
47 |
-
# FIXME - fetch_item_details is not handled by DB_Manager!
|
48 |
content, prompt, summary = fetch_item_details(media_id)
|
49 |
-
return content, prompt, summary
|
50 |
-
return "No item selected or invalid selection", "", ""
|
51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
items_output.change(
|
53 |
fn=load_selected_media_content,
|
54 |
inputs=[items_output, item_mapping],
|
55 |
-
outputs=[content_input, prompt_input, summary_input]
|
56 |
)
|
57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
update_button.click(
|
59 |
-
fn=
|
60 |
-
inputs=[items_output, item_mapping, content_input, prompt_input, summary_input],
|
61 |
outputs=status_message
|
62 |
)
|
63 |
|
|
|
10 |
#
|
11 |
# Local Imports
|
12 |
from App_Function_Libraries.DB.DB_Manager import add_prompt, update_media_content, db, add_or_update_prompt, \
|
13 |
+
load_prompt_details, fetch_keywords_for_media, update_keywords_for_media
|
14 |
from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_dropdown, update_prompt_dropdown
|
15 |
from App_Function_Libraries.DB.SQLite_DB import fetch_item_details
|
16 |
|
|
|
32 |
prompt_input = gr.Textbox(label="Edit Prompt", lines=3)
|
33 |
summary_input = gr.Textbox(label="Edit Summary", lines=5)
|
34 |
|
35 |
+
# Adding keyword input box for editing
|
36 |
+
keywords_input = gr.Textbox(label="Edit Keywords (comma-separated)", placeholder="Enter keywords here...")
|
37 |
+
|
38 |
update_button = gr.Button("Update Media Content")
|
39 |
status_message = gr.Textbox(label="Status", interactive=False)
|
40 |
|
41 |
+
# Function to update the dropdown with search results
|
42 |
search_button.click(
|
43 |
fn=update_dropdown,
|
44 |
inputs=[search_query_input, search_type_input],
|
45 |
outputs=[items_output, item_mapping]
|
46 |
)
|
47 |
|
48 |
+
# Function to load selected media content including keywords
|
49 |
def load_selected_media_content(selected_item, item_mapping):
|
50 |
if selected_item and item_mapping and selected_item in item_mapping:
|
51 |
media_id = item_mapping[selected_item]
|
|
|
52 |
content, prompt, summary = fetch_item_details(media_id)
|
|
|
|
|
53 |
|
54 |
+
# Fetch keywords for the selected item
|
55 |
+
keywords = fetch_keywords_for_media(media_id)
|
56 |
+
keywords_str = ", ".join(keywords) if keywords else ""
|
57 |
+
|
58 |
+
return content, prompt, summary, keywords_str
|
59 |
+
return "No item selected or invalid selection", "", "", ""
|
60 |
+
|
61 |
+
# Load the selected media content and associated keywords
|
62 |
items_output.change(
|
63 |
fn=load_selected_media_content,
|
64 |
inputs=[items_output, item_mapping],
|
65 |
+
outputs=[content_input, prompt_input, summary_input, keywords_input]
|
66 |
)
|
67 |
|
68 |
+
# Function to update media content, prompt, summary, and keywords
|
69 |
+
def update_media_with_keywords(selected_item, item_mapping, content, prompt, summary, keywords):
|
70 |
+
if selected_item and item_mapping and selected_item in item_mapping:
|
71 |
+
media_id = item_mapping[selected_item]
|
72 |
+
|
73 |
+
# Split keywords into a list
|
74 |
+
keyword_list = [kw.strip() for kw in keywords.split(",") if kw.strip()]
|
75 |
+
|
76 |
+
# Update content, prompt, summary, and keywords in the database
|
77 |
+
status = update_media_content(media_id, content, prompt, summary)
|
78 |
+
keyword_status = update_keywords_for_media(media_id, keyword_list)
|
79 |
+
|
80 |
+
return f"{status}\nKeywords: {keyword_status}"
|
81 |
+
return "No item selected or invalid selection"
|
82 |
+
|
83 |
+
# Update button click event
|
84 |
update_button.click(
|
85 |
+
fn=update_media_with_keywords,
|
86 |
+
inputs=[items_output, item_mapping, content_input, prompt_input, summary_input, keywords_input],
|
87 |
outputs=status_message
|
88 |
)
|
89 |
|
App_Function_Libraries/Gradio_UI/Media_wiki_tab.py
CHANGED
@@ -7,9 +7,11 @@ from threading import Thread
|
|
7 |
#
|
8 |
# 3rd-party Imports
|
9 |
import gradio as gr
|
|
|
|
|
10 |
#
|
11 |
# Local Imports
|
12 |
-
from App_Function_Libraries.MediaWiki.Media_Wiki import import_mediawiki_dump
|
13 |
#
|
14 |
#######################################################################################################################
|
15 |
#
|
@@ -32,6 +34,13 @@ def create_mediawiki_import_tab():
|
|
32 |
)
|
33 |
chunk_size = gr.Slider(minimum=100, maximum=2000, value=1000, step=100, label="Chunk Size")
|
34 |
chunk_overlap = gr.Slider(minimum=0, maximum=500, value=100, step=10, label="Chunk Overlap")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
import_button = gr.Button("Import MediaWiki Dump")
|
36 |
cancel_button = gr.Button("Cancel Import", visible=False)
|
37 |
with gr.Column():
|
@@ -61,7 +70,7 @@ def create_mediawiki_import_tab():
|
|
61 |
cancel_flag = False
|
62 |
|
63 |
def run_import(file_path, wiki_name, namespaces, skip_redirects, single_item, chunk_method, chunk_size,
|
64 |
-
chunk_overlap, progress=gr.Progress()):
|
65 |
validation_error = validate_inputs(file_path, wiki_name, namespaces)
|
66 |
if validation_error:
|
67 |
return gr.update(), gr.update(), validation_error
|
@@ -90,7 +99,9 @@ def create_mediawiki_import_tab():
|
|
90 |
skip_redirects=skip_redirects,
|
91 |
chunk_options=chunk_options,
|
92 |
single_item=single_item,
|
93 |
-
progress_callback=progress
|
|
|
|
|
94 |
):
|
95 |
if progress_info.startswith("Found"):
|
96 |
status_text += f"\n## Parsing\n- {progress_info}\n"
|
@@ -126,7 +137,7 @@ def create_mediawiki_import_tab():
|
|
126 |
import_button.click(
|
127 |
run_import,
|
128 |
inputs=[file_path, wiki_name, namespaces, skip_redirects, single_item, chunk_method, chunk_size,
|
129 |
-
chunk_overlap],
|
130 |
outputs=[cancel_button, import_button, output]
|
131 |
)
|
132 |
|
@@ -137,6 +148,193 @@ def create_mediawiki_import_tab():
|
|
137 |
|
138 |
return file_path, wiki_name, namespaces, skip_redirects, single_item, chunk_method, chunk_size, chunk_overlap, import_button, output
|
139 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
140 |
#
|
141 |
# End of MediaWiki Import Tab
|
142 |
#######################################################################################################################
|
|
|
7 |
#
|
8 |
# 3rd-party Imports
|
9 |
import gradio as gr
|
10 |
+
import yaml
|
11 |
+
from ruamel.yaml import YAML
|
12 |
#
|
13 |
# Local Imports
|
14 |
+
from App_Function_Libraries.MediaWiki.Media_Wiki import import_mediawiki_dump, config
|
15 |
#
|
16 |
#######################################################################################################################
|
17 |
#
|
|
|
34 |
)
|
35 |
chunk_size = gr.Slider(minimum=100, maximum=2000, value=1000, step=100, label="Chunk Size")
|
36 |
chunk_overlap = gr.Slider(minimum=0, maximum=500, value=100, step=10, label="Chunk Overlap")
|
37 |
+
# FIXME - Add checkbox for 'Enable Summarization upon ingestion' for API summarization of chunks
|
38 |
+
# api_endpoint = gr.Dropdown(label="Select API Endpoint",
|
39 |
+
# choices=["Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek",
|
40 |
+
# "Mistral", "OpenRouter",
|
41 |
+
# "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama",
|
42 |
+
# "HuggingFace"])
|
43 |
+
# api_key = gr.Textbox(label="API Key (if required)", type="password")
|
44 |
import_button = gr.Button("Import MediaWiki Dump")
|
45 |
cancel_button = gr.Button("Cancel Import", visible=False)
|
46 |
with gr.Column():
|
|
|
70 |
cancel_flag = False
|
71 |
|
72 |
def run_import(file_path, wiki_name, namespaces, skip_redirects, single_item, chunk_method, chunk_size,
|
73 |
+
chunk_overlap, progress=gr.Progress()):#, api_endpoint=None, api_key=None):
|
74 |
validation_error = validate_inputs(file_path, wiki_name, namespaces)
|
75 |
if validation_error:
|
76 |
return gr.update(), gr.update(), validation_error
|
|
|
99 |
skip_redirects=skip_redirects,
|
100 |
chunk_options=chunk_options,
|
101 |
single_item=single_item,
|
102 |
+
progress_callback=progress,
|
103 |
+
# api_name=api_endpoint,
|
104 |
+
# api_key=api_key
|
105 |
):
|
106 |
if progress_info.startswith("Found"):
|
107 |
status_text += f"\n## Parsing\n- {progress_info}\n"
|
|
|
137 |
import_button.click(
|
138 |
run_import,
|
139 |
inputs=[file_path, wiki_name, namespaces, skip_redirects, single_item, chunk_method, chunk_size,
|
140 |
+
chunk_overlap],#, api_endpoint, api_key],
|
141 |
outputs=[cancel_button, import_button, output]
|
142 |
)
|
143 |
|
|
|
148 |
|
149 |
return file_path, wiki_name, namespaces, skip_redirects, single_item, chunk_method, chunk_size, chunk_overlap, import_button, output
|
150 |
|
151 |
+
|
152 |
+
class PreservedTokenSafeDumper(yaml.SafeDumper):
|
153 |
+
def represent_scalar(self, tag, value, style=None):
|
154 |
+
if style is None and isinstance(value, str) and '\n' in value:
|
155 |
+
style = '|'
|
156 |
+
return super().represent_scalar(tag, value, style)
|
157 |
+
|
158 |
+
|
159 |
+
def update_yaml_file(file_path, updates):
|
160 |
+
with open(file_path, 'r') as file:
|
161 |
+
lines = file.readlines()
|
162 |
+
|
163 |
+
def format_value(value):
|
164 |
+
if isinstance(value, bool):
|
165 |
+
return str(value).lower()
|
166 |
+
elif isinstance(value, (int, float)):
|
167 |
+
return str(value)
|
168 |
+
elif isinstance(value, list):
|
169 |
+
return '[' + ', '.join(map(str, value)) + ']'
|
170 |
+
else:
|
171 |
+
return f"'{value}'"
|
172 |
+
|
173 |
+
def update_line(line, updates, prefix=''):
|
174 |
+
for key, value in updates.items():
|
175 |
+
full_key = f"{prefix}{key}:" if prefix else f"{key}:"
|
176 |
+
if line.strip().startswith(full_key):
|
177 |
+
indentation = line[:line.index(full_key)]
|
178 |
+
if isinstance(value, dict):
|
179 |
+
return line # Keep the line as is for nested structures
|
180 |
+
else:
|
181 |
+
return f"{indentation}{full_key} {format_value(value)}\n"
|
182 |
+
return line
|
183 |
+
|
184 |
+
updated_lines = []
|
185 |
+
current_prefix = ''
|
186 |
+
for line in lines:
|
187 |
+
stripped = line.strip()
|
188 |
+
if stripped and not stripped.startswith('#'):
|
189 |
+
indent = len(line) - len(line.lstrip())
|
190 |
+
if indent == 0:
|
191 |
+
current_prefix = ''
|
192 |
+
elif ':' in stripped and not stripped.endswith(':'):
|
193 |
+
current_prefix = '.'.join(current_prefix.split('.')[:-1]) + '.' if current_prefix else ''
|
194 |
+
|
195 |
+
updated_line = update_line(line, updates, current_prefix)
|
196 |
+
|
197 |
+
if updated_line == line and ':' in stripped and stripped.endswith(':'):
|
198 |
+
key = stripped[:-1].strip()
|
199 |
+
if current_prefix:
|
200 |
+
current_prefix += f"{key}."
|
201 |
+
else:
|
202 |
+
current_prefix = f"{key}."
|
203 |
+
|
204 |
+
updated_lines.append(updated_line)
|
205 |
+
else:
|
206 |
+
updated_lines.append(line)
|
207 |
+
|
208 |
+
with open(file_path, 'w') as file:
|
209 |
+
file.writelines(updated_lines)
|
210 |
+
|
211 |
+
#
|
212 |
+
#
|
213 |
+
#######################################################################################################################
|
214 |
+
#
|
215 |
+
# Config tab
|
216 |
+
|
217 |
+
yaml = YAML()
|
218 |
+
yaml.preserve_quotes = True
|
219 |
+
yaml.indent(mapping=2, sequence=4, offset=2)
|
220 |
+
|
221 |
+
def load_config():
|
222 |
+
config_path = os.path.join('Config_Files', 'mediawiki_import_config.yaml')
|
223 |
+
with open(config_path, 'r') as file:
|
224 |
+
return yaml.load(file)
|
225 |
+
|
226 |
+
def save_config(updated_config):
|
227 |
+
config_path = os.path.join('Config_Files', 'mediawiki_import_config.yaml')
|
228 |
+
config = load_config()
|
229 |
+
|
230 |
+
|
231 |
+
def create_mediawiki_config_tab():
|
232 |
+
with gr.TabItem("MediaWiki Import Configuration"):
|
233 |
+
gr.Markdown("# MediaWiki Import Configuration (Broken currently/doesn't work)")
|
234 |
+
with gr.Row():
|
235 |
+
with gr.Column():
|
236 |
+
namespaces = gr.Textbox(label="Default Namespaces (comma-separated integers)",
|
237 |
+
value=','.join(map(str, config['import']['default_namespaces'])))
|
238 |
+
skip_redirects = gr.Checkbox(label="Skip Redirects by Default",
|
239 |
+
value=config['import']['default_skip_redirects'])
|
240 |
+
single_item = gr.Checkbox(label="Import as Single Item by Default",
|
241 |
+
value=config['import']['single_item_default'])
|
242 |
+
batch_size = gr.Number(value=config['import']['batch_size'], label="Batch Size")
|
243 |
+
|
244 |
+
chunk_method = gr.Dropdown(
|
245 |
+
choices=config['chunking']['methods'],
|
246 |
+
value=config['chunking']['default_method'],
|
247 |
+
label="Default Chunking Method"
|
248 |
+
)
|
249 |
+
chunk_size = gr.Slider(minimum=100, maximum=2000, value=config['chunking']['default_size'], step=100,
|
250 |
+
label="Default Chunk Size")
|
251 |
+
chunk_overlap = gr.Slider(minimum=0, maximum=500, value=config['chunking']['default_overlap'], step=10,
|
252 |
+
label="Default Chunk Overlap")
|
253 |
+
|
254 |
+
with gr.Column():
|
255 |
+
max_workers = gr.Slider(minimum=1, maximum=16, value=config['processing']['max_workers'], step=1,
|
256 |
+
label="Max Worker Threads")
|
257 |
+
|
258 |
+
embedding_provider = gr.Dropdown(
|
259 |
+
choices=['openai', 'local', 'huggingface'],
|
260 |
+
value=config['embeddings']['provider'],
|
261 |
+
label="Embedding Provider"
|
262 |
+
)
|
263 |
+
embedding_model = gr.Textbox(label="Embedding Model", value=config['embeddings']['model'])
|
264 |
+
api_key = gr.Textbox(label="API Key (if required)", type="password",
|
265 |
+
value=config['embeddings'].get('api_key', ''))
|
266 |
+
local_embedding_url = gr.Textbox(label="Local Embedding URL",
|
267 |
+
value=config['embeddings'].get('local_url', ''))
|
268 |
+
|
269 |
+
checkpoints_enabled = gr.Checkbox(label="Enable Checkpoints", value=config['checkpoints']['enabled'])
|
270 |
+
checkpoint_directory = gr.Textbox(label="Checkpoint Directory", value=config['checkpoints']['directory'])
|
271 |
+
|
272 |
+
max_retries = gr.Number(value=config['error_handling']['max_retries'], label="Max Retries")
|
273 |
+
retry_delay = gr.Number(value=config['error_handling']['retry_delay'], label="Retry Delay (seconds)")
|
274 |
+
|
275 |
+
save_config_button = gr.Button("Save Configuration")
|
276 |
+
config_output = gr.Markdown(label="Configuration Status")
|
277 |
+
|
278 |
+
def update_config_from_ui(namespaces, skip_redirects, single_item, batch_size, chunk_method, chunk_size,
|
279 |
+
chunk_overlap, max_workers, embedding_provider, embedding_model, api_key,
|
280 |
+
local_embedding_url, checkpoints_enabled, checkpoint_directory, max_retries,
|
281 |
+
retry_delay):
|
282 |
+
current_config = load_config()
|
283 |
+
updated_config = {}
|
284 |
+
|
285 |
+
if namespaces != ','.join(map(str, current_config['import']['default_namespaces'])):
|
286 |
+
updated_config.setdefault('import', {})['default_namespaces'] = [int(ns.strip()) for ns in
|
287 |
+
namespaces.split(',') if ns.strip()]
|
288 |
+
if skip_redirects != current_config['import']['default_skip_redirects']:
|
289 |
+
updated_config.setdefault('import', {})['default_skip_redirects'] = skip_redirects
|
290 |
+
if single_item != current_config['import']['single_item_default']:
|
291 |
+
updated_config.setdefault('import', {})['single_item_default'] = single_item
|
292 |
+
if int(batch_size) != current_config['import']['batch_size']:
|
293 |
+
updated_config.setdefault('import', {})['batch_size'] = int(batch_size)
|
294 |
+
if chunk_method != current_config['chunking']['default_method']:
|
295 |
+
updated_config.setdefault('chunking', {})['default_method'] = chunk_method
|
296 |
+
if int(chunk_size) != current_config['chunking']['default_size']:
|
297 |
+
updated_config.setdefault('chunking', {})['default_size'] = int(chunk_size)
|
298 |
+
if int(chunk_overlap) != current_config['chunking']['default_overlap']:
|
299 |
+
updated_config.setdefault('chunking', {})['default_overlap'] = int(chunk_overlap)
|
300 |
+
if int(max_workers) != current_config['processing']['max_workers']:
|
301 |
+
updated_config.setdefault('processing', {})['max_workers'] = int(max_workers)
|
302 |
+
if embedding_provider != current_config['embeddings']['provider']:
|
303 |
+
updated_config.setdefault('embeddings', {})['provider'] = embedding_provider
|
304 |
+
if embedding_model != current_config['embeddings']['model']:
|
305 |
+
updated_config.setdefault('embeddings', {})['model'] = embedding_model
|
306 |
+
if api_key != current_config['embeddings'].get('api_key', ''):
|
307 |
+
updated_config.setdefault('embeddings', {})['api_key'] = api_key
|
308 |
+
if local_embedding_url != current_config['embeddings'].get('local_url', ''):
|
309 |
+
updated_config.setdefault('embeddings', {})['local_url'] = local_embedding_url
|
310 |
+
if checkpoints_enabled != current_config['checkpoints']['enabled']:
|
311 |
+
updated_config.setdefault('checkpoints', {})['enabled'] = checkpoints_enabled
|
312 |
+
if checkpoint_directory != current_config['checkpoints']['directory']:
|
313 |
+
updated_config.setdefault('checkpoints', {})['directory'] = checkpoint_directory
|
314 |
+
if int(max_retries) != current_config['error_handling']['max_retries']:
|
315 |
+
updated_config.setdefault('error_handling', {})['max_retries'] = int(max_retries)
|
316 |
+
if int(retry_delay) != current_config['error_handling']['retry_delay']:
|
317 |
+
updated_config.setdefault('error_handling', {})['retry_delay'] = int(retry_delay)
|
318 |
+
|
319 |
+
return updated_config
|
320 |
+
|
321 |
+
def save_config_callback(*args):
|
322 |
+
updated_config = update_config_from_ui(*args)
|
323 |
+
save_config(updated_config)
|
324 |
+
return "Configuration saved successfully."
|
325 |
+
|
326 |
+
save_config_button.click(
|
327 |
+
save_config_callback,
|
328 |
+
inputs=[namespaces, skip_redirects, single_item, batch_size, chunk_method, chunk_size,
|
329 |
+
chunk_overlap, max_workers, embedding_provider, embedding_model, api_key,
|
330 |
+
local_embedding_url, checkpoints_enabled, checkpoint_directory, max_retries, retry_delay],
|
331 |
+
outputs=config_output
|
332 |
+
)
|
333 |
+
|
334 |
+
return namespaces, skip_redirects, single_item, batch_size, chunk_method, chunk_size, chunk_overlap, max_workers, \
|
335 |
+
embedding_provider, embedding_model, api_key, local_embedding_url, checkpoints_enabled, checkpoint_directory, \
|
336 |
+
max_retries, retry_delay, save_config_button, config_output
|
337 |
+
|
338 |
#
|
339 |
# End of MediaWiki Import Tab
|
340 |
#######################################################################################################################
|
App_Function_Libraries/Gradio_UI/Podcast_tab.py
CHANGED
@@ -99,7 +99,7 @@ def create_podcast_tab():
|
|
99 |
|
100 |
podcast_api_name_input = gr.Dropdown(
|
101 |
choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral", "OpenRouter", "Llama.cpp",
|
102 |
-
"Kobold", "Ooba", "Tabbyapi", "VLLM","ollama", "HuggingFace"],
|
103 |
value=None,
|
104 |
label="API Name for Summarization (Optional)"
|
105 |
)
|
|
|
99 |
|
100 |
podcast_api_name_input = gr.Dropdown(
|
101 |
choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral", "OpenRouter", "Llama.cpp",
|
102 |
+
"Kobold", "Ooba", "Tabbyapi", "VLLM","ollama", "HuggingFace", "Custom-OpenAI-API"],
|
103 |
value=None,
|
104 |
label="API Name for Summarization (Optional)"
|
105 |
)
|
App_Function_Libraries/Gradio_UI/Search_Tab.py
CHANGED
@@ -1,426 +1,404 @@
|
|
1 |
-
# Search_Tab.py
|
2 |
-
# Description: This file contains the code for the search tab in the Gradio UI
|
3 |
-
#
|
4 |
-
# Imports
|
5 |
-
import html
|
6 |
-
import logging
|
7 |
-
import sqlite3
|
8 |
-
|
9 |
-
#
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
from App_Function_Libraries.DB.DB_Manager import view_database, search_and_display_items
|
14 |
-
from App_Function_Libraries.
|
15 |
-
|
16 |
-
|
17 |
-
#
|
18 |
-
|
19 |
-
#
|
20 |
-
#
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
result_md += f"**
|
74 |
-
result_md += f"**
|
75 |
-
result_md += "
|
76 |
-
|
77 |
-
result_md += "
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
)
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
)
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
def go_to_previous_search_page(query, current_page, entries_per_page):
|
407 |
-
previous_page = max(1, current_page - 1)
|
408 |
-
return update_search_page(query, previous_page, entries_per_page)
|
409 |
-
|
410 |
-
search_button.click(
|
411 |
-
fn=update_search_page,
|
412 |
-
inputs=[search_query_input, page_number, entries_per_page],
|
413 |
-
outputs=[search_results_output, pagination_info, page_number, next_page_button, previous_page_button]
|
414 |
-
)
|
415 |
-
|
416 |
-
next_page_button.click(
|
417 |
-
fn=go_to_next_search_page,
|
418 |
-
inputs=[search_query_input, page_number, entries_per_page],
|
419 |
-
outputs=[search_results_output, pagination_info, page_number, next_page_button, previous_page_button]
|
420 |
-
)
|
421 |
-
|
422 |
-
previous_page_button.click(
|
423 |
-
fn=go_to_previous_search_page,
|
424 |
-
inputs=[search_query_input, page_number, entries_per_page],
|
425 |
-
outputs=[search_results_output, pagination_info, page_number, next_page_button, previous_page_button]
|
426 |
-
)
|
|
|
1 |
+
# Search_Tab.py
|
2 |
+
# Description: This file contains the code for the search tab in the Gradio UI
|
3 |
+
#
|
4 |
+
# Imports
|
5 |
+
import html
|
6 |
+
import logging
|
7 |
+
import sqlite3
|
8 |
+
#
|
9 |
+
# External Imports
|
10 |
+
import gradio as gr
|
11 |
+
#
|
12 |
+
# Local Imports
|
13 |
+
from App_Function_Libraries.DB.DB_Manager import view_database, search_and_display_items
|
14 |
+
from App_Function_Libraries.DB.SQLite_DB import search_prompts
|
15 |
+
from App_Function_Libraries.Gradio_UI.Gradio_Shared import update_dropdown, update_detailed_view
|
16 |
+
from App_Function_Libraries.Utils.Utils import get_database_path
|
17 |
+
#
|
18 |
+
###################################################################################################
|
19 |
+
#
|
20 |
+
# Functions:
|
21 |
+
|
22 |
+
logger = logging.getLogger()
|
23 |
+
|
24 |
+
|
25 |
+
def create_search_tab():
|
26 |
+
with gr.TabItem("Search / Detailed View"):
|
27 |
+
with gr.Row():
|
28 |
+
with gr.Column():
|
29 |
+
gr.Markdown("# Search across all ingested items in the Database")
|
30 |
+
gr.Markdown(" by Title / URL / Keyword / or Content via SQLite Full-Text-Search")
|
31 |
+
search_query_input = gr.Textbox(label="Search Query", placeholder="Enter your search query here...")
|
32 |
+
search_type_input = gr.Radio(choices=["Title", "URL", "Keyword", "Content"], value="Title", label="Search By")
|
33 |
+
search_button = gr.Button("Search")
|
34 |
+
items_output = gr.Dropdown(label="Select Item", choices=[])
|
35 |
+
item_mapping = gr.State({})
|
36 |
+
prompt_summary_output = gr.HTML(label="Prompt & Summary", visible=True)
|
37 |
+
|
38 |
+
search_button.click(
|
39 |
+
fn=update_dropdown,
|
40 |
+
inputs=[search_query_input, search_type_input],
|
41 |
+
outputs=[items_output, item_mapping]
|
42 |
+
)
|
43 |
+
with gr.Column():
|
44 |
+
content_output = gr.Markdown(label="Content", visible=True)
|
45 |
+
items_output.change(
|
46 |
+
fn=update_detailed_view,
|
47 |
+
inputs=[items_output, item_mapping],
|
48 |
+
outputs=[prompt_summary_output, content_output]
|
49 |
+
)
|
50 |
+
|
51 |
+
|
52 |
+
def display_search_results(query):
    """Render prompt-search results for *query* as a Markdown string.

    Args:
        query: Free-text search string; a blank/whitespace query short-circuits.

    Returns:
        A Markdown document listing each matching prompt, or a user-facing
        message when the query is empty or nothing matches.

    Result rows coming back from ``search_prompts`` may be 2-tuples
    ``(name, details)`` or 4-tuples ``(name, details, system, user)``; any
    other shape is reported inline rather than raising.
    """
    if not query.strip():
        return "Please enter a search query."

    results = search_prompts(query)

    # Was a bare print() left over from debugging; route through logging so
    # it can be silenced/configured like the rest of the module's output.
    logging.debug("Processed search results for query '%s': %s", query, results)

    if not results:
        return "No results found."

    result_md = "## Search Results:\n"
    for result in results:
        logging.debug("Result item: %s", result)

        if len(result) == 2:
            name, details = result
            result_md += f"**Title:** {name}\n\n**Description:** {details}\n\n---\n"

        elif len(result) == 4:
            name, details, system, user = result
            result_md += f"**Title:** {name}\n\n"
            result_md += f"**Description:** {details}\n\n"
            result_md += f"**System Prompt:** {system}\n\n"
            result_md += f"**User Prompt:** {user}\n\n"
            result_md += "---\n"
        else:
            result_md += "Error: Unexpected result format.\n\n---\n"
    return result_md
|
82 |
+
|
83 |
+
|
84 |
+
def create_viewing_tab():
    """Build the 'View Database' tab: paginated browsing of media DB entries.

    Uses ``view_database`` (imported from DB_Manager) which returns
    ``(results_html, pagination_text, total_pages)``; the next/previous
    buttons are enabled or disabled based on the current page position.
    """
    with gr.TabItem("View Database"):
        gr.Markdown("# View Database Entries")
        with gr.Row():
            with gr.Column():
                entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
                page_number = gr.Number(value=1, label="Page Number", precision=0)
                view_button = gr.Button("View Page")
                next_page_button = gr.Button("Next Page")
                previous_page_button = gr.Button("Previous Page")
            with gr.Column():
                results_display = gr.HTML()
                pagination_info = gr.Textbox(label="Pagination Info", interactive=False)

        def update_page(page, entries_per_page):
            """Fetch one page and recompute button enabled-state."""
            results, pagination, total_pages = view_database(page, entries_per_page)
            next_disabled = page >= total_pages
            prev_disabled = page <= 1
            return results, pagination, page, gr.update(interactive=not next_disabled), gr.update(interactive=not prev_disabled)

        def go_to_next_page(current_page, entries_per_page):
            next_page = current_page + 1
            return update_page(next_page, entries_per_page)

        def go_to_previous_page(current_page, entries_per_page):
            # Clamp at page 1 so repeated clicks cannot go below the first page.
            previous_page = max(1, current_page - 1)
            return update_page(previous_page, entries_per_page)

        view_button.click(
            fn=update_page,
            inputs=[page_number, entries_per_page],
            outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
        )

        next_page_button.click(
            fn=go_to_next_page,
            inputs=[page_number, entries_per_page],
            outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
        )

        previous_page_button.click(
            fn=go_to_previous_page,
            inputs=[page_number, entries_per_page],
            outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
        )
|
129 |
+
|
130 |
+
|
131 |
+
def create_search_summaries_tab():
    """Build the 'Search/View Title+Summary' tab.

    Paginated full-text search over ingested items, showing the title,
    summary and a configurable-length excerpt of the main content.  Delegates
    the query to ``search_and_display_items`` from DB_Manager.
    """
    with gr.TabItem("Search/View Title+Summary "):
        gr.Markdown("# Search across all ingested items in the Database and review their summaries")
        gr.Markdown("Search by Title / URL / Keyword / or Content via SQLite Full-Text-Search")
        with gr.Row():
            with gr.Column():
                search_query_input = gr.Textbox(label="Search Query", placeholder="Enter your search query here...")
                search_type_input = gr.Radio(choices=["Title", "URL", "Keyword", "Content"], value="Title",
                                             label="Search By")
                entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
                page_number = gr.Number(value=1, label="Page Number", precision=0)
                char_count_input = gr.Number(value=5000, label="Amount of characters to display from the main content",
                                             precision=0)
            with gr.Column():
                search_button = gr.Button("Search")
                next_page_button = gr.Button("Next Page")
                previous_page_button = gr.Button("Previous Page")
                pagination_info = gr.Textbox(label="Pagination Info", interactive=False)
        search_results_output = gr.HTML()


        def update_search_page(query, search_type, page, entries_per_page, char_count):
            """Run the search for one page and recompute button enabled-state."""
            # Ensure char_count is a positive integer; fall back to the 5000 default.
            char_count = max(1, int(char_count)) if char_count else 5000
            results, pagination, total_pages = search_and_display_items(query, search_type, page, entries_per_page, char_count)
            next_disabled = page >= total_pages
            prev_disabled = page <= 1
            return results, pagination, page, gr.update(interactive=not next_disabled), gr.update(
                interactive=not prev_disabled)

        def go_to_next_search_page(query, search_type, current_page, entries_per_page, char_count):
            next_page = current_page + 1
            return update_search_page(query, search_type, next_page, entries_per_page, char_count)

        def go_to_previous_search_page(query, search_type, current_page, entries_per_page, char_count):
            # Clamp at page 1 so repeated clicks cannot go below the first page.
            previous_page = max(1, current_page - 1)
            return update_search_page(query, search_type, previous_page, entries_per_page, char_count)

        search_button.click(
            fn=update_search_page,
            inputs=[search_query_input, search_type_input, page_number, entries_per_page, char_count_input],
            outputs=[search_results_output, pagination_info, page_number, next_page_button, previous_page_button]
        )

        next_page_button.click(
            fn=go_to_next_search_page,
            inputs=[search_query_input, search_type_input, page_number, entries_per_page, char_count_input],
            outputs=[search_results_output, pagination_info, page_number, next_page_button, previous_page_button]
        )

        previous_page_button.click(
            fn=go_to_previous_search_page,
            inputs=[search_query_input, search_type_input, page_number, entries_per_page, char_count_input],
            outputs=[search_results_output, pagination_info, page_number, next_page_button, previous_page_button]
        )
|
186 |
+
|
187 |
+
|
188 |
+
|
189 |
+
def create_prompt_view_tab():
    """Build the 'View Prompt Database' tab: paginated listing of all prompts.

    Each page renders prompt title, details, user/system prompt bodies and
    associated keywords as escaped HTML.  Queries prompts.db directly.
    """
    with gr.TabItem("View Prompt Database"):
        gr.Markdown("# View Prompt Database Entries")
        with gr.Row():
            with gr.Column():
                entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
                page_number = gr.Number(value=1, label="Page Number", precision=0)
                view_button = gr.Button("View Page")
                next_page_button = gr.Button("Next Page")
                previous_page_button = gr.Button("Previous Page")
            with gr.Column():
                pagination_info = gr.Textbox(label="Pagination Info", interactive=False)
                results_display = gr.HTML()

        # FIXME - SQL functions to be moved to DB_Manager
        def view_database(page, entries_per_page):
            """Return (results_html, pagination_text, total_pages) for one page of prompts."""
            offset = (page - 1) * entries_per_page
            try:
                with sqlite3.connect(get_database_path('prompts.db')) as conn:
                    cursor = conn.cursor()
                    # One row per prompt, keywords aggregated into a comma list.
                    cursor.execute('''
                        SELECT p.name, p.details, p.system, p.user, GROUP_CONCAT(k.keyword, ', ') as keywords
                        FROM Prompts p
                        LEFT JOIN PromptKeywords pk ON p.id = pk.prompt_id
                        LEFT JOIN Keywords k ON pk.keyword_id = k.id
                        GROUP BY p.id
                        ORDER BY p.name
                        LIMIT ? OFFSET ?
                    ''', (entries_per_page, offset))
                    prompts = cursor.fetchall()

                    cursor.execute('SELECT COUNT(*) FROM Prompts')
                    total_prompts = cursor.fetchone()[0]

                results = ""
                for prompt in prompts:
                    # Escape HTML special characters and replace newlines with <br> tags
                    title = html.escape(prompt[0]).replace('\n', '<br>')
                    details = html.escape(prompt[1] or '').replace('\n', '<br>')
                    # Prompt bodies go inside <pre>, which preserves newlines itself.
                    system_prompt = html.escape(prompt[2] or '')
                    user_prompt = html.escape(prompt[3] or '')
                    keywords = html.escape(prompt[4] or '').replace('\n', '<br>')

                    results += f"""
                    <div style="border: 1px solid #ddd; padding: 10px; margin-bottom: 20px;">
                        <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px;">
                            <div><strong>Title:</strong> {title}</div>
                            <div><strong>Details:</strong> {details}</div>
                        </div>
                        <div style="margin-top: 10px;">
                            <strong>User Prompt:</strong>
                            <pre style="white-space: pre-wrap; word-wrap: break-word;">{user_prompt}</pre>
                        </div>
                        <div style="margin-top: 10px;">
                            <strong>System Prompt:</strong>
                            <pre style="white-space: pre-wrap; word-wrap: break-word;">{system_prompt}</pre>
                        </div>
                        <div style="margin-top: 10px;">
                            <strong>Keywords:</strong> {keywords}
                        </div>
                    </div>
                    """

                total_pages = (total_prompts + entries_per_page - 1) // entries_per_page
                pagination = f"Page {page} of {total_pages} (Total prompts: {total_prompts})"

                return results, pagination, total_pages
            except sqlite3.Error as e:
                return f"<p>Error fetching prompts: {e}</p>", "Error", 0

        def update_page(page, entries_per_page):
            """Fetch one page and recompute button enabled-state."""
            results, pagination, total_pages = view_database(page, entries_per_page)
            next_disabled = page >= total_pages
            prev_disabled = page <= 1
            return results, pagination, page, gr.update(interactive=not next_disabled), gr.update(
                interactive=not prev_disabled)

        def go_to_next_page(current_page, entries_per_page):
            next_page = current_page + 1
            return update_page(next_page, entries_per_page)

        def go_to_previous_page(current_page, entries_per_page):
            # Clamp at page 1 so repeated clicks cannot go below the first page.
            previous_page = max(1, current_page - 1)
            return update_page(previous_page, entries_per_page)

        view_button.click(
            fn=update_page,
            inputs=[page_number, entries_per_page],
            outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
        )

        next_page_button.click(
            fn=go_to_next_page,
            inputs=[page_number, entries_per_page],
            outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
        )

        previous_page_button.click(
            fn=go_to_previous_page,
            inputs=[page_number, entries_per_page],
            outputs=[results_display, pagination_info, page_number, next_page_button, previous_page_button]
        )
|
291 |
+
|
292 |
+
|
293 |
+
|
294 |
+
def create_prompt_search_tab():
    """Build the 'Search Prompts' tab: paginated LIKE-search over prompts.db.

    Matches the query against prompt name, details, system/user bodies and
    keywords, and renders the matches as escaped HTML.
    """
    with gr.TabItem("Search Prompts"):
        gr.Markdown("# Search and View Prompt Details")
        gr.Markdown("Currently has all of the https://github.com/danielmiessler/fabric prompts already available")
        with gr.Row():
            with gr.Column():
                search_query_input = gr.Textbox(label="Search Prompts", placeholder="Enter your search query...")
                entries_per_page = gr.Dropdown(choices=[10, 20, 50, 100], label="Entries per Page", value=10)
                page_number = gr.Number(value=1, label="Page Number", precision=0)
            with gr.Column():
                search_button = gr.Button("Search Prompts")
                next_page_button = gr.Button("Next Page")
                previous_page_button = gr.Button("Previous Page")
                pagination_info = gr.Textbox(label="Pagination Info", interactive=False)
        search_results_output = gr.HTML()

        # This is dirty and shouldn't be in the UI code, but it's a quick way to get the search working.
        # FIXME - SQL functions to be moved to DB_Manager
        def search_and_display_prompts(query, page, entries_per_page):
            """Return (results_html, pagination_text, total_pages) for one page of matches."""
            offset = (page - 1) * entries_per_page
            try:
                # FIXME - SQL functions to be moved to DB_Manager
                with sqlite3.connect(get_database_path('prompts.db')) as conn:
                    cursor = conn.cursor()
                    # LIKE-match across every searchable column; keywords aggregated per prompt.
                    cursor.execute('''
                        SELECT p.name, p.details, p.system, p.user, GROUP_CONCAT(k.keyword, ', ') as keywords
                        FROM Prompts p
                        LEFT JOIN PromptKeywords pk ON p.id = pk.prompt_id
                        LEFT JOIN Keywords k ON pk.keyword_id = k.id
                        WHERE p.name LIKE ? OR p.details LIKE ? OR p.system LIKE ? OR p.user LIKE ? OR k.keyword LIKE ?
                        GROUP BY p.id
                        ORDER BY p.name
                        LIMIT ? OFFSET ?
                    ''', (f'%{query}%', f'%{query}%', f'%{query}%', f'%{query}%', f'%{query}%', entries_per_page, offset))
                    prompts = cursor.fetchall()

                    # DISTINCT: the keyword join can yield several rows per prompt.
                    cursor.execute('''
                        SELECT COUNT(DISTINCT p.id)
                        FROM Prompts p
                        LEFT JOIN PromptKeywords pk ON p.id = pk.prompt_id
                        LEFT JOIN Keywords k ON pk.keyword_id = k.id
                        WHERE p.name LIKE ? OR p.details LIKE ? OR p.system LIKE ? OR p.user LIKE ? OR k.keyword LIKE ?
                    ''', (f'%{query}%', f'%{query}%', f'%{query}%', f'%{query}%', f'%{query}%'))
                    total_prompts = cursor.fetchone()[0]

                results = ""
                for prompt in prompts:
                    # Escape HTML special characters; newlines become <br> where not in <pre>.
                    title = html.escape(prompt[0]).replace('\n', '<br>')
                    details = html.escape(prompt[1] or '').replace('\n', '<br>')
                    system_prompt = html.escape(prompt[2] or '')
                    user_prompt = html.escape(prompt[3] or '')
                    keywords = html.escape(prompt[4] or '').replace('\n', '<br>')

                    results += f"""
                    <div style="border: 1px solid #ddd; padding: 10px; margin-bottom: 20px;">
                        <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px;">
                            <div><strong>Title:</strong> {title}</div>
                            <div><strong>Details:</strong> {details}</div>
                        </div>
                        <div style="margin-top: 10px;">
                            <strong>User Prompt:</strong>
                            <pre style="white-space: pre-wrap; word-wrap: break-word;">{user_prompt}</pre>
                        </div>
                        <div style="margin-top: 10px;">
                            <strong>System Prompt:</strong>
                            <pre style="white-space: pre-wrap; word-wrap: break-word;">{system_prompt}</pre>
                        </div>
                        <div style="margin-top: 10px;">
                            <strong>Keywords:</strong> {keywords}
                        </div>
                    </div>
                    """

                total_pages = (total_prompts + entries_per_page - 1) // entries_per_page
                pagination = f"Page {page} of {total_pages} (Total prompts: {total_prompts})"

                return results, pagination, total_pages
            except sqlite3.Error as e:
                return f"<p>Error searching prompts: {e}</p>", "Error", 0

        def update_search_page(query, page, entries_per_page):
            """Run the search for one page and recompute button enabled-state."""
            results, pagination, total_pages = search_and_display_prompts(query, page, entries_per_page)
            next_disabled = page >= total_pages
            prev_disabled = page <= 1
            return results, pagination, page, gr.update(interactive=not next_disabled), gr.update(interactive=not prev_disabled)

        def go_to_next_search_page(query, current_page, entries_per_page):
            next_page = current_page + 1
            return update_search_page(query, next_page, entries_per_page)

        def go_to_previous_search_page(query, current_page, entries_per_page):
            # Clamp at page 1 so repeated clicks cannot go below the first page.
            previous_page = max(1, current_page - 1)
            return update_search_page(query, previous_page, entries_per_page)

        search_button.click(
            fn=update_search_page,
            inputs=[search_query_input, page_number, entries_per_page],
            outputs=[search_results_output, pagination_info, page_number, next_page_button, previous_page_button]
        )

        next_page_button.click(
            fn=go_to_next_search_page,
            inputs=[search_query_input, page_number, entries_per_page],
            outputs=[search_results_output, pagination_info, page_number, next_page_button, previous_page_button]
        )

        previous_page_button.click(
            fn=go_to_previous_search_page,
            inputs=[search_query_input, page_number, entries_per_page],
            outputs=[search_results_output, pagination_info, page_number, next_page_button, previous_page_button]
        )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
App_Function_Libraries/Gradio_UI/Trash.py
CHANGED
@@ -1,134 +1,139 @@
|
|
1 |
# Trash.py
|
2 |
-
# Gradio UI for
|
3 |
-
|
4 |
-
import sqlite3
|
5 |
-
|
6 |
# Imports
|
|
|
7 |
|
8 |
-
# External Imports
|
9 |
import gradio as gr
|
10 |
#
|
11 |
# Local Imports
|
12 |
-
from App_Function_Libraries.DB.DB_Manager import
|
|
|
|
|
|
|
|
|
13 |
|
14 |
|
15 |
-
|
16 |
-
|
|
|
|
|
|
|
17 |
|
18 |
def list_trash():
|
19 |
items = get_trashed_items()
|
20 |
return "\n".join(
|
21 |
[f"ID: {item['id']}, Title: {item['title']}, Trashed on: {item['trash_date']}" for item in items])
|
22 |
|
|
|
|
|
|
|
|
|
|
|
23 |
def empty_trash_ui(days):
|
24 |
deleted, remaining = empty_trash(days)
|
25 |
return f"Deleted {deleted} items. {remaining} items remain in trash."
|
26 |
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
view_button.click(list_trash, inputs=[], outputs=trash_list)
|
32 |
|
33 |
|
|
|
|
|
|
|
34 |
|
35 |
|
36 |
-
def
|
|
|
|
|
|
|
|
|
|
|
37 |
try:
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
if not results:
|
49 |
-
return "No matching prompts found."
|
50 |
-
|
51 |
-
output = "<h3>Matching Prompts:</h3>"
|
52 |
-
for row in results:
|
53 |
-
output += f"<p><strong>ID:</strong> {row[0]} | <strong>Name:</strong> {html.escape(row[1])} | <strong>Details:</strong> {html.escape(row[2][:100])}...</p>"
|
54 |
-
return output
|
55 |
-
except sqlite3.Error as e:
|
56 |
-
return f"An error occurred while searching prompts: {e}"
|
57 |
-
|
58 |
-
|
59 |
-
def search_media_for_deletion(query):
|
60 |
try:
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
FROM media
|
66 |
-
WHERE title LIKE ? OR description LIKE ?
|
67 |
-
LIMIT 10
|
68 |
-
''', (f'%{query}%', f'%{query}%'))
|
69 |
-
results = cursor.fetchall()
|
70 |
-
|
71 |
-
if not results:
|
72 |
-
return "No matching media found."
|
73 |
-
|
74 |
-
output = "<h3>Matching Media:</h3>"
|
75 |
-
for row in results:
|
76 |
-
output += f"<p><strong>ID:</strong> {row[0]} | <strong>Title:</strong> {html.escape(row[1])} | <strong>Description:</strong> {html.escape(row[2][:100])}...</p>"
|
77 |
-
return output
|
78 |
-
except sqlite3.Error as e:
|
79 |
-
return f"An error occurred while searching media: {e}"
|
80 |
|
81 |
-
def create_delete_trash_tab():
|
82 |
-
with gr.TabItem("Delete DB Item"):
|
83 |
-
gr.Markdown("# Search and Delete Items from Databases")
|
84 |
-
|
85 |
-
with gr.Row():
|
86 |
-
with gr.Column():
|
87 |
-
gr.Markdown("## Search and Delete Prompts")
|
88 |
-
prompt_search_input = gr.Textbox(label="Search Prompts")
|
89 |
-
prompt_search_button = gr.Button("Search Prompts")
|
90 |
-
prompt_search_results = gr.HTML()
|
91 |
-
prompt_id_input = gr.Number(label="Prompt ID")
|
92 |
-
prompt_delete_button = gr.Button("Delete Prompt")
|
93 |
-
prompt_delete_output = gr.Textbox(label="Delete Result")
|
94 |
-
|
95 |
-
with gr.Column():
|
96 |
-
gr.Markdown("## Search and Delete Media")
|
97 |
-
media_search_input = gr.Textbox(label="Search Media")
|
98 |
-
media_search_button = gr.Button("Search Media")
|
99 |
-
media_search_results = gr.HTML()
|
100 |
-
media_id_input = gr.Number(label="Media ID")
|
101 |
-
media_force_checkbox = gr.Checkbox(label="Force Delete")
|
102 |
-
media_delete_button = gr.Button("Delete Media")
|
103 |
-
media_delete_output = gr.Textbox(label="Delete Result")
|
104 |
-
|
105 |
-
prompt_search_button.click(
|
106 |
-
search_prompts_for_deletion,
|
107 |
-
inputs=[prompt_search_input],
|
108 |
-
outputs=prompt_search_results
|
109 |
-
)
|
110 |
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
)
|
116 |
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
)
|
122 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
123 |
media_delete_button.click(
|
124 |
delete_item,
|
125 |
inputs=[media_id_input, media_force_checkbox],
|
126 |
outputs=media_delete_output
|
127 |
)
|
128 |
|
|
|
129 |
def create_empty_trash_tab():
|
130 |
with gr.TabItem("Empty Trash"):
|
131 |
days_input = gr.Slider(minimum=15, maximum=90, step=5, label="Delete items older than (days)")
|
132 |
empty_button = gr.Button("Empty Trash")
|
133 |
empty_output = gr.Textbox(label="Result")
|
134 |
-
empty_button.click(empty_trash_ui, inputs=[days_input], outputs=empty_output)
|
|
|
|
|
|
|
|
|
|
1 |
# Trash.py
|
2 |
+
# Gradio UI for managing trashed items in the database
|
3 |
+
#
|
|
|
|
|
4 |
# Imports
|
5 |
+
from typing import Tuple, List
|
6 |
|
|
|
7 |
import gradio as gr
|
8 |
#
|
9 |
# Local Imports
|
10 |
+
from App_Function_Libraries.DB.DB_Manager import (
|
11 |
+
get_trashed_items, user_delete_item, empty_trash,
|
12 |
+
get_transcripts, fetch_item_details,
|
13 |
+
search_media_database, mark_as_trash,
|
14 |
+
)
|
15 |
|
16 |
|
17 |
+
#
|
18 |
+
############################################################################################################
|
19 |
+
#
|
20 |
+
# Functions:
|
21 |
+
|
22 |
|
23 |
def list_trash():
    """Return a newline-separated listing of trashed items (id, title, trash date)."""
    items = get_trashed_items()
    return "\n".join(
        [f"ID: {item['id']}, Title: {item['title']}, Trashed on: {item['trash_date']}" for item in items])
|
27 |
|
28 |
+
|
29 |
+
def delete_item(media_id, force):
    """Delete a media item by id; *force* bypasses the soft-delete safeguard."""
    return user_delete_item(media_id, force)
|
31 |
+
|
32 |
+
|
33 |
def empty_trash_ui(days):
    """Permanently delete trashed items older than *days* and report the counts."""
    deleted, remaining = empty_trash(days)
    return f"Deleted {deleted} items. {remaining} items remain in trash."
|
36 |
|
37 |
+
|
38 |
+
def get_media_transcripts(media_id):
    """Return a readable summary of all transcripts for a media item.

    Each transcript row is rendered as id/model/created plus the first 200
    characters of its text, separated by blank lines.
    """
    transcripts = get_transcripts(media_id)
    return "\n\n".join([f"Transcript ID: {t[0]}\nModel: {t[1]}\nCreated: {t[3]}\n{t[2][:200]}..." for t in transcripts])
|
|
|
41 |
|
42 |
|
43 |
+
def get_media_summaries(media_id):
    """Return the stored summary for a media item, or a placeholder if none."""
    _, summary, _ = fetch_item_details(media_id)
    return summary if summary else "No summary available."
|
46 |
|
47 |
|
48 |
+
def get_media_prompts(media_id):
    """Return the stored prompt for a media item, or a placeholder if none."""
    prompt, _, _ = fetch_item_details(media_id)
    return prompt if prompt else "No prompt available."
|
51 |
+
|
52 |
+
|
53 |
+
def search_and_mark_trash(search_query: str) -> Tuple[List[Tuple[int, str, str]], str]:
    """Search the media database and return (results, status message).

    Results are (id, title, url) tuples.  Errors are reported in the status
    string rather than raised, so the UI callback never crashes.
    """
    try:
        results = search_media_database(search_query)
        if not results:
            return [], "No items found matching the search query."
        return results, "Search completed successfully."
    except Exception as e:
        return [], f"Error during search: {str(e)}"
|
61 |
+
|
62 |
+
|
63 |
+
def mark_item_as_trash(media_id: int) -> str:
    """Soft-delete one media item by id; return a human-readable status string."""
    try:
        mark_as_trash(media_id)
        return f"Item with ID {media_id} has been marked as trash."
    except Exception as e:
        return f"Error marking item as trash: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
|
71 |
+
def create_search_and_mark_trash_tab():
    """Build the 'Search and Mark as Trash' tab.

    Search populates a dropdown with "id: title (url)" entries; the mark
    button parses the id back out of the selected entry and soft-deletes it.
    """
    with gr.TabItem("Search and Mark as Trash"):
        gr.Markdown("# Search for Items and Mark as Trash")

        search_input = gr.Textbox(label="Search Query")
        search_button = gr.Button("Search")
        search_results = gr.Dropdown(label="Search Results", choices=[], interactive=True)
        search_status = gr.Textbox(label="Search Status")

        mark_trash_button = gr.Button("Mark Selected Item as Trash")
        mark_trash_status = gr.Textbox(label="Mark as Trash Status")

        def update_search_results(query):
            results, status = search_and_mark_trash(query)
            # Encode the id into the label so it can be recovered on selection.
            choices = [f"{id}: {title} ({url})" for id, title, url in results]
            return choices, status

        search_button.click(
            update_search_results,
            inputs=[search_input],
            outputs=[search_results, search_status]
        )

        def mark_selected_as_trash(selected_item):
            if selected_item:
                # Label format is "id: title (url)" — id is everything before the first colon.
                media_id = int(selected_item.split(":")[0])
                return mark_item_as_trash(media_id)
            return "No item selected."

        mark_trash_button.click(
            mark_selected_as_trash,
            inputs=[search_results],
            outputs=[mark_trash_status]
        )
|
105 |
|
106 |
+
|
107 |
+
def create_view_trash_tab():
    """Build the 'View Trash' tab: a button that lists all trashed items."""
    with gr.TabItem("View Trash"):
        view_button = gr.Button("View Trash")
        trash_list = gr.Textbox(label="Trashed Items")
        view_button.click(list_trash, inputs=[], outputs=trash_list)
|
112 |
+
|
113 |
+
|
114 |
+
def create_delete_trash_tab():
    """Build the 'Delete DB Item' tab: delete a media item by id, optionally forced."""
    with gr.TabItem("Delete DB Item"):
        gr.Markdown("# Delete Items from Databases")

        media_id_input = gr.Number(label="Media ID")
        media_force_checkbox = gr.Checkbox(label="Force Delete")
        media_delete_button = gr.Button("Delete Media")
        media_delete_output = gr.Textbox(label="Delete Result")

        media_delete_button.click(
            delete_item,
            inputs=[media_id_input, media_force_checkbox],
            outputs=media_delete_output
        )
|
128 |
|
129 |
+
|
130 |
def create_empty_trash_tab():
    """Build the 'Empty Trash' tab: purge trashed items older than N days."""
    with gr.TabItem("Empty Trash"):
        days_input = gr.Slider(minimum=15, maximum=90, step=5, label="Delete items older than (days)")
        empty_button = gr.Button("Empty Trash")
        empty_output = gr.Textbox(label="Result")
        empty_button.click(empty_trash_ui, inputs=[days_input], outputs=empty_output)
|
136 |
+
|
137 |
+
#
|
138 |
+
# End of File
|
139 |
+
############################################################################################################
|
App_Function_Libraries/Gradio_UI/Video_transcription_tab.py
CHANGED
@@ -9,8 +9,6 @@ import os
|
|
9 |
# External Imports
|
10 |
import gradio as gr
|
11 |
import yt_dlp
|
12 |
-
|
13 |
-
from App_Function_Libraries.Confabulation_check import simplified_geval
|
14 |
#
|
15 |
# Local Imports
|
16 |
from App_Function_Libraries.DB.DB_Manager import load_preset_prompts, add_media_to_database
|
@@ -21,8 +19,9 @@ from App_Function_Libraries.Summarization_General_Lib import perform_transcripti
|
|
21 |
from App_Function_Libraries.Utils.Utils import convert_to_seconds, safe_read_file, format_transcription, \
|
22 |
create_download_directory, generate_unique_identifier, extract_text_from_segments
|
23 |
from App_Function_Libraries.Video_DL_Ingestion_Lib import parse_and_expand_urls, extract_metadata, download_video
|
|
|
24 |
#
|
25 |
-
|
26 |
#
|
27 |
# Functions:
|
28 |
|
@@ -108,7 +107,7 @@ def create_video_transcription_tab():
|
|
108 |
api_name_input = gr.Dropdown(
|
109 |
choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
|
110 |
"OpenRouter",
|
111 |
-
"Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace"],
|
112 |
value=None, label="API Name (Mandatory)")
|
113 |
api_key_input = gr.Textbox(label="API Key (Mandatory)", placeholder="Enter your API key here",
|
114 |
type="password")
|
@@ -467,7 +466,7 @@ def create_video_transcription_tab():
|
|
467 |
if confab_checkbox:
|
468 |
logging.info("Confabulation check enabled")
|
469 |
# Assuming result[1] contains the transcript and result[2] contains the summary
|
470 |
-
confabulation_result =
|
471 |
logging.info(f"Simplified G-Eval result: {confabulation_result}")
|
472 |
|
473 |
# Ensure that result is a tuple with 5 elements
|
|
|
9 |
# External Imports
|
10 |
import gradio as gr
|
11 |
import yt_dlp
|
|
|
|
|
12 |
#
|
13 |
# Local Imports
|
14 |
from App_Function_Libraries.DB.DB_Manager import load_preset_prompts, add_media_to_database
|
|
|
19 |
from App_Function_Libraries.Utils.Utils import convert_to_seconds, safe_read_file, format_transcription, \
|
20 |
create_download_directory, generate_unique_identifier, extract_text_from_segments
|
21 |
from App_Function_Libraries.Video_DL_Ingestion_Lib import parse_and_expand_urls, extract_metadata, download_video
|
22 |
+
from App_Function_Libraries.Benchmarks_Evaluations.ms_g_eval import run_geval
|
23 |
#
|
24 |
+
#######################################################################################################################
|
25 |
#
|
26 |
# Functions:
|
27 |
|
|
|
107 |
api_name_input = gr.Dropdown(
|
108 |
choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
|
109 |
"OpenRouter",
|
110 |
+
"Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace", "Custom-OpenAI-API"],
|
111 |
value=None, label="API Name (Mandatory)")
|
112 |
api_key_input = gr.Textbox(label="API Key (Mandatory)", placeholder="Enter your API key here",
|
113 |
type="password")
|
|
|
466 |
if confab_checkbox:
|
467 |
logging.info("Confabulation check enabled")
|
468 |
# Assuming result[1] contains the transcript and result[2] contains the summary
|
469 |
+
confabulation_result = run_geval(result[1], result[2], api_key, api_name)
|
470 |
logging.info(f"Simplified G-Eval result: {confabulation_result}")
|
471 |
|
472 |
# Ensure that result is a tuple with 5 elements
|
App_Function_Libraries/Gradio_UI/View_tab.py
ADDED
@@ -0,0 +1,170 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# View_tab.py
|
2 |
+
# Description: Gradio functions for the view tab
|
3 |
+
#
|
4 |
+
# Imports
|
5 |
+
#
|
6 |
+
# External Imports
|
7 |
+
import gradio as gr
|
8 |
+
#
|
9 |
+
# Local Imports
|
10 |
+
from App_Function_Libraries.DB.DB_Manager import (
|
11 |
+
search_media_database, mark_as_trash, get_specific_prompt, delete_specific_transcript,
|
12 |
+
delete_specific_summary, delete_specific_prompt, get_specific_transcript, get_specific_summary,
|
13 |
+
get_media_transcripts, get_media_summaries, get_media_prompts
|
14 |
+
)
|
15 |
+
#
|
16 |
+
############################################################################################################
|
17 |
+
#
|
18 |
+
# Functions:
|
19 |
+
|
20 |
+
# FIXME - add mark_as_trash ability to the UI
|
21 |
+
|
22 |
+
|
23 |
+
# FIXME - Doesn't work. also need ot merge this tab wtih Edit Existing Items tab....
|
24 |
+
def create_manage_items_tab():
    """Build the "Edit/Manage DB Items" tab.

    Lets the user search the media database, pick a media item, then browse
    and delete its transcripts, summaries and prompts.  Results of delete
    operations are shown in a shared status textbox.

    Fix: the original callbacks returned raw dicts such as
    ``{"choices": ..., "value": None}``; Gradio does not treat those as
    component updates, so the dropdowns never populated (matching the
    author's "Doesn't work" FIXME).  All dropdown refreshes now go through
    ``gr.update(...)``.
    """
    with gr.TabItem("Edit/Manage DB Items"):
        search_input = gr.Textbox(label="Search for Media (title or ID)")
        search_button = gr.Button("Search")
        media_selector = gr.Dropdown(label="Select Media", choices=[], interactive=True)

        with gr.Accordion("Transcripts"):
            get_transcripts_button = gr.Button("Get Transcripts")
            transcript_selector = gr.Dropdown(label="Select Transcript", choices=[], interactive=True)
            transcripts_output = gr.Textbox(label="Transcript Content", lines=10)
            delete_transcript_button = gr.Button("Delete Selected Transcript")

        with gr.Accordion("Summaries"):
            get_summaries_button = gr.Button("Get Summaries")
            summary_selector = gr.Dropdown(label="Select Summary", choices=[], interactive=True)
            summaries_output = gr.Textbox(label="Summary Content", lines=5)
            delete_summary_button = gr.Button("Delete Selected Summary")

        with gr.Accordion("Prompts"):
            get_prompts_button = gr.Button("Get Prompts")
            prompt_selector = gr.Dropdown(label="Select Prompt", choices=[], interactive=True)
            prompts_output = gr.Textbox(label="Prompt Content", lines=5)
            delete_prompt_button = gr.Button("Delete Selected Prompt")

        status_output = gr.Textbox(label="Status")

        def search_media(query):
            # Each result row is assumed to be (id, title, ...) — the "id: title"
            # string is parsed back apart in the downstream callbacks.
            results = search_media_database(query)
            choices = [f"{result[0]}: {result[1]}" for result in results]
            return gr.update(choices=choices, value=None)

        search_button.click(search_media, inputs=[search_input], outputs=[media_selector])

        def get_transcripts(media_selection):
            """Refresh the transcript dropdown for the selected media item."""
            if not media_selection:
                return gr.update(choices=[], value=None)
            media_id = int(media_selection.split(":")[0])
            transcripts = get_media_transcripts(media_id)
            # t[0] is the transcript id; t[3] is presumably a label/timestamp —
            # TODO confirm against the DB schema.
            choices = [f"{t[0]}: {t[3]}" for t in transcripts]
            return gr.update(choices=choices, value=None)

        def display_transcript(transcript_selection):
            """Show the content of the selected transcript, or an error string."""
            if not transcript_selection:
                return "No transcript selected."
            transcript_id = int(transcript_selection.split(":")[0])
            transcript = get_specific_transcript(transcript_id)
            return transcript['content'] if 'content' in transcript else transcript.get('error', "Transcript not found.")

        get_transcripts_button.click(
            get_transcripts,
            inputs=[media_selector],
            outputs=[transcript_selector]
        )
        transcript_selector.change(
            display_transcript,
            inputs=[transcript_selector],
            outputs=[transcripts_output]
        )

        def get_summaries(media_selection):
            """Refresh the summary dropdown for the selected media item."""
            if not media_selection:
                return gr.update(choices=[], value=None)
            media_id = int(media_selection.split(":")[0])
            summaries = get_media_summaries(media_id)
            choices = [f"{s[0]}: {s[3]}" for s in summaries]
            return gr.update(choices=choices, value=None)

        def display_summary(summary_selection):
            """Show the content of the selected summary, or an error string."""
            if not summary_selection:
                return "No summary selected."
            summary_id = int(summary_selection.split(":")[0])
            summary = get_specific_summary(summary_id)
            return summary['content'] if 'content' in summary else summary.get('error', "Summary not found.")

        get_summaries_button.click(
            get_summaries,
            inputs=[media_selector],
            outputs=[summary_selector]
        )
        summary_selector.change(
            display_summary,
            inputs=[summary_selector],
            outputs=[summaries_output]
        )

        def get_prompts(media_selection):
            """Refresh the prompt dropdown for the selected media item."""
            if not media_selection:
                return gr.update(choices=[], value=None)
            media_id = int(media_selection.split(":")[0])
            prompts = get_media_prompts(media_id)
            choices = [f"{p[0]}: {p[3]}" for p in prompts]
            return gr.update(choices=choices, value=None)

        def display_prompt(prompt_selection):
            """Show the content of the selected prompt, or an error string."""
            if not prompt_selection:
                return "No prompt selected."
            prompt_id = int(prompt_selection.split(":")[0])
            prompt = get_specific_prompt(prompt_id)
            return prompt['content'] if 'content' in prompt else prompt.get('error', "Prompt not found.")

        get_prompts_button.click(
            get_prompts,
            inputs=[media_selector],
            outputs=[prompt_selector]
        )
        prompt_selector.change(
            display_prompt,
            inputs=[prompt_selector],
            outputs=[prompts_output]
        )

        def delete_transcript(transcript_selection):
            """Delete the selected transcript and report the DB layer's result."""
            if not transcript_selection:
                return "No transcript selected."
            transcript_id = int(transcript_selection.split(":")[0])
            result = delete_specific_transcript(transcript_id)
            return result

        def delete_summary(summary_selection):
            """Delete the selected summary and report the DB layer's result."""
            if not summary_selection:
                return "No summary selected."
            summary_id = int(summary_selection.split(":")[0])
            result = delete_specific_summary(summary_id)
            return result

        def delete_prompt(prompt_selection):
            """Delete the selected prompt and report the DB layer's result."""
            if not prompt_selection:
                return "No prompt selected."
            prompt_id = int(prompt_selection.split(":")[0])
            result = delete_specific_prompt(prompt_id)
            return result

        delete_transcript_button.click(
            delete_transcript,
            inputs=[transcript_selector],
            outputs=[status_output]
        )
        delete_summary_button.click(
            delete_summary,
            inputs=[summary_selector],
            outputs=[status_output]
        )
        delete_prompt_button.click(
            delete_prompt,
            inputs=[prompt_selector],
            outputs=[status_output]
        )
|
App_Function_Libraries/Gradio_UI/Website_scraping_tab.py
CHANGED
@@ -1,113 +1,540 @@
|
|
1 |
# Website_scraping_tab.py
|
2 |
# Gradio UI for scraping websites
|
3 |
-
|
4 |
# Imports
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
#
|
6 |
# External Imports
|
7 |
import gradio as gr
|
|
|
|
|
8 |
|
|
|
|
|
|
|
9 |
from App_Function_Libraries.Article_Summarization_Lib import scrape_and_summarize_multiple
|
10 |
from App_Function_Libraries.DB.DB_Manager import load_preset_prompts
|
11 |
from App_Function_Libraries.Gradio_UI.Chat_ui import update_user_prompt
|
|
|
12 |
|
13 |
|
14 |
-
#
|
15 |
-
# Local Imports
|
16 |
-
#
|
17 |
#
|
18 |
########################################################################################################################
|
19 |
#
|
20 |
# Functions:
|
21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
def create_website_scraping_tab():
|
24 |
with gr.TabItem("Website Scraping"):
|
25 |
-
gr.Markdown("# Scrape Websites & Summarize Articles
|
26 |
with gr.Row():
|
27 |
with gr.Column():
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
with gr.Row():
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
preset_prompt_checkbox = gr.Checkbox(label="Use a pre-set Prompt",
|
37 |
-
value=False,
|
38 |
-
visible=True)
|
39 |
with gr.Row():
|
40 |
-
|
41 |
-
choices=load_preset_prompts(),
|
42 |
-
visible=False)
|
43 |
with gr.Row():
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
|
|
48 |
with gr.Row():
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
**
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
**
|
64 |
-
-
|
65 |
-
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
inputs=[preset_prompt_checkbox],
|
78 |
-
outputs=[preset_prompt]
|
79 |
-
)
|
80 |
-
|
81 |
-
def update_prompts(preset_name):
|
82 |
-
prompts = update_user_prompt(preset_name)
|
83 |
-
return (
|
84 |
-
gr.update(value=prompts["user_prompt"], visible=True),
|
85 |
-
gr.update(value=prompts["system_prompt"], visible=True)
|
86 |
)
|
87 |
-
|
88 |
-
preset_prompt.change(
|
89 |
-
update_prompts,
|
90 |
-
inputs=preset_prompt,
|
91 |
-
outputs=[website_custom_prompt_input, system_prompt_input]
|
92 |
-
)
|
93 |
|
94 |
api_name_input = gr.Dropdown(
|
95 |
-
choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
|
96 |
-
"
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
|
102 |
scrape_button = gr.Button("Scrape and Summarize")
|
103 |
with gr.Column():
|
|
|
104 |
result_output = gr.Textbox(label="Result", lines=20)
|
105 |
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# Website_scraping_tab.py
|
2 |
# Gradio UI for scraping websites
|
3 |
+
#
|
4 |
# Imports
|
5 |
+
import asyncio
|
6 |
+
import json
|
7 |
+
import logging
|
8 |
+
import os
|
9 |
+
import random
|
10 |
+
from concurrent.futures import ThreadPoolExecutor
|
11 |
+
from typing import Optional, List, Dict, Any
|
12 |
+
from urllib.parse import urlparse, urljoin
|
13 |
+
|
14 |
#
|
15 |
# External Imports
|
16 |
import gradio as gr
|
17 |
+
from playwright.async_api import TimeoutError, async_playwright
|
18 |
+
from playwright.sync_api import sync_playwright
|
19 |
|
20 |
+
#
|
21 |
+
# Local Imports
|
22 |
+
from App_Function_Libraries.Article_Extractor_Lib import scrape_from_sitemap, scrape_by_url_level, scrape_article
|
23 |
from App_Function_Libraries.Article_Summarization_Lib import scrape_and_summarize_multiple
|
24 |
from App_Function_Libraries.DB.DB_Manager import load_preset_prompts
|
25 |
from App_Function_Libraries.Gradio_UI.Chat_ui import update_user_prompt
|
26 |
+
from App_Function_Libraries.Summarization_General_Lib import summarize
|
27 |
|
28 |
|
|
|
|
|
|
|
29 |
#
|
30 |
########################################################################################################################
|
31 |
#
|
32 |
# Functions:
|
33 |
|
34 |
+
def get_url_depth(url: str) -> int:
    """Return the number of path segments in *url*.

    A root URL such as ``https://example.com/`` has depth 0; each path
    component adds one (``/a/b`` -> 2).
    """
    path = urlparse(url).path.strip('/')
    # "".split('/') yields [''] — without this guard a root URL would
    # incorrectly report depth 1.
    return len(path.split('/')) if path else 0
|
36 |
+
|
37 |
+
|
38 |
+
def sync_recursive_scrape(url_input, max_pages, max_depth, progress_callback, delay=1.0):
    """Run the async ``recursive_scrape`` from synchronous (Gradio) code.

    The coroutine is driven on a dedicated worker thread so it cannot clash
    with any event loop the caller's thread may already be running.

    Fix: the original created a loop with ``asyncio.new_event_loop()`` and
    never closed it, leaking the loop (and its selector) on every call.
    ``asyncio.run`` creates, runs and reliably closes the loop.
    """
    def run_async_scrape():
        return asyncio.run(
            recursive_scrape(url_input, max_pages, max_depth, progress_callback, delay)
        )

    with ThreadPoolExecutor() as executor:
        future = executor.submit(run_async_scrape)
        # Block until scraping finishes and propagate any exception.
        return future.result()
|
49 |
+
|
50 |
+
|
51 |
+
async def recursive_scrape(
        base_url: str,
        max_pages: int,
        max_depth: int,
        progress_callback: callable,
        delay: float = 1.0,
        resume_file: str = 'scrape_progress.json',
        user_agent: str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
) -> List[Dict]:
    """Breadth-first crawl of *base_url* up to *max_pages* / *max_depth*.

    Uses one headless Chromium browser (Playwright) for the whole crawl.
    Progress is checkpointed to *resume_file* so an interrupted crawl can be
    resumed on the next call; the file is deleted in the final ``finally``
    block, so a crash between checkpoints loses at most ~10 pages of state.

    Args:
        base_url: Starting URL; child links are only followed if they also
            start with this prefix.
        max_pages: Stop after this many successfully scraped pages.
        max_depth: Do not follow links more than this many hops from base_url.
        progress_callback: Called with a human-readable status string.
        delay: Nominal per-page delay; jittered +/-20% to look less bot-like.
        resume_file: JSON checkpoint path.
        user_agent: Browser UA string for the Playwright context.

    Returns:
        List of article dicts as produced by ``scrape_article_async``.
    """
    async def save_progress():
        # Write to a temp file first so a crash mid-write cannot corrupt the
        # existing checkpoint; os.replace is atomic on the same filesystem.
        temp_file = resume_file + ".tmp"
        with open(temp_file, 'w') as f:
            json.dump({
                'visited': list(visited),
                'to_visit': to_visit,
                'scraped_articles': scraped_articles,
                'pages_scraped': pages_scraped
            }, f)
        os.replace(temp_file, resume_file)  # Atomic replace

    def is_valid_url(url: str) -> bool:
        # Minimal sanity check only; scheme-relative and mailto links fail it.
        return url.startswith("http") and len(url) > 0

    # Load progress if resume file exists, otherwise start fresh from base_url.
    if os.path.exists(resume_file):
        with open(resume_file, 'r') as f:
            progress_data = json.load(f)
            visited = set(progress_data['visited'])
            to_visit = progress_data['to_visit']
            scraped_articles = progress_data['scraped_articles']
            pages_scraped = progress_data['pages_scraped']
    else:
        visited = set()
        to_visit = [(base_url, 0)]  # (url, depth) — pop(0) makes this BFS
        scraped_articles = []
        pages_scraped = 0

    try:
        async with async_playwright() as p:
            browser = await p.chromium.launch(headless=True)
            context = await browser.new_context(user_agent=user_agent)

            try:
                while to_visit and pages_scraped < max_pages:
                    current_url, current_depth = to_visit.pop(0)

                    if current_url in visited or current_depth > max_depth:
                        continue

                    visited.add(current_url)

                    # Update progress
                    progress_callback(f"Scraping page {pages_scraped + 1}/{max_pages}: {current_url}")

                    try:
                        # Randomized politeness delay (+/-20% around `delay`).
                        await asyncio.sleep(random.uniform(delay * 0.8, delay * 1.2))

                        # This function should be implemented to handle asynchronous scraping
                        article_data = await scrape_article_async(context, current_url)

                        if article_data and article_data['extraction_successful']:
                            scraped_articles.append(article_data)
                            pages_scraped += 1

                        # If we haven't reached max depth, add child links to to_visit.
                        # NOTE(review): this re-loads the page in a second tab just to
                        # harvest links — the HTML was already fetched above.
                        if current_depth < max_depth:
                            page = await context.new_page()
                            await page.goto(current_url)
                            await page.wait_for_load_state("networkidle")

                            links = await page.eval_on_selector_all('a[href]',
                                                                    "(elements) => elements.map(el => el.href)")
                            for link in links:
                                child_url = urljoin(base_url, link)
                                # Only same-site, unvisited, content-looking URLs.
                                if is_valid_url(child_url) and child_url.startswith(
                                        base_url) and child_url not in visited and should_scrape_url(child_url):
                                    to_visit.append((child_url, current_depth + 1))

                            await page.close()

                    except Exception as e:
                        # A single bad page must not abort the whole crawl.
                        logging.error(f"Error scraping {current_url}: {str(e)}")

                    # Save progress periodically (e.g., every 10 pages).
                    # NOTE: this also fires while pages_scraped is still 0.
                    if pages_scraped % 10 == 0:
                        await save_progress()

            finally:
                await browser.close()

    finally:
        # These statements are guaranteed to run after the scraping is done
        # (or after an exception), so the checkpoint is always flushed once.
        await save_progress()

        # Remove the progress file when scraping is completed successfully.
        # NOTE(review): this finally runs on errors too, so the checkpoint is
        # removed even for aborted crawls — confirm that is intended.
        if os.path.exists(resume_file):
            os.remove(resume_file)

        # Final progress update
        progress_callback(f"Scraping completed. Total pages scraped: {pages_scraped}")

    return scraped_articles
|
153 |
+
|
154 |
+
|
155 |
+
async def scrape_article_async(context, url: str) -> Dict[str, Any]:
    """Fetch *url* in a fresh page of *context* and return title + raw HTML.

    On any failure the returned dict has ``extraction_successful`` False and
    an ``error`` string instead of title/content. The page is always closed.
    """
    page = await context.new_page()
    try:
        await page.goto(url)
        await page.wait_for_load_state("networkidle")
        result = {
            'url': url,
            'title': await page.title(),
            'content': await page.content(),
            'extraction_successful': True
        }
    except Exception as exc:
        logging.error(f"Error scraping article {url}: {str(exc)}")
        result = {
            'url': url,
            'extraction_successful': False,
            'error': str(exc)
        }
    finally:
        await page.close()
    return result
|
179 |
+
|
180 |
+
|
181 |
+
def scrape_article_sync(url: str) -> Dict[str, Any]:
    """Synchronous variant of the article scraper.

    Launches its own headless Chromium via Playwright's sync API, fetches
    *url*, and returns the same success/error dict shape as the async
    version. The browser is always closed before returning.
    """
    with sync_playwright() as playwright:
        browser = playwright.chromium.launch(headless=True)
        page = browser.new_page()
        try:
            page.goto(url)
            page.wait_for_load_state("networkidle")
            result = {
                'url': url,
                'title': page.title(),
                'content': page.content(),
                'extraction_successful': True
            }
        except Exception as exc:
            logging.error(f"Error scraping article {url}: {str(exc)}")
            result = {
                'url': url,
                'extraction_successful': False,
                'error': str(exc)
            }
        finally:
            browser.close()
        return result
|
207 |
+
|
208 |
+
|
209 |
+
def should_scrape_url(url: str) -> bool:
    """Heuristically decide whether *url* looks like scrapable article content.

    Listing/admin/auth pages and binary assets are rejected; everything else
    is accepted (article-like paths just match an explicit allow check first).
    """
    path = urlparse(url).path.lower()

    # Reject listing pages, WordPress internals, auth/commerce pages and
    # obvious binary assets.
    excluded_markers = (
        '/tag/', '/category/', '/author/', '/search/', '/page/',
        'wp-content', 'wp-includes', 'wp-json', 'wp-admin',
        'login', 'register', 'cart', 'checkout', 'account',
        '.jpg', '.png', '.gif', '.pdf', '.zip',
    )
    if any(marker in path for marker in excluded_markers):
        return False

    # Explicitly accept paths that look like articles or blog posts.
    article_markers = ('/article/', '/post/', '/blog/')
    if any(marker in path for marker in article_markers):
        return True

    # Anything not excluded is accepted by default.
    return True
|
233 |
+
|
234 |
+
|
235 |
+
async def scrape_with_retry(url: str, max_retries: int = 3, retry_delay: float = 5.0):
    """Scrape a single article, retrying only on Playwright timeouts.

    Args:
        url: The article URL to scrape (delegated to ``scrape_article``).
        max_retries: Total attempts on timeout before giving up.
        retry_delay: Seconds to wait between timeout retries.

    Returns:
        Whatever ``scrape_article`` returns on success, or ``None`` after
        exhausting retries or on any non-timeout error.
    """
    for attempt in range(max_retries):
        try:
            return await scrape_article(url)
        except TimeoutError:
            # NOTE: this is playwright.async_api.TimeoutError (imported at the
            # top of this file), which shadows the builtin TimeoutError here.
            if attempt < max_retries - 1:
                logging.warning(f"Timeout error scraping {url}. Retrying in {retry_delay} seconds...")
                await asyncio.sleep(retry_delay)
            else:
                logging.error(f"Failed to scrape {url} after {max_retries} attempts.")
                return None
        except Exception as e:
            # Non-timeout failures are not retried: log and bail out.
            logging.error(f"Error scraping {url}: {str(e)}")
            return None
|
249 |
+
|
250 |
|
251 |
def create_website_scraping_tab():
    """Build the "Website Scraping" Gradio tab.

    Left column: scraping-method radio plus method-specific controls
    (URL level slider, recursive-crawl limits), prompt/summarization
    options and API credentials. Right column: progress and result
    textboxes. Event wiring (visibility toggles, preset-prompt loading,
    and the scrape button) follows the layout.
    """
    with gr.TabItem("Website Scraping"):
        gr.Markdown("# Scrape Websites & Summarize Articles")
        with gr.Row():
            with gr.Column():
                scrape_method = gr.Radio(
                    ["Individual URLs", "Sitemap", "URL Level", "Recursive Scraping"],
                    label="Scraping Method",
                    value="Individual URLs"
                )
                url_input = gr.Textbox(
                    label="Article URLs or Base URL",
                    placeholder="Enter article URLs here, one per line, or base URL for sitemap/URL level/recursive scraping",
                    lines=5
                )
                # Shown only for the "URL Level" method (see update_ui_for_scrape_method).
                url_level = gr.Slider(
                    minimum=1,
                    maximum=10,
                    step=1,
                    label="URL Level (for URL Level scraping)",
                    value=2,
                    visible=False
                )
                # The two sliders below are shown only for "Recursive Scraping".
                max_pages = gr.Slider(
                    minimum=1,
                    maximum=100,
                    step=1,
                    label="Maximum Pages to Scrape (for Recursive Scraping)",
                    value=10,
                    visible=False
                )
                max_depth = gr.Slider(
                    minimum=1,
                    maximum=10,
                    step=1,
                    label="Maximum Depth (for Recursive Scraping)",
                    value=3,
                    visible=False
                )
                custom_article_title_input = gr.Textbox(
                    label="Custom Article Titles (Optional, one per line)",
                    placeholder="Enter custom titles for the articles, one per line",
                    lines=5
                )
                with gr.Row():
                    summarize_checkbox = gr.Checkbox(label="Summarize Articles", value=False)
                    custom_prompt_checkbox = gr.Checkbox(label="Use a Custom Prompt", value=False, visible=True)
                    preset_prompt_checkbox = gr.Checkbox(label="Use a pre-set Prompt", value=False, visible=True)
                with gr.Row():
                    temp_slider = gr.Slider(0.1, 2.0, 0.7, label="Temperature")
                with gr.Row():
                    preset_prompt = gr.Dropdown(
                        label="Select Preset Prompt",
                        choices=load_preset_prompts(),
                        visible=False
                    )
                with gr.Row():
                    website_custom_prompt_input = gr.Textbox(
                        label="Custom Prompt",
                        placeholder="Enter custom prompt here",
                        lines=3,
                        visible=False
                    )
                with gr.Row():
                    # Default system prompt for bulleted-note summarization.
                    # The value is part of runtime behavior — do not edit.
                    system_prompt_input = gr.Textbox(
                        label="System Prompt",
                        value="""<s>You are a bulleted notes specialist. [INST]```When creating comprehensive bulleted notes, you should follow these guidelines: Use multiple headings based on the referenced topics, not categories like quotes or terms. Headings should be surrounded by bold formatting and not be listed as bullet points themselves. Leave no space between headings and their corresponding list items underneath. Important terms within the content should be emphasized by setting them in bold font. Any text that ends with a colon should also be bolded. Before submitting your response, review the instructions, and make any corrections necessary to adhered to the specified format. Do not reference these instructions within the notes.``` \nBased on the content between backticks create comprehensive bulleted notes.[/INST]
**Bulleted Note Creation Guidelines**

**Headings**:
- Based on referenced topics, not categories like quotes or terms
- Surrounded by **bold** formatting
- Not listed as bullet points
- No space between headings and list items underneath

**Emphasis**:
- **Important terms** set in bold font
- **Text ending in a colon**: also bolded

**Review**:
- Ensure adherence to specified format
- Do not reference these instructions in your response.</s>[INST] {{ .Prompt }} [/INST]
""",
                        lines=3,
                        visible=False
                    )

                api_name_input = gr.Dropdown(
                    choices=[None, "Local-LLM", "OpenAI", "Anthropic", "Cohere", "Groq", "DeepSeek", "Mistral",
                             "OpenRouter",
                             "Llama.cpp", "Kobold", "Ooba", "Tabbyapi", "VLLM", "ollama", "HuggingFace",
                             "Custom-OpenAI-API"],
                    value=None,
                    label="API Name (Mandatory for Summarization)"
                )
                api_key_input = gr.Textbox(
                    label="API Key (Mandatory if API Name is specified)",
                    placeholder="Enter your API key here; Ignore if using Local API or Built-in API",
                    type="password"
                )
                keywords_input = gr.Textbox(
                    label="Keywords",
                    placeholder="Enter keywords here (comma-separated)",
                    value="default,no_keyword_set",
                    visible=True
                )

                scrape_button = gr.Button("Scrape and Summarize")
            with gr.Column():
                # NOTE(review): progress_output is never written to by any
                # handler below — confirm whether it should be wired up.
                progress_output = gr.Textbox(label="Progress", lines=3)
                result_output = gr.Textbox(label="Result", lines=20)

        def update_ui_for_scrape_method(method):
            """Toggle method-specific controls and relabel the URL textbox."""
            url_level_update = gr.update(visible=(method == "URL Level"))
            max_pages_update = gr.update(visible=(method == "Recursive Scraping"))
            max_depth_update = gr.update(visible=(method == "Recursive Scraping"))
            url_input_update = gr.update(
                label="Article URLs" if method == "Individual URLs" else "Base URL",
                placeholder="Enter article URLs here, one per line" if method == "Individual URLs" else "Enter the base URL for scraping"
            )
            return url_level_update, max_pages_update, max_depth_update, url_input_update

        scrape_method.change(
            fn=update_ui_for_scrape_method,
            inputs=[scrape_method],
            outputs=[url_level, max_pages, max_depth, url_input]
        )

        # Show/hide the custom-prompt and system-prompt boxes together.
        custom_prompt_checkbox.change(
            fn=lambda x: (gr.update(visible=x), gr.update(visible=x)),
            inputs=[custom_prompt_checkbox],
            outputs=[website_custom_prompt_input, system_prompt_input]
        )
        preset_prompt_checkbox.change(
            fn=lambda x: gr.update(visible=x),
            inputs=[preset_prompt_checkbox],
            outputs=[preset_prompt]
        )

        def update_prompts(preset_name):
            """Load the chosen preset into the user/system prompt boxes."""
            prompts = update_user_prompt(preset_name)
            return (
                gr.update(value=prompts["user_prompt"], visible=True),
                gr.update(value=prompts["system_prompt"], visible=True)
            )

        preset_prompt.change(
            update_prompts,
            inputs=preset_prompt,
            outputs=[website_custom_prompt_input, system_prompt_input]
        )

        async def scrape_and_summarize_wrapper(
                scrape_method: str,
                url_input: str,
                url_level: Optional[int],
                max_pages: int,
                max_depth: int,
                summarize_checkbox: bool,
                custom_prompt: Optional[str],
                api_name: Optional[str],
                api_key: Optional[str],
                keywords: str,
                custom_titles: Optional[str],
                system_prompt: Optional[str],
                temperature: float = 0.7,
                progress: gr.Progress = gr.Progress()
        ) -> str:
            """Dispatch to the selected scraping backend, optionally summarize
            each article, and render everything as one markdown report.

            Returns markdown (via convert_json_to_markdown); all errors are
            folded into an error report rather than raised to Gradio.
            """
            try:
                result: List[Dict[str, Any]] = []

                if scrape_method == "Individual URLs":
                    result = await scrape_and_summarize_multiple(url_input, custom_prompt, api_name, api_key, keywords,
                                                                 custom_titles, system_prompt)
                elif scrape_method == "Sitemap":
                    result = scrape_from_sitemap(url_input)
                elif scrape_method == "URL Level":
                    if url_level is None:
                        return convert_json_to_markdown(
                            json.dumps({"error": "URL level is required for URL Level scraping."}))
                    result = scrape_by_url_level(url_input, url_level)
                elif scrape_method == "Recursive Scraping":
                    # NOTE(review): passes progress.update as the status callback —
                    # confirm gr.Progress exposes an update(str) method in this
                    # Gradio version.
                    result = await recursive_scrape(url_input, max_pages, max_depth, progress.update, delay=1.0)
                else:
                    return convert_json_to_markdown(json.dumps({"error": f"Unknown scraping method: {scrape_method}"}))

                # Ensure result is always a list of dictionaries
                if isinstance(result, dict):
                    result = [result]
                elif not isinstance(result, list):
                    raise TypeError(f"Unexpected result type: {type(result)}")

                if summarize_checkbox:
                    total_articles = len(result)
                    for i, article in enumerate(result):
                        progress.update(f"Summarizing article {i + 1}/{total_articles}")
                        summary = summarize(article['content'], custom_prompt, api_name, api_key, temperature,
                                            system_prompt)
                        article['summary'] = summary

                # Concatenate all content
                all_content = "\n\n".join(
                    [f"# {article.get('title', 'Untitled')}\n\n{article.get('content', '')}\n\n" +
                     (f"Summary: {article.get('summary', '')}" if summarize_checkbox else "")
                     for article in result])

                # Collect all unique URLs
                all_urls = list(set(article.get('url', '') for article in result if article.get('url')))

                # Structure the output for the entire website collection
                website_collection = {
                    "base_url": url_input,
                    "scrape_method": scrape_method,
                    "summarization_performed": summarize_checkbox,
                    "api_used": api_name if summarize_checkbox else None,
                    "keywords": keywords if summarize_checkbox else None,
                    "url_level": url_level if scrape_method == "URL Level" else None,
                    "max_pages": max_pages if scrape_method == "Recursive Scraping" else None,
                    "max_depth": max_depth if scrape_method == "Recursive Scraping" else None,
                    "total_articles_scraped": len(result),
                    "urls_scraped": all_urls,
                    "content": all_content
                }

                # Convert the JSON to markdown and return
                return convert_json_to_markdown(json.dumps(website_collection, indent=2))
            except Exception as e:
                return convert_json_to_markdown(json.dumps({"error": f"An error occurred: {str(e)}"}))

        # Update the scrape_button.click to include the temperature parameter.
        # The async wrapper is driven to completion with asyncio.run; input
        # order must match the wrapper's positional parameter order exactly.
        scrape_button.click(
            fn=lambda *args: asyncio.run(scrape_and_summarize_wrapper(*args)),
            inputs=[scrape_method, url_input, url_level, max_pages, max_depth, summarize_checkbox,
                    website_custom_prompt_input, api_name_input, api_key_input, keywords_input,
                    custom_article_title_input, system_prompt_input, temp_slider],
            outputs=[result_output]
        )
|
488 |
+
|
489 |
+
|
490 |
+
def convert_json_to_markdown(json_str: str) -> str:
    """
    Convert the JSON output from the scraping process into a markdown document.

    Args:
        json_str (str): JSON-formatted string containing the website collection
            data produced by the scraping wrapper (base_url, scrape_method,
            metadata fields, urls_scraped, content, ...).

    Returns:
        str: Markdown-formatted string of the website collection data, or a
            markdown "# Error" section if the JSON is invalid or incomplete.
    """
    try:
        # Parse the JSON string
        data = json.loads(json_str)

        # Surface scraping errors reported by the producer as a markdown error section.
        if "error" in data:
            return f"# Error\n\n{data['error']}"

        # Start building the markdown string
        markdown = f"# Website Collection: {data['base_url']}\n\n"

        # Metadata section. Optional fields use .get() so a partially
        # populated payload still renders instead of bailing out through
        # the broad KeyError handler below.
        markdown += "## Metadata\n\n"
        markdown += f"- **Scrape Method:** {data['scrape_method']}\n"
        markdown += f"- **API Used:** {data['api_used']}\n"
        markdown += f"- **Keywords:** {data['keywords']}\n"
        if data.get('url_level') is not None:
            markdown += f"- **URL Level:** {data['url_level']}\n"
        # These are emitted by the producer for recursive scrapes but were
        # previously dropped from the rendered output.
        if data.get('max_pages') is not None:
            markdown += f"- **Max Pages:** {data['max_pages']}\n"
        if data.get('max_depth') is not None:
            markdown += f"- **Max Depth:** {data['max_depth']}\n"
        if data.get('summarization_performed') is not None:
            markdown += f"- **Summarization Performed:** {data['summarization_performed']}\n"
        markdown += f"- **Total Articles Scraped:** {data['total_articles_scraped']}\n\n"

        # Add URLs scraped
        markdown += "## URLs Scraped\n\n"
        for url in data['urls_scraped']:
            markdown += f"- {url}\n"
        markdown += "\n"

        # Add the content
        markdown += "## Content\n\n"
        markdown += data['content']

        return markdown

    except json.JSONDecodeError:
        return "# Error\n\nInvalid JSON string provided."
    except KeyError as e:
        return f"# Error\n\nMissing key in JSON data: {str(e)}"
    except Exception as e:
        return f"# Error\n\nAn unexpected error occurred: {str(e)}"
|
538 |
+
#
|
539 |
+
# End of File
|
540 |
+
########################################################################################################################
|