oceansweep committed on
Commit
c8ebc55
·
verified ·
1 Parent(s): f71d2e6

Upload 11 files

Browse files
App_Function_Libraries/Books/Book_Ingestion_Lib.py CHANGED
@@ -14,35 +14,290 @@
14
  # Import necessary libraries
15
  import os
16
  import re
 
 
17
  from datetime import datetime
18
  import logging
19
 
20
  import ebooklib
21
  from bs4 import BeautifulSoup
22
  from ebooklib import epub
 
 
23
  #
24
  # Import Local
25
- from App_Function_Libraries.DB.DB_Manager import add_media_with_keywords
 
 
 
26
  #
27
  #######################################################################################################################
28
  # Function Definitions
29
  #
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
 
 
 
 
 
 
32
 
33
  def read_epub(file_path):
34
- """Read and extract text from an EPUB file."""
35
- book = epub.read_epub(file_path)
36
- chapters = []
37
- for item in book.get_items():
38
- if item.get_type() == ebooklib.ITEM_DOCUMENT:
39
- chapters.append(item.get_content())
40
 
41
- text = ""
42
- for html_content in chapters:
43
- soup = BeautifulSoup(html_content, 'html.parser')
44
- text += soup.get_text() + "\n\n"
45
- return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
 
48
  # Ingest a text file into the database with Title/Author/Keywords
@@ -57,15 +312,28 @@ def extract_epub_metadata(content):
57
 
58
 
59
  def ingest_text_file(file_path, title=None, author=None, keywords=None):
 
 
 
 
 
 
 
 
 
 
 
 
60
  try:
61
  with open(file_path, 'r', encoding='utf-8') as file:
62
  content = file.read()
63
 
64
  # Check if it's a converted epub and extract metadata if so
65
- if 'epub_converted' in (keywords or ''):
66
  extracted_title, extracted_author = extract_epub_metadata(content)
67
  title = title or extracted_title
68
  author = author or extracted_author
 
69
 
70
  # If title is still not provided, use the filename without extension
71
  if not title:
@@ -95,6 +363,7 @@ def ingest_text_file(file_path, title=None, author=None, keywords=None):
95
  ingestion_date=datetime.now().strftime('%Y-%m-%d')
96
  )
97
 
 
98
  return f"Text file '{title}' by {author} ingested successfully."
99
  except Exception as e:
100
  logging.error(f"Error ingesting text file: {str(e)}")
@@ -102,68 +371,133 @@ def ingest_text_file(file_path, title=None, author=None, keywords=None):
102
 
103
 
104
  def ingest_folder(folder_path, keywords=None):
 
 
 
 
 
 
 
 
 
 
105
  results = []
106
- for filename in os.listdir(folder_path):
107
- if filename.lower().endswith('.txt'):
108
- file_path = os.path.join(folder_path, filename)
109
- result = ingest_text_file(file_path, keywords=keywords)
110
- results.append(result)
 
 
 
 
 
 
 
 
111
 
112
 
113
  def epub_to_markdown(epub_path):
114
- book = epub.read_epub(epub_path)
115
- markdown_content = "# Table of Contents\n\n"
116
- chapters = []
117
-
118
- # Extract and format the table of contents
119
- toc = book.toc
120
- for item in toc:
121
- if isinstance(item, tuple):
122
- section, children = item
123
- level = 1
124
- markdown_content += format_toc_item(section, level)
125
- for child in children:
126
- markdown_content += format_toc_item(child, level + 1)
127
- else:
128
- markdown_content += format_toc_item(item, 1)
129
 
130
- markdown_content += "\n---\n\n"
 
131
 
132
- # Process each chapter
133
- for item in book.get_items():
134
- if item.get_type() == ebooklib.ITEM_DOCUMENT:
135
- chapter_content = item.get_content().decode('utf-8')
136
- soup = BeautifulSoup(chapter_content, 'html.parser')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
 
138
- # Extract chapter title
139
- title = soup.find(['h1', 'h2', 'h3'])
140
- if title:
141
- chapter_title = title.get_text()
142
- markdown_content += f"# {chapter_title}\n\n"
143
 
144
- # Process chapter content
145
- for elem in soup.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ul', 'ol']):
146
- if elem.name.startswith('h'):
147
- level = int(elem.name[1])
148
- markdown_content += f"{'#' * level} {elem.get_text()}\n\n"
149
- elif elem.name == 'p':
150
- markdown_content += f"{elem.get_text()}\n\n"
151
- elif elem.name in ['ul', 'ol']:
152
- for li in elem.find_all('li'):
153
- markdown_content += f"- {li.get_text()}\n"
154
- markdown_content += "\n"
155
 
156
- markdown_content += "---\n\n"
 
 
157
 
158
- return markdown_content
 
 
159
 
 
 
 
 
 
 
 
 
 
 
160
 
161
- def format_toc_item(item, level):
162
- return f"{' ' * (level - 1)}- [{item.title}](#{slugify(item.title)})\n"
 
 
163
 
164
 
165
  def slugify(text):
166
- return re.sub(r'[\W_]+', '-', text.lower())
 
 
 
 
 
 
 
 
 
167
 
168
  #
169
  # End of Function Definitions
 
14
  # Import necessary libraries
15
  import os
16
  import re
17
+ import tempfile
18
+ import zipfile
19
  from datetime import datetime
20
  import logging
21
 
22
  import ebooklib
23
  from bs4 import BeautifulSoup
24
  from ebooklib import epub
25
+
26
+ from App_Function_Libraries.Chunk_Lib import chunk_ebook_by_chapters
27
  #
28
  # Import Local
29
+ from App_Function_Libraries.DB.DB_Manager import add_media_with_keywords, add_media_to_database
30
+ from App_Function_Libraries.Summarization.Summarization_General_Lib import perform_summarization
31
+
32
+
33
  #
34
  #######################################################################################################################
35
  # Function Definitions
36
  #
37
 
38
def import_epub(file_path, title=None, author=None, keywords=None, custom_prompt=None, system_prompt=None, summary=None,
                auto_summarize=False, api_name=None, api_key=None, chunk_options=None, custom_chapter_pattern=None):
    """
    Imports an EPUB file, extracts its content, chunks it, optionally summarizes it, and adds it to the database.

    Parameters:
        - file_path (str): Path to the EPUB file.
        - title (str, optional): Title of the book. Falls back to extracted metadata, then the file name.
        - author (str, optional): Author of the book. Falls back to extracted metadata, then "Unknown".
        - keywords (str, optional): Comma-separated keywords for the book.
        - custom_prompt (str, optional): Custom user prompt for summarization.
        - system_prompt (str, optional): System message forwarded to the summarizer.
        - summary (str, optional): Predefined summary of the book.
        - auto_summarize (bool, optional): Whether to auto-summarize the chunks.
        - api_name (str, optional): API name for summarization.
        - api_key (str, optional): API key for summarization.
        - chunk_options (dict, optional): Options for chunking. The caller's dict is not modified.
        - custom_chapter_pattern (str, optional): Custom regex pattern for chapter detection.

    Returns:
        - str: Status message indicating success or failure.
    """
    try:
        logging.info(f"Importing EPUB file from {file_path}")

        # Convert EPUB to Markdown
        markdown_content = epub_to_markdown(file_path)
        logging.debug("Converted EPUB to Markdown.")

        # Extract metadata if not provided
        if not title or not author:
            extracted_title, extracted_author = extract_epub_metadata(markdown_content)
            title = title or extracted_title or os.path.splitext(os.path.basename(file_path))[0]
            author = author or extracted_author or "Unknown"
            logging.debug(f"Extracted metadata - Title: {title}, Author: {author}")

        # Process keywords, dropping blanks produced by stray commas ("a, ,b")
        keyword_list = [kw.strip() for kw in keywords.split(',') if kw.strip()] if keywords else []
        logging.debug(f"Keywords: {keyword_list}")

        # Set default chunk options if not provided
        if chunk_options is None:
            chunk_options = {
                'method': 'chapter',
                'max_size': 500,
                'overlap': 200,
                'custom_chapter_pattern': custom_chapter_pattern
            }
        else:
            # Work on a copy so the defaults added here never leak back into the caller's dict
            chunk_options = dict(chunk_options)
            chunk_options.setdefault('method', 'chapter')
            chunk_options.setdefault('custom_chapter_pattern', custom_chapter_pattern)

        # Chunk the content by chapters
        chunks = chunk_ebook_by_chapters(markdown_content, chunk_options)
        logging.info(f"Total chunks created: {len(chunks)}")
        if chunks:
            logging.debug(f"Structure of first chunk: {chunks[0].keys()}")

        # Handle summarization if enabled
        if auto_summarize and api_name and api_key:
            logging.info("Auto-summarization is enabled.")
            summarized_chunks = []
            chunk_summaries = []
            for chunk in chunks:
                # Same text lookup as the segment preparation below, so both agree on what a chunk contains
                chunk_text = chunk.get('text', chunk.get('content', ''))
                if chunk_text:
                    summary_text = perform_summarization(api_name, chunk_text, custom_prompt, api_key,
                                                         recursive_summarization=False, temp=None,
                                                         system_message=system_prompt)
                    # A chunk is not guaranteed to carry a 'metadata' dict; create one instead of raising KeyError
                    metadata = chunk.get('metadata')
                    if not isinstance(metadata, dict):
                        metadata = {}
                        chunk['metadata'] = metadata
                    metadata['summary'] = summary_text
                    if isinstance(summary_text, str) and summary_text.strip():
                        chunk_summaries.append(summary_text.strip())
                    summarized_chunks.append(chunk)
            chunks = summarized_chunks
            logging.info("Summarization of chunks completed.")
            # Without this the database would receive summary=None whenever no summary was supplied
            if not summary:
                summary = "\n\n".join(chunk_summaries) or "No summary provided."
        else:
            if auto_summarize:
                logging.warning("Auto-summarization requested but API name or key is missing; skipping.")
            # If not summarizing, set a default summary or use provided summary
            if summary:
                logging.debug("Using provided summary.")
            else:
                summary = "No summary provided."

        # Create info_dict
        info_dict = {
            'title': title,
            'uploader': author,
            'ingestion_date': datetime.now().strftime('%Y-%m-%d')
        }

        # Prepare segments for database
        segments = [{'Text': chunk.get('text', chunk.get('content', ''))} for chunk in chunks]
        logging.debug(f"Prepared segments for database. Number of segments: {len(segments)}")

        # Add to database
        result = add_media_to_database(
            url=file_path,
            info_dict=info_dict,
            segments=segments,
            summary=summary,
            keywords=keyword_list,
            custom_prompt_input=custom_prompt,
            whisper_model="Imported",
            media_type="ebook",
            overwrite=False
        )

        logging.info(f"Ebook '{title}' by {author} imported successfully. Database result: {result}")
        return f"Ebook '{title}' by {author} imported successfully. Database result: {result}"

    except Exception as e:
        # Boundary handler: callers display the returned string, so report instead of raising
        logging.exception(f"Error importing ebook: {str(e)}")
        return f"Error importing ebook: {str(e)}"
146
+
147
+ # FIXME
148
def process_zip_file(zip_file, title, author, keywords, custom_prompt, system_prompt, summary, auto_summarize, api_name, api_key, chunk_options):
    """
    Processes a ZIP file containing multiple EPUB files and imports each one.

    Parameters:
        - zip_file (file-like object): The ZIP file to process; its `name` (or `path`) attribute is used as the archive path.
        - title (str): Title passed to every imported book.
        - author (str): Author name for the books.
        - keywords (str): Comma-separated keywords.
        - custom_prompt (str): Custom user prompt for summarization.
        - system_prompt (str): System message forwarded to the summarizer.
        - summary (str): Predefined summary passed to every imported book.
        - auto_summarize (bool): Whether to auto-summarize the chunks.
        - api_name (str): API name for summarization.
        - api_key (str): API key for summarization.
        - chunk_options (dict): Options for chunking.

    Returns:
        - str: Combined status messages for all EPUB files in the ZIP (empty when it contains none),
          or an error message when the archive itself cannot be processed.
    """
    results = []
    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            zip_path = zip_file.name if hasattr(zip_file, 'name') else zip_file.path
            logging.info(f"Extracting ZIP file {zip_path} to temporary directory {temp_dir}")
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(temp_dir)

            # Walk the whole tree: archives commonly wrap their books in a sub-folder,
            # which a top-level-only listing would silently skip.
            for current_dir, dirnames, filenames in os.walk(temp_dir):
                # Skip macOS resource-fork folders and keep traversal order deterministic
                dirnames[:] = sorted(d for d in dirnames if d != '__MACOSX')
                for filename in sorted(filenames):
                    if filename.startswith('._') or not filename.lower().endswith('.epub'):
                        continue
                    file_path = os.path.join(current_dir, filename)
                    # Equals the bare file name for top-level entries
                    display_name = os.path.relpath(file_path, temp_dir)
                    logging.info(f"Processing EPUB file {display_name} from ZIP.")
                    result = import_epub(
                        file_path=file_path,
                        title=title,
                        author=author,
                        keywords=keywords,
                        custom_prompt=custom_prompt,
                        system_prompt=system_prompt,  # previously accepted but never forwarded
                        summary=summary,
                        auto_summarize=auto_summarize,
                        api_name=api_name,
                        api_key=api_key,
                        chunk_options=chunk_options,
                        custom_chapter_pattern=chunk_options.get('custom_chapter_pattern') if chunk_options else None
                    )
                    results.append(f"File: {display_name} - {result}")
            logging.info("Completed processing all EPUB files in the ZIP.")
    except Exception as e:
        logging.exception(f"Error processing ZIP file: {str(e)}")
        return f"Error processing ZIP file: {str(e)}"

    return "\n".join(results)
199
+
200
+
201
def _coerce_int_setting(value, default):
    """Return `value` as an int; blank strings and unsupported types yield `default`. Raises ValueError on bad text."""
    if isinstance(value, str):
        return int(value) if value.strip() else default
    if isinstance(value, int):
        return value
    if isinstance(value, float):
        # Numeric UI widgets may hand back floats (e.g. 500.0); truncate rather than discard the user's choice
        return int(value)
    return default


def import_file_handler(file, title, author, keywords, system_prompt, custom_prompt, auto_summarize, api_name,
                        api_key, max_chunk_size, chunk_overlap, custom_chapter_pattern):
    """
    Dispatches an uploaded file to the matching importer and returns a user-facing status string.

    Parameters:
        - file (file-like object): Uploaded file; its `name` attribute is used as the on-disk path.
        - title (str): Title for the imported book(s).
        - author (str): Author for the imported book(s).
        - keywords (str): Comma-separated keywords.
        - system_prompt (str): System message for summarization.
        - custom_prompt (str): Custom user prompt for summarization.
        - auto_summarize (bool): Whether to auto-summarize the chunks.
        - api_name (str): API name for summarization.
        - api_key (str): API key for summarization.
        - max_chunk_size (int | float | str): Maximum chunk size; blank or unsupported values default to 4000.
        - chunk_overlap (int | float | str): Chunk overlap; blank or unsupported values default to 0.
        - custom_chapter_pattern (str): Custom regex pattern for chapter detection.

    Returns:
        - str: Status message. Never raises; errors are reported in the returned text.
    """
    try:
        # Handle max_chunk_size / chunk_overlap (ValueError on non-numeric text is reported below)
        max_chunk_size = _coerce_int_setting(max_chunk_size, 4000)
        chunk_overlap = _coerce_int_setting(chunk_overlap, 0)

        chunk_options = {
            'method': 'chapter',
            'max_size': max_chunk_size,
            'overlap': chunk_overlap,
            'custom_chapter_pattern': custom_chapter_pattern if custom_chapter_pattern else None
        }

        if file is None:
            return "No file uploaded."

        file_path = file.name
        if not os.path.exists(file_path):
            return "Uploaded file not found."

        lowered_path = file_path.lower()
        if lowered_path.endswith('.epub'):
            status = import_epub(
                file_path,
                title,
                author,
                keywords,
                custom_prompt=custom_prompt,
                system_prompt=system_prompt,
                summary=None,
                auto_summarize=auto_summarize,
                api_name=api_name,
                api_key=api_key,
                chunk_options=chunk_options,
                custom_chapter_pattern=custom_chapter_pattern
            )
            # import_epub reports failure through its return value; do not label that as a success
            if str(status).startswith("Error importing ebook"):
                return f"❌ {status}"
            return f"📚 EPUB Imported Successfully:\n{status}"
        elif lowered_path.endswith('.zip'):
            status = process_zip_file(
                zip_file=file,
                title=title,
                author=author,
                keywords=keywords,
                custom_prompt=custom_prompt,
                system_prompt=system_prompt,
                summary=None,  # Let the library handle summarization
                auto_summarize=auto_summarize,
                api_name=api_name,
                api_key=api_key,
                chunk_options=chunk_options
            )
            # process_zip_file likewise returns an error string when the archive itself fails
            if str(status).startswith("Error processing ZIP file"):
                return f"❌ {status}"
            return f"📦 ZIP Processed Successfully:\n{status}"
        elif lowered_path.endswith(('.chm', '.html', '.pdf', '.xml', '.opml')):
            file_type = file_path.split('.')[-1].upper()
            return f"{file_type} file import is not yet supported."
        else:
            return "❌ Unsupported file type. Please upload an `.epub` file or a `.zip` file containing `.epub` files."

    except ValueError as ve:
        logging.exception(f"Error parsing input values: {str(ve)}")
        return "❌ Error: Invalid input for chunk size or overlap. Please enter valid numbers."
    except Exception as e:
        logging.exception(f"Error during file import: {str(e)}")
        return f"❌ Error during import: {str(e)}"
273
 
274
def read_epub(file_path):
    """
    Extracts the plain text of every document item in an EPUB file.

    Parameters:
        - file_path (str): Path to the EPUB file.

    Returns:
        - str: Text of all document items, each followed by a blank line.

    Any exception is logged and re-raised.
    """
    try:
        logging.info(f"Reading EPUB file from {file_path}")
        book = epub.read_epub(file_path)

        # Gather the raw HTML of every document item first, then convert
        documents = [
            entry.get_content()
            for entry in book.get_items()
            if entry.get_type() == ebooklib.ITEM_DOCUMENT
        ]

        pieces = []
        for markup in documents:
            parsed = BeautifulSoup(markup, 'html.parser')
            pieces.append(parsed.get_text(separator='\n\n') + "\n\n")
        text = "".join(pieces)

        logging.debug("EPUB content extraction completed.")
        return text
    except Exception as e:
        logging.exception(f"Error reading EPUB file: {str(e)}")
        raise
301
 
302
 
303
  # Ingest a text file into the database with Title/Author/Keywords
 
312
 
313
 
314
  def ingest_text_file(file_path, title=None, author=None, keywords=None):
315
+ """
316
+ Ingests a plain text file into the database with optional metadata.
317
+
318
+ Parameters:
319
+ - file_path (str): Path to the text file.
320
+ - title (str, optional): Title of the document.
321
+ - author (str, optional): Author of the document.
322
+ - keywords (str, optional): Comma-separated keywords.
323
+
324
+ Returns:
325
+ - str: Status message indicating success or failure.
326
+ """
327
  try:
328
  with open(file_path, 'r', encoding='utf-8') as file:
329
  content = file.read()
330
 
331
  # Check if it's a converted epub and extract metadata if so
332
+ if 'epub_converted' in (keywords or '').lower():
333
  extracted_title, extracted_author = extract_epub_metadata(content)
334
  title = title or extracted_title
335
  author = author or extracted_author
336
+ logging.debug(f"Extracted metadata for converted EPUB - Title: {title}, Author: {author}")
337
 
338
  # If title is still not provided, use the filename without extension
339
  if not title:
 
363
  ingestion_date=datetime.now().strftime('%Y-%m-%d')
364
  )
365
 
366
+ logging.info(f"Text file '{title}' by {author} ingested successfully.")
367
  return f"Text file '{title}' by {author} ingested successfully."
368
  except Exception as e:
369
  logging.error(f"Error ingesting text file: {str(e)}")
 
371
 
372
 
373
def ingest_folder(folder_path, keywords=None):
    """
    Runs `ingest_text_file` on every `.txt` file directly inside a folder.

    Parameters:
        - folder_path (str): Path to the folder containing text files.
        - keywords (str, optional): Comma-separated keywords passed to each ingestion.

    Returns:
        - str: Newline-joined status messages, or an error message if the folder cannot be processed.
    """
    try:
        logging.info(f"Ingesting all text files from folder {folder_path}")
        # Extension match is case-insensitive; sub-directories are not descended into
        text_files = (entry for entry in os.listdir(folder_path) if entry.lower().endswith('.txt'))
        results = [
            ingest_text_file(os.path.join(folder_path, entry), keywords=keywords)
            for entry in text_files
        ]
        logging.info("Completed ingestion of all text files in the folder.")
    except Exception as e:
        logging.exception(f"Error ingesting folder: {str(e)}")
        return f"Error ingesting folder: {str(e)}"

    return "\n".join(results)
398
 
399
 
400
def _toc_markdown_lines(entries, level):
    """Recursively render TOC entries (links, sections, or (section, children) tuples) as Markdown list lines."""
    lines = []
    for entry in entries:
        if isinstance(entry, tuple) and len(entry) == 2:
            section, children = entry
            lines.append(format_toc_item(section, level))
            # Children may themselves be (section, children) tuples, so recurse instead of assuming two levels
            lines.extend(_toc_markdown_lines(children, level + 1))
        else:
            lines.append(format_toc_item(entry, level))
    return lines


def epub_to_markdown(epub_path):
    """
    Converts an EPUB file to Markdown format, including the table of contents and chapter contents.

    Parameters:
        - epub_path (str): Path to the EPUB file.

    Returns:
        - str: Markdown-formatted content of the EPUB.

    Any exception is logged and re-raised.
    """
    try:
        logging.info(f"Converting EPUB to Markdown from {epub_path}")
        book = epub.read_epub(epub_path)

        # Collect fragments and join once at the end instead of repeated string concatenation
        parts = ["# Table of Contents\n\n"]

        # Extract and format the table of contents (any nesting depth)
        parts.extend(_toc_markdown_lines(book.toc, 1))
        parts.append("\n---\n\n")

        # Process each chapter
        for item in book.get_items():
            if item.get_type() != ebooklib.ITEM_DOCUMENT:
                continue

            # A single undecodable byte should not abort the whole book
            chapter_content = item.get_content().decode('utf-8', errors='replace')
            soup = BeautifulSoup(chapter_content, 'html.parser')

            # Extract chapter title
            # NOTE(review): this heading is emitted again by the element loop below; kept as-is
            # because downstream chapter chunking may depend on the current output - confirm.
            title = soup.find(['h1', 'h2', 'h3'])
            if title:
                parts.append(f"# {title.get_text()}\n\n")

            # Process chapter content
            for elem in soup.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ul', 'ol']):
                if elem.name.startswith('h'):
                    level = int(elem.name[1])
                    parts.append(f"{'#' * level} {elem.get_text()}\n\n")
                elif elem.name == 'p':
                    parts.append(f"{elem.get_text()}\n\n")
                elif elem.name in ['ul', 'ol']:
                    prefix = '-' if elem.name == 'ul' else '1.'
                    for li in elem.find_all('li'):
                        parts.append(f"{prefix} {li.get_text()}\n")
                    parts.append("\n")

            parts.append("---\n\n")

        logging.debug("EPUB to Markdown conversion completed.")
        return "".join(parts)

    except Exception as e:
        logging.exception(f"Error converting EPUB to Markdown: {str(e)}")
        raise
 
 
463
 
 
 
 
 
 
 
 
 
 
 
 
464
 
465
def format_toc_item(item, level):
    """
    Renders one table-of-contents entry as a Markdown list line linking to its slug.

    Parameters:
        - item (epub.Link or epub.Section): TOC item; any other object is rendered via `str(item)`.
        - level (int): Nesting level; each level above 1 adds one unit of indentation.

    Returns:
        - str: Markdown-formatted TOC line, or an empty string if formatting fails.
    """
    try:
        indent = ' ' * (level - 1)
        has_title = isinstance(item, (epub.Link, epub.Section))
        title = item.title if has_title else str(item)
        anchor = slugify(title)
        return f"{indent}- [{title}](#{anchor})\n"
    except Exception as e:
        logging.exception(f"Error formatting TOC item: {str(e)}")
        return ""
488
 
489
 
490
def slugify(text):
    """
    Builds a lowercase, hyphen-separated slug for use as a Markdown anchor.

    Parameters:
        - text (str): The text to slugify.

    Returns:
        - str: `text` lowercased, with each run of non-word characters or underscores
          collapsed to a single hyphen and leading/trailing hyphens removed.
    """
    lowered = text.lower()
    hyphenated = re.sub(r'[\W_]+', '-', lowered)
    return hyphenated.strip('-')
501
 
502
  #
503
  # End of Function Definitions
App_Function_Libraries/Character_Chat/Character_Chat_Lib.py ADDED
@@ -0,0 +1,541 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Character_Chat_Lib.py
2
+ # Description: Functions for character chat cards.
3
+ #
4
+ # Imports
5
+ import json
6
+ import logging
7
+ import io
8
+ import base64
9
+ from typing import Dict, Any, Optional, List, Tuple
10
+ #
11
+ # External Imports
12
+ from PIL import Image
13
+ #
14
+ # Local imports
15
+ from App_Function_Libraries.DB.DB_Manager import get_character_card_by_id, get_character_chat_by_id
16
+ #
17
+ # Constants
18
+ ####################################################################################################
19
+ #
20
+ # Functions
21
+
22
+ # Using https://github.com/malfoyslastname/character-card-spec-v2 as the standard for v2 character cards
23
+
24
+ #################################################################################
25
+ #
26
+ # Placeholder functions:
27
+
28
def replace_placeholders(text: str, char_name: str, user_name: str) -> str:
    """
    Replace placeholders in the given text with appropriate values.

    Args:
        text (str): The text containing placeholders. Non-string values (e.g. None for an
            unset card field) are returned unchanged instead of raising.
        char_name (str): The name of the character. If None, '{{char}}' is left in place.
        user_name (str): The name of the user. Defaults to "User" when empty or None.

    Returns:
        str: The text with placeholders replaced.
    """
    if not isinstance(text, str):
        return text

    if not user_name:
        user_name = "User"  # Same default that replace_user_placeholder applies

    replacements = {
        '{{char}}': char_name,
        '{{user}}': user_name,
        '{{random_user}}': user_name  # Assuming random_user is the same as user for simplicity
    }

    for placeholder, value in replacements.items():
        if value is None:
            continue  # Nothing to substitute; keep the placeholder visible rather than crash
        text = text.replace(placeholder, str(value))

    return text
50
+
51
def replace_user_placeholder(history, user_name):
    """
    Substitute the user's name for every '{{user}}' in a chat history.

    Args:
        history (list): Chat history as (user_message, bot_message) pairs.
        user_name (str): The name entered by the user; "User" is used when it is empty.

    Returns:
        list: New list of (user_message, bot_message) tuples with placeholders replaced.
            Empty or None messages are passed through untouched.
    """
    display_name = user_name if user_name else "User"

    def _fill(message):
        # Falsy messages (None, "") are kept exactly as they are
        return message.replace("{{user}}", display_name) if message else message

    return [(_fill(user_msg), _fill(bot_msg)) for user_msg, bot_msg in history]
75
+
76
+ #
77
+ # End of Placeholder functions
78
+ #################################################################################
79
+
80
+ #################################################################################
81
+ #
82
+ # f
83
+
84
def extract_character_id(choice: str) -> int:
    """
    Pull the numeric ID out of a dropdown label of the form '<name> (ID: <n>)'.

    Args:
        choice (str): The dropdown selection string.

    Returns:
        int: The ID found after the first '(ID: ' marker, with trailing ')' removed.
    """
    marker = '(ID: '
    after_marker = choice.split(marker)[1]
    return int(after_marker.rstrip(')'))
87
+
88
def load_character_wrapper(character_id: int, user_name: str) -> Tuple[Dict[str, Any], List[Tuple[Optional[str], str]], Optional[Image.Image]]:
    """
    Thin pass-through to `load_character_and_image`.

    Args:
        character_id (int): The ID of the character to load.
        user_name (str): The name of the user, forwarded unchanged.

    Returns:
        The (character data, chat history, image) triple produced by `load_character_and_image`.
    """
    character, history, portrait = load_character_and_image(character_id, user_name)
    return character, history, portrait
92
+
93
def parse_character_book(book_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Normalise the character book section of a V2 character card.

    Args:
        book_data (Dict[str, Any]): The raw character book data from the character card.

    Returns:
        Dict[str, Any]: Book-level fields plus an 'entries' list, with defaults filled in
            for optional fields.

    Raises:
        KeyError: If an entry lacks 'keys', 'content', 'enabled' or 'insertion_order'.
    """

    def _normalise_entry(raw: Dict[str, Any]) -> Dict[str, Any]:
        # Bracket access marks the fields this parser treats as mandatory
        return {
            'keys': raw['keys'],
            'content': raw['content'],
            'extensions': raw.get('extensions', {}),
            'enabled': raw['enabled'],
            'insertion_order': raw['insertion_order'],
            'case_sensitive': raw.get('case_sensitive', False),
            'name': raw.get('name', ''),
            'priority': raw.get('priority'),
            'id': raw.get('id'),
            'comment': raw.get('comment', ''),
            'selective': raw.get('selective', False),
            'secondary_keys': raw.get('secondary_keys', []),
            'constant': raw.get('constant', False),
            'position': raw.get('position'),
        }

    return {
        'name': book_data.get('name', ''),
        'description': book_data.get('description', ''),
        'scan_depth': book_data.get('scan_depth'),
        'token_budget': book_data.get('token_budget'),
        'recursive_scanning': book_data.get('recursive_scanning', False),
        'extensions': book_data.get('extensions', {}),
        'entries': [_normalise_entry(raw) for raw in book_data.get('entries', [])],
    }
133
+
134
def load_character_and_image(character_id: int, user_name: str) -> Tuple[Optional[Dict[str, Any]], List[Tuple[Optional[str], str]], Optional[Image.Image]]:
    """
    Load a character and its associated image based on the character ID.

    Args:
        character_id (int): The ID of the character to load.
        user_name (str): The name of the user, used for placeholder replacement.

    Returns:
        Tuple[Optional[Dict[str, Any]], List[Tuple[Optional[str], str]], Optional[Image.Image]]:
            A tuple containing the character data, chat history, and character image (if available).
            (None, [], None) is returned when the character is missing or loading fails.
    """
    try:
        char_data = get_character_card_by_id(character_id)
        if not char_data:
            logging.warning(f"No character data found for ID: {character_id}")
            return None, [], None

        # Replace placeholders in character data.
        # Only string values are processed: a field that is present but None would otherwise
        # raise here and the broad handler below would discard the whole character.
        for field in ['first_mes', 'mes_example', 'scenario', 'description', 'personality']:
            if isinstance(char_data.get(field), str):
                char_data[field] = replace_placeholders(char_data[field], char_data['name'], user_name)

        # first_mes already had its placeholders replaced above; fall back to a stock greeting
        # when the card does not define one.
        first_mes = char_data.get('first_mes')
        if first_mes is None:
            first_mes = "Hello! I'm ready to chat."

        chat_history = [(None, first_mes)] if first_mes else []

        img = None
        if char_data.get('image'):
            try:
                image_data = base64.b64decode(char_data['image'])
                img = Image.open(io.BytesIO(image_data)).convert("RGBA")
            except Exception as e:
                # A broken image must not prevent the character itself from loading
                logging.error(f"Error processing image for character '{char_data['name']}': {e}")

        return char_data, chat_history, img

    except Exception as e:
        # logging.exception keeps the traceback, which the plain error message dropped
        logging.exception(f"Error in load_character_and_image: {e}")
        return None, [], None
176
+
177
def load_chat_and_character(chat_id: int, user_name: str) -> Tuple[Optional[Dict[str, Any]], List[Tuple[str, str]], Optional[Image.Image]]:
    """
    Load a chat and its associated character, including the character image and process templates.

    Args:
        chat_id (int): The ID of the chat to load.
        user_name (str): The name of the user.

    Returns:
        Tuple[Optional[Dict[str, Any]], List[Tuple[str, str]], Optional[Image.Image]]:
            A tuple containing the character data, processed chat history, and character image (if available).
            If the chat exists but its character does not, the stored (unprocessed) history is
            returned with None for the character and image. (None, [], None) on any other failure.
    """
    try:
        # Load the chat
        chat = get_character_chat_by_id(chat_id)
        if not chat:
            logging.warning(f"No chat found with ID: {chat_id}")
            return None, [], None

        # Load the associated character
        character_id = chat['character_id']
        char_data = get_character_card_by_id(character_id)
        if not char_data:
            logging.warning(f"No character found for chat ID: {chat_id}")
            return None, chat['chat_history'], None

        # Process the chat history
        processed_history = process_chat_history(chat['chat_history'], char_data['name'], user_name)

        # Load the character image
        img = None
        if char_data.get('image'):
            try:
                image_data = base64.b64decode(char_data['image'])
                img = Image.open(io.BytesIO(image_data)).convert("RGBA")
            except Exception as e:
                # A broken image must not prevent the chat from loading
                logging.error(f"Error processing image for character '{char_data['name']}': {e}")

        # Process character data templates.
        # Only string values are processed: a field that is present but None would otherwise
        # raise here and the broad handler below would discard the whole chat.
        for field in ['first_mes', 'mes_example', 'scenario', 'description', 'personality']:
            if isinstance(char_data.get(field), str):
                char_data[field] = replace_placeholders(char_data[field], char_data['name'], user_name)

        return char_data, processed_history, img

    except Exception as e:
        # logging.exception keeps the traceback, which the plain error message dropped
        logging.exception(f"Error in load_chat_and_character: {e}")
        return None, [], None
225
+
226
def extract_json_from_image(image_file):
    """
    Extract embedded character-card JSON text from an image.

    The image's 'chara' metadata entry is tried first (base64-decoded as UTF-8). If that is
    absent or cannot be decoded, the PNG-encoded image bytes are scanned for a JSON object.

    Args:
        image_file: Anything accepted by PIL's Image.open. Its `name` attribute, when present,
            is used for logging only.

    Returns:
        Optional[str]: The JSON text if found, otherwise None. Errors are logged, not raised.
    """
    # Paths and in-memory buffers have no .name; fall back to the object itself for the log line
    source_name = getattr(image_file, 'name', image_file)
    logging.debug(f"Attempting to extract JSON from image: {source_name}")
    try:
        with Image.open(image_file) as img:
            logging.debug("Image opened successfully")
            metadata = img.info
            if 'chara' in metadata:
                logging.debug("Found 'chara' in image metadata")
                chara_content = metadata['chara']
                logging.debug(f"Content of 'chara' metadata (first 100 chars): {chara_content[:100]}...")
                try:
                    decoded_content = base64.b64decode(chara_content).decode('utf-8')
                    logging.debug(f"Decoded content (first 100 chars): {decoded_content[:100]}...")
                    return decoded_content
                except Exception as e:
                    logging.error(f"Error decoding base64 content: {e}")
                    logging.warning("Could not decode 'chara' metadata, attempting to find JSON data in image bytes")
            else:
                logging.warning("'chara' not found in metadata, attempting to find JSON data in image bytes")

            # Alternative method to extract embedded JSON from image bytes if metadata is not available
            # NOTE(review): this scans a freshly re-encoded PNG, not the uploaded file's own bytes,
            # so data embedded in the original file may not survive the re-save - confirm whether
            # the raw upload should be scanned instead.
            img_byte_arr = io.BytesIO()
            img.save(img_byte_arr, format='PNG')
            img_bytes = img_byte_arr.getvalue()
            img_str = img_bytes.decode('latin1')  # Use 'latin1' to preserve byte values

            # Search for JSON-like structures in the image bytes
            json_start = img_str.find('{')
            json_end = img_str.rfind('}')
            if json_start != -1 and json_end != -1 and json_end > json_start:
                possible_json = img_str[json_start:json_end+1]
                try:
                    json.loads(possible_json)
                    logging.debug("Found JSON data in image bytes")
                    return possible_json
                except json.JSONDecodeError:
                    logging.debug("No valid JSON found in image bytes")

            logging.warning("No JSON data found in the image")
    except Exception as e:
        logging.error(f"Error extracting JSON from image: {e}")
    return None
266
+
267
+
268
+
269
def load_chat_history(file):
    """
    Read an exported chat file and return its history and character name.

    The file content is decoded as UTF-8 JSON. The history is taken from the
    'history' key (or 'messages' if that is missing/empty) and the character
    name from 'character' (or 'character_name').

    Args:
        file: Binary file-like object whose ``read()`` returns bytes.

    Returns:
        Tuple of (history, character_name), or (None, None) when either value
        is missing/empty or any error occurs while reading or parsing.
    """
    try:
        payload = json.loads(file.read().decode('utf-8'))

        # Extract history and character name, accepting either key spelling
        messages = payload.get('history') or payload.get('messages')
        speaker = payload.get('character') or payload.get('character_name')

        if messages and speaker:
            return messages, speaker

        logging.error("Chat history or character name missing in the imported file.")
    except Exception as e:
        logging.error(f"Error loading chat history: {e}")
    return None, None
286
+
287
+
288
def process_chat_history(chat_history: List[Tuple[str, str]], char_name: str, user_name: str) -> List[Tuple[str, str]]:
    """
    Substitute placeholders in every message of a chat history.

    Args:
        chat_history (List[Tuple[str, str]]): Pairs of (user message, character message).
        char_name (str): The name of the character.
        user_name (str): The name of the user.

    Returns:
        List[Tuple[str, str]]: A new list of pairs; falsy messages (empty or None)
        are passed through unchanged, all others go through replace_placeholders.
    """
    def _fill(message):
        # Falsy messages are left as-is rather than handed to replace_placeholders.
        return replace_placeholders(message, char_name, user_name) if message else message

    return [(_fill(user_msg), _fill(char_msg)) for user_msg, char_msg in chat_history]
308
+
309
def validate_character_book(book_data):
    """
    Validate the 'character_book' field in the character card.

    Args:
        book_data (dict): The character book data. Any non-dict value is
            reported as invalid instead of raising.

    Returns:
        Tuple[bool, List[str]]: A tuple containing a boolean indicating validity and a list of validation messages.
    """
    # Membership tests below would raise TypeError on e.g. None or an int,
    # so report a malformed book as a validation failure instead.
    if not isinstance(book_data, dict):
        return False, ["Field 'character_book' must be a dictionary."]

    validation_messages = []

    # Optional fields with expected types
    optional_fields = {
        'name': str,
        'description': str,
        'scan_depth': (int, float),
        'token_budget': (int, float),
        'recursive_scanning': bool,
        'extensions': dict,
        'entries': list
    }

    for field, expected_type in optional_fields.items():
        if field in book_data and not isinstance(book_data[field], expected_type):
            validation_messages.append(f"Field 'character_book.{field}' must be of type '{expected_type}'.")

    # 'entries' is required; without a list there is nothing further to check
    if 'entries' not in book_data or not isinstance(book_data['entries'], list):
        validation_messages.append("Field 'character_book.entries' is required and must be a list.")
        return False, validation_messages

    # Validate each entry in 'entries'; entry_ids is shared so duplicate ids are caught across entries
    entry_ids = set()
    for idx, entry in enumerate(book_data['entries']):
        if not isinstance(entry, dict):
            # The entry validator expects a mapping; flag anything else here.
            validation_messages.append(f"Entry {idx}: Entry must be a dictionary.")
            continue
        is_valid_entry, entry_messages = validate_character_book_entry(entry, idx, entry_ids)
        if not is_valid_entry:
            validation_messages.extend(entry_messages)

    is_valid = len(validation_messages) == 0
    return is_valid, validation_messages
351
+
352
def validate_character_book_entry(entry, idx, entry_ids):
    """
    Validate an entry in the 'character_book.entries' list.

    Args:
        entry (dict): The entry data. A non-dict value is reported as invalid
            instead of raising.
        idx (int): The index of the entry in the list (used in messages).
        entry_ids (set): A set of existing entry IDs for uniqueness checking.
            It is mutated: a previously unseen, hashable 'id' is added to it.

    Returns:
        Tuple[bool, List[str]]: A tuple containing a boolean indicating validity and a list of validation messages.
    """
    # Every check below assumes a mapping; bail out early on anything else.
    if not isinstance(entry, dict):
        return False, [f"Entry {idx}: Entry must be a dictionary."]

    validation_messages = []
    required_fields = {
        'keys': list,
        'content': str,
        'extensions': dict,
        'enabled': bool,
        'insertion_order': (int, float)
    }

    for field, expected_type in required_fields.items():
        if field not in entry:
            validation_messages.append(f"Entry {idx}: Missing required field '{field}'.")
        elif not isinstance(entry[field], expected_type):
            validation_messages.append(f"Entry {idx}: Field '{field}' must be of type '{expected_type}'.")
        elif field == 'content' and not entry[field].strip():
            validation_messages.append(f"Entry {idx}: Field 'content' cannot be empty.")
        elif field == 'keys' and not entry[field]:
            validation_messages.append(f"Entry {idx}: Field 'keys' cannot be empty.")

    # Optional fields
    optional_fields = {
        'case_sensitive': bool,
        'name': str,
        'priority': (int, float),
        'id': (int, float),
        'comment': str,
        'selective': bool,
        'secondary_keys': list,
        'constant': bool,
        'position': str  # Should be 'before_char' or 'after_char'
    }

    for field, expected_type in optional_fields.items():
        if field in entry and not isinstance(entry[field], expected_type):
            validation_messages.append(f"Entry {idx}: Field '{field}' must be of type '{expected_type}'.")

    # Validate 'position' value if present
    if 'position' in entry and entry['position'] not in ['before_char', 'after_char']:
        validation_messages.append(f"Entry {idx}: Field 'position' must be 'before_char' or 'after_char'.")

    # Validate 'secondary_keys' if 'selective' is True
    if entry.get('selective', False):
        if 'secondary_keys' not in entry or not isinstance(entry['secondary_keys'], list):
            validation_messages.append(f"Entry {idx}: 'secondary_keys' must be a list when 'selective' is True.")
        elif not entry['secondary_keys']:
            validation_messages.append(f"Entry {idx}: 'secondary_keys' cannot be empty when 'selective' is True.")

    # Validate 'keys' and 'secondary_keys' list elements
    for list_field in ('keys', 'secondary_keys'):
        if list_field in entry and isinstance(entry[list_field], list):
            for i, key in enumerate(entry[list_field]):
                if not isinstance(key, str) or not key.strip():
                    validation_messages.append(
                        f"Entry {idx}: Element {i} in '{list_field}' must be a non-empty string.")

    # Validate 'id' uniqueness
    if 'id' in entry:
        entry_id = entry['id']
        try:
            if entry_id in entry_ids:
                validation_messages.append(
                    f"Entry {idx}: Duplicate 'id' value '{entry_id}'. Each entry 'id' must be unique.")
            else:
                entry_ids.add(entry_id)
        except TypeError:
            # Unhashable id (list/dict/...): it cannot be tracked, and the type
            # check above has already reported it as not being a number.
            pass

    # Validate 'extensions' keys are namespaced
    if 'extensions' in entry and isinstance(entry['extensions'], dict):
        for key in entry['extensions']:
            # A non-string key cannot carry a namespace separator either.
            if not isinstance(key, str) or ('/' not in key and '_' not in key):
                validation_messages.append(
                    f"Entry {idx}: Extension key '{key}' in 'extensions' should be namespaced to prevent conflicts.")

    is_valid = len(validation_messages) == 0
    return is_valid, validation_messages
442
+
443
def validate_v2_card(card_data):
    """
    Validate a character card according to the V2 specification.

    Malformed input (a non-dict card, a non-dict 'data' field, a
    'spec_version' that is not number-like, a non-dict 'character_book') is
    reported through the returned messages rather than raising.

    Args:
        card_data (dict): The parsed character card data.

    Returns:
        Tuple[bool, List[str]]: A tuple containing a boolean indicating validity and a list of validation messages.
    """
    if not isinstance(card_data, dict):
        return False, ["Character card must be a dictionary."]

    validation_messages = []

    # Check top-level fields
    if 'spec' not in card_data:
        validation_messages.append("Missing 'spec' field.")
    elif card_data['spec'] != 'chara_card_v2':
        validation_messages.append(f"Invalid 'spec' value: {card_data['spec']}. Expected 'chara_card_v2'.")

    if 'spec_version' not in card_data:
        validation_messages.append("Missing 'spec_version' field.")
    else:
        # Ensure 'spec_version' is '2.0' or higher
        try:
            spec_version = float(card_data['spec_version'])
            if spec_version < 2.0:
                validation_messages.append(
                    f"'spec_version' must be '2.0' or higher. Found '{card_data['spec_version']}'.")
        except (TypeError, ValueError):
            # float() raises TypeError for None/list/dict and ValueError for bad strings.
            validation_messages.append(
                f"Invalid 'spec_version' format: {card_data['spec_version']}. Must be a number as a string.")

    if 'data' not in card_data:
        validation_messages.append("Missing 'data' field.")
        return False, validation_messages  # Cannot proceed without 'data' field

    data = card_data['data']
    if not isinstance(data, dict):
        validation_messages.append("Field 'data' must be a dictionary.")
        return False, validation_messages  # Field checks below need a mapping

    # Required fields in 'data'
    required_fields = ['name', 'description', 'personality', 'scenario', 'first_mes', 'mes_example']
    for field in required_fields:
        if field not in data:
            validation_messages.append(f"Missing required field in 'data': '{field}'.")
        elif not isinstance(data[field], str):
            validation_messages.append(f"Field '{field}' must be a string.")
        elif not data[field].strip():
            validation_messages.append(f"Field '{field}' cannot be empty.")

    # Optional fields with expected types
    optional_fields = {
        'creator_notes': str,
        'system_prompt': str,
        'post_history_instructions': str,
        'alternate_greetings': list,
        'tags': list,
        'creator': str,
        'character_version': str,
        'extensions': dict,
        'character_book': dict  # If present, should be a dict
    }

    for field, expected_type in optional_fields.items():
        if field in data:
            if not isinstance(data[field], expected_type):
                validation_messages.append(f"Field '{field}' must be of type '{expected_type.__name__}'.")
            elif field == 'extensions':
                # Validate that extensions keys are properly namespaced
                for key in data[field]:
                    if not isinstance(key, str) or ('/' not in key and '_' not in key):
                        validation_messages.append(
                            f"Extension key '{key}' in 'extensions' should be namespaced to prevent conflicts.")

    # If 'alternate_greetings' is present, check that it's a list of non-empty strings
    if 'alternate_greetings' in data and isinstance(data['alternate_greetings'], list):
        for idx, greeting in enumerate(data['alternate_greetings']):
            if not isinstance(greeting, str) or not greeting.strip():
                validation_messages.append(f"Element {idx} in 'alternate_greetings' must be a non-empty string.")

    # If 'tags' is present, check that it's a list of non-empty strings
    if 'tags' in data and isinstance(data['tags'], list):
        for idx, tag in enumerate(data['tags']):
            if not isinstance(tag, str) or not tag.strip():
                validation_messages.append(f"Element {idx} in 'tags' must be a non-empty string.")

    # Validate 'extensions' field (kept for message compatibility with earlier output)
    if 'extensions' in data and not isinstance(data['extensions'], dict):
        validation_messages.append("Field 'extensions' must be a dictionary.")

    # Validate 'character_book' if present. A non-dict value has already been
    # reported by the optional-field type check, so only descend into real dicts.
    if isinstance(data.get('character_book'), dict):
        is_valid_book, book_messages = validate_character_book(data['character_book'])
        if not is_valid_book:
            validation_messages.extend(book_messages)

    is_valid = len(validation_messages) == 0
    return is_valid, validation_messages
538
+
539
+ #
540
+ # End of File
541
+ ####################################################################################################
App_Function_Libraries/Character_Chat/__init__.py ADDED
File without changes
App_Function_Libraries/DB/Character_Chat_DB.py CHANGED
@@ -1,684 +1,701 @@
1
- # character_chat_db.py
2
- # Database functions for managing character cards and chat histories.
3
- # #
4
- # Imports
5
- import configparser
6
- import sqlite3
7
- import json
8
- import os
9
- import sys
10
- from typing import List, Dict, Optional, Tuple, Any, Union
11
-
12
- from App_Function_Libraries.Utils.Utils import get_database_dir, get_project_relative_path, get_database_path
13
- import logging
14
-
15
- #
16
- #######################################################################################################################
17
- #
18
- #
19
-
20
def ensure_database_directory():
    """Create the directory returned by get_database_dir() if it does not already exist."""
    # exist_ok=True makes this a no-op when the directory is already there.
    os.makedirs(get_database_dir(), exist_ok=True)

# Runs at import time, before the database path is resolved below.
ensure_database_directory()
24
-
25
-
26
# Construct the path to the config file
config_path = get_project_relative_path('Config_Files/config.txt')

# Read the config file
# (ConfigParser.read() does not raise when the file is missing; the fallback below then applies.)
config = configparser.ConfigParser()
config.read(config_path)

# Get the chat db path from the config, or use the default if not specified
chat_DB_PATH = config.get('Database', 'chatDB_path', fallback=get_database_path('chatDB.db'))
# Printed at import time so the resolved database location is visible on startup.
print(f"Chat Database path: {chat_DB_PATH}")
36
-
37
- ########################################################################################################
38
- #
39
- # Functions
40
-
41
- # FIXME - Setup properly and test/add documentation for its existence...
42
def initialize_database():
    """
    Initialize the SQLite database with required tables and FTS5 virtual tables.

    Creates (if missing) the CharacterCards, CharacterChats and ChatKeywords
    tables, the CharacterChats_fts external-content FTS5 index, the triggers
    that keep that index in sync, and the keyword lookup indexes.

    Raises:
        sqlite3.Error: Re-raised after logging and rollback if any statement fails.
        Exception: Any other error is likewise logged, rolled back and re-raised.
    """
    conn = None
    try:
        conn = sqlite3.connect(chat_DB_PATH)
        cursor = conn.cursor()

        # Enable foreign key constraints
        cursor.execute("PRAGMA foreign_keys = ON;")

        # Create CharacterCards table with V2 fields
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS CharacterCards (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT UNIQUE NOT NULL,
                description TEXT,
                personality TEXT,
                scenario TEXT,
                image BLOB,
                post_history_instructions TEXT,
                first_mes TEXT,
                mes_example TEXT,
                creator_notes TEXT,
                system_prompt TEXT,
                alternate_greetings TEXT,
                tags TEXT,
                creator TEXT,
                character_version TEXT,
                extensions TEXT,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP
            );
        """)

        # Create CharacterChats table
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS CharacterChats (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                character_id INTEGER NOT NULL,
                conversation_name TEXT,
                chat_history TEXT,
                is_snapshot BOOLEAN DEFAULT FALSE,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (character_id) REFERENCES CharacterCards(id) ON DELETE CASCADE
            );
        """)

        # Create FTS5 virtual table for CharacterChats
        cursor.execute("""
            CREATE VIRTUAL TABLE IF NOT EXISTS CharacterChats_fts USING fts5(
                conversation_name,
                chat_history,
                content='CharacterChats',
                content_rowid='id'
            );
        """)

        # Create triggers to keep FTS5 table in sync with CharacterChats.
        # CharacterChats_fts is an external-content table, so old index entries
        # must be removed with the special 'delete' command carrying the OLD
        # column values; a plain DELETE/UPDATE on the FTS table would look the
        # row up in CharacterChats after it has already changed.
        # NOTE: IF NOT EXISTS means databases created earlier keep their
        # previous trigger definitions until those triggers are dropped.
        cursor.executescript("""
            CREATE TRIGGER IF NOT EXISTS CharacterChats_ai AFTER INSERT ON CharacterChats BEGIN
                INSERT INTO CharacterChats_fts(rowid, conversation_name, chat_history)
                VALUES (new.id, new.conversation_name, new.chat_history);
            END;

            CREATE TRIGGER IF NOT EXISTS CharacterChats_ad AFTER DELETE ON CharacterChats BEGIN
                INSERT INTO CharacterChats_fts(CharacterChats_fts, rowid, conversation_name, chat_history)
                VALUES ('delete', old.id, old.conversation_name, old.chat_history);
            END;

            CREATE TRIGGER IF NOT EXISTS CharacterChats_au AFTER UPDATE ON CharacterChats BEGIN
                INSERT INTO CharacterChats_fts(CharacterChats_fts, rowid, conversation_name, chat_history)
                VALUES ('delete', old.id, old.conversation_name, old.chat_history);
                INSERT INTO CharacterChats_fts(rowid, conversation_name, chat_history)
                VALUES (new.id, new.conversation_name, new.chat_history);
            END;
        """)

        # Create ChatKeywords table
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS ChatKeywords (
                chat_id INTEGER NOT NULL,
                keyword TEXT NOT NULL,
                FOREIGN KEY (chat_id) REFERENCES CharacterChats(id) ON DELETE CASCADE
            );
        """)

        # Create indexes for faster searches
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_chatkeywords_keyword ON ChatKeywords(keyword);
        """)
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_chatkeywords_chat_id ON ChatKeywords(chat_id);
        """)

        conn.commit()
        logging.info("Database initialized successfully.")
    except sqlite3.Error as e:
        logging.error(f"SQLite error occurred during database initialization: {e}")
        if conn:
            conn.rollback()
        raise
    except Exception as e:
        logging.error(f"Unexpected error occurred during database initialization: {e}")
        if conn:
            conn.rollback()
        raise
    finally:
        if conn:
            conn.close()
147
-
148
- # Call initialize_database() at the start of your application
149
def setup_chat_database():
    """Run initialize_database(); on any failure log a critical message and exit the process with status 1."""
    try:
        initialize_database()
    except Exception as e:
        logging.critical(f"Failed to initialize database: {e}")
        # A missing/broken chat database is treated as fatal for the whole application.
        sys.exit(1)

# Executed at import time so the schema exists before any function below is used.
setup_chat_database()
157
-
158
- ########################################################################################################
159
- #
160
- # Character Card handling
161
-
162
def parse_character_card(card_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Normalize a character card dict into the flat V2 column layout.

    Plain-text fields default to '' when absent; 'alternate_greetings', 'tags'
    and 'extensions' are serialized to JSON strings (defaulting to an empty
    list/list/dict). The 'image' key is copied through only when the input
    contains it. No validation of the values is performed here.
    """
    def _text(field):
        return card_data.get(field, '')

    def _as_json(field, default):
        return json.dumps(card_data.get(field, default))

    leading_text_fields = (
        'name', 'description', 'personality', 'scenario', 'first_mes',
        'mes_example', 'creator_notes', 'system_prompt', 'post_history_instructions',
    )
    parsed = {field: _text(field) for field in leading_text_fields}
    parsed['alternate_greetings'] = _as_json('alternate_greetings', [])
    parsed['tags'] = _as_json('tags', [])
    parsed['creator'] = _text('creator')
    parsed['character_version'] = _text('character_version')
    parsed['extensions'] = _as_json('extensions', {})

    # Handle 'image' separately as it might be binary data
    if 'image' in card_data:
        parsed['image'] = card_data['image']

    return parsed
186
-
187
-
188
def add_character_card(card_data: Dict[str, Any]) -> Optional[int]:
    """
    Add or update a character card in the database.

    The card is normalized with parse_character_card(). If a card with the
    same name already exists its columns are overwritten; otherwise a new row
    is inserted.

    Args:
        card_data: Raw character card fields.

    Returns:
        The id of the updated or inserted row, or None if the operation failed
        (the error is logged).
    """
    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        parsed_card = parse_character_card(card_data)
        # parse_character_card() only sets 'image' when the input has one, so a
        # direct subscript would raise KeyError for image-less cards and the
        # whole save would be dropped. Store NULL in that case instead.
        # NOTE(review): on the update path this clears a previously stored
        # image when the incoming card has none — confirm that is desired.
        image = parsed_card.get('image')

        # Check if character already exists
        cursor.execute("SELECT id FROM CharacterCards WHERE name = ?", (parsed_card['name'],))
        row = cursor.fetchone()

        if row:
            # Update existing character
            character_id = row[0]
            update_query = """
                UPDATE CharacterCards
                SET description = ?, personality = ?, scenario = ?, image = ?,
                    post_history_instructions = ?, first_mes = ?, mes_example = ?,
                    creator_notes = ?, system_prompt = ?, alternate_greetings = ?,
                    tags = ?, creator = ?, character_version = ?, extensions = ?
                WHERE id = ?
            """
            cursor.execute(update_query, (
                parsed_card['description'], parsed_card['personality'], parsed_card['scenario'],
                image, parsed_card['post_history_instructions'], parsed_card['first_mes'],
                parsed_card['mes_example'], parsed_card['creator_notes'], parsed_card['system_prompt'],
                parsed_card['alternate_greetings'], parsed_card['tags'], parsed_card['creator'],
                parsed_card['character_version'], parsed_card['extensions'], character_id
            ))
        else:
            # Insert new character
            insert_query = """
                INSERT INTO CharacterCards (name, description, personality, scenario, image,
                    post_history_instructions, first_mes, mes_example, creator_notes, system_prompt,
                    alternate_greetings, tags, creator, character_version, extensions)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """
            cursor.execute(insert_query, (
                parsed_card['name'], parsed_card['description'], parsed_card['personality'],
                parsed_card['scenario'], image, parsed_card['post_history_instructions'],
                parsed_card['first_mes'], parsed_card['mes_example'], parsed_card['creator_notes'],
                parsed_card['system_prompt'], parsed_card['alternate_greetings'], parsed_card['tags'],
                parsed_card['creator'], parsed_card['character_version'], parsed_card['extensions']
            ))
            character_id = cursor.lastrowid

        conn.commit()
        return character_id
    except sqlite3.IntegrityError as e:
        logging.error(f"Error adding character card: {e}")
        return None
    except Exception as e:
        logging.error(f"Unexpected error adding character card: {e}")
        return None
    finally:
        conn.close()
244
-
245
- # def add_character_card(card_data: Dict) -> Optional[int]:
246
- # """Add or update a character card in the database.
247
- #
248
- # Returns the ID of the inserted character or None if failed.
249
- # """
250
- # conn = sqlite3.connect(chat_DB_PATH)
251
- # cursor = conn.cursor()
252
- # try:
253
- # # Ensure all required fields are present
254
- # required_fields = ['name', 'description', 'personality', 'scenario', 'image', 'post_history_instructions', 'first_message']
255
- # for field in required_fields:
256
- # if field not in card_data:
257
- # card_data[field] = '' # Assign empty string if field is missing
258
- #
259
- # # Check if character already exists
260
- # cursor.execute("SELECT id FROM CharacterCards WHERE name = ?", (card_data['name'],))
261
- # row = cursor.fetchone()
262
- #
263
- # if row:
264
- # # Update existing character
265
- # character_id = row[0]
266
- # cursor.execute("""
267
- # UPDATE CharacterCards
268
- # SET description = ?, personality = ?, scenario = ?, image = ?, post_history_instructions = ?, first_message = ?
269
- # WHERE id = ?
270
- # """, (
271
- # card_data['description'],
272
- # card_data['personality'],
273
- # card_data['scenario'],
274
- # card_data['image'],
275
- # card_data['post_history_instructions'],
276
- # card_data['first_message'],
277
- # character_id
278
- # ))
279
- # else:
280
- # # Insert new character
281
- # cursor.execute("""
282
- # INSERT INTO CharacterCards (name, description, personality, scenario, image, post_history_instructions, first_message)
283
- # VALUES (?, ?, ?, ?, ?, ?, ?)
284
- # """, (
285
- # card_data['name'],
286
- # card_data['description'],
287
- # card_data['personality'],
288
- # card_data['scenario'],
289
- # card_data['image'],
290
- # card_data['post_history_instructions'],
291
- # card_data['first_message']
292
- # ))
293
- # character_id = cursor.lastrowid
294
- #
295
- # conn.commit()
296
- # return cursor.lastrowid
297
- # except sqlite3.IntegrityError as e:
298
- # logging.error(f"Error adding character card: {e}")
299
- # return None
300
- # except Exception as e:
301
- # logging.error(f"Unexpected error adding character card: {e}")
302
- # return None
303
- # finally:
304
- # conn.close()
305
-
306
-
307
def get_character_cards() -> List[Dict]:
    """
    Retrieve all character cards from the database.

    Returns:
        One dict per CharacterCards row, keyed by column name.

    Raises:
        sqlite3.Error: Propagated if the query fails; the connection is still closed.
    """
    logging.debug(f"Fetching characters from DB: {chat_DB_PATH}")
    conn = sqlite3.connect(chat_DB_PATH)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM CharacterCards")
        rows = cursor.fetchall()
        columns = [description[0] for description in cursor.description]
    finally:
        # Close even when the query raises, matching the other helpers in this module.
        conn.close()
    return [dict(zip(columns, row)) for row in rows]
319
-
320
-
321
def get_character_card_by_id(character_id: Union[int, Dict[str, Any]]) -> Optional[Dict[str, Any]]:
    """
    Look up one character card.

    Args:
        character_id: Either the integer row id, or a dict that is taken to be
            an already-loaded character card and returned unchanged.

    Returns:
        The card as a dict keyed by column name; None when no row matches, the
        argument has an unsupported type, or an error occurs (logged).
    """
    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        # A dict is assumed to already be a character card: hand it straight back.
        if isinstance(character_id, dict):
            return character_id

        if not isinstance(character_id, int):
            logging.warning(f"Invalid type for character_id: {type(character_id)}")
            return None

        # Integer id: fetch the character from the database.
        cursor.execute("SELECT * FROM CharacterCards WHERE id = ?", (character_id,))
        record = cursor.fetchone()
        if not record:
            return None
        field_names = [col[0] for col in cursor.description]
        return dict(zip(field_names, record))
    except Exception as e:
        logging.error(f"Error in get_character_card_by_id: {e}")
        return None
    finally:
        conn.close()
352
-
353
-
354
def update_character_card(character_id: int, card_data: Dict) -> bool:
    """
    Update an existing character card.

    Args:
        character_id: Row id of the card to update.
        card_data: New values. The greeting is read from 'first_mes', falling
            back to the legacy 'first_message' key and then to a default greeting.

    Returns:
        True if a row was updated, False if no row matched or a database error
        occurred (logged).
    """
    # The CharacterCards schema names this column 'first_mes'; the former
    # 'first_message' column does not exist there and made every call fail.
    first_mes = card_data.get(
        'first_mes',
        card_data.get('first_message', "Hello! I'm ready to chat.")
    )

    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        cursor.execute("""
            UPDATE CharacterCards
            SET name = ?, description = ?, personality = ?, scenario = ?, image = ?, post_history_instructions = ?, first_mes = ?
            WHERE id = ?
        """, (
            card_data.get('name'),
            card_data.get('description'),
            card_data.get('personality'),
            card_data.get('scenario'),
            card_data.get('image'),
            card_data.get('post_history_instructions', ''),
            first_mes,
            character_id
        ))
        conn.commit()
        return cursor.rowcount > 0
    except sqlite3.Error as e:
        # sqlite3.Error covers IntegrityError (e.g. duplicate name) as well as
        # operational failures, so the function reports False instead of raising.
        logging.error(f"Error updating character card: {e}")
        return False
    finally:
        conn.close()
380
-
381
-
382
def delete_character_card(character_id: int) -> bool:
    """
    Remove a character card together with the chats that reference it.

    Returns:
        True if a CharacterCards row was deleted, False if none matched or a
        database error occurred (logged).
    """
    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    id_param = (character_id,)
    try:
        # Chats go first because they reference the card via a foreign key.
        cursor.execute("DELETE FROM CharacterChats WHERE character_id = ?", id_param)
        cursor.execute("DELETE FROM CharacterCards WHERE id = ?", id_param)
        conn.commit()
        # rowcount reflects the last statement, i.e. the card deletion.
        card_removed = cursor.rowcount > 0
        return card_removed
    except sqlite3.Error as e:
        logging.error(f"Error deleting character card: {e}")
        return False
    finally:
        conn.close()
397
-
398
-
399
def add_character_chat(character_id: int, conversation_name: str, chat_history: List[Tuple[str, str]], keywords: Optional[List[str]] = None, is_snapshot: bool = False) -> Optional[int]:
    """
    Store a new chat for a character and, optionally, its keywords.

    Args:
        character_id (int): The ID of the character.
        conversation_name (str): Name of the conversation.
        chat_history (List[Tuple[str, str]]): List of (user, bot) message tuples; stored as JSON text.
        keywords (Optional[List[str]]): Keywords to associate with this chat; each is stripped and lowercased before storage.
        is_snapshot (bool, optional): Whether this chat is a snapshot.

    Returns:
        Optional[int]: The ID of the inserted chat, or None if a database error occurred (logged).
    """
    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        serialized_history = json.dumps(chat_history)
        chat_row = (character_id, conversation_name, serialized_history, is_snapshot)
        cursor.execute("""
            INSERT INTO CharacterChats (character_id, conversation_name, chat_history, is_snapshot)
            VALUES (?, ?, ?, ?)
        """, chat_row)
        new_chat_id = cursor.lastrowid

        if keywords:
            # Insert keywords into ChatKeywords table, normalized for later lookup
            cursor.executemany("""
                INSERT INTO ChatKeywords (chat_id, keyword)
                VALUES (?, ?)
            """, [(new_chat_id, kw.strip().lower()) for kw in keywords])

        conn.commit()
        return new_chat_id
    except sqlite3.Error as e:
        logging.error(f"Error adding character chat: {e}")
        return None
    finally:
        conn.close()
443
-
444
-
445
def get_character_chats(character_id: Optional[int] = None) -> List[Dict]:
    """
    Retrieve all chats, or chats for a specific character if character_id is provided.

    Args:
        character_id: Restrict results to this character; None returns every chat.

    Returns:
        One dict per CharacterChats row, keyed by column name. 'chat_history'
        is returned as stored (JSON text), not decoded.

    Raises:
        sqlite3.Error: Propagated if the query fails; the connection is still closed.
    """
    conn = sqlite3.connect(chat_DB_PATH)
    try:
        cursor = conn.cursor()
        if character_id is not None:
            cursor.execute("SELECT * FROM CharacterChats WHERE character_id = ?", (character_id,))
        else:
            cursor.execute("SELECT * FROM CharacterChats")
        rows = cursor.fetchall()
        columns = [description[0] for description in cursor.description]
    finally:
        # Close even when the query raises, matching the other helpers in this module.
        conn.close()
    return [dict(zip(columns, row)) for row in rows]
457
-
458
-
459
def get_character_chat_by_id(chat_id: int) -> Optional[Dict]:
    """
    Retrieve a single chat by its ID.

    Returns:
        The chat row as a dict keyed by column name, with 'chat_history'
        decoded from JSON, or None if no row has that id.

    Raises:
        sqlite3.Error: Propagated if the query fails; the connection is still closed.
    """
    conn = sqlite3.connect(chat_DB_PATH)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM CharacterChats WHERE id = ?", (chat_id,))
        row = cursor.fetchone()
        # Read column names while the connection is still open.
        columns = [description[0] for description in cursor.description]
    finally:
        conn.close()

    if row:
        chat = dict(zip(columns, row))
        chat['chat_history'] = json.loads(chat['chat_history'])
        return chat
    return None
472
-
473
-
474
def search_character_chats(query: str) -> Tuple[List[Dict], str]:
    """
    Full-text search over stored chats via the CharacterChats_fts index.

    Args:
        query (str): The search query, passed to FTS5 MATCH as a bound parameter.

    Returns:
        Tuple[List[Dict], str]: Matching chats (id, conversation_name,
        chat_history) ordered by rank, plus a human-readable status message.
        A blank query or any error yields an empty list with an explanatory message.
    """
    if not query.strip():
        return [], "Please enter a search query."

    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        # Use parameterized queries to prevent SQL injection
        cursor.execute("""
            SELECT CharacterChats.id, CharacterChats.conversation_name, CharacterChats.chat_history
            FROM CharacterChats_fts
            JOIN CharacterChats ON CharacterChats_fts.rowid = CharacterChats.id
            WHERE CharacterChats_fts MATCH ?
            ORDER BY rank
        """, (query,))
        records = cursor.fetchall()
        field_names = [col[0] for col in cursor.description]
        matches = [dict(zip(field_names, record)) for record in records]
        return matches, f"Found {len(matches)} chat(s) matching '{query}'."
    except Exception as e:
        logging.error(f"Error searching chats with FTS5: {e}")
        return [], f"Error occurred during search: {e}"
    finally:
        conn.close()
508
-
509
def update_character_chat(chat_id: int, chat_history: List[Tuple[str, str]]) -> bool:
    """
    Replace the stored history of an existing chat.

    Args:
        chat_id: Row id of the chat to update.
        chat_history: New list of message pairs; stored as JSON text.

    Returns:
        True if a row was updated, False if none matched or a database error
        occurred (logged).
    """
    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        serialized_history = json.dumps(chat_history)
        cursor.execute("""
            UPDATE CharacterChats
            SET chat_history = ?
            WHERE id = ?
        """, (serialized_history, chat_id))
        conn.commit()
        row_changed = cursor.rowcount > 0
        return row_changed
    except sqlite3.Error as e:
        logging.error(f"Error updating character chat: {e}")
        return False
    finally:
        conn.close()
530
-
531
-
532
- def delete_character_chat(chat_id: int) -> bool:
533
- """Delete a specific chat."""
534
- conn = sqlite3.connect(chat_DB_PATH)
535
- cursor = conn.cursor()
536
- try:
537
- cursor.execute("DELETE FROM CharacterChats WHERE id = ?", (chat_id,))
538
- conn.commit()
539
- return cursor.rowcount > 0
540
- except sqlite3.Error as e:
541
- logging.error(f"Error deleting character chat: {e}")
542
- return False
543
- finally:
544
- conn.close()
545
-
546
- def fetch_keywords_for_chats(keywords: List[str]) -> List[int]:
547
- """
548
- Fetch chat IDs associated with any of the specified keywords.
549
-
550
- Args:
551
- keywords (List[str]): List of keywords to search for.
552
-
553
- Returns:
554
- List[int]: List of chat IDs associated with the keywords.
555
- """
556
- if not keywords:
557
- return []
558
-
559
- conn = sqlite3.connect(chat_DB_PATH)
560
- cursor = conn.cursor()
561
- try:
562
- # Construct the WHERE clause to search for each keyword
563
- keyword_clauses = " OR ".join(["keyword = ?"] * len(keywords))
564
- sql_query = f"SELECT DISTINCT chat_id FROM ChatKeywords WHERE {keyword_clauses}"
565
- cursor.execute(sql_query, keywords)
566
- rows = cursor.fetchall()
567
- chat_ids = [row[0] for row in rows]
568
- return chat_ids
569
- except Exception as e:
570
- logging.error(f"Error in fetch_keywords_for_chats: {e}")
571
- return []
572
- finally:
573
- conn.close()
574
-
575
- def save_chat_history_to_character_db(character_id: int, conversation_name: str, chat_history: List[Tuple[str, str]]) -> Optional[int]:
576
- """Save chat history to the CharacterChats table.
577
-
578
- Returns the ID of the inserted chat or None if failed.
579
- """
580
- return add_character_chat(character_id, conversation_name, chat_history)
581
-
582
- def migrate_chat_to_media_db():
583
- pass
584
-
585
-
586
- def search_db(query: str, fields: List[str], where_clause: str = "", page: int = 1, results_per_page: int = 5) -> List[Dict[str, Any]]:
587
- """
588
- Perform a full-text search on specified fields with optional filtering and pagination.
589
-
590
- Args:
591
- query (str): The search query.
592
- fields (List[str]): List of fields to search in.
593
- where_clause (str, optional): Additional SQL WHERE clause to filter results.
594
- page (int, optional): Page number for pagination.
595
- results_per_page (int, optional): Number of results per page.
596
-
597
- Returns:
598
- List[Dict[str, Any]]: List of matching chat records with content and metadata.
599
- """
600
- if not query.strip():
601
- return []
602
-
603
- conn = sqlite3.connect(chat_DB_PATH)
604
- cursor = conn.cursor()
605
- try:
606
- # Construct the MATCH query for FTS5
607
- match_query = " AND ".join(fields) + f" MATCH ?"
608
- # Adjust the query with the fields
609
- fts_query = f"""
610
- SELECT CharacterChats.id, CharacterChats.conversation_name, CharacterChats.chat_history
611
- FROM CharacterChats_fts
612
- JOIN CharacterChats ON CharacterChats_fts.rowid = CharacterChats.id
613
- WHERE {match_query}
614
- """
615
- if where_clause:
616
- fts_query += f" AND ({where_clause})"
617
- fts_query += " ORDER BY rank LIMIT ? OFFSET ?"
618
- offset = (page - 1) * results_per_page
619
- cursor.execute(fts_query, (query, results_per_page, offset))
620
- rows = cursor.fetchall()
621
- columns = [description[0] for description in cursor.description]
622
- results = [dict(zip(columns, row)) for row in rows]
623
- return results
624
- except Exception as e:
625
- logging.error(f"Error in search_db: {e}")
626
- return []
627
- finally:
628
- conn.close()
629
-
630
-
631
- def perform_full_text_search_chat(query: str, relevant_chat_ids: List[int], page: int = 1, results_per_page: int = 5) -> \
632
- List[Dict[str, Any]]:
633
- """
634
- Perform a full-text search within the specified chat IDs using FTS5.
635
-
636
- Args:
637
- query (str): The user's query.
638
- relevant_chat_ids (List[int]): List of chat IDs to search within.
639
- page (int): Pagination page number.
640
- results_per_page (int): Number of results per page.
641
-
642
- Returns:
643
- List[Dict[str, Any]]: List of search results with content and metadata.
644
- """
645
- try:
646
- # Construct a WHERE clause to limit the search to relevant chat IDs
647
- where_clause = " OR ".join([f"media_id = {chat_id}" for chat_id in relevant_chat_ids])
648
- if not where_clause:
649
- where_clause = "1" # No restriction if no chat IDs
650
-
651
- # Perform full-text search using FTS5
652
- fts_results = search_db(query, ["content"], where_clause, page=page, results_per_page=results_per_page)
653
-
654
- filtered_fts_results = [
655
- {
656
- "content": result['content'],
657
- "metadata": {"media_id": result['id']}
658
- }
659
- for result in fts_results
660
- if result['id'] in relevant_chat_ids
661
- ]
662
- return filtered_fts_results
663
- except Exception as e:
664
- logging.error(f"Error in perform_full_text_search_chat: {str(e)}")
665
- return []
666
-
667
-
668
- def fetch_all_chats() -> List[Dict[str, Any]]:
669
- """
670
- Fetch all chat messages from the database.
671
-
672
- Returns:
673
- List[Dict[str, Any]]: List of chat messages with relevant metadata.
674
- """
675
- try:
676
- chats = get_character_chats() # Modify this function to retrieve all chats
677
- return chats
678
- except Exception as e:
679
- logging.error(f"Error fetching all chats: {str(e)}")
680
- return []
681
-
682
- #
683
- # End of Character_Chat_DB.py
684
- #######################################################################################################################
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # character_chat_db.py
2
+ # Database functions for managing character cards and chat histories.
3
+ # #
4
+ # Imports
5
+ import configparser
6
+ import sqlite3
7
+ import json
8
+ import os
9
+ import sys
10
+ from typing import List, Dict, Optional, Tuple, Any, Union
11
+
12
+ from App_Function_Libraries.Utils.Utils import get_database_dir, get_project_relative_path, get_database_path
13
+ from Tests.Chat_APIs.Chat_APIs_Integration_test import logging
14
+
15
+ #
16
+ #######################################################################################################################
17
+ #
18
+ #
19
+
20
def ensure_database_directory():
    """Create the database directory (and any missing parents) if it does not exist yet."""
    database_dir = get_database_dir()
    os.makedirs(database_dir, exist_ok=True)
22
+
23
+ ensure_database_directory()
24
+
25
+
26
# Construct the path to the config file
config_path = get_project_relative_path('Config_Files/config.txt')

# Read the config file.
# NOTE: ConfigParser.read() silently skips files it cannot open, so a missing
# config file does not raise here; the fallback below is used instead.
config = configparser.ConfigParser()
config.read(config_path)

# Get the chat db path from the config, or use the default if not specified.
# The fallback expression is evaluated eagerly, i.e. get_database_path() is
# called even when the config provides a value.
chat_DB_PATH = config.get('Database', 'chatDB_path', fallback=get_database_path('chatDB.db'))
# Runs at import time: every process importing this module prints the resolved path.
print(f"Chat Database path: {chat_DB_PATH}")
36
+
37
+ ########################################################################################################
38
+ #
39
+ # Functions
40
+
41
+ # FIXME - Setup properly and test/add documentation for its existence...
42
def initialize_database():
    """Initialize the SQLite database with required tables and FTS5 virtual tables.

    Creates (if missing) the CharacterCards, CharacterChats and ChatKeywords
    tables, the CharacterChats_fts full-text index, the triggers that keep the
    index in sync with CharacterChats, and the ChatKeywords lookup indexes.

    Raises:
        sqlite3.Error: If any statement fails. The error is logged, the
            transaction is rolled back and the exception is re-raised.
        Exception: Any other unexpected error, handled the same way.
    """
    conn = None
    try:
        conn = sqlite3.connect(chat_DB_PATH)
        cursor = conn.cursor()

        # Enable foreign key constraints (in SQLite this setting is per connection)
        cursor.execute("PRAGMA foreign_keys = ON;")

        # Create CharacterCards table with V2 fields
        cursor.execute("""
        CREATE TABLE IF NOT EXISTS CharacterCards (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            name TEXT UNIQUE NOT NULL,
            description TEXT,
            personality TEXT,
            scenario TEXT,
            image BLOB,
            post_history_instructions TEXT,
            first_mes TEXT,
            mes_example TEXT,
            creator_notes TEXT,
            system_prompt TEXT,
            alternate_greetings TEXT,
            tags TEXT,
            creator TEXT,
            character_version TEXT,
            extensions TEXT,
            created_at DATETIME DEFAULT CURRENT_TIMESTAMP
        );
        """)

        # Create CharacterChats table
        cursor.execute("""
        CREATE TABLE IF NOT EXISTS CharacterChats (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            character_id INTEGER NOT NULL,
            conversation_name TEXT,
            chat_history TEXT,
            is_snapshot BOOLEAN DEFAULT FALSE,
            created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
            FOREIGN KEY (character_id) REFERENCES CharacterCards(id) ON DELETE CASCADE
        );
        """)

        # Create FTS5 virtual table for CharacterChats (external-content table:
        # the text lives in CharacterChats, the FTS table only stores the index)
        cursor.execute("""
        CREATE VIRTUAL TABLE IF NOT EXISTS CharacterChats_fts USING fts5(
            conversation_name,
            chat_history,
            content='CharacterChats',
            content_rowid='id'
        );
        """)

        # Create triggers to keep FTS5 table in sync with CharacterChats.
        # For an external-content FTS5 table, index entries must be removed with
        # the special 'delete' command and the OLD column values; a plain
        # DELETE/UPDATE on the FTS table runs after the content row has already
        # changed and leaves stale terms in the index. The triggers are dropped
        # first so databases created with the earlier definitions pick up the
        # corrected ones.
        # NOTE(review): an index that already went through updates/deletes under
        # the old triggers may still hold stale terms; running
        # INSERT INTO CharacterChats_fts(CharacterChats_fts) VALUES('rebuild')
        # once repairs it.
        cursor.executescript("""
        DROP TRIGGER IF EXISTS CharacterChats_ai;
        DROP TRIGGER IF EXISTS CharacterChats_ad;
        DROP TRIGGER IF EXISTS CharacterChats_au;

        CREATE TRIGGER CharacterChats_ai AFTER INSERT ON CharacterChats BEGIN
            INSERT INTO CharacterChats_fts(rowid, conversation_name, chat_history)
            VALUES (new.id, new.conversation_name, new.chat_history);
        END;

        CREATE TRIGGER CharacterChats_ad AFTER DELETE ON CharacterChats BEGIN
            INSERT INTO CharacterChats_fts(CharacterChats_fts, rowid, conversation_name, chat_history)
            VALUES ('delete', old.id, old.conversation_name, old.chat_history);
        END;

        CREATE TRIGGER CharacterChats_au AFTER UPDATE ON CharacterChats BEGIN
            INSERT INTO CharacterChats_fts(CharacterChats_fts, rowid, conversation_name, chat_history)
            VALUES ('delete', old.id, old.conversation_name, old.chat_history);
            INSERT INTO CharacterChats_fts(rowid, conversation_name, chat_history)
            VALUES (new.id, new.conversation_name, new.chat_history);
        END;
        """)

        # Create ChatKeywords table
        cursor.execute("""
        CREATE TABLE IF NOT EXISTS ChatKeywords (
            chat_id INTEGER NOT NULL,
            keyword TEXT NOT NULL,
            FOREIGN KEY (chat_id) REFERENCES CharacterChats(id) ON DELETE CASCADE
        );
        """)

        # Create indexes for faster searches
        cursor.execute("""
        CREATE INDEX IF NOT EXISTS idx_chatkeywords_keyword ON ChatKeywords(keyword);
        """)
        cursor.execute("""
        CREATE INDEX IF NOT EXISTS idx_chatkeywords_chat_id ON ChatKeywords(chat_id);
        """)

        conn.commit()
        logging.info("Database initialized successfully.")
    except sqlite3.Error as e:
        logging.error(f"SQLite error occurred during database initialization: {e}")
        if conn:
            conn.rollback()
        raise
    except Exception as e:
        logging.error(f"Unexpected error occurred during database initialization: {e}")
        if conn:
            conn.rollback()
        raise
    finally:
        if conn:
            conn.close()
147
+
148
+ # Call initialize_database() at the start of your application
149
def setup_chat_database():
    """Run initialize_database() and terminate the process with exit code 1 if it fails."""
    try:
        initialize_database()
    except Exception as init_error:
        # Initialization failed: log at CRITICAL level and exit the interpreter.
        logging.critical(f"Failed to initialize database: {init_error}")
        sys.exit(1)
155
+
156
+ setup_chat_database()
157
+
158
+ ########################################################################################################
159
+ #
160
+ # Character Card handling
161
+
162
def parse_character_card(card_data: Dict[str, Any]) -> Dict[str, Any]:
    """Parse and validate a character card according to V2 specification.

    Args:
        card_data: Raw card fields. Missing text fields default to ''.

    Returns:
        A dict with one entry per CharacterCards column written by this module.
        'alternate_greetings', 'tags' and 'extensions' are JSON-encoded strings.
        'image' is always present (None when the card carries no image) so
        callers can index it without a KeyError.
    """
    text_fields = (
        'name', 'description', 'personality', 'scenario', 'first_mes',
        'mes_example', 'creator_notes', 'system_prompt',
        'post_history_instructions', 'creator', 'character_version',
    )
    v2_data = {field: card_data.get(field, '') for field in text_fields}

    # Structured fields are stored as JSON text.
    v2_data['alternate_greetings'] = json.dumps(card_data.get('alternate_greetings', []))
    v2_data['tags'] = json.dumps(card_data.get('tags', []))
    v2_data['extensions'] = json.dumps(card_data.get('extensions', {}))

    # Handle 'image' separately as it might be binary data; pass it through untouched.
    v2_data['image'] = card_data.get('image')

    return v2_data
186
+
187
+
188
def add_character_card(card_data: Dict[str, Any]) -> Optional[int]:
    """Add or update a character card in the database.

    The card is matched by name: an existing row with the same name is updated
    in place, otherwise a new row is inserted.

    Args:
        card_data: Raw card fields; normalized through parse_character_card().

    Returns:
        The ID of the inserted/updated character, or None if the card has no
        name or a database error occurred (the error is logged).
    """
    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        parsed_card = parse_character_card(card_data)

        # The name is the lookup key; a blank name would make every unnamed
        # card overwrite the same row.
        if not parsed_card['name']:
            logging.error("Error adding character card: card has no name")
            return None

        # Column values shared by the UPDATE and INSERT statements, in column order.
        # 'image' is optional, so use .get() instead of indexing.
        field_values = (
            parsed_card['description'], parsed_card['personality'], parsed_card['scenario'],
            parsed_card.get('image'), parsed_card['post_history_instructions'], parsed_card['first_mes'],
            parsed_card['mes_example'], parsed_card['creator_notes'], parsed_card['system_prompt'],
            parsed_card['alternate_greetings'], parsed_card['tags'], parsed_card['creator'],
            parsed_card['character_version'], parsed_card['extensions'],
        )

        # Check if character already exists
        cursor.execute("SELECT id FROM CharacterCards WHERE name = ?", (parsed_card['name'],))
        row = cursor.fetchone()

        if row:
            # Update existing character
            character_id = row[0]
            update_query = """
                UPDATE CharacterCards
                SET description = ?, personality = ?, scenario = ?, image = ?,
                    post_history_instructions = ?, first_mes = ?, mes_example = ?,
                    creator_notes = ?, system_prompt = ?, alternate_greetings = ?,
                    tags = ?, creator = ?, character_version = ?, extensions = ?
                WHERE id = ?
            """
            cursor.execute(update_query, field_values + (character_id,))
        else:
            # Insert new character
            insert_query = """
                INSERT INTO CharacterCards (name, description, personality, scenario, image,
                post_history_instructions, first_mes, mes_example, creator_notes, system_prompt,
                alternate_greetings, tags, creator, character_version, extensions)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """
            cursor.execute(insert_query, (parsed_card['name'],) + field_values)
            character_id = cursor.lastrowid

        conn.commit()
        return character_id
    except sqlite3.IntegrityError as e:
        logging.error(f"Error adding character card: {e}")
        return None
    except Exception as e:
        logging.error(f"Unexpected error adding character card: {e}")
        return None
    finally:
        conn.close()
244
+
245
+ # def add_character_card(card_data: Dict) -> Optional[int]:
246
+ # """Add or update a character card in the database.
247
+ #
248
+ # Returns the ID of the inserted character or None if failed.
249
+ # """
250
+ # conn = sqlite3.connect(chat_DB_PATH)
251
+ # cursor = conn.cursor()
252
+ # try:
253
+ # # Ensure all required fields are present
254
+ # required_fields = ['name', 'description', 'personality', 'scenario', 'image', 'post_history_instructions', 'first_message']
255
+ # for field in required_fields:
256
+ # if field not in card_data:
257
+ # card_data[field] = '' # Assign empty string if field is missing
258
+ #
259
+ # # Check if character already exists
260
+ # cursor.execute("SELECT id FROM CharacterCards WHERE name = ?", (card_data['name'],))
261
+ # row = cursor.fetchone()
262
+ #
263
+ # if row:
264
+ # # Update existing character
265
+ # character_id = row[0]
266
+ # cursor.execute("""
267
+ # UPDATE CharacterCards
268
+ # SET description = ?, personality = ?, scenario = ?, image = ?, post_history_instructions = ?, first_message = ?
269
+ # WHERE id = ?
270
+ # """, (
271
+ # card_data['description'],
272
+ # card_data['personality'],
273
+ # card_data['scenario'],
274
+ # card_data['image'],
275
+ # card_data['post_history_instructions'],
276
+ # card_data['first_message'],
277
+ # character_id
278
+ # ))
279
+ # else:
280
+ # # Insert new character
281
+ # cursor.execute("""
282
+ # INSERT INTO CharacterCards (name, description, personality, scenario, image, post_history_instructions, first_message)
283
+ # VALUES (?, ?, ?, ?, ?, ?, ?)
284
+ # """, (
285
+ # card_data['name'],
286
+ # card_data['description'],
287
+ # card_data['personality'],
288
+ # card_data['scenario'],
289
+ # card_data['image'],
290
+ # card_data['post_history_instructions'],
291
+ # card_data['first_message']
292
+ # ))
293
+ # character_id = cursor.lastrowid
294
+ #
295
+ # conn.commit()
296
+ # return cursor.lastrowid
297
+ # except sqlite3.IntegrityError as e:
298
+ # logging.error(f"Error adding character card: {e}")
299
+ # return None
300
+ # except Exception as e:
301
+ # logging.error(f"Unexpected error adding character card: {e}")
302
+ # return None
303
+ # finally:
304
+ # conn.close()
305
+
306
+
307
def get_character_cards() -> List[Dict]:
    """Retrieve all character cards from the database.

    Returns:
        List[Dict]: One dict per CharacterCards row, keyed by column name.

    Raises:
        sqlite3.Error: If the query fails; the connection is still closed.
    """
    logging.debug(f"Fetching characters from DB: {chat_DB_PATH}")
    conn = sqlite3.connect(chat_DB_PATH)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM CharacterCards")
        rows = cursor.fetchall()
        columns = [description[0] for description in cursor.description]
    finally:
        # Close the connection even when the query raises, so a failed read does not leak it.
        conn.close()
    return [dict(zip(columns, row)) for row in rows]
319
+
320
+
321
def get_character_card_by_id(character_id: Union[int, Dict[str, Any]]) -> Optional[Dict[str, Any]]:
    """
    Retrieve a single character card by its ID.

    Args:
        character_id: Can be either an integer ID or a dictionary containing character data.

    Returns:
        A dictionary containing the character card data, or None if not found,
        if the argument has an unsupported type, or if the lookup fails.
    """
    # If a dictionary is passed, assume it's already a character card.
    # No database access is needed, so do not open a connection for it.
    if isinstance(character_id, dict):
        return character_id

    if not isinstance(character_id, int):
        logging.warning(f"Invalid type for character_id: {type(character_id)}")
        return None

    # An integer was passed: fetch the character from the database.
    conn = sqlite3.connect(chat_DB_PATH)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM CharacterCards WHERE id = ?", (character_id,))
        row = cursor.fetchone()
        if row is None:
            return None
        columns = [description[0] for description in cursor.description]
        return dict(zip(columns, row))
    except Exception as e:
        logging.error(f"Error in get_character_card_by_id: {e}")
        return None
    finally:
        conn.close()
352
+
353
+
354
def update_character_card(character_id: int, card_data: Dict) -> bool:
    """Update an existing character card.

    Only name, description, personality, scenario, image,
    post_history_instructions and the first message are written; the other
    columns of the row are left untouched.

    Args:
        character_id: ID of the CharacterCards row to update.
        card_data: New values. The first message is read from 'first_mes'
            (the column name in the schema), falling back to the legacy
            'first_message' key and then to a default greeting.

    Returns:
        True if a row was updated, False if no row matched or a database
        error occurred (the error is logged).
    """
    # The schema column is `first_mes`; accept the older `first_message` key too.
    first_mes = card_data.get(
        'first_mes',
        card_data.get('first_message', "Hello! I'm ready to chat.")
    )

    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        cursor.execute("""
            UPDATE CharacterCards
            SET name = ?, description = ?, personality = ?, scenario = ?, image = ?, post_history_instructions = ?, first_mes = ?
            WHERE id = ?
        """, (
            card_data.get('name'),
            card_data.get('description'),
            card_data.get('personality'),
            card_data.get('scenario'),
            card_data.get('image'),
            card_data.get('post_history_instructions', ''),
            first_mes,
            character_id
        ))
        conn.commit()
        return cursor.rowcount > 0
    except sqlite3.Error as e:
        # sqlite3.Error covers IntegrityError (e.g. duplicate/NULL name) as well as
        # operational failures, matching the other mutators in this module.
        logging.error(f"Error updating character card: {e}")
        return False
    finally:
        conn.close()
380
+
381
+
382
def delete_character_card(character_id: int) -> bool:
    """Delete a character card and its associated chats.

    Keyword rows belonging to the deleted chats are removed as well.

    Args:
        character_id: ID of the CharacterCards row to delete.

    Returns:
        True if a character card row was deleted, False if none matched or a
        database error occurred (the error is logged; nothing is committed).
    """
    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        # Foreign key enforcement is a per-connection setting in SQLite and is
        # not enabled on this connection, so ON DELETE CASCADE does not fire.
        # Remove dependent rows explicitly, children first.
        cursor.execute("""
            DELETE FROM ChatKeywords
            WHERE chat_id IN (SELECT id FROM CharacterChats WHERE character_id = ?)
        """, (character_id,))
        cursor.execute("DELETE FROM CharacterChats WHERE character_id = ?", (character_id,))
        cursor.execute("DELETE FROM CharacterCards WHERE id = ?", (character_id,))
        # rowcount refers to the last statement, i.e. the card deletion itself.
        card_deleted = cursor.rowcount > 0
        conn.commit()
        return card_deleted
    except sqlite3.Error as e:
        logging.error(f"Error deleting character card: {e}")
        return False
    finally:
        conn.close()
397
+
398
+
399
def add_character_chat(character_id: int, conversation_name: str, chat_history: List[Tuple[str, str]], keywords: Optional[List[str]] = None, is_snapshot: bool = False) -> Optional[int]:
    """
    Add a new chat history for a character, optionally associating keywords.

    Args:
        character_id (int): The ID of the character.
        conversation_name (str): Name of the conversation.
        chat_history (List[Tuple[str, str]]): List of (user, bot) message tuples.
        keywords (Optional[List[str]]): List of keywords to associate with this chat.
            Keywords are stored stripped and lower-cased; blank entries,
            non-string entries and duplicates are skipped.
        is_snapshot (bool, optional): Whether this chat is a snapshot.

    Returns:
        Optional[int]: The ID of the inserted chat or None if failed.
    """
    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        # The history is persisted as a JSON string in a TEXT column.
        chat_history_json = json.dumps(chat_history)
        cursor.execute("""
            INSERT INTO CharacterChats (character_id, conversation_name, chat_history, is_snapshot)
            VALUES (?, ?, ?, ?)
        """, (
            character_id,
            conversation_name,
            chat_history_json,
            is_snapshot
        ))
        chat_id = cursor.lastrowid

        # Normalize keywords; dict.fromkeys() de-duplicates while keeping order.
        normalized_keywords = dict.fromkeys(
            keyword.strip().lower()
            for keyword in (keywords or [])
            if isinstance(keyword, str) and keyword.strip()
        )
        if normalized_keywords:
            # Insert keywords into ChatKeywords table
            keyword_records = [(chat_id, keyword) for keyword in normalized_keywords]
            cursor.executemany("""
                INSERT INTO ChatKeywords (chat_id, keyword)
                VALUES (?, ?)
            """, keyword_records)

        conn.commit()
        return chat_id
    except sqlite3.Error as e:
        logging.error(f"Error adding character chat: {e}")
        return None
    finally:
        conn.close()
443
+
444
+
445
def get_character_chats(character_id: Optional[int] = None) -> List[Dict]:
    """Retrieve all chats, or chats for a specific character if character_id is provided.

    Args:
        character_id: When not None, only chats of this character are returned.

    Returns:
        List[Dict]: One dict per CharacterChats row, keyed by column name.
        'chat_history' is returned as stored (a JSON string), not decoded.

    Raises:
        sqlite3.Error: If the query fails; the connection is still closed.
    """
    conn = sqlite3.connect(chat_DB_PATH)
    try:
        cursor = conn.cursor()
        if character_id is not None:
            cursor.execute("SELECT * FROM CharacterChats WHERE character_id = ?", (character_id,))
        else:
            cursor.execute("SELECT * FROM CharacterChats")
        rows = cursor.fetchall()
        columns = [description[0] for description in cursor.description]
    finally:
        # Close the connection even when the query raises, so a failed read does not leak it.
        conn.close()
    return [dict(zip(columns, row)) for row in rows]
457
+
458
+
459
def get_character_chat_by_id(chat_id: int) -> Optional[Dict]:
    """Retrieve a single chat by its ID.

    Args:
        chat_id: ID of the CharacterChats row.

    Returns:
        The row as a dict keyed by column name, with 'chat_history' decoded
        from JSON (an empty list when the stored history is NULL or empty),
        or None if no chat has this ID.

    Raises:
        sqlite3.Error: If the query fails; the connection is still closed.
    """
    conn = sqlite3.connect(chat_DB_PATH)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM CharacterChats WHERE id = ?", (chat_id,))
        row = cursor.fetchone()
        if row is None:
            return None
        # Read the column metadata while the connection is still open.
        columns = [description[0] for description in cursor.description]
    finally:
        conn.close()

    chat = dict(zip(columns, row))
    # chat_history is a nullable TEXT column; json.loads(None) would raise TypeError.
    raw_history = chat['chat_history']
    chat['chat_history'] = json.loads(raw_history) if raw_history else []
    return chat
472
+
473
+
474
def search_character_chats(query: str, character_id: Optional[int] = None) -> Tuple[List[Dict], str]:
    """
    Run an FTS5 search over character chats, optionally limited to one character.

    Args:
        query (str): The search query.
        character_id (Optional[int]): The ID of the character to filter chats by.

    Returns:
        Tuple[List[Dict], str]: The matching chats (id, conversation_name,
        chat_history) and a human-readable status message. On failure the
        list is empty and the message describes the error.
    """
    if not query.strip():
        return [], "Please enter a search query."

    filter_by_character = character_id is not None

    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        if filter_by_character:
            # Search with character_id filter
            cursor.execute("""
                SELECT CharacterChats.id, CharacterChats.conversation_name, CharacterChats.chat_history
                FROM CharacterChats_fts
                JOIN CharacterChats ON CharacterChats_fts.rowid = CharacterChats.id
                WHERE CharacterChats_fts MATCH ? AND CharacterChats.character_id = ?
                ORDER BY rank
            """, (query, character_id))
        else:
            # Search without character_id filter
            cursor.execute("""
                SELECT CharacterChats.id, CharacterChats.conversation_name, CharacterChats.chat_history
                FROM CharacterChats_fts
                JOIN CharacterChats ON CharacterChats_fts.rowid = CharacterChats.id
                WHERE CharacterChats_fts MATCH ?
                ORDER BY rank
            """, (query,))

        matched_rows = cursor.fetchall()
        column_names = [column_info[0] for column_info in cursor.description]
        results = []
        for matched_row in matched_rows:
            results.append(dict(zip(column_names, matched_row)))

        if filter_by_character:
            return results, f"Found {len(results)} chat(s) matching '{query}' for the selected character."
        return results, f"Found {len(results)} chat(s) matching '{query}' across all characters."
    except Exception as e:
        logging.error(f"Error searching chats with FTS5: {e}")
        return [], f"Error occurred during search: {e}"
    finally:
        conn.close()
525
+
526
def update_character_chat(chat_id: int, chat_history: List[Tuple[str, str]]) -> bool:
    """Replace the stored history of an existing chat.

    Returns True if a row was updated, False if no chat has this ID or a
    database error occurred (the error is logged).
    """
    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        # The history is stored as JSON text.
        serialized_history = json.dumps(chat_history)
        update_params = (serialized_history, chat_id)
        cursor.execute("""
            UPDATE CharacterChats
            SET chat_history = ?
            WHERE id = ?
        """, update_params)
        conn.commit()
        row_was_updated = cursor.rowcount > 0
        return row_was_updated
    except sqlite3.Error as e:
        logging.error(f"Error updating character chat: {e}")
        return False
    finally:
        conn.close()
547
+
548
+
549
def delete_character_chat(chat_id: int) -> bool:
    """Delete a specific chat together with its keyword associations.

    Args:
        chat_id: ID of the CharacterChats row to delete.

    Returns:
        True if a chat row was deleted, False if none matched or a database
        error occurred (the error is logged; nothing is committed).
    """
    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        # Foreign key enforcement is per connection in SQLite and is not enabled
        # here, so ON DELETE CASCADE does not clean up ChatKeywords. Delete the
        # keyword rows explicitly to avoid orphans that keep matching keyword lookups.
        cursor.execute("DELETE FROM ChatKeywords WHERE chat_id = ?", (chat_id,))
        cursor.execute("DELETE FROM CharacterChats WHERE id = ?", (chat_id,))
        # rowcount refers to the last statement, i.e. the chat deletion itself.
        chat_deleted = cursor.rowcount > 0
        conn.commit()
        return chat_deleted
    except sqlite3.Error as e:
        logging.error(f"Error deleting character chat: {e}")
        return False
    finally:
        conn.close()
562
+
563
def fetch_keywords_for_chats(keywords: List[str]) -> List[int]:
    """
    Fetch chat IDs associated with any of the specified keywords.

    Keywords are stripped and lower-cased before the lookup, mirroring how
    add_character_chat() stores them, so matching is case-insensitive.

    Args:
        keywords (List[str]): List of keywords to search for.

    Returns:
        List[int]: List of chat IDs associated with the keywords. Empty when
        no usable keyword is given or the query fails (the error is logged).
    """
    if not keywords:
        return []

    # Normalize the same way keywords are written; drop blanks and duplicates.
    normalized_keywords = list(dict.fromkeys(
        keyword.strip().lower()
        for keyword in keywords
        if isinstance(keyword, str) and keyword.strip()
    ))
    if not normalized_keywords:
        return []

    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        # One placeholder per keyword; the values themselves are always bound parameters.
        placeholders = ", ".join(["?"] * len(normalized_keywords))
        sql_query = f"SELECT DISTINCT chat_id FROM ChatKeywords WHERE keyword IN ({placeholders})"
        cursor.execute(sql_query, normalized_keywords)
        return [row[0] for row in cursor.fetchall()]
    except Exception as e:
        logging.error(f"Error in fetch_keywords_for_chats: {e}")
        return []
    finally:
        conn.close()
591
+
592
def save_chat_history_to_character_db(character_id: int, conversation_name: str, chat_history: List[Tuple[str, str]]) -> Optional[int]:
    """Store a chat history in the CharacterChats table via add_character_chat().

    No keywords are attached and the chat is not marked as a snapshot.

    Returns the ID of the inserted chat or None if failed.
    """
    new_chat_id = add_character_chat(character_id, conversation_name, chat_history)
    return new_chat_id
598
+
599
def migrate_chat_to_media_db():
    """Placeholder: not implemented yet, does nothing and returns None."""
    return None
601
+
602
+
603
def search_db(query: str, fields: List[str], where_clause: str = "", page: int = 1, results_per_page: int = 5) -> List[Dict[str, Any]]:
    """
    Perform a full-text search on specified fields with optional filtering and pagination.

    Args:
        query (str): The search query (FTS5 query syntax).
        fields (List[str]): FTS columns to search in. Valid columns are
            'conversation_name' and 'chat_history'; table-qualified names
            (e.g. 'CharacterChats_fts.chat_history') are accepted. Names that
            are not indexed columns are ignored; if none remain, all indexed
            columns are searched.
        where_clause (str, optional): Additional SQL WHERE clause to filter results.
            It is inserted into the statement verbatim and must therefore
            never be built from untrusted input.
        page (int, optional): Page number for pagination (values below 1 are treated as 1).
        results_per_page (int, optional): Number of results per page.

    Returns:
        List[Dict[str, Any]]: Matching chats as dicts with the keys 'id',
        'conversation_name' and 'chat_history', best match first. Empty when
        the query is blank or the search fails (the error is logged).
    """
    if not query or not query.strip():
        return []

    # Columns that exist in the CharacterChats_fts index.
    fts_columns = ("conversation_name", "chat_history")

    # Keep only real FTS columns. Field names are never interpolated into the
    # SQL text; they only end up inside the bound MATCH parameter.
    requested_columns = []
    for field in fields or []:
        column = str(field).rsplit(".", 1)[-1].strip()
        if column in fts_columns and column not in requested_columns:
            requested_columns.append(column)

    if requested_columns and len(requested_columns) < len(fts_columns):
        # FTS5 column filter: "{col1 col2} : (expression)" restricts the whole
        # expression to the listed columns.
        match_expression = "{" + " ".join(requested_columns) + "} : (" + query + ")"
    else:
        match_expression = query

    conn = sqlite3.connect(chat_DB_PATH)
    cursor = conn.cursor()
    try:
        fts_query = """
            SELECT CharacterChats.id, CharacterChats.conversation_name, CharacterChats.chat_history
            FROM CharacterChats_fts
            JOIN CharacterChats ON CharacterChats_fts.rowid = CharacterChats.id
            WHERE CharacterChats_fts MATCH ?
        """
        if where_clause:
            # NOTE(security): raw SQL supplied by the caller - see docstring.
            fts_query += f" AND ({where_clause})"
        fts_query += " ORDER BY rank LIMIT ? OFFSET ?"
        # Clamp so page <= 0 cannot produce a negative OFFSET.
        offset = max(page - 1, 0) * results_per_page
        cursor.execute(fts_query, (match_expression, results_per_page, offset))
        rows = cursor.fetchall()
        columns = [description[0] for description in cursor.description]
        return [dict(zip(columns, row)) for row in rows]
    except Exception as e:
        logging.error(f"Error in search_db: {e}")
        return []
    finally:
        conn.close()
646
+
647
+
648
def perform_full_text_search_chat(query: str, relevant_chat_ids: List[int], page: int = 1, results_per_page: int = 5) -> \
        List[Dict[str, Any]]:
    """
    Perform a full-text search within the specified chat IDs using FTS5.

    Args:
        query (str): The user's query.
        relevant_chat_ids (List[int]): List of chat IDs to search within.
            An empty list (or None) means no restriction.
        page (int): Pagination page number.
        results_per_page (int): Number of results per page.

    Returns:
        List[Dict[str, Any]]: List of search results. Each entry has
        'content' (the stored chat history text) and 'metadata' with the
        chat ID under the key 'media_id'. Empty on error (the error is logged).
    """
    try:
        # Coerce to int so the IDs can be embedded in the SQL filter safely;
        # a non-numeric ID raises here and is reported by the handler below.
        chat_ids = {int(chat_id) for chat_id in (relevant_chat_ids or [])}

        # Construct a WHERE clause to limit the search to relevant chat IDs.
        # The chat ID lives in CharacterChats.id (there is no media_id column).
        where_clause = ""
        if chat_ids:
            id_list = ", ".join(str(chat_id) for chat_id in sorted(chat_ids))
            where_clause = f"CharacterChats.id IN ({id_list})"

        # Perform full-text search using FTS5 on the chat text column.
        fts_results = search_db(query, ["CharacterChats_fts.chat_history"], where_clause,
                                page=page, results_per_page=results_per_page)

        # search_db returns the keys id / conversation_name / chat_history.
        return [
            {
                "content": result['chat_history'],
                "metadata": {"media_id": result['id']}
            }
            for result in fts_results
            if not chat_ids or result['id'] in chat_ids
        ]
    except Exception as e:
        logging.error(f"Error in perform_full_text_search_chat: {str(e)}")
        return []
683
+
684
+
685
def fetch_all_chats() -> List[Dict[str, Any]]:
    """
    Return every stored chat by calling get_character_chats() without a filter.

    Returns:
        List[Dict[str, Any]]: The chat rows, or an empty list if the lookup
        raises (the error is logged).
    """
    try:
        # No character_id is passed, so the lookup is not restricted to one character.
        return get_character_chats()
    except Exception as e:
        logging.error(f"Error fetching all chats: {str(e)}")
        return []
698
+
699
+ #
700
+ # End of Character_Chat_DB.py
701
+ #######################################################################################################################
App_Function_Libraries/DB/DB_Manager.py CHANGED
@@ -309,7 +309,10 @@ def add_media_to_database(*args, **kwargs):
309
  result = sqlite_add_media_to_database(*args, **kwargs)
310
 
311
  # Extract content
312
- segments = args[2]
 
 
 
313
  if isinstance(segments, list):
314
  content = ' '.join([segment.get('Text', '') for segment in segments if 'Text' in segment])
315
  elif isinstance(segments, dict):
 
309
  result = sqlite_add_media_to_database(*args, **kwargs)
310
 
311
  # Extract content
312
+ segments = kwargs.get('segments') if 'segments' in kwargs else args[2] if len(args) > 2 else None
313
+ if segments is None:
314
+ raise ValueError("Segments not provided in arguments")
315
+
316
  if isinstance(segments, list):
317
  content = ' '.join([segment.get('Text', '') for segment in segments if 'Text' in segment])
318
  elif isinstance(segments, dict):
App_Function_Libraries/DB/SQLite_DB.py CHANGED
@@ -1181,8 +1181,6 @@ def is_valid_date(date_string: str) -> bool:
1181
  return False
1182
 
1183
 
1184
-
1185
-
1186
  def add_media_to_database(url, info_dict, segments, summary, keywords, custom_prompt_input, whisper_model, media_type='video', overwrite=False, db=None):
1187
  if db is None:
1188
  db = Database()
@@ -1196,6 +1194,7 @@ def add_media_to_database(url, info_dict, segments, summary, keywords, custom_pr
1196
  url_hash = hashlib.md5(f"{title}{media_type}".encode()).hexdigest()
1197
  url = f"https://No-URL-Submitted.com/{media_type}/{quote(title)}-{url_hash}"
1198
 
 
1199
 
1200
  # Extract content from segments
1201
  if isinstance(segments, list):
@@ -1217,15 +1216,24 @@ def add_media_to_database(url, info_dict, segments, summary, keywords, custom_pr
1217
  cursor.execute('SELECT id FROM Media WHERE url = ?', (url,))
1218
  existing_media = cursor.fetchone()
1219
 
 
 
 
1220
  if existing_media:
 
 
1221
  if overwrite:
1222
- media_id = existing_media[0]
1223
  cursor.execute('''
1224
  UPDATE Media
1225
  SET content = ?, transcription_model = ?, title = ?, type = ?, author = ?, ingestion_date = ?, chunking_status = ?
1226
  WHERE id = ?
1227
  ''', (content, whisper_model, info_dict.get('title', 'Untitled'), media_type,
1228
  info_dict.get('uploader', 'Unknown'), datetime.now().strftime('%Y-%m-%d'), 'pending', media_id))
 
 
 
 
1229
  else:
1230
  cursor.execute('''
1231
  INSERT INTO Media (url, title, type, content, author, ingestion_date, transcription_model, chunking_status)
@@ -1233,12 +1241,17 @@ def add_media_to_database(url, info_dict, segments, summary, keywords, custom_pr
1233
  ''', (url, info_dict.get('title', 'Untitled'), media_type, content,
1234
  info_dict.get('uploader', 'Unknown'), datetime.now().strftime('%Y-%m-%d'), whisper_model, 'pending'))
1235
  media_id = cursor.lastrowid
 
 
1236
 
1237
- # Add modification
1238
- cursor.execute('''
1239
- INSERT INTO MediaModifications (media_id, prompt, summary, modification_date)
1240
- VALUES (?, ?, ?, ?)
1241
- ''', (media_id, custom_prompt_input, summary, datetime.now().strftime('%Y-%m-%d')))
 
 
 
1242
 
1243
  # Process keywords
1244
  for keyword in keyword_list:
@@ -1266,7 +1279,8 @@ def add_media_to_database(url, info_dict, segments, summary, keywords, custom_pr
1266
  schedule_chunking(media_id, content, info_dict.get('title', 'Untitled'))
1267
 
1268
  action = "updated" if existing_media and overwrite else "added"
1269
- return f"Media '{info_dict.get('title', 'Untitled')}' {action} successfully with URL: {url} and keywords: {', '.join(keyword_list)}. Chunking scheduled."
 
1270
 
1271
  except DatabaseError as e:
1272
  logging.error(f"Database error: {e}")
 
1181
  return False
1182
 
1183
 
 
 
1184
  def add_media_to_database(url, info_dict, segments, summary, keywords, custom_prompt_input, whisper_model, media_type='video', overwrite=False, db=None):
1185
  if db is None:
1186
  db = Database()
 
1194
  url_hash = hashlib.md5(f"{title}{media_type}".encode()).hexdigest()
1195
  url = f"https://No-URL-Submitted.com/{media_type}/{quote(title)}-{url_hash}"
1196
 
1197
+ logging.debug(f"Checking for existing media with URL: {url}")
1198
 
1199
  # Extract content from segments
1200
  if isinstance(segments, list):
 
1216
  cursor.execute('SELECT id FROM Media WHERE url = ?', (url,))
1217
  existing_media = cursor.fetchone()
1218
 
1219
+ logging.debug(f"Existing media: {existing_media}")
1220
+ logging.debug(f"Overwrite flag: {overwrite}")
1221
+
1222
  if existing_media:
1223
+ media_id = existing_media[0]
1224
+ logging.debug(f"Existing media_id: {media_id}")
1225
  if overwrite:
1226
+ logging.debug("Updating existing media")
1227
  cursor.execute('''
1228
  UPDATE Media
1229
  SET content = ?, transcription_model = ?, title = ?, type = ?, author = ?, ingestion_date = ?, chunking_status = ?
1230
  WHERE id = ?
1231
  ''', (content, whisper_model, info_dict.get('title', 'Untitled'), media_type,
1232
  info_dict.get('uploader', 'Unknown'), datetime.now().strftime('%Y-%m-%d'), 'pending', media_id))
1233
+ action = "updated"
1234
+ else:
1235
+ logging.debug("Media exists but not updating (overwrite=False)")
1236
+ action = "already exists (not updated)"
1237
  else:
1238
  cursor.execute('''
1239
  INSERT INTO Media (url, title, type, content, author, ingestion_date, transcription_model, chunking_status)
 
1241
  ''', (url, info_dict.get('title', 'Untitled'), media_type, content,
1242
  info_dict.get('uploader', 'Unknown'), datetime.now().strftime('%Y-%m-%d'), whisper_model, 'pending'))
1243
  media_id = cursor.lastrowid
1244
+ action = "added"
1245
+ logging.debug(f"New media_id: {media_id}")
1246
 
1247
+ logging.debug(f"Before MediaModifications insert, media_id: {media_id}")
1248
+
1249
+ # Only proceed with modifications if the media was added or updated
1250
+ if action in ["updated", "added"]:
1251
+ cursor.execute('''
1252
+ INSERT INTO MediaModifications (media_id, prompt, summary, modification_date)
1253
+ VALUES (?, ?, ?, ?)
1254
+ ''', (media_id, custom_prompt_input, summary, datetime.now().strftime('%Y-%m-%d')))
1255
 
1256
  # Process keywords
1257
  for keyword in keyword_list:
 
1279
  schedule_chunking(media_id, content, info_dict.get('title', 'Untitled'))
1280
 
1281
  action = "updated" if existing_media and overwrite else "added"
1282
+ return f"Media '{info_dict.get('title', 'Untitled')}' {action} with URL: {url}" + \
1283
+ (f" and keywords: {', '.join(keyword_list)}. Chunking scheduled." if action in ["updated", "added"] else "")
1284
 
1285
  except DatabaseError as e:
1286
  logging.error(f"Database error: {e}")
App_Function_Libraries/Utils/Utils.py CHANGED
@@ -15,8 +15,6 @@
15
  # 6. normalize_title(title)
16
  # 7.
17
  #
18
- #
19
- #
20
  ####################
21
  #
22
  # Import necessary libraries
@@ -256,6 +254,7 @@ def load_and_log_configs():
256
  logging.debug(f"Loaded Tabby API IP: {tabby_api_IP}")
257
  logging.debug(f"Loaded VLLM API URL: {vllm_api_url}")
258
 
 
259
  # Retrieve output paths from the configuration file
260
  output_path = config.get('Paths', 'output_path', fallback='results')
261
  logging.debug(f"Output path set to: {output_path}")
@@ -264,6 +263,18 @@ def load_and_log_configs():
264
  processing_choice = config.get('Processing', 'processing_choice', fallback='cpu')
265
  logging.debug(f"Processing choice set to: {processing_choice}")
266
 
 
 
 
 
 
 
 
 
 
 
 
 
267
  # Prompts - FIXME
268
  prompt_path = config.get('Prompts', 'prompt_path', fallback='Databases/prompts.db')
269
 
@@ -320,6 +331,16 @@ def load_and_log_configs():
320
  'elasticsearch_port': config.getint('Database', 'elasticsearch_port', fallback=9200),
321
  'chroma_db_path': get_project_relative_path(config.get('Database', 'chroma_db_path', fallback='Databases/chroma.db'))
322
  },
 
 
 
 
 
 
 
 
 
 
323
  }
324
 
325
  except Exception as e:
@@ -513,31 +534,49 @@ def create_download_directory(title):
513
  return session_path
514
 
515
 
 
 
 
516
  def safe_read_file(file_path):
517
- encodings = ['utf-8', 'utf-16', 'ascii', 'latin-1', 'iso-8859-1', 'cp1252']
 
 
518
 
519
  try:
520
  with open(file_path, 'rb') as file:
521
  raw_data = file.read()
522
  except FileNotFoundError:
 
523
  return f"File not found: {file_path}"
524
  except Exception as e:
 
525
  return f"An error occurred while reading the file: {e}"
526
 
 
 
 
 
527
  # Use chardet to detect the encoding
528
  detected = chardet.detect(raw_data)
529
  if detected['encoding'] is not None:
530
  encodings.insert(0, detected['encoding'])
 
531
 
532
  for encoding in encodings:
533
  try:
534
  decoded_content = raw_data.decode(encoding)
535
- if decoded_content.isprintable():
 
 
536
  return decoded_content
537
  except UnicodeDecodeError:
 
538
  continue
539
 
540
- return f"Unable to decode the file {file_path} with any of the attempted encodings: {encodings}"
 
 
 
541
 
542
  #
543
  # End of Files-saving Function Definitions
 
15
  # 6. normalize_title(title)
16
  # 7.
17
  #
 
 
18
  ####################
19
  #
20
  # Import necessary libraries
 
254
  logging.debug(f"Loaded Tabby API IP: {tabby_api_IP}")
255
  logging.debug(f"Loaded VLLM API URL: {vllm_api_url}")
256
 
257
+
258
  # Retrieve output paths from the configuration file
259
  output_path = config.get('Paths', 'output_path', fallback='results')
260
  logging.debug(f"Output path set to: {output_path}")
 
263
  processing_choice = config.get('Processing', 'processing_choice', fallback='cpu')
264
  logging.debug(f"Processing choice set to: {processing_choice}")
265
 
266
+ # Retrieve Embedding model settings from the configuration file
267
+ embedding_model = config.get('Embeddings', 'embedding_model', fallback='')
268
+ logging.debug(f"Embedding model set to: {embedding_model}")
269
+ embedding_provider = config.get('Embeddings', 'embedding_provider', fallback='')
270
+ embedding_model = config.get('Embeddings', 'embedding_model', fallback='')
271
+ onnx_model_path = config.get('Embeddings', 'onnx_model_path', fallback="./App_Function_Libraries/onnx_models/text-embedding-3-small.onnx")
272
+ model_dir = config.get('Embeddings', 'model_dir', fallback="./App_Function_Libraries/onnx_models")
273
+ embedding_api_url = config.get('Embeddings', 'embedding_api_url', fallback="http://localhost:8080/v1/embeddings")
274
+ embedding_api_key = config.get('Embeddings', 'embedding_api_key', fallback='')
275
+ chunk_size = config.get('Embeddings', 'chunk_size', fallback=400)
276
+ overlap = config.get('Embeddings', 'overlap', fallback=200)
277
+
278
  # Prompts - FIXME
279
  prompt_path = config.get('Prompts', 'prompt_path', fallback='Databases/prompts.db')
280
 
 
331
  'elasticsearch_port': config.getint('Database', 'elasticsearch_port', fallback=9200),
332
  'chroma_db_path': get_project_relative_path(config.get('Database', 'chroma_db_path', fallback='Databases/chroma.db'))
333
  },
334
+ 'embedding_config': {
335
+ 'embedding_provider': embedding_provider,
336
+ 'embedding_model': embedding_model,
337
+ 'onnx_model_path': onnx_model_path,
338
+ 'model_dir': model_dir,
339
+ 'embedding_api_url': embedding_api_url,
340
+ 'embedding_api_key': embedding_api_key,
341
+ 'chunk_size': chunk_size,
342
+ 'overlap': overlap
343
+ }
344
  }
345
 
346
  except Exception as e:
 
534
  return session_path
535
 
536
 
537
+ import chardet
538
+ import logging
539
+
540
def safe_read_file(file_path):
    """
    Read a file as text, trying several encodings until one looks plausible.

    The raw bytes are read once. The encoding reported by chardet (if any) is
    tried first, followed by a fixed list of fallbacks. A decoding is accepted
    when more than 95% of its characters are printable or whitespace.

    Args:
        file_path: Path of the file to read.

    Returns:
        str: The decoded text, or "" for an empty file. Failures are NOT
            raised: a missing file, a read error, or an undecodable file each
            return a human-readable error message string instead.
    """
    # 'utf-8-sig' goes first: it decodes plain UTF-8 as well and strips a
    # leading BOM, which plain 'utf-8' would leave in the text.
    encodings = ['utf-8-sig', 'utf-8', 'utf-16', 'ascii', 'latin-1', 'iso-8859-1', 'cp1252']

    logging.info(f"Attempting to read file: {file_path}")

    try:
        with open(file_path, 'rb') as file:
            raw_data = file.read()
    except FileNotFoundError:
        logging.error(f"File not found: {file_path}")
        return f"File not found: {file_path}"
    except Exception as e:
        logging.error(f"An error occurred while reading the file: {e}")
        return f"An error occurred while reading the file: {e}"

    if not raw_data:
        logging.warning(f"File is empty: {file_path}")
        return ""

    # Use chardet to detect the encoding
    detected = chardet.detect(raw_data)
    if detected['encoding'] is not None:
        encodings.insert(0, detected['encoding'])
        logging.info(f"Detected encoding: {detected['encoding']}")

    for encoding in encodings:
        try:
            decoded_content = raw_data.decode(encoding)
        except (UnicodeDecodeError, LookupError):
            # LookupError: the detected codec name is not known to Python.
            logging.debug(f"Failed to decode with {encoding}")
            continue

        if not decoded_content:
            # Only a BOM was present; there is no text to score, and dividing
            # by the length below would raise ZeroDivisionError.
            logging.info(f"Successfully decoded file with encoding: {encoding}")
            return decoded_content

        # Check if the content is mostly readable. Whitespace is counted as
        # readable because str.isprintable() is False for '\n', '\t' and '\r',
        # which would otherwise reject ordinary text made of short lines.
        readable = sum(1 for ch in decoded_content if ch.isprintable() or ch.isspace())
        if readable / len(decoded_content) > 0.95:
            logging.info(f"Successfully decoded file with encoding: {encoding}")
            return decoded_content

    # If all decoding attempts fail, return the error message
    logging.error(f"Unable to decode the file {file_path}")
    return f"Unable to decode the file {file_path}"
579
+
580
 
581
  #
582
  # End of Files-saving Function Definitions