oceansweep committed on
Commit
5616cfc
1 Parent(s): fa4b240

Upload Chunk_Lib.py

Browse files
App_Function_Libraries/Chunk_Lib.py CHANGED
@@ -476,22 +476,22 @@ def semantic_chunk_long_file(file_path, max_chunk_size=1000, overlap=100, unit='
476
  #
477
  # Embedding Chunking
478
 
479
- def chunk_for_embedding(text: str, file_name: str, full_summary: str, custom_chunk_options: Dict[str, Any] = None) -> List[Dict[str, Any]]:
480
  options = chunk_options.copy()
481
  if custom_chunk_options:
482
  options.update(custom_chunk_options)
483
 
 
484
  chunks = improved_chunking_process(text, options)
485
  total_chunks = len(chunks)
 
486
 
487
  chunked_text_with_headers = []
488
  for i, chunk in enumerate(chunks, 1):
489
  chunk_text = chunk['text']
490
  chunk_position = determine_chunk_position(chunk['metadata']['relative_position'])
491
-
492
  chunk_header = f"""
493
  Original Document: {file_name}
494
- Full Document Summary: {full_summary or "Full document summary not available."}
495
  Chunk: {i} of {total_chunks}
496
  Position: {chunk_position}
497
 
 
476
  #
477
  # Embedding Chunking
478
 
479
+ def chunk_for_embedding(text: str, file_name: str, custom_chunk_options: Dict[str, Any] = None) -> List[Dict[str, Any]]:
480
  options = chunk_options.copy()
481
  if custom_chunk_options:
482
  options.update(custom_chunk_options)
483
 
484
+ logging.info(f"Chunking options: {options}")
485
  chunks = improved_chunking_process(text, options)
486
  total_chunks = len(chunks)
487
+ logging.info(f"Total chunks created: {total_chunks}")
488
 
489
  chunked_text_with_headers = []
490
  for i, chunk in enumerate(chunks, 1):
491
  chunk_text = chunk['text']
492
  chunk_position = determine_chunk_position(chunk['metadata']['relative_position'])
 
493
  chunk_header = f"""
494
  Original Document: {file_name}
 
495
  Chunk: {i} of {total_chunks}
496
  Position: {chunk_position}
497