vhr1007 committed on
Commit
ded0685
1 Parent(s): ebf9d7d

Changed similarity threshold (0.8 → 0.6) and max chunk size (512 → 2048)

Browse files
Files changed (1) hide show
  1. app.py +1 -1
app.py CHANGED
@@ -64,7 +64,7 @@ async def document_index(request: DocumentIndexRequest, x_api_key: str = Header(
64
  #print('text_content',text_content)
65
 
66
  # Chunking text using semantic chunking
67
- chunks = cumulative_semantic_chunking(text_content, max_chunk_size=512, similarity_threshold=0.8)
68
  logging.info(f"Text content chunked into {len(chunks)} chunks.")
69
  # Embed chunks
70
  embeddings, total_tokens = embed_chunks(chunks)
 
64
  #print('text_content',text_content)
65
 
66
  # Chunking text using semantic chunking
67
+ chunks = cumulative_semantic_chunking(text_content, max_chunk_size=2048, similarity_threshold=0.6)
68
  logging.info(f"Text content chunked into {len(chunks)} chunks.")
69
  # Embed chunks
70
  embeddings, total_tokens = embed_chunks(chunks)