Spaces:

oceansweep
/

tldw

Running on T4

oceansweep committed on Sep 24

Commit

e88c10c

•

1 Parent(s): 74df9f9

Update Config_Files/config.txt

Files changed (1) hide show

Config_Files/config.txt CHANGED Viewed

@@ -61,18 +61,22 @@ chroma_db_path = chroma_db
 [Embeddings]
 embedding_provider = openai
-# Can be 'openai', 'local', or 'huggingface'
-model = text-embedding-3-small
-# Model name or path
-api_key = your_api_key_here
-api_url = http://localhost:8080/v1/embeddings
-# Only needed for 'local' provider
 [Chunking]
 method = words
 max_size = 400
 overlap = 200
 adaptive = false
 multi_level = false
 language = english

 [Embeddings]
 embedding_provider = openai
+embedding_model = text-embedding-3-small
+embedding_api_url = http://localhost:8080/v1/embeddings
+embedding_api_key = your_api_key_here
+chunk_size = 400
+overlap = 200
+# 'embedding_provider' can be 'openai', 'local', or 'huggingface'.
+# 'embedding_model' is the name of the model to use for embeddings. For OpenAI, this can be 'text-embedding-3-small' or 'text-embedding-3-large'.
+# huggingface example: embedding_model = dunzhang/stella_en_400M_v5
 [Chunking]
 method = words
+# 'method' can be 'words' / 'sentences' / 'paragraphs' / 'semantic' / 'tokens'
 max_size = 400
 overlap = 200
 adaptive = false
+# Use NLTK + punkt to split text into sentences, then identify the average sentence length and set that as the chunk size
 multi_level = false
 language = english