# Source: tldw / App_Function_Libraries / MediaWiki / mediawiki_import_config.yaml
# Hosting-page metadata (not part of the configuration):
#   oceansweep's picture
#   Upload 3 files
#   32b7e17 verified
#   raw
#   history blame
#   1.43 kB
# MediaWiki Import Configuration
# Database settings
# Both values are relative paths; what they are resolved against is decided by
# the consuming code (presumably the working directory -- confirm).
database:
  sqlite_path: './Databases/media_summary.db'
  chroma_db_path: 'chroma_db'
# Chunking options
# `default_method` is one of the entries listed under `methods`.
# NOTE(review): the unit of `default_size` / `default_overlap` is not stated
# here; it presumably follows the selected method -- confirm with the consumer.
chunking:
  default_method: 'sentences'
  default_size: 1000
  default_overlap: 100
  adaptive: true
  language: 'en'
  methods:
    - 'sentences'
    - 'words'
    - 'paragraphs'
    - 'tokens'
# Import settings
import:
  batch_size: 1000  # Number of pages to process in a single batch
  default_skip_redirects: true
  default_namespaces: [0]  # Main namespace by default
  single_item_default: false
# Processing options
processing:
  max_workers: 4  # Number of worker threads for async processing
# Embedding settings
# The `api_key` value below is a placeholder. Do not commit a real key to
# version control; supply it through whatever secret mechanism the consuming
# code supports (TODO confirm which one that is).
embeddings:
  provider: 'openai'  # or 'local' or 'huggingface'
  model: 'text-embedding-ada-002'
  api_key: 'your_openai_api_key_here'  # Remove if using local embeddings
  local_url: 'http://localhost:8080/embeddings'  # Only for local embeddings
# ChromaDB settings
chromadb:
  collection_prefix: 'mediawiki_'
# Logging settings
logging:
  level: 'INFO'
  file: 'mediawiki_import.log'
# Checkpoint settings
checkpoints:
  enabled: true
  directory: 'import_checkpoints'
# Error handling
error_handling:
  max_retries: 3
  retry_delay: 5  # seconds
# User interface settings
# Chunk-size bounds and defaults offered in the UI; the default size lies
# within [min_chunk_size, max_chunk_size].
ui:
  default_chunk_size: 1000
  min_chunk_size: 100
  max_chunk_size: 2000
  default_chunk_overlap: 100