---
# MediaWiki Import Configuration
#
# Each top-level key below groups the settings for one area of the import.
# All values are defaults intended to be edited per deployment.

# Database settings
database:
  sqlite_path: './Databases/media_summary.db'
  chroma_db_path: 'chroma_db'

# Chunking options
chunking:
  default_method: 'sentences'  # One of the entries listed under `methods`
  default_size: 1000
  default_overlap: 100
  adaptive: true
  language: 'en'
  methods:
    - 'sentences'
    - 'words'
    - 'paragraphs'
    - 'tokens'

# Import settings
import:
  batch_size: 1000  # Number of pages to process in a single batch
  default_skip_redirects: true
  default_namespaces: [0]  # Main namespace by default
  single_item_default: false

# Processing options
processing:
  max_workers: 4  # Number of worker threads for async processing

# Embedding settings
embeddings:
  provider: 'openai'  # or 'local' or 'huggingface'
  model: 'text-embedding-ada-002'
  # Placeholder only: replace locally and do not commit a real key.
  api_key: 'your_openai_api_key_here'  # Remove if using local embeddings
  local_url: 'http://localhost:8080/embeddings'  # Only for local embeddings

# ChromaDB settings
chromadb:
  collection_prefix: 'mediawiki_'

# Logging settings
logging:
  level: 'INFO'
  file: 'mediawiki_import.log'

# Checkpoint settings
checkpoints:
  enabled: true
  directory: 'import_checkpoints'

# Error handling
error_handling:
  max_retries: 3
  retry_delay: 5  # seconds

# User interface settings
ui:
  default_chunk_size: 1000
  min_chunk_size: 100
  max_chunk_size: 2000
  default_chunk_overlap: 100