File size: 1,434 Bytes
2544492
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# MediaWiki Import Configuration

# Storage locations used by the importer
database:
  # SQLite database file; the leading ./ makes this a relative path.
  # NOTE(review): presumably resolved against the working directory — confirm.
  sqlite_path: ./Databases/media_summary.db
  # Location handed to ChromaDB.
  # NOTE(review): presumably a directory — confirm.
  chroma_db_path: chroma_db

# How imported text is split into chunks
chunking:
  # Method used by default; it is one of the entries listed under `methods`.
  default_method: sentences
  # Default chunk size and overlap.
  # NOTE(review): units are not stated in this file (presumably they follow
  # the selected method) — confirm.
  default_size: 1000
  default_overlap: 100
  # NOTE(review): what "adaptive" changes is defined by the consumer — confirm.
  adaptive: true
  # Language code for the text being chunked.
  language: en
  # Chunking methods listed for this importer.
  methods:
    - sentences
    - words
    - paragraphs
    - tokens

# Defaults applied to an import run
import:
  # Number of pages to process in a single batch.
  batch_size: 1000
  default_skip_redirects: true
  # Namespace ids selected by default; 0 is the main namespace.
  default_namespaces:
    - 0
  single_item_default: false

# Processing options: concurrency settings for the import.
processing:
  # NOTE(review): whether this is an exact pool size or an upper bound
  # depends on the consumer — confirm.
  max_workers: 4  # Number of worker threads for async processing

# Embedding backend configuration
embeddings:
  # Backend selector: 'openai', 'local' or 'huggingface'.
  provider: openai
  model: text-embedding-ada-002
  # Placeholder value; remove this key when using local embeddings.
  # NOTE(review): avoid committing a real key to version control — prefer
  # supplying it from the environment or a secret store.
  api_key: 'your_openai_api_key_here'
  # Endpoint used only for local embeddings.
  local_url: 'http://localhost:8080/embeddings'

# ChromaDB options
chromadb:
  # NOTE(review): presumably prepended to collection names — confirm.
  collection_prefix: mediawiki_

# Logging configuration
logging:
  # Log level name.
  level: INFO
  # Log file name, given as a relative path.
  file: mediawiki_import.log

# Import checkpointing
checkpoints:
  # Checkpointing is switched on.
  enabled: true
  # Location for checkpoint data, given as a relative path.
  directory: import_checkpoints

# Error handling: retry policy.
error_handling:
  max_retries: 3
  # NOTE(review): presumably the pause between successive attempts — confirm.
  retry_delay: 5  # seconds

# User interface settings: chunk size and overlap values for the UI.
# The default chunk size (1000) lies between min_chunk_size and max_chunk_size.
# NOTE(review): default_chunk_size and default_chunk_overlap carry the same
# values as chunking.default_size and chunking.default_overlap in this file —
# keep them in sync if either side changes.
ui:
  default_chunk_size: 1000
  min_chunk_size: 100
  max_chunk_size: 2000
  default_chunk_overlap: 100