# Required: Hugging Face token with read/write permissions for repositories and inference API
# Get it from: https://huggingface.co/settings/tokens
HF_TOKEN=your_hugging_face_token_here

# Model Configuration
# Choose ONE of the following model setups:

## 1. For Hugging Face Serverless Inference (Default):
MODEL=meta-llama/Llama-3.1-8B-Instruct
# MODEL=Qwen/Qwen2.5-1.5B-Instruct
# MODEL=mistralai/Mixtral-8x7B-Instruct-v0.1
# MODEL=HuggingFaceH4/zephyr-7b-beta

## 2. For an OpenAI-compatible API:
# OPENAI_BASE_URL=https://api.openai.com/v1/
# MODEL=gpt-4
# API_KEY=your_openai_api_key_here

## 3. For Ollama:
# OLLAMA_BASE_URL=http://127.0.0.1:11434/
# MODEL=qwen2.5:32b-instruct-q5_K_S
# TOKENIZER_ID=Qwen/Qwen2.5-32B-Instruct

## 4. For vLLM:
# VLLM_BASE_URL=http://127.0.0.1:8000/
# MODEL=Qwen/Qwen2.5-1.5B-Instruct
# TOKENIZER_ID=Qwen/Qwen2.5-1.5B-Instruct

## 5. For Hugging Face Inference Endpoints or TGI:
# HUGGINGFACE_BASE_URL=http://127.0.0.1:3000/
# TOKENIZER_ID=meta-llama/Llama-3.1-8B-Instruct

# Generation Settings (Optional - these are the defaults)
MAX_NUM_TOKENS=2048
MAX_NUM_ROWS=1000
DEFAULT_BATCH_SIZE=5

# Chat/SFT Configuration
# Required for chat data generation with Llama or Qwen models
# Options: "llama3", "qwen2", or a custom template string
MAGPIE_PRE_QUERY_TEMPLATE=llama3

# Optional: Argilla Integration
# Follow https://docs.argilla.io/latest/getting_started/quickstart/
# ARGILLA_API_URL=https://[your-owner-name]-[your_space_name].hf.space
# ARGILLA_API_KEY=your_argilla_api_key_here
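
# Example: a complete, self-consistent configuration for a local Ollama
# backend. This is an illustrative sketch, not extra settings: the token is a
# placeholder, and the assumption is that MODEL, TOKENIZER_ID, and
# MAGPIE_PRE_QUERY_TEMPLATE should match the model family chosen (here a Qwen
# model, hence "qwen2" from the options listed above). Uncomment and adapt it
# only if you are replacing the Serverless Inference default:
# HF_TOKEN=your_hugging_face_token_here
# OLLAMA_BASE_URL=http://127.0.0.1:11434/
# MODEL=qwen2.5:32b-instruct-q5_K_S
# TOKENIZER_ID=Qwen/Qwen2.5-32B-Instruct
# MAGPIE_PRE_QUERY_TEMPLATE=qwen2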