# Required: Hugging Face token with read/write permissions for repositories and inference API
# Get it from: https://huggingface.co/settings/tokens
HF_TOKEN=your_hugging_face_token_here
# Model Configuration
# Choose ONE of the following model setups:

## 1. For Hugging Face Serverless Inference (Default):
MODEL=meta-llama/Llama-3.1-8B-Instruct
# MODEL=Qwen/Qwen2.5-1.5B-Instruct
# MODEL=mistralai/Mixtral-8x7B-Instruct-v0.1
# MODEL=HuggingFaceH4/zephyr-7b-beta
## 2. For OpenAI-compatible API:
# OPENAI_BASE_URL=https://api.openai.com/v1/
# MODEL=gpt-4
# API_KEY=your_openai_api_key_here
## 3. For Ollama:
# OLLAMA_BASE_URL=http://127.0.0.1:11434/
# MODEL=qwen2.5:32b-instruct-q5_K_S
# TOKENIZER_ID=Qwen/Qwen2.5-32B-Instruct
## 4. For vLLM:
# VLLM_BASE_URL=http://127.0.0.1:8000/
# MODEL=Qwen/Qwen2.5-1.5B-Instruct
# TOKENIZER_ID=Qwen/Qwen2.5-1.5B-Instruct
## 5. For Hugging Face Inference Endpoints or TGI:
# HUGGINGFACE_BASE_URL=http://127.0.0.1:3000/
# TOKENIZER_ID=meta-llama/Llama-3.1-8B-Instruct
# Generation Settings (Optional - these are the defaults)
MAX_NUM_TOKENS=2048
MAX_NUM_ROWS=1000
DEFAULT_BATCH_SIZE=5
# Chat/SFT Configuration
# Required for chat data generation with Llama or Qwen models
# Options: "llama3", "qwen2", or a custom template string
MAGPIE_PRE_QUERY_TEMPLATE=llama3
# Optional: Argilla Integration
# Follow https://docs.argilla.io/latest/getting_started/quickstart/
# ARGILLA_API_URL=https://[your-owner-name]-[your_space_name].hf.space
# ARGILLA_API_KEY=your_argilla_api_key_here