# Required: Hugging Face token with read/write permissions for repositories and inference API
# Get it from: https://huggingface.co/settings/tokens
HF_TOKEN=your_hugging_face_token_here

# Model Configuration
# Choose ONE of the following model setups:

## 1. For Hugging Face Serverless Inference (Default):
MODEL=meta-llama/Llama-3.1-8B-Instruct
# MODEL=Qwen/Qwen2.5-1.5B-Instruct
# MODEL=mistralai/Mixtral-8x7B-Instruct-v0.1
# MODEL=HuggingFaceH4/zephyr-7b-beta

## 2. For an OpenAI-compatible API:
# OPENAI_BASE_URL=https://api.openai.com/v1/
# MODEL=gpt-4
# API_KEY=your_openai_api_key_here

## 3. For Ollama:
# OLLAMA_BASE_URL=http://127.0.0.1:11434/
# MODEL=qwen2.5:32b-instruct-q5_K_S
# TOKENIZER_ID=Qwen/Qwen2.5-32B-Instruct

## 4. For vLLM:
# VLLM_BASE_URL=http://127.0.0.1:8000/
# MODEL=Qwen/Qwen2.5-1.5B-Instruct
# TOKENIZER_ID=Qwen/Qwen2.5-1.5B-Instruct

## 5. For Hugging Face Inference Endpoints or TGI:
# HUGGINGFACE_BASE_URL=http://127.0.0.1:3000/
# TOKENIZER_ID=meta-llama/Llama-3.1-8B-Instruct

# Generation Settings (Optional - these are the defaults)
MAX_NUM_TOKENS=2048
MAX_NUM_ROWS=1000
DEFAULT_BATCH_SIZE=5

# Chat/SFT Configuration
# Required for chat data generation with Llama or Qwen models
# Options: "llama3", "qwen2", or a custom template string
MAGPIE_PRE_QUERY_TEMPLATE=llama3

# Optional: Argilla Integration
# Follow https://docs.argilla.io/latest/getting_started/quickstart/
# ARGILLA_API_URL=https://[your-owner-name]-[your_space_name].hf.space
# ARGILLA_API_KEY=your_argilla_api_key_here
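
# Example: a complete, self-consistent configuration for a local Ollama
# backend. This is an illustrative sketch, not extra settings: the token is a
# placeholder, and the assumption is that MODEL, TOKENIZER_ID, and
# MAGPIE_PRE_QUERY_TEMPLATE should match the model family chosen (here a Qwen
# model, hence "qwen2" from the options listed above). Uncomment and adapt it
# only if you are replacing the Serverless Inference default:
# HF_TOKEN=your_hugging_face_token_here
# OLLAMA_BASE_URL=http://127.0.0.1:11434/
# MODEL=qwen2.5:32b-instruct-q5_K_S
# TOKENIZER_ID=Qwen/Qwen2.5-32B-Instruct
# MAGPIE_PRE_QUERY_TEMPLATE=qwen2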