# Required: Hugging Face token with read/write permissions for repositories and inference API
# Get it from: https://huggingface.co/settings/tokens
HF_TOKEN=your_hugging_face_token_here
# Model Configuration
# Choose ONE of the following model setups:

## 1. For Hugging Face Serverless Inference (Default):
MODEL=meta-llama/Llama-3.1-8B-Instruct
# MODEL=Qwen/Qwen2.5-1.5B-Instruct
# MODEL=mistralai/Mixtral-8x7B-Instruct-v0.1
# MODEL=HuggingFaceH4/zephyr-7b-beta
## 2. For OpenAI-compatible API:
# OPENAI_BASE_URL=https://api.openai.com/v1/
# MODEL=gpt-4
# API_KEY=your_openai_api_key_here
## 3. For Ollama:
# OLLAMA_BASE_URL=http://127.0.0.1:11434/
# MODEL=qwen2.5:32b-instruct-q5_K_S
# TOKENIZER_ID=Qwen/Qwen2.5-32B-Instruct
## 4. For vLLM:
# VLLM_BASE_URL=http://127.0.0.1:8000/
# MODEL=Qwen/Qwen2.5-1.5B-Instruct
# TOKENIZER_ID=Qwen/Qwen2.5-1.5B-Instruct
## 5. For Hugging Face Inference Endpoints or TGI:
# HUGGINGFACE_BASE_URL=http://127.0.0.1:3000/
# TOKENIZER_ID=meta-llama/Llama-3.1-8B-Instruct
# Generation Settings (Optional - these are the defaults)
MAX_NUM_TOKENS=2048
MAX_NUM_ROWS=1000
DEFAULT_BATCH_SIZE=5
# Chat/SFT Configuration
# Required for chat data generation with Llama or Qwen models
# Options: "llama3", "qwen2", or a custom template string
MAGPIE_PRE_QUERY_TEMPLATE=llama3
# Optional: Argilla Integration
# Follow https://docs.argilla.io/latest/getting_started/quickstart/
# ARGILLA_API_URL=https://[your-owner-name]-[your_space_name].hf.space
# ARGILLA_API_KEY=your_argilla_api_key_here