# test/helm/h2ogpt-chart/values.yaml
# Uploader: iblfe
# Commit b585c7f (verified): Upload folder using huggingface_hub
# File size: 2.85 kB
# Name override values; both are empty strings by default.
# (Presumably read by the chart's naming helpers; the templates are not shown here.)
nameOverride: ''
fullnameOverride: ''
# Values for the h2oGPT component.
h2ogpt:
  enabled: true
  stack:
    # -- Run h2oGPT and vLLM on same pod.
    enabled: false
  replicaCount: 1

  image:
    repository: gcr.io/vorvan/h2oai/h2ogpt-runtime
    pullPolicy: IfNotPresent
  # All initImage fields are left unset (null) by default.
  initImage:
    repository:
    tag:
    pullPolicy:

  storage:
    size: 128Gi
    # No storage class is set by default (null).
    class:
    useEphemeral: true

  externalLLM:
    enabled: false
    secret:
    modelLock:
    openAIAzure:
      enabled: false
    openAI:
      # Lowercase `false` keeps this flag consistent with every other
      # boolean in this file and avoids YAML 1.1/1.2 truthy differences.
      enabled: false
    replicate:
      enabled: false

  # -- Example configs to use when not using Model Lock and External LLM
  # overrideConfig:
  #   base_model: h2oai/h2ogpt-4096-llama2-7b-chat
  #   use_safetensors: True
  #   prompt_type: llama2
  #   save_dir: /workspace/save/
  #   use_gpu_id: False
  #   score_model: None
  #   max_max_new_tokens: 2048
  #   max_new_tokens: 1024
  overrideConfig:

  service:
    type: NodePort
    webPort: 80
    gptPort: 8888
    webServiceAnnotations: {}

  updateStrategy:
    type: RollingUpdate

  # Pod-level security settings: non-root, UID/GID/fsGroup all 1000.
  podSecurityContext:
    runAsNonRoot: true
    runAsUser: 1000
    runAsGroup: 1000
    fsGroup: 1000

  # Container-level security settings: no privilege escalation, all
  # capabilities dropped, runtime-default seccomp profile.
  securityContext:
    runAsNonRoot: true
    allowPrivilegeEscalation: false
    capabilities:
      drop:
        - ALL
    seccompProfile:
      type: RuntimeDefault

  # Left unset (null) by default.
  resources:
  nodeSelector:
  tolerations:

  env: {}

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}
# Values for the text-generation-inference (TGI) component; disabled by default.
tgi:
  enabled: false
  replicaCount: 1

  image:
    repository: ghcr.io/huggingface/text-generation-inference
    # Quoted so the tag is always read as a string, even if a future value
    # looks numeric (e.g. 1.0 would otherwise be parsed as a float).
    tag: "0.9.3"
    pullPolicy: IfNotPresent

  storage:
    size: 512Gi
    # No storage class is set by default (null).
    class:
    useEphemeral: true

  # Left unset (null) by default.
  overrideConfig:
  hfSecret:
  containerArgs:

  service:
    type: ClusterIP
    port: 8080

  updateStrategy:
    type: RollingUpdate

  # Left unset (null) by default.
  podSecurityContext:
  securityContext:

  resources:
  nodeSelector:
  tolerations:

  env: {}

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}
# Values for the vLLM component; disabled by default.
vllm:
  enabled: false
  replicaCount: 1

  image:
    repository: gcr.io/vorvan/h2oai/h2ogpt-runtime
    pullPolicy: IfNotPresent

  # Left unset (null) by default.
  # NOTE(review): placed as a sibling of `image`; confirm against the templates.
  imagePullSecrets:

  storage:
    size: 512Gi
    # No storage class is set by default (null).
    class:
    useEphemeral: true

  overrideConfig:

  # Every entry is a string: Kubernetes container args are a list of strings,
  # so the numeric values ("2", "1234") are quoted rather than left as integers.
  # NOTE(review): assumes this list is passed through as container args;
  # confirm in the deployment template.
  containerArgs:
    - "--model"
    - h2oai/h2ogpt-4096-llama2-7b-chat
    - "--tokenizer"
    - hf-internal-testing/llama-tokenizer
    - "--tensor-parallel-size"
    - "2"
    - "--seed"
    - "1234"
    - "--trust-remote-code"

  service:
    type: ClusterIP
    port: 5000

  updateStrategy:
    type: RollingUpdate

  # Pod-level security settings: non-root, UID/GID/fsGroup all 1000.
  podSecurityContext:
    runAsNonRoot: true
    runAsUser: 1000
    runAsGroup: 1000
    fsGroup: 1000

  # Container-level security settings: no privilege escalation, all
  # capabilities dropped.
  securityContext:
    runAsNonRoot: true
    allowPrivilegeEscalation: false
    capabilities:
      drop:
        - ALL
    # NOTE(review): left unset (null) here, whereas h2ogpt.securityContext
    # sets `type: RuntimeDefault`; confirm whether this is intentional.
    seccompProfile:

  env: {}

  # Left unset (null) by default.
  resources:
  nodeSelector:
  tolerations:

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}