# Name overrides; both default to the empty string.
# NOTE(review): presumably consumed by the chart's name helpers — confirm against the templates.
nameOverride: ""
fullnameOverride: ""
# Values for the h2oGPT component (enabled by default).
h2ogpt:
  enabled: true
  stack:
    # -- Run h2oGPT and vLLM on same pod.
    enabled: false
  replicaCount: 1
  image:
    repository: gcr.io/vorvan/h2oai/h2ogpt-runtime
    pullPolicy: IfNotPresent
  # Every initImage field is left unset (null) by default.
  initImage:
    repository:
    tag:
    pullPolicy:
  storage:
    size: 128Gi
    class:
    useEphemeral: true
  # NOTE(review): the provider toggles below are assumed to nest under
  # externalLLM — confirm against the chart templates.
  externalLLM:
    enabled: false
    secret:
    modelLock:
    openAIAzure:
      enabled: false
    openAI:
      enabled: false
    replicate:
      enabled: false

  # -- Example configs to use when not using Model Lock and External LLM
  # overrideConfig:
  #   base_model: h2oai/h2ogpt-4096-llama2-7b-chat
  #   use_safetensors: True
  #   prompt_type: llama2
  #   save_dir: /workspace/save/
  #   use_gpu_id: False
  #   score_model: None
  #   max_max_new_tokens: 2048
  #   max_new_tokens: 1024
  overrideConfig:

  service:
    type: NodePort
    webPort: 80
    gptPort: 8888
    webServiceAnnotations: {}

  updateStrategy:
    type: RollingUpdate

  podSecurityContext:
    runAsNonRoot: true
    runAsUser: 1000
    runAsGroup: 1000
    fsGroup: 1000

  securityContext:
    runAsNonRoot: true
    allowPrivilegeEscalation: false
    capabilities:
      drop:
        - ALL
    seccompProfile:
      type: RuntimeDefault

  # Unset (null) by default.
  resources:
  nodeSelector:
  tolerations:

  env: {}

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}
# Values for the TGI component, which uses the
# huggingface/text-generation-inference image; disabled by default.
tgi:
  enabled: false
  replicaCount: 1
  image:
    repository: ghcr.io/huggingface/text-generation-inference
    # Quoted so the version is always read as a string.
    tag: "0.9.3"
    pullPolicy: IfNotPresent
  storage:
    size: 512Gi
    class:
    useEphemeral: true

  # Unset (null) by default.
  overrideConfig:
  hfSecret:
  containerArgs:

  service:
    type: ClusterIP
    port: 8080

  updateStrategy:
    type: RollingUpdate

  # Unset (null) by default; no security context values are provided here.
  podSecurityContext:
  securityContext:

  resources:
  nodeSelector:
  tolerations:

  env: {}

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}
# Values for the vLLM component; disabled by default.
vllm:
  enabled: false
  replicaCount: 1
  image:
    repository: gcr.io/vorvan/h2oai/h2ogpt-runtime
    pullPolicy: IfNotPresent
  imagePullSecrets:
  storage:
    size: 512Gi
    class:
    useEphemeral: true
  overrideConfig:

  # Numeric values are quoted so every entry is a string.
  # NOTE(review): presumably rendered into the container's args list, which
  # Kubernetes requires to be strings — confirm against the templates.
  containerArgs:
    - "--model"
    - h2oai/h2ogpt-4096-llama2-7b-chat
    - "--tokenizer"
    - hf-internal-testing/llama-tokenizer
    - "--tensor-parallel-size"
    - "2"
    - "--seed"
    - "1234"
    - "--trust-remote-code"

  service:
    type: ClusterIP
    port: 5000

  updateStrategy:
    type: RollingUpdate

  podSecurityContext:
    runAsNonRoot: true
    runAsUser: 1000
    runAsGroup: 1000
    fsGroup: 1000

  securityContext:
    runAsNonRoot: true
    allowPrivilegeEscalation: false
    capabilities:
      drop:
        - ALL
    # NOTE(review): left empty here, while h2ogpt.securityContext sets
    # seccompProfile.type to RuntimeDefault — confirm this is intentional.
    seccompProfile:

  env: {}

  # Unset (null) by default.
  resources:
  nodeSelector:
  tolerations:

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}