# Page header captured together with this file (not part of the values):
# Spaces: iblfe/test · Runtime error · File size: 2,846 Bytes · b585c7f

# -- Name overrides; both default to the empty string.
# NOTE(review): presumably consumed by the chart's naming helpers — confirm
# against the templates.
nameOverride: ''
fullnameOverride: ''

# -- Settings for the h2oGPT workload (enabled by default).
h2ogpt:
  enabled: true
  stack:
    # -- Run h2oGPT and vLLM on same pod.
    enabled: false
  replicaCount: 1
  image:
    repository: gcr.io/vorvan/h2oai/h2ogpt-runtime
    # NOTE(review): no `tag` key is set here (tgi.image has one) — presumably
    # the templates supply a default; confirm.
    pullPolicy: IfNotPresent
  # -- Init image; every field is unset (null) by default.
  initImage:
    repository: null
    tag: null
    pullPolicy: null

  storage:
    size: 128Gi
    # -- Storage class; unset (null) by default.
    class: null
    useEphemeral: true

  # -- External LLM integration; the section and every provider below are
  # disabled by default.
  externalLLM:
    enabled: false
    secret: null

    modelLock: null

    openAIAzure:
      enabled: false

    openAI:
      enabled: false

    replicate:
      enabled: false

  # -- Example configs to use when not using Model Lock and External LLM
  # overrideConfig:
  #   base_model: h2oai/h2ogpt-4096-llama2-7b-chat
  #   use_safetensors: True
  #   prompt_type: llama2
  #   save_dir: /workspace/save/
  #   use_gpu_id: False
  #   score_model: None
  #   max_max_new_tokens: 2048
  #   max_new_tokens: 1024

  # -- No configuration overrides by default (null); see the example above.
  overrideConfig: null

  service:
    type: NodePort
    webPort: 80
    gptPort: 8888
    webServiceAnnotations: {}

  updateStrategy:
    type: RollingUpdate

  # -- Pod-level security context: non-root, UID/GID/fsGroup 1000.
  podSecurityContext:
    runAsNonRoot: true
    runAsUser: 1000
    runAsGroup: 1000
    fsGroup: 1000

  # -- Container-level security context: no privilege escalation, all
  # capabilities dropped, RuntimeDefault seccomp profile.
  securityContext:
    runAsNonRoot: true
    allowPrivilegeEscalation: false
    capabilities:
      drop:
        - ALL
    seccompProfile:
      type: RuntimeDefault

  # -- Scheduling and resource settings; all unset (null) by default.
  resources: null
  nodeSelector: null
  tolerations: null

  env: {}

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}

# -- Settings for the Hugging Face text-generation-inference (TGI) workload;
# disabled by default.
tgi:
  enabled: false
  replicaCount: 1

  image:
    repository: ghcr.io/huggingface/text-generation-inference
    # Quoted so the version can never be read as anything but a string.
    tag: "0.9.3"
    pullPolicy: IfNotPresent

  storage:
    size: 512Gi
    # -- Storage class; unset (null) by default.
    class: null
    useEphemeral: true

  # -- Optional settings; all unset (null) by default.
  overrideConfig: null
  hfSecret: null
  containerArgs: null

  service:
    type: ClusterIP
    port: 8080

  updateStrategy:
    type: RollingUpdate

  # -- No security contexts are set for this workload by default (null),
  # unlike the h2ogpt and vllm sections of this file.
  podSecurityContext: null
  securityContext: null

  # -- Scheduling and resource settings; all unset (null) by default.
  resources: null
  nodeSelector: null
  tolerations: null

  env: {}

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}

# -- Settings for the vLLM workload; disabled by default.
vllm:
  enabled: false
  replicaCount: 1

  image:
    repository: gcr.io/vorvan/h2oai/h2ogpt-runtime
    # NOTE(review): no `tag` key is set here (tgi.image has one) — presumably
    # the templates supply a default; confirm.
    pullPolicy: IfNotPresent

  # -- Image pull secrets; unset (null) by default.
  imagePullSecrets: null

  storage:
    size: 512Gi
    # -- Storage class; unset (null) by default.
    class: null
    useEphemeral: true

  # -- No configuration overrides by default (null).
  overrideConfig: null

  # -- Command-line arguments for the container.
  # Numeric values are quoted on purpose: Kubernetes container `args` is a
  # list of strings, so every element is kept a YAML string regardless of
  # how the templates render the list.
  containerArgs:
    - "--model"
    - h2oai/h2ogpt-4096-llama2-7b-chat
    - "--tokenizer"
    - hf-internal-testing/llama-tokenizer
    - "--tensor-parallel-size"
    - "2"
    - "--seed"
    - "1234"
    - "--trust-remote-code"

  service:
    type: ClusterIP
    port: 5000

  updateStrategy:
    type: RollingUpdate

  # -- Pod-level security context: non-root, UID/GID/fsGroup 1000.
  podSecurityContext:
    runAsNonRoot: true
    runAsUser: 1000
    runAsGroup: 1000
    fsGroup: 1000

  # -- Container-level security context: no privilege escalation, all
  # capabilities dropped, RuntimeDefault seccomp profile (same values as
  # h2ogpt.securityContext in this file).
  securityContext:
    runAsNonRoot: true
    allowPrivilegeEscalation: false
    capabilities:
      drop:
        - ALL
    seccompProfile:
      type: RuntimeDefault

  env: {}

  # -- Scheduling and resource settings; all unset (null) by default.
  resources: null

  nodeSelector: null

  tolerations: null

  podAnnotations: {}
  podLabels: {}
  autoscaling: {}