JRosenkranz committed on
Commit
fbbff12
1 Parent(s): 714de30

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +5 -5
README.md CHANGED
@@ -31,24 +31,24 @@ _Note: For all samples, your environment must have access to cuda_
31
  #### Setup
32
 
33
  ```bash
34
- docker pull docker-eu-public.artifactory.swg-devops.com/res-zrl-snap-docker-local/tgis-os:spec.7
35
  docker run -d --rm --gpus all \
36
  --name my-tgis-server \
37
  -p 8033:8033 \
38
  -v /path/to/all/models:/models \
39
  -e MODEL_NAME=/models/model_weights/llama/13B-F \
40
- -e SPECULATOR_NAME=/models/speculator_weights/llama/13B-F \
41
  -e FLASH_ATTENTION=true \
42
  -e PAGED_ATTENTION=true \
43
  -e DTYPE_STR=float16 \
44
- docker-eu-public.artifactory.swg-devops.com/res-zrl-snap-docker-local/tgis-os:spec.7
45
 
46
  # check logs and wait for "gRPC server started on port 8033" and "HTTP server started on port 3000"
47
  docker logs my-tgis-server -f
48
 
49
  # get the client sample (Note: The first prompt will take longer as there is a warmup time)
50
- conda create -n tgis-env python=3.11
51
- conda activate tgis-env
52
  git clone --branch speculative-decoding --single-branch https://github.com/tdoublep/text-generation-inference.git
53
  cd text-generation-inference/integration_tests
54
  make gen-client
 
31
  #### Setup
32
 
33
  ```bash
34
+ docker pull quay.io/wxpe/text-gen-server:speculative-decoding.ecd73c4
35
  docker run -d --rm --gpus all \
36
  --name my-tgis-server \
37
  -p 8033:8033 \
38
  -v /path/to/all/models:/models \
39
  -e MODEL_NAME=/models/model_weights/llama/13B-F \
40
+ -e SPECULATOR_NAME=/models/speculator_weights/llama/llama-13b-accelerator \
41
  -e FLASH_ATTENTION=true \
42
  -e PAGED_ATTENTION=true \
43
  -e DTYPE_STR=float16 \
44
+ quay.io/wxpe/text-gen-server:speculative-decoding.ecd73c4
45
 
46
  # check logs and wait for "gRPC server started on port 8033" and "HTTP server started on port 3000"
47
  docker logs my-tgis-server -f
48
 
49
  # get the client sample (Note: The first prompt will take longer as there is a warmup time)
50
+ conda create -n tgis-client-env python=3.11
51
+ conda activate tgis-client-env
52
  git clone --branch speculative-decoding --single-branch https://github.com/tdoublep/text-generation-inference.git
53
  cd text-generation-inference/integration_tests
54
  make gen-client