sahilsuneja committed on
Commit
5489529
1 Parent(s): 14f26be

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +29 -0
README.md CHANGED
@@ -121,4 +121,33 @@ curl 127.0.0.1:8080/generate_stream \
121
  -X POST \
122
  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":20}}' \
123
  -H 'Content-Type: application/json'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  ```
 
121
  -X POST \
122
  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":20}}' \
123
  -H 'Content-Type: application/json'
124
+ ```
125
+
126
+ ### Use in vLLM
127
+ ```
128
+ from vllm import LLM, SamplingParams
129
+
130
+ # Sample prompts.
131
+ prompts = [
132
+ "The president of the United States is",
133
+ ]
134
+ # Create a sampling params object.
135
+ sampling_params = SamplingParams(temperature=0.0)
136
+
137
+ # Create an LLM.
138
+ llm = LLM(
139
+ model="/path/to/Llama-2-70b-chat-hf",
140
+ tensor_parallel_size=4,
141
+ speculative_model="/path/to/llama2-70b-accelerator",
142
+ speculative_draft_tensor_parallel_size=1,
143
+ use_v2_block_manager=True,
144
+ )
145
+ # Generate texts from the prompts. The output is a list of RequestOutput objects
146
+ # that contain the prompt, generated text, and other information.
147
+ outputs = llm.generate(prompts, sampling_params)
148
+ # Print the outputs.
149
+ for output in outputs:
150
+ prompt = output.prompt
151
+ generated_text = output.outputs[0].text
152
+ print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
153
  ```