Update README.md
README.md (changed)
````diff
@@ -91,8 +91,12 @@ Replace `model_repo_id` and `filename` with the desired model repository ID and
 ```python
 from llama_cpp import Llama
 
-llm = Llama(
-
+llm = Llama(
+    model_path = filepath, # Download the model file first
+    n_ctx = 32768, # The max sequence length to use - note that longer sequence lengths require much more resources
+    n_threads = 8, # The number of CPU threads to use, tailor to your system and the resulting performance
+    n_gpu_layers = 35 # The number of layers to offload to GPU, if you have GPU acceleration available
+)
 # Defining the Alpaca prompt template
 alpaca_prompt = """
 ### Instruction:
@@ -118,12 +122,12 @@ output = llm(
         "In how many phases will the general elections in India be held?", # input
         "", # output - leave this blank for generation!
     ), #Alpaca Prompt
-    max_tokens=512, # Generate up to 512 tokens
-    stop=["<eos>"], #stop token
-    echo=True # Whether to echo the prompt
+    max_tokens = 512, # Generate up to 512 tokens
+    stop = ["<eos>"], #stop token
+    echo = True # Whether to echo the prompt
 )
 
-output_text=output['choices'][0]['text']
+output_text = output['choices'][0]['text']
 start_marker = "### Response:"
 end_marker = "<eos>"
 start_pos = output_text.find(start_marker) + len(start_marker)
@@ -133,6 +137,7 @@ end_pos = output_text.find(end_marker, start_pos)
 response_text = output_text[start_pos:end_pos].strip()
 
 print(response_text)
+
 ```
 
````
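Pieced together from the hunks above, the updated completion example in the README reads roughly as follows. The parts that fall outside the diff context are assumptions and are flagged in the comments: the value of `filepath`, the body of `alpaca_prompt` (assumed to be the usual three-slot Alpaca layout), and the instruction string passed to `.format(...)`.

```python
from llama_cpp import Llama

# Assumed: a GGUF file downloaded beforehand (the README obtains it from the
# model repository referenced by `model_repo_id` and `filename`).
filepath = "model.gguf"

llm = Llama(
    model_path = filepath,  # Download the model file first
    n_ctx = 32768,          # The max sequence length to use - longer sequences require much more resources
    n_threads = 8,          # The number of CPU threads to use, tailor to your system
    n_gpu_layers = 35       # The number of layers to offload to GPU, if GPU acceleration is available
)

# Defining the Alpaca prompt template (the template body below the first line
# is not visible in the diff; this is the usual three-slot layout).
alpaca_prompt = """
### Instruction:
{}

### Input:
{}

### Response:
{}"""

output = llm(
    alpaca_prompt.format(
        "Answer the question using the given input.",  # instruction (assumed; not visible in the diff)
        "In how many phases will the general elections in India be held?",  # input
        "",  # output - leave this blank for generation!
    ),  # Alpaca Prompt
    max_tokens = 512,   # Generate up to 512 tokens
    stop = ["<eos>"],   # stop token
    echo = True         # Whether to echo the prompt
)

# Extract the generated answer from the echoed prompt plus completion.
output_text = output['choices'][0]['text']
start_marker = "### Response:"
end_marker = "<eos>"
start_pos = output_text.find(start_marker) + len(start_marker)
end_pos = output_text.find(end_marker, start_pos)
response_text = output_text[start_pos:end_pos].strip()

print(response_text)
```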
#### Simple llama-cpp-python Chat Completion API Example Code
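The diff ends before the example that belongs under this heading, so it is not reproduced here. As a minimal sketch, llama-cpp-python's chat-completion API can be called on the same `llm` instance roughly as below; the message contents are illustrative, not taken from the README.

```python
# Minimal chat-completion sketch (illustrative; not the README's exact example).
response = llm.create_chat_completion(
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "In how many phases will the general elections in India be held?"}
    ],
    max_tokens = 512  # Generate up to 512 tokens
)

# The result follows the OpenAI-style chat completion layout.
print(response["choices"][0]["message"]["content"])
```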