pmking27 committed
Commit 573747c · verified · 1 Parent(s): e195ac9

Update README.md

Files changed (1)
  1. README.md +11 -6
README.md CHANGED
@@ -91,8 +91,12 @@ Replace `model_repo_id` and `filename` with the desired model repository ID and
 ```python
 from llama_cpp import Llama
 
-llm = Llama(model_path = filepath, chat_format="gemma")
-
+llm = Llama(
+  model_path = filepath, # Download the model file first
+  n_ctx = 32768, # The max sequence length to use - note that longer sequence lengths require much more resources
+  n_threads = 8, # The number of CPU threads to use, tailor to your system and the resulting performance
+  n_gpu_layers = 35 # The number of layers to offload to GPU, if you have GPU acceleration available
+)
 # Defining the Alpaca prompt template
 alpaca_prompt = """
 ### Instruction:
@@ -118,12 +122,12 @@ output = llm(
   "In how many phases will the general elections in India be held?", # input
   "", # output - leave this blank for generation!
   ), #Alpaca Prompt
-  max_tokens=512, # Generate up to 512 tokens
-  stop=["<eos>"], #stop token
-  echo=True # Whether to echo the prompt
+  max_tokens = 512, # Generate up to 512 tokens
+  stop = ["<eos>"], #stop token
+  echo = True # Whether to echo the prompt
 )
 
-output_text=output['choices'][0]['text']
+output_text = output['choices'][0]['text']
 start_marker = "### Response:"
 end_marker = "<eos>"
 start_pos = output_text.find(start_marker) + len(start_marker)
@@ -133,6 +137,7 @@ end_pos = output_text.find(end_marker, start_pos)
 response_text = output_text[start_pos:end_pos].strip()
 
 print(response_text)
+
 ```
 
 #### Simple llama-cpp-python Chat Completion API Example Code
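The diff view is truncated right under the chat-completion heading, so the body of that example is not visible here. As a point of reference only, a minimal sketch of how `llama-cpp-python`'s chat API is typically called with this kind of setup, reusing the `filepath` variable and the `gemma` chat format from the removed constructor line above (the concrete path and settings are placeholders, not the README's actual text):

```python
from llama_cpp import Llama

filepath = "path/to/model.gguf"  # placeholder; the README obtains this path in its earlier download step

llm = Llama(
    model_path=filepath,
    chat_format="gemma",  # chat template name; matches the removed line in the diff above
    n_ctx=32768,          # same context length as the updated constructor
    n_threads=8,
    n_gpu_layers=35,
)

# The chat API applies the chat template itself, so no manual Alpaca-style
# prompt formatting or "### Response:" / "<eos>" marker parsing is needed.
response = llm.create_chat_completion(
    messages=[
        {"role": "user", "content": "In how many phases will the general elections in India be held?"},
    ],
    max_tokens=512,
)

print(response["choices"][0]["message"]["content"])
```

This is also the practical difference between the two sections of the README: the raw completion call shown in the hunks above has to wrap the question in an Alpaca-style prompt and strip the response markers by hand, while `create_chat_completion` handles templating and returns a structured message.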