LeroyDyer committed
Commit 250661b · verified · 1 Parent(s): 8dca26d

Update README.md

Files changed (1): README.md (+45 −1)
README.md CHANGED
@@ -70,4 +70,48 @@ tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokeniza
 
 
 
-```
+```
+
+
+
+**Working model (no errors)**
+
+```python
+%pip install llama-index-embeddings-huggingface
+%pip install llama-index-llms-llama-cpp
+%pip install llama-index
+
+from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
+from llama_index.llms.llama_cpp import LlamaCPP
+from llama_index.llms.llama_cpp.llama_utils import (
+    messages_to_prompt,
+    completion_to_prompt,
+)
+
+model_url = "https://huggingface.co/LeroyDyer/Mixtral_BaseModel-gguf/resolve/main/mixtral_basemodel.q8_0.gguf"
+
+llm = LlamaCPP(
+    # you can pass in the URL to a GGUF model to download it automatically
+    model_url=model_url,
+    # optionally, set the path to a pre-downloaded model instead of model_url
+    model_path=None,
+    temperature=0.1,
+    max_new_tokens=256,
+    # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room
+    context_window=3900,
+    # kwargs to pass to __call__()
+    generate_kwargs={},
+    # kwargs to pass to __init__()
+    # set to at least 1 to use GPU
+    model_kwargs={"n_gpu_layers": 1},
+    # transform inputs into Llama2 format
+    messages_to_prompt=messages_to_prompt,
+    completion_to_prompt=completion_to_prompt,
+    verbose=True,
+)
+
+prompt = input("Enter your prompt: ")
+response = llm.complete(prompt)
+print(response.text)
+```
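
The added snippet imports `SimpleDirectoryReader` and `VectorStoreIndex` and installs `llama-index-embeddings-huggingface`, but never uses them. Below is a minimal sketch of how those pieces could plug into the `llm` defined above for retrieval-augmented querying over local files; the `./data` path and the `BAAI/bge-small-en-v1.5` embedding model are illustrative assumptions, not part of this commit.

```python
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Route all llama-index components through the local GGUF model loaded above,
# plus a small HuggingFace embedding model (an assumed, illustrative choice).
Settings.llm = llm
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Load every file in ./data (hypothetical path) and build an in-memory vector index.
documents = SimpleDirectoryReader("./data").load_data()
index = VectorStoreIndex.from_documents(documents)

# Retrieve the most relevant chunks and let the Mixtral GGUF model answer over them.
query_engine = index.as_query_engine()
print(query_engine.query("What do these documents say?"))
```

Setting `Settings.llm` and `Settings.embed_model` makes the local model and embedder the defaults for every index and query engine; either can also be passed per-engine instead (e.g. `index.as_query_engine(llm=llm)`).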