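"""Gradio chatbot for local open-source LLMs.

Downloads quantized GGUF/GGML checkpoints from the Hugging Face Hub and runs
them with llama.cpp through LlamaIndex's LlamaCPP wrapper, exposing a simple
chat UI with a model selector.
"""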
import logging
import sys

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import (
    messages_to_prompt,
    completion_to_prompt,
)
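# Fetch a model's quantized checkpoint from the Hugging Face Hub and cache it
# locally (model_info and MODELS_PATH are defined below, before the first call).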
def download(model):
    repo_id = model_info[model]["repo_id"]
    filename = model_info[model]["filename"]
    model_path = hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        resume_download=True,
        cache_dir=MODELS_PATH,
    )
    return model_path
MODELS_PATH = "./models"

models = [
    "Llama-2-13B-chat",
    "Mistral-7B-Instruct-v0.2",
    "zephyr-7B-beta",
    "vicuna-7B-v1.5",
    "CodeLlama-7B",
    "Falcon-7B-Instruct",
]
model_info = {
    "Llama-2-13B-chat": {
        "repo_id": "TheBloke/Llama-2-13B-chat-GGUF",
        "filename": "llama-2-13b-chat.Q4_K_M.gguf",
    },
    "Mistral-7B-Instruct-v0.2": {
        "repo_id": "TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
        "filename": "mistral-7b-instruct-v0.2.Q4_K_M.gguf",
    },
    "zephyr-7B-beta": {
        "repo_id": "TheBloke/zephyr-7B-beta-GGUF",
        "filename": "zephyr-7b-beta.Q4_K_M.gguf",
    },
    "vicuna-7B-v1.5": {
        "repo_id": "TheBloke/vicuna-7B-v1.5-GGUF",
        "filename": "vicuna-7b-v1.5.Q4_K_M.gguf",
    },
    "CodeLlama-7B": {
        "repo_id": "TheBloke/CodeLlama-7B-GGUF",
        "filename": "codellama-7b.Q4_K_M.gguf",
    },
    "Falcon-7B-Instruct": {
        "repo_id": "TheBloke/Falcon-7B-Instruct-GGML",
        "filename": "falcon-7b-instruct.ggccv1.q4_1.bin",
    },
}
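# Pre-download every model once so switching models in the UI is fast.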
for model_name in models:
    download(model_name)
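# Mistral-7B-Instruct-v0.2 is the default model used until another one is selected.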
mistral_model_path = hf_hub_download(
    repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
    filename="mistral-7b-instruct-v0.2.Q4_K_M.gguf",
    resume_download=True,
    cache_dir=MODELS_PATH,
)
"""Step 3 : if you use GPU then make sure ( n_gpu_layers":1) at least 1, you can increase or decrease it based on your GPU performance"""
llm = LlamaCPP(
    # You can pass in the URL to a GGUF/GGML model to download it automatically
    # model_url=model_url,
    # optionally, set the path to a pre-downloaded model instead of model_url
    model_path=mistral_model_path,
    temperature=0.1,
    max_new_tokens=256,
    # llama2 has a context window of 4096 tokens, but we set it lower to leave some headroom
    context_window=3900,
    # kwargs to pass to __call__()
    generate_kwargs={},
    # kwargs to pass to __init__()
    # n_gpu_layers=-1 offloads all layers to the GPU; use 0 for CPU-only
    model_kwargs={"n_gpu_layers": -1},
    # transform inputs into the Llama2 chat format
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)
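# Gradio callback: download the selected model (served from cache if already
# present) and rebuild the global LlamaCPP instance used by predict().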
def model_initialization(model):
    # Rebind the module-level llm so predict() picks up the newly selected model.
    global llm
    if model == "" or model not in model_info:
        gr.Warning("Please select a model")
        return "No model selected, still using the previous model"
    gr.Info("Model download and configuration has started, please wait...")
    repo_id = model_info[model]["repo_id"]
    filename = model_info[model]["filename"]
    model_path = hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        resume_download=True,
        cache_dir=MODELS_PATH,
    )
    llm = LlamaCPP(
        model_path=model_path,
        temperature=0.1,
        max_new_tokens=256,
        # llama2 has a context window of 4096 tokens, but we set it lower to leave some headroom
        context_window=3900,
        # kwargs to pass to __call__()
        generate_kwargs={},
        # n_gpu_layers=-1 offloads all layers to the GPU; use 0 for CPU-only
        model_kwargs={"n_gpu_layers": -1},
        # transform inputs into the Llama2 chat format
        messages_to_prompt=messages_to_prompt,
        completion_to_prompt=completion_to_prompt,
        verbose=True,
    )
    gr.Info("The model has been configured and is ready to chat")
    return "The model has been configured and is ready to chat; your current model is " + model
def predict(message, history):
    answer = []
    response = llm.stream_complete(message)
    for bot_response in response:
        token = bot_response.delta
        answer.append(token)
        # Join without separators: each delta already carries its own whitespace.
        final_answer = "".join(answer)
        yield final_answer
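# Assemble the UI: model selector, status box, and chat interface.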
with gr.Blocks() as UI:
    model_selector = gr.Dropdown(
        choices=models,
        value="Mistral-7B-Instruct-v0.2",
        label="Select a model",
        info="The default model is Mistral-7B-Instruct-v0.2",
    )
    textInfo = gr.Textbox(
        value="The current model is Mistral-7B-Instruct-v0.2",
        label="Model Status",
    )
    # Chatbot interface
    chatUI = gr.ChatInterface(
        predict,
        title="Open Source LLM ChatBot",
        description="Ask any question",
        theme="soft",
        examples=[
            "Hello",
            "Are you an LLM model?",
            "How can I fine-tune a pre-trained LLM model?",
            "How can I build a chatbot using a local open-source LLM?",
        ],
        cache_examples=False,
        submit_btn="Send Message",
        retry_btn=None,
        undo_btn="Delete Previous",
        clear_btn="Clear",
    )
    model_selector.change(fn=model_initialization, inputs=[model_selector], outputs=[textInfo])
if __name__ == "__main__":
    UI.launch(debug=True)