"""Gradio chat demo that serves a GGML model through ctransformers.

On import this module downloads the model file referenced by ``URL`` from the
Hugging Face Hub into ``./models`` and loads it into ``llm``.  ``get_demo()``
builds the Gradio UI; running the file as a script launches it.
"""
from pathlib import Path
from urllib.parse import urlparse

import gradio as gr
import psutil
from ctransformers import AutoModelForCausalLM
from huggingface_hub import hf_hub_download

# Models tried earlier (kept for reference).  The falcon files were fetched
# with revision="ggmlv3" and loaded with model_type="falcon":
#   https://huggingface.co/TheBloke/falcon-7b-instruct-GGML/blob/main/falcon-7b-instruct.ggccv1.q4_1.bin
#   https://huggingface.co/TheBloke/falcon-7b-instruct-GGML/blob/ggmlv3/falcon7b-instruct.ggmlv3.q4_1.bin
URL = "https://huggingface.co/TheBloke/30B-Lazarus-GGML/blob/main/30b-Lazarus.ggmlv3.q2_K.bin"  # 13.6 G

# Derive both values from the URL *path* so a query string or fragment can
# never leak into the repo id or the file name.
_url_path = urlparse(URL).path
repo_id = "/".join(_url_path.strip("/").split("/")[:2])
model_file = Path(_url_path).name

# hf_hub_download returns the local path of the downloaded file.
model_path = hf_hub_download(
    repo_id=repo_id,
    filename=model_file,
    local_dir="models",
    # local_dir_use_symlinks=True,
)

llm = AutoModelForCausalLM.from_pretrained(
    model_path,
    # model_type="falcon",
    model_type="llama",
    # psutil.cpu_count() may return None when the count cannot be determined;
    # fall back to a single thread instead of passing None through.
    threads=psutil.cpu_count(logical=False) or 1,
)

TITLE = f"""

🦅 30B-Lazarus-GGM ({model_file}) 🦅"""

USER_NAME = "User"
BOT_NAME = "Assistant"
DEFAULT_INSTRUCTIONS = """The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Assistant, and a human user, called User. In the following interactions, User and Assistant will converse in natural language, and Assistant will answer User's questions. Assistant will never decline to answer a question, and always attempts to give an answer that User would be satisfied with. It knows a lot, and always tells the truth. The conversation begins. """
RETRY_COMMAND = "/retry"
STOP_STR = f"\n{USER_NAME}:"
# Tokens that may be the beginning of STOP_STR; run_chat holds them back until
# a following token shows that the stop sequence was not being generated.
STOP_SUSPECT_LIST = [":", "\n", "User"]


def chat_accordion():
    """Build the collapsed "Parameters" accordion.

    Returns:
        A ``(temperature, top_p)`` tuple of ``gr.Slider`` components.
    """
    with gr.Accordion("Parameters", open=False):
        temperature = gr.Slider(
            minimum=0.1,
            maximum=2.0,
            value=0.8,
            step=0.1,
            interactive=True,
            label="Temperature",
        )
        top_p = gr.Slider(
            minimum=0.1,
            maximum=0.99,
            value=0.9,
            step=0.01,
            interactive=True,
            label="p (nucleus sampling)",
        )
    return temperature, top_p


# TODO: fix prompt
def format_chat_prompt(message: str, chat_history, instructions: str) -> str:
    """Render the instructions, past turns and the new message as one prompt.

    Args:
        message: The new user message.
        chat_history: Iterable of ``(user_message, bot_message)`` pairs.
        instructions: System-style preamble; surrounding whitespace is removed.

    Returns:
        The prompt text, ending with ``"Assistant:"`` so the model continues
        with the assistant's reply.
    """
    lines = [instructions.strip()]
    for user_message, bot_message in chat_history:
        lines.append(f"{USER_NAME}: {user_message}")
        lines.append(f"{BOT_NAME}: {bot_message}")
    lines.append(f"{USER_NAME}: {message}")
    lines.append(f"{BOT_NAME}:")
    return "\n".join(lines)


def chat():
    """Create the chat widgets and wire their events.

    Must be called inside an active ``gr.Blocks`` context.
    """
    with gr.Column(elem_id="chat_container"):
        with gr.Row():
            chatbot = gr.Chatbot(elem_id="chatbot")
        with gr.Row():
            inputs = gr.Textbox(
                placeholder="Ask me anything...",
                label="Type an input and press Enter",
                max_lines=3,
            )

    with gr.Row(elem_id="button_container"):
        with gr.Column(min_width=32):
            retry_button = gr.Button("♻️ Retry last turn")
        with gr.Column(min_width=32):
            delete_turn_button = gr.Button("🧽 Delete last turn")
        with gr.Column(min_width=32):
            clear_chat_button = gr.Button("✨ Delete all history")

    gr.Examples(
        [
            ["Hey! Any recommendations for my holidays in Abu Dhabi?"],
            ["What's the Everett interpretation of quantum mechanics?"],
            [
                "Give me a list of the top 10 dive sites you would recommend around the world."
            ],
            ["Can you tell me more about deep-water soloing?"],
            ["Can you write a short tweet about 30B-Lazarus-GGM?"],
        ],
        inputs=inputs,
        label="Click on any example and press Enter in the input textbox!",
    )

    with gr.Row(elem_id="param_container"):
        with gr.Column():
            temperature, top_p = chat_accordion()
        with gr.Column():
            with gr.Accordion("Instructions", open=False):
                instructions = gr.Textbox(
                    placeholder="LLM instructions",
                    value=DEFAULT_INSTRUCTIONS,
                    lines=10,
                    interactive=True,
                    label="Instructions",
                    max_lines=16,
                    show_label=False,
                )

    def run_chat(
        message: str, chat_history, instructions: str, temperature: float, top_p: float
    ):
        """Stream the assistant's reply, yielding the updated history.

        ``message`` equal to ``RETRY_COMMAND`` drops the last turn and
        regenerates the answer to its user message.  An empty message, or a
        retry with no history, yields the history unchanged.
        """
        is_retry = message == RETRY_COMMAND
        if not message or (is_retry and not chat_history):
            yield chat_history
            return

        if is_retry:
            # Re-ask the question of the turn being retried.
            message, _ = chat_history.pop(-1)

        prompt = format_chat_prompt(message, chat_history, instructions)
        chat_history = chat_history + [[message, ""]]
        # Show the user's turn right away; the first token can take a while.
        yield chat_history

        stream = llm(
            prompt,
            max_new_tokens=1024,
            stop=[STOP_STR, "<|endoftext|>"],
            temperature=temperature,
            top_p=top_p,
            stream=True,
        )

        held_back = ""
        for idx, text_token in enumerate(stream):
            if text_token in STOP_SUSPECT_LIST:
                # Might be the start of STOP_STR: keep it until we know more.
                # Whatever is still held back when the stream ends is dropped.
                held_back += text_token
                continue
            if idx == 0 and text_token.startswith(" "):
                # The prompt ends with "Assistant:", so the reply usually
                # starts with a space that should not be displayed.
                text_token = text_token[1:]
            user_message, bot_message = chat_history[-1]
            # Build a fresh list so already-yielded histories are never mutated.
            chat_history = chat_history[:-1] + [
                [user_message, bot_message + held_back + text_token]
            ]
            held_back = ""
            yield chat_history

    def delete_last_turn(chat_history):
        """Remove the most recent turn (if any) and refresh the chatbot."""
        if chat_history:
            chat_history.pop(-1)
        return {chatbot: gr.update(value=chat_history)}

    def run_retry(
        message: str, chat_history, instructions: str, temperature: float, top_p: float
    ):
        """Regenerate the last answer; the textbox content is ignored."""
        yield from run_chat(
            RETRY_COMMAND, chat_history, instructions, temperature, top_p
        )

    def clear_chat():
        """Return an empty history to reset the chatbot."""
        return []

    inputs.submit(
        run_chat,
        [inputs, chatbot, instructions, temperature, top_p],
        outputs=[chatbot],
        show_progress="minimal",
    )
    # Second handler on the same event: clear the textbox after submitting.
    inputs.submit(lambda: "", inputs=None, outputs=inputs)
    delete_turn_button.click(delete_last_turn, inputs=[chatbot], outputs=[chatbot])
    retry_button.click(
        run_retry,
        [inputs, chatbot, instructions, temperature, top_p],
        outputs=[chatbot],
        show_progress="minimal",
    )
    clear_chat_button.click(clear_chat, [], chatbot)


def get_demo():
    """Assemble and return the ``gr.Blocks`` app (title, disclaimer, chat)."""
    with gr.Blocks(
        # css=None
        # css="""#chat_container {width: 700px; margin-left: auto; margin-right: auto;}
        #     #button_container {width: 700px; margin-left: auto; margin-right: auto;}
        #     #param_container {width: 700px; margin-left: auto; margin-right: auto;}"""
        css="""#chatbot { font-size: 14px; min-height: 300px; }"""
    ) as demo:
        gr.HTML(TITLE)

        with gr.Row():
            with gr.Column():
                gr.Markdown(
                    """ ⚠️ **Limitations**: the model can and will produce factually incorrect information, hallucinating facts and actions. As it has not undergone any advanced tuning/alignment, it can produce problematic outputs, especially if prompted to do so. """
                )

        chat()

    return demo


if __name__ == "__main__":
    demo = get_demo()
    demo.queue(max_size=64, concurrency_count=8)
    # demo.launch(server_name="0.0.0.0", server_port=7860)
    demo.launch()