Spaces:
Sleeping
Sleeping
File size: 3,706 Bytes
5416372 a2e6c05 d81ed7c a2e6c05 d81ed7c a2e6c05 3b39700 a2e6c05 3b39700 a2e6c05 b23a519 a2e6c05 d81ed7c 1a382ff d81ed7c a2e6c05 d81ed7c a2e6c05 d81ed7c a2e6c05 d81ed7c a2e6c05 d81ed7c a2e6c05 d81ed7c a2e6c05 d81ed7c a2e6c05 d81ed7c a2e6c05 d81ed7c a2e6c05 d81ed7c b23a519 72fd759 3b39700 b23a519 a2e6c05 b93337b d81ed7c a2e6c05 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
import spaces
import json
import subprocess
import gradio as gr
from huggingface_hub import hf_hub_download
# Runtime dependency bootstrap: install pinned versions of llama-cpp-python
# (from the project's cu124 wheel index) and llama-cpp-agent.
# Arguments are passed as lists (no shell involved) and check=True makes a
# failed install abort start-up immediately instead of surfacing later as an
# ImportError inside the request handler.
subprocess.run(
    [
        "pip",
        "install",
        "llama-cpp-python==0.2.75",
        "--extra-index-url",
        "https://abetlen.github.io/llama-cpp-python/whl/cu124",
    ],
    check=True,
)
subprocess.run(["pip", "install", "llama-cpp-agent==0.2.10"], check=True)

# Fetch the GGUF model files offered in the UI into ./models.
hf_hub_download(
    repo_id="bartowski/dolphin-2.9.1-yi-1.5-34b-GGUF",
    filename="dolphin-2.9.1-yi-1.5-34b-Q6_K.gguf",
    local_dir="./models",
)
hf_hub_download(
    repo_id="bartowski/dolphin-2.9.1-yi-1.5-9b-GGUF",
    filename="dolphin-2.9.1-yi-1.5-9b-f32.gguf",
    local_dir="./models",
)
# Custom stylesheet handed to gr.ChatInterface through its `css` argument.
# It overrides the layout of message rows and the border/background colours
# of the chat bubbles; `!important` forces these rules over the theme's own.
# NOTE(review): the class names (.message-row, .user, .assistant, ...) are
# presumably the ones Gradio's chatbot component emits -- confirm against the
# Gradio version in use.
css = """
.message-row {
justify-content: space-evenly !important;
}
.message-bubble-border {
border-radius: 6px !important;
border-color: #21293b !important;
}
.user {
background: #1e293b !important;
}
.assistant, .pending {
background: #0f172a !important;
}
"""
@spaces.GPU(duration=120)
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    model,
):
    """Stream a chat reply for ``message`` from the selected GGUF model.

    Args:
        message: The new user message to answer.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs.
        system_message: System prompt for the agent. A blank or missing value
            falls back to ``"You are a helpful assistant."``.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature applied to generation.
        top_p: Nucleus-sampling probability mass applied to generation.
        model: File name of the model inside the ``models/`` directory.

    Yields:
        The reply accumulated so far, once per streamed chunk, so the caller
        can re-render the growing answer.
    """
    # Imported inside the function rather than at module level; presumably
    # because these packages are pip-installed at start-up -- confirm.
    from llama_cpp import Llama
    from llama_cpp_agent import LlamaCppAgent
    from llama_cpp_agent import MessagesFormatterType
    from llama_cpp_agent.providers import LlamaCppPythonProvider
    from llama_cpp_agent.chat_history import BasicChatHistory
    from llama_cpp_agent.chat_history.messages import Roles

    llm = Llama(
        model_path=f"models/{model}",
        n_gpu_layers=81,
    )
    provider = LlamaCppPythonProvider(llm)

    agent = LlamaCppAgent(
        provider,
        # Honour the caller-supplied system prompt instead of a fixed string.
        system_prompt=system_message or "You are a helpful assistant.",
        predefined_messages_formatter_type=MessagesFormatterType.CHATML,
        debug_output=True,
    )

    # Apply every user-controlled sampling parameter, not only max_tokens.
    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.stream = True

    # Replay the earlier turns so the model sees the whole conversation.
    messages = BasicChatHistory()
    for user_text, assistant_text in history:
        messages.add_message({"role": Roles.user, "content": user_text})
        messages.add_message({"role": Roles.assistant, "content": assistant_text})

    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=messages,
        returns_streaming_generator=True,
        print_output=False,
    )

    # Yield the cumulative text each time a new chunk arrives.
    outputs = ""
    for output in stream:
        outputs += output
        yield outputs
# Chat UI definition.
# The additional inputs are passed to `respond` positionally after
# (message, history), so their order must match its remaining parameters:
# system_message, max_tokens, temperature, top_p, model.
# The system-message Textbox comes first for that reason; without it every
# value shifts one parameter to the left and `model` is never supplied.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=8192, value=8192, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
        gr.Dropdown(
            ['dolphin-2.9.1-yi-1.5-34b-Q6_K.gguf', 'dolphin-2.9.1-yi-1.5-9b-f32.gguf'],
            value="dolphin-2.9.1-yi-1.5-34b-Q6_K.gguf",
            label="Model",
        ),
    ],
    # Soft theme with dark-mode colour overrides.
    theme=gr.themes.Soft(
        primary_hue="indigo",
        secondary_hue="blue",
        neutral_hue="gray",
        font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"],
    ).set(
        body_background_fill_dark="#0f172a",
        block_background_fill_dark="#0f172a",
        block_title_background_fill_dark="#0c1425",
        input_background_fill_dark="#0c1425",
        button_secondary_background_fill_dark="#0c1425",
        border_color_primary_dark="#21293b",
        background_fill_secondary_dark="#0f172a",
    ),
    css=css,
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
    submit_btn="Send",
    description="Cognitive Computation: 🐬 Chat multi llm",
)

# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()
|