# Spaces:
# Runtime error
# Runtime error
import json
import subprocess
import os

import requests
import gradio as gr

# Secret system prompt injected through the Space's environment variables.
SYSTEM_PROMPT_SECRET = os.environ.get('HF_SYSTEM_PROMPT_SECRET')

# URL for downloading the GGUF model weights from Hugging Face.
url = "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_0.gguf?download=true"
# Stream the download to disk in chunks: the file is multiple GB, so
# response.content (which buffers the whole body in RAM) would blow memory.
with requests.get(url, stream=True) as download:
    download.raise_for_status()  # fail fast on a bad link instead of writing an HTML error page as the model
    with open("./model.gguf", mode="wb") as file:
        for chunk in download.iter_content(chunk_size=1 << 20):
            file.write(chunk)
print("Modell heruntergeladen.")

# Launch the llama.cpp OpenAI-compatible server in the background;
# the Gradio handler below talks to it on port 2600.
command = ["python3", "-m", "llama_cpp.server", "--model", "./model.gguf",
           "--host", "0.0.0.0", "--port", "2600", "--n_threads", "2"]
subprocess.Popen(command)
print("Modell bereit!")
# Funktion zur Behandlung der Chat-Antwort | |
def response(message, history):
    """Gradio chat handler: send the user's message to the local llama.cpp
    server and return the complete generated text.

    Parameters:
        message: The user's latest chat message (str).
        history: Prior chat turns supplied by gr.ChatInterface (unused here).

    Returns:
        The full completion text produced by the model (str).
    """
    # Local llama.cpp server endpoint (started at module import time).
    url = "http://0.0.0.0:2600/v1/completions"
    body = {
        "prompt": SYSTEM_PROMPT_SECRET + message,  # prepend the secret system prompt
        "max_tokens": 1500,
        "echo": False,
        "stream": True,
    }
    response_text = ""
    # The server streams Server-Sent Events. iter_lines() yields one complete
    # event line at a time, which fixes two bugs in the old chunk-based code:
    # a JSON payload can no longer be split across chunk boundaries (the old
    # buffer/split logic dropped events when one chunk held several), and
    # multi-byte UTF-8 characters can no longer be cut mid-sequence (which
    # made text.decode('utf-8') raise UnicodeDecodeError).
    with requests.post(url, json=body, stream=True) as resp:
        for line in resp.iter_lines(decode_unicode=True):
            # Skip keep-alive pings and blank event separators.
            if not line or line.startswith(": ping -"):
                continue
            if not line.startswith("data: "):
                continue
            payload = line[len("data: "):]
            if payload.strip() == "[DONE]":
                break  # end-of-stream sentinel sent by OpenAI-compatible servers
            try:
                part = str(json.loads(payload)["choices"][0]["text"])
                print(part, end="", flush=True)
                response_text += part
            except Exception as e:
                # Best-effort: log malformed events and keep streaming.
                print("Exception:" + str(e))
    return response_text
# Gradio-Schnittstelle mit spezifiziertem Theme | |
# Gradio chat UI wired to the response() handler above.
gr_interface = gr.ChatInterface(
    fn=response,
    # Title fixed: the script downloads Mistral-7B-Instruct-v0.2, not
    # Mixtral_7Bx2_MoE as the old title claimed.
    title="Mistral-7B-Instruct-v0.2-GGUF Chatbot",
    theme='ParityError/Anime'
)
# Enable request queuing (needed for concurrent users) and launch the app.
gr_interface.queue().launch(share=True)