Spaces:
Runtime error
Runtime error
File size: 2,437 Bytes
031beb8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import json
import os
from typing import Dict, Generator, List
import requests
# Environment variables for backend URL and model name
BACKEND_HOST = os.getenv("BACKEND_HOST")
MODEL_NAME = os.getenv("MODEL_NAME")
API_KEY = os.getenv("API_KEY")
# Custom headers for the API request
HEADERS = {
"orionstar-api-key": API_KEY,
}
def chat_stream_generator(
messages: List[Dict[str, str]], endpoint: str, temperature: float = 0.7
) -> Generator[str, None, None]:
"""Generator function to stream chat responses from the backend."""
payload = {
"model": MODEL_NAME,
"stream": True,
"messages": messages,
"temperature": temperature,
}
try:
with requests.post(
BACKEND_HOST + endpoint,
json=payload,
headers=HEADERS,
timeout=60 * 10,
stream=True,
) as response:
if response.status_code != 200:
yield f"[model server error]. response text: {response.text}"
for line in response.iter_lines(decode_unicode=True):
if not line:
continue
if isinstance(line, bytes):
line = line.decode("utf-8")
line = line.replace("data: ", "")
if line == "[DONE]":
return
chunk = json.loads(line)
yield chunk["choices"][0]["delta"].get("content", "")
except Exception as e:
print(f"[chat_stream_generator] call model exception: {e}")
yield "[model server error]"
def chat(
messages: List[Dict[str, str]], endpoint: str, stop=None, temperature: float = 0.7
) -> str:
"""Function to get a single chat response from the backend."""
payload = {"model": MODEL_NAME, "messages": messages, "temperature": temperature}
if stop:
payload["stop"] = stop
try:
with requests.post(
BACKEND_HOST + endpoint,
json=payload,
headers=HEADERS,
timeout=60 * 10,
) as response:
if response.status_code != 200:
return f"[model server error]. response text: {response.text}"
model_response = response.json()
return model_response["choices"][0]["message"]["content"]
except Exception as e:
print(f"[chat] call model exception: {e}")
return "[model server error]"
|