import gradio as gr
import requests
import os
import json
from collections import deque
# Get the API token from an environment variable
TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")

# Make sure the API token is actually set
if not TOKEN:
    raise ValueError("API token is not set. Please set the HUGGINGFACE_API_TOKEN environment variable.")

# Deque holding the conversation history (keeps at most 10 exchanges)
memory = deque(maxlen=10)
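# Note: `memory` is module-level state, so all concurrent users of the Space
# share one history; once it is full, each append silently evicts the oldest
# exchange.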
def respond(
    message,
    history: list[tuple[str, str]],
    system_message="AI Assistant Role",
    max_tokens=512,
    temperature=0.7,
    top_p=0.95,
):
    # Prepend a fixed prefix to the user-supplied system message
    system_prefix = "System: Reply in the same language as the input (English, Korean, Chinese, Japanese, etc.)."
    full_system_message = f"{system_prefix}{system_message}"

    # Record the current user message in memory (the reply is filled in later)
    memory.append((message, None))
messages = [{"role": "system", "content": full_system_message}]
# λ©λͺ¨λ¦¬μμ λν κΈ°λ‘μ κ°μ Έμ λ©μμ§ λͺ©λ‘μ μΆκ°
for val in memory:
if val[0]:
messages.append({"role": "user", "content": val[0]})
if val[1]:
messages.append({"role": "assistant", "content": val[1]})
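    # At this point `messages` looks like (illustrative values):
    #   [{"role": "system", "content": "System: ... AI Assistant Role"},
    #    {"role": "user", "content": "earlier question"},
    #    {"role": "assistant", "content": "earlier answer"},
    #    {"role": "user", "content": "current message"}]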
    headers = {
        "Authorization": f"Bearer {TOKEN}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": "meta-llama/Meta-Llama-3.1-405B-Instruct",
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "messages": messages,
    }
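    # The request body follows the OpenAI-compatible chat-completions schema
    # exposed by the Hugging Face Inference API at /v1/chat/completions.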
    # The payload does not request streaming, so the endpoint returns a single
    # JSON body; parse it once rather than decoding raw chunks (json.loads on
    # partial chunks can fail if the response is split mid-object).
    response = requests.post(
        "https://api-inference.huggingface.co/v1/chat/completions",
        headers=headers,
        json=payload,
    )
    response.raise_for_status()
    response_json = response.json()

    # Extract only the content of the first choice
    response_text = ""
    if "choices" in response_json:
        response_text = response_json["choices"][0]["message"]["content"]

    # Attach the model's reply to the last exchange stored in memory
    memory[-1] = (message, response_text)
    yield response_text
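
# A minimal sketch of a true streaming variant, assuming the endpoint honors
# the OpenAI-style "stream": True flag and emits server-sent "data: ..." lines
# with incremental "delta" fields. `stream_chat` is a hypothetical helper, not
# part of the original app; verify the SSE format before relying on it.
def stream_chat(headers, payload):
    payload = {**payload, "stream": True}  # ask the server to stream tokens
    partial = ""
    with requests.post(
        "https://api-inference.huggingface.co/v1/chat/completions",
        headers=headers,
        json=payload,
        stream=True,
    ) as response:
        response.raise_for_status()
        for line in response.iter_lines():
            if not line:
                continue  # skip SSE keep-alive blank lines
            decoded = line.decode("utf-8")
            if not decoded.startswith("data: "):
                continue
            data = decoded[len("data: "):]
            if data == "[DONE]":
                break  # end-of-stream sentinel
            delta = json.loads(data)["choices"][0].get("delta", {})
            if delta.get("content"):
                partial += delta["content"]
                yield partial  # yield the accumulated text so far
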
theme = "Nymbo/Nymbo_Theme"
# Gradio ChatInterface configuration
demo = gr.ChatInterface(
    fn=respond,
    theme=theme,
    additional_inputs=[
        gr.Textbox(value="AI Assistant Role", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)
if __name__ == "__main__":
    demo.queue(concurrency_limit=20).launch(max_threads=20)
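# To run locally (hypothetical token value shown):
#   export HUGGINGFACE_API_TOKEN=hf_your_token_here
#   python app.py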