"""Gradio chat front-end that streams completions from an HTTP generation API."""

import gradio as gr
import requests
import os
import json

# ACCESS_TOKEN = os.getenv("HF_TOKEN")

# Endpoint and model used for every generation request.
API_URL = "http://hugpu.ai:7877/api/generate"
MODEL_NAME = "jinjavis:latest"

# (connect, read) timeouts in seconds. The read timeout applies to each wait
# for the next streamed chunk, so it is generous, but a dead server can no
# longer block a queue worker forever.
REQUEST_TIMEOUT = (10, 300)

ERROR_MESSAGE = "An error occurred while processing your request."


def _build_prompt(message, history):
    """Return the text prompt for *message*, preceded by the prior turns.

    Each past turn is emitted as a ``User:`` line followed by its
    ``Assistant:`` line so the conversation stays in chronological order.
    Entries that are ``None`` are skipped.

    Args:
        message: The new user message.
        history: Iterable of ``(user_message, assistant_reply)`` pairs, or an
            empty/``None`` value when there is no prior conversation.
    """
    lines = []
    for user_msg, assistant_msg in history or []:
        if user_msg is not None:
            lines.append(f"User: {user_msg}")
        if assistant_msg is not None:
            lines.append(f"Assistant: {assistant_msg}")
    lines.append(f"User: {message}")
    lines.append("Assistant:")
    return "\n".join(lines)


def respond(message, history, max_tokens=512, temperature=0.7, top_p=0.95):
    """Stream the model's reply to *message*, yielding the text accumulated so far.

    Args:
        message: The new user message.
        history: Prior conversation as ``(user_message, assistant_reply)``
            pairs. It is only read; the caller's list is not modified.
        max_tokens: Value sent as ``max_tokens`` in the request payload.
        temperature: Value sent as ``temperature`` in the request payload.
        top_p: Value sent as ``top_p`` in the request payload.

    Yields:
        The reply accumulated so far after each streamed chunk, or a generic
        error message when a chunk cannot be decoded or the request fails.
    """
    # NOTE(review): the endpoint path and the "response"/"done" keys read
    # below look like Ollama's /api/generate. If that is what the server runs,
    # sampling settings are expected under an "options" object (with
    # "num_predict" instead of "max_tokens") -- confirm against the server
    # before changing the payload shape.
    data = {
        "model": MODEL_NAME,
        "prompt": _build_prompt(message, history),
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
    }

    partial_message = ""
    try:
        # The context manager releases the connection even when we stop
        # reading early (on "done") or the consumer abandons the generator.
        with requests.post(
            API_URL, json=data, stream=True, timeout=REQUEST_TIMEOUT
        ) as response:
            response.raise_for_status()
            for line in response.iter_lines():
                if not line:
                    continue  # skip keep-alive blank lines
                try:
                    result = json.loads(line)
                except json.JSONDecodeError as e:
                    print(f"Failed to decode JSON: {e}")
                    yield ERROR_MESSAGE
                    continue
                if result.get("done", False):
                    break
                partial_message += result.get("response", "")
                yield partial_message
    except requests.RequestException as e:
        # Surface network/HTTP failures in the chat instead of raising from
        # inside the generator.
        print(f"Request to generation API failed: {e}")
        yield ERROR_MESSAGE
    # No history.append here: the chat interface records the turn from the
    # yielded values, and mutating the caller's list had no visible effect.


demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Slider(minimum=1, maximum=2048, value=512, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, label="Top-P"),
    ],
    theme="Nymbo/Nymbo_Theme",
)

if __name__ == "__main__":
    demo.queue(max_size=4).launch()