"""Prompt formatting and text generation helpers for a Gradio chat playground.

Requests are sent to a hosted Llama 3 70B Instruct model through the
Hugging Face Inference API.
"""

import os

import gradio as gr
from huggingface_hub import InferenceClient
from transformers import GPT2Tokenizer

# Initialize the inference client and the tokenizer.
# NOTE(review): the GPT-2 tokenizer is only used to count tokens; the served
# model is Llama 3, so the counts are presumably an approximation.
client = InferenceClient("meta-llama/Meta-Llama-3-70B-Instruct", token=os.getenv('HF_API_KEY'))
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# Upper bound on the cumulative number of input tokens accepted by generate().
MAX_TOTAL_TOKENS = 120000

# Global counter tracking cumulative token usage across all calls.
total_tokens_used = 0

system_instructions = "Default Instruction"
prefix = """ 반드시 모든 메시지는 '한글'(한국어)로 출력하라. 전문가로서 항상 답변은 논리적이고 간결하게 핵심을 전달하고 부연 설명을 하라." """


def format_prompt(message, history):
    """Build the full prompt string sent to the model.

    Args:
        message: The new user message.
        history: Iterable of ``(user_prompt, bot_response)`` pairs from
            earlier turns.

    Returns:
        A string made of a ``[SYSTEM]`` section (the current system
        instructions followed by the fixed prefix), one ``[INST]`` section
        per past turn, and a final ``[INST]`` section for ``message``.
    """
    # Reading module globals needs no ``global`` declaration.
    parts = [f"[SYSTEM] {system_instructions} {prefix} [/SYSTEM]"]
    parts.extend(
        f"[INST] {user_prompt} [/INST]{bot_response} "
        for user_prompt, bot_response in history
    )
    parts.append(f"[INST] {message} [/INST]")
    return "".join(parts)


def _chunk_text(chunk):
    """Return the text carried by one streamed response chunk."""
    # Plain-string chunks are used as-is. Testing ``'generated_text' in chunk``
    # on a str would be a substring test, and indexing the str with a string
    # key would then raise TypeError.
    if isinstance(chunk, str):
        return chunk
    if isinstance(chunk, dict) and 'generated_text' in chunk:
        return chunk['generated_text']
    return str(chunk)


def generate(message, history=None, temperature=0.5, max_new_tokens=2000, top_p=0.95, repetition_penalty=1.0):
    """Generate a reply to ``message`` and report cumulative token usage.

    Args:
        message: The new user message.
        history: Past ``(user_prompt, bot_response)`` pairs; ``None`` means
            no history.
        temperature: Sampling temperature forwarded to the client.
        max_new_tokens: Requested cap on generated tokens; it is further
            limited to the remaining token budget.
        top_p: Nucleus-sampling parameter forwarded to the client.
        repetition_penalty: Repetition penalty forwarded to the client.

    Returns:
        The generated text followed by a usage footer, or a string starting
        with ``"Error:"`` when the budget is exhausted or the request fails.
    """
    global total_tokens_used

    # Avoid a shared mutable default argument.
    if history is None:
        history = []

    # The message is charged against the budget before the check, so a
    # rejected message still counts toward the total.
    total_tokens_used += len(tokenizer.encode(message))
    available_tokens = MAX_TOTAL_TOKENS - total_tokens_used
    if available_tokens <= 0:
        return "Error: 입력이 최대 허용 토큰 수를 초과합니다."

    formatted_prompt = format_prompt(message, history)

    try:
        stream = client.text_generation(
            formatted_prompt,
            temperature=temperature,
            max_new_tokens=min(max_new_tokens, available_tokens),
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            do_sample=True,
            seed=42,
            stream=True,
        )
        # Consuming the stream can raise too, so it stays inside the try.
        output_accumulated = "".join(_chunk_text(chunk) for chunk in stream)
    except Exception as e:
        # UI boundary: report the failure as text instead of crashing the app.
        return f"Error: {str(e)}\nTotal tokens used: {total_tokens_used}"

    return output_accumulated + f"\n\n---\nTotal tokens used: {total_tokens_used}"


def update_instruction(new_instruction):
    """Replace the global system instruction used by format_prompt().

    Args:
        new_instruction: The new system instruction text.

    Returns:
        A fixed confirmation message.
    """
    global system_instructions
    system_instructions = new_instruction
    return "System instruction updated!"
# Build the UI: a chat interface driven by generate(), plus a row for
# replacing the system instruction at runtime.
with gr.Blocks() as demo:
    chat_interface = gr.ChatInterface(
        fn=generate,
        title="AIQ 커스텀 인스트럭션 Playground",
    )
    with gr.Row():
        instruction_input = gr.Textbox(
            label="Update System Instruction",
            placeholder="Type new system instructions here",
            lines=2,
        )
        update_button = gr.Button("Update Instruction")
    # update_instruction() returns a confirmation message; show it in a
    # read-only box so the user sees that the update took effect (with an
    # empty outputs list the returned message was discarded).
    instruction_status = gr.Textbox(label="Status", interactive=False)
    update_button.click(
        fn=update_instruction,
        inputs=instruction_input,
        outputs=instruction_status,
    )

demo.launch()