# NOTE: "Spaces: Sleeping" page-header residue from scraping the Hugging Face
# Spaces site was removed here so the file parses as Python.
import os

import gradio as gr
from huggingface_hub import InferenceClient
from transformers import GPT2Tokenizer

# Initialize the inference client and the tokenizer.
# NOTE(review): a GPT-2 tokenizer is used to *estimate* token counts for a
# Llama-3 model — the vocabularies differ, so counts are approximate; confirm
# this is acceptable for the quota logic in generate().
client = InferenceClient(
    "meta-llama/Meta-Llama-3-70B-Instruct",
    token=os.getenv('HF_API_KEY'),
)
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# Running total of (input) tokens consumed across every call to generate().
# Shared mutable module state — never reset for the lifetime of the process.
total_tokens_used = 0

# System instruction; replaced at runtime via update_instruction().
system_instructions = "Default Instruction"

# Prompt prefix appended to the system instruction (originally Korean:
# roughly "always answer in Korean; as an expert, deliver the essentials
# logically and concisely").
# NOTE(review): the literal below is mojibake (mis-decoded Korean). It is kept
# byte-for-byte because the original bytes cannot be reliably reconstructed —
# re-save from the original UTF-8 source to fix.
prefix = """
λ°λμ λͺ¨λ λ©μμ§λ 'νκΈ'(νκ΅μ΄)λ‘ μΆλ ₯νλΌ. μ λ¬Έκ°λ‘μ νμ λ΅λ³μ λ Όλ¦¬μ μ΄κ³ κ°κ²°νκ² ν΅μ¬μ μ λ¬νκ³ λΆμ° μ€λͺ μ νλΌ."
"""
def format_prompt(message, history):
    """Build a Llama-style prompt string from the chat history.

    Args:
        message: The new user message to append as the final [INST] turn.
        history: Iterable of (user_prompt, bot_response) pairs from prior turns.

    Returns:
        A single prompt string using the [SYSTEM]/[INST] markers the model
        was prompted with elsewhere in this app.
    """
    # Reads (never writes) the module-level system_instructions and prefix,
    # so no `global` declaration is needed.
    prompt = f"<s>[SYSTEM] {system_instructions} {prefix} [/SYSTEM]"
    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST]{bot_response}</s> "
    prompt += f"[INST] {message} [/INST]"
    return prompt
def generate(message, history=None, temperature=0.5, max_new_tokens=2000,
             top_p=0.95, repetition_penalty=1.0):
    """Stream a completion from the Llama-3 endpoint and return the full text.

    Args:
        message: The user's new message.
        history: Optional list of (user, bot) turn pairs; defaults to empty.
        temperature / max_new_tokens / top_p / repetition_penalty: sampling
            parameters forwarded to the inference endpoint.

    Returns:
        The accumulated generated text plus a token-usage footer, or an
        ``"Error: ..."`` string when the quota is exhausted or the call fails.
    """
    global total_tokens_used
    # Fix for mutable-default-argument pitfall: the original used history=[].
    if history is None:
        history = []

    # NOTE(review): only *input* tokens are counted, with a GPT-2 tokenizer as
    # an approximation for Llama-3, and the counter never resets across
    # sessions — confirm this global quota behavior is intended.
    input_tokens = len(tokenizer.encode(message))
    total_tokens_used += input_tokens
    available_tokens = 120000 - total_tokens_used
    if available_tokens <= 0:
        # Mojibake Korean error string preserved byte-for-byte (runtime text).
        return "Error: μ λ ₯μ΄ μ΅λ νμ© ν ν° μλ₯Ό μ΄κ³Όν©λλ€."

    formatted_prompt = format_prompt(message, history)
    output_accumulated = ""
    try:
        stream = client.text_generation(
            formatted_prompt,
            temperature=temperature,
            max_new_tokens=min(max_new_tokens, available_tokens),
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            do_sample=True,
            seed=42,
            stream=True,
        )
        for response in stream:
            # Stream items may be plain strings or dict-like chunks; handle both.
            output_part = response['generated_text'] if 'generated_text' in response else str(response)
            output_accumulated += output_part
        # Return only after the stream is exhausted. The scraped source had
        # flattened indentation that made the return placement ambiguous;
        # returning inside the loop would truncate the reply to one chunk.
        return output_accumulated + f"\n\n---\nTotal tokens used: {total_tokens_used}"
    except Exception as e:
        # Broad catch is deliberate at this UI boundary: show the error text
        # to the user instead of crashing the Gradio app.
        return f"Error: {str(e)}\nTotal tokens used: {total_tokens_used}"
def update_instruction(new_instruction):
    """Replace the module-level system instruction used by future prompts.

    Args:
        new_instruction: The text to install as the new system instruction.

    Returns:
        A short confirmation message for display in the UI.
    """
    global system_instructions
    system_instructions = new_instruction
    return "System instruction updated!"
# ---- Gradio UI -------------------------------------------------------------
with gr.Blocks() as demo:
    # Chat panel backed by generate(); title preserved byte-for-byte.
    chat_interface = gr.ChatInterface(
        fn=generate,
        title="AIQ 컀μ€ν μΈμ€νΈλμ Playground",
    )
    with gr.Row():
        instruction_input = gr.Textbox(
            label="Update System Instruction",
            placeholder="Type new system instructions here",
            lines=2,
        )
        update_button = gr.Button("Update Instruction")
        # NOTE: outputs=[] means update_instruction()'s confirmation string is
        # discarded — the global is still updated as a side effect.
        update_button.click(
            fn=update_instruction,
            inputs=instruction_input,
            outputs=[],
        )

demo.launch()