l3custinstruc2 / app.py
seawolf2357's picture
Update app.py
faaa56f verified
raw
history blame
4.95 kB
import os
from huggingface_hub import InferenceClient
import gradio as gr
from transformers import GPT2Tokenizer
# Remote inference client for the hosted Llama 3 70B Instruct model. The access
# token comes from the HF_API_KEY environment variable (None when it is unset).
client = InferenceClient(
    "meta-llama/Meta-Llama-3-70B-Instruct",
    token=os.environ.get("HF_API_KEY"),
)

# GPT-2 tokenizer; generate() uses it only to count the tokens of a prompt.
# NOTE(review): this is not the tokenizer of the model being queried, so the
# counts are presumably approximations - confirm that is acceptable.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# ์‹œ์Šคํ…œ ์ธ์ŠคํŠธ๋Ÿญ์…˜์„ ์„ค์ •ํ•˜์ง€๋งŒ ์‚ฌ์šฉ์ž์—๊ฒŒ ๋…ธ์ถœํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค.
system_instruction = """
๋„ˆ์˜ ์ด๋ฆ„์€ 'AIQ Codepilot'์ด๋‹ค. ๋„ˆ๋Š” Huggingface์—์„œ gradio ์ฝ”๋”ฉ์— ํŠนํ™”๋œ ์ „๋ฌธ AI ์–ด์‹œ์Šคํ„ดํŠธ ์—ญํ• ์ด๋‹ค.
๋„ˆ๋Š” ๋ชจ๋“  ๋‹ต๋ณ€์„ ํ•œ๊ธ€๋กœ ํ•˜๊ณ , code ์ถœ๋ ฅ์‹œ markdown ํ˜•์‹์œผ๋กœ ์ถœ๋ ฅํ•˜๋ผ.
๋ชจ๋“  ์ฝ”๋“œ๋Š” ๋ณ„๋„ ์š”์ฒญ์ด ์—†๋Š”ํ•œ, ๋ฐ˜๋“œ์‹œ "gradio"๋ฅผ ์ ์šฉํ•œ ์ฝ”๋“œ๋กœ ์ถœ๋ ฅํ•˜๋ผ.
๋Œ€ํ™” ๋‚ด์šฉ์„ ๊ธฐ์–ตํ•˜๊ณ , ์ฝ”๋“œ ๊ธธ์ด์— ์ œํ•œ์„ ๋‘์ง€ ๋ง๊ณ  ์ตœ๋Œ€ํ•œ ์ž์„ธํ•˜๊ฒŒ ์ƒ์„ธํ•˜๊ฒŒ ํ•œ๊ธ€๋กœ ๋‹ต๋ณ€์„ ์ด์–ด๊ฐ€๋ผ.
Huggingface์˜ ๋ชจ๋ธ, ๋ฐ์ดํ„ฐ์…‹, spaces์— ๋Œ€ํ•ด ํŠนํ™”๋œ ์ง€์‹๊ณผ ์ •๋ณด ๊ทธ๋ฆฌ๊ณ  full text ๊ฒ€์ƒ‰์„ ์ง€์›ํ•˜๋ผ.
๋ชจ๋ธ๋ง๊ณผ ๋ฐ์ดํ„ฐ์…‹ ์‚ฌ์šฉ ๋ฐฉ๋ฒ• ๋ฐ ์˜ˆ์‹œ๋ฅผ ์ž์„ธํ•˜๊ฒŒ ๋“ค์–ด๋ผ.
Huggingface์—์„œ space์— ๋Œ€ํ•œ ๋ณต์ œ, ์ž„๋ฒ ๋”ฉ, deploy, setting ๋“ฑ์— ๋Œ€ํ•œ ์„ธ๋ถ€์ ์ธ ์„ค๋ช…์„ ์ง€์›ํ•˜๋ผ.
์ด GPTs๋ฅผ ์ด์šฉํ•˜๋Š” ์œ ์ €๋“ค์€ ์ฝ”๋”ฉ์„ ๋ชจ๋ฅด๋Š” ์ดˆ๋ณด์ž๋ผ๋Š” ์ „์ œํ•˜์— ์นœ์ ˆํ•˜๊ฒŒ ์ฝ”๋“œ์— ๋Œ€ํ•ด ์„ค๋ช…์„ ํ•˜์—ฌ์•ผ ํ•œ๋‹ค.
ํŠนํžˆ ์ฝ”๋“œ๋ฅผ ์ˆ˜์ •ํ• ๋•Œ๋Š” ๋ถ€๋ถ„์ ์ธ ๋ถ€๋ถ„๋งŒ ์ถœ๋ ฅํ•˜์ง€ ๋ง๊ณ , ์ „์ฒด ์ฝ”๋“œ๋ฅผ ์ถœ๋ ฅํ•˜๋ฉฐ '์ˆ˜์ •'์ด ๋œ ๋ถ€๋ถ„์„ Before์™€ After๋กœ ๊ตฌ๋ถ„ํ•˜์—ฌ ๋ถ„๋ช…ํžˆ ์•Œ๋ ค์ฃผ๋„๋ก ํ•˜๋ผ.
์™„์„ฑ๋œ ์ „์ฒด ์ฝ”๋“œ๋ฅผ ์ถœ๋ ฅํ•˜๊ณ  ๋‚˜์„œ, huggingface์—์„œ ์–ด๋–ป๊ฒŒ space๋ฅผ ๋งŒ๋“ค๊ณ  app.py ํŒŒ์ผ ์ด๋ฆ„์œผ๋กœ ๋ณต์‚ฌํ•œ ์ฝ”๋“œ๋ฅผ ๋ถ™์—ฌ๋„ฃ๊ณ  ์‹คํ–‰ํ•˜๋Š”์ง€ ๋“ฑ์˜ ๊ณผ์ •์„ ๊ผญ ์•Œ๋ ค์ค„๊ฒƒ.
๋˜ํ•œ ๋ฐ˜๋“œ์‹œ "requirements.txt"์— ์–ด๋–ค ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๋ฅผ ํฌํ•จ์‹œ์ผœ์•ผ ํ•˜๋Š”์ง€ ๊ทธ ๋ฐฉ๋ฒ•๊ณผ ๋ฆฌ์ŠคํŠธ๋ฅผ ์ž์„ธํžˆ ์•Œ๋ ค์ค„๊ฒƒ.
huggingface์—์„œ ๋™์ž‘๋  ์„œ๋น„์Šค๋ฅผ ๋งŒ๋“ค๊ฒƒ์ด๊ธฐ์— ๋กœ์ปฌ์— ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ ์„ค์น˜ํ•˜๋Š” ๋ฐฉ๋ฒ•์€ ์„ค๋ช…ํ•˜์ง€ ๋ง์•„๋ผ.
"""
# Global variable that tracks cumulative token usage; generate() adds the token
# count of each incoming prompt to it.
total_tokens_used = 0
def format_prompt(message, history):
    """Build the single prompt string sent to the text-generation endpoint.

    The hidden system instruction, followed by a reminder to answer in Korean,
    is wrapped in ``[SYSTEM]`` tags. Each earlier exchange is appended as
    ``[INST] user [/INST]reply</s> `` and the new message closes the prompt as
    an open ``[INST]`` turn.

    Args:
        message: The user's new message.
        history: Iterable of ``(user_prompt, bot_response)`` pairs from earlier
            turns. ``None`` or an empty value means no history.

    Returns:
        The assembled prompt string.
    """
    # Reminder appended to the system instruction; it tells the model to write
    # every answer and message in Korean. (Restored from a mis-encoded literal.)
    prefix = "반드시 모든 답변과 메시지는 '한글'(한국어)로 출력하라:"
    header = "<s>[SYSTEM] {} [/SYSTEM]".format(system_instruction + prefix)
    # NOTE(review): these [INST]/[SYSTEM] tags are not the header-token chat
    # layout documented for Llama 3 Instruct; confirm the endpoint copes with
    # this template before relying on it.
    past_turns = "".join(
        f"[INST] {user_prompt} [/INST]{bot_response}</s> "
        for user_prompt, bot_response in (history or [])
    )
    return f"{header}{past_turns}[INST] {message} [/INST]"
# Ceiling applied to the cumulative count kept in total_tokens_used.
_MAX_TOTAL_TOKENS = 120000


def _stream_chunk_text(response):
    """Return the text carried by one chunk of the generation stream."""
    # A plain-string chunk is the text itself. Testing it with
    # `'generated_text' in response` would be a substring search, and a chunk
    # that happened to contain that text would then be indexed with a string
    # key and raise TypeError, aborting the stream.
    if isinstance(response, str):
        return response
    if isinstance(response, dict) and 'generated_text' in response:
        return response['generated_text']
    return str(response)


def generate(prompt, history=None, temperature=0.1, max_new_tokens=2000, top_p=0.95, repetition_penalty=1.0):
    """Stream the model's reply to ``prompt``, yielding the text accumulated so far.

    Every yielded value is the full reply received up to that point followed by
    a footer showing the running token total, so the caller can simply replace
    the displayed message on each update.

    Args:
        prompt: The user's new message.
        history: Earlier ``(user, bot)`` exchanges, forwarded to
            ``format_prompt``. ``None`` means no history.
        temperature: Sampling temperature passed to the endpoint.
        max_new_tokens: Requested cap on generated tokens; it is further
            limited by what remains of the cumulative budget.
        top_p: Nucleus-sampling parameter passed to the endpoint.
        repetition_penalty: Repetition penalty passed to the endpoint.

    Yields:
        The accumulated reply plus the token footer, or a single ``Error: ...``
        string when the budget is exhausted or the request fails.
    """
    global total_tokens_used
    history = [] if history is None else history

    # Only the new message is counted, using the GPT-2 tokenizer.
    # NOTE(review): the counter is module-wide and never reset, and a rejected
    # request still adds to it, so once the ceiling is reached every later call
    # is refused until the process restarts. Confirm this is intended.
    input_tokens = len(tokenizer.encode(prompt))
    total_tokens_used += input_tokens
    available_tokens = _MAX_TOTAL_TOKENS - total_tokens_used
    if available_tokens <= 0:
        # Message: "The input exceeds the maximum allowed number of tokens."
        yield f"Error: 입력이 최대 허용 토큰 수를 초과합니다. Total tokens used: {total_tokens_used}"
        return

    formatted_prompt = format_prompt(prompt, history)
    output_accumulated = ""
    try:
        stream = client.text_generation(
            formatted_prompt,
            temperature=temperature,
            max_new_tokens=min(max_new_tokens, available_tokens),
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            do_sample=True,
            seed=42,
            stream=True,
        )
        for response in stream:
            output_accumulated += _stream_chunk_text(response)
            yield output_accumulated + f"\n\n---\nTotal tokens used: {total_tokens_used}"
    except Exception as e:
        # UI boundary: report the failure in the chat instead of crashing the app.
        yield f"Error: {str(e)}\nTotal tokens used: {total_tokens_used}"
# Chat display component, configured once at import time. All options below are
# passed straight through to gr.Chatbot.
mychatbot = gr.Chatbot(
    show_label=False,
    show_copy_button=True,
    likeable=True,
    bubble_full_width=False,
    # Two avatar image paths, resolved relative to the working directory.
    avatar_images=["./user.png", "./botm.png"],
)
examples = [
["์ข‹์€ ์˜ˆ์ œ๋ฅผ ์•Œ๋ ค์ค˜.", []], # history ๊ฐ’์„ ๋นˆ ๋ฆฌ์ŠคํŠธ๋กœ ์ œ๊ณต
["๋ฐ˜๋“œ์‹œ ํ•œ๊ธ€๋กœ ๋‹ต๋ณ€ํ• ๊ฒƒ.", []], # history ๊ฐ’์„ ๋นˆ ๋ฆฌ์ŠคํŠธ๋กœ ์ œ๊ณต
["๊ณ„์† ์ด์–ด์„œ ์ถœ๋ ฅ", []],
["requirements.txt ์ถœ๋ ฅ", []],
["์ „์ฒด ์ฝ”๋“œ๋ฅผ ๋‹ค์‹œ ์ถœ๋ ฅ", []],
["์ฝ”๋“œ ์˜ค๋ฅ˜๋ฅผ ํ™•์ธํ•˜๊ณ  ์ž์„ธํžˆ ์„ค๋ช…ํ•ด์ค˜.", []],
["Huggingface์™€ Gradio๋ฅผ ์‚ฌ์šฉํ•˜๋Š” ๋ฐฉ๋ฒ•์— ๋Œ€ํ•ด ๋ฌผ์–ด๋ณด์„ธ์š”.", []]
]
# Custom stylesheet handed to Gradio: shrinks the page title and hides the footer.
# The comment inside the stylesheet had been stored mis-encoded; it is restored
# to Korean here (it reads "set the title font size small").
css = """
h1 {
    font-size: 14px; /* 제목 글꼴 크기를 작게 설정 */
}
footer {
    visibility: hidden;
}
"""
def update_chat(input_text):
    """Remember ``input_text`` by storing it on the function as ``update_chat.response``."""
    setattr(update_chat, "response", input_text)


# Seed the attribute so it can be read before the first call.
# NOTE(review): indentation was lost in the copy under review; this assignment
# is taken to be module-level initialisation rather than the last statement of
# the function body - confirm against the original file.
setattr(update_chat, "response", "")
# Chat UI wiring.
#
# The previous definition could not be constructed: `gr.Interface.Textbox` is
# not an attribute of gr.Interface, `default=` is not a Textbox keyword, and a
# Textbox cannot supply the list of (user, bot) pairs that generate() iterates
# as its history. gr.ChatInterface calls fn(message, history) and manages the
# history itself, which is the contract generate() is written against.
demo = gr.ChatInterface(
    generate,
    chatbot=mychatbot,
    # Input box pre-filled with "시작" ("start"), labelled "질문 입력" ("enter question").
    textbox=gr.Textbox(value="시작", label="질문 입력"),
    title="AIQ 코드파일럿: L3",
    # ChatInterface takes plain message strings; the rows in `examples` pair
    # each message with an empty history, so keep only the message.
    examples=[row[0] for row in examples],
    css=css,
)
# Start the app with the share and debug options enabled.
demo.launch(share=True, debug=True)