Usage for vLLM
from vllm import LLM, SamplingParams
from transformers import AutoTokenizer
BASE_MODEL = "sh2orc/gemma-1.1-korean-7b-it"
llm = LLM(model=BASE_MODEL)
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'
instruction = '독도에 대해서 알려줘'  # "Tell me about Dokdo"
messages = [
    {
        "role": "user",
        "content": instruction
    },
]
prompt_message = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
# Stop on both the EOS token and Gemma's <end_of_turn> turn delimiter
eos_token_id = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<end_of_turn>")]
outputs = llm.generate(
    prompt_message,
    SamplingParams(stop_token_ids=eos_token_id, temperature=0.2, top_p=0.8, max_tokens=4096),
)
for output in outputs:
    prompt = output.prompt
    generated_text = output.outputs[0].text
    print(generated_text)
Result
독도는 동해에 위치한 대한민국의 특별자치도로, 면적은 5.8km이며, 행정 구역은 독도군입니다. 독도의 면적은 대략 5.8km이며, 인구는 약 10명입니다. 독도는 한국과 일본의 국경을 형성하고 있으며, 독도의 주민들은 일본의 영향을 많이 받았습니다. 독도의 경제는 관광과 소규모의 어업이 주를 이루며, 독도는 1949년부터 1954년까지 일본에 의해 점령되었습니다. 독도는 1954년 인천 강화 조약으로 인해 우리나라의 영토가 되었습니다.

(English: Dokdo is a special self-governing province of the Republic of Korea located in the East Sea; its area is 5.8km, and its administrative district is Dokdo-gun. Dokdo's area is approximately 5.8km, and its population is about 10 people. Dokdo forms the border between Korea and Japan, and its residents have been strongly influenced by Japan. Dokdo's economy consists mainly of tourism and small-scale fishing; Dokdo was occupied by Japan from 1949 to 1954. Dokdo became Korean territory through the 1954 Incheon Peace Treaty.)