---
license: cc-by-sa-4.0
language:
- id
library_name: peft
tags:
- text-generation-inference
---
# finetune-indoMMLU-Merak-7B-v4

Based on Merak-7B-v4 (Mistral): https://huggingface.co/Ichsan2895/Merak-7B-v4<br>
Dataset used for fine-tuning: https://github.com/fajri91/IndoMMLU

Some of the training parameters used:
```python
lora_r = 64
lora_alpha = 16
lora_dropout = 0.05

learning_rate = 2e-4
lr_scheduler = "constant"
optimizer = "paged_adamw_32bit"
max_seq_length = 2048
```
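
These values map onto the usual PEFT `LoraConfig` plus `TrainingArguments` setup. Below is a minimal sketch of how they might be wired together; the `target_modules`, batch size, and epoch count are illustrative assumptions, not the original training script:

```python
from peft import LoraConfig
from transformers import TrainingArguments

# LoRA settings from the list above
peft_config = LoraConfig(
    r=64,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    # target_modules is an assumption (typical for Mistral-style models)
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)

training_args = TrainingArguments(
    output_dir="./finetune-indoMMLU-Merak-7B-v4",
    learning_rate=2e-4,
    lr_scheduler_type="constant",
    optim="paged_adamw_32bit",
    per_device_train_batch_size=4,  # assumption
    num_train_epochs=1,             # assumption
)
# max_seq_length = 2048 would then be passed to the trainer,
# e.g. trl.SFTTrainer(..., max_seq_length=2048)
```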

Inference:

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, LlamaTokenizer
from peft import PeftModel

model_name = "Ichsan2895/Merak-7B-v4"
adapter_name = "Willy030125/finetune-indoMMLU-Merak-7B-v4"

# 4-bit NF4 quantization with double quantization to reduce VRAM usage
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Load the quantized base model, then attach the fine-tuned LoRA adapter
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)
model = PeftModel.from_pretrained(model, adapter_name)
tokenizer = LlamaTokenizer.from_pretrained(model_name)

def generate_response(question: str) -> str:
    chat = [
        {"role": "system", "content": "Anda adalah Merak, sebuah model kecerdasan buatan yang dilatih oleh Muhammad Ichsan. Mohon jawab pertanyaan berikut dengan benar, faktual, dan ramah."},
        {"role": "user", "content": question},
    ]

    prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    # Move both input_ids and attention_mask to the GPU
    inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=True).to("cuda")

    with torch.no_grad():
        outputs = model.generate(input_ids=inputs["input_ids"],
                                 attention_mask=inputs["attention_mask"],
                                 eos_token_id=tokenizer.eos_token_id,
                                 pad_token_id=tokenizer.eos_token_id,
                                 max_new_tokens=1024)
    response = tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0]

    # Strip the echoed prompt: keep only the text after the assistant marker
    assistant_start = f'''{question} \n assistant\n '''
    response_start = response.find(assistant_start)
    return response[response_start + len(assistant_start):].strip()

# Example IndoMMLU-style multiple-choice question
# ("Animals that eat plants are called ...")
prompt = """Hewan pemakan tumbuhan dinamakan ...
A. Omnivora
B. Karnivora
C. Pengurai
D. Herbivora"""

print(generate_response(prompt))
```
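
For deployment without a PEFT dependency at inference time, the adapter can also be merged into the base weights. A minimal sketch, assuming enough memory to load the base model unquantized for a clean merge; the output path is hypothetical:

```python
import torch
from transformers import AutoModelForCausalLM, LlamaTokenizer
from peft import PeftModel

# Load the base model in bf16 (unquantized) so the LoRA deltas can be folded in
base = AutoModelForCausalLM.from_pretrained(
    "Ichsan2895/Merak-7B-v4",
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)
model = PeftModel.from_pretrained(base, "Willy030125/finetune-indoMMLU-Merak-7B-v4")
model = model.merge_and_unload()  # returns a plain Transformers model

# Save the standalone merged model ("merged-merak-indommlu" is a hypothetical path)
model.save_pretrained("merged-merak-indommlu")
LlamaTokenizer.from_pretrained("Ichsan2895/Merak-7B-v4").save_pretrained("merged-merak-indommlu")
```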