File size: 2,456 Bytes
00ef542 e6cee74 92be1c2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
---
license: afl-3.0
language:
- id
library_name: adapter-transformers
tags:
- text-generation-inference
---
# finetune-indoMMLU-Merak-7B-v4
Based on Merak-7B-v4 Mistral: https://huggingface.co/Ichsan2895/Merak-7B-v4<br>
Dataset used for fine-tuning: https://github.com/fajri91/IndoMMLU
<br>
Some of the training parameters used:
```python
lora_r=64
lora_alpha=16
lora_dropout=0.05
learning_rate = 2e-4
lr_scheduler = "constant"
optimizer = "paged_adamw_32bit"
max_seq_length = 2048
```
Inference:
```python
import torch
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, BitsAndBytesConfig, LlamaTokenizer
from peft import PeftModel, PeftConfig
# Base model on the Hub and the adapter repository that is applied on top of it.
model_name = "Ichsan2895/Merak-7B-v4"
adapter_name = "Willy030125/finetune-indoMMLU-Merak-7B-v4"

# 4-bit NF4 quantization with double quantization; matmuls are computed in bfloat16.
# BitsAndBytesConfig is imported by name above, so it is referenced directly
# (the bare `transformers` module is never imported in this script).
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Load the quantized base model and let the library place it on the available devices.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

# PeftModel.from_pretrained takes the loaded base model object as its first
# argument (not the model's name) and attaches the adapter weights to it.
model = PeftModel.from_pretrained(model, adapter_name)

# The tokenizer is loaded from the base model repository.
tokenizer = LlamaTokenizer.from_pretrained(model_name)
def generate_response(question: str) -> str:
    """Generate the model's reply to a single user question.

    The question is placed in a two-message chat (a fixed system prompt
    followed by the user message), rendered with the tokenizer's chat
    template, and passed to ``model.generate``. Only the tokens produced
    after the prompt are decoded, so the reply never contains the system
    prompt or the question itself.

    Args:
        question: The user's question, inserted verbatim as the user message.

    Returns:
        The decoded reply with special tokens removed and surrounding
        whitespace stripped.
    """
    chat = [
        {"role": "system", "content": "Anda adalah Merak, sebuah model kecerdasan buatan yang dilatih oleh Muhammad Ichsan. Mohon jawab pertanyaan berikut dengan benar, faktual, dan ramah."},
        {"role": "user", "content": question},
    ]
    prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

    # Move *all* input tensors to the model's device. Previously only
    # input_ids was moved, leaving attention_mask on the CPU.
    inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=True).to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
            max_new_tokens=1024,
        )

    # generate() returns the prompt tokens followed by the new tokens; slice
    # off the prompt instead of searching the decoded text for a marker
    # string, which silently returned garbage when the marker was not found.
    prompt_length = inputs["input_ids"].shape[-1]
    generated_tokens = outputs[0, prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
# Example multiple-choice question in Indonesian, passed to the model as-is.
prompt = """Hewan pemakan tumbuhan dinamakan ...
A. Omnivora
B. Karnivora
C. Pengurai
D. Herbivora"""
# Run the question through the model, then show the extracted reply.
answer = generate_response(prompt)
print(answer)
``` |