huseinzol05
committed on
Commit
•
9a220aa
1
Parent(s):
f3a6a81
Update README.md
Browse files
README.md
CHANGED
@@ -12,6 +12,8 @@ README at https://github.com/mesolitica/malaya/tree/5.1/pretrained-model/mistral
|
|
12 |
|
13 |
WandB, https://wandb.ai/mesolitica/pretrain-mistral-1.1b?workspace=user-husein-mesolitica
|
14 |
|
|
|
|
|
15 |
## how-to
|
16 |
|
17 |
```python
|
@@ -26,7 +28,7 @@ nf4_config = BitsAndBytesConfig(
|
|
26 |
bnb_4bit_compute_dtype=getattr(torch, TORCH_DTYPE)
|
27 |
)
|
28 |
|
29 |
-
tokenizer = AutoTokenizer.from_pretrained('mesolitica/malaysian-mistral-1.1B-4096')
|
30 |
model = AutoModelForCausalLM.from_pretrained(
|
31 |
'mesolitica/malaysian-mistral-1.1B-4096',
|
32 |
use_flash_attention_2 = True,
|
@@ -34,7 +36,6 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
34 |
)
|
35 |
prompt = '<s>nama saya'
|
36 |
inputs = tokenizer([prompt], return_tensors='pt', add_special_tokens=False).to('cuda')
|
37 |
-
inputs.pop('token_type_ids')
|
38 |
|
39 |
generate_kwargs = dict(
|
40 |
inputs,
|
|
|
12 |
|
13 |
WandB, https://wandb.ai/mesolitica/pretrain-mistral-1.1b?workspace=user-husein-mesolitica
|
14 |
|
15 |
+
WandB report, https://wandb.ai/mesolitica/pretrain-mistral-3b/reports/Pretrain-Larger-Malaysian-Mistral--Vmlldzo2MDkyOTgz
|
16 |
+
|
17 |
## how-to
|
18 |
|
19 |
```python
|
|
|
28 |
bnb_4bit_compute_dtype=getattr(torch, TORCH_DTYPE)
|
29 |
)
|
30 |
|
31 |
+
tokenizer = AutoTokenizer.from_pretrained('mesolitica/malaysian-mistral-1.1B-4096', model_input_names = ['input_ids'])
|
32 |
model = AutoModelForCausalLM.from_pretrained(
|
33 |
'mesolitica/malaysian-mistral-1.1B-4096',
|
34 |
use_flash_attention_2 = True,
|
|
|
36 |
)
|
37 |
prompt = '<s>nama saya'
|
38 |
inputs = tokenizer([prompt], return_tensors='pt', add_special_tokens=False).to('cuda')
|
|
|
39 |
|
40 |
generate_kwargs = dict(
|
41 |
inputs,
|