---
library_name: peft
base_model: meta-llama/Llama-2-7b-hf
---
|
## Training procedure

The following `bitsandbytes` quantization config was used during training:

- load_in_8bit: False
- load_in_4bit: True
- llm_int8_threshold: 6.0
- llm_int8_skip_modules: None
- llm_int8_enable_fp32_cpu_offload: False
- llm_int8_has_fp16_weight: False
- bnb_4bit_quant_type: fp4
- bnb_4bit_use_double_quant: False
- bnb_4bit_compute_dtype: float32
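
For reference, this corresponds roughly to the `BitsAndBytesConfig` below. This is a sketch reconstructed from the values above, not necessarily the exact object used during training:

```python
import torch
from transformers import BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_8bit=False,
    load_in_4bit=True,
    llm_int8_threshold=6.0,
    llm_int8_skip_modules=None,
    llm_int8_enable_fp32_cpu_offload=False,
    llm_int8_has_fp16_weight=False,
    bnb_4bit_quant_type="fp4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=torch.float32,
)
```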

### Framework versions

- PEFT 0.4.0

Notebook (training and inference): https://colab.research.google.com/drive/1GxbUYZiLidteVX4qu5iSox6oxxEOHk5O?usp=sharing

Usage:

```python
import requests


# Get a random Wikipedia article summary via the REST API
def random_extract():
    url = "https://en.wikipedia.org/api/rest_v1/page/random/summary"
    r = requests.get(url)
    data = r.json()
    return data["extract"]


# Format the extract as a prompt that should lead the model to complete with a question
def random_prompt():
    e = random_extract()
    return f"""### CONTEXT: {e} ### QUESTION:"""


import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

output_dir = "mcqgen_test"

# Load the base model with the adapter applied, plus the tokenizer
model = AutoPeftModelForCausalLM.from_pretrained(
    output_dir,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,
    load_in_4bit=True,
)
tokenizer = AutoTokenizer.from_pretrained(output_dir)

# Feed in a random context prompt and see what question the model comes up with
prompt = random_prompt()

input_ids = tokenizer(prompt, return_tensors="pt", truncation=True).input_ids.cuda()
with torch.inference_mode():
    outputs = model.generate(input_ids=input_ids, max_new_tokens=100, do_sample=True, top_p=0.9, temperature=0.9)

print(f"Prompt:\n{prompt}\n")
print(f"Generated MCQ:\n### QUESTION:{tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0][len(prompt):]}")


# Parse a generated sequence back into its labelled sections
def process_outputs(outputs):
    s = tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0]
    split = s.split("### ")[1:][:7]
    if len(split) != 7:
        return None
    # Check that each section starts with the expected label
    expected_starts = ["CONTEXT", "QUESTION", "A", "B", "C", "D", "CORRECT"]
    for part, expected in zip(split, expected_starts):
        if not part.startswith(expected):
            return None
    return {
        "context": split[0].replace("CONTEXT: ", ""),
        "question": split[1].replace("QUESTION: ", ""),
        "a": split[2].replace("A: ", ""),
        "b": split[3].replace("B: ", ""),
        "c": split[4].replace("C: ", ""),
        "d": split[5].replace("D: ", ""),
        "correct": split[6].replace("CORRECT: ", ""),
    }


print(process_outputs(outputs))  # A nice dictionary, hopefully
```
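
If you prefer to load the base model and the adapter explicitly (for example to reuse an already quantized base model), something along these lines should also work. The 4-bit settings mirror the training config above, and `mcqgen_test` stands in for wherever this adapter is saved:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

base_id = "meta-llama/Llama-2-7b-hf"

# Quantize the base model on load, mirroring the training-time settings
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="fp4",
    bnb_4bit_compute_dtype=torch.float32,
)

base_model = AutoModelForCausalLM.from_pretrained(
    base_id,
    quantization_config=bnb_config,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(base_id)

# Attach the PEFT adapter weights on top of the quantized base model
model = PeftModel.from_pretrained(base_model, "mcqgen_test")
```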