aspctu committed on
Commit
2859edf
1 Parent(s): e8c159f

Create README.md

Files changed (1)
  1. README.md +91 -0
README.md ADDED
---
datasets:
- tatsu-lab/alpaca
language:
- en
---
### Model card for Alpaca-30B

This is a LLaMA-30B model instruction-finetuned with LoRA for 3 epochs on the Tatsu Lab Alpaca dataset. It was trained in 8-bit mode.
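
This card doesn't state the adapter hyperparameters. As a rough sketch, a LoRA configuration in the style of the tloen/alpaca-lora repo (which the inference code below is adapted from) looks like the following; the `r`, `lora_alpha`, `lora_dropout`, and `target_modules` values are assumptions taken from that repo's defaults, not from this checkpoint:

```
# Hypothetical training-side LoRA config, following tloen/alpaca-lora defaults;
# the exact values used for this checkpoint are not stated on this card.
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=8,                                  # assumed adapter rank
    lora_alpha=16,                        # assumed scaling factor
    target_modules=["q_proj", "v_proj"],  # attention projections to adapt
    lora_dropout=0.05,                    # assumed adapter dropout
    bias="none",
    task_type="CAUSAL_LM",
)
# The base model would then be wrapped as:
# model = get_peft_model(base_model, lora_config)
```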

To run this model, you can use the following code:

```
# Code adapted from https://github.com/tloen/alpaca-lora
import torch
from peft import PeftModel
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig

tokenizer = LlamaTokenizer.from_pretrained("decapoda-research/llama-30b-hf")

# Load the base model in 8-bit and let accelerate place it across devices
model = LlamaForCausalLM.from_pretrained(
    "decapoda-research/llama-30b-hf",
    load_in_8bit=True,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Apply the Alpaca LoRA adapter weights on top of the base model
model = PeftModel.from_pretrained(
    model,
    "baseten/alpaca-30b",
    torch_dtype=torch.float16,
)


def generate_prompt(instruction, input=None):
    # Build the Alpaca-style prompt, with or without an input field
    if input:
        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{instruction}

### Input:
{input}

### Response:"""
    else:
        return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{instruction}

### Response:"""


model.eval()


def evaluate(
    instruction,
    input=None,
    temperature=0.1,
    top_p=0.75,
    top_k=40,
    num_beams=4,
    **kwargs,
):
    prompt = generate_prompt(instruction, input)
    inputs = tokenizer(prompt, return_tensors="pt")
    # Move the tokenized prompt to the same device as the model
    input_ids = inputs["input_ids"].to(model.device)
    generation_config = GenerationConfig(
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        num_beams=num_beams,
        **kwargs,
    )
    with torch.no_grad():
        generation_output = model.generate(
            input_ids=input_ids,
            generation_config=generation_config,
            return_dict_in_generate=True,
            output_scores=True,
            max_new_tokens=2048,
        )
    s = generation_output.sequences[0]
    output = tokenizer.decode(s)
    # Return only the text after the "### Response:" marker
    return output.split("### Response:")[1].strip()
```
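
For example, a quick smoke test using the `evaluate` function defined above (the instruction string here is just an illustration):

```
print(evaluate("Tell me about alpacas."))
```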

The