asusevski committed on
Commit
17ede41
·
1 Parent(s): 135e4fe

fixed peft in app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -46
app.py CHANGED
@@ -1,41 +1,25 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
3
  import torch
4
- from peft import PeftModel
5
 
6
 
7
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
8
 
9
 
10
- base_model_id = "mistralai/Mistral-7B-v0.1"
11
- ft_model_id = "asusevski/mistraloo-sft"
 
 
 
12
 
13
 
14
  tokenizer = AutoTokenizer.from_pretrained(
15
- base_model_id,
16
  add_bos_token=True
17
  )
18
 
19
 
20
- base_model_id = "mistralai/Mistral-7B-v0.1"
21
- bnb_config = BitsAndBytesConfig(
22
- load_in_4bit=True,
23
- bnb_4bit_use_double_quant=True,
24
- bnb_4bit_quant_type="nf4",
25
- bnb_4bit_compute_dtype=torch.bfloat16
26
- )
27
-
28
- base_model = AutoModelForCausalLM.from_pretrained(
29
- base_model_id,
30
- quantization_config=bnb_config,
31
- device_map="auto",
32
- trust_remote_code=True
33
- )
34
-
35
- model = PeftModel.from_pretrained(base_model, ft_model_id).to(device)
36
- model.eval()
37
-
38
-
39
  def uwaterloo_output(post_title, post_text):
40
  prompt = f"""
41
  Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
@@ -67,25 +51,3 @@ iface = gr.Interface(
67
 
68
  iface.launch()
69
 
70
-
71
-
72
-
73
- # base_model_id = "mistralai/Mistral-7B-v0.1"
74
- # bnb_config = BitsAndBytesConfig(
75
- # load_in_4bit=True,
76
- # bnb_4bit_use_double_quant=True,
77
- # bnb_4bit_quant_type="nf4",
78
- # bnb_4bit_compute_dtype=torch.bfloat16
79
- # )
80
-
81
-
82
- # base_model = AutoModelForCausalLM.from_pretrained(
83
- # base_model_id, # Mistral, same as before
84
- # quantization_config=bnb_config, # Same quantization config as before
85
- # device_map="auto",
86
- # trust_remote_code=True,
87
- # use_auth_token=True
88
- # )
89
-
90
-
91
- # ft_model = PeftModel.from_pretrained(base_model, "mistral-mistraloo/checkpoint-500")
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import torch
4
+ from peft import PeftModel, PeftConfig
5
 
6
 
7
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
8
 
9
 
10
+ peft_model_id = "asusevski/mistraloo-sft"
11
+ peft_config = PeftConfig.from_pretrained(peft_model_id)
12
+ model = AutoModelForCausalLM.from_pretrained(peft_config.base_model_name_or_path)
13
+ model = PeftModel.from_pretrained(model, peft_model_id).to(device)
14
+ model.eval()
15
 
16
 
17
  tokenizer = AutoTokenizer.from_pretrained(
18
+ peft_config.base_model_name_or_path,
19
  add_bos_token=True
20
  )
21
 
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  def uwaterloo_output(post_title, post_text):
24
  prompt = f"""
25
  Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
 
51
 
52
  iface.launch()
53