emeses committed on
Commit
9744f58
·
1 Parent(s): 25da20e

Update space

Browse files
Files changed (2) hide show
  1. README.md +1 -1
  2. app.py +16 -2
README.md CHANGED
@@ -7,4 +7,4 @@ sdk: gradio
7
  sdk_version: 4.19.2
8
  app_file: app.py
9
  pinned: false
10
- ---
 
7
  sdk_version: 4.19.2
8
  app_file: app.py
9
  pinned: false
10
+ gpu: true
app.py CHANGED
@@ -2,7 +2,8 @@ import gradio as gr
2
  from huggingface_hub import InferenceClient
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
  from peft import PeftModel
5
- import torch
 
6
 
7
  device = "cuda" if torch.cuda.is_available() else "cpu"
8
  base_model = AutoModelForCausalLM.from_pretrained(
@@ -11,8 +12,21 @@ base_model = AutoModelForCausalLM.from_pretrained(
11
  torch_dtype=torch.float16
12
  )
13
 
 
 
 
 
 
 
 
 
14
  # Load model and tokenizer
15
- base_model = AutoModelForCausalLM.from_pretrained("unsloth/Llama-3.2-3B-Instruct-bnb-4bit")
 
 
 
 
 
16
  model = PeftModel.from_pretrained(base_model, "emeses/lab2_model")
17
  tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-3B-Instruct-bnb-4bit")
18
 
 
2
  from huggingface_hub import InferenceClient
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
  from peft import PeftModel
5
+ import torch
6
+ from transformers import BitsAndBytesConfig
7
 
8
  device = "cuda" if torch.cuda.is_available() else "cpu"
9
  base_model = AutoModelForCausalLM.from_pretrained(
 
12
  torch_dtype=torch.float16
13
  )
14
 
15
+ # Configure quantization
16
+ bnb_config = BitsAndBytesConfig(
17
+ load_in_4bit=True,
18
+ bnb_4bit_compute_dtype=torch.float16,
19
+ bnb_4bit_quant_type="nf4",
20
+ bnb_4bit_use_double_quant=True,
21
+ )
22
+
23
  # Load model and tokenizer
24
+ base_model = AutoModelForCausalLM.from_pretrained(
25
+ "unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
26
+ device_map="auto",
27
+ torch_dtype=torch.float16,
28
+ quantization_config=bnb_config
29
+ )
30
  model = PeftModel.from_pretrained(base_model, "emeses/lab2_model")
31
  tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-3B-Instruct-bnb-4bit")
32