Manuel Calzolari committed on
Commit
af41327
1 Parent(s): 1ee69d0
Files changed (1)
  1. app.py +9 -1
app.py CHANGED
@@ -1,4 +1,5 @@
  # Import modules
+ import torch
  import gradio as gr
  from langchain_community.llms import HuggingFacePipeline
  from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
@@ -6,7 +7,7 @@ from langchain_community.vectorstores import Chroma
  from langchain_core.runnables import RunnablePassthrough
  from langchain_core.prompts import PromptTemplate
  from peft import PeftModel
- from transformers import AutoModelForCausalLM, GenerationConfig, pipeline
+ from transformers import AutoModelForCausalLM, BitsAndBytesConfig, GenerationConfig, pipeline

  base_model = "microsoft/phi-2"

@@ -17,6 +18,13 @@ embedding_function = SentenceTransformerEmbeddings(
      model_kwargs={"device": "cuda"},  # Use the GPU
  )

+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_compute_dtype=torch.float16,
+     bnb_4bit_use_double_quant=False,
+ )
+
  # Load the fine-tuned model by merging the base model and the adapter
  # (checkpointed at 1 epoch = 77 steps)
  adapter = "./results/checkpoint-77"