Manuel Calzolari committed on
Commit
eaf24ca
1 Parent(s): af41327
Files changed (1) hide show
  1. app.py +12 -1
app.py CHANGED
@@ -1,4 +1,5 @@
1
  # Import modules
 
2
  import torch
3
  import gradio as gr
4
  from langchain_community.llms import HuggingFacePipeline
@@ -7,7 +8,9 @@ from langchain_community.vectorstores import Chroma
7
  from langchain_core.runnables import RunnablePassthrough
8
  from langchain_core.prompts import PromptTemplate
9
  from peft import PeftModel
10
- from transformers import AutoModelForCausalLM, BitsAndBytesConfig, GenerationConfig, pipeline
 
 
11
 
12
  base_model = "microsoft/phi-2"
13
 
@@ -18,6 +21,14 @@ embedding_function = SentenceTransformerEmbeddings(
18
  model_kwargs={"device": "cuda"}, # Use the GPU
19
  )
20
 
 
 
 
 
 
 
 
 
21
  bnb_config = BitsAndBytesConfig(
22
  load_in_4bit=True,
23
  bnb_4bit_quant_type="nf4",
 
1
  # Import modules
2
+ import os
3
  import torch
4
  import gradio as gr
5
  from langchain_community.llms import HuggingFacePipeline
 
8
  from langchain_core.runnables import RunnablePassthrough
9
  from langchain_core.prompts import PromptTemplate
10
  from peft import PeftModel
11
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, GenerationConfig, pipeline
12
+
13
+ HUGGINGFACE_ACCESS_TOKEN = os.environ["HUGGINGFACE_ACCESS_TOKEN"]
14
 
15
  base_model = "microsoft/phi-2"
16
 
 
21
  model_kwargs={"device": "cuda"}, # Use the GPU
22
  )
23
 
24
+ tokenizer = AutoTokenizer.from_pretrained(
25
+ base_model,
26
+ use_fast=True,
27
+ token=HUGGINGFACE_ACCESS_TOKEN,
28
+ )
29
+ tokenizer.pad_token = tokenizer.eos_token
30
+ tokenizer.padding_side = "right"
31
+
32
  bnb_config = BitsAndBytesConfig(
33
  load_in_4bit=True,
34
  bnb_4bit_quant_type="nf4",