krilecy committed on
Commit
6891925
1 Parent(s): dfd303b

Upload handler.py

Browse files
Files changed (1) hide show
  1. handler.py +4 -4
handler.py CHANGED
@@ -116,26 +116,26 @@ class MistralAttention(MistralAttention):
116
 
117
 
118
  class EndpointHandler():
119
- def __init__(self):
120
  self.instruction = 'Given a web search query, retrieve relevant passages that answer the query:\n'
121
  self.max_length = 4096
122
  self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
123
 
124
 
125
- self.tokenizer = AutoTokenizer.from_pretrained('intfloat/e5-mistral-7b-instruct', trust_remote_code=True)
126
  self.tokenizer.pad_token = '[PAD]'
127
  self.tokenizer.padding_side = 'left'
128
 
129
  bnb_config = BitsAndBytesConfig(load_in_8bit=True, bnb_8bit_compute_dtype=torch.float16)
130
 
131
  self.model = AutoModel.from_pretrained(
132
- '',
133
  quantization_config=bnb_config,
134
  device_map="auto",
135
  trust_remote_code=True,
136
  attn_implementation="eager",
137
  )
138
- self.model = PeftModel.from_pretrained(model, '/lora')
139
  self.model.eval()
140
 
141
 
 
116
 
117
 
118
  class EndpointHandler():
119
+ def __init__(self, model_dir=''):
120
  self.instruction = 'Given a web search query, retrieve relevant passages that answer the query:\n'
121
  self.max_length = 4096
122
  self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
123
 
124
 
125
+ self.tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
126
  self.tokenizer.pad_token = '[PAD]'
127
  self.tokenizer.padding_side = 'left'
128
 
129
  bnb_config = BitsAndBytesConfig(load_in_8bit=True, bnb_8bit_compute_dtype=torch.float16)
130
 
131
  self.model = AutoModel.from_pretrained(
132
+ model_dir,
133
  quantization_config=bnb_config,
134
  device_map="auto",
135
  trust_remote_code=True,
136
  attn_implementation="eager",
137
  )
138
+ self.model = PeftModel.from_pretrained(self.model, '/lora')
139
  self.model.eval()
140
 
141