krilecy
/

e5-mistral-7b-instruct

Feature Extraction

text-generation-inference

text-embeddings-inference

Inference Endpoints

Model card Files Files and versions Community

krilecy committed on Mar 27

Commit

6891925

•

1 Parent(s): dfd303b

Upload handler.py

Files changed (1) hide show

handler.py +4 -4

handler.py CHANGED Viewed

@@ -116,26 +116,26 @@ class MistralAttention(MistralAttention):
 class EndpointHandler():
-    def __init__(self):
         self.instruction = 'Given a web search query, retrieve relevant passages that answer the query:\n'
         self.max_length = 4096
         self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
-        self.tokenizer = AutoTokenizer.from_pretrained('intfloat/e5-mistral-7b-instruct', trust_remote_code=True)
         self.tokenizer.pad_token = '[PAD]'
         self.tokenizer.padding_side = 'left'
         bnb_config = BitsAndBytesConfig(load_in_8bit=True, bnb_8bit_compute_dtype=torch.float16)
         self.model = AutoModel.from_pretrained(
-            '',
             quantization_config=bnb_config,
             device_map="auto",
             trust_remote_code=True,
             attn_implementation="eager",
         )
-        self.model = PeftModel.from_pretrained(model, '/lora')
         self.model.eval()

 class EndpointHandler():
+    def __init__(self, model_dir=''):
         self.instruction = 'Given a web search query, retrieve relevant passages that answer the query:\n'
         self.max_length = 4096
         self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
+        self.tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
         self.tokenizer.pad_token = '[PAD]'
         self.tokenizer.padding_side = 'left'
         bnb_config = BitsAndBytesConfig(load_in_8bit=True, bnb_8bit_compute_dtype=torch.float16)
         self.model = AutoModel.from_pretrained(
+            model_dir,
             quantization_config=bnb_config,
             device_map="auto",
             trust_remote_code=True,
             attn_implementation="eager",
         )
+        self.model = PeftModel.from_pretrained(self.model, '/lora')
         self.model.eval()