Vladislav Sokolovskii committed on
Commit
cb07a8a
1 Parent(s): 2420712

Update the handler

Browse files
Files changed (2) hide show
  1. handler.py +6 -40
  2. requirements.txt +6 -11
handler.py CHANGED
@@ -3,22 +3,19 @@ from typing import Dict, List, Any
3
  from unsloth import FastLanguageModel
4
  from unsloth.chat_templates import get_chat_template
5
  import torch
 
 
6
 
7
  class EndpointHandler:
8
- def __init__(self, path="."):
9
- # Get the current directory (where the handler is located)
10
- current_dir = os.path.dirname(os.path.abspath(__file__))
11
-
12
- # Define the relative path to the LoRA adapter
13
- lora_path = os.path.join(current_dir, "llama3.1-70b-4bit-of-v1-lora")
14
-
15
  # Load the model and tokenizer
16
  self.model, self.tokenizer = FastLanguageModel.from_pretrained(
17
- model_name = ".", # Choose ANY! eg teknium/OpenHermes-2.5-Mistral-7B
18
  max_seq_length = 2048,
19
  dtype = None,
20
  load_in_4bit = True,
21
- # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
22
  )
23
  FastLanguageModel.for_inference(self.model)
24
 
@@ -72,34 +69,3 @@ class EndpointHandler:
72
  last_response = response_lines[-1] if response_lines else ""
73
 
74
  return [last_response]
75
-
76
-
77
- # if __name__ == "__main__":
78
- # handler = EndpointHandler()
79
-
80
- # print("Chat with the model. Type 'quit' to exit.")
81
-
82
- # system_message = input("Enter system message (optional): ")
83
- # history = []
84
-
85
- # while True:
86
- # user_input = input("You: ")
87
- # if user_input.lower() == 'quit':
88
- # break
89
-
90
- # data = {
91
- # "inputs": history + [{"role": "user", "content": user_input}],
92
- # "parameters": {
93
- # "system_message": system_message,
94
- # "max_new_tokens": 512,
95
- # "temperature": 0.2,
96
- # "top_p": 0.5
97
- # }
98
- # }
99
-
100
- # response = handler(data)[0]
101
- # print(f"Model: {response}")
102
-
103
- # history.append({"role": "user", "content": user_input})
104
- # history.append({"role": "assistant", "content": response})
105
-
 
3
  from unsloth import FastLanguageModel
4
  from unsloth.chat_templates import get_chat_template
5
  import torch
6
+ from huggingface_hub import login
7
+ import os
8
 
9
  class EndpointHandler:
10
+ def __init__(self, path=""):
11
+ # access_token = os.environ["HUGGINGFACE_TOKEN"]
12
+ # login(token=access_token)
 
 
 
 
13
  # Load the model and tokenizer
14
  self.model, self.tokenizer = FastLanguageModel.from_pretrained(
15
+ model_name = path, # Use the current directory path
16
  max_seq_length = 2048,
17
  dtype = None,
18
  load_in_4bit = True,
 
19
  )
20
  FastLanguageModel.for_inference(self.model)
21
 
 
69
  last_response = response_lines[-1] if response_lines else ""
70
 
71
  return [last_response]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,13 +1,8 @@
 
1
  xformers<0.0.27
2
- unsloth
3
- torch==2.2.0
4
- torchvision==0.17.0
5
- transformers==4.42.3
6
  bitsandbytes==0.43.3
7
- trl<0.9.0
8
- peft
9
- accelerate
10
- git+https://github.com/unslothai/unsloth.git@933d9fe2cb2459f949ee2250e90a5b610d277eab
11
-
12
- # Note: Install with --no-deps flag for xformers and trl
13
- # pip install --no-deps "xformers<0.0.27" "trl<0.9.0"
 
1
+ torchvision
2
  xformers<0.0.27
3
+ trl==0.8.6
4
+ transformers==4.44.2
 
 
5
  bitsandbytes==0.43.3
6
+ peft==0.12.0
7
+ accelerate>=0.34.2
8
+ git+https://github.com/unslothai/unsloth.git