dvruette committed on
Commit
face2e4
1 Parent(s): c5dac9d

update main.py

Browse files
Files changed (1) hide show
  1. main.py +25 -15
main.py CHANGED
@@ -15,10 +15,14 @@ logger = logging.getLogger(__name__)
15
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
16
  # device = "cpu"
17
 
 
 
 
18
  MODEL_CONFIGS = {
19
  "Llama-2-7b-chat-hf": {
20
  "identifier": "meta-llama/Llama-2-7b-chat-hf",
21
  "dtype": torch.float16 if device.type == "cuda" else torch.float32,
 
22
  "guidance_interval": [-16.0, 16.0],
23
  "default_guidance_scale": 8.0,
24
  "min_guidance_layer": 16,
@@ -26,16 +30,17 @@ MODEL_CONFIGS = {
26
  "default_concept": "humor",
27
  "concepts": ["humor", "creativity", "quality", "truthfulness", "compliance"],
28
  },
29
- "Mistral-7B-Instruct-v0.1": {
30
- "identifier": "mistralai/Mistral-7B-Instruct-v0.1",
31
- "dtype": torch.bfloat16 if device.type == "cuda" else torch.float32,
32
- "guidance_interval": [-128.0, 128.0],
33
- "default_guidance_scale": 48.0,
34
- "min_guidance_layer": 8,
35
- "max_guidance_layer": 32,
36
- "default_concept": "humor",
37
- "concepts": ["humor", "creativity", "quality", "truthfulness", "compliance"],
38
- },
 
39
  }
40
 
41
  def load_concept_vectors(model, concepts):
@@ -43,7 +48,7 @@ def load_concept_vectors(model, concepts):
43
 
44
  def load_model(model_name):
45
  config = MODEL_CONFIGS[model_name]
46
- model = AutoModelForCausalLM.from_pretrained(config["identifier"], torch_dtype=config["dtype"])
47
  tokenizer = AutoTokenizer.from_pretrained(config["identifier"])
48
  if tokenizer.chat_template is None:
49
  tokenizer.chat_template = DEFAULT_CHAT_TEMPLATE
@@ -99,16 +104,20 @@ def generate_completion(
99
  # move all other models to CPU
100
  for name, (model, _) in MODELS.items():
101
  if name != model_name:
102
- model.to("cpu")
 
 
103
  torch.cuda.empty_cache()
104
  # load the model
 
105
  model, tokenizer = MODELS[model_name]
106
- model = model.to(device, non_blocking=True)
 
107
 
108
  concept_vector = CONCEPT_VECTORS[model_name][concept]
109
  guidance_layers = list(range(int(min_guidance_layer) - 1, int(max_guidance_layer)))
110
  patch_model(model, concept_vector, guidance_scale=guidance_scale, guidance_layers=guidance_layers)
111
- pipe = pipeline("conversational", model=model, tokenizer=tokenizer, device=device)
112
 
113
  conversation = history_to_conversation(history)
114
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
@@ -141,13 +150,14 @@ class ConceptGuidanceUI:
141
  default_model = model_names[0]
142
  default_config = MODEL_CONFIGS[default_model]
143
  default_concepts = default_config["concepts"]
 
144
 
145
  saved_input = gr.State("")
146
 
147
  with gr.Row(elem_id="concept-guidance-container"):
148
  with gr.Column(scale=1, min_width=256):
149
  model_dropdown = gr.Dropdown(model_names, value=default_model, label="Model")
150
- concept_dropdown = gr.Dropdown(default_concepts, value=default_concepts[0], label="Concept")
151
  guidance_scale = gr.Slider(*default_config["guidance_interval"], value=default_config["default_guidance_scale"], label="Guidance Scale")
152
  min_guidance_layer = gr.Slider(1.0, 32.0, value=16.0, step=1.0, label="First Guidance Layer")
153
  max_guidance_layer = gr.Slider(1.0, 32.0, value=32.0, step=1.0, label="Last Guidance Layer")
 
15
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
16
  # device = "cpu"
17
 
18
+ # Uncomment the models you want to use; comment out the rest
19
+ # RAM requirements: ~16GB x #models (+ ~4GB overhead)
20
+ # VRAM requirements: ~16GB
21
  MODEL_CONFIGS = {
22
  "Llama-2-7b-chat-hf": {
23
  "identifier": "meta-llama/Llama-2-7b-chat-hf",
24
  "dtype": torch.float16 if device.type == "cuda" else torch.float32,
25
+ "load_in_8bit": False,
26
  "guidance_interval": [-16.0, 16.0],
27
  "default_guidance_scale": 8.0,
28
  "min_guidance_layer": 16,
 
30
  "default_concept": "humor",
31
  "concepts": ["humor", "creativity", "quality", "truthfulness", "compliance"],
32
  },
33
+ # "Mistral-7B-Instruct-v0.1": {
34
+ # "identifier": "mistralai/Mistral-7B-Instruct-v0.1",
35
+ # "dtype": torch.bfloat16 if device.type == "cuda" else torch.float32,
36
+ # "load_in_8bit": False,
37
+ # "guidance_interval": [-128.0, 128.0],
38
+ # "default_guidance_scale": 48.0,
39
+ # "min_guidance_layer": 8,
40
+ # "max_guidance_layer": 32,
41
+ # "default_concept": "humor",
42
+ # "concepts": ["humor", "creativity", "quality", "truthfulness", "compliance"],
43
+ # },
44
  }
45
 
46
  def load_concept_vectors(model, concepts):
 
48
 
49
  def load_model(model_name):
50
  config = MODEL_CONFIGS[model_name]
51
+ model = AutoModelForCausalLM.from_pretrained(config["identifier"], torch_dtype=config["dtype"], load_in_8bit=config["load_in_8bit"])
52
  tokenizer = AutoTokenizer.from_pretrained(config["identifier"])
53
  if tokenizer.chat_template is None:
54
  tokenizer.chat_template = DEFAULT_CHAT_TEMPLATE
 
104
  # move all other models to CPU
105
  for name, (model, _) in MODELS.items():
106
  if name != model_name:
107
+ config = MODEL_CONFIGS[name]
108
+ if not config["load_in_8bit"]:
109
+ model.to("cpu")
110
  torch.cuda.empty_cache()
111
  # load the model
112
+ config = MODEL_CONFIGS[model_name]
113
  model, tokenizer = MODELS[model_name]
114
+ if not config["load_in_8bit"]:
115
+ model.to(device, non_blocking=True)
116
 
117
  concept_vector = CONCEPT_VECTORS[model_name][concept]
118
  guidance_layers = list(range(int(min_guidance_layer) - 1, int(max_guidance_layer)))
119
  patch_model(model, concept_vector, guidance_scale=guidance_scale, guidance_layers=guidance_layers)
120
+ pipe = pipeline("conversational", model=model, tokenizer=tokenizer, device=(device if not config["load_in_8bit"] else None))
121
 
122
  conversation = history_to_conversation(history)
123
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
 
150
  default_model = model_names[0]
151
  default_config = MODEL_CONFIGS[default_model]
152
  default_concepts = default_config["concepts"]
153
+ default_concept = default_config["default_concept"]
154
 
155
  saved_input = gr.State("")
156
 
157
  with gr.Row(elem_id="concept-guidance-container"):
158
  with gr.Column(scale=1, min_width=256):
159
  model_dropdown = gr.Dropdown(model_names, value=default_model, label="Model")
160
+ concept_dropdown = gr.Dropdown(default_concepts, value=default_concept, label="Concept")
161
  guidance_scale = gr.Slider(*default_config["guidance_interval"], value=default_config["default_guidance_scale"], label="Guidance Scale")
162
  min_guidance_layer = gr.Slider(1.0, 32.0, value=16.0, step=1.0, label="First Guidance Layer")
163
  max_guidance_layer = gr.Slider(1.0, 32.0, value=32.0, step=1.0, label="Last Guidance Layer")