Spaces:

dvruette
/

concept-guidance

Sleeping

App Files Community

dvruette committed on Feb 13

Commit

face2e4

•

1 Parent(s): c5dac9d

update main.py

Browse files

Files changed (1) hide show

main.py +25 -15

main.py CHANGED Viewed

@@ -15,10 +15,14 @@ logger = logging.getLogger(__name__)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # device = "cpu"
 MODEL_CONFIGS = {
     "Llama-2-7b-chat-hf": {
         "identifier": "meta-llama/Llama-2-7b-chat-hf",
         "dtype": torch.float16 if device.type == "cuda" else torch.float32,
         "guidance_interval": [-16.0, 16.0],
         "default_guidance_scale": 8.0,
         "min_guidance_layer": 16,
@@ -26,16 +30,17 @@ MODEL_CONFIGS = {
         "default_concept": "humor",
         "concepts": ["humor", "creativity", "quality", "truthfulness", "compliance"],
     },
-    "Mistral-7B-Instruct-v0.1": {
-        "identifier": "mistralai/Mistral-7B-Instruct-v0.1",
-        "dtype": torch.bfloat16 if device.type == "cuda" else torch.float32,
-        "guidance_interval": [-128.0, 128.0],
-        "default_guidance_scale": 48.0,
-        "min_guidance_layer": 8,
-        "max_guidance_layer": 32,
-        "default_concept": "humor",
-        "concepts": ["humor", "creativity", "quality", "truthfulness", "compliance"],
-    },
 }
 def load_concept_vectors(model, concepts):
@@ -43,7 +48,7 @@ def load_concept_vectors(model, concepts):
 def load_model(model_name):
     config = MODEL_CONFIGS[model_name]
-    model = AutoModelForCausalLM.from_pretrained(config["identifier"], torch_dtype=config["dtype"])
     tokenizer = AutoTokenizer.from_pretrained(config["identifier"])
     if tokenizer.chat_template is None:
         tokenizer.chat_template = DEFAULT_CHAT_TEMPLATE
@@ -99,16 +104,20 @@ def generate_completion(
     # move all other models to CPU
     for name, (model, _) in MODELS.items():
         if name != model_name:
-            model.to("cpu")
     torch.cuda.empty_cache()
     # load the model
     model, tokenizer = MODELS[model_name]
-    model = model.to(device, non_blocking=True)
     concept_vector = CONCEPT_VECTORS[model_name][concept]
     guidance_layers = list(range(int(min_guidance_layer) - 1, int(max_guidance_layer)))
     patch_model(model, concept_vector, guidance_scale=guidance_scale, guidance_layers=guidance_layers)
-    pipe = pipeline("conversational", model=model, tokenizer=tokenizer, device=device)
     conversation = history_to_conversation(history)
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
@@ -141,13 +150,14 @@ class ConceptGuidanceUI:
         default_model = model_names[0]
         default_config = MODEL_CONFIGS[default_model]
         default_concepts = default_config["concepts"]
         saved_input = gr.State("")
         with gr.Row(elem_id="concept-guidance-container"):
             with gr.Column(scale=1, min_width=256):
                 model_dropdown = gr.Dropdown(model_names, value=default_model, label="Model")
-                concept_dropdown = gr.Dropdown(default_concepts, value=default_concepts[0], label="Concept")
                 guidance_scale = gr.Slider(*default_config["guidance_interval"], value=default_config["default_guidance_scale"], label="Guidance Scale")
                 min_guidance_layer = gr.Slider(1.0, 32.0, value=16.0, step=1.0, label="First Guidance Layer")
                 max_guidance_layer = gr.Slider(1.0, 32.0, value=32.0, step=1.0, label="Last Guidance Layer")

 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # device = "cpu"
+# Comment out or uncomment the model entries below to choose which models are loaded
+# RAM requirements: ~16GB x #models (+ ~4GB overhead)
+# VRAM requirements: ~16GB
 MODEL_CONFIGS = {
     "Llama-2-7b-chat-hf": {
         "identifier": "meta-llama/Llama-2-7b-chat-hf",
         "dtype": torch.float16 if device.type == "cuda" else torch.float32,
+        "load_in_8bit": False,
         "guidance_interval": [-16.0, 16.0],
         "default_guidance_scale": 8.0,
         "min_guidance_layer": 16,
         "default_concept": "humor",
         "concepts": ["humor", "creativity", "quality", "truthfulness", "compliance"],
     },
+    # "Mistral-7B-Instruct-v0.1": {
+    #     "identifier": "mistralai/Mistral-7B-Instruct-v0.1",
+    #     "dtype": torch.bfloat16 if device.type == "cuda" else torch.float32,
+    #     "load_in_8bit": False,
+    #     "guidance_interval": [-128.0, 128.0],
+    #     "default_guidance_scale": 48.0,
+    #     "min_guidance_layer": 8,
+    #     "max_guidance_layer": 32,
+    #     "default_concept": "humor",
+    #     "concepts": ["humor", "creativity", "quality", "truthfulness", "compliance"],
+    # },
 }
 def load_concept_vectors(model, concepts):
 def load_model(model_name):
     config = MODEL_CONFIGS[model_name]
+    model = AutoModelForCausalLM.from_pretrained(config["identifier"], torch_dtype=config["dtype"], load_in_8bit=config["load_in_8bit"])
     tokenizer = AutoTokenizer.from_pretrained(config["identifier"])
     if tokenizer.chat_template is None:
         tokenizer.chat_template = DEFAULT_CHAT_TEMPLATE
     # move all other models to CPU
     for name, (model, _) in MODELS.items():
         if name != model_name:
+            config = MODEL_CONFIGS[name]
+            if not config["load_in_8bit"]:
+                model.to("cpu")
     torch.cuda.empty_cache()
     # load the model
+    config = MODEL_CONFIGS[model_name]
     model, tokenizer = MODELS[model_name]
+    if not config["load_in_8bit"]:
+        model.to(device, non_blocking=True)
     concept_vector = CONCEPT_VECTORS[model_name][concept]
     guidance_layers = list(range(int(min_guidance_layer) - 1, int(max_guidance_layer)))
     patch_model(model, concept_vector, guidance_scale=guidance_scale, guidance_layers=guidance_layers)
+    pipe = pipeline("conversational", model=model, tokenizer=tokenizer, device=(device if not config["load_in_8bit"] else None))
     conversation = history_to_conversation(history)
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
         default_model = model_names[0]
         default_config = MODEL_CONFIGS[default_model]
         default_concepts = default_config["concepts"]
+        default_concept = default_config["default_concept"]
         saved_input = gr.State("")
         with gr.Row(elem_id="concept-guidance-container"):
             with gr.Column(scale=1, min_width=256):
                 model_dropdown = gr.Dropdown(model_names, value=default_model, label="Model")
+                concept_dropdown = gr.Dropdown(default_concepts, value=default_concept, label="Concept")
                 guidance_scale = gr.Slider(*default_config["guidance_interval"], value=default_config["default_guidance_scale"], label="Guidance Scale")
                 min_guidance_layer = gr.Slider(1.0, 32.0, value=16.0, step=1.0, label="First Guidance Layer")
                 max_guidance_layer = gr.Slider(1.0, 32.0, value=32.0, step=1.0, label="Last Guidance Layer")