Spaces:

AIML-TUDA
/

LlavaGuard

Running on Zero

App Files Community

LukasHug committed on Jun 13, 2024

Commit

83baad4

verified ·

1 Parent(s): cd9abb6

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -9

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ import os
 import argparse
 import time
 import subprocess
 import gradio_web_server as gws
 # Execute the pip install command with additional options
@@ -24,7 +24,7 @@ def start_controller():
     print(controller_command)
     return subprocess.Popen(controller_command)
 def start_worker(model_path: str, model_name: str, bits=16, device=0):
     print(f"Starting the model worker for the model {model_path}")
     # model_name = model_path.strip("/").split("/")[-1]
@@ -44,7 +44,7 @@ def start_worker(model_path: str, model_name: str, bits=16, device=0):
         model_path,
         "--model-name",
         model_name,
-        # "--use-flash-attn",
         '--device',
         device
     ]
@@ -76,7 +76,9 @@ Set the environment variable `model` to change the model:
 ['AIML-TUDA/LlavaGuard-13B'](https://huggingface.co/AIML-TUDA/LlavaGuard-13B),
 ['AIML-TUDA/LlavaGuard-34B'](https://huggingface.co/AIML-TUDA/LlavaGuard-34B),
 """
     print(f"args: {gws.args}")
     controller_proc = start_controller()
     concurrency_count = int(os.getenv("concurrency_count", 5))
     api_key = os.getenv("token")
@@ -92,19 +94,17 @@ Set the environment variable `model` to change the model:
     models = [
         'LukasHug/LlavaGuard-7B-hf',
         'LukasHug/LlavaGuard-13B-hf',
-        'LukasHug/LlavaGuard-34B-hf',
-        'liuhaotian/llava-v1.6-vicuna-7b',
-    ]
     bits = int(os.getenv("bits", 16))
     model = os.getenv("model", models[0])
     available_devices = os.getenv("CUDA_VISIBLE_DEVICES", "0")
-    model_path, model_name = model, model.split("/")[-1]
     worker_proc = start_worker(model_path, model_name, bits=bits)
     # Wait for worker and controller to start
-    time.sleep(60)
     exit_status = 0
     try:
@@ -125,4 +125,4 @@ Set the environment variable `model` to change the model:
         worker_proc.kill()
         controller_proc.kill()
-        sys.exit(exit_status)

 import argparse
 import time
 import subprocess
+import spaces
 import gradio_web_server as gws
 # Execute the pip install command with additional options
     print(controller_command)
     return subprocess.Popen(controller_command)
+@spaces.GPU
 def start_worker(model_path: str, model_name: str, bits=16, device=0):
     print(f"Starting the model worker for the model {model_path}")
     # model_name = model_path.strip("/").split("/")[-1]
         model_path,
         "--model-name",
         model_name,
+        "--use-flash-attn",
         '--device',
         device
     ]
 ['AIML-TUDA/LlavaGuard-13B'](https://huggingface.co/AIML-TUDA/LlavaGuard-13B),
 ['AIML-TUDA/LlavaGuard-34B'](https://huggingface.co/AIML-TUDA/LlavaGuard-34B),
 """
+    # set_up_env_and_token(read=True)
     print(f"args: {gws.args}")
+    # set the huggingface login token
     controller_proc = start_controller()
     concurrency_count = int(os.getenv("concurrency_count", 5))
     api_key = os.getenv("token")
     models = [
         'LukasHug/LlavaGuard-7B-hf',
         'LukasHug/LlavaGuard-13B-hf',
+        'LukasHug/LlavaGuard-34B-hf',]
     bits = int(os.getenv("bits", 16))
     model = os.getenv("model", models[0])
     available_devices = os.getenv("CUDA_VISIBLE_DEVICES", "0")
+    model_path, model_name = model, model.split("/")[1]
     worker_proc = start_worker(model_path, model_name, bits=bits)
     # Wait for worker and controller to start
+    time.sleep(50)
     exit_status = 0
     try:
         worker_proc.kill()
         controller_proc.kill()
+        sys.exit(exit_status)