LukasHug committed on
Commit
83baad4
·
verified ·
1 Parent(s): cd9abb6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -9
app.py CHANGED
@@ -3,7 +3,7 @@ import os
3
  import argparse
4
  import time
5
  import subprocess
6
-
7
  import gradio_web_server as gws
8
 
9
  # Execute the pip install command with additional options
@@ -24,7 +24,7 @@ def start_controller():
24
  print(controller_command)
25
  return subprocess.Popen(controller_command)
26
 
27
-
28
  def start_worker(model_path: str, model_name: str, bits=16, device=0):
29
  print(f"Starting the model worker for the model {model_path}")
30
  # model_name = model_path.strip("/").split("/")[-1]
@@ -44,7 +44,7 @@ def start_worker(model_path: str, model_name: str, bits=16, device=0):
44
  model_path,
45
  "--model-name",
46
  model_name,
47
- # "--use-flash-attn",
48
  '--device',
49
  device
50
  ]
@@ -76,7 +76,9 @@ Set the environment variable `model` to change the model:
76
  ['AIML-TUDA/LlavaGuard-13B'](https://huggingface.co/AIML-TUDA/LlavaGuard-13B),
77
  ['AIML-TUDA/LlavaGuard-34B'](https://huggingface.co/AIML-TUDA/LlavaGuard-34B),
78
  """
 
79
  print(f"args: {gws.args}")
 
80
  controller_proc = start_controller()
81
  concurrency_count = int(os.getenv("concurrency_count", 5))
82
  api_key = os.getenv("token")
@@ -92,19 +94,17 @@ Set the environment variable `model` to change the model:
92
  models = [
93
  'LukasHug/LlavaGuard-7B-hf',
94
  'LukasHug/LlavaGuard-13B-hf',
95
- 'LukasHug/LlavaGuard-34B-hf',
96
- 'liuhaotian/llava-v1.6-vicuna-7b',
97
- ]
98
  bits = int(os.getenv("bits", 16))
99
  model = os.getenv("model", models[0])
100
  available_devices = os.getenv("CUDA_VISIBLE_DEVICES", "0")
101
- model_path, model_name = model, model.split("/")[-1]
102
 
103
  worker_proc = start_worker(model_path, model_name, bits=bits)
104
 
105
 
106
  # Wait for worker and controller to start
107
- time.sleep(60)
108
 
109
  exit_status = 0
110
  try:
@@ -125,4 +125,4 @@ Set the environment variable `model` to change the model:
125
  worker_proc.kill()
126
  controller_proc.kill()
127
 
128
- sys.exit(exit_status)
 
3
  import argparse
4
  import time
5
  import subprocess
6
+ import spaces
7
  import gradio_web_server as gws
8
 
9
  # Execute the pip install command with additional options
 
24
  print(controller_command)
25
  return subprocess.Popen(controller_command)
26
 
27
+ @spaces.GPU
28
  def start_worker(model_path: str, model_name: str, bits=16, device=0):
29
  print(f"Starting the model worker for the model {model_path}")
30
  # model_name = model_path.strip("/").split("/")[-1]
 
44
  model_path,
45
  "--model-name",
46
  model_name,
47
+ "--use-flash-attn",
48
  '--device',
49
  device
50
  ]
 
76
  ['AIML-TUDA/LlavaGuard-13B'](https://huggingface.co/AIML-TUDA/LlavaGuard-13B),
77
  ['AIML-TUDA/LlavaGuard-34B'](https://huggingface.co/AIML-TUDA/LlavaGuard-34B),
78
  """
79
+ # set_up_env_and_token(read=True)
80
  print(f"args: {gws.args}")
81
+ # set the huggingface login token
82
  controller_proc = start_controller()
83
  concurrency_count = int(os.getenv("concurrency_count", 5))
84
  api_key = os.getenv("token")
 
94
  models = [
95
  'LukasHug/LlavaGuard-7B-hf',
96
  'LukasHug/LlavaGuard-13B-hf',
97
+ 'LukasHug/LlavaGuard-34B-hf',]
 
 
98
  bits = int(os.getenv("bits", 16))
99
  model = os.getenv("model", models[0])
100
  available_devices = os.getenv("CUDA_VISIBLE_DEVICES", "0")
101
+ model_path, model_name = model, model.split("/")[1]
102
 
103
  worker_proc = start_worker(model_path, model_name, bits=bits)
104
 
105
 
106
  # Wait for worker and controller to start
107
+ time.sleep(50)
108
 
109
  exit_status = 0
110
  try:
 
125
  worker_proc.kill()
126
  controller_proc.kill()
127
 
128
+ sys.exit(exit_status)