Spaces:
Runtime error
Runtime error
ffreemt
committed on
Commit
·
1681f8a
1
Parent(s):
bd2d2e2
Fix 7b
Browse files
app.py
CHANGED
@@ -47,7 +47,8 @@ _ = (
|
|
47 |
"golay" in platform.node()
|
48 |
or "okteto" in platform.node()
|
49 |
or Path("/kaggle").exists()
|
50 |
-
or psutil.cpu_count(logical=False) < 4
|
|
|
51 |
)
|
52 |
|
53 |
if _:
|
@@ -116,7 +117,7 @@ except Exception as exc_:
|
|
116 |
LLM = AutoModelForCausalLM.from_pretrained(
|
117 |
model_loc,
|
118 |
model_type="llama",
|
119 |
-
threads=cpu_count,
|
120 |
)
|
121 |
|
122 |
logger.info(f"done load llm {model_loc=} {file_size=}G")
|
@@ -145,7 +146,7 @@ class GenerationConfig:
|
|
145 |
seed: int = 42
|
146 |
reset: bool = False
|
147 |
stream: bool = True
|
148 |
-
threads: int = cpu_count
|
149 |
# stop: list[str] = field(default_factory=lambda: [stop_string])
|
150 |
|
151 |
|
@@ -237,7 +238,7 @@ def predict_api(prompt):
|
|
237 |
seed=42,
|
238 |
reset=True, # reset history (cache)
|
239 |
stream=False,
|
240 |
-
threads=cpu_count,
|
241 |
# stop=prompt_prefix[1:2],
|
242 |
)
|
243 |
|
@@ -392,18 +393,18 @@ with gr.Blocks(
|
|
392 |
fn=user,
|
393 |
inputs=[msg, chatbot],
|
394 |
outputs=[msg, chatbot],
|
395 |
-
queue=
|
396 |
show_progress="full",
|
397 |
-
api_name=
|
398 |
).then(bot, chatbot, chatbot, queue=False)
|
399 |
submit.click(
|
400 |
fn=lambda x, y: ("",) + user(x, y)[1:], # clear msg
|
401 |
inputs=[msg, chatbot],
|
402 |
outputs=[msg, chatbot],
|
403 |
-
|
404 |
-
queue=False,
|
405 |
show_progress="full",
|
406 |
-
api_name=
|
407 |
).then(bot, chatbot, chatbot, queue=False)
|
408 |
|
409 |
clear.click(lambda: None, None, chatbot, queue=False)
|
@@ -429,13 +430,16 @@ with gr.Blocks(
|
|
429 |
# CPU UPGRADE cpu_count=8 32G, model 7G
|
430 |
|
431 |
# does not work
|
|
|
432 |
# _ = int(psutil.virtual_memory().total / 10**9 // file_size - 1)
|
433 |
# concurrency_count = max(_, 1)
|
434 |
-
|
435 |
-
|
436 |
-
concurrency_count = max(int(32 / file_size) - 1, 1)
|
437 |
else:
|
438 |
-
concurrency_count = max(int(16 / file_size) - 1, 1)
|
|
|
|
|
|
|
439 |
logger.info(f"{concurrency_count=}")
|
440 |
|
441 |
-
block.queue(concurrency_count=
|
|
|
47 |
"golay" in platform.node()
|
48 |
or "okteto" in platform.node()
|
49 |
or Path("/kaggle").exists()
|
50 |
+
# or psutil.cpu_count(logical=False) < 4
|
51 |
+
or 1 # run 7b in hf
|
52 |
)
|
53 |
|
54 |
if _:
|
|
|
117 |
LLM = AutoModelForCausalLM.from_pretrained(
|
118 |
model_loc,
|
119 |
model_type="llama",
|
120 |
+
# threads=cpu_count,
|
121 |
)
|
122 |
|
123 |
logger.info(f"done load llm {model_loc=} {file_size=}G")
|
|
|
146 |
seed: int = 42
|
147 |
reset: bool = False
|
148 |
stream: bool = True
|
149 |
+
# threads: int = cpu_count
|
150 |
# stop: list[str] = field(default_factory=lambda: [stop_string])
|
151 |
|
152 |
|
|
|
238 |
seed=42,
|
239 |
reset=True, # reset history (cache)
|
240 |
stream=False,
|
241 |
+
# threads=cpu_count,
|
242 |
# stop=prompt_prefix[1:2],
|
243 |
)
|
244 |
|
|
|
393 |
fn=user,
|
394 |
inputs=[msg, chatbot],
|
395 |
outputs=[msg, chatbot],
|
396 |
+
queue=True,
|
397 |
show_progress="full",
|
398 |
+
api_name=None,
|
399 |
).then(bot, chatbot, chatbot, queue=False)
|
400 |
submit.click(
|
401 |
fn=lambda x, y: ("",) + user(x, y)[1:], # clear msg
|
402 |
inputs=[msg, chatbot],
|
403 |
outputs=[msg, chatbot],
|
404 |
+
queue=True,
|
405 |
+
# queue=False,
|
406 |
show_progress="full",
|
407 |
+
api_name=None,
|
408 |
).then(bot, chatbot, chatbot, queue=False)
|
409 |
|
410 |
clear.click(lambda: None, None, chatbot, queue=False)
|
|
|
430 |
# CPU UPGRADE cpu_count=8 32G, model 7G
|
431 |
|
432 |
# does not work
|
433 |
+
_ = """
|
434 |
# _ = int(psutil.virtual_memory().total / 10**9 // file_size - 1)
|
435 |
# concurrency_count = max(_, 1)
|
436 |
+
if psutil.cpu_count(logical=False) >= 8:
|
437 |
+
# concurrency_count = max(int(32 / file_size) - 1, 1)
|
|
|
438 |
else:
|
439 |
+
# concurrency_count = max(int(16 / file_size) - 1, 1)
|
440 |
+
# """
|
441 |
+
|
442 |
+
concurrency_count = 1
|
443 |
logger.info(f"{concurrency_count=}")
|
444 |
|
445 |
+
block.queue(concurrency_count=concurrency_count, max_size=5).launch(debug=True)
|