Spaces:
Running
Running
ffreemt
committed on
Commit
•
b153e87
1
Parent(s):
58214b0
Update Dockerfile
Browse files
- Dockerfile +2 -2
- m3_server.py +3 -1
Dockerfile
CHANGED
@@ -21,5 +21,5 @@ RUN pip install --no-cache-dir --upgrade pip && \
|
|
21 |
# CMD ["TRANSFORMERS_CACHE=./", "infinity_emb", "--model-name-or-path", "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "--port", "7860"]
|
22 |
# CMD ["python", "app.py"]
|
23 |
|
24 |
-
|
25 |
-
CMD ["sh", "-c", "HF_HOME=/tmp/cache", "python", "m3_server.py"]
|
|
|
21 |
# CMD ["TRANSFORMERS_CACHE=./", "infinity_emb", "--model-name-or-path", "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "--port", "7860"]
|
22 |
# CMD ["python", "app.py"]
|
23 |
|
24 |
+
CMD ["sh", "start-m3-server.sh"]
|
25 |
+
# CMD ["sh", "-c", "HF_HOME=/tmp/cache", "python", "m3_server.py"]
|
m3_server.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import asyncio
|
2 |
import os
|
3 |
import time
|
|
|
4 |
from concurrent.futures import ThreadPoolExecutor
|
5 |
from typing import List, Tuple, Union
|
6 |
from uuid import uuid4
|
@@ -11,7 +12,8 @@ from FlagEmbedding import BGEM3FlagModel
|
|
11 |
from pydantic import BaseModel
|
12 |
from starlette.status import HTTP_504_GATEWAY_TIMEOUT
|
13 |
|
14 |
-
|
|
|
15 |
|
16 |
batch_size = 2 # gpu batch_size in order of your available vram
|
17 |
max_request = 10 # max request for future improvements on api calls / gpu batches (for now is pretty basic)
|
|
|
1 |
import asyncio
|
2 |
import os
|
3 |
import time
|
4 |
+
from pathlib import Path
|
5 |
from concurrent.futures import ThreadPoolExecutor
|
6 |
from typing import List, Tuple, Union
|
7 |
from uuid import uuid4
|
|
|
12 |
from pydantic import BaseModel
|
13 |
from starlette.status import HTTP_504_GATEWAY_TIMEOUT
|
14 |
|
15 |
+
Path("/tmp/cache").mkdir(exist_ok=True)
|
16 |
+
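os.environ["HF_HOME"] = "/tmp/cache" # does not quite work here; presumably HF_HOME needs to be set before the process starts (e.g. in start-m3-server.sh) -- TODO confirm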
|
17 |
|
18 |
batch_size = 2 # gpu batch_size in order of your available vram
|
19 |
max_request = 10 # max request for future improvements on api calls / gpu batches (for now is pretty basic)
|