Qifan Zhang committed
Commit e691ea0 · 1 Parent(s): 8f29b1d

fear: add log

Browse files:
- app.py +11 -0
- requirements.txt +2 -0
- utils/models.py +4 -4
app.py CHANGED

@@ -7,6 +7,7 @@ import pandas as pd
 
 from utils import pipeline
 from utils.models import list_models
+from loguru import logger
 
 
 def read_data(filepath: str) -> Optional[pd.DataFrame]:
@@ -27,6 +28,7 @@ def process(
     file=None,
 ) -> (None, pd.DataFrame, str):
     try:
+        logger.info(f'Processing {task_name} with {model_name} and {pooling}')
         # load file
         if file:
             df = read_data(file.name)
@@ -51,6 +53,15 @@ def process(
         return None, df.iloc[:10], path
 
     except:
+        error = traceback.format_exc()
+        logger.warning({
+            'error': error,
+            'task_name': task_name,
+            'model_name': model_name,
+            'pooling': pooling,
+            'text': text,
+            'file': file,
+        })
         return {'Info': 'Something wrong', 'Error': traceback.format_exc()}, None, None
 
 
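For reference, loguru needs no setup: importing logger gives a ready-made logger that writes to stderr, and it stringifies non-str messages, which is why passing a dict of context to logger.warning above works. A minimal sketch of the same pattern; the 'app.log' file sink is an illustrative assumption, not part of this commit:

    import traceback
    from loguru import logger

    logger.add('app.log', level='INFO')  # assumed extra sink: also log to a file

    def process(task_name, model_name, pooling, text=None, file=None):
        try:
            logger.info(f'Processing {task_name} with {model_name} and {pooling}')
            ...  # pipeline work elided
        except Exception:  # the sketch narrows the bare except: used in app.py
            # the dict is stringified into the log message, bundling the context
            logger.warning({'error': traceback.format_exc(), 'task_name': task_name})

loguru also provides logger.exception(...), which appends the current traceback automatically, as a more compact alternative to formatting it by hand.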
requirements.txt CHANGED

@@ -8,3 +8,5 @@ sentence-transformers
 openpyxl
 tabulate
 gradio
+loguru
+
utils/models.py CHANGED

@@ -1,6 +1,7 @@
 from functools import lru_cache
 
 import torch
+from loguru import logger
 from sentence_transformers import SentenceTransformer
 from transformers import AutoTokenizer, AutoModel
 
@@ -19,10 +20,8 @@ list_models = [
 
 class SBert:
     def __init__(self, path):
-        print(f'Loading model from {path} ...')
         self.model = SentenceTransformer(path, device=DEVICE)
-
-        # pprint(self.model.__dict__)
+        logger.info(f'Load {self.__class__} from {path} ...')
 
     @lru_cache(maxsize=10000)
     def __call__(self, x) -> torch.Tensor:
@@ -34,8 +33,9 @@ class ModelWithPooling:
     def __init__(self, path):
         self.tokenizer = AutoTokenizer.from_pretrained(path)
         self.model = AutoModel.from_pretrained(path)
+        logger.info(f'Load {self.__class__} from {path} ...')
 
-    @lru_cache(maxsize=
+    @lru_cache(maxsize=100)
     @torch.no_grad()
     def __call__(self, text: str, pooling='mean'):
         inputs = self.tokenizer(text, padding=True, truncation=True, return_tensors="pt")
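The @lru_cache(maxsize=100) restored above memoizes __call__ by argument value, so repeated requests for the same (text, pooling) pair skip the forward pass. A self-contained sketch of that memoization technique; the Embedder class is a hypothetical stand-in for ModelWithPooling:

    from functools import lru_cache

    class Embedder:
        @lru_cache(maxsize=100)  # caches results keyed on (self, text, pooling)
        def __call__(self, text: str, pooling: str = 'mean'):
            print(f'computing embedding for {text!r}')  # runs once per unique input
            return hash((text, pooling))  # stand-in for the real tensor

    e = Embedder()
    e('hello')  # computes and caches
    e('hello')  # served from the cache; no print this time

One caveat with this pattern: lru_cache on a method includes self in the cache key and holds a reference to it, so cached instances stay alive for the lifetime of the cache.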