Commit
·
ed60c2e
1
Parent(s):
0bc6a93
feat: Add app
Browse files- .gitignore +1 -0
- README.md +5 -5
- app.py +81 -0
- requirements.txt +78 -0
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
.venv/
|
README.md
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
---
|
2 |
-
title: Offensive Text Detection
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 3.
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: mit
|
|
|
1 |
---
|
2 |
+
title: Danish Offensive Text Detection
|
3 |
+
emoji: 🤬
|
4 |
+
colorFrom: yellow
|
5 |
+
colorTo: blue
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 3.12.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: mit
|
app.py
ADDED
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Gradio app that showcases Danish offensive text models."""
|
2 |
+
|
3 |
+
import gradio as gr
|
4 |
+
from transformers import pipeline
|
5 |
+
from shap import Explainer
|
6 |
+
import numpy as np
|
7 |
+
|
8 |
+
|
9 |
+
def _token_entities(words, shap_values, label, boundary=0.03):
    """Turn per-token SHAP values into highlight records.

    Args:
        words: Token strings, in the order they appear in the text.
        shap_values: One attribution value per token, aligned with ``words``.
        label: Predicted label; lower-cased with spaces replaced by ``-`` to
            form the entity name for tokens above the boundary.
        boundary: Absolute attribution at or below which a token is tagged
            ``'O'`` instead of the label.

    Returns:
        A list of dicts with keys ``entity``, ``word``, ``score``, ``start``
        and ``end``. Zero-length tokens are skipped. Offsets are cumulative
        token lengths, so they match the input text only if the tokens
        concatenate back to it -- NOTE(review): confirm for the tokenizer
        in use.
    """
    magnitudes = np.abs(np.asarray(shap_values, dtype=float))
    if magnitudes.size == 0:
        # Nothing to highlight; also avoids max() raising on an empty array.
        return []

    # When no token clears the default boundary, lower it just below the
    # strongest token so that at least that one gets highlighted.
    peak = magnitudes.max()
    if peak <= boundary:
        boundary = peak - 1e-6

    label_entity = label.lower().replace(' ', '-')

    records = []
    char_idx = 0
    for word, magnitude in zip(words, magnitudes):
        if not len(word):
            continue
        start = char_idx
        char_idx += len(word)
        records.append(dict(
            entity='O' if magnitude <= boundary else label_entity,
            word=str(word),
            # Plain float rather than a NumPy scalar for serialisation.
            score=float(magnitude),
            start=start,
            end=char_idx,
        ))
    return records


def main():
    """Build the Gradio demo for Danish offensive text detection and launch it."""
    pipe = pipeline(
        task="text-classification",
        model="alexandrainst/da-offensive-detection-small",
    )

    # Built once and reused across requests: it depends only on the pipeline,
    # so constructing it inside every call was redundant work.
    explainer = Explainer(pipe)

    examples = [
        "Din store idiot.",
        "Jeg er glad for at være her.",
        "Hvem tror du, du er?",
        "Har du hæklefejl i kysen?",
        "Hej med dig, jeg hedder Peter.",
        "Fuck hvor er det dejligt, det her :)",
        "🍆",
        "😊",
    ]

    def classification(text) -> tuple[dict[str, float], dict]:
        """Classify ``text`` and attribute the prediction to its tokens.

        Returns a ``{label: score}`` mapping for the label component and a
        ``{"text": ..., "entities": [...]}`` dict for the highlighted text.
        """
        # First element of the pipeline result; a dict with 'label' and 'score'.
        output: dict = pipe(text)[0]
        print(output)

        explanation = explainer([text])
        # Collapse the second axis to get a single value per token.
        shap_values = explanation.values[0].sum(axis=1)
        words = explanation.data[0]

        records = _token_entities(words, shap_values, output['label'])
        print(list(zip(words, shap_values)))
        print(records)

        return ({output["label"]: output["score"]}, dict(text=text, entities=records))

    color_map = {"offensive": "red", "not-offensive": "green", 'O': 'white'}
    demo = gr.Interface(
        fn=classification,
        inputs=gr.Textbox(placeholder="Enter sentence here...", value=examples[0]),
        outputs=[gr.Label(), gr.HighlightedText(color_map=color_map)],
        examples=examples,
        title="Danish Offensive Text Detection",
        description="""
Detect offensive text in Danish. Write any text in the box below, and the model will predict whether the text is offensive or not:

_Also, be patient, as this demo is running on a CPU!_""",
    )

    demo.launch()


if __name__ == "__main__":
    main()
|
requirements.txt
ADDED
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aiohttp==3.8.3
|
2 |
+
aiosignal==1.3.1
|
3 |
+
anyio==3.6.2
|
4 |
+
async-timeout==4.0.2
|
5 |
+
attrs==22.1.0
|
6 |
+
bcrypt==4.0.1
|
7 |
+
beautifulsoup4==4.11.1
|
8 |
+
certifi==2022.9.24
|
9 |
+
cffi==1.15.1
|
10 |
+
charset-normalizer==2.1.1
|
11 |
+
click==8.1.3
|
12 |
+
contourpy==1.0.6
|
13 |
+
cryptography==38.0.4
|
14 |
+
cycler==0.11.0
|
15 |
+
fastapi==0.88.0
|
16 |
+
fasttext-wheel==0.9.2
|
17 |
+
ffmpy==0.3.0
|
18 |
+
filelock==3.8.0
|
19 |
+
fonttools==4.38.0
|
20 |
+
frozenlist==1.3.3
|
21 |
+
fsspec==2022.11.0
|
22 |
+
gdown==4.5.4
|
23 |
+
gradio==3.12.0
|
24 |
+
h11==0.12.0
|
25 |
+
httpcore==0.15.0
|
26 |
+
httpx==0.23.1
|
27 |
+
huggingface-hub==0.11.1
|
28 |
+
idna==3.4
|
29 |
+
Jinja2==3.1.2
|
30 |
+
kiwisolver==1.4.4
|
31 |
+
linkify-it-py==1.0.3
|
32 |
+
luga==0.2.6
|
33 |
+
markdown-it-py==2.1.0
|
34 |
+
MarkupSafe==2.1.1
|
35 |
+
matplotlib==3.6.2
|
36 |
+
mdit-py-plugins==0.3.1
|
37 |
+
mdurl==0.1.2
|
38 |
+
mpmath==1.3.0
|
39 |
+
multidict==6.0.2
|
40 |
+
networkx==3.1
|
41 |
+
nptyping==1.4.4
|
42 |
+
numpy==1.23.5
|
43 |
+
orjson==3.8.2
|
44 |
+
packaging==21.3
|
45 |
+
pandas==1.5.2
|
46 |
+
paramiko==2.12.0
|
47 |
+
Pillow==9.3.0
|
48 |
+
pybind11==2.10.1
|
49 |
+
pycparser==2.21
|
50 |
+
pycryptodome==3.16.0
|
51 |
+
pydantic==1.10.2
|
52 |
+
pydub==0.25.1
|
53 |
+
PyNaCl==1.5.0
|
54 |
+
pyparsing==3.0.9
|
55 |
+
PySocks==1.7.1
|
56 |
+
python-dateutil==2.8.2
|
57 |
+
python-multipart==0.0.5
|
58 |
+
pytz==2022.6
|
59 |
+
PyYAML==6.0
|
60 |
+
regex==2022.10.31
|
61 |
+
requests==2.28.1
|
62 |
+
rfc3986==1.5.0
|
63 |
+
six==1.16.0
|
64 |
+
sniffio==1.3.0
|
65 |
+
soupsieve==2.3.2.post1
|
66 |
+
starlette==0.22.0
|
67 |
+
sympy==1.11.1
|
68 |
+
tokenizers==0.13.2
|
69 |
+
torch==2.0.0
|
70 |
+
tqdm==4.64.1
|
71 |
+
transformers==4.28.1
|
72 |
+
typing_extensions==4.4.0
|
73 |
+
typish==1.9.3
|
74 |
+
uc-micro-py==1.0.1
|
75 |
+
urllib3==1.26.13
|
76 |
+
uvicorn==0.20.0
|
77 |
+
websockets==10.4
|
78 |
+
yarl==1.8.1
|