saattrupdan committed on
Commit
ed60c2e
·
1 Parent(s): 0bc6a93

feat: Add app

Browse files
Files changed (4) hide show
  1. .gitignore +1 -0
  2. README.md +5 -5
  3. app.py +81 -0
  4. requirements.txt +78 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .venv/
README.md CHANGED
@@ -1,10 +1,10 @@
1
  ---
2
- title: Offensive Text Detection
3
- emoji: 📈
4
- colorFrom: purple
5
- colorTo: green
6
  sdk: gradio
7
- sdk_version: 3.28.2
8
  app_file: app.py
9
  pinned: false
10
  license: mit
 
1
  ---
2
+ title: Danish Offensive Text Detection
3
+ emoji: 🤬
4
+ colorFrom: yellow
5
+ colorTo: blue
6
  sdk: gradio
7
+ sdk_version: 3.12.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
app.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Gradio app that showcases Danish offensive text models."""
2
+
3
+ import gradio as gr
4
+ from transformers import pipeline
5
+ from shap import Explainer
6
+ import numpy as np
7
+
8
+
9
def _token_entities(words, shap_values, label, boundary):
    """Build highlight records for the tokens of one explained text.

    Args:
        words: Token strings, in the order they appear in the text.
        shap_values: One SHAP value per token, aligned with ``words``.
        label: Predicted label; lower-cased and with spaces replaced by
            hyphens it becomes the entity name of highlighted tokens.
        boundary: Tokens whose absolute SHAP value is at or below this
            threshold get the neutral entity ``"O"``.

    Returns:
        A list of dicts with the keys ``entity``, ``word``, ``score``,
        ``start`` and ``end``. ``start``/``end`` are running character
        offsets obtained by laying the non-empty tokens end to end.
    """
    entity_name = label.lower().replace(" ", "-")
    records = []
    char_idx = 0
    for word, shap_value in zip(words, shap_values):
        # Tokens without any text produce no record and do not advance the
        # running character offset.
        if not word:
            continue

        magnitude = abs(shap_value)
        start = char_idx
        char_idx += len(word)
        records.append(
            {
                "entity": "O" if magnitude <= boundary else entity_name,
                "word": word,
                # Store a plain Python float rather than a NumPy scalar.
                "score": float(magnitude),
                "start": start,
                "end": char_idx,
            }
        )
    return records


def main():
    """Load the offensive-text model and launch the Gradio demo."""
    pipe = pipeline(
        task="text-classification",
        model="alexandrainst/da-offensive-detection-small",
    )

    examples = [
        "Din store idiot.",
        "Jeg er glad for at være her.",
        "Hvem tror du, du er?",
        "Har du hæklefejl i kysen?",
        "Hej med dig, jeg hedder Peter.",
        "Fuck hvor er det dejligt, det her :)",
        "🍆",
        "😊",
    ]

    def classification(text) -> tuple[dict[str, float], dict]:
        """Classify ``text`` and explain the prediction token by token.

        Returns:
            A pair ``(label_scores, highlighted)``: ``label_scores`` maps the
            predicted label to its score, and ``highlighted`` is a dict with
            the input ``text`` and the per-token ``entities`` records.
        """
        # The first element of the pipeline result is a dict holding at least
        # "label" and "score" (both are read below).
        output: dict = pipe(text)[0]
        print(output)

        # NOTE(review): the explainer is built per request on purpose here;
        # presumably construction is cheap next to the explanation itself and
        # this avoids sharing explainer state between concurrent requests --
        # confirm before hoisting it out of the closure.
        explainer = Explainer(pipe)
        explanation = explainer([text])

        # Collapse axis 1 so that each token is left with a single value.
        # Assumes values[0] is (tokens, outputs) -- TODO confirm.
        shap_values = explanation.values[0].sum(axis=1)

        # Find the SHAP boundary. If no token exceeds the default threshold,
        # lower it to just under the largest magnitude so the strongest
        # token(s) are still highlighted.
        magnitudes = np.abs(shap_values)
        # Guard the reduction: max() of a zero-size array raises ValueError.
        max_abs = magnitudes.max() if magnitudes.size else 0.0
        boundary = 0.03
        if max_abs <= boundary:
            boundary = max_abs - 1e-6

        words = explanation.data[0]
        records = _token_entities(words, shap_values, output["label"], boundary)
        print(list(zip(words, shap_values)))
        print(records)

        return (
            {output["label"]: output["score"]},
            {"text": text, "entities": records},
        )

    color_map = {"offensive": "red", "not-offensive": "green", "O": "white"}
    demo = gr.Interface(
        fn=classification,
        inputs=gr.Textbox(placeholder="Enter sentence here...", value=examples[0]),
        outputs=[gr.Label(), gr.HighlightedText(color_map=color_map)],
        examples=examples,
        title="Danish Offensive Text Detection",
        description="""
Detect offensive text in Danish. Write any text in the box below, and the model will predict whether the text is offensive or not:

_Also, be patient, as this demo is running on a CPU!_""",
    )

    demo.launch()


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohttp==3.8.3
2
+ aiosignal==1.3.1
3
+ anyio==3.6.2
4
+ async-timeout==4.0.2
5
+ attrs==22.1.0
6
+ bcrypt==4.0.1
7
+ beautifulsoup4==4.11.1
8
+ certifi==2022.9.24
9
+ cffi==1.15.1
10
+ charset-normalizer==2.1.1
11
+ click==8.1.3
12
+ contourpy==1.0.6
13
+ cryptography==38.0.4
14
+ cycler==0.11.0
15
+ fastapi==0.88.0
16
+ fasttext-wheel==0.9.2
17
+ ffmpy==0.3.0
18
+ filelock==3.8.0
19
+ fonttools==4.38.0
20
+ frozenlist==1.3.3
21
+ fsspec==2022.11.0
22
+ gdown==4.5.4
23
+ gradio==3.12.0
24
+ h11==0.12.0
25
+ httpcore==0.15.0
26
+ httpx==0.23.1
27
+ huggingface-hub==0.11.1
28
+ idna==3.4
29
+ Jinja2==3.1.2
30
+ kiwisolver==1.4.4
31
+ linkify-it-py==1.0.3
32
+ luga==0.2.6
33
+ markdown-it-py==2.1.0
34
+ MarkupSafe==2.1.1
35
+ matplotlib==3.6.2
36
+ mdit-py-plugins==0.3.1
37
+ mdurl==0.1.2
38
+ mpmath==1.3.0
39
+ multidict==6.0.2
40
+ networkx==3.1
41
+ nptyping==1.4.4
42
+ numpy==1.23.5
43
+ orjson==3.8.2
44
+ packaging==21.3
45
+ pandas==1.5.2
46
+ paramiko==2.12.0
47
+ Pillow==9.3.0
48
+ pybind11==2.10.1
49
+ pycparser==2.21
50
+ pycryptodome==3.16.0
51
+ pydantic==1.10.2
52
+ pydub==0.25.1
53
+ PyNaCl==1.5.0
54
+ pyparsing==3.0.9
55
+ PySocks==1.7.1
56
+ python-dateutil==2.8.2
57
+ python-multipart==0.0.5
58
+ pytz==2022.6
59
+ PyYAML==6.0
60
+ regex==2022.10.31
61
+ requests==2.28.1
62
+ rfc3986==1.5.0
63
+ six==1.16.0
64
+ sniffio==1.3.0
65
+ soupsieve==2.3.2.post1
66
+ starlette==0.22.0
67
+ sympy==1.11.1
68
+ tokenizers==0.13.2
69
+ torch==2.0.0
70
+ tqdm==4.64.1
71
+ transformers==4.28.1
72
+ typing_extensions==4.4.0
73
+ typish==1.9.3
74
+ uc-micro-py==1.0.1
75
+ urllib3==1.26.13
76
+ uvicorn==0.20.0
77
+ websockets==10.4
78
+ yarl==1.8.1