Spaces:

alexandrainst
/

offensive-text-detection

Sleeping

App Files Files Community

saattrupdan committed on May 5, 2023

Commit

ed60c2e

1 Parent(s): 0bc6a93

feat: Add app

Browse files

Files changed (4) hide show

.gitignore +1 -0
README.md +5 -5
app.py +81 -0
requirements.txt +78 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ .venv/

README.md CHANGED Viewed

@@ -1,10 +1,10 @@
 ---
-title: Offensive Text Detection
-emoji: 📈
-colorFrom: purple
-colorTo: green
 sdk: gradio
-sdk_version: 3.28.2
 app_file: app.py
 pinned: false
 license: mit

 ---
+title: Danish Offensive Text Detection
+emoji: 🤬
+colorFrom: yellow
+colorTo: blue
 sdk: gradio
+sdk_version: 3.12.0
 app_file: app.py
 pinned: false
 license: mit

app.py ADDED Viewed

	@@ -0,0 +1,81 @@

+"""Gradio app that showcases Danish offensive text models."""
+import gradio as gr
+from transformers import pipeline
+from shap import Explainer
+import numpy as np
+def main():
+    pipe = pipeline(
+        task="text-classification",
+        model="alexandrainst/da-offensive-detection-small",
+    )
+    examples = [
+        "Din store idiot.",
+        "Jeg er glad for at være her.",
+        "Hvem tror du, du er?",
+        "Har du hæklefejl i kysen?",
+        "Hej med dig, jeg hedder Peter.",
+        "Fuck hvor er det dejligt, det her :)",
+        "🍆",
+        "😊",
+    ]
+    def classification(text) -> tuple[dict[str, float], dict]:
+        output: list[dict] = pipe(text)[0]
+        print(output)
+        explainer = Explainer(pipe)
+        explanation = explainer([text])
+        shap_values = explanation.values[0].sum(axis=1)
+        # Find the SHAP boundary
+        boundary = 0.03
+        if np.abs(shap_values).max() <= boundary:
+            boundary = np.abs(shap_values).max() - 1e-6
+        words: list[str] = explanation.data[0]
+        records = list()
+        char_idx = 0
+        for word, shap_value in zip(words, shap_values):
+            if abs(shap_value) <= boundary:
+                entity = 'O'
+            else:
+                entity = output['label'].lower().replace(' ', '-')
+            if len(word):
+                start = char_idx
+                char_idx += len(word)
+                end = char_idx
+                records.append(dict(
+                    entity=entity,
+                    word=word,
+                    score=abs(shap_value),
+                    start=start,
+                    end=end,
+                ))
+        print(list(zip(words, shap_values)))
+        print(records)
+        return ({output["label"]: output["score"]}, dict(text=text, entities=records))
+    color_map = {"offensive": "red", "not-offensive": "green", 'O': 'white'}
+    demo = gr.Interface(
+        fn=classification,
+        inputs=gr.Textbox(placeholder="Enter sentence here...", value=examples[0]),
+        outputs=[gr.Label(), gr.HighlightedText(color_map=color_map)],
+        examples=examples,
+        title="Danish Offensive Text Detection",
+        description="""
+Detect offensive text in Danish. Write any text in the box below, and the model will predict whether the text is offensive or not:
+_Also, be patient, as this demo is running on a CPU!_""",
+    )
+    demo.launch()
+# Launch the demo only when this file is executed as a script.
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,78 @@

+aiohttp==3.8.3
+aiosignal==1.3.1
+anyio==3.6.2
+async-timeout==4.0.2
+attrs==22.1.0
+bcrypt==4.0.1
+beautifulsoup4==4.11.1
+certifi==2022.9.24
+cffi==1.15.1
+charset-normalizer==2.1.1
+click==8.1.3
+contourpy==1.0.6
+cryptography==38.0.4
+cycler==0.11.0
+fastapi==0.88.0
+fasttext-wheel==0.9.2
+ffmpy==0.3.0
+filelock==3.8.0
+fonttools==4.38.0
+frozenlist==1.3.3
+fsspec==2022.11.0
+gdown==4.5.4
+gradio==3.12.0
+h11==0.12.0
+httpcore==0.15.0
+httpx==0.23.1
+huggingface-hub==0.11.1
+idna==3.4
+Jinja2==3.1.2
+kiwisolver==1.4.4
+linkify-it-py==1.0.3
+luga==0.2.6
+markdown-it-py==2.1.0
+MarkupSafe==2.1.1
+matplotlib==3.6.2
+mdit-py-plugins==0.3.1
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.0.2
+networkx==3.1
+nptyping==1.4.4
+numpy==1.23.5
+orjson==3.8.2
+packaging==21.3
+pandas==1.5.2
+paramiko==2.12.0
+Pillow==9.3.0
+pybind11==2.10.1
+pycparser==2.21
+pycryptodome==3.16.0
+pydantic==1.10.2
+pydub==0.25.1
+PyNaCl==1.5.0
+pyparsing==3.0.9
+PySocks==1.7.1
+python-dateutil==2.8.2
+python-multipart==0.0.5
+pytz==2022.6
+PyYAML==6.0
+regex==2022.10.31
+requests==2.28.1
+rfc3986==1.5.0
+six==1.16.0
+sniffio==1.3.0
+soupsieve==2.3.2.post1
+starlette==0.22.0
+sympy==1.11.1
+tokenizers==0.13.2
+torch==2.0.0
+tqdm==4.64.1
+transformers==4.28.1
+typing_extensions==4.4.0
+typish==1.9.3
+uc-micro-py==1.0.1
+urllib3==1.26.13
+uvicorn==0.20.0
+websockets==10.4
+yarl==1.8.1