Spaces:

BK-KI
/

bk-departements

Running on CPU Upgrade

App Files Files Community

BK-AI committed on Sep 13, 2023

Commit

3a52501

•

1 Parent(s): 226545e

initial commit building on PoC

Browse files

Files changed (9) hide show

app.py +86 -0
requirements.txt +7 -0
saved_model/config.json +3 -0
saved_model/pytorch_model.bin +3 -0
saved_model/special_tokens_map.json +3 -0
saved_model/tokenizer.json +3 -0
saved_model/tokenizer_config.json +3 -0
saved_model/training_args.bin +3 -0
saved_model/vocab.txt +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,86 @@

+#!/usr/bin/env python
+# coding: utf-8
+import gradio as gr
+import numpy as np
+import requests
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, TextClassificationPipeline, pipeline
+from langdetect import detect
+from matplotlib import pyplot as plt
+import imageio
+# Load the model
+model = AutoModelForSequenceClassification.from_pretrained("saved_model")
+tokenizer = AutoTokenizer.from_pretrained("saved_model")
+pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer)
+# Function called by the UI
+def attribution(text):
+    # Clean the plot
+    plt.clf()
+    # Detect the language
+    language = detect(text)
+    # Translate the input in german if necessary
+    if language == 'fr':
+        translator = pipeline("translation", model="Helsinki-NLP/opus-mt-fr-de")
+        translatedText = translator(text[0:1000])
+        text = translatedText[0]["translation_text"]
+    elif language != 'de':
+        return "The language is not recognized, it must be either in German or in French.", None
+    # Set the bars of the bar chart
+    bars = ""
+    if language == 'fr':
+        bars = ("DDPS", "DFI", "AS-MPC", "DFJP", "DEFR", "DETEC", "DFAE", "Parl", "ChF", "DFF", "AF", "TF")
+    else:
+        bars = ("VBS", "EDI", "AB-BA", "EJPD", "WBF", "UVEK", "EDA", "Parl", "BK", "EFD", "BV", "BGer")
+    # Make the prediction with the 1000 first characters
+    results = pipe(text[0:1000], return_all_scores=True)
+    rates = [row["score"] for row in results[0]]
+    # Bar chart
+    y_pos = np.arange(len(bars))
+    plt.barh(y_pos, rates)
+    plt.yticks(y_pos, bars)
+    # Set the output text
+    name = ""
+    maxRate = np.max(rates)
+    maxIndex = np.argmax(rates)
+    # ML model not sure if highest probability < 60%
+    if maxRate < 0.6:
+        # de / fr
+        if language == 'de':
+            name = "Das ML-Modell ist nicht sicher. Das Departement könnte sein : \n\n"
+        else:
+            name = "Le modèle ML n'est pas sûr. Le département pourrait être : \n\n"
+        i = 0
+        # Show each department that has a probability > 10%
+        while i == 0:
+            if rates[maxIndex] >= 0.1:
+                name = name + "\t" + str(rates[maxIndex])[2:4] + "%" + "\t\t\t\t\t" + bars[maxIndex] + "\n"
+                rates[maxIndex] = 0
+                maxIndex = np.argmax(rates)
+            else:
+                i = 1
+    # ML model pretty sure, show only one department
+    else:
+        name =  str(maxRate)[2:4] + "%" + "\t\t\t\t\t\t" + bars[maxIndex]
+    # Save the bar chart as png and load it (enables better display)
+    plt.savefig('rates.png')
+    im = imageio.imread('rates.png')
+    return name, im
+# display the UI
+interface = gr.Interface(fn=attribution,
+                         inputs=[gr.inputs.Textbox(lines=20, placeholder="Geben Sie bitte den Titel und den Sumbmitted Text des Vorstoss ein.\nVeuillez entrer le titre et le Submitted Text de la requête.")],
+                          outputs=['text', 'image'])
+interface.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+numpy
+transformers
+langdetect
+matplotlib
+imageio
+torch
+sentencepiece

saved_model/config.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f1dd5122dedc8fdf6eb1ec32b25f3246f8c3c64432abfd4d9bad4b626f378fc4
+size 1255

saved_model/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:766bc088608d7eb221c530029f2a704887b4072dab8b79448ec89729aef0bd87
+size 436430445

saved_model/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:303df45a03609e4ead04bc3dc1536d0ab19b5358db685b6f3da123d05ec200e3
+size 112

saved_model/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5d6f6affc6b91020cabef56fe9289907e34a89e7f3463a93250c0d94cc61000d
+size 726371

saved_model/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5ed472c8edcb18869d09d7bc852465911b105dd301fda14b4283b01577a5ebd7
+size 327

saved_model/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d35049b1861176f257b17db726fad1ace03c2b81216d26e34180592cfe717fa2
+size 3183

saved_model/vocab.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:982f8396ec746db0ed414dcc4789398ab6b365663cada50f776afb905dacbb61
+size 254729