stackoverflow / app.py
mikachou's picture
add chart with proba
4b67ac0
from typing import Iterator

import gradio as gr
import joblib
import matplotlib.pyplot as plt
import numpy as np
import spacy
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.preprocessing import MultiLabelBinarizer
# Artifacts are loaded once, when this module is executed.
# NOTE(review): joblib.load unpickles its input, so these files must come
# from a trusted source.
# spaCy English pipeline; lemmatize() calls it to tokenize the input text.
nlp = spacy.load('en_core_web_sm')
# Text vectorizer (presumably a fitted TF-IDF transformer - confirm);
# predict() calls its transform() on the lemmatized text.
tfidf = joblib.load('./tfidf.joblib')
# Tag classifier; used through predict() and predict_proba().
model = joblib.load('./model.joblib')
# Tag binarizer; used through inverse_transform() and its ts.count mapping.
tags_binarizer = joblib.load('./tags.joblib')
def lemmatize(s: str) -> Iterator[str]:
    """Return the lowercased lemmas of the meaningful tokens of ``s``.

    The text is tokenized with the module-level spaCy pipeline ``nlp``.
    Tokens flagged by spaCy as whitespace, punctuation, stop words or
    digits are dropped; every remaining token contributes its lemma,
    lowercased.

    Args:
        s: Raw text to process.

    Returns:
        A lazy, single-use iterator over the lemmas, in document order.
    """
    # The pipeline runs eagerly here; only filtering and lemma extraction
    # are deferred until the returned iterator is consumed.
    doc = nlp(s)
    return (
        token.lemma_.lower()
        for token in doc
        if not (
            token.is_space
            or token.is_punct
            or token.is_stop
            or token.is_digit
        )
    )
def plot(tags, proba):
    """Show a horizontal bar chart with one bar per tag.

    Args:
        tags: Tag names, one per bar, listed from top to bottom.
        proba: Score of each tag, in the same order as ``tags``.

    The chart is displayed with ``plt.show()``; nothing is returned.
    """
    # Both settings below change matplotlib's global configuration.
    plt.style.use('dark_background')
    plt.rcParams['font.size'] = 16

    figure, axes = plt.subplots(figsize=(12, 9))
    axes.set_title('Score/Tag')
    axes.set_xlabel('Score')

    axes.barh(tags, proba, align='center', color='darkred')
    axes.set_yticks(tags, labels=tags)
    # Flip the axis so the first tag is drawn at the top.
    axes.invert_yaxis()

    # Write each rounded score just inside the right end of its bar.
    for row, score in enumerate(proba):
        axes.text(score - 0.065, row + 0.05, str(round(score, 2)))

    axes.set_xlim(0, 1)
    plt.show()
def predict_words(X):
    """Return the tags predicted for the first sample of ``X``.

    Args:
        X: Feature matrix passed to ``model.predict``.

    Returns:
        The predicted tags of the first row, joined by single spaces.
    """
    label_rows = tags_binarizer.inverse_transform(model.predict(X))
    first_sample_tags = label_rows[0]
    return " ".join(first_sample_tags)
def _rank_scores(tags, scores, min_score):
    """Pair tags with scores, keep scores >= min_score, sort best first."""
    kept = [
        (tag, score)
        for tag, score in zip(tags, scores)
        if score >= min_score
    ]
    # sorted() is stable, so ties keep the incoming tag order.
    return sorted(kept, key=lambda tag_value: tag_value[1], reverse=True)


def proba_chart(X, min_score=0.1):
    """Build a horizontal bar chart of the tag scores predicted for ``X``.

    Args:
        X: Feature matrix passed to ``model.predict_proba``; only the
            scores of the first row are charted.
        min_score: Tags scoring below this value are left out of the chart.

    Returns:
        The matplotlib figure. When no tag reaches ``min_score`` the figure
        contains a short message instead of bars.
    """
    y_proba = model.predict_proba(X)[0]
    # Tag names in sorted order are paired positionally with the scores.
    # NOTE(review): assumes this order matches the model's output columns.
    tags = list(dict(sorted(tags_binarizer.ts.count.items())))
    ranked = _rank_scores(tags, y_proba, min_score)

    # Both settings below change matplotlib's global configuration.
    plt.style.use('dark_background')
    plt.rcParams.update({'font.size': 16})
    fig, ax = plt.subplots(figsize=(12, 9))
    ax.set_xlabel('Score')
    ax.set_title('Score/Tag')

    if ranked:
        names, scores = zip(*ranked)
        ax.barh(names, scores, align='center', color='darkred')
        ax.set_yticks(names, labels=names)
        ax.invert_yaxis()  # labels read top-to-bottom
        # Write each rounded score just inside the right end of its bar.
        for row, score in enumerate(scores):
            ax.text(score - 0.065, row + 0.05, str(round(score, 2)))
    else:
        # zip(*[]) yields nothing, so unpacking it into two names would
        # raise ValueError; show an explicit message instead.
        ax.set_yticks([])
        ax.text(
            0.5,
            0.5,
            f'No tag with score >= {min_score}',
            ha='center',
            va='center',
            transform=ax.transAxes,
        )

    ax.set_xlim(0, 1)
    # Unregister the figure from pyplot so a long-running server does not
    # accumulate one open figure per request; the Figure object itself
    # stays usable for rendering by the caller.
    plt.close(fig)
    return fig
def predict(title: str, post: str):
    """Predict tags for a question and chart their scores.

    Args:
        title: Question title.
        post: Question body.

    Returns:
        A ``(tags, chart)`` pair: the value returned by ``predict_words``
        and the figure returned by ``proba_chart``.
    """
    # Title and body are treated as one document.
    document = title + " " + post
    lemma_text = ' '.join(lemmatize(document))
    # The vectorizer receives a one-element array holding that document.
    features = tfidf.transform(np.array([lemma_text]))
    return predict_words(features), proba_chart(features)
# Gradio UI: two text inputs (title and post) feed predict(); its two return
# values are shown as a text box of tags and a plot.
_input_widgets = [
    gr.Textbox(label="Title", lines=1, placeholder="Title..."),
    gr.Textbox(label="Post", lines=20, placeholder="Post..."),
]
_output_widgets = [gr.Textbox(label="Tags"), gr.Plot()]
demo = gr.Interface(
    fn=predict,
    inputs=_input_widgets,
    outputs=_output_widgets,
)
demo.launch()