File size: 1,612 Bytes
e4d2981 58f56f4 e4d2981 c00feaf 0a60e03 e4d2981 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
import os

import gradio as gr
import joblib
from datasets import Dataset, DatasetDict, load_dataset
from huggingface_hub import login
# Hugging Face access token, read from the environment rather than being
# hard-coded in the source. The previous literal "HF_TOKEN" was a placeholder
# string, not a credential, so the login below could never succeed with it.
token = os.environ.get("HF_TOKEN")
if not token:
    # Fail fast with an actionable message instead of an opaque
    # "invalid token" error coming back from the Hub.
    raise RuntimeError(
        "The HF_TOKEN environment variable is not set; a Hugging Face token "
        "with write access is required to push flagged data to the Hub."
    )
login(token, add_to_git_credential=True, write_permission=True)

# Pre-trained artefacts, loaded once at start-up and shared by every request.
# NOTE: joblib.load unpickles arbitrary Python objects; these files must come
# from a trusted source (they are expected to ship alongside this script).
model = joblib.load('arabic_text_classifier.pkl')
vectorizer = joblib.load('tfidf_vectorizer.pkl')
label_encoder = joblib.load('label_encoder.pkl')
def predict_category(text):
    """Classify ``text`` and return its category label.

    The text is vectorized with the module-level ``vectorizer`` and scored by
    the module-level ``model``. When the highest class probability is below
    0.5 the string ``"Other"`` is returned; otherwise the predicted class is
    mapped back to its label through ``label_encoder``.
    """
    features = vectorizer.transform([text])
    class_probs = model.predict_proba(features)[0]
    top_prob = max(class_probs)
    class_id = model.predict(features)[0]
    # Low-confidence predictions are reported under a catch-all category
    # instead of the model's best guess.
    return (
        "Other"
        if top_prob < 0.5
        else label_encoder.inverse_transform([class_id])[0]
    )
def flag_data(text, prediction):
    """Append one (text, prediction) record to the crowdsourced Hub dataset.

    The current ``train`` split is downloaded, the new record is added, and
    the result is pushed back to the same repository. If the dataset cannot
    be found, an empty one with ``text`` and ``prediction`` columns is used
    as the starting point.

    Args:
        text: The input text that was classified.
        prediction: The label returned for ``text``.

    Raises:
        Exception: Any error other than "dataset not found" raised while
            loading, and any error raised while pushing, propagates to the
            caller.
    """
    repo_id = "Tevfik34/crowdsourced-text-classification-data"
    try:
        dataset = load_dataset(repo_id, split="train")
    except FileNotFoundError:
        # Only a missing/empty dataset may fall back to a fresh one. A bare
        # ``except`` here would also swallow transient network or auth
        # failures and then push a one-row dataset over the existing data.
        dataset = Dataset.from_dict({"text": [], "prediction": []})
    # NOTE(review): the values are wrapped in single-element lists although
    # ``add_item`` takes a single row, so cells are presumably stored as
    # lists -- confirm against the existing dataset schema before changing.
    new_data = {"text": [text], "prediction": [prediction]}
    dataset = dataset.add_item(new_data)
    # NOTE(review): load -> append -> push is not atomic; concurrent requests
    # can presumably overwrite each other's records.
    dataset.push_to_hub(repo_id)
def classify_and_flag(text):
    """Classify ``text``, record the result, and return the predicted label.

    The label comes from ``predict_category``; the (text, label) pair is then
    handed to ``flag_data`` before the label is returned to the caller.
    """
    label = predict_category(text)
    flag_data(text, label)
    return label
# Gradio UI: a multi-line text box feeding ``classify_and_flag`` and a label
# widget showing the returned category.
interface = gr.Interface(
    fn=classify_and_flag,
    inputs=gr.Textbox(
        lines=5,
        placeholder="Enter text in Arabic here...",
        label="Text",
    ),
    outputs=gr.Label(label="text"),
    title="Arabic Text Classifier",
    # Typo fix in the user-facing description: "bu using" -> "by using".
    description="Classify Arabic text into categories by using Logistic Regression",
)
interface.launch()
|