"""Gradio app that classifies Arabic text and records each prediction on the Hub.

On import the script logs in to the Hugging Face Hub with the ``HF_TOKEN``
environment variable, loads the pickled classifier, TF-IDF vectorizer and
label encoder from the working directory, builds the Gradio interface and
launches it.
"""

import logging
import os

import gradio as gr
import joblib
from datasets import Dataset, DatasetDict, load_dataset
from huggingface_hub import login

logger = logging.getLogger(__name__)

# Hub dataset that every (text, prediction) pair is appended to.
DATASET_REPO = "Tevfik34/crowdsourced-text-classification-data"
# Predictions whose best class probability is below this are reported as "Other".
CONFIDENCE_THRESHOLD = 0.5
FALLBACK_LABEL = "Other"

# Markdown shown in the interface; `{}` is filled with the label list below.
DESCRIPTION_TEMPLATE = """
This interface allows you to classify Arabic text into different categories using a machine learning model trained on 160,000 real-world text samples.

**Model Overview**:
- The model is based on **Logistic Regression**.
- It was trained on a large dataset of **160,000 Arabic text entries**, ensuring robustness and accuracy in classifying Arabic text.

**How to use**:
- Enter any Arabic text in the input box.
- The model will predict the category that the text most likely belongs to.
- If the model is uncertain, it will classify the text as 'Other'.

**Available Labels**:
The model can predict the following categories:
- {}

Try entering some text in Arabic to see how the model works.
"""

token = os.getenv('HF_TOKEN')
login(token, add_to_git_credential=True, write_permission=True)

# NOTE: joblib.load unpickles its input, which can execute arbitrary code.
# Only ship artifacts you produced yourself next to this script.
model = joblib.load('arabic_text_classifier.pkl')
vectorizer = joblib.load('tfidf_vectorizer.pkl')
label_encoder = joblib.load('label_encoder.pkl')

available_labels = label_encoder.classes_


def predict_category(text: str, threshold: float = CONFIDENCE_THRESHOLD) -> str:
    """Return the predicted label for *text*, or "Other" when unsure.

    Args:
        text: Raw input text; it is vectorized with the loaded vectorizer.
        threshold: Minimum probability the best class must reach. Below it
            the function returns "Other" instead of a model label.

    Returns:
        The decoded label from the label encoder, or "Other".
    """
    features = vectorizer.transform([text])
    probabilities = model.predict_proba(features)[0]
    if max(probabilities) < threshold:
        # Low confidence: skip the second model call, its result is not used.
        return FALLBACK_LABEL
    encoded_label = model.predict(features)[0]
    return label_encoder.inverse_transform([encoded_label])[0]


def flag_data(text: str, prediction: str) -> None:
    """Append one (text, prediction) row to the Hub dataset and push it.

    The current "train" split is downloaded, extended by one row and pushed
    back to the same repository.

    Raises:
        Exception: Any load failure other than "dataset not found / empty",
            and any failure while pushing, is propagated to the caller.
    """
    try:
        dataset = load_dataset(DATASET_REPO, split="train")
    except FileNotFoundError:
        # Only a missing or empty dataset may start from scratch. Any other
        # failure (network, auth, ...) must not fall through to here,
        # otherwise the push below would replace the existing rows with a
        # single new one.
        logger.warning("Dataset %s not found; starting a new one", DATASET_REPO)
        dataset = Dataset.from_dict({"text": [], "prediction": []})

    # NOTE(review): the values are wrapped in one-element lists, so each cell
    # is stored as a list rather than a plain string. Kept as-is because rows
    # already on the Hub presumably share this schema - confirm before changing.
    new_data = {"text": [text], "prediction": [prediction]}
    dataset = dataset.add_item(new_data)
    dataset.push_to_hub(DATASET_REPO)


def classify_and_flag(text: str) -> str:
    """Classify *text*, record the result on the Hub, and return the label.

    Recording is best-effort: a failure to reach the Hub is logged and does
    not prevent the prediction from being returned to the user.
    """
    prediction = predict_category(text)
    try:
        flag_data(text, prediction)
    except Exception:
        logger.exception("Could not record the prediction in %s", DATASET_REPO)
    return prediction


interface = gr.Interface(
    fn=classify_and_flag,
    inputs=gr.Textbox(
        lines=5,
        placeholder="Enter text in Arabic here...",
        label="Text",
    ),
    outputs=gr.Label(label="Predicted Category"),
    title="Arabic Text Classifier",
    description=DESCRIPTION_TEMPLATE.format(", ".join(available_labels)),
    theme="ParityError/Interstellar",
)

interface.launch()