import logging import warnings import wikipedia import streamlit as st from typing import List from scanner_utils import * from xgboost import XGBClassifier from streamlit_searchbox import st_searchbox from transformers import logging as hflogging logging.disable(logging.WARNING) hflogging.set_verbosity_warning() warnings.simplefilter(action='ignore', category=UserWarning) warnings.simplefilter(action='ignore', category=FutureWarning) warnings.simplefilter(action='ignore', category=DeprecationWarning) st.set_page_config(layout="centered", page_title="Egyptian Wikipedia Scanner", page_icon="🇪🇬") wikipedia.set_lang("arz") with open('.streamlit/style.css') as f: st.markdown(f'', unsafe_allow_html=True) st.markdown("""

Egyptian Arabic Wikipedia Scanner

Automatic Detection of Template-translated Articles in the Egyptian Wikipedia
""", unsafe_allow_html=True) st.markdown("", unsafe_allow_html=True) def search_wikipedia(searchterm: str) -> List[any]: return wikipedia.search(searchterm) if searchterm else [] @st.cache_resource def load_xgb_model(model): loaded_xgb_classifier = XGBClassifier() loaded_xgb_classifier.load_model(model) return loaded_xgb_classifier selected_title = st_searchbox(search_wikipedia, label="Search for an article in Egyptian Arabic Wikipedia:", placeholder="Search for an article", rerun_on_update=True, clear_on_submit=False, key="wiki_searchbox") if selected_title: X, article, dataframe, selected_title = prepare_features(selected_title) st.write(f':black_small_square: Collected Metadata of **{selected_title}**') st.dataframe(dataframe, hide_index=True , use_container_width=True) loaded_xgb_classifier = load_xgb_model("XGBoost.model") id2label = {0:'Human-generated Article', 1:'Template-translated Article'} result = id2label[int(loaded_xgb_classifier.predict(X))] if result =='Human-generated Article': st.write(f":black_small_square: Automatic Classification of **{selected_title}**") st.success(result, icon="✅") else: st.write(f":black_small_square: Automatic Classification of **{selected_title}**") st.error(result, icon="🚨") st.write(f":black_small_square: Full Summary of **{selected_title}**") with st.expander(f'**{selected_title}**', expanded=True): st.markdown('', unsafe_allow_html=True) try: article_text = wikipedia.summary(selected_title) except wikipedia.exceptions.DisambiguationError as e: article_text = wikipedia.summary(e.options[0]) st.write(article_text) st.write(f'> :globe_with_meridians: Read Full Text of **{selected_title}**:
{article.url}', unsafe_allow_html=True) st.markdown('

', unsafe_allow_html=True) footer=""" """ st.markdown(footer, unsafe_allow_html=True)