import json
import os
import random

import pandas as pd
import streamlit as st
# NOTE(review): `return_docstring` is not used anywhere in the visible code;
# kept only because the rest of the project may rely on this import.
from pandas.io.formats.format import return_docstring
from transformers import AutoModelForMaskedLM, AutoTokenizer
from transformers import pipeline

# Generic placeholder users type in the text box; it is swapped for the
# model-specific mask token (e.g. "<mask>" vs "[MASK]") before inference.
_MASK_PLACEHOLDER = "<mask>"


@st.cache(show_spinner=False, persist=True)
def load_model(masked_text, model_name):
    """Fill the masked position of ``masked_text`` using ``model_name``.

    Loads the masked-LM checkpoint and its tokenizer, builds a ``fill-mask``
    pipeline, rewrites the generic ``<mask>`` placeholder into the
    tokenizer's own mask token and runs the pipeline.

    Args:
        masked_text: Sentence containing one ``<mask>`` placeholder (or the
            model's native mask token).
        model_name: Hugging Face model identifier to load.

    Returns:
        The ``'sequence'`` field of the first prediction returned by the
        pipeline.
    """
    # NOTE(review): from_flax=True is applied to every model; presumably all
    # selectable checkpoints ship Flax weights -- confirm.
    model = AutoModelForMaskedLM.from_pretrained(model_name, from_flax=True)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    nlp = pipeline('fill-mask', model=model, tokenizer=tokenizer)

    # Replace the placeholder, not the empty string: str.replace("", tok)
    # would insert the mask token between every character of the input.
    mask_token = tokenizer.mask_token
    masked_text = masked_text.replace(_MASK_PLACEHOLDER, mask_token)

    predictions = nlp(masked_text)
    return predictions[0]['sequence']


def main():
    """Render the Streamlit page and show each model's filled sentence."""
    st.title("RoBERTa Hindi")
    st.markdown(
        "This demo uses pretrained RoBERTa variants for Mask Language Modeling (MLM)"
    )

    models = st.multiselect(
        "Choose models",
        [
            'flax-community/roberta-hindi',
            'mrm8488/HindiBERTa',
            'ai4bharat/indic-bert',
            'neuralspace-reverie/indic-transformers-hi-bert',
            'surajp/RoBERTa-hindi-guj-san',
        ],
        ["flax-community/roberta-hindi"],
    )

    # Example sentences offered to the user come from the 'text' column.
    target_text_path = './mlm_custom/mlm_targeted_text.csv'
    target_text_df = pd.read_csv(target_text_path)
    texts = target_text_df['text']

    st.sidebar.title("Hindi MLM")
    pick_random = st.sidebar.checkbox("Pick any random text")

    # The text area is pre-filled either with a random example or with the
    # one chosen in the sidebar; the user may still edit it freely.
    if pick_random:
        default_text = texts[random.randint(0, texts.shape[0] - 1)]
    else:
        default_text = st.sidebar.selectbox(
            'Select any of the following text', texts
        )
    masked_text = st.text_area(
        "Please type a masked sentence to fill", default_text
    )

    if st.button('Fill the Mask!'):
        with st.spinner("Filling the Mask..."):
            filled_masked_texts = [
                load_model(masked_text, selected_model)
                for selected_model in models
            ]

        # One row per selected model, in selection order.
        results_df = pd.DataFrame(
            {
                'Model Name': list(models),
                'Masked Text': [masked_text] * len(models),
                'Filled Masked Text': filled_masked_texts,
            },
            columns=['Model Name', 'Masked Text', 'Filled Masked Text'],
        )
        st.table(results_df)


if __name__ == "__main__":
    main()