from pandas.io.formats.format import return_docstring
import streamlit as st
import pandas as pd
from transformers import AutoTokenizer, AutoModelForMaskedLM
from transformers import pipeline
import os
import json


@st.cache(show_spinner=False, persist=True)
def load_model(masked_text, model_name):
    """Run fill-mask inference for ``masked_text`` with ``model_name``.

    Despite its name, this function returns the pipeline predictions rather
    than a model object. The checkpoint is loaded from its Flax weights,
    exported together with its tokenizer to the local directory
    ``exported_pytorch_model``, and a ``fill-mask`` pipeline is then built
    from that directory.

    Args:
        masked_text: Sentence containing the mask token to be filled.
        model_name: Model identifier passed to ``from_pretrained``.

    Returns:
        Whatever the ``fill-mask`` pipeline returns for ``masked_text``.
    """
    model = AutoModelForMaskedLM.from_pretrained(model_name, from_flax=True)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Export both pieces to one directory so the pipeline can load them
    # together from disk.
    tokenizer.save_pretrained('exported_pytorch_model')
    model.save_pretrained('exported_pytorch_model')

    nlp = pipeline('fill-mask', model="exported_pytorch_model")
    result_sentence = nlp(masked_text)
    return result_sentence


def main():
    """Render the Streamlit demo page for Hindi masked language modelling."""
    st.title("RoBERTa-Hindi")
    st.markdown(
        "This demo uses pretrained RoBERTa variants for Mask Language Modelling (MLM)"
    )

    models = st.multiselect(
        "Choose models",
        [
            'flax-community/roberta-pretraining-hindi',
            'mrm8488/HindiBERTa',
            'ai4bharat/indic-bert',
            'neuralspace-reverie/indic-transformers-hi-bert',
            'surajp/RoBERTa-hindi-guj-san',
        ],
    )

    # Candidate sentences are read from the "text" column of a CSV file.
    target_text_path = './mlm_custom/mlm_targeted_text.csv'
    target_text_df = pd.read_csv(target_text_path)
    texts = target_text_df['text']

    st.sidebar.title("Hindi MLM")
    masked_text = st.sidebar.selectbox('Select any of the following text', texts)
    st.write('You selected:', masked_text)

    # The multiselect is empty until the user picks something; indexing it
    # unconditionally would raise IndexError on the very first page load.
    if not models:
        st.warning("Please choose at least one model to continue.")
        return

    # Only the first selected model is used for inference.
    selected_model = models[0]

    if st.button('Fill the Mask!'):
        filled_sentence = load_model(masked_text, selected_model)
        st.write(filled_sentence)


if __name__ == "__main__":
    main()