How do you load the model and generate predictions? Download the pytorch_model.bin file and run the following code:

import pandas as pd
import torch
import transformers
from torch.utils.data import Dataset, DataLoader
from transformers import RobertaModel, RobertaTokenizer, BertModel, BertTokenizer

# Run on the GPU when torch reports one as available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Scoring configuration.
MAX_LEN = 128  # token length each input is padded/truncated to
BATCH_SIZE = 20  # number of rows sent through the model per forward pass
text_col_name = "sentence"  # dataframe column that holds the text to score
category_col = "label_text"  # not read by the scoring code shown here

# Input: a single dataframe with one text column whose header is 'sentence'.
# Call reset_index() on your own dataframe first if its index is not 0..n-1.
test_df = pd.DataFrame(
    {
        "sentence": [
            "a general increase in prices and fall in the purchasing value of money.",
        ],
    }
)

def scoring_data_prep(dataset):
    """Stack every record of ``dataset`` into two batched tensors on ``device``.

    Args:
        dataset: A sized, integer-indexable collection whose items are dicts
            with ``'ids'`` and ``'mask'`` tensors. Each tensor is reshaped to
            rows of ``MAX_LEN`` elements.

    Returns:
        A tuple ``(ids, mask)`` of ``torch.long`` tensors on ``device``, each
        with ``MAX_LEN`` columns and one row (or more, if a record holds
        several multiples of ``MAX_LEN``) per record. An empty dataset yields
        two tensors of shape ``(0, MAX_LEN)``.
    """
    ids_rows = []
    mask_rows = []

    # Index explicitly: the dataset is only required to provide __len__ and
    # __getitem__, and each record is fetched exactly once.
    for i in range(len(dataset)):
        rec = dataset[i]
        ids_rows.append(rec['ids'].reshape(-1, MAX_LEN))
        mask_rows.append(rec['mask'].reshape(-1, MAX_LEN))

    if not ids_rows:
        # torch.cat rejects an empty list, so build the empty result directly.
        empty_ids = torch.empty((0, MAX_LEN), dtype=torch.long, device=device)
        empty_mask = torch.empty((0, MAX_LEN), dtype=torch.long, device=device)
        return empty_ids, empty_mask

    # Concatenate and move to the device once, after all rows are collected,
    # instead of rebuilding the full stack on every loop iteration.
    out_stack = torch.cat(ids_rows, dim=0).to(device, dtype=torch.long)
    mask_stack = torch.cat(mask_rows, dim=0).to(device, dtype=torch.long)

    return out_stack, mask_stack


class Triage(Dataset):
    """Torch ``Dataset`` that tokenizes one text column of a dataframe.

    Each item is a dict with two ``torch.long`` tensors, ``"ids"`` (token ids)
    and ``"mask"`` (attention mask), for a single row, padded or truncated to
    ``max_len`` by the tokenizer. No targets are produced.

    Args:
        dataframe: Dataframe holding the text to encode.
        tokenizer: Object exposing ``encode_plus`` that returns a mapping with
            ``"input_ids"`` and ``"attention_mask"``.
        max_len: Length passed to the tokenizer as ``max_length``.
        text_col_name: Name of the dataframe column containing the text.
    """

    def __init__(self, dataframe, tokenizer, max_len, text_col_name):
        self.len = len(dataframe)
        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.text_col_name = text_col_name

    def __getitem__(self, index):
        """Return the encoded ``ids`` and ``mask`` tensors for row ``index``."""
        # Positional lookup: callers pass 0..len-1, which only matches the
        # index labels when the dataframe has a default RangeIndex.
        title = str(self.data[self.text_col_name].iloc[index])
        # Collapse every run of whitespace into a single space.
        title = " ".join(title.split())
        inputs = self.tokenizer.encode_plus(
            title,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            # Replaces the deprecated pad_to_max_length=True.
            padding="max_length",
            return_token_type_ids=True,
            truncation=True,
        )
        ids = inputs["input_ids"]
        mask = inputs["attention_mask"]

        return {
            "ids": torch.tensor(ids, dtype=torch.long),
            "mask": torch.tensor(mask, dtype=torch.long),
        }

    def __len__(self):
        """Return the number of rows captured at construction time."""
        return self.len

class BERTClass(torch.nn.Module):
    """Classifier made of the ``ProsusAI/finbert`` encoder plus a small head.

    The head is Linear(768, 768) -> ReLU -> Dropout(0.3) -> Linear(768,
    num_class). Attribute names are part of the checkpoint's state-dict keys
    and must not be renamed.

    Args:
        num_class: Number of output classes (width of the final layer).
    """

    def __init__(self, num_class):
        super().__init__()
        self.num_class = num_class
        self.l1 = BertModel.from_pretrained("ProsusAI/finbert")
        self.pre_classifier = torch.nn.Linear(768, 768)
        self.dropout = torch.nn.Dropout(0.3)
        self.classifier = torch.nn.Linear(768, self.num_class)
        self.history = {}

    def forward(self, input_ids, attention_mask):
        """Return the head's output (one score per class) for each input row."""
        encoder_out = self.l1(input_ids=input_ids, attention_mask=attention_mask)
        # First element of the encoder output, sliced at position 0 of the
        # second axis (the [CLS] slot when inputs carry special tokens).
        first_token = encoder_out[0][:, 0]
        activation = torch.nn.ReLU()
        features = self.dropout(activation(self.pre_classifier(first_token)))
        return self.classifier(features)
        
def do_predict(model, tokenizer, test_df):
    """Return the predicted class index for every row of ``test_df``.

    Args:
        model: Callable module taking ``(ids, mask)`` batches and returning a
            2-D tensor of per-class scores; it is switched to eval mode.
        tokenizer: Tokenizer handed to ``Triage`` to encode the text column.
        test_df: Dataframe containing the ``text_col_name`` column.

    Returns:
        A list of ints, one per row, holding the index of the highest-scoring
        class. An empty dataframe yields an empty list.
    """
    n_rows = len(test_df)
    if n_rows == 0:
        # Nothing to score; torch.cat would fail on an empty list of batches.
        return []

    test_set = Triage(test_df, tokenizer, MAX_LEN, text_col_name)
    out_stack, mask_stack = scoring_data_prep(dataset=test_set)

    batch_outputs = []
    model.eval()
    with torch.no_grad():
        # Feed the pre-stacked tensors through the model BATCH_SIZE rows at a time.
        for start in range(0, n_rows, BATCH_SIZE):
            stop = start + BATCH_SIZE
            batch_outputs.append(
                model(out_stack[start:stop, :], mask_stack[start:stop, :])
            )
        combined_output = torch.cat(batch_outputs, dim=0)
        # Only the top class is needed, so argmax replaces a full argsort.
        preds = torch.argmax(combined_output, dim=1)

    return preds.to('cpu').tolist()
  
# Build the two-class model and place it on the selected device.
model_read = BERTClass(2)
model_read.to(device)
# The checkpoint is a dict whose 'model_state_dict' entry holds the weights.
# NOTE: torch.load unpickles the file - only load checkpoints you trust.
model_read.load_state_dict(torch.load('pytorch_model.bin', map_location=device)['model_state_dict'])

tokenizer_read = BertTokenizer.from_pretrained('ProsusAI/finbert')
actual_predictions_read = do_predict(model_read, tokenizer_read, test_df)

# Class index 1 is labelled 'readable'; every other index is 'not_readable'.
test_df['readability'] = ['readable' if i == 1 else 'not_readable' for i in actual_predictions_read]
                                                                                                                                                    
@InProceedings{ghosh-EtAl:2022:FNP,
  author    = {Ghosh, Sohom  and  Sengupta, Shovon  and  Naskar, Sudip  and  Singh, Sunny Kumar},
  title     = {FinRAD: Financial Readability Assessment Dataset - 13,000+ Definitions of Financial Terms for Measuring Readability},
  booktitle      = {Proceedings of the 4th Financial Narrative Processing Workshop @LREC2022},
  month          = {June},
  year           = {2022},
  address        = {Marseille, France},
  publisher      = {European Language Resources Association},
  pages     = {1--9},
  url       = {http://www.lrec-conf.org/proceedings/lrec2022/workshops/FNP/pdf/2022.fnp-1.1.pdf}
}
@InProceedings{ghosh-2021-finread,
    title = "FinRead: A Transfer Learning Based Tool to Assess Readability of Definitions of Financial Terms",
    author = "Sohom Ghosh and Shovon Sengupta and Sudip Kumar Naskar and Sunny Kumar Singh",
    booktitle = "Proceedings of the 18th International Conference on Natural Language Processing (ICON): System Demonstrations",
    month = "dec",
    year = "2021",
    publisher = "NLP Association of India (NLPAI)",
    url = "forthcoming",
    intype = {to appear in},
    pre-print = "https://easychair.org/publications/preprint/1wvS"
}
Downloads last month

-

Downloads are not tracked for this model. How to track
Inference Providers NEW
This model is not currently available via any of the supported Inference Providers.
The model cannot be deployed to the HF Inference API: The model has no library tag.