"""Gradio demo that assigns a customer complaint to a category.

The complaint text is cleaned, tokenized and passed through the
``ThirdEyeData/Complaints_Roberta`` sequence-classification checkpoint; the
label of the highest-scoring class is shown to the user.
"""

import gradio as gr
import numpy as np
import torch
from scipy.special import softmax
from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer
from transformers import PegasusForConditionalGeneration

from clean_data import cleaned_complaints


def preprocess(text: str) -> str:
    """Replace user mentions and links in *text* with fixed placeholders.

    The text is split on single spaces. A token that starts with ``@`` and is
    longer than one character becomes ``@user``; a token that starts with
    ``http`` becomes ``http``. All other tokens are left untouched, and the
    tokens are re-joined with single spaces.
    """

    def _placeholder(token: str) -> str:
        if token.startswith("@") and len(token) > 1:
            return "@user"
        if token.startswith("http"):
            return "http"
        return token

    return " ".join(_placeholder(token) for token in text.split(" "))


# Load the model, tokenizer and config once at import time so every request
# served by the Gradio app reuses them.
MODEL = "ThirdEyeData/Complaints_Roberta"
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
tokenizer = AutoTokenizer.from_pretrained(MODEL)
config = AutoConfig.from_pretrained(MODEL)

# Upper bound on tokens fed to the encoder. Assumes this checkpoint keeps the
# usual 512-token RoBERTa limit -- TODO confirm against the model config.
MAX_INPUT_TOKENS = 512


def classify_compliant(text: str) -> str:
    """Return the predicted category label for a complaint.

    Args:
        text: Raw complaint text as typed by the user.

    Returns:
        The ``config.id2label`` entry of the highest-scoring class, or the
        message ``"Cannot Categorize the Complaint"`` when the cleaned text
        has a length below 3.
    """
    text = cleaned_complaints(text)
    if len(text) < 3:
        return "Cannot Categorize the Complaint"

    text = preprocess(text)
    # Truncate explicitly: without it, a complaint longer than the encoder's
    # position limit makes the forward pass fail instead of returning a label.
    encoded_input = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=MAX_INPUT_TOKENS,
    )

    # Inference only, so skip building the autograd graph.
    with torch.no_grad():
        output = model(**encoded_input)

    # output[0] holds the logits; [0] selects the single sequence in the batch.
    scores = softmax(output[0][0].numpy())
    best_class = int(np.argmax(scores))
    return config.id2label[best_class]


# Build the Gradio app.
title = "Customer Complaints Categorization"
description = """ This application uses fine-tuned Roberta to perform Customer Complaints Categorization. Roberta is a popular pre-trained language model that can be used for a variety of natural language processing tasks. This text classification model helps the company to categorize incoming support requests submitted by users and determine the appropriate course of action. 
The application can provide an opportunity for the service provider to resolve the customer’s problems on time and therefore, reduce dissatisfaction levels. Write a complaint on an insurance product or service and see how the Machine Learning model is able to Categorization your Complaint. Below is the type in which the complaints are segmented: 1. Debt Collection 2. False Claim or Statement 3. Legal Issue 4. Improper contact or sharing of info 5. Follow Up Issue """
article = """ - Click submit button to test Consumer Complaint Segmentation - Click the clear button to refresh the text - This application has a linked model https://huggingface.co/ThirdEyeData/Complaints_Roberta """

# Sample complaints offered below the input box; each inner list is one example.
EXAMPLE_COMPLAINTS = [
    ["""The day before my Salliemae student loan payment was due I contacted a rep to discuss the impact on my account of making my payment at the end of the month rather than the middle for just that one month. The rep indicated it would be no problem, but that I still may get a call each day from Salliemae until I made my payment. I understood, requested my account be notated accordingly, and hung up. For two weeks I endured numerous calls per day ; I lost count at six calls one day, which was the norm for the number of calls Salliemae made in an effort to collect a debt that had a due date that had been arranged and had not come up yet. """],
    ["""The representative told me the total amount due was {$2100.00} and that I can settle for half of that amount. 
Unfortunately, I was unable to accept the settlement but began to question the amount because my last statement was {$1800.00} and there was nothing written in the contract for additional interest charges should my account go into collection. I told the representative that I will pay the amount actually owed and I want to make a payment arrangement. She told me I can't just do what I want, If I want to pay the original amount due, it has to be paid in full. I told her that that is not fair debt collection practice and that I am only contractually obligated to the {$1800.00} and we can set up an arrangement from that. """],
    ["""This debt is beyond the Maryland Statute of Limitations. It is illegal for a debt collector to collect on an expired debt. They have taken illegal action by seizing my Maryland State Refund when the debt had already expired and beyond the Statute of Limitation which is 3 years in the state of Maryland"""],
    ["""The company has been calling my employer in an attempt to collect a debt. When I spoke with them and informed them that this was not an appropriate number to call. I asked what company they were calling from and a phone number so he told me the company name, but the man on the phone would not give me his name or a phone number. I had mailed a letter requesting verification a few weeks ago and hadn't received anything back. In the letter I specifically requested that all communication be done through mail."""],
    [""" I do n't think I chose the correct issue above, however I think it is closest to my issue. I have a record on my credit report that I have disputed through both the company and the credit bureaus. The dispute is marked as being disputed by me on my report, but it was not removed despite the creditor not sending me verification of this debt. 
I do not even know what this debt is for.I have tried contacting the collection agency by mail to obtain verification with no response and they will not remove the item from my report."""],
]

demo = gr.Interface(
    classify_compliant,
    inputs=gr.Textbox(
        lines=10,
        label="Type your Complaint of our Product here or for a quick demo click on the examples provided below and output will automatically be populated in the output box ",
        max_lines=20,
    ),
    outputs=gr.Textbox(lines=5, label="Complaint Category"),
    title=title,
    description=description,
    article=article,
    live=False,
    # NOTE(review): Gradio's documented keyword is `cache_examples`; this
    # spelling is kept unchanged so example-caching behaviour stays as it is
    # today -- confirm whether caching is actually meant to be disabled.
    cache_example=False,
    examples=EXAMPLE_COMPLAINTS,
)

if __name__ == "__main__":
    demo.launch()