"""Gradio demo: classify loan-document images with an ensemble of binary models.

Importing/running this module loads the pickled ensemble from the working
directory, builds the Gradio interface and launches it.
"""

import pickle

import cv2
import gradio
import numpy as np
from sklearn.naive_bayes import BernoulliNB  # noqa: F401  (not referenced by name; kept from the original script)

# multiclass_model = pickle.load(open('models/MulticlassModel_200x200', 'rb'))

# SECURITY NOTE: pickle executes arbitrary code while loading. Only ship a
# model file that comes from a trusted source.
with open('EnsembleModels_200x200', 'rb') as model_file:
    ensemble_model = pickle.load(model_file)

examples = [
    'images/test2.jpg',
    'images/test4.jpg',
    'images/test6.jpg',
    "images/Incom.jpg",
    "images/DLC.jpg",
    'images/EHD.jpg',
    'images/IDR.jpg',
    'images/PPD.jpg',
    'images/PSLF.jpg',
    'images/SCD.jpg',
    'images/TLF.jpg',
]

# Key of each binary model inside ``ensemble_model`` -> slot of its score in
# the vector returned by ``predict``.
_MODEL_SLOTS = {
    "Inco": 2,
    "Teac": 1,
    "Cons": 0,
    "Publ": 4,
    "Econ": 3,
    "Reaf": 5,
}

# Display label for each score slot; the extra last entry is the fallback
# used when no single slot wins.
_LABELS = ("DLC", "TLF", "IDR", "EHD", "PSLF", "REA", "UNKNOWN")
_FALLBACK_LABEL = _LABELS[-1]


def preprocess(img):
    """Turn a colour image into the flat binary feature row fed to the models.

    The image is resized to 200x200, converted to grayscale, binarised with a
    Gaussian adaptive threshold (block size 11, constant 2) and flattened.

    Args:
        img: 3-channel image array as accepted by ``cv2.resize``.

    Returns:
        Float array of shape ``(1, 40000)`` whose entries are 0.0 or 1.0.
    """
    img = cv2.resize(img, (200, 200))
    # NOTE(review): gradio.Image() is believed to deliver RGB arrays by
    # default, while this conversion assumes BGR. Left as-is because the
    # channel order used at training time is not visible here -- confirm.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )
    # THRESH_BINARY with maxValue=255 yields only 0/255, so /255 gives 0/1.
    return np.reshape(binary, (1, 200 * 200)) / 255


def predict(img):
    """Score an image with every binary model of the ensemble.

    Args:
        img: Raw image array; it is passed through ``preprocess`` first.

    Returns:
        1-D float array of length 6. Slot ``i`` holds column 0 of
        ``predict_proba`` for the model mapped to ``i`` in ``_MODEL_SLOTS``.
    """
    features = preprocess(img)
    proba = np.zeros(len(_MODEL_SLOTS))
    for key, slot in _MODEL_SLOTS.items():
        # Take the scalar explicitly: storing the shape-(1,) slice ``[:, 0]``
        # into a single element relies on an array-to-scalar conversion that
        # NumPy has deprecated.
        proba[slot] = ensemble_model[key].predict_proba(features)[0, 0]
    return proba


def generate_results(proba):
    """Convert the score vector into a one-hot ``{label: score}`` mapping.

    The slot holding the unique minimum of ``proba`` wins (presumably column 0
    is each model's "not this document type" class -- TODO confirm against the
    training code). If the minimum is tied, or no slot equals it (e.g. NaN
    scores), the ``UNKNOWN`` label wins instead.

    Args:
        proba: 1-D sequence of six scores as returned by ``predict``.

    Returns:
        Dict with the seven labels as keys; the winner maps to 1, the rest to 0.
    """
    proba = np.asarray(proba)
    lowest = np.flatnonzero(proba == np.amin(proba))
    winner = _LABELS[lowest[0]] if len(lowest) == 1 else _FALLBACK_LABEL

    results = dict.fromkeys(_LABELS, 0)
    results[winner] = 1
    return results


def inference(img):
    """Gradio callback: classify ``img`` and return the label mapping."""
    return generate_results(predict(img))


demo = gradio.Interface(
    fn=inference,
    inputs=gradio.Image(),
    outputs=gradio.Label(),
    title='Document Classification',
    description='Loan Document Classification Using A Naive Bayes Classifier Ensemble',
    article=(
        'The purpose of this demo was to provide a simple baseline for the '
        'classification of document images. View the complete write up here '
        'https://github.com/PatrickTyBrown/document_classification/blob/main/project_writeup.pdf'
        '\n\n\n'
        'Linkedin: https://www.linkedin.com/in/patrick-ty-brown/\n'
        'Github: https://github.com/PatrickTyBrown/document_classification\n'
        'Portfolio: https://sites.google.com/view/patrick-brown/home'
    ),
    examples=examples,
)

demo.launch()