# Gradio demo: gibberish detection using either a Transformer classifier
# (madhurjindal/autonlp-Gibberish-Detector) or a Markov-chain model (gib_detect_module).
import csv

import gradio as gr
from transformers import AutoModelForSequenceClassification, AutoTokenizer

from gib_detect_module import detect

DLmodel = AutoModelForSequenceClassification.from_pretrained(
    "madhurjindal/autonlp-Gibberish-Detector-492513457", use_auth_token=True
)
tokenizer = AutoTokenizer.from_pretrained(
    "madhurjindal/autonlp-Gibberish-Detector-492513457", use_auth_token=True
)


def greet(name):
    # Unused helper; not wired into the interface.
    return "Hello " + name + "!!"


def detect_gibberish(line, f):
    # Markov-chain path: classify a single line of text, or annotate an uploaded CSV.
    if line:
        if detect(line):
            return "Valid!!!!", None, None
        else:
            return "Bollocks Gibberish", None, None
    elif f:
        return None, annotate_csv(f), None
    return None, None, None  # nothing to process


def annotate_csv(f):
    # Append the Markov-chain verdict for the first column of every row.
    with open(f.name) as csvfile:
        creader = csv.reader(csvfile, delimiter=',', quotechar='"')
        with open('out.csv', 'w', newline='') as csvout:
            cwriter = csv.writer(csvout, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            for row in creader:
                row.append(str(detect(row[0])))
                cwriter.writerow(row)
    return "out.csv"


def annotate_csv_deep(f):
    # Append the Transformer's top label and its probability for the first column of every row.
    labels = DLmodel.config.id2label
    with open(f.name) as csvfile:
        creader = csv.reader(csvfile, delimiter=',', quotechar='"')
        with open('out.csv', 'w', newline='') as csvout:
            cwriter = csv.writer(csvout, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            for row in creader:
                # Classify the first column only, consistent with annotate_csv().
                inputs = tokenizer(row[0], return_tensors="pt")
                outputs = DLmodel(**inputs)
                probs = outputs.logits.softmax(dim=-1).detach().cpu().flatten().numpy().tolist()
                idx = probs.index(max(probs))
                row.append(labels[idx])
                row.append("{:.0%}".format(probs[idx]))
                cwriter.writerow(row)
    return "out.csv"


def detect_gibberish_deep(line, f):
    # Transformer path: return per-class probabilities for a single line, or annotate a CSV.
    if line:
        inputs = tokenizer(line, return_tensors="pt")
        labels = DLmodel.config.id2label
        outputs = DLmodel(**inputs)
        probs = outputs.logits.softmax(dim=-1).detach().cpu().flatten().numpy().tolist()
        output = dict(zip(labels.values(), probs))
        readable_output = ""
        for k, v in output.items():
            readable_output += k + " : " + "{:.0%}".format(v) + "\n"
        return readable_output, None, output
    elif f:
        return None, annotate_csv_deep(f), None
    return None, None, None  # nothing to process


def detect_gibberish_abstract(model, line, f):
    # Dispatch to whichever model the user selected in the dropdown.
    if model == "Deep Learning Model":
        return detect_gibberish_deep(line, f)
    else:
        return detect_gibberish(line, f)


inputLine = gr.inputs.Textbox(
    lines=1,
    placeholder="Input text here. If both text and file have values, only the text input will be processed.",
    default="",
    label="Text",
    optional=False,
)
inputFile = gr.inputs.File(file_count="single", type="file", label="File to Annotate", optional=True)
choices = ["Deep Learning Model", "Markov Chain"]
inputModel = gr.inputs.Dropdown(choices)
outputLine = gr.outputs.Textbox(type="auto", label=None)  # unused; the interface uses the "text" shorthand below
outputFile = gr.outputs.File(label="Annotated CSV")
label = gr.outputs.Label(num_top_classes=4)

examples = [
    ["Deep Learning Model", "quetzalcoatl", "demo_blank.csv"],
    ["Deep Learning Model", "aasdf", "demo_blank.csv"],
    ["Deep Learning Model", "Covfefe", "demo_blank.csv"],
    ["Markov Chain", "quetzalcoatl", "demo_blank.csv"],
    ["Markov Chain", "aasdf", "demo_blank.csv"],
    ["Markov Chain", "Covfefe", "demo_blank.csv"],
    ["Deep Learning Model", "", "demo_bad.txt"],
    ["Deep Learning Model", "", "demo_mixed.txt"],
    ["Markov Chain", "", "demo_bad.txt"],
    ["Markov Chain", "", "demo_mixed.txt"],
]

#iface = gr.Interface(fn=[detect_gibberish], inputs=["text",inputFile], outputs=["text",outputFile],examples=examples, allow_flagging='never')
#iface.launch()
iface = gr.Interface(
    fn=detect_gibberish_abstract,
    inputs=[inputModel, inputLine, inputFile],
    outputs=["text", outputFile, label],
    examples=examples,
    allow_flagging='never',
)
iface.launch()