"""Gradio demo that flags gibberish text with either a Markov chain detector or a transformer classifier."""
import csv

import gradio as gr
from transformers import AutoModelForSequenceClassification, AutoTokenizer

from gib_detect_module import detect

# Transformer-based gibberish classifier and its tokenizer.
DLmodel = AutoModelForSequenceClassification.from_pretrained("madhurjindal/autonlp-Gibberish-Detector-492513457", use_auth_token=True)
tokenizer = AutoTokenizer.from_pretrained("madhurjindal/autonlp-Gibberish-Detector-492513457", use_auth_token=True)
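# NOTE: label names are read from the model config at runtime (DLmodel.config.id2label);
# for this checkpoint they are assumed to be four classes such as "clean", "mild gibberish",
# "noise" and "word salad", matching num_top_classes=4 in the Label output below.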
def greet(name):
    # Leftover from the default Gradio template; not used by the interface below.
    return "Hello " + name + "!!"


def detect_gibberish(line, f):
    """Markov chain path: classify a single line, or annotate an uploaded CSV."""
    if line:
        if detect(line):
            return "Valid!!!!", None, None
        else:
            return "Bollocks Gibberish", None, None
    elif f:
        return None, annotate_csv(f), None
    # Nothing submitted: return empty values for all three outputs.
    return None, None, None
def annotate_csv(f):
    """Annotate each row of the uploaded CSV with the Markov chain detector's verdict."""
    with open(f.name) as csvfile:
        creader = csv.reader(csvfile, delimiter=',', quotechar='"')
        with open('out.csv', 'w', newline='') as csvout:
            cwriter = csv.writer(csvout, delimiter=',',
                                 quotechar='"', quoting=csv.QUOTE_MINIMAL)
            for row in creader:
                # Classify the first column and append the result to the row.
                row.append(str(detect(row[0])))
                cwriter.writerow(row)
    return "out.csv"
def annotate_csv_deep(f):
    """Annotate each row of the uploaded CSV with the transformer model's top label and confidence."""
    labels = DLmodel.config.id2label
    with open(f.name) as csvfile:
        creader = csv.reader(csvfile, delimiter=',', quotechar='"')
        with open('out.csv', 'w', newline='') as csvout:
            cwriter = csv.writer(csvout, delimiter=',',
                                 quotechar='"', quoting=csv.QUOTE_MINIMAL)
            for row in creader:
                # Classify the first column only, mirroring annotate_csv.
                inputs = tokenizer(row[0], return_tensors="pt")
                outputs = DLmodel(**inputs)
                probs = outputs.logits.softmax(dim=-1).detach().cpu().flatten().numpy().tolist()
                idx = probs.index(max(probs))
                row.append(labels[idx])
                row.append("{:.0%}".format(probs[idx]))
                cwriter.writerow(row)
    return "out.csv"
def detect_gibberish_deep(line, f):
    """Transformer path: classify a single line, or annotate an uploaded CSV."""
    if line:
        inputs = tokenizer(line, return_tensors="pt")
        labels = DLmodel.config.id2label
        outputs = DLmodel(**inputs)
        probs = outputs.logits.softmax(dim=-1).detach().cpu().flatten().numpy().tolist()
        # Map each label name to its probability for the Label output,
        # and build a human-readable summary for the text output.
        output = dict(zip(labels.values(), probs))
        readable_output = ""
        for k, v in output.items():
            readable_output += k + " : " + "{:.0%}".format(v) + "\n"
        return readable_output, None, output
    if f:
        return None, annotate_csv_deep(f), None
    # Nothing submitted: return empty values for all three outputs.
    return None, None, None
def detect_gibberish_abstract(model, line, f):
    """Dispatch to the detector selected in the dropdown."""
    if model == "Deep Learning Model":
        return detect_gibberish_deep(line, f)
    else:
        return detect_gibberish(line, f)
inputLine = gr.inputs.Textbox(lines=1, placeholder="Input text here. If both the text and the file have values, only the text input is processed.", default="", label="Text", optional=False)
inputFile = gr.inputs.File(file_count="single", type="file", label="File to Annotate", optional=True)
choices = ["Deep Learning Model", "Markov Chain"]
inputModel = gr.inputs.Dropdown(choices, label="Model")
outputLine = gr.outputs.Textbox(type="auto", label=None)
outputFile = gr.outputs.File(label="Annotated CSV")
label = gr.outputs.Label(num_top_classes=4)
examples = [
    ["Deep Learning Model", "quetzalcoatl", "demo_blank.csv"],
    ["Deep Learning Model", "aasdf", "demo_blank.csv"],
    ["Deep Learning Model", "Covfefe", "demo_blank.csv"],
    ["Markov Chain", "quetzalcoatl", "demo_blank.csv"],
    ["Markov Chain", "aasdf", "demo_blank.csv"],
    ["Markov Chain", "Covfefe", "demo_blank.csv"],
    ["Deep Learning Model", "", "demo_bad.txt"],
    ["Deep Learning Model", "", "demo_mixed.txt"],
    ["Markov Chain", "", "demo_bad.txt"],
    ["Markov Chain", "", "demo_mixed.txt"],
]
# Earlier single-model interface, kept for reference:
# iface = gr.Interface(fn=detect_gibberish, inputs=["text", inputFile], outputs=["text", outputFile], examples=examples, allow_flagging='never')
# iface.launch()
iface = gr.Interface(fn=detect_gibberish_abstract, inputs=[inputModel, inputLine, inputFile], outputs=[outputLine, outputFile, label], examples=examples, allow_flagging='never')
iface.launch()
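# Assumed local usage: install the dependencies from requirements.txt, then run this
# script with Python; Gradio serves the demo on http://127.0.0.1:7860 by default.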