# NOTE: removed web-page extraction residue (Hugging Face Spaces header:
# status text, file size, commit hashes, and a line-number gutter) that was
# not part of the program and would be a syntax error if left in place.
# Run the app with:
#   python -m streamlit run d:/NSFW/Project/test1.py
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import math, keras_ocr
# OCR pipeline is built lazily in initialize() because construction is expensive
pipeline = None
# Local directory containing the fine-tuned sequence-classification model
model_path="CustomModel"
# Tokenizer for the base checkpoint the custom model was fine-tuned from
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
model = AutoModelForSequenceClassification.from_pretrained(model_path)
import streamlit as st
def get_distance(predictions):
    """
    Convert keras-ocr predictions into position records used to sort the
    detected words into human reading order.

    Each prediction is a ``(text, box)`` tuple, where ``box`` holds the four
    corner points in the order top-left, top-right, bottom-right, bottom-left.

    Function returns a list of dictionaries with (key, value):
    * text : detected text in image
    * center_x : center of bounding box (x)
    * center_y : center of bounding box (y)
    * distance_from_origin : hypotenuse (distance of the center from (0,0))
    * distance_y : distance between center y and origin (0,0)
    """
    # Point of origin
    x0, y0 = 0, 0
    # Generate list of detection records
    detections = []
    for text, box in predictions:
        # True diagonal corners: index 0 is top-left, index 2 is bottom-right.
        # (The previous code used box[1], which is the top-RIGHT corner, so
        # center_y was the top edge of the box instead of its center.)
        top_left_x, top_left_y = box[0]
        bottom_right_x, bottom_right_y = box[2]
        # Center point of the bounding box
        center_x = (top_left_x + bottom_right_x) / 2
        center_y = (top_left_y + bottom_right_y) / 2
        # Euclidean distance of the center from the origin (Pythagorean theorem)
        distance_from_origin = math.dist([x0, y0], [center_x, center_y])
        # Vertical offset from the origin; used later to group unique rows
        distance_y = center_y - y0
        # Append all results
        detections.append({
            'text': text,
            'center_x': center_x,
            'center_y': center_y,
            'distance_from_origin': distance_from_origin,
            'distance_y': distance_y
        })
    return detections
def distinguish_rows(lst, thresh=10):
    """Group detections (pre-sorted by vertical position) into rows.

    Yields lists of detection dicts whose consecutive ``distance_y`` values
    differ by at most ``thresh`` pixels.

    Fixes over the previous implementation:
    * a list with a single detection is no longer silently dropped
      (``range(0, len(lst)-1)`` was empty, yielding only ``[]``),
    * the first detection is kept even when it forms a row by itself
      (previously the first row started out empty and lst[0] was lost),
    * empty sublists are never yielded.
    """
    if not lst:
        return
    # The current row always starts with the first unconsumed detection.
    row = [lst[0]]
    for prev, curr in zip(lst, lst[1:]):
        if curr['distance_y'] - prev['distance_y'] <= thresh:
            # Close enough vertically: same row.
            row.append(curr)
        else:
            # Gap too large: emit the finished row and start a new one.
            yield row
            row = [curr]
    yield row
# Title of the app
st.title("NSFW Content Detector")
# File uploader widget; restricted to common image formats
uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
def initialize():
    """Lazily build the keras-ocr pipeline (expensive) the first time it is needed."""
    global pipeline
    # Identity comparison per PEP 8 ('is None', not '== None'); build only once.
    if pipeline is None:
        pipeline = keras_ocr.pipeline.Pipeline()
if uploaded_file is not None:
    # Show a small preview of the uploaded image.
    st.image(uploaded_file, caption='Uploaded Image', width=200)
    # Build the OCR pipeline on first use (expensive, so done lazily).
    initialize()
    # Read in image
    read_image = keras_ocr.tools.read(uploaded_file)
    # prediction_groups is one list of (word, box) tuples per input image.
    prediction_groups = pipeline.recognize([read_image])
    # Enrich each detection with its position relative to the origin.
    detections = get_distance(prediction_groups[0])
    # Group detections into rows; set thresh higher for text further apart.
    rows = list(distinguish_rows(detections, thresh=10))
    # Remove all empty rows (truthiness instead of comparing to []).
    rows = [row for row in rows if row]
    # Order text detections in human readable format:
    # rows top-to-bottom, words within a row left-to-right
    # (by distance from the origin).
    ordered_preds = []
    for row in rows:
        for det in sorted(row, key=lambda d: d['distance_from_origin']):
            ordered_preds.append(det['text'])
    # Join detections into a single sentence for classification.
    sentence = ' '.join(ordered_preds)
    # Classify the extracted text with the fine-tuned BERT model.
    inputs = tokenizer(sentence, return_tensors="pt")
    # Inference only: no gradients needed.
    with torch.no_grad():
        outputs = model(**inputs)
    probs = outputs.logits.softmax(dim=-1)
    # Label 0 = safe, label 1 = not safe (assumed from the original
    # comparison — TODO confirm against the model's label mapping).
    if probs[0][0] > probs[0][1]:
        st.write('Safe for Work')
    else:
        st.write('Not Safe for Work')
# (removed stray line-gutter character left over from web-page extraction)