jayasuriyaK committed
Commit 664a6cd
1 Parent(s): e0c6f6e

Upload 5 files

CustomModel/config.json ADDED
@@ -0,0 +1,27 @@
+ {
+   "_name_or_path": "bert-base-uncased",
+   "architectures": [
+     "BertForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "problem_type": "single_label_classification",
+   "torch_dtype": "float32",
+   "transformers_version": "4.39.3",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 30522
+ }
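
The config describes a stock bert-base-uncased encoder (12 layers, 12 heads, hidden size 768) with a two-class single-label classification head; no id2label mapping is set, so the classes are distinguished only by index. A minimal loading sketch, assuming the CustomModel/ directory from this commit has been downloaded locally:

from transformers import BertForSequenceClassification, BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertForSequenceClassification.from_pretrained("CustomModel")  # reads config.json + model.safetensors
model.eval()  # inference only; disables dropout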
CustomModel/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c0ae3b4736071ebf406209d00d51c502108761fafa3c8df37f6a009f0decb157
+ size 437958648
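
This is a Git LFS pointer, not the weights themselves: per the LFS spec, the oid is the SHA-256 of the real ~438 MB safetensors file, which `git lfs pull` fetches. A sketch, assuming the file has been pulled, that checks the download against the recorded digest:

import hashlib

def sha256_of(path, chunk_size=1 << 20):
    # Stream the file so the ~438 MB of weights never sit in memory at once
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

expected = "c0ae3b4736071ebf406209d00d51c502108761fafa3c8df37f6a009f0decb157"
assert sha256_of("CustomModel/model.safetensors") == expected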
CustomModel/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:62f276a3fac2555bc29c7da8ad3095096c7ee3452711ca0c0cab720c0e053210
+ size 4920
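
training_args.bin is the pickled TrainingArguments object that transformers' Trainer writes alongside a saved model. A sketch for inspecting it; note that unpickling runs code, so only do this for files you trust (on older torch versions the weights_only argument may not exist and can be dropped):

import torch

# weights_only=False is needed because this is a pickled TrainingArguments
# object, not a plain tensor file
args = torch.load("CustomModel/training_args.bin", weights_only=False)
print(args)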
app.py ADDED
@@ -0,0 +1,115 @@
+ # Run the app:
+ # python -m streamlit run d:/NSFW/Project/test1.py
+ import math
+
+ import keras_ocr
+ import streamlit as st
+ import torch
+ from transformers import BertTokenizer, BertForSequenceClassification
+
+ # Initialize the OCR pipeline, the tokenizer, and the fine-tuned classifier
+ pipeline = keras_ocr.pipeline.Pipeline()
+ tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+ model_2 = BertForSequenceClassification.from_pretrained("CustomModel")
+ model_2.to('cpu')
+
+ def get_distance(predictions):
+     """
+     Return a list of dictionaries, one per detection, with keys:
+     * text : detected text in image
+     * center_x : center of bounding box (x)
+     * center_y : center of bounding box (y)
+     * distance_from_origin : distance of the box center from the origin (0, 0)
+     * distance_y : vertical distance between the box center and the origin
+     """
+     # Point of origin
+     x0, y0 = 0, 0
+
+     detections = []
+     for group in predictions:
+         # keras-ocr boxes list the four corners in the order
+         # top-left, top-right, bottom-right, bottom-left
+         top_left_x, top_left_y = group[1][0]
+         bottom_right_x, bottom_right_y = group[1][2]
+         center_x = (top_left_x + bottom_right_x) / 2
+         center_y = (top_left_y + bottom_right_y) / 2
+
+         # Distance of the box center from the origin
+         distance_from_origin = math.dist([x0, y0], [center_x, center_y])
+
+         # Vertical offset from the origin, used to split rows
+         distance_y = center_y - y0
+
+         detections.append({
+             'text': group[0],
+             'center_x': center_x,
+             'center_y': center_y,
+             'distance_from_origin': distance_from_origin,
+             'distance_y': distance_y
+         })
+
+     return detections
+
+ def distinguish_rows(lst, thresh=15):
+     """Split detections into rows: consecutive detections whose vertical
+     centers differ by at most `thresh` pixels share a row."""
+     if len(lst) == 1:
+         yield lst
+         return
+     # The grouping below assumes detections are ordered top to bottom
+     lst = sorted(lst, key=lambda x: x['distance_y'])
+     sublists = []
+     for i in range(len(lst) - 1):
+         if lst[i + 1]['distance_y'] - lst[i]['distance_y'] <= thresh:
+             if lst[i] not in sublists:
+                 sublists.append(lst[i])
+             sublists.append(lst[i + 1])
+         else:
+             # Keep lst[i] even when it forms a row on its own
+             if lst[i] not in sublists:
+                 sublists.append(lst[i])
+             yield sublists
+             sublists = [lst[i + 1]]
+     yield sublists
+
+ # Title of the app
+ st.title("Image Input App")
+
+ # File uploader widget
+ uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
+
+ if uploaded_file is not None:
+
+     # Read in the image
+     read_image = keras_ocr.tools.read(uploaded_file)
+
+     # prediction_groups is a list of (word, box) tuples, one list per input image
+     prediction_groups = pipeline.recognize([read_image])
+     predictions = get_distance(prediction_groups[0])
+
+     # Set thresh higher for text that sits further apart
+     rows = list(distinguish_rows(predictions, thresh=10))
+
+     # Remove all empty rows
+     rows = [row for row in rows if row != []]
+
+     # Order text detections in human-readable form:
+     # left to right within a row, rows top to bottom
+     ordered_preds = []
+     for row in rows:
+         row = sorted(row, key=lambda x: x['distance_from_origin'])
+         for each in row:
+             ordered_preds.append(each['text'])
+
+     # Join the detections into a sentence and classify it
+     sentence = ' '.join(ordered_preds)
+     inputs = tokenizer(sentence, padding=True, truncation=True, return_tensors='pt').to('cpu')
+     with torch.no_grad():
+         outputs = model_2(**inputs)
+     probs = torch.nn.functional.softmax(outputs.logits, dim=-1)[0]
+
+     # Index 0 is the "safe" class, index 1 is "not safe"
+     if probs[0] > probs[1]:
+         st.write('safe')
+     else:
+         st.write('Not safe')
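
The two ordering helpers are easiest to sanity-check without running the OCR stage. A small sketch with hand-made (word, box) tuples in keras-ocr's corner order, assuming get_distance and distinguish_rows are copied into or imported by a test script:

fake_preds = [
    ("world", [[60, 0], [100, 0], [100, 20], [60, 20]]),
    ("hello", [[0, 2], [50, 2], [50, 22], [0, 22]]),
    ("line2", [[0, 40], [50, 40], [50, 60], [0, 60]]),
]
rows = list(distinguish_rows(get_distance(fake_preds), thresh=15))
for row in rows:
    print([d['text'] for d in sorted(row, key=lambda d: d['distance_from_origin'])])
# Prints ['hello', 'world'] and then ['line2']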
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ torch
+ transformers
+ keras_ocr
+ streamlit
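
With these requirements installed (pip install -r requirements.txt), the app starts with streamlit run app.py. Note that keras_ocr also needs a TensorFlow backend, which may have to be installed separately.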