Spaces:
Sleeping
Sleeping
akshatsanghvi
committed on
Commit
•
ddaa8f4
1
Parent(s):
b9fe88f
Update file
Browse files
- URLFeatureExtraction.py +7 -24
- app.py +13 -8
URLFeatureExtraction.py
CHANGED
@@ -178,7 +178,7 @@ def forwarding(response):
|
|
178 |
else:
|
179 |
return 1
|
180 |
|
181 |
-
|
182 |
def featureExtraction(url):
|
183 |
|
184 |
new_url = url
|
@@ -199,7 +199,7 @@ def featureExtraction(url):
|
|
199 |
response = ""
|
200 |
|
201 |
url = new_url
|
202 |
-
print("URL", url)
|
203 |
|
204 |
features = []
|
205 |
features.append(havingIP(url))
|
@@ -212,15 +212,16 @@ def featureExtraction(url):
|
|
212 |
features.append(prefixSuffix(url))
|
213 |
|
214 |
try:
|
215 |
-
global
|
216 |
|
217 |
domain_name = whois.whois(urlparse(url).netloc)
|
218 |
|
219 |
if domain_name.get('domain_name'):
|
220 |
-
|
221 |
|
222 |
else:
|
223 |
-
|
|
|
224 |
dns = 0 if socket.gethostbyname(domain_name.domain_name[0]) else 1
|
225 |
except:
|
226 |
dns = 1
|
@@ -239,22 +240,4 @@ def featureExtraction(url):
|
|
239 |
|
240 |
feature_names = ['Domain', 'Have_IP', 'Have_At', 'URL_Length', 'URL_Depth','Redirection',
|
241 |
'https_Domain', 'TinyURL', 'Prefix/Suffix', 'DNS_Record', 'Web_Traffic',
|
242 |
-
'Domain_Age', 'Domain_End', 'iFrame', 'Mouse_Over','Right_Click', 'Web_Forwards', 'Label']
|
243 |
-
|
244 |
-
# I @ L D R D t P D T A E i M R F L
|
245 |
-
# . . . . . .
|
246 |
-
|
247 |
-
# 0,0,1,3,0,0,0,0,0,1,0,1,0,0,1,0 0
|
248 |
-
# 0,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0 Y
|
249 |
-
# 0,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0 -
|
250 |
-
|
251 |
-
# . .
|
252 |
-
# 0,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1 0
|
253 |
-
# 0,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0
|
254 |
-
# 0,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0 -
|
255 |
-
|
256 |
-
# 0,0,1,3,0,0,0,0,0,0,1,1,0,0,1,0 1
|
257 |
-
# 0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0
|
258 |
-
# 0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0 -
|
259 |
-
|
260 |
-
# Prints : site. history. array. pred.
|
|
|
178 |
else:
|
179 |
return 1
|
180 |
|
181 |
+
flag = False
|
182 |
def featureExtraction(url):
|
183 |
|
184 |
new_url = url
|
|
|
199 |
response = ""
|
200 |
|
201 |
url = new_url
|
202 |
+
print("URL:\t", url)
|
203 |
|
204 |
features = []
|
205 |
features.append(havingIP(url))
|
|
|
212 |
features.append(prefixSuffix(url))
|
213 |
|
214 |
try:
|
215 |
+
global flag
|
216 |
|
217 |
domain_name = whois.whois(urlparse(url).netloc)
|
218 |
|
219 |
if domain_name.get('domain_name'):
|
220 |
+
pass
|
221 |
|
222 |
else:
|
223 |
+
flag = True
|
224 |
+
|
225 |
dns = 0 if socket.gethostbyname(domain_name.domain_name[0]) else 1
|
226 |
except:
|
227 |
dns = 1
|
|
|
240 |
|
241 |
feature_names = ['Domain', 'Have_IP', 'Have_At', 'URL_Length', 'URL_Depth','Redirection',
|
242 |
'https_Domain', 'TinyURL', 'Prefix/Suffix', 'DNS_Record', 'Web_Traffic',
|
243 |
+
'Domain_Age', 'Domain_End', 'iFrame', 'Mouse_Over','Right_Click', 'Web_Forwards', 'Label']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
@@ -17,10 +17,9 @@ def isPhishing(link):
|
|
17 |
|
18 |
features = URLFeatureExtraction.featureExtraction(link)
|
19 |
print(features)
|
20 |
-
|
21 |
prediction = model2.predict([features])
|
22 |
-
print(prediction)
|
23 |
-
|
24 |
df = df._append({'URL': link, 'Phishy?': "UnSafe" if prediction[0] else "Safe"}, ignore_index=True)
|
25 |
return prediction[0]
|
26 |
|
@@ -32,17 +31,25 @@ def check_URL(Email):
|
|
32 |
extractor = URLExtract()
|
33 |
urls = extractor.find_urls(Email)
|
34 |
n_urls = len(urls)
|
|
|
|
|
35 |
if urls:
|
36 |
bad_urls = sum([isPhishing(url) for url in urls])
|
37 |
else:
|
38 |
bad_urls = 0
|
39 |
-
print("Out of {} urls {} are
|
40 |
|
41 |
return bad_urls
|
42 |
|
43 |
def check_Mail(Email):
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
iface = gr.Interface(
|
48 |
fn=check_Mail,
|
@@ -54,6 +61,4 @@ iface = gr.Interface(
|
|
54 |
gr.Dataframe(label="Insights", interactive=False)
|
55 |
]
|
56 |
)
|
57 |
-
|
58 |
-
# Launch the Gradio app
|
59 |
iface.launch()
|
|
|
17 |
|
18 |
features = URLFeatureExtraction.featureExtraction(link)
|
19 |
print(features)
|
|
|
20 |
prediction = model2.predict([features])
|
21 |
+
print(prediction[0])
|
22 |
+
|
23 |
df = df._append({'URL': link, 'Phishy?': "UnSafe" if prediction[0] else "Safe"}, ignore_index=True)
|
24 |
return prediction[0]
|
25 |
|
|
|
31 |
extractor = URLExtract()
|
32 |
urls = extractor.find_urls(Email)
|
33 |
n_urls = len(urls)
|
34 |
+
|
35 |
+
if not urls: return -1
|
36 |
if urls:
|
37 |
bad_urls = sum([isPhishing(url) for url in urls])
|
38 |
else:
|
39 |
bad_urls = 0
|
40 |
+
print("Out of {} urls {} are legitimate.".format(n_urls, n_urls - bad_urls))
|
41 |
|
42 |
return bad_urls
|
43 |
|
44 |
def check_Mail(Email):
|
45 |
+
bad_urls = check_URL(Email)
|
46 |
+
if bad_urls==-1:
|
47 |
+
return [isSpam(Email), 0, 'Safe', df]
|
48 |
+
|
49 |
+
if URLFeatureExtraction.flag or bad_urls:
|
50 |
+
return [isSpam(Email), bad_urls, 'Risky', df]
|
51 |
+
|
52 |
+
return [isSpam(Email), bad_urls, 'Safe', df]
|
53 |
|
54 |
iface = gr.Interface(
|
55 |
fn=check_Mail,
|
|
|
61 |
gr.Dataframe(label="Insights", interactive=False)
|
62 |
]
|
63 |
)
|
|
|
|
|
64 |
iface.launch()
|