akshatsanghvi committed on
Commit
ddaa8f4
1 Parent(s): b9fe88f

Update file

Browse files
Files changed (2) hide show
  1. URLFeatureExtraction.py +7 -24
  2. app.py +13 -8
URLFeatureExtraction.py CHANGED
@@ -178,7 +178,7 @@ def forwarding(response):
178
  else:
179
  return 1
180
 
181
- state = 0
182
  def featureExtraction(url):
183
 
184
  new_url = url
@@ -199,7 +199,7 @@ def featureExtraction(url):
199
  response = ""
200
 
201
  url = new_url
202
- print("URL", url)
203
 
204
  features = []
205
  features.append(havingIP(url))
@@ -212,15 +212,16 @@ def featureExtraction(url):
212
  features.append(prefixSuffix(url))
213
 
214
  try:
215
- global state
216
 
217
  domain_name = whois.whois(urlparse(url).netloc)
218
 
219
  if domain_name.get('domain_name'):
220
- state = 0
221
 
222
  else:
223
- state = 1
 
224
  dns = 0 if socket.gethostbyname(domain_name.domain_name[0]) else 1
225
  except:
226
  dns = 1
@@ -239,22 +240,4 @@ def featureExtraction(url):
239
 
240
  feature_names = ['Domain', 'Have_IP', 'Have_At', 'URL_Length', 'URL_Depth','Redirection',
241
  'https_Domain', 'TinyURL', 'Prefix/Suffix', 'DNS_Record', 'Web_Traffic',
242
- 'Domain_Age', 'Domain_End', 'iFrame', 'Mouse_Over','Right_Click', 'Web_Forwards', 'Label']
243
-
244
- # I @ L D R D t P D T A E i M R F L
245
- # . . . . . .
246
-
247
- # 0,0,1,3,0,0,0,0,0,1,0,1,0,0,1,0 0
248
- # 0,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0 Y
249
- # 0,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0 -
250
-
251
- # . .
252
- # 0,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1 0
253
- # 0,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0
254
- # 0,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0 -
255
-
256
- # 0,0,1,3,0,0,0,0,0,0,1,1,0,0,1,0 1
257
- # 0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0
258
- # 0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0 -
259
-
260
- # Prints : site. history. array. pred.
 
178
  else:
179
  return 1
180
 
181
+ flag = False
182
  def featureExtraction(url):
183
 
184
  new_url = url
 
199
  response = ""
200
 
201
  url = new_url
202
+ print("URL:\t", url)
203
 
204
  features = []
205
  features.append(havingIP(url))
 
212
  features.append(prefixSuffix(url))
213
 
214
  try:
215
+ global flag
216
 
217
  domain_name = whois.whois(urlparse(url).netloc)
218
 
219
  if domain_name.get('domain_name'):
220
+ pass
221
 
222
  else:
223
+ flag = True
224
+
225
  dns = 0 if socket.gethostbyname(domain_name.domain_name[0]) else 1
226
  except:
227
  dns = 1
 
240
 
241
  feature_names = ['Domain', 'Have_IP', 'Have_At', 'URL_Length', 'URL_Depth','Redirection',
242
  'https_Domain', 'TinyURL', 'Prefix/Suffix', 'DNS_Record', 'Web_Traffic',
243
+ 'Domain_Age', 'Domain_End', 'iFrame', 'Mouse_Over','Right_Click', 'Web_Forwards', 'Label']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -17,10 +17,9 @@ def isPhishing(link):
17
 
18
  features = URLFeatureExtraction.featureExtraction(link)
19
  print(features)
20
-
21
  prediction = model2.predict([features])
22
- print(prediction)
23
-
24
  df = df._append({'URL': link, 'Phishy?': "UnSafe" if prediction[0] else "Safe"}, ignore_index=True)
25
  return prediction[0]
26
 
@@ -32,17 +31,25 @@ def check_URL(Email):
32
  extractor = URLExtract()
33
  urls = extractor.find_urls(Email)
34
  n_urls = len(urls)
 
 
35
  if urls:
36
  bad_urls = sum([isPhishing(url) for url in urls])
37
  else:
38
  bad_urls = 0
39
- print("Out of {} urls {} are phishing".format(n_urls, bad_urls))
40
 
41
  return bad_urls
42
 
43
  def check_Mail(Email):
44
- state = max(URLFeatureExtraction.state, 0)
45
- return [isSpam(Email), check_URL(Email), state, df]
 
 
 
 
 
 
46
 
47
  iface = gr.Interface(
48
  fn=check_Mail,
@@ -54,6 +61,4 @@ iface = gr.Interface(
54
  gr.Dataframe(label="Insights", interactive=False)
55
  ]
56
  )
57
-
58
- # Launch the Gradio app
59
  iface.launch()
 
17
 
18
  features = URLFeatureExtraction.featureExtraction(link)
19
  print(features)
 
20
  prediction = model2.predict([features])
21
+ print(prediction[0])
22
+
23
  df = df._append({'URL': link, 'Phishy?': "UnSafe" if prediction[0] else "Safe"}, ignore_index=True)
24
  return prediction[0]
25
 
 
31
  extractor = URLExtract()
32
  urls = extractor.find_urls(Email)
33
  n_urls = len(urls)
34
+
35
+ if not urls: return -1
36
  if urls:
37
  bad_urls = sum([isPhishing(url) for url in urls])
38
  else:
39
  bad_urls = 0
40
+ print("Out of {} urls {} are legitimate.".format(n_urls, n_urls - bad_urls))
41
 
42
  return bad_urls
43
 
44
  def check_Mail(Email):
45
+ bad_urls = check_URL(Email)
46
+ if bad_urls==-1:
47
+ return [isSpam(Email), 0, 'Safe', df]
48
+
49
+ if URLFeatureExtraction.flag or bad_urls:
50
+ return [isSpam(Email), bad_urls, 'Risky', df]
51
+
52
+ return [isSpam(Email), bad_urls, 'Safe', df]
53
 
54
  iface = gr.Interface(
55
  fn=check_Mail,
 
61
  gr.Dataframe(label="Insights", interactive=False)
62
  ]
63
  )
 
 
64
  iface.launch()