Spaces:

akshatsanghvi
/

spam-email-detection

Sleeping

akshatsanghvi committed on Jun 3

Commit

ddaa8f4

•

1 Parent(s): b9fe88f

Update file

Files changed (2) hide show

URLFeatureExtraction.py CHANGED Viewed

@@ -178,7 +178,7 @@ def forwarding(response):
     else:
       return 1
-state = 0
 def featureExtraction(url):
   new_url = url
@@ -199,7 +199,7 @@ def featureExtraction(url):
         response = ""
   url = new_url
-  print("URL", url)
   features = []
   features.append(havingIP(url))
@@ -212,15 +212,16 @@ def featureExtraction(url):
   features.append(prefixSuffix(url))
   try:
-    global state
     domain_name = whois.whois(urlparse(url).netloc)
     if domain_name.get('domain_name'):
-      state = 0
     else:
-      state = 1
     dns = 0 if socket.gethostbyname(domain_name.domain_name[0]) else 1
   except:
     dns = 1
@@ -239,22 +240,4 @@ def featureExtraction(url):
 feature_names = ['Domain', 'Have_IP', 'Have_At', 'URL_Length', 'URL_Depth','Redirection',
                       'https_Domain', 'TinyURL', 'Prefix/Suffix', 'DNS_Record', 'Web_Traffic',
-                      'Domain_Age', 'Domain_End', 'iFrame', 'Mouse_Over','Right_Click', 'Web_Forwards', 'Label']
-# I @ L D R D t P D T A E i M R F  L
-#     . .         . . .         .
-# 0,0,1,3,0,0,0,0,0,1,0,1,0,0,1,0  0
-# 0,0,1,1,0,0,0,0,0,1,0,1,0,0,1,0  Y
-# 0,0,0,1,0,0,0,0,1,0,1,1,0,0,1,0  -
-#                         . .
-# 0,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1  0
-# 0,0,1,0,0,0,0,0,0,1,1,1,0,0,1,0
-# 0,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0  -
-# 0,0,1,3,0,0,0,0,0,0,1,1,0,0,1,0  1
-# 0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0
-# 0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0  -
-# Prints : site. history. array. pred.

     else:
       return 1
+flag = False
 def featureExtraction(url):
   new_url = url
         response = ""
   url = new_url
+  print("URL:\t", url)
   features = []
   features.append(havingIP(url))
   features.append(prefixSuffix(url))
   try:
+    global flag
     domain_name = whois.whois(urlparse(url).netloc)
     if domain_name.get('domain_name'):
+      pass
     else:
+      flag = True
     dns = 0 if socket.gethostbyname(domain_name.domain_name[0]) else 1
   except:
     dns = 1
 feature_names = ['Domain', 'Have_IP', 'Have_At', 'URL_Length', 'URL_Depth','Redirection',
                       'https_Domain', 'TinyURL', 'Prefix/Suffix', 'DNS_Record', 'Web_Traffic',
+                      'Domain_Age', 'Domain_End', 'iFrame', 'Mouse_Over','Right_Click', 'Web_Forwards', 'Label']

app.py CHANGED Viewed

@@ -17,10 +17,9 @@ def isPhishing(link):
     features = URLFeatureExtraction.featureExtraction(link)
     print(features)
     prediction = model2.predict([features])
-    print(prediction)
     df = df._append({'URL': link, 'Phishy?': "UnSafe" if prediction[0] else "Safe"}, ignore_index=True)
     return prediction[0]
@@ -32,17 +31,25 @@ def check_URL(Email):
     extractor = URLExtract()
     urls = extractor.find_urls(Email)
     n_urls = len(urls)
     if urls:
         bad_urls = sum([isPhishing(url) for url in urls])
     else:
         bad_urls = 0
-    print("Out of {} urls {} are phishing".format(n_urls, bad_urls))
     return bad_urls
 def check_Mail(Email):
-    state = max(URLFeatureExtraction.state, 0)
-    return [isSpam(Email), check_URL(Email), state, df]
 iface = gr.Interface(
     fn=check_Mail,
@@ -54,6 +61,4 @@ iface = gr.Interface(
         gr.Dataframe(label="Insights", interactive=False)
     ]
 )
-# Launch the Gradio app
 iface.launch()

     features = URLFeatureExtraction.featureExtraction(link)
     print(features)
     prediction = model2.predict([features])
+    print(prediction[0])
     df = df._append({'URL': link, 'Phishy?': "UnSafe" if prediction[0] else "Safe"}, ignore_index=True)
     return prediction[0]
     extractor = URLExtract()
     urls = extractor.find_urls(Email)
     n_urls = len(urls)
+    if not urls: return -1
     if urls:
         bad_urls = sum([isPhishing(url) for url in urls])
     else:
         bad_urls = 0
+    print("Out of {} urls {} are legitimate.".format(n_urls, n_urls - bad_urls))
     return bad_urls
 def check_Mail(Email):
+    bad_urls = check_URL(Email)
+    if bad_urls==-1:
+        return [isSpam(Email), 0, 'Safe', df]
+    if URLFeatureExtraction.flag or bad_urls:
+        return [isSpam(Email), bad_urls, 'Risky', df]
+    return [isSpam(Email), bad_urls, 'Safe', df]
 iface = gr.Interface(
     fn=check_Mail,
         gr.Dataframe(label="Insights", interactive=False)
     ]
 )
 iface.launch()