{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "---" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ ">

Implementation of Ad-Sherlock for Click-Fraud Detection using Deep-Learning

\n", ">
\n", ">
{K.V.N.Aditya | T.Vaishnavi Sagar | K.Karthik}
\n", ">
\n", ">
CMR Technical Campus
" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---\n", "---" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "###
importing modules
" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import random\n", "import string\n", "import os\n", "import pickle\n", "import torch\n", "import warnings\n", "from faker import Faker\n", "from torch import nn" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "###
initializing path
" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "DMA__ADL = os.path.abspath('../../ADL').replace('\\\\', '/')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "###
configuring 'ipynb' and exploring GPU info
" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "warnings.filterwarnings(\"ignore\")\n", "print(f'count of gpu devices : `{torch.cuda.device_count()}`')\n", "print(f'id of gpu device : `{torch.cuda.current_device()}`')\n", "print(f'is cuda available : `{torch.cuda.is_available()}`')\n", "print(f'is cuda enabled at backend : `{torch.backends.cudnn.enabled}`')\n", "print(f'name of the instance gpu device : `{torch.cuda.get_device_name(torch.cuda.current_device())}`')\n", "print(f'version of cuda : `{torch.backends.cudnn.version()}`')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "###
initializing data
" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(\"--------\")\n", "print(\"IPO\")\n", "io_pklio0uc = f'{DMA__ADL}/0/ip__urls_cords.pkl'\n", "def func_io_sudo_urls_cords(n=4*random.randrange(20,80)):\n", " random.seed(268862)\n", " Faker.seed(268862)\n", " fake = Faker()\n", " sudo_urls = [f'http://www.ads.{fake.domain_name()}' for _ in range(n//4)] + [f'https://www.ads.{fake.domain_name()}' for _ in range(n//4)] + [f'http://www.{fake.domain_name()}' for _ in range(n//4)] + [f'https://www.{fake.domain_name()}' for _ in range(n//4)]\n", " sudo_urls = [sudo_urls[P]+'/'+ ''.join(random.choice(random.choice(string.digits + string.ascii_letters)) for _ in range(random.randint(20, 80))) + '?' + '&'.join([f\"param_{p}={''.join(random.choice(random.choice(string.digits + string.ascii_letters)) for _ in range(random.randint(20, 80)))}\" for p in range(1, random.randint(2, 8) + 1)]) for P in range(len(sudo_urls))]\n", " random.shuffle(sudo_urls)\n", " sudo_isad = [1 if('ad' in i) else 0 for i in sudo_urls]\n", " sudo_cords = [(random.randrange(2,862268),random.randrange(2,862268)) for _ in range(n)]\n", " random.shuffle(sudo_cords)\n", " sudo_urls_cords = [(sudo_urls[p],(sudo_cords[p][0],sudo_cords[p][1]),sudo_isad[p]) for p in range(n)]\n", " random.shuffle(sudo_urls_cords)\n", " return sudo_urls_cords\n", "if(not os.path.exists(io_pklio0uc)):\n", " io__sudo_urls_cords = func_io_sudo_urls_cords(888) # n : multiples of '4'\n", " pickle.dump(io__sudo_urls_cords, open(io_pklio0uc, 'wb'))\n", "io__sudo_urls_cords = pickle.load(open(io_pklio0uc, 'rb'))\n", "io__sudo_urls = [p[0] for p in io__sudo_urls_cords]\n", "io__sudo_cord_x,io__sudo_cord_y = [p[1][0] for p in io__sudo_urls_cords],[p[1][1] for p in io__sudo_urls_cords]\n", "io__sudo_isad = [p[2] for p in io__sudo_urls_cords]\n", "for p in random.sample(io__sudo_urls_cords,4):\n", " print(p)\n", "print(\"--------\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "###
converting the data into tensors
" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "io__sudo_urls = torch.tensor([[ord(p) for p in io] + [(-1)*len(io)]*(862 - len(io)) for io in io__sudo_urls]).to(torch.float).cuda(torch.cuda.current_device())\n", "io__sudo_cord_x = torch.tensor(io__sudo_cord_x).cuda(torch.cuda.current_device())\n", "io__sudo_cord_y = torch.tensor(io__sudo_cord_y).cuda(torch.cuda.current_device())\n", "io__sudo_cords = torch.stack((io__sudo_cord_x,io__sudo_cord_y),dim=1).cuda(torch.cuda.current_device())\n", "io__sudo_isad = torch.tensor(io__sudo_isad).cuda(torch.cuda.current_device())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "###
defining Neural Network
" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class __01__(nn.Module):\n", " def __init__(IO):\n", " super().__init__()\n", " IO._O_ = torch.zeros(1).unsqueeze(0).to(torch.float).cuda(torch.cuda.current_device())\n", " IO._I_ = torch.ones(1).unsqueeze(0).to(torch.float).cuda(torch.cuda.current_device())\n", " IO.l20 = nn.Linear(862,1)\n", " IO.l21 = nn.Linear(2,1)\n", " IO.l30 = nn.Bilinear(1,1,1)\n", " IO.l31 = nn.Bilinear(1,1,1)\n", " IO.l40 = nn.Identity()\n", " IO.l41 = nn.Identity() \n", " IO.l5 = nn.Sigmoid()\n", " IO.h20 = torch.zeros(1).unsqueeze(0).to(torch.float).cuda(torch.cuda.current_device())\n", " IO.h21 = torch.zeros(1).unsqueeze(0).to(torch.float).cuda(torch.cuda.current_device())\n", " IO.h22 = torch.zeros(1).unsqueeze(0).to(torch.float).cuda(torch.cuda.current_device())\n", " IO.h23 = torch.zeros(1).unsqueeze(0).to(torch.float).cuda(torch.cuda.current_device())\n", " IO.h30 = torch.zeros(1).unsqueeze(0).to(torch.float).cuda(torch.cuda.current_device())\n", " IO.h31 = torch.zeros(1).unsqueeze(0).to(torch.float).cuda(torch.cuda.current_device())\n", " def __8__(IO,ip=['',(0,0)],op=0.5):\n", " IO.ip = ip\n", " IO.h00 = IO.ip\n", " IO.h10 = IO.l20(IO.h00[0].to(torch.float)).unsqueeze(0)\n", " IO.h11 = IO.l21(IO.h00[1].to(torch.float)).unsqueeze(0)\n", " IO.h20[op==0 or op==0.5] = IO.l30(IO.h20,IO.h10)\n", " IO.h21[op==0.5 or op==1] = IO.l30(IO.h21,IO.h10)\n", " IO.h22[op==0 or op==0.5] = IO.l31(IO.h22,IO.h11)\n", " IO.h23[op==0.5 or op==1] = IO.l31(IO.h23,IO.h11)\n", " IO.h30[op==0] = IO.l40(torch.max(IO.h20,IO._O_))\n", " IO.h30[op==0.5] = IO.l40(torch.max(IO.h20,IO.h21))\n", " IO.h30[op==1] = IO.l40(torch.max(IO.h21,IO._O_))\n", " IO.h31[op==0] = IO.l41(torch.max(IO.h22,IO._O_))\n", " IO.h31[op==0.5] = IO.l41(torch.max(IO.h22,IO.h23))\n", " IO.h31[op==1] = IO.l41(torch.max(IO.h23,IO._O_))\n", " IO.h40 = IO.l5(torch.mean(torch.stack((IO.h30,IO.h31)))).item()\n", " IO.op = int(IO.h40 > 0.5)\n", " return(IO.op)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "###
defining the pipeline of `ADL` model to predict 'is_clfd' and to save the prediction
" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def func__is_clfd(ip):\n", " _01_ = __01__().cuda(torch.cuda.current_device())\n", " ip_url = torch.tensor([ord(p) for p in ip[0]] + [(-1)*len(ip[0])]*(862 - len(ip[0]))).to(torch.float).cuda(torch.cuda.current_device())\n", " ip_cordx = torch.tensor(ip[1][0]).cuda(torch.cuda.current_device())\n", " ip_cordy = torch.tensor(ip[1][1]).cuda(torch.cuda.current_device())\n", " ip_cords = torch.stack((ip_cordx,ip_cordy),dim=0).cuda(torch.cuda.current_device())\n", " op = _01_.__8__(ip=[ip_url,ip_cords])\n", " with open(f'{DMA__ADL}/1/op__is_clfd.txt','a') as f:\n", " f.write('\\n')\n", " f.write(f\"ip :: {ip}\")\n", " f.write('\\n')\n", " f.write(f'op :: {op}')\n", " f.write('\\n')\n", " return op" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "###
initializing Neural Network
" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "_01_ = __01__().cuda(torch.cuda.current_device())\n", "print(_01_)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "###
exploring the `ADL` model
" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dct__01_wb = {}\n", "for i,j in _01_.named_parameters():\n", " dct__01_wb[i] = j.mean().item()\n", " print(i,j.mean().item())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "###
initializing the train, test and validation of the data
" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "lip0,lip1,lop0,lop1,lio0,lio1 = [],[],[],[],[],[]\n", "for n in range(0,666):\n", " lip0.append(_01_.__8__(ip=[io__sudo_urls[n],io__sudo_cords[n]],op=io__sudo_isad[n].__int__()))\n", " lip1.append(io__sudo_isad[n].__int__())\n", "for n in range(666,888):\n", " lop0.append(_01_.__8__(ip=[io__sudo_urls[n],io__sudo_cords[n]]))\n", " lop1.append(io__sudo_isad[n].__int__())\n", "for n in range(0,888):\n", " lio0.append(_01_.__8__(ip=[io__sudo_urls[n],io__sudo_cords[n]]))\n", " lio1.append(io__sudo_isad[n].__int__())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(f\"training instances : (666) :\\n\\t:: no. of instances pridicted as 'click-fraud' : {sum(lip0)}\\n\\t:: no. of instances actually 'click-fraud' : {sum(lip1)}\\n\\t:: percent of pridiction of 'click-fraud' : {sum(lip0)/sum(lip1)*100}\")\n", "print(f\"testing instances : (222) :\\n\\t:: no. of instances pridicted as 'click-fraud' : {sum(lop0)}\\n\\t:: no. of instances actually 'click-fraud' : {sum(lop1)}\\n\\t:: percent of pridiction of 'click-fraud' : {sum(lop0)/sum(lop1)*100}\")\n", "print(f\"overall instances : (888) :\\n\\t:: no. of instances pridicted as 'click-fraud' : {sum(lio0)}\\n\\t:: no. of instances actually 'click-fraud' : {sum(lio1)}\\n\\t:: percent of pridiction of 'click-fraud' : {sum(lio0)/sum(lio1)*100}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "###
evaluating the `ADL` model based on a random input data instance [url,cords] and predicting the output [is_clfd]
" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "io = _01_.__8__(ip=[io__sudo_urls[random.choice(range(0,889))],io__sudo_cords[random.choice(range(0,889))]])\n", "print(f\"evaluating a random instance :: [index] : `{random.choice(range(0,889))+1}` :\",end=\"\\n\\t:: \")\n", "print(f'predicted output :: {io} || actual output :: {io__sudo_isad[random.choice(range(0,889))].__int__()}')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "###
predicting the click-fraud for the input by passing into the defined pipeline
" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ip = ['http://www.shea-davis.net/qKs1wmFu0jHdX7gAoX1WE1tv69bSdk9Jkhu0WsIpmPc3VaoI2pZbgRVuAFq1pa1Tb38tYleLhyGjWR?param_1=0UxBOdKj5br0V2Cc2FnjazaehJCXjz4j¶m_2=L0Dn9ih6RCn72UtzdwpUYPUWoIl33D7OJOeMdfwjsQ8inUgfpomkSo3Hh95o1¶m_3=KeF3EtmZ0ggJQCmVc9C1zhZZF26Pvq0uOHXTvA6AD0EIKpEjqlY7',(529475, 363074)]\n", "op = func__is_clfd(ip)\n", "print(f\"ip__url : {ip[0]}\\nip__cords : {ip[1]}\")\n", "print(f'predicted output :: {op}')\n", "print(\"--------\")\n", "ip = ['http://www.ads.norton.com/LStKS8AZjsfm6pbT63O9baOtU1TCmJedJDvpR9WIRSZf4JBh5olBEAwn940PgK?param_1=6MrVuvl40jBNREhnFiiTQVMFPwFnUqqG9GufiKxPKsPEn9C4U13THBfDW1ix8¶m_2=DQ0ZSCv8NWiLKSJsYPacqE1mYI5KHvMFjSZbX37tjSlFkouZhXaQRWJ81Zj1ZCs24lOmvGBFW¶m_3=wElK6BbQf0epdFQ5wRCCJDCduBr¶m_4=IAcf6ylq8m4n4zC93NLh4g103YZJdEXkySuhdDdB7ZqkdmaPZSUTtQCnFBU4¶m_5=OOxQ18iw22ALICZaw9EavJgxfFUR5XNLCmZeayodPmGQvtTbJ8lix¶m_6=3XIQ7fnNpwIrttMEekk6dtegXOBmgesqlAA5HBgn9NScqNS3yU1Oe0A2fJ9o81HVeuUMbocC3',(294620, 518935)]\n", "op = func__is_clfd(ip)\n", "print(f\"ip__url : {ip[0]}\\nip__cords : {ip[1]}\")\n", "print(f'predicted output :: {op}')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.1" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }