# -*- coding: utf-8 -*-
""".211

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1uZZV_SkJj2tua-CdVEbGu85Tl8vrTbWD
"""

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# List every file found under the Kaggle input directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# ---------------------------------------------------------------------------
# Load the dataset and print a quick overview.
# ---------------------------------------------------------------------------
data = pd.read_csv('/content/synthetic_ecommerce_data.csv')

print("Dataset Preview:")
print(data.head())

print("\nDescriptive Statistics:")
print(data.describe(include='all'))

print("\nMissing Values:")
print(data.isnull().sum())

# Parse the transaction date so it can be grouped on and its month extracted.
data['Transaction_Date'] = pd.to_datetime(data['Transaction_Date'])

# ---------------------------------------------------------------------------
# Revenue over time (summed per transaction date).
# ---------------------------------------------------------------------------
daily_revenue = data.groupby('Transaction_Date')['Revenue'].sum()
plt.figure(figsize=(10, 5))
plt.plot(daily_revenue, label='Daily Revenue')
plt.title('Revenue Over Time')
plt.xlabel('Date')
plt.ylabel('Revenue')
plt.legend()
plt.show()

# ---------------------------------------------------------------------------
# Ten products with the highest total revenue.
# ---------------------------------------------------------------------------
top_products = (
    data.groupby('Product_ID')['Revenue']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)
plt.figure(figsize=(10, 5))
top_products.plot(kind='bar')
plt.title('Top 10 Products by Revenue')
plt.xlabel('Product Id')
# Label the value axis like the other revenue bar charts in this script.
plt.ylabel('Revenue')
plt.show()

# Total revenue per category. NOTE(review): this aggregate is not plotted in
# the statements that follow; it is kept in case later code relies on it.
category_revenue = data.groupby('Category')['Revenue'].sum()

# ---------------------------------------------------------------------------
# Ad spend vs revenue.
# ---------------------------------------------------------------------------
plt.figure(figsize=(10, 5))
# Fixed: the original read `x=data=['Ad_Spend']`, which is a syntax error.
sns.scatterplot(x=data['Ad_Spend'], y=data['Revenue'])
plt.title('Ad Spend vs Revenue')
plt.xlabel('Ad Spend')
plt.ylabel('Revenue')
plt.show()

# ---------------------------------------------------------------------------
# Distribution of ad click-through rate.
# ---------------------------------------------------------------------------
plt.figure(figsize=(10, 5))
sns.histplot(data['Ad_CTR'], bins=20, kde=True)
plt.title('Distribution of Ad Click-Through Rate (CTR)')
plt.xlabel('CTR')
plt.ylabel('Frequency')
plt.show()

# ---------------------------------------------------------------------------
# Total revenue per region.
# ---------------------------------------------------------------------------
region_revenue = data.groupby('Region')['Revenue'].sum()
plt.figure(figsize=(10, 5))
region_revenue.plot(kind='bar')
plt.title('Revenue by Region')
plt.xlabel('Region')
plt.ylabel('Revenue')
plt.show()

# ---------------------------------------------------------------------------
# Revenue per calendar month.
# `.dt.month` is the month number only, so the same month of different years
# (if the data spans several) is pooled into one bar.
# The title, axis labels and plt.show() for this figure follow immediately
# after this statement group.
# ---------------------------------------------------------------------------
data['Month'] = data['Transaction_Date'].dt.month
monthly_revenue = data.groupby('Month')['Revenue'].sum()
plt.figure(figsize=(10, 5))
monthly_revenue.plot(kind='bar')
# Title and axis labels for the monthly revenue bar chart whose figure is
# created by the statements directly before this point.
plt.title('Monthly Revenue Trend')  # fixed typo: was 'Reveneu'
plt.xlabel('Month')
plt.ylabel('Revenue')
plt.show()

# ---------------------------------------------------------------------------
# Discount applied vs revenue.
# ---------------------------------------------------------------------------
plt.figure(figsize=(10, 5))
sns.scatterplot(x=data['Discount_Applied'], y=data['Revenue'])
plt.title('Discount Applied vs Revenue')
plt.xlabel('Discount (%)')
plt.ylabel('Revenue')
plt.show()

# ---------------------------------------------------------------------------
# Clicks vs revenue.
# ---------------------------------------------------------------------------
plt.figure(figsize=(10, 5))
sns.scatterplot(x=data['Clicks'], y=data['Revenue'])
plt.title('Clicks vs Revenue')
# Set the x label explicitly, as every other scatter plot here does.
plt.xlabel('Clicks')
plt.ylabel('Revenue')
plt.show()

# ---------------------------------------------------------------------------
# Distribution of conversion rate.
# ---------------------------------------------------------------------------
plt.figure(figsize=(10, 5))
sns.histplot(data['Conversion_Rate'], bins=20, kde=True)
plt.title('Distribution of Conversion Rate')
plt.xlabel('Conversion Rate')
plt.ylabel('Frequency')
plt.show()

# ---------------------------------------------------------------------------
# Conversion rate vs revenue.
# ---------------------------------------------------------------------------
plt.figure(figsize=(10, 5))
sns.scatterplot(x=data['Conversion_Rate'], y=data['Revenue'])
plt.title('Conversion Rate vs Revenue')
plt.xlabel('Conversion Rate')
plt.ylabel('Revenue')
plt.show()

# ---------------------------------------------------------------------------
# Each region's share of total revenue, in percent.
# ---------------------------------------------------------------------------
region_revenue = data.groupby('Region')['Revenue'].sum()
total_revenue = region_revenue.sum()
region_contribution = (region_revenue / total_revenue) * 100
plt.figure(figsize=(10, 5))
region_contribution.plot(kind='bar')
plt.title('Revenue Contribution by Region (%)')  # fixed typo: was 'Reigion'
plt.xlabel('Region')
plt.ylabel('Revenue Contribution (%)')
plt.show()

# ---------------------------------------------------------------------------
# Revenue earned per unit of ad spend, by category.
# Rows with zero ad spend would divide to +/-inf and distort the box plot, so
# a zero denominator is masked to NaN (those rows get a NaN efficiency).
# ---------------------------------------------------------------------------
ad_spend = data['Ad_Spend']
data['Ad_Efficiency'] = data['Revenue'] / ad_spend.where(ad_spend != 0)
plt.figure(figsize=(10, 5))
sns.boxplot(data=data, x='Category', y='Ad_Efficiency')
plt.title('Ad Spend Efficiency by Category')
plt.xlabel('Category')
plt.ylabel('Revenue per Unit of Ad Spend')
plt.show()

# ---------------------------------------------------------------------------
# Distribution of units sold.
# ---------------------------------------------------------------------------
plt.figure(figsize=(10, 5))
sns.histplot(data['Units_Sold'], bins=20, kde=True)
plt.title('Distribution of Units Sold')
plt.xlabel('Units Sold')
plt.ylabel('Frequency')
plt.show()

# ---------------------------------------------------------------------------
# Units sold vs revenue.
# ---------------------------------------------------------------------------
plt.figure(figsize=(10, 5))
sns.scatterplot(x=data['Units_Sold'], y=data['Revenue'])
plt.title('Units Sold vs Revenue')
plt.xlabel('Units Sold')
plt.ylabel('Revenue')
plt.show()

# ---------------------------------------------------------------------------
# Total units sold per category.
# ---------------------------------------------------------------------------
units_by_category = data.groupby('Category')['Units_Sold'].sum()
plt.figure(figsize=(10, 5))
units_by_category.plot(kind='bar')
plt.title('Units Sold by Category')
plt.xlabel('Category')
plt.ylabel('Units Sold')
plt.show()
# ---------------------------------------------------------------------------
# Revenue earned per ad impression, by category.
# Impressions are cast to float before dividing. Rows with zero impressions
# would divide to +/-inf and distort the box plot, so a zero denominator is
# masked to NaN (those rows get a NaN ratio).
# ---------------------------------------------------------------------------
impressions = data['Impressions'].astype(float)
data['Revenue_per_Impression'] = (
    data['Revenue'] / impressions.where(impressions != 0)
)
plt.figure(figsize=(10, 5))
sns.boxplot(data=data, x='Category', y='Revenue_per_Impression')
plt.title('Revenue per Impression by Category')
plt.xlabel('Category')
plt.ylabel('Revenue per Impression')
plt.show()