antitheft159
committed on
Upload _211.py
Browse files
_211.py
ADDED
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
""".211
|
3 |
+
|
4 |
+
Automatically generated by Colab.
|
5 |
+
|
6 |
+
Original file is located at
|
7 |
+
https://colab.research.google.com/drive/1uZZV_SkJj2tua-CdVEbGu85Tl8vrTbWD
|
8 |
+
"""
|
9 |
+
|
10 |
+
import numpy as np
|
11 |
+
import pandas as pd
|
12 |
+
|
13 |
+
import os
|
14 |
+
for dirname, _, filenames in os.walk('/kaggle/input'):
|
15 |
+
for filename in filenames:
|
16 |
+
print(os.path.join(dirname, filename))
|
17 |
+
|
18 |
+
import pandas as pd
|
19 |
+
import numpy as np
|
20 |
+
import matplotlib.pyplot as plt
|
21 |
+
import seaborn as sns
|
22 |
+
|
23 |
+
data = pd.read_csv('/content/synthetic_ecommerce_data.csv')
|
24 |
+
|
25 |
+
print("Dataset Preview:")
|
26 |
+
print(data.head())
|
27 |
+
|
28 |
+
print("\nDescriptive Statistics:")
|
29 |
+
print(data.describe(include='all'))
|
30 |
+
|
31 |
+
print("\nMissing Values:")
|
32 |
+
print(data.isnull().sum())
|
33 |
+
|
34 |
+
data['Transaction_Date'] = pd.to_datetime(data['Transaction_Date'])
|
35 |
+
daily_revenue = data.groupby('Transaction_Date')['Revenue'].sum()
|
36 |
+
|
37 |
+
plt.figure(figsize=(10, 5))
|
38 |
+
plt.plot(daily_revenue, label='Daily Revenue')
|
39 |
+
plt.title('Revenue Over Time')
|
40 |
+
plt.xlabel('Date')
|
41 |
+
plt.ylabel('Revenue')
|
42 |
+
plt.legend()
|
43 |
+
plt.show()
|
44 |
+
|
45 |
+
top_products = data.groupby('Product_ID')['Revenue'].sum().sort_values(ascending=False).head(10)
|
46 |
+
|
47 |
+
plt.figure(figsize=(10, 5))
|
48 |
+
top_products.plot(kind='bar')
|
49 |
+
plt.title('Top 10 Products by Revenue')
|
50 |
+
plt.xlabel('Product Id')
|
51 |
+
plt.show()
|
52 |
+
|
53 |
+
category_revenue = data.groupby('Category')['Revenue'].sum()
|
54 |
+
|
55 |
+
plt.figure(figsize=(10, 5))
|
56 |
+
sns.scatterplot(x=data['Ad_Spend'], y=data['Revenue'])  # one point per row: Ad_Spend on x, Revenue on y
|
57 |
+
plt.title('Ad Spend vs Revenue')
|
58 |
+
plt.xlabel('Ad Spend')
|
59 |
+
plt.ylabel('Revenue')
|
60 |
+
plt.show()
|
61 |
+
|
62 |
+
plt.figure(figsize=(10, 5))
|
63 |
+
sns.histplot(data['Ad_CTR'], bins=20, kde=True)
|
64 |
+
plt.title('Distribution of Ad Click-Through Rate (CTR)')
|
65 |
+
plt.xlabel('CTR')
|
66 |
+
plt.ylabel('Frequency')
|
67 |
+
plt.show()
|
68 |
+
|
69 |
+
region_revenue = data.groupby('Region')['Revenue'].sum()
|
70 |
+
|
71 |
+
plt.figure(figsize=(10, 5))
|
72 |
+
region_revenue.plot(kind='bar')
|
73 |
+
plt.title('Revenue by Region')
|
74 |
+
plt.xlabel('Region')
|
75 |
+
plt.ylabel('Revenue')
|
76 |
+
plt.show()
|
77 |
+
|
78 |
+
data['Month'] = data['Transaction_Date'].dt.month
|
79 |
+
monthly_revenue = data.groupby('Month')['Revenue'].sum()
|
80 |
+
|
81 |
+
plt.figure(figsize=(10, 5))
|
82 |
+
monthly_revenue.plot(kind='bar')
|
83 |
+
plt.title('Monthly Revenue Trend')
|
84 |
+
plt.xlabel('Month')
|
85 |
+
plt.ylabel('Revenue')
|
86 |
+
plt.show()
|
87 |
+
|
88 |
+
plt.figure(figsize=(10, 5))
|
89 |
+
sns.scatterplot(x=data['Discount_Applied'], y=data['Revenue'])
|
90 |
+
plt.title('Discount Applied vs Revenue')
|
91 |
+
plt.xlabel('Discount (%)')
|
92 |
+
plt.ylabel('Revenue')
|
93 |
+
plt.show()
|
94 |
+
|
95 |
+
plt.figure(figsize=(10, 5))
|
96 |
+
sns.scatterplot(x=data['Clicks'], y=data['Revenue'])
|
97 |
+
plt.title('Clicks vs Revenue')
|
98 |
+
plt.ylabel('Revenue')
|
99 |
+
plt.show()
|
100 |
+
|
101 |
+
plt.figure(figsize=(10, 5))
|
102 |
+
sns.histplot(data['Conversion_Rate'], bins=20, kde=True)
|
103 |
+
plt.title('Distribution of Conversion Rate')
|
104 |
+
plt.xlabel('Conversion Rate')
|
105 |
+
plt.ylabel('Frequency')
|
106 |
+
plt.show()
|
107 |
+
|
108 |
+
plt.figure(figsize=(10, 5))
|
109 |
+
sns.scatterplot(x=data['Conversion_Rate'], y=data['Revenue'])
|
110 |
+
plt.title('Conversion Rate vs Revenue')
|
111 |
+
plt.xlabel('Conversion Rate')
|
112 |
+
plt.ylabel('Revenue')
|
113 |
+
plt.show()
|
114 |
+
|
115 |
+
region_revenue = data.groupby('Region')['Revenue'].sum()
|
116 |
+
total_revenue = region_revenue.sum()
|
117 |
+
region_contribution = (region_revenue / total_revenue) * 100
|
118 |
+
|
119 |
+
plt.figure(figsize=(10, 5))
|
120 |
+
region_contribution.plot(kind='bar')
|
121 |
+
plt.title('Revenue Contribution by Region (%)')
|
122 |
+
plt.xlabel('Region')
|
123 |
+
plt.ylabel('Revenue Contribution (%)')
|
124 |
+
plt.show()
|
125 |
+
|
126 |
+
data['Ad_Efficiency'] = data['Revenue'] / data['Ad_Spend'].replace(0, np.nan)  # zero spend -> NaN instead of inf so the box plot is not distorted
|
127 |
+
plt.figure(figsize=(10, 5))
|
128 |
+
sns.boxplot(data=data, x='Category', y='Ad_Efficiency')
|
129 |
+
plt.title('Ad Spend Efficiency by Category')
|
130 |
+
plt.xlabel('Category')
|
131 |
+
plt.ylabel('Revenue per Unit of Ad Spend')
|
132 |
+
plt.show()
|
133 |
+
|
134 |
+
plt.figure(figsize=(10, 5))
|
135 |
+
sns.histplot(data['Units_Sold'], bins=20, kde=True)
|
136 |
+
plt.title('Distribution of Units Sold')
|
137 |
+
plt.xlabel('Units Sold')
|
138 |
+
plt.ylabel('Frequency')
|
139 |
+
plt.show()
|
140 |
+
|
141 |
+
plt.figure(figsize=(10, 5))
|
142 |
+
sns.scatterplot(x=data['Units_Sold'], y=data['Revenue'])
|
143 |
+
plt.title('Units Sold vs Revenue')
|
144 |
+
plt.xlabel('Units Sold')
|
145 |
+
plt.ylabel('Revenue')
|
146 |
+
plt.show()
|
147 |
+
|
148 |
+
units_by_category = data.groupby('Category')['Units_Sold'].sum()
|
149 |
+
|
150 |
+
plt.figure(figsize=(10, 5))
|
151 |
+
units_by_category.plot(kind='bar')
|
152 |
+
plt.title('Units Sold by Category')
|
153 |
+
plt.xlabel('Category')
|
154 |
+
plt.ylabel('Units Sold')
|
155 |
+
plt.show()
|
156 |
+
|
157 |
+
data['Revenue_per_Impression'] = data['Revenue'] / data['Impressions'].astype(float).replace(0, np.nan)  # zero impressions -> NaN instead of inf
|
158 |
+
plt.figure(figsize=(10, 5))
|
159 |
+
sns.boxplot(data=data, x='Category', y='Revenue_per_Impression')
|
160 |
+
plt.title('Revenue per Impression by Category')
|
161 |
+
plt.xlabel('Category')
|
162 |
+
plt.ylabel('Revenue per Impression')
|
163 |
+
plt.show()
|