antitheft159 committed on
Commit
2ce3fcd
·
verified ·
1 Parent(s): aa29545

Upload _211.py

Browse files
Files changed (1) hide show
  1. _211.py +163 -0
_211.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# -*- coding: utf-8 -*-
""".211

Exploratory analysis script for a synthetic e-commerce dataset.

The script reads ``/content/synthetic_ecommerce_data.csv``, prints a preview,
descriptive statistics and per-column missing-value counts, and then renders
a sequence of matplotlib / seaborn figures (revenue over time, top products,
ad spend, CTR, regional and monthly revenue, discounts, clicks, conversion
rate, units sold, and two derived efficiency ratios).

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1uZZV_SkJj2tua-CdVEbGu85Tl8vrTbWD
"""

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Print the path of every file found under the Kaggle input directory.
# os.walk() yields nothing when the directory does not exist, so this is a
# silent no-op outside Kaggle.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

data = pd.read_csv('/content/synthetic_ecommerce_data.csv')

# --- Quick textual overview of the dataset -------------------------------
print("Dataset Preview:")
print(data.head())

print("\nDescriptive Statistics:")
print(data.describe(include='all'))

print("\nMissing Values:")
print(data.isnull().sum())

# --- Revenue over time ---------------------------------------------------
# Parse the date column once; the monthly breakdown further down relies on
# it being a datetime column (it uses the `.dt` accessor).
data['Transaction_Date'] = pd.to_datetime(data['Transaction_Date'])
daily_revenue = data.groupby('Transaction_Date')['Revenue'].sum()

plt.figure(figsize=(10, 5))
plt.plot(daily_revenue, label='Daily Revenue')
plt.title('Revenue Over Time')
plt.xlabel('Date')
plt.ylabel('Revenue')
plt.legend()
plt.show()

# --- Top 10 products by total revenue ------------------------------------
top_products = (
    data.groupby('Product_ID')['Revenue']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

plt.figure(figsize=(10, 5))
top_products.plot(kind='bar')
plt.title('Top 10 Products by Revenue')
plt.xlabel('Product Id')
plt.ylabel('Revenue')
plt.show()

# Total revenue per category. Kept as a module-level result; it is not
# plotted anywhere in this script.
category_revenue = data.groupby('Category')['Revenue'].sum()

# --- Ad spend vs revenue -------------------------------------------------
plt.figure(figsize=(10, 5))
sns.scatterplot(x=data['Ad_Spend'], y=data['Revenue'])
plt.title('Ad Spend vs Revenue')
plt.xlabel('Ad Spend')
plt.ylabel('Revenue')
plt.show()

# --- Distribution of ad click-through rate -------------------------------
plt.figure(figsize=(10, 5))
sns.histplot(data['Ad_CTR'], bins=20, kde=True)
plt.title('Distribution of Ad Click-Through Rate (CTR)')
plt.xlabel('CTR')
plt.ylabel('Frequency')
plt.show()

# --- Revenue by region ---------------------------------------------------
region_revenue = data.groupby('Region')['Revenue'].sum()

plt.figure(figsize=(10, 5))
region_revenue.plot(kind='bar')
plt.title('Revenue by Region')
plt.xlabel('Region')
plt.ylabel('Revenue')
plt.show()

# --- Revenue by calendar month -------------------------------------------
# `.dt.month` is the month number only, so rows from the same month of
# different years land in the same bucket.
data['Month'] = data['Transaction_Date'].dt.month
monthly_revenue = data.groupby('Month')['Revenue'].sum()

plt.figure(figsize=(10, 5))
monthly_revenue.plot(kind='bar')
plt.title('Monthly Revenue Trend')
plt.xlabel('Month')
plt.ylabel('Revenue')
plt.show()

# --- Discount vs revenue -------------------------------------------------
plt.figure(figsize=(10, 5))
sns.scatterplot(x=data['Discount_Applied'], y=data['Revenue'])
plt.title('Discount Applied vs Revenue')
plt.xlabel('Discount (%)')
plt.ylabel('Revenue')
plt.show()

# --- Clicks vs revenue ---------------------------------------------------
plt.figure(figsize=(10, 5))
sns.scatterplot(x=data['Clicks'], y=data['Revenue'])
plt.title('Clicks vs Revenue')
plt.xlabel('Clicks')
plt.ylabel('Revenue')
plt.show()

# --- Conversion rate: distribution and relation to revenue ---------------
plt.figure(figsize=(10, 5))
sns.histplot(data['Conversion_Rate'], bins=20, kde=True)
plt.title('Distribution of Conversion Rate')
plt.xlabel('Conversion Rate')
plt.ylabel('Frequency')
plt.show()

plt.figure(figsize=(10, 5))
sns.scatterplot(x=data['Conversion_Rate'], y=data['Revenue'])
plt.title('Conversion Rate vs Revenue')
plt.xlabel('Conversion Rate')
plt.ylabel('Revenue')
plt.show()

# --- Each region's share of total revenue, in percent --------------------
region_revenue = data.groupby('Region')['Revenue'].sum()
total_revenue = region_revenue.sum()
region_contribution = (region_revenue / total_revenue) * 100

plt.figure(figsize=(10, 5))
region_contribution.plot(kind='bar')
plt.title('Revenue Contribution by Region (%)')
plt.xlabel('Region')
plt.ylabel('Revenue Contribution (%)')
plt.show()

# --- Revenue per unit of ad spend, by category ---------------------------
# A zero Ad_Spend makes the ratio +/-inf, which would distort the box plot;
# map those to NaN so they are treated as missing instead.
data['Ad_Efficiency'] = (
    data['Revenue'] / data['Ad_Spend']
).replace([np.inf, -np.inf], np.nan)

plt.figure(figsize=(10, 5))
sns.boxplot(data=data, x='Category', y='Ad_Efficiency')
plt.title('Ad Spend Efficiency by Category')
plt.xlabel('Category')
plt.ylabel('Revenue per Unit of Ad Spend')
plt.show()

# --- Units sold: distribution, relation to revenue, per category ---------
plt.figure(figsize=(10, 5))
sns.histplot(data['Units_Sold'], bins=20, kde=True)
plt.title('Distribution of Units Sold')
plt.xlabel('Units Sold')
plt.ylabel('Frequency')
plt.show()

plt.figure(figsize=(10, 5))
sns.scatterplot(x=data['Units_Sold'], y=data['Revenue'])
plt.title('Units Sold vs Revenue')
plt.xlabel('Units Sold')
plt.ylabel('Revenue')
plt.show()

units_by_category = data.groupby('Category')['Units_Sold'].sum()

plt.figure(figsize=(10, 5))
units_by_category.plot(kind='bar')
plt.title('Units Sold by Category')
plt.xlabel('Category')
plt.ylabel('Units Sold')
plt.show()

# --- Revenue per impression, by category ---------------------------------
# Impressions is cast to float before dividing; as above, infinite ratios
# from a zero denominator are converted to NaN.
data['Revenue_per_Impression'] = (
    data['Revenue'] / data['Impressions'].astype(float)
).replace([np.inf, -np.inf], np.nan)

plt.figure(figsize=(10, 5))
sns.boxplot(data=data, x='Category', y='Revenue_per_Impression')
plt.title('Revenue per Impression by Category')
plt.xlabel('Category')
plt.ylabel('Revenue per Impression')
plt.show()