dnirfana committed on
Commit
3258457
1 Parent(s): e006bf1

Update eda.py

Browse files
Files changed (1) hide show
  1. eda.py +170 -1
eda.py CHANGED
@@ -4,4 +4,173 @@ import matplotlib.pyplot as plt
4
  import seaborn as sns
5
 
6
  def app():
7
- st.title('Exploratory Data Analysis')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  import seaborn as sns
5
 
6
  def app():
7
+ st.title('Exploratory Data Analysis')
8
+
9
+ # Load Data
10
+ df = pd.read_csv('../Transactions Data.csv')
11
+
12
+ # Data Summary
13
+ st.header('Data Summary')
14
+ st.write(df.describe().T)
15
+
16
+ st.divider()
17
+
18
+ # Univariate Exploration
19
+ st.header('Univariate Analysis')
20
+
21
+ # 1
22
+ st.subheader('Distribution of Transactions Types')
23
+ # Plotting
24
+ fig, ax = plt.subplots()
25
+ sns.histplot(df['type'], bins=20, ax=ax)
26
+ plt.xlabel('Transaction Types')
27
+ plt.ylabel('Frequency')
28
+ plt.title('Distribution of Transaction Types')
29
+ st.pyplot(fig)
30
+ st.write('bla bla bla')
31
+ st.write('')
32
+
33
+ # 2
34
+ st.subheader('Distribution of Balance Amount')
35
+ # Plotting
36
+ fig, ax = plt.subplots()
37
+ sns.histplot(df['amount'], bins=20, ax=ax)
38
+ plt.xlabel('Amount')
39
+ plt.ylabel('Frequency')
40
+ plt.title('Distribution of Balance Amount')
41
+ st.pyplot(fig)
42
+ st.write('bla bla bla')
43
+ st.write('')
44
+
45
+ # 3
46
+ st.subheader('Distribution of Old Balance Origin')
47
+ # Plotting
48
+ fig, ax = plt.subplots()
49
+ sns.histplot(df['oldbalanceOrg'], bins=20, ax=ax)
50
+ plt.xlabel('Old Balance Origin')
51
+ plt.ylabel('Frequency')
52
+ plt.title('Distribution of Old Balance Origin')
53
+ st.pyplot(fig)
54
+ st.write('bla bla bla')
55
+ st.write('')
56
+
57
+ # 4
58
+ st.subheader('Distribution of New Balance Origin')
59
+ # Plotting
60
+ fig, ax = plt.subplots()
61
+ sns.histplot(df['newbalanceOrig'], bins=20, ax=ax)
62
+ plt.xlabel('New Balance Origin')
63
+ plt.ylabel('Frequency')
64
+ plt.title('Distribution of New Balance Origin')
65
+ st.pyplot(fig)
66
+ st.write('bla bla bla')
67
+ st.write('')
68
+
69
+ # 5
70
+ st.subheader('Distribution of Old Balance Destination')
71
+ # Plotting
72
+ fig, ax = plt.subplots()
73
+ sns.histplot(df['oldbalanceDest'], bins=20, ax=ax)
74
+ plt.xlabel('Old Balance Destination')
75
+ plt.ylabel('Frequency')
76
+ plt.title('Distribution of Old Balance Destination')
77
+ st.pyplot(fig)
78
+ st.write('bla bla bla')
79
+ st.write('')
80
+
81
+
82
+ # 5
83
+ st.subheader('Distribution of New Balance Destination')
84
+ # Plotting
85
+ fig, ax = plt.subplots()
86
+ sns.histplot(df['newbalanceDest'], bins=20, ax=ax)
87
+ plt.xlabel('New Balance Destination')
88
+ plt.ylabel('Frequency')
89
+ plt.title('Distribution of New Balance Destination')
90
+ st.pyplot(fig)
91
+ st.write('bla bla bla')
92
+ st.write('')
93
+
94
+ # 6
95
+ st.subheader('Distribution of Flagged Fraud')
96
+ # Plotting
97
+ fig, ax = plt.subplots()
98
+ sns.histplot(df['isFlaggedFraud'], bins=20, ax=ax)
99
+ plt.xlabel('Is Flagged Fraud')
100
+ plt.ylabel('Frequency')
101
+ plt.title('Distribution of Flagged Fraud')
102
+ st.pyplot(fig)
103
+ st.write('bla bla bla')
104
+ st.write('')
105
+
106
+ # 7
107
+ st.subheader('Distribution of Fraud')
108
+ # Plotting
109
+ fig, ax = plt.subplots()
110
+ sns.histplot(df['isFraud'], bins=20, ax=ax)
111
+ plt.xlabel('Is Fraud')
112
+ plt.ylabel('Frequency')
113
+ plt.title('Distribution of Fraud')
114
+ st.pyplot(fig)
115
+ st.write('bla bla bla')
116
+ st.write('')
117
+
118
+ st.divider()
119
+
120
+ # Bivariate analysis
121
+ st.header('Bivariate Analysis')
122
+
123
+ # 1
124
+ st.subheader('Distribution of Amount Balance per Transaction Types')
125
+ fig, ax = plt.subplots()
126
+ sns.boxplot(x=df['amount'], y=df['type'], ax=ax)
127
+ plt.xlabel('Amount')
128
+ plt.ylabel('Transaction Types')
129
+ plt.title('Transaction Types vs Amount Balance')
130
+ st.pyplot(fig)
131
+ st.write('bla bla bla')
132
+ st.write('')
133
+
134
+ # 2
135
+ st.subheader('Distribution of Old Balance Origin per Transaction Types')
136
+ fig, ax = plt.subplots()
137
+ sns.boxplot(x=df['oldbalanceOrg'], y=df['type'], ax=ax)
138
+ plt.xlabel('Old Balance Origin')
139
+ plt.ylabel('Transaction Types')
140
+ plt.title('Transaction Types vs Old Balance Origin')
141
+ st.pyplot(fig)
142
+ st.write('bla bla bla')
143
+ st.write('')
144
+
145
+ # 3
146
+ st.subheader('Distribution of New Balance Origin per Transaction Types')
147
+ fig, ax = plt.subplots()
148
+ sns.boxplot(x=df['newbalanceOrig'], y=df['type'], ax=ax)
149
+ plt.xlabel('New Balance Origin')
150
+ plt.ylabel('Transaction Types')
151
+ plt.title('Transaction Types vs New Balance Origin')
152
+ st.pyplot(fig)
153
+ st.write('bla bla bla')
154
+ st.write('')
155
+
156
+ # 4
157
+ st.subheader('Distribution of Old Balance Destination per Transaction Types')
158
+ fig, ax = plt.subplots()
159
+ sns.boxplot(x=df['oldbalanceDest'], y=df['type'], ax=ax)
160
+ plt.xlabel('Old Balance Destination')
161
+ plt.ylabel('Transaction Types')
162
+ plt.title('Transaction Types vs Old Balance Destination')
163
+ st.pyplot(fig)
164
+ st.write('bla bla bla')
165
+ st.write('')
166
+
167
+ # 5
168
+ st.subheader('Distribution of New Balance Destination per Transaction Types')
169
+ fig, ax = plt.subplots()
170
+ sns.boxplot(x=df['newbalanceDest'], y=df['type'], ax=ax)
171
+ plt.xlabel('New Balance Destination')
172
+ plt.ylabel('Transaction Types')
173
+ plt.title('Transaction Types vs New Balance Destination')
174
+ st.pyplot(fig)
175
+ st.write('bla bla bla')
176
+ st.write('')