Ashar086 committed on
Commit
1123969
·
verified ·
1 Parent(s): b81d37e

Update analyzer.py

Browse files
Files changed (1) hide show
  1. analyzer.py +57 -11
analyzer.py CHANGED
@@ -1,15 +1,61 @@
 
1
  import pandas as pd
2
  import numpy as np
 
3
 
4
class Analyzer:
    """Answer simple, keyword-driven analysis requests about a DataFrame."""

    def analyze_data(self, df, prompt):
        """Run the analysis whose keyword appears in *prompt*.

        This is a deliberately simple keyword match, not real NLP. The
        keywords are matched case-insensitively and checked in this order;
        the first one found wins:

        * ``"correlation"`` -- pairwise correlation of the numeric columns.
        * ``"summary"`` -- ``df.describe()`` summary statistics.
        * ``"unique"`` -- number of distinct values per column.

        Args:
            df: The pandas DataFrame to analyse.
            prompt: Free-text request from the user.

        Returns:
            A string table for "correlation" and "summary", a dict mapping
            each column name to its distinct-value count for "unique", and
            an explanatory string when no keyword matches.
        """
        request = prompt.lower()

        if "correlation" in request:
            # DataFrame.corr() raises on non-numeric columns in pandas 2.0+
            # (older versions silently dropped them), so restrict the frame
            # to numeric/bool columns explicitly to behave the same on both.
            numeric_columns = df.select_dtypes(include=["number", "bool"])
            return numeric_columns.corr().to_string()

        if "summary" in request:
            return df.describe().to_string()

        if "unique" in request:
            # NOTE: this branch returns a dict while the others return str;
            # kept as-is so existing callers keep working.
            return {col: df[col].nunique() for col in df.columns}

        return "I'm sorry, I couldn't understand your analysis request. Please try asking about correlation, summary statistics, or unique values."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
  import pandas as pd
3
  import numpy as np
4
+ from scipy import stats
5
 
6
class Analyzer:
    """Interactive Streamlit front end for exploratory analysis of a DataFrame."""

    def perform_analysis(self, df):
        """Render an analysis-type selector and the analysis the user picks.

        Draws a Streamlit select box offering descriptive statistics,
        correlation analysis, hypothesis testing and a custom query, then
        renders the widgets and results for the chosen option.

        Args:
            df: The pandas DataFrame to analyse.

        Returns:
            None. All output is written to the Streamlit page.
        """
        analysis_type = st.selectbox(
            "Select analysis type",
            ["Descriptive Statistics", "Correlation Analysis", "Hypothesis Testing", "Custom Query"],
        )

        if analysis_type == "Descriptive Statistics":
            self._show_descriptive_statistics(df)
        elif analysis_type == "Correlation Analysis":
            self._show_correlation_analysis(df)
        elif analysis_type == "Hypothesis Testing":
            self._show_hypothesis_testing(df)
        elif analysis_type == "Custom Query":
            self._show_custom_query(df)

    def _show_descriptive_statistics(self, df):
        """Write ``df.describe()`` and, on request, skewness and kurtosis."""
        st.write(df.describe())

        if st.checkbox("Show additional statistics"):
            # numeric_only=True: skew/kurtosis raise on text columns in
            # pandas 2.0+, where non-numeric columns are no longer dropped.
            st.write("Skewness:")
            st.write(df.skew(numeric_only=True))
            st.write("Kurtosis:")
            st.write(df.kurtosis(numeric_only=True))

    def _show_correlation_analysis(self, df):
        """Write the correlation matrix of the numeric columns, with optional heatmap."""
        # Restrict to numeric/bool columns: DataFrame.corr() raises on
        # non-numeric data in pandas 2.0+.
        corr_matrix = df.select_dtypes(include=["number", "bool"]).corr()
        st.write(corr_matrix)

        if st.checkbox("Show heatmap"):
            # `px` was referenced without ever being imported, which made
            # this branch fail with NameError. Import it here so the fix is
            # self-contained, and degrade gracefully if Plotly is missing.
            try:
                import plotly.express as px
            except ImportError:
                st.error("Plotly is required to draw the heatmap.")
                return
            fig = px.imshow(corr_matrix, color_continuous_scale='RdBu_r')
            st.plotly_chart(fig)

    def _show_hypothesis_testing(self, df):
        """Let the user pick a statistical test and columns, then report the result."""
        test_type = st.selectbox("Select test type", ["T-Test", "ANOVA", "Chi-Square"])

        if test_type == "T-Test":
            col1 = st.selectbox("Select first column", df.columns)
            col2 = st.selectbox("Select second column", df.columns)
            try:
                # Drop missing values first; otherwise a single NaN turns
                # both the statistic and the p-value into NaN.
                t_stat, p_value = stats.ttest_ind(df[col1].dropna(), df[col2].dropna())
            except (TypeError, ValueError) as e:
                # Typically a non-numeric column was selected.
                st.error(f"Could not run T-Test: {e}")
                return
            st.write(f"T-statistic: {t_stat}")
            st.write(f"P-value: {p_value}")

        elif test_type == "ANOVA":
            grouping_col = st.selectbox("Select grouping column", df.columns)
            value_col = st.selectbox("Select value column", df.columns)
            samples = [
                values.dropna()
                for _, values in df.groupby(grouping_col)[value_col]
            ]
            samples = [sample for sample in samples if not sample.empty]
            # f_oneway needs at least two samples to compare.
            if len(samples) < 2:
                st.warning("ANOVA needs at least two non-empty groups.")
                return
            try:
                f_stat, p_value = stats.f_oneway(*samples)
            except (TypeError, ValueError) as e:
                st.error(f"Could not run ANOVA: {e}")
                return
            st.write(f"F-statistic: {f_stat}")
            st.write(f"P-value: {p_value}")

        elif test_type == "Chi-Square":
            col1 = st.selectbox("Select first column", df.columns)
            col2 = st.selectbox("Select second column", df.columns)
            contingency_table = pd.crosstab(df[col1], df[col2])
            try:
                chi2, p_value, _dof, _expected = stats.chi2_contingency(contingency_table)
            except ValueError as e:
                # Raised for an empty table or one with zero expected counts.
                st.error(f"Could not run Chi-Square test: {e}")
                return
            st.write(f"Chi-square statistic: {chi2}")
            st.write(f"P-value: {p_value}")

    def _show_custom_query(self, df):
        """Filter *df* with a user-entered ``DataFrame.query`` expression."""
        query = st.text_input("Enter a custom query (e.g., 'column_name > 5')")
        if query:
            # NOTE(review): DataFrame.query evaluates the user's expression;
            # confirm this app is only exposed to trusted users.
            try:
                result = df.query(query)
                st.write(result)
            except Exception as e:
                # UI boundary: any failure in a user-typed expression is
                # reported on the page instead of crashing the app.
                st.error(f"Error in query: {str(e)}")