|
import streamlit as st |
|
import pandas as pd |
|
import numpy as np |
|
from scipy import stats |
|
|
|
class Analyzer:
    """Streamlit UI for running exploratory analyses on a DataFrame.

    The single public entry point is :meth:`perform_analysis`; every other
    method is a private helper that renders one kind of analysis.
    """

    def perform_analysis(self, df: pd.DataFrame) -> None:
        """Render the analysis picker and run the selected analysis on *df*.

        Args:
            df: The data to analyse. It is never modified.

        Results, warnings and errors are written to the Streamlit page;
        nothing is returned.
        """
        analysis_type = st.selectbox(
            "Select analysis type",
            [
                "Descriptive Statistics",
                "Correlation Analysis",
                "Hypothesis Testing",
                "Custom Query",
            ],
        )

        if analysis_type == "Descriptive Statistics":
            self._show_descriptive_statistics(df)
        elif analysis_type == "Correlation Analysis":
            self._show_correlation_analysis(df)
        elif analysis_type == "Hypothesis Testing":
            self._show_hypothesis_testing(df)
        elif analysis_type == "Custom Query":
            self._show_custom_query(df)

    @staticmethod
    def _numeric_frame(df: pd.DataFrame, include_bool: bool = True) -> pd.DataFrame:
        """Return only the numeric (and optionally boolean) columns of *df*."""
        dtypes = ["number", "bool"] if include_bool else ["number"]
        return df.select_dtypes(include=dtypes)

    @staticmethod
    def _second_choice_index(options) -> int:
        """Return the default index for a second column picker.

        Picks the second option when there is one, so that two pickers over
        the same options do not both default to the first column.
        """
        return 1 if len(options) > 1 else 0

    def _show_descriptive_statistics(self, df: pd.DataFrame) -> None:
        """Show ``describe()`` output and, on request, skewness and kurtosis."""
        st.write(df.describe())

        if st.checkbox("Show additional statistics"):
            # Skewness/kurtosis are only defined for numeric data; recent
            # pandas raises instead of silently skipping other columns.
            numeric = self._numeric_frame(df)
            if numeric.shape[1] == 0:
                st.warning("No numeric columns available for additional statistics.")
                return
            st.write("Skewness:")
            st.write(numeric.skew())
            st.write("Kurtosis:")
            st.write(numeric.kurtosis())

    def _show_correlation_analysis(self, df: pd.DataFrame) -> None:
        """Show the correlation matrix of the numeric columns and an optional heatmap."""
        # Restrict to numeric columns up front so text/datetime columns do
        # not make ``corr()`` raise.
        numeric = self._numeric_frame(df)
        if numeric.shape[1] == 0:
            st.warning("No numeric columns available for correlation analysis.")
            return

        corr_matrix = numeric.corr()
        st.write(corr_matrix)

        if st.checkbox("Show heatmap"):
            # Imported here because the module never imports plotly; the
            # original code referenced an undefined ``px`` name.
            try:
                import plotly.express as px
            except ImportError:
                st.error("Plotly is required to display the heatmap.")
                return
            fig = px.imshow(corr_matrix, color_continuous_scale="RdBu_r")
            st.plotly_chart(fig)

    def _show_hypothesis_testing(self, df: pd.DataFrame) -> None:
        """Let the user pick a statistical test and dispatch to its helper."""
        test_type = st.selectbox("Select test type", ["T-Test", "ANOVA", "Chi-Square"])

        if len(df.columns) == 0:
            st.warning("The dataset has no columns to test.")
            return

        if test_type == "T-Test":
            self._run_t_test(df)
        elif test_type == "ANOVA":
            self._run_anova(df)
        elif test_type == "Chi-Square":
            self._run_chi_square(df)

    def _run_t_test(self, df: pd.DataFrame) -> None:
        """Run an independent two-sample t-test between two numeric columns."""
        numeric_columns = list(self._numeric_frame(df, include_bool=False).columns)
        if not numeric_columns:
            st.warning("The T-Test requires at least one numeric column.")
            return

        col1 = st.selectbox("Select first column", numeric_columns)
        col2 = st.selectbox(
            "Select second column",
            numeric_columns,
            index=self._second_choice_index(numeric_columns),
        )

        # The samples are independent, so missing values are dropped per
        # column; a single NaN would otherwise turn both results into NaN.
        sample1 = df[col1].dropna()
        sample2 = df[col2].dropna()
        if len(sample1) < 2 or len(sample2) < 2:
            st.warning("Each column needs at least two non-missing values.")
            return

        try:
            t_stat, p_value = stats.ttest_ind(sample1, sample2)
        except (ValueError, TypeError) as exc:
            st.error(f"Error in T-Test: {exc}")
            return
        st.write(f"T-statistic: {t_stat}")
        st.write(f"P-value: {p_value}")

    def _run_anova(self, df: pd.DataFrame) -> None:
        """Run a one-way ANOVA of a numeric column across the groups of another."""
        numeric_columns = list(self._numeric_frame(df, include_bool=False).columns)
        if not numeric_columns:
            st.warning("ANOVA requires at least one numeric value column.")
            return

        grouping_col = st.selectbox("Select grouping column", df.columns)
        value_col = st.selectbox("Select value column", numeric_columns)

        # Drop rows with a missing value first, then discard groups that end
        # up empty (e.g. unobserved categories).
        valid_rows = df[df[value_col].notna()]
        groups = [
            values
            for _, values in valid_rows.groupby(grouping_col)[value_col]
            if len(values) > 0
        ]
        if len(groups) < 2:
            st.warning("ANOVA requires at least two non-empty groups.")
            return

        try:
            f_stat, p_value = stats.f_oneway(*groups)
        except (ValueError, TypeError) as exc:
            st.error(f"Error in ANOVA: {exc}")
            return
        st.write(f"F-statistic: {f_stat}")
        st.write(f"P-value: {p_value}")

    def _run_chi_square(self, df: pd.DataFrame) -> None:
        """Run a chi-square test of independence between two columns."""
        col1 = st.selectbox("Select first column", df.columns)
        col2 = st.selectbox(
            "Select second column",
            df.columns,
            index=self._second_choice_index(df.columns),
        )

        contingency_table = pd.crosstab(df[col1], df[col2])
        if contingency_table.size == 0:
            st.warning("The selected columns produce an empty contingency table.")
            return

        try:
            chi2, p_value, _dof, _expected = stats.chi2_contingency(contingency_table)
        except ValueError as exc:
            st.error(f"Error in Chi-Square test: {exc}")
            return
        st.write(f"Chi-square statistic: {chi2}")
        st.write(f"P-value: {p_value}")

    def _show_custom_query(self, df: pd.DataFrame) -> None:
        """Filter *df* with a user-supplied ``DataFrame.query`` expression."""
        query = st.text_input("Enter a custom query (e.g., 'column_name > 5')")
        if not query or not query.strip():
            return

        # NOTE(review): ``DataFrame.query`` evaluates the text typed by the
        # user as an expression. Confirm this app is only exposed to trusted
        # users before deploying it publicly.
        try:
            result = df.query(query)
        except Exception as e:  # UI boundary: any failure is shown to the user.
            st.error(f"Error in query: {e}")
        else:
            st.write(result)