hackathon / analyzer.py
Ashar086's picture
Update analyzer.py
1123969 verified
raw
history blame
2.75 kB
import streamlit as st
import pandas as pd
import numpy as np
from scipy import stats
class Analyzer:
    """Streamlit panel offering a few standard analyses of a DataFrame.

    ``perform_analysis`` is the public entry point; it shows an analysis
    selector and delegates to one private helper per analysis type. Every
    helper writes its output directly to the Streamlit page and returns
    ``None``.
    """

    def perform_analysis(self, df: pd.DataFrame) -> None:
        """Render the analysis selector and the output of the chosen analysis.

        Args:
            df: The data to analyse.

        Returns:
            None. All results are written to the Streamlit page.
        """
        analysis_type = st.selectbox(
            "Select analysis type",
            ["Descriptive Statistics", "Correlation Analysis",
             "Hypothesis Testing", "Custom Query"],
        )

        # ``describe()`` raises on a frame without columns, and none of the
        # analyses below are meaningful without data.
        if df is None or df.shape[1] == 0:
            st.warning("No data available for analysis.")
            return

        if analysis_type == "Descriptive Statistics":
            self._descriptive_statistics(df)
        elif analysis_type == "Correlation Analysis":
            self._correlation_analysis(df)
        elif analysis_type == "Hypothesis Testing":
            self._hypothesis_testing(df)
        elif analysis_type == "Custom Query":
            self._custom_query(df)

    def _descriptive_statistics(self, df: pd.DataFrame) -> None:
        """Show ``describe()`` and, on request, skewness and kurtosis."""
        st.write(df.describe())
        if not st.checkbox("Show additional statistics"):
            return

        # Skewness/kurtosis are only defined for numeric data; recent pandas
        # raises instead of silently skipping non-numeric columns.
        numeric_df = df.select_dtypes(include="number")
        if numeric_df.shape[1] == 0:
            st.warning("No numeric columns available for additional statistics.")
            return
        st.write("Skewness:")
        st.write(numeric_df.skew())
        st.write("Kurtosis:")
        st.write(numeric_df.kurtosis())

    def _correlation_analysis(self, df: pd.DataFrame) -> None:
        """Show the correlation matrix of the numeric columns and an optional heatmap."""
        # Restrict to numeric columns explicitly: ``DataFrame.corr`` no longer
        # drops non-numeric columns by default in pandas >= 2.0.
        numeric_df = df.select_dtypes(include="number")
        if numeric_df.shape[1] == 0:
            st.warning("No numeric columns available for correlation analysis.")
            return

        corr_matrix = numeric_df.corr()
        st.write(corr_matrix)

        if st.checkbox("Show heatmap"):
            # Imported here because the module never imported ``px``; the
            # original code failed with NameError on this branch.
            try:
                import plotly.express as px
            except ImportError:
                st.error("Plotly is required to display the heatmap.")
                return
            fig = px.imshow(corr_matrix, color_continuous_scale='RdBu_r')
            st.plotly_chart(fig)

    def _hypothesis_testing(self, df: pd.DataFrame) -> None:
        """Let the user pick a hypothesis test and run it."""
        test_type = st.selectbox("Select test type", ["T-Test", "ANOVA", "Chi-Square"])
        if test_type == "T-Test":
            self._t_test(df)
        elif test_type == "ANOVA":
            self._anova(df)
        elif test_type == "Chi-Square":
            self._chi_square(df)

    def _t_test(self, df: pd.DataFrame) -> None:
        """Run an independent two-sample t-test on two numeric columns."""
        numeric_cols = list(df.select_dtypes(include="number").columns)
        if not numeric_cols:
            st.warning("No numeric columns available for a T-Test.")
            return

        col1 = st.selectbox("Select first column", numeric_cols)
        col2 = st.selectbox("Select second column", numeric_cols)

        # Missing values would otherwise turn both statistics into NaN.
        sample1 = df[col1].dropna()
        sample2 = df[col2].dropna()
        if len(sample1) < 2 or len(sample2) < 2:
            st.warning("Each column needs at least two non-missing values for a T-Test.")
            return

        t_stat, p_value = stats.ttest_ind(sample1, sample2)
        st.write(f"T-statistic: {t_stat}")
        st.write(f"P-value: {p_value}")

    def _anova(self, df: pd.DataFrame) -> None:
        """Run a one-way ANOVA of a numeric column across the groups of another column."""
        numeric_cols = list(df.select_dtypes(include="number").columns)
        if not numeric_cols:
            st.warning("No numeric columns available for ANOVA.")
            return

        grouping_col = st.selectbox("Select grouping column", df.columns)
        value_col = st.selectbox("Select value column", numeric_cols)
        if grouping_col == value_col:
            st.warning("Select different columns for grouping and values.")
            return

        # Drop rows with a missing group label or value, then discard empty
        # groups (e.g. unused categories) before handing samples to SciPy.
        data = df[[grouping_col, value_col]].dropna()
        groups = [
            values
            for _, values in data.groupby(grouping_col)[value_col]
            if len(values) > 0
        ]
        if len(groups) < 2:
            st.warning("ANOVA requires at least two non-empty groups.")
            return

        f_stat, p_value = stats.f_oneway(*groups)
        st.write(f"F-statistic: {f_stat}")
        st.write(f"P-value: {p_value}")

    def _chi_square(self, df: pd.DataFrame) -> None:
        """Run a chi-square test of independence on the cross-tabulation of two columns."""
        col1 = st.selectbox("Select first column", df.columns)
        col2 = st.selectbox("Select second column", df.columns)

        # Both calls can raise ValueError (e.g. an empty table or zero expected
        # frequencies); report that on the page instead of crashing.
        try:
            contingency_table = pd.crosstab(df[col1], df[col2])
            if contingency_table.size == 0:
                st.warning("The selected columns contain no data to cross-tabulate.")
                return
            chi2, p_value, _dof, _expected = stats.chi2_contingency(contingency_table)
        except ValueError as exc:
            st.error(f"Chi-square test could not be computed: {exc}")
            return

        st.write(f"Chi-square statistic: {chi2}")
        st.write(f"P-value: {p_value}")

    def _custom_query(self, df: pd.DataFrame) -> None:
        """Filter the frame with a user-supplied ``DataFrame.query`` expression."""
        query = st.text_input("Enter a custom query (e.g., 'column_name > 5')")
        query = query.strip() if query else ""
        if not query:
            return

        # NOTE(security): the text is evaluated by pandas' expression engine,
        # which is not a sandbox. Review before exposing this to untrusted users.
        try:
            result = df.query(query)
        except Exception as e:  # UI boundary: query can fail in many ways.
            st.error(f"Error in query: {str(e)}")
        else:
            st.write(result)