"""Streamlit page for previewing, charting and labeling the AnalyzeYT dataset.

The script loads a dataset via ``load_data``, passes it through
``clean_data``, shows previews and ``describe()`` output, offers a choice of
three chart types, and optionally runs ``label_dataset`` and offers the result
as a CSV download.
"""

import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from dataset_import import load_data
from data_cleaning import clean_data
from data_labelling import label_dataset


def _render_xy_chart(data, kind, draw):
    """Render the X/Y column pickers and draw a line or bar chart.

    Args:
        data: DataFrame to chart.
        kind: Human-readable chart name, used as the default title and as the
            prefix of the caption written under the chart.
        draw: Streamlit chart function to call (``st.line_chart`` or
            ``st.bar_chart``).
    """
    columns = list(data.columns)
    x_axis = st.selectbox("Select X-axis Column", columns)
    # Default Y to the second column so the initial render does not pick the
    # same column for both axes.
    y_axis = st.selectbox(
        "Select Y-axis Column",
        columns,
        index=1 if len(columns) > 1 else 0,
    )
    title = st.text_input("Enter Chart Title", kind)

    if x_axis == y_axis:
        st.warning("Select two different columns for the X and Y axes.")
        return

    # Pass x/y explicitly: selecting data[[x, y]] would plot both columns as
    # separate series against the row index instead of y against x.
    draw(data, x=x_axis, y=y_axis)
    st.write(f"{kind}: {title}")


def _render_histogram(data):
    """Render the histogram controls and draw the histogram with matplotlib."""
    selected_column = st.selectbox("Select Column for Histogram", data.columns)
    bins = st.slider("Number of Bins", min_value=10, max_value=100, value=30)
    title = st.text_input("Enter Chart Title", "Histogram")

    # Use an explicit figure: st.pyplot() without arguments relies on the
    # global pyplot figure, which is deprecated in Streamlit.
    fig, ax = plt.subplots()
    try:
        # Missing values cannot be binned, so drop them first.
        ax.hist(data[selected_column].dropna(), bins=bins)
        ax.set_title(title)
        ax.set_xlabel(selected_column)
        ax.set_ylabel('Frequency')
        st.pyplot(fig)
    finally:
        # Each rerun creates a new figure; close it so they do not pile up.
        plt.close(fig)


def _render_labeling(data):
    """Render the labeling options and, on request, label and export the data."""
    st.write("Labeling Options:")
    label_option = st.radio("Do you want to label your dataset?", ('No', 'Yes'))
    if label_option != 'Yes':
        return

    output_name = st.text_input("Enter Output File Name", "labeled_dataset.csv")
    # The key input must be rendered before the button: a button is only True
    # on the rerun triggered by its click, so an input created inside that
    # branch would always still be empty when the labeling call runs.
    client = st.text_input("Enter your Gradio Client API Key", type="password")

    if not st.button("Run Labeling"):
        return

    labeled_data = label_dataset(data, client)
    # NOTE(review): output_name is user-supplied and is used as a path on the
    # machine running the app — confirm this is acceptable for the deployment.
    labeled_data.to_csv(output_name, index=False)
    st.write("Labeling Completed. Download your file below:")
    st.download_button(
        label="Download Labeled Dataset",
        # Keep the download identical to the file saved above (no index column).
        data=labeled_data.to_csv(index=False),
        file_name=output_name,
        mime='text/csv',
    )


st.title("AnalyzeYT Dataset Analysis")

# Load and clean dataset
data = load_data()

if data is not None:
    st.write("Loaded Data Preview:")
    st.write(data.head())

    # Clean data
    data = clean_data(data)
    st.write("Cleaned Data Preview:")
    st.write(data.head())

    # Show data description
    st.write("Data Description:")
    st.write(data.describe())

    # Data visualization options
    st.write("Data Visualization:")
    chart_type = st.selectbox(
        "Select Chart Type", ['Line Chart', 'Bar Chart', 'Histogram']
    )

    if chart_type == 'Line Chart':
        _render_xy_chart(data, "Line Chart", st.line_chart)
    elif chart_type == 'Bar Chart':
        _render_xy_chart(data, "Bar Chart", st.bar_chart)
    elif chart_type == 'Histogram':
        _render_histogram(data)

    # Add option for labeling the dataset
    _render_labeling(data)