Abu1998 committed on
Commit
5ba248d
·
verified ·
1 Parent(s): 5284cc6

integrated the labeling functionality

Browse files

this is the app.py file "import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from dataset_import import load_data
from data_cleaning import clean_data

st.title("AnalyzeYT Dataset Analysis")

# Load the dataset. Everything below is skipped when load_data() returns None.
data = load_data()

if data is not None:
    st.write("Loaded Data Preview:")
    st.write(data.head())

    # Run the project's cleaning step and show the result next to the raw preview.
    data = clean_data(data)

    st.write("Cleaned Data Preview:")
    st.write(data.head())

    # Summary statistics as computed by DataFrame.describe().
    st.write("Data Description:")
    st.write(data.describe())

    # Data visualization options
    st.write("Data Visualization:")
    chart_type = st.selectbox("Select Chart Type", ['Line Chart', 'Bar Chart', 'Histogram'])

    # Default the Y selector to the second column (when there is one) so the
    # initial render does not plot a column against itself.
    default_y_index = 1 if len(data.columns) > 1 else 0

    if chart_type == 'Line Chart':
        x_axis = st.selectbox("Select X-axis Column", data.columns)
        y_axis = st.selectbox("Select Y-axis Column", data.columns, index=default_y_index)
        title = st.text_input("Enter Chart Title", "Line Chart")
        # Pass the selections as x/y so the chosen X column is really the
        # horizontal axis instead of a second series plotted against the index.
        st.line_chart(data, x=x_axis, y=y_axis)
        st.write(f"Line Chart: {title}")

    elif chart_type == 'Bar Chart':
        x_axis = st.selectbox("Select X-axis Column", data.columns)
        y_axis = st.selectbox("Select Y-axis Column", data.columns, index=default_y_index)
        title = st.text_input("Enter Chart Title", "Bar Chart")
        st.bar_chart(data, x=x_axis, y=y_axis)
        st.write(f"Bar Chart: {title}")

    elif chart_type == 'Histogram':
        selected_column = st.selectbox("Select Column for Histogram", data.columns)
        bins = st.slider("Number of Bins", min_value=10, max_value=100, value=30)
        title = st.text_input("Enter Chart Title", "Histogram")
        # Draw on an explicit figure: calling st.pyplot() with no argument
        # relies on matplotlib's global figure, which Streamlit deprecates
        # and which leaks state between reruns.
        fig, ax = plt.subplots()
        # Drop missing values so the bin range is computed from real data only.
        ax.hist(data[selected_column].dropna(), bins=bins)
        ax.set_title(title)
        ax.set_xlabel(selected_column)
        ax.set_ylabel('Frequency')
        st.pyplot(fig)
" also write a requirements.txt file

Files changed (1) hide show
  1. app.py +14 -7
app.py CHANGED
@@ -4,6 +4,7 @@ import seaborn as sns
4
  import matplotlib.pyplot as plt
5
  from dataset_import import load_data
6
  from data_cleaning import clean_data
 
7
 
8
  st.title("AnalyzeYT Dataset Analysis")
9
 
@@ -24,13 +25,6 @@ if data is not None:
24
  st.write("Data Description:")
25
  st.write(data.describe())
26
 
27
- # Add correlation matrix
28
- # st.write("Correlation Matrix:")
29
- # corr = data.corr()
30
- # st.write(corr)
31
- # sns.heatmap(corr, annot=True, cmap='coolwarm')
32
- # st.pyplot()
33
-
34
  # Data visualization options
35
  st.write("Data Visualization:")
36
  chart_type = st.selectbox("Select Chart Type", ['Line Chart', 'Bar Chart', 'Histogram'])
@@ -58,3 +52,16 @@ if data is not None:
58
  plt.xlabel(selected_column)
59
  plt.ylabel('Frequency')
60
  st.pyplot()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  import matplotlib.pyplot as plt
5
  from dataset_import import load_data
6
  from data_cleaning import clean_data
7
+ from data_labelling import label_dataset
8
 
9
  st.title("AnalyzeYT Dataset Analysis")
10
 
 
25
  st.write("Data Description:")
26
  st.write(data.describe())
27
 
 
 
 
 
 
 
 
28
  # Data visualization options
29
  st.write("Data Visualization:")
30
  chart_type = st.selectbox("Select Chart Type", ['Line Chart', 'Bar Chart', 'Histogram'])
 
52
  plt.xlabel(selected_column)
53
  plt.ylabel('Frequency')
54
  st.pyplot()
55
+
56
+ # Add option for labeling the dataset
57
+ st.write("Labeling Options:")
58
+ label_option = st.radio("Do you want to label your dataset?", ('No', 'Yes'))
59
+
60
+ if label_option == 'Yes':
61
+ output_name = st.text_input("Enter Output File Name", "labeled_dataset.csv")
62
+ if st.button("Run Labeling"):
63
+ client = st.text_input("Enter your Gradio Client API Key") # Placeholder for Gradio API key input
64
+ labeled_data = label_dataset(data, client)
65
+ labeled_data.to_csv(output_name, index=False)
66
+ st.write("Labeling Completed. Download your file below:")
67
+ st.download_button(label="Download Labeled Dataset", data=labeled_data.to_csv(), file_name=output_name, mime='text/csv')