import pandas as pd import streamlit as st import plotly.express as px import numpy as np import plotly.graph_objs as go from function import ( process_data, predict_late_student, predict_rank, predict_one_student, show_boxplot1, ) from datetime import datetime from PIL import Image import base64 import re import sqlite3 df = pd.DataFrame() def color_cell(val): if val == "not late": color = "green" elif val == "may late": color = "yellow" elif val == "late": color = "red" else: color = "black" return f"color: {color};" def clear_resources(): """Clears all resources from the st.session_state.""" for key in list(st.session_state.keys()): if key.startswith("resource"): del st.session_state[key] def get_year(student_id): year_str = "" for char in student_id: if char.isdigit(): year_str += char if len(year_str) == 2: break return int(year_str) def generate_comment(median): if median < 30: comment = f"The median score for {course} is quite low at {median}. Students may need to work harder to improve their performance." elif median < 50: comment = f"The median score for {course} is below average at {median}. Students should work on improving their understanding of the material." elif median < 80: comment = f"The median score for {course} is solid at {median}. Students are making good progress but could still work on improving their skills." else: comment = f"The median score for {course} is outstanding at {median}. Students are doing an excellent job in this course." return comment favicon = "R.png" hcm = "HCM.png" intera = "Logo-iuoss-trans.png" st.set_page_config( page_title="Student System", page_icon=favicon, layout="wide", ) currentYear = datetime.now().year im1 = Image.open("R.png") im2 = Image.open("HCM.png") im3 = Image.open("Logo-iuoss-trans.png") col1, col2, col3 = st.columns([1, 3, 1]) with col1: st.image(im1, width=150) with col2: st.markdown( "

TRUONG QUOC AN'S THESIS

", unsafe_allow_html=True, ) with col3: st.image(im2, width=250) @st.cache_data() def score_table(): # Establish a connection to the database conn = sqlite3.connect("database.db") cursor = conn.cursor() # Fetch data from the tables cursor.execute('''SELECT Students.MaSV, Enrollment.MaMH, Courses.TenMH, Enrollment.NHHK, Enrollment.DiemHP, Students.DTBTK FROM Students INNER JOIN Enrollment ON Students.MaSV = Enrollment.MaSV INNER JOIN Courses ON Enrollment.MaMH = Courses.MaMH''') data = cursor.fetchall() # Create a DataFrame df = pd.DataFrame(data, columns=['MaSV', 'MaMH', 'TenMH', 'NHHK', 'DiemHP', 'DTBTK']) df = df.drop_duplicates() # Close the database connection conn.close() return df @st.cache_data() def score_table_for_student(): with sqlite3.connect("database.db") as conn: cursor = conn.cursor() cursor.execute(''' SELECT DISTINCT Students.MaSV, Enrollment.MaMH, Courses.TenMH, Enrollment.NHHK, Enrollment.DiemHP, Students.DTBTK FROM Students INNER JOIN Enrollment ON Students.MaSV = Enrollment.MaSV INNER JOIN Courses ON Enrollment.MaMH = Courses.MaMH ''') results = cursor.fetchall() df = pd.DataFrame(results, columns=['MaSV', 'MaMH', 'TenMH', 'NHHK', 'DiemHP', 'DTBTK']) cursor.execute(''' SELECT MaSV, NHHK, SoTCDat FROM Students ''') results = cursor.fetchall() df1 = pd.DataFrame(results, columns=['MaSV', 'NHHK', 'SoTCDat']) merged_df = pd.merge(df, df1, on=['MaSV', 'NHHK']) return merged_df st.sidebar.image(im3) st.sidebar.title("Student Performance Prediction System") option = ["Dashboard","Prediction Performance", "Grade Distribution Tables"] tabs = st.sidebar.selectbox("Select an option", option) def filter_dataframe(df, column, value): if value == "All": return df else: return df[df[column] == value] if tabs == "Dashboard": clear_resources() raw_data = score_table() df = process_data(raw_data) additional_selection = " " unique_values_major = df["Major"].unique() unique_values_major = [ "BA", "BE", "BT", "CE", "EE", "EN", "EV", "IE", "MA", "SE", "IT", ] unique_values_major = sorted(unique_values_major, key=lambda s: s) major = st.selectbox("Select a school:", unique_values_major) df = filter_dataframe(df, "Major", major) dfa = filter_dataframe(df, "Major", major) unique_values_school = df["MaSV_school"].unique() all_values_school = np.concatenate([["All"], unique_values_school]) no_numbers = [x for x in all_values_school if not re.search(r"\d", str(x))] if len(no_numbers) == 2: school = no_numbers[1] else: col1, col2 = st.columns(2) with col1: school = st.selectbox("Select a major:", no_numbers) if school != "All": values = [x for x in no_numbers if x != "All" and x != school] values = np.concatenate([[" "], values]) with col2: additional_selection = st.selectbox( "Select another major for comparisons:", values ) if additional_selection != " ": dfa = filter_dataframe(dfa, "MaSV_school", additional_selection) df = filter_dataframe(df, "MaSV_school", school) unique_values_year = df["Year"].unique() all_values_year = np.concatenate([["All"], unique_values_year]) col1, col2 = st.columns(2) with col1: year = st.selectbox("Select a year:", all_values_year) with col2: if year != "All" and additional_selection == " ": year_list = [x for x in all_values_year if x != "All" and x != year] year_list = np.concatenate([[" "], year_list]) year_a = st.selectbox("Select another year for comparisons:", year_list) elif year == "All": year_a = " " elif year != "All" and additional_selection != " ": year_a = year if year_a != " ": dfa = filter_dataframe(dfa, "Year", year_a) dfa.dropna(axis=1, thresh=1, inplace=True) else: year_a = " " df = filter_dataframe(df, "Year", year) new1_df = df.DTBTK new1_dfa = dfa.DTBTK show_boxplot1( new1_df, new1_dfa, major, school, year, additional_selection="", year_a="" ) df.dropna(axis=1, thresh=1, inplace=True) new_df = df.iloc[:, :-4].dropna(axis=1, thresh=10).apply(pd.to_numeric) new_dfa = dfa.iloc[:, :-4].dropna(axis=1, thresh=10).apply(pd.to_numeric) list1 = new_df.columns.tolist() list2 = new_dfa.columns.tolist() if (year != "All" and year_a != " ") or ( school != "All" and additional_selection != " " ): dfac = new_dfa.columns[:-4].tolist() common_elements = np.intersect1d(list1, list2) merged_array = np.concatenate((list1, list2), axis=None) list3 = np.intersect1d(merged_array, common_elements) new_df = new_df[list3] new_dfa = new_dfa[list3] if additional_selection != " ": show_boxplot = st.checkbox("Show Boxplot for All Course", key="checkbox1") if show_boxplot: fig = px.box(new_df) fig1 = px.box(new_dfa) fig.update_layout( title="Boxplot of " + major + school + " student in " + year ) fig1.update_layout( title="Boxplot of " + major + additional_selection + " student in " + year ) st.plotly_chart(fig, use_container_width=True) st.plotly_chart(fig1, use_container_width=True) elif additional_selection == " " and year_a != " ": show_boxplot = st.checkbox("Show Boxplot for All Course", key="checkbox1") if show_boxplot: fig = px.box(new_df) fig1 = px.box(new_dfa) fig.update_layout( title="Boxplot of " + major + school + " student in " + year ) fig1.update_layout( title="Boxplot of " + major + school + " student in " + year_a ) st.plotly_chart(fig, use_container_width=True) st.plotly_chart(fig1, use_container_width=True) elif additional_selection == " ": show_boxplot = st.checkbox("Show Boxplot for All Course", key="checkbox1") if show_boxplot: fig = px.box(new_df) fig.update_layout(title="Boxplot of " + major + " student in " + year) st.plotly_chart(fig, use_container_width=True) options = df.columns[:-4] course_data_dict = {course: df[course].dropna() for course in options} valid_courses = [ course for course, data in course_data_dict.items() if len(data) > 1 ] if (year != "All" and year_a != " ") or ( school != "All" and additional_selection != " " ): dfac = new_dfa.columns[:-4].tolist() common_elements = np.intersect1d(valid_courses, dfac) merged_array = np.concatenate((valid_courses, dfac), axis=None) valid_courses = np.intersect1d(merged_array, common_elements) if len(valid_courses) > 5: course = st.selectbox("Select a course:", valid_courses) elif len(valid_courses) == 1: course = valid_courses[0] else: st.write("No valid course data found!") st.stop() course_data = course_data_dict[course] if len(course_data) > 1: if school == "All": st.write("Course:", course, " of ", major, " student") else: st.write("Course:", course, " of ", major + school, " student") st.write(generate_comment(course_data.median())) else: st.write("No data available for the selected course.") col1, col2, col3, col4 = st.columns(4) with col1: counts, bins = np.histogram(course_data, bins=np.arange(0, 110, 10)) total_count = len(course_data) frequencies_percentage = (counts / total_count) * 100 grade_bins = [f"{bins[i]}-{bins[i+1]}" for i in range(len(bins) - 1)] df = pd.DataFrame( {"Grade": grade_bins, "Grading percentage": frequencies_percentage} ) df["Grading percentage"] = df["Grading percentage"].map( lambda x: "{:.2f}".format(x) ) st.table(df) with col2: fig = go.Figure() fig.add_trace( go.Scatter( x=bins[:-1], y=frequencies_percentage, mode="lines", name="Frequency" ) ) fig.update_layout( title="Frequency Range for {}".format(course), xaxis_title="Score", yaxis_title="Percentage", height=400, width=400, ) st.plotly_chart(fig, use_container_width=True) with col3: fig = go.Figure() fig.add_trace(go.Box(y=course_data, name="Box plot")) fig.update_layout( title="Box plot of Scores for {}".format(course), yaxis_title="Score", height=400, width=400, ) st.plotly_chart(fig, use_container_width=True) with col4: raw_data1 = raw_data.copy() raw_data1["major"] = raw_data1["MaSV"].str.slice(0, 2) raw_data1.replace(["WH", "VT", "I"], np.nan, inplace=True) raw_data1 = raw_data1[~raw_data1["DiemHP"].isin(["P", "F", "PC"])] if major != "All": raw_data1 = raw_data1[raw_data1["major"] == major] raw_data1["MaSV_school"] = raw_data1["MaSV"].str.slice(2, 4) if school != "All": raw_data1 = raw_data1[raw_data1["MaSV_school"] == school] df1 = raw_data1[["TenMH", "NHHK", "DiemHP"]].copy() df1["DiemHP"] = df1["DiemHP"].replace('', pd.NA).dropna().astype(float) df1["NHHK"] = df1["NHHK"].apply(lambda x: str(x)[:4] + " S " + str(x)[4:]) selected_TenMH = " " + course filtered_df1 = df1[df1["TenMH"] == selected_TenMH] mean_DiemHP = ( filtered_df1.groupby("NHHK")["DiemHP"] .mean() .round(1) .reset_index(name="Mean") ) if year != "All": st.write("") else: fig = px.line( mean_DiemHP, x="NHHK", y="Mean", title=f"Mean Course Score for{selected_TenMH} through Semeters", ) fig.update_layout(xaxis_title="Semeters",height=400, width=400) st.plotly_chart(fig, use_container_width=True) if (year != "All" and year_a != " ") or ( school != "All" and additional_selection != " " ): course_data_dict = {course: new_dfa[course]} course_data = course_data_dict[course] st.write( "Course:", course, " of ", major + additional_selection, " student in ", year_a, ) col1, col2, col3, col4 = st.columns(4) with col1: course_data_filtered = [x for x in course_data if not np.isnan(x)] counts, bins = np.histogram( course_data_filtered, bins=np.arange(0, 110, 10) ) total_count = len(course_data_filtered) frequencies_percentage = (counts / total_count) * 100 grade_bins = [f"{bins[i]}-{bins[i+1]}" for i in range(len(bins) - 1)] df1 = pd.DataFrame( {"Grade": grade_bins, "Grading percentage": frequencies_percentage} ) df1["Grading percentage"] = df1["Grading percentage"].map( lambda x: "{:.2f}".format(x) ) st.table(df1) with col2: fig = go.Figure() fig.add_trace( go.Scatter( x=bins[:-1], y=frequencies_percentage, mode="lines", name="Frequency", ) ) fig.update_layout( title="Frequency Range for {}".format(course), xaxis_title="Score", yaxis_title="Percentage", height=400, width=400, ) st.plotly_chart(fig, use_container_width=True) with col3: fig = go.Figure() fig.add_trace(go.Box(y=course_data, name="Box plot")) fig.update_layout( title="Box plot of Scores for {}".format(course), yaxis_title="Score", height=400, width=400, ) st.plotly_chart(fig, use_container_width=True) with col4: raw_data["major"] = raw_data["MaSV"].str.slice(0, 2) raw_data.replace(["WH", "VT", "I"], np.nan, inplace=True) raw_data = raw_data[~raw_data["DiemHP"].isin(["P", "F", "PC"])] if major != "All": raw_data = raw_data[raw_data["major"] == major] raw_data["MaSV_school"] = raw_data["MaSV"].str.slice(2, 4) raw_data = raw_data[raw_data["MaSV_school"] == additional_selection] df1 = raw_data[["TenMH", "NHHK", "DiemHP"]].copy() df1["DiemHP"] = df1["DiemHP"].replace('', pd.NA).dropna().astype(float) df1["NHHK"] = df1["NHHK"].apply(lambda x: str(x)[:4] + " S " + str(x)[4:]) selected_TenMH = " " + course filtered_df1 = df1[df1["TenMH"] == selected_TenMH] mean_DiemHP = ( filtered_df1.groupby("NHHK")["DiemHP"] .mean() .round(1) .reset_index(name="Mean") ) if year != "All": st.write("") else: fig = px.line( mean_DiemHP, x="NHHK", y="Mean", title=f"Mean Course Score for{selected_TenMH} through Semeters", ) fig.update_layout(xaxis_title="Semeters",height=400, width=400) st.plotly_chart(fig, use_container_width=True) variables_to_delete = [ 'raw_data1', 'df1', 'filtered_df1', 'mean_DiemHP', 'counts', 'bins', 'total_count', 'frequencies_percentage', 'grade_bins', 'fig1', 'common_elements', 'merged_array', 'list3', 'dfac', 'fig', 'new_df', 'new_dfa', 'new1_df', 'new1_dfa', 'course_data', 'options', 'valid_courses', 'list2', 'list1' ] for variable in variables_to_delete: if variable in locals(): del locals()[variable] elif tabs == "Prediction Performance": clear_resources() raw_data = pd.read_csv("All_major.csv") raw_data["DTBTKH4"] = raw_data["DTBTK"]/25 df=raw_data.copy() df["MaSV_school"] = df["MaSV"].str.slice(2, 4) df["Major"] = df["MaSV"].str.slice(0, 2) unique_values_major = df["Major"].unique() unique_values_major = [ "BA", "BE", "BT", "CE", "EE", "EN", "EV", "IE", "MA", "SE", "IT", ] unique_values_major = sorted(unique_values_major, key=lambda s: s) col1, col2 = st.columns(2) with col1: major = st.selectbox("Select a school:", unique_values_major) df = filter_dataframe(df, "Major", major) unique_values_school = df["MaSV_school"].unique() all_values_school = np.concatenate([["All"], unique_values_school]) no_numbers = [x for x in all_values_school if not re.search(r"\d", str(x))] if len(no_numbers) == 2: school = no_numbers[1] with col2: school = st.selectbox("Select a major:", no_numbers) df = filter_dataframe(df, "MaSV_school", school) predict = predict_late_student(df) rank = predict_rank(df) predict = pd.merge(predict, rank, on="MaSV") predict.rename(columns={"Mean_Cre": "Mean Credit"}, inplace=True) rank_mapping = { "Khá": "Good", "Trung Bình Khá": "Average good", "Giỏi": "Very good", "Kém": "Very weak", "Trung Bình": "Ordinary", "Yếu": "Weak", "Xuất Sắc": "Excellent", } predict["Pred Rank"].replace(rank_mapping, inplace=True) df_late = predict MaSV = st.text_input("Enter Student ID:", key="MaSV") def clear_form(): st.session_state["MaSV"] = "" if st.button("Clear", on_click=clear_form): MaSV = "" if MaSV: df_filtered = predict[predict["MaSV"] == MaSV] styled_table = ( df_filtered[ ["MaSV", "GPA", "Mean Credit", "Pred Rank", "Progress", "Semeters"] ] .style.applymap(color_cell) .format({"GPA": "{:.2f}", "Mean Credit": "{:.1f}", "Semeters": "{:.1f}"}) ) with st.container(): st.table(styled_table) predict_one_student(df, MaSV) else: df_late = predict df_late["Year"] = 2000 + df_late["MaSV"].apply(get_year) df_late = df_late[ (df_late["Year"] != currentYear - 1) & (df_late["Year"] != currentYear - 2) ] year = st.selectbox("Select Year", options=df_late["Year"].unique()) df_filtered = df_late[df_late["Year"] == year] styled_table = ( df_filtered[ ["MaSV", "GPA", "Mean Credit", "Pred Rank", "Progress", "Semeters"] ] .style.applymap(color_cell) .format({"GPA": "{:.2f}", "Mean Credit": "{:.2f}", "Semeters": "{:.2f}"}) ) csv = df_filtered.to_csv(index=False) b64 = base64.b64encode(csv.encode()).decode() href = f'Download CSV' st.markdown(href, unsafe_allow_html=True) legend_order = [ "Excellent", "Very good", "Good", "Average good", "Ordinary", "Weak", "Very weak", ] fig1 = px.pie( df_filtered, names="Pred Rank", title="Pred Rank", color_discrete_sequence=px.colors.sequential.Mint, height=400, width=400, labels=legend_order, ) fig2 = px.pie( df_filtered, names="Progress", title="Progress", color_discrete_sequence=px.colors.sequential.Peach, height=400, width=400, ) fig1.update_layout( title={ "text": "Pred Rank", "y": 0.95, "x": 0.35, "xanchor": "center", "yanchor": "top", } ) fig2.update_layout( title={ "text": "Progress", "y": 0.95, "x": 0.35, "xanchor": "center", "yanchor": "top", } ) col3, col1, col2 = st.columns([2, 1, 1]) with col3: st.dataframe(styled_table,use_container_width=True) with col1: st.plotly_chart(fig1, use_container_width=True) with col2: st.plotly_chart(fig2, use_container_width=True) variables_to_delete = [ "raw_data", "df", "df_late", "MaSV", "predict", "rank", "rank_mapping", "styled_table", "df_filtered", "csv", "b64", "href", "legend_order", "fig1", "fig2", "col1", "col2", "col3" ] # Delete the variables after running the code for variable_name in variables_to_delete: if variable_name in locals(): del locals()[variable_name] elif tabs == "Grade Distribution Tables": clear_resources() raw_data = score_table() df = process_data(raw_data) additional_selection = " " unique_values_major = df["Major"].unique() unique_values_major = [ "BA", "BE", "BT", "CE", "EE", "EN", "EV", "IE", "MA", "SE", "IT", ] unique_values_major = sorted(unique_values_major, key=lambda s: s) col1, col2 = st.columns(2) with col1: major = st.selectbox("Select a school:", unique_values_major) df = filter_dataframe(df, "Major", major) unique_values_school = df["MaSV_school"].unique() all_values_school = np.concatenate([["All"], unique_values_school]) no_numbers = [x for x in all_values_school if not re.search(r"\d", str(x))] if len(no_numbers) == 2: school = no_numbers[1] with col2: school = st.selectbox("Select a major:", no_numbers) df = filter_dataframe(df, "MaSV_school", school) unique_values_year = df["Year"].unique() all_values_year = np.concatenate([["All"], unique_values_year]) year = st.selectbox("Select a year:", all_values_year) options = df.columns[:-4] course_data_dict = {course: df[course].dropna() for course in options} valid_courses = [ course for course, data in course_data_dict.items() if len(data) > 1 ] course = "All" if st.button("Generate Chart"): courses_per_row = 4 num_courses = len(valid_courses) num_rows = (num_courses + courses_per_row - 1) // courses_per_row for row in range(num_rows): start_index = row * courses_per_row end_index = min((row + 1) * courses_per_row, num_courses) courses_in_row = valid_courses[start_index:end_index] for course in courses_in_row: course_data = course_data_dict[course] course_data = course_data.astype(float) st.markdown(f"Course: **{course}**") st.write("Number of examinations: ", str(len(course_data))) col1, col2, col3, col4 = st.columns(4) with col1: counts, bins = np.histogram(course_data, bins=np.arange(0, 110, 10)) total_count = len(course_data) frequencies_percentage = (counts / total_count) * 100 grade_bins = [ f"{bins[i]}-{bins[i+1]}" for i in range(len(bins) - 1) ] result_array = [] cumulative_sum = 0 for element in frequencies_percentage: cumulative_sum += element result_array.append(cumulative_sum) df = pd.DataFrame( { "Grade": grade_bins, "Grading percentage": frequencies_percentage, "Cumulative percentage": result_array } ) df["Grading percentage"] = df["Grading percentage"].map( lambda x: "{:.2f}".format(x) ) df["Cumulative percentage"] = df["Cumulative percentage"].map( lambda x: "{:.2f}".format(x) ) st.table(df) with col2: fig = go.Figure() fig.add_trace( go.Scatter( x=bins[:-1], y=frequencies_percentage, mode="lines", name="Frequency", ) ) fig.update_layout( title="Frequency Range", xaxis_title="Score", yaxis_title="Percentage", height=400, width=400, ) st.plotly_chart(fig, use_container_width=True) with col3: fig = go.Figure() fig.add_trace(go.Box(y=course_data, name="Box plot")) fig.update_layout( title="Box plot", yaxis_title="Score", height=400, width=400, ) st.plotly_chart(fig, use_container_width=True) with col4: raw_data1 = raw_data.copy() raw_data1["major"] = raw_data1["MaSV"].str.slice(0, 2) raw_data1.replace(["WH", "VT", "I"], np.nan, inplace=True) raw_data1 = raw_data1[~raw_data1["DiemHP"].isin(["P", "F", "PC"])] if major != "All": raw_data1 = raw_data1[raw_data1["major"] == major] raw_data1["MaSV_school"] = raw_data1["MaSV"].str.slice(2, 4) if school != "All": raw_data1 = raw_data1[raw_data1["MaSV_school"] == school] df1 = raw_data1[["TenMH", "NHHK", "DiemHP"]].copy() df1["DiemHP"] = df1["DiemHP"].astype(float) df1["NHHK"] = df1["NHHK"].apply( lambda x: str(x)[:4] + " S " + str(x)[4:] ) selected_TenMH = " " + course filtered_df1 = df1[df1["TenMH"] == selected_TenMH] mean_DiemHP = ( filtered_df1.groupby("NHHK")["DiemHP"] .mean() .round(1) .reset_index(name="Mean") ) if year != "All": st.write("") else: fig = px.line( mean_DiemHP, x="NHHK", y="Mean", title=f"Mean DiemHP through Semesters", ) fig.update_layout(height=400, width=400) st.plotly_chart(fig, use_container_width=True) del raw_data1, df1, filtered_df1, mean_DiemHP, counts, bins, total_count, frequencies_percentage, grade_bins, fig del course_data, course_data_dict, valid_courses st.stop()