In [27]:
import pickle
import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix, roc_curve, roc_auc_score,auc

In [3]:
file_path = "roc_data2.pkl"

# Deserialize the saved results. NOTE: pickle.load can execute arbitrary code,
# so this must only ever be pointed at a trusted, locally produced file.
with open(file_path, 'rb') as pickle_handle:
    data = pickle.load(pickle_handle)

# `data` is indexed positionally further down (data[0], data[1]).

In [4]:
# Percentage of schools drawn from each graduation-rate group.
inc_slider = 1

# Locations of the fine-tuning test split and its per-row metadata.
parent_location = "ratio_proportion_change3_2223/sch_largest_100-coded/finetuning/"
test_info_location = parent_location + "fullTest/test_info.txt"
test_location = parent_location + "fullTest/test.txt"

test_info = pd.read_csv(test_info_location, sep=',', header=None, engine='python')
grad_rate_data = pd.DataFrame(
    pd.read_pickle('school_grduation_rate.pkl'),
    columns=['school_number', 'grad_rate'],
)

# Column 0 of test_info is matched against school_number, so only schools
# that actually occur in the test split are kept.
unique_schools = test_info[0].unique()
schools = grad_rate_data.loc[grad_rate_data['school_number'].isin(unique_schools)]

# Cut-off separating "high" from "low" graduation-rate schools (adjust as needed).
grad_rate_threshold = 0.9
is_high_grad = schools['grad_rate'] >= grad_rate_threshold
is_low_grad = schools['grad_rate'] < grad_rate_threshold
high_grad_schools = schools.loc[is_high_grad, 'school_number'].unique()
low_grad_schools = schools.loc[is_low_grad, 'school_number'].unique()

# Draw the same fraction of schools from each group; the fixed random_state
# keeps the draw reproducible.
sample_fraction = inc_slider / 100
high_sample = pd.Series(high_grad_schools).sample(frac=sample_fraction, random_state=1).tolist()
low_sample = pd.Series(low_grad_schools).sample(frac=sample_fraction, random_state=1).tolist()

# All sampled schools, and the test_info row labels that belong to them.
random_schools = high_sample + low_sample
indices = test_info.loc[test_info[0].isin(random_schools)].index.tolist()



In [5]:
# test_info row labels of the sampled schools, kept separately per group.
in_high_sample = test_info[0].isin(high_sample)
in_low_sample = test_info[0].isin(low_sample)
high_indices = test_info.loc[in_high_sample].index.tolist()
low_indices = test_info.loc[in_low_sample].index.tolist()

In [6]:
# Total number of sampled rows across both graduation-rate groups.
len(high_indices) + len(low_indices)


997

In [7]:
# Load the test file and select rows based on indices
test = pd.read_csv(test_location, sep=',', header=None, engine='python')
selected_rows_df2 = test.loc[indices]
selected_rows_df2

Unnamed: 0,0
5342,PercentChange-0\tNumeratorQuantity1-0\tNumerat...
5343,PercentChange-0\tNumeratorQuantity2-0\tNumerat...
5344,PercentChange-0\tNumeratorQuantity2-0\tNumerat...
5345,PercentChange-0\tNumeratorQuantity2-2\tNumerat...
5346,PercentChange-0\tNumeratorQuantity2-0\tDenomin...
...,...
113359,PercentChange-0\tNumeratorQuantity2-2\tNumerat...
113360,PercentChange-0\tNumeratorQuantity2-0\tNumerat...
113361,PercentChange-0\tNumeratorQuantity2-0\tNumerat...
113362,PercentChange-0\tNumeratorQuantity2-0\tNumerat...


In [8]:
# Tag every selected row as 'high' or 'low'. Membership is tested against a
# set built once, instead of scanning the high_indices list for every row.
high_index_lookup = set(high_indices)
graduation_groups = [
    'high' if idx in high_index_lookup else 'low'
    for idx in selected_rows_df2.index
]
len(graduation_groups)

997

In [9]:
# Column 6 of test_info decides the group: 0 -> 'opt_task1', anything else -> 'opt_task2'.
# The column is fetched once for all selected rows rather than one scalar at a time.
selected_task_flags = test_info.loc[selected_rows_df2.index, 6]
opt_task_groups = [
    'opt_task1' if flag == 0 else 'opt_task2'
    for flag in selected_task_flags
]
len(opt_task_groups)

997

In [10]:
# First two entries of the loaded pickle: true labels, then predicted scores
# (presumably in the same order as the selected rows -- TODO confirm).
t_label, p_label = data[0], data[1]

In [12]:
def _labels_for(triples, group, position):
    """Return element `position` of every (group, t, p) triple tagged `group`.

    position 1 selects the true label, position 2 the predicted label.
    """
    return [triple[position] for triple in triples if triple[0] == group]


# Pair each row's group tag with its true and predicted label.
# zip() stops at the shortest input, so all three are assumed equally long.
aligned_labels = list(zip(graduation_groups, t_label, p_label))
opt_task_aligned = list(zip(opt_task_groups, t_label, p_label))

# Labels split by graduation-rate group.
high_t_labels = _labels_for(aligned_labels, 'high', 1)
low_t_labels = _labels_for(aligned_labels, 'low', 1)
high_p_labels = _labels_for(aligned_labels, 'high', 2)
low_p_labels = _labels_for(aligned_labels, 'low', 2)

# Labels split by optional-task group.
opt_task1_t_labels = _labels_for(opt_task_aligned, 'opt_task1', 1)
opt_task1_p_labels = _labels_for(opt_task_aligned, 'opt_task1', 2)
opt_task2_t_labels = _labels_for(opt_task_aligned, 'opt_task2', 1)
opt_task2_p_labels = _labels_for(opt_task_aligned, 'opt_task2', 2)


In [15]:

def _roc_auc_or_none(true_labels, scores):
    """Return the ROC-AUC, or None when fewer than two distinct true labels exist."""
    if len(set(true_labels)) > 1:
        return roc_auc_score(true_labels, scores)
    return None


opt_task1_roc_auc = _roc_auc_or_none(opt_task1_t_labels, opt_task1_p_labels)
opt_task2_roc_auc = _roc_auc_or_none(opt_task2_t_labels, opt_task2_p_labels)

print(f"opt_task1 ROC-AUC: {opt_task1_roc_auc}")
print(f"opt_task2 ROC-AUC: {opt_task2_roc_auc}")

opt_task1 ROC-AUC: 0.7592686234399062
opt_task2 ROC-AUC: 0.7268598353289777


In [50]:
# Both groups together should account for every selected row.
len(low_t_labels) + len(high_t_labels)

997

In [16]:
# The score stays None for a group whose true labels contain fewer than two
# distinct values, so roc_auc_score is never called on such a group.
high_roc_auc = None
if len(set(high_t_labels)) > 1:
    high_roc_auc = roc_auc_score(high_t_labels, high_p_labels)

low_roc_auc = None
if len(set(low_t_labels)) > 1:
    low_roc_auc = roc_auc_score(low_t_labels, low_p_labels)

print("ROC-AUC Score for High Graduation Rate Group:", high_roc_auc)
print("ROC-AUC Score for Low Graduation Rate Group:", low_roc_auc)

ROC-AUC Score for High Graduation Rate Group: 0.675
ROC-AUC Score for Low Graduation Rate Group: 0.7489795918367347


In [21]:
def analyze_row(row):
    """Report whether each optional task has a successful step in `row`.

    The row is split on tab characters. A field counts as a successful attempt
    at a sub-task when it contains both the sub-task name and "OK"; an optional
    task counts as done when at least one of its sub-tasks has such a field.

    Args:
        row: One tab-separated line of step records.

    Returns:
        A tuple (opt1_done, opt2_done) of booleans.
    """
    steps = row.split("\t")

    # Sub-task names belonging to OptionalTask_1 and OptionalTask_2, in that order.
    subtasks_by_option = (
        ("DenominatorFactor", "NumeratorFactor", "EquationAnswer"),
        ("FirstRow2:1", "FirstRow2:2", "FirstRow1:1", "FirstRow1:2",
         "SecondRow", "ThirdRow"),
    )

    def has_successful_step(subtasks):
        # Substring matching on purpose: a field only needs to mention the
        # sub-task name and "OK" somewhere in its text.
        return any(
            name in step and "OK" in step
            for name in subtasks
            for step in steps
        )

    opt1_done, opt2_done = (has_successful_step(names) for names in subtasks_by_option)
    return opt1_done, opt2_done

# Read the raw lines of test_info.txt. A dedicated name is used so that the
# pickle payload bound to `data` earlier in the notebook is not overwritten
# (re-running the label cell afterwards would otherwise pick up these lines).
with open(test_info_location, "r") as info_file:
    test_info_lines = info_file.readlines()

# Column 6 of test_info holds the ideal-task flag for each instance; the loop
# below treats 0 as OptionalTask_1 and 1 as OptionalTask_2.
ideal_tasks = test_info[6]

# Ideal-task flag -> key used in task_counts (and in the printed summary).
ideal_flag_to_task = {0: 1, 1: 2}

# One set of counters per ideal task.
task_counts = {
    task_number: {"only_opt1": 0, "only_opt2": 0, "both": 0, "none": 0}
    for task_number in ideal_flag_to_task.values()
}

# (opt1_done, opt2_done) -> name of the counter to increment.
outcome_keys = {
    (True, False): "only_opt1",
    (False, True): "only_opt2",
    (True, True): "both",
    (False, False): "none",
}

# Analyze rows. Line i of the file is paired with entry i of ideal_tasks, so
# blank lines are skipped without shifting the numbering.
for i, row in enumerate(test_info_lines):
    row = row.strip()
    if not row:
        continue

    ideal_task = ideal_tasks[i]
    opt1_done, opt2_done = analyze_row(row)

    # Rows whose flag is neither 0 nor 1 are not counted.
    task_number = ideal_flag_to_task.get(ideal_task)
    if task_number is not None:
        task_counts[task_number][outcome_keys[(opt1_done, opt2_done)]] += 1

# Build the text summary (every line, including the last, ends with a newline).
summary_lines = ["Task Analysis Summary:", "-----------------------"]
for ideal_task, counts in task_counts.items():
    summary_lines.extend([
        f"Ideal Task = OptionalTask_{ideal_task}:",
        f"  Only OptionalTask_1 done: {counts['only_opt1']}",
        f"  Only OptionalTask_2 done: {counts['only_opt2']}",
        f"  Both done: {counts['both']}",
        f"  None done: {counts['none']}",
    ])
output_summary = "\n".join(summary_lines) + "\n"

print(output_summary)


Task Analysis Summary:
-----------------------
Ideal Task = OptionalTask_1:
  Only OptionalTask_1 done: 22501
  Only OptionalTask_2 done: 20014
  Both done: 24854
  None done: 38
Ideal Task = OptionalTask_2:
  Only OptionalTask_1 done: 12588
  Only OptionalTask_2 done: 18942
  Both done: 15147
  None done: 78



In [23]:
import gradio as gr
from matplotlib.figure import Figure

In [28]:
# Load a precomputed ROC curve; the third stored element is not used here.
# NOTE: pickle.load can execute arbitrary code -- only read trusted files.
with open("roc_data.pkl", "rb") as roc_file:
    fpr, tpr, _ = pickle.load(roc_file)
roc_auc = auc(fpr, tpr)

# Build the figure through the object-oriented API (no pyplot state involved).
fig = Figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(fpr, tpr, color='blue', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
# Diagonal reference line from (0, 0) to (1, 1).
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Curve (ROC)')
ax.legend(loc="lower right")
ax.grid()