Spaces:
Sleeping
Sleeping
implemented histogram
Browse files- app.py +149 -10
- result.txt +1 -1
app.py
CHANGED
@@ -136,27 +136,47 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
|
|
136 |
# Helper function to evaluate task attempts
|
137 |
def evaluate_tasks(fields, tasks):
|
138 |
task_status = {}
|
|
|
|
|
|
|
139 |
for task in tasks:
|
|
|
140 |
relevant_attempts = [f for f in fields if task in f]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
if any("OK" in attempt for attempt in relevant_attempts):
|
142 |
task_status[task] = "Attempted (Successful)"
|
|
|
143 |
elif any("ERROR" in attempt for attempt in relevant_attempts):
|
144 |
task_status[task] = "Attempted (Error)"
|
145 |
elif any("JIT" in attempt for attempt in relevant_attempts):
|
146 |
task_status[task] = "Attempted (JIT)"
|
147 |
else:
|
148 |
task_status[task] = "Unattempted"
|
149 |
-
return task_status
|
150 |
|
151 |
# Evaluate tasks for each category
|
152 |
-
optional_task_1_status = evaluate_tasks(fields, optional_task_1_subtasks)
|
153 |
-
optional_task_2_status = evaluate_tasks(fields, optional_task_2_subtasks)
|
154 |
|
155 |
# Check if tasks have any successful attempt
|
156 |
opt1_done = any(status == "Attempted (Successful)" for status in optional_task_1_status.values())
|
157 |
opt2_done = any(status == "Attempted (Successful)" for status in optional_task_2_status.values())
|
158 |
|
159 |
-
return opt1_done, opt2_done
|
|
|
|
|
|
|
160 |
|
161 |
# Read data from test_info.txt
|
162 |
with open(test_info_location, "r") as file:
|
@@ -170,16 +190,40 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
|
|
170 |
1: {"ER": 0, "ME": 0, "both": 0,"none":0},
|
171 |
2: {"ER": 0, "ME": 0, "both": 0,"none":0}
|
172 |
}
|
173 |
-
|
174 |
# Analyze rows
|
|
|
|
|
|
|
175 |
for i, row in enumerate(data):
|
176 |
row = row.strip()
|
177 |
if not row:
|
178 |
continue
|
179 |
|
180 |
ideal_task = ideal_tasks[i] # Get the ideal task for the current row
|
181 |
-
|
182 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
183 |
if ideal_task == 0:
|
184 |
if opt1_done and not opt2_done:
|
185 |
task_counts[1]["ER"] += 1
|
@@ -198,6 +242,16 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
|
|
198 |
task_counts[2]["both"] += 1
|
199 |
else:
|
200 |
task_counts[2]["none"] +=1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
201 |
|
202 |
# Create a string output for results
|
203 |
# output_summary = "Task Analysis Summary:\n"
|
@@ -209,9 +263,90 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
|
|
209 |
# output_summary += f" Only OptionalTask_2 done: {counts['ME']}\n"
|
210 |
# output_summary += f" Both done: {counts['both']}\n"
|
211 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
212 |
# colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']
|
213 |
colors = ["#FF6F61", "#6B5B95", "#88B04B", "#F7CAC9"]
|
214 |
-
|
215 |
# Generate pie chart for Task 1
|
216 |
task1_labels = list(task_counts[1].keys())
|
217 |
task1_values = list(task_counts[1].values())
|
@@ -419,7 +554,7 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
|
|
419 |
ROC-AUC for problems of type ER: {opt_task1_roc_auc:.4f}
|
420 |
ROC-AUC for problems of type ME: {opt_task2_roc_auc:.4f}
|
421 |
"""
|
422 |
-
return text_output,fig,fig_task1,fig_task2
|
423 |
|
424 |
# List of models for the dropdown menu
|
425 |
|
@@ -818,13 +953,17 @@ with gr.Blocks(theme='gstaff/sketch', css=custom_css) as demo:
|
|
818 |
opt1_pie = gr.Plot(label="ER")
|
819 |
opt2_pie = gr.Plot(label="ME")
|
820 |
# output_summary = gr.Textbox(label="Summary")
|
|
|
|
|
|
|
|
|
821 |
|
822 |
|
823 |
|
824 |
btn.click(
|
825 |
fn=process_file,
|
826 |
inputs=[model_dropdown,increment_slider],
|
827 |
-
outputs=[output_text,plot_output,opt1_pie,opt2_pie]
|
828 |
)
|
829 |
|
830 |
|
|
|
136 |
# Helper function to evaluate task attempts
|
137 |
def evaluate_tasks(fields, tasks):
|
138 |
task_status = {}
|
139 |
+
total_attempts = 0 # Counter for total number of attempts
|
140 |
+
attempted_tasks = 0 # Counter for tasks attempted at least once
|
141 |
+
successful_first_attempts = 0 # Counter for tasks successful on the first try
|
142 |
for task in tasks:
|
143 |
+
|
144 |
relevant_attempts = [f for f in fields if task in f]
|
145 |
+
attempt_count = len(relevant_attempts)
|
146 |
+
total_attempts += attempt_count # Add to the total attempts
|
147 |
+
|
148 |
+
if attempt_count > 0:
|
149 |
+
attempted_tasks += 1 # Increment attempted tasks count
|
150 |
+
|
151 |
+
# Check the first attempt
|
152 |
+
first_attempt = relevant_attempts[0]
|
153 |
+
if "OK" in first_attempt and "ERROR" not in first_attempt and "JIT" not in first_attempt:
|
154 |
+
successful_first_attempts += 1
|
155 |
+
|
156 |
+
|
157 |
if any("OK" in attempt for attempt in relevant_attempts):
|
158 |
task_status[task] = "Attempted (Successful)"
|
159 |
+
|
160 |
elif any("ERROR" in attempt for attempt in relevant_attempts):
|
161 |
task_status[task] = "Attempted (Error)"
|
162 |
elif any("JIT" in attempt for attempt in relevant_attempts):
|
163 |
task_status[task] = "Attempted (JIT)"
|
164 |
else:
|
165 |
task_status[task] = "Unattempted"
|
166 |
+
return task_status,attempted_tasks, total_attempts,successful_first_attempts
|
167 |
|
168 |
# Evaluate tasks for each category
|
169 |
+
optional_task_1_status, opt1_attempted, opt1_total_attempts, opt1_successful_first_attempts = evaluate_tasks(fields, optional_task_1_subtasks)
|
170 |
+
optional_task_2_status, opt2_attempted, opt2_total_attempts, opt2_successful_first_attempts = evaluate_tasks(fields, optional_task_2_subtasks)
|
171 |
|
172 |
# Check if tasks have any successful attempt
|
173 |
opt1_done = any(status == "Attempted (Successful)" for status in optional_task_1_status.values())
|
174 |
opt2_done = any(status == "Attempted (Successful)" for status in optional_task_2_status.values())
|
175 |
|
176 |
+
return (opt1_done, opt2_done,
|
177 |
+
opt1_attempted, opt2_attempted,
|
178 |
+
opt1_total_attempts, opt2_total_attempts,
|
179 |
+
opt1_successful_first_attempts, opt2_successful_first_attempts)
|
180 |
|
181 |
# Read data from test_info.txt
|
182 |
with open(test_info_location, "r") as file:
|
|
|
190 |
1: {"ER": 0, "ME": 0, "both": 0,"none":0},
|
191 |
2: {"ER": 0, "ME": 0, "both": 0,"none":0}
|
192 |
}
|
193 |
+
# To store task completion counts per row
|
194 |
# Analyze rows
|
195 |
+
row_summary = []
|
196 |
+
opt1_ratios = []
|
197 |
+
opt2_ratios = []
|
198 |
for i, row in enumerate(data):
|
199 |
row = row.strip()
|
200 |
if not row:
|
201 |
continue
|
202 |
|
203 |
ideal_task = ideal_tasks[i] # Get the ideal task for the current row
|
204 |
+
(
|
205 |
+
opt1_done, opt2_done,
|
206 |
+
opt1_attempted, opt2_attempted,
|
207 |
+
opt1_total_attempts, opt2_total_attempts,
|
208 |
+
opt1_successful_first_attempts, opt2_successful_first_attempts
|
209 |
+
) = analyze_row(row)
|
210 |
+
|
211 |
+
opt1_ratios.append(opt1_attempted / opt1_total_attempts if opt1_total_attempts > 0 else 0)
|
212 |
+
opt2_ratios.append(opt2_attempted / opt2_total_attempts if opt2_total_attempts > 0 else 0)
|
213 |
+
# Create a per-row summary of tasks attempted, total attempts, and successful first attempts
|
214 |
+
# row_summary.append({
|
215 |
+
# "row_index": i + 1,
|
216 |
+
# "opt1": {
|
217 |
+
# "tasks_attempted": opt1_attempted,
|
218 |
+
# "total_attempts": opt1_total_attempts,
|
219 |
+
# "successful_attempts": opt1_successful_first_attempts,
|
220 |
+
# },
|
221 |
+
# "opt2": {
|
222 |
+
# "tasks_attempted": opt2_attempted,
|
223 |
+
# "total_attempts": opt2_total_attempts,
|
224 |
+
# "successful_attempts": opt2_successful_first_attempts,
|
225 |
+
# }
|
226 |
+
# })
|
227 |
if ideal_task == 0:
|
228 |
if opt1_done and not opt2_done:
|
229 |
task_counts[1]["ER"] += 1
|
|
|
242 |
task_counts[2]["both"] += 1
|
243 |
else:
|
244 |
task_counts[2]["none"] +=1
|
245 |
+
# Print a summary of task completions
|
246 |
+
# for summary in row_summary:
|
247 |
+
# print(f"\nRow {summary['row_index']}:")
|
248 |
+
# print(f" OptionalTask_1 - Tasks Attempted: {summary['opt1']['tasks_attempted']}, "
|
249 |
+
# f"Total Attempts: {summary['opt1']['total_attempts']}, "
|
250 |
+
# f"Successful Attempts: {summary['opt1']['successful_attempts']}")
|
251 |
+
# print(f" OptionalTask_2 - Tasks Attempted: {summary['opt2']['tasks_attempted']}, "
|
252 |
+
# f"Total Attempts: {summary['opt2']['total_attempts']}, "
|
253 |
+
# f"Successful Attempts: {summary['opt2']['successful_attempts']}")
|
254 |
+
|
255 |
|
256 |
# Create a string output for results
|
257 |
# output_summary = "Task Analysis Summary:\n"
|
|
|
263 |
# output_summary += f" Only OptionalTask_2 done: {counts['ME']}\n"
|
264 |
# output_summary += f" Both done: {counts['both']}\n"
|
265 |
|
266 |
+
|
267 |
+
# Create figure
|
268 |
+
fig_hist1 = go.Figure()
|
269 |
+
|
270 |
+
# Add histogram for OptionalTask_1 (ER)
|
271 |
+
fig_hist1.add_trace(go.Histogram(
|
272 |
+
x=opt1_ratios,
|
273 |
+
name="ER",
|
274 |
+
marker=dict(color='blue'),
|
275 |
+
opacity=1,
|
276 |
+
xbins=dict(
|
277 |
+
start=0.0,
|
278 |
+
end=1.0,
|
279 |
+
size=0.1 # Bin width set to 0.1 for 10 bins
|
280 |
+
)
|
281 |
+
))
|
282 |
+
fig_hist2=go.Figure()
|
283 |
+
# Add histogram for OptionalTask_2 (ME)
|
284 |
+
fig_hist2.add_trace(go.Histogram(
|
285 |
+
x=opt2_ratios,
|
286 |
+
name="ME",
|
287 |
+
marker=dict(color='red'),
|
288 |
+
opacity=1,
|
289 |
+
xbins=dict(
|
290 |
+
start=0.0,
|
291 |
+
end=1.0,
|
292 |
+
size=0.1 # Bin width set to 0.1 for 10 bins
|
293 |
+
)
|
294 |
+
))
|
295 |
+
|
296 |
+
# Update layout
|
297 |
+
fig_hist1.update_layout(
|
298 |
+
title="ER: Histogram of Task Success Ratios Across Dataset",
|
299 |
+
title_x=0.5,
|
300 |
+
xaxis=dict(
|
301 |
+
title="Success Ratio (Tasks Attempted / Total Attempts)",
|
302 |
+
tickmode="array",
|
303 |
+
tickvals=np.linspace(0, 1, 11), # 11 evenly spaced ticks (0.0, 0.1, ..., 1.0) marking the 10 bin edges
|
304 |
+
),
|
305 |
+
yaxis=dict(
|
306 |
+
title="Number of Instances"
|
307 |
+
),
|
308 |
+
font=dict(
|
309 |
+
family="sans-serif",
|
310 |
+
size=12,
|
311 |
+
color="black"
|
312 |
+
),
|
313 |
+
legend=dict(
|
314 |
+
font=dict(
|
315 |
+
family="sans-serif",
|
316 |
+
size=12,
|
317 |
+
color="black"
|
318 |
+
)
|
319 |
+
),
|
320 |
+
barmode='overlay' # Overlapping bars
|
321 |
+
)
|
322 |
+
fig_hist2.update_layout(
|
323 |
+
title="ME: Histogram of Task Success Ratios Across Dataset",
|
324 |
+
title_x=0.5,
|
325 |
+
xaxis=dict(
|
326 |
+
title="Success Ratio (Tasks Attempted / Total Attempts)",
|
327 |
+
tickmode="array",
|
328 |
+
tickvals=np.linspace(0, 1, 11), # 11 evenly spaced ticks (0.0, 0.1, ..., 1.0) marking the 10 bin edges
|
329 |
+
),
|
330 |
+
yaxis=dict(
|
331 |
+
title="Number of Instances"
|
332 |
+
),
|
333 |
+
font=dict(
|
334 |
+
family="sans-serif",
|
335 |
+
size=12,
|
336 |
+
color="black"
|
337 |
+
),
|
338 |
+
legend=dict(
|
339 |
+
font=dict(
|
340 |
+
family="sans-serif",
|
341 |
+
size=12,
|
342 |
+
color="black"
|
343 |
+
)
|
344 |
+
),
|
345 |
+
barmode='overlay' # Overlapping bars
|
346 |
+
)
|
347 |
# colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']
|
348 |
colors = ["#FF6F61", "#6B5B95", "#88B04B", "#F7CAC9"]
|
349 |
+
# print(opt1_ratios,opt2_ratios)
|
350 |
# Generate pie chart for Task 1
|
351 |
task1_labels = list(task_counts[1].keys())
|
352 |
task1_values = list(task_counts[1].values())
|
|
|
554 |
ROC-AUC for problems of type ER: {opt_task1_roc_auc:.4f}
|
555 |
ROC-AUC for problems of type ME: {opt_task2_roc_auc:.4f}
|
556 |
"""
|
557 |
+
return text_output,fig,fig_task1,fig_task2,fig_hist1,fig_hist2
|
558 |
|
559 |
# List of models for the dropdown menu
|
560 |
|
|
|
953 |
opt1_pie = gr.Plot(label="ER")
|
954 |
opt2_pie = gr.Plot(label="ME")
|
955 |
# output_summary = gr.Textbox(label="Summary")
|
956 |
+
with gr.Row():
|
957 |
+
histo1 = gr.Plot(label="Hist")
|
958 |
+
histo2 = gr.Plot(label="Hist")
|
959 |
+
|
960 |
|
961 |
|
962 |
|
963 |
btn.click(
|
964 |
fn=process_file,
|
965 |
inputs=[model_dropdown,increment_slider],
|
966 |
+
outputs=[output_text,plot_output,opt1_pie,opt2_pie,histo1,histo2]
|
967 |
)
|
968 |
|
969 |
|
result.txt
CHANGED
@@ -3,5 +3,5 @@ total_acc: 69.00702106318957
|
|
3 |
precisions: 0.7236623191454734
|
4 |
recalls: 0.6900702106318957
|
5 |
f1_scores: 0.6802420656474512
|
6 |
-
time_taken_from_start:
|
7 |
auc_score: 0.7457100293916334
|
|
|
3 |
precisions: 0.7236623191454734
|
4 |
recalls: 0.6900702106318957
|
5 |
f1_scores: 0.6802420656474512
|
6 |
+
time_taken_from_start: 23.562122583389282
|
7 |
auc_score: 0.7457100293916334
|