File size: 2,975 Bytes
a8ed63e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
from glob import glob
import json 
import numpy as np
import gradio as gr



def calculate_the_results():
    """Print per-group accuracy statistics for the collected responses.

    Every ``./responses/*.json`` file (resolved against the current working
    directory) is loaded and its ``history`` list inspected. The ``type`` of
    the first history entry selects the group: ``'topK'`` goes to the topK
    group, any other value to top1. A file's accuracy is the mean of
    ``is_user_correct`` over its history, in percent, rounded to two decimals.

    Files whose ``history`` is empty are skipped, since they carry neither a
    type nor an accuracy. A group without any file prints ``nan`` for both
    statistics.

    For each group the count, the per-file accuracies, their standard
    deviation and their mean are printed, with a dashed line between groups.

    NOTE(review): a second ``calculate_the_results`` is defined further down
    in this module and rebinds the name, so this printing variant is shadowed
    once the module has been fully executed.
    """
    accuracies = {'top1': [], 'topK': []}

    # Sorted so the per-file accuracy list has a reproducible order; glob()
    # yields paths in arbitrary, filesystem-dependent order.
    for path in sorted(glob('./responses/*.json')):
        # The context manager closes each file as soon as it is parsed.
        with open(path, encoding='utf-8') as handle:
            history = json.load(handle)['history']
        if not history:
            continue

        group = 'topK' if history[0]['type'] == 'topK' else 'top1'
        acc = np.mean([entry['is_user_correct'] for entry in history])
        # float() keeps the printed list readable: NumPy 2 renders NumPy
        # scalars inside a list as "np.float64(...)".
        accuracies[group].append(float((acc * 100).round(2)))

    for position, group in enumerate(('top1', 'topK')):
        if position:
            print('----------------------------------')

        scores = accuracies[group]
        # np.std / np.mean warn on an empty list and return nan anyway, so
        # produce the nan directly and keep the output free of warnings.
        std = np.std(scores) if scores else float('nan')
        mean = np.mean(scores) if scores else float('nan')

        print(f'# of {group}: ', len(scores))
        print(f'{group} Accuracy: ', scores)
        print(f'{group} std: ', std)
        print(f'{group} mean: ', mean)




def calculate_the_results():
    """Build a text report of participant accuracy for the top1 and topK groups.

    Every ``./responses/*.json`` file (resolved against the current working
    directory) is loaded and its ``history`` list inspected. The ``type`` of
    the first history entry selects the group: ``'topK'`` goes to the topK
    group, any other value to top1. A file's accuracy is the mean of
    ``is_user_correct`` over its history, in percent, rounded to two decimals.

    Files whose ``history`` is empty are skipped, since they carry neither a
    type nor an accuracy.

    Returns:
        str: For each group, the number of files, the list of per-file
        accuracies, and their standard deviation and mean, with a dashed
        separator line between the two groups. A group without any file
        reports ``nan`` for both statistics.
    """
    accuracies = {'top1': [], 'topK': []}

    # Sorted so the per-file accuracy list has a reproducible order; glob()
    # yields paths in arbitrary, filesystem-dependent order.
    for path in sorted(glob('./responses/*.json')):
        # The context manager closes each file as soon as it is parsed.
        with open(path, encoding='utf-8') as handle:
            history = json.load(handle)['history']
        if not history:
            continue

        group = 'topK' if history[0]['type'] == 'topK' else 'top1'
        acc = np.mean([entry['is_user_correct'] for entry in history])
        # float() keeps the rendered list readable: NumPy 2 renders NumPy
        # scalars inside a list as "np.float64(...)".
        accuracies[group].append(float((acc * 100).round(2)))

    sections = []
    for group in ('top1', 'topK'):
        scores = accuracies[group]
        # np.std / np.mean warn on an empty list and return nan anyway, so
        # produce the nan directly and keep the callback free of warnings.
        std = np.std(scores) if scores else float('nan')
        mean = np.mean(scores) if scores else float('nan')
        sections.append(
            f"# of {group}: {len(scores)}\n"
            f"{group} Accuracy: {scores}\n"
            f"{group} std: {std}\n"
            f"{group} mean: {mean}"
        )

    return "\n----------------------------------\n".join(sections)


# Minimal Gradio UI: a button that recomputes the statistics and a textbox
# that shows the report returned by calculate_the_results().
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    update_btn = gr.Button(value="Calculate the results")
    results_textbox = gr.Textbox(label="Results", lines=10)

    # The callback takes no inputs; its return value fills the textbox.
    update_btn.click(calculate_the_results, outputs=results_textbox)

    # launch() is called inside the with-block; the server is bound to
    # 0.0.0.0 on port 9911 with debug mode off.
    demo.launch(
        server_name="0.0.0.0",
        server_port=9911,
        debug=False,
    )