File size: 4,683 Bytes
d3b96ce
 
 
 
901e7a3
d3b96ce
 
7a0b64b
d3b96ce
 
 
901e7a3
d3b96ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
901e7a3
d3b96ce
 
 
 
 
 
 
 
 
 
27261e8
b84573a
6415a1e
 
b84573a
061400f
0b38eef
d3b96ce
 
0b38eef
cf6bb2e
 
 
27261e8
d784581
0b38eef
 
 
 
 
 
d3b96ce
1ef10ab
d3b96ce
 
 
 
 
7a0b64b
c829216
7a0b64b
d3b96ce
0b55ae2
7a0b64b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import os
import re
import sys
import json
import logging
from pathlib import Path

import evaluate
import numpy as np
from datasets import Value

# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Matches a block delimited by `---` lines (the YAML metadata header of a README);
# group 1 captures the text between the two delimiters.
REGEX_YAML_BLOCK = re.compile(r"---[\n\r]+([\S\s]*?)[\n\r]+---[\n\r]")


def infer_gradio_input_types(feature_types):
    """
    Translate metric feature types into the input types used for gradio Dataframes.

    Mapping:
        - `Value` with a dtype starting with "int" or "float" -> "number"
        - `Value` with the "string" dtype -> "str"
        - anything else -> "json"

    "json" is not a native gradio type; it is treated as a string that is then
    parsed as json.
    """

    def _classify(feature_type):
        # Only scalar `Value` features can map to a native gradio type.
        if not isinstance(feature_type, Value):
            return "json"
        if feature_type.dtype.startswith(("int", "float")):
            return "number"
        if feature_type.dtype == "string":
            return "str"
        return "json"

    return [_classify(feature_type) for feature_type in feature_types]


def json_to_string_type(input_types):
    """Return a new list in which every "json" entry of `input_types` becomes "str"."""
    converted = []
    for input_type in input_types:
        if input_type == "json":
            converted.append("str")
        else:
            converted.append(input_type)
    return converted


def parse_readme(filepath):
    """
    Read a repository README and strip its leading YAML metadata block.

    Args:
        filepath: Path of the README file to read.

    Returns:
        The README text with everything up to and including the first block
        matched by `REGEX_YAML_BLOCK` removed, the unchanged text when no such
        block is found, or the placeholder "No README.md found." when
        `filepath` does not exist.
    """
    if not os.path.exists(filepath):
        return "No README.md found."
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(filepath, "r", encoding="utf-8") as f:
        text = f.read()
    match = REGEX_YAML_BLOCK.search(text)
    if match:
        text = text[match.end() :]
    return text


def parse_gradio_data(data, input_types):
    """
    Convert the contents of a gradio Dataframe into keyword inputs for a metric.

    Rows holding an empty string or a missing value in any column are dropped
    before conversion. The passed-in `data` is not modified.

    Args:
        data: DataFrame-like table (as produced by a gradio Dataframe) whose
            columns are paired positionally with `input_types`.
        input_types: One input type per column: "json", "str", or anything
            else (e.g. "number").

    Returns:
        A dict mapping each column name to its parsed values: a list of
        json-decoded objects for "json", a list of strings with surrounding
        double quotes stripped for "str", and the cleaned column itself for
        any other type.
    """
    # Work on a cleaned copy instead of mutating the caller's frame in place.
    cleaned = data.replace("", np.nan).dropna()
    metric_inputs = {}
    for feature_name, input_type in zip(cleaned, input_types):
        column = cleaned[feature_name]
        if input_type == "json":
            metric_inputs[feature_name] = [json.loads(d) for d in column.to_list()]
        elif input_type == "str":
            metric_inputs[feature_name] = [d.strip('"') for d in column.to_list()]
        else:
            # Other types are passed through as the column object.
            metric_inputs[feature_name] = column
    return metric_inputs


def _to_json_text(element):
    """Serialize `element` as JSON text, falling back to `str` when it is not JSON-serializable."""
    try:
        return json.dumps(element, ensure_ascii=False)
    except (TypeError, ValueError):
        return str(element)


def parse_test_cases(test_cases, feature_names, input_types):
    """
    Convert metric test cases into example tables for a gradio Dataframe.

    Values of "json" features are rendered as JSON text so that they can be
    decoded again with `json.loads`; values of "str" features are wrapped in
    double quotes to follow the format in json; all other values are kept as-is.

    Args:
        test_cases: Sequence of mappings from feature name to a list of values.
        feature_names: Feature names, in the column order of the Dataframe.
        input_types: Input type of each feature ("json", "str", or other),
            paired positionally with `feature_names`.

    Returns:
        None when `test_cases` is empty; otherwise a list with one table per
        test case, each table being a list of rows (one value per feature).
    """
    if len(test_cases) == 0:
        return None
    examples = []
    for test_case in test_cases:
        columns = []
        for feat, input_type in zip(feature_names, input_types):
            if input_type == "json":
                # json.dumps rather than str(): str() yields Python repr such as
                # "['a', 'b']" or "True", which is not valid JSON.
                columns.append([_to_json_text(element) for element in test_case[feat]])
            elif input_type == "str":
                columns.append(['"' + element + '"' for element in test_case[feat]])
            else:
                columns.append(test_case[feat])
        # Transpose per-feature columns into per-sample rows.
        examples.append([list(row) for row in zip(*columns)])
    return examples


def launch_gradio_widget(metric):
    """
    Build a Gradio interface around `metric` and launch it.

    The interface exposes two text boxes ("Summary" and "Document"); their
    contents are passed to `metric.compute` as single-element `predictions`
    and `references` lists. The README.md found under `sys.path[0]` is shown
    as the article.

    Raises:
        ImportError: if gradio is not installed.
    """
    try:
        import gradio as gr
    except ImportError as error:
        logger.info("To create a metric widget with Gradio make sure gradio is installed.")
        raise error

    readme_path = Path(sys.path[0]) / "README.md"

    # A metric may declare several feature schemas; the first one is the default.
    features = metric.features[0] if isinstance(metric.features, list) else metric.features
    feature_names, feature_types = zip(*features.items())
    # NOTE: only needed by the generic Dataframe input, which is currently
    # disabled in favour of the two text boxes below.
    gradio_input_types = infer_gradio_input_types(feature_types)

    def compute(summary, document):
        # Wrap each text in a list: one prediction scored against one reference.
        return metric.compute(predictions=[summary], references=[document])

    text_inputs = [
        gr.components.Textbox(label="Summary"),
        gr.components.Textbox(label="Document"),
    ]
    result_box = gr.components.Textbox(label=metric.name)
    description = metric.info.description
    title = f"Metric: {metric.name}"
    article = parse_readme(readme_path)

    # TODO: load test cases and use them to populate examples
    # examples=[parse_test_cases(test_cases, feature_names, gradio_input_types)]
    widget = gr.Interface(
        fn=compute,
        inputs=text_inputs,
        outputs=result_box,
        description=description,
        title=title,
        article=article,
    )
    widget.launch()

    
# Load the "yangwang825/datastats" evaluation module and serve its widget.
# NOTE: this runs whenever the file is executed or imported, since there is
# no `__main__` guard.
module = evaluate.load("yangwang825/datastats")
launch_gradio_widget(module)