initial commit
- .gitignore +1 -0
- README.md +1 -1
- app.py +11 -0
- ibleu.py +148 -0
- requirements.txt +2 -0
.gitignore
ADDED
@@ -0,0 +1 @@
+.vscode/
README.md
CHANGED
@@ -1,5 +1,5 @@
 ---
-title:
+title: iBleu
 emoji: π
 colorFrom: red
 colorTo: indigo
app.py
ADDED
@@ -0,0 +1,11 @@
+import sys
+
+import evaluate
+from evaluate.utils import launch_gradio_widget
+
+
+sys.path = [p for p in sys.path if p != "/home/user/app"]
+module = evaluate.load("rahular/ibleu")
+sys.path = ["/home/user/app"] + sys.path
+
+launch_gradio_widget(module)
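The sys.path shuffling above appears to keep the Space's local ibleu.py from shadowing the module that evaluate.load("rahular/ibleu") fetches from the Hub; the path is restored before the Gradio widget launches. As a rough headless sanity check outside the widget (a sketch only: the sentences are made up and the Hub download needs network access):

    import evaluate

    # Load the metric the same way app.py does, then score one example directly.
    ibleu = evaluate.load("rahular/ibleu")
    result = ibleu.compute(
        inputs=["how do i reset my password"],
        predictions=["how can i reset my password"],
        references=[["how can i reset my password"]],
    )
    print(result)  # {'score': ...}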
ibleu.py
ADDED
@@ -0,0 +1,148 @@
+"""iBleu metric."""
+
+import datasets
+import sacrebleu as scb
+from packaging import version
+
+import evaluate
+
+
+_DESCRIPTION = """
+iBLEU scores generated text (for example, paraphrases) by rewarding n-gram overlap with the
+references while penalizing n-gram overlap with the input, so that simply copying the input
+does not receive a high score. It is computed from two corpus-level sacrebleu scores as:
+    iBLEU = alpha * BLEU(predictions, references) - (1 - alpha) * BLEU(predictions, inputs)
+Where:
+    alpha: weight of the reference BLEU term; the input (self) BLEU term is weighted by 1 - alpha
+"""
+
+
+_KWARGS_DESCRIPTION = """
+Args:
+    inputs (`list` of `str`): Input (source) sentences, one per prediction.
+    predictions (`list` of `str`): Generated sentences to score.
+    references (`list` of `str` or `list` of `list` of `str`): Reference sentences. Every
+        prediction must have the same number of references.
+    alpha (`float`): Weight of the reference BLEU term; the input BLEU term is weighted by
+        `1 - alpha`. Defaults to 0.7.
+    smooth_method (`str`): Smoothing method passed to sacrebleu ('none', 'floor', 'add-k' or 'exp').
+        Defaults to 'exp'.
+    smooth_value (`float`): Smoothing value for the 'floor' and 'add-k' methods. Defaults to None.
+    force (`boolean`): If set to True, skips sacrebleu's check for already-tokenized input.
+        Defaults to False.
+    lowercase (`boolean`): If set to True, lowercases the text before scoring. Defaults to False.
+    tokenize (`str`): Tokenizer to use with sacrebleu; None uses sacrebleu's default. Defaults to None.
+    use_effective_order (`boolean`): If set to True, only n-gram orders that actually occur
+        contribute to the score. Defaults to False.
+Returns:
+    score (`float`): alpha * BLEU(predictions, references) - (1 - alpha) * BLEU(predictions, inputs).
+        Higher means the predictions are closer to the references and further from the inputs.
+"""
+
+
+_CITATION = """
+@article{scikit-learn,
+  title={Scikit-learn: Machine Learning in {P}ython},
+  author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
+         and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
+         and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
+         Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
+  journal={Journal of Machine Learning Research},
+  volume={12},
+  pages={2825--2830},
+  year={2011}
+}
+"""
+
+
+@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
+class ibleu(evaluate.Metric):
+    def _info(self):
+        if version.parse(scb.__version__) < version.parse("1.4.12"):
+            raise ImportWarning(
+                "To use `sacrebleu`, the module `sacrebleu>=1.4.12` is required, and the current version of `sacrebleu` doesn't match this condition.\n"
+                'You can install it with `pip install "sacrebleu>=1.4.12"`.'
+            )
+        return evaluate.MetricInfo(
+            description=_DESCRIPTION,
+            citation=_CITATION,
+            inputs_description=_KWARGS_DESCRIPTION,
+            features=[
+                datasets.Features(
+                    {
+                        "inputs": datasets.Value("string", id="sequence"),
+                        "predictions": datasets.Value("string", id="sequence"),
+                        "references": datasets.Sequence(
+                            datasets.Value("string", id="sequence"), id="references"
+                        ),
+                    }
+                ),
+                datasets.Features(
+                    {
+                        "inputs": datasets.Value("string", id="sequence"),
+                        "predictions": datasets.Value("string", id="sequence"),
+                        "references": datasets.Value("string", id="sequence"),
+                    }
+                ),
+            ],
+            reference_urls=[
+                "https://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html"
+            ],
+        )
+
+    def _compute(
+        self,
+        inputs,
+        predictions,
+        references,
+        alpha=0.7,
+        smooth_method="exp",
+        smooth_value=None,
+        force=False,
+        lowercase=False,
+        tokenize=None,
+        use_effective_order=False,
+    ):
+        # if only one reference is provided make sure we still use list of lists
+        if isinstance(references[0], str):
+            references = [[ref] for ref in references]
+        # inputs are one source sentence per prediction; sacrebleu expects reference
+        # streams, so wrap the whole list as a single stream
+        if isinstance(inputs[0], str):
+            inputs = [inputs]
+        else:
+            raise ValueError("Each prediction must have exactly one input string")
+
+        references_per_prediction = len(references[0])
+        if any(len(refs) != references_per_prediction for refs in references):
+            raise ValueError("Sacrebleu requires the same number of references for each prediction")
+        transformed_references = [[refs[i] for refs in references] for i in range(references_per_prediction)]
+
+        tgt_bleu = scb.corpus_bleu(
+            predictions,
+            transformed_references,
+            smooth_method=smooth_method,
+            smooth_value=smooth_value,
+            force=force,
+            lowercase=lowercase,
+            use_effective_order=use_effective_order,
+            **(dict(tokenize=tokenize) if tokenize else {}),
+        ).score
+        self_bleu = scb.corpus_bleu(
+            predictions,
+            inputs,
+            smooth_method=smooth_method,
+            smooth_value=smooth_value,
+            force=force,
+            lowercase=lowercase,
+            use_effective_order=use_effective_order,
+            **(dict(tokenize=tokenize) if tokenize else {}),
+        ).score
+        output_dict = {
+            "score": alpha * tgt_bleu - (1 - alpha) * self_bleu
+        }
+        return output_dict
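Because the metric declares two Features layouts, references can be passed either as one string per prediction or as a list of strings per prediction; _compute normalizes the single-string form into a list of lists. The returned score is alpha * tgt_bleu - (1 - alpha) * self_bleu, where tgt_bleu scores predictions against the references and self_bleu scores them against the inputs. A small sketch of both calling conventions, with illustrative sentences only:

    import evaluate

    ibleu = evaluate.load("rahular/ibleu")

    # Single reference per prediction, passed as a plain string.
    print(ibleu.compute(
        inputs=["the meeting was cancelled"],
        predictions=["the meeting got called off"],
        references=["the meeting got called off"],
    ))

    # Multiple references per prediction; a larger alpha weights reference overlap more heavily.
    print(ibleu.compute(
        inputs=["the meeting was cancelled"],
        predictions=["the meeting got called off"],
        references=[["the meeting got called off", "they called off the meeting"]],
        alpha=0.8,
    ))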
requirements.txt
ADDED
@@ -0,0 +1,2 @@
+git+https://github.com/huggingface/evaluate@6abb0d53b82b1e5efea5d683b91d7990a653c78d
+sacrebleu
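requirements.txt pins evaluate to a specific commit and pulls in sacrebleu, which ibleu.py requires at version 1.4.12 or newer; gradio itself is presumably supplied by the Space runtime rather than listed here. A quick import check after installing (a sketch, nothing more):

    import evaluate
    import sacrebleu

    # Confirm the pinned evaluate revision and a sacrebleu >= 1.4.12 are importable.
    print(evaluate.__version__, sacrebleu.__version__)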