added docs
README.md CHANGED
@@ -1,5 +1,5 @@
 ---
-title:
+title: iBLEU
 emoji: π
 colorFrom: red
 colorTo: indigo
@@ -9,4 +9,4 @@ app_file: app.py
 pinned: false
 ---
 
-
+iBLEU measures the adequacy and dissimilarity of generated paraphrases.
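The one-line description added to the README follows Sun & Zhou (2012), which ibleu.py cites below: iBLEU rewards n-gram overlap with the human references (adequacy) while penalizing overlap with the source input (dissimilarity). A minimal sketch of that combination; the function name and the default alpha are illustrative assumptions, not code from this repo:

def combine_ibleu(ref_bleu: float, self_bleu: float, alpha: float = 0.7) -> float:
    """Sun & Zhou (2012): iBLEU = alpha * BLEU(prediction, references)
    minus (1 - alpha) * BLEU(prediction, input)."""
    # ref_bleu measures adequacy against the references; self_bleu measures
    # how much the paraphrase merely copies its input.
    # alpha=0.7 is an assumed default, not taken from this repo.
    return alpha * ref_bleu - (1 - alpha) * self_bleu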
ibleu.py CHANGED
@@ -7,56 +7,59 @@ from packaging import version
 import evaluate
 
 
-
-
-
-
-
-
-
-
+_CITATION = """\
+@inproceedings{sun-zhou-2012-joint,
+    title = "Joint Learning of a Dual {SMT} System for Paraphrase Generation",
+    author = "Sun, Hong and
+      Zhou, Ming",
+    booktitle = "Proceedings of the 50th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)",
+    month = jul,
+    year = "2012",
+    address = "Jeju Island, Korea",
+    publisher = "Association for Computational Linguistics",
+    url = "https://aclanthology.org/P12-2008",
+    pages = "38--42",
+}
+
 """
 
+_DESCRIPTION = """\
+iBLEU measures the adequacy and dissimilarity of generated paraphrases.
+"""
 
 _KWARGS_DESCRIPTION = """
+Produces the iBLEU score from an input and a prediction against one or more references.
 Args:
-
-
-
-
+    inputs (`list` of `str`): List of model inputs. Each input should be tokenized into a list of tokens.
+    predictions (`list` of `str`): List of translations to score. Each translation should be tokenized into a list of tokens.
+    references (`list` of `list` of `str`): A list of lists of references. The contents of the first sub-list are the references for the first prediction, the contents of the second sub-list are for the second prediction, etc. Note that there must be the same number of references for each prediction (i.e. all sub-lists must be of the same length).
+    alpha (`float`): Parameter for balancing adequacy and dissimilarity; a smaller α value indicates a larger penalty on self-paraphrase.
+    smooth_method (`str`): The smoothing method to use, defaults to `'exp'`. Possible values are:
+        - `'none'`: no smoothing
+        - `'floor'`: increment zero counts
+        - `'add-k'`: increment num/denom by k for n>1
+        - `'exp'`: exponential decay
+    smooth_value (`float`): The smoothing value. Only valid when `smooth_method='floor'` (in which case `smooth_value` defaults to `0.1`) or `smooth_method='add-k'` (in which case `smooth_value` defaults to `1`).
+    tokenize (`str`): Tokenization method to use for iBLEU. If not provided, defaults to `'zh'` for Chinese, `'ja-mecab'` for Japanese and `'13a'` (mteval) otherwise. Possible values are:
+        - `'none'`: No tokenization.
+        - `'zh'`: Chinese tokenization.
+        - `'13a'`: mimics the `mteval-v13a` script from Moses.
+        - `'intl'`: International tokenization, mimics the `mteval-v14` script from Moses.
+        - `'char'`: Language-agnostic character-level tokenization.
+        - `'ja-mecab'`: Japanese tokenization. Uses the [MeCab tokenizer](https://pypi.org/project/mecab-python3).
+    lowercase (`bool`): If `True`, lowercases the input, enabling case-insensitivity. Defaults to `False`.
+    force (`bool`): If `True`, insists that your tokenized input is actually detokenized. Defaults to `False`.
+    use_effective_order (`bool`): If `True`, stops including n-gram orders for which precision is 0. This should be `True` if sentence-level BLEU will be computed. Defaults to `False`.
 Returns:
-
+    'score': iBLEU score
 Examples:
-
-
-
-
-
-
-
-        >>> results = accuracy_metric.compute(references=[0, 1, 2, 0, 1, 2], predictions=[0, 1, 1, 2, 1, 0], normalize=False)
-        >>> print(results)
-        {'accuracy': 3.0}
-    Example 3-The same as Example 1, except with `sample_weight` set.
-        >>> accuracy_metric = evaluate.load("accuracy")
-        >>> results = accuracy_metric.compute(references=[0, 1, 2, 0, 1, 2], predictions=[0, 1, 1, 2, 1, 0], sample_weight=[0.5, 2, 0.7, 0.5, 9, 0.4])
-        >>> print(results)
-        {'accuracy': 0.8778625954198473}
-"""
-
-
-_CITATION = """
-@article{scikit-learn,
-  title={Scikit-learn: Machine Learning in {P}ython},
-  author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
-          and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
-          and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
-          Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
-  journal={Journal of Machine Learning Research},
-  volume={12},
-  pages={2825--2830},
-  year={2011}
-}
+    >>> inputs = ["greetings general kenobi", "foo foo bar bar"]
+    >>> predictions = ["hello there general kenobi", "foo bar foobar"]
+    >>> references = [["hello there general kenobi", "hello there !"], ["foo bar foobar", "foo bar foobar"]]
+    >>> ibleu = evaluate.load("rahular/ibleu")
+    >>> results = ibleu.compute(inputs=inputs, predictions=predictions, references=references)
+    >>> print(results)
+    {'score': 60.41585343630594}
 """
 
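The diff above only changes docstrings, so the compute logic itself is not visible here. As a rough sketch of how a score with this interface could be computed on top of sacrebleu (whose options the argument list above mirrors): the helper name, the transposition detail, and alpha=0.7 are assumptions, and the sketch is not guaranteed to reproduce the doctest value exactly.

import sacrebleu

def ibleu(inputs, predictions, references, alpha=0.7):
    # sacrebleu.corpus_bleu expects references transposed into parallel
    # streams: one list per reference position, each aligned with predictions.
    ref_streams = [list(refs) for refs in zip(*references)]
    ref_bleu = sacrebleu.corpus_bleu(predictions, ref_streams).score
    # Self-BLEU: score the predictions against the original inputs to
    # measure how much of the input was copied verbatim.
    self_bleu = sacrebleu.corpus_bleu(predictions, [inputs]).score
    # alpha=0.7 is an assumed default, not taken from this repo.
    return {"score": alpha * ref_bleu - (1 - alpha) * self_bleu}

inputs = ["greetings general kenobi", "foo foo bar bar"]
predictions = ["hello there general kenobi", "foo bar foobar"]
references = [["hello there general kenobi", "hello there !"], ["foo bar foobar", "foo bar foobar"]]
print(ibleu(inputs, predictions, references))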