# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Average Precision"""

import evaluate
import datasets
from sklearn.metrics import average_precision_score


# TODO: Add BibTeX citation
_CITATION = """\
@InProceedings{huggingface:module,
title = {A great new module},
authors={chanelcolgate, Inc.},
year={2023}
}
"""

# TODO: Add description of the module here
_DESCRIPTION = """\
    Average Precision
"""


# TODO: Add description of the arguments of the module here
_KWARGS_DESCRIPTION = """
Note: To be consistent with the `evaluate` input conventions the scikit-learn inputs are renamed:
- `y_true`: `references`
- `y_pred`: `prediction_scores`

Scikit-learn docstring:
Average precision score.

Compute average precision (AP) from prediction scores.
AP summarizes a precision-recall curve as the weighted mean of precisions
achieved at each threshold, with the increase in recall from the previous
threshold used as the weight:
.. math::
    \\text{AP} = \\sum_n (R_n - R_{n-1}) P_n
where :math:`P_n` and :math:`R_n` are the precision and recall at the nth
threshold [1]_. This implementation is not interpolated and is different
from computing the area under the precision-recall curve with the
trapezoidal rule, which uses linear interpolation and can be too optimistic.
Note: this implementation is restricted to the binary classification task or
multilabel classification task.
Read more in the :ref:`User Guide <precision_recall_f_measure_metrics`.
Args:
    y_true: ndarray of shape (n_samples,) or (n_samples, n_classes)
        True binary labels or binary label indicators.
    y_score: ndarray of shape (n_samples,) or (n_samples, n_classes)
        Target scores, can either be probability estimates of the positive
        class, confidence values, or non-thresholded measure of decisions
        (as returned by :term:`decision_function` on some classifiers).
    average: {'micro', 'samples', 'weighted', 'macro'} or None, \
            default='macro'
        If ``None``, the scores for each class are retruned. Otherwise,
        this determines the type of averaging performed on the data:
        ``'micro'``:
            Calculate metrics globally be considering each element of the label
            indicator matrix as a label.
        ``'macro'``:
            Calculate metrics for each label, and find their unweighted
            mean. This does not take label imbalance into account.
        ``'weighted'``:
            Calculate metrics for each label, and find their average, weighted
            by support (the number of true instances for each label).
        ``'samples'``:
            Calculate metrics for each instance, and find their average.
        Will be ignored when ``y_true`` is binary.
    pos_label: int or str, default=1
        The label of the positive class. Only applied to binary ``y_true``.
        For multilabel-indicator ``y_true``, ``pos_label`` is fixed to 1.
    sample_weight: array_like of shape (n_samples,), default=None
        Sample weights.
Returns:
    accuracy: description of the first score,
    another_score: description of the second score,
    average_precision: float
        Average precision score.
    See Also
    roc_auc_score: Compute the area under the ROC curve.
    precision_recall_curve: Compute precision-recall pairs for different
        probability thresholds.
Examples:
    Examples should be written in doctest format, and should illustrate how
    to use the function.

    >>> import numpy as np
    >>> from sklearn.metrics import average_precision_score
    >>> y_true = np.array([0, 0, 1, 1])
    >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])
    >>> average_precision_score(y_true, y_scores)
    0.8333333333333333
"""


@evaluate.utils.file_utils.add_start_docstrings(
    _DESCRIPTION, _KWARGS_DESCRIPTION
)
class AveragePrecision(evaluate.Metric):
    """TODO: Short description of my evaluation module."""

    def _info(self):
        # TODO: Specifies the evaluate.EvaluationModuleInfo object
        return evaluate.MetricInfo(
            # This is the description that will appear on the modules page.
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # This defines the format of each prediction and reference
            features=[
                datasets.Features(
                    {
                        "references": datasets.Value("int64"),
                        "prediction_scores": datasets.Value("float"),
                    }
                ),
                datasets.Features(
                    {
                        "references": datasets.Sequence(
                            datasets.Value("int64")
                        ),
                        "prediction_scores": datasets.Sequence(
                            datasets.Value("float")
                        ),
                    }
                ),
            ],
            # Homepage of the module for documentation
            homepage="https://scikit-learn.org/stable/modules/generated/sklearn.metrics.average_precision_score.html",
            # Additional links to the codebase or references
            codebase_urls=["https://github.com/scikit-learn/scikit-learn"],
            reference_urls=["https://scikit-learn.org/stable/index.html"],
        )

    def _download_and_prepare(self, dl_manager):
        """Optional: download external resources useful to compute the scores"""
        # TODO: Download external resources if needed
        pass

    def _compute(
        self,
        references,
        prediction_scores,
        average="macro",
        pos_label=1,
        sample_weight=None,
    ):
        """Returns the scores"""
        # TODO: Compute the different scores of the module
        return {
            "average_precision_score": average_precision_score(
                y_true=references,
                y_score=prediction_scores,
                average=average,
                pos_label=pos_label,
                sample_weight=sample_weight,
            )
        }