John Graham Reynolds committed
Commit 544f3da · 1 Parent(s): 0f86724
update class files

- fixed_f1.py +31 -5
- fixed_precision.py +33 -4
- fixed_recall.py +30 -4
fixed_f1.py
CHANGED
@@ -1,10 +1,35 @@
 import datasets
 import evaluate
-from evaluate import evaluator, Metric
 # from evaluate.metrics.f1 import F1
 from sklearn.metrics import f1_score
 
-
+_DESCRIPTION = """
+Custom built F1 metric that accepts underlying kwargs at instantiation time.
+This class allows one to circumvent the current issue of `combine`-ing the f1 metric, instantiated with its own parameters, into a `CombinedEvaluations` class with other metrics.
+\n
+In general, the F1 score is the harmonic mean of the precision and recall. It can be computed with the equation:\n
+F1 = 2 * (precision * recall) / (precision + recall)
+"""
+
+_CITATION = """
+@online{MarioBbqF1,
+    author = {John Graham Reynolds aka @MarioBarbeque},
+    title = {{Fixed F1 Hugging Face Metric}},
+    year = 2024,
+    url = {https://huggingface.co/spaces/MarioBarbeque/FixedF1},
+    urldate = {2024-11-5}
+}
+"""
+
+_INPUTS = """
+'average': This parameter is required for multiclass/multilabel targets.
+    If None, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data.
+    Options include: {‘micro’, ‘macro’, ‘samples’, ‘weighted’, ‘binary’} or `None`. The default is `binary`.
+"""
+
+# could in principle subclass the F1 Metric, but ideally we can work the fix into HF evaluate's main F1 class to maintain SOLID code
+# for this fix we create a new class
+
 class FixedF1(evaluate.Metric):
 
     def __init__(self, average="binary"):
@@ -14,9 +39,9 @@ class FixedF1(evaluate.Metric):
 
     def _info(self):
         return evaluate.MetricInfo(
-            description=
-            citation=
-            inputs_description=
+            description=_DESCRIPTION,
+            citation=_CITATION,
+            inputs_description=_INPUTS,
             features=datasets.Features(
                 {
                     "predictions": datasets.Sequence(datasets.Value("int32")),
@@ -32,6 +57,7 @@ class FixedF1(evaluate.Metric):
         )
 
     # could remove specific kwargs like average, sample_weight from _compute() method of F1
+    # but leaving for sake of potentially subclassing F1
 
     def _compute(self, predictions, references, labels=None, pos_label=1, average="binary", sample_weight=None):
         score = f1_score(
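The effect of the change is easiest to see at the call site: the averaging strategy is now bound once, when the metric is constructed, instead of being threaded through `compute`. A minimal usage sketch, assuming `fixed_f1.py` is importable as a module and that `__init__` stores `average` for `_compute` to fall back on (the `__init__` body is elided in this diff):

    from fixed_f1 import FixedF1

    # the kwarg is captured at instantiation time, per the class docstring
    f1 = FixedF1(average="weighted")

    f1.add_batch(predictions=[0, 1, 2, 0], references=[0, 1, 1, 0])
    print(f1.compute())  # no average= argument needed here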
fixed_precision.py
CHANGED
@@ -1,10 +1,38 @@
 import datasets
 import evaluate
-from evaluate import evaluator, Metric
 # from evaluate.metrics.precision import Precision
 from sklearn.metrics import precision_score
 
+_DESCRIPTION = """
+Custom built Precision metric that accepts underlying kwargs at instantiation time.
+This class allows one to circumvent the current issue of `combine`-ing the precision metric, instantiated with its own parameters, into a `CombinedEvaluations` class with other metrics.
+\n
+In general, the precision is the ratio tp / (tp + fp) where tp is the number of true positives and fp the number of false positives.
+The precision is intuitively the ability of the classifier not to label as positive a sample that is negative.
+"""
+
+_CITATION = """
+@online{MarioBbqPrec,
+    author = {John Graham Reynolds aka @MarioBarbeque},
+    title = {{Fixed Precision Hugging Face Metric}},
+    year = 2024,
+    url = {https://huggingface.co/spaces/MarioBarbeque/FixedPrecision},
+    urldate = {2024-11-6}
+}
+"""
+
+_INPUTS = """
+'average': This parameter is required for multiclass/multilabel targets.
+    If None, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data.
+    Options include: {‘micro’, ‘macro’, ‘samples’, ‘weighted’, ‘binary’} or `None`. The default value for binary classification is `"binary"`.\n
+
+'zero_division': "Sets the value to return when there is a zero division". Options include:
+    {`"warn"`, `0.0`, `1.0`, `np.nan`}. The default value is `"warn"`.
+"""
+
 # could in principle subclass Precision, but ideally we can work the fix into the Precision class to maintain SOLID code
+# for this immediate fix we create a new class
+
 class FixedPrecision(evaluate.Metric):
 
     def __init__(self, average="binary", zero_division="warn"):
@@ -15,9 +43,9 @@ class FixedPrecision(evaluate.Metric):
 
     def _info(self):
         return evaluate.MetricInfo(
-            description=
-            citation=
-            inputs_description=
+            description=_DESCRIPTION,
+            citation=_CITATION,
+            inputs_description=_INPUTS,
             features=datasets.Features(
                 {
                     "predictions": datasets.Sequence(datasets.Value("int32")),
@@ -33,6 +61,7 @@ class FixedPrecision(evaluate.Metric):
         )
 
     # could remove specific kwargs like average, sample_weight from _compute() method and simply pass them to the underlying scikit-learn function in the form of a class var self.*
+    # but leaving for sake of potentially subclassing Precision
 
     def _compute(
         self, predictions, references, labels=None, pos_label=1, average="binary", sample_weight=None, zero_division="warn",
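`FixedPrecision` binds scikit-learn's `zero_division` behavior the same way. A sketch under the same assumption (both stored kwargs are ultimately forwarded to `precision_score` inside `_compute`, which the visible hunks suggest but do not show):

    from fixed_precision import FixedPrecision

    # return 0.0 instead of warning when a class receives no predicted samples
    precision = FixedPrecision(average="macro", zero_division=0.0)

    precision.add_batch(predictions=[0, 2, 1, 2], references=[0, 1, 1, 2])
    print(precision.compute())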
fixed_recall.py
CHANGED
@@ -1,10 +1,35 @@
 import datasets
 import evaluate
-from evaluate import evaluator, Metric
 # from evaluate.metrics.recall import Recall
 from sklearn.metrics import recall_score
 
+_DESCRIPTION = """
+Custom built Recall metric that accepts underlying kwargs at instantiation time.
+This class allows one to circumvent the current issue of `combine`-ing the Recall metric, instantiated with its own parameters, into a `CombinedEvaluations` class with other metrics.
+\n
+In general, the recall is the ratio tp / (tp + fn) where tp is the number of true positives and fn the number of false negatives.
+The recall is intuitively the ability of the classifier to find all the positive samples.
+"""
+
+_CITATION = """
+@online{MarioBbqRec,
+    author = {John Graham Reynolds aka @MarioBarbeque},
+    title = {{Fixed Recall Hugging Face Metric}},
+    year = 2024,
+    url = {https://huggingface.co/spaces/MarioBarbeque/FixedRecall},
+    urldate = {2024-11-6}
+}
+"""
+
+_INPUTS = """
+'average': This parameter is required for multiclass/multilabel targets.
+    If None, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data.
+    Options include: {‘micro’, ‘macro’, ‘samples’, ‘weighted’, ‘binary’} or `None`. The default is `binary`.
+"""
+
 # could in principle subclass Recall, but ideally we can work the fix into the Recall class to maintain SOLID code
+# for this immediate fix we create a new class
+
 class FixedRecall(evaluate.Metric):
 
     def __init__(self, average="binary"):
@@ -14,9 +39,9 @@ class FixedRecall(evaluate.Metric):
 
     def _info(self):
         return evaluate.MetricInfo(
-            description=
-            citation=
-            inputs_description=
+            description=_DESCRIPTION,
+            citation=_CITATION,
+            inputs_description=_INPUTS,
             features=datasets.Features(
                 {
                     "predictions": datasets.Sequence(datasets.Value("int32")),
@@ -32,6 +57,7 @@ class FixedRecall(evaluate.Metric):
         )
 
     # could remove specific kwargs like average, sample_weight from _compute() method and simply pass them to the underlying scikit-learn function in the form of a class var self.*
+    # but leaving for sake of potentially subclassing Recall
 
     def _compute(
         self, predictions, references, labels=None, pos_label=1, average="binary", sample_weight=None, zero_division="warn",
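Taken together, the three classes resolve the `combine` issue their docstrings describe: each instance carries its own kwargs, so they can be grouped into one `CombinedEvaluations` object with no per-metric arguments at compute time. A usage sketch, assuming all three modules are on the import path:

    import evaluate

    from fixed_f1 import FixedF1
    from fixed_precision import FixedPrecision
    from fixed_recall import FixedRecall

    # each metric is configured independently before being combined
    combined = evaluate.combine([
        FixedF1(average="weighted"),
        FixedPrecision(average="weighted", zero_division=0.0),
        FixedRecall(average="weighted"),
    ])

    combined.add_batch(predictions=[0, 1, 2, 2], references=[0, 1, 1, 2])
    print(combined.compute())  # a single dict with f1, precision, and recall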