John Graham Reynolds committed
Commit: 544f3da
1 Parent(s): 0f86724

update class files

Files changed (3):
  1. fixed_f1.py +31 -5
  2. fixed_precision.py +33 -4
  3. fixed_recall.py +30 -4
fixed_f1.py CHANGED
@@ -1,10 +1,35 @@
 import datasets
 import evaluate
-from evaluate import evaluator, Metric
 # from evaluate.metrics.f1 import F1
 from sklearn.metrics import f1_score
 
-# could in principle subclass F1, but ideally we can work the fix into the F1 class to maintain SOLID code
+_DESCRIPTION = """
+Custom built F1 metric that accepts underlying kwargs at instantiation time.
+This class allows one to circumvent the current issue of `combine`-ing the f1 metric, instantiated with its own parameters, into a `CombinedEvaluations` class with other metrics.
+\n
+In general, the F1 score is the harmonic mean of the precision and recall. It can be computed with the equation:\n
+F1 = 2 * (precision * recall) / (precision + recall)
+"""
+
+_CITATION = """
+@online{MarioBbqF1,
+  author = {John Graham Reynolds aka @MarioBarbeque},
+  title = {{Fixed F1 Hugging Face Metric}},
+  year = 2024,
+  url = {https://huggingface.co/spaces/MarioBarbeque/FixedF1},
+  urldate = {2024-11-5}
+}
+"""
+
+_INPUTS = """
+'average': This parameter is required for multiclass/multilabel targets.
+If None, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data.
+Options include: {‘micro’, ‘macro’, ‘samples’, ‘weighted’, ‘binary’} or `None`. The default is `binary`.
+"""
+
+# could in principle subclass the F1 Metric, but ideally we can work the fix into HF evaluate's main F1 class to maintain SOLID code
+# for this fix we create a new class
+
 class FixedF1(evaluate.Metric):
 
     def __init__(self, average="binary"):
@@ -14,9 +39,9 @@ class FixedF1(evaluate.Metric):
 
     def _info(self):
         return evaluate.MetricInfo(
-            description="Custom built F1 metric for true *multilabel* classification - the 'multilabel' config_name var in the evaluate.EvaluationModules class appears to better address multi-class classification, where features can fall under a multitude of labels. Granted, the subtlety is minimal and easily confused. This class is implemented with the intention of enabling the evaluation of multiple multilabel classification metrics at the same time using the evaluate.CombinedEvaluations.combine method.",
-            citation="",
-            inputs_description="'average': This parameter is required for multiclass/multilabel targets. If None, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data. Options include: {‘micro’, ‘macro’, ‘samples’, ‘weighted’, ‘binary’} or None.",
+            description=_DESCRIPTION,
+            citation=_CITATION,
+            inputs_description=_INPUTS,
             features=datasets.Features(
                 {
                     "predictions": datasets.Sequence(datasets.Value("int32")),
@@ -32,6 +57,7 @@ class FixedF1(evaluate.Metric):
         )
 
     # could remove specific kwargs like average, sample_weight from _compute() method of F1
+    # but leaving for sake of potentially subclassing F1
 
     def _compute(self, predictions, references, labels=None, pos_label=1, average="binary", sample_weight=None):
         score = f1_score(
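
A minimal usage sketch of the updated class, assuming fixed_f1.py is importable and that the `average` chosen at instantiation is the value ultimately handed to scikit-learn's f1_score:

from fixed_f1 import FixedF1

# averaging strategy is fixed once, when the metric object is built
f1 = FixedF1(average="weighted")

# toy multiclass data; "weighted" averaging weights each class by its support
results = f1.compute(predictions=[0, 2, 1, 0, 0, 2], references=[0, 1, 1, 0, 2, 2])
print(results)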
fixed_precision.py CHANGED
@@ -1,10 +1,38 @@
 import datasets
 import evaluate
-from evaluate import evaluator, Metric
 # from evaluate.metrics.precision import Precision
 from sklearn.metrics import precision_score
 
+_DESCRIPTION = """
+Custom built Precision metric that accepts underlying kwargs at instantiation time.
+This class allows one to circumvent the current issue of `combine`-ing the precision metric, instantiated with its own parameters, into a `CombinedEvaluations` class with other metrics.
+\n
+In general, the precision is the ratio tp / (tp + fp) where tp is the number of true positives and fp the number of false positives.
+The precision is intuitively the ability of the classifier not to label as positive a sample that is negative.
+"""
+
+_CITATION = """
+@online{MarioBbqPrec,
+  author = {John Graham Reynolds aka @MarioBarbeque},
+  title = {{Fixed Precision Hugging Face Metric}},
+  year = 2024,
+  url = {https://huggingface.co/spaces/MarioBarbeque/FixedPrecision},
+  urldate = {2024-11-6}
+}
+"""
+
+_INPUTS = """
+'average': This parameter is required for multiclass/multilabel targets.
+If None, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data.
+Options include: {‘micro’, ‘macro’, ‘samples’, ‘weighted’, ‘binary’} or `None`. The default value for binary classification is `"binary"`.\n
+
+'zero_division': "Sets the value to return when there is a zero division". Options include:
+{`“warn”`, `0.0`, `1.0`, `np.nan`}. The default value is `"warn"`.
+"""
+
 # could in principle subclass Precision, but ideally we can work the fix into the Precision class to maintain SOLID code
+# for this immediate fix we create a new class
+
 class FixedPrecision(evaluate.Metric):
 
     def __init__(self, average="binary", zero_division="warn"):
@@ -15,9 +43,9 @@ class FixedPrecision(evaluate.Metric):
 
     def _info(self):
         return evaluate.MetricInfo(
-            description="Custom built Precision metric for true *multilabel* classification - the 'multilabel' config_name var in the evaluate.EvaluationModules class appears to better address multi-class classification, where features can fall under a multitude of labels. Granted, the subtlety is minimal and easily confused. This class is implemented with the intention of enabling the evaluation of multiple multilabel classification metrics at the same time using the evaluate.CombinedEvaluations.combine method.",
-            citation="",
-            inputs_description="'average': This parameter is required for multiclass/multilabel targets. If None, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data. Options include: {‘micro’, ‘macro’, ‘samples’, ‘weighted’, ‘binary’} or None.",
+            description=_DESCRIPTION,
+            citation=_CITATION,
+            inputs_description=_INPUTS,
             features=datasets.Features(
                 {
                     "predictions": datasets.Sequence(datasets.Value("int32")),
@@ -33,6 +61,7 @@ class FixedPrecision(evaluate.Metric):
         )
 
     # could remove specific kwargs like average, sample_weight from _compute() method and simply pass them to the underlying scikit-learn function in the form of a class var self.*
+    # but leaving for sake of potentially subclassing Precision
 
     def _compute(
         self, predictions, references, labels=None, pos_label=1, average="binary", sample_weight=None, zero_division="warn",
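
Likewise, a minimal sketch for FixedPrecision, assuming both constructor kwargs (`average`, `zero_division`) are the ones forwarded to scikit-learn's precision_score:

from fixed_precision import FixedPrecision

# both kwargs are pinned at instantiation; zero_division=0.0 returns 0.0 instead of warning
precision = FixedPrecision(average="macro", zero_division=0.0)

results = precision.compute(predictions=[0, 2, 1, 0, 0, 2], references=[0, 1, 1, 0, 2, 2])
print(results)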
fixed_recall.py CHANGED
@@ -1,10 +1,35 @@
 import datasets
 import evaluate
-from evaluate import evaluator, Metric
 # from evaluate.metrics.recall import Recall
 from sklearn.metrics import recall_score
 
+_DESCRIPTION = """
+Custom built Recall metric that accepts underlying kwargs at instantiation time.
+This class allows one to circumvent the current issue of `combine`-ing the Recall metric, instantiated with its own parameters, into a `CombinedEvaluations` class with other metrics.
+\n
+In general, the recall is the ratio tp / (tp + fn) where tp is the number of true positives and fn the number of false negatives.
+The recall is intuitively the ability of the classifier to find all the positive samples.
+"""
+
+_CITATION = """
+@online{MarioBbqRec,
+  author = {John Graham Reynolds aka @MarioBarbeque},
+  title = {{Fixed Recall Hugging Face Metric}},
+  year = 2024,
+  url = {https://huggingface.co/spaces/MarioBarbeque/FixedRecall},
+  urldate = {2024-11-6}
+}
+"""
+
+_INPUTS = """
+'average': This parameter is required for multiclass/multilabel targets.
+If None, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data.
+Options include: {‘micro’, ‘macro’, ‘samples’, ‘weighted’, ‘binary’} or `None`. The default is `binary`.
+"""
+
 # could in principle subclass Recall, but ideally we can work the fix into the Recall class to maintain SOLID code
+# for this immediate fix we create a new class
+
 class FixedRecall(evaluate.Metric):
 
     def __init__(self, average="binary"):
@@ -14,9 +39,9 @@ class FixedRecall(evaluate.Metric):
 
     def _info(self):
         return evaluate.MetricInfo(
-            description="Custom built Recall metric for true *multilabel* classification - the 'multilabel' config_name var in the evaluate.EvaluationModules class appears to better address multi-class classification, where features can fall under a multitude of labels. Granted, the subtlety is minimal and easily confused. This class is implemented with the intention of enabling the evaluation of multiple multilabel classification metrics at the same time using the evaluate.CombinedEvaluations.combine method.",
-            citation="",
-            inputs_description="'average': This parameter is required for multiclass/multilabel targets. If None, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data. Options include: {‘micro’, ‘macro’, ‘samples’, ‘weighted’, ‘binary’} or None.",
+            description=_DESCRIPTION,
+            citation=_CITATION,
+            inputs_description=_INPUTS,
             features=datasets.Features(
                 {
                     "predictions": datasets.Sequence(datasets.Value("int32")),
@@ -32,6 +57,7 @@ class FixedRecall(evaluate.Metric):
         )
 
     # could remove specific kwargs like average, sample_weight from _compute() method and simply pass them to the underlying scikit-learn function in the form of a class var self.*
+    # but leaving for sake of potentially subclassing Recall
 
     def _compute(
         self, predictions, references, labels=None, pos_label=1, average="binary", sample_weight=None, zero_division="warn",
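
Finally, a sketch of the end goal these three classes describe: combining the pre-configured metrics with evaluate.combine so that a single call scores all of them. This assumes the fixed modules are importable and are accepted by combine like any other EvaluationModule:

import evaluate

from fixed_f1 import FixedF1
from fixed_precision import FixedPrecision
from fixed_recall import FixedRecall

# each metric carries its own kwargs, the situation that previously broke `combine`
f1 = FixedF1(average="weighted")
precision = FixedPrecision(average="micro", zero_division=0.0)
recall = FixedRecall(average="macro")

clf_metrics = evaluate.combine([f1, precision, recall])
clf_metrics.add_batch(predictions=[0, 2, 1, 0, 0, 2], references=[0, 1, 1, 0, 2, 2])
print(clf_metrics.compute())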