jsalvad0r committed
Commit f2c2a9e
1 Parent(s): c153b4f

Added an aggregate option to the semf1 metric; added a test to check the functionality.

Files changed (2):
  1. semf1.py  +20 -5
  2. tests.py  +39 -1
semf1.py CHANGED

@@ -339,7 +339,7 @@ class SemF1(evaluate.Metric):
             gpu: DEVICE_TYPE = False,
             batch_size: int = 32,
             verbose: bool = False,
-            aggregate: bool = True,
+            aggregate: bool = False,
     ) -> List[Scores]:
         """
         Compute precision, recall, and F1 scores for given predictions and references.
@@ -421,7 +421,22 @@
             recall_scores = [np.clip(r_scores, 0.0, 1.0).item() for (r_scores, _) in recall_scores]

             results.append(Scores(precision, recall_scores))
-
-        return results
-
-# TEST
+
+        # run aggregation procedure
+        if aggregate:
+            mean_prec = np.mean(
+                [score.precision for score in results]
+            )
+            mean_recall = np.mean(np.concatenate(
+                [np.array(score.recall) for score in results]
+            ))
+            aggregated_score = Scores(
+                float(mean_prec),
+                [float(mean_recall)]
+            )
+            aggregated_score.f1 = float(np.mean(
+                [score.f1 for score in results]
+            ))
+            results = aggregated_score
+
+        return results
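
For context, the new block reduces the per-sample results to a single `Scores` object: precision is averaged across samples, recall values are pooled across all samples before averaging, and F1 is the mean of the per-sample F1 values rather than the harmonic mean of the aggregated precision and recall. A minimal standalone sketch of that arithmetic (the `Scores` dataclass below is a hypothetical stand-in for the repo's own class, assumed to expose `precision`, `recall`, and `f1`):

    import numpy as np
    from dataclasses import dataclass
    from typing import List

    @dataclass
    class Scores:
        # Hypothetical stand-in for the Scores class defined in semf1.py.
        precision: float
        recall: List[float]
        f1: float = 0.0

    per_sample = [
        Scores(precision=0.8, recall=[0.7, 0.9], f1=0.79),
        Scores(precision=0.6, recall=[0.5], f1=0.55),
    ]

    # Mean precision over samples.
    mean_prec = float(np.mean([s.precision for s in per_sample]))
    # Mean recall pooled over every recall value from every sample.
    mean_recall = float(np.mean(np.concatenate([np.array(s.recall) for s in per_sample])))

    aggregated = Scores(mean_prec, [mean_recall])
    # F1 is averaged per sample, not recomputed from the aggregated P/R.
    aggregated.f1 = float(np.mean([s.f1 for s in per_sample]))

    print(aggregated)  # Scores(precision=0.7, recall=[0.7], f1=0.67)

With the two samples above this yields precision 0.7, pooled recall (0.7 + 0.9 + 0.5) / 3 = 0.7, and F1 (0.79 + 0.55) / 2 = 0.67.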
tests.py CHANGED

@@ -6,6 +6,7 @@ import torch
 from numpy.testing import assert_almost_equal
 from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
+from unittest import TestLoader

 from .encoder_models import SBertEncoder, get_encoder
 from .semf1 import SemF1, _compute_cosine_similarity, _validate_input_format
@@ -13,6 +14,14 @@ from .utils import get_gpu, slice_embeddings, is_nested_list_of_type, flatten_list


 class TestUtils(unittest.TestCase):
+    def runTest(self):
+        self.test_get_gpu()
+        self.test_slice_embeddings()
+        self.test_is_nested_list_of_type()
+        self.test_flatten_list()
+        self.test_compute_f1()
+        self.test_scores()
+
     def test_get_gpu(self):
         gpu_count = torch.cuda.device_count()
         gpu_available = torch.cuda.is_available()
@@ -231,6 +240,32 @@ class TestSemF1(unittest.TestCase):
                 ["Alternative reference 1.", "Alternative reference 2."]
             ],
         ]
+        self.multi_sample_refs = [
+            'this is the first reference sample',
+            'this is the second reference sample',
+        ]
+        self.multi_sample_preds = [
+            'this is the first prediction sample',
+            'this is the second prediction sample',
+        ]
+
+    def test_aggregate_flag(self):
+        """
+        Check that a single `Scores` instance is returned instead of a
+        list of `Scores`.
+        """
+        scores = self.semf1_metric.compute(
+            predictions=self.multi_sample_preds,
+            references=self.multi_sample_refs,
+            tokenize_sentences=True,
+            multi_references=False,
+            gpu=False,
+            batch_size=32,
+            verbose=False,
+            aggregate=True,
+        )
+        self.assertIsInstance(scores, Scores)
+

     def test_untokenized_single_reference(self):
         scores = self.semf1_metric.compute(
@@ -600,5 +635,8 @@ class TestValidateInputFormat(unittest.TestCase):
         )


-if __name__ == '__main__':
+def run_tests():
     unittest.main(verbosity=2)
+
+if __name__ == '__main__':
+    run_tests()
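
Because tests.py uses package-relative imports (`from .semf1 import ...`), the suite has to run from inside its package; the newly imported `TestLoader` also allows building the suite explicitly. A minimal sketch of such a runner (a sibling module inside the same package is assumed so the relative import resolves; the class names are taken from the diff above):

    import unittest
    from unittest import TestLoader

    # Hypothetical runner module placed next to tests.py in the same package.
    from .tests import TestUtils, TestSemF1, TestValidateInputFormat

    # Collect the three test cases into one suite and run it verbosely.
    loader = TestLoader()
    suite = unittest.TestSuite()
    for case in (TestUtils, TestSemF1, TestValidateInputFormat):
        suite.addTests(loader.loadTestsFromTestCase(case))
    unittest.TextTestRunner(verbosity=2).run(suite)

Equivalently, the new `run_tests()` helper just delegates to `unittest.main(verbosity=2)`, which discovers the same test cases when the module is executed as a script.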