vulnerability

Sleeping

App Files Files Community

leavoigt committed on Sep 26, 2023

Commit

95c0e35

•

1 Parent(s): ef9edc5

Rename utils/indicator_classifier.py to utils/vulnerability_classifier.py

Browse files

Files changed (1) hide show

utils/{indicator_classifier.py → vulnerability_classifier.py} +6 -6

utils/{indicator_classifier.py → vulnerability_classifier.py} RENAMED Viewed

@@ -10,7 +10,7 @@ from transformers import pipeline
 @st.cache_resource
-def load_indicatorClassifier(config_file:str = None, classifier_name:str = None):
     """
     loads the document classifier using haystack, where the name/path of model
     in HF-hub as string is used to fetch the model object.Either configfile or
@@ -30,9 +30,9 @@ def load_indicatorClassifier(config_file:str = None, classifier_name:str = None)
             return
         else:
             config = getconfig(config_file)
-            classifier_name = config.get('indicator','MODEL')
-    logging.info("Loading indicator classifier")
     # we are using the pipeline as the model is multilabel and DocumentClassifier
     # from Haystack doesnt support multilabel
     # in pipeline we use 'sigmoid' to explicitly tell pipeline to make it multilabel
@@ -51,7 +51,7 @@ def load_indicatorClassifier(config_file:str = None, classifier_name:str = None)
 @st.cache_data
-def indicator_classification(haystack_doc:pd.DataFrame,
                         threshold:float = 0.5,
                         classifier_model:pipeline= None
                         )->Tuple[DataFrame,Series]:
@@ -74,14 +74,14 @@ def indicator_classification(haystack_doc:pd.DataFrame,
     x: Series object with the unique SDG covered in the document uploaded and
     the number of times it is covered/discussed/count_of_paragraphs.
     """
-    logging.info("Working on Indicator Identification")
     haystack_doc['Indicator Label'] = 'NA'
     haystack_doc['PA_check'] = haystack_doc['Policy-Action Label'].apply(lambda x: True if len(x) != 0 else False)
     df1 = haystack_doc[haystack_doc['PA_check'] == True]
     df = haystack_doc[haystack_doc['PA_check'] == False]
     if not classifier_model:
-        classifier_model = st.session_state['indicator_classifier']
         predictions = classifier_model(list(df1.text))

 @st.cache_resource
+def load_vulnerabilityClassifier(config_file:str = None, classifier_name:str = None):
     """
     loads the document classifier using haystack, where the name/path of model
     in HF-hub as string is used to fetch the model object.Either configfile or
             return
         else:
             config = getconfig(config_file)
+            classifier_name = config.get('vulnerability','MODEL')
+    logging.info("Loading vulnerability classifier")
     # we are using the pipeline as the model is multilabel and DocumentClassifier
     # from Haystack doesnt support multilabel
     # in pipeline we use 'sigmoid' to explicitly tell pipeline to make it multilabel
 @st.cache_data
+def vulnerability_classification(haystack_doc:pd.DataFrame,
                         threshold:float = 0.5,
                         classifier_model:pipeline= None
                         )->Tuple[DataFrame,Series]:
     x: Series object with the unique SDG covered in the document uploaded and
     the number of times it is covered/discussed/count_of_paragraphs.
     """
+    logging.info("Working on vulnerability Identification")
     haystack_doc['Indicator Label'] = 'NA'
     haystack_doc['PA_check'] = haystack_doc['Policy-Action Label'].apply(lambda x: True if len(x) != 0 else False)
     df1 = haystack_doc[haystack_doc['PA_check'] == True]
     df = haystack_doc[haystack_doc['PA_check'] == False]
     if not classifier_model:
+        classifier_model = st.session_state['vulnerability_classifier']
         predictions = classifier_model(list(df1.text))