prashant committed on
Commit
f59362a
·
1 Parent(s): c1078c4

hashing fix

Browse files
appStore/sdg_analysis.py CHANGED
@@ -91,7 +91,8 @@ def app():
91
  if 'filepath' in st.session_state:
92
  file_name = st.session_state['filename']
93
  file_path = st.session_state['filepath']
94
- classifier = load_sdgClassifier(docClassifierModel=model_name)
 
95
  allDocuments = runSDGPreprocessingPipeline(fileName= file_name,
96
  filePath= file_path, split_by= split_by,
97
  split_length= split_length,
@@ -107,8 +108,7 @@ def app():
107
  with st.spinner("Running SDG Classification{}".format(warning_msg)):
108
 
109
  df, x = sdg_classification(haystackdoc=allDocuments['documents'],
110
- threshold= threshold,
111
- classifiermodel= classifier)
112
  df = df.drop(['Relevancy'], axis = 1)
113
  sdg_labels = x.SDG.unique()[::-1]
114
  textrankkeywordlist = []
 
91
  if 'filepath' in st.session_state:
92
  file_name = st.session_state['filename']
93
  file_path = st.session_state['filepath']
94
+ classifier = load_sdgClassifier(classifier_name=model_name)
95
+ st.session_state['sdg_classifier'] = classifier
96
  allDocuments = runSDGPreprocessingPipeline(fileName= file_name,
97
  filePath= file_path, split_by= split_by,
98
  split_length= split_length,
 
108
  with st.spinner("Running SDG Classification{}".format(warning_msg)):
109
 
110
  df, x = sdg_classification(haystackdoc=allDocuments['documents'],
111
+ threshold= threshold)
 
112
  df = df.drop(['Relevancy'], axis = 1)
113
  sdg_labels = x.SDG.unique()[::-1]
114
  textrankkeywordlist = []
utils/keyword_extraction.py CHANGED
@@ -107,6 +107,7 @@ def keywordExtraction(sdg:int,sdgdata:List[Text]):
107
  keywords = [keyword for keyword in results]
108
  return keywords
109
 
 
110
  def textrank(textdata:Text, ratio:float = 0.1, words = 0):
111
  """
112
  wrapper function to perform textrank, uses either ratio or wordcount to
 
107
  keywords = [keyword for keyword in results]
108
  return keywords
109
 
110
+ @st.cache(allow_output_mutation=True)
111
  def textrank(textdata:Text, ratio:float = 0.1, words = 0):
112
  """
113
  wrapper function to perform textrank, uses either ratio or wordcount to
utils/sdg_classifier.py CHANGED
@@ -7,14 +7,35 @@ import logging
7
  import pandas as pd
8
  from pandas import DataFrame, Series
9
  from utils.checkconfig import getconfig
 
10
  from utils.preprocessing import processingpipeline
11
  try:
12
  import streamlit as st
13
  except ImportError:
14
  logging.info("Streamlit not installed")
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  @st.cache(allow_output_mutation=True)
17
- def load_sdgClassifier(configFile = None, docClassifierModel = None):
18
  """
19
  loads the document classifier using haystack, where the name/path of model
20
  in HF-hub as string is used to fetch the model object.Either configfile or
@@ -31,17 +52,17 @@ def load_sdgClassifier(configFile = None, docClassifierModel = None):
31
 
32
  Return: document classifier model
33
  """
34
- if not docClassifierModel:
35
  if not configFile:
36
  logging.warning("Pass either model name or config file")
37
  return
38
  else:
39
  config = getconfig(configFile)
40
- docClassifierModel = config.get('sdg','MODEL')
41
 
42
  logging.info("Loading classifier")
43
  doc_classifier = TransformersDocumentClassifier(
44
- model_name_or_path=docClassifierModel,
45
  task="text-classification")
46
 
47
  return doc_classifier
@@ -49,7 +70,7 @@ def load_sdgClassifier(configFile = None, docClassifierModel = None):
49
 
50
  @st.cache(allow_output_mutation=True)
51
  def sdg_classification(haystackdoc:List[Document],
52
- threshold:float, classifiermodel)->Tuple[DataFrame,Series]:
53
  """
54
  Text-Classification on the list of texts provided. Classifier provides the
55
  most appropriate label for each text. these labels are in terms of if text
@@ -60,6 +81,10 @@ def sdg_classification(haystackdoc:List[Document],
60
  haystackdoc: List of haystack Documents. The output of Preprocessing Pipeline
61
  contains the list of paragraphs in different format,here the list of
62
  Haystack Documents is used.
 
 
 
 
63
 
64
  Returns
65
  ----------
@@ -69,6 +94,13 @@ def sdg_classification(haystackdoc:List[Document],
69
 
70
  """
71
  logging.info("Working on SDG Classification")
 
 
 
 
 
 
 
72
  results = classifiermodel.predict(haystackdoc)
73
 
74
 
 
7
  import pandas as pd
8
  from pandas import DataFrame, Series
9
  from utils.checkconfig import getconfig
10
+ from utils.streamlitcheck import check_streamlit
11
  from utils.preprocessing import processingpipeline
12
  try:
13
  import streamlit as st
14
  except ImportError:
15
  logging.info("Streamlit not installed")
16
 
17
+ ## Labels dictionary ###
18
+ _lab_dict = {0: 'no_cat',
19
+ 1:'SDG 1 - No poverty',
20
+ 2:'SDG 2 - Zero hunger',
21
+ 3:'SDG 3 - Good health and well-being',
22
+ 4:'SDG 4 - Quality education',
23
+ 5:'SDG 5 - Gender equality',
24
+ 6:'SDG 6 - Clean water and sanitation',
25
+ 7:'SDG 7 - Affordable and clean energy',
26
+ 8:'SDG 8 - Decent work and economic growth',
27
+ 9:'SDG 9 - Industry, Innovation and Infrastructure',
28
+ 10:'SDG 10 - Reduced inequality',
29
+ 11:'SDG 11 - Sustainable cities and communities',
30
+ 12:'SDG 12 - Responsible consumption and production',
31
+ 13:'SDG 13 - Climate action',
32
+ 14:'SDG 14 - Life below water',
33
+ 15:'SDG 15 - Life on land',
34
+ 16:'SDG 16 - Peace, justice and strong institutions',
35
+ 17:'SDG 17 - Partnership for the goals',}
36
+
37
  @st.cache(allow_output_mutation=True)
38
+ def load_sdgClassifier(configFile = None, classifier_name = None):
39
  """
40
  loads the document classifier using haystack, where the name/path of model
41
  in HF-hub as string is used to fetch the model object.Either configfile or
 
52
 
53
  Return: document classifier model
54
  """
55
+ if not classifier_name:
56
  if not configFile:
57
  logging.warning("Pass either model name or config file")
58
  return
59
  else:
60
  config = getconfig(configFile)
61
+ classifier_name = config.get('sdg','MODEL')
62
 
63
  logging.info("Loading classifier")
64
  doc_classifier = TransformersDocumentClassifier(
65
+ model_name_or_path=classifier_name,
66
  task="text-classification")
67
 
68
  return doc_classifier
 
70
 
71
  @st.cache(allow_output_mutation=True)
72
  def sdg_classification(haystackdoc:List[Document],
73
+ threshold:float, classifiermodel= None)->Tuple[DataFrame,Series]:
74
  """
75
  Text-Classification on the list of texts provided. Classifier provides the
76
  most appropriate label for each text. these labels are in terms of if text
 
81
  haystackdoc: List of haystack Documents. The output of Preprocessing Pipeline
82
  contains the list of paragraphs in different format,here the list of
83
  Haystack Documents is used.
84
+ threshold: threshold value for the model to keep the results from classifier
85
+ classifiermodel: you can pass the classifier model directly, however in case of
86
+ streamlit avoid it.
87
+
88
 
89
  Returns
90
  ----------
 
94
 
95
  """
96
  logging.info("Working on SDG Classification")
97
+ if not classifiermodel:
98
+ if check_streamlit:
99
+ classifiermodel = st.session_state['sdg_classifier']
100
+ else:
101
+ logging.warning("No streamlit envinornment found, Pass the classifier")
102
+ return
103
+
104
  results = classifiermodel.predict(haystackdoc)
105
 
106