prashant committed
Commit: 550b85d
Parent(s): 4e2e62f

changing list order adding coherence
Files changed:
- app.py +2 -1
- appStore/coherence.py +3 -262
- appStore/keyword_search.py +9 -12
- docStore/sample/files.json +3 -2
- docStore/sample/keywordexample.json +3 -3
- paramconfig.cfg +1 -1
- ver0.1 scripts/coherence.py +267 -0
app.py
CHANGED
@@ -1,6 +1,6 @@
 import appStore.keyword_search as keyword_search
 import appStore.sdg_analysis as sdg_analysis
-
+import appStore.coherence as coherence
 import appStore.info as info
 from appStore.multiapp import MultiApp
 import streamlit as st
@@ -13,5 +13,6 @@ app = MultiApp()
 app.add_app("About","house", info.app)
 app.add_app("SDG Analysis","gear",sdg_analysis.app)
 app.add_app("Search","search", keyword_search.app)
+app.add_app("NDC Coherence","exclude", coherence.app)
 
 app.run()
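appStore/multiapp.py itself is not touched by this commit. For orientation only, a minimal sketch of a MultiApp-style dispatcher is given below; the class body, the sidebar radio menu and the dict layout are assumptions for illustration, not the real implementation.

# Hypothetical sketch of a MultiApp-style dispatcher; the real appStore/multiapp.py is not shown in this commit.
import streamlit as st

class MultiApp:
    def __init__(self):
        self.apps = []  # (title, icon, render callable) in registration order

    def add_app(self, title, icon, func):
        # Registration order is display order, which is why "NDC Coherence"
        # is appended after "Search" in app.py above.
        self.apps.append({"title": title, "icon": icon, "func": func})

    def run(self):
        titles = [a["title"] for a in self.apps]
        choice = st.sidebar.radio("Go to", titles)  # stand-in for the real menu widget
        next(a for a in self.apps if a["title"] == choice)["func"]()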
appStore/coherence.py
CHANGED
@@ -1,267 +1,8 @@
 # set path
-import glob, os, sys;
-
-#import helper
-import udfPreprocess.docPreprocessing as pre
-import udfPreprocess.cleaning as clean
-
-#import needed libraries
-import seaborn as sns
-from pandas import DataFrame
-from sentence_transformers import SentenceTransformer, CrossEncoder, util
-from sklearn.metrics.pairwise import cosine_similarity
-# from keybert import KeyBERT
-from transformers import pipeline
-import matplotlib.pyplot as plt
-import numpy as np
+import glob, os, sys;
+sys.path.append('../utils')
+
 import streamlit as st
-import pandas as pd
-from rank_bm25 import BM25Okapi
-from sklearn.feature_extraction import _stop_words
-import string
-from tqdm.autonotebook import tqdm
-import numpy as np
-import urllib.request
-import ast
-import tempfile
-import sqlite3
-import json
-import urllib.request
-import ast
-import docx
-from docx.shared import Inches
-from docx.shared import Pt
-from docx.enum.style import WD_STYLE_TYPE
 
 def app():
-
-    st.sidebar.title('Check Coherence')
-    st.sidebar.write(' ')
-    with open('ndcs/countryList.txt') as dfile:
-        countryList = dfile.read()
-
-    countryList = ast.literal_eval(countryList)
-    countrynames = list(countryList.keys())
-
-    option = st.sidebar.selectbox('Select Country', (countrynames))
-    countryCode = countryList[option]
-
-
-    with st.container():
-        st.markdown("<h1 style='text-align: center; color: black;'> Check Coherence of Policy Document with NDCs</h1>", unsafe_allow_html=True)
-        st.write(' ')
-        st.write(' ')
-
-    with st.expander("ℹ️ - About this app", expanded=True):
-
-        st.write(
-            """
-            The *Check Coherence* app is an easy-to-use interface built in Streamlit for doing analysis of policy document and finding the coherence between NDCs/New-Updated NDCs- developed by GIZ Data and the Sustainable Development Solution Network.
-            """
-        )
-
-        st.markdown("")
-
-    st.markdown("")
-    st.markdown("## 📌 Step One: Upload document of the country selected ")
-
-    with st.container():
-        docs = None
-        # asking user for either upload or select existing doc
-        choice = st.radio(label = 'Select the Document',
-                          help = 'You can upload the document \
-                          or else you can try a example document.',
-                          options = ('Upload Document', 'Try Example'),
-                          horizontal = True)
-
-        if choice == 'Upload Document':
-            uploaded_file = st.file_uploader('Upload the File', type=['pdf', 'docx', 'txt'])
-            if uploaded_file is not None:
-                with tempfile.NamedTemporaryFile(mode="wb") as temp:
-                    bytes_data = uploaded_file.getvalue()
-                    temp.write(bytes_data)
-
-                    st.write("Uploaded Filename: ", uploaded_file.name)
-                    file_name = uploaded_file.name
-                    file_path = temp.name
-                    docs = pre.load_document(file_path, file_name)
-                    haystackDoc, dataframeDoc, textData, paraList = clean.preprocessing(docs)
-
-        else:
-            # listing the options
-            option = st.selectbox('Select the example document',
-                                  ('South Africa:Low Emission strategy',
-                                   'Ethiopia: 10 Year Development Plan'))
-            if option is 'South Africa:Low Emission strategy':
-                file_name = file_path = 'sample/South Africa_s Low Emission Development Strategy.txt'
-                countryCode = countryList['South Africa']
-                st.write("Selected document:", file_name.split('/')[1])
-                # with open('sample/South Africa_s Low Emission Development Strategy.txt') as dfile:
-                # file = open('sample/South Africa_s Low Emission Development Strategy.txt', 'wb')
-            else:
-                # with open('sample/Ethiopia_s_2021_10 Year Development Plan.txt') as dfile:
-                file_name = file_path = 'sample/Ethiopia_s_2021_10 Year Development Plan.txt'
-                countryCode = countryList['Ethiopia']
-                st.write("Selected document:", file_name.split('/')[1])
-
-            if option is not None:
-                docs = pre.load_document(file_path,file_name)
-                haystackDoc, dataframeDoc, textData, paraList = clean.preprocessing(docs)
-
-    with open('ndcs/cca.txt', encoding='utf-8', errors='ignore') as dfile:
-        cca_sent = dfile.read()
-
-    cca_sent = ast.literal_eval(cca_sent)
-
-    with open('ndcs/ccm.txt', encoding='utf-8', errors='ignore') as dfile:
-        ccm_sent = dfile.read()
-
-    ccm_sent = ast.literal_eval(ccm_sent)
-
-    with open('ndcs/countryList.txt') as dfile:
-        countryList = dfile.read()
-
-    countryList = ast.literal_eval(countryList)
-
-    def get_document(countryCode: str):
-        link = "https://klimalog.die-gdi.de/ndc/open-data/dataset.json"
-        with urllib.request.urlopen(link) as urlfile:
-            data = json.loads(urlfile.read())
-        categoriesData = {}
-        categoriesData['categories']= data['categories']
-        categoriesData['subcategories']= data['subcategories']
-        keys_sub = categoriesData['subcategories'].keys()
-        documentType= 'NDCs'
-        if documentType in data.keys():
-            if countryCode in data[documentType].keys():
-                get_dict = {}
-                for key, value in data[documentType][countryCode].items():
-                    if key not in ['country_name','region_id', 'region_name']:
-                        get_dict[key] = value['classification']
-                    else:
-                        get_dict[key] = value
-            else:
-                return None
-        else:
-            return None
-
-        country = {}
-        for key in categoriesData['categories']:
-            country[key]= {}
-        for key,value in categoriesData['subcategories'].items():
-            country[value['category']][key] = get_dict[key]
-
-        return country
-
-    # country_ndc = get_document('NDCs', countryList[option])
-
-    def countrySpecificCCA(cca_sent, threshold, countryCode):
-        temp = {}
-        doc = get_document(countryCode)
-        for key,value in cca_sent.items():
-            id_ = doc['climate change adaptation'][key]['id']
-            if id_ >threshold:
-                temp[key] = value['id'][id_]
-        return temp
-
-
-    def countrySpecificCCM(ccm_sent, threshold, countryCode):
-        temp = {}
-        doc = get_document(countryCode)
-        for key,value in ccm_sent.items():
-            id_ = doc['climate change mitigation'][key]['id']
-            if id_ >threshold:
-                temp[key] = value['id'][id_]
-
-        return temp
-
-
-
-    if docs is not None:
-        sent_cca = countrySpecificCCA(cca_sent,1,countryCode)
-        sent_ccm = countrySpecificCCM(ccm_sent,1,countryCode)
-        #st.write(sent_ccm)
-        @st.cache(allow_output_mutation=True)
-        def load_sentenceTransformer(name):
-            return SentenceTransformer(name)
-        model = load_sentenceTransformer('all-MiniLM-L6-v2')
-
-        document_embeddings = model.encode(paraList, show_progress_bar=True)
-
-        genre = st.radio( "Select Category",('Climate Change Adaptation', 'Climate Change Mitigation'))
-        if genre == 'Climate Change Adaptation':
-            sent_dict = sent_cca
-            sent_labels = []
-            for key,sent in sent_dict.items():
-                sent_labels.append(sent)
-            label_embeddings = model.encode(sent_labels, show_progress_bar=True)
-            similarity_high_threshold = 0.55
-            similarity_matrix = cosine_similarity(label_embeddings, document_embeddings)
-            label_indices, paragraph_indices = np.where(similarity_matrix>similarity_high_threshold)
-
-            positive_indices = list(zip(label_indices.tolist(), paragraph_indices.tolist()))
-
-
-        else:
-            sent_dict = sent_ccm
-            sent_labels = []
-            for key,sent in sent_dict.items():
-                sent_labels.append(sent)
-            label_embeddings = model.encode(sent_labels, show_progress_bar=True)
-            similarity_high_threshold = 0.55
-            similarity_matrix = cosine_similarity(label_embeddings, document_embeddings)
-            label_indices, paragraph_indices = np.where(similarity_matrix>similarity_high_threshold)
-
-            positive_indices = list(zip(label_indices.tolist(), paragraph_indices.tolist()))
-
-
-        # sent_labels = []
-        # for key,sent in sent_dict.items():
-        #     sent_labels.append(sent)
-
-
-        # label_embeddings = model.encode(sent_labels, show_progress_bar=True)
-
-        #similarity_high_threshold = 0.55
-        # similarity_matrix = cosine_similarity(label_embeddings, document_embeddings)
-        #label_indices, paragraph_indices = np.where(similarity_matrix>similarity_high_threshold)
-
-        #positive_indices = list(zip(label_indices.tolist(), paragraph_indices.tolist()))
-        document = docx.Document()
-        document.add_heading('Document name:{}'.format(file_name), 2)
-        section = document.sections[0]
-
-        # Calling the footer
-        footer = section.footer
-
-        # Calling the paragraph already present in
-        # the footer section
-        footer_para = footer.paragraphs[0]
-
-        font_styles = document.styles
-        font_charstyle = font_styles.add_style('CommentsStyle', WD_STYLE_TYPE.CHARACTER)
-        font_object = font_charstyle.font
-        font_object.size = Pt(7)
-        # Adding the centered zoned footer
-        footer_para.add_run('''\tPowered by GIZ Data and the Sustainable Development Solution Network hosted at Hugging-Face spaces: https://huggingface.co/spaces/ppsingh/streamlit_dev''', style='CommentsStyle')
-
-        document.add_paragraph("Country Code for which NDC is carried out {}".format(countryCode))
-
-        for _label_idx, _paragraph_idx in positive_indices:
-            st.write("This paragraph: \n")
-            document.add_paragraph("This paragraph: \n")
-            st.write(paraList[_paragraph_idx])
-            st.write(f"Is relevant to: \n {list(sent_dict.keys())[_label_idx]}")
-            document.add_paragraph(f"Is relevant to: \n {list(sent_dict.keys())[_label_idx]}")
-            st.write('-'*10)
-            document.add_paragraph('-'*10)
-
-        document.save('demo.docx')
-        with open("demo.docx", "rb") as file:
-            btn = st.download_button(
-                label="Download file",
-                data=file,
-                file_name="demo.docx",
-                mime="txt/docx"
-            )
-
+    st.write("Coming soon")
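The page body removed above (and preserved under ver0.1 scripts/ at the end of this commit) matches NDC statements against document paragraphs by embedding both with a SentenceTransformer and keeping every pair whose cosine similarity exceeds 0.55. A minimal, self-contained sketch of that matching step, using toy labels and paragraphs in place of the app's ndcs/ data files:

# Minimal sketch of the label-vs-paragraph matching used by the removed coherence code.
# Toy inputs only; the real app reads its labels from ndcs/cca.txt and ndcs/ccm.txt.
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

model = SentenceTransformer('all-MiniLM-L6-v2')

labels = ["Expand renewable electricity generation",
          "Strengthen flood protection infrastructure"]
paragraphs = ["The plan commits to new solar and wind capacity by 2030.",
              "Coastal dikes will be reinforced to cope with sea level rise."]

label_emb = model.encode(labels)
para_emb = model.encode(paragraphs)

similarity = cosine_similarity(label_emb, para_emb)   # shape: (labels, paragraphs)
label_idx, para_idx = np.where(similarity > 0.55)     # same 0.55 threshold as the app

for i, j in zip(label_idx.tolist(), para_idx.tolist()):
    print(f"'{paragraphs[j]}' is relevant to '{labels[i]}'")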
appStore/keyword_search.py
CHANGED
@@ -56,10 +56,11 @@ def app():
             on the context as well. The semantic search allows for a personalized\
             experience in using the application. Both methods employ a \
             probabilistic retrieval framework in its identification of relevant \
-            paragraphs. By defualt the search is
-
+            paragraphs. By defualt the search is performed using 'Semantic Search'
+            to find 'Exact/Lexical Matches' please tick the checkbox provided, which will \
             by pass semantic search.. Furthermore, the application allows the \
-            user to search for pre-defined keywords from different thematic buckets
+            user to search for pre-defined keywords from different thematic buckets\
+            present in sidebar.""")
 
 
     with st.sidebar:
@@ -72,11 +73,6 @@ def app():
         else:
            keywordList = None
 
-        # searchtype = st.selectbox("Do you want to find exact macthes or similar \
-        #                           meaning/context",
-        #                           ['Exact Matches', 'Similar context/meaning'])
-
-
        st.markdown("---")
 
    with st.container():
@@ -84,7 +80,6 @@ def app():
        # queryList = st.text_input("You selected the {} category we \
        #                           will look for these keywords in document".format(genre),
        #                           value="{}".format(keywordList))
-        # else:
        queryList = st.text_input("Please enter here your question and we \
                                   will look for an answer in the document\
                                   OR enter the keyword you are looking \
@@ -92,7 +87,6 @@ def app():
                                   context in the document. You can select the \
                                   presets of keywords from sidebar.",
                                   value = "{}".format(keywordList))
-        #                          placeholder="Enter keyword here")
        searchtype = st.checkbox("Show only Exact Matches")
        if st.button("Find them"):
 
@@ -129,10 +123,13 @@ def app():
                                    split_overlap=split_overlap,
                                    removePunc= remove_punc,
                                    split_respect_sentence_boundary=split_respect_sentence_boundary)
-
+            if len(allDocuments['documents']) > 100:
+                warning_msg = ": This might take sometime, please sit back and relax."
+            else:
+                warning_msg = ""
 
            logging.info("starting semantic search")
-            with st.spinner("Performing Similar/Contextual search"):
+            with st.spinner("Performing Similar/Contextual search{}".format(warning_msg)):
                semantic_search(query = queryList,
                                documents = allDocuments['documents'],
                                embedding_model=embedding_model,
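The hunks above show only fragments of app(). Purely as an illustration of how the 'Show only Exact Matches' checkbox and the new conditional spinner message fit together, here is a condensed, hypothetical sketch; lexical_search and semantic_search are stand-ins for the module's real helpers, and the toy inputs replace the preprocessed document.

# Condensed, hypothetical sketch of the search flow around the edited lines.
import streamlit as st

def lexical_search(query, documents):            # stand-in for the app's real helper
    st.write("(exact-match search for: {})".format(query))

def semantic_search(query, documents, embedding_model):   # stand-in for the app's real helper
    st.write("(semantic search for: {})".format(query))

queryList = st.text_input("Enter your question or keywords", value="Climate,Adaptation")
allDocuments = {'documents': ["paragraph one", "paragraph two"]}   # toy stand-in
embedding_model = None                                             # toy stand-in

searchtype = st.checkbox("Show only Exact Matches")
if st.button("Find them"):
    if searchtype:
        lexical_search(queryList, allDocuments['documents'])
    else:
        # The new warning suffix only appears for large documents.
        if len(allDocuments['documents']) > 100:
            warning_msg = ": This might take sometime, please sit back and relax."
        else:
            warning_msg = ""
        with st.spinner("Performing Similar/Contextual search{}".format(warning_msg)):
            semantic_search(query=queryList, documents=allDocuments['documents'],
                            embedding_model=embedding_model)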
docStore/sample/files.json
CHANGED
@@ -1,2 +1,3 @@
-{"
-
+{"Ethiopia: 10 Year Development Plan":"docStore/sample/Ethiopia_s_2021_10 Year Development Plan.txt",
+"South Africa:Low Emission strategy":"docStore/sample/South Africa_s Low Emission Development Strategy.txt"
+}
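files.json now maps an example-document title to its path under docStore/sample/. A small sketch of how such a mapping can drive a 'Try Example' picker follows; the exact wiring inside keyword_search.py is not part of this hunk, so treat it as an assumption.

# Sketch: populate an example-document selectbox from docStore/sample/files.json.
import json
import streamlit as st

with open('docStore/sample/files.json') as f:
    examples = json.load(f)          # {"title": "path/to/file.txt", ...}

title = st.selectbox('Select the example document', list(examples.keys()))
file_path = examples[title]
st.write("Selected document:", file_path.split('/')[-1])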
docStore/sample/keywordexample.json
CHANGED
@@ -1,7 +1,7 @@
 {
-"Food":"Food security,Nutrition,Diets,Food loss",
 "Climate":"Climate,Adaptation,Mitigation,Decarbonization,Carbon neutrality,Net zero Emissions",
-"
+"Food":"Food security,Nutrition,Diets,Food loss",
+"Implementation":"Implementation,transformation,reform,integration,strategy,policy",
 "Nature":"Nature,Nature-based solutions,Biodiversity,Degradation",
-"
+"Social":"Indigenous,Local community(ies),Gender,Rural livelihoods,Minority"
 }
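keywordexample.json now lists the thematic buckets in alphabetical order and adds 'Implementation' and 'Social' presets. A sketch of how these presets could pre-fill the search box from the sidebar (again an assumption about the surrounding code, which this hunk does not show):

# Sketch: offer keyword presets from docStore/sample/keywordexample.json in the sidebar.
import json
import streamlit as st

with open('docStore/sample/keywordexample.json') as f:
    keywordexample = json.load(f)    # {"Climate": "Climate,Adaptation,...", ...}

genre = st.sidebar.selectbox("Select a thematic bucket", list(keywordexample.keys()))
keywordList = keywordexample[genre]

queryList = st.text_input("Enter your question or keywords",
                          value="{}".format(keywordList))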
paramconfig.cfg
CHANGED
@@ -12,7 +12,7 @@ RETRIEVER = msmarco-bert-base-dot-v5
 RETRIEVER_FORMAT = sentence_transformers
 RETRIEVER_EMB_LAYER = -1
 READER = deepset/tinyroberta-squad2
-READER_TOP_K =
+READER_TOP_K = 10
 THRESHOLD = 0.1
 SPLIT_BY = sentence
 SPLIT_LENGTH = 3
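READER_TOP_K now carries an explicit value of 10. A minimal sketch of reading these keys with configparser; the section lookup is an assumption, since paramconfig.cfg's section headers are not visible in this hunk.

# Sketch: read reader settings from paramconfig.cfg with configparser.
# Picking the first section is an assumption; only the keys shown in the diff are real.
import configparser

config = configparser.ConfigParser()
config.read('paramconfig.cfg')

section = config.sections()[0]                          # whichever section holds these keys
reader_model = config.get(section, 'READER')            # deepset/tinyroberta-squad2
reader_top_k = config.getint(section, 'READER_TOP_K')   # 10 after this commit
print(reader_model, reader_top_k)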
ver0.1 scripts/coherence.py
ADDED
@@ -0,0 +1,267 @@
+# set path
+import glob, os, sys; sys.path.append('../udfPreprocess')
+
+#import helper
+import udfPreprocess.docPreprocessing as pre
+import udfPreprocess.cleaning as clean
+
+#import needed libraries
+import seaborn as sns
+from pandas import DataFrame
+from sentence_transformers import SentenceTransformer, CrossEncoder, util
+from sklearn.metrics.pairwise import cosine_similarity
+# from keybert import KeyBERT
+from transformers import pipeline
+import matplotlib.pyplot as plt
+import numpy as np
+import streamlit as st
+import pandas as pd
+from rank_bm25 import BM25Okapi
+from sklearn.feature_extraction import _stop_words
+import string
+from tqdm.autonotebook import tqdm
+import numpy as np
+import urllib.request
+import ast
+import tempfile
+import sqlite3
+import json
+import urllib.request
+import ast
+import docx
+from docx.shared import Inches
+from docx.shared import Pt
+from docx.enum.style import WD_STYLE_TYPE
+
+def app():
+    # Sidebar
+    st.sidebar.title('Check Coherence')
+    st.sidebar.write(' ')
+    with open('ndcs/countryList.txt') as dfile:
+        countryList = dfile.read()
+
+    countryList = ast.literal_eval(countryList)
+    countrynames = list(countryList.keys())
+
+    option = st.sidebar.selectbox('Select Country', (countrynames))
+    countryCode = countryList[option]
+
+
+    with st.container():
+        st.markdown("<h1 style='text-align: center; color: black;'> Check Coherence of Policy Document with NDCs</h1>", unsafe_allow_html=True)
+        st.write(' ')
+        st.write(' ')
+
+    with st.expander("ℹ️ - About this app", expanded=True):
+
+        st.write(
+            """
+            The *Check Coherence* app is an easy-to-use interface built in Streamlit for doing analysis of policy document and finding the coherence between NDCs/New-Updated NDCs- developed by GIZ Data and the Sustainable Development Solution Network.
+            """
+        )
+
+        st.markdown("")
+
+    st.markdown("")
+    st.markdown("## 📌 Step One: Upload document of the country selected ")
+
+    with st.container():
+        docs = None
+        # asking user for either upload or select existing doc
+        choice = st.radio(label = 'Select the Document',
+                          help = 'You can upload the document \
+                          or else you can try a example document.',
+                          options = ('Upload Document', 'Try Example'),
+                          horizontal = True)
+
+        if choice == 'Upload Document':
+            uploaded_file = st.file_uploader('Upload the File', type=['pdf', 'docx', 'txt'])
+            if uploaded_file is not None:
+                with tempfile.NamedTemporaryFile(mode="wb") as temp:
+                    bytes_data = uploaded_file.getvalue()
+                    temp.write(bytes_data)
+
+                    st.write("Uploaded Filename: ", uploaded_file.name)
+                    file_name = uploaded_file.name
+                    file_path = temp.name
+                    docs = pre.load_document(file_path, file_name)
+                    haystackDoc, dataframeDoc, textData, paraList = clean.preprocessing(docs)
+
+        else:
+            # listing the options
+            option = st.selectbox('Select the example document',
+                                  ('South Africa:Low Emission strategy',
+                                   'Ethiopia: 10 Year Development Plan'))
+            if option is 'South Africa:Low Emission strategy':
+                file_name = file_path = 'sample/South Africa_s Low Emission Development Strategy.txt'
+                countryCode = countryList['South Africa']
+                st.write("Selected document:", file_name.split('/')[1])
+                # with open('sample/South Africa_s Low Emission Development Strategy.txt') as dfile:
+                # file = open('sample/South Africa_s Low Emission Development Strategy.txt', 'wb')
+            else:
+                # with open('sample/Ethiopia_s_2021_10 Year Development Plan.txt') as dfile:
+                file_name = file_path = 'sample/Ethiopia_s_2021_10 Year Development Plan.txt'
+                countryCode = countryList['Ethiopia']
+                st.write("Selected document:", file_name.split('/')[1])
+
+            if option is not None:
+                docs = pre.load_document(file_path,file_name)
+                haystackDoc, dataframeDoc, textData, paraList = clean.preprocessing(docs)
+
+    with open('ndcs/cca.txt', encoding='utf-8', errors='ignore') as dfile:
+        cca_sent = dfile.read()
+
+    cca_sent = ast.literal_eval(cca_sent)
+
+    with open('ndcs/ccm.txt', encoding='utf-8', errors='ignore') as dfile:
+        ccm_sent = dfile.read()
+
+    ccm_sent = ast.literal_eval(ccm_sent)
+
+    with open('ndcs/countryList.txt') as dfile:
+        countryList = dfile.read()
+
+    countryList = ast.literal_eval(countryList)
+
+    def get_document(countryCode: str):
+        link = "https://klimalog.die-gdi.de/ndc/open-data/dataset.json"
+        with urllib.request.urlopen(link) as urlfile:
+            data = json.loads(urlfile.read())
+        categoriesData = {}
+        categoriesData['categories']= data['categories']
+        categoriesData['subcategories']= data['subcategories']
+        keys_sub = categoriesData['subcategories'].keys()
+        documentType= 'NDCs'
+        if documentType in data.keys():
+            if countryCode in data[documentType].keys():
+                get_dict = {}
+                for key, value in data[documentType][countryCode].items():
+                    if key not in ['country_name','region_id', 'region_name']:
+                        get_dict[key] = value['classification']
+                    else:
+                        get_dict[key] = value
+            else:
+                return None
+        else:
+            return None
+
+        country = {}
+        for key in categoriesData['categories']:
+            country[key]= {}
+        for key,value in categoriesData['subcategories'].items():
+            country[value['category']][key] = get_dict[key]
+
+        return country
+
+    # country_ndc = get_document('NDCs', countryList[option])
+
+    def countrySpecificCCA(cca_sent, threshold, countryCode):
+        temp = {}
+        doc = get_document(countryCode)
+        for key,value in cca_sent.items():
+            id_ = doc['climate change adaptation'][key]['id']
+            if id_ >threshold:
+                temp[key] = value['id'][id_]
+        return temp
+
+
+    def countrySpecificCCM(ccm_sent, threshold, countryCode):
+        temp = {}
+        doc = get_document(countryCode)
+        for key,value in ccm_sent.items():
+            id_ = doc['climate change mitigation'][key]['id']
+            if id_ >threshold:
+                temp[key] = value['id'][id_]
+
+        return temp
+
+
+
+    if docs is not None:
+        sent_cca = countrySpecificCCA(cca_sent,1,countryCode)
+        sent_ccm = countrySpecificCCM(ccm_sent,1,countryCode)
+        #st.write(sent_ccm)
+        @st.cache(allow_output_mutation=True)
+        def load_sentenceTransformer(name):
+            return SentenceTransformer(name)
+        model = load_sentenceTransformer('all-MiniLM-L6-v2')
+
+        document_embeddings = model.encode(paraList, show_progress_bar=True)
+
+        genre = st.radio( "Select Category",('Climate Change Adaptation', 'Climate Change Mitigation'))
+        if genre == 'Climate Change Adaptation':
+            sent_dict = sent_cca
+            sent_labels = []
+            for key,sent in sent_dict.items():
+                sent_labels.append(sent)
+            label_embeddings = model.encode(sent_labels, show_progress_bar=True)
+            similarity_high_threshold = 0.55
+            similarity_matrix = cosine_similarity(label_embeddings, document_embeddings)
+            label_indices, paragraph_indices = np.where(similarity_matrix>similarity_high_threshold)
+
+            positive_indices = list(zip(label_indices.tolist(), paragraph_indices.tolist()))
+
+
+        else:
+            sent_dict = sent_ccm
+            sent_labels = []
+            for key,sent in sent_dict.items():
+                sent_labels.append(sent)
+            label_embeddings = model.encode(sent_labels, show_progress_bar=True)
+            similarity_high_threshold = 0.55
+            similarity_matrix = cosine_similarity(label_embeddings, document_embeddings)
+            label_indices, paragraph_indices = np.where(similarity_matrix>similarity_high_threshold)
+
+            positive_indices = list(zip(label_indices.tolist(), paragraph_indices.tolist()))
+
+
+        # sent_labels = []
+        # for key,sent in sent_dict.items():
+        #     sent_labels.append(sent)
+
+
+        # label_embeddings = model.encode(sent_labels, show_progress_bar=True)
+
+        #similarity_high_threshold = 0.55
+        # similarity_matrix = cosine_similarity(label_embeddings, document_embeddings)
+        #label_indices, paragraph_indices = np.where(similarity_matrix>similarity_high_threshold)
+
+        #positive_indices = list(zip(label_indices.tolist(), paragraph_indices.tolist()))
+        document = docx.Document()
+        document.add_heading('Document name:{}'.format(file_name), 2)
+        section = document.sections[0]
+
+        # Calling the footer
+        footer = section.footer
+
+        # Calling the paragraph already present in
+        # the footer section
+        footer_para = footer.paragraphs[0]
+
+        font_styles = document.styles
+        font_charstyle = font_styles.add_style('CommentsStyle', WD_STYLE_TYPE.CHARACTER)
+        font_object = font_charstyle.font
+        font_object.size = Pt(7)
+        # Adding the centered zoned footer
+        footer_para.add_run('''\tPowered by GIZ Data and the Sustainable Development Solution Network hosted at Hugging-Face spaces: https://huggingface.co/spaces/ppsingh/streamlit_dev''', style='CommentsStyle')
+
+        document.add_paragraph("Country Code for which NDC is carried out {}".format(countryCode))
+
+        for _label_idx, _paragraph_idx in positive_indices:
+            st.write("This paragraph: \n")
+            document.add_paragraph("This paragraph: \n")
+            st.write(paraList[_paragraph_idx])
+            st.write(f"Is relevant to: \n {list(sent_dict.keys())[_label_idx]}")
+            document.add_paragraph(f"Is relevant to: \n {list(sent_dict.keys())[_label_idx]}")
+            st.write('-'*10)
+            document.add_paragraph('-'*10)
+
+        document.save('demo.docx')
+        with open("demo.docx", "rb") as file:
+            btn = st.download_button(
+                label="Download file",
+                data=file,
+                file_name="demo.docx",
+                mime="txt/docx"
+            )
+