Spaces:

rifkat
/

Uz-NER

Runtime error

App Files Files Community

samariddin committed on Dec 26, 2022

Commit

defebef

1 Parent(s): 42dc02f

added

Browse files

Files changed (9) hide show

.idea/.gitignore +3 -0
.idea/Uz-NER.iml +10 -0
.idea/inspectionProfiles/Project_Default.xml +16 -0
.idea/inspectionProfiles/profiles_settings.xml +6 -0
.idea/misc.xml +4 -0
.idea/modules.xml +8 -0
.idea/vcs.xml +6 -0
app.py +571 -0
requirements.txt +16 -0

.idea/.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+# Default ignored files
+/shelf/
+/workspace.xml

.idea/Uz-NER.iml ADDED Viewed

	@@ -0,0 +1,10 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$">
+      <excludeFolder url="file://$MODULE_DIR$/venv" />
+    </content>
+    <orderEntry type="jdk" jdkName="Python 3.8 (Uz-NER)" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>

.idea/inspectionProfiles/Project_Default.xml ADDED Viewed

	@@ -0,0 +1,16 @@

+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
+      <option name="ignoredPackages">
+        <value>
+          <list size="3">
+            <item index="0" class="java.lang.String" itemvalue="onnxruntime-gpu" />
+            <item index="1" class="java.lang.String" itemvalue="opencv-python" />
+            <item index="2" class="java.lang.String" itemvalue="imread-from-url" />
+          </list>
+        </value>
+      </option>
+    </inspection_tool>
+  </profile>
+</component>

.idea/inspectionProfiles/profiles_settings.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

.idea/misc.xml ADDED Viewed

	@@ -0,0 +1,4 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8 (Uz-NER)" project-jdk-type="Python SDK" />
+</project>

.idea/modules.xml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/Uz-NER.iml" filepath="$PROJECT_DIR$/.idea/Uz-NER.iml" />
+    </modules>
+  </component>
+</project>

.idea/vcs.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>

app.py ADDED Viewed

	@@ -0,0 +1,571 @@

+import requests
+import streamlit as st
+import wikipedia
+from wikipedia import WikipediaPage
+import pandas as pd
+import spacy
+import unicodedata
+from nltk.corpus import stopwords
+import numpy as np
+import nltk
+from newspaper import Article
+nltk.download('stopwords')
+from string import punctuation
+import json
+import time
+from datetime import datetime, timedelta
+import urllib
+from io import BytesIO
+from PIL import Image, UnidentifiedImageError
+from SPARQLWrapper import SPARQLWrapper, JSON, N3
+from fuzzywuzzy import process, fuzz
+from st_aggrid import GridOptionsBuilder, AgGrid, GridUpdateMode, DataReturnMode
+from transformers import pipeline
+import en_core_web_lg
+sparql = SPARQLWrapper('https://dbpedia.org/sparql')
+class ExtractArticleEntities:
+    """ Extract article entities from a document using natural language processing (NLP) and fuzzy matching.
+Parameters
+- text: a string or the text of a news article to be parsed
+Usage:
+import ExtractArticleEntities
+instantiate with text parameter ie. entities = ExtractArticleEntities(text)
+retrieve Who, What, When, Where entities with entities.www_json
+Non-organised entities with entiities.json
+"""
+    def __init__(self, text):
+        self.text = text  # preprocess text at initialisation
+        self.text = self.preprocessing(self.text)
+        print(self.text)
+        print('_____text_____')
+        self.json = {}
+        # Create empty dataframe to hold entity data for ease of processing
+        self.entity_df = pd.DataFrame(columns=["entity", "description"])
+        # Load the spacy model
+        # self.nlp = en_core_web_lg.load()
+        self.nlp = pipeline(model="51la5/roberta-large-NER")
+        # Parse the text
+        self.entity_df = self.get_who_what_where_when()
+        # Disambiguate entities
+        self.entity_df = self.fuzzy_disambiguation()
+        self.get_related_entity()
+        self.get_popularity()
+        # Create JSON representation of entities
+        self.entity_df = self.entity_df.drop_duplicates(subset=["description"])
+        self.entity_df = self.entity_df.reset_index(drop=True)
+        # ungrouped entity returned as json
+        self.json = self.entity_json()
+        # return json with entities grouped into who, what, where, when keys
+        self.www_json = self.get_wwww_json()
+    # def get_related_entity(self):
+    #     entities = self.entity_df.description
+    #     labels = self.entity_df.entity
+    #     related_entity = []
+    #     for entity, label in zip(entities, labels):
+    #         if label in ('PERSON', 'ORG','GPE','NORP','LOC'):
+    #             related_entity.append(wikipedia.search(entity, 3))
+    #         else:
+    #             related_entity.append([None])
+    #     self.entity_df['Wikipedia Entity'] = related_entity
+    def get_popularity(self):
+        # names = self.entity_df.description
+        # related_names = self.entity_df['Matched Entity']
+        # for name, related_name in zip(names, related_names):
+        #     if related_name:
+        #         related_name.append(name)
+        #         pytrends.build_payload(related_name, timeframe='now 4-d')
+        #         st.dataframe(pytrends.interest_over_time())
+        #         time.sleep(2)
+        master_df = pd.DataFrame()
+        view_list = []
+        for entity in self.entity_df['Matched Entity']:
+            if entity:
+                entity_to_look = entity[0]
+                # print(entity_to_look, '_______')
+                entity_to_look = entity_to_look.replace(' ', '_')
+                print(entity_to_look, '_______')
+                headers = {
+                    'accept': 'application/json',
+                    'User-Agent': 'Foo bar'
+                }
+                now = datetime.now()
+                now_dt = now.strftime(r'%Y%m%d')
+                week_back = now - timedelta(days=7)
+                week_back_dt = week_back.strftime(r'%Y%m%d')
+                resp = requests.get(
+                    f'https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia.org/all-access/all-agents/{entity_to_look}/daily/{week_back_dt}/{now_dt}',
+                    headers=headers)
+                data = resp.json()
+                # print(data)
+                df = pd.json_normalize(data['items'])
+                view_count = sum(df['views'])
+            else:
+                view_count = 0
+            view_list.append(view_count)
+        self.entity_df['Views'] = view_list
+        for entity in ('PERSON', 'ORG', 'GPE', 'NORP', 'LOC'):
+            related_entity_view_list = []
+            grouped_df = self.entity_df[self.entity_df['entity'] == entity]
+            grouped_df['Matched count'] = grouped_df['fuzzy_match'].apply(len)
+            grouped_df['Wiki count'] = grouped_df['Matched Entity'].apply(len)
+            grouped_df = grouped_df.sort_values(by=['Views', 'Matched count', 'Wiki count'],
+                                                ascending=False).reset_index(drop=True)
+            if not grouped_df.empty:
+                # st.dataframe(grouped_df)
+                master_df = pd.concat([master_df, grouped_df])
+        self.sorted_entity_df = master_df
+        if 'Views' in self.sorted_entity_df:
+            self.sorted_entity_df = self.sorted_entity_df.sort_values(by=['Views'], ascending=False).reset_index(
+                drop=True)
+        # st.dataframe(self.sorted_entity_df)
+        # names = grouped_df['description'][:5].values
+        # print(names, type(names))
+        # if names.any():
+        #     # pytrends.build_payload(names, timeframe='now 1-m')
+        #     st.dataframe(pytrends.get_historical_interest(names,
+        #                                                 year_start=2022, month_start=10, day_start=1,
+        #                                                 hour_start=0,
+        #                                                 year_end=2022, month_end=10, day_end=21,
+        #                                                  hour_end=0, cat=0, geo='', gprop='', sleep=0))
+        # st.dataframe()
+        # time.sleep(2)
+        # st.dataframe(grouped_df)
+    def get_related_entity(self):
+        names = self.entity_df.description
+        entities = self.entity_df.entity
+        self.related_entity = []
+        match_scores = []
+        for name, entity in zip(names, entities):
+            if entity in ('PERSON', 'ORG', 'GPE', 'NORP', 'LOC'):
+                related_names = wikipedia.search(name, 10)
+                self.related_entity.append(related_names)
+                matches = process.extract(name, related_names)
+                match_scores.append([match[0] for match in matches if match[1] >= 90])
+            else:
+                self.related_entity.append([None])
+                match_scores.append([])
+                # Remove nulls
+        self.entity_df['Wikipedia Entity'] = self.related_entity
+        self.entity_df['Matched Entity'] = match_scores
+    def fuzzy_disambiguation(self):
+        # Load the entity data
+        self.entity_df['fuzzy_match'] = ''
+        # Load the entity data
+        person_choices = self.entity_df.loc[self.entity_df['entity'] == 'PERSON']
+        org_choices = self.entity_df.loc[self.entity_df['entity'] == 'ORG']
+        where_choices = self.entity_df.loc[self.entity_df['entity'] == 'GPE']
+        norp_choices = self.entity_df.loc[self.entity_df['entity'] == 'NORP']
+        loc_choices = self.entity_df.loc[self.entity_df['entity'] == 'LOC']
+        date_choices = self.entity_df.loc[self.entity_df['entity'] == 'DATE']
+        def fuzzy_match(row, choices):
+            '''This function disambiguates entities by looking for maximum three matches with a score of 80 or more
+            for each of the entity types. If there is no match, then the function returns None. '''
+            match = process.extract(row["description"], choices["description"], limit=3)
+            match = [m[0] for m in match if m[1] > 80 and m[1] != 100]
+            if len(match) == 0:
+                match = []
+            if match:
+                self.fuzzy_match_dict[row["description"]] = match
+            return match
+        # Apply the fuzzy matching function to the entity dataframe
+        self.fuzzy_match_dict = {}
+        for i, row in self.entity_df.iterrows():
+            if row['entity'] == 'PERSON':
+                self.entity_df.at[i, 'fuzzy_match'] = fuzzy_match(row, person_choices)
+            elif row['entity'] == 'ORG':
+                self.entity_df.at[i, 'fuzzy_match'] = fuzzy_match(row, org_choices)
+            elif row['entity'] == 'GPE':
+                self.entity_df.at[i, 'fuzzy_match'] = fuzzy_match(row, where_choices)
+            elif row['entity'] == 'NORP':
+                self.entity_df.at[i, 'fuzzy_match'] = fuzzy_match(row, norp_choices)
+            elif row['entity'] == 'LOC':
+                self.entity_df.at[i, 'fuzzy_match'] = fuzzy_match(row, loc_choices)
+            elif row['entity'] == 'DATE':
+                self.entity_df.at[i, 'fuzzy_match'] = fuzzy_match(row, date_choices)
+        return self.entity_df
+    def preprocessing(self, text):
+        """This function takes a text string and strips out all punctuation. It then normalizes the string to a
+        normalized form (using the "NFKD" normalization algorithm). Finally, it strips any special characters and
+        converts them to their unicode equivalents. """
+        # remove punctuation
+        text = text.translate(str.maketrans("", "", punctuation))
+        # normalize the text
+        stop_words = stopwords.words('english')
+        # Removing Stop words can cause losing context, instead stopwords can be utilized for knowledge
+        filtered_words = [word for word in self.text.split()]  # if word not in stop_words]
+        # This is very hacky. Need a better way of handling bad encoding
+        pre_text = " ".join(filtered_words)
+        pre_text = pre_text = pre_text.replace('  ', ' ')
+        pre_text = pre_text.replace('â€™', "'")
+        pre_text = pre_text.replace('â€œ', '"')
+        pre_text = pre_text.replace('â€', '"')
+        pre_text = pre_text.replace('â€˜', "'")
+        pre_text = pre_text.replace('â€¦', '...')
+        pre_text = pre_text.replace('â€“', '-')
+        pre_text = pre_text.replace("\x9d", '-')
+        # normalize the text
+        pre_text = unicodedata.normalize("NFKD", pre_text)
+        # strip punctuation again as some remains in first pass
+        pre_text = pre_text.translate(str.maketrans("", "", punctuation))
+        return pre_text
+    def get_who_what_where_when(self):
+        """Get entity information in a document.
+This function will return a DataFrame with the following columns:
+- entity: the entity being queried
+- description: a brief description of the entity
+Usage:
+get_who_what_where_when(text)
+Example:
+> get_who_what_where_when('This is a test')
+PERSON
+ORG
+GPE
+LOC
+PRODUCT
+EVENT
+LAW
+LANGUAGE
+NORP
+DATE
+GPE
+TIME"""
+        # list to hold entity data
+        article_entity_list = []
+        # tokenize the text
+        doc = self.nlp(self.text)
+        # iterate over the entities in the document but only keep those which are meaningful
+        desired_entities = ['PERSON', 'ORG', 'GPE', 'LOC', 'PRODUCT', 'EVENT', 'LAW', 'LANGUAGE', 'NORP', 'DATE', 'GPE',
+                            'TIME']
+        self.label_dict = {}
+        # stop_words = stopwords.words('english')
+        for ent in doc.ents:
+            self.label_dict[ent] = ent.label_
+            if ent.label_ in desired_entities:
+                # add the entity to the list
+                entity_dict = {ent.label_: ent.text}
+                article_entity_list.append(entity_dict)
+        # dedupe the entities but only on exact match of values as occasional it will assign an ORG entity to PER
+        deduplicated_entities = {frozenset(item.values()):
+                                     item for item in article_entity_list}.values()
+        # create a dataframe from the entities
+        for record in deduplicated_entities:
+            record_df = pd.DataFrame(record.items(), columns=["entity", "description"])
+            self.entity_df = pd.concat([self.entity_df, record_df], ignore_index=True)
+        print(self.entity_df)
+        print('______________________')
+        return self.entity_df
+    def entity_json(self):
+        """Returns a JSON representation of an entity defined by the `entity_df` dataframe. The `entity_json` function
+        will return a JSON object with the following fields:
+        - entity: The type of the entity in the text
+        - description: The name of the entity as described in the input text
+        - fuzzy_match: A list of fuzzy matches for the entity. This is useful for disambiguating entities that are similar
+       """
+        self.json = json.loads(self.entity_df.to_json(orient='records'))
+        # self.json = json.dumps(self.json, indent=2)
+        return self.json
+    def get_wwww_json(self):
+        """This function returns a JSON representation of the `get_who_what_where_when` function. The `get_www_json`
+        function will return a JSON object with the following fields:
+        - entity: The type of the entity in the text
+        - description: The name of the entity as described in the input text
+        - fuzzy_match: A list of fuzzy matches for the entity. This is useful for disambiguating entities that are similar
+        """
+        # create a json object from the entity dataframe
+        who_dict = {"who": [ent for ent in self.entity_json() if ent['entity'] in ['ORG', 'PERSON']]}
+        where_dict = {"where": [ent for ent in self.entity_json() if ent['entity'] in ['GPE', 'LOC']]}
+        when_dict = {"when": [ent for ent in self.entity_json() if ent['entity'] in ['DATE', 'TIME']]}
+        what_dict = {
+            "what": [ent for ent in self.entity_json() if ent['entity'] in ['PRODUCT', 'EVENT', 'LAW', 'LANGUAGE',
+                                                                            'NORP']]}
+        article_wwww = [who_dict, where_dict, when_dict, what_dict]
+        self.wwww_json = json.dumps(article_wwww, indent=2)
+        return self.wwww_json
+news_article = st.text_input('Paste an Article here to be parsed')
+if 'parsed' not in st.session_state:
+    st.session_state['parsed'] = None
+    st.session_state['article'] = None
+if news_article:
+    st.write('Your news article is')
+    st.write(news_article)
+    if st.button('Get details'):
+        parsed = ExtractArticleEntities(news_article)
+        if parsed:
+            st.session_state['article'] = parsed.sorted_entity_df
+            st.session_state['parsed'] = True
+            st.session_state['json'] = parsed.www_json
+# if not st.session_state['article'].empty:
+def preprocessing(text):
+    """This function takes a text string and strips out all punctuation. It then normalizes the string to a
+    normalized form (using the "NFKD" normalization algorithm). Finally, it strips any special characters and
+    converts them to their unicode equivalents. """
+    # remove punctuation
+    if text:
+        text = text.translate(str.maketrans("", "", punctuation))
+        # normalize the text
+        stop_words = stopwords.words('english')
+        # Removing Stop words can cause losing context, instead stopwords can be utilized for knowledge
+        filtered_words = [word for word in text.split()]  # if word not in stop_words]
+        # This is very hacky. Need a better way of handling bad encoding
+        pre_text = " ".join(filtered_words)
+        pre_text = pre_text = pre_text.replace('  ', ' ')
+        pre_text = pre_text.replace('â€™', "'")
+        pre_text = pre_text.replace('â€œ', '"')
+        pre_text = pre_text.replace('â€', '"')
+        pre_text = pre_text.replace('â€˜', "'")
+        pre_text = pre_text.replace('â€¦', '...')
+        pre_text = pre_text.replace('â€“', '-')
+        pre_text = pre_text.replace("\x9d", '-')
+        # normalize the text
+        pre_text = unicodedata.normalize("NFKD", pre_text)
+        # strip punctuation again as some remains in first pass
+        pre_text = pre_text.translate(str.maketrans("", "", punctuation))
+    else:
+        pre_text = None
+    return pre_text
+def filter_wiki_df(df):
+    key_list = df.keys()[:2]
+    # df.to_csv('test.csv')
+    df = df[key_list]
+    # if len(df.keys()) == 2:
+    df['Match Check'] = np.where(df[df.keys()[0]] != df[df.keys()[1]], True, False)
+    df = df[df['Match Check'] != False]
+    df = df[key_list]
+    df = df.dropna(how='any').reset_index(drop=True)
+    # filtered_term = []
+    # for terms in df[df.keys()[0]]:
+    #     if isinstance(terms, str):
+    #         filtered_term.append(preprocessing(terms))
+    #     else:
+    #         filtered_term.append(None)
+    # df[df.keys()[0]] = filtered_term
+    df.rename(columns={key_list[0]: 'Attribute', key_list[1]: 'Value'}, inplace=True)
+    return df
+def get_entity_from_selectbox(related_entity):
+    entity = st.selectbox('Please select the term:', related_entity, key='foo')
+    if entity:
+        summary_entity = wikipedia.summary(entity, 3)
+        return summary_entity
+if st.session_state['parsed']:
+    df = st.session_state['article']
+    # left, right = st.columns(2)
+    # with left:
+    df_to_st = pd.DataFrame()
+    df_to_st['Name'] = df['description']
+    df_to_st['Is a type of'] = df['entity']
+    df_to_st['Related to'] = df['Matched Entity']
+    df_to_st['Is a type of'] = df_to_st['Is a type of'].replace({'PERSON': 'Person',
+                                                                 'ORG': 'Organization',
+                                                                 'GPE': 'Political Location',
+                                                                 'NORP': 'Political or Religious Groups',
+                                                                 'LOC': 'Non Political Location'})
+    gb = GridOptionsBuilder.from_dataframe(df_to_st)
+    gb.configure_pagination(paginationAutoPageSize=True)  # Add pagination
+    gb.configure_side_bar()  # Add a sidebar
+    gb.configure_selection('multiple', use_checkbox=True,
+                           groupSelectsChildren="Group checkbox select children")  # Enable multi-row selection
+    gridOptions = gb.build()
+    # st.dataframe(df_to_st)
+    grid_response = AgGrid(
+        df_to_st,
+        gridOptions=gridOptions,
+        data_return_mode='AS_INPUT',
+        update_mode='MODEL_CHANGED',
+        fit_columns_on_grid_load=False,
+        enable_enterprise_modules=True,
+        height=350,
+        width='100%',
+        reload_data=True
+    )
+    data = grid_response['data']
+    selected = grid_response['selected_rows']
+    selected_df = pd.DataFrame(selected)
+    if not selected_df.empty:
+        selected_entity = selected_df[['Name', 'Is a type of', 'Related to']]
+        st.dataframe(selected_entity)
+    # with right:
+    # st.json(st.session_state['json'])
+    entities_list = df['description']
+    # selected_entity = st.selectbox('Which entity you want to choose?',
+    #                     entities_list)
+    if not selected_df.empty and selected_entity['Name'].any():
+        # lookup_url = rf'https://lookup.dbpedia.org/api/search?query={selected_entity}'
+        # r = requests.get(lookup_url)
+        selected_row = df.loc[df['description'] == selected_entity['Name'][0]]
+        entity_value = selected_row.values
+        # st.write('Entity is a ', entity_value[0][0])
+        label, name, fuzzy, related, related_match, _, _, _ = entity_value[0]
+        not_matched = [word for word in related if word not in related_match]
+        fuzzy = fuzzy[0] if len(fuzzy) > 0 else ''
+        related = related[0] if len(related) > 0 else ''
+        not_matched = not_matched[0] if len(not_matched) > 0 else related
+        related_entity_list = [name, fuzzy, not_matched]
+        related_entity = entity_value[0][1:]
+        google_query_term = ' '.join(related_entity_list)
+        # search()
+        try:
+            urls = [i for i in search(google_query_term, stop=10, pause=2.0, tld='com', lang='en', tbs='0',
+                                      user_agent=get_random_user_agent())]
+        except:
+            urls = []
+        # urls = search(google_query_term+' news latest', num_results=10)
+        st.session_state['wiki_summary'] = False
+        all_related_entity = []
+        for el in related_entity[:-2]:
+            if isinstance(el, str):
+                all_related_entity.append(el)
+            elif isinstance(el, int):
+                all_related_entity.append(str(el))
+            else:
+                all_related_entity.extend(el)
+        # [ if type(el) == 'int' all_related_entity.extend(el) else all_related_entity.extend([el])for el in related_entity]
+        for entity in all_related_entity:
+            # try:
+            if True:
+                if entity:
+                    entity = entity.replace(' ', '_')
+                    query = f'''
+                        SELECT ?name ?comment ?image
+                        WHERE {{ dbr:{entity} rdfs:label ?name.
+                                dbr:{entity} rdfs:comment ?comment.
+                                dbr:{entity} dbo:thumbnail ?image.
+                            FILTER (lang(?name) = 'en')
+                            FILTER (lang(?comment) = 'en')
+                        }}'''
+                    sparql.setQuery(query)
+                    sparql.setReturnFormat(JSON)
+                    qres = sparql.query().convert()
+                    if qres['results']['bindings']:
+                        result = qres['results']['bindings'][0]
+                        name, comment, image_url = result['name']['value'], result['comment']['value'], result['image'][
+                            'value']
+                        # urllib.request.urlretrieve(image_url, "img.jpg")
+                        # img = Image.open("/Users/anujkarn/NER/img.jpg")
+                        wiki_url = f'https://en.wikipedia.org/wiki/{entity}'
+                        st.write(name)
+                        # st.image(img)
+                        st.write(image_url)
+                        # try:
+                        response = requests.get(image_url)
+                        try:
+                            related_image = Image.open(BytesIO(response.content))
+                            st.image(related_image)
+                        except UnidentifiedImageError:
+                            st.write('Not able to get image')
+                            pass
+                        # except error as e:
+                        # st.write(f'Image not parsed because of : {e}')
+                        summary_entity = comment
+                        wiki_knowledge_df = pd.read_html(wiki_url)[0]
+                        wiki_knowledge_df = filter_wiki_df(wiki_knowledge_df)
+                        st.write('Showing desciption for entity:', name)
+                        st.dataframe(wiki_knowledge_df)
+                        # if st.button('Want something else?'):
+                        # summary_entity = get_entity_from_selectbox(all_related_entity)
+                        break
+                    # summary_entity = wikipedia.summary(entity, 3)
+                    else:
+                        summary_entity = None
+        if not summary_entity:
+            try:
+                summary_entity = get_entity_from_selectbox(all_related_entity)
+                # page = WikipediaPage(entity)
+            except wikipedia.exceptions.DisambiguationError:
+                st.write('Disambiguation is there for term')
+        if selected_entity['Name'].any():
+            st.write(f'Summary for {selected_entity["Name"][0]}')
+            st.write(summary_entity)

requirements.txt ADDED Viewed

	@@ -0,0 +1,16 @@

+https://huggingface.co/spacy/en_core_web_lg/resolve/main/en_core_web_lg-any-py3-none-any.whl
+fastapi==0.88.0
+fuzzywuzzy==0.18.0
+matplotlib==3.3.4
+newspaper3k==0.2.8
+nltk==3.6.1
+numpy==1.19.5
+pandas==1.2.4
+Pillow==9.3.0
+requests==2.25.1
+spacy
+SPARQLWrapper==2.0.0
+streamlit==1.11.1
+wikipedia==1.4.0
+streamlit-aggrid
+transformers==2.5.0