shrut27 committed on
Commit
2083211
1 Parent(s): c4426e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -9
app.py CHANGED
@@ -8,28 +8,28 @@ import pandas as pd
8
  # Loading spaCy model outside the streamlit cache
9
  nlp = spacy.load("en_core_web_sm")
10
 
11
- @st.cache(allow_output_mutation=True)
12
  def load_environmental_model():
13
  name_env = "ESGBERT/EnvironmentalBERT-environmental"
14
  tokenizer_env = AutoTokenizer.from_pretrained(name_env)
15
  model_env = AutoModelForSequenceClassification.from_pretrained(name_env)
16
  return pipeline("text-classification", model=model_env, tokenizer=tokenizer_env)
17
 
18
- @st.cache(allow_output_mutation=True)
19
  def load_social_model():
20
  name_soc = "ESGBERT/SocialBERT-social"
21
  tokenizer_soc = AutoTokenizer.from_pretrained(name_soc)
22
  model_soc = AutoModelForSequenceClassification.from_pretrained(name_soc)
23
  return pipeline("text-classification", model=model_soc, tokenizer=tokenizer_soc)
24
 
25
- @st.cache(allow_output_mutation=True)
26
  def load_governance_model():
27
  name_gov = "ESGBERT/GovernanceBERT-governance"
28
  tokenizer_gov = AutoTokenizer.from_pretrained(name_gov)
29
  model_gov = AutoModelForSequenceClassification.from_pretrained(name_gov)
30
  return pipeline("text-classification", model=model_gov, tokenizer=tokenizer_gov)
31
 
32
- @st.cache(allow_output_mutation=True)
33
  def load_sentiment_model():
34
  model_name = "climatebert/distilroberta-base-climate-sentiment"
35
  model = AutoModelForSequenceClassification.from_pretrained(model_name)
@@ -37,12 +37,14 @@ def load_sentiment_model():
37
  return pipeline("text-classification", model=model, tokenizer=tokenizer)
38
 
39
  # Streamlit App
40
- st.title("ESGBERT Text Classification App")
41
 
42
  # Get report URL from user input
43
  url = st.text_input("Enter the URL of the report (PDF):")
44
 
45
  # Model selection dropdown
 
 
46
  selected_model = st.selectbox("Select Model", ["Environmental Model", "Social Model", "Governance Model", "Sentiment Model"])
47
 
48
  if url:
@@ -52,18 +54,15 @@ if url:
52
  if response.status_code == 200:
53
  # Parse PDF and extract text
54
  raw_text = parser.from_buffer(response.content)['content']
55
-
56
  # Extract sentences using spaCy
57
  doc = nlp(raw_text)
58
  sentences = [sent.text for sent in doc.sents]
59
-
60
  # Filtering and preprocessing sentences
61
  sequences = list(map(str, sentences))
62
  sentences = [x.replace("\n", "") for x in sequences]
63
  sentences = [x for x in sentences if x != ""]
64
  sentences = [x for x in sentences if x[0].isupper()]
65
- sub_sentences = sentences[:100] # Takes around 20 seconds
66
-
67
  # Classification using different models based on user selection
68
  if selected_model == "Environmental Model":
69
  pipe_model = load_environmental_model()
 
8
  # Loading spaCy model outside the streamlit cache
9
  nlp = spacy.load("en_core_web_sm")
10
 
11
+ @st.cache_resource()
12
  def load_environmental_model():
13
  name_env = "ESGBERT/EnvironmentalBERT-environmental"
14
  tokenizer_env = AutoTokenizer.from_pretrained(name_env)
15
  model_env = AutoModelForSequenceClassification.from_pretrained(name_env)
16
  return pipeline("text-classification", model=model_env, tokenizer=tokenizer_env)
17
 
18
+ @st.cache_resource()
19
  def load_social_model():
20
  name_soc = "ESGBERT/SocialBERT-social"
21
  tokenizer_soc = AutoTokenizer.from_pretrained(name_soc)
22
  model_soc = AutoModelForSequenceClassification.from_pretrained(name_soc)
23
  return pipeline("text-classification", model=model_soc, tokenizer=tokenizer_soc)
24
 
25
+ @st.cache_resource()
26
  def load_governance_model():
27
  name_gov = "ESGBERT/GovernanceBERT-governance"
28
  tokenizer_gov = AutoTokenizer.from_pretrained(name_gov)
29
  model_gov = AutoModelForSequenceClassification.from_pretrained(name_gov)
30
  return pipeline("text-classification", model=model_gov, tokenizer=tokenizer_gov)
31
 
32
+ @st.cache_resource()
33
  def load_sentiment_model():
34
  model_name = "climatebert/distilroberta-base-climate-sentiment"
35
  model = AutoModelForSequenceClassification.from_pretrained(model_name)
 
37
  return pipeline("text-classification", model=model, tokenizer=tokenizer)
38
 
39
  # Streamlit App
40
+ st.title("ESG Report Classification using Natural Language Processing")
41
 
42
  # Get report URL from user input
43
  url = st.text_input("Enter the URL of the report (PDF):")
44
 
45
  # Model selection dropdown
46
+ st.write("Environmental Model, Social Model, Governance Model would give the percentage denoting the parameter chosen.")
47
+ st.write("Sentiment Model shows if the company is a risk or opportunity based on all 3 parameters.")
48
  selected_model = st.selectbox("Select Model", ["Environmental Model", "Social Model", "Governance Model", "Sentiment Model"])
49
 
50
  if url:
 
54
  if response.status_code == 200:
55
  # Parse PDF and extract text
56
  raw_text = parser.from_buffer(response.content)['content']
 
57
  # Extract sentences using spaCy
58
  doc = nlp(raw_text)
59
  sentences = [sent.text for sent in doc.sents]
 
60
  # Filtering and preprocessing sentences
61
  sequences = list(map(str, sentences))
62
  sentences = [x.replace("\n", "") for x in sequences]
63
  sentences = [x for x in sentences if x != ""]
64
  sentences = [x for x in sentences if x[0].isupper()]
65
+ sub_sentences = sentences[:100]
 
66
  # Classification using different models based on user selection
67
  if selected_model == "Environmental Model":
68
  pipe_model = load_environmental_model()