NarayanaMayya committed on
Commit
d3acf2f
•
1 Parent(s): 86396a5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -90
app.py CHANGED
@@ -2,12 +2,8 @@ import requests
2
  import streamlit as st
3
  import PyPDF2
4
  import torch
5
- import joblib
6
- import tensorflow as tf
7
  from transformers import AutoTokenizer, LEDForConditionalGeneration
8
- from tensorflow.keras.models import load_model
9
- from transformers import TFBertForSequenceClassification, BertTokenizer
10
- st.set_page_config(page_title="Summarization&tweet_analysis", page_icon="📈",layout="wide")
11
  hide_streamlit_style = """
12
  <style>
13
  #MainMenu {visibility: hidden;}
@@ -63,7 +59,7 @@ def add_bg_from_local(image_file):
63
  #add_bg_from_local(background_image)
64
 
65
  #@st.cache
66
- st.header('Summarization & tweet_analysis')
67
  def convert_df(df):
68
  # IMPORTANT: Cache the conversion to prevent computation on every rerun
69
  return df.to_csv(index=False).encode('utf-8')
@@ -73,9 +69,9 @@ result_csv_batch_sql = result_csv_batch_fail=result_csv_batch=result_csv4=result
73
  with col1:
74
  models = st.selectbox(
75
  'Select the option',
76
- ('summarization_model1','tweet_analysis' ))
77
  #try:
78
- if models == 'summarization_model1':
79
  st.markdown("")
80
  else:
81
  st.markdown("")
@@ -92,92 +88,26 @@ with col1:
92
  st.markdown(hide_label, unsafe_allow_html=True)
93
  submitted = st.form_submit_button("Submit")
94
  if submitted:
95
- if models == 'summarization_model1':
96
- #torch.cuda.set_device(2)
97
- tokenizer = AutoTokenizer.from_pretrained('allenai/PRIMERA-multinews')
98
- model = LEDForConditionalGeneration.from_pretrained('allenai/PRIMERA-multinews')
99
- #device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # get the device
100
- device = "cpu"
101
- model.to(device) # move the model to the device
102
- documents = text_data
103
-
104
- # Tokenize and encode the documents
105
- inputs = tokenizer(documents, return_tensors='pt', padding=True, truncation=True,max_length=1000000)
106
-
107
- # Move the inputs to the device
108
- inputs = inputs.to(device)
109
-
110
- # Generate summaries
111
- outputs = model.generate(**inputs,max_length=1000000)
112
-
113
- # Decode the summaries
114
- st.write(tokenizer.batch_decode(outputs, skip_special_tokens=True))
115
- st.success('Prediction done successfully!', icon="✅")
116
- else:
117
- # Define the custom objects (custom layers) needed for loading the model
118
- custom_objects = {"TFBertForSequenceClassification": TFBertForSequenceClassification}
119
-
120
- # Load the best model checkpoint
121
- best_model = load_model('best_model_checkpoint_val_acc_0.8697_epoch_03.h5', custom_objects=custom_objects)
122
 
123
- # Assuming you already have the test set DataFrame (df_test) and tokenizer
124
- tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
125
- test_encodings = tokenizer(text_data, padding=True, truncation=True, return_tensors='tf')
126
- test_dataset = tf.data.Dataset.from_tensor_slices((dict(test_encodings)))
127
 
128
- # Make predictions on the test set using the loaded model
129
- predictions_probabilities = best_model.predict(test_dataset.batch(8))
130
 
131
- # Convert probabilities to one-hot encoded predictions
132
- predictions_onehot = np.eye(9)[np.argmax(predictions_probabilities, axis=1)]
133
 
134
- # Display or save the DataFrame with predicted labels
135
- index_arg = np.argmax(predictions_probabilities, axis=1)
136
- # Later, you can load the LabelEncoder
137
- label_encoder = joblib.load('label_encoder.joblib')
138
- result_label = label_encoder.inverse_transform(index_arg)
139
-
140
- # Display or save the DataFrame with predicted labels
141
- st.write("Item name: ", result_label[0])
142
-
143
- from transformers import AutoTokenizer, AutoConfig, AutoModelForSequenceClassification
144
- from scipy.special import softmax
145
-
146
- MODEL = f"cardiffnlp/twitter-roberta-base-sentiment-latest"
147
- tokenizer = AutoTokenizer.from_pretrained(MODEL)
148
- config = AutoConfig.from_pretrained(MODEL)
149
- # PT
150
- model = AutoModelForSequenceClassification.from_pretrained(MODEL)
151
- #model.save_pretrained(MODEL)
152
- #text = "Covid cases are increasing fast!"
153
- pred_label = []
154
- pred_scor = []
155
- def preprocess(text):
156
- new_text = []
157
- for t in text.split(" "):
158
- t = '@user' if t.startswith('@') and len(t) > 1 else t
159
- t = 'http' if t.startswith('http') else t
160
- new_text.append(t)
161
- return " ".join(new_text)
162
- def predict_pret(text):
163
- #print(text)
164
- text = preprocess(text)
165
- encoded_input = tokenizer(text, return_tensors='pt')
166
- output = model(**encoded_input)
167
- scores = output[0][0].detach().numpy()
168
- scores = softmax(scores)
169
-
170
- ranking = np.argsort(scores)
171
- ranking = ranking[::-1]
172
- l = config.id2label[ranking[0]]
173
- s = scores[ranking[0]]
174
- return l,s
175
-
176
- l,s = predict_pret(text_data)
177
-
178
- st.write("Sentiment is: ", l)
179
-
180
- st.success('Prediction done successfully!', icon="✅")
181
  _='''
182
  except Exception as e:
183
  if 'NoneType' or 'not defined' in str(e):
 
2
  import streamlit as st
3
  import PyPDF2
4
  import torch
 
 
5
  from transformers import AutoTokenizer, LEDForConditionalGeneration
6
+ st.set_page_config(page_title="Summarization", page_icon="📈",layout="wide")
 
 
7
  hide_streamlit_style = """
8
  <style>
9
  #MainMenu {visibility: hidden;}
 
59
  #add_bg_from_local(background_image)
60
 
61
  #@st.cache
62
+ st.header('Summarization')
63
  def convert_df(df):
64
  # IMPORTANT: Cache the conversion to prevent computation on every rerun
65
  return df.to_csv(index=False).encode('utf-8')
 
69
  with col1:
70
  models = st.selectbox(
71
  'Select the option',
72
+ ('model1', ))
73
  #try:
74
+ if models == 'model1':
75
  st.markdown("")
76
  else:
77
  st.markdown("")
 
88
  st.markdown(hide_label, unsafe_allow_html=True)
89
  submitted = st.form_submit_button("Submit")
90
  if submitted:
91
+ #torch.cuda.set_device(2)
92
+ tokenizer = AutoTokenizer.from_pretrained('allenai/PRIMERA-multinews')
93
+ model = LEDForConditionalGeneration.from_pretrained('allenai/PRIMERA-multinews')
94
+ #device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # get the device
95
+ device = "cpu"
96
+ model.to(device) # move the model to the device
97
+ documents = text_data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
+ # Tokenize and encode the documents
100
+ inputs = tokenizer(documents, return_tensors='pt', padding=True, truncation=True,max_length=1000000)
 
 
101
 
102
+ # Move the inputs to the device
103
+ inputs = inputs.to(device)
104
 
105
+ # Generate summaries
106
+ outputs = model.generate(**inputs,max_length=1000000)
107
 
108
+ # Decode the summaries
109
+ st.write(tokenizer.batch_decode(outputs, skip_special_tokens=True))
110
+ st.success('Prediction done successfully!', icon="✅")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  _='''
112
  except Exception as e:
113
  if 'NoneType' or 'not defined' in str(e):