NarayanaMayya
committed on
Commit
•
d3acf2f
1
Parent(s):
86396a5
Update app.py
Browse files
app.py
CHANGED
@@ -2,12 +2,8 @@ import requests
|
|
2 |
import streamlit as st
|
3 |
import PyPDF2
|
4 |
import torch
|
5 |
-
import joblib
|
6 |
-
import tensorflow as tf
|
7 |
from transformers import AutoTokenizer, LEDForConditionalGeneration
|
8 |
-
|
9 |
-
from transformers import TFBertForSequenceClassification, BertTokenizer
|
10 |
-
st.set_page_config(page_title="Summarization&tweet_analysis", page_icon="π",layout="wide")
|
11 |
hide_streamlit_style = """
|
12 |
<style>
|
13 |
#MainMenu {visibility: hidden;}
|
@@ -63,7 +59,7 @@ def add_bg_from_local(image_file):
|
|
63 |
#add_bg_from_local(background_image)
|
64 |
|
65 |
#@st.cache
|
66 |
-
st.header('Summarization
|
67 |
def convert_df(df):
|
68 |
# IMPORTANT: Cache the conversion to prevent computation on every rerun
|
69 |
return df.to_csv(index=False).encode('utf-8')
|
@@ -73,9 +69,9 @@ result_csv_batch_sql = result_csv_batch_fail=result_csv_batch=result_csv4=result
|
|
73 |
with col1:
|
74 |
models = st.selectbox(
|
75 |
'Select the option',
|
76 |
-
('
|
77 |
#try:
|
78 |
-
if models == '
|
79 |
st.markdown("")
|
80 |
else:
|
81 |
st.markdown("")
|
@@ -92,92 +88,26 @@ with col1:
|
|
92 |
st.markdown(hide_label, unsafe_allow_html=True)
|
93 |
submitted = st.form_submit_button("Submit")
|
94 |
if submitted:
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
documents = text_data
|
103 |
-
|
104 |
-
# Tokenize and encode the documents
|
105 |
-
inputs = tokenizer(documents, return_tensors='pt', padding=True, truncation=True,max_length=1000000)
|
106 |
-
|
107 |
-
# Move the inputs to the device
|
108 |
-
inputs = inputs.to(device)
|
109 |
-
|
110 |
-
# Generate summaries
|
111 |
-
outputs = model.generate(**inputs,max_length=1000000)
|
112 |
-
|
113 |
-
# Decode the summaries
|
114 |
-
st.write(tokenizer.batch_decode(outputs, skip_special_tokens=True))
|
115 |
-
st.success('Prediction done successfully!', icon="✅")
|
116 |
-
else:
|
117 |
-
# Define the custom objects (custom layers) needed for loading the model
|
118 |
-
custom_objects = {"TFBertForSequenceClassification": TFBertForSequenceClassification}
|
119 |
-
|
120 |
-
# Load the best model checkpoint
|
121 |
-
best_model = load_model('best_model_checkpoint_val_acc_0.8697_epoch_03.h5', custom_objects=custom_objects)
|
122 |
|
123 |
-
|
124 |
-
|
125 |
-
test_encodings = tokenizer(text_data, padding=True, truncation=True, return_tensors='tf')
|
126 |
-
test_dataset = tf.data.Dataset.from_tensor_slices((dict(test_encodings)))
|
127 |
|
128 |
-
|
129 |
-
|
130 |
|
131 |
-
|
132 |
-
|
133 |
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
label_encoder = joblib.load('label_encoder.joblib')
|
138 |
-
result_label = label_encoder.inverse_transform(index_arg)
|
139 |
-
|
140 |
-
# Display or save the DataFrame with predicted labels
|
141 |
-
st.write("Item name: ", result_label[0])
|
142 |
-
|
143 |
-
from transformers import AutoTokenizer, AutoConfig, AutoModelForSequenceClassification
|
144 |
-
from scipy.special import softmax
|
145 |
-
|
146 |
-
MODEL = f"cardiffnlp/twitter-roberta-base-sentiment-latest"
|
147 |
-
tokenizer = AutoTokenizer.from_pretrained(MODEL)
|
148 |
-
config = AutoConfig.from_pretrained(MODEL)
|
149 |
-
# PT
|
150 |
-
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
|
151 |
-
#model.save_pretrained(MODEL)
|
152 |
-
#text = "Covid cases are increasing fast!"
|
153 |
-
pred_label = []
|
154 |
-
pred_scor = []
|
155 |
-
def preprocess(text):
|
156 |
-
new_text = []
|
157 |
-
for t in text.split(" "):
|
158 |
-
t = '@user' if t.startswith('@') and len(t) > 1 else t
|
159 |
-
t = 'http' if t.startswith('http') else t
|
160 |
-
new_text.append(t)
|
161 |
-
return " ".join(new_text)
|
162 |
-
def predict_pret(text):
|
163 |
-
#print(text)
|
164 |
-
text = preprocess(text)
|
165 |
-
encoded_input = tokenizer(text, return_tensors='pt')
|
166 |
-
output = model(**encoded_input)
|
167 |
-
scores = output[0][0].detach().numpy()
|
168 |
-
scores = softmax(scores)
|
169 |
-
|
170 |
-
ranking = np.argsort(scores)
|
171 |
-
ranking = ranking[::-1]
|
172 |
-
l = config.id2label[ranking[0]]
|
173 |
-
s = scores[ranking[0]]
|
174 |
-
return l,s
|
175 |
-
|
176 |
-
l,s = predict_pret(text_data)
|
177 |
-
|
178 |
-
st.write("Sentiment is: ", l)
|
179 |
-
|
180 |
-
st.success('Prediction done successfully!', icon="✅")
|
181 |
_='''
|
182 |
except Exception as e:
|
183 |
if 'NoneType' or 'not defined' in str(e):
|
|
|
2 |
import streamlit as st
|
3 |
import PyPDF2
|
4 |
import torch
|
|
|
|
|
5 |
from transformers import AutoTokenizer, LEDForConditionalGeneration
|
6 |
+
st.set_page_config(page_title="Summarization", page_icon="π",layout="wide")
|
|
|
|
|
7 |
hide_streamlit_style = """
|
8 |
<style>
|
9 |
#MainMenu {visibility: hidden;}
|
|
|
59 |
#add_bg_from_local(background_image)
|
60 |
|
61 |
#@st.cache
|
62 |
+
st.header('Summarization')
|
63 |
def convert_df(df):
|
64 |
# IMPORTANT: Cache the conversion to prevent computation on every rerun
|
65 |
return df.to_csv(index=False).encode('utf-8')
|
|
|
69 |
with col1:
|
70 |
models = st.selectbox(
|
71 |
'Select the option',
|
72 |
+
('model1', ))
|
73 |
#try:
|
74 |
+
if models == 'model1':
|
75 |
st.markdown("")
|
76 |
else:
|
77 |
st.markdown("")
|
|
|
88 |
st.markdown(hide_label, unsafe_allow_html=True)
|
89 |
submitted = st.form_submit_button("Submit")
|
90 |
if submitted:
|
91 |
+
#torch.cuda.set_device(2)
|
92 |
+
tokenizer = AutoTokenizer.from_pretrained('allenai/PRIMERA-multinews')
|
93 |
+
model = LEDForConditionalGeneration.from_pretrained('allenai/PRIMERA-multinews')
|
94 |
+
#device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # get the device
|
95 |
+
device = "cpu"
|
96 |
+
model.to(device) # move the model to the device
|
97 |
+
documents = text_data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
|
99 |
+
# Tokenize and encode the documents
|
100 |
+
inputs = tokenizer(documents, return_tensors='pt', padding=True, truncation=True,max_length=1000000)
|
|
|
|
|
101 |
|
102 |
+
# Move the inputs to the device
|
103 |
+
inputs = inputs.to(device)
|
104 |
|
105 |
+
# Generate summaries
|
106 |
+
outputs = model.generate(**inputs,max_length=1000000)
|
107 |
|
108 |
+
# Decode the summaries
|
109 |
+
st.write(tokenizer.batch_decode(outputs, skip_special_tokens=True))
|
110 |
+
st.success('Prediction done successfully!', icon="✅")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
_='''
|
112 |
except Exception as e:
|
113 |
if 'NoneType' or 'not defined' in str(e):
|