Jainesh212 committed on
Commit
54f24e6
·
1 Parent(s): 2b68ec8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -16
app.py CHANGED
@@ -80,7 +80,7 @@ class Comments_Data_Module(pl.LightningDataModule):
80
  self.max_token_length = max_token_length
81
  self.tokenizer = AutoTokenizer.from_pre
82
 
83
- def setup(self, stage = None):
84
  if stage in (None, "fit"):
85
  self.train_dataset = Comments_Dataset(self.train_path, attributes=self.attributes, tokenizer=self.tokenizer)
86
  self.val_dataset = Comments_Dataset(self.val_path, attributes=self.attributes, tokenizer=self.tokenizer, sample=None)
@@ -148,7 +148,6 @@ class Comment_Classifier(pl.LightningModule):
148
  warmup_steps = math.floor(total_steps * self.config['warmup'])
149
  scheduler = get_cosine_schedule_with_warmup(optimizer, warmup_steps, total_steps)
150
  return [optimizer],[scheduler]
151
-
152
 
153
  config = {
154
  'model_name': 'distilroberta-base',
@@ -161,6 +160,7 @@ config = {
161
  'n_epochs': 100
162
  }
163
 
 
164
  model_name = 'distilroberta-base'
165
  tokenizer = AutoTokenizer.from_pretrained(model_name)
166
 
@@ -168,14 +168,9 @@ model = Comment_Classifier(config=config)
168
  model.load_state_dict(torch.load("model_state_dict.pt"))
169
  model.eval()
170
 
171
-
172
-
173
  def prepare_tokenized_review(raw_review):
174
- # Remove HTML tags with BS
175
  review_text = BeautifulSoup(raw_review).get_text()
176
- # Removing non-letters using a regular expression
177
  review_text = re.sub("[^a-zA-Z!?]"," ", review_text)
178
- # Convert words to lower case and split them
179
  words = review_text.lower().split()
180
 
181
  return " ".join(words)
@@ -201,23 +196,16 @@ def run_inference(encoding):
201
  final_output = torch.softmax(output[1][0],dim=0).cpu()
202
  print(final_output.numpy().tolist())
203
  return final_output.numpy().tolist()
204
-
205
-
206
-
207
  test_tweets = test_df["comment_text"].values
208
- #streamlit section
209
  models = ["distilroberta-base"]
210
  model_pointers = ["default: distilroberta-base"]
211
 
212
- # current_random_tweet = test_tweets[random.randint(0,len(test_tweets))]
213
- # current_random_tweet = prepare_tokenized_review(current_random_tweet)
214
  st.write("1. Hit the button to view and see the analyis of a random tweet")
215
 
216
  with st.form(key="init_form"):
217
  current_random_tweet = test_tweets[random.randint(0,len(test_tweets))]
218
  current_random_tweet = prepare_tokenized_review(current_random_tweet)
219
-
220
-
221
 
222
  choice = st.selectbox("Choose Model", model_pointers)
223
 
@@ -230,8 +218,9 @@ with st.form(key="init_form"):
230
  df["Sentiment Score"] = max(result)
231
  st.table(df)
232
 
 
233
  next_tweet = st.form_submit_button("Next Tweet")
234
 
235
  if next_tweet:
236
  with st.spinner("Analyzing..."):
237
- st.write("")
 
80
  self.max_token_length = max_token_length
81
  self.tokenizer = AutoTokenizer.from_pre
82
 
83
+ def setup(self, stage = None):
84
  if stage in (None, "fit"):
85
  self.train_dataset = Comments_Dataset(self.train_path, attributes=self.attributes, tokenizer=self.tokenizer)
86
  self.val_dataset = Comments_Dataset(self.val_path, attributes=self.attributes, tokenizer=self.tokenizer, sample=None)
 
148
  warmup_steps = math.floor(total_steps * self.config['warmup'])
149
  scheduler = get_cosine_schedule_with_warmup(optimizer, warmup_steps, total_steps)
150
  return [optimizer],[scheduler]
 
151
 
152
  config = {
153
  'model_name': 'distilroberta-base',
 
160
  'n_epochs': 100
161
  }
162
 
163
+ ##tokenizer
164
  model_name = 'distilroberta-base'
165
  tokenizer = AutoTokenizer.from_pretrained(model_name)
166
 
 
168
  model.load_state_dict(torch.load("model_state_dict.pt"))
169
  model.eval()
170
 
 
 
171
  def prepare_tokenized_review(raw_review):
 
172
  review_text = BeautifulSoup(raw_review).get_text()
 
173
  review_text = re.sub("[^a-zA-Z!?]"," ", review_text)
 
174
  words = review_text.lower().split()
175
 
176
  return " ".join(words)
 
196
  final_output = torch.softmax(output[1][0],dim=0).cpu()
197
  print(final_output.numpy().tolist())
198
  return final_output.numpy().tolist()
199
+
 
 
200
  test_tweets = test_df["comment_text"].values
 
201
  models = ["distilroberta-base"]
202
  model_pointers = ["default: distilroberta-base"]
203
 
 
 
204
  st.write("1. Hit the button to view and see the analyis of a random tweet")
205
 
206
  with st.form(key="init_form"):
207
  current_random_tweet = test_tweets[random.randint(0,len(test_tweets))]
208
  current_random_tweet = prepare_tokenized_review(current_random_tweet)
 
 
209
 
210
  choice = st.selectbox("Choose Model", model_pointers)
211
 
 
218
  df["Sentiment Score"] = max(result)
219
  st.table(df)
220
 
221
+
222
  next_tweet = st.form_submit_button("Next Tweet")
223
 
224
  if next_tweet:
225
  with st.spinner("Analyzing..."):
226
+ st.write("")