Spaces:
Runtime error
Runtime error
Commit
·
54f24e6
1
Parent(s):
2b68ec8
Update app.py
Browse files
app.py
CHANGED
@@ -80,7 +80,7 @@ class Comments_Data_Module(pl.LightningDataModule):
|
|
80 |
self.max_token_length = max_token_length
|
81 |
self.tokenizer = AutoTokenizer.from_pre
|
82 |
|
83 |
-
|
84 |
if stage in (None, "fit"):
|
85 |
self.train_dataset = Comments_Dataset(self.train_path, attributes=self.attributes, tokenizer=self.tokenizer)
|
86 |
self.val_dataset = Comments_Dataset(self.val_path, attributes=self.attributes, tokenizer=self.tokenizer, sample=None)
|
@@ -148,7 +148,6 @@ class Comment_Classifier(pl.LightningModule):
|
|
148 |
warmup_steps = math.floor(total_steps * self.config['warmup'])
|
149 |
scheduler = get_cosine_schedule_with_warmup(optimizer, warmup_steps, total_steps)
|
150 |
return [optimizer],[scheduler]
|
151 |
-
|
152 |
|
153 |
config = {
|
154 |
'model_name': 'distilroberta-base',
|
@@ -161,6 +160,7 @@ config = {
|
|
161 |
'n_epochs': 100
|
162 |
}
|
163 |
|
|
|
164 |
model_name = 'distilroberta-base'
|
165 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
166 |
|
@@ -168,14 +168,9 @@ model = Comment_Classifier(config=config)
|
|
168 |
model.load_state_dict(torch.load("model_state_dict.pt"))
|
169 |
model.eval()
|
170 |
|
171 |
-
|
172 |
-
|
173 |
def prepare_tokenized_review(raw_review):
|
174 |
-
# Remove HTML tags with BS
|
175 |
review_text = BeautifulSoup(raw_review).get_text()
|
176 |
-
# Removing non-letters using a regular expression
|
177 |
review_text = re.sub("[^a-zA-Z!?]"," ", review_text)
|
178 |
-
# Convert words to lower case and split them
|
179 |
words = review_text.lower().split()
|
180 |
|
181 |
return " ".join(words)
|
@@ -201,23 +196,16 @@ def run_inference(encoding):
|
|
201 |
final_output = torch.softmax(output[1][0],dim=0).cpu()
|
202 |
print(final_output.numpy().tolist())
|
203 |
return final_output.numpy().tolist()
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
test_tweets = test_df["comment_text"].values
|
208 |
-
#streamlit section
|
209 |
models = ["distilroberta-base"]
|
210 |
model_pointers = ["default: distilroberta-base"]
|
211 |
|
212 |
-
# current_random_tweet = test_tweets[random.randint(0,len(test_tweets))]
|
213 |
-
# current_random_tweet = prepare_tokenized_review(current_random_tweet)
|
214 |
st.write("1. Hit the button to view and see the analyis of a random tweet")
|
215 |
|
216 |
with st.form(key="init_form"):
|
217 |
current_random_tweet = test_tweets[random.randint(0,len(test_tweets))]
|
218 |
current_random_tweet = prepare_tokenized_review(current_random_tweet)
|
219 |
-
|
220 |
-
|
221 |
|
222 |
choice = st.selectbox("Choose Model", model_pointers)
|
223 |
|
@@ -230,8 +218,9 @@ with st.form(key="init_form"):
|
|
230 |
df["Sentiment Score"] = max(result)
|
231 |
st.table(df)
|
232 |
|
|
|
233 |
next_tweet = st.form_submit_button("Next Tweet")
|
234 |
|
235 |
if next_tweet:
|
236 |
with st.spinner("Analyzing..."):
|
237 |
-
st.write("")
|
|
|
80 |
self.max_token_length = max_token_length
|
81 |
self.tokenizer = AutoTokenizer.from_pre
|
82 |
|
83 |
+
def setup(self, stage = None):
|
84 |
if stage in (None, "fit"):
|
85 |
self.train_dataset = Comments_Dataset(self.train_path, attributes=self.attributes, tokenizer=self.tokenizer)
|
86 |
self.val_dataset = Comments_Dataset(self.val_path, attributes=self.attributes, tokenizer=self.tokenizer, sample=None)
|
|
|
148 |
warmup_steps = math.floor(total_steps * self.config['warmup'])
|
149 |
scheduler = get_cosine_schedule_with_warmup(optimizer, warmup_steps, total_steps)
|
150 |
return [optimizer],[scheduler]
|
|
|
151 |
|
152 |
config = {
|
153 |
'model_name': 'distilroberta-base',
|
|
|
160 |
'n_epochs': 100
|
161 |
}
|
162 |
|
163 |
+
##tokenizer
|
164 |
model_name = 'distilroberta-base'
|
165 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
166 |
|
|
|
168 |
model.load_state_dict(torch.load("model_state_dict.pt"))
|
169 |
model.eval()
|
170 |
|
|
|
|
|
171 |
def prepare_tokenized_review(raw_review):
|
|
|
172 |
review_text = BeautifulSoup(raw_review).get_text()
|
|
|
173 |
review_text = re.sub("[^a-zA-Z!?]"," ", review_text)
|
|
|
174 |
words = review_text.lower().split()
|
175 |
|
176 |
return " ".join(words)
|
|
|
196 |
final_output = torch.softmax(output[1][0],dim=0).cpu()
|
197 |
print(final_output.numpy().tolist())
|
198 |
return final_output.numpy().tolist()
|
199 |
+
|
|
|
|
|
200 |
test_tweets = test_df["comment_text"].values
|
|
|
201 |
models = ["distilroberta-base"]
|
202 |
model_pointers = ["default: distilroberta-base"]
|
203 |
|
|
|
|
|
204 |
st.write("1. Hit the button to view and see the analyis of a random tweet")
|
205 |
|
206 |
with st.form(key="init_form"):
|
207 |
current_random_tweet = test_tweets[random.randint(0,len(test_tweets))]
|
208 |
current_random_tweet = prepare_tokenized_review(current_random_tweet)
|
|
|
|
|
209 |
|
210 |
choice = st.selectbox("Choose Model", model_pointers)
|
211 |
|
|
|
218 |
df["Sentiment Score"] = max(result)
|
219 |
st.table(df)
|
220 |
|
221 |
+
|
222 |
next_tweet = st.form_submit_button("Next Tweet")
|
223 |
|
224 |
if next_tweet:
|
225 |
with st.spinner("Analyzing..."):
|
226 |
+
st.write("")
|