ruslanruslanruslan committed on
Commit
66e9d7c
1 Parent(s): 0f494d0

models added

Browse files
Files changed (4) hide show
  1. logreg.pkl +3 -0
  2. pages/Film reviews classifier.py +37 -1
  3. requirements.txt +1 -0
  4. tf.pkl +3 -0
logreg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bd6d0e129d3a2e6bdc40393eaa602ea599e032c0686a973ef137dc138be805b
3
+ size 44433
pages/Film reviews classifier.py CHANGED
@@ -11,14 +11,40 @@ import nltk
11
  import numpy as np
12
  import torch.nn as nn
13
  import transformers
 
 
14
  nltk.download('wordnet')
15
  nltk.download('stopwords')
16
  from collections import Counter
17
  from nltk.corpus import stopwords
18
  from nltk.stem import WordNetLemmatizer
 
 
 
19
 
20
  stop_words = set(stopwords.words('english'))
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  def preprocess_single_string(input_string: str, seq_len: int, vocab_to_int: dict):
23
  preprocessed_string = data_preprocessing(input_string)
24
  result_list = []
@@ -124,6 +150,14 @@ model_lstm.load_state_dict(torch.load('lstm_model_weights.pt', map_location=torc
124
  model_lstm.to('cpu').eval()
125
 
126
 
 
 
 
 
 
 
 
 
127
 
128
  def predict_sentence_lstm(text: str):
129
  start_time = time.time()
@@ -150,14 +184,16 @@ def predict_sentence_bert(text: str):
150
  reses = {0: 'negative', 1: 'positive'}
151
 
152
  def process_text(input_text):
 
153
  res_lstm, time_lstm = predict_sentence_lstm(input_text)
154
  res_bert, time_bert = predict_sentence_bert(input_text)
155
  st.write('Results:')
 
156
  st.write(f'LSTM: {reses[res_lstm]}, execution time: {time_lstm:.2f} seconds.')
157
  st.write(f'Upgraded Bert: {reses[res_bert]}, execution time: {time_bert:.2f} seconds.')
158
 
159
  st.title('Film reviews classifier')
160
- st.write('Write a film review in a box below, and the application, powered by two NLP models (LSTM and upgraded Bert), will tell if it is a positive or a negative review.')
161
 
162
  user_input = st.text_area("Enter your text:")
163
  if st.button("Send a review for processing"):
 
11
  import numpy as np
12
  import torch.nn as nn
13
  import transformers
14
+ import lightgbm as lgb
15
+ import pickle
16
  nltk.download('wordnet')
17
  nltk.download('stopwords')
18
  from collections import Counter
19
  from nltk.corpus import stopwords
20
  from nltk.stem import WordNetLemmatizer
21
+ from nltk.tokenize import RegexpTokenizer
22
+ from sklearn.feature_extraction.text import TfidfVectorizer
23
+ from sklearn.linear_model import LogisticRegression
24
 
25
  stop_words = set(stopwords.words('english'))
26
 
27
+
28
def _load_pickle(path):
    """Return the object deserialized from the pickle file at *path*."""
    # NOTE: unpickling runs arbitrary code; only load files shipped with the app.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Classifier whose ``predict`` is called by the classical model
# (presumably the LogisticRegression imported above - confirm).
logreg = _load_pickle('logreg.pkl')

# Vectorizer whose ``transform`` is applied to cleaned review text
# (presumably the TfidfVectorizer imported above - confirm).
tf = _load_pickle('tf.pkl')
33
+
34
def classical_pipeline(text):
    """Clean a raw review and turn it into features for the classical model.

    Steps, in order: lower-case; replace each run of digits with a space;
    strip ASCII punctuation; delete newline characters; lemmatize every
    whitespace-separated word with WordNet; re-tokenize into runs of word
    characters; drop English stop words; vectorize with the module-level
    ``tf`` object.

    Args:
        text: Raw review text.

    Returns:
        Whatever ``tf.transform`` returns for a one-element list holding the
        cleaned string.
    """
    text = text.lower()
    text = re.sub(r'\d+', ' ', text)
    text = text.translate(str.maketrans('', '', string.punctuation))
    # NOTE(review): newlines are deleted rather than replaced by a space, so
    # words separated only by a line break get fused. Left unchanged because
    # it presumably mirrors the preprocessing used when ``tf`` was fitted -
    # confirm before altering.
    text = re.sub(r'\n', '', text)
    wn_lemmatizer = WordNetLemmatizer()
    text = ' '.join(wn_lemmatizer.lemmatize(word) for word in text.split())
    # Raw string: '\w' inside a plain literal is an invalid escape sequence.
    reg_tokenizer = RegexpTokenizer(r'\w+')
    tokens = reg_tokenizer.tokenize(text)
    # Reuse the module-level ``stop_words`` set: same English list, but with
    # constant-time lookups and no corpus re-read on every call.
    text = ' '.join(word for word in tokens if word not in stop_words)
    return tf.transform([text])
47
+
48
  def preprocess_single_string(input_string: str, seq_len: int, vocab_to_int: dict):
49
  preprocessed_string = data_preprocessing(input_string)
50
  result_list = []
 
150
  model_lstm.to('cpu').eval()
151
 
152
 
153
def predict_sentence_classical(text: str):
    """Classify one review with the logistic-regression model and time it.

    Args:
        text: Raw review text.

    Returns:
        A ``(label, seconds)`` tuple: the first element of
        ``logreg.predict`` for the vectorized text, and the wall-clock time
        spent on preprocessing plus prediction.
    """
    started_at = time.time()
    features = classical_pipeline(text)
    label = logreg.predict(features)[0]
    elapsed = time.time() - started_at
    return label, elapsed
160
+
161
 
162
  def predict_sentence_lstm(text: str):
163
  start_time = time.time()
 
184
  reses = {0: 'negative', 1: 'positive'}
185
 
186
  def process_text(input_text):
187
+ res_classical, time_classical = predict_sentence_classical(input_text)
188
  res_lstm, time_lstm = predict_sentence_lstm(input_text)
189
  res_bert, time_bert = predict_sentence_bert(input_text)
190
  st.write('Results:')
191
+ st.write(f'Logistic regression: {reses[res_lstm]}, execution time: {time_lstm:.2f} seconds.')
192
  st.write(f'LSTM: {reses[res_lstm]}, execution time: {time_lstm:.2f} seconds.')
193
  st.write(f'Upgraded Bert: {reses[res_bert]}, execution time: {time_bert:.2f} seconds.')
194
 
195
  st.title('Film reviews classifier')
196
+ st.write('Write a film review in a box below, and the application, powered by three NLP models (logistic regression, LSTM and upgraded Bert), will tell if it is a positive or a negative review.')
197
 
198
  user_input = st.text_area("Enter your text:")
199
  if st.button("Send a review for processing"):
requirements.txt CHANGED
@@ -62,6 +62,7 @@ rich==13.4.2
62
  rpds-py==0.9.2
63
  safetensors==0.3.1
64
  six==1.16.0
 
65
  smmap==5.0.0
66
  streamlit==1.24.1
67
  sympy==1.12
 
62
  rpds-py==0.9.2
63
  safetensors==0.3.1
64
  six==1.16.0
65
+ scikit-learn
66
  smmap==5.0.0
67
  streamlit==1.24.1
68
  sympy==1.12
tf.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:734f5cfcd6c4033bc5cdb18e6750660b207cdf0abd4ff6e8cc0c7d25d90b14e9
3
+ size 2072875