Spaces:

ruslanruslanruslan
/

nlp_project

Sleeping

App Files Files Community

ruslanruslanruslan committed on Jul 21, 2023

Commit

66e9d7c

1 Parent(s): 0f494d0

models added

Browse files

Files changed (4) hide show

logreg.pkl +3 -0
pages/Film reviews classifier.py +37 -1
requirements.txt +1 -0
tf.pkl +3 -0

logreg.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7bd6d0e129d3a2e6bdc40393eaa602ea599e032c0686a973ef137dc138be805b
+size 44433

pages/Film reviews classifier.py CHANGED Viewed

@@ -11,14 +11,40 @@ import nltk
 import numpy as np
 import torch.nn as nn
 import transformers
 nltk.download('wordnet')
 nltk.download('stopwords')
 from collections import Counter
 from nltk.corpus import stopwords
 from nltk.stem import WordNetLemmatizer
 stop_words = set(stopwords.words('english'))
 def preprocess_single_string(input_string: str, seq_len: int, vocab_to_int: dict):
     preprocessed_string = data_preprocessing(input_string)
     result_list = []
@@ -124,6 +150,14 @@ model_lstm.load_state_dict(torch.load('lstm_model_weights.pt', map_location=torc
 model_lstm.to('cpu').eval()
 def predict_sentence_lstm(text: str):
     start_time = time.time()
@@ -150,14 +184,16 @@ def predict_sentence_bert(text: str):
 reses = {0: 'negative', 1: 'positive'}
 def process_text(input_text):
     res_lstm, time_lstm = predict_sentence_lstm(input_text)
     res_bert, time_bert = predict_sentence_bert(input_text)
     st.write('Results:')
     st.write(f'LSTM: {reses[res_lstm]}, execution time: {time_lstm:.2f} seconds.')
     st.write(f'Upgraded Bert: {reses[res_bert]}, execution time: {time_bert:.2f} seconds.')
 st.title('Film reviews classifier')
-st.write('Write a film review in a box below, and the application, powered by two NLP models (LSTM and upgraded Bert), will tell if it is a positive or a negative review.')
 user_input = st.text_area("Enter your text:")
 if st.button("Send a review for processing"):

 import numpy as np
 import torch.nn as nn
 import transformers
+import lightgbm as lgb
+import pickle
 nltk.download('wordnet')
 nltk.download('stopwords')
 from collections import Counter
 from nltk.corpus import stopwords
 from nltk.stem import WordNetLemmatizer
+from nltk.tokenize import RegexpTokenizer
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.linear_model import LogisticRegression
 stop_words = set(stopwords.words('english'))
+# Load the two pickled artifacts used by the classical pipeline:
+# `logreg` is the classifier whose .predict() is called on the features,
+# `tf` is the vectorizer whose .transform() produces those features.
+# NOTE(review): pickle.load executes code embedded in the file, so these
+# .pkl files must only ever come from this repository. Presumably they were
+# produced with a specific scikit-learn version -- confirm it matches the
+# version installed from requirements.txt.
+with open('logreg.pkl', 'rb') as f:
+    logreg = pickle.load(f)
+with open('tf.pkl', 'rb') as f:
+    tf = pickle.load(f)
+def classical_pipeline(text):
+    text = text.lower()
+    text = re.sub(r'\d+', ' ', text)
+    text = text.translate(str.maketrans('', '', string.punctuation))
+    text = re.sub(r'\n', '', text)
+    wn_lemmatizer = WordNetLemmatizer()
+    text = ' '.join([wn_lemmatizer.lemmatize(word) for word in text.split()])
+    reg_tokenizer = RegexpTokenizer('\w+')
+    text = reg_tokenizer.tokenize_sents([text])
+    sw = stopwords.words('english')
+    text = ' '.join([word for word in text[0] if word not in sw])
+    text = tf.transform([text])
+    return text
 def preprocess_single_string(input_string: str, seq_len: int, vocab_to_int: dict):
     preprocessed_string = data_preprocessing(input_string)
     result_list = []
 model_lstm.to('cpu').eval()
+def predict_sentence_classical(text: str):
+    start_time = time.time()
+    text = classical_pipeline(text)
+    res = logreg.predict(text)[0]
+    end_time = time.time()
+    execution_time = end_time - start_time
+    return res, execution_time
 def predict_sentence_lstm(text: str):
     start_time = time.time()
 reses = {0: 'negative', 1: 'positive'}
 def process_text(input_text):
+    res_classical, time_classical = predict_sentence_classical(input_text)
     res_lstm, time_lstm = predict_sentence_lstm(input_text)
     res_bert, time_bert = predict_sentence_bert(input_text)
     st.write('Results:')
+    st.write(f'Logistic regression: {reses[res_lstm]}, execution time: {time_lstm:.2f} seconds.')
     st.write(f'LSTM: {reses[res_lstm]}, execution time: {time_lstm:.2f} seconds.')
     st.write(f'Upgraded Bert: {reses[res_bert]}, execution time: {time_bert:.2f} seconds.')
 st.title('Film reviews classifier')
+st.write('Write a film review in a box below, and the application, powered by three NLP models (logistic regression, LSTM and upgraded Bert), will tell if it is a positive or a negative review.')
 user_input = st.text_area("Enter your text:")
 if st.button("Send a review for processing"):

requirements.txt CHANGED Viewed

@@ -62,6 +62,7 @@ rich==13.4.2
 rpds-py==0.9.2
 safetensors==0.3.1
 six==1.16.0
 smmap==5.0.0
 streamlit==1.24.1
 sympy==1.12

 rpds-py==0.9.2
 safetensors==0.3.1
 six==1.16.0
+sklearn==0.0.post7
 smmap==5.0.0
 streamlit==1.24.1
 sympy==1.12

tf.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:734f5cfcd6c4033bc5cdb18e6750660b207cdf0abd4ff6e8cc0c7d25d90b14e9
+size 2072875