|
import streamlit as st |
|
import pandas as pd |
|
import catboost |
|
from catboost import CatBoostClassifier |
|
import re |
|
import string |
|
from nltk.corpus import stopwords |
|
from pymystem3 import Mystem |
|
from joblib import load |
|
import nltk |
|
# Fetch the NLTK stop-word corpus only when it is not already installed, so
# that loading this module does not contact the NLTK server on every start.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')
|
|
|
def data_preprocessing(text):
    """Normalize a raw review string before lemmatization.

    The text is lower-cased and then stripped, in this order, of
    angle-bracket tags, ``http...`` links, ``@mentions``, ``#hashtags``,
    digit runs and the characters in ``string.punctuation``. Finally, words
    found in the NLTK Russian stop-word list are dropped.

    Args:
        text: Raw review text.

    Returns:
        The remaining words joined by single spaces.
    """
    russian_stop_words = set(stopwords.words('russian'))

    # Order matters: each pattern is applied to the output of the previous one.
    substitutions = (
        ("<.*?>", ""),       # tags are removed outright
        (r'http\S+', " "),   # the remaining patterns are blanked with a space
        (r'@\w+', ' '),
        (r'#\w+', ' '),
        (r'\d+', ' '),
    )
    cleaned = text.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)

    # Delete every character listed in string.punctuation (no space is left
    # in its place).
    cleaned = cleaned.translate(str.maketrans("", "", string.punctuation))

    kept_words = (
        token for token in cleaned.split() if token not in russian_stop_words
    )
    return " ".join(kept_words)
|
|
|
def lemmatize_text(text):
    """Lemmatize *text* with Mystem and return the lemmas joined by spaces.

    A new ``Mystem`` instance is created on every call.

    Args:
        text: Text to lemmatize.

    Returns:
        The items returned by ``Mystem.lemmatize`` joined with a single space.
    """
    analyzer = Mystem()
    return ' '.join(analyzer.lemmatize(text))
|
|
|
# Both artifacts are loaded once, when this module is executed, from paths
# relative to the current working directory.

# Vectorizer object deserialized with joblib; used below via ``.transform``.
tfidf_vectorizer = load('tfidf_vectorizer.joblib')

# CatBoost classifier restored from its saved model file.
model = CatBoostClassifier()
model.load_model('cat_model4.cbm')
|
|
|
def classic_ml_page():
    """Render the Streamlit page that classifies a single review.

    Shows a title, a text area and a button. When the button is pressed, the
    entered text is passed through ``data_preprocessing``, ``lemmatize_text``
    and ``tfidf_vectorizer.transform``, and the result is given to
    ``model.predict``. A positive message is written when the first
    prediction equals 1, a negative one otherwise. Empty or whitespace-only
    input produces a prompt to enter a review instead of a prediction.

    Returns:
        None. All output is written to the Streamlit page.
    """
    st.title("Классификация отзывов о медицинских учреждениях")
    user_review = st.text_area("Введите ваш отзыв здесь:")

    # Nothing to do until the user presses the button.
    if not st.button("Классифицировать"):
        return

    # Whitespace-only input carries no text to classify, so treat it the same
    # as empty input rather than reporting a sentiment for a blank review.
    if not user_review or not user_review.strip():
        st.write("Пожалуйста, введите отзыв для классификации.")
        return

    preprocessed_review = data_preprocessing(user_review)
    lemmatized_review = lemmatize_text(preprocessed_review)
    # The vectorizer is given a one-element list, i.e. a single document.
    vectorized_review = tfidf_vectorizer.transform([lemmatized_review])
    prediction = model.predict(vectorized_review)

    if prediction[0] == 1:
        st.write("Позитивный отзыв 😀")
    else:
        st.write("Негативный отзыв 😟")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|