Spaces:
Sleeping
Sleeping
File size: 1,796 Bytes
ddeaa8b 4a2a45c ddeaa8b 3fa3430 ddeaa8b 3fa3430 ddeaa8b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
import streamlit as st
import pandas as pd
from catboost import CatBoostClassifier
import re
import string
from nltk.corpus import stopwords
from pymystem3 import Mystem
from joblib import load
import nltk
# Fetch the NLTK stop-word corpus when this module is imported;
# data_preprocessing reads the Russian list from it via stopwords.words('russian').
nltk.download('stopwords')
def data_preprocessing(text):
    """Normalise a raw review into a space-separated string of content words.

    The steps run in this order:

    1. lowercase the text;
    2. delete ``<...>`` tag-like spans;
    3. replace ``http...`` URLs, ``@mentions``, ``#hashtags`` and digit runs
       with a single space each;
    4. delete every character listed in ``string.punctuation``;
    5. split on whitespace and drop tokens found in NLTK's Russian
       stop-word list.

    Args:
        text: The raw review text.

    Returns:
        The surviving tokens joined by single spaces; an empty string when
        no token survives.
    """
    # The stop-word list is read on every call, before any text processing.
    russian_stop_words = set(stopwords.words('russian'))

    # (pattern, replacement) pairs, applied strictly in this order.
    substitutions = (
        ("<.*?>", ""),
        (r'http\S+', " "),
        (r'@\w+', ' '),
        (r'#\w+', ' '),
        (r'\d+', ' '),
    )
    cleaned = text.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)

    # NOTE(review): punctuation is deleted, not replaced by a space, so words
    # separated only by punctuation ("a,b") are fused into one token. Only the
    # ASCII set in string.punctuation is affected. Kept as-is because the
    # vectorizer was presumably fitted on text cleaned this way - confirm
    # before changing.
    cleaned = cleaned.translate(str.maketrans("", "", string.punctuation))

    kept_tokens = []
    for token in cleaned.split():
        if token not in russian_stop_words:
            kept_tokens.append(token)
    return " ".join(kept_tokens)
def lemmatize_text(text):
    """Lemmatize *text* with Mystem and return the lemmas joined by spaces.

    A fresh ``Mystem`` instance is created on every call.

    Args:
        text: The text to lemmatize.

    Returns:
        Every item returned by ``Mystem.lemmatize`` joined with a single
        space.
    """
    analyzer = Mystem()
    # NOTE(review): pymystem3's lemmatize() appears to return whitespace
    # separators (and a trailing newline) as list items too, so the joined
    # string may contain runs of spaces - confirm against the pymystem3 docs.
    return ' '.join(analyzer.lemmatize(text))
# Relative paths of the trained artefacts loaded below.
_MODEL_PATH = 'cat_model4.cbm'
_VECTORIZER_PATH = 'tfidf_vectorizer.joblib'

# Classifier used by classic_ml_page; loaded once, when the module is imported.
model = CatBoostClassifier()
model.load_model(_MODEL_PATH)

# TF-IDF vectorizer used by classic_ml_page to turn a lemmatized review into
# model input.
# NOTE(review): joblib.load deserialises with pickle, so this file must come
# from a trusted source.
tfidf_vectorizer = load(_VECTORIZER_PATH)
def classic_ml_page():
    """Render the review-classification page and show the model's verdict.

    Draws a title, a text area and a button. When the button reports a click,
    the entered review is passed through ``data_preprocessing`` and
    ``lemmatize_text``, vectorised with the module-level ``tfidf_vectorizer``
    and classified by the module-level ``model``. A first prediction equal to
    ``1`` is shown as a positive review; anything else as a negative one.

    Input that is empty, whitespace-only, or has no tokens left after
    preprocessing is not sent to the model; the user is asked to enter a
    review instead.

    Returns:
        None. All output goes to the Streamlit page.
    """
    st.title("Классификация отзывов о медицинских учреждениях")
    user_review = st.text_area("Введите ваш отзыв здесь:")

    # Nothing to classify until the button reports a click.
    if not st.button("Классифицировать"):
        return

    empty_prompt = "Пожалуйста, введите отзыв для классификации."

    # A bare truthiness test lets "   " or "\n" through; such input would
    # reach the model as an empty document and get an arbitrary label.
    if not user_review or not user_review.strip():
        st.write(empty_prompt)
        return

    preprocessed_review = data_preprocessing(user_review)

    # data_preprocessing joins the surviving tokens with spaces, so an empty
    # string means the review held nothing but stop words, digits, links or
    # punctuation - there is nothing meaningful to classify.
    if not preprocessed_review:
        st.write(empty_prompt)
        return

    lemmatized_review = lemmatize_text(preprocessed_review)
    vectorized_review = tfidf_vectorizer.transform([lemmatized_review])
    prediction = model.predict(vectorized_review)

    if prediction[0] == 1:
        st.write("Позитивный отзыв 😀")
    else:
        st.write("Негативный отзыв 😟")
|