NLP_FULL_APP / pages /4_LANGUAGE-DETECTOR-MODEL.py
Sudhanshu976's picture
third
fe5faf3
raw
history blame contribute delete
No virus
2.1 kB
import streamlit as st
import pickle
import re
import string
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem.porter import PorterStemmer
# Porter stemmer instance used by preprocess() below.
stemmer = PorterStemmer()

# Page configuration is issued before any other Streamlit command on the page.
st.set_page_config(
    page_title="NLP WEB APP",
)
st.title("LANGUAGE DETECTOR MODEL")
st.sidebar.success("Select a page above")

# Fetch the NLTK data used by stopwords.words() and word_tokenize().
# Recent NLTK releases (3.9+) load the tokenizer from "punkt_tab" instead of
# "punkt"; both are requested so the page works with old and new NLTK alike.
for _resource in ('stopwords', 'punkt', 'punkt_tab'):
    nltk.download(_resource)
def preprocess(text):
    """Normalize *text* and return it as a space-joined string of stems.

    Steps, in order: lowercase, remove digit runs, remove every character in
    ``string.punctuation``, tokenize with NLTK's ``word_tokenize``, drop
    tokens found in NLTK's English stop-word list, and stem the remaining
    tokens with the module-level Porter ``stemmer``.
    """
    cleaned = re.sub(r'\d+', '', text.lower())
    cleaned = cleaned.translate(str.maketrans('', '', string.punctuation))

    english_stops = set(stopwords.words("english"))

    stemmed_tokens = []
    for token in word_tokenize(cleaned):
        if token in english_stops:
            continue
        stemmed_tokens.append(stemmer.stem(token))

    return ' '.join(stemmed_tokens)
# Load the fitted vectorizer and classifier. These pickles are local artifacts
# shipped with the app; pickle.load must never be pointed at untrusted files
# because unpickling can execute arbitrary code. The `with` blocks make sure
# the file handles are closed after loading.
with open('language-detector-models/vectorizer.pkl', 'rb') as vectorizer_file:
    cv = pickle.load(vectorizer_file)
with open('language-detector-models/model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

# Predicted class index -> label shown to the user.
LANGUAGE_LABELS = {
    0: "ARABIC",
    1: "DANISH",
    2: "DUTCH",
    3: "ENGLISH",
    4: "FRENCH",
    5: "GERMAN",
    6: "GREEK",
    7: "HINDI",
    8: "ITALIAN",
    9: "KANNADA",
    10: "MALAYALAM",
    11: "PORTUGUESE",
    12: "RUSSIAN",
    13: "SPANISH",
    14: "SWEDISH",
    15: "TAMIL",
}
# Any prediction not listed in the table above is displayed as Turkish.
DEFAULT_LANGUAGE_LABEL = "TURKISH"

message = st.text_input("ENTER THE MESSAGE")

if st.button("PREDICT"):
    if not message.strip():
        # Nothing to classify; avoid reporting a language for empty input.
        st.warning("Please enter a message first.")
    else:
        # PREPROCESS
        transformed_text = preprocess(message)
        # VECTORIZE
        # The preprocessed text (not the raw message) is what gets vectorized.
        # NOTE(review): assumes the vectorizer was fitted on preprocess()
        # output -- confirm against the training code.
        vector_input = cv.transform([transformed_text])
        # PREDICTION
        result = model.predict(vector_input)[0]
        # DISPLAY
        st.header(LANGUAGE_LABELS.get(result, DEFAULT_LANGUAGE_LABEL))