Spaces:

Sudhanshu976
/

NLP_FULL_APP

Sleeping

App Files Files Community

NLP_FULL_APP / pages /4_LANGUAGE-DETECTOR-MODEL.py

Sudhanshu976

third

fe5faf3 about 1 year ago

raw

history blame contribute delete

No virus

2.1 kB

	import streamlit as st
	import pickle
	import re
	import string
	import nltk
	from nltk.corpus import stopwords
	from nltk.tokenize import word_tokenize
	from nltk.stem.porter import PorterStemmer
	# Single Porter stemmer instance, reused by preprocess() for every request.
	stemmer = PorterStemmer()

	# Configure the page before rendering any widgets.
	st.set_page_config(page_title="NLP WEB APP")

	st.title("LANGUAGE DETECTOR MODEL")
	st.sidebar.success("Select a page above")

	# Fetch the NLTK data packages requested by this page: the stop-word corpus
	# and the 'punkt' tokenizer models.
	for resource_name in ('stopwords', 'punkt'):
	    nltk.download(resource_name)

	def preprocess(text):
	    """Normalize *text* into a space-separated string of stemmed tokens.

	    Steps, in order: lowercase, delete runs of digits, delete every
	    character in ``string.punctuation``, tokenize with ``word_tokenize``,
	    drop English stop words, Porter-stem the remaining tokens, and join
	    them with single spaces.

	    Args:
	        text: The raw input string.

	    Returns:
	        The cleaned string; empty when no token survives the filtering.
	    """
	    lowered = text.lower()
	    without_digits = re.sub(r'\d+', '', lowered)
	    cleaned = without_digits.translate(
	        str.maketrans('', '', string.punctuation)
	    )

	    english_stop_words = set(stopwords.words("english"))

	    stemmed_tokens = []
	    for token in word_tokenize(cleaned):
	        if token in english_stop_words:
	            continue
	        stemmed_tokens.append(stemmer.stem(token))

	    return ' '.join(stemmed_tokens)



	# Load the fitted vectorizer and classifier that ship with the app.
	# NOTE(security): pickle.load can execute arbitrary code while
	# deserializing; these files must only ever come from a trusted source.
	# Context managers make sure the file handles are closed after loading.
	with open('language-detector-models/vectorizer.pkl', 'rb') as vectorizer_file:
	    cv = pickle.load(vectorizer_file)
	with open('language-detector-models/model.pkl', 'rb') as model_file:
	    model = pickle.load(model_file)

	# Display name for each class id the model returns (ids 0-15). Any other
	# value falls back to "TURKISH", matching the final branch of the former
	# if/elif chain.
	_LANGUAGE_BY_CLASS = {
	    0: "ARABIC",
	    1: "DANISH",
	    2: "DUTCH",
	    3: "ENGLISH",
	    4: "FRENCH",
	    5: "GERMAN",
	    6: "GREEK",
	    7: "HINDI",
	    8: "ITALIAN",
	    9: "KANNADA",
	    10: "MALAYALAM",
	    11: "PORTUGUESE",
	    12: "RUSSIAN",
	    13: "SPANISH",
	    14: "SWEDISH",
	    15: "TAMIL",
	}
	_FALLBACK_LANGUAGE = "TURKISH"

	message = st.text_input("ENTER THE MESSAGE")

	if st.button("PREDICT"):
	    if not message.strip():
	        # Nothing to classify; predicting on empty text would only show an
	        # arbitrary language.
	        st.warning("Please enter a message first.")
	    else:
	        # PREPROCESS
	        transformed_text = preprocess(message)

	        # VECTORIZE
	        # Feed the preprocessed text to the vectorizer; previously the raw
	        # message was passed and the preprocessing result was discarded.
	        # NOTE(review): assumes the model was trained on text cleaned by
	        # the same preprocess() - confirm against the training notebook.
	        vector_input = cv.transform([transformed_text])

	        # PREDICTION
	        result = model.predict(vector_input)[0]

	        # DISPLAY
	        st.header(_LANGUAGE_BY_CLASS.get(result, _FALLBACK_LANGUAGE))