Spaces:

JEPHONETORRE
/

text_analysis_app.py

Running

1b3143f 11 days ago

2.58 kB

	import streamlit as st
	from sklearn.feature_extraction.text import CountVectorizer
	from sklearn.decomposition import LatentDirichletAllocation
	import pickle

	# Title
	st.title("Unsupervised Text Analysis App with Training")
	st.subheader("Train an LDA Model for Topic Modeling")

	# Initialize Session State
	# Create the slot only when it is absent so a model stored by an earlier
	# run of this script in the same session is not overwritten.
	if "lda_model" not in st.session_state:
	    st.session_state["lda_model"] = None

	# Built-in Dataset
	# Ten single-sentence documents used as the training corpus below.
	texts = [
	    "The economy is experiencing significant growth this year.",
	    "Climate change is one of the most pressing global challenges.",
	    "Artificial intelligence is transforming industries worldwide.",
	    "Renewable energy sources are becoming more popular and cost-effective.",
	    "Sports events bring people together and promote cultural exchange.",
	    "Advances in medicine have greatly improved life expectancy.",
	    "Education plays a critical role in shaping the future of societies.",
	    "Travel and tourism contribute significantly to the global economy.",
	    "Space exploration inspires innovation and collaboration.",
	    "Social media platforms influence public opinion and behavior.",
	]

	# Display dataset: a heading first, then the raw list of sentences.
	st.write("### Dataset:")
	st.write(texts)

	# Input: Number of Topics
	st.subheader("Training Parameters")
	num_topics = st.slider(
	    "Select the number of topics for training",
	    min_value=2,
	    max_value=10,
	    value=3,
	)

	# Vectorization
	# Turn the sentences into a document-term count matrix, dropping English
	# stop words and keeping at most 1000 vocabulary terms.
	vectorizer = CountVectorizer(stop_words="english", max_features=1000)
	doc_term_matrix = vectorizer.fit_transform(texts)

	# Train LDA Model
	st.subheader("Training the LDA Model")
	if st.button("Train Model"):
	    with st.spinner("Training the LDA model..."):
	        lda = LatentDirichletAllocation(n_components=num_topics, random_state=42)
	        lda.fit(doc_term_matrix)
	        st.session_state.lda_model = lda  # Save the trained model in session state

	    st.success("Training Completed!")

	    # Summarise every topic by its highest-weighted vocabulary terms.
	    # argsort() is ascending, so the reversed slice yields the top_n
	    # largest weights in descending order.
	    top_n = 5
	    feature_names = vectorizer.get_feature_names_out()
	    st.session_state["lda_topics"] = [
	        f"Topic {topic_idx + 1}: "
	        + ", ".join(feature_names[i] for i in topic.argsort()[: -top_n - 1 : -1])
	        for topic_idx, topic in enumerate(lda.components_)
	    ]

	# Display Topics
	# Rendered from session state instead of inside the button branch: the
	# button only returns True on the run triggered by its click, so topics
	# drawn inside that branch would disappear on the next interaction
	# (for example when "Save Model" is pressed).
	topics = st.session_state.get("lda_topics")
	if topics:
	    st.write("### Identified Topics:")
	    for topic in topics:
	        st.write(topic)

	# Save the Trained Model
	st.subheader("Save the Trained Model")
	if st.button("Save Model"):
	    # Compare with None explicitly rather than relying on the object's
	    # truthiness; .get() also covers the key being absent altogether.
	    trained_model = st.session_state.get("lda_model")
	    if trained_model is None:
	        st.error("Please train the model first before saving.")
	    else:
	        # The path is relative, so the file lands in the working directory
	        # of the process running the app.
	        # NOTE(review): only the LDA model is pickled; the fitted
	        # vectorizer is not written, so code loading this file would have
	        # to rebuild the same vocabulary itself - confirm this is intended.
	        with open("lda_model.pkl", "wb") as f:
	            pickle.dump(trained_model, f)
	        st.success("Model saved as `lda_model.pkl`.")