# JEPHONETORRE's picture
# 1
# 1b3143f
import streamlit as st
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
import pickle
# Title: page heading and sub-heading rendered at the top of the app.
st.title("Unsupervised Text Analysis App with Training")
st.subheader("Train an LDA Model for Topic Modeling")

# Initialize Session State: create the "lda_model" slot once, so that it can
# be read on any rerun even before a model has been trained. The assignment
# must be nested under the `if`; otherwise the script does not parse.
if "lda_model" not in st.session_state:
    st.session_state.lda_model = None
# Built-in Dataset: ten single-sentence documents that serve as the corpus
# handed to the vectorizer below.
texts = [
    "The economy is experiencing significant growth this year.",
    "Climate change is one of the most pressing global challenges.",
    "Artificial intelligence is transforming industries worldwide.",
    "Renewable energy sources are becoming more popular and cost-effective.",
    "Sports events bring people together and promote cultural exchange.",
    "Advances in medicine have greatly improved life expectancy.",
    "Education plays a critical role in shaping the future of societies.",
    "Travel and tourism contribute significantly to the global economy.",
    "Space exploration inspires innovation and collaboration.",
    "Social media platforms influence public opinion and behavior.",
]

# Display dataset: a markdown heading followed by the raw list of documents.
st.write("### Dataset:")
st.write(texts)
# Input: Number of Topics, picked with a slider ranging from 2 to 10 and
# starting at 3.
st.subheader("Training Parameters")
num_topics = st.slider(
    "Select the number of topics for training",
    min_value=2,
    max_value=10,
    value=3,
)

# Vectorization: build a document-term count matrix from the built-in texts,
# dropping English stop words and keeping at most 1000 vocabulary terms.
vectorizer = CountVectorizer(max_features=1000, stop_words="english")
doc_term_matrix = vectorizer.fit_transform(texts)
# Train LDA Model: runs only on the script pass in which the button is pressed.
st.subheader("Training the LDA Model")
if st.button("Train Model"):
    # Fit inside the spinner so the user sees progress feedback while the
    # model is being trained.
    with st.spinner("Training the LDA model..."):
        lda = LatentDirichletAllocation(n_components=num_topics, random_state=42)
        lda.fit(doc_term_matrix)
        # Save the trained model in session state so it is still available on
        # later reruns (e.g. when "Save Model" is pressed).
        st.session_state.lda_model = lda

    # Display Topics
    st.success("Training Completed!")
    feature_names = vectorizer.get_feature_names_out()

    # Number of highest-weighted terms listed per topic.
    top_n = 5
    topics = [
        # argsort() is ascending, so reverse it and keep the first `top_n`
        # indices to get the heaviest terms first.
        f"Topic {topic_idx + 1}: "
        f"{', '.join(feature_names[i] for i in weights.argsort()[::-1][:top_n])}"
        for topic_idx, weights in enumerate(lda.components_)
    ]

    st.write("### Identified Topics:")
    for topic_line in topics:
        st.write(topic_line)
# Save the Trained Model: pickle the model held in session state to a file in
# the current working directory.
# NOTE: only the LDA estimator is written; the fitted CountVectorizer is not
# part of the pickle.
st.subheader("Save the Trained Model")
if st.button("Save Model"):
    # Compare against None explicitly instead of relying on the truthiness of
    # the estimator object.
    if st.session_state.lda_model is not None:
        with open("lda_model.pkl", "wb") as f:
            pickle.dump(st.session_state.lda_model, f)
        # Report success only after the file has been written and closed.
        st.success("Model saved as `lda_model.pkl`.")
    else:
        st.error("Please train the model first before saving.")