import pickle

import streamlit as st
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer
# Page title and subtitle.
st.title("Unsupervised Text Analysis App with Training")
st.subheader("Train an LDA Model for Topic Modeling")

# Create the session-state slot for the trained model only when it is missing,
# so a model stored by an earlier run of this script is never overwritten.
if "lda_model" not in st.session_state:
    st.session_state.lda_model = None

# Markdown heading for the dataset section of the page.
st.write("### Dataset:")
# Small built-in corpus used for training: ten one-sentence documents, each on
# a different subject.
texts = [
    "The economy is experiencing significant growth this year.",
    "Climate change is one of the most pressing global challenges.",
    "Artificial intelligence is transforming industries worldwide.",
    "Renewable energy sources are becoming more popular and cost-effective.",
    "Sports events bring people together and promote cultural exchange.",
    "Advances in medicine have greatly improved life expectancy.",
    "Education plays a critical role in shaping the future of societies.",
    "Travel and tourism contribute significantly to the global economy.",
    "Space exploration inspires innovation and collaboration.",
    "Social media platforms influence public opinion and behavior.",
]
# Show the raw documents under the dataset heading.
st.write(texts)

st.subheader("Training Parameters")

# Number of LDA topics chosen by the user: between 2 and 10, defaulting to 3.
num_topics = st.slider(
    "Select the number of topics for training",
    min_value=2,
    max_value=10,
    value=3,
)

# Bag-of-words term counts with English stop words removed and the vocabulary
# capped at 1000 terms. The fitted vectorizer is kept because the training
# step needs its vocabulary to label the topics.
vectorizer = CountVectorizer(stop_words="english", max_features=1000)
doc_term_matrix = vectorizer.fit_transform(texts)
st.subheader("Training the LDA Model")

# Number of highest-weighted vocabulary terms listed for each topic.
TOP_WORDS_PER_TOPIC = 5

if st.button("Train Model"):
    # Fit LDA on the document-term matrix computed earlier in the script.
    # random_state is fixed so repeated training with the same settings is
    # seeded identically.
    with st.spinner("Training the LDA model..."):
        lda = LatentDirichletAllocation(n_components=num_topics, random_state=42)
        lda.fit(doc_term_matrix)

    # Keep the fitted model in session state so the save step can access it.
    st.session_state.lda_model = lda
    st.success("Training Completed!")

    # Each row of components_ holds the per-term weights of one topic.
    # argsort() is ascending, so reverse it and keep the first
    # TOP_WORDS_PER_TOPIC indices to get the strongest terms, strongest first.
    feature_names = vectorizer.get_feature_names_out()
    topics = [
        f"Topic {topic_number}: "
        + ", ".join(
            feature_names[i]
            for i in term_weights.argsort()[::-1][:TOP_WORDS_PER_TOPIC]
        )
        for topic_number, term_weights in enumerate(lda.components_, start=1)
    ]

    st.write("### Identified Topics:")
    for topic_summary in topics:
        st.write(topic_summary)
st.subheader("Save the Trained Model")

if st.button("Save Model"):
    trained_model = st.session_state.lda_model
    # Compare against None explicitly instead of relying on the estimator's
    # truthiness.
    if trained_model is None:
        st.error("Please train the model first before saving.")
    else:
        # NOTE(review): only the LDA estimator is pickled; the fitted
        # CountVectorizer is not saved with it. Confirm whether consumers of
        # `lda_model.pkl` also need the vocabulary to process new text.
        try:
            # The relative path is resolved against the process's current
            # working directory.
            with open("lda_model.pkl", "wb") as model_file:
                pickle.dump(trained_model, model_file)
        except OSError as exc:
            # Report write failures (permissions, missing directory, full
            # disk) in the UI instead of letting the exception propagate.
            st.error(f"Could not save the model: {exc}")
        else:
            st.success("Model saved as `lda_model.pkl`.")