"""Streamlit app that trains an LDA topic model on a small built-in corpus.

The page shows the sample documents, lets the user pick the number of
topics, fits a ``LatentDirichletAllocation`` model on a bag-of-words matrix
of those documents, lists the highest-weighted terms of every topic, and can
pickle the trained model to ``lda_model.pkl`` in the working directory.
"""

import pickle

import streamlit as st
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer

# Number of highest-weighted vocabulary terms listed for each topic.
TOP_TERMS_PER_TOPIC = 5

# File the trained model is pickled to (relative to the working directory).
MODEL_PATH = "lda_model.pkl"


def _format_topics(components, feature_names, top_n=TOP_TERMS_PER_TOPIC):
    """Return one ``"Topic k: term, term, ..."`` line per topic.

    Args:
        components: Topic-term weight matrix (``lda.components_``), one row
            per topic and one column per vocabulary term.
        feature_names: Vocabulary terms, indexed like the columns of
            ``components``.
        top_n: How many of the highest-weighted terms to list per topic.

    Returns:
        A list of display strings, in topic order, with topics numbered
        from 1 and terms ordered from highest to lowest weight.
    """
    lines = []
    for topic_idx, weights in enumerate(components):
        # argsort is ascending; reverse it and keep the first ``top_n``
        # indices to get the heaviest terms first.
        top_indices = weights.argsort()[::-1][:top_n]
        top_features = [feature_names[i] for i in top_indices]
        lines.append(f"Topic {topic_idx + 1}: {', '.join(top_features)}")
    return lines


st.title("Unsupervised Text Analysis App with Training")
st.subheader("Train an LDA Model for Topic Modeling")

# Streamlit re-executes this script on every interaction, so the trained
# model is kept in session state to survive those reruns.
if "lda_model" not in st.session_state:
    st.session_state.lda_model = None

st.write("### Dataset:")
texts = [
    "The economy is experiencing significant growth this year.",
    "Climate change is one of the most pressing global challenges.",
    "Artificial intelligence is transforming industries worldwide.",
    "Renewable energy sources are becoming more popular and cost-effective.",
    "Sports events bring people together and promote cultural exchange.",
    "Advances in medicine have greatly improved life expectancy.",
    "Education plays a critical role in shaping the future of societies.",
    "Travel and tourism contribute significantly to the global economy.",
    "Space exploration inspires innovation and collaboration.",
    "Social media platforms influence public opinion and behavior.",
]
st.write(texts)

st.subheader("Training Parameters")
num_topics = st.slider("Select the number of topics for training", 2, 10, 3)

# Bag-of-words representation of the corpus: English stop words removed,
# vocabulary capped at 1000 terms.
vectorizer = CountVectorizer(stop_words="english", max_features=1000)
doc_term_matrix = vectorizer.fit_transform(texts)

st.subheader("Training the LDA Model")
if st.button("Train Model"):
    with st.spinner("Training the LDA model..."):
        # Fixed random_state so the same corpus and topic count always
        # produce the same model.
        lda = LatentDirichletAllocation(
            n_components=num_topics,
            random_state=42,
        )
        lda.fit(doc_term_matrix)
        st.session_state.lda_model = lda
    st.success("Training Completed!")

# Render the topics from the stored model rather than inside the button
# branch: the button is only True on the rerun triggered by its own click,
# so topics shown there would disappear on the next interaction (for
# example when "Save Model" is pressed).
trained_model = st.session_state.lda_model
if trained_model is not None:
    st.write("### Identified Topics:")
    topic_lines = _format_topics(
        trained_model.components_,
        vectorizer.get_feature_names_out(),
    )
    for topic_line in topic_lines:
        st.write(topic_line)

st.subheader("Save the Trained Model")
if st.button("Save Model"):
    model_to_save = st.session_state.lda_model
    if model_to_save is None:
        st.error("Please train the model first before saving.")
    else:
        # NOTE(review): only the LDA estimator is pickled. The fitted
        # vectorizer is not saved, so a consumer of this file needs the
        # same vocabulary to transform new text - confirm whether it
        # should be persisted alongside the model.
        try:
            with open(MODEL_PATH, "wb") as f:
                pickle.dump(model_to_save, f)
        except OSError as exc:
            # Report write failures (permissions, read-only file system,
            # ...) in the page instead of an unhandled traceback.
            st.error(f"Could not save the model: {exc}")
        else:
            st.success(f"Model saved as `{MODEL_PATH}`.")