"""Streamlit script: train an LDA topic model on a small built-in corpus.

The page shows the corpus, lets the user pick the number of topics, fits a
``LatentDirichletAllocation`` model on a bag-of-words matrix when "Train Model"
is clicked, lists the top words of every topic, and can pickle the trained
model to ``lda_model.pkl`` in the current working directory.

Streamlit re-executes this whole script on every widget interaction, so
anything that must outlive a single run (the fitted model and the formatted
topic lines) is kept in ``st.session_state``.
"""
import pickle

import streamlit as st
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer

# File the trained model is pickled to by the "Save Model" button.
MODEL_PATH = "lda_model.pkl"
# Number of highest-weighted words listed for each topic.
TOP_WORDS_PER_TOPIC = 5
# Upper bound on the vocabulary size kept by the vectorizer.
MAX_FEATURES = 1000


def _describe_topics(components, feature_names, n_top=TOP_WORDS_PER_TOPIC):
    """Format each topic as ``"Topic k: word1, word2, ..."``.

    Args:
        components: Iterable of per-topic weight arrays (one weight per
            vocabulary entry); each row must support ``argsort()``.
        feature_names: Vocabulary, indexable by the positions in a row.
        n_top: How many of the highest-weighted words to list per topic.

    Returns:
        A list with one formatted string per topic, numbered from 1.
    """
    lines = []
    for topic_number, weights in enumerate(components, start=1):
        # argsort is ascending; reverse it and keep the n_top heaviest words.
        top_indices = weights.argsort()[::-1][:n_top]
        top_words = ", ".join(feature_names[i] for i in top_indices)
        lines.append(f"Topic {topic_number}: {top_words}")
    return lines


# Title
st.title("Unsupervised Text Analysis App with Training")
st.subheader("Train an LDA Model for Topic Modeling")

# Initialize Session State
if "lda_model" not in st.session_state:
    st.session_state.lda_model = None
if "lda_topics" not in st.session_state:
    # Formatted topic lines of the most recently trained model.
    st.session_state.lda_topics = None

# Built-in Dataset
st.write("### Dataset:")
texts = [
    "The economy is experiencing significant growth this year.",
    "Climate change is one of the most pressing global challenges.",
    "Artificial intelligence is transforming industries worldwide.",
    "Renewable energy sources are becoming more popular and cost-effective.",
    "Sports events bring people together and promote cultural exchange.",
    "Advances in medicine have greatly improved life expectancy.",
    "Education plays a critical role in shaping the future of societies.",
    "Travel and tourism contribute significantly to the global economy.",
    "Space exploration inspires innovation and collaboration.",
    "Social media platforms influence public opinion and behavior.",
]

# Display dataset
st.write(texts)

# Input: Number of Topics
st.subheader("Training Parameters")
num_topics = st.slider("Select the number of topics for training", 2, 10, 3)

# Vectorization
vectorizer = CountVectorizer(stop_words="english", max_features=MAX_FEATURES)
doc_term_matrix = vectorizer.fit_transform(texts)

# Train LDA Model
st.subheader("Training the LDA Model")
if st.button("Train Model"):
    with st.spinner("Training the LDA model..."):
        # Fixed random_state keeps the fitted topics reproducible across runs.
        lda = LatentDirichletAllocation(n_components=num_topics, random_state=42)
        lda.fit(doc_term_matrix)

    # Keep the model and its topic summary across reruns: the button is only
    # True for the single run that follows the click.
    st.session_state.lda_model = lda
    st.session_state.lda_topics = _describe_topics(
        lda.components_, vectorizer.get_feature_names_out()
    )
    st.success("Training Completed!")

# Display Topics
# Rendered from session state so the list stays visible after later
# interactions (moving the slider, clicking "Save Model").
if st.session_state.lda_topics is not None:
    st.write("### Identified Topics:")
    for topic_line in st.session_state.lda_topics:
        st.write(topic_line)

# Save the Trained Model
st.subheader("Save the Trained Model")
if st.button("Save Model"):
    # Explicit None check: "no model yet" is represented by None, and the
    # truthiness of an estimator object is not a meaningful signal.
    if st.session_state.lda_model is not None:
        with open(MODEL_PATH, "wb") as f:
            pickle.dump(st.session_state.lda_model, f)
        st.success(f"Model saved as `{MODEL_PATH}`.")
    else:
        st.error("Please train the model first before saving.")