"""Streamlit front end for uploading documents and asking questions about them."""

import contextlib
import os
from typing import List

import streamlit as st

from unified_document_processor import UnifiedDocumentProcessor, CustomEmbeddingFunction


class StreamlitDocProcessor:
    """Two-page Streamlit UI: upload/process documents, then query them.

    All state lives in ``st.session_state`` so it survives Streamlit's
    script re-runs:

    * ``processor`` -- the ``UnifiedDocumentProcessor`` doing the real work.
    * ``processed_files`` -- set of uploaded file names already processed
      successfully in this session.
    """

    def __init__(self) -> None:
        """Create the session-state entries on first use.

        The processor is built with the ``GROQ_API_KEY`` entry of
        ``st.secrets``; a missing secret raises from that lookup.
        """
        if 'processor' not in st.session_state:
            groq_api_key = st.secrets["GROQ_API_KEY"]
            st.session_state.processor = UnifiedDocumentProcessor(groq_api_key)

        if 'processed_files' not in st.session_state:
            st.session_state.processed_files = set()

    def run(self) -> None:
        """Render the title and sidebar, then dispatch to the chosen page."""
        st.title("Document Processing and Q&A System")

        # Sidebar navigation between the two pages.
        page = st.sidebar.selectbox(
            "Choose a page",
            ["Upload & Process", "Question & Answer"]
        )

        if page == "Upload & Process":
            self.upload_and_process_page()
        else:
            self.qa_page()

    def upload_and_process_page(self) -> None:
        """Render the upload page and process any newly uploaded files.

        Files whose name is already in ``processed_files`` are skipped with
        an informational message; the others are handed to
        ``_process_upload``. The list of processed names is shown below.
        """
        st.header("Upload and Process Documents")

        uploaded_files = st.file_uploader(
            "Upload PDF or XML files",
            type=['pdf', 'xml'],
            accept_multiple_files=True
        )

        if uploaded_files:
            for uploaded_file in uploaded_files:
                if uploaded_file.name in st.session_state.processed_files:
                    st.info(f"{uploaded_file.name} has already been processed")
                else:
                    self._process_upload(uploaded_file)

        if st.session_state.processed_files:
            st.subheader("Processed Files")
            for name in sorted(st.session_state.processed_files):
                st.text(f"✓ {name}")

    def _process_upload(self, uploaded_file) -> None:
        """Spill one upload to a temp file, process it, and report the result.

        The temp file is removed even when writing or processing raises;
        the exception itself still propagates to Streamlit.
        """
        # basename() keeps a client-supplied name from carrying directory
        # components into the path we write to.
        safe_name = os.path.basename(uploaded_file.name)
        # NOTE(review): the processor only ever sees this temp path; whether
        # it derives the stored document name from it is not visible here --
        # confirm before changing the naming scheme. The name is also shared
        # by concurrent sessions uploading a file with the same name.
        temp_path = f"temp_{safe_name}"

        try:
            with open(temp_path, "wb") as f:
                f.write(uploaded_file.getbuffer())

            with st.spinner(f'Processing {uploaded_file.name}...'):
                result = st.session_state.processor.process_file(temp_path)
        finally:
            # Always clean up; tolerate the file already being gone.
            with contextlib.suppress(FileNotFoundError):
                os.remove(temp_path)

        if result['success']:
            st.success(f"Successfully processed {uploaded_file.name}")
            st.session_state.processed_files.add(uploaded_file.name)
        else:
            st.error(f"Failed to process {uploaded_file.name}: {result['error']}")

    def qa_page(self) -> None:
        """Render the Q&A page: pick files, enter a question, show the answer.

        The selectable files are the ``'pdf'`` and ``'xml'`` lists returned
        by the processor's ``get_available_files()``. Returns early with a
        warning when nothing is available or nothing is selected.
        """
        st.header("Ask Questions About Your Documents")

        available_files = st.session_state.processor.get_available_files()
        all_files = available_files['pdf'] + available_files['xml']

        if not all_files:
            st.warning("No processed files available. Please upload and process some files first.")
            return

        selected_files = st.multiselect(
            "Select files to search through",
            all_files,
            default=all_files
        )

        if not selected_files:
            st.warning("Please select at least one file to search through.")
            return

        question = st.text_input("Enter your question:")

        if st.button("Ask Question"):
            if not question:
                # Previously a click with an empty question did nothing at all.
                st.warning("Please enter a question first.")
                return

            with st.spinner("Searching for answer..."):
                answer = st.session_state.processor.ask_question_selective(
                    question,
                    selected_files
                )
                st.write("Answer:", answer)


def main() -> None:
    """Build the app object and render the current page."""
    app = StreamlitDocProcessor()
    app.run()


if __name__ == "__main__":
    main()