"""Streamlit demo app: pick or upload a code file and view it split into chunks."""

import json
import os

import streamlit as st

from Chunker import CodeChunker

# Set Streamlit page config at the very beginning
st.set_page_config(page_title="Cintra Code Chunker", layout="wide")

# Maps a (lower-case) file extension or language alias to the language name
# passed to ``st.code`` for syntax highlighting.
_LANGUAGE_BY_EXTENSION = {
    'py': 'python',
    'python': 'python',
    'js': 'javascript',
    'jsx': 'javascript',
    'javascript': 'javascript',
    'css': 'css',
}


def load_json_file(file_path):
    """Return the parsed JSON document stored at *file_path*.

    The file is read as UTF-8 explicitly so the result does not depend on the
    platform's default text encoding.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)


def read_code_from_file(uploaded_file):
    """Return the content of an uploaded file decoded as UTF-8 text.

    Raises:
        UnicodeDecodeError: if the uploaded bytes are not valid UTF-8.
    """
    return uploaded_file.getvalue().decode("utf-8")


def get_language_by_extension(file_extension):
    """Return the syntax-highlighting language for *file_extension*.

    Returns ``None`` when the extension is ``None`` or not recognised.
    """
    return _LANGUAGE_BY_EXTENSION.get(file_extension)


def _extension_of(file_name):
    """Return the text after the last '.' in *file_name*, lower-cased.

    Lower-casing lets names such as ``Foo.PY`` resolve like ``foo.py``.
    """
    return file_name.split('.')[-1].lower()


st.link_button('Contribute on GitHub', 'https://github.com/CintraAI/code-chunker', help=None, type="secondary", disabled=False, use_container_width=False)

# The example files live in a JSON document next to this script.
json_file_path = os.path.join(os.path.dirname(__file__), 'mock_codefiles.json')
code_files_data = load_json_file(json_file_path)

# Extract filenames and contents
code_files = list(code_files_data.keys())

st.title('Cintra Code Chunker')

selection_col, upload_col = st.columns(2)
with selection_col:
    # File selection dropdown
    selected_file_name = st.selectbox("Select an example code file", code_files)

with upload_col:
    # File upload
    uploaded_file = st.file_uploader("Or upload your code file", type=['py', 'js', 'css', 'jsx'])

# Determine the content and file extension based on selection or upload;
# an uploaded file takes precedence over the selected example.
if uploaded_file is not None:
    try:
        code_content = read_code_from_file(uploaded_file)
    except UnicodeDecodeError:
        # Show a readable message instead of a raw traceback, then end this run.
        st.error("Could not decode the uploaded file as UTF-8. Please upload a UTF-8 encoded text file.")
        st.stop()
    file_extension = _extension_of(uploaded_file.name)
else:
    code_content = code_files_data.get(selected_file_name, "")
    file_extension = _extension_of(selected_file_name) if selected_file_name else None

# Determine the language for syntax highlighting
language = get_language_by_extension(file_extension)

token_chunk_size = st.number_input('Chunk Size Target Measured in Tokens (tiktoken, gpt-4)', min_value=5, max_value=1000, value=25)

original_col, chunked_col = st.columns(2)

with original_col:
    st.subheader('Original File')
    st.code(code_content, language=language)

# Initialize the code chunker
code_chunker = CodeChunker(file_extension=file_extension)

# Chunk the code content
chunked_code_dict = code_chunker.chunk(code_content, token_chunk_size)

with chunked_col:
    st.subheader('Chunked Code')
    # Only the chunk text is displayed; the keys are not needed here.
    for chunk_code in chunked_code_dict.values():
        st.code(chunk_code, language=language)