Spaces:
Running
Running
File size: 2,543 Bytes
9afc52f 182adbd 4644b40 9358586 1d6fa11 19c8428 dbc4628 9358586 182adbd 34165ae 19c8428 4644b40 9358586 dbc4628 4644b40 9358586 4644b40 19c8428 34165ae 1d6fa11 19c8428 34165ae 19c8428 dbc4628 34165ae dbc4628 19c8428 dbc4628 9358586 19c8428 34165ae 19c8428 9358586 19c8428 9358586 dbc4628 9358586 1d6fa11 9358586 1d6fa11 9358586 19c8428 1d6fa11 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
import streamlit as st
import json
import os
from Chunker import CodeChunker
# Configure the page title and wide layout; this is the first Streamlit call
# made by the script.
st.set_page_config(
    page_title="Cintra Code Chunker",
    layout="wide",
)
# Function to load JSON data
def load_json_file(file_path):
    """Load and parse a JSON document from disk.

    The file is opened explicitly as UTF-8 so that non-ASCII characters are
    decoded the same way on every platform instead of depending on the
    locale's default text encoding.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The deserialized JSON value, as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as json_file:
        return json.load(json_file)
# Function to read code from an uploaded file
def read_code_from_file(uploaded_file):
    """Return the full contents of an uploaded file as text.

    Args:
        uploaded_file: Object exposing ``getvalue()`` that returns the raw
            bytes of the upload.

    Returns:
        The bytes decoded as UTF-8.
    """
    raw_bytes = uploaded_file.getvalue()
    return raw_bytes.decode("utf-8")
# Link to the project's GitHub repository; every optional argument is passed
# explicitly.
st.link_button(
    'Contribute on GitHub',
    'https://github.com/CintraAI/code-chunker',
    help=None,
    type="secondary",
    disabled=False,
    use_container_width=False,
)

# The example code files are read from a JSON file located next to this script.
json_file_path = os.path.join(
    os.path.dirname(__file__),
    'mock_codefiles.json',
)
code_files_data = load_json_file(json_file_path)
# The keys of the loaded JSON are offered as the example file names.
code_files = [*code_files_data.keys()]

st.title('Cintra Code Chunker')

# Two columns: one for picking an example, one for uploading a file.
selection_col, upload_col = st.columns(2)

with selection_col:
    # Dropdown listing the example files
    selected_file_name = st.selectbox(
        "Select an example code file",
        code_files,
    )

with upload_col:
    # Uploader limited to the listed file extensions
    uploaded_file = st.file_uploader(
        "Or upload your code file",
        type=['py', 'js', 'css', 'jsx'],
    )
# Pick the code to display: an uploaded file takes precedence over the
# selected example. The extension is the text after the last '.' in the name.
if uploaded_file is None:
    code_content = code_files_data.get(selected_file_name, "")
    if selected_file_name:
        file_extension = selected_file_name.rsplit('.', 1)[-1]
    else:
        # Nothing selected, so there is no extension to derive.
        file_extension = None
else:
    code_content = read_code_from_file(uploaded_file)
    file_extension = uploaded_file.name.rsplit('.', 1)[-1]
# Determine the language for syntax highlighting
def get_language_by_extension(file_extension):
    """Map a file extension to the language name used for highlighting.

    Args:
        file_extension: Extension without the leading dot (for example
            ``'py'``), or ``None``.

    Returns:
        ``'python'``, ``'javascript'`` or ``'css'`` for a recognised
        extension; ``None`` for anything else.
    """
    known_languages = (
        ('python', ('py', 'python')),
        ('javascript', ('js', 'jsx', 'javascript')),
        ('css', ('css',)),
    )
    for language_name, extensions in known_languages:
        if file_extension in extensions:
            return language_name
    return None
language = get_language_by_extension(file_extension)

# Target chunk size, adjustable by the user between 5 and 1000 (default 25).
token_chunk_size = st.number_input(
    'Chunk Size Target Measured in Tokens (tiktoken, gpt-4)',
    min_value=5,
    max_value=1000,
    value=25,
)

original_col, chunked_col = st.columns(2)

with original_col:
    st.subheader('Original File')
    st.code(code_content, language=language)

# Build a chunker for this file extension and split the content using the
# requested chunk size.
code_chunker = CodeChunker(file_extension=file_extension)
chunked_code_dict = code_chunker.chunk(code_content, token_chunk_size)

with chunked_col:
    st.subheader('Chunked Code')
    # Each chunk gets its own code box; only the chunk text is rendered.
    for chunk_key, chunk_code in chunked_code_dict.items():
        st.code(chunk_code, language=language)