"""Streamlit demo app: connect data sources through Carbon and search them.

The page lets the user
  1. obtain an OAuth URL for Google Drive, Dropbox or Notion,
  2. list the data sources already connected for the customer,
  3. list the items inside the selected data source,
  4. list the files known to Carbon, and
  5. run an embedding search over those files.
"""

import json
import os
from decimal import Decimal

import requests
import streamlit as st
from carbon import Carbon

# Constants
# SECURITY: an API key committed to source control should be treated as
# leaked.  The key can now be supplied through the CARBON_API_KEY environment
# variable; the literal below is kept only as a backward-compatible fallback.
# TODO: rotate this key and remove the fallback.
CARBON_API_KEY = os.environ.get(
    "CARBON_API_KEY",
    "a38ee1fe5fef56fc8e1ae2afc881378804bb902882442e1554adae4f82ee23ea",
)
CUSTOMER_ID = "Candid"
CARBON_API_BASE_URL = "https://api.carbon.ai"
# Without a timeout a stalled HTTP call would block the Streamlit script run.
REQUEST_TIMEOUT_SECONDS = 30

# Initialize Carbon SDK
carbon = Carbon(api_key=CARBON_API_KEY, customer_id=CUSTOMER_ID)


def _get_oauth_url(carbon, service, **extra_params):
    """Request an OAuth URL for *service* for a newly connected account.

    Args:
        carbon: Carbon SDK client used to make the call.
        service: Service identifier passed to the SDK (e.g. "DROPBOX").
        **extra_params: Additional keyword arguments forwarded unchanged to
            ``carbon.integrations.get_oauth_url`` (e.g. ``scope``).

    Returns:
        The ``oauth_url`` attribute of the SDK response.
    """
    response = carbon.integrations.get_oauth_url(
        service=service,
        connecting_new_account=True,
        **extra_params,
    )
    return response.oauth_url


# Authenticate and get OAuth URL for Google Drive
def get_google_drive_oauth(carbon):
    """Return an OAuth URL for Google Drive using the read-only Drive scope."""
    return _get_oauth_url(
        carbon,
        "GOOGLE_DRIVE",
        scope="https://www.googleapis.com/auth/drive.readonly",
    )


# Authenticate and get OAuth URL for Dropbox
def get_dropbox_oauth(carbon):
    """Return an OAuth URL for Dropbox."""
    return _get_oauth_url(carbon, "DROPBOX")


# Authenticate and get OAuth URL for Notion
def get_notion_oauth(carbon):
    """Return an OAuth URL for Notion."""
    return _get_oauth_url(carbon, "NOTION")


# Get data source ID
def get_data_source_id(service):
    """Return the ID of the first data source found for *service*.

    The query asks for results ordered by ``created_at`` descending and the
    first result is used.

    Args:
        service: Source name used as the ``source`` filter.

    Returns:
        The ``id`` of the first result, or ``None`` when the query returns no
        data sources (previously this case raised ``IndexError``).
    """
    response = carbon.data_sources.query_user_data_sources(
        pagination={"limit": 100, "offset": 0},
        order_by="created_at",
        order_dir="desc",
        filters={"source": service},
    )
    if not response.results:
        return None
    return response.results[0].id


# List files in the data source
def list_files(data_source_id):
    """Return the first page (up to 250 entries) of items in a data source."""
    response = carbon.integrations.list_data_source_items(
        data_source_id=data_source_id,
        filters={},
        pagination={"limit": 250, "offset": 0},
    )
    return response.items


# List all data sources associated with the user
def list_user_data_sources():
    """Return up to 100 data sources, requested newest first."""
    response = carbon.data_sources.query_user_data_sources(
        pagination={"limit": 100, "offset": 0},
        order_by="created_at",
        order_dir="desc",
    )
    return response.results


def _post_carbon(endpoint, payload):
    """POST *payload* as JSON to a Carbon REST endpoint and return the body.

    Args:
        endpoint: Path below ``CARBON_API_BASE_URL`` (no leading slash).
        payload: JSON-serialisable request body.

    Returns:
        The decoded JSON response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    headers = {
        "authorization": f"Bearer {CARBON_API_KEY}",
        "customer-id": CUSTOMER_ID,
        "Content-Type": "application/json",
    }
    response = requests.post(
        f"{CARBON_API_BASE_URL}/{endpoint}",
        json=payload,
        headers=headers,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    # Fail loudly on HTTP errors instead of a confusing KeyError further down.
    response.raise_for_status()
    return response.json()


# List files uploaded by the user
def list_uploaded_files(data_source_id):
    """Return the ``results`` list of the ``user_files_v2`` endpoint.

    Args:
        data_source_id: Currently unused; the data-source filter is sent
            empty (see the inline note below).

    Returns:
        The ``results`` entry of the JSON response.
    """
    payload = {
        "pagination": {"limit": 100, "offset": 0},
        "order_by": "created_at",
        "order_dir": "desc",
        "filters": {
            # data_source_id organization level need to explore
            "organization_user_data_source_id": [],
            "embedding_generators": ["OPENAI"],
            "include_all_children": True,
        },
        "include_raw_file": True,
        "include_parsed_text_file": True,
        "include_additional_files": True,
    }
    return _post_carbon("user_files_v2", payload)["results"]


# Search function
def search_documents(query, file_ids):
    """Run an embedding search and return the matching documents.

    Args:
        query: Free-text search query.
        file_ids: File IDs to send in the request's ``file_ids`` field.
            ``None`` or an empty sequence sends an empty list, which is what
            the previous implementation always sent.

    Returns:
        The ``documents`` entry of the JSON response.
    """
    payload = {
        "query": query,
        "k": 2,
        # Previously hard-coded to [] so the argument was silently ignored.
        "file_ids": list(file_ids or []),
        "include_all_children": True,
        "include_tags": True,
        "include_vectors": True,
        "include_raw_file": True,
        "hybrid_search": False,
        "hybrid_search_tuning_parameters": {"weight_a": 0.5, "weight_b": 0.5},
        "media_type": "TEXT",
        "embedding_model": "OPENAI",
    }
    return _post_carbon("embeddings", payload)["documents"]


# Streamlit UI
st.title("Data Connector using Carbon SDK")

# Authenticate with Carbon API
st.header("Authenticate with Carbon API")

# Connect to Data Source
st.subheader("Connect to Data Source")
data_source = st.selectbox(
    "Select Data Source for OAuth",
    ["GOOGLE_DRIVE", "DROPBOX", "NOTION"],
)

# Maps each selectable service to the function that builds its OAuth URL.
OAUTH_URL_GETTERS = {
    "GOOGLE_DRIVE": get_google_drive_oauth,
    "DROPBOX": get_dropbox_oauth,
    "NOTION": get_notion_oauth,
}

if st.button("Get OAuth URL"):
    oauth_url = OAUTH_URL_GETTERS[data_source](carbon)
    st.write(f"OAuth URL for {data_source}: {oauth_url}")
    st.markdown(f'Authenticate {data_source}', unsafe_allow_html=True)

# List User Data Sources
st.subheader("List Data Sources")
if st.button("List Data Sources"):
    data_sources = list_user_data_sources()
    st.write("Data Sources associated with the user:")
    for ds in data_sources:
        st.write(
            f"ID: {ds.id}, External ID: {ds.data_source_external_id}, Type: {ds.data_source_type}, "
            f"Sync Status: {ds.sync_status}, Created At: {ds.created_at}, Updated At: {ds.updated_at}"
        )

# List Files in Data Source
st.subheader(f"List Files in {data_source}")
if st.button("List Files"):
    data_source_id = get_data_source_id(data_source)
    if data_source_id is None:
        st.warning(f"No connected {data_source} data source found. Connect one first.")
    else:
        files = list_files(data_source_id)
        st.write(f"Files in {data_source}:")
        for file in files:
            st.write(file.name)

# List Uploaded Files
st.subheader("Documents Uploaded Result")
# data_source_search = st.selectbox("Select Data Source for OAuth", ["GOOGLE_DRIVE", "DROPBOX", "NOTION"])
# if st.button("Submit"):
#     data_source_search

# Streamlit re-executes the script on every widget interaction, so a plain
# module-level list would be empty again by the time "Search" is clicked.
# Keeping the IDs in session state lets them survive between reruns.
# NOTE(review): after "Show Uploaded Files" has been used, searches are
# restricted to the listed files — confirm this is the intended behaviour.
if "file_ids" not in st.session_state:
    st.session_state["file_ids"] = []

if st.button("Show Uploaded Files"):
    data_source_id = get_data_source_id(data_source)
    uploaded_files = list_uploaded_files(data_source_id)
    st.write("Uploaded Files:")
    for file in uploaded_files:
        st.write(
            f"ID: {file['id']}, Name: {file['name']}, Organization Supplied User ID: {file['organization_supplied_user_id']}, "
            f"Organization User Data Source ID: {file['organization_user_data_source_id']}, External URL: {file['external_url']}"
        )
    st.session_state["file_ids"] = [file["id"] for file in uploaded_files]

# Search Documents
st.subheader("Search Documents")
query = st.text_input("Enter your search query:")
if st.button("Search") and query:
    search_results = search_documents(query, st.session_state["file_ids"])
    st.write("Search Results:")
    for result in search_results:
        st.write(f"Source: {result['source']}")
        st.write(f"Title: {result['content']}")
        st.write(f"Source URL: {result['source_url']}")
        st.write(f"Source Type: {result['source_type']}")
        st.write(f"Presigned URL: {result['presigned_url']}")
        st.write(f"Tags: {result['tags']}")
        st.write("-------------------------------------------------")

# # Add chat interface using custom HTML/CSS
# st.subheader("Chat Interface")
# chat_input = st.text_input("Enter your query:")
# if st.button("Send"):
#     if chat_input:
#         st.markdown(f'{chat_input}', unsafe_allow_html=True)
#         # Placeholder for bot response (add your processing logic here)
#         bot_response = "This is a bot response."
#         st.markdown(f'{bot_response}', unsafe_allow_html=True)

# Custom CSS for chat bubbles
st.markdown(""" """, unsafe_allow_html=True)