"""Streamlit front end for connecting, syncing and searching Carbon data sources.

The page walks through: authenticating with the Carbon SDK, fetching an OAuth
URL for a third-party service, syncing the resulting data source, listing its
files, and running an embeddings search over them.

Configuration is read from the environment:

* ``CARBON_API_KEY``     -- required; never commit the key to source control.
* ``CARBON_CUSTOMER_ID`` -- optional; defaults to ``"Candid"``.
"""

import json
import os

import requests
import streamlit as st
from carbon import Carbon

# SECURITY: an API key used to be hard-coded here. It must be considered
# leaked and rotated; the key is now supplied through the environment only.
CARBON_API_KEY = os.environ.get("CARBON_API_KEY", "")
CUSTOMER_ID = os.environ.get("CARBON_CUSTOMER_ID", "Candid")

CARBON_API_BASE_URL = "https://api.carbon.ai"
USER_FILES_URL = f"{CARBON_API_BASE_URL}/user_files_v2"
EMBEDDINGS_URL = f"{CARBON_API_BASE_URL}/embeddings"

# Upper bound for the raw HTTP calls so a stalled request cannot hang the page.
REQUEST_TIMEOUT_SECONDS = 30

OAUTH_SERVICES = ['GOOGLE_DRIVE', 'DROPBOX', 'NOTION']


def _carbon_headers():
    """Return the HTTP headers used for direct (non-SDK) Carbon API calls."""
    return {
        "authorization": f"Bearer {CARBON_API_KEY}",
        "customer-id": CUSTOMER_ID,
        "Content-Type": "application/json",
    }


def _post_json(url, payload):
    """POST *payload* as JSON to *url* and return the decoded JSON response.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on connection problems or timeout.
    """
    response = requests.post(
        url,
        json=payload,
        headers=_carbon_headers(),
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    # Fail loudly instead of trying to parse an error page as a result set.
    response.raise_for_status()
    return response.json()


def _get_oauth_url(carbon, service, **extra):
    """Request an OAuth URL for *service*, always as a new account connection."""
    response = carbon.integrations.get_oauth_url(
        service=service,
        connecting_new_account=True,
        **extra,
    )
    return response.oauth_url


def get_google_drive_oauth(carbon):
    """Return the OAuth URL for Google Drive (read-only Drive scope)."""
    return _get_oauth_url(
        carbon,
        "GOOGLE_DRIVE",
        scope="https://www.googleapis.com/auth/drive.readonly",
    )


def get_dropbox_oauth(carbon):
    """Return the OAuth URL for Dropbox."""
    return _get_oauth_url(carbon, "DROPBOX")


def get_notion_oauth(carbon):
    """Return the OAuth URL for Notion."""
    return _get_oauth_url(carbon, "NOTION")


def sync_github(carbon, username, token):
    """Start a GitHub sync for *username* and return the SDK response."""
    return carbon.integrations.sync_git_hub(
        username=username,
        token=token,
        sync_source_items=True,
    )


def sync_gitbook(carbon, access_token, organization):
    """Start a GitBook sync for *organization* and return the SDK response."""
    return carbon.integrations.sync_git_book(
        access_token=access_token,
        organization=organization,
        sync_source_items=True,
    )


def sync_s3(carbon, access_key, access_key_secret):
    """Start an S3 sync with the given credentials and return the SDK response."""
    return carbon.integrations.sync_s3(
        access_key=access_key,
        access_key_secret=access_key_secret,
        sync_source_items=True,
    )


def sync_google_drive(carbon, data_source_id):
    """Sync the items of one data source and return the SDK response.

    Despite the name, the call is not Google-specific: it is used for whichever
    data source ID is passed in. *data_source_id* may be an ``int`` or a
    numeric string; ``int()`` raises ``ValueError`` for anything else.
    """
    return carbon.integrations.sync_data_source_items(
        data_source_id=int(data_source_id),
    )


def _latest_data_source_id(carbon, service):
    """Return the ID of the most recently created data source for *service*.

    Returns ``None`` when the user has no data source for that service.
    """
    response = carbon.data_sources.query_user_data_sources(
        pagination={"limit": 100, "offset": 0},
        order_by="created_at",
        order_dir="desc",
        filters={"source": service},
    )
    return response.results[0].id if response.results else None


def list_files(carbon, data_source_id=None, service="GOOGLE_DRIVE"):
    """Sync a data source and return its items.

    The data source is chosen in this order: the explicit *data_source_id*,
    the one remembered in ``st.session_state['current_data_source']``, and
    finally the newest data source registered for *service*.

    Side effects: stores the chosen ID in ``st.session_state['current_data_source']``
    and the fetched items in ``st.session_state['files']``.

    Returns:
        The items of the data source, or ``None`` if no data source was found.
    """
    if not data_source_id:
        data_source_id = st.session_state.get('current_data_source')
    if not data_source_id:
        data_source_id = _latest_data_source_id(carbon, service)
    if not data_source_id:
        return None

    sync_google_drive(carbon, data_source_id)
    response = carbon.integrations.list_data_source_items(
        data_source_id=int(data_source_id),
    )
    st.session_state['current_data_source'] = data_source_id
    st.session_state['files'] = response.items
    return response.items if response else None


def list_all_files(carbon, data_source_id):
    """Return the file records of a data source via the ``user_files_v2`` endpoint.

    Args:
        carbon: Unused; kept so existing callers keep working.
        data_source_id: ``int`` or numeric string. When falsy, the data-source
            filter is omitted instead of sending an empty ID.

    Returns:
        A list of file dicts (possibly empty). The callers in this file read
        the ``'id'`` and ``'name'`` keys of each entry.

    Raises:
        ValueError: if *data_source_id* is not numeric.
        requests.RequestException: on HTTP or connection failure.
    """
    filters = {
        "embedding_generators": ["OPENAI"],
        "include_all_children": True,
    }
    if data_source_id:
        filters["organization_user_data_source_id"] = [int(data_source_id)]

    payload = {
        "pagination": {"limit": 100, "offset": 0},
        "order_by": "created_at",
        "order_dir": "desc",
        "filters": filters,
        "include_raw_file": True,
        "include_parsed_text_file": True,
        "include_additional_files": True,
    }
    body = _post_json(USER_FILES_URL, payload)
    # An empty or missing result set is a normal outcome, not an error.
    return body.get("results") or []


def list_user_documents(carbon):
    """Return the user's documents (newest first, up to 100), or ``None``."""
    response = carbon.documents.query_documents(
        pagination={"limit": 100, "offset": 0},
        order_by="created_at",
        order_dir="desc",
    )
    return response.documents if response else None


def semantic_search_v2(carbon, query, tags_v2=None, hybrid_search=False):
    """Run an embeddings search through the SDK and return the matching documents.

    Args:
        carbon: Authenticated Carbon client.
        query: Search text.
        tags_v2: Optional tag filter; an empty dict is sent when omitted.
        hybrid_search: Passed through to the SDK together with equal 0.5/0.5
            tuning weights.
    """
    response = carbon.embeddings.get_documents(
        query=query,
        k=2,
        tags_v2=tags_v2 if tags_v2 else {},
        include_tags=True,
        include_vectors=True,
        include_raw_file=True,
        hybrid_search=hybrid_search,
        hybrid_search_tuning_parameters={
            "weight_a": 0.5,
            "weight_b": 0.5,
        },
        media_type="TEXT",
        embedding_model="OPENAI",
    )
    return response.documents


def _active_data_source_id(typed_id):
    """Return the data source to act on: the typed ID, else the remembered one."""
    return typed_id or st.session_state.get('current_data_source')


def _render_connect_section(carbon):
    """Render the OAuth / sync controls and return the selected service name."""
    st.write('## Connect to Data Source')
    service = st.selectbox('Select Data Source for OAuth', OAUTH_SERVICES)

    oauth_getters = {
        "GOOGLE_DRIVE": get_google_drive_oauth,
        "DROPBOX": get_dropbox_oauth,
        "NOTION": get_notion_oauth,
    }

    if st.button('Get OAuth URL'):
        with st.spinner('Fetching OAuth URL...'):
            try:
                oauth_url = oauth_getters[service](carbon)
                st.write(f"OAuth URL for {service}: {oauth_url}")
                # A new connection invalidates whatever was selected before.
                st.session_state['current_data_source'] = None
                st.session_state['files'] = None
                st.session_state['oauth_fetched'] = True
            except Exception as e:  # UI boundary: report instead of crashing
                st.error(f"An error occurred: {e}")

    if st.session_state.get('oauth_fetched'):
        st.write("OAuth URL fetched. Please authenticate and then click 'Sync and Fetch Files'.")
        if st.button('Sync and Fetch Files'):
            with st.spinner('Syncing and fetching files...'):
                try:
                    data_source_id = _latest_data_source_id(carbon, service)
                    if data_source_id:
                        sync_google_drive(carbon, data_source_id)
                        st.session_state['current_data_source'] = data_source_id
                        st.session_state['oauth_fetched'] = False
                        st.success("Synced successfully! Now you can list the files.")
                    else:
                        st.error("No data sources found. Please ensure the connection was successful.")
                except Exception as e:  # UI boundary
                    st.error(f"An error occurred: {e}")

    return service


def _render_list_files_section(carbon, service):
    """Render the 'List Files' controls and return the typed data source ID."""
    st.write(f'## List Files in {service}')
    typed_id = st.text_input('Enter Data Source ID (leave blank to list all files)')

    if st.button('List Files'):
        with st.spinner('Fetching files...'):
            try:
                files = list_files(carbon, _active_data_source_id(typed_id) or None, service)
                if files:
                    st.write(f"Files in {service}:")
                    for item in files:
                        st.write(f"File ID: {item.id}, File Name: {item.name}, File Size: {item.size if hasattr(item, 'size') else 'N/A'}, Last Modified: {item.last_modified if hasattr(item, 'last_modified') else 'N/A'}")
                else:
                    st.write("No files found.")
            except Exception as e:  # UI boundary
                st.error(f"An error occurred: {e}")

    return typed_id


def _render_list_all_files_section(carbon, typed_id):
    """Render the 'List All Files' controls."""
    st.write('### List All Files')
    if st.button('List All Files'):
        with st.spinner('Fetching all files...'):
            try:
                all_files = list_all_files(carbon, _active_data_source_id(typed_id))
                if all_files:
                    st.write("All files:")
                    for document in all_files:
                        st.write(f"File ID: {document['id']}, File Name: {document['name']}")
                else:
                    st.write("No files found.")
            except Exception as e:  # UI boundary
                st.error(f"An error occurred: {e}")


def _render_search_section(carbon, typed_id):
    """Render the search controls and return the current query text."""
    st.write('### Search in the Connected Data Source')
    query = st.text_input("Enter your query:", value="Type here...")

    if st.button('Search'):
        if query:
            with st.spinner('Searching...'):
                try:
                    all_files = list_all_files(carbon, _active_data_source_id(typed_id))
                    if not all_files:
                        st.write("No files found.")
                    else:
                        payload = {
                            "query": query,
                            "k": 2,
                            # Streamlit reruns the script on every click, so the
                            # IDs must be derived here rather than reused from
                            # another button's handler.
                            # NOTE(review): assumes the endpoint accepts a list
                            # of file IDs -- confirm against the Carbon API docs.
                            "file_ids": [document['id'] for document in all_files],
                            "include_all_children": True,
                            "tags": {},
                            "include_tags": True,
                            "include_vectors": True,
                            "include_raw_file": True,
                            "hybrid_search": False,
                            "media_type": "TEXT",
                            "embedding_model": "OPENAI",
                        }
                        search_result = _post_json(EMBEDDINGS_URL, payload)
                        st.write("Search results:")
                        for position, doc in enumerate(search_result['documents'], start=1):
                            st.write(f"Document {position}:")
                            st.write(f"Content: {doc['content']}")
                            st.write(f"Source: {doc['source']}")
                            st.write(f"Match Percentage: {doc['score'] * 100}%")
                            if 'file_url' in doc:
                                st.markdown(f"[Download {doc['filename']}]({doc['file_url']})")
                            st.write("-------------------------------------------------")
                except Exception as e:  # UI boundary
                    st.error(f"An error occurred: {e}")
        else:
            st.write("Please enter a query to search.")

    return query


def _render_search_history(query):
    """Render the search history and the button that appends *query* to it."""
    st.write('## Search History')
    history = st.session_state.setdefault('search_history', [])

    # The button is only rendered while there is a query to add.
    if query and st.button('Add to Search History'):
        history.append(query)

    if history:
        st.write("Past Searches:")
        for past_query in history:
            st.write(past_query)


def main():
    """Render the whole Streamlit page."""
    st.title('Data Connector using Carbon SDK')

    st.write('### Authenticate with Carbon API')
    if not CARBON_API_KEY:
        st.error("CARBON_API_KEY is not set. Export it in the environment and reload the page.")
        return

    # Exchange the API key for an access token, then work with a token-based client.
    bootstrap_client = Carbon(api_key=CARBON_API_KEY, customer_id=CUSTOMER_ID)
    token = bootstrap_client.auth.get_access_token()
    carbon = Carbon(access_token=token.access_token)

    service = _render_connect_section(carbon)
    typed_id = _render_list_files_section(carbon, service)
    _render_list_all_files_section(carbon, typed_id)
    query = _render_search_section(carbon, typed_id)
    _render_search_history(query)


if __name__ == '__main__':
    main()