Spaces:

quasara-io
/

Semantic-Search-Frontend

Running

App Files Files Community

Semantic-Search-Frontend / app.py

inie2003

added major tom NA to interactive frontend

713c9c7 verified about 19 hours ago

raw

history blame contribute delete

8.09 kB

	import streamlit as st
	from helper import (
	load_dataset, search, get_file_paths,
	get_cordinates, get_images_from_s3_to_display,
	get_images_with_bounding_boxes_from_s3, load_dataset_with_limit
	)
	import os
	import time


	# AWS credentials are read from the process environment; each is None when
	# the corresponding variable is not set.
	AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")
	AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")

	# Datasets offered in the "Select Dataset" dropdown, in display order.
	datasets = [
	    "MajorTom-Germany",
	    "MajorTom-Netherlands",
	    "MajorTom-North-America",
	    "MajorTom-Europe",
	    "WayveScenes",
	]

	# Folder path handed to the S3 image helpers for each dataset
	# (an empty string means no folder prefix is supplied).
	folder_path_dict = {
	    "WayveScenes": "",
	    "MajorTom-Germany": "MajorTOM-DE/",
	    "MajorTom-Netherlands": "MajorTOM-NL/",
	    "MajorTom-Europe": "MajorTom-Europe/",
	    "MajorTom-North-America": (
	        "MajorTom-NA_66b587ece7b433ff03455227_66b589a3c70d86c8306cdf86_85f3b0d0/"
	    ),
	    "MajorTom-UK": "",
	}
	# Caption shown under the dataset dropdown for each dataset.
	description = {
	    "WayveScenes": "A large-scale dataset featuring diverse urban driving scenes, captured from vehicles to advance AI perception and navigation in complex environments.",
	    "MajorTom-Germany": "A geospatial dataset containing satellite imagery from across Germany, designed for tasks like land-use classification, environmental monitoring, and earth observation analytics.",
	    "MajorTom-Netherlands": "A geospatial dataset containing satellite imagery from across Netherlands, designed for tasks like land-use classification, environmental monitoring, and earth observation analytics.",
	    "MajorTom-UK": "A geospatial dataset containing satellite imagery from across the United Kingdom, designed for tasks like land-use classification, environmental monitoring, and earth observation analytics.",
	    # Fixed: this caption previously said "Europe" (copy-paste from the entry below).
	    "MajorTom-North-America": "A geospatial dataset containing satellite imagery from across North America, designed for tasks like land-use classification, environmental monitoring, and earth observation analytics.",
	    "MajorTom-Europe": "A geospatial dataset containing satellite imagery from across Europe, designed for tasks like land-use classification, environmental monitoring, and earth observation analytics.",
	}
	# Number of selectable subsets per dataset, as [main, split]:
	# index 0 bounds the subset dropdown for a normal search, index 1 bounds it
	# when small-object search is enabled.
	selection = {
	    # NOTE(review): the original author flagged these WayveScenes counts
	    # with "Is there problem?" - confirm they are correct.
	    "WayveScenes": [1, 10],
	    "MajorTom-Germany": [1, 1],
	    "MajorTom-Netherlands": [1, 1],
	    "MajorTom-UK": [1, 1],
	    "MajorTom-North-America": [1, 4],
	    "MajorTom-Europe": [1, 19],
	}

	# Example query hint shared by every MajorTom dataset.
	_MAJORTOM_EXAMPLES = "Airports, Golf Courses, Wind Mills, Solar Panels "

	# Example queries displayed beneath the search box for each dataset.
	example_queries = {
	    "WayveScenes": "Parking Signs, Pedestrian Crossing, Traffic Light (Red, Green, Orange)",
	    "MajorTom-Germany": _MAJORTOM_EXAMPLES,
	    "MajorTom-Netherlands": _MAJORTOM_EXAMPLES,
	    "MajorTom-UK": _MAJORTOM_EXAMPLES,
	    "MajorTom-Europe": _MAJORTOM_EXAMPLES,
	    "MajorTom-North-America": _MAJORTOM_EXAMPLES,
	}


	# Name of the AWS S3 bucket the result images are fetched from.
	bucket_name = "datasets-quasara-io"



	# Streamlit App
	def main():
	    """Render the semantic-search page.

	    Lets the user pick a dataset and a subset of it, loads up to the chosen
	    number of rows with ``load_dataset_with_limit``, and, when the Search
	    button is pressed, runs ``search`` for the entered query and hands the
	    resulting file paths to the S3 display helpers (the bounding-box variant
	    when small-object search is enabled).

	    Dataset-load and search failures are reported with ``st.error`` rather
	    than propagating.
	    """
	    # Initialize session state variables if not already initialized
	    if 'search_in_small_objects' not in st.session_state:
	        st.session_state.search_in_small_objects = False

	    if 'dataset_number' not in st.session_state:
	        st.session_state.dataset_number = 1

	    if 'df' not in st.session_state:
	        st.session_state.df = None

	    st.title("Semantic Search and Image Display")

	    # Select dataset from dropdown
	    dataset_name = st.selectbox("Select Dataset", datasets)
	    # The dataset is reloaded on every rerun; clear the previous one first so
	    # a failed load can never leave stale rows available to the search below.
	    st.session_state.df = None
	    # Folder path forwarded to the S3 image helpers for this dataset.
	    folder_path = folder_path_dict[dataset_name]

	    st.caption(description[dataset_name])

	    small_objects = st.checkbox(
	        "Enable Small Object Search",
	        value=st.session_state.search_in_small_objects,
	    )
	    st.session_state.search_in_small_objects = small_objects

	    # selection[...] holds [main subset count, split subset count].
	    main_count, split_count = selection[dataset_name]
	    if small_objects:
	        st.text("Small Object Search Enabled")
	        subset_count, subset_label = split_count, "Split"
	    else:
	        st.text("Small Object Search Disabled")
	        subset_count, subset_label = main_count, "Main"
	    st.session_state.dataset_number = st.selectbox(
	        "Select Subset of Data", list(range(1, subset_count + 1))
	    )
	    st.text(f"You have selected {subset_label} Dataset {st.session_state.dataset_number}")

	    # Probe with a single row just to learn the total row count for the
	    # slider. Guarded so that a load failure shows an error instead of
	    # crashing the script.
	    try:
	        _, total_rows = load_dataset_with_limit(
	            dataset_name,
	            st.session_state.dataset_number,
	            small_objects,
	            limit=1,
	        )
	    except Exception as e:
	        st.error(f"Failed to load dataset: {e}")
	        return

	    max_rows = min(total_rows, 80000)
	    dataset_limit = st.slider(
	        "Size of Dataset to be searched from",
	        min_value=0,
	        max_value=max_rows,
	        value=int(max_rows / 2),
	    )
	    st.text('The smaller the dataset the faster the search will work.')

	    # Load the dataset with the chosen limit. The bar only reflects the real
	    # load; the earlier simulated progress (sleeping on every rerun) is gone.
	    try:
	        loading_dataset_text = st.empty()
	        loading_dataset_text.text("Loading Dataset...")
	        loading_dataset_bar = st.progress(0)

	        df, total_rows = load_dataset_with_limit(
	            dataset_name,
	            st.session_state.dataset_number,
	            small_objects,
	            limit=dataset_limit,
	        )

	        # Store loaded dataset in session state
	        st.session_state.df = df
	        loading_dataset_bar.progress(100)
	        loading_dataset_text.text("Dataset loaded successfully!")
	        st.success(f"Dataset loaded successfully with {len(df)} rows.")
	    except Exception as e:
	        st.error(f"Failed to load dataset: {e}")

	    # Input search query
	    query = st.text_input("Enter your search query")
	    st.text(f"Example Queries for your Dataset: {example_queries[dataset_name]}")
	    # Number of results to display
	    limit = st.number_input("Number of results to display", min_value=1, max_value=10, value=10)

	    # Search button
	    if not st.button("Search"):
	        return

	    # Validate input
	    if not query:
	        st.warning("Please enter a search query.")
	        return

	    df = st.session_state.df
	    if df is None:
	        # The load above failed; searching without a dataset would only
	        # surface an unrelated error from the search helper.
	        st.error("Search failed: dataset is not loaded.")
	        return

	    try:
	        # Progress bar for search
	        search_loading_text = st.empty()
	        search_loading_text.text("Searching...")
	        search_progress_bar = st.progress(0)

	        # Perform search on the loaded dataset
	        results = search(query, df, limit)
	        top_k_paths = get_file_paths(df, results)
	        # Coordinates are only needed for drawing bounding boxes.
	        top_k_cordinates = get_cordinates(df, results) if small_objects else None
	        search_type = 'Splits' if small_objects else 'Main'

	        # Complete the search progress
	        search_progress_bar.progress(100)
	        search_loading_text.text(
	            f"Search completed among {dataset_limit} rows for {dataset_name} in {search_type} {st.session_state.dataset_number}"
	        )

	        # Load Images with Bounding Boxes if applicable
	        if small_objects and top_k_paths and top_k_cordinates:
	            get_images_with_bounding_boxes_from_s3(
	                bucket_name,
	                top_k_paths,
	                top_k_cordinates,
	                AWS_ACCESS_KEY_ID,
	                AWS_SECRET_ACCESS_KEY,
	                folder_path,
	            )
	        elif not small_objects and top_k_paths:
	            st.write(f"Displaying top {len(top_k_paths)} results for query '{query}':")
	            get_images_from_s3_to_display(
	                bucket_name,
	                top_k_paths,
	                AWS_ACCESS_KEY_ID,
	                AWS_SECRET_ACCESS_KEY,
	                folder_path,
	            )
	        else:
	            st.write("No results found.")
	    except Exception as e:
	        st.error(f"Search failed: {e}")


	if __name__ == "__main__":
	    main()