Update helper.py
Browse files
helper.py
CHANGED
@@ -47,22 +47,19 @@ def encode_query(query: Union[str, Image.Image]) -> torch.Tensor:
|
|
47 |
|
48 |
def load_hf_datasets(dataset_name):
|
49 |
"""
|
50 |
-
Load
|
51 |
-
|
52 |
dataset_name: str - name of dataset on Hugging Face
|
53 |
-
|
54 |
-
|
|
|
55 |
"""
|
56 |
dataset = load_dataset(f"quasara-io/{dataset_name}")
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
df_list = [dataset[split].to_pandas() for split in main_splits]
|
63 |
-
combined_df = pd.concat(df_list, ignore_index=True)
|
64 |
-
|
65 |
-
return combined_df
|
66 |
|
67 |
def get_image_vectors(df):
|
68 |
# Get the image vectors from the dataframe
|
|
|
47 |
|
48 |
def load_hf_datasets(dataset_name: str, split: str = "Main_1"):
    """
    Load Datasets from Hugging Face as DF
    ---------------------------------------
    dataset_name: str - name of dataset on Hugging Face
                  (looked up under the "quasara-io/" namespace)
    split: str - name of the split to convert (defaults to 'Main_1',
           the split this helper has always read)
    ---------------------------------------

    RETURNS: the requested split as a pandas dataframe
    RAISES: KeyError if the dataset has no split named `split`
    """
    dataset = load_dataset(f"quasara-io/{dataset_name}")

    # Fail with an actionable message instead of a bare KeyError when the
    # requested split does not exist in the downloaded dataset.
    if split not in dataset:
        raise KeyError(
            f"Split '{split}' not found in dataset 'quasara-io/{dataset_name}'; "
            f"available splits: {list(dataset.keys())}"
        )

    # Convert only the selected split to a Pandas DataFrame
    return dataset[split].to_pandas()
|
|
|
|
|
|
|
|
|
63 |
|
64 |
def get_image_vectors(df):
|
65 |
# Get the image vectors from the dataframe
|