inie2003 committed on
Commit
42c8d90
1 Parent(s): 81bf6cd

Update helper.py

Browse files
Files changed (1) hide show
  1. helper.py +10 -13
helper.py CHANGED
@@ -47,22 +47,19 @@ def encode_query(query: Union[str, Image.Image]) -> torch.Tensor:
47
 
48
  def load_hf_datasets(dataset_name):
49
  """
50
- Load all splits containing 'Main' from a Hugging Face dataset as a DataFrame
51
- ---------------------------------------------------------------------------
52
  dataset_name: str - name of dataset on Hugging Face
53
- ---------------------------------------------------------------------------
54
- RETURNS: concatenated dataset as a pandas DataFrame
 
55
  """
56
  dataset = load_dataset(f"quasara-io/{dataset_name}")
57
-
58
- # Filter splits that contain the word 'Main'
59
- main_splits = [split for split in dataset if 'Main' in split]
60
-
61
- # Load and concatenate all splits containing 'Main' into a single DataFrame
62
- df_list = [dataset[split].to_pandas() for split in main_splits]
63
- combined_df = pd.concat(df_list, ignore_index=True)
64
-
65
- return combined_df
66
 
67
  def get_image_vectors(df):
68
  # Get the image vectors from the dataframe
 
47
 
48
  def load_hf_datasets(dataset_name):
49
  """
50
+ Load Datasets from Hugging Face as DF
51
+ ---------------------------------------
52
  dataset_name: str - name of dataset on Hugging Face
53
+ ---------------------------------------
54
+
55
+ RETURNS: dataset as pandas dataframe
56
  """
57
  dataset = load_dataset(f"quasara-io/{dataset_name}")
58
+ # Access only the 'Main_1' split
59
+ main_dataset = dataset['Main_1']
60
+ # Convert to Pandas DataFrame
61
+ df = main_dataset.to_pandas()
62
+ return df
 
 
 
 
63
 
64
  def get_image_vectors(df):
65
  # Get the image vectors from the dataframe