Spaces:
Build error
Build error
Update helper.py
Browse files
helper.py
CHANGED
|
@@ -47,22 +47,19 @@ def encode_query(query: Union[str, Image.Image]) -> torch.Tensor:
|
|
| 47 |
|
| 48 |
def load_hf_datasets(dataset_name):
|
| 49 |
"""
|
| 50 |
-
Load
|
| 51 |
-
|
| 52 |
dataset_name: str - name of dataset on Hugging Face
|
| 53 |
-
|
| 54 |
-
|
|
|
|
| 55 |
"""
|
| 56 |
dataset = load_dataset(f"quasara-io/{dataset_name}")
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
df_list = [dataset[split].to_pandas() for split in main_splits]
|
| 63 |
-
combined_df = pd.concat(df_list, ignore_index=True)
|
| 64 |
-
|
| 65 |
-
return combined_df
|
| 66 |
|
| 67 |
def get_image_vectors(df):
|
| 68 |
# Get the image vectors from the dataframe
|
|
|
|
| 47 |
|
| 48 |
def load_hf_datasets(dataset_name):
|
| 49 |
"""
|
| 50 |
+
Load a dataset from Hugging Face as a pandas DataFrame
|
| 51 |
+
---------------------------------------
|
| 52 |
dataset_name: str - name of dataset on Hugging Face
|
| 53 |
+
---------------------------------------
|
| 54 |
+
|
| 55 |
+
RETURNS: dataset as pandas dataframe
|
| 56 |
"""
|
| 57 |
dataset = load_dataset(f"quasara-io/{dataset_name}")
|
| 58 |
+
# Access only the 'Main_1' split
|
| 59 |
+
main_dataset = dataset['Main_1']
|
| 60 |
+
# Convert to Pandas DataFrame
|
| 61 |
+
df = main_dataset.to_pandas()
|
| 62 |
+
return df
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
def get_image_vectors(df):
|
| 65 |
# Get the image vectors from the dataframe
|