Spaces:
Running
Running
import sys | |
import os | |
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) | |
def list_txt_files(root_dir):
    """Recursively list all .txt files in a directory.

    Args:
        root_dir: Directory to search; every subdirectory is visited.

    Returns:
        A sorted list of paths (``root_dir`` joined with each file's
        location beneath it) for every file whose name ends in ``.txt``,
        compared case-insensitively. The list is empty when nothing matches
        or when ``root_dir`` does not exist.
    """
    txt_files = []
    for dirpath, _, filenames in os.walk(root_dir):
        # Lowercase before comparing so "X.TXT" is found too, in line with the
        # case-insensitive prefix check used when labelling files.
        txt_files.extend(
            os.path.join(dirpath, filename)
            for filename in filenames
            if filename.lower().endswith(".txt")
        )
    # os.walk yields entries in an OS-dependent order; sort so callers get the
    # same listing on every run and platform.
    txt_files.sort()
    return txt_files
def label_file(filepath):
    """
    Map a file to its class label using the filename prefix.

    Only the base name of ``filepath`` is inspected (directory components
    are ignored) and the comparison is case-insensitive:
    - 'sta-' => 0 (pristine)
    - 'wea-' => 1 (weathered)

    Returns None when the base name starts with neither prefix.
    """
    base_name = os.path.basename(filepath).lower()
    # Prefix -> label table; the first matching prefix wins.
    for prefix, label in (("sta-", 0), ("wea-", 1)):
        if base_name.startswith(prefix):
            return label
    return None  # Unknown or irrelevant
if __name__ == "__main__":
    # Dataset folder, given relative to the current working directory.
    dataset_dir = os.path.join(
        "datasets",
        "rdwp",
        "A Raman database of microplastics weathered under natural environments",
    )

    # Discover every .txt file under the dataset folder and preview up to five.
    txt_paths = list_txt_files(dataset_dir)
    print(f"Found {len(txt_paths)} .txt files.")
    print("Sample Files: ")
    for path in txt_paths[:5]:
        print(" -", path)

    # Pair each path with its label, dropping files whose prefix is unknown.
    labeled_files = [
        (candidate, assigned)
        for candidate, assigned in zip(txt_paths, map(label_file, txt_paths))
        if assigned is not None
    ]

    print(f"\nLabeled {len(labeled_files)} files:")
    for path, label in labeled_files[:5]:
        print(f" - {os.path.basename(path)} => Label: {label}")