# polymer-aging-ml / scripts / discover_raman_files.py
# devjas1
# Initial migration from original polymer_project
# e484a46
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
def list_txt_files(root_dir):
    """Recursively list every ``.txt`` file beneath ``root_dir``.

    The extension is matched case-insensitively (``.TXT`` counts too), in
    line with how ``label_file`` lowercases names before checking prefixes.
    The result is sorted so the listing is reproducible regardless of the
    order in which the filesystem enumerates directory entries.

    Args:
        root_dir: Directory to search; all subdirectories are walked too.

    Returns:
        Sorted list of file paths, each built by joining the containing
        directory (as reported by ``os.walk``) with the file name. The list
        is empty when nothing matches or when ``root_dir`` does not exist
        (``os.walk`` ignores listing errors by default).
    """
    txt_files = []
    for dirpath, _, filenames in os.walk(root_dir):
        txt_files.extend(
            os.path.join(dirpath, name)
            for name in filenames
            # Lowercase first so ".TXT" / ".Txt" are not silently skipped.
            if name.lower().endswith(".txt")
        )
    # os.walk yields entries in arbitrary (filesystem-dependent) order.
    txt_files.sort()
    return txt_files
def label_file(filepath):
    """
    Map a file to its class label using the prefix of its base name.

    The directory part of ``filepath`` is ignored and the comparison is
    case-insensitive:
    - name starts with 'sta-' => 0 (pristine)
    - name starts with 'wea-' => 1 (weathered)

    Returns None when the name starts with neither prefix.
    """
    prefix_to_label = (("sta-", 0), ("wea-", 1))
    base_name = os.path.basename(filepath).lower()
    for prefix, label in prefix_to_label:
        if base_name.startswith(prefix):
            return label
    # Unknown or irrelevant file.
    return None
if __name__ == "__main__":
    # Script entry point: discover the dataset's .txt files, print a short
    # sample, then label them by filename prefix and print a sample of that.
    dataset_dir = os.path.join(
        "datasets",
        "rdwp",
        "A Raman database of microplastics weathered under natural environments",
    )

    txt_paths = list_txt_files(dataset_dir)
    print(f"Found {len(txt_paths)} .txt files.")
    print("Sample Files: ")
    for sample_path in txt_paths[:5]:
        print(" -", sample_path)

    # Pair each path with its label, dropping files whose prefix is unknown.
    path_label_pairs = [(p, label_file(p)) for p in txt_paths]
    labeled_files = [pair for pair in path_label_pairs if pair[1] is not None]

    print(f"\nLabeled {len(labeled_files)} files:")
    for labeled_path, assigned in labeled_files[:5]:
        print(f" - {os.path.basename(labeled_path)} => Label: {assigned}")