polymer-aging-ml / scripts /list_spectra.py
devjas1
Initial migration from original polymer_project
e484a46
"""
list_spectra.py
This script provides functionality to recursively list all `.txt` files
within a specified directory. It is designed to assist in managing and
exploring datasets, particularly for Raman spectrum data stored in text files.
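Functions:
- list_txt_files(root_dir): Recursively finds and returns a list of all `.txt`
files in the given directory.
- label_file(filepath): Returns a label derived from the filename prefix
('sta-' => 0, 'wea-' => 1), or None when the prefix is not recognised.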
Usage:
- The script can be executed directly to list `.txt` files in a predefined
dataset directory and print a summary, including the total count and a
sample of file paths.
Example:
$ python list_spectra.py
Found 100 .txt files.
Sample Files:
- datasets/rdwp/.../file1.txt
- datasets/rdwp/.../file2.txt
"""
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
def list_txt_files(root_dir):
    """Recursively list all ``.txt`` files under a directory.

    Args:
        root_dir: Path of the directory to search.

    Returns:
        A lexicographically sorted list of paths, one for every file whose
        name ends with ``.txt`` (case-sensitive). Each path is the walked
        directory joined with the file name, so it is relative or absolute
        exactly as ``root_dir`` is. The list is empty when nothing matches;
        ``os.walk`` ignores listing errors by default, so a missing
        ``root_dir`` also yields an empty list rather than raising.
    """
    txt_files = [
        os.path.join(dirpath, name)
        for dirpath, _, filenames in os.walk(root_dir)
        for name in filenames
        if name.endswith(".txt")
    ]
    # os.walk yields entries in filesystem-dependent order; sorting keeps the
    # result (and any "first N" sample taken from it) reproducible.
    txt_files.sort()
    return txt_files
def label_file(filepath):
    """
    Map a spectrum file to its class label using the filename prefix.

    Only the final path component is inspected, and the comparison is
    case-insensitive:
    - names beginning with 'sta-' yield 0 (pristine)
    - names beginning with 'wea-' yield 1 (weathered)
    Any other name yields None.
    """
    prefix_labels = (("sta-", 0), ("wea-", 1))
    name = os.path.basename(filepath).lower()
    for prefix, label in prefix_labels:
        if name.startswith(prefix):
            return label
    return None  # Prefix not recognised
if __name__ == "__main__":
    # Dataset location, given relative to the current working directory.
    dataset_parts = (
        "datasets",
        "rdwp",
        "A Raman database of microplastics weathered under natural environments",
    )
    dataset_dir = os.path.join(*dataset_parts)

    # Report how many text files were found and show up to five of them.
    txt_paths = list_txt_files(dataset_dir)
    print(f"Found {len(txt_paths)} .txt files.")
    print("Sample Files: ")
    for path in txt_paths[:5]:
        print(" -", path)

    # Pair every path with its label and keep only those with a known prefix.
    candidates = ((path, label_file(path)) for path in txt_paths)
    labeled_files = [
        (path, label) for path, label in candidates if label is not None
    ]

    print(f"\nLabeled {len(labeled_files)} files:")
    for path, label in labeled_files[:5]:
        print(f" - {os.path.basename(path)} => Label: {label}")