File size: 1,529 Bytes
e484a46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import sys
import os
# Add the parent of this file's directory to the module search path.
# NOTE(review): presumably so project-root modules can be imported when this
# file is run directly as a script — confirm against the project layout.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))


def list_txt_files(root_dir):
    """Collect the paths of all ``.txt`` files found beneath ``root_dir``.

    The tree is traversed with ``os.walk``; each result is the visited
    directory joined with the file name. Only names ending in the exact
    (case-sensitive) suffix ``.txt`` are kept.
    """
    found = []
    for folder, _subdirs, names in os.walk(root_dir):
        # Gather this directory's matches in one step, preserving walk order.
        found.extend(
            os.path.join(folder, name) for name in names if name.endswith(".txt")
        )
    return found


def label_file(filepath):
    """
    Derive a class label from the prefix of the file's base name.

    The base name is lower-cased before matching, so the check is
    case-insensitive:
    - 'sta-' => 0 (pristine)
    - 'wea-' => 1 (weathered)
    Any other name yields None.
    """
    prefix_labels = (("sta-", 0), ("wea-", 1))
    name = os.path.basename(filepath).lower()
    for prefix, label in prefix_labels:
        if name.startswith(prefix):
            return label
    # No known prefix: the file is unknown or irrelevant.
    return None


if __name__ == "__main__":
    # Dataset folder, built relative to the current working directory.
    dataset_dir = os.path.join(
        "datasets",
        "rdwp",
        "A Raman database of microplastics weathered under natural environments",
    )
    txt_paths = list_txt_files(dataset_dir)

    # Report how many .txt files were discovered and preview the first five.
    print(f"Found {len(txt_paths)} .txt files.")
    print("Sample Files: ")
    for sample in txt_paths[:5]:
        print(" -", sample)

    # Pair every path with its label, then drop files whose prefix is unknown.
    candidates = ((candidate, label_file(candidate)) for candidate in txt_paths)
    labeled_files = [pair for pair in candidates if pair[1] is not None]

    # Preview the first five labeled files by base name.
    print(f"\nLabeled {len(labeled_files)} files:")
    for path, label in labeled_files[:5]:
        print(f" - {os.path.basename(path)} => Label: {label}")