File size: 3,383 Bytes
1314bf5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import pandas as pd
import os
from pathlib import Path
from typing import Dict, List, Tuple, Union, Any

def extract_file_dict(folder_path: List[Path]) -> Dict[str, Path]:
    """
    Build a filename -> path lookup from a list of uploaded files.

    Args:
        folder_path: Uploaded file entries (described by the original
            documentation as Path objects from a Gradio file upload). Each
            entry may be a Path, a plain string path, or any object that
            exposes its path through a ``name`` attribute.

    Returns:
        Dictionary mapping each entry's base filename to the original entry.
        If several entries share the same base filename, the last one wins.
    """
    file_dict = {}
    for entry in folder_path:
        # Plain string paths have no ``.name`` attribute; the string itself
        # is the path in that case.
        raw_name = entry if isinstance(entry, str) else entry.name
        # os.path.basename honours the platform's path separators instead of
        # assuming "/" is the only one.
        file_dict[os.path.basename(str(raw_name))] = entry
    return file_dict


def validate_data(file_dict: Dict[str, Path]) -> Tuple[Union[bool, str], str]:
    """
    Check that an uploaded file set is usable and report how to process it.

    Args:
        file_dict: Mapping of filename to the path it can be read from

    Returns:
        A (status, message) pair, where status is one of:
        - True: images present and exactly one CSV with the required columns
        - False: no images, or the single CSV is unreadable / missing a column
        - "no_csv": images present but no CSV file
        - "multiple_csv": images present but more than one CSV file
    """
    image_suffixes = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff')

    # Classify every filename in one pass (case-insensitive on the extension).
    csv_names = []
    has_image = False
    for name in file_dict:
        lowered = name.lower()
        if lowered.endswith('.csv'):
            csv_names.append(name)
        if lowered.endswith(image_suffixes):
            has_image = True

    if not has_image:
        return False, "No image files found in the folder or subfolders"

    # Without exactly one CSV the caller falls back to file-based processing.
    if not csv_names:
        return "no_csv", "No CSV file found. Will extract data from file paths and names."
    if len(csv_names) > 1:
        return "multiple_csv", "Multiple CSV files found. Will extract data from file paths and names."

    # Exactly one CSV: it must load and expose both required columns.
    required_columns = (
        ('Ground Truth', "CSV file does not contain 'Ground Truth' column"),
        ('Image Name', "CSV file does not contain 'Image Name' column"),
    )
    try:
        columns = pd.read_csv(file_dict[csv_names[0]]).columns
        for column, complaint in required_columns:
            if column not in columns:
                return False, complaint
    except Exception as exc:
        return False, f"Error reading CSV file: {str(exc)}"

    return True, "Data validation successful"


def extract_binary_output(
    model_output: str,
    ground_truth: str = "",
    all_ground_truths: List[str] = None
) -> str:
    """
    Pick the ground-truth keyword mentioned on the first line of a model response.

    Keywords are the distinct, stripped, lowercased string forms of the
    non-empty values in ``all_ground_truths``. A keyword matches when it occurs
    as a substring of the lowercased first line of ``model_output``.

    Args:
        model_output: The model's text response; only its first line is searched.
            ``None`` is treated as an empty response.
        ground_truth: Current item's ground truth. Accepted for interface
            compatibility; this function does not currently use it.
        all_ground_truths: All ground truth values from which the candidate
            keywords are derived. ``None`` means no candidates.

    Returns:
        The matching keyword (lowercased), or the literal string
        "Enter the output manually" when no keyword matches. When several
        keywords match, one that is merely a fragment of another matching
        keyword is skipped (so "no cancer" wins over "cancer"); among the
        rest, the alphabetically first is returned.
    """
    if all_ground_truths is None:
        all_ground_truths = []

    normalized = {str(gt).strip().lower() for gt in all_ground_truths if gt}
    # A whitespace-only label normalizes to "", which is a substring of every
    # string and would otherwise be returned for any model output.
    normalized.discard("")
    unique_keywords = sorted(normalized)

    # Only the first line of the response is considered.
    text = "" if model_output is None else str(model_output)
    first_line = text.split("\n", 1)[0].lower()

    print(f"DEBUG: Unique keywords extracted: {unique_keywords}")
    print(f"DEBUG: Model output: {text[:100]}...")  # First 100 chars

    matches = [keyword for keyword in unique_keywords if keyword in first_line]
    for keyword in matches:
        # Skip a keyword that is contained in a longer matching keyword;
        # the longer one is the more specific answer.
        if not any(keyword != other and keyword in other for other in matches):
            return keyword

    return "Enter the output manually"