import logging
from typing import Dict, List, Optional, Any
from datetime import datetime

from pydantic import BaseModel, Field

from app.schemas.dataset_common import ImpactLevel, DatasetMetrics

# Log for this module
log = logging.getLogger(__name__)

# Supported strategies for dataset combination
SUPPORTED_STRATEGIES = ["merge", "intersect", "filter"]
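
# Illustrative sketch, not part of the original module: a minimal helper showing how
# SUPPORTED_STRATEGIES could be used to reject an unsupported combination strategy
# before a request is processed. The helper name is hypothetical.
def _is_supported_strategy(strategy: str) -> bool:
    """Return True if the given combination strategy is one of SUPPORTED_STRATEGIES."""
    return strategy in SUPPORTED_STRATEGIES
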
class ImpactAssessment(BaseModel):
    dataset_id: str = Field(..., description="The ID of the dataset being assessed")
    impact_level: ImpactLevel = Field(..., description="The impact level: low, medium, or high")
    assessment_method: str = Field(
        "unknown",
        description="Method used to determine impact level (e.g., size_based, downloads_and_likes_based)"
    )
    metrics: DatasetMetrics = Field(
        ...,
        description="Metrics used for impact assessment"
    )
    thresholds: Dict[str, Dict[str, str]] = Field(
        {},
        description="Thresholds used for determining impact levels (for reference)"
    )
class DatasetInfo(BaseModel):
    id: str
    impact_level: Optional[ImpactLevel] = None
    impact_assessment: Optional[Dict] = None
    # Add other fields as needed

    class Config:
        extra = "allow"  # Allow extra fields from the API
class DatasetBase(BaseModel):
    name: str
    description: Optional[str] = None
    tags: Optional[List[str]] = None

class DatasetCreate(DatasetBase):
    files: Optional[List[str]] = None

class DatasetUpdate(DatasetBase):
    name: Optional[str] = None  # Make all fields optional for partial updates

class Dataset(DatasetBase):
    id: int  # or str, depending on your ID format
    owner_id: str  # Assuming user IDs are strings
    created_at: Optional[str] = None
    updated_at: Optional[str] = None

    class Config:
        pass  # orm_mode is intentionally not set since no ORM is used
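
# Illustrative sketch, not part of the original module, assuming Pydantic v1-style
# APIs to match the class-based Config used above: DatasetUpdate allows partial
# updates, and exclude_unset keeps only the fields the client actually sent.
def _example_partial_update() -> Dict[str, Any]:
    update = DatasetUpdate(description="Refreshed description")
    return update.dict(exclude_unset=True)  # {"description": "Refreshed description"}
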
class DatasetCombineRequest(BaseModel):
    source_datasets: List[str] = Field(..., description="List of dataset IDs to combine")
    name: str = Field(..., description="Name for the combined dataset")
    description: Optional[str] = Field(None, description="Description for the combined dataset")
    combination_strategy: str = Field("merge", description="Strategy to use when combining datasets (e.g., 'merge', 'intersect', 'filter')")
    filter_criteria: Optional[Dict[str, Any]] = Field(None, description="Criteria for filtering when combining datasets")
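
# Illustrative sketch, not part of the original module: building a combine request
# and checking its strategy against SUPPORTED_STRATEGIES. Dataset IDs and the
# filter criteria are made up.
def _example_combine_request() -> DatasetCombineRequest:
    request = DatasetCombineRequest(
        source_datasets=["ds-001", "ds-002"],
        name="combined-demo",
        filter_criteria={"language": "en"},
    )
    # combination_strategy defaults to "merge", which is in SUPPORTED_STRATEGIES.
    assert request.combination_strategy in SUPPORTED_STRATEGIES
    return request
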
class CombinedDataset(BaseModel):
    id: str = Field(..., description="ID of the combined dataset")
    name: str = Field(..., description="Name of the combined dataset")
    description: Optional[str] = Field(None, description="Description of the combined dataset")
    source_datasets: List[str] = Field(..., description="IDs of the source datasets")
    created_at: datetime = Field(..., description="Creation timestamp")
    created_by: str = Field(..., description="ID of the user who created this combined dataset")
    impact_level: Optional[ImpactLevel] = Field(None, description="Calculated impact level of the combined dataset")
    status: str = Field("processing", description="Status of the dataset combination process")
    combination_strategy: str = Field(..., description="Strategy used when combining datasets")
    metrics: Optional[DatasetMetrics] = Field(None, description="Metrics for the combined dataset")
    storage_bucket_id: Optional[str] = Field(None, description="ID of the storage bucket containing dataset files")
    storage_folder_path: Optional[str] = Field(None, description="Path to the dataset files within the bucket")

    class Config:
        extra = "allow"  # Allow extra fields for flexibility
__all__ = [
    "ImpactLevel", "ImpactAssessment", "DatasetInfo", "DatasetMetrics",
    "Dataset", "DatasetCreate", "DatasetUpdate", "DatasetCombineRequest", "CombinedDataset",
]