# dataset-tool/app/schemas/dataset.py
# Last commit (iaroy, fdc5d7a): Deploy full application code
import logging
from typing import Dict, List, Optional, Any
from datetime import datetime
from pydantic import BaseModel, Field
from app.schemas.dataset_common import ImpactLevel, DatasetMetrics
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Names of the strategies available for combining datasets.
SUPPORTED_STRATEGIES = [
    "merge",
    "intersect",
    "filter",
]
class ImpactAssessment(BaseModel):
    """Outcome of an impact assessment for one dataset.

    Carries the assessed level together with the method, the metrics and
    the thresholds recorded for that assessment.
    """

    # Required: identifies the assessed dataset.
    dataset_id: str = Field(
        default=...,
        description="The ID of the dataset being assessed",
    )
    # Required: the resulting level.
    impact_level: ImpactLevel = Field(
        default=...,
        description="The impact level: low, medium, or high",
    )
    # Falls back to "unknown" when the caller does not name a method.
    assessment_method: str = Field(
        default="unknown",
        description="Method used to determine impact level (e.g., size_based, downloads_and_likes_based)",
    )
    # Required: the metrics the assessment was based on.
    metrics: DatasetMetrics = Field(
        default=...,
        description="Metrics used for impact assessment",
    )
    # Optional reference data; empty mapping when not supplied.
    thresholds: Dict[str, Dict[str, str]] = Field(
        default={},
        description="Thresholds used for determining impact levels (for reference)",
    )
class DatasetInfo(BaseModel):
    """Dataset record with optional impact information.

    Only ``id`` is required. Fields that are not declared here are
    accepted too (see ``Config.extra``), so further fields can be added
    later as needed.
    """

    id: str
    impact_level: Optional[ImpactLevel] = Field(default=None)
    impact_assessment: Optional[Dict] = Field(default=None)

    class Config:
        # The API may send more fields than are modelled here; keep them
        # instead of rejecting the payload.
        extra = "allow"
class DatasetBase(BaseModel):
    """Fields shared by the dataset create, update and read models."""

    # Required dataset name.
    name: str
    # Optional free-text description.
    description: Optional[str] = Field(default=None)
    # Optional list of tag strings.
    tags: Optional[List[str]] = Field(default=None)
class DatasetCreate(DatasetBase):
    """Payload for creating a dataset: the base fields plus optional files."""

    # Optional list of file entries given as strings.
    files: Optional[List[str]] = Field(default=None)
class DatasetUpdate(DatasetBase):
    """Payload for updating a dataset.

    Re-declares ``name`` as optional so that an update does not have to
    supply it; the other inherited fields are already optional.
    """

    name: Optional[str] = Field(default=None)
class Dataset(DatasetBase):
    """Stored dataset: the base fields plus identity and timestamps.

    No model configuration is declared here. ``orm_mode`` was removed
    because no ORM is used, and an empty nested ``Config`` class would
    configure nothing (and is reported as deprecated by Pydantic v2).
    """

    # Integer identifier; switch to ``str`` if the ID format requires it.
    id: int
    # Owner's user ID -- assumes user IDs are strings.
    owner_id: str
    # Timestamps are carried as optional strings; no format is enforced here.
    created_at: Optional[str] = None
    updated_at: Optional[str] = None
class DatasetCombineRequest(BaseModel):
    """Request body for combining several source datasets into a new one."""

    # Required: the datasets to combine, by ID.
    source_datasets: List[str] = Field(
        default=...,
        description="List of dataset IDs to combine",
    )
    # Required: name given to the result.
    name: str = Field(
        default=...,
        description="Name for the combined dataset",
    )
    description: Optional[str] = Field(
        default=None,
        description="Description for the combined dataset",
    )
    # Defaults to "merge"; the value is not validated by this model.
    combination_strategy: str = Field(
        default="merge",
        description="Strategy to use when combining datasets (e.g., 'merge', 'intersect', 'filter')",
    )
    filter_criteria: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Criteria for filtering when combining datasets",
    )
class CombinedDataset(BaseModel):
    """Dataset produced by combining several source datasets.

    Undeclared fields are accepted as well (see ``Config.extra``).
    """

    # --- Identity and provenance (all required) ---
    id: str = Field(
        default=...,
        description="ID of the combined dataset",
    )
    name: str = Field(
        default=...,
        description="Name of the combined dataset",
    )
    description: Optional[str] = Field(
        default=None,
        description="Description of the combined dataset",
    )
    source_datasets: List[str] = Field(
        default=...,
        description="IDs of the source datasets",
    )
    created_at: datetime = Field(
        default=...,
        description="Creation timestamp",
    )
    created_by: str = Field(
        default=...,
        description="ID of the user who created this combined dataset",
    )

    # --- Combination state ---
    impact_level: Optional[ImpactLevel] = Field(
        default=None,
        description="Calculated impact level of the combined dataset",
    )
    # Starts out as "processing" unless a status is supplied.
    status: str = Field(
        default="processing",
        description="Status of the dataset combination process",
    )
    combination_strategy: str = Field(
        default=...,
        description="Strategy used when combining datasets",
    )
    metrics: Optional[DatasetMetrics] = Field(
        default=None,
        description="Metrics for the combined dataset",
    )

    # --- Storage location (optional) ---
    storage_bucket_id: Optional[str] = Field(
        default=None,
        description="ID of the storage bucket containing dataset files",
    )
    storage_folder_path: Optional[str] = Field(
        default=None,
        description="Path to the dataset files within the bucket",
    )

    class Config:
        # Accept fields beyond those declared above, for flexibility.
        extra = "allow"
# Names exported by ``from <this module> import *``, including the two
# types re-exported from ``app.schemas.dataset_common``.
__all__ = [
    "ImpactLevel",
    "ImpactAssessment",
    "DatasetInfo",
    "DatasetMetrics",
    "Dataset",
    "DatasetCreate",
    "DatasetUpdate",
    "DatasetCombineRequest",
    "CombinedDataset",
]