Spaces:

iroy99
/

dataset-tool

Runtime error

App Files Files Community

dataset-tool / app /schemas /dataset.py

iaroy

Deploy full application code

fdc5d7a 24 days ago

raw

history blame contribute delete

3.82 kB

	import logging
	from typing import Dict, List, Optional, Any
	from datetime import datetime
	from pydantic import BaseModel, Field

	from app.schemas.dataset_common import ImpactLevel, DatasetMetrics

	# Module-level logger, keyed by this module's import name.
	log = logging.getLogger(__name__)

	# Names of the dataset-combination strategies this module lists as supported.
	# NOTE(review): nothing visible in this file validates incoming values
	# against this list - confirm whether callers enforce it.
	SUPPORTED_STRATEGIES = [
	    "merge",
	    "intersect",
	    "filter",
	]

	class ImpactAssessment(BaseModel):
	    # Schema for one dataset's impact assessment: the assigned level, the
	    # method label used to derive it, and the metrics/thresholds behind it.

	    # --- required: which dataset, and the resulting level ---
	    dataset_id: str = Field(
	        ...,
	        description="The ID of the dataset being assessed",
	    )
	    impact_level: ImpactLevel = Field(
	        ...,
	        description="The impact level: low, medium, or high",
	    )

	    # --- optional: method label, falls back to "unknown" when omitted ---
	    assessment_method: str = Field(
	        default="unknown",
	        description="Method used to determine impact level (e.g., size_based, downloads_and_likes_based)",
	    )

	    # --- required: the metrics the assessment was based on ---
	    metrics: DatasetMetrics = Field(
	        ...,
	        description="Metrics used for impact assessment",
	    )

	    # --- optional: nested string-to-string mapping, empty by default ---
	    thresholds: Dict[str, Dict[str, str]] = Field(
	        default={},
	        description="Thresholds used for determining impact levels (for reference)",
	    )

	class DatasetInfo(BaseModel):
	    # Loosely-typed dataset record: only `id` is required, and undeclared
	    # keys are accepted through the Config below.

	    class Config:
	        # Allow extra fields from the API
	        extra = "allow"

	    id: str
	    impact_level: Optional[ImpactLevel] = None
	    # Untyped dict here, not an ImpactAssessment model instance.
	    impact_assessment: Optional[Dict] = None
	    # Add other fields as needed

	class DatasetBase(BaseModel):
	    # Fields shared by the DatasetCreate, DatasetUpdate and Dataset schemas,
	    # which all subclass this model.

	    # Required.
	    name: str
	    # Optional; both default to None when omitted.
	    description: Optional[str] = Field(default=None)
	    tags: Optional[List[str]] = Field(default=None)

	class DatasetCreate(DatasetBase):
	    # Creation payload: the DatasetBase fields plus an optional list of
	    # strings under `files` (None when omitted).
	    files: Optional[List[str]] = Field(default=None)

	class DatasetUpdate(DatasetBase):
	    # Update payload: re-declares `name` as optional so that no field
	    # inherited from DatasetBase is required for an update.
	    name: Optional[str] = Field(default=None)

	class Dataset(DatasetBase):
	    # Stored dataset record: the DatasetBase fields plus identity,
	    # ownership and optional timestamp strings.

	    class Config:
	        # Removed orm_mode = True since ORM is not used
	        pass

	    id: int  # or str depending on your ID format
	    owner_id: str  # Assuming user IDs are strings

	    # Timestamps are carried as optional strings here, not datetime objects.
	    created_at: Optional[str] = Field(default=None)
	    updated_at: Optional[str] = Field(default=None)

	class DatasetCombineRequest(BaseModel):
	    # Request body for combining several datasets into a new, named one.

	    # --- required ---
	    source_datasets: List[str] = Field(
	        ...,
	        description="List of dataset IDs to combine",
	    )
	    name: str = Field(
	        ...,
	        description="Name for the combined dataset",
	    )

	    # --- optional ---
	    description: Optional[str] = Field(
	        default=None,
	        description="Description for the combined dataset",
	    )
	    # NOTE(review): typed as a plain str defaulting to "merge"; this model
	    # does not check the value against SUPPORTED_STRATEGIES.
	    combination_strategy: str = Field(
	        default="merge",
	        description="Strategy to use when combining datasets (e.g., 'merge', 'intersect', 'filter')",
	    )
	    filter_criteria: Optional[Dict[str, Any]] = Field(
	        default=None,
	        description="Criteria for filtering when combining datasets",
	    )

	class CombinedDataset(BaseModel):
	    # Record describing a dataset produced by combining other datasets,
	    # including provenance, processing status and storage location.

	    class Config:
	        # Allow extra fields for flexibility
	        extra = "allow"

	    # --- identity ---
	    id: str = Field(
	        ...,
	        description="ID of the combined dataset",
	    )
	    name: str = Field(
	        ...,
	        description="Name of the combined dataset",
	    )
	    description: Optional[str] = Field(
	        default=None,
	        description="Description of the combined dataset",
	    )

	    # --- provenance ---
	    source_datasets: List[str] = Field(
	        ...,
	        description="IDs of the source datasets",
	    )
	    created_at: datetime = Field(
	        ...,
	        description="Creation timestamp",
	    )
	    created_by: str = Field(
	        ...,
	        description="ID of the user who created this combined dataset",
	    )

	    # --- outcome of the combination; status starts as "processing" ---
	    impact_level: Optional[ImpactLevel] = Field(
	        default=None,
	        description="Calculated impact level of the combined dataset",
	    )
	    status: str = Field(
	        default="processing",
	        description="Status of the dataset combination process",
	    )
	    combination_strategy: str = Field(
	        ...,
	        description="Strategy used when combining datasets",
	    )
	    metrics: Optional[DatasetMetrics] = Field(
	        default=None,
	        description="Metrics for the combined dataset",
	    )

	    # --- storage location, both optional ---
	    storage_bucket_id: Optional[str] = Field(
	        default=None,
	        description="ID of the storage bucket containing dataset files",
	    )
	    storage_folder_path: Optional[str] = Field(
	        default=None,
	        description="Path to the dataset files within the bucket",
	    )

	# Names exported by `from <this module> import *`. ImpactLevel and
	# DatasetMetrics are re-exports of names imported from
	# app.schemas.dataset_common.
	# NOTE(review): DatasetBase and SUPPORTED_STRATEGIES are not listed -
	# confirm that omission is intentional.
	__all__ = [
	    "ImpactLevel",
	    "ImpactAssessment",
	    "DatasetInfo",
	    "DatasetMetrics",
	    "Dataset",
	    "DatasetCreate",
	    "DatasetUpdate",
	    "DatasetCombineRequest",
	    "CombinedDataset",
	]