Spaces:

creativeforce
/

attribution-2steps-method

Sleeping

App Files Files Community

attribution-2steps-method / app /services /base.py

thanhnt-cf

fix issues in description

98ef152 4 months ago

raw

history blame contribute delete

6.48 kB

	from abc import ABC, abstractmethod
	from typing import Any, Dict, List, Type, Union

	from pydantic import BaseModel
	from app.utils.converter import to_snake_case

	from app.schemas.schema_tools import (
	convert_attribute_to_model,
	validate_json_data,
	validate_json_schema,
	)


	def cf_style_to_pydantic_percentage_shema(
	cf_style_schema: dict,
	) -> str:
	"""
	Convert CF style schema to Pydantic schema
	"""
	print(f'{cf_style_schema}')
	attributes_line_in_product = []
	values_classes = []
	for attribute, attribute_info in cf_style_schema.items():
	multiple = False
	if "list" in attribute_info.data_type:
	multiple = True
	else:
	multiple = False
	class_name = "Class_" + attribute.capitalize()
	multiple_desc = "multi-label classification" if multiple else "single-label classification"
	attribute_desc = attribute_info.description
	attribute_line = f'{attribute}: {class_name} = Field("", description="{multiple_desc}, {attribute_desc}")'

	class_code = f"""
	class {class_name}(BaseModel):

	"""
	for value in attribute_info.allowed_values:
	class_code += f" {value.lower().replace(' ', '_').replace('-', '_')}: int\n"

	values_classes.append(class_code)
	attributes_line_in_product.append(attribute_line)
	attributes_line = "\n ".join(attributes_line_in_product)
	values_classes_code = "\n".join(values_classes)
	pydantic_schema = f"""
	from pydantic import BaseModel, Field
	{values_classes_code}
	class Product(BaseModel):
	{attributes_line}
	"""
	pydantic_code = pydantic_schema.strip()
	exec(pydantic_code, globals())
	return Product

	def build_attributes_types_prompt(attributes):
	list_of_types_prompt = "\n List of attributes types:\n"
	for key, value in attributes.items():
	list_of_types_prompt += f"- {key}: {value.data_type}\n"
	return list_of_types_prompt


	class BaseAttributionService(ABC):
	@abstractmethod
	async def extract_attributes(
	self,
	attributes_model: Type[BaseModel],
	ai_model: str,
	img_urls: List[str],
	product_taxonomy: str,
	pil_images: List[Any] = None,
	appended_prompt: str = "",
	) -> Dict[str, Any]:
	pass

	@abstractmethod
	async def reevaluate_atributes(
	self,
	attributes_model: Type[BaseModel],
	ai_model: str,
	img_urls: List[str],
	product_taxonomy: str,
	pil_images: List[Any] = None,
	appended_prompt: str = "",
	) -> Dict[str, Any]:
	pass

	@abstractmethod
	async def follow_schema(
	self, schema: Dict[str, Any], data: Dict[str, Any]
	) -> Dict[str, Any]:
	pass

	async def extract_attributes_with_validation(
	self,
	attributes: Dict[str, Any],
	ai_model: str,
	img_urls: List[str],
	product_taxonomy: str,
	product_data: Dict[str, Union[str, List[str]]],
	pil_images: List[Any] = None,
	img_paths: List[str] = None,
	appended_prompt = str
	) -> Dict[str, Any]:
	# validate_json_schema(schema)

	# create mappings for keys of attributes, to make the key following naming convention of python variables
	forward_mapping = {}
	reverse_mapping = {}
	for i, key in enumerate(attributes.keys()):
	forward_mapping[key] = f'{to_snake_case(key)}_{i}'
	reverse_mapping[f'{to_snake_case(key)}_{i}'] = key

	transformed_attributes = {}
	for key, value in attributes.items():
	transformed_attributes[forward_mapping[key]] = value

	attributes_types_prompt = build_attributes_types_prompt(attributes)

	# attributes_model = convert_attribute_to_model(transformed_attributes)
	attributes_percentage_model = cf_style_to_pydantic_percentage_shema(transformed_attributes)
	schema = attributes_percentage_model.model_json_schema()
	data = await self.extract_attributes(
	attributes_percentage_model,
	ai_model,
	img_urls,
	product_taxonomy if product_taxonomy != "" else "main",
	product_data,
	# pil_images=pil_images, # temporarily removed to save cost
	img_paths=img_paths,
	appended_prompt=attributes_types_prompt
	)
	validate_json_data(data, schema)

	str_data = str(data)
	reevaluate_data = await self.reevaluate_atributes(
	attributes_percentage_model,
	ai_model,
	img_urls,
	product_taxonomy if product_taxonomy != "" else "main",
	str_data,
	# pil_images=pil_images, # temporarily removed to save cost
	img_paths=img_paths,
	appended_prompt=attributes_types_prompt
	)

	init_reevaluate_data = {}
	for field_name, field in attributes_percentage_model.model_fields.items(): # type: ignore
	print(f"{field_name}: {field.description}")
	if "single-label" in field.description.lower():
	max_percentage = 0
	for k, v in reevaluate_data[field_name].items():
	if v > max_percentage:
	max_percentage = v
	init_reevaluate_data[field_name] = k
	elif "multi-label" in field.description.lower():
	init_list = []
	for k, v in reevaluate_data[field_name].items():
	if v >= 60:
	init_list.append(k)
	init_reevaluate_data[field_name] = init_list
	else:
	assert False, f"The description does not contain 'single-label' or 'multi-label': {field.description}"

	# reverse the key mapping to the original keys
	reverse_data = {}
	for key, value in init_reevaluate_data.items():
	reverse_data[reverse_mapping[key]] = value
	return data, reverse_data

	async def follow_schema_with_validation(
	self, schema: Dict[str, Any], data: Dict[str, Any]
	) -> Dict[str, Any]:
	validate_json_schema(schema)
	data = await self.follow_schema(schema, data)
	validate_json_data(data, schema)
	return data