Spaces:

MinhDS
/

Fine-tuned-PaddleOCRv5

Running

App Files Files Community

Fine-tuned-PaddleOCRv5 / fine_tune_text_recognition.py

MinhDS

initial commit

eddf5b2 verified 4 months ago

raw

history blame contribute delete

18.7 kB

	#!/usr/bin/env python3
	"""
	Fine-tuning Script for PaddleOCR Text Recognition Models
	Based on the Text Recognition Module Tutorial

	This script provides a complete pipeline for fine-tuning text recognition models:
	1. Dataset preparation and validation
	2. Model training with custom configurations
	3. Model evaluation
	4. Model export for inference

	Supported models: PP-OCRv5_server_rec, PP-OCRv5_mobile_rec, PP-OCRv4_server_rec, etc.
	"""

	import os
	import sys
	import argparse
	import yaml
	import wget
	import tarfile
	import subprocess
	from pathlib import Path
	import logging

	# Set up logging
	logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
	logger = logging.getLogger(__name__)

	class TextRecognitionFineTuner:
	def __init__(self, config_path=None, model_name="PP-OCRv5_server_rec", work_dir="./work_dir"):
	"""
	Initialize the fine-tuner

	Args:
	config_path: Path to custom config file
	model_name: Name of the model to fine-tune
	work_dir: Working directory for outputs
	"""
	self.model_name = model_name
	self.work_dir = Path(work_dir)
	self.work_dir.mkdir(exist_ok=True)

	# Model configurations mapping
	self.model_configs = {
	"PP-OCRv5_server_rec": {
	"config": "configs/rec/PP-OCRv5/PP-OCRv5_server_rec.yml",
	"pretrained_url": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PP-OCRv5_server_rec_pretrained.pdparams"
	},
	"PP-OCRv5_mobile_rec": {
	"config": "configs/rec/PP-OCRv5/PP-OCRv5_mobile_rec.yml",
	"pretrained_url": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PP-OCRv5_mobile_rec_pretrained.pdparams"
	},
	"PP-OCRv4_server_rec": {
	"config": "configs/rec/PP-OCRv4/PP-OCRv4_server_rec.yml",
	"pretrained_url": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PP-OCRv4_server_rec_pretrained.pdparams"
	},
	"PP-OCRv4_mobile_rec": {
	"config": "configs/rec/PP-OCRv4/PP-OCRv4_mobile_rec.yml",
	"pretrained_url": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PP-OCRv4_mobile_rec_pretrained.pdparams"
	}
	}

	self.config_path = config_path or self.model_configs[model_name]["config"]
	self.pretrained_path = self.work_dir / f"{model_name}_pretrained.pdparams"

	def prepare_demo_dataset(self):
	"""Download and prepare demo dataset"""
	logger.info("Preparing demo dataset...")

	dataset_url = "https://paddle-model-ecology.bj.bcebos.com/paddlex/data/ocr_rec_dataset_examples.tar"
	dataset_path = self.work_dir / "ocr_rec_dataset_examples.tar"

	if not dataset_path.exists():
	logger.info(f"Downloading dataset from {dataset_url}...")
	wget.download(dataset_url, str(dataset_path))

	# Extract dataset
	extract_path = self.work_dir / "dataset"
	if not extract_path.exists():
	logger.info("Extracting dataset...")
	with tarfile.open(dataset_path, 'r') as tar:
	tar.extractall(self.work_dir)

	# Rename extracted folder
	extracted_folder = self.work_dir / "ocr_rec_dataset_examples"
	if extracted_folder.exists():
	extracted_folder.rename(extract_path)

	logger.info(f"Dataset prepared at {extract_path}")
	return extract_path

	def download_pretrained_model(self):
	"""Download pretrained model weights"""
	if self.pretrained_path.exists():
	logger.info(f"Pretrained model already exists at {self.pretrained_path}")
	return self.pretrained_path

	logger.info(f"Downloading pretrained model for {self.model_name}...")
	pretrained_url = self.model_configs[self.model_name]["pretrained_url"]

	wget.download(pretrained_url, str(self.pretrained_path))
	logger.info(f"Pretrained model downloaded to {self.pretrained_path}")
	return self.pretrained_path

	def create_custom_config(self, dataset_path, custom_params=None):
	"""
	Create custom training configuration

	Args:
	dataset_path: Path to training dataset
	custom_params: Dictionary of custom parameters to override
	"""
	logger.info("Creating custom configuration...")

	# Default custom parameters
	default_params = {
	"Global": {
	"epoch_num": 20,
	"log_smooth_window": 20,
	"print_batch_step": 10,
	"save_model_dir": str(self.work_dir / "output"),
	"save_epoch_step": 5,
	"eval_batch_step": [0, 2000],
	"cal_metric_during_train": True,
	"pretrained_model": str(self.pretrained_path),
	"checkpoints": None,
	"use_visualdl": False,
	"infer_img": str(dataset_path / "test_imgs"),
	"character_dict_path": str(dataset_path / "character_dict.txt"),
	"character_type": "ch",
	"max_text_length": 25,
	"infer_mode": False,
	"use_space_char": True,
	"distributed": False,
	"save_res_path": str(self.work_dir / "output" / "predicts_rec.txt")
	},
	"Train": {
	"dataset": {
	"name": "SimpleDataSet",
	"data_dir": str(dataset_path),
	"label_file_list": [str(dataset_path / "train_list.txt")],
	"transforms": [
	{"DecodeImage": {"img_mode": "BGR", "channel_first": False}},
	{"RecConAug": {"prob": 0.5, "ext_data_num": 2, "image_shape": [48, 320, 3]}},
	{"RecAug": {}},
	{"MultiLabelEncode": {}},
	{"RecResizeImg": {"image_shape": [3, 48, 320]}},
	{"KeepKeys": {"keep_keys": ["image", "label_list", "length"]}}
	]
	},
	"loader": {
	"shuffle": True,
	"batch_size_per_card": 256,
	"drop_last": True,
	"num_workers": 4
	}
	},
	"Eval": {
	"dataset": {
	"name": "SimpleDataSet",
	"data_dir": str(dataset_path),
	"label_file_list": [str(dataset_path / "val_list.txt")],
	"transforms": [
	{"DecodeImage": {"img_mode": "BGR", "channel_first": False}},
	{"MultiLabelEncode": {}},
	{"RecResizeImg": {"image_shape": [3, 48, 320]}},
	{"KeepKeys": {"keep_keys": ["image", "label_list", "length"]}}
	]
	},
	"loader": {
	"shuffle": False,
	"drop_last": False,
	"batch_size_per_card": 256,
	"num_workers": 4
	}
	}
	}

	# Merge with custom parameters
	if custom_params:
	self._deep_update(default_params, custom_params)

	# Save custom config
	custom_config_path = self.work_dir / f"{self.model_name}_custom.yml"
	with open(custom_config_path, 'w', encoding='utf-8') as f:
	yaml.dump(default_params, f, default_flow_style=False, allow_unicode=True)

	logger.info(f"Custom configuration saved to {custom_config_path}")
	return custom_config_path

	def _deep_update(self, base_dict, update_dict):
	"""Recursively update nested dictionary"""
	for key, value in update_dict.items():
	if isinstance(value, dict) and key in base_dict and isinstance(base_dict[key], dict):
	self._deep_update(base_dict[key], value)
	else:
	base_dict[key] = value

	def train(self, config_path, gpus="0", resume_from=None):
	"""
	Train the model

	Args:
	config_path: Path to configuration file
	gpus: GPU IDs to use (e.g., "0" or "0,1,2,3")
	resume_from: Path to checkpoint to resume from
	"""
	logger.info(f"Starting training with GPUs: {gpus}")

	# Prepare training command
	if len(gpus.split(',')) > 1:
	# Multi-GPU training
	cmd = [
	"python3", "-m", "paddle.distributed.launch",
	"--gpus", gpus,
	"tools/train.py",
	"-c", str(config_path)
	]
	else:
	# Single GPU training
	cmd = [
	"python3", "tools/train.py",
	"-c", str(config_path)
	]

	# Add resume option if provided
	if resume_from:
	cmd.extend(["-o", f"Global.checkpoints={resume_from}"])

	# Set environment variable for GPU
	env = os.environ.copy()
	env["CUDA_VISIBLE_DEVICES"] = gpus

	logger.info(f"Training command: {' '.join(cmd)}")

	try:
	result = subprocess.run(cmd, env=env, check=True, capture_output=False)
	logger.info("Training completed successfully!")
	return True
	except subprocess.CalledProcessError as e:
	logger.error(f"Training failed with error: {e}")
	return False

	def evaluate(self, config_path, checkpoint_path, gpus="0"):
	"""
	Evaluate the trained model

	Args:
	config_path: Path to configuration file
	checkpoint_path: Path to model checkpoint
	gpus: GPU IDs to use
	"""
	logger.info(f"Starting evaluation...")

	cmd = [
	"python3", "tools/eval.py",
	"-c", str(config_path),
	"-o", f"Global.pretrained_model={checkpoint_path}"
	]

	# Set environment variable for GPU
	env = os.environ.copy()
	env["CUDA_VISIBLE_DEVICES"] = gpus

	logger.info(f"Evaluation command: {' '.join(cmd)}")

	try:
	result = subprocess.run(cmd, env=env, check=True, capture_output=True, text=True)
	logger.info("Evaluation completed successfully!")
	logger.info(f"Evaluation results:\n{result.stdout}")
	return True
	except subprocess.CalledProcessError as e:
	logger.error(f"Evaluation failed with error: {e}")
	logger.error(f"Error output: {e.stderr}")
	return False

	def export_model(self, config_path, checkpoint_path, output_dir=None):
	"""
	Export trained model for inference

	Args:
	config_path: Path to configuration file
	checkpoint_path: Path to trained model checkpoint
	output_dir: Directory to save exported model
	"""
	if output_dir is None:
	output_dir = self.work_dir / f"{self.model_name}_infer"

	logger.info(f"Exporting model to {output_dir}")

	cmd = [
	"python3", "tools/export_model.py",
	"-c", str(config_path),
	"-o", f"Global.pretrained_model={checkpoint_path}",
	"-o", f"Global.save_inference_dir={output_dir}"
	]

	logger.info(f"Export command: {' '.join(cmd)}")

	try:
	result = subprocess.run(cmd, check=True, capture_output=True, text=True)
	logger.info("Model export completed successfully!")
	logger.info(f"Exported model saved to {output_dir}")

	# List exported files
	if Path(output_dir).exists():
	exported_files = list(Path(output_dir).glob("*"))
	logger.info(f"Exported files: {[f.name for f in exported_files]}")

	return True
	except subprocess.CalledProcessError as e:
	logger.error(f"Model export failed with error: {e}")
	logger.error(f"Error output: {e.stderr}")
	return False

	def run_complete_pipeline(self, custom_params=None, gpus="0", skip_demo_data=False):
	"""
	Run the complete fine-tuning pipeline

	Args:
	custom_params: Custom parameters to override defaults
	gpus: GPU IDs to use
	skip_demo_data: Whether to skip demo data preparation
	"""
	logger.info("=== Starting Complete Fine-tuning Pipeline ===")

	try:
	# Step 1: Prepare dataset
	if not skip_demo_data:
	dataset_path = self.prepare_demo_dataset()
	else:
	dataset_path = Path(custom_params.get("dataset_path", "./dataset")) # Use custom dataset path

	# Step 2: Download pretrained model
	self.download_pretrained_model()

	# Step 3: Create custom configuration
	config_path = self.create_custom_config(dataset_path, custom_params)

	# Step 4: Train model
	logger.info("=== Starting Training ===")
	training_success = self.train(config_path, gpus)

	if not training_success:
	logger.error("Training failed. Stopping pipeline.")
	return False

	# Step 5: Find best checkpoint
	output_dir = self.work_dir / "output"
	checkpoints = list(output_dir.glob("**/best_accuracy.pdparams"))

	if not checkpoints:
	# Try to find latest checkpoint
	checkpoints = list(output_dir.glob("**/latest.pdparams"))

	if not checkpoints:
	logger.error("No checkpoint found for evaluation and export.")
	return False

	best_checkpoint = checkpoints[0]
	logger.info(f"Using checkpoint: {best_checkpoint}")

	# Step 6: Evaluate model
	logger.info("=== Starting Evaluation ===")
	self.evaluate(config_path, best_checkpoint, gpus)

	# Step 7: Export model
	logger.info("=== Starting Model Export ===")
	self.export_model(config_path, best_checkpoint)

	logger.info("=== Complete Pipeline Finished Successfully ===")
	return True

	except Exception as e:
	logger.error(f"Pipeline failed with error: {e}")
	return False


	def main():
	parser = argparse.ArgumentParser(description="Fine-tune PaddleOCR Text Recognition Models")
	parser.add_argument("--model_name", type=str, default="PP-OCRv5_server_rec",
	choices=["PP-OCRv5_server_rec", "PP-OCRv5_mobile_rec",
	"PP-OCRv4_server_rec", "PP-OCRv4_mobile_rec"],
	help="Model name to fine-tune")
	parser.add_argument("--work_dir", type=str, default="./work_dir",
	help="Working directory for outputs")
	parser.add_argument("--gpus", type=str, default="0",
	help="GPU IDs to use (e.g., '0' or '0,1,2,3')")
	parser.add_argument("--config", type=str, default=None,
	help="Path to custom config file")
	parser.add_argument("--skip_demo_data", action="store_true",
	help="Skip demo data preparation (use your own dataset)")
	parser.add_argument("--dataset_path", type=str, default="./dataset",
	help="Path to custom dataset directory")
	parser.add_argument("--mode", type=str, default="complete",
	choices=["complete", "train", "eval", "export"],
	help="Mode to run")
	parser.add_argument("--checkpoint", type=str, default=None,
	help="Checkpoint path for evaluation/export")

	args = parser.parse_args()

	# Initialize fine-tuner
	fine_tuner = TextRecognitionFineTuner(
	config_path=args.config,
	model_name=args.model_name,
	work_dir=args.work_dir
	)

	# Example custom parameters (you can modify these)
	custom_params = {
	"dataset_path": args.dataset_path, # Add dataset path to custom params
	"Global": {
	"epoch_num": 10, # Reduce epochs for faster training
	"save_epoch_step": 2,
	"eval_batch_step": [0, 1000]
	},
	"Train": {
	"loader": {
	"batch_size_per_card": 128 # Reduce batch size if GPU memory is limited
	}
	}
	}

	if args.mode == "complete":
	# Run complete pipeline
	success = fine_tuner.run_complete_pipeline(
	custom_params=custom_params,
	gpus=args.gpus,
	skip_demo_data=args.skip_demo_data
	)
	sys.exit(0 if success else 1)

	elif args.mode == "train":
	# Training only
	if not args.skip_demo_data:
	dataset_path = fine_tuner.prepare_demo_dataset()
	else:
	dataset_path = Path(args.dataset_path)

	fine_tuner.download_pretrained_model()
	config_path = fine_tuner.create_custom_config(dataset_path, custom_params)
	success = fine_tuner.train(config_path, args.gpus)
	sys.exit(0 if success else 1)

	elif args.mode == "eval":
	# Evaluation only
	if not args.checkpoint:
	logger.error("Checkpoint path required for evaluation mode")
	sys.exit(1)
	config_path = args.config or fine_tuner.config_path
	success = fine_tuner.evaluate(config_path, args.checkpoint, args.gpus)
	sys.exit(0 if success else 1)

	elif args.mode == "export":
	# Export only
	if not args.checkpoint:
	logger.error("Checkpoint path required for export mode")
	sys.exit(1)
	config_path = args.config or fine_tuner.config_path
	success = fine_tuner.export_model(config_path, args.checkpoint)
	sys.exit(0 if success else 1)


	if __name__ == "__main__":
	main()