Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """ | |
| Fine-tuning Script for PaddleOCR Text Recognition Models | |
| Based on the Text Recognition Module Tutorial | |
| This script provides a complete pipeline for fine-tuning text recognition models: | |
| 1. Dataset preparation and validation | |
| 2. Model training with custom configurations | |
| 3. Model evaluation | |
| 4. Model export for inference | |
| Supported models: PP-OCRv5_server_rec, PP-OCRv5_mobile_rec, PP-OCRv4_server_rec, etc. | |
| """ | |
| import os | |
| import sys | |
| import argparse | |
| import yaml | |
| import wget | |
| import tarfile | |
| import subprocess | |
| from pathlib import Path | |
| import logging | |
| # Configure logging once at import time: INFO level, "timestamp - LEVEL - message" line format. | |
| logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') | |
| # Module-level logger used by TextRecognitionFineTuner and main() below. | |
| logger = logging.getLogger(__name__) | |
class TextRecognitionFineTuner:
    """Drive a PaddleOCR text-recognition fine-tuning run from Python.

    The class prepares a dataset, downloads pretrained weights, writes a YAML
    training configuration and then shells out to ``tools/train.py``,
    ``tools/eval.py`` and ``tools/export_model.py``. Those script paths are
    relative, so the process must be started from a directory that contains
    ``tools/``.
    NOTE(review): presumably the PaddleOCR repository root -- confirm.
    """

    # Keys callers may place in ``custom_params`` that steer the pipeline
    # itself and must not be written into the generated YAML configuration.
    _PIPELINE_ONLY_KEYS = frozenset({"dataset_path"})

    def __init__(self, config_path=None, model_name="PP-OCRv5_server_rec", work_dir="./work_dir"):
        """
        Initialize the fine-tuner and create the working directory.

        Args:
            config_path: Path to a custom config file. When omitted, the
                built-in config path for ``model_name`` is used.
            model_name: Name of the model to fine-tune.
            work_dir: Working directory for downloads and outputs. Missing
                parent directories are created as well.

        Raises:
            KeyError: If ``config_path`` is not given and ``model_name`` is
                not one of the built-in models.
        """
        self.model_name = model_name
        self.work_dir = Path(work_dir)
        # parents=True so that a nested work_dir such as "runs/exp1" works.
        self.work_dir.mkdir(parents=True, exist_ok=True)

        # Model configurations mapping: config file inside the repository and
        # URL of the matching pretrained weights.
        self.model_configs = {
            "PP-OCRv5_server_rec": {
                "config": "configs/rec/PP-OCRv5/PP-OCRv5_server_rec.yml",
                "pretrained_url": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PP-OCRv5_server_rec_pretrained.pdparams"
            },
            "PP-OCRv5_mobile_rec": {
                "config": "configs/rec/PP-OCRv5/PP-OCRv5_mobile_rec.yml",
                "pretrained_url": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PP-OCRv5_mobile_rec_pretrained.pdparams"
            },
            "PP-OCRv4_server_rec": {
                "config": "configs/rec/PP-OCRv4/PP-OCRv4_server_rec.yml",
                "pretrained_url": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PP-OCRv4_server_rec_pretrained.pdparams"
            },
            "PP-OCRv4_mobile_rec": {
                "config": "configs/rec/PP-OCRv4/PP-OCRv4_mobile_rec.yml",
                "pretrained_url": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PP-OCRv4_mobile_rec_pretrained.pdparams"
            }
        }

        self.config_path = config_path or self.model_configs[model_name]["config"]
        self.pretrained_path = self.work_dir / f"{model_name}_pretrained.pdparams"

    def prepare_demo_dataset(self):
        """
        Download and unpack the demo recognition dataset.

        The archive is cached in the working directory; neither the download
        nor the extraction is repeated when its output already exists.

        Returns:
            Path: ``work_dir / "dataset"``.
        """
        logger.info("Preparing demo dataset...")

        dataset_url = "https://paddle-model-ecology.bj.bcebos.com/paddlex/data/ocr_rec_dataset_examples.tar"
        dataset_path = self.work_dir / "ocr_rec_dataset_examples.tar"

        if not dataset_path.exists():
            logger.info(f"Downloading dataset from {dataset_url}...")
            wget.download(dataset_url, str(dataset_path))

        # Extract dataset
        extract_path = self.work_dir / "dataset"
        if not extract_path.exists():
            logger.info("Extracting dataset...")
            with tarfile.open(dataset_path, 'r') as tar:
                # Use the "data" extraction filter when the interpreter
                # supports it, so archive members cannot escape work_dir.
                if hasattr(tarfile, "data_filter"):
                    tar.extractall(self.work_dir, filter="data")
                else:
                    tar.extractall(self.work_dir)

            # Rename extracted folder
            extracted_folder = self.work_dir / "ocr_rec_dataset_examples"
            if extracted_folder.exists():
                extracted_folder.rename(extract_path)

            if not extract_path.exists():
                logger.warning(
                    f"Expected folder {extracted_folder} was not found after extraction; "
                    f"{extract_path} does not exist"
                )

        logger.info(f"Dataset prepared at {extract_path}")
        return extract_path

    def download_pretrained_model(self):
        """
        Download the pretrained weights for ``self.model_name`` if missing.

        Returns:
            Path: Location of the weights file inside the working directory.

        Raises:
            KeyError: If a download is needed and ``self.model_name`` is not
                one of the built-in models.
        """
        if self.pretrained_path.exists():
            logger.info(f"Pretrained model already exists at {self.pretrained_path}")
            return self.pretrained_path

        logger.info(f"Downloading pretrained model for {self.model_name}...")
        pretrained_url = self.model_configs[self.model_name]["pretrained_url"]
        wget.download(pretrained_url, str(self.pretrained_path))
        logger.info(f"Pretrained model downloaded to {self.pretrained_path}")
        return self.pretrained_path

    def create_custom_config(self, dataset_path, custom_params=None):
        """
        Create a custom training configuration and write it as YAML.

        NOTE(review): the written file only contains the ``Global``, ``Train``
        and ``Eval`` sections built here (plus whatever ``custom_params``
        adds); confirm that the training tools accept such a config.

        Args:
            dataset_path: Path to the training dataset directory (``str`` or
                ``Path``). ``train_list.txt``, ``val_list.txt``,
                ``character_dict.txt`` and ``test_imgs`` are referenced
                relative to it.
            custom_params: Dictionary of custom parameters to override,
                merged recursively over the defaults. Pipeline-only keys such
                as ``dataset_path`` are ignored.

        Returns:
            Path: ``work_dir / "<model_name>_custom.yml"``.
        """
        logger.info("Creating custom configuration...")

        # Accept plain strings as well as Path objects.
        dataset_path = Path(dataset_path)
        output_dir = self.work_dir / "output"

        # Default custom parameters
        default_params = {
            "Global": {
                "epoch_num": 20,
                "log_smooth_window": 20,
                "print_batch_step": 10,
                "save_model_dir": str(output_dir),
                "save_epoch_step": 5,
                "eval_batch_step": [0, 2000],
                "cal_metric_during_train": True,
                "pretrained_model": str(self.pretrained_path),
                "checkpoints": None,
                "use_visualdl": False,
                "infer_img": str(dataset_path / "test_imgs"),
                "character_dict_path": str(dataset_path / "character_dict.txt"),
                "character_type": "ch",
                "max_text_length": 25,
                "infer_mode": False,
                "use_space_char": True,
                "distributed": False,
                "save_res_path": str(output_dir / "predicts_rec.txt")
            },
            "Train": {
                "dataset": {
                    "name": "SimpleDataSet",
                    "data_dir": str(dataset_path),
                    "label_file_list": [str(dataset_path / "train_list.txt")],
                    "transforms": [
                        {"DecodeImage": {"img_mode": "BGR", "channel_first": False}},
                        {"RecConAug": {"prob": 0.5, "ext_data_num": 2, "image_shape": [48, 320, 3]}},
                        {"RecAug": {}},
                        {"MultiLabelEncode": {}},
                        {"RecResizeImg": {"image_shape": [3, 48, 320]}},
                        {"KeepKeys": {"keep_keys": ["image", "label_list", "length"]}}
                    ]
                },
                "loader": {
                    "shuffle": True,
                    "batch_size_per_card": 256,
                    "drop_last": True,
                    "num_workers": 4
                }
            },
            "Eval": {
                "dataset": {
                    "name": "SimpleDataSet",
                    "data_dir": str(dataset_path),
                    "label_file_list": [str(dataset_path / "val_list.txt")],
                    "transforms": [
                        {"DecodeImage": {"img_mode": "BGR", "channel_first": False}},
                        {"MultiLabelEncode": {}},
                        {"RecResizeImg": {"image_shape": [3, 48, 320]}},
                        {"KeepKeys": {"keep_keys": ["image", "label_list", "length"]}}
                    ]
                },
                "loader": {
                    "shuffle": False,
                    "drop_last": False,
                    "batch_size_per_card": 256,
                    "num_workers": 4
                }
            }
        }

        # Merge with custom parameters, leaving out keys that only steer the
        # pipeline so they do not end up in the YAML file.
        overrides = self._config_overrides(custom_params)
        if overrides:
            self._deep_update(default_params, overrides)

        # Save custom config
        custom_config_path = self.work_dir / f"{self.model_name}_custom.yml"
        with open(custom_config_path, 'w', encoding='utf-8') as f:
            yaml.dump(default_params, f, default_flow_style=False, allow_unicode=True)

        logger.info(f"Custom configuration saved to {custom_config_path}")
        return custom_config_path

    @classmethod
    def _config_overrides(cls, custom_params):
        """Return a copy of ``custom_params`` without pipeline-only keys."""
        if not custom_params:
            return {}
        return {
            key: value
            for key, value in custom_params.items()
            if key not in cls._PIPELINE_ONLY_KEYS
        }

    def _deep_update(self, base_dict, update_dict):
        """
        Recursively update ``base_dict`` in place with ``update_dict``.

        Nested dictionaries present on both sides are merged key by key;
        every other value from ``update_dict`` replaces the existing one.
        """
        for key, value in update_dict.items():
            if isinstance(value, dict) and key in base_dict and isinstance(base_dict[key], dict):
                self._deep_update(base_dict[key], value)
            else:
                base_dict[key] = value

    @staticmethod
    def _find_checkpoint(output_dir):
        """
        Locate the checkpoint to evaluate and export.

        ``best_accuracy.pdparams`` is preferred over ``latest.pdparams``.
        When several files of the same name exist below ``output_dir``, the
        most recently modified one is chosen so the result is deterministic.

        Returns:
            Path or None: The selected checkpoint, or ``None`` if none exists.
        """
        for filename in ("best_accuracy.pdparams", "latest.pdparams"):
            matches = list(Path(output_dir).glob(f"**/{filename}"))
            if matches:
                return max(matches, key=lambda path: path.stat().st_mtime)
        return None

    def train(self, config_path, gpus="0", resume_from=None):
        """
        Train the model by running ``tools/train.py`` in a subprocess.

        Args:
            config_path: Path to configuration file.
            gpus: GPU IDs to use (e.g., "0" or "0,1,2,3"). More than one ID
                switches to ``paddle.distributed.launch``.
            resume_from: Path to checkpoint to resume from.

        Returns:
            bool: ``True`` if the subprocess exited successfully, else ``False``.
        """
        logger.info(f"Starting training with GPUs: {gpus}")

        # Run the tools with the interpreter that runs this script, so the
        # same environment is used.
        python = sys.executable or "python3"

        # Prepare training command
        if len(gpus.split(',')) > 1:
            # Multi-GPU training
            cmd = [
                python, "-m", "paddle.distributed.launch",
                "--gpus", gpus,
                "tools/train.py",
                "-c", str(config_path)
            ]
        else:
            # Single GPU training
            cmd = [
                python, "tools/train.py",
                "-c", str(config_path)
            ]

        # Add resume option if provided
        if resume_from:
            cmd.extend(["-o", f"Global.checkpoints={resume_from}"])

        # Set environment variable for GPU
        env = os.environ.copy()
        env["CUDA_VISIBLE_DEVICES"] = gpus

        logger.info(f"Training command: {' '.join(cmd)}")

        try:
            # Output is not captured so training progress streams to the console.
            subprocess.run(cmd, env=env, check=True, capture_output=False)
        except subprocess.CalledProcessError as e:
            logger.error(f"Training failed with error: {e}")
            return False
        except OSError as e:
            # The interpreter itself could not be started.
            logger.error(f"Training failed with error: {e}")
            return False

        logger.info("Training completed successfully!")
        return True

    def evaluate(self, config_path, checkpoint_path, gpus="0"):
        """
        Evaluate the trained model by running ``tools/eval.py``.

        Args:
            config_path: Path to configuration file.
            checkpoint_path: Path to model checkpoint.
            gpus: GPU IDs to use.

        Returns:
            bool: ``True`` if the subprocess exited successfully, else ``False``.
        """
        logger.info("Starting evaluation...")

        cmd = [
            sys.executable or "python3", "tools/eval.py",
            "-c", str(config_path),
            "-o", f"Global.pretrained_model={checkpoint_path}"
        ]

        # Set environment variable for GPU
        env = os.environ.copy()
        env["CUDA_VISIBLE_DEVICES"] = gpus

        logger.info(f"Evaluation command: {' '.join(cmd)}")

        try:
            result = subprocess.run(cmd, env=env, check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as e:
            logger.error(f"Evaluation failed with error: {e}")
            logger.error(f"Error output: {e.stderr}")
            return False
        except OSError as e:
            logger.error(f"Evaluation failed with error: {e}")
            return False

        logger.info("Evaluation completed successfully!")
        logger.info(f"Evaluation results:\n{result.stdout}")
        return True

    def export_model(self, config_path, checkpoint_path, output_dir=None):
        """
        Export trained model for inference by running ``tools/export_model.py``.

        Args:
            config_path: Path to configuration file.
            checkpoint_path: Path to trained model checkpoint.
            output_dir: Directory to save exported model. Defaults to
                ``work_dir / "<model_name>_infer"``.

        Returns:
            bool: ``True`` if the subprocess exited successfully, else ``False``.
        """
        if output_dir is None:
            output_dir = self.work_dir / f"{self.model_name}_infer"

        logger.info(f"Exporting model to {output_dir}")

        cmd = [
            sys.executable or "python3", "tools/export_model.py",
            "-c", str(config_path),
            "-o", f"Global.pretrained_model={checkpoint_path}",
            "-o", f"Global.save_inference_dir={output_dir}"
        ]

        logger.info(f"Export command: {' '.join(cmd)}")

        try:
            subprocess.run(cmd, check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as e:
            logger.error(f"Model export failed with error: {e}")
            logger.error(f"Error output: {e.stderr}")
            return False
        except OSError as e:
            logger.error(f"Model export failed with error: {e}")
            return False

        logger.info("Model export completed successfully!")
        logger.info(f"Exported model saved to {output_dir}")

        # List exported files
        export_dir = Path(output_dir)
        if export_dir.exists():
            logger.info(f"Exported files: {[f.name for f in export_dir.glob('*')]}")

        return True

    def run_complete_pipeline(self, custom_params=None, gpus="0", skip_demo_data=False):
        """
        Run the complete fine-tuning pipeline.

        Steps: dataset preparation, pretrained-weight download, config
        generation, training, checkpoint selection, evaluation and export.

        Args:
            custom_params: Custom parameters to override defaults. The
                optional ``dataset_path`` entry names the dataset directory
                when ``skip_demo_data`` is set (default ``"./dataset"``).
            gpus: GPU IDs to use.
            skip_demo_data: Whether to skip demo data preparation.

        Returns:
            bool: ``True`` only if every step succeeded; ``False`` otherwise
            (any exception is logged and reported as ``False``).
        """
        logger.info("=== Starting Complete Fine-tuning Pipeline ===")

        # Tolerate custom_params=None everywhere below.
        params = custom_params or {}

        try:
            # Step 1: Prepare dataset
            if not skip_demo_data:
                dataset_path = self.prepare_demo_dataset()
            else:
                dataset_path = Path(params.get("dataset_path", "./dataset"))  # Use custom dataset path

            # Step 2: Download pretrained model
            self.download_pretrained_model()

            # Step 3: Create custom configuration
            config_path = self.create_custom_config(dataset_path, custom_params)

            # Step 4: Train model
            logger.info("=== Starting Training ===")
            training_success = self.train(config_path, gpus)

            if not training_success:
                logger.error("Training failed. Stopping pipeline.")
                return False

            # Step 5: Find best checkpoint. Look where the config actually
            # told training to save, honouring a save_model_dir override.
            global_overrides = params.get("Global") or {}
            output_dir = Path(global_overrides.get("save_model_dir", self.work_dir / "output"))
            best_checkpoint = self._find_checkpoint(output_dir)

            if best_checkpoint is None:
                logger.error("No checkpoint found for evaluation and export.")
                return False

            logger.info(f"Using checkpoint: {best_checkpoint}")

            # Step 6: Evaluate model
            logger.info("=== Starting Evaluation ===")
            if not self.evaluate(config_path, best_checkpoint, gpus):
                logger.error("Evaluation failed. Stopping pipeline.")
                return False

            # Step 7: Export model
            logger.info("=== Starting Model Export ===")
            if not self.export_model(config_path, best_checkpoint):
                logger.error("Model export failed. Stopping pipeline.")
                return False

            logger.info("=== Complete Pipeline Finished Successfully ===")
            return True

        except Exception as e:
            # Top-level boundary of the pipeline: log with traceback and
            # report failure instead of propagating.
            logger.error(f"Pipeline failed with error: {e}", exc_info=True)
            return False
def main():
    """
    Command-line entry point.

    Parses the arguments, builds a TextRecognitionFineTuner and runs the
    requested mode ("complete", "train", "eval" or "export"). The process
    exits with status 0 when the selected step reports success and 1
    otherwise; "eval" and "export" require --checkpoint.
    """
    supported_models = [
        "PP-OCRv5_server_rec", "PP-OCRv5_mobile_rec",
        "PP-OCRv4_server_rec", "PP-OCRv4_mobile_rec",
    ]

    cli = argparse.ArgumentParser(description="Fine-tune PaddleOCR Text Recognition Models")
    cli.add_argument("--model_name", type=str, default="PP-OCRv5_server_rec",
                     choices=supported_models,
                     help="Model name to fine-tune")
    cli.add_argument("--work_dir", type=str, default="./work_dir",
                     help="Working directory for outputs")
    cli.add_argument("--gpus", type=str, default="0",
                     help="GPU IDs to use (e.g., '0' or '0,1,2,3')")
    cli.add_argument("--config", type=str, default=None,
                     help="Path to custom config file")
    cli.add_argument("--skip_demo_data", action="store_true",
                     help="Skip demo data preparation (use your own dataset)")
    cli.add_argument("--dataset_path", type=str, default="./dataset",
                     help="Path to custom dataset directory")
    cli.add_argument("--mode", type=str, default="complete",
                     choices=["complete", "train", "eval", "export"],
                     help="Mode to run")
    cli.add_argument("--checkpoint", type=str, default=None,
                     help="Checkpoint path for evaluation/export")
    args = cli.parse_args()

    # The tuner is created first; its constructor sets up the work directory.
    fine_tuner = TextRecognitionFineTuner(
        config_path=args.config,
        model_name=args.model_name,
        work_dir=args.work_dir,
    )

    # Example overrides applied on top of the generated defaults; edit as needed.
    global_overrides = {
        "epoch_num": 10,  # fewer epochs for a quicker run
        "save_epoch_step": 2,
        "eval_batch_step": [0, 1000],
    }
    loader_overrides = {
        "batch_size_per_card": 128,  # lower this if GPU memory is limited
    }
    custom_params = {
        "dataset_path": args.dataset_path,  # read by the complete pipeline
        "Global": global_overrides,
        "Train": {"loader": loader_overrides},
    }

    mode = args.mode

    # Modes that operate on an existing checkpoint cannot run without one.
    missing_checkpoint_message = {
        "eval": "Checkpoint path required for evaluation mode",
        "export": "Checkpoint path required for export mode",
    }
    if mode in missing_checkpoint_message and not args.checkpoint:
        logger.error(missing_checkpoint_message[mode])
        sys.exit(1)

    if mode == "complete":
        # Whole pipeline: data, weights, config, train, eval, export.
        success = fine_tuner.run_complete_pipeline(
            custom_params=custom_params,
            gpus=args.gpus,
            skip_demo_data=args.skip_demo_data,
        )
    elif mode == "train":
        # Training only.
        if args.skip_demo_data:
            dataset_path = Path(args.dataset_path)
        else:
            dataset_path = fine_tuner.prepare_demo_dataset()
        fine_tuner.download_pretrained_model()
        config_path = fine_tuner.create_custom_config(dataset_path, custom_params)
        success = fine_tuner.train(config_path, args.gpus)
    elif mode == "eval":
        # Evaluation only.
        config_path = args.config or fine_tuner.config_path
        success = fine_tuner.evaluate(config_path, args.checkpoint, args.gpus)
    elif mode == "export":
        # Export only.
        config_path = args.config or fine_tuner.config_path
        success = fine_tuner.export_model(config_path, args.checkpoint)
    else:
        # Not reachable through argparse's choices; nothing to do.
        return

    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()