Fine-tuned-PaddleOCRv5 / fine_tune_text_recognition.py
MinhDS's picture
initial commit
eddf5b2 verified
#!/usr/bin/env python3
"""
Fine-tuning Script for PaddleOCR Text Recognition Models
Based on the Text Recognition Module Tutorial
This script provides a complete pipeline for fine-tuning text recognition models:
1. Dataset preparation and validation
2. Model training with custom configurations
3. Model evaluation
4. Model export for inference
Supported models: PP-OCRv5_server_rec, PP-OCRv5_mobile_rec, PP-OCRv4_server_rec, etc.
"""
import os
import sys
import argparse
import yaml
import wget
import tarfile
import subprocess
from pathlib import Path
import logging
# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
class TextRecognitionFineTuner:
def __init__(self, config_path=None, model_name="PP-OCRv5_server_rec", work_dir="./work_dir"):
"""
Initialize the fine-tuner
Args:
config_path: Path to custom config file
model_name: Name of the model to fine-tune
work_dir: Working directory for outputs
"""
self.model_name = model_name
self.work_dir = Path(work_dir)
self.work_dir.mkdir(exist_ok=True)
# Model configurations mapping
self.model_configs = {
"PP-OCRv5_server_rec": {
"config": "configs/rec/PP-OCRv5/PP-OCRv5_server_rec.yml",
"pretrained_url": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PP-OCRv5_server_rec_pretrained.pdparams"
},
"PP-OCRv5_mobile_rec": {
"config": "configs/rec/PP-OCRv5/PP-OCRv5_mobile_rec.yml",
"pretrained_url": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PP-OCRv5_mobile_rec_pretrained.pdparams"
},
"PP-OCRv4_server_rec": {
"config": "configs/rec/PP-OCRv4/PP-OCRv4_server_rec.yml",
"pretrained_url": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PP-OCRv4_server_rec_pretrained.pdparams"
},
"PP-OCRv4_mobile_rec": {
"config": "configs/rec/PP-OCRv4/PP-OCRv4_mobile_rec.yml",
"pretrained_url": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PP-OCRv4_mobile_rec_pretrained.pdparams"
}
}
self.config_path = config_path or self.model_configs[model_name]["config"]
self.pretrained_path = self.work_dir / f"{model_name}_pretrained.pdparams"
def prepare_demo_dataset(self):
"""Download and prepare demo dataset"""
logger.info("Preparing demo dataset...")
dataset_url = "https://paddle-model-ecology.bj.bcebos.com/paddlex/data/ocr_rec_dataset_examples.tar"
dataset_path = self.work_dir / "ocr_rec_dataset_examples.tar"
if not dataset_path.exists():
logger.info(f"Downloading dataset from {dataset_url}...")
wget.download(dataset_url, str(dataset_path))
# Extract dataset
extract_path = self.work_dir / "dataset"
if not extract_path.exists():
logger.info("Extracting dataset...")
with tarfile.open(dataset_path, 'r') as tar:
tar.extractall(self.work_dir)
# Rename extracted folder
extracted_folder = self.work_dir / "ocr_rec_dataset_examples"
if extracted_folder.exists():
extracted_folder.rename(extract_path)
logger.info(f"Dataset prepared at {extract_path}")
return extract_path
def download_pretrained_model(self):
"""Download pretrained model weights"""
if self.pretrained_path.exists():
logger.info(f"Pretrained model already exists at {self.pretrained_path}")
return self.pretrained_path
logger.info(f"Downloading pretrained model for {self.model_name}...")
pretrained_url = self.model_configs[self.model_name]["pretrained_url"]
wget.download(pretrained_url, str(self.pretrained_path))
logger.info(f"Pretrained model downloaded to {self.pretrained_path}")
return self.pretrained_path
def create_custom_config(self, dataset_path, custom_params=None):
"""
Create custom training configuration
Args:
dataset_path: Path to training dataset
custom_params: Dictionary of custom parameters to override
"""
logger.info("Creating custom configuration...")
# Default custom parameters
default_params = {
"Global": {
"epoch_num": 20,
"log_smooth_window": 20,
"print_batch_step": 10,
"save_model_dir": str(self.work_dir / "output"),
"save_epoch_step": 5,
"eval_batch_step": [0, 2000],
"cal_metric_during_train": True,
"pretrained_model": str(self.pretrained_path),
"checkpoints": None,
"use_visualdl": False,
"infer_img": str(dataset_path / "test_imgs"),
"character_dict_path": str(dataset_path / "character_dict.txt"),
"character_type": "ch",
"max_text_length": 25,
"infer_mode": False,
"use_space_char": True,
"distributed": False,
"save_res_path": str(self.work_dir / "output" / "predicts_rec.txt")
},
"Train": {
"dataset": {
"name": "SimpleDataSet",
"data_dir": str(dataset_path),
"label_file_list": [str(dataset_path / "train_list.txt")],
"transforms": [
{"DecodeImage": {"img_mode": "BGR", "channel_first": False}},
{"RecConAug": {"prob": 0.5, "ext_data_num": 2, "image_shape": [48, 320, 3]}},
{"RecAug": {}},
{"MultiLabelEncode": {}},
{"RecResizeImg": {"image_shape": [3, 48, 320]}},
{"KeepKeys": {"keep_keys": ["image", "label_list", "length"]}}
]
},
"loader": {
"shuffle": True,
"batch_size_per_card": 256,
"drop_last": True,
"num_workers": 4
}
},
"Eval": {
"dataset": {
"name": "SimpleDataSet",
"data_dir": str(dataset_path),
"label_file_list": [str(dataset_path / "val_list.txt")],
"transforms": [
{"DecodeImage": {"img_mode": "BGR", "channel_first": False}},
{"MultiLabelEncode": {}},
{"RecResizeImg": {"image_shape": [3, 48, 320]}},
{"KeepKeys": {"keep_keys": ["image", "label_list", "length"]}}
]
},
"loader": {
"shuffle": False,
"drop_last": False,
"batch_size_per_card": 256,
"num_workers": 4
}
}
}
# Merge with custom parameters
if custom_params:
self._deep_update(default_params, custom_params)
# Save custom config
custom_config_path = self.work_dir / f"{self.model_name}_custom.yml"
with open(custom_config_path, 'w', encoding='utf-8') as f:
yaml.dump(default_params, f, default_flow_style=False, allow_unicode=True)
logger.info(f"Custom configuration saved to {custom_config_path}")
return custom_config_path
def _deep_update(self, base_dict, update_dict):
"""Recursively update nested dictionary"""
for key, value in update_dict.items():
if isinstance(value, dict) and key in base_dict and isinstance(base_dict[key], dict):
self._deep_update(base_dict[key], value)
else:
base_dict[key] = value
def train(self, config_path, gpus="0", resume_from=None):
"""
Train the model
Args:
config_path: Path to configuration file
gpus: GPU IDs to use (e.g., "0" or "0,1,2,3")
resume_from: Path to checkpoint to resume from
"""
logger.info(f"Starting training with GPUs: {gpus}")
# Prepare training command
if len(gpus.split(',')) > 1:
# Multi-GPU training
cmd = [
"python3", "-m", "paddle.distributed.launch",
"--gpus", gpus,
"tools/train.py",
"-c", str(config_path)
]
else:
# Single GPU training
cmd = [
"python3", "tools/train.py",
"-c", str(config_path)
]
# Add resume option if provided
if resume_from:
cmd.extend(["-o", f"Global.checkpoints={resume_from}"])
# Set environment variable for GPU
env = os.environ.copy()
env["CUDA_VISIBLE_DEVICES"] = gpus
logger.info(f"Training command: {' '.join(cmd)}")
try:
result = subprocess.run(cmd, env=env, check=True, capture_output=False)
logger.info("Training completed successfully!")
return True
except subprocess.CalledProcessError as e:
logger.error(f"Training failed with error: {e}")
return False
def evaluate(self, config_path, checkpoint_path, gpus="0"):
"""
Evaluate the trained model
Args:
config_path: Path to configuration file
checkpoint_path: Path to model checkpoint
gpus: GPU IDs to use
"""
logger.info(f"Starting evaluation...")
cmd = [
"python3", "tools/eval.py",
"-c", str(config_path),
"-o", f"Global.pretrained_model={checkpoint_path}"
]
# Set environment variable for GPU
env = os.environ.copy()
env["CUDA_VISIBLE_DEVICES"] = gpus
logger.info(f"Evaluation command: {' '.join(cmd)}")
try:
result = subprocess.run(cmd, env=env, check=True, capture_output=True, text=True)
logger.info("Evaluation completed successfully!")
logger.info(f"Evaluation results:\n{result.stdout}")
return True
except subprocess.CalledProcessError as e:
logger.error(f"Evaluation failed with error: {e}")
logger.error(f"Error output: {e.stderr}")
return False
def export_model(self, config_path, checkpoint_path, output_dir=None):
"""
Export trained model for inference
Args:
config_path: Path to configuration file
checkpoint_path: Path to trained model checkpoint
output_dir: Directory to save exported model
"""
if output_dir is None:
output_dir = self.work_dir / f"{self.model_name}_infer"
logger.info(f"Exporting model to {output_dir}")
cmd = [
"python3", "tools/export_model.py",
"-c", str(config_path),
"-o", f"Global.pretrained_model={checkpoint_path}",
"-o", f"Global.save_inference_dir={output_dir}"
]
logger.info(f"Export command: {' '.join(cmd)}")
try:
result = subprocess.run(cmd, check=True, capture_output=True, text=True)
logger.info("Model export completed successfully!")
logger.info(f"Exported model saved to {output_dir}")
# List exported files
if Path(output_dir).exists():
exported_files = list(Path(output_dir).glob("*"))
logger.info(f"Exported files: {[f.name for f in exported_files]}")
return True
except subprocess.CalledProcessError as e:
logger.error(f"Model export failed with error: {e}")
logger.error(f"Error output: {e.stderr}")
return False
def run_complete_pipeline(self, custom_params=None, gpus="0", skip_demo_data=False):
"""
Run the complete fine-tuning pipeline
Args:
custom_params: Custom parameters to override defaults
gpus: GPU IDs to use
skip_demo_data: Whether to skip demo data preparation
"""
logger.info("=== Starting Complete Fine-tuning Pipeline ===")
try:
# Step 1: Prepare dataset
if not skip_demo_data:
dataset_path = self.prepare_demo_dataset()
else:
dataset_path = Path(custom_params.get("dataset_path", "./dataset")) # Use custom dataset path
# Step 2: Download pretrained model
self.download_pretrained_model()
# Step 3: Create custom configuration
config_path = self.create_custom_config(dataset_path, custom_params)
# Step 4: Train model
logger.info("=== Starting Training ===")
training_success = self.train(config_path, gpus)
if not training_success:
logger.error("Training failed. Stopping pipeline.")
return False
# Step 5: Find best checkpoint
output_dir = self.work_dir / "output"
checkpoints = list(output_dir.glob("**/best_accuracy.pdparams"))
if not checkpoints:
# Try to find latest checkpoint
checkpoints = list(output_dir.glob("**/latest.pdparams"))
if not checkpoints:
logger.error("No checkpoint found for evaluation and export.")
return False
best_checkpoint = checkpoints[0]
logger.info(f"Using checkpoint: {best_checkpoint}")
# Step 6: Evaluate model
logger.info("=== Starting Evaluation ===")
self.evaluate(config_path, best_checkpoint, gpus)
# Step 7: Export model
logger.info("=== Starting Model Export ===")
self.export_model(config_path, best_checkpoint)
logger.info("=== Complete Pipeline Finished Successfully ===")
return True
except Exception as e:
logger.error(f"Pipeline failed with error: {e}")
return False
def main():
parser = argparse.ArgumentParser(description="Fine-tune PaddleOCR Text Recognition Models")
parser.add_argument("--model_name", type=str, default="PP-OCRv5_server_rec",
choices=["PP-OCRv5_server_rec", "PP-OCRv5_mobile_rec",
"PP-OCRv4_server_rec", "PP-OCRv4_mobile_rec"],
help="Model name to fine-tune")
parser.add_argument("--work_dir", type=str, default="./work_dir",
help="Working directory for outputs")
parser.add_argument("--gpus", type=str, default="0",
help="GPU IDs to use (e.g., '0' or '0,1,2,3')")
parser.add_argument("--config", type=str, default=None,
help="Path to custom config file")
parser.add_argument("--skip_demo_data", action="store_true",
help="Skip demo data preparation (use your own dataset)")
parser.add_argument("--dataset_path", type=str, default="./dataset",
help="Path to custom dataset directory")
parser.add_argument("--mode", type=str, default="complete",
choices=["complete", "train", "eval", "export"],
help="Mode to run")
parser.add_argument("--checkpoint", type=str, default=None,
help="Checkpoint path for evaluation/export")
args = parser.parse_args()
# Initialize fine-tuner
fine_tuner = TextRecognitionFineTuner(
config_path=args.config,
model_name=args.model_name,
work_dir=args.work_dir
)
# Example custom parameters (you can modify these)
custom_params = {
"dataset_path": args.dataset_path, # Add dataset path to custom params
"Global": {
"epoch_num": 10, # Reduce epochs for faster training
"save_epoch_step": 2,
"eval_batch_step": [0, 1000]
},
"Train": {
"loader": {
"batch_size_per_card": 128 # Reduce batch size if GPU memory is limited
}
}
}
if args.mode == "complete":
# Run complete pipeline
success = fine_tuner.run_complete_pipeline(
custom_params=custom_params,
gpus=args.gpus,
skip_demo_data=args.skip_demo_data
)
sys.exit(0 if success else 1)
elif args.mode == "train":
# Training only
if not args.skip_demo_data:
dataset_path = fine_tuner.prepare_demo_dataset()
else:
dataset_path = Path(args.dataset_path)
fine_tuner.download_pretrained_model()
config_path = fine_tuner.create_custom_config(dataset_path, custom_params)
success = fine_tuner.train(config_path, args.gpus)
sys.exit(0 if success else 1)
elif args.mode == "eval":
# Evaluation only
if not args.checkpoint:
logger.error("Checkpoint path required for evaluation mode")
sys.exit(1)
config_path = args.config or fine_tuner.config_path
success = fine_tuner.evaluate(config_path, args.checkpoint, args.gpus)
sys.exit(0 if success else 1)
elif args.mode == "export":
# Export only
if not args.checkpoint:
logger.error("Checkpoint path required for export mode")
sys.exit(1)
config_path = args.config or fine_tuner.config_path
success = fine_tuner.export_model(config_path, args.checkpoint)
sys.exit(0 if success else 1)
if __name__ == "__main__":
main()