improve training performance of db_mv3 (#646)

panshaowu · panshaowu · web-flow · commit ec20d02bd673 · 2024-01-04T17:59:09.000+08:00
Co-authored-by: panshaowu <panshaowu@huawei.com>
diff --git a/configs/det/dbnet/db_mobilenetv3_icdar15.yaml b/configs/det/dbnet/db_mobilenetv3_icdar15.yaml
@@ -78,6 +78,7 @@ train:
     data_dir: ic15/det/train/ch4_training_images
     label_file: ic15/det/train/det_gt.txt
     sample_ratio: 1.0
+    use_minddata: True
     transform_pipeline:
       - DecodeImage:
           img_mode: RGB
@@ -135,6 +136,7 @@ eval:
     data_dir: ic15/det/test/ch4_test_images
     label_file: ic15/det/test/det_gt.txt
     sample_ratio: 1.0
+    use_minddata: True
     transform_pipeline:
       - DecodeImage:
           img_mode: RGB
diff --git a/configs/det/dbnet/db_mobilenetv3_icdar15_8p.yaml b/configs/det/dbnet/db_mobilenetv3_icdar15_8p.yaml
@@ -0,0 +1,165 @@
+system:
+  mode: 0 # 0 for graph mode, 1 for pynative mode in MindSpore
+  distribute: True
+  amp_level: 'O0'
+  seed: 42
+  log_interval: 10
+  val_while_train: True
+  val_start_epoch: 500
+  drop_overflow_update: False
+
+model:
+  type: det
+  transform: null
+  backbone:
+    name: det_mobilenet_v3
+    architecture: large
+    alpha: 0.5
+    out_stages: [5, 8, 14, 20]
+    bottleneck_params:
+      se_version: SqueezeExciteV2
+      always_expand:  True
+    pretrained: https://download.mindspore.cn/toolkits/mindcv/mobilenet/mobilenetv3/mobilenet_v3_large_050_no_scale_se_v2_expand-3c4047ac.ckpt
+  neck:
+    name: DBFPN
+    out_channels: 256
+    bias: False
+  head:
+    name: DBHead
+    k: 50
+    bias: False
+    adaptive: True
+
+postprocess:
+  name: DBPostprocess
+  box_type: quad   # whether to output a polygon or a box
+  binary_thresh: 0.3      # binarization threshold
+  box_thresh: 0.6         # box score threshold
+  max_candidates: 1000
+  expand_ratio: 1.5       # coefficient for expanding predictions
+
+metric:
+  name: DetMetric
+  main_indicator: f-score
+
+loss:
+  name: DBLoss
+  eps: 1.0e-6
+  l1_scale: 10
+  bce_scale: 5
+  bce_replace: bceloss
+
+scheduler:
+  scheduler: polynomial_decay
+  lr: 0.02
+  num_epochs: 2000
+  decay_rate: 0.9
+  warmup_epochs: 3
+
+optimizer:
+  opt: momentum
+  filter_bias_and_bn: false
+  momentum: 0.9
+  weight_decay: 1.0e-4
+
+# only used for mixed precision training
+loss_scaler:
+  type: dynamic
+  loss_scale: 512
+  scale_factor: 2
+  scale_window: 1000
+
+train:
+  ckpt_save_dir: './tmp_det'
+  dataset_sink_mode: True
+  dataset:
+    type: DetDataset
+    dataset_root: /data/ocr_datasets
+    data_dir: ic15/det/train/ch4_training_images
+    label_file: ic15/det/train/det_gt.txt
+    sample_ratio: 1.0
+    use_minddata: True
+    transform_pipeline:
+      - DecodeImage:
+          img_mode: RGB
+          to_float32: False
+      - DetLabelEncode:
+      - RandomColorAdjust:
+          brightness: 0.1255  # 32.0 / 255
+          saturation: 0.5
+      - RandomHorizontalFlip:
+          p: 0.5
+      - RandomRotate:
+          degrees: [ -10, 10 ]
+          expand_canvas: False
+          p: 1.0
+      - RandomScale:
+          scale_range: [ 0.5, 3.0 ]
+          p: 1.0
+      - RandomCropWithBBox:
+          max_tries: 10
+          min_crop_ratio: 0.1
+          crop_size: [ 640, 640 ]
+          p: 1.0
+      - ValidatePolygons:
+      - ShrinkBinaryMap:
+          min_text_size: 8
+          shrink_ratio: 0.4
+      - BorderMap:
+          shrink_ratio: 0.4
+          thresh_min: 0.3
+          thresh_max: 0.7
+      - NormalizeImage:
+          bgr_to_rgb: False
+          is_hwc: True
+          mean: imagenet
+          std: imagenet
+      - ToCHWImage:
+    #  the order of the dataloader list, matching the network input and the input labels for the loss function, and optional data for debug/visualize
+    output_columns: [ 'image', 'binary_map', 'mask', 'thresh_map', 'thresh_mask']
+#    output_columns: ['image'] # for debug op performance
+    net_input_column_index: [0] # input indices for network forward func in output_columns
+    label_column_index: [1, 2, 3, 4] # input indices marked as label
+
+  loader:
+    shuffle: True
+    batch_size: 8
+    drop_remainder: True
+    num_workers: 10
+
+eval:
+  ckpt_load_path: tmp_det/best.ckpt
+  dataset_sink_mode: False
+  dataset:
+    type: DetDataset
+    dataset_root: /data/ocr_datasets
+    data_dir: ic15/det/test/ch4_test_images
+    label_file: ic15/det/test/det_gt.txt
+    sample_ratio: 1.0
+    use_minddata: True
+    transform_pipeline:
+      - DecodeImage:
+          img_mode: RGB
+          to_float32: False
+      - DetLabelEncode:
+      - DetResize:  # GridResize 32
+          target_size: [ 736, 1280 ]
+          keep_ratio: False
+          limit_type: none
+          divisor: 32
+      - NormalizeImage:
+          bgr_to_rgb: False
+          is_hwc: True
+          mean: imagenet
+          std: imagenet
+      - ToCHWImage:
+    #  the order of the dataloader list, matching the network input and the labels for evaluation
+    output_columns: [ 'image', 'polys', 'ignore_tags', 'shape_list' ]
+    net_input_column_index: [0] # input indices for network forward func in output_columns
+    label_column_index: [1, 2] # input indices marked as label
+
+  loader:
+    shuffle: False
+    batch_size: 1 # TODO: due to dynamic shape of polygons (num of boxes varies), BS has to be 1
+    drop_remainder: False
+    num_workers: 3
diff --git a/docs/cn/tutorials/frequently_asked_questions.md b/docs/cn/tutorials/frequently_asked_questions.md
@@ -7,6 +7,7 @@
  - [关于`RunTimeError:The device address tpe is wrong`](#q6-runtimeerror-the-device-address-type-is-wrong-type-name-in-addresscpu-type-name-in-contextascend)
  - [模型转换相关问题](#q7-模型转换相关问题)
  - [推理相关问题](#q8-推理时相关问题)
+ - [DBNet训练速率不及预期](#q9-dbnet训练速率不及预期)
 
 ### Q1 未定义符号
 
@@ -618,3 +619,58 @@ ERROR: Could not build wheels for lanms-neo, which is required to install pyproj
 
   - 使用恰当的模型。例如在 `--rec_model_path` 错误传入了检测模型，可触发此错误；
   - 使用推理模型（非训练模型），用`converter_lite`转换工具转为端侧`mindir`进行推理。
+
+
+### Q9 DBNet训练速率不及预期
+
+执行以下命令，训练DBNet系列网络（包括DBNet MobileNetV3、DBNet ResNet-18、DBNet ResNet-50、DBNet++ ResNet-50等）时，训练帧率不及预期。例如，DBNet MobileNetV3在Ascend 910A上，训练速率仅80fps，不及预期的100fps。
+
+``` bash
+python tools/train.py -c configs/det/dbnet/db_mobilenetv3_icdar15.yaml
+```
+
+由于DBNet数据预处理过程相对复杂，如训练服务器CPU单核运算能力较弱，则数据预处理可能成为性能瓶颈。
+
+**解决方法**
+
+1. 尝试将配置文件中`train.dataset.use_minddata`和`eval.dataset.use_minddata`的选项设置为`True`。MindOCR将采用MindSpore[MindData](https://www.mindspore.cn/docs/zh-CN/master/api_python/dataset/dataset_method/operation/mindspore.dataset.Dataset.map.html?highlight=map#mindspore.dataset.Dataset.map)执行部分数据预处理步骤：
+
+``` yaml
+...
+train:
+  ckpt_save_dir: './tmp_det'
+  dataset_sink_mode: True
+  dataset:
+    type: DetDataset
+    dataset_root: /data/ocr_datasets
+    data_dir: ic15/det/train/ch4_training_images
+    label_file: ic15/det/train/det_gt.txt
+    sample_ratio: 1.0
+    use_minddata: True                          <-- 设置该选项
+...
+eval:
+  ckpt_load_path: tmp_det/best.ckpt
+  dataset_sink_mode: False
+  dataset:
+    type: DetDataset
+    dataset_root: /data/ocr_datasets
+    data_dir: ic15/det/test/ch4_test_images
+    label_file: ic15/det/test/det_gt.txt
+    sample_ratio: 1.0
+    use_minddata: True                          <-- 设置该选项
+...
+```
+
+2. 如训练服务器CPU核数较多，尝试调高配置文件中的`train.loader.num_workers`选项，提升数据预取的线程数：
+
+``` yaml
+...
+train:
+  ...
+  loader:
+    shuffle: True
+    batch_size: 10
+    drop_remainder: True
+    num_workers: 12                             <-- 设置该选项
+...
+```
diff --git a/docs/en/tutorials/frequently_asked_questions.md b/docs/en/tutorials/frequently_asked_questions.md
@@ -7,6 +7,7 @@
  - [`RunTimeError:The device address tpe is wrong`](#q6-runtimeerror-the-device-address-type-is-wrong-type-name-in-addresscpu-type-name-in-contextascend)
  - [Problems related to model converting](#q7-problems-related-to-model-converting)
  - [Problems related to inference](#q8-problems-related-to-inference)
+ - [Training speed of DBNet not as fast as expected](#q9-training-speed-of-dbnet-not-as-fast-as-expected)
 
 ### Q1 Undefined symbol
 
@@ -607,3 +608,58 @@ Reason:
 
   - Use suitable model. For example, it may fail and pass detection model to `--rec_model_path` parameter.
   - Use inference model(not training model) to do converting.
+
+
+### Q9 Training speed of DBNet not as fast as expected
+
+When training DBNet series networks (including DBNet MobileNetV3, DBNet ResNet-18, DBNet ResNet-50, and DBNet++ ResNet-50) with the following command, the training speed is not as fast as expected. For instance, on Ascend 910A the training speed of DBNet MobileNetV3 reaches only 80fps, which is slower than the expected 100fps.
+
+``` bash
+python tools/train.py -c configs/det/dbnet/db_mobilenetv3_icdar15.yaml
+```
+
+This problem is caused by the relatively complex data pre-processing of DBNet. If the single-core CPU performance of the training server is relatively weak, data pre-processing can become the performance bottleneck.
+
+**Solutions**
+
+1. Try setting `train.dataset.use_minddata` and `eval.dataset.use_minddata` in the configuration file to `True`. MindOCR will then execute part of the data pre-processing steps using MindSpore [MindData](https://www.mindspore.cn/docs/en/master/api_python/dataset/dataset_method/operation/mindspore.dataset.Dataset.map.html?highlight=map#mindspore.dataset.Dataset.map):
+
+``` yaml
+...
+train:
+  ckpt_save_dir: './tmp_det'
+  dataset_sink_mode: True
+  dataset:
+    type: DetDataset
+    dataset_root: /data/ocr_datasets
+    data_dir: ic15/det/train/ch4_training_images
+    label_file: ic15/det/train/det_gt.txt
+    sample_ratio: 1.0
+    use_minddata: True                          <-- Set this configuration
+...
+eval:
+  ckpt_load_path: tmp_det/best.ckpt
+  dataset_sink_mode: False
+  dataset:
+    type: DetDataset
+    dataset_root: /data/ocr_datasets
+    data_dir: ic15/det/test/ch4_test_images
+    label_file: ic15/det/test/det_gt.txt
+    sample_ratio: 1.0
+    use_minddata: True                          <-- Set this configuration
+...
+```
+
+2. If the training server has enough CPU cores, try setting `train.loader.num_workers` in the configuration file to a larger value to increase the number of parallel workers fetching data:
+
+``` yaml
+...
+train:
+  ...
+  loader:
+    shuffle: True
+    batch_size: 10
+    drop_remainder: True
+    num_workers: 12                             <-- Set this configuration
+...
+```
diff --git a/mindocr/data/builder.py b/mindocr/data/builder.py
@@ -4,6 +4,7 @@
 
 import mindspore as ms
 
+from .constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
 from .det_dataset import DetDataset, SynthTextDataset
 from .kie_dataset import KieDataset
 from .layout_dataset import PublayNetDataset
@@ -140,6 +141,9 @@ def build_dataset(
     assert dataset_class_name in supported_dataset_types, "Invalid dataset name"
     dataset_class = eval(dataset_class_name)
     dataset_args = dict(is_train=is_train, **dataset_config)
+    if "use_minddata" in dataset_args and dataset_args["use_minddata"]:
+        minddata_op_list = _parse_minddata_op(dataset_args)
+
     dataset = dataset_class(**dataset_args)
 
     dataset_column_names = dataset.get_output_columns()
@@ -158,8 +162,13 @@ def build_dataset(
     )
 
     # 2. data mapping using minddata C lib (optional)
-    # ds = ds.map(operations=transform_list, input_columns=['image', 'label'], num_parallel_workers=8,
-    # python_multiprocessing=True)
+    if "use_minddata" in dataset_args and dataset_args["use_minddata"]:
+        ds = ds.map(
+            operations=minddata_op_list,
+            input_columns=["image"],
+            num_parallel_workers=num_workers,
+            python_multiprocessing=True,
+        )
 
     # 3. create loader
     # get batch of dataset by collecting batch_size consecutive data rows and apply batch operations
@@ -242,3 +251,30 @@ def _check_batch_size(num_samples, ori_batch_size=32, refine=True):
                     f"dropped/padded in graph mode."
                 )
                 return bs
+
+
+def _parse_minddata_op(dataset_args):
+    """Split the MindData-capable transforms out of ``dataset_args["transform_pipeline"]``.
+
+    ``RandomColorAdjust``, ``NormalizeImage`` and ``ToCHWImage`` entries are converted to
+    ``ms.dataset.vision`` operations; every other entry stays in the pipeline.
+
+    Args:
+        dataset_args (dict): dataset arguments holding a ``transform_pipeline`` list of dicts
+            keyed by transform name. The ``transform_pipeline`` entry is rebound to a new list
+            without the converted transforms. The original list object is not modified, so a
+            config that shares it can be parsed again and still yield the same operations.
+
+    Returns:
+        list: the MindData operations, in the order their transforms appeared in the pipeline.
+
+    Raises:
+        KeyError: if a ``RandomColorAdjust`` entry lacks ``brightness`` or ``saturation``.
+    """
+    minddata_op_list = []
+    remaining_transforms = []
+    for transform_dict in dataset_args["transform_pipeline"]:
+        if "RandomColorAdjust" in transform_dict:
+            color_args = transform_dict["RandomColorAdjust"]
+            minddata_op_list.append(
+                ms.dataset.vision.RandomColorAdjust(
+                    brightness=color_args["brightness"],
+                    saturation=color_args["saturation"],
+                )
+            )
+        elif "NormalizeImage" in transform_dict:
+            # NOTE(review): the entry's own mean/std/bgr_to_rgb settings are not read here;
+            # the ImageNet constants are always applied - confirm this is intended.
+            minddata_op_list.append(
+                ms.dataset.vision.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD)
+            )
+        elif "ToCHWImage" in transform_dict:
+            minddata_op_list.append(ms.dataset.vision.HWC2CHW())
+        else:
+            remaining_transforms.append(transform_dict)
+    # Rebind instead of popping in place: the list may be shared with the caller's config.
+    dataset_args["transform_pipeline"] = remaining_transforms
+    return minddata_op_list