|
| 1 | +system: |
| 2 | + mode: 0 # 0 for graph mode, 1 for pynative mode in MindSpore |
| 3 | + distribute: True |
| 4 | + amp_level: 'O0' |
| 5 | + seed: 42 |
| 6 | + log_interval: 10 |
| 7 | + val_while_train: True |
| 8 | + drop_overflow_update: False |
| 9 | + |
| 10 | +model: |
| 11 | + type: det |
| 12 | + transform: null |
| 13 | + backbone: |
| 14 | + name: det_resnet50 |
| 15 | + pretrained: True |
| 16 | + neck: |
| 17 | + name: DBFPN |
| 18 | + out_channels: 256 |
| 19 | + bias: False |
| 20 | + head: |
| 21 | + name: DBHead |
| 22 | + k: 50 |
| 23 | + bias: False |
| 24 | + adaptive: True |
| 25 | + |
| 26 | +postprocess: |
| 27 | + name: DBPostprocess |
| 28 | + box_type: quad # whether to output a polygon or a box |
| 29 | + binary_thresh: 0.3 # binarization threshold |
| 30 | + box_thresh: 0.7 # box score threshold |
| 31 | + max_candidates: 1000 |
| 32 | + expand_ratio: 1.5 # coefficient for expanding predictions |
| 33 | + |
| 34 | +metric: |
| 35 | + name: DetMetric |
| 36 | + main_indicator: f-score |
| 37 | + |
| 38 | +loss: |
| 39 | + name: DBLoss |
| 40 | + eps: 1.0e-6 |
| 41 | + l1_scale: 10 |
| 42 | + bce_scale: 5 |
| 43 | + bce_replace: bceloss |
| 44 | + |
| 45 | +scheduler: |
| 46 | + scheduler: warmup_cosine_decay |
| 47 | + lr: 0.001 |
| 48 | + min_lr: 0.00001 |
| 49 | + num_epochs: 1200 |
| 50 | + warmup_epochs: 3 |
| 51 | + decay_epochs: 1150 |
| 52 | + |
| 53 | +optimizer: |
| 54 | + opt: Adam |
| 55 | + beta1: 0.9 |
| 56 | + beta2: 0.999 |
| 57 | + |
| 58 | +# only used for mixed precision training |
| 59 | +loss_scaler: |
| 60 | + type: dynamic |
| 61 | + loss_scale: 512 |
| 62 | + scale_factor: 2 |
| 63 | + scale_window: 1000 |
| 64 | + |
| 65 | +train: |
| 66 | + ckpt_save_dir: './tmp_det' |
| 67 | + dataset_sink_mode: True |
| 68 | + dataset: |
| 69 | + type: DetDataset |
| 70 | + dataset_root: /data/ocr_datasets |
| 71 | + data_dir: ic15/det/train/ch4_training_images |
| 72 | + label_file: ic15/det/train/det_gt.txt |
| 73 | + sample_ratio: 1.0 |
| 74 | + transform_pipeline: |
| 75 | + - DecodeImage: |
| 76 | + img_mode: RGB |
| 77 | + to_float32: False |
| 78 | + - DetLabelEncode: |
| 79 | + - RandomColorAdjust: |
| 80 | + brightness: 0.1255 # 32.0 / 255 |
| 81 | + saturation: 0.5 |
| 82 | + - RandomHorizontalFlip: |
| 83 | + p: 0.5 |
| 84 | + - RandomRotate: |
| 85 | + degrees: [ -10, 10 ] |
| 86 | + expand_canvas: False |
| 87 | + p: 1.0 |
| 88 | + - RandomScale: |
| 89 | + scale_range: [ 0.5, 3.0 ] |
| 90 | + p: 1.0 |
| 91 | + - RandomCropWithBBox: |
| 92 | + max_tries: 10 |
| 93 | + min_crop_ratio: 0.1 |
| 94 | + crop_size: [ 640, 640 ] |
| 95 | + p: 1.0 |
| 96 | + - ValidatePolygons: |
| 97 | + - ShrinkBinaryMap: |
| 98 | + min_text_size: 8 |
| 99 | + shrink_ratio: 0.4 |
| 100 | + - BorderMap: |
| 101 | + shrink_ratio: 0.4 |
| 102 | + thresh_min: 0.3 |
| 103 | + thresh_max: 0.7 |
| 104 | + - NormalizeImage: |
| 105 | + bgr_to_rgb: False |
| 106 | + is_hwc: True |
| 107 | + mean: imagenet |
| 108 | + std: imagenet |
| 109 | + - ToCHWImage: |
| 110 | + # the order of the dataloader output columns: the network input first, then the labels for the loss function, then optional data for debugging/visualization |
| 111 | + output_columns: [ 'image', 'binary_map', 'mask', 'thresh_map', 'thresh_mask' ] #'img_path'] |
| 112 | +# output_columns: ['image'] # for debug op performance |
| 113 | + net_input_column_index: [0] # input indices for network forward func in output_columns |
| 114 | + label_column_index: [1, 2, 3, 4] # input indices marked as label |
| 115 | + |
| 116 | + loader: |
| 117 | + shuffle: True |
| 118 | + batch_size: 10 |
| 119 | + drop_remainder: True |
| 120 | + num_workers: 8 |
| 121 | + |
| 122 | +eval: |
| 123 | + ckpt_load_path: tmp_det/best.ckpt |
| 124 | + dataset_sink_mode: False |
| 125 | + dataset: |
| 126 | + type: DetDataset |
| 127 | + dataset_root: /data/ocr_datasets |
| 128 | + data_dir: ic15/det/test/ch4_test_images |
| 129 | + label_file: ic15/det/test/det_gt.txt |
| 130 | + sample_ratio: 1.0 |
| 131 | + transform_pipeline: |
| 132 | + - DecodeImage: |
| 133 | + img_mode: RGB |
| 134 | + to_float32: False |
| 135 | + - DetLabelEncode: |
| 136 | + - DetResize: # GridResize 32 |
| 137 | + target_size: [ 736, 1280 ] |
| 138 | + keep_ratio: False |
| 139 | + limit_type: none |
| 140 | + divisor: 32 |
| 141 | + - NormalizeImage: |
| 142 | + bgr_to_rgb: False |
| 143 | + is_hwc: True |
| 144 | + mean: imagenet |
| 145 | + std: imagenet |
| 146 | + - ToCHWImage: |
| 147 | + # the order of the dataloader list, matching the network input and the labels for evaluation |
| 148 | + output_columns: [ 'image', 'polys', 'ignore_tags', 'shape_list' ] |
| 149 | + net_input_column_index: [0] # input indices for network forward func in output_columns |
| 150 | + label_column_index: [1, 2] # input indices marked as label |
| 151 | + |
| 152 | + loader: |
| 153 | + shuffle: False |
| 154 | + batch_size: 1 # TODO: batch size has to be 1 because the number of polygons (boxes) varies per image (dynamic shape) |
| 155 | + drop_remainder: False |
| 156 | + num_workers: 2 |
| 157 | + |
| 158 | +predict: |
| 159 | + ckpt_load_path: tmp_det/best.ckpt |
| 160 | + dataset_sink_mode: False |
| 161 | + dataset: |
| 162 | + type: PredictDataset |
| 163 | + dataset_root: path/to/dataset_root |
| 164 | + data_dir: ic15/det/test/ch4_test_images |
| 165 | +# label_file: test.txt |
| 166 | + sample_ratio: 1.0 |
| 167 | + transform_pipeline: |
| 168 | + - DecodeImage: |
| 169 | + img_mode: RGB |
| 170 | + to_float32: False |
| 171 | +# - DetLabelEncode: |
| 172 | + - DetResize: # GridResize 32 |
| 173 | + target_size: [ 736, 1280 ] |
| 174 | + keep_ratio: False |
| 175 | + limit_type: none |
| 176 | + divisor: 32 |
| 177 | + - NormalizeImage: |
| 178 | + bgr_to_rgb: False |
| 179 | + is_hwc: True |
| 180 | + mean: imagenet |
| 181 | + std: imagenet |
| 182 | + - ToCHWImage: |
| 183 | + # the order of the dataloader list for prediction; no label columns are output |
| 184 | + output_columns: [ 'img_path', 'image', 'raw_img_shape' ] # shape in h, w order |
| 185 | +# num_keys_of_labels: 2 # num labels |
| 186 | + |
| 187 | + loader: |
| 188 | + shuffle: False |
| 189 | + batch_size: 1 # TODO: due to dynamic shape of polygons (num of boxes varies), BS has to be 1 |
| 190 | + drop_remainder: False |
| 191 | + num_workers: 2 |
0 commit comments