|
| 1 | +model: |
| 2 | + base_learning_rate: 4.5e-06 |
| 3 | + target: taming.models.cond_transformer.Net2NetTransformer |
| 4 | + params: |
| 5 | + cond_stage_key: objects_bbox |
| 6 | + transformer_config: |
| 7 | + target: taming.modules.transformer.mingpt.GPT |
| 8 | + params: |
| 9 | + vocab_size: 8192 |
| 10 | +      block_size: 348  # = 256 + 92 = dim(vqgan latent space, 16x16) + conditional_builder embedding length |
| 11 | + n_layer: 36 |
| 12 | + n_head: 16 |
| 13 | + n_embd: 1536 |
| 14 | + embd_pdrop: 0.1 |
| 15 | + resid_pdrop: 0.1 |
| 16 | + attn_pdrop: 0.1 |
| 17 | + first_stage_config: |
| 18 | + target: taming.models.vqgan.VQModel |
| 19 | + params: |
| 20 | + ckpt_path: /path/to/coco_oi_epoch12.ckpt # https://heibox.uni-heidelberg.de/f/461d9a9f4fcf48ab84f4/ |
| 21 | + embed_dim: 256 |
| 22 | + n_embed: 8192 |
| 23 | + ddconfig: |
| 24 | + double_z: false |
| 25 | + z_channels: 256 |
| 26 | + resolution: 256 |
| 27 | + in_channels: 3 |
| 28 | + out_ch: 3 |
| 29 | + ch: 128 |
| 30 | + ch_mult: |
| 31 | + - 1 |
| 32 | + - 1 |
| 33 | + - 2 |
| 34 | + - 2 |
| 35 | + - 4 |
| 36 | + num_res_blocks: 2 |
| 37 | + attn_resolutions: |
| 38 | + - 16 |
| 39 | + dropout: 0.0 |
| 40 | + lossconfig: |
| 41 | + target: taming.modules.losses.DummyLoss |
| 42 | + cond_stage_config: |
| 43 | + target: taming.models.dummy_cond_stage.DummyCondStage |
| 44 | + params: |
| 45 | + conditional_key: objects_bbox |
| 46 | + |
| 47 | +data: |
| 48 | + target: main.DataModuleFromConfig |
| 49 | + params: |
| 50 | + batch_size: 6 |
| 51 | + train: |
| 52 | + target: taming.data.annotated_objects_open_images.AnnotatedObjectsOpenImages |
| 53 | + params: |
| 54 | + data_path: data/open_images_annotations_100 # substitute with path to full dataset |
| 55 | + split: train |
| 56 | + keys: [image, objects_bbox, file_name, annotations] |
| 57 | + no_tokens: 8192 |
| 58 | + target_image_size: 256 |
| 59 | + category_allow_list_target: taming.data.open_images_helper.top_300_classes_plus_coco_compatibility |
| 60 | + category_mapping_target: taming.data.open_images_helper.open_images_unify_categories_for_coco |
| 61 | + min_object_area: 0.0001 |
| 62 | + min_objects_per_image: 2 |
| 63 | + max_objects_per_image: 30 |
| 64 | + crop_method: random-2d |
| 65 | + random_flip: true |
| 66 | + use_group_parameter: true |
| 67 | + use_additional_parameters: true |
| 68 | + encode_crop: true |
| 69 | + validation: |
| 70 | + target: taming.data.annotated_objects_open_images.AnnotatedObjectsOpenImages |
| 71 | + params: |
| 72 | + data_path: data/open_images_annotations_100 # substitute with path to full dataset |
| 73 | + split: validation |
| 74 | + keys: [image, objects_bbox, file_name, annotations] |
| 75 | + no_tokens: 8192 |
| 76 | + target_image_size: 256 |
| 77 | + category_allow_list_target: taming.data.open_images_helper.top_300_classes_plus_coco_compatibility |
| 78 | + category_mapping_target: taming.data.open_images_helper.open_images_unify_categories_for_coco |
| 79 | + min_object_area: 0.0001 |
| 80 | + min_objects_per_image: 2 |
| 81 | + max_objects_per_image: 30 |
| 82 | + crop_method: center |
| 83 | + random_flip: false |
| 84 | + use_group_parameter: true |
| 85 | + use_additional_parameters: true |
| 86 | + encode_crop: true |