Skip to content

Commit 09298dc

Browse files
authored
Merge pull request CompVis#127 from CompVis/scene-images-oi
Added scene image generation for Open Images
2 parents 3121649 + 2201a7d commit 09298dc

File tree

213 files changed

+3642
-11
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

213 files changed

+3642
-11
lines changed

assets/scene_images_samples.svg

Lines changed: 453 additions & 0 deletions
Loading

configs/coco_scene_images_transformer.yaml

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,13 +48,12 @@ data:
4848
target: main.DataModuleFromConfig
4949
params:
5050
batch_size: 6
51-
num_workers: 12
5251
train:
5352
target: taming.data.annotated_objects_coco.AnnotatedObjectsCoco
5453
params:
55-
data_path: data/coco_annotations_100
54+
data_path: data/coco_annotations_100 # substitute with path to full dataset
5655
split: train
57-
keys: [image, objects_bbox, file_name]
56+
keys: [image, objects_bbox, file_name, annotations]
5857
no_tokens: 8192
5958
target_image_size: 256
6059
min_object_area: 0.00001
@@ -67,9 +66,9 @@ data:
6766
validation:
6867
target: taming.data.annotated_objects_coco.AnnotatedObjectsCoco
6968
params:
70-
data_path: data/coco_annotations_100
69+
data_path: data/coco_annotations_100 # substitute with path to full dataset
7170
split: validation
72-
keys: [image, objects_bbox, file_name]
71+
keys: [image, objects_bbox, file_name, annotations]
7372
no_tokens: 8192
7473
target_image_size: 256
7574
min_object_area: 0.00001
configs/open_images_scene_images_transformer.yaml

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
model:
2+
base_learning_rate: 4.5e-06
3+
target: taming.models.cond_transformer.Net2NetTransformer
4+
params:
5+
cond_stage_key: objects_bbox
6+
transformer_config:
7+
target: taming.modules.transformer.mingpt.GPT
8+
params:
9+
vocab_size: 8192
10+
block_size: 348 # = 256 + 92 = dim(vqgan_latent_space,16x16) + dim(conditional_builder.embedding_dim)
11+
n_layer: 36
12+
n_head: 16
13+
n_embd: 1536
14+
embd_pdrop: 0.1
15+
resid_pdrop: 0.1
16+
attn_pdrop: 0.1
17+
first_stage_config:
18+
target: taming.models.vqgan.VQModel
19+
params:
20+
ckpt_path: /path/to/coco_oi_epoch12.ckpt # https://heibox.uni-heidelberg.de/f/461d9a9f4fcf48ab84f4/
21+
embed_dim: 256
22+
n_embed: 8192
23+
ddconfig:
24+
double_z: false
25+
z_channels: 256
26+
resolution: 256
27+
in_channels: 3
28+
out_ch: 3
29+
ch: 128
30+
ch_mult:
31+
- 1
32+
- 1
33+
- 2
34+
- 2
35+
- 4
36+
num_res_blocks: 2
37+
attn_resolutions:
38+
- 16
39+
dropout: 0.0
40+
lossconfig:
41+
target: taming.modules.losses.DummyLoss
42+
cond_stage_config:
43+
target: taming.models.dummy_cond_stage.DummyCondStage
44+
params:
45+
conditional_key: objects_bbox
46+
47+
data:
48+
target: main.DataModuleFromConfig
49+
params:
50+
batch_size: 6
51+
train:
52+
target: taming.data.annotated_objects_open_images.AnnotatedObjectsOpenImages
53+
params:
54+
data_path: data/open_images_annotations_100 # substitute with path to full dataset
55+
split: train
56+
keys: [image, objects_bbox, file_name, annotations]
57+
no_tokens: 8192
58+
target_image_size: 256
59+
category_allow_list_target: taming.data.open_images_helper.top_300_classes_plus_coco_compatibility
60+
category_mapping_target: taming.data.open_images_helper.open_images_unify_categories_for_coco
61+
min_object_area: 0.0001
62+
min_objects_per_image: 2
63+
max_objects_per_image: 30
64+
crop_method: random-2d
65+
random_flip: true
66+
use_group_parameter: true
67+
use_additional_parameters: true
68+
encode_crop: true
69+
validation:
70+
target: taming.data.annotated_objects_open_images.AnnotatedObjectsOpenImages
71+
params:
72+
data_path: data/open_images_annotations_100 # substitute with path to full dataset
73+
split: validation
74+
keys: [image, objects_bbox, file_name, annotations]
75+
no_tokens: 8192
76+
target_image_size: 256
77+
category_allow_list_target: taming.data.open_images_helper.top_300_classes_plus_coco_compatibility
78+
category_mapping_target: taming.data.open_images_helper.open_images_unify_categories_for_coco
79+
min_object_area: 0.0001
80+
min_objects_per_image: 2
81+
max_objects_per_image: 30
82+
crop_method: center
83+
random_flip: false
84+
use_group_parameter: true
85+
use_additional_parameters: true
86+
encode_crop: true

0 commit comments

Comments
 (0)