diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3e52d57 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +__pycache__/ +*.pyc +venv/ +dataset.zip +dataset/ +pretrained_model/ +pretrained_model.zip diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..4b4e685 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,5 @@ +repos: +- repo: https://github.com/python/black + rev: stable + hooks: + - id: black diff --git a/README.md b/README.md index f4dfbfc..56bc70a 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,27 @@ # Grid-Anchor-based-Image-Cropping-Pytorch This code includes several extensions we have made to our conference version. Please read the [paper](https://drive.google.com/open?id=1Bd1VaqYVycB7Npv5OdXKl-znKs_APl4n) for details. +## Change Log +### 2020-09-10 +- Add an `autocrop.py` driver for batch processing of an image folder. +- Override `_roi_align.so` and `_rod_align.so` with the corresponding CPU versions (I'm using a Mac without an NVIDIA GPU :( ). +- Remove a large number of `*.pyc` files. +- Add a `requirements.txt` with the correct (old) dependencies. +- Fix formatting issues with Black via a pre-commit hook. -### Requirements +## Requirements python 2.7, pytorch 0.4.1, numpy, cv2, scipy. -### Usage +## Usage 1. Download the source code, the [dataset](https://drive.google.com/open?id=1X9xK5O9cx4_MvDkWAs5wVuM-mPWINaqa) and the [pretrained model](https://drive.google.com/open?id=1kaNWvfIdtbh2GIPNSWXdxqyS-d2DR1F3). 2. Run ``TrainModel.py`` to train a new model on our dataset or Run ``demo_eval.py`` to test the pretrained model on any images. 3. To change the aspect ratio of generated crops, please change the ``generate_bboxes`` function in ``croppingDataset.py`` (line 115). -### Annotation software +## Annotation software The executable annotation software can be found [here](https://github.com/lld533/Grid-Anchor-based-Image-Cropping-Pytorch). -### Other implementation +## Other implementation 1. [PyTorch 1.0 or later](https://github.com/lld533/Grid-Anchor-based-Image-Cropping-Pytorch) 2. 
[Matlab (conference version)](https://github.com/HuiZeng/Grid-Anchor-based-Image-Cropping) diff --git a/ShuffleNetV2.py b/ShuffleNetV2.py index 6040088..77c0ef8 100644 --- a/ShuffleNetV2.py +++ b/ShuffleNetV2.py @@ -6,11 +6,12 @@ from torch.nn import init import math + def conv_bn(inp, oup, stride): return nn.Sequential( nn.Conv2d(inp, oup, 3, stride, 1, bias=False), nn.BatchNorm2d(oup), - nn.ReLU(inplace=True) + nn.ReLU(inplace=True), ) @@ -18,17 +19,17 @@ def conv_1x1_bn(inp, oup): return nn.Sequential( nn.Conv2d(inp, oup, 1, 1, 0, bias=False), nn.BatchNorm2d(oup), - nn.ReLU(inplace=True) + nn.ReLU(inplace=True), ) + def channel_shuffle(x, groups): batchsize, num_channels, height, width = x.data.size() channels_per_group = num_channels // groups - + # reshape - x = x.view(batchsize, groups, - channels_per_group, height, width) + x = x.view(batchsize, groups, channels_per_group, height, width) x = torch.transpose(x, 1, 2).contiguous() @@ -36,7 +37,8 @@ def channel_shuffle(x, groups): x = x.view(batchsize, -1, height, width) return x - + + class InvertedResidual(nn.Module): def __init__(self, inp, oup, stride, benchmodel): super(InvertedResidual, self).__init__() @@ -44,11 +46,11 @@ def __init__(self, inp, oup, stride, benchmodel): self.stride = stride assert stride in [1, 2] - oup_inc = oup//2 - + oup_inc = oup // 2 + if self.benchmodel == 1: - #assert inp == oup_inc - self.banch2 = nn.Sequential( + # assert inp == oup_inc + self.banch2 = nn.Sequential( # pw nn.Conv2d(oup_inc, oup_inc, 1, 1, 0, bias=False), nn.BatchNorm2d(oup_inc), @@ -60,8 +62,8 @@ def __init__(self, inp, oup, stride, benchmodel): nn.Conv2d(oup_inc, oup_inc, 1, 1, 0, bias=False), nn.BatchNorm2d(oup_inc), nn.ReLU(inplace=True), - ) - else: + ) + else: self.banch1 = nn.Sequential( # dw nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), @@ -70,8 +72,8 @@ def __init__(self, inp, oup, stride, benchmodel): nn.Conv2d(inp, oup_inc, 1, 1, 0, bias=False), nn.BatchNorm2d(oup_inc), nn.ReLU(inplace=True), - ) - + ) + self.banch2 = nn.Sequential( # pw nn.Conv2d(inp, oup_inc, 1, 1, 0, bias=False), @@ -85,34 +87,34 @@ def __init__(self, inp, oup, stride, benchmodel): nn.BatchNorm2d(oup_inc), nn.ReLU(inplace=True), ) - + @staticmethod def _concat(x, out): # concatenate along channel axis - return torch.cat((x, out), 1) + return torch.cat((x, out), 1) def forward(self, x): - if 1==self.benchmodel: - x1 = x[:, :(x.shape[1]//2), :, :] - x2 = x[:, (x.shape[1]//2):, :, :] + if 1 == self.benchmodel: + x1 = x[:, : (x.shape[1] // 2), :, :] + x2 = x[:, (x.shape[1] // 2) :, :, :] out = self._concat(x1, self.banch2(x2)) - elif 2==self.benchmodel: + elif 2 == self.benchmodel: out = self._concat(self.banch1(x), self.banch2(x)) return channel_shuffle(out, 2) class ShuffleNetV2(nn.Module): - def __init__(self, n_class=1000, input_size=224, width_mult=1.): + def __init__(self, n_class=1000, input_size=224, width_mult=1.0): super(ShuffleNetV2, self).__init__() - + assert input_size % 32 == 0 - + self.stage_repeats = [4, 8, 4] # index 0 is invalid and should never be called. # only used for indexing convenience. 
if width_mult == 0.5: - self.stage_out_channels = [-1, 24, 48, 96, 192, 1024] + self.stage_out_channels = [-1, 24, 48, 96, 192, 1024] elif width_mult == 1.0: self.stage_out_channels = [-1, 24, 116, 232, 464, 1024] elif width_mult == 1.5: @@ -122,36 +124,42 @@ def __init__(self, n_class=1000, input_size=224, width_mult=1.): else: raise ValueError( """{} groups is not supported for - 1x1 Grouped Convolutions""".format(num_groups)) + 1x1 Grouped Convolutions""".format( + num_groups + ) + ) # building first layer input_channel = self.stage_out_channels[1] - self.conv1 = conv_bn(3, input_channel, 2) - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - + self.conv1 = conv_bn(3, input_channel, 2) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.features = [] # building inverted residual blocks for idxstage in range(len(self.stage_repeats)): numrepeat = self.stage_repeats[idxstage] - output_channel = self.stage_out_channels[idxstage+2] + output_channel = self.stage_out_channels[idxstage + 2] for i in range(numrepeat): if i == 0: - #inp, oup, stride, benchmodel): - self.features.append(InvertedResidual(input_channel, output_channel, 2, 2)) + # inp, oup, stride, benchmodel): + self.features.append( + InvertedResidual(input_channel, output_channel, 2, 2) + ) else: - self.features.append(InvertedResidual(input_channel, output_channel, 1, 1)) + self.features.append( + InvertedResidual(input_channel, output_channel, 1, 1) + ) input_channel = output_channel - - + # make it nn.Sequential self.features = nn.Sequential(*self.features) # building last several layers - self.conv_last = conv_1x1_bn(input_channel, self.stage_out_channels[-1]) - self.globalpool = nn.Sequential(nn.AvgPool2d(int(input_size/32))) - - # building classifier - self.classifier = nn.Sequential(nn.Linear(self.stage_out_channels[-1], n_class)) + self.conv_last = conv_1x1_bn(input_channel, self.stage_out_channels[-1]) + self.globalpool = nn.Sequential(nn.AvgPool2d(int(input_size / 32))) + + # building classifier + self.classifier = nn.Sequential(nn.Linear(self.stage_out_channels[-1], n_class)) def forward(self, x): x = self.conv1(x) @@ -163,10 +171,12 @@ def forward(self, x): x = self.classifier(x) return x -def shufflenetv2(width_mult=1.): + +def shufflenetv2(width_mult=1.0): model = ShuffleNetV2(width_mult=width_mult) return model - + + if __name__ == "__main__": """Testing """ diff --git a/TestAccuracy.py b/TestAccuracy.py index 0395e51..10dd09f 100644 --- a/TestAccuracy.py +++ b/TestAccuracy.py @@ -11,37 +11,62 @@ from scipy.stats import spearmanr, pearsonr parser = argparse.ArgumentParser( - description='Single Shot MultiBox Detector Training With Pytorch') -parser.add_argument('--dataset_root', default='dataset/GAIC/', help='Dataset root directory path') -parser.add_argument('--image_size', default=256, type=int, help='Batch size for training') -parser.add_argument('--batch_size', default=1, type=int, help='Batch size for training') -parser.add_argument('--num_workers', default=0, type=int, help='Number of workers used in dataloading') -parser.add_argument('--cuda', default=True, help='Use CUDA to train model') -parser.add_argument('--net_path', default='weights/ablation/cropping/mobilenetv2/downsample4_multi_Aug1_Align9_Cdim8/23_0.625_0.583_0.553_0.525_0.785_0.762_0.748_0.723_0.783_0.806.pth_____', - help='Directory for saving checkpoint models') + description="Single Shot MultiBox Detector Training With Pytorch" +) +parser.add_argument( + "--dataset_root", default="dataset/GAIC/", 
help="Dataset root directory path" +) +parser.add_argument( + "--image_size", default=256, type=int, help="Batch size for training" +) +parser.add_argument("--batch_size", default=1, type=int, help="Batch size for training") +parser.add_argument( + "--num_workers", default=0, type=int, help="Number of workers used in dataloading" +) +parser.add_argument("--cuda", default=True, help="Use CUDA to train model") +parser.add_argument( + "--net_path", + default="weights/ablation/cropping/mobilenetv2/downsample4_multi_Aug1_Align9_Cdim8/23_0.625_0.583_0.553_0.525_0.785_0.762_0.748_0.723_0.783_0.806.pth_____", + help="Directory for saving checkpoint models", +) args = parser.parse_args() if torch.cuda.is_available(): if args.cuda: - torch.set_default_tensor_type('torch.cuda.FloatTensor') + torch.set_default_tensor_type("torch.cuda.FloatTensor") if not args.cuda: - print("WARNING: It looks like you have a CUDA device, but aren't " + - "using CUDA.\nRun with --cuda for optimal training speed.") - torch.set_default_tensor_type('torch.FloatTensor') + print( + "WARNING: It looks like you have a CUDA device, but aren't " + + "using CUDA.\nRun with --cuda for optimal training speed." + ) + torch.set_default_tensor_type("torch.FloatTensor") else: - torch.set_default_tensor_type('torch.FloatTensor') + torch.set_default_tensor_type("torch.FloatTensor") -data_loader = data.DataLoader(GAICD(image_size=args.image_size, dataset_dir=args.dataset_root, set='test'), args.batch_size, num_workers=args.num_workers, shuffle=False) +data_loader = data.DataLoader( + GAICD(image_size=args.image_size, dataset_dir=args.dataset_root, set="test"), + args.batch_size, + num_workers=args.num_workers, + shuffle=False, +) + def test(): - net = build_crop_model(scale='multi', alignsize=9, reddim=8, loadweight=True, model='mobilenetv2', downsample=4) + net = build_crop_model( + scale="multi", + alignsize=9, + reddim=8, + loadweight=True, + model="mobilenetv2", + downsample=4, + ) net.load_state_dict(torch.load(args.net_path)) if args.cuda: - net = torch.nn.DataParallel(net,device_ids=[0]) + net = torch.nn.DataParallel(net, device_ids=[0]) torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False net = net.cuda() @@ -61,14 +86,22 @@ def test(): wacc4_10.append(0) for id, sample in enumerate(data_loader): - image = sample['image'] - bboxs = sample['bbox'] - MOS = sample['MOS'] + image = sample["image"] + bboxs = sample["bbox"] + MOS = sample["MOS"] roi = [] - for idx in range(0,len(bboxs['xmin'])): - roi.append((0, bboxs['xmin'][idx],bboxs['ymin'][idx],bboxs['xmax'][idx],bboxs['ymax'][idx])) + for idx in range(0, len(bboxs["xmin"])): + roi.append( + ( + 0, + bboxs["xmin"][idx], + bboxs["ymin"][idx], + bboxs["xmax"][idx], + bboxs["ymax"][idx], + ) + ) if args.cuda: image = Variable(image.cuda()) @@ -78,9 +111,9 @@ def test(): roi = Variable(torch.Tensor(roi)) t0 = time.time() - out = net(image,roi) + out = net(image, roi) t1 = time.time() - print('timer: %.4f sec.' % (t1 - t0)) + print("timer: %.4f sec." 
% (t1 - t0)) id_MOS = sorted(range(len(MOS)), key=lambda k: MOS[k], reverse=True) id_out = sorted(range(len(out)), key=lambda k: out[k], reverse=True) @@ -92,35 +125,37 @@ def test(): for k in range(4): temp_acc_4_5 = 0.0 temp_acc_4_10 = 0.0 - for j in range(k+1): + for j in range(k + 1): if MOS[id_out[j]] >= MOS[id_MOS[4]]: temp_acc_4_5 += 1.0 if MOS[id_out[j]] >= MOS[id_MOS[9]]: temp_acc_4_10 += 1.0 - acc4_5[k] += temp_acc_4_5 / (k+1.0) - acc4_10[k] += temp_acc_4_10 / (k+1.0) + acc4_5[k] += temp_acc_4_5 / (k + 1.0) + acc4_10[k] += temp_acc_4_10 / (k + 1.0) for k in range(4): temp_wacc_4_5 = 0.0 temp_wacc_4_10 = 0.0 - temp_rank_of_returned_crop = rank_of_returned_crop[:(k+1)] + temp_rank_of_returned_crop = rank_of_returned_crop[: (k + 1)] temp_rank_of_returned_crop.sort() - for j in range(k+1): + for j in range(k + 1): if temp_rank_of_returned_crop[j] <= 4: - temp_wacc_4_5 += 1.0 * math.exp(-0.2*(temp_rank_of_returned_crop[j]-j)) + temp_wacc_4_5 += 1.0 * math.exp( + -0.2 * (temp_rank_of_returned_crop[j] - j) + ) if temp_rank_of_returned_crop[j] <= 9: - temp_wacc_4_10 += 1.0 * math.exp(-0.1*(temp_rank_of_returned_crop[j]-j)) - wacc4_5[k] += temp_wacc_4_5 / (k+1.0) - wacc4_10[k] += temp_wacc_4_10 / (k+1.0) - + temp_wacc_4_10 += 1.0 * math.exp( + -0.1 * (temp_rank_of_returned_crop[j] - j) + ) + wacc4_5[k] += temp_wacc_4_5 / (k + 1.0) + wacc4_10[k] += temp_wacc_4_10 / (k + 1.0) MOS_arr = [] out = torch.squeeze(out).cpu().detach().numpy() for k in range(len(MOS)): MOS_arr.append(MOS[k].numpy()[0]) - srcc.append(spearmanr(MOS_arr,out)[0]) - pcc.append(pearsonr(MOS_arr,out)[0]) - + srcc.append(spearmanr(MOS_arr, out)[0]) + pcc.append(pearsonr(MOS_arr, out)[0]) for k in range(4): acc4_5[k] = acc4_5[k] / 200.0 @@ -131,10 +166,34 @@ def test(): avg_srcc = sum(srcc) / 200.0 avg_pcc = sum(pcc) / 200.0 - sys.stdout.write('[%.3f, %.3f, %.3f, %.3f] [%.3f, %.3f, %.3f, %.3f]\n' % (acc4_5[0],acc4_5[1],acc4_5[2],acc4_5[3],acc4_10[0],acc4_10[1],acc4_10[2],acc4_10[3])) - sys.stdout.write('[%.3f, %.3f, %.3f, %.3f] [%.3f, %.3f, %.3f, %.3f]\n' % (wacc4_5[0],wacc4_5[1],wacc4_5[2],wacc4_5[3],wacc4_10[0],wacc4_10[1],wacc4_10[2],wacc4_10[3])) - sys.stdout.write('[Avg SRCC: %.3f] [Avg PCC: %.3f]\n' % (avg_srcc,avg_pcc)) - - -if __name__ == '__main__': + sys.stdout.write( + "[%.3f, %.3f, %.3f, %.3f] [%.3f, %.3f, %.3f, %.3f]\n" + % ( + acc4_5[0], + acc4_5[1], + acc4_5[2], + acc4_5[3], + acc4_10[0], + acc4_10[1], + acc4_10[2], + acc4_10[3], + ) + ) + sys.stdout.write( + "[%.3f, %.3f, %.3f, %.3f] [%.3f, %.3f, %.3f, %.3f]\n" + % ( + wacc4_5[0], + wacc4_5[1], + wacc4_5[2], + wacc4_5[3], + wacc4_10[0], + wacc4_10[1], + wacc4_10[2], + wacc4_10[3], + ) + ) + sys.stdout.write("[Avg SRCC: %.3f] [Avg PCC: %.3f]\n" % (avg_srcc, avg_pcc)) + + +if __name__ == "__main__": test() diff --git a/TrainModel.py b/TrainModel.py index 13c9750..8b04a2c 100644 --- a/TrainModel.py +++ b/TrainModel.py @@ -18,24 +18,65 @@ np.random.seed(SEED) random.seed(SEED) -parser = argparse.ArgumentParser(description='Grid anchor based image cropping') -parser.add_argument('--dataset_root', default='dataset/GAIC/', help='Dataset root directory path') -parser.add_argument('--base_model', default='mobilenetv2', help='Pretrained base model') -parser.add_argument('--scale', default='multi', type=str, help='choose single or multi scale') -parser.add_argument('--downsample', default=4, type=int, help='downsample time') -parser.add_argument('--augmentation', default=1, type=int, help='choose single or multi scale') -parser.add_argument('--image_size', default=256, 
type=int, help='Batch size for training') -parser.add_argument('--align_size', default=9, type=int, help='Spatial size of RoIAlign and RoDAlign') -parser.add_argument('--reduced_dim', default=8, type=int, help='Spatial size of RoIAlign and RoDAlign') -parser.add_argument('--batch_size', default=1, type=int, help='Batch size for training') -parser.add_argument('--resume', default=None, type=str, help='Checkpoint state_dict file to resume training from') -parser.add_argument('--start_iter', default=0, type=int, help='Resume training at this iter') -parser.add_argument('--num_workers', default=0, type=int, help='Number of workers used in dataloading') -parser.add_argument('--lr', '--learning-rate', default=1e-4, type=float, help='initial learning rate') -parser.add_argument('--save_folder', default='weights/ablation/cropping/', help='Directory for saving checkpoint models') +parser = argparse.ArgumentParser(description="Grid anchor based image cropping") +parser.add_argument( + "--dataset_root", default="dataset/GAIC/", help="Dataset root directory path" +) +parser.add_argument("--base_model", default="mobilenetv2", help="Pretrained base model") +parser.add_argument( + "--scale", default="multi", type=str, help="choose single or multi scale" +) +parser.add_argument("--downsample", default=4, type=int, help="downsample time") +parser.add_argument( + "--augmentation", default=1, type=int, help="choose single or multi scale" +) +parser.add_argument( + "--image_size", default=256, type=int, help="Batch size for training" +) +parser.add_argument( + "--align_size", default=9, type=int, help="Spatial size of RoIAlign and RoDAlign" +) +parser.add_argument( + "--reduced_dim", default=8, type=int, help="Spatial size of RoIAlign and RoDAlign" +) +parser.add_argument("--batch_size", default=1, type=int, help="Batch size for training") +parser.add_argument( + "--resume", + default=None, + type=str, + help="Checkpoint state_dict file to resume training from", +) +parser.add_argument( + "--start_iter", default=0, type=int, help="Resume training at this iter" +) +parser.add_argument( + "--num_workers", default=0, type=int, help="Number of workers used in dataloading" +) +parser.add_argument( + "--lr", "--learning-rate", default=1e-4, type=float, help="initial learning rate" +) +parser.add_argument( + "--save_folder", + default="weights/ablation/cropping/", + help="Directory for saving checkpoint models", +) args = parser.parse_args() -args.save_folder = args.save_folder + args.base_model + '/' + 'downsample' + str(args.downsample) + '_' + args.scale + '_Aug' + str(args.augmentation) + '_Align' +str(args.align_size) + '_Cdim'+str(args.reduced_dim) +args.save_folder = ( + args.save_folder + + args.base_model + + "/" + + "downsample" + + str(args.downsample) + + "_" + + args.scale + + "_Aug" + + str(args.augmentation) + + "_Align" + + str(args.align_size) + + "_Cdim" + + str(args.reduced_dim) +) if not os.path.exists(args.save_folder): os.makedirs(args.save_folder) @@ -43,31 +84,53 @@ cuda = True if torch.cuda.is_available() else False if cuda: - torch.set_default_tensor_type('torch.cuda.FloatTensor') + torch.set_default_tensor_type("torch.cuda.FloatTensor") else: - torch.set_default_tensor_type('torch.FloatTensor') - - -data_loader_train = data.DataLoader(GAICD(image_size=args.image_size, dataset_dir=args.dataset_root, set='train', augmentation=args.augmentation), - batch_size=args.batch_size, num_workers=args.num_workers, shuffle=True, worker_init_fn=random.seed(SEED)) - -data_loader_test = 
data.DataLoader(GAICD(image_size=args.image_size, dataset_dir=args.dataset_root, set='test'), - batch_size=args.batch_size, num_workers=args.num_workers, shuffle=False) - -net = build_crop_model(scale=args.scale, alignsize=args.align_size, reddim=args.reduced_dim, loadweight=True, model=args.base_model, downsample=args.downsample) + torch.set_default_tensor_type("torch.FloatTensor") + + +data_loader_train = data.DataLoader( + GAICD( + image_size=args.image_size, + dataset_dir=args.dataset_root, + set="train", + augmentation=args.augmentation, + ), + batch_size=args.batch_size, + num_workers=args.num_workers, + shuffle=True, + worker_init_fn=random.seed(SEED), +) + +data_loader_test = data.DataLoader( + GAICD(image_size=args.image_size, dataset_dir=args.dataset_root, set="test"), + batch_size=args.batch_size, + num_workers=args.num_workers, + shuffle=False, +) + +net = build_crop_model( + scale=args.scale, + alignsize=args.align_size, + reddim=args.reduced_dim, + loadweight=True, + model=args.base_model, + downsample=args.downsample, +) # fix the batch normalization in mobilenet and shufflenet because batchsize = 1 net.eval() if cuda: - net = torch.nn.DataParallel(net,device_ids=[0]) + net = torch.nn.DataParallel(net, device_ids=[0]) torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False - #cudnn.benchmark = True + # cudnn.benchmark = True net = net.cuda() optimizer = optim.Adam(net.parameters(), lr=args.lr) + def test(): acc4_5 = [] acc4_10 = [] @@ -84,14 +147,22 @@ def test(): wacc4_10.append(0) for id, sample in enumerate(data_loader_test): - image = sample['image'] - bboxs = sample['bbox'] - MOS = sample['MOS'] + image = sample["image"] + bboxs = sample["bbox"] + MOS = sample["MOS"] roi = [] - for idx in range(0,len(bboxs['xmin'])): - roi.append((0, bboxs['xmin'][idx],bboxs['ymin'][idx],bboxs['xmax'][idx],bboxs['ymax'][idx])) + for idx in range(0, len(bboxs["xmin"])): + roi.append( + ( + 0, + bboxs["xmin"][idx], + bboxs["ymin"][idx], + bboxs["xmax"][idx], + bboxs["ymax"][idx], + ) + ) if cuda: image = Variable(image.cuda()) @@ -100,24 +171,26 @@ def test(): image = Variable(image) roi = Variable(roi) - #t0 = time.time() - out = net(image,roi) - loss = torch.nn.SmoothL1Loss(reduction='elementwise_mean')(out.squeeze(), torch.Tensor(MOS)) + # t0 = time.time() + out = net(image, roi) + loss = torch.nn.SmoothL1Loss(reduction="elementwise_mean")( + out.squeeze(), torch.Tensor(MOS) + ) total_loss += loss.item() - avg_loss = total_loss / (id+1) + avg_loss = total_loss / (id + 1) - id_MOS = sorted(range(len(MOS)), key=lambda k: MOS[k], reverse = True) - id_out = sorted(range(len(out)), key=lambda k: out[k], reverse = True) + id_MOS = sorted(range(len(MOS)), key=lambda k: MOS[k], reverse=True) + id_out = sorted(range(len(out)), key=lambda k: out[k], reverse=True) for k in range(4): temp_acc_4_5 = 0.0 temp_acc_4_10 = 0.0 - for j in range(k+1): + for j in range(k + 1): if MOS[id_out[j]] >= MOS[id_MOS[4]]: temp_acc_4_5 += 1.0 if MOS[id_out[j]] >= MOS[id_MOS[9]]: temp_acc_4_10 += 1.0 - acc4_5[k] += temp_acc_4_5 / (k+1.0) - acc4_10[k] += temp_acc_4_10 / (k+1.0) + acc4_5[k] += temp_acc_4_5 / (k + 1.0) + acc4_10[k] += temp_acc_4_10 / (k + 1.0) rank_of_returned_crop = [] for k in range(4): @@ -126,26 +199,30 @@ def test(): for k in range(4): temp_wacc_4_5 = 0.0 temp_wacc_4_10 = 0.0 - temp_rank_of_returned_crop = rank_of_returned_crop[:(k+1)] + temp_rank_of_returned_crop = rank_of_returned_crop[: (k + 1)] temp_rank_of_returned_crop.sort() - for j in range(k+1): + for j in range(k 
+ 1): if temp_rank_of_returned_crop[j] <= 4: - temp_wacc_4_5 += 1.0 * math.exp(-0.2*(temp_rank_of_returned_crop[j]-j)) + temp_wacc_4_5 += 1.0 * math.exp( + -0.2 * (temp_rank_of_returned_crop[j] - j) + ) if temp_rank_of_returned_crop[j] <= 9: - temp_wacc_4_10 += 1.0 * math.exp(-0.1*(temp_rank_of_returned_crop[j]-j)) - wacc4_5[k] += temp_wacc_4_5 / (k+1.0) - wacc4_10[k] += temp_wacc_4_10 / (k+1.0) + temp_wacc_4_10 += 1.0 * math.exp( + -0.1 * (temp_rank_of_returned_crop[j] - j) + ) + wacc4_5[k] += temp_wacc_4_5 / (k + 1.0) + wacc4_10[k] += temp_wacc_4_10 / (k + 1.0) MOS_arr = [] out = torch.squeeze(out).cpu().detach().numpy() for k in range(len(MOS)): MOS_arr.append(MOS[k].numpy()[0]) - srcc.append(spearmanr(MOS_arr,out)[0]) - pcc.append(pearsonr(MOS_arr,out)[0]) + srcc.append(spearmanr(MOS_arr, out)[0]) + pcc.append(pearsonr(MOS_arr, out)[0]) - #t1 = time.time() + # t1 = time.time() - #print('timer: %.4f sec.' % (t1 - t0)) + # print('timer: %.4f sec.' % (t1 - t0)) for k in range(4): acc4_5[k] = acc4_5[k] / 200.0 acc4_10[k] = acc4_10[k] / 200.0 @@ -155,7 +232,6 @@ def test(): avg_srcc = sum(srcc) / 200.0 avg_pcc = sum(pcc) / 200.0 - return acc4_5, acc4_10, avg_srcc, avg_pcc, avg_loss, wacc4_5, wacc4_10 @@ -165,18 +241,26 @@ def train(): total_loss = 0 for id, sample in enumerate(data_loader_train): - image = sample['image'] - bboxs = sample['bbox'] + image = sample["image"] + bboxs = sample["bbox"] roi = [] MOS = [] - random_ID = range(0,len(bboxs['xmin'])) + random_ID = range(0, len(bboxs["xmin"])) random.shuffle(random_ID) for idx in random_ID[:64]: - roi.append((0, bboxs['xmin'][idx],bboxs['ymin'][idx],bboxs['xmax'][idx],bboxs['ymax'][idx])) - MOS.append(sample['MOS'][idx]) + roi.append( + ( + 0, + bboxs["xmin"][idx], + bboxs["ymin"][idx], + bboxs["xmax"][idx], + bboxs["ymax"][idx], + ) + ) + MOS.append(sample["MOS"][idx]) if cuda: image = Variable(image.cuda()) @@ -188,23 +272,74 @@ def train(): # forward - out = net(image,roi) - loss = torch.nn.SmoothL1Loss(reduction='elementwise_mean')(out.squeeze(), MOS) + out = net(image, roi) + loss = torch.nn.SmoothL1Loss(reduction="elementwise_mean")( + out.squeeze(), MOS + ) total_loss += loss.item() - avg_loss = total_loss / (id+1) + avg_loss = total_loss / (id + 1) # backprop optimizer.zero_grad() loss.backward() optimizer.step() - sys.stdout.write('\r[Epoch %d/%d] [Batch %d/%d] [Train Loss: %.4f]' % (epoch, 79, id, len(data_loader_train), avg_loss)) + sys.stdout.write( + "\r[Epoch %d/%d] [Batch %d/%d] [Train Loss: %.4f]" + % (epoch, 79, id, len(data_loader_train), avg_loss) + ) acc4_5, acc4_10, avg_srcc, avg_pcc, test_avg_loss, wacc4_5, wacc4_10 = test() - sys.stdout.write('[Test Loss: %.4f] [%.3f, %.3f, %.3f, %.3f] [%.3f, %.3f, %.3f, %.3f] [SRCC: %.3f] [PCC: %.3f]\n' % (test_avg_loss,acc4_5[0],acc4_5[1],acc4_5[2],acc4_5[3],acc4_10[0],acc4_10[1],acc4_10[2],acc4_10[3],avg_srcc,avg_pcc)) - sys.stdout.write('[%.3f, %.3f, %.3f, %.3f] [%.3f, %.3f, %.3f, %.3f]\n' % (wacc4_5[0],wacc4_5[1],wacc4_5[2],wacc4_5[3],wacc4_10[0],wacc4_10[1],wacc4_10[2],wacc4_10[3])) - torch.save(net.module.state_dict(), args.save_folder + '/' + repr(epoch) + '_%.3f_%.3f_%.3f_%.3f_%.3f_%.3f_%.3f_%.3f_%.3f_%.3f' % (acc4_5[0],acc4_5[1],acc4_5[2],acc4_5[3],acc4_10[0],acc4_10[1],acc4_10[2],acc4_10[3],avg_srcc,avg_pcc) + '.pth') - - -if __name__ == '__main__': + sys.stdout.write( + "[Test Loss: %.4f] [%.3f, %.3f, %.3f, %.3f] [%.3f, %.3f, %.3f, %.3f] [SRCC: %.3f] [PCC: %.3f]\n" + % ( + test_avg_loss, + acc4_5[0], + acc4_5[1], + acc4_5[2], + acc4_5[3], + acc4_10[0], + acc4_10[1], + 
acc4_10[2], + acc4_10[3], + avg_srcc, + avg_pcc, + ) + ) + sys.stdout.write( + "[%.3f, %.3f, %.3f, %.3f] [%.3f, %.3f, %.3f, %.3f]\n" + % ( + wacc4_5[0], + wacc4_5[1], + wacc4_5[2], + wacc4_5[3], + wacc4_10[0], + wacc4_10[1], + wacc4_10[2], + wacc4_10[3], + ) + ) + torch.save( + net.module.state_dict(), + args.save_folder + + "/" + + repr(epoch) + + "_%.3f_%.3f_%.3f_%.3f_%.3f_%.3f_%.3f_%.3f_%.3f_%.3f" + % ( + acc4_5[0], + acc4_5[1], + acc4_5[2], + acc4_5[3], + acc4_10[0], + acc4_10[1], + acc4_10[2], + acc4_10[3], + avg_srcc, + avg_pcc, + ) + + ".pth", + ) + + +if __name__ == "__main__": train() diff --git a/augmentations.py b/augmentations.py index a330903..f3f0da3 100644 --- a/augmentations.py +++ b/augmentations.py @@ -16,10 +16,8 @@ def intersect(box_a, box_b): def jaccard_numpy(box_a, box_b): inter = intersect(box_a, box_b) - area_a = ((box_a[:, 2]-box_a[:, 0]) * - (box_a[:, 3]-box_a[:, 1])) # [A,B] - area_b = ((box_b[2]-box_b[0]) * - (box_b[3]-box_b[1])) # [A,B] + area_a = (box_a[:, 2] - box_a[:, 0]) * (box_a[:, 3] - box_a[:, 1]) # [A,B] + area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1]) # [A,B] union = area_a + area_b - inter return inter / union # [A,B] @@ -94,8 +92,7 @@ def __init__(self, size=300): self.size = size def __call__(self, image, boxes=None, labels=None): - image = cv2.resize(image, (self.size, - self.size)) + image = cv2.resize(image, (self.size, self.size)) return image, boxes, labels @@ -128,9 +125,7 @@ def __call__(self, image, boxes=None, labels=None): class RandomLightingNoise(object): def __init__(self): - self.perms = ((0, 1, 2), (0, 2, 1), - (1, 0, 2), (1, 2, 0), - (2, 0, 1), (2, 1, 0)) + self.perms = ((0, 1, 2), (0, 2, 1), (1, 0, 2), (1, 2, 0), (2, 0, 1), (2, 1, 0)) def __call__(self, image, boxes=None, labels=None): if random.randint(2): @@ -141,14 +136,14 @@ def __call__(self, image, boxes=None, labels=None): class ConvertColor(object): - def __init__(self, current='BGR', transform='HSV'): + def __init__(self, current="BGR", transform="HSV"): self.transform = transform self.current = current def __call__(self, image, boxes=None, labels=None): - if self.current == 'BGR' and self.transform == 'HSV': + if self.current == "BGR" and self.transform == "HSV": image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) - elif self.current == 'HSV' and self.transform == 'BGR': + elif self.current == "HSV" and self.transform == "BGR": image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) else: raise NotImplementedError @@ -185,12 +180,20 @@ def __call__(self, image, boxes=None, labels=None): class ToCV2Image(object): def __call__(self, tensor, boxes=None, labels=None): - return tensor.cpu().numpy().astype(np.float32).transpose((1, 2, 0)), boxes, labels + return ( + tensor.cpu().numpy().astype(np.float32).transpose((1, 2, 0)), + boxes, + labels, + ) class ToTensor(object): def __call__(self, cvimage, boxes=None, labels=None): - return torch.from_numpy(cvimage.astype(np.float32)).permute(2, 0, 1), boxes, labels + return ( + torch.from_numpy(cvimage.astype(np.float32)).permute(2, 0, 1), + boxes, + labels, + ) class RandomSampleCrop(object): @@ -206,6 +209,7 @@ class RandomSampleCrop(object): boxes (Tensor): the adjusted bounding boxes in pt form labels (Tensor): the class labels for each bbox """ + def __init__(self): self.sample_options = ( # using entire original input image @@ -229,9 +233,9 @@ def __call__(self, image, boxes=None, labels=None): min_iou, max_iou = mode if min_iou is None: - min_iou = float('-inf') + min_iou = float("-inf") if max_iou is None: - max_iou = float('inf') 
+ max_iou = float("inf") # max trails (50) for _ in range(50): @@ -248,7 +252,7 @@ def __call__(self, image, boxes=None, labels=None): top = random.uniform(height - h) # convert to integer rect x1,y1,x2,y2 - rect = np.array([int(left), int(top), int(left+w), int(top+h)]) + rect = np.array([int(left), int(top), int(left + w), int(top + h)]) # calculate IoU (jaccard overlap) b/t the cropped and gt boxes overlap = jaccard_numpy(boxes, rect) @@ -258,8 +262,7 @@ def __call__(self, image, boxes=None, labels=None): continue # cut the crop from the image - current_image = current_image[rect[1]:rect[3], rect[0]:rect[2], - :] + current_image = current_image[rect[1] : rect[3], rect[0] : rect[2], :] # keep overlap with gt box IF center in sampled patch centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0 @@ -284,13 +287,11 @@ def __call__(self, image, boxes=None, labels=None): current_labels = labels[mask] # should we use the box left and top corner or the crop's - current_boxes[:, :2] = np.maximum(current_boxes[:, :2], - rect[:2]) + current_boxes[:, :2] = np.maximum(current_boxes[:, :2], rect[:2]) # adjust to crop (by substracting crop's left,top) current_boxes[:, :2] -= rect[:2] - current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:], - rect[2:]) + current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:], rect[2:]) # adjust to crop (by substracting crop's left,top) current_boxes[:, 2:] -= rect[:2] @@ -307,15 +308,16 @@ def __call__(self, image, boxes, labels): height, width, depth = image.shape ratio = random.uniform(1, 4) - left = random.uniform(0, width*ratio - width) - top = random.uniform(0, height*ratio - height) + left = random.uniform(0, width * ratio - width) + top = random.uniform(0, height * ratio - height) expand_image = np.zeros( - (int(height*ratio), int(width*ratio), depth), - dtype=image.dtype) + (int(height * ratio), int(width * ratio), depth), dtype=image.dtype + ) expand_image[:, :, :] = self.mean - expand_image[int(top):int(top + height), - int(left):int(left + width)] = image + expand_image[ + int(top) : int(top + height), int(left) : int(left + width) + ] = image image = expand_image boxes = boxes.copy() @@ -330,7 +332,7 @@ def __call__(self, image, annotations, classes): _, width, _ = image.shape if random.randint(2): image = image[:, ::-1] - for i in range (len(annotations)): + for i in range(len(annotations)): annotations[i][1] = width - annotations[i][1] annotations[i][3] = width - annotations[i][3] return image, annotations, classes @@ -366,11 +368,11 @@ class PhotometricDistort(object): def __init__(self): self.pd = [ RandomContrast(), - ConvertColor(transform='HSV'), + ConvertColor(transform="HSV"), RandomSaturation(), RandomHue(), - ConvertColor(current='HSV', transform='BGR'), - RandomContrast() + ConvertColor(current="HSV", transform="BGR"), + RandomContrast(), ] self.rand_brightness = RandomBrightness() self.rand_light_noise = RandomLightingNoise() @@ -387,11 +389,9 @@ def __call__(self, image, boxes, labels): class CropAugmentation(object): def __init__(self): - self.augment = Compose([ - ConvertFromInts(), - PhotometricDistort(), - RandomMirror() - ]) + self.augment = Compose( + [ConvertFromInts(), PhotometricDistort(), RandomMirror()] + ) def __call__(self, img, annotations): image, annotations, label = self.augment(img, annotations) diff --git a/autocrop.py b/autocrop.py new file mode 100644 index 0000000..af1bb97 --- /dev/null +++ b/autocrop.py @@ -0,0 +1,129 @@ +from croppingModel import build_crop_model +from croppingDataset import setup_test_dataset + +import os 
+import torch +from torch.autograd import Variable +from torch.utils.data import DataLoader +import cv2 +import argparse + +from tqdm import tqdm + +networks = { + "mobilenetv2_0.5": { + "model": "mobilenetv2", + "path": "mobilenetv2_0.5-eaa6f9ad.pth", + }, + "mobilenetv2_0.75": { + "model": "mobilenetv2", + "path": "mobilenetv2_0.75-dace9791.pth", + }, + "mobilenetv2_1.0": { + "model": "mobilenetv2", + "path": "mobilenetv2_1.0-0c6065bc.pth", + }, + "shufflenetv2_x0.5": { + "model": "shufflenetv2", + "path": "shufflenetv2_x0.5_60.646_81.696.pth.tar", + }, + "shufflenetv2_x1": { + "model": "shufflenetv2", + "path": "shufflenetv2_x1_69.402_88.374.pth.tar", + }, +} + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument(dest="input_image_dir") + parser.add_argument(dest="output_image_dir") + parser.add_argument( + "--network", choices=sorted(networks), default="mobilenetv2_1.0" + ) + args = parser.parse_args() + + os.makedirs(args.output_image_dir, exist_ok=True) + + net_conf = networks[args.network] + net = build_crop_model( + scale="multi", + alignsize=9, + reddim=8, + loadweight=True, + model=net_conf["model"], + downsample=4, + ) + net.load_state_dict( + torch.load( + os.path.join("pretrained_model", net_conf["path"]), map_location="cpu" + ), + strict=False, + ) + net.eval() + + dataset = setup_test_dataset(dataset_dir=args.input_image_dir) + data_loader = DataLoader( + dataset, batch_size=1, num_workers=0, shuffle=False, pin_memory=True + ) + + for sample in tqdm(data_loader): + imgpath = sample["imgpath"] + image = sample["image"] + bboxes = sample["sourceboxes"] + resized_image = sample["resized_image"] + tbboxes = sample["tbboxes"] + + if len(tbboxes["xmin"]) == 0: + continue + + roi = [] + + for idx in range(0, len(tbboxes["xmin"])): + roi.append( + ( + 0, + tbboxes["xmin"][idx], + tbboxes["ymin"][idx], + tbboxes["xmax"][idx], + tbboxes["ymax"][idx], + ) + ) + + resized_image = Variable(resized_image) + roi = Variable(torch.Tensor(roi)) + + """ + t0 = time.time() + for r in range(0,100): + out = net(resized_image,roi) + t1 = time.time() + print('timer: %.4f sec.' 
% (t1 - t0)) + """ + + out = net(resized_image, roi) + + id_out = sorted(range(len(out)), key=lambda k: out[k], reverse=True) + image = image.cpu().numpy().squeeze(0) + + for i in range(4): + top1_box = bboxes[id_out[i]] + top1_box = [ + top1_box[0].numpy()[0], + top1_box[1].numpy()[0], + top1_box[2].numpy()[0], + top1_box[3].numpy()[0], + ] + top1_crop = image[ + int(top1_box[0]) : int(top1_box[2]), int(top1_box[1]) : int(top1_box[3]) + ] + imgname = os.path.basename(imgpath[0]) + filename, file_extension = os.path.splitext(imgname) + cv2.imwrite( + args.output_image_dir + "/" + filename + "_" + str(i) + file_extension, + top1_crop[:, :, (2, 1, 0)], + ) + + +if __name__ == "__main__": + main() diff --git a/croppingDataset.py b/croppingDataset.py index 8424c3b..1f94e9c 100644 --- a/croppingDataset.py +++ b/croppingDataset.py @@ -12,14 +12,13 @@ class TransformFunction(object): - - def __call__(self, sample,image_size): - image, annotations = sample['image'], sample['annotations'] + def __call__(self, sample, image_size): + image, annotations = sample["image"], sample["annotations"] scale = image_size / min(image.shape[:2]) h = round(image.shape[0] * scale / 32.0) * 32 w = round(image.shape[1] * scale / 32.0) * 32 - resized_image = cv2.resize(image,(int(w),int(h))) / 256.0 + resized_image = cv2.resize(image, (int(w), int(h))) / 256.0 rgb_mean = np.array(RGB_MEAN, dtype=np.float32) rgb_std = np.array(RGB_STD, dtype=np.float32) resized_image = resized_image.astype(np.float32) @@ -30,53 +29,76 @@ def __call__(self, sample,image_size): scale_width = float(resized_image.shape[1]) / image.shape[1] transformed_bbox = {} - transformed_bbox['xmin'] = [] - transformed_bbox['ymin'] = [] - transformed_bbox['xmax'] = [] - transformed_bbox['ymax'] = [] + transformed_bbox["xmin"] = [] + transformed_bbox["ymin"] = [] + transformed_bbox["xmax"] = [] + transformed_bbox["ymax"] = [] MOS = [] for annotation in annotations: - transformed_bbox['xmin'].append(math.floor(float(annotation[1]) * scale_width)) - transformed_bbox['ymin'].append(math.floor(float(annotation[0]) * scale_height)) - transformed_bbox['xmax'].append(math.ceil(float(annotation[3]) * scale_width)) - transformed_bbox['ymax'].append(math.ceil(float(annotation[2]) * scale_height)) + transformed_bbox["xmin"].append( + math.floor(float(annotation[1]) * scale_width) + ) + transformed_bbox["ymin"].append( + math.floor(float(annotation[0]) * scale_height) + ) + transformed_bbox["xmax"].append( + math.ceil(float(annotation[3]) * scale_width) + ) + transformed_bbox["ymax"].append( + math.ceil(float(annotation[2]) * scale_height) + ) MOS.append((float(annotation[-1]) - MOS_MEAN) / MOS_STD) resized_image = resized_image.transpose((2, 0, 1)) - return {'image': resized_image, 'bbox': transformed_bbox, 'MOS': MOS} + return {"image": resized_image, "bbox": transformed_bbox, "MOS": MOS} -class GAICD(data.Dataset): - def __init__(self, image_size=256, dataset_dir='dataset/GAIC/', set = 'train', - transform=TransformFunction(), augmentation=False): +class GAICD(data.Dataset): + def __init__( + self, + image_size=256, + dataset_dir="dataset/GAIC/", + set="train", + transform=TransformFunction(), + augmentation=False, + ): self.image_size = float(image_size) self.dataset_dir = dataset_dir self.set = set - image_lists = os.listdir(self.dataset_dir + 'images/' + set) + image_lists = os.listdir(self.dataset_dir + "images/" + set) self._imgpath = list() self._annopath = list() for image in image_lists: - self._imgpath.append(os.path.join(self.dataset_dir, 'images', 
set, image)) - self._annopath.append(os.path.join(self.dataset_dir, 'annotations', set, image[:-3]+"txt")) + self._imgpath.append(os.path.join(self.dataset_dir, "images", set, image)) + self._annopath.append( + os.path.join(self.dataset_dir, "annotations", set, image[:-3] + "txt") + ) self.transform = transform if augmentation: self.augmentation = CropAugmentation() else: self.augmentation = None - def __getitem__(self, idx): image = cv2.imread(self._imgpath[idx]) - with open(self._annopath[idx],'r') as fid: + with open(self._annopath[idx], "r") as fid: annotations_txt = fid.readlines() annotations = list() for annotation in annotations_txt: annotation_split = annotation.split() if float(annotation_split[4]) != -2: - annotations.append([float(annotation_split[0]),float(annotation_split[1]),float(annotation_split[2]),float(annotation_split[3]),float(annotation_split[4])]) + annotations.append( + [ + float(annotation_split[0]), + float(annotation_split[1]), + float(annotation_split[2]), + float(annotation_split[3]), + float(annotation_split[4]), + ] + ) if self.augmentation: image, annotations = self.augmentation(image, annotations) @@ -84,10 +106,10 @@ def __getitem__(self, idx): # to rgb image = image[:, :, (2, 1, 0)] - sample = {'image': image, 'annotations': annotations} + sample = {"image": image, "annotations": annotations} if self.transform: - sample = self.transform(sample,self.image_size) + sample = self.transform(sample, self.image_size) return sample @@ -96,13 +118,12 @@ def __len__(self): class TransformFunctionTest(object): - def __call__(self, image, image_size): scale = image_size / min(image.shape[:2]) h = round(image.shape[0] * scale / 32.0) * 32 w = round(image.shape[1] * scale / 32.0) * 32 - resized_image = cv2.resize(image,(int(w),int(h))) / 256.0 + resized_image = cv2.resize(image, (int(w), int(h))) / 256.0 rgb_mean = np.array(RGB_MEAN, dtype=np.float32) rgb_std = np.array(RGB_STD, dtype=np.float32) resized_image = resized_image.astype(np.float32) @@ -115,21 +136,28 @@ def __call__(self, image, image_size): bboxes = generate_bboxes(resized_image) transformed_bbox = {} - transformed_bbox['xmin'] = [] - transformed_bbox['ymin'] = [] - transformed_bbox['xmax'] = [] - transformed_bbox['ymax'] = [] + transformed_bbox["xmin"] = [] + transformed_bbox["ymin"] = [] + transformed_bbox["xmax"] = [] + transformed_bbox["ymax"] = [] source_bboxes = list() for bbox in bboxes: - source_bboxes.append([round(bbox[0] * scale_height),round(bbox[1] * scale_width),round(bbox[2] * scale_height),round(bbox[3] * scale_width)]) - transformed_bbox['xmin'].append(bbox[1]) - transformed_bbox['ymin'].append(bbox[0]) - transformed_bbox['xmax'].append(bbox[3]) - transformed_bbox['ymax'].append(bbox[2]) + source_bboxes.append( + [ + round(bbox[0] * scale_height), + round(bbox[1] * scale_width), + round(bbox[2] * scale_height), + round(bbox[3] * scale_width), + ] + ) + transformed_bbox["xmin"].append(bbox[1]) + transformed_bbox["ymin"].append(bbox[0]) + transformed_bbox["xmax"].append(bbox[3]) + transformed_bbox["ymax"].append(bbox[2]) resized_image = resized_image.transpose((2, 0, 1)) - return resized_image,transformed_bbox,source_bboxes + return resized_image, transformed_bbox, source_bboxes def generate_bboxes(image): @@ -140,15 +168,27 @@ def generate_bboxes(image): step_h = h / bins step_w = w / bins annotations = list() - for x1 in range(0,4): - for y1 in range(0,4): - for x2 in range(8,12): - for y2 in range(8,12): - if (x2-x1)*(y2-y1)>0.4999*bins*bins and (y2-y1)*step_w/(x2-x1)/step_h>0.5 and 
(y2-y1)*step_w/(x2-x1)/step_h<2.0: - annotations.append([float(step_h*(0.5+x1)),float(step_w*(0.5+y1)),float(step_h*(0.5+x2)),float(step_w*(0.5+y2))]) + for x1 in range(0, 4): + for y1 in range(0, 4): + for x2 in range(8, 12): + for y2 in range(8, 12): + if ( + (x2 - x1) * (y2 - y1) > 0.4999 * bins * bins + and (y2 - y1) * step_w / (x2 - x1) / step_h > 0.5 + and (y2 - y1) * step_w / (x2 - x1) / step_h < 2.0 + ): + annotations.append( + [ + float(step_h * (0.5 + x1)), + float(step_w * (0.5 + y1)), + float(step_h * (0.5 + x2)), + float(step_w * (0.5 + y2)), + ] + ) return annotations + def generate_bboxes_16_9(image): h = image.shape[0] @@ -156,15 +196,23 @@ def generate_bboxes_16_9(image): h_step = 9 w_step = 16 annotations = list() - for i in range(14,30): - out_h = h_step*i - out_w = w_step*i - if out_h < h and out_w < w and out_h*out_w>0.4*h*w: - for w_start in range(0,w-out_w,w_step): - for h_start in range(0,h-out_h,h_step): - annotations.append([float(h_start),float(w_start),float(h_start+out_h-1),float(w_start+out_w-1)]) + for i in range(14, 30): + out_h = h_step * i + out_w = w_step * i + if out_h < h and out_w < w and out_h * out_w > 0.4 * h * w: + for w_start in range(0, w - out_w, w_step): + for h_start in range(0, h - out_h, h_step): + annotations.append( + [ + float(h_start), + float(w_start), + float(h_start + out_h - 1), + float(w_start + out_w - 1), + ] + ) return annotations + def generate_bboxes_4_3(image): h = image.shape[0] @@ -172,15 +220,23 @@ def generate_bboxes_4_3(image): h_step = 12 w_step = 16 annotations = list() - for i in range(14,30): - out_h = h_step*i - out_w = w_step*i - if out_h < h and out_w < w and out_h*out_w>0.4*h*w: - for w_start in range(0,w-out_w,w_step): - for h_start in range(0,h-out_h,h_step): - annotations.append([float(h_start),float(w_start),float(h_start+out_h-1),float(w_start+out_w-1)]) + for i in range(14, 30): + out_h = h_step * i + out_w = w_step * i + if out_h < h and out_w < w and out_h * out_w > 0.4 * h * w: + for w_start in range(0, w - out_w, w_step): + for h_start in range(0, h - out_h, h_step): + annotations.append( + [ + float(h_start), + float(w_start), + float(h_start + out_h - 1), + float(w_start + out_w - 1), + ] + ) return annotations + def generate_bboxes_1_1(image): h = image.shape[0] @@ -188,28 +244,39 @@ def generate_bboxes_1_1(image): h_step = 12 w_step = 12 annotations = list() - for i in range(14,30): - out_h = h_step*i - out_w = w_step*i - if out_h < h and out_w < w and out_h*out_w>0.4*h*w: - for w_start in range(0,w-out_w,w_step): - for h_start in range(0,h-out_h,h_step): - annotations.append([float(h_start),float(w_start),float(h_start+out_h-1),float(w_start+out_w-1)]) + for i in range(14, 30): + out_h = h_step * i + out_w = w_step * i + if out_h < h and out_w < w and out_h * out_w > 0.4 * h * w: + for w_start in range(0, w - out_w, w_step): + for h_start in range(0, h - out_h, h_step): + annotations.append( + [ + float(h_start), + float(w_start), + float(h_start + out_h - 1), + float(w_start + out_w - 1), + ] + ) return annotations -class setup_test_dataset(data.Dataset): - def __init__(self, image_size=256.0,dataset_dir='testsetDir', transform=TransformFunctionTest()): +class setup_test_dataset(data.Dataset): + def __init__( + self, + image_size=256.0, + dataset_dir="testsetDir", + transform=TransformFunctionTest(), + ): self.image_size = float(image_size) self.dataset_dir = dataset_dir - image_lists = os.listdir(self.dataset_dir) + image_lists = sorted(os.listdir(self.dataset_dir)) self._imgpath = list() 
self._annopath = list() for image in image_lists: - self._imgpath.append(os.path.join(self.dataset_dir, image)) + self._imgpath.append(os.path.join(self.dataset_dir, image)) self.transform = transform - def __getitem__(self, idx): image = cv2.imread(self._imgpath[idx]) @@ -217,12 +284,19 @@ def __getitem__(self, idx): image = image[:, :, (2, 1, 0)] if self.transform: - resized_image,transformed_bbox,source_bboxes = self.transform(image,self.image_size) - - sample = {'imgpath': self._imgpath[idx], 'image': image, 'resized_image': resized_image, 'tbboxes':transformed_bbox , 'sourceboxes': source_bboxes} + resized_image, transformed_bbox, source_bboxes = self.transform( + image, self.image_size + ) + + sample = { + "imgpath": self._imgpath[idx], + "image": image, + "resized_image": resized_image, + "tbboxes": transformed_bbox, + "sourceboxes": source_bboxes, + } return sample def __len__(self): return len(self._imgpath) - diff --git a/croppingModel.py b/croppingModel.py index 5352232..9d9b1f4 100644 --- a/croppingModel.py +++ b/croppingModel.py @@ -10,7 +10,6 @@ class vgg_base(nn.Module): - def __init__(self, loadweights=True, downsample=4): super(vgg_base, self).__init__() @@ -25,30 +24,37 @@ def __init__(self, loadweights=True, downsample=4): self.feature4 = nn.Sequential(vgg.features[23:30]) self.feature5 = nn.Sequential(vgg.features[30:]) - #flops, params = profile(self.feature, input_size=(1, 3, 256,256)) + # flops, params = profile(self.feature, input_size=(1, 3, 256,256)) def forward(self, x): - #return self.feature(x) + # return self.feature(x) f3 = self.feature3(x) f4 = self.feature4(f3) f5 = self.feature5(f4) return f3, f4, f5 -class resnet50_base(nn.Module): +class resnet50_base(nn.Module): def __init__(self, loadweights=True, downsample=4): super(resnet50_base, self).__init__() resnet50 = models.resnet50(pretrained=True) - self.feature3 = nn.Sequential(resnet50.conv1,resnet50.bn1,resnet50.relu,resnet50.maxpool,resnet50.layer1,resnet50.layer2) + self.feature3 = nn.Sequential( + resnet50.conv1, + resnet50.bn1, + resnet50.relu, + resnet50.maxpool, + resnet50.layer1, + resnet50.layer2, + ) self.feature4 = nn.Sequential(resnet50.layer3) self.feature5 = nn.Sequential(resnet50.layer4) - #flops, params = profile(self.feature, input_size=(1, 3, 256,256)) + # flops, params = profile(self.feature, input_size=(1, 3, 256,256)) def forward(self, x): - #return self.feature(x) + # return self.feature(x) f3 = self.feature3(x) f4 = self.feature4(f3) f5 = self.feature5(f4) @@ -56,28 +62,32 @@ def forward(self, x): class mobilenetv2_base(nn.Module): - - def __init__(self, loadweights=True, downsample=4, model_path='pretrained_model/mobilenetv2_1.0-0c6065bc.pth'): + def __init__( + self, + loadweights=True, + downsample=4, + model_path="pretrained_model/mobilenetv2_1.0-0c6065bc.pth", + ): super(mobilenetv2_base, self).__init__() model = MobileNetV2(width_mult=1.0) if loadweights: - model.load_state_dict(torch.load(model_path)) + model.load_state_dict(torch.load(model_path, map_location="cpu")) - #if downsample == 4: + # if downsample == 4: # self.feature = nn.Sequential(model.features[:14]) - #elif downsample == 5: + # elif downsample == 5: # self.feature = nn.Sequential(model.features) self.feature3 = nn.Sequential(model.features[:7]) self.feature4 = nn.Sequential(model.features[7:14]) self.feature5 = nn.Sequential(model.features[14:]) - #flops, params = profile(self.feature, input_size=(1, 3, 256,256)) + # flops, params = profile(self.feature, input_size=(1, 3, 256,256)) def forward(self, x): - 
#return self.feature(x) + # return self.feature(x) f3 = self.feature3(x) f4 = self.feature4(f3) f5 = self.feature5(f4) @@ -85,8 +95,12 @@ def forward(self, x): class shufflenetv2_base(nn.Module): - - def __init__(self, loadweights=True, downsample=4, model_path='pretrained_model/shufflenetv2_x1_69.402_88.374.pth.tar'): + def __init__( + self, + loadweights=True, + downsample=4, + model_path="pretrained_model/shufflenetv2_x1_69.402_88.374.pth.tar", + ): super(shufflenetv2_base, self).__init__() model = shufflenetv2(width_mult=1.0) @@ -98,71 +112,74 @@ def __init__(self, loadweights=True, downsample=4, model_path='pretrained_model/ self.feature4 = nn.Sequential(model.features[4:12]) self.feature5 = nn.Sequential(model.features[12:]) - #if downsample == 4: + # if downsample == 4: # self.feature = nn.Sequential(model.conv1, model.maxpool, model.features[:12]) - #elif downsample == 5: + # elif downsample == 5: # self.feature = nn.Sequential(model.conv1, model.maxpool, model.features) - #flops, params = profile(self.feature, input_size=(1, 3, 256,256)) + # flops, params = profile(self.feature, input_size=(1, 3, 256,256)) def forward(self, x): - #return self.feature(x) + # return self.feature(x) f3 = self.feature3(x) f4 = self.feature4(f3) f5 = self.feature5(f4) return f3, f4, f5 -def fc_layers(reddim = 32, alignsize = 8): - conv1 = nn.Sequential(nn.Conv2d(reddim, 768, kernel_size=alignsize, padding=0),nn.ReLU(inplace=True)) - #conv1 = nn.Sequential(nn.Conv2d(reddim, 768, kernel_size=3, padding=1, stride=2),nn.ReLU(inplace=True), +def fc_layers(reddim=32, alignsize=8): + conv1 = nn.Sequential( + nn.Conv2d(reddim, 768, kernel_size=alignsize, padding=0), nn.ReLU(inplace=True) + ) + # conv1 = nn.Sequential(nn.Conv2d(reddim, 768, kernel_size=3, padding=1, stride=2),nn.ReLU(inplace=True), # nn.Conv2d(768, reddim, kernel_size=1, padding=0),nn.ReLU(inplace=True), # nn.Conv2d(reddim, 768, kernel_size=3, padding=1,stride=2),nn.ReLU(inplace=True), # nn.Conv2d(768, reddim, kernel_size=1, padding=0),nn.ReLU(inplace=True), # nn.Conv2d(reddim, 768, kernel_size=3, padding=0,stride=1),nn.ReLU(inplace=True)) - #conv1 = nn.Sequential(nn.Conv2d(reddim, 768, kernel_size=5, padding=2, stride=2),nn.ReLU(inplace=True), + # conv1 = nn.Sequential(nn.Conv2d(reddim, 768, kernel_size=5, padding=2, stride=2),nn.ReLU(inplace=True), # nn.Conv2d(768, reddim, kernel_size=1, padding=0),nn.ReLU(inplace=True), # nn.Conv2d(reddim, 768, kernel_size=5, padding=0,stride=1),nn.ReLU(inplace=True)) - conv2 = nn.Sequential(nn.Conv2d(768, 128, kernel_size=1),nn.ReLU(inplace=True)) - #dropout = nn.Dropout(p=0.5) + conv2 = nn.Sequential(nn.Conv2d(768, 128, kernel_size=1), nn.ReLU(inplace=True)) + dropout = nn.Dropout(p=0.5) conv3 = nn.Conv2d(128, 1, kernel_size=1) layers = nn.Sequential(conv1, conv2, dropout, conv3) return layers class crop_model_single_scale(nn.Module): - - def __init__(self, alignsize = 8, reddim = 8, loadweight = True, model = None, downsample=4): + def __init__( + self, alignsize=8, reddim=8, loadweight=True, model=None, downsample=4 + ): super(crop_model_single_scale, self).__init__() - if model == 'shufflenetv2': - self.Feat_ext = shufflenetv2_base(loadweight,downsample) + if model == "shufflenetv2": + self.Feat_ext = shufflenetv2_base(loadweight, downsample) if downsample == 4: self.DimRed = nn.Conv2d(232, reddim, kernel_size=1, padding=0) else: self.DimRed = nn.Conv2d(464, reddim, kernel_size=1, padding=0) - elif model == 'mobilenetv2': - self.Feat_ext = mobilenetv2_base(loadweight,downsample) + elif model == 
"mobilenetv2": + self.Feat_ext = mobilenetv2_base(loadweight, downsample) if downsample == 4: self.DimRed = nn.Conv2d(96, reddim, kernel_size=1, padding=0) else: self.DimRed = nn.Conv2d(320, reddim, kernel_size=1, padding=0) - elif model == 'vgg16': - self.Feat_ext = vgg_base(loadweight,downsample) + elif model == "vgg16": + self.Feat_ext = vgg_base(loadweight, downsample) self.DimRed = nn.Conv2d(512, reddim, kernel_size=1, padding=0) - elif model == 'resnet50': - self.Feat_ext = resnet50_base(loadweight,downsample) + elif model == "resnet50": + self.Feat_ext = resnet50_base(loadweight, downsample) self.DimRed = nn.Conv2d(1024, reddim, kernel_size=1, padding=0) - self.RoIAlign = RoIAlignAvg(alignsize, alignsize, 1.0/2**downsample) - self.RoDAlign = RoDAlignAvg(alignsize, alignsize, 1.0/2**downsample) - self.FC_layers = fc_layers(reddim*2, alignsize) + self.RoIAlign = RoIAlignAvg(alignsize, alignsize, 1.0 / 2 ** downsample) + self.RoDAlign = RoDAlignAvg(alignsize, alignsize, 1.0 / 2 ** downsample) + self.FC_layers = fc_layers(reddim * 2, alignsize) - #flops, params = profile(self.FC_layers, input_size=(1,reddim*2,9,9)) + # flops, params = profile(self.FC_layers, input_size=(1,reddim*2,9,9)) def forward(self, im_data, boxes): - f3,base_feat,f5 = self.Feat_ext(im_data) + f3, base_feat, f5 = self.Feat_ext(im_data) red_feat = self.DimRed(base_feat) RoI_feat = self.RoIAlign(red_feat, boxes) RoD_feat = self.RoDAlign(red_feat, boxes) @@ -171,37 +188,38 @@ def forward(self, im_data, boxes): return prediction def _init_weights(self): - print('Initializing weights...') + print("Initializing weights...") self.DimRed.apply(weights_init) self.FC_layers.apply(weights_init) class crop_model_multi_scale_individual(nn.Module): - - def __init__(self, alignsize = 8, reddim = 32, loadweight = True, model = None, downsample = 4): + def __init__( + self, alignsize=8, reddim=32, loadweight=True, model=None, downsample=4 + ): super(crop_model_multi_scale_individual, self).__init__() - if model == 'shufflenetv2': - self.Feat_ext1 = shufflenetv2_base(loadweight,downsample) - self.Feat_ext2 = shufflenetv2_base(loadweight,downsample) - self.Feat_ext3 = shufflenetv2_base(loadweight,downsample) + if model == "shufflenetv2": + self.Feat_ext1 = shufflenetv2_base(loadweight, downsample) + self.Feat_ext2 = shufflenetv2_base(loadweight, downsample) + self.Feat_ext3 = shufflenetv2_base(loadweight, downsample) self.DimRed = nn.Conv2d(232, reddim, kernel_size=1, padding=0) - elif model == 'mobilenetv2': - self.Feat_ext1 = mobilenetv2_base(loadweight,downsample) - self.Feat_ext2 = mobilenetv2_base(loadweight,downsample) - self.Feat_ext3 = mobilenetv2_base(loadweight,downsample) + elif model == "mobilenetv2": + self.Feat_ext1 = mobilenetv2_base(loadweight, downsample) + self.Feat_ext2 = mobilenetv2_base(loadweight, downsample) + self.Feat_ext3 = mobilenetv2_base(loadweight, downsample) self.DimRed = nn.Conv2d(96, reddim, kernel_size=1, padding=0) - elif model == 'vgg16': - self.Feat_ext1 = vgg_base(loadweight,downsample) - self.Feat_ext2 = vgg_base(loadweight,downsample) - self.Feat_ext3 = vgg_base(loadweight,downsample) + elif model == "vgg16": + self.Feat_ext1 = vgg_base(loadweight, downsample) + self.Feat_ext2 = vgg_base(loadweight, downsample) + self.Feat_ext3 = vgg_base(loadweight, downsample) self.DimRed = nn.Conv2d(512, reddim, kernel_size=1, padding=0) - self.downsample2 = nn.UpsamplingBilinear2d(scale_factor=1.0/2.0) + self.downsample2 = nn.UpsamplingBilinear2d(scale_factor=1.0 / 2.0) self.upsample2 = 
nn.UpsamplingBilinear2d(scale_factor=2.0) - self.RoIAlign = RoIAlignAvg(alignsize, alignsize, 1.0/2**downsample) - self.RoDAlign = RoDAlignAvg(alignsize, alignsize, 1.0/2**downsample) - self.FC_layers = fc_layers(reddim*2, alignsize) + self.RoIAlign = RoIAlignAvg(alignsize, alignsize, 1.0 / 2 ** downsample) + self.RoDAlign = RoDAlignAvg(alignsize, alignsize, 1.0 / 2 ** downsample) + self.FC_layers = fc_layers(reddim * 2, alignsize) def forward(self, im_data, boxes): @@ -215,8 +233,8 @@ def forward(self, im_data, boxes): down_feat = self.Feat_ext3(down_im) down_feat = self.upsample2(down_feat) - #cat_feat = torch.cat((base_feat,up_feat,down_feat),1) - cat_feat = 0.5*base_feat + 0.35*up_feat + 0.15*down_feat + # cat_feat = torch.cat((base_feat,up_feat,down_feat),1) + cat_feat = 0.5 * base_feat + 0.35 * up_feat + 0.15 * down_feat red_feat = self.DimRed(cat_feat) RoI_feat = self.RoIAlign(red_feat, boxes) RoD_feat = self.RoDAlign(red_feat, boxes) @@ -225,52 +243,53 @@ def forward(self, im_data, boxes): return prediction def _init_weights(self): - print('Initializing weights...') + print("Initializing weights...") self.DimRed.apply(weights_init) self.FC_layers.apply(weights_init) -class crop_model_multi_scale_shared(nn.Module): - def __init__(self, alignsize = 8, reddim = 32, loadweight = True, model = None, downsample = 4): +class crop_model_multi_scale_shared(nn.Module): + def __init__( + self, alignsize=8, reddim=32, loadweight=True, model=None, downsample=4 + ): super(crop_model_multi_scale_shared, self).__init__() - if model == 'shufflenetv2': - self.Feat_ext = shufflenetv2_base(loadweight,downsample) + if model == "shufflenetv2": + self.Feat_ext = shufflenetv2_base(loadweight, downsample) self.DimRed = nn.Conv2d(812, reddim, kernel_size=1, padding=0) - elif model == 'mobilenetv2': - self.Feat_ext = mobilenetv2_base(loadweight,downsample) + elif model == "mobilenetv2": + self.Feat_ext = mobilenetv2_base(loadweight, downsample) self.DimRed = nn.Conv2d(448, reddim, kernel_size=1, padding=0) - elif model == 'vgg16': - self.Feat_ext = vgg_base(loadweight,downsample) + elif model == "vgg16": + self.Feat_ext = vgg_base(loadweight, downsample) self.DimRed = nn.Conv2d(1536, reddim, kernel_size=1, padding=0) - elif model == 'resnet50': - self.Feat_ext = resnet50_base(loadweight,downsample) + elif model == "resnet50": + self.Feat_ext = resnet50_base(loadweight, downsample) self.DimRed = nn.Conv2d(3584, reddim, kernel_size=1, padding=0) - self.downsample2 = nn.UpsamplingBilinear2d(scale_factor=1.0/2.0) + self.downsample2 = nn.UpsamplingBilinear2d(scale_factor=1.0 / 2.0) self.upsample2 = nn.UpsamplingBilinear2d(scale_factor=2.0) - self.RoIAlign = RoIAlignAvg(alignsize, alignsize, 1.0/2**downsample) - self.RoDAlign = RoDAlignAvg(alignsize, alignsize, 1.0/2**downsample) - self.FC_layers = fc_layers(reddim*2, alignsize) - + self.RoIAlign = RoIAlignAvg(alignsize, alignsize, 1.0 / 2 ** downsample) + self.RoDAlign = RoDAlignAvg(alignsize, alignsize, 1.0 / 2 ** downsample) + self.FC_layers = fc_layers(reddim * 2, alignsize) def forward(self, im_data, boxes): - #base_feat = self.Feat_ext(im_data) + # base_feat = self.Feat_ext(im_data) - #up_im = self.upsample2(im_data) - #up_feat = self.Feat_ext(up_im) - #up_feat = self.downsample2(up_feat) + # up_im = self.upsample2(im_data) + # up_feat = self.Feat_ext(up_im) + # up_feat = self.downsample2(up_feat) - #down_im = self.downsample2(im_data) - #down_feat = self.Feat_ext(down_im) - #down_feat = self.upsample2(down_feat) + # down_im = self.downsample2(im_data) + # 
down_feat = self.Feat_ext(down_im) + # down_feat = self.upsample2(down_feat) - f3,f4,f5 = self.Feat_ext(im_data) - cat_feat = torch.cat((self.downsample2(f3),f4,0.5*self.upsample2(f5)),1) + f3, f4, f5 = self.Feat_ext(im_data) + cat_feat = torch.cat((self.downsample2(f3), f4, 0.5 * self.upsample2(f5)), 1) - #cat_feat = torch.cat((base_feat,up_feat,down_feat),1) - #cat_feat = base_feat + 0.35*up_feat + 0.15*down_feat + # cat_feat = torch.cat((base_feat,up_feat,down_feat),1) + # cat_feat = base_feat + 0.35*up_feat + 0.15*down_feat red_feat = self.DimRed(cat_feat) RoI_feat = self.RoIAlign(red_feat, boxes) RoD_feat = self.RoDAlign(red_feat, boxes) @@ -279,10 +298,11 @@ def forward(self, im_data, boxes): return prediction def _init_weights(self): - print('Initializing weights...') + print("Initializing weights...") self.DimRed.apply(weights_init) self.FC_layers.apply(weights_init) + def xavier(param): init.xavier_uniform_(param) @@ -293,12 +313,13 @@ def weights_init(m): m.bias.data.zero_() -def build_crop_model(scale='single', alignsize=8, reddim=32, loadweight=True, model=None, downsample=4): +def build_crop_model( + scale="single", alignsize=8, reddim=32, loadweight=True, model=None, downsample=4 +): - if scale=='single': + if scale == "single": return crop_model_single_scale(alignsize, reddim, loadweight, model, downsample) - elif scale=='multi': - return crop_model_multi_scale_shared(alignsize, reddim, loadweight, model, downsample) - - - + elif scale == "multi": + return crop_model_multi_scale_shared( + alignsize, reddim, loadweight, model, downsample + ) diff --git a/demo_eval.py b/demo_eval.py index 2c19ac2..045ed0e 100644 --- a/demo_eval.py +++ b/demo_eval.py @@ -15,19 +15,30 @@ def str2bool(v): parser = argparse.ArgumentParser( - description='Grid anchor based image cropping With Pytorch') -parser.add_argument('--input_dir', default='dataset/GAIC/images/test', - help='root directory path of testing images') -parser.add_argument('--output_dir', default='dataset/test_result', - help='root directory path of testing images') -parser.add_argument('--batch_size', default=1, type=int, - help='Batch size for training') -parser.add_argument('--num_workers', default=0, type=int, - help='Number of workers used in dataloading') -parser.add_argument('--cuda', default=True, type=str2bool, - help='Use CUDA to train model') -parser.add_argument('--net_path', default='pretrained_model/mobilenet_0.625_0.583_0.553_0.525_0.785_0.762_0.748_0.723_0.783_0.806.pth', - help='Directory for saving checkpoint models') + description="Grid anchor based image cropping With Pytorch" +) +parser.add_argument( + "--input_dir", + default="dataset/GAIC/images/test", + help="root directory path of testing images", +) +parser.add_argument( + "--output_dir", + default="dataset/test_result", + help="root directory path of testing images", +) +parser.add_argument("--batch_size", default=1, type=int, help="Batch size for training") +parser.add_argument( + "--num_workers", default=0, type=int, help="Number of workers used in dataloading" +) +parser.add_argument( + "--cuda", default=False, type=str2bool, help="Use CUDA to train model" +) +parser.add_argument( + "--net_path", + default="pretrained_model/mobilenet_0.625_0.583_0.553_0.525_0.785_0.762_0.748_0.723_0.783_0.806.pth", + help="Directory for saving checkpoint models", +) args = parser.parse_args() if not os.path.exists(args.output_dir): @@ -35,46 +46,68 @@ def str2bool(v): if torch.cuda.is_available(): if args.cuda: - torch.set_default_tensor_type('torch.cuda.FloatTensor') 
+ torch.set_default_tensor_type("torch.cuda.FloatTensor") if not args.cuda: - print("WARNING: It looks like you have a CUDA device, but aren't " + - "using CUDA.\nRun with --cuda for optimal training speed.") - torch.set_default_tensor_type('torch.FloatTensor') + print( + "WARNING: It looks like you have a CUDA device, but aren't " + + "using CUDA.\nRun with --cuda for optimal training speed." + ) + torch.set_default_tensor_type("torch.FloatTensor") else: - torch.set_default_tensor_type('torch.FloatTensor') + torch.set_default_tensor_type("torch.FloatTensor") -dataset = setup_test_dataset(dataset_dir = args.input_dir) +dataset = setup_test_dataset(dataset_dir=args.input_dir) def test(): - net = build_crop_model(scale='multi', alignsize=9, reddim=8, loadweight=True, model='mobilenetv2',downsample=4) - net.load_state_dict(torch.load(args.net_path)) + net = build_crop_model( + scale="multi", + alignsize=9, + reddim=8, + loadweight=True, + model="mobilenetv2", + downsample=4, + ) + net.load_state_dict(torch.load(args.net_path, map_location="cpu"), strict=False) net.eval() if args.cuda: - net = torch.nn.DataParallel(net,device_ids=[0]) + net = torch.nn.DataParallel(net, device_ids=[0]) cudnn.benchmark = True net = net.cuda() - - data_loader = data.DataLoader(dataset, args.batch_size, num_workers=args.num_workers,shuffle=False,pin_memory=True) + data_loader = data.DataLoader( + dataset, + args.batch_size, + num_workers=args.num_workers, + shuffle=False, + pin_memory=True, + ) for id, sample in enumerate(data_loader): - imgpath = sample['imgpath'] - image = sample['image'] - bboxes = sample['sourceboxes'] - resized_image = sample['resized_image'] - tbboxes = sample['tbboxes'] + imgpath = sample["imgpath"] + image = sample["image"] + bboxes = sample["sourceboxes"] + resized_image = sample["resized_image"] + tbboxes = sample["tbboxes"] - if len(tbboxes['xmin'])==0: + if len(tbboxes["xmin"]) == 0: continue roi = [] - for idx in range(0,len(tbboxes['xmin'])): - roi.append((0, tbboxes['xmin'][idx],tbboxes['ymin'][idx],tbboxes['xmax'][idx],tbboxes['ymax'][idx])) + for idx in range(0, len(tbboxes["xmin"])): + roi.append( + ( + 0, + tbboxes["xmin"][idx], + tbboxes["ymin"][idx], + tbboxes["xmax"][idx], + tbboxes["ymax"][idx], + ) + ) if args.cuda: resized_image = Variable(resized_image.cuda()) @@ -83,25 +116,34 @@ def test(): resized_image = Variable(resized_image) roi = Variable(torch.Tensor(roi)) - t0 = time.time() - for r in range(0,100): - out = net(resized_image,roi) + for r in range(0, 100): + out = net(resized_image, roi) t1 = time.time() - print('timer: %.4f sec.' % (t1 - t0)) + print("timer: %.4f sec." 
% (t1 - t0)) - out = net(resized_image,roi) + out = net(resized_image, roi) - id_out = sorted(range(len(out)), key=lambda k: out[k], reverse = True) + id_out = sorted(range(len(out)), key=lambda k: out[k], reverse=True) image = image.cpu().numpy().squeeze(0) for i in range(4): top1_box = bboxes[id_out[i]] - top1_box = [top1_box[0].numpy()[0],top1_box[1].numpy()[0],top1_box[2].numpy()[0],top1_box[3].numpy()[0]] - top1_crop = image[int(top1_box[0]):int(top1_box[2]),int(top1_box[1]):int(top1_box[3])] - imgname = imgpath[0].split('/')[-1] - cv2.imwrite(args.output_dir + '/' + imgname[:-4] + '_' +str(i) + imgname[-4:],top1_crop[:,:,(2, 1, 0)]) - - -if __name__ == '__main__': + top1_box = [ + top1_box[0].numpy()[0], + top1_box[1].numpy()[0], + top1_box[2].numpy()[0], + top1_box[3].numpy()[0], + ] + top1_crop = image[ + int(top1_box[0]) : int(top1_box[2]), int(top1_box[1]) : int(top1_box[3]) + ] + imgname = imgpath[0].split("/")[-1] + cv2.imwrite( + args.output_dir + "/" + imgname[:-4] + "_" + str(i) + imgname[-4:], + top1_crop[:, :, (2, 1, 0)], + ) + + +if __name__ == "__main__": test() diff --git a/mobilenetv2.py b/mobilenetv2.py index 5fee6b1..d6990f8 100644 --- a/mobilenetv2.py +++ b/mobilenetv2.py @@ -9,7 +9,7 @@ import torch.nn as nn import math -__all__ = ['mobilenetv2'] +__all__ = ["mobilenetv2"] def _make_divisible(v, divisor, min_value=None): @@ -36,7 +36,7 @@ def conv_3x3_bn(inp, oup, stride): return nn.Sequential( nn.Conv2d(inp, oup, 3, stride, 1, bias=False), nn.BatchNorm2d(oup), - nn.ReLU6(inplace=True) + nn.ReLU6(inplace=True), ) @@ -44,7 +44,7 @@ def conv_1x1_bn(inp, oup): return nn.Sequential( nn.Conv2d(inp, oup, 1, 1, 0, bias=False), nn.BatchNorm2d(oup), - nn.ReLU6(inplace=True) + nn.ReLU6(inplace=True), ) @@ -59,7 +59,9 @@ def __init__(self, inp, oup, stride, expand_ratio): if expand_ratio == 1: self.conv = nn.Sequential( # dw - nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), + nn.Conv2d( + hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False + ), nn.BatchNorm2d(hidden_dim), nn.ReLU6(inplace=True), # pw-linear @@ -73,7 +75,9 @@ def __init__(self, inp, oup, stride, expand_ratio): nn.BatchNorm2d(hidden_dim), nn.ReLU6(inplace=True), # dw - nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), + nn.Conv2d( + hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False + ), nn.BatchNorm2d(hidden_dim), nn.ReLU6(inplace=True), # pw-linear @@ -89,16 +93,16 @@ def forward(self, x): class MobileNetV2(nn.Module): - def __init__(self, num_classes=1000, input_size=224, width_mult=1.): + def __init__(self, num_classes=1000, input_size=224, width_mult=1.0): super(MobileNetV2, self).__init__() # setting of inverted residual blocks self.cfgs = [ # t, c, n, s - [1, 16, 1, 1], - [6, 24, 2, 2], - [6, 32, 3, 2], - [6, 64, 4, 2], - [6, 96, 3, 1], + [1, 16, 1, 1], + [6, 24, 2, 2], + [6, 32, 3, 2], + [6, 64, 4, 2], + [6, 96, 3, 1], [6, 160, 3, 2], [6, 320, 1, 1], ] @@ -118,7 +122,9 @@ def __init__(self, num_classes=1000, input_size=224, width_mult=1.): input_channel = output_channel self.features = nn.Sequential(*layers) # building last several layers - output_channel = _make_divisible(1280 * width_mult, 8) if width_mult > 1.0 else 1280 + output_channel = ( + _make_divisible(1280 * width_mult, 8) if width_mult > 1.0 else 1280 + ) self.conv = conv_1x1_bn(input_channel, output_channel) self.avgpool = nn.AvgPool2d(input_size // 32, stride=1) self.classifier = nn.Linear(output_channel, num_classes) @@ -137,7 +143,7 @@ def 
_initialize_weights(self): for m in self.modules(): if isinstance(m, nn.Conv2d): n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - m.weight.data.normal_(0, math.sqrt(2. / n)) + m.weight.data.normal_(0, math.sqrt(2.0 / n)) if m.bias is not None: m.bias.data.zero_() elif isinstance(m, nn.BatchNorm2d): @@ -148,9 +154,9 @@ def _initialize_weights(self): m.weight.data.normal_(0, 0.01) m.bias.data.zero_() + def mobilenetv2(**kwargs): """ Constructs a MobileNet V2 model """ return MobileNetV2(**kwargs) - diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..4912a75 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +opencv_python==4.2.0.34 +torch==0.4.1 +torchvision==0.2.0 +numpy==1.18.4 +scipy==1.4.1 diff --git a/rod_align/__init__.pyc b/rod_align/__init__.pyc deleted file mode 100644 index fd63362..0000000 Binary files a/rod_align/__init__.pyc and /dev/null differ diff --git a/rod_align/_ext/__init__.py b/rod_align/_ext/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/rod_align/_ext/__init__.pyc b/rod_align/_ext/__init__.pyc deleted file mode 100644 index f9ddd28..0000000 Binary files a/rod_align/_ext/__init__.pyc and /dev/null differ diff --git a/rod_align/_ext/rod_align/__init__.py b/rod_align/_ext/rod_align/__init__.py index f73c1cc..0f7f4f3 100644 --- a/rod_align/_ext/rod_align/__init__.py +++ b/rod_align/_ext/rod_align/__init__.py @@ -1,8 +1,9 @@ - from torch.utils.ffi import _wrap_function from ._rod_align import lib as _lib, ffi as _ffi __all__ = [] + + def _import_symbols(locals): for symbol in dir(_lib): fn = getattr(_lib, symbol) @@ -12,4 +13,5 @@ def _import_symbols(locals): locals[symbol] = fn __all__.append(symbol) + _import_symbols(locals()) diff --git a/rod_align/_ext/rod_align/__init__.pyc b/rod_align/_ext/rod_align/__init__.pyc deleted file mode 100644 index 7b0c3f3..0000000 Binary files a/rod_align/_ext/rod_align/__init__.pyc and /dev/null differ diff --git a/rod_align/_ext/rod_align/_rod_align.so b/rod_align/_ext/rod_align/_rod_align.so old mode 100644 new mode 100755 index 32eecad..d4598a2 Binary files a/rod_align/_ext/rod_align/_rod_align.so and b/rod_align/_ext/rod_align/_rod_align.so differ diff --git a/rod_align/build.py b/rod_align/build.py index b1685b0..6d490bd 100644 --- a/rod_align/build.py +++ b/rod_align/build.py @@ -3,11 +3,11 @@ import torch from torch.utils.ffi import create_extension -sources = ['src/rod_align.c'] -headers = ['src/rod_align.h'] +sources = ["src/rod_align.c"] +headers = ["src/rod_align.h"] extra_objects = [] -#sources = [] -#headers = [] +# sources = [] +# headers = [] defines = [] with_cuda = False @@ -15,24 +15,24 @@ print(this_file) if torch.cuda.is_available(): - print('Including CUDA code.') - sources += ['src/rod_align_cuda.c'] - headers += ['src/rod_align_cuda.h'] - defines += [('WITH_CUDA', None)] + print("Including CUDA code.") + sources += ["src/rod_align_cuda.c"] + headers += ["src/rod_align_cuda.h"] + defines += [("WITH_CUDA", None)] with_cuda = True - - extra_objects = ['src/rod_align_kernel.cu.o'] + + extra_objects = ["src/rod_align_kernel.cu.o"] extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] ffi = create_extension( - '_ext.rod_align', + "_ext.rod_align", headers=headers, sources=sources, define_macros=defines, relative_to=__file__, with_cuda=with_cuda, - extra_objects=extra_objects + extra_objects=extra_objects, ) -if __name__ == '__main__': +if __name__ == "__main__": ffi.build() diff --git a/rod_align/functions/__init__.pyc 
b/rod_align/functions/__init__.pyc deleted file mode 100644 index 33906ee..0000000 Binary files a/rod_align/functions/__init__.pyc and /dev/null differ diff --git a/rod_align/functions/rod_align.py b/rod_align/functions/rod_align.py index 21d827d..61670f0 100644 --- a/rod_align/functions/rod_align.py +++ b/rod_align/functions/rod_align.py @@ -19,32 +19,47 @@ def forward(self, features, rois): batch_size, num_channels, data_height, data_width = features.size() num_rois = rois.size(0) - output = features.new(num_rois, num_channels, self.aligned_height, self.aligned_width).zero_() + output = features.new( + num_rois, num_channels, self.aligned_height, self.aligned_width + ).zero_() if features.is_cuda: - rod_align.rod_align_forward_cuda(self.aligned_height, - self.aligned_width, - self.spatial_scale, features, - rois, output) + rod_align.rod_align_forward_cuda( + self.aligned_height, + self.aligned_width, + self.spatial_scale, + features, + rois, + output, + ) else: - rod_align.rod_align_forward(self.aligned_height, - self.aligned_width, - self.spatial_scale, features, - rois, output) -# raise NotImplementedError + rod_align.rod_align_forward( + self.aligned_height, + self.aligned_width, + self.spatial_scale, + features, + rois, + output, + ) + # raise NotImplementedError return output def backward(self, grad_output): - assert(self.feature_size is not None and grad_output.is_cuda) + assert self.feature_size is not None and grad_output.is_cuda batch_size, num_channels, data_height, data_width = self.feature_size - grad_input = self.rois.new(batch_size, num_channels, data_height, - data_width).zero_() - rod_align.rod_align_backward_cuda(self.aligned_height, - self.aligned_width, - self.spatial_scale, grad_output, - self.rois, grad_input) + grad_input = self.rois.new( + batch_size, num_channels, data_height, data_width + ).zero_() + rod_align.rod_align_backward_cuda( + self.aligned_height, + self.aligned_width, + self.spatial_scale, + grad_output, + self.rois, + grad_input, + ) # print grad_input diff --git a/rod_align/functions/rod_align.pyc b/rod_align/functions/rod_align.pyc deleted file mode 100644 index 9d8f55e..0000000 Binary files a/rod_align/functions/rod_align.pyc and /dev/null differ diff --git a/rod_align/modules/__init__.pyc b/rod_align/modules/__init__.pyc deleted file mode 100644 index 61c608d..0000000 Binary files a/rod_align/modules/__init__.pyc and /dev/null differ diff --git a/rod_align/modules/rod_align.py b/rod_align/modules/rod_align.py index afd7488..73fb9d2 100644 --- a/rod_align/modules/rod_align.py +++ b/rod_align/modules/rod_align.py @@ -12,8 +12,10 @@ def __init__(self, aligned_height, aligned_width, spatial_scale): self.spatial_scale = float(spatial_scale) def forward(self, features, rois): - return RoDAlignFunction(self.aligned_height, self.aligned_width, - self.spatial_scale)(features, rois) + return RoDAlignFunction( + self.aligned_height, self.aligned_width, self.spatial_scale + )(features, rois) + class RoDAlignAvg(Module): def __init__(self, aligned_height, aligned_width, spatial_scale): @@ -24,10 +26,12 @@ def __init__(self, aligned_height, aligned_width, spatial_scale): self.spatial_scale = float(spatial_scale) def forward(self, features, rois): - x = RoDAlignFunction(self.aligned_height+1, self.aligned_width+1, - self.spatial_scale)(features, rois) + x = RoDAlignFunction( + self.aligned_height + 1, self.aligned_width + 1, self.spatial_scale + )(features, rois) return avg_pool2d(x, kernel_size=2, stride=1) + class RoDAlignMax(Module): def __init__(self, 
aligned_height, aligned_width, spatial_scale): super(RoDAlignMax, self).__init__() @@ -37,6 +41,7 @@ def __init__(self, aligned_height, aligned_width, spatial_scale): self.spatial_scale = float(spatial_scale) def forward(self, features, rois): - x = RoDAlignFunction(self.aligned_height+1, self.aligned_width+1, - self.spatial_scale)(features, rois) + x = RoDAlignFunction( + self.aligned_height + 1, self.aligned_width + 1, self.spatial_scale + )(features, rois) return max_pool2d(x, kernel_size=2, stride=1) diff --git a/rod_align/modules/rod_align.pyc b/rod_align/modules/rod_align.pyc deleted file mode 100644 index 33f1c6b..0000000 Binary files a/rod_align/modules/rod_align.pyc and /dev/null differ diff --git a/roi_align/__init__.pyc b/roi_align/__init__.pyc deleted file mode 100644 index 0c256f3..0000000 Binary files a/roi_align/__init__.pyc and /dev/null differ diff --git a/roi_align/__pycache__/__init__.cpython-35.pyc b/roi_align/__pycache__/__init__.cpython-35.pyc deleted file mode 100644 index 820b591..0000000 Binary files a/roi_align/__pycache__/__init__.cpython-35.pyc and /dev/null differ diff --git a/roi_align/_ext/__init__.py b/roi_align/_ext/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/roi_align/_ext/__init__.pyc b/roi_align/_ext/__init__.pyc deleted file mode 100644 index d20a313..0000000 Binary files a/roi_align/_ext/__init__.pyc and /dev/null differ diff --git a/roi_align/_ext/__pycache__/__init__.cpython-35.pyc b/roi_align/_ext/__pycache__/__init__.cpython-35.pyc deleted file mode 100644 index a502eb8..0000000 Binary files a/roi_align/_ext/__pycache__/__init__.cpython-35.pyc and /dev/null differ diff --git a/roi_align/_ext/roi_align/__init__.py b/roi_align/_ext/roi_align/__init__.py index c5b6e5d..e84fbbf 100644 --- a/roi_align/_ext/roi_align/__init__.py +++ b/roi_align/_ext/roi_align/__init__.py @@ -1,8 +1,9 @@ - from torch.utils.ffi import _wrap_function from ._roi_align import lib as _lib, ffi as _ffi __all__ = [] + + def _import_symbols(locals): for symbol in dir(_lib): fn = getattr(_lib, symbol) @@ -12,4 +13,5 @@ def _import_symbols(locals): locals[symbol] = fn __all__.append(symbol) + _import_symbols(locals()) diff --git a/roi_align/_ext/roi_align/__init__.pyc b/roi_align/_ext/roi_align/__init__.pyc deleted file mode 100644 index ba60328..0000000 Binary files a/roi_align/_ext/roi_align/__init__.pyc and /dev/null differ diff --git a/roi_align/_ext/roi_align/__pycache__/__init__.cpython-35.pyc b/roi_align/_ext/roi_align/__pycache__/__init__.cpython-35.pyc deleted file mode 100644 index 38ff4f7..0000000 Binary files a/roi_align/_ext/roi_align/__pycache__/__init__.cpython-35.pyc and /dev/null differ diff --git a/roi_align/_ext/roi_align/_roi_align.so b/roi_align/_ext/roi_align/_roi_align.so old mode 100644 new mode 100755 index 59c26a2..ddbf956 Binary files a/roi_align/_ext/roi_align/_roi_align.so and b/roi_align/_ext/roi_align/_roi_align.so differ diff --git a/roi_align/build.py b/roi_align/build.py index 79f9586..654b44b 100644 --- a/roi_align/build.py +++ b/roi_align/build.py @@ -3,11 +3,11 @@ import torch from torch.utils.ffi import create_extension -sources = ['src/roi_align.c'] -headers = ['src/roi_align.h'] +sources = ["src/roi_align.c"] +headers = ["src/roi_align.h"] extra_objects = [] -#sources = [] -#headers = [] +# sources = [] +# headers = [] defines = [] with_cuda = False @@ -15,24 +15,24 @@ print(this_file) if torch.cuda.is_available(): - print('Including CUDA code.') - sources += ['src/roi_align_cuda.c'] - headers += 
['src/roi_align_cuda.h'] - defines += [('WITH_CUDA', None)] + print("Including CUDA code.") + sources += ["src/roi_align_cuda.c"] + headers += ["src/roi_align_cuda.h"] + defines += [("WITH_CUDA", None)] with_cuda = True - - extra_objects = ['src/roi_align_kernel.cu.o'] + + extra_objects = ["src/roi_align_kernel.cu.o"] extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] ffi = create_extension( - '_ext.roi_align', + "_ext.roi_align", headers=headers, sources=sources, define_macros=defines, relative_to=__file__, with_cuda=with_cuda, - extra_objects=extra_objects + extra_objects=extra_objects, ) -if __name__ == '__main__': +if __name__ == "__main__": ffi.build() diff --git a/roi_align/functions/__init__.pyc b/roi_align/functions/__init__.pyc deleted file mode 100644 index 7d287c0..0000000 Binary files a/roi_align/functions/__init__.pyc and /dev/null differ diff --git a/roi_align/functions/__pycache__/__init__.cpython-35.pyc b/roi_align/functions/__pycache__/__init__.cpython-35.pyc deleted file mode 100644 index b122bd4..0000000 Binary files a/roi_align/functions/__pycache__/__init__.cpython-35.pyc and /dev/null differ diff --git a/roi_align/functions/__pycache__/roi_align.cpython-35.pyc b/roi_align/functions/__pycache__/roi_align.cpython-35.pyc deleted file mode 100644 index e6fb198..0000000 Binary files a/roi_align/functions/__pycache__/roi_align.cpython-35.pyc and /dev/null differ diff --git a/roi_align/functions/roi_align.py b/roi_align/functions/roi_align.py index bf1d2e1..17670e1 100644 --- a/roi_align/functions/roi_align.py +++ b/roi_align/functions/roi_align.py @@ -19,32 +19,47 @@ def forward(self, features, rois): batch_size, num_channels, data_height, data_width = features.size() num_rois = rois.size(0) - output = features.new(num_rois, num_channels, self.aligned_height, self.aligned_width).zero_() + output = features.new( + num_rois, num_channels, self.aligned_height, self.aligned_width + ).zero_() if features.is_cuda: - roi_align.roi_align_forward_cuda(self.aligned_height, - self.aligned_width, - self.spatial_scale, features, - rois, output) + roi_align.roi_align_forward_cuda( + self.aligned_height, + self.aligned_width, + self.spatial_scale, + features, + rois, + output, + ) else: - roi_align.roi_align_forward(self.aligned_height, - self.aligned_width, - self.spatial_scale, features, - rois, output) -# raise NotImplementedError + roi_align.roi_align_forward( + self.aligned_height, + self.aligned_width, + self.spatial_scale, + features, + rois, + output, + ) + # raise NotImplementedError return output def backward(self, grad_output): - assert(self.feature_size is not None and grad_output.is_cuda) + assert self.feature_size is not None and grad_output.is_cuda batch_size, num_channels, data_height, data_width = self.feature_size - grad_input = self.rois.new(batch_size, num_channels, data_height, - data_width).zero_() - roi_align.roi_align_backward_cuda(self.aligned_height, - self.aligned_width, - self.spatial_scale, grad_output, - self.rois, grad_input) + grad_input = self.rois.new( + batch_size, num_channels, data_height, data_width + ).zero_() + roi_align.roi_align_backward_cuda( + self.aligned_height, + self.aligned_width, + self.spatial_scale, + grad_output, + self.rois, + grad_input, + ) # print grad_input diff --git a/roi_align/functions/roi_align.pyc b/roi_align/functions/roi_align.pyc deleted file mode 100644 index 5192ab8..0000000 Binary files a/roi_align/functions/roi_align.pyc and /dev/null differ diff --git a/roi_align/modules/__init__.pyc 
b/roi_align/modules/__init__.pyc deleted file mode 100644 index 9e68d51..0000000 Binary files a/roi_align/modules/__init__.pyc and /dev/null differ diff --git a/roi_align/modules/__pycache__/__init__.cpython-35.pyc b/roi_align/modules/__pycache__/__init__.cpython-35.pyc deleted file mode 100644 index 3b4aaf2..0000000 Binary files a/roi_align/modules/__pycache__/__init__.cpython-35.pyc and /dev/null differ diff --git a/roi_align/modules/__pycache__/roi_align.cpython-35.pyc b/roi_align/modules/__pycache__/roi_align.cpython-35.pyc deleted file mode 100644 index 45b47dd..0000000 Binary files a/roi_align/modules/__pycache__/roi_align.cpython-35.pyc and /dev/null differ diff --git a/roi_align/modules/roi_align.py b/roi_align/modules/roi_align.py index ca02e3b..c7167bb 100644 --- a/roi_align/modules/roi_align.py +++ b/roi_align/modules/roi_align.py @@ -12,8 +12,10 @@ def __init__(self, aligned_height, aligned_width, spatial_scale): self.spatial_scale = float(spatial_scale) def forward(self, features, rois): - return RoIAlignFunction(self.aligned_height, self.aligned_width, - self.spatial_scale)(features, rois) + return RoIAlignFunction( + self.aligned_height, self.aligned_width, self.spatial_scale + )(features, rois) + class RoIAlignAvg(Module): def __init__(self, aligned_height, aligned_width, spatial_scale): @@ -24,10 +26,12 @@ def __init__(self, aligned_height, aligned_width, spatial_scale): self.spatial_scale = float(spatial_scale) def forward(self, features, rois): - x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1, - self.spatial_scale)(features, rois) + x = RoIAlignFunction( + self.aligned_height + 1, self.aligned_width + 1, self.spatial_scale + )(features, rois) return avg_pool2d(x, kernel_size=2, stride=1) + class RoIAlignMax(Module): def __init__(self, aligned_height, aligned_width, spatial_scale): super(RoIAlignMax, self).__init__() @@ -37,6 +41,7 @@ def __init__(self, aligned_height, aligned_width, spatial_scale): self.spatial_scale = float(spatial_scale) def forward(self, features, rois): - x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1, - self.spatial_scale)(features, rois) + x = RoIAlignFunction( + self.aligned_height + 1, self.aligned_width + 1, self.spatial_scale + )(features, rois) return max_pool2d(x, kernel_size=2, stride=1) diff --git a/roi_align/modules/roi_align.pyc b/roi_align/modules/roi_align.pyc deleted file mode 100644 index 492eaa8..0000000 Binary files a/roi_align/modules/roi_align.pyc and /dev/null differ diff --git a/thop/__init__.py b/thop/__init__.py index a25bbfc..c0c090a 100644 --- a/thop/__init__.py +++ b/thop/__init__.py @@ -1 +1 @@ -from .profile import profile \ No newline at end of file +from .profile import profile diff --git a/thop/__init__.pyc b/thop/__init__.pyc deleted file mode 100644 index b57c3fa..0000000 Binary files a/thop/__init__.pyc and /dev/null differ diff --git a/thop/count_hooks.py b/thop/count_hooks.py index a94b506..b255031 100644 --- a/thop/count_hooks.py +++ b/thop/count_hooks.py @@ -116,7 +116,10 @@ def count_maxpool(m, x, y): def count_adap_maxpool(m, x, y): - kernel = torch.Tensor([(x[0].shape[2:])]) // torch.Tensor(list((m.output_size,))).squeeze() + kernel = ( + torch.Tensor([(x[0].shape[2:])]) + // torch.Tensor(list((m.output_size,))).squeeze() + ) kernel_ops = torch.prod(kernel) num_elements = y.numel() total_ops = kernel_ops * num_elements @@ -135,7 +138,10 @@ def count_avgpool(m, x, y): def count_adap_avgpool(m, x, y): - kernel = torch.Tensor([(x[0].shape[2:])]) // 
torch.Tensor(list((m.output_size,))).squeeze() + kernel = ( + torch.Tensor([(x[0].shape[2:])]) + // torch.Tensor(list((m.output_size,))).squeeze() + ) total_add = torch.prod(kernel) total_div = 1 kernel_ops = total_add + total_div diff --git a/thop/count_hooks.pyc b/thop/count_hooks.pyc deleted file mode 100644 index b23a0b9..0000000 Binary files a/thop/count_hooks.pyc and /dev/null differ diff --git a/thop/profile.py b/thop/profile.py index fc3b4ed..ac1ad74 100644 --- a/thop/profile.py +++ b/thop/profile.py @@ -11,26 +11,21 @@ nn.Conv2d: count_convNd, nn.Conv3d: count_convNd, nn.ConvTranspose2d: count_convtranspose2d, - nn.BatchNorm1d: count_bn, nn.BatchNorm2d: count_bn, nn.BatchNorm3d: count_bn, - nn.ReLU: count_relu, nn.ReLU6: count_relu, nn.LeakyReLU: count_relu, - nn.MaxPool1d: count_maxpool, nn.MaxPool2d: count_maxpool, nn.MaxPool3d: count_maxpool, nn.AdaptiveMaxPool1d: count_adap_maxpool, nn.AdaptiveMaxPool2d: count_adap_maxpool, nn.AdaptiveMaxPool3d: count_adap_maxpool, - nn.AvgPool1d: count_avgpool, nn.AvgPool2d: count_avgpool, nn.AvgPool3d: count_avgpool, - nn.AdaptiveAvgPool1d: count_adap_avgpool, nn.AdaptiveAvgPool2d: count_adap_avgpool, nn.AdaptiveAvgPool3d: count_adap_avgpool, @@ -46,8 +41,8 @@ def add_hooks(m): if len(list(m.children())) > 0: return - m.register_buffer('total_ops', torch.zeros(1)) - m.register_buffer('total_params', torch.zeros(1)) + m.register_buffer("total_ops", torch.zeros(1)) + m.register_buffer("total_params", torch.zeros(1)) for p in m.parameters(): m.total_params += torch.Tensor([p.numel()]) diff --git a/thop/profile.pyc b/thop/profile.pyc deleted file mode 100644 index 13582fc..0000000 Binary files a/thop/profile.pyc and /dev/null differ diff --git a/thop/utils.py b/thop/utils.py index c63f444..38a9e28 100644 --- a/thop/utils.py +++ b/thop/utils.py @@ -1,4 +1,3 @@ - def clever_format(num, format="%.2f"): if num > 1e12: return format % (num / 1e12) + "T" @@ -7,4 +6,4 @@ def clever_format(num, format="%.2f"): if num > 1e6: return format % (num / 1e6) + "M" if num > 1e3: - return format % (num / 1e3) + "K" \ No newline at end of file + return format % (num / 1e3) + "K"
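For reference, below is a minimal CPU-only smoke test built from the pieces this diff touches; the `build_crop_model` call and checkpoint loading mirror the updated `demo_eval.py`. It is a sketch, not committed code: the `croppingModel` import path, the 256x256 dummy input, and the example box are assumptions, and running it still needs the torch 0.4.1 stack from `requirements.txt` plus the CPU-built `_roi_align.so`/`_rod_align.so` extensions.

```python
import torch
from torch.autograd import Variable

# Assumes the model builder lives in croppingModel.py, as in the upstream repo.
from croppingModel import build_crop_model

# Same configuration demo_eval.py uses for the pretrained MobileNetV2 checkpoint.
net = build_crop_model(
    scale="multi",
    alignsize=9,
    reddim=8,
    loadweight=True,
    model="mobilenetv2",
    downsample=4,
)
# map_location="cpu" and strict=False mirror the CPU-only loading added in demo_eval.py.
net.load_state_dict(
    torch.load(
        "pretrained_model/mobilenet_0.625_0.583_0.553_0.525_0.785_0.762_0.748_0.723_0.783_0.806.pth",
        map_location="cpu",
    ),
    strict=False,
)
net.eval()

# One dummy resized image and one candidate crop; each ROI row follows demo_eval.py's
# (batch_index, xmin, ymin, xmax, ymax) layout in resized-image coordinates.
image = Variable(torch.randn(1, 3, 256, 256))
boxes = Variable(torch.Tensor([[0, 16, 16, 240, 240]]))

scores = net(image, boxes)  # one score per candidate crop
print(scores)
```

In `demo_eval.py` the candidate boxes come from the dataset loader rather than being hard-coded, and the resulting scores are sorted so the top-ranked crops can be written to `--output_dir`.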
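The rebuilt CPU extensions can also be exercised directly. The sketch below is again illustrative only (feature-map size, channel count, and box values are made up); it uses the `RoIAlignAvg`/`RoDAlignAvg` modules the crop models wrap, with `spatial_scale = 1.0 / 2 ** downsample` mapping boxes given in resized-image pixels onto the downsampled feature map. Per the paper, the RoI branch pools the candidate crop and the RoD branch pools the discarded region around it.

```python
import torch
from torch.autograd import Variable

from roi_align.modules.roi_align import RoIAlignAvg
from rod_align.modules.rod_align import RoDAlignAvg

downsample, alignsize = 4, 9
roi_align = RoIAlignAvg(alignsize, alignsize, 1.0 / 2 ** downsample)
rod_align = RoDAlignAvg(alignsize, alignsize, 1.0 / 2 ** downsample)

# A made-up feature map: e.g. a 256x256 image after 16x downsampling, 8 channels.
feat = Variable(torch.randn(1, 8, 16, 16))
# (batch_index, xmin, ymin, xmax, ymax) in image coordinates, as in demo_eval.py.
rois = Variable(torch.Tensor([[0, 16, 16, 240, 240]]))

roi_feat = roi_align(feat, rois)  # (1, 8, 9, 9): pooled crop region
rod_feat = rod_align(feat, rois)  # (1, 8, 9, 9): pooled complementary region
print(roi_feat.shape, rod_feat.shape)
```

Both modules pool at `alignsize + 1` and then apply a 2x2 average pool with stride 1, which is why the output spatial size comes back to `alignsize` x `alignsize`.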