diff --git a/checkpoint.py b/checkpoint.py
index ffd59a5..02c3ca4 100644
--- a/checkpoint.py
+++ b/checkpoint.py
@@ -23,13 +23,6 @@ def __init__(self, save_path):
             os.makedirs(self.save_path)
 
     def load_state(self, model, state_dict):
-        """
-        load state_dict to model
-        :params model:
-        :params state_dict:
-        :return: model
-        """
-        # set the model in evaluation mode, otherwise the accuracy will change
         model.eval()
         model_dict = model.state_dict()
@@ -49,7 +42,6 @@ def load_model(self, model_path):
         """
         if os.path.isfile(model_path):
             print("|===>Load retrain model from:", model_path)
-            # model_state_dict = torch.load(model_path, map_location={'cuda:1':'cuda:0'})
             model_state_dict = torch.load(model_path, map_location='cpu')
             return model_state_dict
         else:
diff --git a/data/test/crop_eye_with_landmarks.py b/data/test/crop_eye_with_landmarks.py
index d8fa17a..0eaf68d 100644
--- a/data/test/crop_eye_with_landmarks.py
+++ b/data/test/crop_eye_with_landmarks.py
@@ -48,7 +48,7 @@ def crop_eye_with_landmarks(img, landmarks):
     large_rect = larger_left_eye_rect
     left_eye = img[large_rect[1]:large_rect[1]+large_rect[3], large_rect[0]:large_rect[0]+large_rect[2]]
-    larger_rect = larger_right_eye_rect
+    large_rect = larger_right_eye_rect  # fix typo: the right-eye slice below reads large_rect
     right_eye = img[large_rect[1]:large_rect[1]+large_rect[3], large_rect[0]:large_rect[0]+large_rect[2]]
diff --git a/loss.py b/loss.py
index 36679ff..b26090a 100644
--- a/loss.py
+++ b/loss.py
@@ -3,18 +3,18 @@
 class NLL_OHEM(torch.nn.NLLLoss):
     """Online hard sample mining, Needs input from nn.LogSoftmax()"""
     def __init__(self, ratio):
-        super(NLL_OHEM, self).__init__(None, True):
-        self.ration = ratio
+        super(NLL_OHEM, self).__init__(None, True)
+        self.ratio = ratio  # fix typo ("ration"): forward() reads self.ratio
 
     def forward(self, x, y, ratio=None):
         if ratio is not None:
             self.ratio = ratio
         num_inst = x.size(0)
         num_hns = int(self.ratio*num_inst)
-        x_ = x.clone()
+        x = x.clone()
         inst_losses = torch.autograd.Variable(torch.zeros(num_inst)).cuda()
         for idx, label in enumerate(y.data):
-            insta_losses[idx] = -x.data[idx, label]
+            inst_losses[idx] = -x.data[idx, label]  # fix typo: write into inst_losses, not num_inst
         _, idxs = inst_losses.topk(num_hns)
         y_hn = y.index_select(0, idxs)
-        x_hn = y.index_select(0, idxs)
+        x_hn = x.index_select(0, idxs)  # fix: the hard examples come from x, not y
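For reference, the same OHEM selection expressed in current PyTorch, without Variable or a hard-coded .cuda(). This is an illustrative sketch, not code from this repo; nll_ohem is a made-up name:

import torch
import torch.nn.functional as F

def nll_ohem(log_probs, targets, ratio=0.7):
    """OHEM over NLL: keep only the highest-loss fraction of samples.

    log_probs: (N, C) output of log_softmax; targets: (N,) class indices.
    """
    num_hns = max(1, int(ratio * log_probs.size(0)))
    # Per-sample NLL: -log p(target class)
    inst_losses = -log_probs.gather(1, targets.unsqueeze(1)).squeeze(1)
    # Select the hardest (highest-loss) samples and average their loss
    _, idxs = inst_losses.topk(num_hns)
    return F.nll_loss(log_probs[idxs], targets[idxs])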
diff --git a/test/test_image.py b/test/test_image.py
index 792be0a..369f447 100644
--- a/test/test_image.py
+++ b/test/test_image.py
@@ -1,23 +1,29 @@
-import os, sys
+import os
+import sys
 sys.path.append('.')
 import time
 import pathlib
-
 import cv2
 import numpy as np
 import torch
-
 import config
 from nets.optimized_landmark import LNet
-from tools.utils import show_landmarks, show_tensor_landmarks, draw_tensor_landmarks
+from tools.utils import show_landmarks, show_tensor_landmarks
 
 if __name__ == "__main__":
     model = LNet()
-    # model.load_state_dict(torch.load('result/check_point/{0}_landmarks_model_200.pth'.format(config.NUM_LANDMARKS)))
-    model.load_state_dict(torch.load('./pretrained_weights/{0}_landmarks_model_200.pth'.format(config.NUM_LANDMARKS)))
+    pretrained_weights_path = './pretrained_weights/{0}_landmarks_model_200.pth'.format(config.NUM_LANDMARKS)
+
+    if os.path.exists(pretrained_weights_path):
+        model.load_state_dict(torch.load(pretrained_weights_path))
+        print("Loaded pretrained weights successfully.")
+    else:
+        print("Pretrained weights not found. Please check the path:", pretrained_weights_path)
+        sys.exit(1)
+
     model.eval()
+
+    input_path = pathlib.Path('./data/test/')
     project_root = pathlib.Path()
     output_path = project_root / "result" / "imgs"
     output_path.mkdir(exist_ok=True)
@@ -27,22 +31,25 @@
     img = cv2.imread(str(input_img_filename))
     RGB_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
     RGB_img = cv2.resize(RGB_img, (config.IMAGE_WIDTH, config.IMAGE_HEIGHT))
 
     data = RGB_img - 127.5
     data = data / 127.5
 
     data = data.transpose((2, 0, 1))
     data = np.expand_dims(data, axis=0)
     data = torch.Tensor(data)
 
     with torch.no_grad():
         landmarks = model(data)
 
     landmarks = landmarks.cpu().detach()
 
-    show_tensor_landmarks(data[0], landmarks[0])
-
-    landmarks = [(i) for i in landmarks[0]]
-    h, w = img.shape[0:2]
-    landmarks = [(landmarks[2*i]*w, landmarks[2*i+1]*h) for i in range(len(landmarks)//2)]
-    show_landmarks(img, landmarks)
+    # Visualize on the normalized tensor image first: show_tensor_landmarks
+    # expects tensors, so convert to numpy only afterwards.
+    show_tensor_landmarks(data[0], landmarks[0])
+
+    # Convert normalized landmarks to pixel coordinates on the original image
+    landmarks = landmarks.numpy()[0]
+    h, w = img.shape[0:2]
+    landmarks_img_coords = [(int(landmarks[2*i] * w), int(landmarks[2*i+1] * h)) for i in range(len(landmarks)//2)]
+    show_landmarks(img, landmarks_img_coords)
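The preprocessing above (BGR to RGB, resize, shift/scale to [-1, 1], HWC to CHW, add a batch dim) mirrors the training-time Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]). A standalone sketch of the same steps; the helper name is illustrative, not part of the repo:

import cv2
import numpy as np
import torch

def preprocess(img_bgr, width, height):
    """Match the training normalization: x -> (x - 127.5) / 127.5."""
    rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    rgb = cv2.resize(rgb, (width, height))
    data = (rgb.astype(np.float32) - 127.5) / 127.5  # scale to [-1, 1]
    data = data.transpose((2, 0, 1))                 # HWC -> CHW
    return torch.from_numpy(data).unsqueeze(0)       # add batch dimension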
diff --git a/tools/iris_dataset.py b/tools/iris_dataset.py
index e85db53..19fec79 100644
--- a/tools/iris_dataset.py
+++ b/tools/iris_dataset.py
@@ -60,13 +60,15 @@ def __call__(self, sample):
 class RandomFlip(object):
     def __init__(self, prob):
         self.prob = prob
+
     def __call__(self, sample):
         image = sample['image']
         landmarks = sample['landmarks']
+
         if random.random() < self.prob:
-            image = cv2.flip(image, 1) # 1 for flip around y axis, 0 for x axis, -1 for both
-            # landmarks[:,0] = image.shape[1] - landmarks[:,0] # flip x coordinates
-            landmarks[:, 0] = 1 - landmarks[:, 0]
+            image = cv2.flip(image, 1)  # 1 flips around the y axis, 0 the x axis, -1 both
+            landmarks[:, 0] = 1 - landmarks[:, 0]  # landmarks are normalized, so mirror x in [0, 1]
+
         return {'image': image, 'landmarks': landmarks}
 
 class Rescale(object):
@@ -79,9 +81,14 @@ def __call__(self, sample):
         ratio = np.random.randint(self.low, self.high)
         image = sample['image']
         landmarks = sample['landmarks']
-        h, w = image.shape[0:2]
-        image = cv2.resize(image, (int(w/ratio), int(h/ratio)))
-        image = cv2.resize(image, (w, h))
+        h, w = image.shape[:2]
+
+        # Downscale, then upscale back: degrades detail without changing the
+        # geometry, so the normalized landmarks stay valid.
+        small_image = cv2.resize(image, (int(w / ratio), int(h / ratio)))
+        image = cv2.resize(small_image, (w, h))
+
         return {'image': image, 'landmarks': landmarks}
 
 class RandomGaussianBlur(object):
@@ -195,6 +202,104 @@ def kps_to_landmarks(self, kps):
         landmarks = np.array(landmarks)
         return landmarks
 
+
+def unnormalize_landmark(landmarks, image):
+    h, w = image.shape[:2]
+    landmarks[:, 0] = landmarks[:, 0] * w
+    landmarks[:, 1] = landmarks[:, 1] * h
+    return landmarks
+
+
+def normalize_landmark(landmarks, image):
+    h, w = image.shape[:2]
+    landmarks[:, 0] = landmarks[:, 0] / w
+    landmarks[:, 1] = landmarks[:, 1] / h
+    return landmarks
+
+
+class RandomCropResize(object):
+    def __init__(self, output_size, resize_ratio):
+        assert isinstance(output_size, (int, tuple))
+        if isinstance(output_size, int):
+            self.output_size = (output_size, output_size)
+        else:
+            assert len(output_size) == 2
+            self.output_size = output_size
+        self.resize_ratio = resize_ratio
+
+    def __call__(self, sample):
+        image, landmarks = sample['image'], sample['landmarks']
+
+        resize = np.random.random()
+
+        h, w = image.shape[:2]
+        new_w, new_h = self.output_size
+
+        if resize < self.resize_ratio:
+            # Random crop: move the landmarks into pixel space, shift by the
+            # crop offset, then re-normalize against the cropped image.
+            top = np.random.randint(0, h - new_h)
+            left = np.random.randint(0, w - new_w)
+
+            landmarks = unnormalize_landmark(landmarks, image)
+            image = image[top:top + new_h, left:left + new_w]
+            landmarks = landmarks - [left, top]
+            landmarks = normalize_landmark(landmarks, image)
+        else:
+            # Plain resize: normalized landmarks are unchanged.
+            image = cv2.resize(image, (new_w, new_h))
+
+        return {'image': image, 'landmarks': landmarks}
+
+
+class RandomRotate(object):
+    def __init__(self, degree):
+        self.degree = degree
+
+    def __call__(self, sample):
+        image = sample['image']
+        landmarks = sample['landmarks']
+        h, w = image.shape[:2]
+        center = (w // 2, h // 2)
+        random_degree = np.random.uniform(-self.degree, self.degree)
+        rot_mat = cv2.getRotationMatrix2D(center, random_degree, 1)
+        image_rotated = cv2.warpAffine(image, rot_mat, (w, h))
+
+        # Apply the same affine transform to the landmarks in pixel space.
+        landmark_rotated = np.asarray([(rot_mat[0][0] * x * w + rot_mat[0][1] * y * h + rot_mat[0][2],
+                                        rot_mat[1][0] * x * w + rot_mat[1][1] * y * h + rot_mat[1][2])
+                                       for (x, y) in landmarks])
+
+        # Re-normalize column-wise; the array is (N, 2), not flat.
+        landmark_rotated[:, 0] /= w
+        landmark_rotated[:, 1] /= h
+
+        return {'image': image_rotated, 'landmarks': landmark_rotated}
+
+
+class RandomMotionBlur(object):
+    def __init__(self, radius):
+        self.radius = radius
+        self.seq = iaa.Sequential([
+            iaa.Sometimes(0.2, iaa.MotionBlur(k=self.radius))
+        ])
+
+    def __call__(self, sample):
+        image = sample['image']
+        landmarks = sample['landmarks']
+        landmarks = unnormalize_landmark(landmarks, image)
+        kps = self.landmarks_to_kps(image, landmarks)
+        img_aug, kps_aug = self.seq(image=image, keypoints=kps)
+        landmarks_aug = self.kps_to_landmarks(kps_aug)
+        landmarks_aug = normalize_landmark(landmarks_aug, img_aug)
+        return {'image': img_aug, 'landmarks': landmarks_aug}
+
+    def landmarks_to_kps(self, image, landmarks):
+        kp_list = [Keypoint(x=landmarks[i][0], y=landmarks[i][1]) for i in range(landmarks.shape[0])]
+        return KeypointsOnImage(kp_list, shape=image.shape)
+
+    def kps_to_landmarks(self, kps):
+        # Use float coordinates: normalize_landmark divides in place, which
+        # would truncate everything to zero on an integer array.
+        return np.array([(kp.x, kp.y) for kp in kps.keypoints], dtype=np.float32)
+
 class ToTensor(object):
     # def __init__(self, image_size):
     #     self.image_size = image_size
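These transforms pass dict samples through unchanged interfaces, so they compose with torchvision's Compose directly. A quick sanity-check sketch; the shapes and 32-point count are illustrative only:

import numpy as np
from torchvision import transforms
from tools.iris_dataset import RandomFlip, Rescale, RandomCropResize

# Each transform maps {'image': HxWx3 uint8, 'landmarks': Nx2 normalized floats}
# to a dict of the same form, keeping image and landmarks in sync.
aug = transforms.Compose([
    Rescale(3, 15),
    RandomFlip(0.5),
    RandomCropResize((48, 48), 0.2),
])

sample = {
    'image': np.zeros((64, 64, 3), dtype=np.uint8),
    'landmarks': np.random.rand(32, 2).astype(np.float32),
}
out = aug(sample)
print(out['image'].shape, out['landmarks'].shape)  # (48, 48, 3) (32, 2)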
diff --git a/tools/utils.py b/tools/utils.py
index 80e1384..051c87a 100644
--- a/tools/utils.py
+++ b/tools/utils.py
@@ -1,5 +1,4 @@
 import os
-
 import numpy as np
 import cv2
 import torch
@@ -10,25 +9,17 @@
     transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
 ])
 
+# Add missing import (used by the keypoint-based transforms)
+from imgaug import Keypoint, KeypointsOnImage
 
 def nms(dets, thresh, mode='Union'):
-    ''' greedily select bboxes with high confidence,if an box overlap with the highest score box > thres, rule it out
-
-    params:
-        dets: [[x1, y1, x2, y2, score]]
-        thresh: retain overlap <= thresh
-    return:
-        indexes to keep
-    '''
-
     x1 = dets[:, 0]
     y1 = dets[:, 1]
     x2 = dets[:, 2]
     y2 = dets[:, 3]
     scores = dets[:, 4]
     areas = (x2 - x1) * (y2 - y1)
-    order = scores.argsort()[::-1] # the index of scores by desc
-    # order = np.array([i for i in range(dets.shape[0])])
+    order = scores.argsort()[::-1]  # indices sorted by score, descending
     keep = []
     while order.size > 0:
         i = order[0]
@@ -37,7 +28,7 @@ def nms(dets, thresh, mode='Union'):
         yy1 = np.maximum(y1[i], y1[order[1:]])
         xx2 = np.minimum(x2[i], x2[order[1:]])
         yy2 = np.minimum(y2[i], y2[order[1:]])
-
+
         w = np.maximum(0.0, xx2 - xx1)
         h = np.maximum(0.0, yy2 - yy1)
         inter = w * h
@@ -47,94 +38,46 @@ def nms(dets, thresh, mode='Union'):
             ovr = inter / np.minimum(areas[i], areas[order[1:]])
         inds = np.where(ovr <= thresh)[0]
         order = order[inds + 1]
-
+
     return keep
 
 def convert_to_square(bbox):
-    ''' Convert bbox to a square which it can include the bbox
-    Parameters:
-        bbox: numpy array, shape n x 5
-
-    returns:
-        square box
-    '''
-
     square_bbox = bbox.copy()
     h = bbox[:, 3] - bbox[:, 1]
     w = bbox[:, 2] - bbox[:, 0]
     max_side = np.maximum(h, w)
-    square_bbox[:, 0] = bbox[:, 0] + w*0.5 - max_side*0.5
-    square_bbox[:, 1] = bbox[:, 1] + h*0.5 - max_side*0.5
+    square_bbox[:, 0] = bbox[:, 0] + w * 0.5 - max_side * 0.5
+    square_bbox[:, 1] = bbox[:, 1] + h * 0.5 - max_side * 0.5
     square_bbox[:, 2] = square_bbox[:, 0] + max_side
     square_bbox[:, 3] = square_bbox[:, 1] + max_side
-
-    return square_bbox
+    return square_bbox
 
 def IoU(box, boxes):
-    """Compute IoU between detect box and gt boxes
-
-    Parameters:
-    ----------
-    box: numpy array , shape (5, ): x1, y1, x2, y2, score
-        input box
-    boxes: numpy array, shape (n, 4): x1, y1, x2, y2
-        input ground truth boxes
-
-    Returns:
-    -------
-    ovr: numpy.array, shape (n, )
-        IoU
-    """
     box_area = (box[2] - box[0]) * (box[3] - box[1])
     area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
-
+
     xx1 = np.maximum(box[0], boxes[:, 0])
     yy1 = np.maximum(box[1], boxes[:, 1])
     xx2 = np.minimum(box[2], boxes[:, 2])
     yy2 = np.minimum(box[3], boxes[:, 3])
-
-    # compute the width and height of the inter box
+
     w = np.maximum(0, xx2 - xx1)
     h = np.maximum(0, yy2 - yy1)
-
+
     inter = w * h
     ovr = np.true_divide(inter, (box_area + area - inter))
-
-
+
     return ovr
 
 def convert_image_to_tensor(image):
-    """convert an image to pytorch tensor
-
-    Parameters:
-    ----------
-    image: numpy array , h * w * c
-
-    Returns:
-    -------
-    image_tensor: pytorch.FloatTensor, c * h * w
-    """
-
     return transform(image)
 
 def convert_chwTensor_to_hwcNumpy(tensor):
-    """convert a group images pytorch tensor(count * c * h * w) to numpy array images(count * h * w * c)
-    Parameters:
-    ----------
-    tensor: numpy array , count * c * h * w
-
-    Returns:
-    -------
-    numpy array images: count * h * w * c
-    """
-
     if isinstance(tensor, torch.FloatTensor):
         return np.transpose(tensor.detach().numpy(), (0, 2, 3, 1))
     else:
-        raise Exception(
-            "covert b*c*h*w tensor to b*h*w*c numpy error.This tensor must have 4 dimension of float data type.")
+        raise Exception("Convert b*c*h*w tensor to b*h*w*c numpy error. This tensor must have 4 dimensions of float data type.")
 
 def show_landmarks(image, landmarks):
     for i in landmarks:
@@ -145,65 +88,48 @@
 def show_tensor_landmarks(image, landmarks):
     image = image.numpy()
     image = np.transpose(image, (1, 2, 0))
-    image = 255*(image*0.5+0.5)
+    image = 255 * (image * 0.5 + 0.5)
     image = np.clip(image, 0, 255).astype('uint8')
     image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
     landmarks = landmarks.numpy()
     landmarks = landmarks.reshape((-1, 2))
     h, w = image.shape[0:2]
     for i in landmarks:
-        cv2.circle(image, (int(w*i[0]), int(h*i[1])), 1, (255, 0, 0), 1)
+        cv2.circle(image, (int(w * i[0]), int(h * i[1])), 1, (255, 0, 0), 1)
     cv2.imshow('img', image)
     cv2.waitKey(0)
 
 def draw_tensor_landmarks(image, pred_landmarks, gt_landmarks=None):
     image = image.numpy()
     image = np.transpose(image, (1, 2, 0))
-    image = 255*(image*0.5+0.5)
+    image = 255 * (image * 0.5 + 0.5)
     image = image.astype('uint8')
     image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
     pred_landmarks = pred_landmarks.numpy()
     pred_landmarks = pred_landmarks.reshape((-1, 2))
     h, w = image.shape[0:2]
     for i in pred_landmarks:
-        cv2.circle(image, (int(w*i[0]), int(h*i[1])), 1, (255, 0, 0), 0)
+        cv2.circle(image, (int(w * i[0]), int(h * i[1])), 1, (255, 0, 0), 0)
     if gt_landmarks is not None:
         gt_landmarks = gt_landmarks.numpy()
         gt_landmarks = gt_landmarks.reshape((-1, 2))
         for i in gt_landmarks:
-            cv2.circle(image, (int(w*i[0]), int(h*i[1])), 1, (255, 255, 0), 0)
+            cv2.circle(image, (int(w * i[0]), int(h * i[1])), 1, (255, 255, 0), 0)
     return image
 
 def show_bbox_landmarks(image, bbox, landmarks):
     bbox = [int(b) for b in bbox]
     cv2.rectangle(image, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 3)
-    landmarks = [(int(landmarks[2*i]), int(landmarks[2*i+1])) for i in range(len(landmarks)//2)]
+    landmarks = [(int(landmarks[2 * i]), int(landmarks[2 * i + 1])) for i in range(len(landmarks) // 2)]
     for i in landmarks:
         cv2.circle(image, (i[0], i[1]), 1, (255, 0, 0), 0)
     cv2.imshow('img', image)
     cv2.waitKey(0)
 
 class AverageMeter(object):
-    """Computes and stores the average and current value"""
-
     def __init__(self):
         self.reset()
 
     def reset(self):
-        """
-        reset all parameters
-        """
         self.val = 0
         self.avg = 0
         self.sum = 0
         self.count = 0
 
     def update(self, val, n=1):
-        """
-        update parameters
-        """
         self.val = val
         self.sum += val * n
         self.count += n
         self.avg = self.sum / self.count
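Usage sketch for nms: each row of dets is [x1, y1, x2, y2, score], and the return value is a list of indices into dets, highest scores first:

import numpy as np
from tools.utils import nms

dets = np.array([
    [10.0, 10.0, 50.0, 50.0, 0.95],      # kept: highest score
    [12.0, 12.0, 48.0, 48.0, 0.90],      # suppressed: IoU with the first box is ~0.81
    [100.0, 100.0, 140.0, 140.0, 0.80],  # kept: no overlap with the others
])
print(nms(dets, thresh=0.5, mode='Union'))  # -> [0, 2]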
diff --git a/training/train.py b/training/train.py
index 9a7e23f..7e947f5 100644
--- a/training/train.py
+++ b/training/train.py
@@ -1,12 +1,15 @@
 import os, sys
 sys.path.append('.')
 
 import torch
 import torch.nn as nn
 from torchvision import transforms
+from torch.utils.data import DataLoader
 
 import config
-from tools.iris_dataset import * # transformers
+from tools.iris_dataset import (FaceLandmarkDataset, Resize, Rescale, RandomFlip,
+                                RandomGaussianBlur, RandomMedianBlur,
+                                RandomCropResize, ToTensor, Normalize)
 from tools.logger import Logger
 from nets.optimized_landmark import LNet
 from training.trainer import Trainer
@@ -16,28 +19,31 @@
 use_cuda = config.USE_CUDA and torch.cuda.is_available()
 device = torch.device("cuda:0" if use_cuda else "cpu")
 
-transform = transforms.Compose([Resize((int(1.4*config.IMAGE_WIDTH),
-                                        int(1.4*config.IMAGE_HEIGHT))),
-                                Rescale(3, 15),
-                                RandomFlip(0.4),
-                                RandomGaussianBlur(0.6),
-                                RandomMedianBlur(0.6),
-                                # RandomMotionBlur(20),
-                                RandomCropResize((config.IMAGE_WIDTH,
-                                                  config.IMAGE_HEIGHT), 0.2),
-                                ToTensor(),
-                                Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]
-                               )
-
-train_loader = torch.utils.data.DataLoader(
-    FaceLandmarkDataset([{'root_dir': config.TRAIN_DATA_DIR,
-                          'label_file': config.LANDMARKS_ANNO_FILE}],
-                        point_num=config.NUM_LANDMARKS,
-                        transform=transform),
-    batch_size = config.batch_size,
-    num_workers = config.num_threads,
-    shuffle=True)
+# Keep the landmark-aware transforms from tools.iris_dataset: plain torchvision
+# transforms only touch the image and would leave the landmark coordinates
+# un-flipped and un-cropped.
+transform = transforms.Compose([
+    Resize((int(1.4 * config.IMAGE_WIDTH), int(1.4 * config.IMAGE_HEIGHT))),
+    Rescale(3, 15),
+    RandomFlip(0.4),
+    RandomGaussianBlur(0.6),
+    RandomMedianBlur(0.6),
+    RandomCropResize((config.IMAGE_WIDTH, config.IMAGE_HEIGHT), 0.2),
+    ToTensor(),
+    Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
+])
+
+train_dataset = FaceLandmarkDataset(
+    [{'root_dir': config.TRAIN_DATA_DIR, 'label_file': config.LANDMARKS_ANNO_FILE}],
+    point_num=config.NUM_LANDMARKS,
+    transform=transform
+)
+train_loader = DataLoader(
+    train_dataset,
+    batch_size=config.batch_size,
+    num_workers=config.num_threads,
+    shuffle=True
+)
 
 model = LNet()
 model.load_state_dict(torch.load('result/iris_lnet/check_point/32_landmarks_model_200.pth'))
 if torch.cuda.device_count() > 1:
@@ -48,7 +54,6 @@
 lossfn = nn.MSELoss()
 checkpoint = CheckPoint(config.save_path)
 optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
-# optimizer = torch.optim.SGD(model.parameters(), lr=config.learning_rate)
 scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=config.step, gamma=0.1)
 logger = Logger(config.save_path)
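Before launching a run, it can be worth pulling a single batch to confirm the pipeline's output; a sketch, assuming the dataset yields {'image', 'landmarks'} dicts as in tools/iris_dataset.py (the exact landmark shape depends on ToTensor):

# Fetch one batch and inspect shapes before training.
batch = next(iter(train_loader))
print(batch['image'].shape)      # expected: (batch_size, 3, IMAGE_HEIGHT, IMAGE_WIDTH)
print(batch['landmarks'].shape)  # expected: (batch_size, NUM_LANDMARKS, 2) or flattened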
diff --git a/training/trainer.py b/training/trainer.py
index 0262642..08366b3 100644
--- a/training/trainer.py
+++ b/training/trainer.py
@@ -24,7 +24,7 @@ def __init__(self, lr, train_loader, model, optimizer, lossfn, scheduler, logger
         self.run_count = 0
 
     def compute_metrics(self, pred_landmarks, gt_landmarks):
-        pass
+        raise NotImplementedError()
 
     def train(self, epoch):
         self.scheduler.step()
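compute_metrics is still unimplemented; raising NotImplementedError makes that explicit instead of silently returning None. If it is filled in later, a common choice for landmark models is the mean per-point Euclidean error; a minimal sketch under that assumption, not the repo's actual metric:

import torch

def compute_metrics(pred_landmarks, gt_landmarks):
    """Mean per-point Euclidean distance; both inputs are (batch, num_points * 2) in normalized coordinates."""
    pred = pred_landmarks.view(pred_landmarks.size(0), -1, 2)
    gt = gt_landmarks.view(gt_landmarks.size(0), -1, 2)
    per_point = torch.norm(pred - gt, dim=2)  # (batch, num_points)
    return per_point.mean().item()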