diff --git a/README.md b/README.md index 7bdf52a17..bc3ec5593 100644 --- a/README.md +++ b/README.md @@ -248,6 +248,28 @@ where `config_spec` is one of {`autoencoder_kl_8x8x64`(f=32, d=64), `autoencoder For training VQ-regularized models, see the [taming-transformers](https://github.com/CompVis/taming-transformers) repository. +### Evaluation of trained autoencoder models + +1. generate an evaluation dataset +``` +python scripts/create_eval_data.py /mnt/disks/datasets/celeba-hq ./eval_data ./data/celebahqvalidation_jpg.txt +``` +2. generate reconstructed images from autoencoder models +``` +python scripts/reconstruct_first_stages.py \ +--config ./models/first_stage_models/kl-f4/config.yaml \ +--ckpt ./models/first_stage_models/kl-f4/model.ckpt \ +--input_dir ./eval_data \ +--output_dir ./reconstructed_images_pretrain +``` +3. compute metrics for original images and reconstructed_images +``` +python scripts/evaluate_first_stages.py \ +--original_dir ./eval_data \ +--reconstructed_dir1 ./reconstructed_images_pretrain \ +--reconstructed_dir2 ./reconstructed_images_train200 +``` + ### Training LDMs In ``configs/latent-diffusion/`` we provide configs for training LDMs on the LSUN-, CelebA-HQ, FFHQ and ImageNet datasets. diff --git a/scripts/create_eval_data.py b/scripts/create_eval_data.py new file mode 100644 index 000000000..9dae400e4 --- /dev/null +++ b/scripts/create_eval_data.py @@ -0,0 +1,53 @@ +import os +import shutil +import sys + + +def copy_images(source_dir, destination_dir, file_list): + """ + Copies images listed in a file from a source directory to a destination directory. + + Parameters: + - source_dir: The directory where the images are located. + - destination_dir: The directory where the images will be copied to. + - file_list: A file containing the list of image file names to copy. + """ + # Create the destination directory if it doesn't exist + if not os.path.exists(destination_dir): + os.makedirs(destination_dir) + + # Open the file containing the list of images to copy + with open(file_list, 'r') as file: + for line in file: + # Remove any trailing whitespace or newline characters + image_name = line.strip() + + # Define the source and destination file paths + source_file = os.path.join(source_dir, image_name) + destination_file = os.path.join(destination_dir, image_name) + + # Check if the source file exists before attempting to copy + if os.path.exists(source_file): + # Copy the file to the destination directory + shutil.copy(source_file, destination_file) + else: + print(f"File {image_name} not found in source directory.") + + +def main(): + if len(sys.argv) != 4: + print("Usage: python script.py ") + sys.exit(1) + + source_dir = sys.argv[1] + destination_dir = sys.argv[2] + file_list = sys.argv[3] + + copy_images(source_dir, destination_dir, file_list) + + +if __name__ == "__main__": + """ + python scripts/create_eval_data.py /mnt/disks/datasets/celeba-hq ./eval_data ./data/celebahqvalidation_jpg.txt + """ + main() diff --git a/scripts/evaluate_first_stages.py b/scripts/evaluate_first_stages.py new file mode 100644 index 000000000..56e0abbd1 --- /dev/null +++ b/scripts/evaluate_first_stages.py @@ -0,0 +1,78 @@ +import argparse +import os +import numpy as np +from skimage.metrics import peak_signal_noise_ratio as psnr +from skimage.metrics import structural_similarity as ssim +from skimage.io import imread +from skimage.transform import resize +import subprocess + + +def compute_metrics(original_dir, reconstructed_dir, output_size=(256, 256)): + psnr_values = [] + ssim_values = [] + + for filename in os.listdir(original_dir): + if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.bmp', '.gif')): + continue # Skip non-image files + + # Read the original and reconstructed images + original_path = os.path.join(original_dir, filename) + reconstructed_path = os.path.join(reconstructed_dir, filename) + + original_img = imread(original_path) + reconstructed_img = imread(reconstructed_path) + + # Resize images to 256x256 + original_img = resize(original_img, output_size, anti_aliasing=True) + reconstructed_img = resize(reconstructed_img, output_size, anti_aliasing=True) + + # Compute PSNR and SSIM + psnr_value = psnr(original_img, reconstructed_img, data_range=original_img.max() - original_img.min()) + ssim_value = ssim(original_img, reconstructed_img, channel_axis=-1, data_range=original_img.max() - original_img.min()) + + psnr_values.append(psnr_value) + ssim_values.append(ssim_value) + + return np.mean(psnr_values), np.mean(ssim_values) + + +def calculate_rfid(image_dir1, image_dir2): + fid_command = f'python -m pytorch_fid {image_dir1} {image_dir2}' + fid_result = subprocess.run(fid_command, shell=True, capture_output=True, text=True) + fid_score = float(fid_result.stdout.split(' ')[-1]) + return fid_score + + +def main(original_images_dir, reconstructed_images_dir1, reconstructed_images_dir2): + resize_to = (256, 256) + + psnr1, ssim1 = compute_metrics(original_images_dir, reconstructed_images_dir1, output_size=resize_to) + psnr2, ssim2 = compute_metrics(original_images_dir, reconstructed_images_dir2, output_size=resize_to) + + print(f"Model 1 - PSNR: {psnr1}, SSIM: {ssim1}") + print(f"Model 2 - PSNR: {psnr2}, SSIM: {ssim2}") + + rfid1 = calculate_rfid(original_images_dir, reconstructed_images_dir1) + rfid2 = calculate_rfid(original_images_dir, reconstructed_images_dir2) + + print(f"Model 1 - rFID: {rfid1}") + print(f"Model 2 - rFID: {rfid2}") + + +if __name__ == "__main__": + """ + python scripts/evaluate_first_stages.py \ + --original_dir ./eval_data \ + --reconstructed_dir1 ./reconstructed_images_pretrain \ + --reconstructed_dir2 /reconstructed_images_train200 + """ + parser = argparse.ArgumentParser(description="Evaluate models with PSNR, SSIM, and rFID") + parser.add_argument('--original_dir', type=str, required=True, help='Directory of original images') + parser.add_argument('--reconstructed_dir1', type=str, required=True, + help='Directory of reconstructed images from the first model') + parser.add_argument('--reconstructed_dir2', type=str, required=True, + help='Directory of reconstructed images from the second model') + + args = parser.parse_args() + main(args.original_dir, args.reconstructed_dir1, args.reconstructed_dir2) diff --git a/scripts/reconstruct_first_stages.py b/scripts/reconstruct_first_stages.py new file mode 100644 index 000000000..4aaad2296 --- /dev/null +++ b/scripts/reconstruct_first_stages.py @@ -0,0 +1,95 @@ +import torch +from ldm.util import instantiate_from_config +from omegaconf import OmegaConf +from PIL import Image +import torchvision.transforms as T +import os +import torchvision.utils as vutils +import argparse + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + +def load_model_from_config(config, ckpt): + print(f"Loading model from {ckpt}") + pl_sd = torch.load(ckpt, map_location="cpu") + global_step = pl_sd["global_step"] + sd = pl_sd["state_dict"] + model = instantiate_from_config(config.model) + m, u = model.load_state_dict(sd, strict=False) + model.to(device) + model.eval() + return {"model": model}, global_step + + +def load_and_preprocess_image(image_path, resize_shape=(256, 256)): + transform = T.Compose([ + T.Resize(resize_shape), + T.ToTensor(), + T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + ]) + image = Image.open(image_path).convert("RGB") + return transform(image).unsqueeze(0).to(device) + + +def reconstruct_image(model, image_tensor): + with torch.no_grad(): + reconstructed_img, _ = model(image_tensor) + return reconstructed_img + + +def save_image(tensor, filename): + print("Tensor Type:", type(tensor)) # Debugging line to confirm tensor type + if isinstance(tensor, torch.Tensor): + tensor = (tensor + 1) / 2 # Normalize if the tensor is in the range [-1, 1] + vutils.save_image(tensor, filename) + else: + print("The input is not a tensor.") + + +def reconstruct_and_save_images(input_dir, output_dir, model): + for image_name in os.listdir(input_dir): + if not image_name.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.bmp', '.gif')): + continue + + image_path = os.path.join(input_dir, image_name) + image_tensor = load_and_preprocess_image(image_path) + + reconstructed_img = reconstruct_image(model, image_tensor) + + output_path = os.path.join(output_dir, image_name) + save_image(reconstructed_img, output_path) + + +def main(config_path, ckpt_path, input_dir, output_dir): + config = OmegaConf.load(config_path) + model_info, step = load_model_from_config(config, ckpt_path) + model = model_info["model"] + + os.makedirs(output_dir, exist_ok=True) + reconstruct_and_save_images(input_dir, output_dir, model) + + +if __name__ == "__main__": + """ + python scripts/reconstruct_first_stages.py \ + --config ./models/first_stage_models/kl-f4/config.yaml \ + --ckpt ./models/first_stage_models/kl-f4/model.ckpt \ + --input_dir ./eval_data \ + --output_dir ./reconstructed_images_pretrain + + + python scripts/reconstruct_first_stages.py \ + --config ./logs/2024-02-24T19-56-50_autoencoder_kl_64x64x3/checkpoints/config.yaml \ + --ckpt ./logs/2024-02-24T19-56-50_autoencoder_kl_64x64x3/checkpoints/last.ckpt \ + --input_dir ./eval_data \ + --output_dir ./reconstructed_images_train200 + """ + parser = argparse.ArgumentParser(description="Reconstruct images from training autoencoder models") + parser.add_argument('--config', type=str, required=True, help='Path to model config YAML file') + parser.add_argument('--ckpt', type=str, required=True, help='Path to model checkpoint file') + parser.add_argument('--input_dir', type=str, required=True, help='Directory where input images are stored') + parser.add_argument('--output_dir', type=str, required=True, help='Directory where output images will be saved') + + args = parser.parse_args() + main(args.config, args.ckpt, args.input_dir, args.output_dir)