From 767441d82f7e04ba9cef03e03761256d872c0bd2 Mon Sep 17 00:00:00 2001 From: Jiaqi Lv Date: Thu, 6 Nov 2025 17:23:34 +0000 Subject: [PATCH 01/26] copy code from old PR --- docs/pretrained.rst | 8 +- .../engines/test_nucleus_detection_engine.py | 67 +++++ tiatoolbox/data/pretrained_model.yaml | 12 +- tiatoolbox/models/engine/__init__.py | 2 + tiatoolbox/models/engine/nucleus_detector.py | 250 ++++++++++++++++++ 5 files changed, 329 insertions(+), 10 deletions(-) create mode 100644 tests/engines/test_nucleus_detection_engine.py create mode 100644 tiatoolbox/models/engine/nucleus_detector.py diff --git a/docs/pretrained.rst b/docs/pretrained.rst index 310fc83bc..1a2a53faf 100644 --- a/docs/pretrained.rst +++ b/docs/pretrained.rst @@ -353,7 +353,7 @@ The input output configuration is as follows: ioconfig = IOPatchPredictorConfig( patch_input_shape=(31, 31), stride_shape=(8, 8), - input_resolutions=[{"resolution": 0.25, "units": "mpp"}] + input_resolutions=[{"resolution": 0.5, "units": "mpp"}] ) @@ -369,7 +369,7 @@ The input output configuration is as follows: ioconfig = IOPatchPredictorConfig( patch_input_shape=(252, 252), stride_shape=(150, 150), - input_resolutions=[{"resolution": 0.25, "units": "mpp"}] + input_resolutions=[{"resolution": 0.5, "units": "mpp"}] ) @@ -393,7 +393,7 @@ The input output configuration is as follows: ioconfig = IOPatchPredictorConfig( patch_input_shape=(31, 31), stride_shape=(8, 8), - input_resolutions=[{"resolution": 0.25, "units": "mpp"}] + input_resolutions=[{"resolution": 0.5, "units": "mpp"}] ) @@ -409,7 +409,7 @@ The input output configuration is as follows: ioconfig = IOPatchPredictorConfig( patch_input_shape=(252, 252), stride_shape=(150, 150), - input_resolutions=[{"resolution": 0.25, "units": "mpp"}] + input_resolutions=[{"resolution": 0.5, "units": "mpp"}] ) diff --git a/tests/engines/test_nucleus_detection_engine.py b/tests/engines/test_nucleus_detection_engine.py new file mode 100644 index 000000000..e9f4e1aec --- /dev/null +++ b/tests/engines/test_nucleus_detection_engine.py @@ -0,0 +1,67 @@ +"""Tests for NucleusDetector.""" + +import pathlib +import shutil + +import pandas as pd +import pytest + +from tiatoolbox.models.engine.nucleus_detector import ( + IONucleusDetectorConfig, + NucleusDetector, +) +from tiatoolbox.utils import env_detection as toolbox_env + +ON_GPU = not toolbox_env.running_on_ci() and toolbox_env.has_gpu() + + +def _rm_dir(path): + """Helper func to remove directory.""" + if pathlib.Path(path).exists(): + shutil.rmtree(path, ignore_errors=True) + + +def check_output(path): + """Check NucleusDetector output.""" + coordinates = pd.read_csv(path) + assert coordinates.x[0] == pytest.approx(53, abs=2) + assert coordinates.x[1] == pytest.approx(55, abs=2) + assert coordinates.y[0] == pytest.approx(107, abs=2) + assert coordinates.y[1] == pytest.approx(127, abs=2) + + +def test_nucleus_detector_engine(remote_sample, tmp_path): + """Test for nucleus detection engine.""" + mini_wsi_svs = pathlib.Path(remote_sample("wsi4_512_512_svs")) + + nucleus_detector = NucleusDetector(pretrained_model="mapde-conic") + _ = nucleus_detector.predict( + [mini_wsi_svs], + mode="wsi", + save_dir=tmp_path / "output", + on_gpu=ON_GPU, + ) + + check_output(tmp_path / "output" / "0.locations.0.csv") + + _rm_dir(tmp_path / "output") + + ioconfig = IONucleusDetectorConfig( + input_resolutions=[{"units": "mpp", "resolution": 0.5}], + output_resolutions=[{"units": "mpp", "resolution": 0.5}], + save_resolution=None, + patch_input_shape=[252, 252], + 
patch_output_shape=[252, 252], + stride_shape=[150, 150], + ) + + nucleus_detector = NucleusDetector(pretrained_model="mapde-conic") + _ = nucleus_detector.predict( + [mini_wsi_svs], + mode="wsi", + save_dir=tmp_path / "output", + on_gpu=ON_GPU, + ioconfig=ioconfig, + ) + + check_output(tmp_path / "output" / "0.locations.0.csv") diff --git a/tiatoolbox/data/pretrained_model.yaml b/tiatoolbox/data/pretrained_model.yaml index 880c623fe..a345e7681 100644 --- a/tiatoolbox/data/pretrained_model.yaml +++ b/tiatoolbox/data/pretrained_model.yaml @@ -815,7 +815,7 @@ mapde-crchisto: threshold_abs: 250 num_classes: 1 ioconfig: - class: semantic_segmentor.IOSegmentorConfig + class: io_config.IOSegmentorConfig kwargs: input_resolutions: - { "units": "mpp", "resolution": 0.5 } @@ -837,7 +837,7 @@ mapde-conic: threshold_abs: 205 num_classes: 1 ioconfig: - class: semantic_segmentor.IOSegmentorConfig + class: io_config.IOSegmentorConfig kwargs: input_resolutions: - { "units": "mpp", "resolution": 0.5 } @@ -860,7 +860,7 @@ sccnn-crchisto: threshold_abs: 0.20 patch_output_shape: [ 13, 13 ] ioconfig: - class: semantic_segmentor.IOSegmentorConfig + class: io_config.IOSegmentorConfig kwargs: input_resolutions: - { "units": "mpp", "resolution": 0.5 } @@ -883,7 +883,7 @@ sccnn-conic: threshold_abs: 0.05 patch_output_shape: [ 13, 13 ] ioconfig: - class: semantic_segmentor.IOSegmentorConfig + class: io_config.IOSegmentorConfig kwargs: input_resolutions: - { "units": "mpp", "resolution": 0.5 } @@ -903,7 +903,7 @@ nuclick_original-pannuke: num_input_channels: 5 num_output_channels: 1 ioconfig: - class: semantic_segmentor.IOSegmentorConfig + class: io_config.IOSegmentorConfig kwargs: input_resolutions: - {'units': 'baseline', 'resolution': 0.25} @@ -925,7 +925,7 @@ nuclick_light-pannuke: decoder_block: [3,3] skip_type: "add" ioconfig: - class: semantic_segmentor.IOSegmentorConfig + class: io_config.IOSegmentorConfig kwargs: input_resolutions: - {'units': 'baseline', 'resolution': 0.25} diff --git a/tiatoolbox/models/engine/__init__.py b/tiatoolbox/models/engine/__init__.py index 9c00ac4a2..ff65892a2 100644 --- a/tiatoolbox/models/engine/__init__.py +++ b/tiatoolbox/models/engine/__init__.py @@ -2,6 +2,7 @@ from . import ( engine_abc, + nucleus_detector, nucleus_instance_segmentor, patch_predictor, semantic_segmentor, @@ -9,6 +10,7 @@ __all__ = [ "engine_abc", + "nucleus_detector", "nucleus_instance_segmentor", "patch_predictor", "semantic_segmentor", diff --git a/tiatoolbox/models/engine/nucleus_detector.py b/tiatoolbox/models/engine/nucleus_detector.py new file mode 100644 index 000000000..0cc006756 --- /dev/null +++ b/tiatoolbox/models/engine/nucleus_detector.py @@ -0,0 +1,250 @@ +"""This module implements nucleus detection engine.""" + +import numpy as np +import pandas as pd + +from tiatoolbox.models.engine.semantic_segmentor import ( + IOSegmentorConfig, + SemanticSegmentor, +) + + +class IONucleusDetectorConfig(IOSegmentorConfig): + """Contains NucleusDetector input and output information. + + Args: + input_resolutions (list): + Resolution of each input head of model inference, must be in + the same order as `target model.forward()`. + output_resolutions (list): + Resolution of each output head from model inference, must be + in the same order as target model.infer_batch(). + patch_input_shape (:class:`numpy.ndarray`, list(int)): + Shape of the largest input in (height, width). + patch_output_shape (:class:`numpy.ndarray`, list(int)): + Shape of the largest output in (height, width). 
+ save_resolution (dict): + Resolution to save all output. + + Examples: + >>> # Defining io for a network having 1 input and 1 output at the + >>> # same resolution + >>> ioconfig = IONucleusDetectorConfig( + ... input_resolutions=[{"units": "baseline", "resolution": 1.0}], + ... output_resolutions=[{"units": "baseline", "resolution": 1.0}], + ... patch_input_shape=[2048, 2048], + ... patch_output_shape=[1024, 1024], + ... stride_shape=[512, 512], + ... ) + + """ + + def __init__( + self, + input_resolutions: list[dict], + output_resolutions: list[dict], + patch_input_shape: list[int] | np.ndarray, + patch_output_shape: list[int] | np.ndarray, + save_resolution: dict = None, + **kwargs, + ): + super().__init__( + input_resolutions=input_resolutions, + output_resolutions=output_resolutions, + patch_input_shape=patch_input_shape, + patch_output_shape=patch_output_shape, + save_resolution=save_resolution, + **kwargs, + ) + + +class NucleusDetector(SemanticSegmentor): + r"""Nucleus detection engine. + + The models provided by tiatoolbox should give the following results: + + .. list-table:: Nucleus detection performance on the (add models list here) + :widths: 15 15 + :header-rows: 1 + + Args: + model (nn.Module): + Use externally defined PyTorch model for prediction with. + weights already loaded. Default is `None`. If provided, + `pretrained_model` argument is ignored. + pretrained_model (str): + Name of the existing models support by tiatoolbox for + processing the data. For a full list of pretrained models, + refer to the `docs + `_ + By default, the corresponding pretrained weights will also + be downloaded. However, you can override with your own set + of weights via the `pretrained_weights` argument. Argument + is case-insensitive. + pretrained_weights (str): + Path to the weight of the corresponding `pretrained_model`. + + >>> predictor = NucleusDetector( + ... pretrained_model="mapde-conic", + ... pretrained_weights="mapde_local_weight") + + batch_size (int): + Number of images fed into the model each time. + num_loader_workers (int): + Number of workers to load the data. Take note that they will + also perform preprocessing. + verbose (bool): + Whether to output logging information. default=False. + auto_generate_mask (bool): + To automatically generate tile/WSI tissue mask if is not + provided. default=False. + + Attributes: + imgs (:obj:`str` or :obj:`pathlib.Path` or :obj:`numpy.ndarray`): + A HWC image or a path to WSI. + model (nn.Module): + Defined PyTorch model. + pretrained_model (str): + Name of the existing models support by tiatoolbox for + processing the data e.g., mapde-conic, sccnn-conic. + For a full list of pretrained models, please refer to the `docs + `_ + By default, the corresponding pretrained weights will also + be downloaded. However, you can override with your own set + of weights via the `pretrained_weights` argument. Argument + is case insensitive. + batch_size (int): + Number of images fed into the model each time. + num_loader_workers (int): + Number of workers used in torch.utils.data.DataLoader. + verbose (bool): + Whether to output logging information. 
+ + Examples: + >>> # list of 2 image patches as input + >>> data = [img1, img2] + >>> nucleus_detector = NucleusDetector(pretrained_model="mapde-conic") + >>> output = nucleus_detector.predict(data, mode='patch') + + >>> # array of list of 2 image patches as input + >>> data = np.array([img1, img2]) + >>> nucleus_detector = NucleusDetector(pretrained_model="mapde-conic") + >>> output = nucleus_detector.predict(data, mode='patch') + + >>> # list of 2 image patch files as input + >>> data = ['path/img.png', 'path/img.png'] + >>> nucleus_detector = NucleusDetector(pretrained_model="mapde-conic") + >>> output = nucleus_detector.predict(data, mode='patch') + + >>> # list of 2 image tile files as input + >>> tile_file = ['path/tile1.png', 'path/tile2.png'] + >>> nucleus_detector = NucleusDetector(pretraind_model="mapde-conic") + >>> output = nucleus_detector.predict(tile_file, mode='tile') + + >>> # list of 2 wsi files as input + >>> wsi_file = ['path/wsi1.svs', 'path/wsi2.svs'] + >>> nucleus_detector = NucleusDetector(pretraind_model="mapde-conic") + >>> output = nucleus_detector.predict(wsi_file, mode='wsi') + + References: + [1] Raza, Shan E. Ahmed, et al. "Deconvolving convolutional neural network + for cell detection." 2019 IEEE 16th International Symposium on Biomedical + Imaging (ISBI 2019). IEEE, 2019. + + [2] Sirinukunwattana, Korsuk, et al. + "Locality sensitive deep learning for detection and classification + of nuclei in routine colon cancer histology images." + IEEE transactions on medical imaging 35.5 (2016): 1196-1206. + + """ + + from tiatoolbox.wsicore.wsireader import WSIReader + + def __init__( + self, + batch_size=8, + num_loader_workers=0, + model=None, + pretrained_model=None, + pretrained_weights=None, + verbose: bool = False, + auto_generate_mask: bool = False, + ): + super().__init__( + batch_size=batch_size, + num_loader_workers=num_loader_workers, + model=model, + pretrained_model=pretrained_model, + pretrained_weights=pretrained_weights, + verbose=verbose, + auto_generate_mask=auto_generate_mask, + ) + + def _process_predictions( + self, + cum_batch_predictions: list, + wsi_reader: WSIReader, + ioconfig: IOSegmentorConfig, + save_path: str, + cache_dir: str, + ): + """Define how the aggregated predictions are processed. + + This includes merging the prediction if necessary and also saving the + locations afterwards. Note that items within `cum_batch_predictions` will + be consumed during the operation. + + Args: + cum_batch_predictions (list): + List of batch predictions. Each item within the list + should be of (location, patch_predictions). + wsi_reader (:class:`WSIReader`): + A reader for the image where the predictions come from. + ioconfig (:class:`IOSegmentorConfig`): + A configuration object contains input and output + information. + save_path (str): + Root path to save current WSI predictions. + cache_dir (str): + Root path to cache current WSI data. + + """ + if len(cum_batch_predictions) == 0: + return + + # assume predictions is N, each item has L output element + locations, predictions = list(zip(*cum_batch_predictions)) + # Nx4 (N x [tl_x, tl_y, br_x, br_y), denotes the location of + # output patch this can exceed the image bound at the requested + # resolution remove singleton due to split. 
+ locations = np.array([v[0] for v in locations]) + for index, output_resolution in enumerate(ioconfig.output_resolutions): + # assume resolution index to be in the same order as L + merged_resolution = ioconfig.highest_input_resolution + merged_locations = locations + # ! location is w.r.t the highest resolution, hence still need conversion + if ioconfig.save_resolution is not None: + merged_resolution = ioconfig.save_resolution + output_shape = wsi_reader.slide_dimensions(**output_resolution) + merged_shape = wsi_reader.slide_dimensions(**merged_resolution) + fx = merged_shape[0] / output_shape[0] + merged_locations = np.ceil(locations * fx).astype(np.int64) + merged_shape = wsi_reader.slide_dimensions(**merged_resolution) + # 0 idx is to remove singleton without removing other axes singleton + to_merge_predictions = [v[index][0] for v in predictions] + sub_save_path = f"{save_path}.raw.{index}.npy" + sub_count_path = f"{cache_dir}/count.{index}.npy" + cum_canvas = self.merge_prediction( + merged_shape[::-1], # XY to YX + to_merge_predictions, + merged_locations, + save_path=sub_save_path, + cache_count_path=sub_count_path, + ) + + # Coordinates in output resolution for the current canvas. + cum_canvas = np.expand_dims(cum_canvas, axis=0) + coordinates_canvas = pd.DataFrame( + self.model.postproc_func(cum_canvas), columns=["x", "y"] + ) + coordinates_canvas.to_csv(f"{save_path}.locations.{index}.csv", index=False) From d2a970275fdaf9eee41b070efdf7e6d28040ed6a Mon Sep 17 00:00:00 2001 From: Jiaqi Lv Date: Fri, 7 Nov 2025 17:29:51 +0000 Subject: [PATCH 02/26] preliminiary testing --- requirements/requirements.txt | 1 + test.py | 23 + .../engines/test_nucleus_detection_engine.py | 103 ++-- tiatoolbox/data/pretrained_model.yaml | 2 +- tiatoolbox/models/architecture/mapde.py | 8 +- tiatoolbox/models/engine/nucleus_detector.py | 545 ++++++++++++------ 6 files changed, 453 insertions(+), 229 deletions(-) create mode 100644 test.py diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 045a4ce4e..96e2ebed8 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -5,6 +5,7 @@ albumentations>=1.3.0 bokeh>=3.1.1, <3.6.0 Click>=8.1.3, <8.2.0 dask>=2025.10.0 +pyarrow>=14.0.1 defusedxml>=0.7.1 filelock>=3.9.0 flask>=2.2.2 diff --git a/test.py b/test.py new file mode 100644 index 000000000..1a45ad039 --- /dev/null +++ b/test.py @@ -0,0 +1,23 @@ +import pathlib +import shutil + +import pandas as pd +import pytest + +from tiatoolbox.models.engine.nucleus_detector import ( + NucleusDetector, +) +from tiatoolbox.utils import env_detection as toolbox_env + +ON_GPU = not toolbox_env.running_on_ci() and toolbox_env.has_gpu() + + +if __name__ == "__main__": + detector = NucleusDetector(model="mapde-conic", batch_size=8, num_workers=2) + detector.run( + images=[pathlib.Path("/media/u1910100/data/slides/CMU-1-Small-Region.svs")], + patch_mode=False, + device="cuda", + save_dir=pathlib.Path("/media/u1910100/data/overlays/test"), + overwrite=True, + ) \ No newline at end of file diff --git a/tests/engines/test_nucleus_detection_engine.py b/tests/engines/test_nucleus_detection_engine.py index e9f4e1aec..3427d38da 100644 --- a/tests/engines/test_nucleus_detection_engine.py +++ b/tests/engines/test_nucleus_detection_engine.py @@ -7,7 +7,6 @@ import pytest from tiatoolbox.models.engine.nucleus_detector import ( - IONucleusDetectorConfig, NucleusDetector, ) from tiatoolbox.utils import env_detection as toolbox_env @@ -15,53 +14,55 @@ ON_GPU = not 
toolbox_env.running_on_ci() and toolbox_env.has_gpu() -def _rm_dir(path): - """Helper func to remove directory.""" - if pathlib.Path(path).exists(): - shutil.rmtree(path, ignore_errors=True) - - -def check_output(path): - """Check NucleusDetector output.""" - coordinates = pd.read_csv(path) - assert coordinates.x[0] == pytest.approx(53, abs=2) - assert coordinates.x[1] == pytest.approx(55, abs=2) - assert coordinates.y[0] == pytest.approx(107, abs=2) - assert coordinates.y[1] == pytest.approx(127, abs=2) - - -def test_nucleus_detector_engine(remote_sample, tmp_path): - """Test for nucleus detection engine.""" - mini_wsi_svs = pathlib.Path(remote_sample("wsi4_512_512_svs")) - - nucleus_detector = NucleusDetector(pretrained_model="mapde-conic") - _ = nucleus_detector.predict( - [mini_wsi_svs], - mode="wsi", - save_dir=tmp_path / "output", - on_gpu=ON_GPU, - ) - - check_output(tmp_path / "output" / "0.locations.0.csv") - - _rm_dir(tmp_path / "output") - - ioconfig = IONucleusDetectorConfig( - input_resolutions=[{"units": "mpp", "resolution": 0.5}], - output_resolutions=[{"units": "mpp", "resolution": 0.5}], - save_resolution=None, - patch_input_shape=[252, 252], - patch_output_shape=[252, 252], - stride_shape=[150, 150], - ) - - nucleus_detector = NucleusDetector(pretrained_model="mapde-conic") - _ = nucleus_detector.predict( - [mini_wsi_svs], - mode="wsi", - save_dir=tmp_path / "output", - on_gpu=ON_GPU, - ioconfig=ioconfig, - ) - - check_output(tmp_path / "output" / "0.locations.0.csv") +# def _rm_dir(path): +# """Helper func to remove directory.""" +# if pathlib.Path(path).exists(): +# shutil.rmtree(path, ignore_errors=True) + + +# def check_output(path): +# """Check NucleusDetector output.""" +# coordinates = pd.read_csv(path) +# assert coordinates.x[0] == pytest.approx(53, abs=2) +# assert coordinates.x[1] == pytest.approx(55, abs=2) +# assert coordinates.y[0] == pytest.approx(107, abs=2) +# assert coordinates.y[1] == pytest.approx(127, abs=2) + + +# def test_nucleus_detector_engine(remote_sample, tmp_path): +# """Test for nucleus detection engine.""" +# mini_wsi_svs = pathlib.Path(remote_sample("wsi4_512_512_svs")) + +# nucleus_detector = NucleusDetector(pretrained_model="mapde-conic") +# _ = nucleus_detector.predict( +# [mini_wsi_svs], +# mode="wsi", +# save_dir=tmp_path / "output", +# on_gpu=ON_GPU, +# ) + +# check_output(tmp_path / "output" / "0.locations.0.csv") + +# _rm_dir(tmp_path / "output") + +# ioconfig = IONucleusDetectorConfig( +# input_resolutions=[{"units": "mpp", "resolution": 0.5}], +# output_resolutions=[{"units": "mpp", "resolution": 0.5}], +# save_resolution=None, +# patch_input_shape=[252, 252], +# patch_output_shape=[252, 252], +# stride_shape=[150, 150], +# ) + +# nucleus_detector = NucleusDetector(pretrained_model="mapde-conic") +# _ = nucleus_detector.predict( +# [mini_wsi_svs], +# mode="wsi", +# save_dir=tmp_path / "output", +# on_gpu=ON_GPU, +# ioconfig=ioconfig, +# ) + +# check_output(tmp_path / "output" / "0.locations.0.csv") + + diff --git a/tiatoolbox/data/pretrained_model.yaml b/tiatoolbox/data/pretrained_model.yaml index a345e7681..75963662f 100644 --- a/tiatoolbox/data/pretrained_model.yaml +++ b/tiatoolbox/data/pretrained_model.yaml @@ -843,7 +843,7 @@ mapde-conic: - { "units": "mpp", "resolution": 0.5 } output_resolutions: - { "units": "mpp", "resolution": 0.5 } - tile_shape: [ 2048, 2048 ] + # tile_shape: [ 2048, 2048 ] patch_input_shape: [ 252, 252 ] patch_output_shape: [ 252, 252 ] stride_shape: [ 150, 150 ] diff --git 
a/tiatoolbox/models/architecture/mapde.py b/tiatoolbox/models/architecture/mapde.py index 0900aa6fd..b4a7c4deb 100644 --- a/tiatoolbox/models/architecture/mapde.py +++ b/tiatoolbox/models/architecture/mapde.py @@ -262,7 +262,7 @@ def infer_batch( batch_data: torch.Tensor, *, device: str, - ) -> list[np.ndarray]: + ) -> np.ndarray: """Run inference on an input batch. This contains logic for forward operation as well as batch I/O @@ -293,8 +293,4 @@ def infer_batch( pred = model(patch_imgs_gpu) pred = pred.permute(0, 2, 3, 1).contiguous() - pred = pred.cpu().numpy() - - return [ - pred, - ] + return pred.cpu().numpy() \ No newline at end of file diff --git a/tiatoolbox/models/engine/nucleus_detector.py b/tiatoolbox/models/engine/nucleus_detector.py index 0cc006756..55c66b6c8 100644 --- a/tiatoolbox/models/engine/nucleus_detector.py +++ b/tiatoolbox/models/engine/nucleus_detector.py @@ -1,61 +1,217 @@ """This module implements nucleus detection engine.""" +from __future__ import annotations +import os +from pathlib import Path +import sys import numpy as np import pandas as pd - +import dask.array as da +import dask.dataframe as dd +from dask.delayed import delayed +import dask +from skimage.feature import peak_local_max +from skimage.measure import label, regionprops + +from tiatoolbox.models.engine.engine_abc import EngineABCRunParams from tiatoolbox.models.engine.semantic_segmentor import ( - IOSegmentorConfig, SemanticSegmentor, + SemanticSegmentorRunParams ) +from tiatoolbox.models.engine.io_config import IOSegmentorConfig +from tiatoolbox.models.models_abc import ModelABC +from shapely.geometry import Point +from typing import TYPE_CHECKING, Unpack +if TYPE_CHECKING: # pragma: no cover + import os -class IONucleusDetectorConfig(IOSegmentorConfig): - """Contains NucleusDetector input and output information. + from torch.utils.data import DataLoader - Args: - input_resolutions (list): - Resolution of each input head of model inference, must be in - the same order as `target model.forward()`. - output_resolutions (list): - Resolution of each output head from model inference, must be - in the same order as target model.infer_batch(). - patch_input_shape (:class:`numpy.ndarray`, list(int)): - Shape of the largest input in (height, width). - patch_output_shape (:class:`numpy.ndarray`, list(int)): - Shape of the largest output in (height, width). - save_resolution (dict): - Resolution to save all output. + from tiatoolbox.annotation import AnnotationStore + from tiatoolbox.models.engine.io_config import IOSegmentorConfig + from tiatoolbox.models.models_abc import ModelABC + from tiatoolbox.type_hints import Resolution + from tiatoolbox.wsicore import WSIReader - Examples: - >>> # Defining io for a network having 1 input and 1 output at the - >>> # same resolution - >>> ioconfig = IONucleusDetectorConfig( - ... input_resolutions=[{"units": "baseline", "resolution": 1.0}], - ... output_resolutions=[{"units": "baseline", "resolution": 1.0}], - ... patch_input_shape=[2048, 2048], - ... patch_output_shape=[1024, 1024], - ... stride_shape=[512, 512], - ... ) +def dataframe_to_annotation_store( + df: pd.DataFrame, +) -> AnnotationStore: + """ + Convert a pandas DataFrame with columns ['x','y','type','prob'] + to an AnnotationStore and save to disk. 
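The `dataframe_to_annotation_store` helper introduced above maps each detection row onto a `Point` annotation in a TIAToolbox `SQLiteStore`. A minimal, self-contained sketch of the same conversion on toy data (hypothetical output path; columns follow the ['x', 'y', 'type', 'prob'] layout used throughout this patch):

import pandas as pd
from shapely.geometry import Point

from tiatoolbox.annotation import Annotation, SQLiteStore

# Toy detections in the same column layout the helper expects.
detections = pd.DataFrame(
    {"x": [53, 55], "y": [107, 127], "type": [0, 0], "prob": [0.91, 0.87]}
)

store = SQLiteStore()
for _, row in detections.iterrows():
    store.append(
        Annotation(
            geometry=Point(int(row["x"]), int(row["y"])),
            properties={"type": "nuclei", "probability": float(row["prob"])},
        )
    )

store.dump("detections.db")  # hypothetical output path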
+ """ + from tiatoolbox.annotation import SQLiteStore, Annotation + + ann_store = SQLiteStore() + for _, row in df.iterrows(): + x = int(row["x"]) + y = int(row["y"]) + obj_type = int(row["type"]) + prob = float(row["prob"]) + ann = Annotation(geometry=Point(x, y), properties={"type": "nuclei", "probability": prob}) + ann_store.append(ann) + return ann_store + +def processed_mask_fn(img2d:np.ndarray, min_distance: int, threshold_abs: float|int) -> np.ndarray: + """ + Build a boolean mask (H, W) of objects from a 2D probability map. + Here: 1-pixel objects from peak_local_max. Add morphology inside if you need blobs. + """ + H, W = img2d.shape + mask = np.zeros((H, W), dtype=bool) + coords = peak_local_max(img2d, min_distance=min_distance, threshold_abs=threshold_abs) + if coords.size: + r, c = coords[:, 0], coords[:, 1] + mask[r, c] = True + return mask + +def block_regionprops_mapoverlap( + block: np.ndarray, + block_info, + min_distance: int, + threshold_abs: float | int, + depth_h: int, + depth_w: int, +) -> np.ndarray: """ + Runs inside da.map_overlap on a padded NumPy block: (h_pad, w_pad, C). + Builds a processed mask per channel, runs label+regionprops, and writes + region score (mean_intensity) at centroid pixels. Keeps only centroids + whose (row,col) lie in the interior window: + rows [depth_h : depth_h + core_h), cols [depth_w : depth_w + core_w) + Returns same spatial shape as input block: (h_pad, w_pad, C), float32. + """ + H, W, C = block.shape - def __init__( - self, - input_resolutions: list[dict], - output_resolutions: list[dict], - patch_input_shape: list[int] | np.ndarray, - patch_output_shape: list[int] | np.ndarray, - save_resolution: dict = None, - **kwargs, - ): - super().__init__( - input_resolutions=input_resolutions, - output_resolutions=output_resolutions, - patch_input_shape=patch_input_shape, - patch_output_shape=patch_output_shape, - save_resolution=save_resolution, - **kwargs, - ) + # --- derive core (pre-overlap) size for THIS block safely --- + info = block_info[0] + locs = info["array-location"] # [(r0,r1),(c0,c1),(ch0,ch1)] + core_h = int(locs[0][1] - locs[0][0]) # r1 - r0 + core_w = int(locs[1][1] - locs[1][0]) + + + rmin, rmax = depth_h, depth_h + core_h + cmin, cmax = depth_w, depth_w + core_w + + out = np.zeros((H, W, C), dtype=np.float32) + + for ch in range(C): + img = np.asarray(block[..., ch]) # NumPy 2D view + pmask = processed_mask_fn(img, min_distance, threshold_abs) + if not pmask.any(): + continue + + lab = label(pmask) + props = regionprops(lab, intensity_image=img) + + for reg in props: + r, c = reg.centroid # floats in padded-block coords + if (rmin <= r < rmax) and (cmin <= c < cmax): + rr = int(round(r)) + cc = int(round(c)) + if 0 <= rr < H and 0 <= cc < W: + out[rr, cc, ch] = float(reg.mean_intensity) + + return out + + +def detect_with_map_overlap(probs, min_distance, threshold_abs, depth_pixels): + """ + probs: Dask array (H, W, C), float. + depth_pixels: halo in pixels for H/W (use >= min_distance and >= any morphology radius). + Returns: + scores: da.Array (H, W, C) with mean_intensity at centroids, 0 elsewhere. 
+ """ + depth = {0: depth_pixels, 1: depth_pixels, 2: 0} + scores = da.map_overlap( + probs, + block_regionprops_mapoverlap, + depth=depth, + boundary=0, + dtype=np.float32, + block_info=True, + min_distance=min_distance, + threshold_abs=threshold_abs, + depth_h=depth_pixels, + depth_w=depth_pixels, + ) + return scores + +def scores_to_ddf(scores: da.Array, x_offset: int, y_offset: int) -> dd.DataFrame: + """ + Convert (H, W, C) scores -> Dask DataFrame with columns: x, y, type, prob. + Uses da.extract(mask, scores) to avoid vindex on Dask indexers. + """ + # 1) Build a boolean mask of detections + mask = scores > 0 + + # 2) Global coordinates of detections (lazy 1D Dask arrays) + yy, xx, cc = da.nonzero(mask) + + # 3) Values at those detections (lazy) — same length as yy/xx/cc + ss = da.extract(mask, scores) + + # 4) Assemble a Dask DataFrame + ddf = dd.concat( + [ + dd.from_dask_array(xx.astype("int64"), columns="x"), + dd.from_dask_array(yy.astype("int64"), columns="y"), + dd.from_dask_array(cc.astype("int64"), columns="type"), + dd.from_dask_array(ss.astype("float32"), columns="prob"), + ], + axis=1, + ) + + # 5) Apply global offsets (if your WSI/crop needs them) + ddf["x"] = ddf["x"] + int(x_offset) + ddf["y"] = ddf["y"] + int(y_offset) + + return ddf + + +def greedy_radius_nms_pandas_all(df: pd.DataFrame, radius: int) -> pd.DataFrame: + """ + Greedy NMS across ALL detections (no per-type grouping). + Keeps the highest-prob point, suppresses any other point within 'radius' pixels. + + Expects columns: ['x','y','type','prob']. + Returns: filtered DataFrame with same columns/dtypes. + """ + if df.empty: + return df.copy() + + # Sort by descending probability (highest priority first) + sub = df.sort_values("prob", ascending=False).reset_index(drop=True) + + # Coordinates as float64 for distance math + coords = sub[["x", "y"]].to_numpy(dtype=np.float64) + r2 = float(radius) * float(radius) + + suppressed = np.zeros(len(sub), dtype=bool) + keep_idx = [] + + for i in range(len(sub)): + if suppressed[i]: + continue + keep_idx.append(i) + + # Suppress all remaining within radius of the kept point + dx = coords[:, 0] - coords[i, 0] + dy = coords[:, 1] - coords[i, 1] + close = (dx * dx + dy * dy) <= r2 + suppressed |= close + + kept = sub.iloc[keep_idx].copy() + + # Ensure stable dtypes + kept["x"] = kept["x"].astype("int64") + kept["y"] = kept["y"].astype("int64") + kept["type"] = kept["type"].astype("int64") + kept["prob"] = kept["prob"].astype(df["prob"].dtype) + + return kept class NucleusDetector(SemanticSegmentor): @@ -68,83 +224,55 @@ class NucleusDetector(SemanticSegmentor): :header-rows: 1 Args: - model (nn.Module): - Use externally defined PyTorch model for prediction with. - weights already loaded. Default is `None`. If provided, - `pretrained_model` argument is ignored. - pretrained_model (str): - Name of the existing models support by tiatoolbox for - processing the data. For a full list of pretrained models, - refer to the `docs - `_ - By default, the corresponding pretrained weights will also - be downloaded. However, you can override with your own set - of weights via the `pretrained_weights` argument. Argument - is case-insensitive. - pretrained_weights (str): - Path to the weight of the corresponding `pretrained_model`. - - >>> predictor = NucleusDetector( - ... pretrained_model="mapde-conic", - ... pretrained_weights="mapde_local_weight") - - batch_size (int): - Number of images fed into the model each time. - num_loader_workers (int): - Number of workers to load the data. 
Take note that they will - also perform preprocessing. - verbose (bool): - Whether to output logging information. default=False. - auto_generate_mask (bool): - To automatically generate tile/WSI tissue mask if is not - provided. default=False. - - Attributes: - imgs (:obj:`str` or :obj:`pathlib.Path` or :obj:`numpy.ndarray`): - A HWC image or a path to WSI. - model (nn.Module): - Defined PyTorch model. - pretrained_model (str): - Name of the existing models support by tiatoolbox for - processing the data e.g., mapde-conic, sccnn-conic. + model (str or nn.Module): + Defined PyTorch model or name of the existing models support by + tiatoolbox for processing the data e.g., mapde-conic, sccnn-conic. For a full list of pretrained models, please refer to the `docs - `_ + `. By default, the corresponding pretrained weights will also be downloaded. However, you can override with your own set - of weights via the `pretrained_weights` argument. Argument - is case insensitive. + of weights via the `weights` argument. Argument is case insensitive. batch_size (int): Number of images fed into the model each time. - num_loader_workers (int): + num_workers (int): Number of workers used in torch.utils.data.DataLoader. + weights (str or pathlib.Path, optional): + Pretrained weights file path or name of the existing weights + supported by tiatoolbox. If ``None``, and `model` is a string, + the default pretrained weights for the specified model will be used. + If `model` is a nn.Module, no weights will be loaded + unless specified here. + device (str): + Device to run the model on, e.g., 'cpu' or 'cuda:0'. verbose (bool): - Whether to output logging information. + Whether to output logging information. + Examples: >>> # list of 2 image patches as input >>> data = [img1, img2] >>> nucleus_detector = NucleusDetector(pretrained_model="mapde-conic") - >>> output = nucleus_detector.predict(data, mode='patch') + >>> output = nucleus_detector.run(data, mode='patch') >>> # array of list of 2 image patches as input >>> data = np.array([img1, img2]) >>> nucleus_detector = NucleusDetector(pretrained_model="mapde-conic") - >>> output = nucleus_detector.predict(data, mode='patch') + >>> output = nucleus_detector.run(data, mode='patch') >>> # list of 2 image patch files as input >>> data = ['path/img.png', 'path/img.png'] >>> nucleus_detector = NucleusDetector(pretrained_model="mapde-conic") - >>> output = nucleus_detector.predict(data, mode='patch') + >>> output = nucleus_detector.run(data, mode='patch') >>> # list of 2 image tile files as input >>> tile_file = ['path/tile1.png', 'path/tile2.png'] - >>> nucleus_detector = NucleusDetector(pretraind_model="mapde-conic") - >>> output = nucleus_detector.predict(tile_file, mode='tile') + >>> nucleus_detector = NucleusDetector(pretrained_model="mapde-conic") + >>> output = nucleus_detector.run(tile_file, mode='tile') >>> # list of 2 wsi files as input >>> wsi_file = ['path/wsi1.svs', 'path/wsi2.svs'] - >>> nucleus_detector = NucleusDetector(pretraind_model="mapde-conic") - >>> output = nucleus_detector.predict(wsi_file, mode='wsi') + >>> nucleus_detector = NucleusDetector(pretrained_model="mapde-conic") + >>> output = nucleus_detector.run(wsi_file, mode='wsi') References: [1] Raza, Shan E. Ahmed, et al. 
"Deconvolving convolutional neural network @@ -161,90 +289,165 @@ class NucleusDetector(SemanticSegmentor): from tiatoolbox.wsicore.wsireader import WSIReader def __init__( - self, - batch_size=8, - num_loader_workers=0, - model=None, - pretrained_model=None, - pretrained_weights=None, - verbose: bool = False, - auto_generate_mask: bool = False, + self: NucleusDetector, + model: str | ModelABC, + batch_size: int = 8, + num_workers: int = 0, + weights: str | Path | None = None, + *, + device: str = "cpu", + verbose: bool = True, ): super().__init__( - batch_size=batch_size, - num_loader_workers=num_loader_workers, model=model, - pretrained_model=pretrained_model, - pretrained_weights=pretrained_weights, + batch_size=batch_size, + num_workers=num_workers, + weights=weights, + device=device, verbose=verbose, - auto_generate_mask=auto_generate_mask, ) - def _process_predictions( - self, - cum_batch_predictions: list, - wsi_reader: WSIReader, - ioconfig: IOSegmentorConfig, - save_path: str, - cache_dir: str, - ): - """Define how the aggregated predictions are processed. + def post_process_patches(self, + raw_predictions: da.Array, + prediction_shape: tuple[int, ...], # noqa: ARG002 + prediction_dtype: type, # noqa: ARG002 + **kwargs: Unpack[SemanticSegmentorRunParams], # noqa: ARG002 + ) -> da.Array: + """Define how to post-process patch predictions. + + Returns: + A function that process the raw model predictions on patches. + + """ - This includes merging the prediction if necessary and also saving the - locations afterwards. Note that items within `cum_batch_predictions` will - be consumed during the operation. + pass + + def post_process_wsi(self: NucleusDetector, + raw_predictions: da.Array, + prediction_shape: tuple[int, ...], + prediction_dtype: type, + **kwargs: Unpack[SemanticSegmentorRunParams], + ) -> da.Array: + """Define how to post-process WSI predictions. + + Returns: + A function that process the raw model predictions on WSI. + + """ + print("Post processing WSI predictions in NucleusDetector") + + print("Raw probabilities shape:", raw_predictions.shape) + + print("Chunk size:", raw_predictions.chunks) + + + + scores = detect_with_map_overlap( + probs=raw_predictions, + min_distance=3, + threshold_abs=205, # set your threshold + depth_pixels=5 + ) + print("Scores shape:", scores.shape) + + # compact table: + ddf = scores_to_ddf(scores, x_offset=0, y_offset=0) + pandas_df = ddf.compute() + + print("Total detections before NMS:", len(pandas_df)) + nms_df = greedy_radius_nms_pandas_all(pandas_df, radius=3) + print("Total detections after NMS:", len(nms_df)) + + save_path = "/media/u1910100/data/overlays/test/mapde_conic.db" + ann_store = dataframe_to_annotation_store(nms_df) + ann_store.dump(save_path) + + + sys.exit() + + + + def run( + self: NucleusDetector, + images: list[os.PathLike | Path | WSIReader] | np.ndarray, + masks: list[os.PathLike | Path] | np.ndarray | None = None, + labels: list | None = None, + ioconfig: IOSegmentorConfig | None = None, + *, + patch_mode: bool = True, + save_dir: os.PathLike | Path | None = None, + overwrite: bool = False, + output_type: str = "dict", + **kwargs: Unpack[SemanticSegmentorRunParams], + ) -> AnnotationStore | Path | str | dict | list[Path]: + """Run the semantic segmentation engine on input images. + + This method orchestrates the full inference pipeline, including preprocessing, + model inference, post-processing, and saving results. It supports both + patch-level and whole slide image (WSI) modes. 
Args: - cum_batch_predictions (list): - List of batch predictions. Each item within the list - should be of (location, patch_predictions). - wsi_reader (:class:`WSIReader`): - A reader for the image where the predictions come from. - ioconfig (:class:`IOSegmentorConfig`): - A configuration object contains input and output - information. - save_path (str): - Root path to save current WSI predictions. - cache_dir (str): - Root path to cache current WSI data. + images (list[PathLike | WSIReader] | np.ndarray): + Input images or patches. Can be a list of file paths, WSIReader objects, + or a NumPy array of image patches. + masks (list[PathLike] | np.ndarray | None): + Optional masks for WSI processing. Only used when `patch_mode` is False. + labels (list | None): + Optional labels for input images. Only one label per image is supported. + ioconfig (IOSegmentorConfig | None): + IO configuration for patch extraction and resolution. + patch_mode (bool): + Whether to treat input as patches (`True`) or WSIs (`False`). Default + is True. + save_dir (PathLike | None): + Directory to save output files. Required for WSI mode. + overwrite (bool): + Whether to overwrite existing output files. Default is False. + output_type (str): + Desired output format: "dict", "zarr", or "annotationstore". Default + is "dict". + **kwargs (SemanticSegmentorRunParams): + Additional runtime parameters to update engine attributes. + + Returns: + AnnotationStore | Path | str | dict | list[Path]: + - If `patch_mode` is True: returns predictions or path to saved output. + - If `patch_mode` is False: returns a dictionary mapping each WSI + to its output path. + + Examples: + >>> wsis = ['wsi1.svs', 'wsi2.svs'] + >>> image_patches = [np.ndarray, np.ndarray] + >>> segmentor = SemanticSegmentor(model="fcn-tissue_mask") + >>> output = segmentor.run(image_patches, patch_mode=True) + >>> output + ... "/path/to/Output.db" + + >>> output = segmentor.run( + ... image_patches, + ... patch_mode=True, + ... output_type="zarr" + ... ) + >>> output + ... "/path/to/Output.zarr" + + >>> output = segmentor.run(wsis, patch_mode=False) + >>> output.keys() + ... ['wsi1.svs', 'wsi2.svs'] + >>> output['wsi1.svs'] + ... "/path/to/wsi1.db" """ - if len(cum_batch_predictions) == 0: - return - - # assume predictions is N, each item has L output element - locations, predictions = list(zip(*cum_batch_predictions)) - # Nx4 (N x [tl_x, tl_y, br_x, br_y), denotes the location of - # output patch this can exceed the image bound at the requested - # resolution remove singleton due to split. - locations = np.array([v[0] for v in locations]) - for index, output_resolution in enumerate(ioconfig.output_resolutions): - # assume resolution index to be in the same order as L - merged_resolution = ioconfig.highest_input_resolution - merged_locations = locations - # ! 
location is w.r.t the highest resolution, hence still need conversion - if ioconfig.save_resolution is not None: - merged_resolution = ioconfig.save_resolution - output_shape = wsi_reader.slide_dimensions(**output_resolution) - merged_shape = wsi_reader.slide_dimensions(**merged_resolution) - fx = merged_shape[0] / output_shape[0] - merged_locations = np.ceil(locations * fx).astype(np.int64) - merged_shape = wsi_reader.slide_dimensions(**merged_resolution) - # 0 idx is to remove singleton without removing other axes singleton - to_merge_predictions = [v[index][0] for v in predictions] - sub_save_path = f"{save_path}.raw.{index}.npy" - sub_count_path = f"{cache_dir}/count.{index}.npy" - cum_canvas = self.merge_prediction( - merged_shape[::-1], # XY to YX - to_merge_predictions, - merged_locations, - save_path=sub_save_path, - cache_count_path=sub_count_path, - ) - - # Coordinates in output resolution for the current canvas. - cum_canvas = np.expand_dims(cum_canvas, axis=0) - coordinates_canvas = pd.DataFrame( - self.model.postproc_func(cum_canvas), columns=["x", "y"] - ) - coordinates_canvas.to_csv(f"{save_path}.locations.{index}.csv", index=False) + return super().run( + images=images, + masks=masks, + labels=labels, + ioconfig=ioconfig, + patch_mode=patch_mode, + save_dir=save_dir, + overwrite=overwrite, + output_type=output_type, + **kwargs, + ) + + \ No newline at end of file From 44c4994e1924068ba61f8373a6e30815ff7b36f9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 7 Nov 2025 17:30:36 +0000 Subject: [PATCH 03/26] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- requirements/requirements.txt | 2 +- test.py | 8 +- .../engines/test_nucleus_detection_engine.py | 11 --- tiatoolbox/models/architecture/mapde.py | 2 +- tiatoolbox/models/engine/nucleus_detector.py | 98 +++++++++---------- 5 files changed, 49 insertions(+), 72 deletions(-) diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 96e2ebed8..1282748bf 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -5,7 +5,6 @@ albumentations>=1.3.0 bokeh>=3.1.1, <3.6.0 Click>=8.1.3, <8.2.0 dask>=2025.10.0 -pyarrow>=14.0.1 defusedxml>=0.7.1 filelock>=3.9.0 flask>=2.2.2 @@ -23,6 +22,7 @@ openslide-bin>=4.0.0.2 openslide-python>=1.4.0 pandas>=2.0.0 pillow>=9.3.0 +pyarrow>=14.0.1 pydicom>=2.3.1 # Used by wsidicom pyyaml>=6.0 requests>=2.28.1 diff --git a/test.py b/test.py index 1a45ad039..3320381bd 100644 --- a/test.py +++ b/test.py @@ -1,8 +1,4 @@ import pathlib -import shutil - -import pandas as pd -import pytest from tiatoolbox.models.engine.nucleus_detector import ( NucleusDetector, @@ -15,9 +11,9 @@ if __name__ == "__main__": detector = NucleusDetector(model="mapde-conic", batch_size=8, num_workers=2) detector.run( - images=[pathlib.Path("/media/u1910100/data/slides/CMU-1-Small-Region.svs")], + images=[pathlib.Path("/media/u1910100/data/slides/CMU-1-Small-Region.svs")], patch_mode=False, device="cuda", save_dir=pathlib.Path("/media/u1910100/data/overlays/test"), overwrite=True, - ) \ No newline at end of file + ) diff --git a/tests/engines/test_nucleus_detection_engine.py b/tests/engines/test_nucleus_detection_engine.py index 3427d38da..1bca39362 100644 --- a/tests/engines/test_nucleus_detection_engine.py +++ b/tests/engines/test_nucleus_detection_engine.py @@ -1,14 +1,5 @@ """Tests for NucleusDetector.""" -import pathlib -import shutil - -import pandas as pd 
-import pytest - -from tiatoolbox.models.engine.nucleus_detector import ( - NucleusDetector, -) from tiatoolbox.utils import env_detection as toolbox_env ON_GPU = not toolbox_env.running_on_ci() and toolbox_env.has_gpu() @@ -64,5 +55,3 @@ # ) # check_output(tmp_path / "output" / "0.locations.0.csv") - - diff --git a/tiatoolbox/models/architecture/mapde.py b/tiatoolbox/models/architecture/mapde.py index b4a7c4deb..645a4c2c0 100644 --- a/tiatoolbox/models/architecture/mapde.py +++ b/tiatoolbox/models/architecture/mapde.py @@ -293,4 +293,4 @@ def infer_batch( pred = model(patch_imgs_gpu) pred = pred.permute(0, 2, 3, 1).contiguous() - return pred.cpu().numpy() \ No newline at end of file + return pred.cpu().numpy() diff --git a/tiatoolbox/models/engine/nucleus_detector.py b/tiatoolbox/models/engine/nucleus_detector.py index 55c66b6c8..1fdb923d3 100644 --- a/tiatoolbox/models/engine/nucleus_detector.py +++ b/tiatoolbox/models/engine/nucleus_detector.py @@ -1,47 +1,43 @@ """This module implements nucleus detection engine.""" + from __future__ import annotations import os -from pathlib import Path import sys -import numpy as np -import pandas as pd +from pathlib import Path +from typing import TYPE_CHECKING, Unpack + import dask.array as da import dask.dataframe as dd -from dask.delayed import delayed -import dask +import numpy as np +import pandas as pd +from shapely.geometry import Point from skimage.feature import peak_local_max from skimage.measure import label, regionprops -from tiatoolbox.models.engine.engine_abc import EngineABCRunParams +from tiatoolbox.models.engine.io_config import IOSegmentorConfig from tiatoolbox.models.engine.semantic_segmentor import ( SemanticSegmentor, - SemanticSegmentorRunParams + SemanticSegmentorRunParams, ) -from tiatoolbox.models.engine.io_config import IOSegmentorConfig from tiatoolbox.models.models_abc import ModelABC -from shapely.geometry import Point -from typing import TYPE_CHECKING, Unpack if TYPE_CHECKING: # pragma: no cover import os - from torch.utils.data import DataLoader - from tiatoolbox.annotation import AnnotationStore from tiatoolbox.models.engine.io_config import IOSegmentorConfig from tiatoolbox.models.models_abc import ModelABC - from tiatoolbox.type_hints import Resolution from tiatoolbox.wsicore import WSIReader + def dataframe_to_annotation_store( df: pd.DataFrame, ) -> AnnotationStore: - """ - Convert a pandas DataFrame with columns ['x','y','type','prob'] + """Convert a pandas DataFrame with columns ['x','y','type','prob'] to an AnnotationStore and save to disk. """ - from tiatoolbox.annotation import SQLiteStore, Annotation + from tiatoolbox.annotation import Annotation, SQLiteStore ann_store = SQLiteStore() for _, row in df.iterrows(): @@ -49,34 +45,39 @@ def dataframe_to_annotation_store( y = int(row["y"]) obj_type = int(row["type"]) prob = float(row["prob"]) - ann = Annotation(geometry=Point(x, y), properties={"type": "nuclei", "probability": prob}) + ann = Annotation( + geometry=Point(x, y), properties={"type": "nuclei", "probability": prob} + ) ann_store.append(ann) return ann_store -def processed_mask_fn(img2d:np.ndarray, min_distance: int, threshold_abs: float|int) -> np.ndarray: - """ - Build a boolean mask (H, W) of objects from a 2D probability map. +def processed_mask_fn( + img2d: np.ndarray, min_distance: int, threshold_abs: float +) -> np.ndarray: + """Build a boolean mask (H, W) of objects from a 2D probability map. Here: 1-pixel objects from peak_local_max. Add morphology inside if you need blobs. 
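Inside each overlapped block, the boolean mask built by this helper is then labelled and measured with `regionprops`, so every detected peak yields one centroid scored by the mean probability underneath it. A stand-alone toy version of that label-and-measure step:

import numpy as np
from skimage.feature import peak_local_max
from skimage.measure import label, regionprops

prob = np.zeros((64, 64), dtype=np.float32)
prob[20, 20] = 0.9  # toy "nucleus" responses
prob[40, 45] = 0.7

# Boolean peak mask, as produced by the helper above.
mask = np.zeros_like(prob, dtype=bool)
coords = peak_local_max(prob, min_distance=3, threshold_abs=0.5)
mask[coords[:, 0], coords[:, 1]] = True

# One centroid per labelled region, scored by the mean probability under it.
for region in regionprops(label(mask), intensity_image=prob):
    row, col = region.centroid
    print(int(round(col)), int(round(row)), float(region.mean_intensity))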
""" H, W = img2d.shape mask = np.zeros((H, W), dtype=bool) - coords = peak_local_max(img2d, min_distance=min_distance, threshold_abs=threshold_abs) + coords = peak_local_max( + img2d, min_distance=min_distance, threshold_abs=threshold_abs + ) if coords.size: r, c = coords[:, 0], coords[:, 1] mask[r, c] = True return mask + def block_regionprops_mapoverlap( block: np.ndarray, block_info, min_distance: int, - threshold_abs: float | int, + threshold_abs: float, depth_h: int, depth_w: int, ) -> np.ndarray: - """ - Runs inside da.map_overlap on a padded NumPy block: (h_pad, w_pad, C). + """Runs inside da.map_overlap on a padded NumPy block: (h_pad, w_pad, C). Builds a processed mask per channel, runs label+regionprops, and writes region score (mean_intensity) at centroid pixels. Keeps only centroids whose (row,col) lie in the interior window: @@ -87,11 +88,10 @@ def block_regionprops_mapoverlap( # --- derive core (pre-overlap) size for THIS block safely --- info = block_info[0] - locs = info["array-location"] # [(r0,r1),(c0,c1),(ch0,ch1)] - core_h = int(locs[0][1] - locs[0][0]) # r1 - r0 + locs = info["array-location"] # [(r0,r1),(c0,c1),(ch0,ch1)] + core_h = int(locs[0][1] - locs[0][0]) # r1 - r0 core_w = int(locs[1][1] - locs[1][0]) - rmin, rmax = depth_h, depth_h + core_h cmin, cmax = depth_w, depth_w + core_w @@ -118,9 +118,9 @@ def block_regionprops_mapoverlap( def detect_with_map_overlap(probs, min_distance, threshold_abs, depth_pixels): - """ - probs: Dask array (H, W, C), float. + """probs: Dask array (H, W, C), float. depth_pixels: halo in pixels for H/W (use >= min_distance and >= any morphology radius). + Returns: scores: da.Array (H, W, C) with mean_intensity at centroids, 0 elsewhere. """ @@ -139,9 +139,9 @@ def detect_with_map_overlap(probs, min_distance, threshold_abs, depth_pixels): ) return scores + def scores_to_ddf(scores: da.Array, x_offset: int, y_offset: int) -> dd.DataFrame: - """ - Convert (H, W, C) scores -> Dask DataFrame with columns: x, y, type, prob. + """Convert (H, W, C) scores -> Dask DataFrame with columns: x, y, type, prob. Uses da.extract(mask, scores) to avoid vindex on Dask indexers. """ # 1) Build a boolean mask of detections @@ -156,9 +156,9 @@ def scores_to_ddf(scores: da.Array, x_offset: int, y_offset: int) -> dd.DataFram # 4) Assemble a Dask DataFrame ddf = dd.concat( [ - dd.from_dask_array(xx.astype("int64"), columns="x"), - dd.from_dask_array(yy.astype("int64"), columns="y"), - dd.from_dask_array(cc.astype("int64"), columns="type"), + dd.from_dask_array(xx.astype("int64"), columns="x"), + dd.from_dask_array(yy.astype("int64"), columns="y"), + dd.from_dask_array(cc.astype("int64"), columns="type"), dd.from_dask_array(ss.astype("float32"), columns="prob"), ], axis=1, @@ -172,8 +172,7 @@ def scores_to_ddf(scores: da.Array, x_offset: int, y_offset: int) -> dd.DataFram def greedy_radius_nms_pandas_all(df: pd.DataFrame, radius: int) -> pd.DataFrame: - """ - Greedy NMS across ALL detections (no per-type grouping). + """Greedy NMS across ALL detections (no per-type grouping). Keeps the highest-prob point, suppresses any other point within 'radius' pixels. Expects columns: ['x','y','type','prob']. @@ -245,7 +244,7 @@ class NucleusDetector(SemanticSegmentor): device (str): Device to run the model on, e.g., 'cpu' or 'cuda:0'. verbose (bool): - Whether to output logging information. + Whether to output logging information. 
Examples: @@ -307,11 +306,12 @@ def __init__( verbose=verbose, ) - def post_process_patches(self, + def post_process_patches( + self, raw_predictions: da.Array, - prediction_shape: tuple[int, ...], # noqa: ARG002 - prediction_dtype: type, # noqa: ARG002 - **kwargs: Unpack[SemanticSegmentorRunParams], # noqa: ARG002 + prediction_shape: tuple[int, ...], + prediction_dtype: type, + **kwargs: Unpack[SemanticSegmentorRunParams], ) -> da.Array: """Define how to post-process patch predictions. @@ -320,9 +320,8 @@ def post_process_patches(self, """ - pass - - def post_process_wsi(self: NucleusDetector, + def post_process_wsi( + self: NucleusDetector, raw_predictions: da.Array, prediction_shape: tuple[int, ...], prediction_dtype: type, @@ -340,13 +339,11 @@ def post_process_wsi(self: NucleusDetector, print("Chunk size:", raw_predictions.chunks) - - scores = detect_with_map_overlap( probs=raw_predictions, min_distance=3, - threshold_abs=205, # set your threshold - depth_pixels=5 + threshold_abs=205, # set your threshold + depth_pixels=5, ) print("Scores shape:", scores.shape) @@ -362,10 +359,7 @@ def post_process_wsi(self: NucleusDetector, ann_store = dataframe_to_annotation_store(nms_df) ann_store.dump(save_path) - sys.exit() - - def run( self: NucleusDetector, @@ -449,5 +443,3 @@ def run( output_type=output_type, **kwargs, ) - - \ No newline at end of file From 0f8d4fe7e8d80448eb7f2f62aa8d810576747fda Mon Sep 17 00:00:00 2001 From: Jiaqi Lv Date: Mon, 10 Nov 2025 18:49:37 +0000 Subject: [PATCH 04/26] initial prototype --- test.py | 2 + tiatoolbox/models/architecture/mapde.py | 58 +++- tiatoolbox/models/engine/nucleus_detector.py | 334 ++++++++++--------- tiatoolbox/utils/misc.py | 81 ++++- 4 files changed, 298 insertions(+), 177 deletions(-) diff --git a/test.py b/test.py index 3320381bd..4172ad470 100644 --- a/test.py +++ b/test.py @@ -16,4 +16,6 @@ device="cuda", save_dir=pathlib.Path("/media/u1910100/data/overlays/test"), overwrite=True, + output_type="annotationstore", + class_dict={0: "nucleus"}, ) diff --git a/tiatoolbox/models/architecture/mapde.py b/tiatoolbox/models/architecture/mapde.py index 645a4c2c0..157b9bc4e 100644 --- a/tiatoolbox/models/architecture/mapde.py +++ b/tiatoolbox/models/architecture/mapde.py @@ -12,9 +12,16 @@ import torch import torch.nn.functional as F # noqa: N812 from skimage.feature import peak_local_max +import dask.array as da +from tiatoolbox.annotation.storage import SQLiteStore +import pandas as pd from tiatoolbox.models.architecture.micronet import MicroNet - +from tiatoolbox.models.engine.nucleus_detector import ( + peak_detection_mapoverlap, + centroids_map_to_dask_dataframe, + nucleus_detection_nms, +) class MapDe(MicroNet): """Initialize MapDe [1]. @@ -231,30 +238,57 @@ def forward(self: MapDe, input_tensor: torch.Tensor) -> torch.Tensor: logits, _, _, _ = super().forward(input_tensor) out = F.conv2d(logits, self.dist_filter, padding="same") return F.relu(out) + + + + # skipcq: PYL-W0221 # noqa: ERA001 - def postproc(self: MapDe, prediction_map: np.ndarray) -> np.ndarray: - """Post-processing script for MicroNet. + def postproc(self: MapDe, prediction_map: da.Array, prediction_shape: tuple, dtype: np.dtype) -> pd.DataFrame: + """Post-processing script for MapDe. Performs peak detection and extracts coordinates in x, y format. Args: - prediction_map (ndarray): - Input image of type numpy array. + prediction_map (da.array): + Predicted probability map (HxWx1) of the entire input image. 
Returns: - :class:`numpy.ndarray`: - Pixel-wise nuclear instance segmentation - prediction. + detected_nuclei (pandas.DataFrame): + Detected nuclei coordinates stored in a pandas DataFrame. """ - coordinates = peak_local_max( - np.squeeze(prediction_map[0], axis=2), + # coordinates = peak_local_max( + # np.squeeze(prediction_map[0], axis=2), + # min_distance=self.min_distance, + # threshold_abs=self.threshold_abs, + # exclude_border=False, + # ) + # return np.fliplr(coordinates) + + depth = {0: self.min_distance, 1: self.min_distance, 2: 0} + scores = da.map_overlap( + prediction_map, + peak_detection_mapoverlap, + depth=depth, + boundary=0, + dtype=dtype, + block_info=True, min_distance=self.min_distance, threshold_abs=self.threshold_abs, - exclude_border=False, + depth_h=self.min_distance, + depth_w=self.min_distance, + calculate_probabilities=False, ) - return np.fliplr(coordinates) + ddf = centroids_map_to_dask_dataframe(scores, x_offset=0, y_offset=0) + pandas_df = ddf.compute() + + print("Total detections before NMS:", len(pandas_df)) + nms_df = nucleus_detection_nms(pandas_df, radius=self.min_distance) + print("Total detections after NMS:", len(nms_df)) + + return nms_df + @staticmethod def infer_batch( diff --git a/tiatoolbox/models/engine/nucleus_detector.py b/tiatoolbox/models/engine/nucleus_detector.py index 1fdb923d3..4fdcd55f7 100644 --- a/tiatoolbox/models/engine/nucleus_detector.py +++ b/tiatoolbox/models/engine/nucleus_detector.py @@ -21,47 +21,34 @@ SemanticSegmentorRunParams, ) from tiatoolbox.models.models_abc import ModelABC +from tiatoolbox.annotation import Annotation, SQLiteStore, AnnotationStore +from tiatoolbox.utils.misc import df_to_store_nucleus_detector +from tiatoolbox import logger if TYPE_CHECKING: # pragma: no cover import os - - from tiatoolbox.annotation import AnnotationStore from tiatoolbox.models.engine.io_config import IOSegmentorConfig from tiatoolbox.models.models_abc import ModelABC from tiatoolbox.wsicore import WSIReader -def dataframe_to_annotation_store( - df: pd.DataFrame, -) -> AnnotationStore: - """Convert a pandas DataFrame with columns ['x','y','type','prob'] - to an AnnotationStore and save to disk. - """ - from tiatoolbox.annotation import Annotation, SQLiteStore - - ann_store = SQLiteStore() - for _, row in df.iterrows(): - x = int(row["x"]) - y = int(row["y"]) - obj_type = int(row["type"]) - prob = float(row["prob"]) - ann = Annotation( - geometry=Point(x, y), properties={"type": "nuclei", "probability": prob} - ) - ann_store.append(ann) - return ann_store - - -def processed_mask_fn( - img2d: np.ndarray, min_distance: int, threshold_abs: float +def probability_to_peak_map( + img2d: np.ndarray, min_distance: int, threshold_abs: float, threshold_rel: float = 0.0 ) -> np.ndarray: - """Build a boolean mask (H, W) of objects from a 2D probability map. - Here: 1-pixel objects from peak_local_max. Add morphology inside if you need blobs. + """Build a boolean mask (H, W) of objects from a 2D probability map using peak_local_max. + + Args: + img2d (np.ndarray): 2D probability map. + min_distance (int): Minimum distance between peaks. + threshold_abs (float): Absolute threshold for peak detection. + threshold_rel (float, optional): Relative threshold for peak detection. Defaults to 0.0. + Returns: + mask (np.ndarray): Boolean mask (H, W) with True at peak locations. 
""" H, W = img2d.shape mask = np.zeros((H, W), dtype=bool) coords = peak_local_max( - img2d, min_distance=min_distance, threshold_abs=threshold_abs + img2d, min_distance=min_distance, threshold_abs=threshold_abs, threshold_rel=threshold_rel ) if coords.size: r, c = coords[:, 0], coords[:, 1] @@ -69,20 +56,33 @@ def processed_mask_fn( return mask -def block_regionprops_mapoverlap( +def peak_detection_mapoverlap( block: np.ndarray, block_info, min_distance: int, threshold_abs: float, depth_h: int, depth_w: int, + calculate_probabilities: bool = False, ) -> np.ndarray: - """Runs inside da.map_overlap on a padded NumPy block: (h_pad, w_pad, C). - Builds a processed mask per channel, runs label+regionprops, and writes - region score (mean_intensity) at centroid pixels. Keeps only centroids - whose (row,col) lie in the interior window: + """Runs inside Dask.da.map_overlap on a padded NumPy block: (h_pad, w_pad, C). + Builds a processed mask per channel, runs peak_local_max then + label+regionprops, and writes probability (mean_intensity) at centroid pixels. + Keeps only centroids whose (row,col) lie in the interior window: rows [depth_h : depth_h + core_h), cols [depth_w : depth_w + core_w) Returns same spatial shape as input block: (h_pad, w_pad, C), float32. + + Args: + block: NumPy array (H, W, C) with padded block data. + block_info: Dask block info dict. + min_distance: Minimum distance in pixels between peaks. + threshold_abs: Minimum absolute threshold for peak detection. + depth_h: Halo size in pixels for height (rows). + depth_w: Halo size in pixels for width (cols). + calculate_probabilities: If True, write mean_intensity at centroids; + else write 1.0 at centroids. + Returns: + out: NumPy array (H, W, C) with probabilities at centroids, 0 elsewhere. """ H, W, C = block.shape @@ -99,7 +99,7 @@ def block_regionprops_mapoverlap( for ch in range(C): img = np.asarray(block[..., ch]) # NumPy 2D view - pmask = processed_mask_fn(img, min_distance, threshold_abs) + pmask = probability_to_peak_map(img, min_distance, threshold_abs) if not pmask.any(): continue @@ -112,12 +112,15 @@ def block_regionprops_mapoverlap( rr = int(round(r)) cc = int(round(c)) if 0 <= rr < H and 0 <= cc < W: - out[rr, cc, ch] = float(reg.mean_intensity) + if calculate_probabilities: + out[rr, cc, ch] = float(reg.mean_intensity) + else: + out[rr, cc, ch] = 1.0 return out -def detect_with_map_overlap(probs, min_distance, threshold_abs, depth_pixels): +def detection_with_map_overlap(probs: da.Array, min_distance: int, threshold_abs: float, depth_pixels: int) -> da.Array: """probs: Dask array (H, W, C), float. depth_pixels: halo in pixels for H/W (use >= min_distance and >= any morphology radius). @@ -127,7 +130,7 @@ def detect_with_map_overlap(probs, min_distance, threshold_abs, depth_pixels): depth = {0: depth_pixels, 1: depth_pixels, 2: 0} scores = da.map_overlap( probs, - block_regionprops_mapoverlap, + peak_detection_mapoverlap, depth=depth, boundary=0, dtype=np.float32, @@ -140,20 +143,27 @@ def detect_with_map_overlap(probs, min_distance, threshold_abs, depth_pixels): return scores -def scores_to_ddf(scores: da.Array, x_offset: int, y_offset: int) -> dd.DataFrame: - """Convert (H, W, C) scores -> Dask DataFrame with columns: x, y, type, prob. - Uses da.extract(mask, scores) to avoid vindex on Dask indexers. +def centroids_map_to_dask_dataframe(scores: da.Array, x_offset: int = 0, y_offset: int = 0) -> dd.DataFrame: + """Convert centroid map (H, W, C) into a Dask DataFrame with columns: x, y, type, prob. 
+ + Args: + scores: Dask array (H, W, C) with probabilities at centroids, 0 elsewhere. + x_offset: global x offset to add to all x coordinates. + y_offset: global y offset to add to all y coordinates. + Returns: + ddf: Dask DataFrame with columns: x, y, type, prob. """ # 1) Build a boolean mask of detections + mask = scores > 0 + # 2) Get coordinates and class of detections (lazy 1D Dask arrays) - # 2) Global coordinates of detections (lazy 1D Dask arrays) yy, xx, cc = da.nonzero(mask) + # 3) Get probability values at those detections (lazy) — same length as yy/xx/cc - # 3) Values at those detections (lazy) — same length as yy/xx/cc ss = da.extract(mask, scores) - # 4) Assemble a Dask DataFrame + # all columns are row-wise aligned (all built from arrays of the same length). ddf = dd.concat( [ dd.from_dask_array(xx.astype("int64"), columns="x"), @@ -162,24 +172,38 @@ def scores_to_ddf(scores: da.Array, x_offset: int, y_offset: int) -> dd.DataFram dd.from_dask_array(ss.astype("float32"), columns="prob"), ], axis=1, + ignore_unknown_divisions=True ) - # 5) Apply global offsets (if your WSI/crop needs them) - ddf["x"] = ddf["x"] + int(x_offset) - ddf["y"] = ddf["y"] + int(y_offset) + # 5) Apply global offsets (if needed) + if x_offset != 0: + ddf["x"] = ddf["x"] + int(x_offset) + if y_offset != 0: + ddf["y"] = ddf["y"] + int(y_offset) return ddf -def greedy_radius_nms_pandas_all(df: pd.DataFrame, radius: int) -> pd.DataFrame: - """Greedy NMS across ALL detections (no per-type grouping). - Keeps the highest-prob point, suppresses any other point within 'radius' pixels. +def nucleus_detection_nms(df: pd.DataFrame, radius: int, overlap_threshold:float = 0.5) -> pd.DataFrame: + """Greedy NMS across ALL detections. + + Keeps the highest-prob detection, removes any other point within 'radius' pixels > overlap_threshold. + Expects dataframe columns: ['x','y','type','prob']. - Expects columns: ['x','y','type','prob']. - Returns: filtered DataFrame with same columns/dtypes. + Args: + df: pandas DataFrame of detections. + radius: radius in pixels for suppression. + overlap_threshold: float in [0,1], fraction of radius for suppression. + + Returns: + filtered DataFrame with same columns/dtypes. 
""" if df.empty: return df.copy() + if radius <= 0: + raise ValueError("radius must be > 0") + if not (0.0 < overlap_threshold <= 1.0): + raise ValueError("overlap_threshold must be in (0.0, 1.0]") # Sort by descending probability (highest priority first) sub = df.sort_values("prob", ascending=False).reset_index(drop=True) @@ -188,28 +212,48 @@ def greedy_radius_nms_pandas_all(df: pd.DataFrame, radius: int) -> pd.DataFrame: coords = sub[["x", "y"]].to_numpy(dtype=np.float64) r2 = float(radius) * float(radius) + coords = sub[["x", "y"]].to_numpy(dtype=np.float64) + r = float(radius) + two_r = 2.0 * r + two_r2 = (two_r * two_r) # distance^2 cutoff for any overlap + suppressed = np.zeros(len(sub), dtype=bool) keep_idx = [] for i in range(len(sub)): if suppressed[i]: continue + keep_idx.append(i) - # Suppress all remaining within radius of the kept point + # Vectorised distances to all points dx = coords[:, 0] - coords[i, 0] dy = coords[:, 1] - coords[i, 1] - close = (dx * dx + dy * dy) <= r2 - suppressed |= close + d2 = dx * dx + dy * dy + + # Only points with d < 2r can have nonzero overlap + cand = (d2 <= two_r2) + cand[i] = False # don't suppress the kept point itself + if not np.any(cand): + continue + + d = np.sqrt(d2[cand]) - kept = sub.iloc[keep_idx].copy() - # Ensure stable dtypes - kept["x"] = kept["x"].astype("int64") - kept["y"] = kept["y"].astype("int64") - kept["type"] = kept["type"].astype("int64") - kept["prob"] = kept["prob"].astype(df["prob"].dtype) + # Safe cosine argument = (distance ÷ diameter), Clamp for numerical stability + u = np.clip(d / (2.0 * r), -1.0, 1.0) + # Exact intersection area of two equal-radius circles. + inter = 2.0 * (r * r) * np.arccos(u) - 0.5 * d * np.sqrt(np.clip(4.0 * r * r - d * d, 0.0, None)) + union = 2.0 * np.pi * (r * r) - inter + iou = inter / union + + # Suppress candidates whose IoU exceeds threshold + idx_cand = np.where(cand)[0] + to_suppress = idx_cand[iou >= overlap_threshold] + suppressed[to_suppress] = True + + kept = sub.iloc[keep_idx].copy() return kept @@ -307,18 +351,31 @@ def __init__( ) def post_process_patches( - self, + self: NucleusDetector, raw_predictions: da.Array, prediction_shape: tuple[int, ...], prediction_dtype: type, **kwargs: Unpack[SemanticSegmentorRunParams], - ) -> da.Array: + ) -> list[pd.DataFrame]: """Define how to post-process patch predictions. + Args: + raw_predictions (da.Array): The raw predictions from the model. + prediction_shape (tuple[int, ...]): The shape of the predictions. + prediction_dtype (type): The data type of the predictions. Returns: - A function that process the raw model predictions on patches. + A list of DataFrames containing the post-processed predictions for each patch. """ + _ = kwargs.get("return_probabilities") + _ = prediction_shape + _ = prediction_dtype + + batch_predictions = [] + for i in range(raw_predictions.shape[0]): + batch_predictions.append(self.model.postproc_func(raw_predictions[i])) + return batch_predictions + def post_process_wsi( self: NucleusDetector, @@ -326,120 +383,69 @@ def post_process_wsi( prediction_shape: tuple[int, ...], prediction_dtype: type, **kwargs: Unpack[SemanticSegmentorRunParams], - ) -> da.Array: + ) -> pd.DataFrame: """Define how to post-process WSI predictions. Returns: - A function that process the raw model predictions on WSI. + A DataFrame containing the post-processed predictions for the WSI. 
""" - print("Post processing WSI predictions in NucleusDetector") - - print("Raw probabilities shape:", raw_predictions.shape) - - print("Chunk size:", raw_predictions.chunks) - - scores = detect_with_map_overlap( - probs=raw_predictions, - min_distance=3, - threshold_abs=205, # set your threshold - depth_pixels=5, - ) - print("Scores shape:", scores.shape) + logger.info("Post processing WSI predictions in NucleusDetector") - # compact table: - ddf = scores_to_ddf(scores, x_offset=0, y_offset=0) - pandas_df = ddf.compute() + logger.info(f"Raw probabilities shape: {prediction_shape}") + logger.info(f"Raw probabilities dtype: {prediction_dtype}") + logger.info(f"Chunk size: {raw_predictions.chunks}") - print("Total detections before NMS:", len(pandas_df)) - nms_df = greedy_radius_nms_pandas_all(pandas_df, radius=3) - print("Total detections after NMS:", len(nms_df)) + detection_df = self.model.postproc(raw_predictions, prediction_shape, prediction_dtype) - save_path = "/media/u1910100/data/overlays/test/mapde_conic.db" - ann_store = dataframe_to_annotation_store(nms_df) - ann_store.dump(save_path) - sys.exit() + return detection_df - def run( + def save_predictions( self: NucleusDetector, - images: list[os.PathLike | Path | WSIReader] | np.ndarray, - masks: list[os.PathLike | Path] | np.ndarray | None = None, - labels: list | None = None, - ioconfig: IOSegmentorConfig | None = None, - *, - patch_mode: bool = True, - save_dir: os.PathLike | Path | None = None, - overwrite: bool = False, - output_type: str = "dict", + processed_predictions: dict, + output_type: str, + save_path: Path | None = None, **kwargs: Unpack[SemanticSegmentorRunParams], - ) -> AnnotationStore | Path | str | dict | list[Path]: - """Run the semantic segmentation engine on input images. - - This method orchestrates the full inference pipeline, including preprocessing, - model inference, post-processing, and saving results. It supports both - patch-level and whole slide image (WSI) modes. - - Args: - images (list[PathLike | WSIReader] | np.ndarray): - Input images or patches. Can be a list of file paths, WSIReader objects, - or a NumPy array of image patches. - masks (list[PathLike] | np.ndarray | None): - Optional masks for WSI processing. Only used when `patch_mode` is False. - labels (list | None): - Optional labels for input images. Only one label per image is supported. - ioconfig (IOSegmentorConfig | None): - IO configuration for patch extraction and resolution. - patch_mode (bool): - Whether to treat input as patches (`True`) or WSIs (`False`). Default - is True. - save_dir (PathLike | None): - Directory to save output files. Required for WSI mode. - overwrite (bool): - Whether to overwrite existing output files. Default is False. - output_type (str): - Desired output format: "dict", "zarr", or "annotationstore". Default - is "dict". - **kwargs (SemanticSegmentorRunParams): - Additional runtime parameters to update engine attributes. + ) -> AnnotationStore | Path | list[Path]: + """Define how to save the processed predictions. Returns: - AnnotationStore | Path | str | dict | list[Path]: - - If `patch_mode` is True: returns predictions or path to saved output. - - If `patch_mode` is False: returns a dictionary mapping each WSI - to its output path. - - Examples: - >>> wsis = ['wsi1.svs', 'wsi2.svs'] - >>> image_patches = [np.ndarray, np.ndarray] - >>> segmentor = SemanticSegmentor(model="fcn-tissue_mask") - >>> output = segmentor.run(image_patches, patch_mode=True) - >>> output - ... 
"/path/to/Output.db" - - >>> output = segmentor.run( - ... image_patches, - ... patch_mode=True, - ... output_type="zarr" - ... ) - >>> output - ... "/path/to/Output.zarr" - - >>> output = segmentor.run(wsis, patch_mode=False) - >>> output.keys() - ... ['wsi1.svs', 'wsi2.svs'] - >>> output['wsi1.svs'] - ... "/path/to/wsi1.db" + A function that saves the processed predictions. """ - return super().run( - images=images, - masks=masks, - labels=labels, - ioconfig=ioconfig, - patch_mode=patch_mode, - save_dir=save_dir, - overwrite=overwrite, - output_type=output_type, - **kwargs, - ) + logger.info("Saving predictions in NucleusDetector") + if output_type != "annotationstore": + logger.warning( + f"NucleusDetector only supports output_type='annotationstore'. " + f"Overriding output_type='{output_type}' to 'annotationstore'." + ) + output_type = "annotationstore" + scale_factor = kwargs.get("scale_factor", (1.0, 1.0)) + class_dict = kwargs.get("class_dict") + + if self.patch_mode: + save_paths = [] + for i, predictions in enumerate(processed_predictions["predictions"]): + if isinstance(self.images[i], Path): + output_path = save_path.parent / (self.images[i].stem + ".db") + else: + output_path = save_path.parent / (str(i) + ".db") + + out_file = df_to_store_nucleus_detector( + predictions, + scale_factor=scale_factor, + class_dict=class_dict, + save_path=output_path, + ) + + save_paths.append(out_file) + return save_paths + else: + return df_to_store_nucleus_detector( + processed_predictions['predictions'], + scale_factor=scale_factor, + save_path=save_path, + class_dict=class_dict, + ) + \ No newline at end of file diff --git a/tiatoolbox/utils/misc.py b/tiatoolbox/utils/misc.py index 56c6a3ea7..afc6923a0 100644 --- a/tiatoolbox/utils/misc.py +++ b/tiatoolbox/utils/misc.py @@ -21,7 +21,7 @@ import zarr from filelock import FileLock from shapely.affinity import translate -from shapely.geometry import Polygon +from shapely.geometry import Polygon, Point from shapely.geometry import shape as feature2geometry from skimage import exposure from tqdm import notebook as tqdm_notebook @@ -1340,6 +1340,85 @@ def process_contours( return annotations_list +def df_to_store_nucleus_detector( + df: pd.DataFrame, + scale_factor: tuple[float, float], + save_path: Path | None = None, + class_dict: dict | None = None, + batch_size: int = 50_000 +) -> SQLiteStore | Path: + """ + Convert a pandas DataFrame with columns ['x','y','type','prob'] + into an Annotation SQLiteStore efficiently using append_many(). + + Args: + df (pd.DataFrame): + A pandas DataFrame with columns ['x','y','type','prob']. + save_path (Path, optional): + Optional Output directory to save the Annotation + Store results. + scale_factor (tuple[float, float]): + The scale factor to use when saving the + annotations. All coordinates will be multiplied by this factor to allow + conversion of annotations saved at non-baseline resolution to baseline. + Should be model_mpp/slide_mpp. + class_dict (dict): + Optional dictionary mapping class indices to class names. + batch_size (int): + Number of annotations to process in each batch. + + Returns: + (SQLiteStore or Path): + An SQLiteStore containing Annotations for each nucleus + or Path to file storing SQLiteStore containing Annotations + for each nucleus. 
+ """ + + # 1) Select & coerce dtypes once (compact + avoids per-row casts) + x = df["x"].to_numpy(dtype=np.int64, copy=False) + y = df["y"].to_numpy(dtype=np.int64, copy=False) + t = df["type"].to_numpy(dtype=np.int64, copy=False) + p = df["prob"].to_numpy(dtype=np.float32, copy=False) + + x_scaled = np.rint(x * scale_factor[0]).astype(np.int64, copy=False) + y_scaled = np.rint(y * scale_factor[1]).astype(np.int64, copy=False) + + store = SQLiteStore() + + def make_points(xb, yb): + return [Point(int(xx), int(yy)) for xx, yy in zip(xb, yb)] + + if class_dict is None: + # identity over the actually present types (robust if types aren't 0..K) + unique_types = np.unique(t) + class_dict = {int(k): int(k) for k in unique_types} + + n = len(df) + for i in range(0, n, batch_size): + j = min(i + batch_size, n) + xb, yb, tb, pb = x_scaled[i:j], y_scaled[i:j], t[i:j], p[i:j] + + pts = make_points(xb, yb) # array/list of Points + + anns = [Annotation(geometry=pt, + properties={"type": class_dict.get(int(tt), int(tt)), "probability": float(pp)}) + for pt, tt, pp in zip(pts, tb, pb)] + + store.append_many(anns) + + # # if a save director is provided, then dump store into a file + if save_path: + # ensure parent directory exists + save_path.parent.absolute().mkdir(parents=True, exist_ok=True) + # ensure proper db extension + save_path = save_path.parent.absolute() / (save_path.stem + ".db") + store.commit() + store.dump(save_path) + return save_path + + return store + + def dict_to_store_semantic_segmentor( patch_output: dict | zarr.Group, scale_factor: tuple[float, float], From d42b78a22a15d66e31a791cf51802234e9a25dd0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 10 Nov 2025 18:50:03 +0000 Subject: [PATCH 05/26] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tiatoolbox/models/architecture/mapde.py | 18 ++--- tiatoolbox/models/engine/nucleus_detector.py | 76 +++++++++++--------- tiatoolbox/utils/misc.py | 25 ++++--- 3 files changed, 64 insertions(+), 55 deletions(-) diff --git a/tiatoolbox/models/architecture/mapde.py b/tiatoolbox/models/architecture/mapde.py index 157b9bc4e..e7f7f9e76 100644 --- a/tiatoolbox/models/architecture/mapde.py +++ b/tiatoolbox/models/architecture/mapde.py @@ -8,21 +8,20 @@ from __future__ import annotations +import dask.array as da import numpy as np +import pandas as pd import torch import torch.nn.functional as F # noqa: N812 -from skimage.feature import peak_local_max -import dask.array as da -from tiatoolbox.annotation.storage import SQLiteStore -import pandas as pd from tiatoolbox.models.architecture.micronet import MicroNet from tiatoolbox.models.engine.nucleus_detector import ( - peak_detection_mapoverlap, centroids_map_to_dask_dataframe, nucleus_detection_nms, + peak_detection_mapoverlap, ) + class MapDe(MicroNet): """Initialize MapDe [1]. @@ -238,13 +237,11 @@ def forward(self: MapDe, input_tensor: torch.Tensor) -> torch.Tensor: logits, _, _, _ = super().forward(input_tensor) out = F.conv2d(logits, self.dist_filter, padding="same") return F.relu(out) - - - - # skipcq: PYL-W0221 # noqa: ERA001 - def postproc(self: MapDe, prediction_map: da.Array, prediction_shape: tuple, dtype: np.dtype) -> pd.DataFrame: + def postproc( + self: MapDe, prediction_map: da.Array, prediction_shape: tuple, dtype: np.dtype + ) -> pd.DataFrame: """Post-processing script for MapDe. Performs peak detection and extracts coordinates in x, y format. 
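A minimal usage sketch of the df_to_store_nucleus_detector helper added to tiatoolbox/utils/misc.py earlier in this series. It assumes this PR branch is installed so the import resolves; the toy coordinates and class mapping are illustrative only, not test data.

    import pandas as pd

    from tiatoolbox.utils.misc import df_to_store_nucleus_detector

    # Toy detections in the ['x', 'y', 'type', 'prob'] layout produced by the detector.
    detections = pd.DataFrame(
        {"x": [53, 55], "y": [107, 127], "type": [0, 0], "prob": [0.9, 0.8]}
    )

    # scale_factor is model_mpp / slide_mpp; (1.0, 1.0) keeps coordinates at baseline.
    store = df_to_store_nucleus_detector(
        detections,
        scale_factor=(1.0, 1.0),
        class_dict={0: "nucleus"},
    )
    print(len(store))  # 2 point annotations held in the in-memory SQLiteStore

Passing save_path instead writes the store to a .db file and returns that path.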
@@ -289,7 +286,6 @@ def postproc(self: MapDe, prediction_map: da.Array, prediction_shape: tuple, dty return nms_df - @staticmethod def infer_batch( model: torch.nn.Module, diff --git a/tiatoolbox/models/engine/nucleus_detector.py b/tiatoolbox/models/engine/nucleus_detector.py index 4fdcd55f7..744033f36 100644 --- a/tiatoolbox/models/engine/nucleus_detector.py +++ b/tiatoolbox/models/engine/nucleus_detector.py @@ -2,8 +2,6 @@ from __future__ import annotations -import os -import sys from pathlib import Path from typing import TYPE_CHECKING, Unpack @@ -11,44 +9,46 @@ import dask.dataframe as dd import numpy as np import pandas as pd -from shapely.geometry import Point from skimage.feature import peak_local_max from skimage.measure import label, regionprops -from tiatoolbox.models.engine.io_config import IOSegmentorConfig +from tiatoolbox import logger +from tiatoolbox.annotation import AnnotationStore from tiatoolbox.models.engine.semantic_segmentor import ( SemanticSegmentor, SemanticSegmentorRunParams, ) from tiatoolbox.models.models_abc import ModelABC -from tiatoolbox.annotation import Annotation, SQLiteStore, AnnotationStore from tiatoolbox.utils.misc import df_to_store_nucleus_detector -from tiatoolbox import logger if TYPE_CHECKING: # pragma: no cover - import os - from tiatoolbox.models.engine.io_config import IOSegmentorConfig from tiatoolbox.models.models_abc import ModelABC - from tiatoolbox.wsicore import WSIReader def probability_to_peak_map( - img2d: np.ndarray, min_distance: int, threshold_abs: float, threshold_rel: float = 0.0 + img2d: np.ndarray, + min_distance: int, + threshold_abs: float, + threshold_rel: float = 0.0, ) -> np.ndarray: """Build a boolean mask (H, W) of objects from a 2D probability map using peak_local_max. - + Args: img2d (np.ndarray): 2D probability map. min_distance (int): Minimum distance between peaks. threshold_abs (float): Absolute threshold for peak detection. threshold_rel (float, optional): Relative threshold for peak detection. Defaults to 0.0. + Returns: mask (np.ndarray): Boolean mask (H, W) with True at peak locations. """ H, W = img2d.shape mask = np.zeros((H, W), dtype=bool) coords = peak_local_max( - img2d, min_distance=min_distance, threshold_abs=threshold_abs, threshold_rel=threshold_rel + img2d, + min_distance=min_distance, + threshold_abs=threshold_abs, + threshold_rel=threshold_rel, ) if coords.size: r, c = coords[:, 0], coords[:, 1] @@ -67,7 +67,7 @@ def peak_detection_mapoverlap( ) -> np.ndarray: """Runs inside Dask.da.map_overlap on a padded NumPy block: (h_pad, w_pad, C). Builds a processed mask per channel, runs peak_local_max then - label+regionprops, and writes probability (mean_intensity) at centroid pixels. + label+regionprops, and writes probability (mean_intensity) at centroid pixels. Keeps only centroids whose (row,col) lie in the interior window: rows [depth_h : depth_h + core_h), cols [depth_w : depth_w + core_w) Returns same spatial shape as input block: (h_pad, w_pad, C), float32. @@ -81,6 +81,7 @@ def peak_detection_mapoverlap( depth_w: Halo size in pixels for width (cols). calculate_probabilities: If True, write mean_intensity at centroids; else write 1.0 at centroids. + Returns: out: NumPy array (H, W, C) with probabilities at centroids, 0 elsewhere. 
""" @@ -120,7 +121,9 @@ def peak_detection_mapoverlap( return out -def detection_with_map_overlap(probs: da.Array, min_distance: int, threshold_abs: float, depth_pixels: int) -> da.Array: +def detection_with_map_overlap( + probs: da.Array, min_distance: int, threshold_abs: float, depth_pixels: int +) -> da.Array: """probs: Dask array (H, W, C), float. depth_pixels: halo in pixels for H/W (use >= min_distance and >= any morphology radius). @@ -143,18 +146,21 @@ def detection_with_map_overlap(probs: da.Array, min_distance: int, threshold_abs return scores -def centroids_map_to_dask_dataframe(scores: da.Array, x_offset: int = 0, y_offset: int = 0) -> dd.DataFrame: +def centroids_map_to_dask_dataframe( + scores: da.Array, x_offset: int = 0, y_offset: int = 0 +) -> dd.DataFrame: """Convert centroid map (H, W, C) into a Dask DataFrame with columns: x, y, type, prob. Args: scores: Dask array (H, W, C) with probabilities at centroids, 0 elsewhere. x_offset: global x offset to add to all x coordinates. y_offset: global y offset to add to all y coordinates. + Returns: ddf: Dask DataFrame with columns: x, y, type, prob. """ # 1) Build a boolean mask of detections - + mask = scores > 0 # 2) Get coordinates and class of detections (lazy 1D Dask arrays) @@ -172,7 +178,7 @@ def centroids_map_to_dask_dataframe(scores: da.Array, x_offset: int = 0, y_offse dd.from_dask_array(ss.astype("float32"), columns="prob"), ], axis=1, - ignore_unknown_divisions=True + ignore_unknown_divisions=True, ) # 5) Apply global offsets (if needed) @@ -184,7 +190,9 @@ def centroids_map_to_dask_dataframe(scores: da.Array, x_offset: int = 0, y_offse return ddf -def nucleus_detection_nms(df: pd.DataFrame, radius: int, overlap_threshold:float = 0.5) -> pd.DataFrame: +def nucleus_detection_nms( + df: pd.DataFrame, radius: int, overlap_threshold: float = 0.5 +) -> pd.DataFrame: """Greedy NMS across ALL detections. Keeps the highest-prob detection, removes any other point within 'radius' pixels > overlap_threshold. @@ -215,7 +223,7 @@ def nucleus_detection_nms(df: pd.DataFrame, radius: int, overlap_threshold:float coords = sub[["x", "y"]].to_numpy(dtype=np.float64) r = float(radius) two_r = 2.0 * r - two_r2 = (two_r * two_r) # distance^2 cutoff for any overlap + two_r2 = two_r * two_r # distance^2 cutoff for any overlap suppressed = np.zeros(len(sub), dtype=bool) keep_idx = [] @@ -232,18 +240,19 @@ def nucleus_detection_nms(df: pd.DataFrame, radius: int, overlap_threshold:float d2 = dx * dx + dy * dy # Only points with d < 2r can have nonzero overlap - cand = (d2 <= two_r2) + cand = d2 <= two_r2 cand[i] = False # don't suppress the kept point itself if not np.any(cand): continue d = np.sqrt(d2[cand]) - # Safe cosine argument = (distance ÷ diameter), Clamp for numerical stability u = np.clip(d / (2.0 * r), -1.0, 1.0) # Exact intersection area of two equal-radius circles. - inter = 2.0 * (r * r) * np.arccos(u) - 0.5 * d * np.sqrt(np.clip(4.0 * r * r - d * d, 0.0, None)) + inter = 2.0 * (r * r) * np.arccos(u) - 0.5 * d * np.sqrt( + np.clip(4.0 * r * r - d * d, 0.0, None) + ) union = 2.0 * np.pi * (r * r) - inter iou = inter / union @@ -252,7 +261,7 @@ def nucleus_detection_nms(df: pd.DataFrame, radius: int, overlap_threshold:float idx_cand = np.where(cand)[0] to_suppress = idx_cand[iou >= overlap_threshold] suppressed[to_suppress] = True - + kept = sub.iloc[keep_idx].copy() return kept @@ -363,6 +372,7 @@ def post_process_patches( raw_predictions (da.Array): The raw predictions from the model. 
prediction_shape (tuple[int, ...]): The shape of the predictions. prediction_dtype (type): The data type of the predictions. + Returns: A list of DataFrames containing the post-processed predictions for each patch. @@ -376,7 +386,6 @@ def post_process_patches( batch_predictions.append(self.model.postproc_func(raw_predictions[i])) return batch_predictions - def post_process_wsi( self: NucleusDetector, raw_predictions: da.Array, @@ -396,8 +405,9 @@ def post_process_wsi( logger.info(f"Raw probabilities dtype: {prediction_dtype}") logger.info(f"Chunk size: {raw_predictions.chunks}") - detection_df = self.model.postproc(raw_predictions, prediction_shape, prediction_dtype) - + detection_df = self.model.postproc( + raw_predictions, prediction_shape, prediction_dtype + ) return detection_df @@ -441,11 +451,9 @@ def save_predictions( save_paths.append(out_file) return save_paths - else: - return df_to_store_nucleus_detector( - processed_predictions['predictions'], - scale_factor=scale_factor, - save_path=save_path, - class_dict=class_dict, - ) - \ No newline at end of file + return df_to_store_nucleus_detector( + processed_predictions["predictions"], + scale_factor=scale_factor, + save_path=save_path, + class_dict=class_dict, + ) diff --git a/tiatoolbox/utils/misc.py b/tiatoolbox/utils/misc.py index afc6923a0..b6081cb74 100644 --- a/tiatoolbox/utils/misc.py +++ b/tiatoolbox/utils/misc.py @@ -21,7 +21,7 @@ import zarr from filelock import FileLock from shapely.affinity import translate -from shapely.geometry import Polygon, Point +from shapely.geometry import Point, Polygon from shapely.geometry import shape as feature2geometry from skimage import exposure from tqdm import notebook as tqdm_notebook @@ -1345,10 +1345,9 @@ def df_to_store_nucleus_detector( scale_factor: tuple[float, float], save_path: Path | None = None, class_dict: dict | None = None, - batch_size: int = 50_000 + batch_size: int = 50_000, ) -> SQLiteStore | Path: - """ - Convert a pandas DataFrame with columns ['x','y','type','prob'] + """Convert a pandas DataFrame with columns ['x','y','type','prob'] into an Annotation SQLiteStore efficiently using append_many(). Args: @@ -1356,7 +1355,7 @@ def df_to_store_nucleus_detector( A pandas DataFrame with columns ['x','y','type','prob']. save_path (Path, optional): Optional Output directory to save the Annotation - Store results. + Store results. scale_factor (tuple[float, float]): The scale factor to use when saving the annotations. All coordinates will be multiplied by this factor to allow @@ -1373,7 +1372,6 @@ def df_to_store_nucleus_detector( or Path to file storing SQLiteStore containing Annotations for each nucleus. 
""" - # 1) Select & coerce dtypes once (compact + avoids per-row casts) x = df["x"].to_numpy(dtype=np.int64, copy=False) y = df["y"].to_numpy(dtype=np.int64, copy=False) @@ -1387,7 +1385,7 @@ def df_to_store_nucleus_detector( def make_points(xb, yb): return [Point(int(xx), int(yy)) for xx, yy in zip(xb, yb)] - + if class_dict is None: # identity over the actually present types (robust if types aren't 0..K) unique_types = np.unique(t) @@ -1400,9 +1398,16 @@ def make_points(xb, yb): pts = make_points(xb, yb) # array/list of Points - anns = [Annotation(geometry=pt, - properties={"type": class_dict.get(int(tt), int(tt)), "probability": float(pp)}) - for pt, tt, pp in zip(pts, tb, pb)] + anns = [ + Annotation( + geometry=pt, + properties={ + "type": class_dict.get(int(tt), int(tt)), + "probability": float(pp), + }, + ) + for pt, tt, pp in zip(pts, tb, pb) + ] store.append_many(anns) From b7f829ccd2f616a57fdaaff514a09961a88e101d Mon Sep 17 00:00:00 2001 From: Jiaqi Lv Date: Mon, 10 Nov 2025 18:57:23 +0000 Subject: [PATCH 06/26] clean up --- tiatoolbox/models/architecture/mapde.py | 31 +++++++++++--------- tiatoolbox/models/engine/nucleus_detector.py | 4 +-- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/tiatoolbox/models/architecture/mapde.py b/tiatoolbox/models/architecture/mapde.py index e7f7f9e76..b512dd85b 100644 --- a/tiatoolbox/models/architecture/mapde.py +++ b/tiatoolbox/models/architecture/mapde.py @@ -14,6 +14,10 @@ import torch import torch.nn.functional as F # noqa: N812 +import dask.array as da +import pandas as pd +from tiatoolbox import logger + from tiatoolbox.models.architecture.micronet import MicroNet from tiatoolbox.models.engine.nucleus_detector import ( centroids_map_to_dask_dataframe, @@ -243,26 +247,24 @@ def postproc( self: MapDe, prediction_map: da.Array, prediction_shape: tuple, dtype: np.dtype ) -> pd.DataFrame: """Post-processing script for MapDe. - - Performs peak detection and extracts coordinates in x, y format. + + Post-process predicted probability map of the input image. + Performs peak detection, then non-maximum suppression. + Returns a pandas DataFrame containing detected nuclei coordinates [x, y, type, prob]. Args: prediction_map (da.array): Predicted probability map (HxWx1) of the entire input image. + prediction_shape (tuple): + Shape of the prediction map. + dtype (np.dtype): + Data type of the prediction map. Returns: detected_nuclei (pandas.DataFrame): Detected nuclei coordinates stored in a pandas DataFrame. 
""" - # coordinates = peak_local_max( - # np.squeeze(prediction_map[0], axis=2), - # min_distance=self.min_distance, - # threshold_abs=self.threshold_abs, - # exclude_border=False, - # ) - # return np.fliplr(coordinates) - depth = {0: self.min_distance, 1: self.min_distance, 2: 0} scores = da.map_overlap( prediction_map, @@ -280,11 +282,12 @@ def postproc( ddf = centroids_map_to_dask_dataframe(scores, x_offset=0, y_offset=0) pandas_df = ddf.compute() - print("Total detections before NMS:", len(pandas_df)) - nms_df = nucleus_detection_nms(pandas_df, radius=self.min_distance) - print("Total detections after NMS:", len(nms_df)) + logger.info(f"Total detections before NMS: {len(pandas_df)}") + detected_nuclei = nucleus_detection_nms(pandas_df, radius=self.min_distance) + logger.info(f"Total detections after NMS: {len(detected_nuclei)}") + + return detected_nuclei - return nms_df @staticmethod def infer_batch( diff --git a/tiatoolbox/models/engine/nucleus_detector.py b/tiatoolbox/models/engine/nucleus_detector.py index 744033f36..97b247a03 100644 --- a/tiatoolbox/models/engine/nucleus_detector.py +++ b/tiatoolbox/models/engine/nucleus_detector.py @@ -11,20 +11,20 @@ import pandas as pd from skimage.feature import peak_local_max from skimage.measure import label, regionprops - from tiatoolbox import logger -from tiatoolbox.annotation import AnnotationStore from tiatoolbox.models.engine.semantic_segmentor import ( SemanticSegmentor, SemanticSegmentorRunParams, ) from tiatoolbox.models.models_abc import ModelABC +from tiatoolbox.annotation import AnnotationStore from tiatoolbox.utils.misc import df_to_store_nucleus_detector if TYPE_CHECKING: # pragma: no cover from tiatoolbox.models.models_abc import ModelABC + def probability_to_peak_map( img2d: np.ndarray, min_distance: int, From cba5fd53436fad3baeece8cd6704830241e4bf46 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 10 Nov 2025 18:57:50 +0000 Subject: [PATCH 07/26] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tiatoolbox/models/architecture/mapde.py | 6 +----- tiatoolbox/models/engine/nucleus_detector.py | 4 ++-- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/tiatoolbox/models/architecture/mapde.py b/tiatoolbox/models/architecture/mapde.py index b512dd85b..c8cde9ce2 100644 --- a/tiatoolbox/models/architecture/mapde.py +++ b/tiatoolbox/models/architecture/mapde.py @@ -14,10 +14,7 @@ import torch import torch.nn.functional as F # noqa: N812 -import dask.array as da -import pandas as pd from tiatoolbox import logger - from tiatoolbox.models.architecture.micronet import MicroNet from tiatoolbox.models.engine.nucleus_detector import ( centroids_map_to_dask_dataframe, @@ -247,7 +244,7 @@ def postproc( self: MapDe, prediction_map: da.Array, prediction_shape: tuple, dtype: np.dtype ) -> pd.DataFrame: """Post-processing script for MapDe. - + Post-process predicted probability map of the input image. Performs peak detection, then non-maximum suppression. Returns a pandas DataFrame containing detected nuclei coordinates [x, y, type, prob]. 
@@ -288,7 +285,6 @@ def postproc( return detected_nuclei - @staticmethod def infer_batch( model: torch.nn.Module, diff --git a/tiatoolbox/models/engine/nucleus_detector.py b/tiatoolbox/models/engine/nucleus_detector.py index 97b247a03..744033f36 100644 --- a/tiatoolbox/models/engine/nucleus_detector.py +++ b/tiatoolbox/models/engine/nucleus_detector.py @@ -11,20 +11,20 @@ import pandas as pd from skimage.feature import peak_local_max from skimage.measure import label, regionprops + from tiatoolbox import logger +from tiatoolbox.annotation import AnnotationStore from tiatoolbox.models.engine.semantic_segmentor import ( SemanticSegmentor, SemanticSegmentorRunParams, ) from tiatoolbox.models.models_abc import ModelABC -from tiatoolbox.annotation import AnnotationStore from tiatoolbox.utils.misc import df_to_store_nucleus_detector if TYPE_CHECKING: # pragma: no cover from tiatoolbox.models.models_abc import ModelABC - def probability_to_peak_map( img2d: np.ndarray, min_distance: int, From f2cdcc4185b9cee52fcba9bca20742165ba9fb66 Mon Sep 17 00:00:00 2001 From: Jiaqi Lv Date: Tue, 11 Nov 2025 20:22:15 +0000 Subject: [PATCH 08/26] update --- test.py | 12 ++- tiatoolbox/data/pretrained_model.yaml | 8 +- tiatoolbox/models/architecture/mapde.py | 11 ++- tiatoolbox/models/architecture/sccnn.py | 85 +++++++++++++++----- tiatoolbox/models/engine/nucleus_detector.py | 47 +++++------ 5 files changed, 112 insertions(+), 51 deletions(-) diff --git a/test.py b/test.py index 4172ad470..1578b73e3 100644 --- a/test.py +++ b/test.py @@ -7,15 +7,23 @@ ON_GPU = not toolbox_env.running_on_ci() and toolbox_env.has_gpu() +import dask.array as da +from skimage.feature import peak_local_max if __name__ == "__main__": - detector = NucleusDetector(model="mapde-conic", batch_size=8, num_workers=2) + + # model_name = "sccnn-crchisto" + model_name = "mapde-conic" + + detector = NucleusDetector(model=model_name, batch_size=16, num_workers=8) detector.run( - images=[pathlib.Path("/media/u1910100/data/slides/CMU-1-Small-Region.svs")], + images=[pathlib.Path("/media/u1910100/data/slides/patient366_wsi1.tif")], patch_mode=False, device="cuda", save_dir=pathlib.Path("/media/u1910100/data/overlays/test"), overwrite=True, output_type="annotationstore", class_dict={0: "nucleus"}, + auto_get_mask=True, + memory_threshold=50 ) diff --git a/tiatoolbox/data/pretrained_model.yaml b/tiatoolbox/data/pretrained_model.yaml index 75963662f..24831627e 100644 --- a/tiatoolbox/data/pretrained_model.yaml +++ b/tiatoolbox/data/pretrained_model.yaml @@ -814,6 +814,7 @@ mapde-crchisto: min_distance: 4 threshold_abs: 250 num_classes: 1 + postproc_tile_shape: [ 2048, 2048 ] ioconfig: class: io_config.IOSegmentorConfig kwargs: @@ -821,7 +822,6 @@ mapde-crchisto: - { "units": "mpp", "resolution": 0.5 } output_resolutions: - { "units": "mpp", "resolution": 0.5 } - tile_shape: [ 2048, 2048 ] patch_input_shape: [ 252, 252 ] patch_output_shape: [ 252, 252 ] stride_shape: [ 150, 150 ] @@ -836,6 +836,7 @@ mapde-conic: min_distance: 3 threshold_abs: 205 num_classes: 1 + postproc_tile_shape: [ 2048, 2048 ] ioconfig: class: io_config.IOSegmentorConfig kwargs: @@ -843,7 +844,6 @@ mapde-conic: - { "units": "mpp", "resolution": 0.5 } output_resolutions: - { "units": "mpp", "resolution": 0.5 } - # tile_shape: [ 2048, 2048 ] patch_input_shape: [ 252, 252 ] patch_output_shape: [ 252, 252 ] stride_shape: [ 150, 150 ] @@ -859,6 +859,7 @@ sccnn-crchisto: min_distance: 6 threshold_abs: 0.20 patch_output_shape: [ 13, 13 ] + postproc_tile_shape: [ 2048, 2048 ] ioconfig: 
class: io_config.IOSegmentorConfig kwargs: @@ -866,7 +867,6 @@ sccnn-crchisto: - { "units": "mpp", "resolution": 0.5 } output_resolutions: - { "units": "mpp", "resolution": 0.5 } - tile_shape: [ 2048, 2048 ] patch_input_shape: [ 31, 31 ] patch_output_shape: [ 13, 13 ] stride_shape: [ 8, 8 ] @@ -882,6 +882,7 @@ sccnn-conic: min_distance: 5 threshold_abs: 0.05 patch_output_shape: [ 13, 13 ] + postproc_tile_shape: [ 2048, 2048 ] ioconfig: class: io_config.IOSegmentorConfig kwargs: @@ -889,7 +890,6 @@ sccnn-conic: - { "units": "mpp", "resolution": 0.5 } output_resolutions: - { "units": "mpp", "resolution": 0.5 } - tile_shape: [ 2048, 2048 ] patch_input_shape: [ 31, 31 ] patch_output_shape: [ 13, 13 ] stride_shape: [ 8, 8 ] diff --git a/tiatoolbox/models/architecture/mapde.py b/tiatoolbox/models/architecture/mapde.py index c8cde9ce2..be244f7a8 100644 --- a/tiatoolbox/models/architecture/mapde.py +++ b/tiatoolbox/models/architecture/mapde.py @@ -85,6 +85,7 @@ def __init__( min_distance: int = 4, threshold_abs: float = 250, num_classes: int = 1, + postproc_tile_shape: list[int] = [2048, 2048], ) -> None: """Initialize :class:`MapDe`.""" super().__init__( @@ -92,7 +93,7 @@ def __init__( num_input_channels=num_input_channels, out_activation="relu", ) - + self.postproc_tile_shape = postproc_tile_shape dist_filter = np.array( [ [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], @@ -263,8 +264,14 @@ def postproc( """ depth = {0: self.min_distance, 1: self.min_distance, 2: 0} + + rechunked_prediction_map = prediction_map.rechunk( + (self.postproc_tile_shape[0], self.postproc_tile_shape[1], -1) + ) + logger.info(f"Post-processing chunk size: {rechunked_prediction_map.chunks}") + scores = da.map_overlap( - prediction_map, + rechunked_prediction_map, peak_detection_mapoverlap, depth=depth, boundary=0, diff --git a/tiatoolbox/models/architecture/sccnn.py b/tiatoolbox/models/architecture/sccnn.py index 4da0f9dca..6c11f4691 100644 --- a/tiatoolbox/models/architecture/sccnn.py +++ b/tiatoolbox/models/architecture/sccnn.py @@ -12,6 +12,14 @@ from collections import OrderedDict import numpy as np +import dask.array as da +import pandas as pd +from tiatoolbox import logger +from tiatoolbox.models.architecture.mapde import ( + centroids_map_to_dask_dataframe, + nucleus_detection_nms, + peak_detection_mapoverlap, +) import torch from skimage.feature import peak_local_max from torch import nn @@ -91,6 +99,7 @@ def __init__( radius: int = 12, min_distance: int = 6, threshold_abs: float = 0.20, + postproc_tile_shape: tuple[int, int] = (2048, 2048), ) -> None: """Initialize :class:`SCCNN`.""" super().__init__() @@ -99,6 +108,7 @@ def __init__( self.in_ch = num_input_channels self.out_height = out_height self.out_width = out_width + self.postproc_tile_shape = postproc_tile_shape # Create mesh grid and convert to 3D vector x, y = torch.meshgrid( @@ -325,36 +335,71 @@ def spatially_constrained_layer1( ) return self.spatially_constrained_layer2(s1_sigmoid0, s1_sigmoid1, s1_sigmoid2) - # skipcq: PYL-W0221 # noqa: ERA001 - def postproc(self: SCCNN, prediction_map: np.ndarray) -> np.ndarray: - """Post-processing script for MicroNet. - - Performs peak detection and extracts coordinates in x, y format. + def postproc( + self: SCCNN, prediction_map: da.Array, prediction_shape: tuple, dtype: np.dtype + ) -> pd.DataFrame: + """Post-processing script for SCCNN. + + Post-process predicted probability map of the input image. + Performs peak detection, then non-maximum suppression. 
+ Returns a pandas DataFrame containing detected nuclei coordinates [x, y, type, prob]. Args: - prediction_map (ndarray): - Input image of type numpy array. + prediction_map (da.array): + Predicted probability map (HxWx1) of the entire input image. + prediction_shape (tuple): + Shape of the prediction map. + dtype (np.dtype): + Data type of the prediction map. Returns: - :class:`numpy.ndarray`: - Pixel-wise nuclear instance segmentation - prediction. + detected_nuclei (pandas.DataFrame): + Detected nuclei coordinates stored in a pandas DataFrame. """ - coordinates = peak_local_max( - np.squeeze(prediction_map[0], axis=2), + depth = {0: self.min_distance, 1: self.min_distance, 2: 0} + + # print("maxmin debug:") # --- DEBUG --- + # lazy_max = prediction_map.max() + # max_value = lazy_max.compute() + # lazy_min = prediction_map.min() + # min_value = lazy_min.compute() + # print(f"lazy_max: {max_value}, lazy_min: {min_value}") + + rechunked_prediction_map = prediction_map.rechunk( + (self.postproc_tile_shape[0], self.postproc_tile_shape[1], -1) + ) + print(f"rechunked_prediction_map.shape: {rechunked_prediction_map.shape}") + print(f"rechunked_prediction_map.chunks: {rechunked_prediction_map.chunks}") + + scores = da.map_overlap( + rechunked_prediction_map, + peak_detection_mapoverlap, + depth=depth, + boundary=0, + dtype=dtype, + block_info=True, min_distance=self.min_distance, threshold_abs=self.threshold_abs, - exclude_border=False, + depth_h=self.min_distance, + depth_w=self.min_distance, + calculate_probabilities=False, ) - return np.fliplr(coordinates) + ddf = centroids_map_to_dask_dataframe(scores, x_offset=0, y_offset=0) + pandas_df = ddf.compute() + + logger.info(f"Total detections before NMS: {len(pandas_df)}") + detected_nuclei = nucleus_detection_nms(pandas_df, radius=self.min_distance) + logger.info(f"Total detections after NMS: {len(detected_nuclei)}") + + return detected_nuclei @staticmethod def infer_batch( model: nn.Module, - batch_data: np.ndarray | torch.Tensor, + batch_data: torch.Tensor, device: str, - ) -> list[np.ndarray]: + ) -> np.ndarray: """Run inference on an input batch. This contains logic for forward operation as well as batch I/O @@ -387,8 +432,8 @@ def infer_batch( pred = model(patch_imgs_gpu) pred = pred.permute(0, 2, 3, 1).contiguous() - pred = pred.cpu().numpy() + if torch.max(pred) > 0: + print(torch.max(pred), torch.min(pred)) # --- DEBUG --- + return pred.cpu().numpy() + - return [ - pred, - ] diff --git a/tiatoolbox/models/engine/nucleus_detector.py b/tiatoolbox/models/engine/nucleus_detector.py index 744033f36..230813f25 100644 --- a/tiatoolbox/models/engine/nucleus_detector.py +++ b/tiatoolbox/models/engine/nucleus_detector.py @@ -49,6 +49,7 @@ def probability_to_peak_map( min_distance=min_distance, threshold_abs=threshold_abs, threshold_rel=threshold_rel, + exclude_border=False, ) if coords.size: r, c = coords[:, 0], coords[:, 1] @@ -121,29 +122,29 @@ def peak_detection_mapoverlap( return out -def detection_with_map_overlap( - probs: da.Array, min_distance: int, threshold_abs: float, depth_pixels: int -) -> da.Array: - """probs: Dask array (H, W, C), float. - depth_pixels: halo in pixels for H/W (use >= min_distance and >= any morphology radius). - - Returns: - scores: da.Array (H, W, C) with mean_intensity at centroids, 0 elsewhere. 
- """ - depth = {0: depth_pixels, 1: depth_pixels, 2: 0} - scores = da.map_overlap( - probs, - peak_detection_mapoverlap, - depth=depth, - boundary=0, - dtype=np.float32, - block_info=True, - min_distance=min_distance, - threshold_abs=threshold_abs, - depth_h=depth_pixels, - depth_w=depth_pixels, - ) - return scores +# def detection_with_map_overlap( +# probs: da.Array, min_distance: int, threshold_abs: float, depth_pixels: int +# ) -> da.Array: +# """probs: Dask array (H, W, C), float. +# depth_pixels: halo in pixels for H/W (use >= min_distance and >= any morphology radius). + +# Returns: +# scores: da.Array (H, W, C) with mean_intensity at centroids, 0 elsewhere. +# """ +# depth = {0: depth_pixels, 1: depth_pixels, 2: 0} +# scores = da.map_overlap( +# probs, +# peak_detection_mapoverlap, +# depth=depth, +# boundary=0, +# dtype=np.float32, +# block_info=True, +# min_distance=min_distance, +# threshold_abs=threshold_abs, +# depth_h=depth_pixels, +# depth_w=depth_pixels, +# ) +# return scores def centroids_map_to_dask_dataframe( From dd99d97bb37abe6db620d837ae4efa9fc6b0c626 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 11 Nov 2025 20:22:43 +0000 Subject: [PATCH 09/26] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- test.py | 5 +---- tiatoolbox/models/architecture/mapde.py | 2 +- tiatoolbox/models/architecture/sccnn.py | 13 +++++-------- 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/test.py b/test.py index 1578b73e3..070e87c14 100644 --- a/test.py +++ b/test.py @@ -7,11 +7,8 @@ ON_GPU = not toolbox_env.running_on_ci() and toolbox_env.has_gpu() -import dask.array as da -from skimage.feature import peak_local_max if __name__ == "__main__": - # model_name = "sccnn-crchisto" model_name = "mapde-conic" @@ -25,5 +22,5 @@ output_type="annotationstore", class_dict={0: "nucleus"}, auto_get_mask=True, - memory_threshold=50 + memory_threshold=50, ) diff --git a/tiatoolbox/models/architecture/mapde.py b/tiatoolbox/models/architecture/mapde.py index be244f7a8..18fb62347 100644 --- a/tiatoolbox/models/architecture/mapde.py +++ b/tiatoolbox/models/architecture/mapde.py @@ -269,7 +269,7 @@ def postproc( (self.postproc_tile_shape[0], self.postproc_tile_shape[1], -1) ) logger.info(f"Post-processing chunk size: {rechunked_prediction_map.chunks}") - + scores = da.map_overlap( rechunked_prediction_map, peak_detection_mapoverlap, diff --git a/tiatoolbox/models/architecture/sccnn.py b/tiatoolbox/models/architecture/sccnn.py index 6c11f4691..fece8c686 100644 --- a/tiatoolbox/models/architecture/sccnn.py +++ b/tiatoolbox/models/architecture/sccnn.py @@ -11,19 +11,18 @@ from collections import OrderedDict -import numpy as np import dask.array as da +import numpy as np import pandas as pd +import torch +from torch import nn + from tiatoolbox import logger from tiatoolbox.models.architecture.mapde import ( centroids_map_to_dask_dataframe, nucleus_detection_nms, peak_detection_mapoverlap, ) -import torch -from skimage.feature import peak_local_max -from torch import nn - from tiatoolbox.models.models_abc import ModelABC @@ -339,7 +338,7 @@ def postproc( self: SCCNN, prediction_map: da.Array, prediction_shape: tuple, dtype: np.dtype ) -> pd.DataFrame: """Post-processing script for SCCNN. - + Post-process predicted probability map of the input image. Performs peak detection, then non-maximum suppression. 
Returns a pandas DataFrame containing detected nuclei coordinates [x, y, type, prob]. @@ -435,5 +434,3 @@ def infer_batch( if torch.max(pred) > 0: print(torch.max(pred), torch.min(pred)) # --- DEBUG --- return pred.cpu().numpy() - - From c468a8bff9c006549c3602a65e4715faf89f7d9a Mon Sep 17 00:00:00 2001 From: Jiaqi Lv Date: Wed, 12 Nov 2025 17:30:29 +0000 Subject: [PATCH 10/26] update pipeline --- test.py | 6 +- tiatoolbox/models/architecture/mapde.py | 114 ++-- tiatoolbox/models/architecture/sccnn.py | 8 +- tiatoolbox/models/engine/engine_abc.py | 2 +- tiatoolbox/models/engine/nucleus_detector.py | 629 +++++++++++-------- tiatoolbox/utils/misc.py | 1 + 6 files changed, 446 insertions(+), 314 deletions(-) diff --git a/test.py b/test.py index 1578b73e3..e087f8b67 100644 --- a/test.py +++ b/test.py @@ -7,8 +7,6 @@ ON_GPU = not toolbox_env.running_on_ci() and toolbox_env.has_gpu() -import dask.array as da -from skimage.feature import peak_local_max if __name__ == "__main__": @@ -17,7 +15,7 @@ detector = NucleusDetector(model=model_name, batch_size=16, num_workers=8) detector.run( - images=[pathlib.Path("/media/u1910100/data/slides/patient366_wsi1.tif")], + images=[pathlib.Path("/media/u1910100/data/slides/CMU-1-Small-Region.svs")], patch_mode=False, device="cuda", save_dir=pathlib.Path("/media/u1910100/data/overlays/test"), @@ -25,5 +23,5 @@ output_type="annotationstore", class_dict={0: "nucleus"}, auto_get_mask=True, - memory_threshold=50 + memory_threshold=80 ) diff --git a/tiatoolbox/models/architecture/mapde.py b/tiatoolbox/models/architecture/mapde.py index be244f7a8..bfadb04a4 100644 --- a/tiatoolbox/models/architecture/mapde.py +++ b/tiatoolbox/models/architecture/mapde.py @@ -8,19 +8,13 @@ from __future__ import annotations -import dask.array as da import numpy as np -import pandas as pd import torch import torch.nn.functional as F # noqa: N812 +from skimage.feature import peak_local_max from tiatoolbox import logger from tiatoolbox.models.architecture.micronet import MicroNet -from tiatoolbox.models.engine.nucleus_detector import ( - centroids_map_to_dask_dataframe, - nucleus_detection_nms, - peak_detection_mapoverlap, -) class MapDe(MicroNet): @@ -242,55 +236,73 @@ def forward(self: MapDe, input_tensor: torch.Tensor) -> torch.Tensor: # skipcq: PYL-W0221 # noqa: ERA001 def postproc( - self: MapDe, prediction_map: da.Array, prediction_shape: tuple, dtype: np.dtype - ) -> pd.DataFrame: - """Post-processing script for MapDe. - - Post-process predicted probability map of the input image. - Performs peak detection, then non-maximum suppression. - Returns a pandas DataFrame containing detected nuclei coordinates [x, y, type, prob]. + self: MapDe, + block: np.ndarray, + block_info: dict, + depth_h: int, + depth_w: int, + ) -> np.ndarray: + """Runs inside Dask.da.map_overlap on a padded NumPy block: (h_pad, w_pad, C). + Builds a processed mask per channel, runs peak_local_max then + label+regionprops, and writes probability (mean_intensity) at centroid pixels. + Keeps only centroids whose (row,col) lie in the interior window: + rows [depth_h : depth_h + core_h), cols [depth_w : depth_w + core_w) + Returns same spatial shape as input block: (h_pad, w_pad, C), float32. Args: - prediction_map (da.array): - Predicted probability map (HxWx1) of the entire input image. - prediction_shape (tuple): - Shape of the prediction map. - dtype (np.dtype): - Data type of the prediction map. + block: NumPy array (H, W, C) with padded block data. + block_info: Dask block info dict. 
+ min_distance: Minimum distance in pixels between peaks. + threshold_abs: Minimum absolute threshold for peak detection. + depth_h: Halo size in pixels for height (rows). + depth_w: Halo size in pixels for width (cols). + calculate_probabilities: If True, write mean_intensity at centroids; + else write 1.0 at centroids. Returns: - detected_nuclei (pandas.DataFrame): - Detected nuclei coordinates stored in a pandas DataFrame. - + out: NumPy array (H, W, C) with probabilities at centroids, 0 elsewhere. """ - depth = {0: self.min_distance, 1: self.min_distance, 2: 0} - - rechunked_prediction_map = prediction_map.rechunk( - (self.postproc_tile_shape[0], self.postproc_tile_shape[1], -1) - ) - logger.info(f"Post-processing chunk size: {rechunked_prediction_map.chunks}") - - scores = da.map_overlap( - rechunked_prediction_map, - peak_detection_mapoverlap, - depth=depth, - boundary=0, - dtype=dtype, - block_info=True, - min_distance=self.min_distance, - threshold_abs=self.threshold_abs, - depth_h=self.min_distance, - depth_w=self.min_distance, - calculate_probabilities=False, - ) - ddf = centroids_map_to_dask_dataframe(scores, x_offset=0, y_offset=0) - pandas_df = ddf.compute() - - logger.info(f"Total detections before NMS: {len(pandas_df)}") - detected_nuclei = nucleus_detection_nms(pandas_df, radius=self.min_distance) - logger.info(f"Total detections after NMS: {len(detected_nuclei)}") - - return detected_nuclei + H, W, C = block.shape + + # --- derive core (pre-overlap) size for THIS block safely --- + info = block_info[0] + locs = info["array-location"] # [(r0,r1),(c0,c1),(ch0,ch1)] + core_h = int(locs[0][1] - locs[0][0]) # r1 - r0 + core_w = int(locs[1][1] - locs[1][0]) + + rmin, rmax = depth_h, depth_h + core_h + cmin, cmax = depth_w, depth_w + core_w + + out = np.zeros((H, W, C), dtype=np.float32) + + for ch in range(C): + img = np.asarray(block[..., ch]) # NumPy 2D view + + coords = peak_local_max( + img, + min_distance=self.min_distance, + threshold_abs=self.threshold_abs, + exclude_border=False, + ) + + for r, c in coords: + if (rmin <= r < rmax) and (cmin <= c < cmax): + out[r, c, ch] = 1.0 + # pmask = probability_to_peak_map(img, self.min_distance, self.threshold_abs) + # if not pmask.any(): + # continue + + # lab = label(pmask) + # props = regionprops(lab, intensity_image=img) + + # for reg in props: + # r, c = reg.centroid # floats in padded-block coords + # if (rmin <= r < rmax) and (cmin <= c < cmax): + # rr = int(round(r)) + # cc = int(round(c)) + # if 0 <= rr < H and 0 <= cc < W: + # out[rr, cc, ch] = 1.0 + return out @staticmethod def infer_batch( diff --git a/tiatoolbox/models/architecture/sccnn.py b/tiatoolbox/models/architecture/sccnn.py index 6c11f4691..52d53f339 100644 --- a/tiatoolbox/models/architecture/sccnn.py +++ b/tiatoolbox/models/architecture/sccnn.py @@ -15,11 +15,11 @@ import dask.array as da import pandas as pd from tiatoolbox import logger -from tiatoolbox.models.architecture.mapde import ( - centroids_map_to_dask_dataframe, - nucleus_detection_nms, - peak_detection_mapoverlap, +from tiatoolbox.models.engine.nucleus_detector import ( + # centroids_map_to_dask_dataframe, + centroids_map_to_ddf_chunkwise, ) + import torch from skimage.feature import peak_local_max from torch import nn diff --git a/tiatoolbox/models/engine/engine_abc.py b/tiatoolbox/models/engine/engine_abc.py index 73b4ca1c1..0a6a8e127 100644 --- a/tiatoolbox/models/engine/engine_abc.py +++ b/tiatoolbox/models/engine/engine_abc.py @@ -45,7 +45,7 @@ import torch import zarr from dask import 
compute -from dask.diagnostics import ProgressBar +from dask.diagnostics.progress import ProgressBar from torch import nn from typing_extensions import Unpack diff --git a/tiatoolbox/models/engine/nucleus_detector.py b/tiatoolbox/models/engine/nucleus_detector.py index 230813f25..078c5d45d 100644 --- a/tiatoolbox/models/engine/nucleus_detector.py +++ b/tiatoolbox/models/engine/nucleus_detector.py @@ -3,14 +3,17 @@ from __future__ import annotations from pathlib import Path -from typing import TYPE_CHECKING, Unpack +from typing import TYPE_CHECKING, Unpack, Tuple +import dask import dask.array as da import dask.dataframe as dd import numpy as np import pandas as pd from skimage.feature import peak_local_max from skimage.measure import label, regionprops +from tiatoolbox.wsicore.wsireader import is_zarr +from dask.diagnostics.progress import ProgressBar from tiatoolbox import logger from tiatoolbox.annotation import AnnotationStore @@ -18,8 +21,10 @@ SemanticSegmentor, SemanticSegmentorRunParams, ) +from shapely.geometry import Point from tiatoolbox.models.models_abc import ModelABC from tiatoolbox.utils.misc import df_to_store_nucleus_detector +from tiatoolbox.annotation.storage import SQLiteStore, Annotation if TYPE_CHECKING: # pragma: no cover from tiatoolbox.models.models_abc import ModelABC @@ -57,229 +62,226 @@ def probability_to_peak_map( return mask -def peak_detection_mapoverlap( - block: np.ndarray, - block_info, - min_distance: int, - threshold_abs: float, - depth_h: int, - depth_w: int, - calculate_probabilities: bool = False, -) -> np.ndarray: - """Runs inside Dask.da.map_overlap on a padded NumPy block: (h_pad, w_pad, C). - Builds a processed mask per channel, runs peak_local_max then - label+regionprops, and writes probability (mean_intensity) at centroid pixels. - Keeps only centroids whose (row,col) lie in the interior window: - rows [depth_h : depth_h + core_h), cols [depth_w : depth_w + core_w) - Returns same spatial shape as input block: (h_pad, w_pad, C), float32. - - Args: - block: NumPy array (H, W, C) with padded block data. - block_info: Dask block info dict. - min_distance: Minimum distance in pixels between peaks. - threshold_abs: Minimum absolute threshold for peak detection. - depth_h: Halo size in pixels for height (rows). - depth_w: Halo size in pixels for width (cols). - calculate_probabilities: If True, write mean_intensity at centroids; - else write 1.0 at centroids. - - Returns: - out: NumPy array (H, W, C) with probabilities at centroids, 0 elsewhere. 
- """ - H, W, C = block.shape - - # --- derive core (pre-overlap) size for THIS block safely --- - info = block_info[0] - locs = info["array-location"] # [(r0,r1),(c0,c1),(ch0,ch1)] - core_h = int(locs[0][1] - locs[0][0]) # r1 - r0 - core_w = int(locs[1][1] - locs[1][0]) - - rmin, rmax = depth_h, depth_h + core_h - cmin, cmax = depth_w, depth_w + core_w - - out = np.zeros((H, W, C), dtype=np.float32) - - for ch in range(C): - img = np.asarray(block[..., ch]) # NumPy 2D view - pmask = probability_to_peak_map(img, min_distance, threshold_abs) - if not pmask.any(): - continue - - lab = label(pmask) - props = regionprops(lab, intensity_image=img) - - for reg in props: - r, c = reg.centroid # floats in padded-block coords - if (rmin <= r < rmax) and (cmin <= c < cmax): - rr = int(round(r)) - cc = int(round(c)) - if 0 <= rr < H and 0 <= cc < W: - if calculate_probabilities: - out[rr, cc, ch] = float(reg.mean_intensity) - else: - out[rr, cc, ch] = 1.0 - - return out - - -# def detection_with_map_overlap( -# probs: da.Array, min_distance: int, threshold_abs: float, depth_pixels: int -# ) -> da.Array: -# """probs: Dask array (H, W, C), float. -# depth_pixels: halo in pixels for H/W (use >= min_distance and >= any morphology radius). +# def peak_detection_mapoverlap( +# block: np.ndarray, +# block_info, +# min_distance: int, +# threshold_abs: float, +# depth_h: int, +# depth_w: int, +# ) -> np.ndarray: +# """Runs inside Dask.da.map_overlap on a padded NumPy block: (h_pad, w_pad, C). +# Builds a processed mask per channel, runs peak_local_max then +# label+regionprops, and writes probability (mean_intensity) at centroid pixels. +# Keeps only centroids whose (row,col) lie in the interior window: +# rows [depth_h : depth_h + core_h), cols [depth_w : depth_w + core_w) +# Returns same spatial shape as input block: (h_pad, w_pad, C), float32. + +# Args: +# block: NumPy array (H, W, C) with padded block data. +# block_info: Dask block info dict. +# min_distance: Minimum distance in pixels between peaks. +# threshold_abs: Minimum absolute threshold for peak detection. +# depth_h: Halo size in pixels for height (rows). +# depth_w: Halo size in pixels for width (cols). +# calculate_probabilities: If True, write mean_intensity at centroids; +# else write 1.0 at centroids. # Returns: -# scores: da.Array (H, W, C) with mean_intensity at centroids, 0 elsewhere. +# out: NumPy array (H, W, C) with probabilities at centroids, 0 elsewhere. # """ -# depth = {0: depth_pixels, 1: depth_pixels, 2: 0} -# scores = da.map_overlap( -# probs, -# peak_detection_mapoverlap, -# depth=depth, -# boundary=0, -# dtype=np.float32, -# block_info=True, -# min_distance=min_distance, -# threshold_abs=threshold_abs, -# depth_h=depth_pixels, -# depth_w=depth_pixels, -# ) -# return scores - - -def centroids_map_to_dask_dataframe( - scores: da.Array, x_offset: int = 0, y_offset: int = 0 -) -> dd.DataFrame: - """Convert centroid map (H, W, C) into a Dask DataFrame with columns: x, y, type, prob. - - Args: - scores: Dask array (H, W, C) with probabilities at centroids, 0 elsewhere. - x_offset: global x offset to add to all x coordinates. - y_offset: global y offset to add to all y coordinates. - - Returns: - ddf: Dask DataFrame with columns: x, y, type, prob. 
- """ - # 1) Build a boolean mask of detections - - mask = scores > 0 - # 2) Get coordinates and class of detections (lazy 1D Dask arrays) - - yy, xx, cc = da.nonzero(mask) - # 3) Get probability values at those detections (lazy) — same length as yy/xx/cc - - ss = da.extract(mask, scores) - # 4) Assemble a Dask DataFrame - # all columns are row-wise aligned (all built from arrays of the same length). - ddf = dd.concat( - [ - dd.from_dask_array(xx.astype("int64"), columns="x"), - dd.from_dask_array(yy.astype("int64"), columns="y"), - dd.from_dask_array(cc.astype("int64"), columns="type"), - dd.from_dask_array(ss.astype("float32"), columns="prob"), - ], - axis=1, - ignore_unknown_divisions=True, - ) - - # 5) Apply global offsets (if needed) - if x_offset != 0: - ddf["x"] = ddf["x"] + int(x_offset) - if y_offset != 0: - ddf["y"] = ddf["y"] + int(y_offset) - +# H, W, C = block.shape + +# # --- derive core (pre-overlap) size for THIS block safely --- +# info = block_info[0] +# locs = info["array-location"] # [(r0,r1),(c0,c1),(ch0,ch1)] +# core_h = int(locs[0][1] - locs[0][0]) # r1 - r0 +# core_w = int(locs[1][1] - locs[1][0]) + +# rmin, rmax = depth_h, depth_h + core_h +# cmin, cmax = depth_w, depth_w + core_w + +# out = np.zeros((H, W, C), dtype=np.float32) + +# for ch in range(C): +# img = np.asarray(block[..., ch]) # NumPy 2D view +# pmask = probability_to_peak_map(img, min_distance, threshold_abs) +# if not pmask.any(): +# continue + +# lab = label(pmask) +# props = regionprops(lab, intensity_image=img) + +# for reg in props: +# r, c = reg.centroid # floats in padded-block coords +# if (rmin <= r < rmax) and (cmin <= c < cmax): +# rr = int(round(r)) +# cc = int(round(c)) +# if 0 <= rr < H and 0 <= cc < W: +# if calculate_probabilities: +# out[rr, cc, ch] = float(reg.mean_intensity) +# else: +# out[rr, cc, ch] = 1.0 + +# return out + + +def _chunk_to_df(block:np.ndarray, block_info:dict, x_offset:int = 0, y_offset:int = 0) -> pd.DataFrame: + # block: np.ndarray (h, w, C) for this chunk (no halos here; use after stitching) + info = block_info[0] if 0 in block_info else block_info[None] + (r0, r1), (c0, c1), _ = info["array-location"] # global interior coords for this chunk + + # find nonzeros per channel + ys, xs, cs = np.nonzero(block) + if ys.size == 0: + DTYPES = { + "x": "uint32", # or "uint32" if you really want + "y": "uint32", + "type": "uint32", + "prob": "float32", + } + return pd.DataFrame({k: pd.Series(dtype=v) for k, v in DTYPES.items()}) + + probs = block[ys, xs, cs].astype(np.float32, copy=False) + df = pd.DataFrame({ + "x": xs + c0 + int(x_offset), + "y": ys + r0 + int(y_offset), + "type": cs.astype(np.int64, copy=False), + "prob": probs, + }) + return df + + +def centroids_map_to_ddf_chunkwise(scores: da.Array, x_offset: int=0, y_offset: int=0) -> dd.DataFrame: + # build one delayed pandas DF per chunk + dfs = scores.map_blocks( + _chunk_to_df, + dtype=object, # ignored; returning DataFrames + block_info=True, + x_offset=x_offset, + y_offset=y_offset, + ).to_delayed().ravel() + + meta = pd.DataFrame({"x": pd.Series([], dtype="uint32"), + "y": pd.Series([], dtype="uint32"), + "type": pd.Series([], dtype="uint32"), + "prob": pd.Series([], dtype="float32")}) + ddf = dd.from_delayed(dfs, meta=meta) return ddf -def nucleus_detection_nms( - df: pd.DataFrame, radius: int, overlap_threshold: float = 0.5 -) -> pd.DataFrame: - """Greedy NMS across ALL detections. 
+def _chunk_to_records(block: np.ndarray, block_info + ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + # block: (h, w, C) NumPy chunk (post-stitching, no halos) + info = block_info[0] if 0 in block_info else block_info[None] + (r0, r1), (c0, c1), _ = info["array-location"] # global interior start/stop + + ys, xs, cs = np.nonzero(block) + if ys.size == 0: + # return empty, dtype-stable arrays to avoid surprises + return ( + np.empty(0, dtype=np.uint32), + np.empty(0, dtype=np.uint32), + np.empty(0, dtype=np.uint32), + np.empty(0, dtype=np.float32), + ) - Keeps the highest-prob detection, removes any other point within 'radius' pixels > overlap_threshold. - Expects dataframe columns: ['x','y','type','prob']. + x = xs.astype(np.uint32, copy=False) + int(c0) + y = ys.astype(np.uint32, copy=False) + int(r0) + t = cs.astype(np.uint32, copy=False) + p = block[ys, xs, cs].astype(np.float32, copy=False) + return (x, y, t, p) + +def _write_records_to_store( + recs: Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray], + store: SQLiteStore, + scale_factor: Tuple[float, float], + class_dict: dict | None, + batch_size: int = 5000, +) -> int: + x, y, t, p = recs + n = len(x) + if n == 0: + return 0 # nothing to write + + x = np.rint(x * scale_factor[0]).astype(np.uint32, copy=False) + y = np.rint(y * scale_factor[1]).astype(np.uint32, copy=False) + + # class mapping + if class_dict is None: + # identity over actually-present types + uniq = np.unique(t) + class_dict = {int(k): int(k) for k in uniq} + labels = np.array([class_dict.get(int(k), int(k)) for k in t], dtype=object) + + def make_points(xb, yb): return [Point(int(xx), int(yy)) for xx, yy in zip(xb, yb)] + + written = 0 + for i in range(0, n, batch_size): + j = min(i + batch_size, n) + pts = make_points(x[i:j], y[i:j]) + + anns = [ + Annotation(geometry=pt, + properties={"type": lbl, "probability": float(pp)}) + for pt, lbl, pp in zip(pts, labels[i:j], p[i:j]) + ] + store.append_many(anns) + written += (j - i) + return written + + +def write_centroids_to_store( + scores: da.Array, + scale_factor:tuple[float, float] = (1.0, 1.0), + class_dict: dict | None = None, + save_path: Path | None = None, + batch_size: int = 5000) -> Path | SQLiteStore: + + # one delayed record-tuple per chunk + recs_delayed = scores.map_blocks( + _chunk_to_records, + dtype=object, # we return Python tuples + block_info=True, + ).to_delayed().ravel() + + store = SQLiteStore() + + # one delayed writer per chunk (returns number written) + writes = [ + dask.delayed(_write_records_to_store)( + recs, store, scale_factor, class_dict, batch_size + ) + for recs in recs_delayed + ] - Args: - df: pandas DataFrame of detections. - radius: radius in pixels for suppression. - overlap_threshold: float in [0,1], fraction of radius for suppression. + # IMPORTANT: SQLite is single-writer; run sequentially + with ProgressBar(): + total = dask.compute(*writes, scheduler="single-threaded") + logger.info(f"Total detections written to store: {sum(total)}") + # # if a save director is provided, then dump store into a file + if save_path: + # ensure parent directory exists + save_path.parent.absolute().mkdir(parents=True, exist_ok=True) + # ensure proper db extension + save_path = save_path.parent.absolute() / (save_path.stem + ".db") + store.commit() + store.dump(save_path) + return save_path - Returns: - filtered DataFrame with same columns/dtypes. 
- """ - if df.empty: - return df.copy() - if radius <= 0: - raise ValueError("radius must be > 0") - if not (0.0 < overlap_threshold <= 1.0): - raise ValueError("overlap_threshold must be in (0.0, 1.0]") - - # Sort by descending probability (highest priority first) - sub = df.sort_values("prob", ascending=False).reset_index(drop=True) - - # Coordinates as float64 for distance math - coords = sub[["x", "y"]].to_numpy(dtype=np.float64) - r2 = float(radius) * float(radius) - - coords = sub[["x", "y"]].to_numpy(dtype=np.float64) - r = float(radius) - two_r = 2.0 * r - two_r2 = two_r * two_r # distance^2 cutoff for any overlap - - suppressed = np.zeros(len(sub), dtype=bool) - keep_idx = [] - - for i in range(len(sub)): - if suppressed[i]: - continue - - keep_idx.append(i) - - # Vectorised distances to all points - dx = coords[:, 0] - coords[i, 0] - dy = coords[:, 1] - coords[i, 1] - d2 = dx * dx + dy * dy - - # Only points with d < 2r can have nonzero overlap - cand = d2 <= two_r2 - cand[i] = False # don't suppress the kept point itself - if not np.any(cand): - continue - - d = np.sqrt(d2[cand]) - - # Safe cosine argument = (distance ÷ diameter), Clamp for numerical stability - u = np.clip(d / (2.0 * r), -1.0, 1.0) - # Exact intersection area of two equal-radius circles. - inter = 2.0 * (r * r) * np.arccos(u) - 0.5 * d * np.sqrt( - np.clip(4.0 * r * r - d * d, 0.0, None) - ) + return store - union = 2.0 * np.pi * (r * r) - inter - iou = inter / union - # Suppress candidates whose IoU exceeds threshold - idx_cand = np.where(cand)[0] - to_suppress = idx_cand[iou >= overlap_threshold] - suppressed[to_suppress] = True - kept = sub.iloc[keep_idx].copy() - return kept class NucleusDetector(SemanticSegmentor): r"""Nucleus detection engine. - The models provided by tiatoolbox should give the following results: - - .. list-table:: Nucleus detection performance on the (add models list here) - :widths: 15 15 - :header-rows: 1 - Args: model (str or nn.Module): Defined PyTorch model or name of the existing models support by - tiatoolbox for processing the data e.g., mapde-conic, sccnn-conic. + tiatoolbox for processing the data e.g., mapde-conic, mapde-crchisto. For a full list of pretrained models, please refer to the `docs `. By default, the corresponding pretrained weights will also @@ -300,43 +302,25 @@ class NucleusDetector(SemanticSegmentor): verbose (bool): Whether to output logging information. 
+ Supported TIAToolBox Pre-trained Models: + - `mapde-conic` + - `mapde-crchisto` - Examples: - >>> # list of 2 image patches as input - >>> data = [img1, img2] - >>> nucleus_detector = NucleusDetector(pretrained_model="mapde-conic") - >>> output = nucleus_detector.run(data, mode='patch') - - >>> # array of list of 2 image patches as input - >>> data = np.array([img1, img2]) - >>> nucleus_detector = NucleusDetector(pretrained_model="mapde-conic") - >>> output = nucleus_detector.run(data, mode='patch') - - >>> # list of 2 image patch files as input - >>> data = ['path/img.png', 'path/img.png'] - >>> nucleus_detector = NucleusDetector(pretrained_model="mapde-conic") - >>> output = nucleus_detector.run(data, mode='patch') - - >>> # list of 2 image tile files as input - >>> tile_file = ['path/tile1.png', 'path/tile2.png'] - >>> nucleus_detector = NucleusDetector(pretrained_model="mapde-conic") - >>> output = nucleus_detector.run(tile_file, mode='tile') - - >>> # list of 2 wsi files as input - >>> wsi_file = ['path/wsi1.svs', 'path/wsi2.svs'] - >>> nucleus_detector = NucleusDetector(pretrained_model="mapde-conic") - >>> output = nucleus_detector.run(wsi_file, mode='wsi') - - References: - [1] Raza, Shan E. Ahmed, et al. "Deconvolving convolutional neural network - for cell detection." 2019 IEEE 16th International Symposium on Biomedical - Imaging (ISBI 2019). IEEE, 2019. - - [2] Sirinukunwattana, Korsuk, et al. - "Locality sensitive deep learning for detection and classification - of nuclei in routine colon cancer histology images." - IEEE transactions on medical imaging 35.5 (2016): 1196-1206. + Examples: + >>> model_name = "mapde-conic" + >>> detector = NucleusDetector(model=model_name, batch_size=16, num_workers=8) + >>> detector.run( + ... images=[pathlib.Path("example_wsi.tiff")], + ... patch_mode=False, + ... device="cuda", + ... save_dir=pathlib.Path("output_directory/"), + ... overwrite=True, + ... output_type="annotationstore", + ... class_dict={0: "nucleus"}, + ... auto_get_mask=True, + ... memory_threshold=80 + ... ) """ from tiatoolbox.wsicore.wsireader import WSIReader @@ -393,24 +377,50 @@ def post_process_wsi( prediction_shape: tuple[int, ...], prediction_dtype: type, **kwargs: Unpack[SemanticSegmentorRunParams], - ) -> pd.DataFrame: + ) -> da.Array: """Define how to post-process WSI predictions. + Processes the raw prediction dask array using map_overlap + to apply the model's post-processing function on each chunk + with appropriate overlaps on chunk boundaries. + Args: + raw_predictions (da.Array): The raw predictions from the model. + prediction_shape (tuple[int, ...]): The shape of the predictions. + prediction_dtype (type): The data type of the predictions. Returns: - A DataFrame containing the post-processed predictions for the WSI. + Post-processed dask array of detections at the WSI level. + The array has the same shape and dtype as the input. + Each pixel indicates the presence of a detected nucleus as a probability score. 
""" logger.info("Post processing WSI predictions in NucleusDetector") - logger.info(f"Raw probabilities shape: {prediction_shape}") logger.info(f"Raw probabilities dtype: {prediction_dtype}") - logger.info(f"Chunk size: {raw_predictions.chunks}") + logger.info(f"Raw chunk size: {raw_predictions.chunks}") + + # Add halo (overlap) around each block for post-processing + depth_h = self.model.min_distance + depth_w = self.model.min_distance + depth = {0: depth_h, 1: depth_w, 2: 0} - detection_df = self.model.postproc( - raw_predictions, prediction_shape, prediction_dtype + # Re-chunk to post-processing tile shape for more efficient processing + rechunked_prediction_map = raw_predictions.rechunk( + (self.model.postproc_tile_shape[0], self.model.postproc_tile_shape[1], -1) + ) + logger.info(f"Post-processing chunk size: {rechunked_prediction_map.chunks}") + + detection_map = da.map_overlap( + rechunked_prediction_map, + self.model.postproc, + depth=depth, + boundary=0, + dtype=prediction_dtype, + block_info=True, + depth_h=depth_h, + depth_w=depth_w, ) - return detection_df + return detection_map def save_predictions( self: NucleusDetector, @@ -418,20 +428,55 @@ def save_predictions( output_type: str, save_path: Path | None = None, **kwargs: Unpack[SemanticSegmentorRunParams], - ) -> AnnotationStore | Path | list[Path]: - """Define how to save the processed predictions. + ) -> AnnotationStore: + """Save nucleus detections to disk or return them in memory. + + This method saves predictions in one of the supported formats: + - "annotationstore": converts predictions to an AnnotationStore (.db file). + + If `patch_mode` is True, predictions are saved per image. If False, + predictions are merged and saved as a single output. + + Args: + processed_predictions (dict): + Dictionary containing processed model predictions. + output_type (str): + "annotationstore". + save_path (Path | None): + Path to save the output file. + **kwargs (SemanticSegmentorRunParams): + Additional runtime parameters including: + - scale_factor (tuple[float, float]): For coordinate transformation. + - class_dict (dict): Mapping of class indices to names. + - return_probabilities (bool): Whether to save probability maps. Returns: - A function that saves the processed predictions. + dict | AnnotationStore | Path: + - If output_type is "dict": returns predictions as a dictionary. + - If output_type is "zarr": returns path to saved Zarr file. + - If output_type is "annotationstore": returns AnnotationStore + or path to .db file. """ - logger.info("Saving predictions in NucleusDetector") + # Conversion to annotationstore uses a different function for SemanticSegmentor if output_type != "annotationstore": logger.warning( - f"NucleusDetector only supports output_type='annotationstore'. " - f"Overriding output_type='{output_type}' to 'annotationstore'." + f"Output type '{output_type}' is not supported by NucleusDetector. " + "Defaulting to 'annotationstore'." ) output_type = "annotationstore" + + # scale_factor set from kwargs + scale_factor = kwargs.get("scale_factor", (1.0, 1.0)) + # class_dict set from kwargs + class_dict = kwargs.get("class_dict") + + # Need to add support for zarr conversion. 
+ save_paths = [] + + logger.info("Saving predictions as AnnotationStore.") + + scale_factor = kwargs.get("scale_factor", (1.0, 1.0)) class_dict = kwargs.get("class_dict") @@ -443,7 +488,7 @@ def save_predictions( else: output_path = save_path.parent / (str(i) + ".db") - out_file = df_to_store_nucleus_detector( + out_file = write_centroids_to_store( predictions, scale_factor=scale_factor, class_dict=class_dict, @@ -452,9 +497,85 @@ def save_predictions( save_paths.append(out_file) return save_paths - return df_to_store_nucleus_detector( + return write_centroids_to_store( processed_predictions["predictions"], scale_factor=scale_factor, save_path=save_path, class_dict=class_dict, ) + + @staticmethod + def nucleus_detection_nms( + df: pd.DataFrame, radius: int, overlap_threshold: float = 0.5 + ) -> pd.DataFrame: + """Non-Maximum Suppression across ALL detections. + + Keeps the highest-prob detection, removes any other point within 'radius' pixels > overlap_threshold. + Expects dataframe columns: ['x','y','type','prob']. + + Args: + df: pandas DataFrame of detections. + radius: radius in pixels for suppression. + overlap_threshold: float in [0,1], fraction of radius for suppression. + + Returns: + filtered DataFrame with same columns/dtypes. + """ + if df.empty: + return df.copy() + if radius <= 0: + raise ValueError("radius must be > 0") + if not (0.0 < overlap_threshold <= 1.0): + raise ValueError("overlap_threshold must be in (0.0, 1.0]") + + # Sort by descending probability (highest priority first) + sub = df.sort_values("prob", ascending=False).reset_index(drop=True) + + # Coordinates as float64 for distance math + coords = sub[["x", "y"]].to_numpy(dtype=np.float64) + r2 = float(radius) * float(radius) + + coords = sub[["x", "y"]].to_numpy(dtype=np.float64) + r = float(radius) + two_r = 2.0 * r + two_r2 = two_r * two_r # distance^2 cutoff for any overlap + + suppressed = np.zeros(len(sub), dtype=bool) + keep_idx = [] + + for i in range(len(sub)): + if suppressed[i]: + continue + + keep_idx.append(i) + + # Vectorised distances to all points + dx = coords[:, 0] - coords[i, 0] + dy = coords[:, 1] - coords[i, 1] + d2 = dx * dx + dy * dy + + # Only points with d < 2r can have nonzero overlap + cand = d2 <= two_r2 + cand[i] = False # don't suppress the kept point itself + if not np.any(cand): + continue + + d = np.sqrt(d2[cand]) + + # Safe cosine argument = (distance ÷ diameter), Clamp for numerical stability + u = np.clip(d / (2.0 * r), -1.0, 1.0) + # Exact intersection area of two equal-radius circles. 
+ inter = 2.0 * (r * r) * np.arccos(u) - 0.5 * d * np.sqrt( + np.clip(4.0 * r * r - d * d, 0.0, None) + ) + + union = 2.0 * np.pi * (r * r) - inter + iou = inter / union + + # Suppress candidates whose IoU exceeds threshold + idx_cand = np.where(cand)[0] + to_suppress = idx_cand[iou >= overlap_threshold] + suppressed[to_suppress] = True + + kept = sub.iloc[keep_idx].copy() + return kept diff --git a/tiatoolbox/utils/misc.py b/tiatoolbox/utils/misc.py index b6081cb74..1d094b6d1 100644 --- a/tiatoolbox/utils/misc.py +++ b/tiatoolbox/utils/misc.py @@ -1340,6 +1340,7 @@ def process_contours( return annotations_list + def df_to_store_nucleus_detector( df: pd.DataFrame, scale_factor: tuple[float, float], From 6e65fba868803e08f1171e0a186b30a31b63017f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 12 Nov 2025 17:33:34 +0000 Subject: [PATCH 11/26] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- test.py | 2 +- tiatoolbox/models/architecture/mapde.py | 3 +- tiatoolbox/models/architecture/sccnn.py | 9 -- tiatoolbox/models/engine/nucleus_detector.py | 118 +++++++++++-------- tiatoolbox/utils/misc.py | 1 - 5 files changed, 69 insertions(+), 64 deletions(-) diff --git a/test.py b/test.py index e907d7c25..856a0ab91 100644 --- a/test.py +++ b/test.py @@ -22,5 +22,5 @@ output_type="annotationstore", class_dict={0: "nucleus"}, auto_get_mask=True, - memory_threshold=80 + memory_threshold=80, ) diff --git a/tiatoolbox/models/architecture/mapde.py b/tiatoolbox/models/architecture/mapde.py index bfadb04a4..88fec25a3 100644 --- a/tiatoolbox/models/architecture/mapde.py +++ b/tiatoolbox/models/architecture/mapde.py @@ -13,7 +13,6 @@ import torch.nn.functional as F # noqa: N812 from skimage.feature import peak_local_max -from tiatoolbox import logger from tiatoolbox.models.architecture.micronet import MicroNet @@ -236,7 +235,7 @@ def forward(self: MapDe, input_tensor: torch.Tensor) -> torch.Tensor: # skipcq: PYL-W0221 # noqa: ERA001 def postproc( - self: MapDe, + self: MapDe, block: np.ndarray, block_info: dict, depth_h: int, diff --git a/tiatoolbox/models/architecture/sccnn.py b/tiatoolbox/models/architecture/sccnn.py index 153a7e25e..f4a03051d 100644 --- a/tiatoolbox/models/architecture/sccnn.py +++ b/tiatoolbox/models/architecture/sccnn.py @@ -18,15 +18,6 @@ from torch import nn from tiatoolbox import logger -from tiatoolbox.models.engine.nucleus_detector import ( - # centroids_map_to_dask_dataframe, - centroids_map_to_ddf_chunkwise, -) - -import torch -from skimage.feature import peak_local_max -from torch import nn - from tiatoolbox.models.models_abc import ModelABC diff --git a/tiatoolbox/models/engine/nucleus_detector.py b/tiatoolbox/models/engine/nucleus_detector.py index 078c5d45d..9b89dadc4 100644 --- a/tiatoolbox/models/engine/nucleus_detector.py +++ b/tiatoolbox/models/engine/nucleus_detector.py @@ -3,28 +3,25 @@ from __future__ import annotations from pathlib import Path -from typing import TYPE_CHECKING, Unpack, Tuple +from typing import TYPE_CHECKING, Unpack import dask import dask.array as da import dask.dataframe as dd import numpy as np import pandas as pd -from skimage.feature import peak_local_max -from skimage.measure import label, regionprops -from tiatoolbox.wsicore.wsireader import is_zarr from dask.diagnostics.progress import ProgressBar +from shapely.geometry import Point +from skimage.feature import peak_local_max from tiatoolbox import logger from 
tiatoolbox.annotation import AnnotationStore +from tiatoolbox.annotation.storage import Annotation, SQLiteStore from tiatoolbox.models.engine.semantic_segmentor import ( SemanticSegmentor, SemanticSegmentorRunParams, ) -from shapely.geometry import Point from tiatoolbox.models.models_abc import ModelABC -from tiatoolbox.utils.misc import df_to_store_nucleus_detector -from tiatoolbox.annotation.storage import SQLiteStore, Annotation if TYPE_CHECKING: # pragma: no cover from tiatoolbox.models.models_abc import ModelABC @@ -126,52 +123,69 @@ def probability_to_peak_map( # return out -def _chunk_to_df(block:np.ndarray, block_info:dict, x_offset:int = 0, y_offset:int = 0) -> pd.DataFrame: +def _chunk_to_df( + block: np.ndarray, block_info: dict, x_offset: int = 0, y_offset: int = 0 +) -> pd.DataFrame: # block: np.ndarray (h, w, C) for this chunk (no halos here; use after stitching) info = block_info[0] if 0 in block_info else block_info[None] - (r0, r1), (c0, c1), _ = info["array-location"] # global interior coords for this chunk + (r0, r1), (c0, c1), _ = info[ + "array-location" + ] # global interior coords for this chunk # find nonzeros per channel ys, xs, cs = np.nonzero(block) if ys.size == 0: DTYPES = { - "x": "uint32", # or "uint32" if you really want - "y": "uint32", + "x": "uint32", # or "uint32" if you really want + "y": "uint32", "type": "uint32", "prob": "float32", } return pd.DataFrame({k: pd.Series(dtype=v) for k, v in DTYPES.items()}) probs = block[ys, xs, cs].astype(np.float32, copy=False) - df = pd.DataFrame({ - "x": xs + c0 + int(x_offset), - "y": ys + r0 + int(y_offset), - "type": cs.astype(np.int64, copy=False), - "prob": probs, - }) + df = pd.DataFrame( + { + "x": xs + c0 + int(x_offset), + "y": ys + r0 + int(y_offset), + "type": cs.astype(np.int64, copy=False), + "prob": probs, + } + ) return df -def centroids_map_to_ddf_chunkwise(scores: da.Array, x_offset: int=0, y_offset: int=0) -> dd.DataFrame: +def centroids_map_to_ddf_chunkwise( + scores: da.Array, x_offset: int = 0, y_offset: int = 0 +) -> dd.DataFrame: # build one delayed pandas DF per chunk - dfs = scores.map_blocks( - _chunk_to_df, - dtype=object, # ignored; returning DataFrames - block_info=True, - x_offset=x_offset, - y_offset=y_offset, - ).to_delayed().ravel() - - meta = pd.DataFrame({"x": pd.Series([], dtype="uint32"), - "y": pd.Series([], dtype="uint32"), - "type": pd.Series([], dtype="uint32"), - "prob": pd.Series([], dtype="float32")}) + dfs = ( + scores.map_blocks( + _chunk_to_df, + dtype=object, # ignored; returning DataFrames + block_info=True, + x_offset=x_offset, + y_offset=y_offset, + ) + .to_delayed() + .ravel() + ) + + meta = pd.DataFrame( + { + "x": pd.Series([], dtype="uint32"), + "y": pd.Series([], dtype="uint32"), + "type": pd.Series([], dtype="uint32"), + "prob": pd.Series([], dtype="float32"), + } + ) ddf = dd.from_delayed(dfs, meta=meta) return ddf -def _chunk_to_records(block: np.ndarray, block_info - ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: +def _chunk_to_records( + block: np.ndarray, block_info +) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: # block: (h, w, C) NumPy chunk (post-stitching, no halos) info = block_info[0] if 0 in block_info else block_info[None] (r0, r1), (c0, c1), _ = info["array-location"] # global interior start/stop @@ -192,10 +206,11 @@ def _chunk_to_records(block: np.ndarray, block_info p = block[ys, xs, cs].astype(np.float32, copy=False) return (x, y, t, p) + def _write_records_to_store( - recs: Tuple[np.ndarray, np.ndarray, np.ndarray, 
np.ndarray], + recs: tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray], store: SQLiteStore, - scale_factor: Tuple[float, float], + scale_factor: tuple[float, float], class_dict: dict | None, batch_size: int = 5000, ) -> int: @@ -214,7 +229,8 @@ def _write_records_to_store( class_dict = {int(k): int(k) for k in uniq} labels = np.array([class_dict.get(int(k), int(k)) for k in t], dtype=object) - def make_points(xb, yb): return [Point(int(xx), int(yy)) for xx, yy in zip(xb, yb)] + def make_points(xb, yb): + return [Point(int(xx), int(yy)) for xx, yy in zip(xb, yb)] written = 0 for i in range(0, n, batch_size): @@ -222,28 +238,31 @@ def make_points(xb, yb): return [Point(int(xx), int(yy)) for xx, yy in zip(xb, y pts = make_points(x[i:j], y[i:j]) anns = [ - Annotation(geometry=pt, - properties={"type": lbl, "probability": float(pp)}) + Annotation(geometry=pt, properties={"type": lbl, "probability": float(pp)}) for pt, lbl, pp in zip(pts, labels[i:j], p[i:j]) ] store.append_many(anns) - written += (j - i) + written += j - i return written def write_centroids_to_store( scores: da.Array, - scale_factor:tuple[float, float] = (1.0, 1.0), + scale_factor: tuple[float, float] = (1.0, 1.0), class_dict: dict | None = None, save_path: Path | None = None, - batch_size: int = 5000) -> Path | SQLiteStore: - + batch_size: int = 5000, +) -> Path | SQLiteStore: # one delayed record-tuple per chunk - recs_delayed = scores.map_blocks( - _chunk_to_records, - dtype=object, # we return Python tuples - block_info=True, - ).to_delayed().ravel() + recs_delayed = ( + scores.map_blocks( + _chunk_to_records, + dtype=object, # we return Python tuples + block_info=True, + ) + .to_delayed() + .ravel() + ) store = SQLiteStore() @@ -272,9 +291,6 @@ def write_centroids_to_store( return store - - - class NucleusDetector(SemanticSegmentor): r"""Nucleus detection engine. @@ -387,6 +403,7 @@ def post_process_wsi( raw_predictions (da.Array): The raw predictions from the model. prediction_shape (tuple[int, ...]): The shape of the predictions. prediction_dtype (type): The data type of the predictions. + Returns: Post-processed dask array of detections at the WSI level. The array has the same shape and dtype as the input. 
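The chunked post-processing described above is easier to see in isolation. Below is a minimal, standalone sketch (not the TIAToolbox API) of peak detection on an overlapped Dask array: each chunk is padded with `depth` pixels of neighbouring context before `peak_local_max` runs, and the padding is trimmed from the result, so peaks near chunk borders are neither missed nor double-counted. The `detect_peaks` helper, the 4-pixel halo, the 0.5 threshold and the 256-pixel chunks are illustrative assumptions only; the engine instead rechunks to the model's `postproc_tile_shape` and calls the model's own `postproc` with `min_distance`-sized halos.

import dask.array as da
import numpy as np
from skimage.feature import peak_local_max

def detect_peaks(block: np.ndarray, min_distance: int = 4, threshold_abs: float = 0.5) -> np.ndarray:
    """Mark local maxima in a padded (h, w, C) block with 1.0."""
    out = np.zeros_like(block, dtype=np.float32)
    for ch in range(block.shape[-1]):
        coords = peak_local_max(
            block[..., ch],
            min_distance=min_distance,
            threshold_abs=threshold_abs,
            exclude_border=False,
        )
        out[coords[:, 0], coords[:, 1], ch] = 1.0
    return out

probs = da.random.random((1024, 1024, 1), chunks=(256, 256, 1)).astype(np.float32)
# depth adds a 4-pixel halo on the spatial axes only; boundary=0 pads the image edge.
peaks = probs.map_overlap(detect_peaks, depth={0: 4, 1: 4, 2: 0}, boundary=0, dtype=np.float32)
ys, xs, cs = np.nonzero(peaks.compute())  # global (row, col, channel) centroid coordinates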
@@ -408,7 +425,7 @@ def post_process_wsi( (self.model.postproc_tile_shape[0], self.model.postproc_tile_shape[1], -1) ) logger.info(f"Post-processing chunk size: {rechunked_prediction_map.chunks}") - + detection_map = da.map_overlap( rechunked_prediction_map, self.model.postproc, @@ -476,7 +493,6 @@ def save_predictions( logger.info("Saving predictions as AnnotationStore.") - scale_factor = kwargs.get("scale_factor", (1.0, 1.0)) class_dict = kwargs.get("class_dict") diff --git a/tiatoolbox/utils/misc.py b/tiatoolbox/utils/misc.py index 1d094b6d1..b6081cb74 100644 --- a/tiatoolbox/utils/misc.py +++ b/tiatoolbox/utils/misc.py @@ -1340,7 +1340,6 @@ def process_contours( return annotations_list - def df_to_store_nucleus_detector( df: pd.DataFrame, scale_factor: tuple[float, float], From 7eb916e9e4de3596336e958f466ba28391bbe7b2 Mon Sep 17 00:00:00 2001 From: Jiaqi Lv Date: Wed, 12 Nov 2025 17:58:23 +0000 Subject: [PATCH 12/26] refactor code --- tiatoolbox/models/architecture/mapde.py | 27 +- tiatoolbox/models/architecture/sccnn.py | 5 + tiatoolbox/models/engine/nucleus_detector.py | 424 +++++++------------ tiatoolbox/utils/misc.py | 86 +--- 4 files changed, 167 insertions(+), 375 deletions(-) diff --git a/tiatoolbox/models/architecture/mapde.py b/tiatoolbox/models/architecture/mapde.py index 88fec25a3..4d00b2e8d 100644 --- a/tiatoolbox/models/architecture/mapde.py +++ b/tiatoolbox/models/architecture/mapde.py @@ -243,7 +243,7 @@ def postproc( ) -> np.ndarray: """Runs inside Dask.da.map_overlap on a padded NumPy block: (h_pad, w_pad, C). Builds a processed mask per channel, runs peak_local_max then - label+regionprops, and writes probability (mean_intensity) at centroid pixels. + writes 1.0 at centroid pixels. Keeps only centroids whose (row,col) lie in the interior window: rows [depth_h : depth_h + core_h), cols [depth_w : depth_w + core_w) Returns same spatial shape as input block: (h_pad, w_pad, C), float32. @@ -251,21 +251,17 @@ def postproc( Args: block: NumPy array (H, W, C) with padded block data. block_info: Dask block info dict. - min_distance: Minimum distance in pixels between peaks. - threshold_abs: Minimum absolute threshold for peak detection. depth_h: Halo size in pixels for height (rows). depth_w: Halo size in pixels for width (cols). - calculate_probabilities: If True, write mean_intensity at centroids; - else write 1.0 at centroids. Returns: - out: NumPy array (H, W, C) with probabilities at centroids, 0 elsewhere. + out: NumPy array (H, W, C) with 1 at centroids, 0 elsewhere. 
""" H, W, C = block.shape - # --- derive core (pre-overlap) size for THIS block safely --- + # --- derive core (pre-overlap) size for THIS block --- info = block_info[0] - locs = info["array-location"] # [(r0,r1),(c0,c1),(ch0,ch1)] + locs = info["array-location"] # a list of (start, stop) coordinates per axis core_h = int(locs[0][1] - locs[0][0]) # r1 - r0 core_w = int(locs[1][1] - locs[1][0]) @@ -287,20 +283,7 @@ def postproc( for r, c in coords: if (rmin <= r < rmax) and (cmin <= c < cmax): out[r, c, ch] = 1.0 - # pmask = probability_to_peak_map(img, self.min_distance, self.threshold_abs) - # if not pmask.any(): - # continue - - # lab = label(pmask) - # props = regionprops(lab, intensity_image=img) - - # for reg in props: - # r, c = reg.centroid # floats in padded-block coords - # if (rmin <= r < rmax) and (cmin <= c < cmax): - # rr = int(round(r)) - # cc = int(round(c)) - # if 0 <= rr < H and 0 <= cc < W: - # out[rr, cc, ch] = 1.0 + return out @staticmethod diff --git a/tiatoolbox/models/architecture/sccnn.py b/tiatoolbox/models/architecture/sccnn.py index f4a03051d..ed801b10f 100644 --- a/tiatoolbox/models/architecture/sccnn.py +++ b/tiatoolbox/models/architecture/sccnn.py @@ -18,6 +18,11 @@ from torch import nn from tiatoolbox import logger + +import torch +from skimage.feature import peak_local_max +from torch import nn + from tiatoolbox.models.models_abc import ModelABC diff --git a/tiatoolbox/models/engine/nucleus_detector.py b/tiatoolbox/models/engine/nucleus_detector.py index 9b89dadc4..2b7ebc37f 100644 --- a/tiatoolbox/models/engine/nucleus_detector.py +++ b/tiatoolbox/models/engine/nucleus_detector.py @@ -7,7 +7,6 @@ import dask import dask.array as da -import dask.dataframe as dd import numpy as np import pandas as pd from dask.diagnostics.progress import ProgressBar @@ -22,275 +21,12 @@ SemanticSegmentorRunParams, ) from tiatoolbox.models.models_abc import ModelABC +from tiatoolbox.annotation.storage import SQLiteStore, Annotation if TYPE_CHECKING: # pragma: no cover from tiatoolbox.models.models_abc import ModelABC -def probability_to_peak_map( - img2d: np.ndarray, - min_distance: int, - threshold_abs: float, - threshold_rel: float = 0.0, -) -> np.ndarray: - """Build a boolean mask (H, W) of objects from a 2D probability map using peak_local_max. - - Args: - img2d (np.ndarray): 2D probability map. - min_distance (int): Minimum distance between peaks. - threshold_abs (float): Absolute threshold for peak detection. - threshold_rel (float, optional): Relative threshold for peak detection. Defaults to 0.0. - - Returns: - mask (np.ndarray): Boolean mask (H, W) with True at peak locations. - """ - H, W = img2d.shape - mask = np.zeros((H, W), dtype=bool) - coords = peak_local_max( - img2d, - min_distance=min_distance, - threshold_abs=threshold_abs, - threshold_rel=threshold_rel, - exclude_border=False, - ) - if coords.size: - r, c = coords[:, 0], coords[:, 1] - mask[r, c] = True - return mask - - -# def peak_detection_mapoverlap( -# block: np.ndarray, -# block_info, -# min_distance: int, -# threshold_abs: float, -# depth_h: int, -# depth_w: int, -# ) -> np.ndarray: -# """Runs inside Dask.da.map_overlap on a padded NumPy block: (h_pad, w_pad, C). -# Builds a processed mask per channel, runs peak_local_max then -# label+regionprops, and writes probability (mean_intensity) at centroid pixels. 
-# Keeps only centroids whose (row,col) lie in the interior window: -# rows [depth_h : depth_h + core_h), cols [depth_w : depth_w + core_w) -# Returns same spatial shape as input block: (h_pad, w_pad, C), float32. - -# Args: -# block: NumPy array (H, W, C) with padded block data. -# block_info: Dask block info dict. -# min_distance: Minimum distance in pixels between peaks. -# threshold_abs: Minimum absolute threshold for peak detection. -# depth_h: Halo size in pixels for height (rows). -# depth_w: Halo size in pixels for width (cols). -# calculate_probabilities: If True, write mean_intensity at centroids; -# else write 1.0 at centroids. - -# Returns: -# out: NumPy array (H, W, C) with probabilities at centroids, 0 elsewhere. -# """ -# H, W, C = block.shape - -# # --- derive core (pre-overlap) size for THIS block safely --- -# info = block_info[0] -# locs = info["array-location"] # [(r0,r1),(c0,c1),(ch0,ch1)] -# core_h = int(locs[0][1] - locs[0][0]) # r1 - r0 -# core_w = int(locs[1][1] - locs[1][0]) - -# rmin, rmax = depth_h, depth_h + core_h -# cmin, cmax = depth_w, depth_w + core_w - -# out = np.zeros((H, W, C), dtype=np.float32) - -# for ch in range(C): -# img = np.asarray(block[..., ch]) # NumPy 2D view -# pmask = probability_to_peak_map(img, min_distance, threshold_abs) -# if not pmask.any(): -# continue - -# lab = label(pmask) -# props = regionprops(lab, intensity_image=img) - -# for reg in props: -# r, c = reg.centroid # floats in padded-block coords -# if (rmin <= r < rmax) and (cmin <= c < cmax): -# rr = int(round(r)) -# cc = int(round(c)) -# if 0 <= rr < H and 0 <= cc < W: -# if calculate_probabilities: -# out[rr, cc, ch] = float(reg.mean_intensity) -# else: -# out[rr, cc, ch] = 1.0 - -# return out - - -def _chunk_to_df( - block: np.ndarray, block_info: dict, x_offset: int = 0, y_offset: int = 0 -) -> pd.DataFrame: - # block: np.ndarray (h, w, C) for this chunk (no halos here; use after stitching) - info = block_info[0] if 0 in block_info else block_info[None] - (r0, r1), (c0, c1), _ = info[ - "array-location" - ] # global interior coords for this chunk - - # find nonzeros per channel - ys, xs, cs = np.nonzero(block) - if ys.size == 0: - DTYPES = { - "x": "uint32", # or "uint32" if you really want - "y": "uint32", - "type": "uint32", - "prob": "float32", - } - return pd.DataFrame({k: pd.Series(dtype=v) for k, v in DTYPES.items()}) - - probs = block[ys, xs, cs].astype(np.float32, copy=False) - df = pd.DataFrame( - { - "x": xs + c0 + int(x_offset), - "y": ys + r0 + int(y_offset), - "type": cs.astype(np.int64, copy=False), - "prob": probs, - } - ) - return df - - -def centroids_map_to_ddf_chunkwise( - scores: da.Array, x_offset: int = 0, y_offset: int = 0 -) -> dd.DataFrame: - # build one delayed pandas DF per chunk - dfs = ( - scores.map_blocks( - _chunk_to_df, - dtype=object, # ignored; returning DataFrames - block_info=True, - x_offset=x_offset, - y_offset=y_offset, - ) - .to_delayed() - .ravel() - ) - - meta = pd.DataFrame( - { - "x": pd.Series([], dtype="uint32"), - "y": pd.Series([], dtype="uint32"), - "type": pd.Series([], dtype="uint32"), - "prob": pd.Series([], dtype="float32"), - } - ) - ddf = dd.from_delayed(dfs, meta=meta) - return ddf - - -def _chunk_to_records( - block: np.ndarray, block_info -) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: - # block: (h, w, C) NumPy chunk (post-stitching, no halos) - info = block_info[0] if 0 in block_info else block_info[None] - (r0, r1), (c0, c1), _ = info["array-location"] # global interior start/stop - - ys, xs, cs = 
np.nonzero(block) - if ys.size == 0: - # return empty, dtype-stable arrays to avoid surprises - return ( - np.empty(0, dtype=np.uint32), - np.empty(0, dtype=np.uint32), - np.empty(0, dtype=np.uint32), - np.empty(0, dtype=np.float32), - ) - - x = xs.astype(np.uint32, copy=False) + int(c0) - y = ys.astype(np.uint32, copy=False) + int(r0) - t = cs.astype(np.uint32, copy=False) - p = block[ys, xs, cs].astype(np.float32, copy=False) - return (x, y, t, p) - - -def _write_records_to_store( - recs: tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray], - store: SQLiteStore, - scale_factor: tuple[float, float], - class_dict: dict | None, - batch_size: int = 5000, -) -> int: - x, y, t, p = recs - n = len(x) - if n == 0: - return 0 # nothing to write - - x = np.rint(x * scale_factor[0]).astype(np.uint32, copy=False) - y = np.rint(y * scale_factor[1]).astype(np.uint32, copy=False) - - # class mapping - if class_dict is None: - # identity over actually-present types - uniq = np.unique(t) - class_dict = {int(k): int(k) for k in uniq} - labels = np.array([class_dict.get(int(k), int(k)) for k in t], dtype=object) - - def make_points(xb, yb): - return [Point(int(xx), int(yy)) for xx, yy in zip(xb, yb)] - - written = 0 - for i in range(0, n, batch_size): - j = min(i + batch_size, n) - pts = make_points(x[i:j], y[i:j]) - - anns = [ - Annotation(geometry=pt, properties={"type": lbl, "probability": float(pp)}) - for pt, lbl, pp in zip(pts, labels[i:j], p[i:j]) - ] - store.append_many(anns) - written += j - i - return written - - -def write_centroids_to_store( - scores: da.Array, - scale_factor: tuple[float, float] = (1.0, 1.0), - class_dict: dict | None = None, - save_path: Path | None = None, - batch_size: int = 5000, -) -> Path | SQLiteStore: - # one delayed record-tuple per chunk - recs_delayed = ( - scores.map_blocks( - _chunk_to_records, - dtype=object, # we return Python tuples - block_info=True, - ) - .to_delayed() - .ravel() - ) - - store = SQLiteStore() - - # one delayed writer per chunk (returns number written) - writes = [ - dask.delayed(_write_records_to_store)( - recs, store, scale_factor, class_dict, batch_size - ) - for recs in recs_delayed - ] - - # IMPORTANT: SQLite is single-writer; run sequentially - with ProgressBar(): - total = dask.compute(*writes, scheduler="single-threaded") - logger.info(f"Total detections written to store: {sum(total)}") - # # if a save director is provided, then dump store into a file - if save_path: - # ensure parent directory exists - save_path.parent.absolute().mkdir(parents=True, exist_ok=True) - # ensure proper db extension - save_path = save_path.parent.absolute() / (save_path.stem + ".db") - store.commit() - store.dump(save_path) - return save_path - - return store - - class NucleusDetector(SemanticSegmentor): r"""Nucleus detection engine. @@ -445,7 +181,7 @@ def save_predictions( output_type: str, save_path: Path | None = None, **kwargs: Unpack[SemanticSegmentorRunParams], - ) -> AnnotationStore: + ) -> AnnotationStore | Path: """Save nucleus detections to disk or return them in memory. 
This method saves predictions in one of the supported formats: @@ -504,7 +240,7 @@ def save_predictions( else: output_path = save_path.parent / (str(i) + ".db") - out_file = write_centroids_to_store( + out_file = self.write_centroids_to_store( predictions, scale_factor=scale_factor, class_dict=class_dict, @@ -513,7 +249,7 @@ def save_predictions( save_paths.append(out_file) return save_paths - return write_centroids_to_store( + return self.write_centroids_to_store( processed_predictions["predictions"], scale_factor=scale_factor, save_path=save_path, @@ -595,3 +331,155 @@ def nucleus_detection_nms( kept = sub.iloc[keep_idx].copy() return kept + + @staticmethod + def _chunk_to_records( + block: np.ndarray, + block_info: dict + ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + """Convert a Dask block of detection maps to detection records. + + Each block is a NumPy array of shape (h, w, C) containing detection scores + of each class c. This function finds non-zero detections and returns their + global coordinates, class IDs (channel), and probabilities. + + Args: + block: NumPy array (h, w, C) for this chunk (no halos). + block_info: Dask block info dict. + Returns: + Tuple of ([x_coords], [y_coords], [class_ids], [probs]) + """ + + # block: (h, w, C) NumPy chunk (post-stitching, no halos) + info = block_info[0] if 0 in block_info else block_info[None] + (r0, r1), (c0, c1), _ = info["array-location"] # global interior start/stop + + # find the coordinates and channel indices of nonzeros + ys, xs, cs = np.nonzero(block) + + if ys.size == 0: + # return empty arrays + return ( + np.empty(0, dtype=np.uint32), + np.empty(0, dtype=np.uint32), + np.empty(0, dtype=np.uint32), + np.empty(0, dtype=np.float32), + ) + + x = xs.astype(np.uint32, copy=False) + int(c0) + y = ys.astype(np.uint32, copy=False) + int(r0) + t = cs.astype(np.uint32, copy=False) + + # read detection probabilities + p = block[ys, xs, cs].astype(np.float32, copy=False) + return (x, y, t, p) + + + @staticmethod + def _write_records_to_store( + recs: Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray], + store: SQLiteStore, + scale_factor: Tuple[float, float], + class_dict: dict[int, str | int] | None, + batch_size: int = 5000, + ) -> int: + """Write detection records to AnnotationStore in batches. 
+ + Args: + recs: Tuple of ([x_coords], [y_coords], [class_ids], [probs]) + store: SQLiteStore to write the detections to + scale_factor: Scaling factors for x and y coordinates + class_dict: Mapping from original class IDs to new class names + batch_size: Number of records to write in each batch + Returns: + Total number of records written + """ + x, y, t, p = recs + n = len(x) + if n == 0: + return 0 # nothing to write + + # scale coordinates + x = np.rint(x * scale_factor[0]).astype(np.uint32, copy=False) + y = np.rint(y * scale_factor[1]).astype(np.uint32, copy=False) + + # class mapping + if class_dict is None: + # identity over actually-present types + uniq = np.unique(t) + class_dict = {int(k): int(k) for k in uniq} + labels = np.array([class_dict.get(int(k), int(k)) for k in t], dtype=object) + + def make_points(xb, yb): return [Point(int(xx), int(yy)) for xx, yy in zip(xb, yb)] + + written = 0 + for i in range(0, n, batch_size): + j = min(i + batch_size, n) + pts = make_points(x[i:j], y[i:j]) + + anns = [ + Annotation(geometry=pt, + properties={"type": lbl, "probability": float(pp)}) + for pt, lbl, pp in zip(pts, labels[i:j], p[i:j]) + ] + store.append_many(anns) + written += (j - i) + return written + + + @staticmethod + def write_centroids_to_store( + detection_maps: da.Array, + scale_factor: tuple[float, float] = (1.0, 1.0), + class_dict: dict | None = None, + save_path: Path | None = None, + batch_size: int = 5000 + ) -> Path | SQLiteStore: + """Write post-processed detection maps to an AnnotationStore. + This is done in chunks using Dask for efficiency and to handle large + detection maps at WSI level. + + Args: + detection_maps: Dask array (H, W, C) of detection scores. + scale_factor: Tuple (sx, sy) to scale coordinates before saving. + class_dict: Optional dict mapping class indices to names. + save_path: Optional Path to save the .db file. If None, returns in-memory store. + batch_size: Number of records to write per batch. + Returns: + Path to saved .db file if save_path is provided, else in-memory SQLiteStore. 
+ """ + + # Convert each block to detection records first + # [block_H, block_W, C] -> [xs, ys, classes, probs] + # one delayed record-tuple per chunk + recs_delayed = detection_maps.map_blocks( + NucleusDetector._chunk_to_records, + dtype=object, # we return Python tuples + block_info=True, + ).to_delayed().ravel() + + # create annotation store + store = SQLiteStore() + + # one delayed writer per chunk (returns number of detections written) + writes = [ + dask.delayed(NucleusDetector._write_records_to_store)( + recs, store, scale_factor, class_dict, batch_size + ) + for recs in recs_delayed + ] + + # IMPORTANT: SQLite is single-writer; run sequentially + with ProgressBar(): + total = dask.compute(*writes, scheduler="single-threaded") + logger.info(f"Total detections written to store: {sum(total)}") + + # if a save directory is provided, then dump store into a file + if save_path: + save_path.parent.absolute().mkdir(parents=True, exist_ok=True) + save_path = save_path.parent.absolute() / (save_path.stem + ".db") + store.commit() + store.dump(save_path) + return save_path + + return store diff --git a/tiatoolbox/utils/misc.py b/tiatoolbox/utils/misc.py index b6081cb74..56c6a3ea7 100644 --- a/tiatoolbox/utils/misc.py +++ b/tiatoolbox/utils/misc.py @@ -21,7 +21,7 @@ import zarr from filelock import FileLock from shapely.affinity import translate -from shapely.geometry import Point, Polygon +from shapely.geometry import Polygon from shapely.geometry import shape as feature2geometry from skimage import exposure from tqdm import notebook as tqdm_notebook @@ -1340,90 +1340,6 @@ def process_contours( return annotations_list -def df_to_store_nucleus_detector( - df: pd.DataFrame, - scale_factor: tuple[float, float], - save_path: Path | None = None, - class_dict: dict | None = None, - batch_size: int = 50_000, -) -> SQLiteStore | Path: - """Convert a pandas DataFrame with columns ['x','y','type','prob'] - into an Annotation SQLiteStore efficiently using append_many(). - - Args: - df (pd.DataFrame): - A pandas DataFrame with columns ['x','y','type','prob']. - save_path (Path, optional): - Optional Output directory to save the Annotation - Store results. - scale_factor (tuple[float, float]): - The scale factor to use when saving the - annotations. All coordinates will be multiplied by this factor to allow - conversion of annotations saved at non-baseline resolution to baseline. - Should be model_mpp/slide_mpp. - class_dict (dict): - Optional dictionary mapping class indices to class names. - batch_size (int): - Number of annotations to process in each batch. - - Returns: - (SQLiteStore or Path): - An SQLiteStore containing Annotations for each nucleus - or Path to file storing SQLiteStore containing Annotations - for each nucleus. 
- """ - # 1) Select & coerce dtypes once (compact + avoids per-row casts) - x = df["x"].to_numpy(dtype=np.int64, copy=False) - y = df["y"].to_numpy(dtype=np.int64, copy=False) - t = df["type"].to_numpy(dtype=np.int64, copy=False) - p = df["prob"].to_numpy(dtype=np.float32, copy=False) - - x_scaled = np.rint(x * scale_factor[0]).astype(np.int64, copy=False) - y_scaled = np.rint(y * scale_factor[1]).astype(np.int64, copy=False) - - store = SQLiteStore() - - def make_points(xb, yb): - return [Point(int(xx), int(yy)) for xx, yy in zip(xb, yb)] - - if class_dict is None: - # identity over the actually present types (robust if types aren't 0..K) - unique_types = np.unique(t) - class_dict = {int(k): int(k) for k in unique_types} - - n = len(df) - for i in range(0, n, batch_size): - j = min(i + batch_size, n) - xb, yb, tb, pb = x_scaled[i:j], y_scaled[i:j], t[i:j], p[i:j] - - pts = make_points(xb, yb) # array/list of Points - - anns = [ - Annotation( - geometry=pt, - properties={ - "type": class_dict.get(int(tt), int(tt)), - "probability": float(pp), - }, - ) - for pt, tt, pp in zip(pts, tb, pb) - ] - - store.append_many(anns) - - # # if a save director is provided, then dump store into a file - if save_path: - # ensure parent directory exists - save_path.parent.absolute().mkdir(parents=True, exist_ok=True) - # ensure proper db extension - save_path = save_path.parent.absolute() / (save_path.stem + ".db") - store.commit() - store.dump(save_path) - return save_path - - return store - - def dict_to_store_semantic_segmentor( patch_output: dict | zarr.Group, scale_factor: tuple[float, float], From 8442ac2876bd9815ce7cdda5bc8f9aeb7e05eba9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 12 Nov 2025 17:58:53 +0000 Subject: [PATCH 13/26] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tiatoolbox/models/architecture/sccnn.py | 5 --- tiatoolbox/models/engine/nucleus_detector.py | 47 ++++++++++---------- 2 files changed, 24 insertions(+), 28 deletions(-) diff --git a/tiatoolbox/models/architecture/sccnn.py b/tiatoolbox/models/architecture/sccnn.py index ed801b10f..f4a03051d 100644 --- a/tiatoolbox/models/architecture/sccnn.py +++ b/tiatoolbox/models/architecture/sccnn.py @@ -18,11 +18,6 @@ from torch import nn from tiatoolbox import logger - -import torch -from skimage.feature import peak_local_max -from torch import nn - from tiatoolbox.models.models_abc import ModelABC diff --git a/tiatoolbox/models/engine/nucleus_detector.py b/tiatoolbox/models/engine/nucleus_detector.py index 2b7ebc37f..4c020d3f1 100644 --- a/tiatoolbox/models/engine/nucleus_detector.py +++ b/tiatoolbox/models/engine/nucleus_detector.py @@ -11,7 +11,6 @@ import pandas as pd from dask.diagnostics.progress import ProgressBar from shapely.geometry import Point -from skimage.feature import peak_local_max from tiatoolbox import logger from tiatoolbox.annotation import AnnotationStore @@ -21,7 +20,6 @@ SemanticSegmentorRunParams, ) from tiatoolbox.models.models_abc import ModelABC -from tiatoolbox.annotation.storage import SQLiteStore, Annotation if TYPE_CHECKING: # pragma: no cover from tiatoolbox.models.models_abc import ModelABC @@ -331,25 +329,24 @@ def nucleus_detection_nms( kept = sub.iloc[keep_idx].copy() return kept - + @staticmethod def _chunk_to_records( - block: np.ndarray, - block_info: dict + block: np.ndarray, block_info: dict ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: """Convert a Dask 
block of detection maps to detection records. - Each block is a NumPy array of shape (h, w, C) containing detection scores + Each block is a NumPy array of shape (h, w, C) containing detection scores of each class c. This function finds non-zero detections and returns their global coordinates, class IDs (channel), and probabilities. Args: block: NumPy array (h, w, C) for this chunk (no halos). block_info: Dask block info dict. + Returns: Tuple of ([x_coords], [y_coords], [class_ids], [probs]) """ - # block: (h, w, C) NumPy chunk (post-stitching, no halos) info = block_info[0] if 0 in block_info else block_info[None] (r0, r1), (c0, c1), _ = info["array-location"] # global interior start/stop @@ -369,12 +366,11 @@ def _chunk_to_records( x = xs.astype(np.uint32, copy=False) + int(c0) y = ys.astype(np.uint32, copy=False) + int(r0) t = cs.astype(np.uint32, copy=False) - + # read detection probabilities p = block[ys, xs, cs].astype(np.float32, copy=False) return (x, y, t, p) - @staticmethod def _write_records_to_store( recs: Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray], @@ -384,7 +380,7 @@ def _write_records_to_store( batch_size: int = 5000, ) -> int: """Write detection records to AnnotationStore in batches. - + Args: recs: Tuple of ([x_coords], [y_coords], [class_ids], [probs]) store: SQLiteStore to write the detections to @@ -410,7 +406,8 @@ def _write_records_to_store( class_dict = {int(k): int(k) for k in uniq} labels = np.array([class_dict.get(int(k), int(k)) for k in t], dtype=object) - def make_points(xb, yb): return [Point(int(xx), int(yy)) for xx, yy in zip(xb, yb)] + def make_points(xb, yb): + return [Point(int(xx), int(yy)) for xx, yy in zip(xb, yb)] written = 0 for i in range(0, n, batch_size): @@ -418,25 +415,25 @@ def make_points(xb, yb): return [Point(int(xx), int(yy)) for xx, yy in zip(xb, y pts = make_points(x[i:j], y[i:j]) anns = [ - Annotation(geometry=pt, - properties={"type": lbl, "probability": float(pp)}) + Annotation( + geometry=pt, properties={"type": lbl, "probability": float(pp)} + ) for pt, lbl, pp in zip(pts, labels[i:j], p[i:j]) ] store.append_many(anns) - written += (j - i) + written += j - i return written - @staticmethod def write_centroids_to_store( detection_maps: da.Array, scale_factor: tuple[float, float] = (1.0, 1.0), class_dict: dict | None = None, save_path: Path | None = None, - batch_size: int = 5000 + batch_size: int = 5000, ) -> Path | SQLiteStore: """Write post-processed detection maps to an AnnotationStore. - This is done in chunks using Dask for efficiency and to handle large + This is done in chunks using Dask for efficiency and to handle large detection maps at WSI level. Args: @@ -445,18 +442,22 @@ def write_centroids_to_store( class_dict: Optional dict mapping class indices to names. save_path: Optional Path to save the .db file. If None, returns in-memory store. batch_size: Number of records to write per batch. + Returns: Path to saved .db file if save_path is provided, else in-memory SQLiteStore. 
""" - # Convert each block to detection records first # [block_H, block_W, C] -> [xs, ys, classes, probs] # one delayed record-tuple per chunk - recs_delayed = detection_maps.map_blocks( - NucleusDetector._chunk_to_records, - dtype=object, # we return Python tuples - block_info=True, - ).to_delayed().ravel() + recs_delayed = ( + detection_maps.map_blocks( + NucleusDetector._chunk_to_records, + dtype=object, # we return Python tuples + block_info=True, + ) + .to_delayed() + .ravel() + ) # create annotation store store = SQLiteStore() From de830741fe711430c3fa1ddc52e9888c2bcd8153 Mon Sep 17 00:00:00 2001 From: Jiaqi Lv Date: Wed, 12 Nov 2025 18:31:02 +0000 Subject: [PATCH 14/26] clean up --- requirements/requirements.txt | 1 - tiatoolbox/models/architecture/mapde.py | 11 +-- tiatoolbox/models/architecture/sccnn.py | 94 +++++++++++-------------- 3 files changed, 49 insertions(+), 57 deletions(-) diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 1282748bf..045a4ce4e 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -22,7 +22,6 @@ openslide-bin>=4.0.0.2 openslide-python>=1.4.0 pandas>=2.0.0 pillow>=9.3.0 -pyarrow>=14.0.1 pydicom>=2.3.1 # Used by wsidicom pyyaml>=6.0 requests>=2.28.1 diff --git a/tiatoolbox/models/architecture/mapde.py b/tiatoolbox/models/architecture/mapde.py index 4d00b2e8d..f3d27461c 100644 --- a/tiatoolbox/models/architecture/mapde.py +++ b/tiatoolbox/models/architecture/mapde.py @@ -78,7 +78,7 @@ def __init__( min_distance: int = 4, threshold_abs: float = 250, num_classes: int = 1, - postproc_tile_shape: list[int] = [2048, 2048], + postproc_tile_shape: tuple[int, int] = (2048, 2048), ) -> None: """Initialize :class:`MapDe`.""" super().__init__( @@ -86,7 +86,9 @@ def __init__( num_input_channels=num_input_channels, out_activation="relu", ) + self.postproc_tile_shape = postproc_tile_shape + dist_filter = np.array( [ [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], @@ -242,6 +244,7 @@ def postproc( depth_w: int, ) -> np.ndarray: """Runs inside Dask.da.map_overlap on a padded NumPy block: (h_pad, w_pad, C). + Builds a processed mask per channel, runs peak_local_max then writes 1.0 at centroid pixels. Keeps only centroids whose (row,col) lie in the interior window: @@ -257,7 +260,7 @@ def postproc( Returns: out: NumPy array (H, W, C) with 1 at centroids, 0 elsewhere. 
""" - H, W, C = block.shape + block_height, block_width, block_channels = block.shape # --- derive core (pre-overlap) size for THIS block --- info = block_info[0] @@ -268,9 +271,9 @@ def postproc( rmin, rmax = depth_h, depth_h + core_h cmin, cmax = depth_w, depth_w + core_w - out = np.zeros((H, W, C), dtype=np.float32) + out = np.zeros((block_height, block_width, block_channels), dtype=np.float32) - for ch in range(C): + for ch in range(block_channels): img = np.asarray(block[..., ch]) # NumPy 2D view coords = peak_local_max( diff --git a/tiatoolbox/models/architecture/sccnn.py b/tiatoolbox/models/architecture/sccnn.py index f4a03051d..0bf4e0145 100644 --- a/tiatoolbox/models/architecture/sccnn.py +++ b/tiatoolbox/models/architecture/sccnn.py @@ -11,13 +11,11 @@ from collections import OrderedDict -import dask.array as da import numpy as np -import pandas as pd import torch +from skimage.feature import peak_local_max from torch import nn -from tiatoolbox import logger from tiatoolbox.models.models_abc import ModelABC @@ -330,63 +328,57 @@ def spatially_constrained_layer1( return self.spatially_constrained_layer2(s1_sigmoid0, s1_sigmoid1, s1_sigmoid2) def postproc( - self: SCCNN, prediction_map: da.Array, prediction_shape: tuple, dtype: np.dtype - ) -> pd.DataFrame: - """Post-processing script for SCCNN. + self: SCCNN, + block: np.ndarray, + block_info: dict, + depth_h: int, + depth_w: int, + ) -> np.ndarray: + """Runs inside Dask.da.map_overlap on a padded NumPy block: (h_pad, w_pad, C). - Post-process predicted probability map of the input image. - Performs peak detection, then non-maximum suppression. - Returns a pandas DataFrame containing detected nuclei coordinates [x, y, type, prob]. + Builds a processed mask per channel, runs peak_local_max then + writes 1.0 at centroid pixels. + Keeps only centroids whose (row,col) lie in the interior window: + rows [depth_h : depth_h + core_h), cols [depth_w : depth_w + core_w) + Returns same spatial shape as input block: (h_pad, w_pad, C), float32. Args: - prediction_map (da.array): - Predicted probability map (HxWx1) of the entire input image. - prediction_shape (tuple): - Shape of the prediction map. - dtype (np.dtype): - Data type of the prediction map. + block: NumPy array (H, W, C) with padded block data. + block_info: Dask block info dict. + depth_h: Halo size in pixels for height (rows). + depth_w: Halo size in pixels for width (cols). Returns: - detected_nuclei (pandas.DataFrame): - Detected nuclei coordinates stored in a pandas DataFrame. - + out: NumPy array (H, W, C) with 1 at centroids, 0 elsewhere. 
""" - depth = {0: self.min_distance, 1: self.min_distance, 2: 0} + block_height, block_width, block_channels = block.shape - # print("maxmin debug:") # --- DEBUG --- - # lazy_max = prediction_map.max() - # max_value = lazy_max.compute() - # lazy_min = prediction_map.min() - # min_value = lazy_min.compute() - # print(f"lazy_max: {max_value}, lazy_min: {min_value}") + # --- derive core (pre-overlap) size for THIS block --- + info = block_info[0] + locs = info["array-location"] # a list of (start, stop) coordinates per axis + core_h = int(locs[0][1] - locs[0][0]) # r1 - r0 + core_w = int(locs[1][1] - locs[1][0]) - rechunked_prediction_map = prediction_map.rechunk( - (self.postproc_tile_shape[0], self.postproc_tile_shape[1], -1) - ) - print(f"rechunked_prediction_map.shape: {rechunked_prediction_map.shape}") - print(f"rechunked_prediction_map.chunks: {rechunked_prediction_map.chunks}") - - scores = da.map_overlap( - rechunked_prediction_map, - peak_detection_mapoverlap, - depth=depth, - boundary=0, - dtype=dtype, - block_info=True, - min_distance=self.min_distance, - threshold_abs=self.threshold_abs, - depth_h=self.min_distance, - depth_w=self.min_distance, - calculate_probabilities=False, - ) - ddf = centroids_map_to_dask_dataframe(scores, x_offset=0, y_offset=0) - pandas_df = ddf.compute() + rmin, rmax = depth_h, depth_h + core_h + cmin, cmax = depth_w, depth_w + core_w + + out = np.zeros((block_height, block_width, block_channels), dtype=np.float32) + + for ch in range(block_channels): + img = np.asarray(block[..., ch]) # NumPy 2D view + + coords = peak_local_max( + img, + min_distance=self.min_distance, + threshold_abs=self.threshold_abs, + exclude_border=False, + ) - logger.info(f"Total detections before NMS: {len(pandas_df)}") - detected_nuclei = nucleus_detection_nms(pandas_df, radius=self.min_distance) - logger.info(f"Total detections after NMS: {len(detected_nuclei)}") + for r, c in coords: + if (rmin <= r < rmax) and (cmin <= c < cmax): + out[r, c, ch] = 1.0 - return detected_nuclei + return out @staticmethod def infer_batch( @@ -426,6 +418,4 @@ def infer_batch( pred = model(patch_imgs_gpu) pred = pred.permute(0, 2, 3, 1).contiguous() - if torch.max(pred) > 0: - print(torch.max(pred), torch.min(pred)) # --- DEBUG --- return pred.cpu().numpy() From f5b18853875fb1734e90620d1da6eb9cb199c376 Mon Sep 17 00:00:00 2001 From: Jiaqi Lv Date: Sat, 22 Nov 2025 14:49:28 +0000 Subject: [PATCH 15/26] update patch mode processing --- test.py | 34 +++++++++++---- tiatoolbox/data/pretrained_model.yaml | 2 - tiatoolbox/models/architecture/mapde.py | 44 ++++++++++++-------- tiatoolbox/models/architecture/sccnn.py | 44 ++++++++++++-------- tiatoolbox/models/engine/nucleus_detector.py | 10 +++-- 5 files changed, 87 insertions(+), 47 deletions(-) diff --git a/test.py b/test.py index 856a0ab91..9460439a4 100644 --- a/test.py +++ b/test.py @@ -1,26 +1,46 @@ -import pathlib +from pathlib import Path from tiatoolbox.models.engine.nucleus_detector import ( NucleusDetector, ) from tiatoolbox.utils import env_detection as toolbox_env +from tiatoolbox.wsicore.wsireader import WSIReader -ON_GPU = not toolbox_env.running_on_ci() and toolbox_env.has_gpu() +from tiatoolbox.utils.misc import imwrite +ON_GPU = not toolbox_env.running_on_ci() and toolbox_env.has_gpu() if __name__ == "__main__": - # model_name = "sccnn-crchisto" - model_name = "mapde-conic" + model_name = "sccnn-crchisto" + # model_name = "mapde-conic" + + + # test_image_path = "/media/u1910100/data/slides/CMU-1-Small-Region.svs" + # reader = 
WSIReader.open(test_image_path) + + # patch_1 = reader.read_region((1500, 1500), level=0, size=(31, 31)) + + # imwrite(Path("/media/u1910100/data/slides/patch_1.png"), patch_1) + + # patch_2 = reader.read_region((1000, 1000), level=0, size=(31, 31)) + # imwrite(Path("/media/u1910100/data/slides/patch_2.png"), patch_2) + + # patches = [ + # Path("/media/u1910100/data/slides/patch_1.png"), + # Path("/media/u1910100/data/slides/patch_2.png"), + # ] + detector = NucleusDetector(model=model_name, batch_size=16, num_workers=8) detector.run( - images=[pathlib.Path("/media/u1910100/data/slides/CMU-1-Small-Region.svs")], + images=[Path("/media/u1910100/data/slides/wsi1_2k_2k.svs")], + # images=patches, patch_mode=False, device="cuda", - save_dir=pathlib.Path("/media/u1910100/data/overlays/test"), + save_dir=Path("/media/u1910100/data/overlays/test"), overwrite=True, output_type="annotationstore", class_dict={0: "nucleus"}, auto_get_mask=True, - memory_threshold=80, + memory_threshold=70, ) diff --git a/tiatoolbox/data/pretrained_model.yaml b/tiatoolbox/data/pretrained_model.yaml index 1a948de1d..048f41337 100644 --- a/tiatoolbox/data/pretrained_model.yaml +++ b/tiatoolbox/data/pretrained_model.yaml @@ -867,7 +867,6 @@ sccnn-crchisto: - { "units": "mpp", "resolution": 0.25 } output_resolutions: - { "units": "mpp", "resolution": 0.25 } - tile_shape: [ 2048, 2048 ] patch_input_shape: [ 31, 31 ] patch_output_shape: [ 13, 13 ] stride_shape: [ 8, 8 ] @@ -891,7 +890,6 @@ sccnn-conic: - { "units": "mpp", "resolution": 0.25 } output_resolutions: - { "units": "mpp", "resolution": 0.25 } - tile_shape: [ 2048, 2048 ] patch_input_shape: [ 31, 31 ] patch_output_shape: [ 13, 13 ] stride_shape: [ 8, 8 ] diff --git a/tiatoolbox/models/architecture/mapde.py b/tiatoolbox/models/architecture/mapde.py index f3d27461c..d02c5a6c3 100644 --- a/tiatoolbox/models/architecture/mapde.py +++ b/tiatoolbox/models/architecture/mapde.py @@ -239,34 +239,44 @@ def forward(self: MapDe, input_tensor: torch.Tensor) -> torch.Tensor: def postproc( self: MapDe, block: np.ndarray, - block_info: dict, - depth_h: int, - depth_w: int, + block_info: dict | None = None, + depth_h: int = 0, + depth_w: int = 0, ) -> np.ndarray: - """Runs inside Dask.da.map_overlap on a padded NumPy block: (h_pad, w_pad, C). + """ MapDe post-processing function. - Builds a processed mask per channel, runs peak_local_max then - writes 1.0 at centroid pixels. - Keeps only centroids whose (row,col) lie in the interior window: + Builds a processed mask per input channel, runs peak_local_max then + writes 1.0 at peak pixels. + + Can be called inside Dask.da.map_overlap on a padded NumPy block: (h_pad, w_pad, C) + to process large prediction maps in chunks. Keeps only centroids whose (row,col) + lie in the interior window: rows [depth_h : depth_h + core_h), cols [depth_w : depth_w + core_w) - Returns same spatial shape as input block: (h_pad, w_pad, C), float32. + + Returns same spatial shape as the input block Args: - block: NumPy array (H, W, C) with padded block data. - block_info: Dask block info dict. - depth_h: Halo size in pixels for height (rows). - depth_w: Halo size in pixels for width (cols). + block: NumPy array (H, W, C). + block_info: Dask block info dict. Only used when called inside dask.array.map_overlap. + depth_h: Halo size in pixels for height (rows). + Only used when it's called inside dask.array.map_overlap. + depth_w: Halo size in pixels for width (cols). + Only used when it's called inside dask.array.map_overlap. 
Returns: - out: NumPy array (H, W, C) with 1 at centroids, 0 elsewhere. + out: NumPy array (H, W, C) with 1.0 at peaks, 0 elsewhere. """ block_height, block_width, block_channels = block.shape # --- derive core (pre-overlap) size for THIS block --- - info = block_info[0] - locs = info["array-location"] # a list of (start, stop) coordinates per axis - core_h = int(locs[0][1] - locs[0][0]) # r1 - r0 - core_w = int(locs[1][1] - locs[1][0]) + if block_info is None: + core_h = block_height - 2 * depth_h + core_w = block_width - 2 * depth_w + else: + info = block_info[0] + locs = info["array-location"] # a list of (start, stop) coordinates per axis + core_h = int(locs[0][1] - locs[0][0]) # r1 - r0 + core_w = int(locs[1][1] - locs[1][0]) rmin, rmax = depth_h, depth_h + core_h cmin, cmax = depth_w, depth_w + core_w diff --git a/tiatoolbox/models/architecture/sccnn.py b/tiatoolbox/models/architecture/sccnn.py index 9700d6884..55d160f50 100644 --- a/tiatoolbox/models/architecture/sccnn.py +++ b/tiatoolbox/models/architecture/sccnn.py @@ -329,34 +329,44 @@ def spatially_constrained_layer1( def postproc( self: SCCNN, block: np.ndarray, - block_info: dict, - depth_h: int, - depth_w: int, + block_info: dict | None = None, + depth_h: int = 0, + depth_w: int = 0, ) -> np.ndarray: - """Runs inside Dask.da.map_overlap on a padded NumPy block: (h_pad, w_pad, C). + """ SCCNN post-processing function. - Builds a processed mask per channel, runs peak_local_max then - writes 1.0 at centroid pixels. - Keeps only centroids whose (row,col) lie in the interior window: + Builds a processed mask per input channel, runs peak_local_max then + writes 1.0 at peak pixels. + + Can be called inside Dask.da.map_overlap on a padded NumPy block: (h_pad, w_pad, C) + to process large prediction maps in chunks. Keeps only centroids whose (row,col) + lie in the interior window: rows [depth_h : depth_h + core_h), cols [depth_w : depth_w + core_w) - Returns same spatial shape as input block: (h_pad, w_pad, C), float32. + + Returns same spatial shape as the input block Args: - block: NumPy array (H, W, C) with padded block data. - block_info: Dask block info dict. - depth_h: Halo size in pixels for height (rows). - depth_w: Halo size in pixels for width (cols). + block: NumPy array (H, W, C). + block_info: Dask block info dict. Only used when called inside dask.array.map_overlap. + depth_h: Halo size in pixels for height (rows). + Only used when it's called inside dask.array.map_overlap. + depth_w: Halo size in pixels for width (cols). + Only used when it's called inside dask.array.map_overlap. Returns: - out: NumPy array (H, W, C) with 1 at centroids, 0 elsewhere. + out: NumPy array (H, W, C) with 1.0 at peaks, 0 elsewhere. 
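To illustrate the two call paths of the reworked postproc (standalone on a single patch output, or as the kernel of dask.array.map_overlap with a halo), here is a condensed NumPy-only sketch. The input and parameter values are synthetic/illustrative, not the pretrained defaults.

import numpy as np
from skimage.feature import peak_local_max

def postproc_sketch(block, min_distance=4, threshold_abs=0.5, depth_h=0, depth_w=0):
    # Standalone path: block_info is None, so the core is the block minus the halo.
    h, w, c = block.shape
    core_h, core_w = h - 2 * depth_h, w - 2 * depth_w
    out = np.zeros_like(block, dtype=np.float32)
    for ch in range(c):
        for r, col in peak_local_max(block[..., ch], min_distance=min_distance,
                                     threshold_abs=threshold_abs, exclude_border=False):
            # Keep only peaks in the interior window; halo peaks belong to neighbouring blocks.
            if depth_h <= r < depth_h + core_h and depth_w <= col < depth_w + core_w:
                out[r, col, ch] = 1.0
    return out

demo = np.zeros((40, 40, 1), dtype=np.float32)
demo[2, 2, 0] = 0.9      # falls inside a 4-pixel halo -> dropped
demo[20, 20, 0] = 0.9    # interior -> kept
print(np.argwhere(postproc_sketch(demo, depth_h=4, depth_w=4) > 0))  # [[20 20  0]]

When the same function is driven from post_process_wsi, the interior window is derived from the "array-location" entry of block_info instead of the halo arithmetic used in this standalone sketch.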
""" block_height, block_width, block_channels = block.shape # --- derive core (pre-overlap) size for THIS block --- - info = block_info[0] - locs = info["array-location"] # a list of (start, stop) coordinates per axis - core_h = int(locs[0][1] - locs[0][0]) # r1 - r0 - core_w = int(locs[1][1] - locs[1][0]) + if block_info is None: + core_h = block_height - 2 * depth_h + core_w = block_width - 2 * depth_w + else: + info = block_info[0] + locs = info["array-location"] # a list of (start, stop) coordinates per axis + core_h = int(locs[0][1] - locs[0][0]) # r1 - r0 + core_w = int(locs[1][1] - locs[1][0]) rmin, rmax = depth_h, depth_h + core_h cmin, cmax = depth_w, depth_w + core_w diff --git a/tiatoolbox/models/engine/nucleus_detector.py b/tiatoolbox/models/engine/nucleus_detector.py index 4c020d3f1..f6b1817f3 100644 --- a/tiatoolbox/models/engine/nucleus_detector.py +++ b/tiatoolbox/models/engine/nucleus_detector.py @@ -96,11 +96,11 @@ def __init__( def post_process_patches( self: NucleusDetector, - raw_predictions: da.Array, + raw_predictions: list[da.Array], prediction_shape: tuple[int, ...], prediction_dtype: type, **kwargs: Unpack[SemanticSegmentorRunParams], - ) -> list[pd.DataFrame]: + ) -> list[np.ndarray]: """Define how to post-process patch predictions. Args: @@ -117,7 +117,7 @@ def post_process_patches( _ = prediction_dtype batch_predictions = [] - for i in range(raw_predictions.shape[0]): + for i in range(len(raw_predictions)): batch_predictions.append(self.model.postproc_func(raw_predictions[i])) return batch_predictions @@ -233,13 +233,15 @@ def save_predictions( if self.patch_mode: save_paths = [] for i, predictions in enumerate(processed_predictions["predictions"]): + predictions_da = da.from_array(predictions, chunks=predictions.shape) + if isinstance(self.images[i], Path): output_path = save_path.parent / (self.images[i].stem + ".db") else: output_path = save_path.parent / (str(i) + ".db") out_file = self.write_centroids_to_store( - predictions, + predictions_da, scale_factor=scale_factor, class_dict=class_dict, save_path=output_path, From 551e43c472527889f5a0a1b67d04d9a7f2545eb3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 22 Nov 2025 14:49:56 +0000 Subject: [PATCH 16/26] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- test.py | 5 ----- tiatoolbox/models/architecture/mapde.py | 12 +++++++----- tiatoolbox/models/architecture/sccnn.py | 12 +++++++----- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/test.py b/test.py index 9460439a4..07fca9bd1 100644 --- a/test.py +++ b/test.py @@ -4,9 +4,6 @@ NucleusDetector, ) from tiatoolbox.utils import env_detection as toolbox_env -from tiatoolbox.wsicore.wsireader import WSIReader - -from tiatoolbox.utils.misc import imwrite ON_GPU = not toolbox_env.running_on_ci() and toolbox_env.has_gpu() @@ -14,7 +11,6 @@ model_name = "sccnn-crchisto" # model_name = "mapde-conic" - # test_image_path = "/media/u1910100/data/slides/CMU-1-Small-Region.svs" # reader = WSIReader.open(test_image_path) @@ -30,7 +26,6 @@ # Path("/media/u1910100/data/slides/patch_2.png"), # ] - detector = NucleusDetector(model=model_name, batch_size=16, num_workers=8) detector.run( images=[Path("/media/u1910100/data/slides/wsi1_2k_2k.svs")], diff --git a/tiatoolbox/models/architecture/mapde.py b/tiatoolbox/models/architecture/mapde.py index d02c5a6c3..b60d39750 100644 --- a/tiatoolbox/models/architecture/mapde.py +++ 
b/tiatoolbox/models/architecture/mapde.py @@ -243,13 +243,13 @@ def postproc( depth_h: int = 0, depth_w: int = 0, ) -> np.ndarray: - """ MapDe post-processing function. + """MapDe post-processing function. Builds a processed mask per input channel, runs peak_local_max then writes 1.0 at peak pixels. Can be called inside Dask.da.map_overlap on a padded NumPy block: (h_pad, w_pad, C) - to process large prediction maps in chunks. Keeps only centroids whose (row,col) + to process large prediction maps in chunks. Keeps only centroids whose (row,col) lie in the interior window: rows [depth_h : depth_h + core_h), cols [depth_w : depth_w + core_w) @@ -258,9 +258,9 @@ def postproc( Args: block: NumPy array (H, W, C). block_info: Dask block info dict. Only used when called inside dask.array.map_overlap. - depth_h: Halo size in pixels for height (rows). + depth_h: Halo size in pixels for height (rows). Only used when it's called inside dask.array.map_overlap. - depth_w: Halo size in pixels for width (cols). + depth_w: Halo size in pixels for width (cols). Only used when it's called inside dask.array.map_overlap. Returns: @@ -274,7 +274,9 @@ def postproc( core_w = block_width - 2 * depth_w else: info = block_info[0] - locs = info["array-location"] # a list of (start, stop) coordinates per axis + locs = info[ + "array-location" + ] # a list of (start, stop) coordinates per axis core_h = int(locs[0][1] - locs[0][0]) # r1 - r0 core_w = int(locs[1][1] - locs[1][0]) diff --git a/tiatoolbox/models/architecture/sccnn.py b/tiatoolbox/models/architecture/sccnn.py index 55d160f50..d336965e6 100644 --- a/tiatoolbox/models/architecture/sccnn.py +++ b/tiatoolbox/models/architecture/sccnn.py @@ -333,13 +333,13 @@ def postproc( depth_h: int = 0, depth_w: int = 0, ) -> np.ndarray: - """ SCCNN post-processing function. + """SCCNN post-processing function. Builds a processed mask per input channel, runs peak_local_max then writes 1.0 at peak pixels. Can be called inside Dask.da.map_overlap on a padded NumPy block: (h_pad, w_pad, C) - to process large prediction maps in chunks. Keeps only centroids whose (row,col) + to process large prediction maps in chunks. Keeps only centroids whose (row,col) lie in the interior window: rows [depth_h : depth_h + core_h), cols [depth_w : depth_w + core_w) @@ -348,9 +348,9 @@ def postproc( Args: block: NumPy array (H, W, C). block_info: Dask block info dict. Only used when called inside dask.array.map_overlap. - depth_h: Halo size in pixels for height (rows). + depth_h: Halo size in pixels for height (rows). Only used when it's called inside dask.array.map_overlap. - depth_w: Halo size in pixels for width (cols). + depth_w: Halo size in pixels for width (cols). Only used when it's called inside dask.array.map_overlap. 
Returns: @@ -364,7 +364,9 @@ def postproc( core_w = block_width - 2 * depth_w else: info = block_info[0] - locs = info["array-location"] # a list of (start, stop) coordinates per axis + locs = info[ + "array-location" + ] # a list of (start, stop) coordinates per axis core_h = int(locs[0][1] - locs[0][0]) # r1 - r0 core_w = int(locs[1][1] - locs[1][0]) From 12f985aa8758b8815328da6476cf5216baf9665e Mon Sep 17 00:00:00 2001 From: Jiaqi Lv Date: Sat, 22 Nov 2025 15:43:04 +0000 Subject: [PATCH 17/26] tidy up code --- test.py | 5 +- .../engines/test_nucleus_detection_engine.py | 91 ++++++++++--------- tests/models/test_arch_mapde.py | 6 +- tests/models/test_arch_sccnn.py | 10 +- tiatoolbox/data/pretrained_model.yaml | 12 +++ tiatoolbox/models/architecture/mapde.py | 3 +- tiatoolbox/models/architecture/sccnn.py | 2 + tiatoolbox/models/engine/nucleus_detector.py | 76 +++++----------- 8 files changed, 106 insertions(+), 99 deletions(-) diff --git a/test.py b/test.py index 07fca9bd1..e75218c14 100644 --- a/test.py +++ b/test.py @@ -8,8 +8,8 @@ ON_GPU = not toolbox_env.running_on_ci() and toolbox_env.has_gpu() if __name__ == "__main__": - model_name = "sccnn-crchisto" - # model_name = "mapde-conic" + # model_name = "sccnn-crchisto" + model_name = "mapde-conic" # test_image_path = "/media/u1910100/data/slides/CMU-1-Small-Region.svs" # reader = WSIReader.open(test_image_path) @@ -35,7 +35,6 @@ save_dir=Path("/media/u1910100/data/overlays/test"), overwrite=True, output_type="annotationstore", - class_dict={0: "nucleus"}, auto_get_mask=True, memory_threshold=70, ) diff --git a/tests/engines/test_nucleus_detection_engine.py b/tests/engines/test_nucleus_detection_engine.py index 1bca39362..cb875f475 100644 --- a/tests/engines/test_nucleus_detection_engine.py +++ b/tests/engines/test_nucleus_detection_engine.py @@ -1,57 +1,66 @@ """Tests for NucleusDetector.""" from tiatoolbox.utils import env_detection as toolbox_env +import pathlib +import shutil +from tiatoolbox.models.engine.nucleus_detector import NucleusDetector +import pandas as pd +import pytest +from tiatoolbox.annotation.storage import SQLiteStore -ON_GPU = not toolbox_env.running_on_ci() and toolbox_env.has_gpu() +device = "cuda" if toolbox_env.has_gpu() else "cpu" -# def _rm_dir(path): -# """Helper func to remove directory.""" -# if pathlib.Path(path).exists(): -# shutil.rmtree(path, ignore_errors=True) +def _rm_dir(path): + """Helper func to remove directory.""" + if pathlib.Path(path).exists(): + shutil.rmtree(path, ignore_errors=True) -# def check_output(path): -# """Check NucleusDetector output.""" -# coordinates = pd.read_csv(path) -# assert coordinates.x[0] == pytest.approx(53, abs=2) -# assert coordinates.x[1] == pytest.approx(55, abs=2) -# assert coordinates.y[0] == pytest.approx(107, abs=2) -# assert coordinates.y[1] == pytest.approx(127, abs=2) +def check_output(path): + """Check NucleusDetector output.""" + store = SQLiteStore.open(path) + # coordinates = store.to_dataframe() + for item in store.values(): + geometry = item.geometry + print(geometry.centroid) + break + # assert coordinates.x[0] == pytest.approx(53, abs=2) + # assert coordinates.x[1] == pytest.approx(55, abs=2) + # assert coordinates.y[0] == pytest.approx(107, abs=2) + # assert coordinates.y[1] == pytest.approx(127, abs=2) -# def test_nucleus_detector_engine(remote_sample, tmp_path): -# """Test for nucleus detection engine.""" -# mini_wsi_svs = pathlib.Path(remote_sample("wsi4_512_512_svs")) +def test_nucleus_detector_wsi(remote_sample, tmp_path): + """Test for nucleus 
detection engine.""" + mini_wsi_svs = pathlib.Path(remote_sample("wsi4_512_512_svs")) -# nucleus_detector = NucleusDetector(pretrained_model="mapde-conic") -# _ = nucleus_detector.predict( -# [mini_wsi_svs], -# mode="wsi", -# save_dir=tmp_path / "output", -# on_gpu=ON_GPU, -# ) + pretrained_model = "mapde-conic" -# check_output(tmp_path / "output" / "0.locations.0.csv") + nucleus_detector = NucleusDetector(model=pretrained_model) + _ = nucleus_detector.run( + patch_mode=False, + device=device, + output_type="annotationstore", + auto_get_mask=True, + memory_threshold=50, + images=[mini_wsi_svs], + save_dir=tmp_path / "output", + ) -# _rm_dir(tmp_path / "output") + check_output(tmp_path / "output" / "wsi4_512_512_svs.db") -# ioconfig = IONucleusDetectorConfig( -# input_resolutions=[{"units": "mpp", "resolution": 0.5}], -# output_resolutions=[{"units": "mpp", "resolution": 0.5}], -# save_resolution=None, -# patch_input_shape=[252, 252], -# patch_output_shape=[252, 252], -# stride_shape=[150, 150], -# ) + _rm_dir(tmp_path / "output") -# nucleus_detector = NucleusDetector(pretrained_model="mapde-conic") -# _ = nucleus_detector.predict( -# [mini_wsi_svs], -# mode="wsi", -# save_dir=tmp_path / "output", -# on_gpu=ON_GPU, -# ioconfig=ioconfig, -# ) -# check_output(tmp_path / "output" / "0.locations.0.csv") + + # nucleus_detector = NucleusDetector(pretrained_model="mapde-conic") + # _ = nucleus_detector.predict( + # [mini_wsi_svs], + # mode="wsi", + # save_dir=tmp_path / "output", + # on_gpu=ON_GPU, + # ioconfig=ioconfig, + # ) + + # check_output(tmp_path / "output" / "0.locations.0.csv") diff --git a/tests/models/test_arch_mapde.py b/tests/models/test_arch_mapde.py index 19163f593..e3bba2945 100644 --- a/tests/models/test_arch_mapde.py +++ b/tests/models/test_arch_mapde.py @@ -11,6 +11,7 @@ from tiatoolbox.utils import env_detection as toolbox_env from tiatoolbox.utils.misc import select_device from tiatoolbox.wsicore.wsireader import WSIReader +from tiatoolbox.models.engine.nucleus_detector import NucleusDetector ON_GPU = toolbox_env.has_gpu() @@ -48,7 +49,10 @@ def test_functionality(remote_sample: Callable) -> None: batch = torch.from_numpy(patch)[None] output = model.infer_batch(model, batch, device=select_device(on_gpu=ON_GPU)) output = model.postproc(output[0]) - assert np.all(output[0:2] == [[19, 171], [53, 89]]) + xs, ys, _, _ = NucleusDetector._centroid_maps_to_detection_records(output, None) + + np.testing.assert_array_equal(xs[0:2], np.array([242, 192])) + np.testing.assert_array_equal(ys[0:2], np.array([10, 13])) Path(weights_path).unlink() diff --git a/tests/models/test_arch_sccnn.py b/tests/models/test_arch_sccnn.py index a456faff5..f23c4c565 100644 --- a/tests/models/test_arch_sccnn.py +++ b/tests/models/test_arch_sccnn.py @@ -10,6 +10,7 @@ from tiatoolbox.utils import env_detection from tiatoolbox.utils.misc import select_device from tiatoolbox.wsicore.wsireader import WSIReader +from tiatoolbox.models.engine.nucleus_detector import NucleusDetector def _load_sccnn(name: str) -> SCCNN: @@ -48,7 +49,10 @@ def test_functionality(remote_sample: Callable) -> None: device=select_device(on_gpu=env_detection.has_gpu()), ) output = model.postproc(output[0]) - np.testing.assert_array_equal(output, np.array([[8, 7]])) + xs, ys, _, _ = NucleusDetector._centroid_maps_to_detection_records(output, None) + + np.testing.assert_array_equal(xs, np.array([8])) + np.testing.assert_array_equal(ys, np.array([7])) model = _load_sccnn(name="sccnn-conic") output = model.infer_batch( @@ -57,4 +61,6 @@ def 
test_functionality(remote_sample: Callable) -> None: device=select_device(on_gpu=env_detection.has_gpu()), ) output = model.postproc(output[0]) - np.testing.assert_array_equal(output, np.array([[7, 8]])) + xs, ys, _, _ = NucleusDetector._centroid_maps_to_detection_records(output, None) + np.testing.assert_array_equal(xs, np.array([7])) + np.testing.assert_array_equal(ys, np.array([8])) \ No newline at end of file diff --git a/tiatoolbox/data/pretrained_model.yaml b/tiatoolbox/data/pretrained_model.yaml index 048f41337..693241029 100644 --- a/tiatoolbox/data/pretrained_model.yaml +++ b/tiatoolbox/data/pretrained_model.yaml @@ -815,6 +815,9 @@ mapde-crchisto: threshold_abs: 250 num_classes: 1 postproc_tile_shape: [ 2048, 2048 ] + output_class_dict: { + 0: "nucleus" + } ioconfig: class: io_config.IOSegmentorConfig kwargs: @@ -837,6 +840,9 @@ mapde-conic: threshold_abs: 205 num_classes: 1 postproc_tile_shape: [ 2048, 2048 ] + output_class_dict: { + 0: "nucleus" + } ioconfig: class: io_config.IOSegmentorConfig kwargs: @@ -860,6 +866,9 @@ sccnn-crchisto: threshold_abs: 0.20 patch_output_shape: [ 13, 13 ] postproc_tile_shape: [ 2048, 2048 ] + output_class_dict: { + 0: "nucleus" + } ioconfig: class: io_config.IOSegmentorConfig kwargs: @@ -883,6 +892,9 @@ sccnn-conic: threshold_abs: 0.05 patch_output_shape: [ 13, 13 ] postproc_tile_shape: [ 2048, 2048 ] + output_class_dict: { + 0: "nucleus" + } ioconfig: class: io_config.IOSegmentorConfig kwargs: diff --git a/tiatoolbox/models/architecture/mapde.py b/tiatoolbox/models/architecture/mapde.py index b60d39750..8ee11c651 100644 --- a/tiatoolbox/models/architecture/mapde.py +++ b/tiatoolbox/models/architecture/mapde.py @@ -79,6 +79,7 @@ def __init__( threshold_abs: float = 250, num_classes: int = 1, postproc_tile_shape: tuple[int, int] = (2048, 2048), + output_class_dict: dict[int, str] | None = None, ) -> None: """Initialize :class:`MapDe`.""" super().__init__( @@ -86,7 +87,7 @@ def __init__( num_input_channels=num_input_channels, out_activation="relu", ) - + self.output_class_dict = output_class_dict self.postproc_tile_shape = postproc_tile_shape dist_filter = np.array( diff --git a/tiatoolbox/models/architecture/sccnn.py b/tiatoolbox/models/architecture/sccnn.py index d336965e6..2325868de 100644 --- a/tiatoolbox/models/architecture/sccnn.py +++ b/tiatoolbox/models/architecture/sccnn.py @@ -92,6 +92,7 @@ def __init__( min_distance: int = 6, threshold_abs: float = 0.20, postproc_tile_shape: tuple[int, int] = (2048, 2048), + output_class_dict: dict[int, str] | None = None, ) -> None: """Initialize :class:`SCCNN`.""" super().__init__() @@ -101,6 +102,7 @@ def __init__( self.out_height = out_height self.out_width = out_width self.postproc_tile_shape = postproc_tile_shape + self.output_class_dict = output_class_dict # Create mesh grid and convert to 3D vector x, y = torch.meshgrid( diff --git a/tiatoolbox/models/engine/nucleus_detector.py b/tiatoolbox/models/engine/nucleus_detector.py index f6b1817f3..3adda90c9 100644 --- a/tiatoolbox/models/engine/nucleus_detector.py +++ b/tiatoolbox/models/engine/nucleus_detector.py @@ -3,7 +3,7 @@ from __future__ import annotations from pathlib import Path -from typing import TYPE_CHECKING, Unpack +from typing import TYPE_CHECKING import dask import dask.array as da @@ -11,7 +11,6 @@ import pandas as pd from dask.diagnostics.progress import ProgressBar from shapely.geometry import Point - from tiatoolbox import logger from tiatoolbox.annotation import AnnotationStore from tiatoolbox.annotation.storage import Annotation, 
SQLiteStore @@ -19,10 +18,9 @@ SemanticSegmentor, SemanticSegmentorRunParams, ) -from tiatoolbox.models.models_abc import ModelABC if TYPE_CHECKING: # pragma: no cover - from tiatoolbox.models.models_abc import ModelABC + from typing import Unpack, Tuple class NucleusDetector(SemanticSegmentor): @@ -73,27 +71,6 @@ class NucleusDetector(SemanticSegmentor): ... ) """ - from tiatoolbox.wsicore.wsireader import WSIReader - - def __init__( - self: NucleusDetector, - model: str | ModelABC, - batch_size: int = 8, - num_workers: int = 0, - weights: str | Path | None = None, - *, - device: str = "cpu", - verbose: bool = True, - ): - super().__init__( - model=model, - batch_size=batch_size, - num_workers=num_workers, - weights=weights, - device=device, - verbose=verbose, - ) - def post_process_patches( self: NucleusDetector, raw_predictions: list[da.Array], @@ -158,7 +135,8 @@ def post_process_wsi( rechunked_prediction_map = raw_predictions.rechunk( (self.model.postproc_tile_shape[0], self.model.postproc_tile_shape[1], -1) ) - logger.info(f"Post-processing chunk size: {rechunked_prediction_map.chunks}") + logger.info(f"Post-processing tile size: {rechunked_prediction_map.chunks}") + logger.info(f"Post-processing tiles overlap: (h={depth_h}, w={depth_w})") detection_map = da.map_overlap( rechunked_prediction_map, @@ -199,17 +177,13 @@ def save_predictions( Additional runtime parameters including: - scale_factor (tuple[float, float]): For coordinate transformation. - class_dict (dict): Mapping of class indices to names. - - return_probabilities (bool): Whether to save probability maps. Returns: - dict | AnnotationStore | Path: - - If output_type is "dict": returns predictions as a dictionary. - - If output_type is "zarr": returns path to saved Zarr file. - - If output_type is "annotationstore": returns AnnotationStore - or path to .db file. + AnnotationStore | Path: + - returns AnnotationStore or path to .db file. """ - # Conversion to annotationstore uses a different function for SemanticSegmentor + # Only "annotationstore" output type is supported for NucleusDetector if output_type != "annotationstore": logger.warning( f"Output type '{output_type}' is not supported by NucleusDetector. " @@ -221,15 +195,14 @@ def save_predictions( scale_factor = kwargs.get("scale_factor", (1.0, 1.0)) # class_dict set from kwargs class_dict = kwargs.get("class_dict") + if class_dict is None: + class_dict = self.model.output_class_dict # Need to add support for zarr conversion. 
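Once save_predictions has written a .db file, the detections can be read back with the same SQLiteStore API used in the tests in this patch; a small sketch, assuming the store was produced by the writer above (the file name is illustrative):

from tiatoolbox.annotation.storage import SQLiteStore

store = SQLiteStore.open("wsi4_512_512.db")        # illustrative path
for ann in store.values():
    x, y = ann.geometry.x, ann.geometry.y          # each detection is stored as a Point
    cls = ann.properties.get("type")               # class label written per detection
    prob = ann.properties.get("probability")       # detection probability
    print(x, y, cls, prob)
store.close()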
save_paths = [] logger.info("Saving predictions as AnnotationStore.") - scale_factor = kwargs.get("scale_factor", (1.0, 1.0)) - class_dict = kwargs.get("class_dict") - if self.patch_mode: save_paths = [] for i, predictions in enumerate(processed_predictions["predictions"]): @@ -240,7 +213,7 @@ def save_predictions( else: output_path = save_path.parent / (str(i) + ".db") - out_file = self.write_centroids_to_store( + out_file = self.write_centroid_maps_to_store( predictions_da, scale_factor=scale_factor, class_dict=class_dict, @@ -249,7 +222,7 @@ def save_predictions( save_paths.append(out_file) return save_paths - return self.write_centroids_to_store( + return self.write_centroid_maps_to_store( processed_predictions["predictions"], scale_factor=scale_factor, save_path=save_path, @@ -284,8 +257,6 @@ def nucleus_detection_nms( sub = df.sort_values("prob", ascending=False).reset_index(drop=True) # Coordinates as float64 for distance math - coords = sub[["x", "y"]].to_numpy(dtype=np.float64) - r2 = float(radius) * float(radius) coords = sub[["x", "y"]].to_numpy(dtype=np.float64) r = float(radius) @@ -333,25 +304,28 @@ def nucleus_detection_nms( return kept @staticmethod - def _chunk_to_records( - block: np.ndarray, block_info: dict + def _centroid_maps_to_detection_records( + block: np.ndarray, block_info: dict | None = None ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: - """Convert a Dask block of detection maps to detection records. + """Convert a block of centroid maps to detection records. - Each block is a NumPy array of shape (h, w, C) containing detection scores + Each block is a NumPy array of shape (h, w, C) containing detection probabilities of each class c. This function finds non-zero detections and returns their global coordinates, class IDs (channel), and probabilities. Args: - block: NumPy array (h, w, C) for this chunk (no halos). + block: NumPy array (h, w, C) for this chunk. block_info: Dask block info dict. 
Returns: Tuple of ([x_coords], [y_coords], [class_ids], [probs]) """ # block: (h, w, C) NumPy chunk (post-stitching, no halos) - info = block_info[0] if 0 in block_info else block_info[None] - (r0, r1), (c0, c1), _ = info["array-location"] # global interior start/stop + if block_info is not None: + info = block_info[0] + (r0, _), (c0, _), _ = info["array-location"] # global interior start/stop + else: + r0, c0 = 0, 0 # find the coordinates and channel indices of nonzeros ys, xs, cs = np.nonzero(block) @@ -374,7 +348,7 @@ def _chunk_to_records( return (x, y, t, p) @staticmethod - def _write_records_to_store( + def _write_detection_records_to_store( recs: Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray], store: SQLiteStore, scale_factor: Tuple[float, float], @@ -427,7 +401,7 @@ def make_points(xb, yb): return written @staticmethod - def write_centroids_to_store( + def write_centroid_maps_to_store( detection_maps: da.Array, scale_factor: tuple[float, float] = (1.0, 1.0), class_dict: dict | None = None, @@ -453,7 +427,7 @@ def write_centroids_to_store( # one delayed record-tuple per chunk recs_delayed = ( detection_maps.map_blocks( - NucleusDetector._chunk_to_records, + NucleusDetector._centroid_maps_to_detection_records, dtype=object, # we return Python tuples block_info=True, ) @@ -466,7 +440,7 @@ def write_centroids_to_store( # one delayed writer per chunk (returns number of detections written) writes = [ - dask.delayed(NucleusDetector._write_records_to_store)( + dask.delayed(NucleusDetector._write_detection_records_to_store)( recs, store, scale_factor, class_dict, batch_size ) for recs in recs_delayed From 05b2c7d39a743ada88521e3212f032c70cb0bc2d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 22 Nov 2025 15:43:30 +0000 Subject: [PATCH 18/26] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/engines/test_nucleus_detection_engine.py | 9 +++------ tests/models/test_arch_mapde.py | 2 +- tests/models/test_arch_sccnn.py | 4 ++-- tiatoolbox/models/engine/nucleus_detector.py | 9 +++++---- 4 files changed, 11 insertions(+), 13 deletions(-) diff --git a/tests/engines/test_nucleus_detection_engine.py b/tests/engines/test_nucleus_detection_engine.py index cb875f475..e99c9f2fd 100644 --- a/tests/engines/test_nucleus_detection_engine.py +++ b/tests/engines/test_nucleus_detection_engine.py @@ -1,12 +1,11 @@ """Tests for NucleusDetector.""" -from tiatoolbox.utils import env_detection as toolbox_env import pathlib import shutil -from tiatoolbox.models.engine.nucleus_detector import NucleusDetector -import pandas as pd -import pytest + from tiatoolbox.annotation.storage import SQLiteStore +from tiatoolbox.models.engine.nucleus_detector import NucleusDetector +from tiatoolbox.utils import env_detection as toolbox_env device = "cuda" if toolbox_env.has_gpu() else "cpu" @@ -52,8 +51,6 @@ def test_nucleus_detector_wsi(remote_sample, tmp_path): _rm_dir(tmp_path / "output") - - # nucleus_detector = NucleusDetector(pretrained_model="mapde-conic") # _ = nucleus_detector.predict( # [mini_wsi_svs], diff --git a/tests/models/test_arch_mapde.py b/tests/models/test_arch_mapde.py index e3bba2945..9a876cc57 100644 --- a/tests/models/test_arch_mapde.py +++ b/tests/models/test_arch_mapde.py @@ -8,10 +8,10 @@ from tiatoolbox.models import MapDe from tiatoolbox.models.architecture import fetch_pretrained_weights +from tiatoolbox.models.engine.nucleus_detector import NucleusDetector from 
tiatoolbox.utils import env_detection as toolbox_env from tiatoolbox.utils.misc import select_device from tiatoolbox.wsicore.wsireader import WSIReader -from tiatoolbox.models.engine.nucleus_detector import NucleusDetector ON_GPU = toolbox_env.has_gpu() diff --git a/tests/models/test_arch_sccnn.py b/tests/models/test_arch_sccnn.py index f23c4c565..d0b25b728 100644 --- a/tests/models/test_arch_sccnn.py +++ b/tests/models/test_arch_sccnn.py @@ -7,10 +7,10 @@ from tiatoolbox.models import SCCNN from tiatoolbox.models.architecture import fetch_pretrained_weights +from tiatoolbox.models.engine.nucleus_detector import NucleusDetector from tiatoolbox.utils import env_detection from tiatoolbox.utils.misc import select_device from tiatoolbox.wsicore.wsireader import WSIReader -from tiatoolbox.models.engine.nucleus_detector import NucleusDetector def _load_sccnn(name: str) -> SCCNN: @@ -63,4 +63,4 @@ def test_functionality(remote_sample: Callable) -> None: output = model.postproc(output[0]) xs, ys, _, _ = NucleusDetector._centroid_maps_to_detection_records(output, None) np.testing.assert_array_equal(xs, np.array([7])) - np.testing.assert_array_equal(ys, np.array([8])) \ No newline at end of file + np.testing.assert_array_equal(ys, np.array([8])) diff --git a/tiatoolbox/models/engine/nucleus_detector.py b/tiatoolbox/models/engine/nucleus_detector.py index 3adda90c9..c1ff9dcb8 100644 --- a/tiatoolbox/models/engine/nucleus_detector.py +++ b/tiatoolbox/models/engine/nucleus_detector.py @@ -11,6 +11,7 @@ import pandas as pd from dask.diagnostics.progress import ProgressBar from shapely.geometry import Point + from tiatoolbox import logger from tiatoolbox.annotation import AnnotationStore from tiatoolbox.annotation.storage import Annotation, SQLiteStore @@ -20,7 +21,7 @@ ) if TYPE_CHECKING: # pragma: no cover - from typing import Unpack, Tuple + from typing import Unpack class NucleusDetector(SemanticSegmentor): @@ -306,7 +307,7 @@ def nucleus_detection_nms( @staticmethod def _centroid_maps_to_detection_records( block: np.ndarray, block_info: dict | None = None - ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: """Convert a block of centroid maps to detection records. 
Each block is a NumPy array of shape (h, w, C) containing detection probabilities @@ -349,9 +350,9 @@ def _centroid_maps_to_detection_records( @staticmethod def _write_detection_records_to_store( - recs: Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray], + recs: tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray], store: SQLiteStore, - scale_factor: Tuple[float, float], + scale_factor: tuple[float, float], class_dict: dict[int, str | int] | None, batch_size: int = 5000, ) -> int: From 2afbf8ca2d240cbc4d1bb5e095ab0b8e4f7c6a5d Mon Sep 17 00:00:00 2001 From: Jiaqi Lv Date: Sun, 23 Nov 2025 18:15:48 +0000 Subject: [PATCH 19/26] fix precommit --- test.py | 40 ----------- .../engines/test_nucleus_detection_engine.py | 23 ++---- tiatoolbox/models/architecture/mapde.py | 11 +-- tiatoolbox/models/architecture/sccnn.py | 11 +-- tiatoolbox/models/engine/nucleus_detector.py | 71 +++++++++++-------- 5 files changed, 57 insertions(+), 99 deletions(-) delete mode 100644 test.py diff --git a/test.py b/test.py deleted file mode 100644 index e75218c14..000000000 --- a/test.py +++ /dev/null @@ -1,40 +0,0 @@ -from pathlib import Path - -from tiatoolbox.models.engine.nucleus_detector import ( - NucleusDetector, -) -from tiatoolbox.utils import env_detection as toolbox_env - -ON_GPU = not toolbox_env.running_on_ci() and toolbox_env.has_gpu() - -if __name__ == "__main__": - # model_name = "sccnn-crchisto" - model_name = "mapde-conic" - - # test_image_path = "/media/u1910100/data/slides/CMU-1-Small-Region.svs" - # reader = WSIReader.open(test_image_path) - - # patch_1 = reader.read_region((1500, 1500), level=0, size=(31, 31)) - - # imwrite(Path("/media/u1910100/data/slides/patch_1.png"), patch_1) - - # patch_2 = reader.read_region((1000, 1000), level=0, size=(31, 31)) - # imwrite(Path("/media/u1910100/data/slides/patch_2.png"), patch_2) - - # patches = [ - # Path("/media/u1910100/data/slides/patch_1.png"), - # Path("/media/u1910100/data/slides/patch_2.png"), - # ] - - detector = NucleusDetector(model=model_name, batch_size=16, num_workers=8) - detector.run( - images=[Path("/media/u1910100/data/slides/wsi1_2k_2k.svs")], - # images=patches, - patch_mode=False, - device="cuda", - save_dir=Path("/media/u1910100/data/overlays/test"), - overwrite=True, - output_type="annotationstore", - auto_get_mask=True, - memory_threshold=70, - ) diff --git a/tests/engines/test_nucleus_detection_engine.py b/tests/engines/test_nucleus_detection_engine.py index e99c9f2fd..8b73e9e77 100644 --- a/tests/engines/test_nucleus_detection_engine.py +++ b/tests/engines/test_nucleus_detection_engine.py @@ -2,6 +2,7 @@ import pathlib import shutil +from collections.abc import Callable from tiatoolbox.annotation.storage import SQLiteStore from tiatoolbox.models.engine.nucleus_detector import NucleusDetector @@ -10,27 +11,22 @@ device = "cuda" if toolbox_env.has_gpu() else "cpu" -def _rm_dir(path): +def _rm_dir(path: pathlib.Path) -> None: """Helper func to remove directory.""" if pathlib.Path(path).exists(): shutil.rmtree(path, ignore_errors=True) -def check_output(path): +def check_output(path: pathlib.Path) -> None: """Check NucleusDetector output.""" store = SQLiteStore.open(path) - # coordinates = store.to_dataframe() for item in store.values(): geometry = item.geometry print(geometry.centroid) break - # assert coordinates.x[0] == pytest.approx(53, abs=2) - # assert coordinates.x[1] == pytest.approx(55, abs=2) - # assert coordinates.y[0] == pytest.approx(107, abs=2) - # assert coordinates.y[1] == pytest.approx(127, abs=2) -def 
test_nucleus_detector_wsi(remote_sample, tmp_path): +def test_nucleus_detector_wsi(remote_sample: Callable, tmp_path: pathlib.Path) -> None: """Test for nucleus detection engine.""" mini_wsi_svs = pathlib.Path(remote_sample("wsi4_512_512_svs")) @@ -50,14 +46,3 @@ def test_nucleus_detector_wsi(remote_sample, tmp_path): check_output(tmp_path / "output" / "wsi4_512_512_svs.db") _rm_dir(tmp_path / "output") - - # nucleus_detector = NucleusDetector(pretrained_model="mapde-conic") - # _ = nucleus_detector.predict( - # [mini_wsi_svs], - # mode="wsi", - # save_dir=tmp_path / "output", - # on_gpu=ON_GPU, - # ioconfig=ioconfig, - # ) - - # check_output(tmp_path / "output" / "0.locations.0.csv") diff --git a/tiatoolbox/models/architecture/mapde.py b/tiatoolbox/models/architecture/mapde.py index 8ee11c651..a623bc160 100644 --- a/tiatoolbox/models/architecture/mapde.py +++ b/tiatoolbox/models/architecture/mapde.py @@ -249,16 +249,17 @@ def postproc( Builds a processed mask per input channel, runs peak_local_max then writes 1.0 at peak pixels. - Can be called inside Dask.da.map_overlap on a padded NumPy block: (h_pad, w_pad, C) - to process large prediction maps in chunks. Keeps only centroids whose (row,col) - lie in the interior window: - rows [depth_h : depth_h + core_h), cols [depth_w : depth_w + core_w) + Can be called inside Dask.da.map_overlap on a padded NumPy block: + (h_pad, w_pad, C) to process large prediction maps in chunks. + Keeps only centroids whose (row,col) lie in the interior window: + rows [depth_h : depth_h + core_h), cols [depth_w : depth_w + core_w) Returns same spatial shape as the input block Args: block: NumPy array (H, W, C). - block_info: Dask block info dict. Only used when called inside dask.array.map_overlap. + block_info: Dask block info dict. + Only used when called inside dask.array.map_overlap. depth_h: Halo size in pixels for height (rows). Only used when it's called inside dask.array.map_overlap. depth_w: Halo size in pixels for width (cols). diff --git a/tiatoolbox/models/architecture/sccnn.py b/tiatoolbox/models/architecture/sccnn.py index 2325868de..8ba6ee12f 100644 --- a/tiatoolbox/models/architecture/sccnn.py +++ b/tiatoolbox/models/architecture/sccnn.py @@ -340,16 +340,17 @@ def postproc( Builds a processed mask per input channel, runs peak_local_max then writes 1.0 at peak pixels. - Can be called inside Dask.da.map_overlap on a padded NumPy block: (h_pad, w_pad, C) - to process large prediction maps in chunks. Keeps only centroids whose (row,col) - lie in the interior window: - rows [depth_h : depth_h + core_h), cols [depth_w : depth_w + core_w) + Can be called inside Dask.da.map_overlap on a padded NumPy block: + (h_pad, w_pad, C) to process large prediction maps in chunks. + Keeps only centroids whose (row,col) lie in the interior window: + rows [depth_h : depth_h + core_h), cols [depth_w : depth_w + core_w) Returns same spatial shape as the input block Args: block: NumPy array (H, W, C). - block_info: Dask block info dict. Only used when called inside dask.array.map_overlap. + block_info: Dask block info dict. Only used when called inside + dask.array.map_overlap. depth_h: Halo size in pixels for height (rows). Only used when it's called inside dask.array.map_overlap. depth_w: Halo size in pixels for width (cols). 
diff --git a/tiatoolbox/models/engine/nucleus_detector.py b/tiatoolbox/models/engine/nucleus_detector.py index c1ff9dcb8..ceb1b1ba4 100644 --- a/tiatoolbox/models/engine/nucleus_detector.py +++ b/tiatoolbox/models/engine/nucleus_detector.py @@ -8,12 +8,10 @@ import dask import dask.array as da import numpy as np -import pandas as pd from dask.diagnostics.progress import ProgressBar from shapely.geometry import Point from tiatoolbox import logger -from tiatoolbox.annotation import AnnotationStore from tiatoolbox.annotation.storage import Annotation, SQLiteStore from tiatoolbox.models.engine.semantic_segmentor import ( SemanticSegmentor, @@ -23,6 +21,10 @@ if TYPE_CHECKING: # pragma: no cover from typing import Unpack + import pandas as pd + + from tiatoolbox.annotation import AnnotationStore + class NucleusDetector(SemanticSegmentor): r"""Nucleus detection engine. @@ -85,28 +87,32 @@ def post_process_patches( raw_predictions (da.Array): The raw predictions from the model. prediction_shape (tuple[int, ...]): The shape of the predictions. prediction_dtype (type): The data type of the predictions. + **kwargs (SemanticSegmentorRunParams): + Additional runtime parameters Returns: - A list of DataFrames containing the post-processed predictions for each patch. + A list of DataFrames containing the post-processed + predictions for each patch. """ _ = kwargs.get("return_probabilities") _ = prediction_shape _ = prediction_dtype - batch_predictions = [] - for i in range(len(raw_predictions)): - batch_predictions.append(self.model.postproc_func(raw_predictions[i])) - return batch_predictions + return [ + self.model.postproc_func(raw_predictions[i]) + for i in range(len(raw_predictions)) + ] def post_process_wsi( self: NucleusDetector, raw_predictions: da.Array, prediction_shape: tuple[int, ...], prediction_dtype: type, - **kwargs: Unpack[SemanticSegmentorRunParams], + **kwargs: Unpack[SemanticSegmentorRunParams], # noqa: ARG002 ) -> da.Array: """Define how to post-process WSI predictions. + Processes the raw prediction dask array using map_overlap to apply the model's post-processing function on each chunk with appropriate overlaps on chunk boundaries. @@ -115,11 +121,14 @@ def post_process_wsi( raw_predictions (da.Array): The raw predictions from the model. prediction_shape (tuple[int, ...]): The shape of the predictions. prediction_dtype (type): The data type of the predictions. + **kwargs (SemanticSegmentorRunParams): + Additional runtime parameters Returns: Post-processed dask array of detections at the WSI level. The array has the same shape and dtype as the input. - Each pixel indicates the presence of a detected nucleus as a probability score. + Each pixel indicates the presence of a detected nucleus + as a probability score. """ logger.info("Post processing WSI predictions in NucleusDetector") @@ -139,7 +148,7 @@ def post_process_wsi( logger.info(f"Post-processing tile size: {rechunked_prediction_map.chunks}") logger.info(f"Post-processing tiles overlap: (h={depth_h}, w={depth_w})") - detection_map = da.map_overlap( + return da.map_overlap( rechunked_prediction_map, self.model.postproc, depth=depth, @@ -150,8 +159,6 @@ def post_process_wsi( depth_w=depth_w, ) - return detection_map - def save_predictions( self: NucleusDetector, processed_predictions: dict, @@ -236,7 +243,8 @@ def nucleus_detection_nms( ) -> pd.DataFrame: """Non-Maximum Suppression across ALL detections. - Keeps the highest-prob detection, removes any other point within 'radius' pixels > overlap_threshold. 
+ Keeps the highest-prob detection, removes any other point + within 'radius' pixels > overlap_threshold. Expects dataframe columns: ['x','y','type','prob']. Args: @@ -247,12 +255,16 @@ def nucleus_detection_nms( Returns: filtered DataFrame with same columns/dtypes. """ + overlap_max = 1.0 + overlap_min = 0.0 if df.empty: return df.copy() if radius <= 0: - raise ValueError("radius must be > 0") - if not (0.0 < overlap_threshold <= 1.0): - raise ValueError("overlap_threshold must be in (0.0, 1.0]") + msg = "radius must be > 0" + raise ValueError(msg) + if not (overlap_min < overlap_threshold <= overlap_max): + msg = f"overlap_threshold must be in (0.0, 1.0], got {overlap_threshold}" + raise ValueError(msg) # Sort by descending probability (highest priority first) sub = df.sort_values("prob", ascending=False).reset_index(drop=True) @@ -286,7 +298,8 @@ def nucleus_detection_nms( d = np.sqrt(d2[cand]) - # Safe cosine argument = (distance ÷ diameter), Clamp for numerical stability + # Safe cosine argument = (distance ÷ diameter) + # Clamp for numerical stability u = np.clip(d / (2.0 * r), -1.0, 1.0) # Exact intersection area of two equal-radius circles. inter = 2.0 * (r * r) * np.arccos(u) - 0.5 * d * np.sqrt( @@ -301,8 +314,7 @@ def nucleus_detection_nms( to_suppress = idx_cand[iou >= overlap_threshold] suppressed[to_suppress] = True - kept = sub.iloc[keep_idx].copy() - return kept + return sub.iloc[keep_idx].copy() @staticmethod def _centroid_maps_to_detection_records( @@ -310,9 +322,9 @@ def _centroid_maps_to_detection_records( ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: """Convert a block of centroid maps to detection records. - Each block is a NumPy array of shape (h, w, C) containing detection probabilities - of each class c. This function finds non-zero detections and returns their - global coordinates, class IDs (channel), and probabilities. + Each block is a NumPy array of shape (h, w, C) containing detection + probabilities of each class c. This function finds non-zero detections + and returns their global coordinates, class IDs (channel), and probabilities. Args: block: NumPy array (h, w, C) for this chunk. @@ -383,8 +395,8 @@ def _write_detection_records_to_store( class_dict = {int(k): int(k) for k in uniq} labels = np.array([class_dict.get(int(k), int(k)) for k in t], dtype=object) - def make_points(xb, yb): - return [Point(int(xx), int(yy)) for xx, yy in zip(xb, yb)] + def make_points(xb: np.ndarray, yb: np.ndarray) -> list[Point]: + return [Point(int(xx), int(yy)) for xx, yy in zip(xb, yb, strict=True)] written = 0 for i in range(0, n, batch_size): @@ -395,7 +407,7 @@ def make_points(xb, yb): Annotation( geometry=pt, properties={"type": lbl, "probability": float(pp)} ) - for pt, lbl, pp in zip(pts, labels[i:j], p[i:j]) + for pt, lbl, pp in zip(pts, labels[i:j], p[i:j], strict=True) ] store.append_many(anns) written += j - i @@ -410,6 +422,7 @@ def write_centroid_maps_to_store( batch_size: int = 5000, ) -> Path | SQLiteStore: """Write post-processed detection maps to an AnnotationStore. + This is done in chunks using Dask for efficiency and to handle large detection maps at WSI level. @@ -417,16 +430,14 @@ def write_centroid_maps_to_store( detection_maps: Dask array (H, W, C) of detection scores. scale_factor: Tuple (sx, sy) to scale coordinates before saving. class_dict: Optional dict mapping class indices to names. - save_path: Optional Path to save the .db file. If None, returns in-memory store. + save_path: Optional Path to save the .db file. 
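The suppression test above relies on the exact overlap of two equal-radius circles. As a sanity check, here is a standalone version of that geometry, assuming the union is the usual 2*pi*r^2 minus the intersection, as in the hunk above; values are illustrative.

import numpy as np

def circle_iou(d, r):
    # IoU of two circles of radius r whose centres are d pixels apart.
    if d >= 2 * r:                        # disjoint circles share no area
        return 0.0
    u = np.clip(d / (2.0 * r), -1.0, 1.0)
    inter = 2.0 * r * r * np.arccos(u) - 0.5 * d * np.sqrt(4.0 * r * r - d * d)
    union = 2.0 * np.pi * r * r - inter
    return float(inter / union)

print(round(circle_iou(0.0, 5.0), 3))   # coincident centres -> 1.0
print(round(circle_iou(5.0, 5.0), 3))   # half a diameter apart -> 0.243
print(round(circle_iou(10.0, 5.0), 3))  # exactly touching -> 0.0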
+ If None, returns in-memory store. batch_size: Number of records to write per batch. Returns: Path to saved .db file if save_path is provided, else in-memory SQLiteStore. """ - # Convert each block to detection records first - # [block_H, block_W, C] -> [xs, ys, classes, probs] - # one delayed record-tuple per chunk - recs_delayed = ( + recs_delayed = ( # Convert each block to detection records detection_maps.map_blocks( NucleusDetector._centroid_maps_to_detection_records, dtype=object, # we return Python tuples From 367295d9787105a8a512935602cd09b8a325565f Mon Sep 17 00:00:00 2001 From: Jiaqi Lv Date: Sun, 23 Nov 2025 18:44:51 +0000 Subject: [PATCH 20/26] update test --- tests/engines/test_nucleus_detection_engine.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/engines/test_nucleus_detection_engine.py b/tests/engines/test_nucleus_detection_engine.py index 8b73e9e77..31ef6df11 100644 --- a/tests/engines/test_nucleus_detection_engine.py +++ b/tests/engines/test_nucleus_detection_engine.py @@ -20,10 +20,7 @@ def _rm_dir(path: pathlib.Path) -> None: def check_output(path: pathlib.Path) -> None: """Check NucleusDetector output.""" store = SQLiteStore.open(path) - for item in store.values(): - geometry = item.geometry - print(geometry.centroid) - break + assert len(store.values()) == 281 def test_nucleus_detector_wsi(remote_sample: Callable, tmp_path: pathlib.Path) -> None: @@ -32,6 +29,8 @@ def test_nucleus_detector_wsi(remote_sample: Callable, tmp_path: pathlib.Path) - pretrained_model = "mapde-conic" + save_dir = tmp_path + nucleus_detector = NucleusDetector(model=pretrained_model) _ = nucleus_detector.run( patch_mode=False, @@ -40,9 +39,10 @@ def test_nucleus_detector_wsi(remote_sample: Callable, tmp_path: pathlib.Path) - auto_get_mask=True, memory_threshold=50, images=[mini_wsi_svs], - save_dir=tmp_path / "output", + save_dir=save_dir, + overwrite=True, ) - check_output(tmp_path / "output" / "wsi4_512_512_svs.db") + check_output(save_dir / "wsi4_512_512.db") - _rm_dir(tmp_path / "output") + _rm_dir(save_dir) \ No newline at end of file From 7912abecf8cb44581702e463c9c87595f63dc3d6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 23 Nov 2025 18:45:51 +0000 Subject: [PATCH 21/26] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/engines/test_nucleus_detection_engine.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/engines/test_nucleus_detection_engine.py b/tests/engines/test_nucleus_detection_engine.py index 31ef6df11..8adad4026 100644 --- a/tests/engines/test_nucleus_detection_engine.py +++ b/tests/engines/test_nucleus_detection_engine.py @@ -29,7 +29,7 @@ def test_nucleus_detector_wsi(remote_sample: Callable, tmp_path: pathlib.Path) - pretrained_model = "mapde-conic" - save_dir = tmp_path + save_dir = tmp_path nucleus_detector = NucleusDetector(model=pretrained_model) _ = nucleus_detector.run( @@ -45,4 +45,4 @@ def test_nucleus_detector_wsi(remote_sample: Callable, tmp_path: pathlib.Path) - check_output(save_dir / "wsi4_512_512.db") - _rm_dir(save_dir) \ No newline at end of file + _rm_dir(save_dir) From 0a72e8bbb75e0090aea4d2c7b1ad3b1ac1db8a0a Mon Sep 17 00:00:00 2001 From: Jiaqi-Lv <60471431+Jiaqi-Lv@users.noreply.github.com> Date: Mon, 24 Nov 2025 17:36:35 +0000 Subject: [PATCH 22/26] improve tests --- .../engines/test_nucleus_detection_engine.py | 132 +++++++++++++++++- 
tests/models/test_arch_mapde.py | 25 ++++ tests/models/test_arch_sccnn.py | 26 +++- 3 files changed, 178 insertions(+), 5 deletions(-) diff --git a/tests/engines/test_nucleus_detection_engine.py b/tests/engines/test_nucleus_detection_engine.py index 8adad4026..29005cd69 100644 --- a/tests/engines/test_nucleus_detection_engine.py +++ b/tests/engines/test_nucleus_detection_engine.py @@ -4,9 +4,13 @@ import shutil from collections.abc import Callable +import pandas as pd +import pytest + from tiatoolbox.annotation.storage import SQLiteStore from tiatoolbox.models.engine.nucleus_detector import NucleusDetector from tiatoolbox.utils import env_detection as toolbox_env +from tiatoolbox.wsicore.wsireader import WSIReader device = "cuda" if toolbox_env.has_gpu() else "cpu" @@ -19,8 +23,91 @@ def _rm_dir(path: pathlib.Path) -> None: def check_output(path: pathlib.Path) -> None: """Check NucleusDetector output.""" - store = SQLiteStore.open(path) - assert len(store.values()) == 281 + + +def test_nucleus_detection_nms_empty_dataframe() -> None: + """nucleus_detection_nms should return a copy for empty inputs.""" + df = pd.DataFrame(columns=["x", "y", "type", "prob"]) + + result = NucleusDetector.nucleus_detection_nms(df, radius=3) + + assert result.empty + assert result is not df + assert list(result.columns) == ["x", "y", "type", "prob"] + + +def test_nucleus_detection_nms_invalid_radius() -> None: + """Radius must be strictly positive.""" + df = pd.DataFrame({"x": [0], "y": [0], "type": [1], "prob": [0.9]}) + + with pytest.raises(ValueError, match="radius must be > 0"): + NucleusDetector.nucleus_detection_nms(df, radius=0) + + +def test_nucleus_detection_nms_invalid_overlap_threshold() -> None: + """overlap_threshold must lie in (0, 1].""" + df = pd.DataFrame({"x": [0], "y": [0], "type": [1], "prob": [0.9]}) + + message = r"overlap_threshold must be in \(0\.0, 1\.0\], got 0" + with pytest.raises(ValueError, match=message): + NucleusDetector.nucleus_detection_nms(df, radius=1, overlap_threshold=0) + + +def test_nucleus_detection_nms_suppresses_overlapping_detections() -> None: + """Lower-probability overlapping detections are removed.""" + df = pd.DataFrame( + { + "x": [2, 0, 20], + "y": [1, 0, 20], + "type": [1, 1, 2], + "prob": [0.6, 0.9, 0.7], + } + ) + + result = NucleusDetector.nucleus_detection_nms(df, radius=5) + + expected = pd.DataFrame( + {"x": [0, 20], "y": [0, 20], "type": [1, 2], "prob": [0.9, 0.7]} + ) + pd.testing.assert_frame_equal(result.reset_index(drop=True), expected) + + +def test_nucleus_detection_nms_suppresses_across_types() -> None: + """Overlapping detections of different types are also suppressed.""" + df = pd.DataFrame( + { + "x": [0, 0, 20], + "y": [0, 0, 0], + "type": [1, 2, 1], + "prob": [0.6, 0.95, 0.4], + } + ) + + result = NucleusDetector.nucleus_detection_nms(df, radius=5) + + expected = pd.DataFrame( + {"x": [0, 20], "y": [0, 0], "type": [2, 1], "prob": [0.95, 0.4]} + ) + pd.testing.assert_frame_equal(result.reset_index(drop=True), expected) + + +def test_nucleus_detection_nms_retains_non_overlapping_candidates() -> None: + """Detections with IoU below the threshold are preserved.""" + df = pd.DataFrame( + { + "x": [0, 10], + "y": [0, 0], + "type": [1, 1], + "prob": [0.8, 0.5], + } + ) + + result = NucleusDetector.nucleus_detection_nms(df, radius=5, overlap_threshold=0.5) + + expected = pd.DataFrame( + {"x": [0, 10], "y": [0, 0], "type": [1, 1], "prob": [0.8, 0.5]} + ) + pd.testing.assert_frame_equal(result.reset_index(drop=True), expected) def 
test_nucleus_detector_wsi(remote_sample: Callable, tmp_path: pathlib.Path) -> None: @@ -36,13 +123,50 @@ def test_nucleus_detector_wsi(remote_sample: Callable, tmp_path: pathlib.Path) - patch_mode=False, device=device, output_type="annotationstore", - auto_get_mask=True, memory_threshold=50, images=[mini_wsi_svs], save_dir=save_dir, overwrite=True, ) - check_output(save_dir / "wsi4_512_512.db") + store = SQLiteStore.open(save_dir / "wsi4_512_512.db") + assert len(store.values()) == 281 + store.close() + + _rm_dir(save_dir) + + +def test_nucleus_detector_patch( + remote_sample: Callable, tmp_path: pathlib.Path +) -> None: + """Test for nucleus detection engine in patch mode.""" + mini_wsi_svs = pathlib.Path(remote_sample("wsi4_512_512_svs")) + + wsi_reader = WSIReader.open(mini_wsi_svs) + patch_1 = wsi_reader.read_rect((0, 0), (252, 252), resolution=0.5, units="mpp") + patch_2 = wsi_reader.read_rect((252, 252), (252, 252), resolution=0.5, units="mpp") + + pretrained_model = "mapde-conic" + + save_dir = tmp_path + + nucleus_detector = NucleusDetector(model=pretrained_model) + _ = nucleus_detector.run( + patch_mode=True, + device=device, + output_type="annotationstore", + memory_threshold=50, + images=[patch_1, patch_2], + save_dir=save_dir, + overwrite=True, + ) + + store_1 = SQLiteStore.open(save_dir / "0.db") + assert len(store_1.values()) == 270 + store_1.close() + + store_2 = SQLiteStore.open(save_dir / "1.db") + assert len(store_2.values()) == 52 + store_2.close() _rm_dir(save_dir) diff --git a/tests/models/test_arch_mapde.py b/tests/models/test_arch_mapde.py index 9a876cc57..22a354ad4 100644 --- a/tests/models/test_arch_mapde.py +++ b/tests/models/test_arch_mapde.py @@ -53,6 +53,31 @@ def test_functionality(remote_sample: Callable) -> None: np.testing.assert_array_equal(xs[0:2], np.array([242, 192])) np.testing.assert_array_equal(ys[0:2], np.array([10, 13])) + + patch = reader.read_bounds( + (0, 0, 252, 252), + resolution=0.50, + units="mpp", + coord_space="resolution", + ) + + model, weights_path = _load_mapde(name="mapde-conic") + patch = model.preproc(patch) + batch = torch.from_numpy(patch)[None] + output = model.infer_batch(model, batch, device=select_device(on_gpu=ON_GPU)) + block_info = { + 0: { + "array-location": [ + [0, 1], + [0, 1], + ], # dummy block to test no valid detections + } + } + output = model.postproc(output[0], block_info=block_info) + xs, ys, _, _ = NucleusDetector._centroid_maps_to_detection_records(output, None) + np.testing.assert_array_equal(xs, np.array([])) + np.testing.assert_array_equal(ys, np.array([])) + Path(weights_path).unlink() diff --git a/tests/models/test_arch_sccnn.py b/tests/models/test_arch_sccnn.py index d0b25b728..4f8364854 100644 --- a/tests/models/test_arch_sccnn.py +++ b/tests/models/test_arch_sccnn.py @@ -60,7 +60,31 @@ def test_functionality(remote_sample: Callable) -> None: batch, device=select_device(on_gpu=env_detection.has_gpu()), ) - output = model.postproc(output[0]) + block_info = { + 0: { + "array-location": [[0, 31], [0, 31]], + } + } + output = model.postproc(output[0], block_info=block_info) xs, ys, _, _ = NucleusDetector._centroid_maps_to_detection_records(output, None) np.testing.assert_array_equal(xs, np.array([7])) np.testing.assert_array_equal(ys, np.array([8])) + + model = _load_sccnn(name="sccnn-conic") + output = model.infer_batch( + model, + batch, + device=select_device(on_gpu=env_detection.has_gpu()), + ) + block_info = { + 0: { + "array-location": [ + [0, 1], + [0, 1], + ], # dummy block to test no valid 
detections + } + } + output = model.postproc(output[0], block_info=block_info) + xs, ys, _, _ = NucleusDetector._centroid_maps_to_detection_records(output, None) + np.testing.assert_array_equal(xs, np.array([])) + np.testing.assert_array_equal(ys, np.array([])) From f8b418984680e45b4f1d32bb09b3ea5a58d6f982 Mon Sep 17 00:00:00 2001 From: Jiaqi-Lv <60471431+Jiaqi-Lv@users.noreply.github.com> Date: Mon, 24 Nov 2025 19:22:25 +0000 Subject: [PATCH 23/26] improve tests --- .../engines/test_nucleus_detection_engine.py | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/tests/engines/test_nucleus_detection_engine.py b/tests/engines/test_nucleus_detection_engine.py index 29005cd69..2bcc065c4 100644 --- a/tests/engines/test_nucleus_detection_engine.py +++ b/tests/engines/test_nucleus_detection_engine.py @@ -11,6 +11,9 @@ from tiatoolbox.models.engine.nucleus_detector import NucleusDetector from tiatoolbox.utils import env_detection as toolbox_env from tiatoolbox.wsicore.wsireader import WSIReader +from tiatoolbox.utils.misc import imwrite +import numpy as np +import dask.array as da device = "cuda" if toolbox_env.has_gpu() else "cpu" @@ -169,4 +172,54 @@ def test_nucleus_detector_patch( assert len(store_2.values()) == 52 store_2.close() + imwrite(save_dir / "patch_0.png", patch_1) + imwrite(save_dir / "patch_1.png", patch_2) + _ = nucleus_detector.run( + patch_mode=True, + device=device, + output_type="zarr", + memory_threshold=50, + images=[save_dir / "patch_0.png", save_dir / "patch_1.png"], + save_dir=save_dir, + overwrite=True, + ) + + store_1 = SQLiteStore.open(save_dir / "patch_0.db") + assert len(store_1.values()) == 270 + store_1.close() + + store_2 = SQLiteStore.open(save_dir / "patch_1.db") + assert len(store_2.values()) == 52 + store_2.close() + _rm_dir(save_dir) + + +def test_nucleus_detector_write_centroid_maps(tmp_path: pathlib.Path)->None: + """Test for _write_centroid_maps function.""" + + detection_maps = np.zeros((20, 20, 1), dtype=np.uint8) + detection_maps = da.from_array(detection_maps, chunks=(20, 20, 1)) + + store = NucleusDetector.write_centroid_maps_to_store( + detection_maps=detection_maps, + ) + assert len(store.values()) == 0 + store.close() + + detection_maps = np.zeros((20, 20, 1), dtype=np.uint8) + detection_maps[10, 10, 0] = 1 + detection_maps = da.from_array(detection_maps, chunks=(20, 20, 1)) + _ = NucleusDetector.write_centroid_maps_to_store( + detection_maps=detection_maps, + save_path=tmp_path / "test.db", + class_dict={0: "nucleus"}, + ) + store = SQLiteStore.open(tmp_path / "test.db") + assert len(store.values()) == 1 + annotation = next(iter(store.values())) + print(annotation) + assert annotation.properties["type"] == "nucleus" + assert annotation.geometry.centroid.x == 10.0 + assert annotation.geometry.centroid.y == 10.0 + store.close() \ No newline at end of file From 228731c1d8c7efed06ca75cd08168a0a101c2234 Mon Sep 17 00:00:00 2001 From: Jiaqi-Lv <60471431+Jiaqi-Lv@users.noreply.github.com> Date: Mon, 24 Nov 2025 19:23:23 +0000 Subject: [PATCH 24/26] precommit --- tests/engines/test_nucleus_detection_engine.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/engines/test_nucleus_detection_engine.py b/tests/engines/test_nucleus_detection_engine.py index 2bcc065c4..48369164c 100644 --- a/tests/engines/test_nucleus_detection_engine.py +++ b/tests/engines/test_nucleus_detection_engine.py @@ -4,16 +4,16 @@ import shutil from collections.abc import Callable +import dask.array as da +import numpy as np import 
pandas as pd import pytest from tiatoolbox.annotation.storage import SQLiteStore from tiatoolbox.models.engine.nucleus_detector import NucleusDetector from tiatoolbox.utils import env_detection as toolbox_env -from tiatoolbox.wsicore.wsireader import WSIReader from tiatoolbox.utils.misc import imwrite -import numpy as np -import dask.array as da +from tiatoolbox.wsicore.wsireader import WSIReader device = "cuda" if toolbox_env.has_gpu() else "cpu" @@ -195,9 +195,8 @@ def test_nucleus_detector_patch( _rm_dir(save_dir) -def test_nucleus_detector_write_centroid_maps(tmp_path: pathlib.Path)->None: +def test_nucleus_detector_write_centroid_maps(tmp_path: pathlib.Path) -> None: """Test for _write_centroid_maps function.""" - detection_maps = np.zeros((20, 20, 1), dtype=np.uint8) detection_maps = da.from_array(detection_maps, chunks=(20, 20, 1)) @@ -222,4 +221,4 @@ def test_nucleus_detector_write_centroid_maps(tmp_path: pathlib.Path)->None: assert annotation.properties["type"] == "nucleus" assert annotation.geometry.centroid.x == 10.0 assert annotation.geometry.centroid.y == 10.0 - store.close() \ No newline at end of file + store.close() From 6c26a0f200c24be405fd6535853a6c703f36a188 Mon Sep 17 00:00:00 2001 From: Jiaqi-Lv <60471431+Jiaqi-Lv@users.noreply.github.com> Date: Tue, 25 Nov 2025 15:44:01 +0000 Subject: [PATCH 25/26] fix deepsource --- tiatoolbox/models/architecture/sccnn.py | 1 + tiatoolbox/models/engine/nucleus_detector.py | 16 +++++++++------- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/tiatoolbox/models/architecture/sccnn.py b/tiatoolbox/models/architecture/sccnn.py index 8ba6ee12f..4446408da 100644 --- a/tiatoolbox/models/architecture/sccnn.py +++ b/tiatoolbox/models/architecture/sccnn.py @@ -328,6 +328,7 @@ def spatially_constrained_layer1( ) return self.spatially_constrained_layer2(s1_sigmoid0, s1_sigmoid1, s1_sigmoid2) + # skipcq: PYL-W0221 # noqa: ERA001 def postproc( self: SCCNN, block: np.ndarray, diff --git a/tiatoolbox/models/engine/nucleus_detector.py b/tiatoolbox/models/engine/nucleus_detector.py index ceb1b1ba4..0d33d9046 100644 --- a/tiatoolbox/models/engine/nucleus_detector.py +++ b/tiatoolbox/models/engine/nucleus_detector.py @@ -132,9 +132,9 @@ def post_process_wsi( """ logger.info("Post processing WSI predictions in NucleusDetector") - logger.info(f"Raw probabilities shape: {prediction_shape}") - logger.info(f"Raw probabilities dtype: {prediction_dtype}") - logger.info(f"Raw chunk size: {raw_predictions.chunks}") + logger.info("Raw probabilities shape: %s", prediction_shape) + logger.info("Raw probabilities dtype %s", prediction_dtype) + logger.info("Raw chunk size: %s", raw_predictions.chunks) # Add halo (overlap) around each block for post-processing depth_h = self.model.min_distance @@ -194,8 +194,9 @@ def save_predictions( # Only "annotationstore" output type is supported for NucleusDetector if output_type != "annotationstore": logger.warning( - f"Output type '{output_type}' is not supported by NucleusDetector. " - "Defaulting to 'annotationstore'." + "Output type %s is not supported by NucleusDetector. 
" + "Defaulting to 'annotationstore'.", + output_type, ) output_type = "annotationstore" @@ -262,7 +263,7 @@ def nucleus_detection_nms( if radius <= 0: msg = "radius must be > 0" raise ValueError(msg) - if not (overlap_min < overlap_threshold <= overlap_max): + if not overlap_min < overlap_threshold <= overlap_max: msg = f"overlap_threshold must be in (0.0, 1.0], got {overlap_threshold}" raise ValueError(msg) @@ -396,6 +397,7 @@ def _write_detection_records_to_store( labels = np.array([class_dict.get(int(k), int(k)) for k in t], dtype=object) def make_points(xb: np.ndarray, yb: np.ndarray) -> list[Point]: + """Create Shapely Point geometries from coordinate arrays.""" return [Point(int(xx), int(yy)) for xx, yy in zip(xb, yb, strict=True)] written = 0 @@ -461,7 +463,7 @@ def write_centroid_maps_to_store( # IMPORTANT: SQLite is single-writer; run sequentially with ProgressBar(): total = dask.compute(*writes, scheduler="single-threaded") - logger.info(f"Total detections written to store: {sum(total)}") + logger.info("Total detections written to store: %s", sum(total)) # if a save directory is provided, then dump store into a file if save_path: From 7ffea5b2682dcfc394303f685d5898f81f72ab73 Mon Sep 17 00:00:00 2001 From: Jiaqi-Lv <60471431+Jiaqi-Lv@users.noreply.github.com> Date: Thu, 27 Nov 2025 16:05:24 +0000 Subject: [PATCH 26/26] fix deepsource --- tests/engines/test_nucleus_detection_engine.py | 3 ++- tiatoolbox/models/architecture/sccnn.py | 1 + tiatoolbox/models/engine/nucleus_detector.py | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/engines/test_nucleus_detection_engine.py b/tests/engines/test_nucleus_detection_engine.py index 48369164c..9cda077d1 100644 --- a/tests/engines/test_nucleus_detection_engine.py +++ b/tests/engines/test_nucleus_detection_engine.py @@ -162,6 +162,7 @@ def test_nucleus_detector_patch( images=[patch_1, patch_2], save_dir=save_dir, overwrite=True, + class_dict=None, ) store_1 = SQLiteStore.open(save_dir / "0.db") @@ -201,7 +202,7 @@ def test_nucleus_detector_write_centroid_maps(tmp_path: pathlib.Path) -> None: detection_maps = da.from_array(detection_maps, chunks=(20, 20, 1)) store = NucleusDetector.write_centroid_maps_to_store( - detection_maps=detection_maps, + detection_maps=detection_maps, class_dict=None ) assert len(store.values()) == 0 store.close() diff --git a/tiatoolbox/models/architecture/sccnn.py b/tiatoolbox/models/architecture/sccnn.py index 4446408da..0f4fc945a 100644 --- a/tiatoolbox/models/architecture/sccnn.py +++ b/tiatoolbox/models/architecture/sccnn.py @@ -399,6 +399,7 @@ def postproc( def infer_batch( model: nn.Module, batch_data: torch.Tensor, + *, device: str, ) -> np.ndarray: """Run inference on an input batch. diff --git a/tiatoolbox/models/engine/nucleus_detector.py b/tiatoolbox/models/engine/nucleus_detector.py index 0d33d9046..d1aeba676 100644 --- a/tiatoolbox/models/engine/nucleus_detector.py +++ b/tiatoolbox/models/engine/nucleus_detector.py @@ -145,8 +145,8 @@ def post_process_wsi( rechunked_prediction_map = raw_predictions.rechunk( (self.model.postproc_tile_shape[0], self.model.postproc_tile_shape[1], -1) ) - logger.info(f"Post-processing tile size: {rechunked_prediction_map.chunks}") - logger.info(f"Post-processing tiles overlap: (h={depth_h}, w={depth_w})") + logger.info("Post-processing tile size: %s", rechunked_prediction_map.chunks) + logger.info("Post-processing tiles overlap: (h=%d, w=%d)", depth_h, depth_w) return da.map_overlap( rechunked_prediction_map,