Source code for cellmil.features.extractor.morphological
import SimpleITK as sitk
import numpy as np
import matplotlib.pyplot as plt
from typing import Any, Literal
from radiomics import featureextractor # type: ignore
from skimage.measure import regionprops # type: ignore
from skimage.morphology import convex_hull_image # type: ignore
from cellmil.interfaces.FeatureExtractorConfig import ExtractorType
from scipy.ndimage import distance_transform_edt # type: ignore
from skimage.color import rgb2hed, rgb2gray, rgb2hsv # type: ignore
# TODO: Take into account the magnification factor when extracting features
[docs]class MorphologicalExtractor:
"""Class to handle feature extraction."""
[docs] def __init__(self, extractor_name: ExtractorType) -> None:
"""Initialize the Extractor with a PyRadiomics feature extractor."""
self.extractor_name = extractor_name
# Ensure SimpleITK uses a single thread per process when multiprocessing
try:
sitk.ProcessObject.SetGlobalDefaultNumberOfThreads(1) # type: ignore
except Exception:
pass
if self.extractor_name == "pyradiomics_gray":
self.extractor = PyRadiomicsExtractor(enable_all_features=True, mode="gray")
elif self.extractor_name == "pyradiomics_hed":
self.extractor = PyRadiomicsExtractor(enable_all_features=True, mode="hed")
elif self.extractor_name == "pyradiomics_hue":
self.extractor = PyRadiomicsExtractor(enable_all_features=True, mode="hue")
elif self.extractor_name == "morphometrics":
self.extractor = MorphometricsExtractor()
else:
raise ValueError(
f"Unsupported extractor type: {self.extractor}. Supported extractors are: pyradiomics_gray, pyradiomics_hed, pyradiomics_hue, morphometrics"
)
[docs] def extract_features(
self, image: np.ndarray[Any, Any], mask: np.ndarray[Any, Any], debug: bool = False
) -> dict[str, Any]:
"""Extract features from the given image and mask."""
if debug:
fig, axs = plt.subplots(1, 3, figsize=(12, 4)) # type: ignore
axs[0].imshow(image)
axs[0].set_title(f"Image {image.shape}")
axs[0].axis("off")
axs[1].imshow(mask, cmap="gray")
axs[1].set_title(f"Mask {mask.shape}")
axs[1].axis("off")
# Overlay: show image with mask as red transparent overlay
overlay = image.copy()
if overlay.ndim == 2 or overlay.shape[2] == 1:
overlay = np.stack([overlay]*3, axis=-1)
mask_rgb = np.zeros_like(overlay)
mask_rgb[..., 0] = mask * 255 # Red channel
alpha = 0.4
overlay_img = overlay.copy()
overlay_img = np.clip((1 - alpha) * overlay + alpha * mask_rgb, 0, 255).astype(np.uint8)
axs[2].imshow(overlay_img)
axs[2].set_title("Overlay")
axs[2].axis("off")
plt.tight_layout()
plt.show() # type: ignore
input("Press 'y' and Enter to continue...")
try:
features = self.extractor.extract_features(image, mask)
return features
except Exception as e:
raise RuntimeError(
f"Error during feature extraction with {self.extractor_name}: {e}"
) from e
[docs]class PyRadiomicsExtractor:
"""Class to handle PyRadiomics feature extraction."""
[docs] def __init__(self, enable_all_features: bool = True, mode: Literal["gray", "hed", "hue"] = "gray") -> None:
"""Initialize the PyRadiomics feature extractor.
Args:
enable_all_features: Whether to enable all feature classes or just shape2D
mode: 'gray' for grayscale, 'hed' for H channel from HED deconvolution
"""
self.mode = mode
# Create a new extractor instance
self.extractor = featureextractor.RadiomicsFeatureExtractor()
# Configure settings
settings: dict[str, Any] = {
# Discretization: choose ONE of these blocks
# Option 1: fixed bin count (robust)
"binCount": 64, # <- use this
# "binWidth": 25, # <- REMOVE if using binCount
# Option 2 (alternative): tighter bin width if you keep binWidth
# "binWidth": 0.2, # good with normalize=True (z-score range ~ -3..3)
# (do not set binCount at the same time)
# Small-ROI friendly textures
"force2D": True, # compute textures per-slice then aggregate
"force2Ddimension": 0, # axial; change if your data is sagittal/coronal
"distances": [1], # avoid larger neighborhoods
# Resampling helps tiny masks form neighborhoods
"resampledPixelSpacing": [0.5, 0.5], # (use 3 values for 3D data)
"interpolator": sitk.sitkBSpline, # images (masks use NN internally) # type: ignore
# Intensity handling
"normalize": True, # ok to keep; just don't use a huge binWidth
"normalizeScale": 1,
# If intensities can be ≤0 and you compute entropy-like features:
# "voxelArrayShift": 1.0,
"correctMask": True,
"label": 1,
"verbose": False,
}
# Apply settings
for key, value in settings.items():
self.extractor.settings[key] = value # type: ignore
# Enable image types and features based on parameter
self.extractor.enableImageTypeByName("Original") # type: ignore
if enable_all_features:
# self.extractor.enableImageTypeByName("Wavelet") # type: ignore
self.extractor.enableImageTypeByName("LoG") # type: ignore
self.extractor.enableFeatureClassByName("firstorder") # type: ignore
self.extractor.enableFeatureClassByName("shape2D") # type: ignore
self.extractor.enableFeatureClassByName("glcm") # type: ignore
self.extractor.enableFeatureClassByName("glrlm") # type: ignore
self.extractor.enableFeatureClassByName("glszm") # type: ignore
self.extractor.enableFeatureClassByName("gldm") # type: ignore
self.extractor.enableFeatureClassByName("ngtdm") # type: ignore
else:
self.extractor.enableFeatureClassByName("shape2D") # type: ignore
[docs] def _preprocess(
self, image: np.ndarray[Any, Any], mask: np.ndarray[Any, Any]
) -> tuple[sitk.Image, sitk.Image]:
"""Convert numpy arrays to SimpleITK images for PyRadiomics, with support for grayscale or HED-H channel."""
if self.mode == "gray":
image_proc = (rgb2gray(image) * 255).astype(np.uint8) # type: ignore
image_proc = 255 - image_proc # type: ignore
elif self.mode == "hue":
hsv = rgb2hsv(image) # type: ignore
image_proc = (hsv[:, :, 0] * 255).astype(np.uint8) # type: ignore
elif self.mode == "hed":
hed = rgb2hed(image) # type: ignore
image_proc = hed[:, :, 0] # type: ignore
else:
raise ValueError(f"Unknown preprocessing mode: {self.mode}")
# Convert to SimpleITK
sitk_image = sitk.GetImageFromArray(image_proc.astype(np.float64)) # type: ignore
sitk_mask = sitk.GetImageFromArray(mask.astype(np.int32)) # type: ignore
return sitk_image, sitk_mask
[docs] def extract_features(
self, image: np.ndarray[Any, Any], mask: np.ndarray[Any, Any]
) -> dict[str, float]:
"""Extract features from the given image and mask."""
sitk_image, sitk_mask = self._preprocess(image, mask)
_features = self.extractor.execute(sitk_image, sitk_mask) # type: ignore
features: dict[str, float] = {
k: v for k, v in _features.items() if not k.startswith("diagnostics_") # type: ignore
}
return features # type: ignore
[docs]class MorphometricsExtractor:
"""Placeholder class for morphometric feature extraction."""
[docs] def extract_features(
self,
image: np.ndarray[Any, Any],
mask: np.ndarray[Any, Any],
magnification: float = 0.25,
) -> dict[str, float]:
"""Extract morphometrics features from the given image and mask."""
# Get basic shape features from PyRadiomics
props = regionprops(mask.astype(np.uint8), cache=False) # type: ignore
major_axis_length: float = props[0].major_axis_length # type: ignore
minor_axis_length: float = props[0].minor_axis_length # type: ignore
area = props[0].area # type: ignore
perimeter = props[0].perimeter # type: ignore
d_max = props[0].feret_diameter_max # type: ignore
ch_mask = convex_hull_image(mask.astype(np.uint8)) # type: ignore
ch_props = regionprops(ch_mask.astype(np.uint8), cache=False) # type: ignore
ch_area = ch_props[0].area # type: ignore
ch_perimeter = ch_props[0].perimeter # type: ignore
dist = distance_transform_edt(mask.astype(np.int8)) # type: ignore
d_ins = 2 * dist.max() # type: ignore
features: dict[str, float] = {
"axial_ratio": (props[0].bbox[2] - props[0].bbox[0]) / (props[0].bbox[3] - props[0].bbox[1]), # type:ignore
"aspect_ratio": major_axis_length / minor_axis_length, # AR = a/b where a and b are axes of best fitted ellipse
"eccentricity": minor_axis_length / major_axis_length,
"rectangular_factor": area / (major_axis_length * minor_axis_length),
"elongation_index": np.log2(major_axis_length / minor_axis_length), # type: ignore
"dispersion_index": np.log2(np.pi * major_axis_length * minor_axis_length), # type: ignore
"circularity": 4 * np.pi * area / (perimeter**2),
"roundness": perimeter / np.sqrt(4 * np.pi * area), # type: ignore
"roundness_factor": 4 * area / (np.pi * d_max**2),
"convexity": ch_perimeter / perimeter,
"spreading_index": (np.pi * ch_perimeter) / (4 * ch_area),
"irregularity_index": d_max / d_ins,
"solidity": area / ch_area,
}
return features