Source code for cellmil.visualization.feature_visualizer

import torch
import numpy as np
import pandas as pd
from typing import Dict, List, Any, Optional, cast
from pathlib import Path
from cellmil.interfaces import FeatureVisualizerConfig
from cellmil.interfaces.CellSegmenterConfig import TYPE_NUCLEI_DICT, ModelType
import json

import dash
from dash import dcc, html, Input, Output, dash_table
import plotly.graph_objects as go  # type: ignore
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
import scipy.stats as stats  # type: ignore
from plotly.subplots import make_subplots  # type: ignore
from scipy.stats import gaussian_kde  # type: ignore

from cellmil.utils import logger

COLORS = [
    "#1f77b4",
    "#ff7f0e",
    "#2ca02c",
    "#d62728",
    "#9467bd",
    "#8c564b",
    "#e377c2",
    "#7f7f7f",
]

# Common style dictionaries
STYLES: dict[str, dict[str, Any]] = {
    "error": {
        "backgroundColor": "#ffcccc",
        "padding": 15,
        "borderRadius": 5,
        "border": "1px solid #f5c6cb",
        "fontFamily": "'Segoe UI', Arial, sans-serif",
    },
    "warning": {
        "padding": "20px",
        "backgroundColor": "#fff3cd",
        "border": "1px solid #ffc107",
        "borderRadius": "5px",
        "color": "#856404",
        "fontFamily": "'Segoe UI', Arial, sans-serif",
    },
    "info": {
        "padding": "20px",
        "backgroundColor": "#e7f3ff",
        "border": "1px solid #2196F3",
        "borderRadius": "5px",
        "color": "#0d47a1",
        "fontFamily": "'Segoe UI', Arial, sans-serif",
    },
    "cell_type_error": {
        "padding": "20px",
        "backgroundColor": "#f8d7da",
        "border": "1px solid #f5c6cb",
        "borderRadius": "5px",
        "color": "#721c24",
        "fontFamily": "'Segoe UI', Arial, sans-serif",
    },
    "section_combined": {
        "marginBottom": 40,
        "padding": "20px",
        "backgroundColor": "#e8f4f8",
        "borderRadius": "10px",
    },
    "section_slide": {
        "marginBottom": 30,
        "padding": "20px",
        "backgroundColor": "#ffffff",
        "borderRadius": "10px",
        "boxShadow": "0 2px 4px rgba(0,0,0,0.1)",
    },
    "section_comparison": {
        "marginBottom": 40,
        "padding": "20px",
        "backgroundColor": "#f8f9fa",
        "borderRadius": "10px",
    },
}


[docs]class FeatureVisualizer:
[docs]    def __init__(self, config: FeatureVisualizerConfig):
        self.config = config
        self.max_dropdown_levels = 5  # Maximum number of dropdown levels to support

    # ==================== Helper Methods ====================

[docs]    @staticmethod
    def _to_numpy(data: Any) -> np.ndarray[Any, Any]:
        """Convert various data types to numpy array."""
        if hasattr(data, "numpy"):
            return data.numpy()
        elif hasattr(data, "detach"):
            return data.detach().numpy()
        return np.array(data)

[docs]    @staticmethod
    def _sample_data(
        features: np.ndarray[Any, Any],
        labels: np.ndarray[Any, Any] | None,
        n_samples: int,
    ) -> tuple[np.ndarray[Any, Any], np.ndarray[Any, Any] | None, str]:
        """
        Sample data if it exceeds n_samples.
        Returns: (sampled_features, sampled_labels, sample_info_string)
        """
        if features.shape[0] > n_samples:
            np.random.seed(42)  # Set seed for reproducibility
            indices = np.random.choice(features.shape[0], n_samples, replace=False)
            sampled_features = features[indices]
            sampled_labels = labels[indices] if labels is not None else None
            sample_info = f" (sampled {n_samples} out of {features.shape[0]})"
        else:
            sampled_features = features
            sampled_labels = labels
            sample_info = ""
        return sampled_features, sampled_labels, sample_info

[docs]    @staticmethod
    def _validate_positive_int(value: int | None, default: int) -> int:
        """Validate and return a positive integer, or default if invalid."""
        return value if value and value > 0 else default

[docs]    @staticmethod
    def _adjust_perplexity(n_samples: int, requested_perplexity: int) -> int:
        """Adjust perplexity to be valid for the given number of samples."""
        max_perplexity = (n_samples - 1) // 3
        actual_perplexity = min(requested_perplexity, max_perplexity)
        if actual_perplexity != requested_perplexity:
            logger.warning(
                f"Perplexity adjusted from {requested_perplexity} to {actual_perplexity} "
                f"for {n_samples} samples"
            )
        return actual_perplexity

[docs]    @staticmethod
    def _create_error_message(
        title: str, message: str, style_key: str = "error"
    ) -> html.Div:
        """Create a standardized error message component."""
        return html.Div(
            [
                html.H4(title, style={"marginBottom": "10px"}),
                html.P(message),
            ],
            style=STYLES.get(style_key, STYLES["error"]),
        )

[docs]    @staticmethod
    def _create_cell_type_unavailable_message() -> html.Div:
        """Create a standardized message for when cell type data is unavailable."""
        return html.Div(
            [
                html.H4(
                    "Cell Type Information Not Available",
                    style={"marginBottom": "10px"},
                ),
                html.P(
                    "Cell type data could not be loaded from the slides. "
                    "Make sure cell detection data exists for the selected feature extraction path."
                ),
            ],
            style=STYLES["cell_type_error"],
        )

[docs]    def _build_path_from_values(self, *selected_values: str | None) -> List[str]:
        """Build path list from selected dropdown values."""
        current_path: list[str] = []
        for value in selected_values:
            if value is not None:
                current_path.append(value)
            else:
                break
        return current_path

[docs]    def _standardize_and_fit_pca(
        self, features: np.ndarray[Any, Any], n_components: int = 2
    ) -> tuple[np.ndarray[Any, Any], PCA]:
        """Standardize features and fit PCA."""
        n_samples, n_features = features.shape

        # Validate n_components
        max_components = min(n_samples, n_features)
        if n_components > max_components:
            logger.warning(
                f"n_components={n_components} exceeds max allowed ({max_components}). "
                f"Reducing to {max_components}."
            )
            n_components = max_components

        scaler = StandardScaler()
        features_scaled = cast(np.ndarray[Any, Any], scaler.fit_transform(features))  # type: ignore
        pca = PCA(n_components=n_components)
        pca_result = cast(np.ndarray[Any, Any], pca.fit_transform(features_scaled))  # type: ignore
        return pca_result, pca

[docs]    def _standardize_and_fit_tsne(
        self, features: np.ndarray[Any, Any], perplexity: int
    ) -> np.ndarray[Any, Any]:
        """Standardize features and fit t-SNE."""
        scaler = StandardScaler()
        features_scaled = cast(np.ndarray[Any, Any], scaler.fit_transform(features))  # type: ignore
        tsne = TSNE(n_components=2, perplexity=perplexity, random_state=42, n_jobs=-1)
        return tsne.fit_transform(features_scaled)  # type: ignore

[docs]    def _create_scatter_by_labels(
        self,
        coordinates: np.ndarray[Any, Any],
        labels: np.ndarray[Any, Any],
        label_names: Dict[int, str],
        title: str,
        xlabel: str,
        ylabel: str,
        sample_info: str = "",
    ) -> go.Figure:
        """
        Create a scatter plot colored by labels (cell types or slides).
        Reduces duplication across PCA/t-SNE by cell type methods.
        """
        fig = go.Figure()

        for i, (label_id, label_name) in enumerate(sorted(label_names.items())):
            mask = labels == label_id
            color = COLORS[i % len(COLORS)]

            fig.add_trace(  # type: ignore
                go.Scatter(
                    x=coordinates[mask, 0],
                    y=coordinates[mask, 1],
                    mode="markers",
                    name=label_name,
                    marker=dict(size=5, opacity=0.6, color=color),
                    text=[f"{label_name}<br>Cell {idx}" for idx in np.where(mask)[0]],
                    hovertemplate="<b>%{text}</b><br>X: %{x:.2f}<br>Y: %{y:.2f}<extra></extra>",
                )
            )

        fig.update_layout(  # type: ignore
            title=f"{title}{sample_info}",
            xaxis_title=xlabel,
            yaxis_title=ylabel,
            legend=dict(orientation="v", yanchor="top", y=1, xanchor="right", x=1),
            hovermode="closest",
            width=800,
            height=600,
        )

        return fig

[docs]    def _create_js_divergence_table_component(
        self,
        js_df: pd.DataFrame,
        reference_cell_type_name: str,
        is_combined: bool = False,
    ) -> html.Div:
        """
        Create a standardized JS divergence table component.
        Reduces duplication between single slide and combined dataset views.
        """
        dataset_text = "Combined Dataset" if is_combined else ""

        # Prepare data for the table
        table_data = js_df.reset_index().to_dict("records")  # type: ignore
        table_columns: list[dict[str, str | dict[str, str]]] = [
            {"name": "Feature", "id": "Feature"}
        ]
        for col in js_df.columns:
            table_columns.append(
                {
                    "name": col,
                    "id": col,
                    "type": "numeric",
                    "format": {"specifier": ".4f"},
                }
            )

        # Build style_data_conditional list
        style_conditions: list[dict[str, Any]] = [
            {"if": {"row_index": "odd"}, "backgroundColor": "#f9f9f9"},
            {
                "if": {"column_id": "Feature"},
                "fontWeight": "500",
                "backgroundColor": "#ecf0f1",
            },
        ]

        # Add color coding for divergence values
        for col in js_df.columns:
            style_conditions.extend(
                [
                    {
                        "if": {"filter_query": f"{{{col}}} < 0.1", "column_id": col},
                        "backgroundColor": "#d4edda",
                        "color": "#155724",
                    },
                    {
                        "if": {
                            "filter_query": f"{{{col}}} >= 0.1 && {{{col}}} < 0.3",
                            "column_id": col,
                        },
                        "backgroundColor": "#fff3cd",
                        "color": "#856404",
                    },
                    {
                        "if": {"filter_query": f"{{{col}}} >= 0.3", "column_id": col},
                        "backgroundColor": "#f8d7da",
                        "color": "#721c24",
                    },
                ]
            )

        return html.Div(
            [
                html.H4(
                    f"Jensen-Shannon Divergence: {reference_cell_type_name} vs Other Cell Types {dataset_text}",
                    style={
                        "marginBottom": "20px",
                        "fontFamily": "'Segoe UI', Arial, sans-serif",
                        "color": "#2c3e50",
                    },
                ),
                html.P(
                    f"Values represent the Jensen-Shannon divergence between the distribution of each feature in {reference_cell_type_name} cells and other cell types"
                    + (" across all slides" if is_combined else "")
                    + ". Lower values indicate more similar distributions (0 = identical, 1 = completely different).",
                    style={
                        "marginBottom": "20px",
                        "fontFamily": "'Segoe UI', Arial, sans-serif",
                        "color": "#7f8c8d",
                        "fontSize": "14px",
                    },
                ),
                dash_table.DataTable(
                    data=table_data,  # type: ignore
                    columns=table_columns,  # type: ignore
                    style_table={
                        "overflowX": "auto",
                        "maxHeight": "600px",
                        "overflowY": "auto",
                    },
                    style_cell={
                        "textAlign": "left",
                        "padding": "10px",
                        "fontFamily": "'Segoe UI', Arial, sans-serif",
                        "fontSize": "13px",
                    },
                    style_header={
                        "backgroundColor": "#34495e",
                        "color": "white",
                        "fontWeight": "bold",
                        "textAlign": "left",
                        "padding": "12px",
                        "fontFamily": "'Segoe UI', Arial, sans-serif",
                    },
                    style_data_conditional=style_conditions,  # type: ignore
                    page_size=20,
                    sort_action="native",
                    filter_action="native",
                ),
            ],
            style={"padding": "20px"},
        )

    # ==================== Data Loading Methods ====================

[docs]    def _get_available_slides(self) -> List[str]:
        """
        Get list of available slide folders in the dataset directory.
        """
        if not self.config.dataset.exists() or not self.config.dataset.is_dir():
            logger.warning(f"Dataset path does not exist: {self.config.dataset}")
            return []

        slides: list[str] = []
        for item in self.config.dataset.iterdir():
            if item.is_dir():
                slides.append(item.name)

        return sorted(slides)

[docs]    def _explore_directory(
        self, path: Path, current_path_parts: Optional[List[str]] = None
    ) -> Dict[str, Any]:
        """
        Recursively explore directory structure to find features.pt files.
        Returns a nested dictionary structure representing the directory tree.
        """
        if current_path_parts is None:
            current_path_parts = []

        if not path.exists() or not path.is_dir():
            return {}

        result: dict[str, Any] = {}

        for item in path.iterdir():
            if item.is_dir():
                # Check if this directory contains features.pt
                features_file = item / "features.pt"
                if features_file.exists():
                    # This is a terminal directory with features
                    result[item.name] = {
                        "_has_features": True,
                        "_path": str(item),
                        "_path_parts": current_path_parts + [item.name],
                    }
                else:
                    # Recursively explore subdirectories
                    subdirs = self._explore_directory(
                        item, current_path_parts + [item.name]
                    )
                    if subdirs:  # Only add if there are subdirectories with features
                        result[item.name] = subdirs

        return result

[docs]    def _get_available_options_at_level(
        self, structure: Dict[str, Any], path_parts: List[str]
    ) -> List[str]:
        """
        Get available options at a specific level in the directory structure.
        """
        current = structure

        # Navigate to the specified level
        for part in path_parts:
            if part in current and isinstance(current[part], dict):
                current = current[part]
            else:
                return []

        # Return available options at this level
        options: list[str] = []
        for key, _ in current.items():
            if not key.startswith("_"):  # Skip metadata keys
                options.append(key)

        return sorted(options)

[docs]    def _can_load_features(
        self, structure: Dict[str, Any], path_parts: List[str]
    ) -> bool:
        """
        Check if we can load features at the current path.
        """
        current = structure

        # Navigate to the specified level
        for part in path_parts:
            if part in current and isinstance(current[part], dict):
                current = current[part]
            else:
                return False

        return current.get("_has_features", False)

[docs]    def _get_features_path(
        self, structure: Dict[str, Any], path_parts: List[str]
    ) -> str:
        """
        Get the full path to the features.pt file for the given path parts.
        """
        current = structure

        # Navigate to the specified level
        for part in path_parts:
            if part in current and isinstance(current[part], dict):
                current = current[part]
            else:
                raise ValueError(f"Invalid path: {'/'.join(path_parts)}")

        if not current.get("_has_features", False):
            raise ValueError(f"No features available at path: {'/'.join(path_parts)}")

        return current["_path"]

[docs]    def _load_features(self, slide_name: str, path_parts: List[str]):
        """
        Load features for the specified slide and path parts.
        """
        feature_extraction_path = (
            self.config.dataset / slide_name / "feature_extraction"
        )
        directory_structure = self._explore_directory(feature_extraction_path)

        if not self._can_load_features(directory_structure, path_parts):
            raise ValueError(f"No features available at path: {'/'.join(path_parts)}")

        features_path = self._get_features_path(directory_structure, path_parts)

        return torch.load(
            Path(features_path) / "features.pt",
            map_location=torch.device("cpu"),
            weights_only=False,
        )

[docs]    def _prepare_data(self, slide_name: str, path_parts: List[str]) -> Dict[str, Any]:
        """
        Prepare data for visualization by loading features and converting to DataFrame.
        """
        feature_data = self._load_features(slide_name, path_parts)

        # Extract components
        features = feature_data["features"]  # Shape: (N, D)
        feature_names = feature_data["feature_names"]  # Column names

        # Convert to DataFrame for easier manipulation
        df = pd.DataFrame(features, columns=feature_names)

        return {
            "df": df,
            "features": features,
            "feature_names": feature_names,
            "shape": features.shape,
        }

[docs]    def _load_cell_types(
        self, slide_name: str, path_parts: List[str]
    ) -> Optional[Dict[int, int]]:
        """
        Load cell types for the specified slide and path parts.
        Returns a dictionary mapping cell_id to cell_type.
        """
        try:
            # The slide_name indicates which slide folder to look in
            # path_parts structure: might be like [extractor_name, ...] or [extractor_name, model_name, ...]

            # Build the slide path from the dataset and slide name
            slide_path = self.config.dataset / slide_name
            logger.info(f"Looking for cell types in slide path: {slide_path}")

            # Try to find segmentation model from path_parts first
            segmentation_model = None
            for part in path_parts:
                try:
                    segmentation_model = ModelType(part)
                    logger.info(
                        f"Found segmentation model in path: {segmentation_model}"
                    )
                    break
                except ValueError:
                    continue

            # If no segmentation model in path, try to find any available cell detection
            cell_detection_base = slide_path / "cell_detection"

            if not cell_detection_base.exists():
                logger.warning(
                    f"Cell detection directory does not exist: {cell_detection_base}"
                )
                return None

            # If we found a segmentation model in the path, use it
            cell_detection_path = None
            if segmentation_model:
                test_path = (
                    cell_detection_base
                    / str(segmentation_model)
                    / "cell_detection.json"
                )
                if test_path.exists():
                    cell_detection_path = test_path
                    logger.info(f"Loading cell types from: {cell_detection_path}")
                else:
                    logger.warning(f"Cell detection file not found: {test_path}")
                    segmentation_model = None

            # If no model specified or file not found, search for any available model
            if not segmentation_model:
                logger.info("Searching for available cell detection files...")
                for model_type in ModelType:
                    test_path = (
                        cell_detection_base / str(model_type) / "cell_detection.json"
                    )
                    if test_path.exists():
                        segmentation_model = model_type
                        cell_detection_path = test_path
                        logger.info(f"Found cell detection for model: {model_type}")
                        break

                if not segmentation_model or cell_detection_path is None:
                    logger.warning("No cell detection files found for any model")
                    return None

            if cell_detection_path is None:
                logger.warning("Cell detection path is None")
                return None

            # Load the cell detection data
            with open(cell_detection_path, "r") as f:
                cell_data = json.load(f)

            cells = cell_data.get("cells", [])
            cell_type_dict: Dict[int, int] = {}
            for cell in cells:
                cell_id = cell.get("cell_id")
                cell_type = cell.get("type", 0)
                if cell_id is not None:
                    cell_type_dict[cell_id] = cell_type

            logger.info(f"Loaded {len(cell_type_dict)} cell types")
            return cell_type_dict

        except Exception as e:
            logger.error(f"Error loading cell types: {e}")
            import traceback

            logger.error(traceback.format_exc())
            return None

[docs]    def _prepare_data_with_cell_types(
        self, slide_name: str, path_parts: List[str]
    ) -> Dict[str, Any]:
        """
        Prepare data with cell types for visualization.
        """
        feature_data = self._load_features(slide_name, path_parts)

        # Extract components
        features = feature_data["features"]  # Shape: (N, D)
        feature_names = feature_data["feature_names"]  # Column names
        cell_indices = feature_data.get("cell_indices", {})  # cell_id -> index mapping

        logger.info(
            f"Feature data loaded: {features.shape[0]} cells, {features.shape[1]} features"
        )
        logger.info(
            f"Cell indices available: {len(cell_indices) > 0}, count: {len(cell_indices)}"
        )

        # Load cell types
        cell_types_dict = self._load_cell_types(slide_name, path_parts)

        logger.info(f"Cell types dict loaded: {cell_types_dict is not None}")
        if cell_types_dict:
            logger.info(f"Cell types count: {len(cell_types_dict)}")

        # Convert to DataFrame
        df = pd.DataFrame(features, columns=feature_names)

        # Add cell type information if available
        cell_types = None
        cell_type_names = None

        if cell_types_dict:
            if cell_indices:
                # Map cell types to feature indices using cell_indices mapping
                logger.info("Mapping cell types using cell_indices")
                cell_types = np.zeros(len(features), dtype=int)
                mapped_count = 0
                for cell_id, feature_idx in cell_indices.items():
                    if cell_id in cell_types_dict:
                        cell_types[feature_idx] = cell_types_dict[cell_id]
                        mapped_count += 1

                logger.info(f"Mapped {mapped_count} cells with types")

                # Filter out cell type 0 (background/unknown)
                valid_mask = cell_types != 0
                features = features[valid_mask]
                cell_types = cell_types[valid_mask]
                df = df[valid_mask].reset_index(drop=True)  # type: ignore

                logger.info(
                    f"Filtered out {np.sum(~valid_mask)} cells with type 0 (background/unknown)"
                )

                # Add cell type column to dataframe
                df["cell_type"] = cell_types

                # Create cell type names (excluding type 0)
                cell_type_names = {
                    int(cell_type): TYPE_NUCLEI_DICT.get(
                        int(cell_type), f"Type {int(cell_type)}"
                    )
                    for cell_type in np.unique(cell_types)
                    if int(cell_type) != 0
                }
            else:
                # No cell_indices mapping, assume direct correspondence if counts match
                logger.info("No cell_indices mapping available")
                if len(cell_types_dict) == len(features):
                    logger.info(
                        "Assuming direct cell ID to feature index mapping (counts match)"
                    )
                    cell_types = np.zeros(len(features), dtype=int)
                    # Sort cell_ids to create a consistent mapping
                    sorted_cell_ids = sorted(cell_types_dict.keys())
                    for idx, cell_id in enumerate(sorted_cell_ids):
                        if idx < len(cell_types):
                            cell_types[idx] = cell_types_dict[cell_id]

                    # Filter out cell type 0 (background/unknown)
                    valid_mask = cell_types != 0
                    features = features[valid_mask]
                    cell_types = cell_types[valid_mask]
                    df = df[valid_mask].reset_index(drop=True)  # type: ignore

                    logger.info(
                        f"Filtered out {np.sum(~valid_mask)} cells with type 0 (background/unknown)"
                    )

                    df["cell_type"] = cell_types
                    cell_type_names = {
                        int(cell_type): TYPE_NUCLEI_DICT.get(
                            int(cell_type), f"Type {int(cell_type)}"
                        )
                        for cell_type in np.unique(cell_types)
                        if int(cell_type) != 0
                    }
                    logger.info(
                        f"Created cell type mapping with {len(cell_type_names)} types"
                    )
                else:
                    logger.warning(
                        f"Cannot map cell types: cell_types_dict has {len(cell_types_dict)} entries but features has {len(features)} rows"
                    )

        return {
            "df": df,
            "features": features,
            "feature_names": feature_names,
            "shape": features.shape,
            "cell_types": cell_types,
            "cell_type_names": cell_type_names,
        }

[docs]    def _prepare_combined_data(
        self,
        slides: List[str],
        path_parts: List[str],
        max_samples_per_slide: int | None = 1000,
    ) -> Dict[str, Any]:
        """
        Prepare combined data from multiple slides for dataset-wide analysis.
        Samples up to max_samples_per_slide from each slide.
        If max_samples_per_slide is None, use all cells from each slide.
        """
        all_features: list[np.ndarray[Any, Any]] = []
        all_slide_labels: list[int] = []
        feature_names: list[str] | None = None
        total_cells = 0

        logger.info(f"Loading combined data from {len(slides)} slides...")

        for slide_idx, slide_name in enumerate(slides):
            try:
                data = self._prepare_data(slide_name, path_parts)
                features = data["features"]

                if feature_names is None:
                    feature_names = data["feature_names"]
                elif data["feature_names"] != feature_names:
                    logger.warning(
                        f"Feature names mismatch for slide {slide_name}, skipping"
                    )
                    continue

                # Sample if needed and max_samples_per_slide is specified
                if (
                    max_samples_per_slide is not None
                    and len(features) > max_samples_per_slide
                ):
                    np.random.seed(42 + slide_idx)  # Different seed per slide
                    indices = np.random.choice(
                        len(features), max_samples_per_slide, replace=False
                    )
                    features = features[indices]

                all_features.append(features)
                all_slide_labels.extend([slide_idx] * len(features))
                total_cells += len(features)
                logger.info(f"Loaded {len(features)} cells from slide {slide_name}")

            except Exception as e:
                logger.error(f"Error loading slide {slide_name}: {e}")
                continue

        if not all_features:
            raise ValueError("No data could be loaded from any slide")

        # Combine all features
        combined_features = np.vstack(all_features)
        slide_labels = np.array(all_slide_labels)

        # Create DataFrame
        df = pd.DataFrame(combined_features, columns=feature_names)

        logger.info(f"Combined dataset: {total_cells} cells from {len(slides)} slides")

        return {
            "df": df,
            "features": combined_features,
            "feature_names": feature_names,
            "shape": combined_features.shape,
            "slide_labels": slide_labels,
            "slides": slides,
        }

[docs]    def _prepare_combined_data_with_cell_types(
        self,
        slides: List[str],
        path_parts: List[str],
        max_samples_per_slide: int | None = 1000,
    ) -> Dict[str, Any]:
        """
        Prepare combined data with cell types from multiple slides.
        If max_samples_per_slide is None, use all cells from each slide.
        """
        all_features: list[np.ndarray[Any, Any]] = []
        all_cell_types: list[np.ndarray[Any, Any]] = []
        all_slide_labels: list[int] = []
        feature_names: list[str] | None = None
        cell_type_names: dict[int, str] | None = None
        total_cells = 0

        logger.info(
            f"Loading combined data with cell types from {len(slides)} slides..."
        )

        for slide_idx, slide_name in enumerate(slides):
            try:
                data = self._prepare_data_with_cell_types(slide_name, path_parts)

                # Skip if no cell types available
                if data["cell_types"] is None:
                    logger.warning(f"No cell types for slide {slide_name}, skipping")
                    continue

                features = data["features"]
                cell_types = data["cell_types"]

                if feature_names is None:
                    feature_names = data["feature_names"]
                    cell_type_names = data["cell_type_names"]
                elif data["feature_names"] != feature_names:
                    logger.warning(
                        f"Feature names mismatch for slide {slide_name}, skipping"
                    )
                    continue

                # Sample if needed and max_samples_per_slide is specified
                if (
                    max_samples_per_slide is not None
                    and len(features) > max_samples_per_slide
                ):
                    np.random.seed(42 + slide_idx)  # Different seed per slide
                    indices = np.random.choice(
                        len(features), max_samples_per_slide, replace=False
                    )
                    features = features[indices]
                    cell_types = cell_types[indices]

                all_features.append(features)
                all_cell_types.append(cell_types)
                all_slide_labels.extend([slide_idx] * len(features))
                total_cells += len(features)
                logger.info(f"Loaded {len(features)} cells from slide {slide_name}")

            except Exception as e:
                logger.error(f"Error loading slide {slide_name}: {e}")
                continue

        if not all_features:
            return {
                "df": pd.DataFrame(),
                "features": np.array([]),
                "feature_names": [],
                "shape": (0, 0),
                "cell_types": None,
                "cell_type_names": None,
                "slide_labels": np.array([]),
                "slides": slides,
            }

        # Combine all data
        combined_features = np.vstack(all_features)
        combined_cell_types = np.concatenate(all_cell_types)
        slide_labels = np.array(all_slide_labels)

        # Filter out cell type 0 (background/unknown/unlabeled cells)
        valid_mask = combined_cell_types != 0
        combined_features = combined_features[valid_mask]
        combined_cell_types = combined_cell_types[valid_mask]
        slide_labels = slide_labels[valid_mask]

        # Update total cells count after filtering
        total_cells = len(combined_features)
        logger.info(
            f"Filtered out {np.sum(~valid_mask)} cells with type 0 (background/unknown)"
        )

        # Create complete cell_type_names dictionary from all unique cell types (excluding type 0)
        all_unique_types = np.unique(combined_cell_types)
        cell_type_names = {
            int(cell_type): TYPE_NUCLEI_DICT.get(
                int(cell_type), f"Type {int(cell_type)}"
            )
            for cell_type in all_unique_types
            if int(cell_type) != 0  # Explicitly exclude type 0
        }
        logger.info(
            f"Created cell type mapping with {len(cell_type_names)} types: {list(cell_type_names.values())}"
        )

        # Create DataFrame
        df = pd.DataFrame(combined_features, columns=feature_names)

        logger.info(f"Combined dataset: {total_cells} cells from {len(slides)} slides")

        return {
            "df": df,
            "features": combined_features,
            "feature_names": feature_names,
            "shape": combined_features.shape,
            "cell_types": combined_cell_types,
            "cell_type_names": cell_type_names,
            "slide_labels": slide_labels,
            "slides": slides,
        }

[docs]    def _calculate_first_order_stats(
        self, data: np.ndarray[Any, Any]
    ) -> Dict[str, Any]:
        """Calculate first-order statistics for features."""
        # Convert to numpy array if needed
        data = self._to_numpy(data)

        return {
            "mean": np.mean(data, axis=0),
            "std": np.std(data, axis=0),
            "min": np.min(data, axis=0),
            "max": np.max(data, axis=0),
            "median": np.median(data, axis=0),
            "q25": np.percentile(data, 25, axis=0),
            "q75": np.percentile(data, 75, axis=0),
            "skewness": stats.skew(data, axis=0),  # type: ignore
            "kurtosis": stats.kurtosis(data, axis=0),  # type: ignore
        }

[docs]    def _create_correlation_matrix(
        self, df: pd.DataFrame, feature_names: List[str]
    ) -> go.Figure:
        """Create correlation matrix heatmap for features."""
        logger.info(
            "Computing correlation matrix... This may take a moment for large datasets."
        )

        # Limit to first 20 features to avoid overcrowding
        limited_features = feature_names[:20]
        correlation_matrix = df[limited_features].corr()

        fig = go.Figure(
            data=go.Heatmap(
                z=correlation_matrix.values,  # type: ignore
                x=correlation_matrix.columns,
                y=correlation_matrix.columns,
                colorscale="RdBu",
                zmid=0,
                text=correlation_matrix.values,  # type: ignore
                texttemplate="%{text:.2f}",
                textfont={"size": 8},
                hovertemplate="<b>%{x}</b><br><b>%{y}</b><br>Correlation: %{z:.3f}<extra></extra>",
            )
        )

        fig.update_layout(  # type: ignore
            title="Feature Correlation Matrix",
            xaxis_title="Features",
            yaxis_title="Features",
            width=800,
            height=600,
            font=dict(family="Montserrat"),
        )

        return fig

[docs]    def _create_distribution_plot(
        self, df: pd.DataFrame, feature_name: str
    ) -> go.Figure:
        """Create distribution plot for a specific feature."""
        fig = go.Figure()

        # Histogram
        fig.add_trace(  # type: ignore
            go.Histogram(
                x=df[feature_name], name="Distribution", nbinsx=50, opacity=0.7
            )
        )

        fig.update_layout(  # type: ignore
            title=f"Distribution of {feature_name}",
            xaxis_title=feature_name,
            yaxis_title="Frequency",
            font=dict(family="Montserrat"),
        )

        return fig

[docs]    def _create_pca_plot(
        self,
        features: np.ndarray[Any, Any],
        feature_names: List[str],
        n_samples: int = 1000,
    ) -> go.Figure:
        """Create PCA visualization."""
        logger.info(
            f"Computing PCA with {n_samples} samples... This may take a moment for large datasets."
        )

        # Convert and sample data
        features = self._to_numpy(features)
        features, _, sample_info = self._sample_data(features, None, n_samples)

        # Standardize and perform PCA
        pca_result, pca = self._standardize_and_fit_pca(features)

        # Create scatter plot
        fig = go.Figure()

        fig.add_trace(  # type: ignore
            go.Scatter(
                x=pca_result[:, 0],
                y=pca_result[:, 1],
                mode="markers",
                marker=dict(
                    size=5,
                    opacity=0.6,
                    color=np.arange(len(pca_result)),  # type: ignore
                    colorscale="Viridis",
                    showscale=True,
                    colorbar=dict(title="Cell Index"),
                ),
                text=[f"Cell {i}" for i in range(len(pca_result))],  # type: ignore
                hovertemplate="<b>%{text}</b><br>PC1: %{x:.2f}<br>PC2: %{y:.2f}<extra></extra>",
            )
        )

        fig.update_layout(  # type: ignore
            title=f"PCA Visualization{sample_info} (Explained Variance: PC1={pca.explained_variance_ratio_[0]:.2%}, PC2={pca.explained_variance_ratio_[1]:.2%})",  # type: ignore
            xaxis_title=f"PC1 ({pca.explained_variance_ratio_[0]:.2%})",  # type: ignore
            yaxis_title=f"PC2 ({pca.explained_variance_ratio_[1]:.2%})",  # type: ignore
            font=dict(family="Montserrat"),
        )

        return fig

[docs]    def _create_tsne_plot(
        self,
        features: np.ndarray[Any, Any],
        n_samples: int = 1000,
        perplexity: int = 30,
    ) -> go.Figure:
        """Create t-SNE visualization."""
        logger.info("Computing t-SNE (this may take a while)...")

        # Convert and sample data
        features = self._to_numpy(features)
        features, _, sample_info = self._sample_data(features, None, n_samples)

        # Adjust perplexity and perform t-SNE
        actual_perplexity = self._adjust_perplexity(features.shape[0], perplexity)
        tsne_result = self._standardize_and_fit_tsne(features, actual_perplexity)

        # Create scatter plot
        fig = go.Figure()

        fig.add_trace(  # type: ignore
            go.Scatter(
                x=tsne_result[:, 0],
                y=tsne_result[:, 1],
                mode="markers",
                marker=dict(
                    size=5,
                    opacity=0.6,
                    color=np.arange(len(tsne_result)),  # type: ignore
                    colorscale="Viridis",
                    showscale=True,
                    colorbar=dict(title="Cell Index"),
                ),
                text=[f"Cell {i}" for i in range(len(tsne_result))],  # type: ignore
                hovertemplate="<b>%{text}</b><br>t-SNE1: %{x:.2f}<br>t-SNE2: %{y:.2f}<extra></extra>",
            )
        )

        fig.update_layout(  # type: ignore
            title=f"t-SNE Visualization{sample_info}<br>(perplexity={actual_perplexity})",
            xaxis_title="t-SNE Component 1",
            yaxis_title="t-SNE Component 2",
            font=dict(family="Montserrat"),
        )

        return fig

[docs]    def _create_stats_table(
        self, stats_dict: Dict[str, Any], feature_names: List[str]
    ) -> go.Figure:
        """Create a table with first-order statistics."""

        # Create table data
        table_data = []
        for i, feature_name in enumerate(feature_names):
            if i < len(stats_dict["mean"]):  # Safety check
                table_data.append(  # type: ignore
                    [
                        feature_name,
                        f"{stats_dict['mean'][i]:.4f}",
                        f"{stats_dict['std'][i]:.4f}",
                        f"{stats_dict['min'][i]:.4f}",
                        f"{stats_dict['max'][i]:.4f}",
                        f"{stats_dict['median'][i]:.4f}",
                        f"{stats_dict['q25'][i]:.4f}",
                        f"{stats_dict['q75'][i]:.4f}",
                        f"{stats_dict['skewness'][i]:.4f}",
                        f"{stats_dict['kurtosis'][i]:.4f}",
                    ]
                )

        # Transpose the data for proper table formatting
        if table_data:
            transposed_data = list(zip(*table_data))  # type: ignore
        else:
            transposed_data = [[] for _ in range(10)]  # type: ignore

        fig = go.Figure(
            data=[
                go.Table(  # type: ignore
                    header=dict(
                        values=[
                            "Feature",
                            "Mean",
                            "Std",
                            "Min",
                            "Max",
                            "Median",
                            "Q25",
                            "Q75",
                            "Skewness",
                            "Kurtosis",
                        ],
                        fill_color="paleturquoise",
                        align="left",
                        font=dict(size=12),
                    ),
                    cells=dict(
                        values=transposed_data,  # type: ignore
                        fill_color="lavender",
                        align="left",
                        font=dict(size=10),
                    ),
                )
            ]
        )

        fig.update_layout(
            title="First-Order Statistics", font=dict(family="Montserrat")
        )  # type: ignore

        return fig

[docs]    def _create_distribution_comparison_plot(
        self,
        df: pd.DataFrame,
        feature_name: str,
        cell_types: np.ndarray[Any, Any],
        cell_type_names: Dict[int, str],
    ) -> go.Figure:
        """Create overlaid distribution plots for different cell types with normalized densities and KDE curves."""

        # Create figure with 2 subplots (histogram on top, KDE below)
        fig = make_subplots(
            rows=2,
            cols=1,
            row_heights=[0.55, 0.45],
            subplot_titles=(
                "Normalized Histograms",
                "Smoothed Kernel Density Estimates",
            ),
            vertical_spacing=0.12,
        )

        # Define a color palette for cell types
        colors = COLORS

        # Get overall data range for consistent x-axis
        data_min = df[feature_name].min()
        data_max = df[feature_name].max()
        data_range = data_max - data_min
        x_range = [data_min - 0.05 * data_range, data_max + 0.05 * data_range]

        # Add normalized histogram and KDE for each cell type
        for i, (cell_type, type_name) in enumerate(sorted(cell_type_names.items())):
            if cell_type == 0:  # Skip unknown type
                continue
            mask = cell_types == cell_type
            cell_count = mask.sum()
            if cell_count == 0:
                continue

            color = colors[i % len(colors)]
            # Calculate percentage of total cells
            total_cells = len(cell_types)
            percentage = (cell_count / total_cells) * 100

            data_values = cast(np.ndarray[Any, Any], df[feature_name][mask].values)  # type: ignore
            label = f"{type_name} (n={cell_count}, {percentage:.1f}%)"

            # Add histogram to first subplot
            fig.add_trace(  # type: ignore
                go.Histogram(
                    x=data_values,
                    name=label,
                    nbinsx=50,
                    opacity=0.6,
                    marker_color=color,
                    histnorm="probability density",
                    legendgroup=f"group{i}",
                    showlegend=True,
                ),
                row=1,
                col=1,
            )

            # Calculate and add KDE to second subplot
            if cell_count > 1:  # Need at least 2 points for KDE
                try:
                    kde = gaussian_kde(data_values)
                    # Create smooth x values for the KDE curve
                    x_smooth = cast(
                        np.ndarray[Any, Any], np.linspace(x_range[0], x_range[1], 300)
                    )
                    y_smooth = cast(np.ndarray[Any, Any], kde(x_smooth))

                    fig.add_trace(  # type: ignore
                        go.Scatter(
                            x=x_smooth,
                            y=y_smooth,
                            name=label,
                            mode="lines",
                            line=dict(color=color, width=2.5),
                            legendgroup=f"group{i}",
                            showlegend=False,  # Already shown in histogram
                            hovertemplate=f"<b>{type_name}</b><br>{feature_name}: %{{x:.3f}}<br>Density: %{{y:.3f}}<extra></extra>",
                        ),
                        row=2,
                        col=1,
                    )
                except Exception as e:
                    logger.warning(f"Could not compute KDE for {type_name}: {e}")

        # Update layout
        fig.update_layout(  # type: ignore
            title=f"Distribution Comparison of {feature_name} by Cell Type",
            barmode="overlay",
            legend=dict(
                orientation="v",
                yanchor="top",
                y=0.98,
                xanchor="right",
                x=0.99,
                bgcolor="rgba(255, 255, 255, 0.8)",
                bordercolor="rgba(0, 0, 0, 0.2)",
                borderwidth=1,
            ),
            hovermode="closest",
            height=800,
            font=dict(family="Montserrat"),
        )

        # Update x and y axes
        fig.update_xaxes(title_text=feature_name, row=1, col=1, range=x_range)  # type: ignore
        fig.update_xaxes(title_text=feature_name, row=2, col=1, range=x_range)  # type: ignore
        fig.update_yaxes(title_text="Probability Density", row=1, col=1)  # type: ignore
        fig.update_yaxes(title_text="Density", row=2, col=1)  # type: ignore

        return fig

[docs]    def _calculate_js_divergence_table(
        self,
        df: pd.DataFrame,
        cell_types: np.ndarray[Any, Any],
        cell_type_names: Dict[int, str],
        reference_cell_type: int,
    ) -> pd.DataFrame:
        """
        Calculate Jensen-Shannon divergence between reference cell type and all other types
        for each feature in the dataframe.

        Returns a DataFrame where:
        - Rows are features
        - Columns are cell types (excluding reference type)
        - Values are JS divergence scores
        """
        from scipy.spatial.distance import jensenshannon

        # Get all features (columns in df)
        features = [col for col in df.columns if col != "cell_type"]

        # Get other cell types (excluding reference and unknown type 0)
        other_types = sorted(
            [
                ct
                for ct in cell_type_names.keys()
                if ct != reference_cell_type and ct != 0
            ]
        )

        # Initialize results dictionary
        results: dict[str, list[np.float64 | float]] = {
            cell_type_names[ct]: [] for ct in other_types
        }

        # Calculate JS divergence for each feature
        for feature in features:
            ref_mask = cell_types == reference_cell_type
            ref_values = cast(np.ndarray[Any, Any], df[feature][ref_mask].values)  # type: ignore

            if len(ref_values) < 2:
                # Not enough data for reference type
                for ct in other_types:
                    results[cell_type_names[ct]].append(np.nan)
                continue

            # Calculate histogram for reference type
            ref_counts, bin_edges = np.histogram(ref_values, bins=50, density=False)

            for ct in other_types:
                ct_mask = cell_types == ct
                ct_values = cast(np.ndarray[Any, Any], df[feature][ct_mask].values)  # type: ignore

                if len(ct_values) < 2:
                    results[cell_type_names[ct]].append(np.nan)
                    continue

                # Calculate histogram for comparison type using same bins
                ct_counts, _ = np.histogram(ct_values, bins=bin_edges, density=False)

                # Add small epsilon to avoid log(0) and normalize to probability
                epsilon = 1e-10
                ref_prob = (ref_counts + epsilon) / (
                    ref_counts.sum() + epsilon * len(ref_counts)
                )
                ct_prob = (ct_counts + epsilon) / (
                    ct_counts.sum() + epsilon * len(ct_counts)
                )

                # Calculate Jensen-Shannon divergence
                js_div = jensenshannon(ref_prob, ct_prob)
                results[cell_type_names[ct]].append(js_div)

        # Create DataFrame
        js_df = pd.DataFrame(results, index=features)
        js_df.index.name = "Feature"

        return js_df

[docs]    def _create_pca_by_cell_type(
        self,
        features: np.ndarray[Any, Any] | torch.Tensor,
        cell_types: np.ndarray[Any, Any],
        cell_type_names: Dict[int, str],
        n_samples: int = 1000,
    ) -> go.Figure:
        """Create PCA visualization colored by cell type."""
        logger.info("Computing PCA with cell types...")

        # Convert and sample data
        features = self._to_numpy(features)
        features, sampled_cell_types, sample_info = self._sample_data(
            features, cell_types, n_samples
        )
        # sampled_cell_types will not be None since we passed cell_types
        assert sampled_cell_types is not None
        cell_types = sampled_cell_types

        # Standardize and perform PCA
        pca_result, pca = self._standardize_and_fit_pca(features)

        # Create scatter plot using unified method
        title = (
            f"PCA by Cell Type<br>Explained Variance: "
            f"PC1={pca.explained_variance_ratio_[0]:.2%}, "  # type: ignore
            f"PC2={pca.explained_variance_ratio_[1]:.2%}"  # type: ignore
        )
        xlabel = f"PC1 ({pca.explained_variance_ratio_[0]:.2%})"  # type: ignore
        ylabel = f"PC2 ({pca.explained_variance_ratio_[1]:.2%})"  # type: ignore

        return self._create_scatter_by_labels(
            pca_result, cell_types, cell_type_names, title, xlabel, ylabel, sample_info
        )

[docs]    def _create_tsne_by_cell_type(
        self,
        features: np.ndarray[Any, Any] | torch.Tensor,
        cell_types: np.ndarray[Any, Any],
        cell_type_names: Dict[int, str],
        n_samples: int = 1000,
        perplexity: int = 30,
    ) -> go.Figure:
        """Create t-SNE visualization colored by cell type."""
        logger.info("Computing t-SNE with cell types (this may take a while)...")

        # Convert and sample data
        features = self._to_numpy(features)
        features, sampled_cell_types, sample_info = self._sample_data(
            features, cell_types, n_samples
        )
        # sampled_cell_types will not be None since we passed cell_types
        assert sampled_cell_types is not None
        cell_types = sampled_cell_types

        # Adjust perplexity and perform t-SNE
        actual_perplexity = self._adjust_perplexity(features.shape[0], perplexity)
        tsne_result = self._standardize_and_fit_tsne(features, actual_perplexity)

        # Create scatter plot using unified method
        title = f"t-SNE by Cell Type<br>(perplexity={actual_perplexity})"
        xlabel = "t-SNE Component 1"
        ylabel = "t-SNE Component 2"

        return self._create_scatter_by_labels(
            tsne_result, cell_types, cell_type_names, title, xlabel, ylabel, sample_info
        )

[docs]    def _create_combined_pca_plot(
        self,
        features: np.ndarray[Any, Any],
        slide_labels: np.ndarray[Any, Any],
        slides: List[str],
    ) -> go.Figure:
        """Create PCA visualization colored by slide for combined dataset."""
        logger.info("Computing PCA for combined dataset...")

        # Convert to numpy array if needed
        if hasattr(features, "numpy"):
            features = features.numpy()  # type: ignore
        elif hasattr(features, "detach"):
            features = features.detach().numpy()  # type: ignore
        features = np.array(features)  # type: ignore

        # Standardize and perform PCA
        scaler = StandardScaler()
        features_scaled = scaler.fit_transform(features)  # type: ignore
        pca = PCA(n_components=2)
        pca_result = pca.fit_transform(features_scaled)  # type: ignore

        # Create scatter plot with different colors for each slide
        fig = go.Figure()
        colors = COLORS

        for i, slide_name in enumerate(slides):
            mask = slide_labels == i
            if mask.sum() == 0:
                continue

            color = colors[i % len(colors)]
            fig.add_trace(  # type: ignore
                go.Scatter(
                    x=pca_result[mask, 0],
                    y=pca_result[mask, 1],
                    mode="markers",
                    name=slide_name,
                    marker=dict(
                        size=5,
                        opacity=0.6,
                        color=color,
                    ),
                    hovertemplate=f"<b>{slide_name}</b><br>PC1: %{{x:.2f}}<br>PC2: %{{y:.2f}}<extra></extra>",
                )
            )

        fig.update_layout(  # type: ignore
            title=f"PCA - Combined Dataset<br>Explained Variance: PC1={pca.explained_variance_ratio_[0]:.2%}, PC2={pca.explained_variance_ratio_[1]:.2%}<br>Total cells: {len(features)}",  # type: ignore
            xaxis_title=f"PC1 ({pca.explained_variance_ratio_[0]:.2%})",  # type: ignore
            yaxis_title=f"PC2 ({pca.explained_variance_ratio_[1]:.2%})",  # type: ignore
            legend=dict(orientation="v", yanchor="top", y=1, xanchor="right", x=1),
            hovermode="closest",
            width=900,
            height=700,
            font=dict(family="Montserrat"),
        )

        return fig

[docs]    def _create_combined_tsne_plot(
        self,
        features: np.ndarray[Any, Any],
        slide_labels: np.ndarray[Any, Any],
        slides: List[str],
        perplexity: int = 30,
    ) -> go.Figure:
        """Create t-SNE visualization colored by slide for combined dataset."""
        logger.info("Computing t-SNE for combined dataset...")

        # Convert to numpy array if needed
        if hasattr(features, "numpy"):
            features = features.numpy()  # type: ignore
        elif hasattr(features, "detach"):
            features = features.detach().numpy()  # type: ignore
        features = np.array(features)  # type: ignore

        # Adjust perplexity if needed
        max_perplexity = (features.shape[0] - 1) // 3
        actual_perplexity = min(perplexity, max_perplexity)
        if actual_perplexity != perplexity:
            logger.warning(
                f"Perplexity adjusted from {perplexity} to {actual_perplexity} based on dataset size"
            )

        # Standardize and perform t-SNE
        scaler = StandardScaler()
        features_scaled = scaler.fit_transform(features)  # type: ignore
        tsne = TSNE(
            n_components=2, perplexity=actual_perplexity, random_state=42, n_jobs=-1
        )
        tsne_result = tsne.fit_transform(features_scaled)  # type: ignore

        # Create scatter plot with different colors for each slide
        fig = go.Figure()
        colors = COLORS

        for i, slide_name in enumerate(slides):
            mask = slide_labels == i
            if mask.sum() == 0:
                continue

            color = colors[i % len(colors)]
            fig.add_trace(  # type: ignore
                go.Scatter(
                    x=tsne_result[mask, 0],
                    y=tsne_result[mask, 1],
                    mode="markers",
                    name=slide_name,
                    marker=dict(
                        size=5,
                        opacity=0.6,
                        color=color,
                    ),
                    hovertemplate=f"<b>{slide_name}</b><br>t-SNE1: %{{x:.2f}}<br>t-SNE2: %{{y:.2f}}<extra></extra>",
                )
            )

        fig.update_layout(  # type: ignore
            title=f"t-SNE - Combined Dataset<br>(perplexity={actual_perplexity})<br>Total cells: {len(features)}",
            xaxis_title="t-SNE Component 1",
            yaxis_title="t-SNE Component 2",
            legend=dict(orientation="v", yanchor="top", y=1, xanchor="right", x=1),
            hovermode="closest",
            width=900,
            height=700,
            font=dict(family="Montserrat"),
        )

        return fig

[docs]    def _create_combined_cell_type_distribution(
        self,
        cell_types: np.ndarray[Any, Any],
        cell_type_names: Dict[int, str],
        slide_labels: np.ndarray[Any, Any],
        slides: List[str],
    ) -> go.Figure:
        """Create stacked bar chart showing cell type distribution across slides."""

        # Calculate cell type counts per slide
        data_for_plot: list[dict[str, list[int] | str]] = []

        for cell_type, type_name in sorted(cell_type_names.items()):
            if cell_type == 0:  # Skip unknown
                continue

            counts: list[int] = []
            for i in range(len(slides)):
                mask = (slide_labels == i) & (cell_types == cell_type)
                counts.append(mask.sum())

            data_for_plot.append({"type_name": type_name, "counts": counts})

        # Create grouped bar chart
        fig = go.Figure()
        colors = COLORS

        for i, data in enumerate(data_for_plot):
            color = colors[i % len(colors)]
            fig.add_trace(  # type: ignore
                go.Bar(
                    name=data["type_name"],
                    x=slides,
                    y=data["counts"],
                    marker_color=color,
                    hovertemplate=f"<b>{data['type_name']}</b><br>Slide: %{{x}}<br>Count: %{{y}}<extra></extra>",
                )
            )

        fig.update_layout(  # type: ignore
            title="Cell Type Distribution Across Slides",
            xaxis_title="Slide",
            yaxis_title="Cell Count",
            barmode="group",
            legend=dict(orientation="v", yanchor="top", y=1, xanchor="right", x=1),
            height=600,
            font=dict(family="Montserrat"),
        )

        return fig

[docs]    def _create_combined_distribution_comparison(
        self,
        df: pd.DataFrame,
        feature_name: str,
        cell_types: np.ndarray[Any, Any],
        cell_type_names: Dict[int, str],
    ) -> go.Figure:
        """Create distribution comparison across all cell types for combined dataset."""

        # Create figure with 2 subplots
        fig = make_subplots(
            rows=2,
            cols=1,
            row_heights=[0.55, 0.45],
            subplot_titles=(
                "Normalized Histograms",
                "Smoothed Kernel Density Estimates",
            ),
            vertical_spacing=0.12,
        )

        colors = COLORS

        # Get overall data range
        data_min = df[feature_name].min()
        data_max = df[feature_name].max()
        data_range = data_max - data_min
        x_range = [data_min - 0.05 * data_range, data_max + 0.05 * data_range]

        # Add histogram and KDE for each cell type
        for i, (cell_type, type_name) in enumerate(sorted(cell_type_names.items())):
            if cell_type == 0:  # Skip unknown
                continue

            mask = cell_types == cell_type
            cell_count = mask.sum()
            if cell_count == 0:
                continue

            data = df[feature_name][mask]
            color = colors[i % len(colors)]
            legend_name = f"{type_name} (n={cell_count})"

            # Normalized histogram
            fig.add_trace(  # type: ignore
                go.Histogram(
                    x=data,
                    name=legend_name,
                    marker_color=color,
                    opacity=0.6,
                    histnorm="probability density",
                    nbinsx=50,
                    showlegend=True,
                    legendgroup=type_name,
                    hovertemplate=f"<b>{type_name}</b><br>{feature_name}: %{{x:.2f}}<br>Density: %{{y:.4f}}<extra></extra>",
                ),
                row=1,
                col=1,
            )

            # KDE curve
            if len(data) > 1:
                try:
                    kde = gaussian_kde(data)
                    x_vals = cast(
                        np.ndarray[Any, Any], np.linspace(x_range[0], x_range[1], 200)
                    )
                    kde_vals = cast(np.ndarray[Any, Any], kde(x_vals))

                    fig.add_trace(  # type: ignore
                        go.Scatter(
                            x=x_vals,
                            y=kde_vals,
                            name=legend_name,
                            mode="lines",
                            line=dict(color=color, width=2),
                            showlegend=False,
                            legendgroup=type_name,
                            hovertemplate=f"<b>{type_name}</b><br>{feature_name}: %{{x:.2f}}<br>Density: %{{y:.4f}}<extra></extra>",
                        ),
                        row=2,
                        col=1,
                    )
                except Exception as e:
                    logger.warning(f"Could not compute KDE for {type_name}: {e}")

        fig.update_layout(  # type: ignore
            title=f"Distribution Comparison of {feature_name} by Cell Type (Combined Dataset)",
            barmode="overlay",
            legend=dict(
                orientation="v",
                yanchor="top",
                y=0.98,
                xanchor="right",
                x=0.99,
                bgcolor="rgba(255, 255, 255, 0.8)",
                bordercolor="rgba(0, 0, 0, 0.2)",
                borderwidth=1,
            ),
            hovermode="closest",
            height=800,
            font=dict(family="Montserrat"),
        )

        fig.update_xaxes(title_text=feature_name, row=1, col=1, range=x_range)  # type: ignore
        fig.update_xaxes(title_text=feature_name, row=2, col=1, range=x_range)  # type: ignore
        fig.update_yaxes(title_text="Probability Density", row=1, col=1)  # type: ignore
        fig.update_yaxes(title_text="Density", row=2, col=1)  # type: ignore

        return fig

[docs]    def _create_combined_pca_by_cell_type(
        self,
        features: np.ndarray[Any, Any] | torch.Tensor,
        cell_types: np.ndarray[Any, Any],
        cell_type_names: Dict[int, str],
    ) -> go.Figure:
        """Create PCA visualization colored by cell type for combined dataset."""
        logger.info("Computing PCA by cell type for combined dataset...")

        # Convert to numpy array and perform PCA
        features = self._to_numpy(features)
        pca_result, pca = self._standardize_and_fit_pca(features)

        # Create scatter plot using unified method
        title = (
            f"PCA by Cell Type - Combined Dataset<br>"
            f"Explained Variance: PC1={pca.explained_variance_ratio_[0]:.2%}, "  # type: ignore
            f"PC2={pca.explained_variance_ratio_[1]:.2%}<br>"  # type: ignore
            f"Total cells: {len(features)}"
        )
        xlabel = f"PC1 ({pca.explained_variance_ratio_[0]:.2%})"  # type: ignore
        ylabel = f"PC2 ({pca.explained_variance_ratio_[1]:.2%})"  # type: ignore

        fig = self._create_scatter_by_labels(
            pca_result, cell_types, cell_type_names, title, xlabel, ylabel
        )
        fig.update_layout(width=900, height=700)  # type: ignore
        return fig

[docs]    def _create_combined_tsne_by_cell_type(
        self,
        features: np.ndarray[Any, Any] | torch.Tensor,
        cell_types: np.ndarray[Any, Any],
        cell_type_names: Dict[int, str],
        perplexity: int = 30,
    ) -> go.Figure:
        """Create t-SNE visualization colored by cell type for combined dataset."""
        logger.info("Computing t-SNE by cell type for combined dataset...")

        # Convert to numpy array and perform t-SNE
        features = self._to_numpy(features)
        actual_perplexity = self._adjust_perplexity(features.shape[0], perplexity)
        tsne_result = self._standardize_and_fit_tsne(features, actual_perplexity)

        # Create scatter plot using unified method
        title = (
            f"t-SNE by Cell Type - Combined Dataset<br>"
            f"(perplexity={actual_perplexity})<br>"
            f"Total cells: {len(features)}"
        )
        xlabel = "t-SNE Component 1"
        ylabel = "t-SNE Component 2"

        fig = self._create_scatter_by_labels(
            tsne_result, cell_types, cell_type_names, title, xlabel, ylabel
        )
        fig.update_layout(width=900, height=700)  # type: ignore
        return fig

        fig.update_layout(  # type: ignore
            title=f"t-SNE by Cell Type - Combined Dataset<br>(perplexity={actual_perplexity})<br>Total cells: {len(features)}",
            xaxis_title="t-SNE Component 1",
            yaxis_title="t-SNE Component 2",
            legend=dict(orientation="v", yanchor="top", y=1, xanchor="right", x=1),
            hovermode="closest",
            width=900,
            height=700,
        )

        return fig

[docs]    def visualize(self, host: str = "127.0.0.1", port: int = 8050, debug: bool = True):
        """
        Launch the Dash web application for feature visualization.
        """
        app = dash.Dash(__name__)

        # Get available slides
        available_slides = self._get_available_slides()

        # Generate dynamic dropdowns based on max levels
        def create_dropdown_components():
            components: list[Any] = []
            for level in range(self.max_dropdown_levels):
                components.append(
                    html.Div(
                        [
                            html.Label(f"Level {level + 1}:"),
                            dcc.Dropdown(
                                id=f"dropdown-level-{level}",
                                style={"marginBottom": 10},
                            ),
                        ],
                        style={
                            "width": f"{90 // min(3, self.max_dropdown_levels)}%",
                            "display": "inline-block",
                            "marginRight": "2%",
                        },
                        id=f"dropdown-container-{level}",
                    )
                )
            return components

        # Define the layout
        app.layout = html.Div(
            [
                html.H1(
                    "Feature Visualizer Dashboard",
                    style={
                        "textAlign": "center",
                        "marginBottom": 30,
                        "color": "#2c3e50",
                        "fontWeight": "bold",
                        "fontFamily": "'Segoe UI', 'Helvetica Neue', Arial, sans-serif",
                        "padding": "20px",
                        "backgroundColor": "#ecf0f1",
                        "borderRadius": "10px",
                        "boxShadow": "0 2px 4px rgba(0,0,0,0.1)",
                    },
                ),
                # Analysis Mode Selection
                html.Div(
                    [
                        html.H3(
                            "Analysis Mode",
                            style={
                                "marginBottom": 15,
                                "color": "#2c3e50",
                                "fontFamily": "'Segoe UI', Arial, sans-serif",
                            },
                        ),
                        dcc.RadioItems(
                            id="analysis-mode",
                            options=[
                                {
                                    "label": " Dataset-Wide Analysis (All Slides Combined)",
                                    "value": "combined",
                                },
                                {
                                    "label": " Slide-Specific Analysis",
                                    "value": "single",
                                },
                            ],
                            value="combined",
                            labelStyle={"display": "block", "marginBottom": "10px"},
                            style={"fontSize": "16px"},
                        ),
                    ],
                    style={
                        "marginBottom": 30,
                        "padding": "20px",
                        "backgroundColor": "#ffffff",
                        "borderRadius": "10px",
                        "boxShadow": "0 2px 4px rgba(0,0,0,0.1)",
                    },
                ),
                # Slide Selection
                html.Div(
                    [
                        html.H3(
                            "Slide Selection",
                            style={
                                "marginBottom": 20,
                                "color": "#2c3e50",
                                "fontFamily": "'Segoe UI', Arial, sans-serif",
                            },
                        ),
                        html.Div(
                            [
                                html.Label("Select Slide:"),
                                dcc.Dropdown(
                                    id="slide-dropdown",
                                    options=[
                                        {"label": slide, "value": slide}
                                        for slide in available_slides
                                    ],
                                    value=available_slides[0]
                                    if available_slides
                                    else None,
                                    style={"marginBottom": 10},
                                ),
                            ],
                        ),
                    ],
                    style={
                        "marginBottom": 30,
                        "padding": "20px",
                        "backgroundColor": "#ffffff",
                        "borderRadius": "10px",
                        "boxShadow": "0 2px 4px rgba(0,0,0,0.1)",
                    },
                    id="slide-selection-container",
                ),
                # Dynamic Controls
                html.Div(
                    [
                        html.H3(
                            "Feature Path Selection",
                            style={
                                "marginBottom": 20,
                                "color": "#2c3e50",
                                "fontFamily": "'Segoe UI', Arial, sans-serif",
                            },
                        ),
                        html.Div(
                            create_dropdown_components(),
                            style={"marginBottom": 20},
                        ),
                    ],
                    style={
                        "marginBottom": 30,
                        "padding": "20px",
                        "backgroundColor": "#ffffff",
                        "borderRadius": "10px",
                        "boxShadow": "0 2px 4px rgba(0,0,0,0.1)",
                    },
                ),
                # Data info
                html.Div(id="data-info", style={"marginBottom": 30}),
                # PCA and Correlation Matrix Section
                html.Div(
                    [
                        html.H3(
                            "Overview Analysis",
                            style={
                                "marginBottom": 20,
                                "color": "#2c3e50",
                                "fontFamily": "'Segoe UI', Arial, sans-serif",
                            },
                        ),
                        # Sample Size Input
                        html.Div(
                            [
                                html.Label("Sample Size (for performance):"),
                                dcc.Input(
                                    id="pca-sample-size",
                                    type="number",
                                    value=1000,
                                    min=100,
                                    max=10000,
                                    step=100,
                                    style={"marginLeft": 10, "width": "100px"},
                                ),
                                html.Small(
                                    " (Randomly samples this many points for PCA and t-SNE)",
                                    style={"marginLeft": 10, "color": "gray"},
                                ),
                            ],
                            style={"marginBottom": 15},
                        ),
                        # t-SNE Perplexity Input
                        html.Div(
                            [
                                html.Label("t-SNE Perplexity:"),
                                dcc.Input(
                                    id="overview-tsne-perplexity",
                                    type="number",
                                    value=30,
                                    min=5,
                                    max=50,
                                    step=5,
                                    style={"marginLeft": 10, "width": "100px"},
                                ),
                                html.Small(
                                    " (Higher values preserve global structure)",
                                    style={"marginLeft": 10, "color": "gray"},
                                ),
                            ],
                            style={"marginBottom": 15},
                        ),
                        dcc.Tabs(
                            id="overview-tabs",
                            value="correlation",
                            children=[
                                dcc.Tab(
                                    label="Correlation Matrix", value="correlation"
                                ),
                                dcc.Tab(label="PCA", value="pca"),
                                dcc.Tab(label="t-SNE", value="tsne"),
                            ],
                        ),
                        dcc.Loading(
                            id="loading-overview",
                            type="default",
                            children=[
                                html.Div(id="overview-content", style={"marginTop": 20})
                            ],
                            style={"minHeight": "400px"},
                            color="#1f77b4",
                        ),
                    ],
                    style={
                        "marginBottom": 40,
                        "padding": "20px",
                        "backgroundColor": "#ffffff",
                        "borderRadius": "10px",
                        "boxShadow": "0 2px 4px rgba(0,0,0,0.1)",
                    },
                ),
                # Feature Selection Section
                html.Div(
                    [
                        html.H3(
                            "Feature Analysis",
                            style={
                                "marginBottom": 20,
                                "color": "#2c3e50",
                                "fontFamily": "'Segoe UI', Arial, sans-serif",
                            },
                        ),
                        html.Div(
                            [
                                html.Label("Select Feature:"),
                                dcc.Dropdown(
                                    id="feature-dropdown", style={"marginBottom": 20}
                                ),
                            ],
                            style={"marginBottom": 20},
                        ),
                        # Tabs for feature-specific visualizations
                        dcc.Tabs(
                            id="feature-tabs",
                            value="distribution",
                            children=[
                                dcc.Tab(label="Distribution", value="distribution"),
                                dcc.Tab(label="Statistics", value="stats"),
                            ],
                        ),
                        dcc.Loading(
                            id="loading-feature",
                            type="default",
                            children=[
                                html.Div(id="feature-content", style={"marginTop": 20})
                            ],
                            style={"minHeight": "300px"},
                            color="#1f77b4",
                        ),
                    ],
                    style={
                        "marginBottom": 40,
                        "padding": "20px",
                        "backgroundColor": "#ffffff",
                        "borderRadius": "10px",
                        "boxShadow": "0 2px 4px rgba(0,0,0,0.1)",
                    },
                ),
                # Combined Dataset Analysis Section
                html.Div(
                    [
                        html.H3(
                            "Dataset-Wide Analysis",
                            style={
                                "marginBottom": 20,
                                "color": "#2c3e50",
                                "fontWeight": "bold",
                                "fontFamily": "'Segoe UI', Arial, sans-serif",
                            },
                        ),
                        html.P(
                            "Analyze all slides combined to understand global patterns",
                            style={"color": "#7f8c8d", "marginBottom": 20},
                        ),
                        # Sample size control for combined analysis
                        html.Div(
                            [
                                html.Label("Samples per Slide:"),
                                dcc.Input(
                                    id="combined-sample-size",
                                    type="number",
                                    value=1000,
                                    min=100,
                                    max=5000,
                                    step=100,
                                    style={"marginLeft": 10, "width": "100px"},
                                ),
                                html.Small(
                                    " (Number of cells to sample from each slide)",
                                    style={"marginLeft": 10, "color": "gray"},
                                ),
                            ],
                            style={"marginBottom": 15},
                        ),
                        # t-SNE Perplexity for combined
                        html.Div(
                            [
                                html.Label("t-SNE Perplexity:"),
                                dcc.Input(
                                    id="combined-tsne-perplexity",
                                    type="number",
                                    value=30,
                                    min=5,
                                    max=50,
                                    step=5,
                                    style={"marginLeft": 10, "width": "100px"},
                                ),
                            ],
                            style={"marginBottom": 20},
                        ),
                        dcc.Tabs(
                            id="combined-tabs",
                            value="combined-pca",
                            children=[
                                dcc.Tab(label="PCA", value="combined-pca"),
                                dcc.Tab(label="t-SNE", value="combined-tsne"),
                                dcc.Tab(
                                    label="Cell Type Distribution",
                                    value="combined-celltype-dist",
                                ),
                            ],
                        ),
                        dcc.Loading(
                            id="loading-combined",
                            type="default",
                            children=[
                                html.Div(id="combined-content", style={"marginTop": 20})
                            ],
                            style={"minHeight": "400px"},
                            color="#1f77b4",
                        ),
                    ],
                    id="combined-analysis-section",
                    style={
                        "marginBottom": 40,
                        "padding": "20px",
                        "backgroundColor": "#e8f4f8",
                        "borderRadius": "10px",
                    },
                ),
                # Combined Cell Type Comparison Section
                html.Div(
                    [
                        html.H3(
                            "Combined Cell Type Comparison",
                            style={
                                "marginBottom": 20,
                                "color": "#2c3e50",
                                "fontWeight": "bold",
                                "fontFamily": "'Segoe UI', Arial, sans-serif",
                            },
                        ),
                        html.P(
                            "Compare features across cell types using all slides combined",
                            style={"color": "#7f8c8d", "marginBottom": 20},
                        ),
                        # Sample size control for combined cell type comparison
                        html.Div(
                            [
                                html.Label("Samples per Slide:"),
                                dcc.Input(
                                    id="combined-comparison-sample-size",
                                    type="number",
                                    value=1000,
                                    min=100,
                                    max=5000,
                                    step=100,
                                    style={"marginLeft": 10, "width": "100px"},
                                ),
                                html.Small(
                                    " (Number of cells to sample from each slide)",
                                    style={"marginLeft": 10, "color": "gray"},
                                ),
                            ],
                            style={"marginBottom": 15},
                        ),
                        # t-SNE Perplexity
                        html.Div(
                            [
                                html.Label("t-SNE Perplexity:"),
                                dcc.Input(
                                    id="combined-comparison-tsne-perplexity",
                                    type="number",
                                    value=30,
                                    min=5,
                                    max=50,
                                    step=5,
                                    style={"marginLeft": 10, "width": "100px"},
                                ),
                            ],
                            style={"marginBottom": 15},
                        ),
                        # Feature selector for distribution comparison
                        html.Div(
                            [
                                html.Label(
                                    "Select Feature for Distribution Comparison:"
                                ),
                                dcc.Dropdown(
                                    id="combined-comparison-feature-dropdown",
                                    style={"marginBottom": 20},
                                ),
                            ],
                            style={"marginBottom": 20},
                        ),
                        # Cell type selector for JS divergence table
                        html.Div(
                            [
                                html.Label(
                                    "Select Reference Cell Type for JS Divergence:"
                                ),
                                dcc.Dropdown(
                                    id="combined-js-celltype-dropdown",
                                    style={"marginBottom": 20},
                                ),
                            ],
                            style={"marginBottom": 20},
                        ),
                        dcc.Tabs(
                            id="combined-comparison-tabs",
                            value="combined-dist-comparison",
                            children=[
                                dcc.Tab(
                                    label="Distribution Comparison",
                                    value="combined-dist-comparison",
                                ),
                                dcc.Tab(
                                    label="PCA by Cell Type",
                                    value="combined-pca-celltype",
                                ),
                                dcc.Tab(
                                    label="t-SNE by Cell Type",
                                    value="combined-tsne-celltype",
                                ),
                                dcc.Tab(
                                    label="JS Divergence Table",
                                    value="combined-js-divergence",
                                ),
                            ],
                        ),
                        dcc.Loading(
                            id="loading-combined-comparison",
                            type="default",
                            children=[
                                html.Div(
                                    id="combined-comparison-content",
                                    style={"marginTop": 20},
                                )
                            ],
                            style={"minHeight": "400px"},
                            color="#1f77b4",
                        ),
                    ],
                    id="combined-cell-type-comparison-section",
                    style={
                        "marginBottom": 40,
                        "padding": "20px",
                        "backgroundColor": "#e8f4f8",
                        "borderRadius": "10px",
                    },
                ),
                # Cell Type Comparison Section
                html.Div(
                    [
                        html.H3(
                            "Cell Type Comparison",
                            style={
                                "marginBottom": 20,
                                "color": "#2c3e50",
                                "fontWeight": "bold",
                                "fontFamily": "'Segoe UI', Arial, sans-serif",
                            },
                        ),
                        html.P(
                            "Compare features across different cell types",
                            style={"color": "#7f8c8d", "marginBottom": 20},
                        ),
                        # Sample size control
                        html.Div(
                            [
                                html.Label("Sample Size for Dimensionality Reduction:"),
                                dcc.Input(
                                    id="comparison-sample-size",
                                    type="number",
                                    value=1000,
                                    min=100,
                                    max=10000,
                                    step=100,
                                    style={"marginLeft": 10, "width": "100px"},
                                ),
                                html.Small(
                                    " (Affects PCA and t-SNE plots)",
                                    style={"marginLeft": 10, "color": "gray"},
                                ),
                            ],
                            style={"marginBottom": 15},
                        ),
                        # t-SNE perplexity control
                        html.Div(
                            [
                                html.Label("t-SNE Perplexity:"),
                                dcc.Input(
                                    id="tsne-perplexity",
                                    type="number",
                                    value=30,
                                    min=5,
                                    max=50,
                                    step=5,
                                    style={"marginLeft": 10, "width": "100px"},
                                ),
                                html.Small(
                                    " (Higher values preserve global structure)",
                                    style={"marginLeft": 10, "color": "gray"},
                                ),
                            ],
                            style={"marginBottom": 15},
                        ),
                        # Feature selector for distribution comparison
                        html.Div(
                            [
                                html.Label(
                                    "Select Feature for Distribution Comparison:"
                                ),
                                dcc.Dropdown(
                                    id="comparison-feature-dropdown",
                                    style={"marginBottom": 20},
                                ),
                            ],
                            style={"marginBottom": 20},
                        ),
                        # Cell type selector for JS divergence table
                        html.Div(
                            [
                                html.Label(
                                    "Select Reference Cell Type for Divergence Analysis:"
                                ),
                                dcc.Dropdown(
                                    id="js-celltype-dropdown",
                                    style={"marginBottom": 20},
                                ),
                            ],
                            style={"marginBottom": 20},
                        ),
                        dcc.Tabs(
                            id="comparison-tabs",
                            value="dist-comparison",
                            children=[
                                dcc.Tab(
                                    label="Distribution Comparison",
                                    value="dist-comparison",
                                ),
                                dcc.Tab(label="PCA by Cell Type", value="pca-celltype"),
                                dcc.Tab(
                                    label="t-SNE by Cell Type", value="tsne-celltype"
                                ),
                                dcc.Tab(
                                    label="JS Divergence Table", value="js-divergence"
                                ),
                            ],
                        ),
                        dcc.Loading(
                            id="loading-comparison",
                            type="default",
                            children=[
                                html.Div(
                                    id="comparison-content", style={"marginTop": 20}
                                )
                            ],
                            style={"minHeight": "400px"},
                            color="#1f77b4",
                        ),
                    ],
                    id="cell-type-comparison-section",
                    style={
                        "marginBottom": 40,
                        "padding": "20px",
                        "backgroundColor": "#f8f9fa",
                        "borderRadius": "10px",
                    },
                ),
                # Hidden div to store directory structure
                html.Div(id="directory-structure", style={"display": "none"}),
            ],
            style={
                "padding": "20px",
                "maxWidth": "1400px",
                "margin": "0 auto",
                "fontFamily": "'Segoe UI', 'Helvetica Neue', Arial, sans-serif",
                "backgroundColor": "#f5f7fa",
            },
        )

        # Callback to control visibility of sections based on analysis mode
        @app.callback(  # type: ignore
            Output("combined-analysis-section", "style"),
            Output("combined-cell-type-comparison-section", "style"),
            Output("slide-selection-container", "style"),
            Output("cell-type-comparison-section", "style"),
            Input("analysis-mode", "value"),
        )
        def toggle_analysis_mode(  # type: ignore
            mode: str,
        ) -> tuple[
            dict[str, str | int],
            dict[str, str | int],
            dict[str, str | int],
            dict[str, str | int],
        ]:
            base_combined_style = STYLES["section_combined"]
            base_slide_style = STYLES["section_slide"]
            base_comparison_style = STYLES["section_comparison"]

            if mode == "combined":
                # Show combined sections, hide slide-specific sections
                return (
                    base_combined_style,
                    base_combined_style,
                    {**base_slide_style, "display": "none"},
                    {**base_comparison_style, "display": "none"},
                )
            else:  # single
                # Hide combined sections, show slide-specific sections
                return (
                    {**base_combined_style, "display": "none"},
                    {**base_combined_style, "display": "none"},
                    base_slide_style,
                    base_comparison_style,
                )

        # Callback for combined analysis content
        @app.callback(  # type: ignore
            Output("combined-content", "children"),
            Input("combined-tabs", "value"),
            Input("combined-sample-size", "value"),
            Input("combined-tsne-perplexity", "value"),
            *[
                Input(f"dropdown-level-{i}", "value")
                for i in range(self.max_dropdown_levels)
            ],
        )
        def update_combined_content(  # type: ignore
            active_tab: str,
            sample_size: int,
            perplexity: int,
            *selected_values: str | None,
        ):
            # Build path from selected values
            current_path = self._build_path_from_values(*selected_values)

            if not current_path:
                return html.Div(
                    "Please select a feature path to view combined analysis.",
                    style=STYLES["warning"],
                )

            try:
                # Validate inputs
                samples_per_slide = self._validate_positive_int(sample_size, 1000)
                tsne_perp = self._validate_positive_int(perplexity, 30)

                if active_tab == "combined-pca":
                    data = self._prepare_combined_data(
                        available_slides, current_path, samples_per_slide
                    )
                    fig = self._create_combined_pca_plot(
                        data["features"], data["slide_labels"], data["slides"]
                    )
                    return dcc.Graph(figure=fig)

                elif active_tab == "combined-tsne":
                    data = self._prepare_combined_data(
                        available_slides, current_path, samples_per_slide
                    )
                    fig = self._create_combined_tsne_plot(
                        data["features"],
                        data["slide_labels"],
                        data["slides"],
                        tsne_perp,
                    )
                    return dcc.Graph(figure=fig)

                elif active_tab == "combined-celltype-dist":
                    # Use all cells for distribution (no sampling)
                    data = self._prepare_combined_data_with_cell_types(
                        available_slides, current_path, max_samples_per_slide=None
                    )
                    if data["cell_types"] is None:
                        return html.Div(
                            [
                                html.H4("Cell Type Information Not Available"),
                                html.P(
                                    "Cell type data could not be loaded from the slides."
                                ),
                            ],
                            style={
                                "padding": "20px",
                                "backgroundColor": "#f8d7da",
                                "border": "1px solid #f5c6cb",
                                "borderRadius": "5px",
                                "color": "#721c24",
                            },
                        )
                    fig = self._create_combined_cell_type_distribution(
                        data["cell_types"],
                        data["cell_type_names"],
                        data["slide_labels"],
                        data["slides"],
                    )
                    return dcc.Graph(figure=fig)

            except Exception as e:
                logger.error(f"Error generating combined visualization: {e}")
                import traceback

                logger.error(traceback.format_exc())
                return self._create_error_message(
                    "Error",
                    f"Failed to generate combined visualization: {str(e)}",
                )

            return html.Div("Select a tab to view combined analysis.")

        # Callback for updating combined comparison feature dropdown
        @app.callback(  # type: ignore
            Output("combined-comparison-feature-dropdown", "options"),
            Output("combined-comparison-feature-dropdown", "value"),
            Input("analysis-mode", "value"),
            *[
                Input(f"dropdown-level-{i}", "value")
                for i in range(self.max_dropdown_levels)
            ],
        )
        def update_combined_comparison_feature_dropdown(  # type: ignore
            analysis_mode: str,
            *selected_values: str | None,
        ) -> tuple[list[Any], str | None]:
            # Only populate in combined mode
            if analysis_mode != "combined":
                return [], None

            # Build path from selected values
            current_path: list[str] = []
            for value in selected_values:
                if value is not None:
                    current_path.append(value)
                else:
                    break

            if not current_path:
                return [], None

            try:
                data = self._prepare_combined_data_with_cell_types(
                    available_slides,
                    current_path,
                    100,  # Just load a small sample to get feature names
                )
                if data["cell_types"] is None or len(data["features"]) == 0:
                    return [], None
                feature_names = data["feature_names"]
                options = [{"label": f, "value": f} for f in feature_names]
                value = feature_names[0] if feature_names else None
                return options, value
            except Exception as e:
                logger.error(f"Error loading features for combined comparison: {e}")
                return [], None

        # Callback for updating combined JS divergence cell type dropdown
        @app.callback(  # type: ignore
            Output("combined-js-celltype-dropdown", "options"),
            Output("combined-js-celltype-dropdown", "value"),
            Input("analysis-mode", "value"),
            Input("combined-comparison-tabs", "value"),
            *[
                Input(f"dropdown-level-{i}", "value")
                for i in range(self.max_dropdown_levels)
            ],
        )
        def update_combined_js_celltype_dropdown(  # type: ignore
            analysis_mode: str,
            active_tab: str,
            *selected_values: str | None,
        ) -> tuple[list[Any], str | None]:
            # Only populate in combined mode
            if analysis_mode != "combined":
                return [], None

            # Build path from selected values
            current_path: list[str] = []
            for value in selected_values:
                if value is not None:
                    current_path.append(value)
                else:
                    break

            if not current_path:
                logger.info("Combined JS celltype dropdown: No path selected")
                return [], None

            try:
                logger.info(
                    f"Combined JS celltype dropdown: Loading data for path {current_path}"
                )
                # Use None to load all cells (not just a sample) to ensure we get all cell types
                data = self._prepare_combined_data_with_cell_types(
                    available_slides,
                    current_path,
                    max_samples_per_slide=None,  # Load all cells to get all cell types
                )

                logger.info(
                    f"Combined JS celltype dropdown: Loaded {len(data['features'])} cells"
                )

                if data["cell_types"] is None:
                    logger.warning("Combined JS celltype dropdown: No cell types found")
                    return [], None

                if len(data["features"]) == 0:
                    logger.warning("Combined JS celltype dropdown: No features found")
                    return [], None

                cell_type_names = data["cell_type_names"]
                if cell_type_names is None or len(cell_type_names) == 0:
                    logger.warning(
                        "Combined JS celltype dropdown: No cell type names found"
                    )
                    return [], None

                unique_types = sorted(set(data["cell_types"]))
                logger.info(
                    f"Combined JS celltype dropdown: Found {len(unique_types)} unique cell types"
                )

                options = [
                    {"label": cell_type_names[i], "value": i} for i in unique_types
                ]
                value = options[0]["value"] if options else None
                return options, value
            except Exception as e:
                logger.error(
                    f"Error loading cell types for combined JS divergence: {e}"
                )
                import traceback

                logger.error(traceback.format_exc())
                return [], None

        # Callback for updating combined cell type comparison content
        @app.callback(  # type: ignore
            Output("combined-comparison-content", "children"),
            Input("combined-comparison-tabs", "value"),
            Input("combined-comparison-feature-dropdown", "value"),
            Input("combined-js-celltype-dropdown", "value"),
            Input("combined-comparison-sample-size", "value"),
            Input("combined-comparison-tsne-perplexity", "value"),
            *[
                Input(f"dropdown-level-{i}", "value")
                for i in range(self.max_dropdown_levels)
            ],
        )
        def update_combined_comparison_content(  # type: ignore
            active_tab: str,
            selected_feature: str,
            selected_celltype: int,
            sample_size: int,
            perplexity: int,
            *selected_values: str | None,
        ):
            # Build path from selected values
            current_path: list[str] = []
            for value in selected_values:
                if value is not None:
                    current_path.append(value)
                else:
                    break

            if not current_path:
                return html.Div(
                    "Please select a feature path to enable combined cell type comparison.",
                    style={
                        "padding": "20px",
                        "backgroundColor": "#fff3cd",
                        "border": "1px solid #ffc107",
                        "borderRadius": "5px",
                        "color": "#856404",
                    },
                )

            try:
                # Validate inputs
                samples_per_slide = (
                    sample_size if sample_size and sample_size > 0 else 1000
                )
                tsne_perp = perplexity if perplexity and perplexity > 0 else 30

                if active_tab == "combined-dist-comparison":
                    # Use all cells for distribution comparison (no sampling)
                    data = self._prepare_combined_data_with_cell_types(
                        available_slides, current_path, max_samples_per_slide=None
                    )

                    # Check if cell types are available
                    if data["cell_types"] is None or data["cell_type_names"] is None:
                        return html.Div(
                            [
                                html.H4(
                                    "Cell Type Information Not Available",
                                    style={"marginBottom": "10px"},
                                ),
                                html.P(
                                    "Cell type data could not be loaded from the slides. "
                                    "Make sure cell detection data exists for the selected feature extraction path."
                                ),
                            ],
                            style={
                                "padding": "20px",
                                "backgroundColor": "#f8d7da",
                                "border": "1px solid #f5c6cb",
                                "borderRadius": "5px",
                                "color": "#721c24",
                            },
                        )

                    if not selected_feature:
                        return html.Div("Please select a feature.")
                    fig = self._create_combined_distribution_comparison(
                        data["df"],
                        selected_feature,
                        data["cell_types"],
                        data["cell_type_names"],
                    )
                    return dcc.Graph(figure=fig)

                elif active_tab == "combined-pca-celltype":
                    # Use sampled data for PCA
                    data = self._prepare_combined_data_with_cell_types(
                        available_slides, current_path, samples_per_slide
                    )

                    # Check if cell types are available
                    if data["cell_types"] is None or data["cell_type_names"] is None:
                        return html.Div(
                            [
                                html.H4(
                                    "Cell Type Information Not Available",
                                    style={"marginBottom": "10px"},
                                ),
                                html.P(
                                    "Cell type data could not be loaded from the slides. "
                                    "Make sure cell detection data exists for the selected feature extraction path."
                                ),
                            ],
                            style={
                                "padding": "20px",
                                "backgroundColor": "#f8d7da",
                                "border": "1px solid #f5c6cb",
                                "borderRadius": "5px",
                                "color": "#721c24",
                            },
                        )

                    fig = self._create_combined_pca_by_cell_type(
                        data["features"],
                        data["cell_types"],
                        data["cell_type_names"],
                    )
                    return dcc.Graph(figure=fig)

                elif active_tab == "combined-tsne-celltype":
                    # Use sampled data for t-SNE
                    data = self._prepare_combined_data_with_cell_types(
                        available_slides, current_path, samples_per_slide
                    )

                    # Check if cell types are available
                    if data["cell_types"] is None or data["cell_type_names"] is None:
                        return html.Div(
                            [
                                html.H4(
                                    "Cell Type Information Not Available",
                                    style={"marginBottom": "10px"},
                                ),
                                html.P(
                                    "Cell type data could not be loaded from the slides. "
                                    "Make sure cell detection data exists for the selected feature extraction path."
                                ),
                            ],
                            style={
                                "padding": "20px",
                                "backgroundColor": "#f8d7da",
                                "border": "1px solid #f5c6cb",
                                "borderRadius": "5px",
                                "color": "#721c24",
                            },
                        )

                    fig = self._create_combined_tsne_by_cell_type(
                        data["features"],
                        data["cell_types"],
                        data["cell_type_names"],
                        tsne_perp,
                    )
                    return dcc.Graph(figure=fig)

                elif active_tab == "combined-js-divergence":
                    if selected_celltype is None:  # type: ignore
                        return html.Div(
                            "Please select a reference cell type.",
                            style={
                                "padding": "20px",
                                "textAlign": "center",
                                "color": "#7f8c8d",
                            },
                        )

                    # Use all cells for JS divergence calculation
                    data = self._prepare_combined_data_with_cell_types(
                        available_slides, current_path, max_samples_per_slide=None
                    )

                    # Check if cell types are available
                    if data["cell_types"] is None or data["cell_type_names"] is None:
                        return html.Div(
                            [
                                html.H4(
                                    "Cell Type Information Not Available",
                                    style={"marginBottom": "10px"},
                                ),
                                html.P(
                                    "Cell type data could not be loaded from the slides. "
                                    "Make sure cell detection data exists for the selected feature extraction path."
                                ),
                            ],
                            style={
                                "padding": "20px",
                                "backgroundColor": "#f8d7da",
                                "border": "1px solid #f5c6cb",
                                "borderRadius": "5px",
                                "color": "#721c24",
                            },
                        )

                    js_df = self._calculate_js_divergence_table(
                        data["df"],
                        data["cell_types"],
                        data["cell_type_names"],
                        selected_celltype,
                    )

                    # Get reference cell type name
                    ref_name = data["cell_type_names"].get(selected_celltype, "Unknown")

                    # Prepare data for the table
                    table_data = js_df.reset_index().to_dict("records")  # type: ignore
                    table_columns: list[dict[str, str | dict[str, str]]] = [
                        {"name": "Feature", "id": "Feature"}
                    ]
                    for col in js_df.columns:
                        table_columns.append(
                            {
                                "name": col,
                                "id": col,
                                "type": "numeric",
                                "format": {"specifier": ".4f"},
                            }
                        )

                    # Build style_data_conditional list
                    style_conditions: list[dict[str, Any]] = [
                        {
                            "if": {"row_index": "odd"},
                            "backgroundColor": "#f9f9f9",
                        },
                        {
                            "if": {"column_id": "Feature"},
                            "fontWeight": "500",
                            "backgroundColor": "#ecf0f1",
                        },
                    ]

                    # Add color coding for divergence values (low = green, medium = yellow, high = red)
                    for col in js_df.columns:
                        # Low divergence (< 0.1) - green
                        style_conditions.append(
                            {
                                "if": {
                                    "filter_query": f"{{{col}}} < 0.1",
                                    "column_id": col,
                                },
                                "backgroundColor": "#d4edda",
                                "color": "#155724",
                            }
                        )
                        # Medium divergence (0.1 - 0.3) - yellow
                        style_conditions.append(
                            {
                                "if": {
                                    "filter_query": f"{{{col}}} >= 0.1 && {{{col}}} < 0.3",
                                    "column_id": col,
                                },
                                "backgroundColor": "#fff3cd",
                                "color": "#856404",
                            }
                        )
                        # High divergence (>= 0.3) - red
                        style_conditions.append(
                            {
                                "if": {
                                    "filter_query": f"{{{col}}} >= 0.3",
                                    "column_id": col,
                                },
                                "backgroundColor": "#f8d7da",
                                "color": "#721c24",
                            }
                        )

                    return html.Div(
                        [
                            html.H4(
                                f"Jensen-Shannon Divergence: {ref_name} vs Other Cell Types (Combined Dataset)",
                                style={
                                    "marginBottom": "20px",
                                    "fontFamily": "'Segoe UI', Arial, sans-serif",
                                    "color": "#2c3e50",
                                },
                            ),
                            html.P(
                                f"Values represent the Jensen-Shannon divergence between the distribution of each feature in {ref_name} cells and other cell types across all slides. "
                                "Lower values indicate more similar distributions (0 = identical, 1 = completely different).",
                                style={
                                    "marginBottom": "20px",
                                    "fontFamily": "'Segoe UI', Arial, sans-serif",
                                    "color": "#7f8c8d",
                                    "fontSize": "14px",
                                },
                            ),
                            dash_table.DataTable(
                                data=table_data,  # type: ignore
                                columns=table_columns,  # type: ignore
                                style_table={
                                    "overflowX": "auto",
                                    "maxHeight": "600px",
                                    "overflowY": "auto",
                                },
                                style_cell={
                                    "textAlign": "left",
                                    "padding": "10px",
                                    "fontFamily": "'Segoe UI', Arial, sans-serif",
                                    "fontSize": "13px",
                                },
                                style_header={
                                    "backgroundColor": "#34495e",
                                    "color": "white",
                                    "fontWeight": "bold",
                                    "textAlign": "left",
                                    "padding": "12px",
                                    "fontFamily": "'Segoe UI', Arial, sans-serif",
                                },
                                style_data_conditional=style_conditions,  # type: ignore
                                page_size=20,
                                sort_action="native",
                                filter_action="native",
                            ),
                        ],
                        style={"padding": "20px"},
                    )

            except Exception as e:
                logger.error(f"Error generating combined cell type comparison: {e}")
                import traceback

                logger.error(traceback.format_exc())
                return html.Div(
                    [
                        html.H4("Error"),
                        html.P(
                            f"Failed to generate combined cell type comparison: {str(e)}"
                        ),
                    ],
                    style={
                        "backgroundColor": "#ffcccc",
                        "padding": 15,
                        "borderRadius": 5,
                    },
                )

            return html.Div(
                "Select a tab above to view combined cell type comparisons.",
                style={
                    "padding": "20px",
                    "textAlign": "center",
                    "color": "#7f8c8d",
                },
            )

        # Initialize the first dropdown
        @app.callback(  # type: ignore
            [
                Output(f"dropdown-level-{i}", "options")
                for i in range(self.max_dropdown_levels)
            ]
            + [
                Output(f"dropdown-level-{i}", "value")
                for i in range(self.max_dropdown_levels)
            ]
            + [
                Output(f"dropdown-container-{i}", "style")
                for i in range(self.max_dropdown_levels)
            ],
            [Input("slide-dropdown", "value")]
            + [
                Input(f"dropdown-level-{i}", "value")
                for i in range(self.max_dropdown_levels)
            ],
        )
        def update_dropdowns(  # type: ignore
            selected_slide: str | None, *selected_values: str | None
        ) -> list[dict[str, str] | str | list[dict[str, str]] | None]:
            # If no slide is selected, return empty
            if selected_slide is None:
                return cast(
                    list[dict[str, str] | str | list[dict[str, str]] | None],
                    (
                        [[] for _ in range(self.max_dropdown_levels)]
                        + [None for _ in range(self.max_dropdown_levels)]
                        + [{"display": "none"} for _ in range(self.max_dropdown_levels)]
                    ),
                )

            # Explore directory structure for the selected slide
            feature_extraction_path = (
                self.config.dataset / selected_slide / "feature_extraction"
            )
            directory_structure = self._explore_directory(feature_extraction_path)
            # Prepare outputs
            options_outputs: list[list[dict[str, str]]] = [
                [] for _ in range(self.max_dropdown_levels)
            ]
            value_outputs: list[str | None] = [
                None for _ in range(self.max_dropdown_levels)
            ]
            style_outputs: list[dict[str, str]] = []

            # Build path from selected values
            current_path: list[str] = []
            for _, value in enumerate(selected_values):
                if value is not None:
                    current_path.append(value)
                else:
                    break

            # Update options for each level
            for level in range(self.max_dropdown_levels):
                if level == 0:
                    # First level: show top-level directories
                    options = self._get_available_options_at_level(
                        directory_structure, []
                    )
                    if options:
                        options_outputs[level] = [
                            {"label": opt, "value": opt} for opt in options
                        ]
                        if level < len(current_path):
                            value_outputs[level] = current_path[level]
                        # Remove the auto-selection logic for first dropdown
                        # Keep value_outputs[level] as None if no path is selected
                    style_outputs.append(
                        {
                            "width": f"{90 // min(3, self.max_dropdown_levels)}%",
                            "display": "inline-block",
                            "marginRight": "2%",
                        }
                    )
                else:
                    # Subsequent levels: show options based on current path
                    if level <= len(current_path):
                        path_to_check = current_path[:level]
                        options = self._get_available_options_at_level(
                            directory_structure, path_to_check
                        )

                        if options:
                            options_outputs[level] = [
                                {"label": opt, "value": opt} for opt in options
                            ]
                            if level < len(current_path):
                                value_outputs[level] = current_path[level]
                            style_outputs.append(
                                {
                                    "width": f"{90 // min(3, self.max_dropdown_levels)}%",
                                    "display": "inline-block",
                                    "marginRight": "2%",
                                }
                            )
                        else:
                            # No more options available, hide this dropdown
                            style_outputs.append({"display": "none"})
                    else:
                        # Hide this dropdown
                        style_outputs.append({"display": "none"})

            return options_outputs + value_outputs + style_outputs

        # Callback for updating feature dropdown
        @app.callback(  # type: ignore
            Output("feature-dropdown", "options"),
            Output("feature-dropdown", "value"),
            [Input("slide-dropdown", "value")]
            + [
                Input(f"dropdown-level-{i}", "value")
                for i in range(self.max_dropdown_levels)
            ],
        )
        def update_feature_dropdown(  # type: ignore
            selected_slide: str | None,
            *selected_values: str | None,
        ) -> tuple[list[Any], str | None]:
            if selected_slide is None:
                return [], None

            # Explore directory structure for the selected slide
            feature_extraction_path = (
                self.config.dataset / selected_slide / "feature_extraction"
            )
            directory_structure = self._explore_directory(feature_extraction_path)

            # Build path from selected values
            current_path: list[str] = []
            for value in selected_values:
                if value is not None:
                    current_path.append(value)
                else:
                    break

            if current_path and self._can_load_features(
                directory_structure, current_path
            ):
                try:
                    data = self._prepare_data(selected_slide, current_path)
                    feature_names = data["feature_names"]
                    options = [{"label": f, "value": f} for f in feature_names]
                    value = feature_names[0] if feature_names else None
                    return options, value
                except Exception as e:
                    print(f"Error loading features: {e}")
                    return [], None
            return [], None

        # Callback for updating data info
        @app.callback(  # type: ignore
            Output("data-info", "children"),
            [Input("slide-dropdown", "value")]
            + [
                Input(f"dropdown-level-{i}", "value")
                for i in range(self.max_dropdown_levels)
            ],
        )
        def update_data_info(selected_slide: str | None, *selected_values: str | None):  # type: ignore
            if selected_slide is None:
                return html.Div()

            # Explore directory structure for the selected slide
            feature_extraction_path = (
                self.config.dataset / selected_slide / "feature_extraction"
            )
            directory_structure = self._explore_directory(feature_extraction_path)

            # Build path from selected values
            current_path: list[str] = []
            for value in selected_values:
                if value is not None:
                    current_path.append(value)
                else:
                    break

            if current_path and self._can_load_features(
                directory_structure, current_path
            ):
                try:
                    data = self._prepare_data(selected_slide, current_path)
                    shape = data["shape"]
                    return html.Div(
                        [
                            html.H4("Data Information"),
                            html.P(f"Shape: {shape[0]} cells × {shape[1]} features"),
                            html.P(f"Path: {' → '.join(current_path)}"),
                        ],
                        style={
                            "backgroundColor": "#f0f0f0",
                            "padding": 15,
                            "borderRadius": 5,
                        },
                    )
                except Exception as e:
                    return html.Div(
                        [html.H4("Error"), html.P(f"Failed to load data: {str(e)}")],
                        style={
                            "backgroundColor": "#ffcccc",
                            "padding": 15,
                            "borderRadius": 5,
                        },
                    )
            elif current_path:
                return html.Div(
                    [
                        html.H4("Path Selection"),
                        html.P(f"Current path: {' → '.join(current_path)}"),
                    ],
                    style={
                        "backgroundColor": "#fff3cd",
                        "padding": 15,
                        "borderRadius": 5,
                    },
                )
            return html.Div()

        # Callback for updating overview content (PCA, t-SNE, and Correlation Matrix)
        @app.callback(  # type: ignore
            Output("overview-content", "children"),
            Input("overview-tabs", "value"),
            Input("pca-sample-size", "value"),
            Input("overview-tsne-perplexity", "value"),
            Input("slide-dropdown", "value"),
            *[
                Input(f"dropdown-level-{i}", "value")
                for i in range(self.max_dropdown_levels)
            ],
        )
        def update_overview_content(  # type: ignore
            active_tab: str,
            pca_sample_size: int,
            tsne_perplexity: int,
            selected_slide: str | None,
            *selected_values: str | None,
        ):
            if selected_slide is None:
                return html.Div("Please select a slide.")

            # Explore directory structure for the selected slide
            feature_extraction_path = (
                self.config.dataset / selected_slide / "feature_extraction"
            )
            directory_structure = self._explore_directory(feature_extraction_path)

            # Build path from selected values
            current_path: list[str] = []
            for value in selected_values:
                if value is not None:
                    current_path.append(value)
                else:
                    break

            if not current_path or not self._can_load_features(
                directory_structure, current_path
            ):
                return html.Div("Please select a complete path to features.")

            try:
                data = self._prepare_data(selected_slide, current_path)
                df = data["df"]
                features = data["features"]
                feature_names = data["feature_names"]

                # Validate inputs
                sample_size = (
                    pca_sample_size if pca_sample_size and pca_sample_size > 0 else 1000
                )
                perplexity = (
                    tsne_perplexity if tsne_perplexity and tsne_perplexity > 0 else 30
                )

                if active_tab == "pca":
                    fig = self._create_pca_plot(features, feature_names, sample_size)
                    return html.Div(
                        dcc.Graph(figure=fig),
                        style={"display": "flex", "justifyContent": "center"},
                    )

                elif active_tab == "tsne":
                    fig = self._create_tsne_plot(features, sample_size, perplexity)
                    return html.Div(
                        dcc.Graph(figure=fig),
                        style={"display": "flex", "justifyContent": "center"},
                    )

                elif active_tab == "correlation":
                    fig = self._create_correlation_matrix(df, feature_names)
                    return html.Div(
                        dcc.Graph(figure=fig),
                        style={"display": "flex", "justifyContent": "center"},
                    )

            except Exception as e:
                return html.Div(
                    [
                        html.H4("Error"),
                        html.P(f"Failed to generate visualization: {str(e)}"),
                    ],
                    style={
                        "backgroundColor": "#ffcccc",
                        "padding": 15,
                        "borderRadius": 5,
                    },
                )

            return html.Div("Select a tab to view visualizations.")

        # Callback for updating feature content (Distribution and Statistics)
        @app.callback(  # type: ignore
            Output("feature-content", "children"),
            Input("feature-tabs", "value"),
            Input("feature-dropdown", "value"),
            Input("slide-dropdown", "value"),
            *[
                Input(f"dropdown-level-{i}", "value")
                for i in range(self.max_dropdown_levels)
            ],
        )
        def update_feature_content(  # type: ignore
            active_tab: str,
            selected_feature: str,
            selected_slide: str | None,
            *selected_values: str | None,
        ):
            if selected_slide is None:
                return html.Div("Please select a slide.")

            # Explore directory structure for the selected slide
            feature_extraction_path = (
                self.config.dataset / selected_slide / "feature_extraction"
            )
            directory_structure = self._explore_directory(feature_extraction_path)

            # Build path from selected values
            current_path: list[str] = []
            for value in selected_values:
                if value is not None:
                    current_path.append(value)
                else:
                    break

            if not current_path or not self._can_load_features(
                directory_structure, current_path
            ):
                return html.Div("Please select a complete path to features.")

            try:
                data = self._prepare_data(selected_slide, current_path)
                df = data["df"]
                features = data["features"]
                feature_names = data["feature_names"]

                if active_tab == "distribution":
                    if selected_feature:
                        fig = self._create_distribution_plot(df, selected_feature)
                        return dcc.Graph(figure=fig)
                    else:
                        return html.Div("Please select a feature.")

                elif active_tab == "stats":
                    stats_dict = self._calculate_first_order_stats(features)
                    fig = self._create_stats_table(stats_dict, feature_names)
                    return dcc.Graph(figure=fig)

            except Exception as e:
                return html.Div(
                    [
                        html.H4("Error"),
                        html.P(f"Failed to generate visualization: {str(e)}"),
                    ],
                    style={
                        "backgroundColor": "#ffcccc",
                        "padding": 15,
                        "borderRadius": 5,
                    },
                )

            return html.Div("Select a tab to view visualizations.")

        # Callback for updating comparison feature dropdown
        @app.callback(  # type: ignore
            Output("comparison-feature-dropdown", "options"),
            Output("comparison-feature-dropdown", "value"),
            [Input("slide-dropdown", "value")]
            + [
                Input(f"dropdown-level-{i}", "value")
                for i in range(self.max_dropdown_levels)
            ],
        )
        def update_comparison_feature_dropdown(  # type: ignore
            selected_slide: str | None,
            *selected_values: str | None,
        ) -> tuple[list[Any], str | None]:
            if selected_slide is None:
                return [], None

            # Explore directory structure for the selected slide
            feature_extraction_path = (
                self.config.dataset / selected_slide / "feature_extraction"
            )
            directory_structure = self._explore_directory(feature_extraction_path)

            # Build path from selected values
            current_path: list[str] = []
            for value in selected_values:
                if value is not None:
                    current_path.append(value)
                else:
                    break

            if current_path and self._can_load_features(
                directory_structure, current_path
            ):
                try:
                    data = self._prepare_data_with_cell_types(
                        selected_slide, current_path
                    )
                    if data["cell_types"] is None:
                        return [], None
                    feature_names = data["feature_names"]
                    options = [{"label": f, "value": f} for f in feature_names]
                    value = feature_names[0] if feature_names else None
                    return options, value
                except Exception as e:
                    print(f"Error loading features for comparison: {e}")
                    return [], None
            return [], None

        # Callback for updating JS divergence cell type dropdown
        @app.callback(  # type: ignore
            Output("js-celltype-dropdown", "options"),
            Output("js-celltype-dropdown", "value"),
            [Input("slide-dropdown", "value")]
            + [
                Input(f"dropdown-level-{i}", "value")
                for i in range(self.max_dropdown_levels)
            ],
        )
        def update_js_celltype_dropdown(  # type: ignore
            selected_slide: str | None,
            *selected_values: str | None,
        ) -> tuple[list[Any], int | None]:
            if selected_slide is None:
                return [], None

            # Explore directory structure for the selected slide
            feature_extraction_path = (
                self.config.dataset / selected_slide / "feature_extraction"
            )
            directory_structure = self._explore_directory(feature_extraction_path)

            # Build path from selected values
            current_path: list[str] = []
            for value in selected_values:
                if value is not None:
                    current_path.append(value)
                else:
                    break

            if current_path and self._can_load_features(
                directory_structure, current_path
            ):
                try:
                    data = self._prepare_data_with_cell_types(
                        selected_slide, current_path
                    )
                    if data["cell_types"] is None or data["cell_type_names"] is None:
                        return [], None

                    cell_type_names = data["cell_type_names"]
                    # Exclude unknown type (0)
                    options = [
                        {"label": name, "value": ct}
                        for ct, name in sorted(cell_type_names.items())
                        if ct != 0
                    ]
                    # Default to first non-zero cell type
                    value = options[0]["value"] if options else None
                    return options, value
                except Exception as e:
                    print(f"Error loading cell types for JS divergence: {e}")
                    return [], None
            return [], None

        # Callback for updating cell type comparison content
        @app.callback(  # type: ignore
            Output("comparison-content", "children"),
            Input("comparison-tabs", "value"),
            Input("comparison-feature-dropdown", "value"),
            Input("comparison-sample-size", "value"),
            Input("tsne-perplexity", "value"),
            Input("js-celltype-dropdown", "value"),
            Input("slide-dropdown", "value"),
            *[
                Input(f"dropdown-level-{i}", "value")
                for i in range(self.max_dropdown_levels)
            ],
        )
        def update_comparison_content(  # type: ignore
            active_tab: str,
            selected_feature: str,
            sample_size: int,
            perplexity: int,
            selected_celltype: int,
            selected_slide: str | None,
            *selected_values: str | None,
        ):
            if selected_slide is None:
                return html.Div("Please select a slide.")

            # Explore directory structure for the selected slide
            feature_extraction_path = (
                self.config.dataset / selected_slide / "feature_extraction"
            )
            directory_structure = self._explore_directory(feature_extraction_path)

            # Build path from selected values
            current_path: list[str] = []
            for value in selected_values:
                if value is not None:
                    current_path.append(value)
                else:
                    break

            if not current_path or not self._can_load_features(
                directory_structure, current_path
            ):
                message = html.Div(
                    "Please select a complete path to features to enable cell type comparison.",
                    style={
                        "padding": "20px",
                        "backgroundColor": "#fff3cd",
                        "border": "1px solid #ffc107",
                        "borderRadius": "5px",
                        "color": "#856404",
                        "fontFamily": "'Segoe UI', Arial, sans-serif",
                    },
                )
                return message

            try:
                data = self._prepare_data_with_cell_types(selected_slide, current_path)

                # Check if cell types are available
                if data["cell_types"] is None or data["cell_type_names"] is None:
                    # Create debug info
                    debug_info = f"Path: {current_path}, Cell types: {data['cell_types'] is not None}, Cell type names: {data['cell_type_names'] is not None}"
                    logger.warning(f"Cell type comparison not available: {debug_info}")

                    message = html.Div(
                        [
                            html.H4(
                                "Cell Type Information Not Available",
                                style={
                                    "marginBottom": "10px",
                                    "fontFamily": "'Segoe UI', Arial, sans-serif",
                                },
                            ),
                            html.P(
                                "Cell type data was not found for this dataset. Check the browser console and terminal logs for details.",
                                style={"fontFamily": "'Segoe UI', Arial, sans-serif"},
                            ),
                            html.P(
                                f"Debug info: {debug_info}",
                                style={
                                    "fontFamily": "'Segoe UI', Arial, sans-serif",
                                    "fontSize": "12px",
                                    "marginTop": "10px",
                                },
                            ),
                            html.P(
                                "This may be because:",
                                style={
                                    "fontFamily": "'Segoe UI', Arial, sans-serif",
                                    "marginTop": "10px",
                                },
                            ),
                            html.Ul(
                                [
                                    html.Li(
                                        "Cell detection JSON file doesn't exist",
                                        style={
                                            "fontFamily": "'Segoe UI', Arial, sans-serif"
                                        },
                                    ),
                                    html.Li(
                                        "Cell indices are missing from the feature file",
                                        style={
                                            "fontFamily": "'Segoe UI', Arial, sans-serif"
                                        },
                                    ),
                                    html.Li(
                                        "Segmentation model path doesn't match",
                                        style={
                                            "fontFamily": "'Segoe UI', Arial, sans-serif"
                                        },
                                    ),
                                ]
                            ),
                        ],
                        style={
                            "padding": "20px",
                            "backgroundColor": "#f8d7da",
                            "border": "1px solid #f5c6cb",
                            "borderRadius": "5px",
                            "color": "#721c24",
                            "fontFamily": "'Segoe UI', Arial, sans-serif",
                        },
                    )
                    return message

                df = data["df"]
                features = data["features"]
                cell_types = data["cell_types"]
                cell_type_names = data["cell_type_names"]

                # Validate sample size
                sample_size = sample_size if sample_size and sample_size > 0 else 1000
                perplexity = perplexity if perplexity and perplexity > 0 else 30

                if active_tab == "dist-comparison":
                    if selected_feature:
                        fig = self._create_distribution_comparison_plot(
                            df, selected_feature, cell_types, cell_type_names
                        )
                        return dcc.Graph(figure=fig)
                    else:
                        return html.Div(
                            "Please select a feature from the dropdown above.",
                            style={
                                "padding": "20px",
                                "backgroundColor": "#e7f3ff",
                                "border": "1px solid #2196F3",
                                "borderRadius": "5px",
                                "color": "#0d47a1",
                                "fontFamily": "'Segoe UI', Arial, sans-serif",
                            },
                        )

                elif active_tab == "pca-celltype":
                    fig = self._create_pca_by_cell_type(
                        features, cell_types, cell_type_names, sample_size
                    )
                    return html.Div(
                        dcc.Graph(figure=fig),
                        style={"display": "flex", "justifyContent": "center"},
                    )

                elif active_tab == "tsne-celltype":
                    fig = self._create_tsne_by_cell_type(
                        features, cell_types, cell_type_names, sample_size, perplexity
                    )
                    return html.Div(
                        dcc.Graph(figure=fig),
                        style={"display": "flex", "justifyContent": "center"},
                    )

                elif active_tab == "js-divergence":
                    if selected_celltype is None:  # type: ignore
                        return html.Div(
                            "Please select a reference cell type from the dropdown above.",
                            style={
                                "padding": "20px",
                                "backgroundColor": "#e7f3ff",
                                "border": "1px solid #2196F3",
                                "borderRadius": "5px",
                                "color": "#0d47a1",
                                "fontFamily": "'Segoe UI', Arial, sans-serif",
                            },
                        )

                    # Calculate JS divergence table
                    js_df = self._calculate_js_divergence_table(
                        df, cell_types, cell_type_names, selected_celltype
                    )

                    # Get reference cell type name
                    ref_name = cell_type_names.get(selected_celltype, "Unknown")

                    # Create the table
                    # Prepare data for the table
                    table_data = js_df.reset_index().to_dict("records")  # type: ignore
                    table_columns: list[dict[str, str | dict[str, str]]] = [
                        {"name": "Feature", "id": "Feature"}
                    ]
                    for col in js_df.columns:
                        table_columns.append(
                            {
                                "name": col,
                                "id": col,
                                "type": "numeric",
                                "format": {"specifier": ".4f"},
                            }
                        )

                    # Build style_data_conditional list
                    style_conditions: list[dict[str, Any]] = [
                        {
                            "if": {"row_index": "odd"},
                            "backgroundColor": "#f9f9f9",
                        },
                        {
                            "if": {"column_id": "Feature"},
                            "fontWeight": "500",
                            "backgroundColor": "#ecf0f1",
                        },
                    ]

                    # Add color coding for divergence values (low = green, medium = yellow, high = red)
                    for col in js_df.columns:
                        # Low divergence (< 0.1) - green
                        style_conditions.append(
                            {
                                "if": {
                                    "filter_query": f"{{{col}}} < 0.1",
                                    "column_id": col,
                                },
                                "backgroundColor": "#d4edda",
                                "color": "#155724",
                            }
                        )
                        # Medium divergence (0.1 - 0.3) - yellow
                        style_conditions.append(
                            {
                                "if": {
                                    "filter_query": f"{{{col}}} >= 0.1 && {{{col}}} < 0.3",
                                    "column_id": col,
                                },
                                "backgroundColor": "#fff3cd",
                                "color": "#856404",
                            }
                        )
                        # High divergence (>= 0.3) - red
                        style_conditions.append(
                            {
                                "if": {
                                    "filter_query": f"{{{col}}} >= 0.3",
                                    "column_id": col,
                                },
                                "backgroundColor": "#f8d7da",
                                "color": "#721c24",
                            }
                        )

                    return html.Div(
                        [
                            html.H4(
                                f"Jensen-Shannon Divergence: {ref_name} vs Other Cell Types",
                                style={
                                    "marginBottom": "20px",
                                    "fontFamily": "'Segoe UI', Arial, sans-serif",
                                    "color": "#2c3e50",
                                },
                            ),
                            html.P(
                                f"Values represent the Jensen-Shannon divergence between the distribution of each feature in {ref_name} cells and other cell types. "
                                "Lower values indicate more similar distributions (0 = identical, 1 = completely different).",
                                style={
                                    "marginBottom": "20px",
                                    "fontFamily": "'Segoe UI', Arial, sans-serif",
                                    "color": "#7f8c8d",
                                    "fontSize": "14px",
                                },
                            ),
                            dash_table.DataTable(
                                data=table_data,  # type: ignore
                                columns=table_columns,  # type: ignore
                                style_table={
                                    "overflowX": "auto",
                                    "maxHeight": "600px",
                                    "overflowY": "auto",
                                },
                                style_cell={
                                    "textAlign": "left",
                                    "padding": "10px",
                                    "fontFamily": "'Segoe UI', Arial, sans-serif",
                                    "fontSize": "13px",
                                    "minWidth": "120px",
                                },
                                style_header={
                                    "backgroundColor": "#2c3e50",
                                    "color": "white",
                                    "fontWeight": "bold",
                                    "textAlign": "center",
                                    "fontSize": "14px",
                                    "padding": "12px",
                                },
                                style_data={
                                    "backgroundColor": "white",
                                    "border": "1px solid #ddd",
                                },
                                style_data_conditional=style_conditions,  # type: ignore
                                page_size=20,
                                sort_action="native",
                                filter_action="native",
                            ),
                        ],
                        style={
                            "padding": "20px",
                            "backgroundColor": "white",
                            "borderRadius": "10px",
                            "boxShadow": "0 2px 4px rgba(0,0,0,0.1)",
                        },
                    )

            except Exception as e:
                error_div = html.Div(
                    [
                        html.H4(
                            "Error",
                            style={
                                "marginBottom": "10px",
                                "fontFamily": "'Segoe UI', Arial, sans-serif",
                            },
                        ),
                        html.P(
                            f"Failed to generate cell type comparison: {str(e)}",
                            style={"fontFamily": "'Segoe UI', Arial, sans-serif"},
                        ),
                    ],
                    style={
                        "backgroundColor": "#ffcccc",
                        "padding": 15,
                        "borderRadius": 5,
                        "border": "1px solid #f5c6cb",
                        "fontFamily": "'Segoe UI', Arial, sans-serif",
                    },
                )
                return error_div

            return html.Div(
                "Select a tab above to view cell type comparisons.",
                style={
                    "padding": "20px",
                    "backgroundColor": "#e7f3ff",
                    "border": "1px solid #2196F3",
                    "borderRadius": "5px",
                    "color": "#0d47a1",
                    "fontFamily": "'Segoe UI', Arial, sans-serif",
                },
            )

        # Run the app
        print(f"Starting Feature Visualizer Dashboard at http://{host}:{port}")
        app.run(host=host, port=port, debug=debug)  # type: ignore