Source code for glasscut.data.fetcher

"""Data fetcher for downloading and managing histology slide samples."""

from pathlib import Path
from typing import Protocol

from pooch import create, os_cache  # type: ignore
import openslide

from .registry import registry, registry_urls


[docs] class PoochProtocol(Protocol): """Protocol for pooch.Pooch interface. Defines the minimal interface we use from pooch to allow proper typing despite pooch's limited type annotations. """
[docs] def fetch(self, fname: str) -> str: """Fetch a file from the registry. Parameters ---------- fname : str Filename in the registry Returns ------- str Path to the local file """ ...
[docs] class DataFetcher: """Manages downloading, caching, and loading histology slide samples. Uses pooch for robust file downloading, caching, and verification. Attributes ---------- pup : PoochProtocol | None Pooch instance for downloading, or None if initialization failed """
[docs] def __init__(self) -> None: """Initialize the data fetcher.""" self.pup: PoochProtocol | None = self._create_pooch()
def _create_pooch(self) -> PoochProtocol | None: """Create and configure pooch instance for data management. Returns ------- PoochProtocol | None Configured pooch instance, or None if initialization failed """ try: pup = create( path=os_cache("glasscut-data"), base_url="", # URLs from registry_urls registry=registry, urls=registry_urls, ) return pup except Exception: return None
[docs] def get_file(self, filename: str) -> Path: """Get or download a data file from the registry. Attempts to load file from cache, then downloads if necessary using pooch. Parameters ---------- filename : str Relative path in registry (e.g. 'aperio/JP2K-33003-1.svs') Returns ------- Path Path to the file Raises ------ KeyError If filename is not in registry """ if filename not in registry: raise KeyError(f"File '{filename}' not found in registry") # Try pooch first (if available) if self.pup is not None: try: file_path = self.pup.fetch(filename) return Path(file_path) except Exception: # Fallback to local distribution if pooch fails pass # If pooch is not available, raise error if self.pup is None: raise ModuleNotFoundError( f"Cannot load '{filename}': pooch is not installed. " "Install it with: pip install pooch" ) # Last resort: try pooch again, let it raise the real error file_path = self.pup.fetch(filename) return Path(file_path)
[docs] def load_slide(self, filename: str) -> tuple[openslide.OpenSlide, Path]: """Load an SVS file from the data registry. Parameters ---------- filename : str Relative path in registry Returns ------- Tuple[openslide.OpenSlide, Path] Tuple of (OpenSlide object, file path) Raises ------ openslide.OpenSlideError If the file cannot be opened KeyError If filename is not in registry """ file_path = self.get_file(filename) try: slide = openslide.OpenSlide(str(file_path)) return slide, file_path except Exception as e: raise openslide.OpenSlideError( f"Cannot open WSI file {file_path}: {e}" ) from e