# Source code for ultrasound_metrics.data.uff

"""
UFF dataset utilities for ultrasound data.

This module provides functions for loading UFF datasets that require pyuff_ustb.
If you don't work with UFF files, you can use the general utilities in visualize_bmode.py
without needing to install pyuff_ustb.
"""

from pathlib import Path
from typing import TYPE_CHECKING, Any, TypedDict, cast

from ultrasound_metrics._utils.array_api import ArrayAPIObj
from ultrasound_metrics.data.downloads import cached_download

if TYPE_CHECKING:
    pass

# Import-time dependency check: this module is unusable without pyuff_ustb,
# so a missing dependency is reported immediately with install instructions.
try:
    import pyuff_ustb as uff  # ty: ignore[unresolved-import]  # pyright: ignore[reportMissingImports]
    from pyuff_ustb import (  # ty: ignore[unresolved-import]  # pyright: ignore[reportMissingImports]
        BeamformedData,
        ChannelData,
        Uff,
    )
    from pyuff_ustb.readers.base import (  # ty: ignore[unresolved-import]  # pyright: ignore[reportMissingImports]
        ReaderKeyError,
    )

    # NOTE(review): because the except-branch below re-raises when pyuff_ustb
    # itself is missing, this flag is True whenever the module import succeeds;
    # the False branch of _check_pyuff_ustb() appears unreachable as written —
    # confirm whether lazy (call-time) failure was the original intent.
    _HAS_PYUFF_USTB = True
except ImportError as err:
    if "pyuff_ustb" in str(err):
        # Only raise ImportError if this is the specific missing dependency
        raise ImportError(
            "pyuff_ustb is required. Install with: `uv pip install ultrasound-metrics[uff]` or `pip install pyuff_ustb`"
        ) from err
    else:
        # Re-raise other import errors (e.g. a broken transitive dependency)
        raise


# Default cache directory for downloaded datasets
CACHE_DIR = Path.home().joinpath(".cache", "ultrasound-metrics", "datasets")


class DatasetInfo(TypedDict):
    """Type definition for dataset information."""

    # Download URL for the .uff file
    url: str
    # Local filename used inside CACHE_DIR
    filename: str
    # Human-readable description of the dataset
    description: str
    # Expected download size in bytes (used to validate downloads)
    size: int


# Dataset registry with download URLs and metadata
USTB_DATASETS: dict[str, DatasetInfo] = {
    "picmus_resolution_experiment": {
        "url": "https://f004.backblazeb2.com/b2api/v1/b2_download_file_by_id?fileId=4_z81bac298ed734da8927d0614_f112a4a231dbce513_d20250729_m192149_c004_v0402004_t0044_u01753816909257",
        # We can also use the USTB URL, but it has rate-limits
        # "url": "http://www.ustb.no/datasets/PICMUS_experiment_resolution_distortion.uff",
        "filename": "PICMUS_experiment_resolution_distortion.uff",
        "description": "PICMUS challenge resolution/distortion test (experiment)",
        "size": 145518524,
    },
    "picmus_contrast_experiment": {
        "url": "https://f004.backblazeb2.com/b2api/v1/b2_download_file_by_id?fileId=4_z81bac298ed734da8927d0614_f100d6106d29bf5da_d20250729_m192144_c004_v0402027_t0027_u01753816904341",
        # We can also use the USTB URL, but it has rate-limits
        # "url": "http://www.ustb.no/datasets/PICMUS_experiment_contrast_speckle.uff",
        "filename": "PICMUS_experiment_contrast_speckle.uff",
        "description": "PICMUS challenge contrast/speckle test (experiment)",
        "size": 145518504,
    },
}
def _check_pyuff_ustb() -> None:
    """
    Check if pyuff_ustb is available and raise ImportError if not.

    Raises
    ------
    ImportError
        If pyuff_ustb is not installed.
    """
    if not _HAS_PYUFF_USTB:
        raise ImportError("pyuff_ustb is required to load UFF datasets. Install with: pip install pyuff_ustb")


def list_available_datasets() -> dict[str, DatasetInfo]:
    """
    List all available datasets with their metadata.

    Returns
    -------
    dict
        Dictionary mapping dataset names to their information.
    """
    # Shallow copy so callers cannot mutate the module-level registry.
    return USTB_DATASETS.copy()


def inspect_dataset(dataset_name: str) -> dict:
    """
    Inspect a specific dataset and its cache status.

    Parameters
    ----------
    dataset_name
        Name of the dataset to inspect.

    Returns
    -------
    dict
        Dictionary with dataset metadata and cache information.  The
        "size" entry is the on-disk size of the cached file in bytes, or
        None when the file is not cached — it is NOT the registry's
        expected download size.

    Raises
    ------
    KeyError
        If the dataset is not found.

    Examples
    --------
    >>> info = inspect_dataset("picmus_resolution_experiment")
    >>> print(f"URL: {info['url']}")
    >>> print(f"Cached: {info['cached']}")
    >>> print(f"Size: {info['size']} bytes")
    """
    if dataset_name not in USTB_DATASETS:
        raise KeyError(f"Dataset {dataset_name} not found")

    dataset_info = USTB_DATASETS[dataset_name]
    cached_file = CACHE_DIR / dataset_info["filename"]
    cached = cached_file.exists()

    return {
        "name": dataset_name,
        "url": dataset_info["url"],
        "filename": dataset_info["filename"],
        "description": dataset_info["description"],
        "cached": cached,
        "cache_path": cached_file,
        "size": cached_file.stat().st_size if cached else None,
    }


def load_dataset(dataset_name: str, download_if_missing: bool = True, key: str = "/beamformed_data") -> ArrayAPIObj:
    """
    Load a dataset using pyuff_ustb.

    Parameters
    ----------
    dataset_name
        Name of the dataset to load.
    download_if_missing
        Whether to download the dataset if not cached.
    key
        Key to read from the UFF file. Common keys include:

        - "/beamformed_data": Beamformed ultrasound data (default)
        - "/channel_data": Channel data for temporal analysis

    Returns
    -------
    ndarray
        The loaded dataset as a numpy array (default key: "/beamformed_data").

    Raises
    ------
    ImportError
        If pyuff_ustb is not installed.
    KeyError
        If the dataset is not found.
    FileNotFoundError
        If dataset is not cached and download_if_missing=False.

    Examples
    --------
    Load beamformed data (default):

    >>> data = load_dataset("picmus_resolution_experiment")
    >>> print(f"Dataset shape: {data.shape}")

    Load channel data for temporal analysis:

    >>> channel_data = load_dataset("picmus_resolution_experiment", key="/channel_data")
    >>> print(f"Channel data shape: {channel_data.shape}")
    """
    # Fail fast if pyuff_ustb is not available
    _check_pyuff_ustb()

    if dataset_name not in USTB_DATASETS:
        raise KeyError(f"Dataset {dataset_name} not found")

    dataset_info = USTB_DATASETS[dataset_name]
    # NOTE(review): assumes cached_download stores to CACHE_DIR/filename —
    # confirm against downloads.cached_download.
    cached_file = CACHE_DIR / dataset_info["filename"]

    # Download if missing and requested; the registry's expected size lets
    # cached_download verify the transfer.
    if download_if_missing:
        cached_download(
            url=dataset_info["url"],
            filename=dataset_info["filename"],
            expected_size=dataset_info["size"],
        )
    elif not cached_file.exists():
        raise FileNotFoundError(f"Dataset {dataset_name} is not cached")

    # Load and return the actual data
    uff_file: Uff = Uff(str(cached_file))
    try:
        key_data = uff_file.read(key)
    except ReaderKeyError as err:
        raise KeyError(f"Key {key} not found in dataset {dataset_name}") from err

    # Only container objects that carry a .data payload are supported here.
    if not hasattr(key_data, "data"):
        raise ValueError(f"Key {key} does not contain data")
    return cast(BeamformedData | ChannelData, key_data).data


def load_uff_dataset(dataset_name: str) -> tuple[ArrayAPIObj, Any]:
    """
    Load and reshape any UFF dataset.

    Parameters
    ----------
    dataset_name
        Name of the dataset to load.

    Returns
    -------
    tuple
        Tuple of (beamformed_image, scan_info).

    Raises
    ------
    ImportError
        If pyuff_ustb is not installed.
    KeyError
        If the dataset is not found.
    """
    # Fail fast if pyuff_ustb is not available (consistent with load_dataset)
    _check_pyuff_ustb()

    if dataset_name not in USTB_DATASETS:
        raise KeyError(f"Dataset {dataset_name} not found")

    # FIX: read the registry entry directly instead of inspect_dataset(),
    # whose "size" is the on-disk size or None when not cached — passing
    # that as expected_size silently skipped verification of fresh downloads.
    dataset_info = USTB_DATASETS[dataset_name]
    data_file = cached_download(
        url=dataset_info["url"],
        filename=dataset_info["filename"],
        expected_size=dataset_info["size"],
    )

    # Load using UFF
    uff_file = uff.Uff(str(data_file))
    beamformed_data = cast(BeamformedData, uff_file.read("/beamformed_data"))

    # Get scan geometry
    scan = beamformed_data.scan

    # Reshape the flat pixel vector using the scan geometry.
    # NOTE(review): assumes pixels are stored x-major — confirm against USTB.
    beamformed_image = beamformed_data.data.reshape(
        (scan.x_axis.size, scan.z_axis.size),
    )

    return beamformed_image, scan