Source code for nabu.pipeline.fullfield.computations

from math import ceil
from silx.image.tomography import get_next_power
from ...utils import check_supported



[docs]
def estimate_required_memory(
    process_config, delta_z=None, delta_a=None, max_mem_allocation_GB=None, fft_plans=True, debug=False
):
    """
    Estimate the memory (RAM) in Bytes needed for a reconstruction.

    Parameters
    -----------
    process_config: `ProcessConfig` object
        Data structure with the processing configuration
    delta_z: int, optional
        How many lines are to be loaded in the each projection image.
        Default is to load lines [start_z:end_z] (where start_z and end_z come from the user configuration file)
    delta_a: int, optional
        How many (partial) projection images to load at the same time. Default is to load all the projection images.
    max_mem_allocation_GB: float, optional
        Maximum amount of memory in GB for one single array.

    Returns
    -------
    required_memory: float
        Total required memory (in bytes).

    Raises
    ------
    ValueError if one single-array allocation exceeds "max_mem_allocation_GB"

    Notes
    -----
    pycuda <= 2022.1 cannot use arrays with more than 2**32 items (i.e 17.18 GB for float32).
    This was solved in more recent versions.
    """

    def check_memory_limit(mem_GB, name):
        if max_mem_allocation_GB is None:
            return
        if mem_GB > max_mem_allocation_GB:
            raise ValueError(
                "Cannot allocate array '%s' %.3f GB > max_mem_allocation_GB = %.3f GB"
                % (name, mem_GB, max_mem_allocation_GB)
            )

    dataset = process_config.dataset_info
    processing_steps = process_config.processing_steps

    # The "x" dimension (last axis) is always the image width, because
    #  - Processing is never "cut" along this axis (we either split along frames, or images lines)
    #  - Even if we want to reconstruct a vertical slice (i.e end_x - start_x == 1),
    #    the tomography reconstruction will need the full information along this axis.
    # The only case where reading a x-subregion would be useful is to crop the initial data
    # (i.e knowing that a part of each image will be completely unused). This is not supported yet.
    Nx = process_config.radio_shape(binning=True)[-1]

    if delta_z is not None:
        Nz = delta_z // process_config.binning_z
    else:
        Nz = process_config.rec_delta_z  # accounting for binning
    if delta_a is not None:
        Na = ceil(delta_a / process_config.subsampling_factor)
    else:
        Na = process_config.n_angles(subsampling=True)

    total_memory_needed = 0

    # Read data
    # ----------
    data_image_size = Nx * Nz * 4
    data_volume_size = Na * data_image_size
    check_memory_limit(data_volume_size / 1e9, "projections")
    total_memory_needed += data_volume_size

    # CCD processing
    # ---------------
    if "flatfield" in processing_steps:
        # Flat-field is done in-place, but still need to load darks/flats
        n_darks = len(dataset.darks)
        n_flats = len(dataset.flats)
        total_memory_needed += (n_darks + n_flats) * data_image_size

    if "ccd_correction" in processing_steps:
        # CCD filter is "batched 2D"
        total_memory_needed += data_image_size

    # Phase retrieval
    # ---------------
    if "phase" in processing_steps:
        # Phase retrieval is done image-wise, so near in-place, but needs to allocate some memory:
        # filter with padded shape, radio_padded, radio_padded_fourier, and possibly FFT plan.
        # CTF phase retrieval uses "2 filters" (num and denom) but let's neglect this.
        Nx_p = get_next_power(2 * Nx)
        Nz_p = get_next_power(2 * Nz)
        img_size_real = Nx_p * Nz_p * 4
        img_size_cplx = ((Nx_p * Nz_p) // 2 + 1) * 8  # assuming RFFT
        factor = 1
        if fft_plans:
            factor = 2
        total_memory_needed += (2 * img_size_real + img_size_cplx) * factor

    # Sinogram de-ringing
    # -------------------
    if "sino_rings_correction" in processing_steps:
        method = process_config.processing_options["sino_rings_correction"]["method"]
        if method == "munch":
            # Process is done image-wise.
            # Needs one Discrete Wavelets transform and one FFT/IFFT plan for each scale
            factor = 2 if not (fft_plans) else 5.5  # approx!
            total_memory_needed += (Nx * Na * 4) * factor
        elif method == "vo":
            # cupy-based implementation makes many calls to "scipy-like" functions, where the memory usage is not under control
            # TODO try to estimate this
            pass

    # Reconstruction
    # ---------------
    reconstructed_volume_size = 0
    if "reconstruction" in processing_steps:
        rec_config = process_config.processing_options["reconstruction"]
        Nx_rec = rec_config["end_x"] - rec_config["start_x"] + 1
        Ny_rec = rec_config["end_y"] - rec_config["start_y"] + 1
        reconstructed_volume_size = Nz * Nx_rec * Ny_rec * 4
        check_memory_limit(reconstructed_volume_size / 1e9, "reconstructions")
        total_memory_needed += reconstructed_volume_size
        if process_config.rec_params["method"] == "cone":
            # In cone-beam reconstruction, need both sinograms and reconstruction inside GPU.
            # That's big!
            total_memory_needed += 2 * data_volume_size

    if debug:
        print(
            "Mem for (delta_z=%s, delta_a=%s)  ==>  (Na=%d, Nz=%d, Nx=%d) : %.3f GB"
            % (delta_z, delta_a, Na, Nz, Nx, total_memory_needed / 1e9)
        )

    return total_memory_needed




[docs]
def estimate_max_chunk_size(
    available_memory_GB,
    process_config,
    pipeline_part="all",
    n_rows=None,
    step=10,
    max_mem_allocation_GB=None,
    fft_plans=True,
    debug=False,
):
    """
    Estimate the maximum size of the data chunk that can be loaded in memory.

    Parameters
    ----------
    available_memory_GB: float
        available memory in Giga Bytes (GB - not GiB !).
    process_config: ProcessConfig
        ProcessConfig object
    pipeline_part: str
        Which pipeline part to consider. Possible options are:
          - "full": Account for all the pipeline steps (reading data all the way to reconstruction).
          - "radios": Consider only the processing steps on projection images (ignore sinogram-based steps and reconstruction)
          - "sinogram": Consider only the processing steps related to sinograms and reconstruction
    n_rows: int, optional
        How many lines to load in each projection. Only accounted for pipeline_part="radios".
    step: int, optional
        Step size when doing the iterative memory estimation
    max_mem_allocation_GB: float, optional
        Maximum size (in GB) for one single array.

    Returns
    -------
    n_max: int
        If pipeline_par is "full" or "sinos": return the maximum number of lines that can be loaded
        in all the projections while fitting memory, i.e `data[:, 0:n_max, :]`
        If pipeline_part is "radios", return the maximum number of (partial) images that can be loaded
        while fitting memory, i.e `data[:, zmin:zmax, 0:n_max]`


    Notes
    -----
    pycuda <= 2022.1 cannot use arrays with more than 2**32 items (i.e 17.18 GB for float32).
    This was solved in more recent versions.
    """

    supported_pipeline_parts = ["all", "radios", "sinos"]
    check_supported(pipeline_part, supported_pipeline_parts, "pipeline_part")

    processing_steps_bak = process_config.processing_steps.copy()
    reconstruction_steps = ["sino_rings_correction", "reconstruction"]

    if pipeline_part == "all":
        # load lines from all the projections
        delta_a = None
        delta_z = 0
    if pipeline_part == "radios":
        # order should not matter
        process_config.processing_steps = list(set(process_config.processing_steps) - set(reconstruction_steps))
        # load lines from only a subset of projections
        delta_a = 0
        delta_z = n_rows
    if pipeline_part == "sinos":
        process_config.processing_steps = [
            step for step in process_config.processing_steps if step in reconstruction_steps
        ]
        # load lines from all the projections
        delta_a = None
        delta_z = 0

    mem = 0
    # pylint: disable=E0606, E0601
    last_valid_delta_a = delta_a
    last_valid_delta_z = delta_z
    while True:
        try:
            mem = estimate_required_memory(
                process_config,
                delta_z=delta_z,
                delta_a=delta_a,
                max_mem_allocation_GB=max_mem_allocation_GB,
                fft_plans=fft_plans,
                debug=debug,
            )
        except ValueError:
            # For very big dataset this function might return "0".
            # Either start at 1, or use a smaller step...
            break
        if mem / 1e9 > available_memory_GB:
            break
        if delta_a is not None and delta_a > process_config.n_angles():
            break
        if delta_z is not None and delta_z > process_config.radio_shape()[0]:
            break
        last_valid_delta_a, last_valid_delta_z = delta_a, delta_z
        if pipeline_part == "radios":
            delta_a += step
        else:
            delta_z += step

    process_config.processing_steps = processing_steps_bak

    if pipeline_part != "radios":
        if mem / 1e9 < available_memory_GB:
            res = min(delta_z, process_config.radio_shape()[0])
        else:
            res = last_valid_delta_z
    else:
        if mem / 1e9 < available_memory_GB:
            res = min(delta_a, process_config.n_angles())
        else:
            res = last_valid_delta_a

    # Really not ideal. For very large dataset, "step" should be very small.
    # Otherwise we go from 0 -> OK to 10 -> not OK, and then retain 0...
    if res == 0:
        res = 1
    #

    return res