Source code for compass.landice.mesh

import os
import re
import sys
import time
import uuid
from shutil import copyfile

import jigsawpy
import matplotlib.pyplot as plt
import mpas_tools.io
import numpy as np
import xarray
from geometric_features import FeatureCollection, GeometricFeatures
from matplotlib.path import Path
from mpas_tools.io import write_netcdf
from mpas_tools.logging import check_call
from mpas_tools.mesh.conversion import convert, cull
from mpas_tools.mesh.creation import build_planar_mesh
from mpas_tools.mesh.creation.sort_mesh import sort_mesh
from mpas_tools.scrip.from_mpas import scrip_from_mpas
from netCDF4 import Dataset
from pyproj import Transformer
from scipy import ndimage
from scipy.interpolate import interpn
from scipy.ndimage import binary_dilation, distance_transform_edt



def mpas_flood_fill(seed_mask, grow_mask, cellsOnCell, nEdgesOnCell,
                    grow_iters=sys.maxsize):
    """
    Flood-fill for MPAS meshes using MPAS cells.

    Parameters
    ----------
    seed_mask : numpy.ndarray
        Integer array of locations from which to flood fill.
        0 = invalid, 1 = valid

    grow_mask : numpy.ndarray
        Integer array of locations valid for growing into.
        0 = invalid, 1 = valid

    cellsOnCell : numpy.ndarray
        cellsOnCell array from the MPAS mesh

    nEdgesOnCell : numpy.ndarray
        nEdgesOnCell array from the MPAS mesh

    grow_iters : int, optional
        Maximum number of iterations over which to extend the mask

    Returns
    -------
    keep_mask : numpy.ndarray
        Mask calculated by the flood-fill routine, where cells connected
        to seed_mask are 1 and everything else is 0.
    """
    keep_mask = seed_mask.copy()
    n_mask_cells = keep_mask.sum()
    for iter in range(grow_iters):
        mask_ind = np.nonzero(keep_mask == 1)[0]
        print(f'iter={iter}, keep_mask size={keep_mask.sum()}')
        new_keep_mask = keep_mask.copy()
        for iCell in mask_ind:
            neighs = cellsOnCell[iCell, :nEdgesOnCell[iCell]] - 1
            neighs = neighs[neighs >= 0]  # drop garbage cells
            for jCell in neighs:
                if grow_mask[jCell] == 1:
                    new_keep_mask[jCell] = 1
        keep_mask = new_keep_mask.copy()
        n_mask_cells_new = keep_mask.sum()
        # stop as soon as an iteration adds no new cells
        if n_mask_cells_new == n_mask_cells:
            break
        n_mask_cells = n_mask_cells_new

    return keep_mask

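A minimal sketch of how ``mpas_flood_fill`` might be called, using a
hypothetical four-cell mesh laid out in a line (these arrays stand in for
fields normally read from an MPAS mesh file; ``cellsOnCell`` is 1-based
with 0 marking a missing neighbor):

import numpy as np

seed_mask = np.array([1, 0, 0, 0])
grow_mask = np.array([1, 1, 1, 0])   # cell 3 is never grown into
cellsOnCell = np.array([[2, 0],
                        [1, 3],
                        [2, 4],
                        [3, 0]])     # 1-based neighbor indices
nEdgesOnCell = np.array([1, 2, 2, 1])
keep_mask = mpas_flood_fill(seed_mask, grow_mask, cellsOnCell,
                            nEdgesOnCell)
# keep_mask -> array([1, 1, 1, 0])
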

def gridded_flood_fill(field, iStart=None, jStart=None):
    """
    Use a flood fill to remove disconnected regions. Any region not
    connected to the seed point is set to 0. This version uses
    scipy.ndimage.label() to identify connected components of
    field > 0, then keeps only the component containing the seed point.

    Parameters
    ----------
    field : numpy.ndarray
        Input 2D field. Cells with field > 0 are considered fillable.

    iStart, jStart : int, optional
        Seed location. If not provided, defaults to the center of the
        array.

    Returns
    -------
    flood_mask : numpy.ndarray
        Integer mask with 1 for the connected component containing the
        seed and 0 elsewhere.
    """
    sz = field.shape
    if iStart is None and jStart is None:
        iStart = sz[0] // 2
        jStart = sz[1] // 2

    # cells eligible to be part of the connected region
    valid = field > 0.0

    # if the seed is not in a valid cell, nothing gets filled
    if not valid[iStart, jStart]:
        return np.zeros(sz, dtype=int)

    # 4-connectivity to match the original implementation
    structure = np.array([[0, 1, 0],
                          [1, 1, 1],
                          [0, 1, 0]], dtype=int)
    labels, _ = ndimage.label(valid, structure=structure)
    seed_label = labels[iStart, jStart]
    return (labels == seed_label).astype(int)

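A small illustrative example of the connected-component behavior, with a
hypothetical thickness array: only the patch containing the (default,
central) seed survives, and 4-connectivity means the diagonally adjacent
corner cell is dropped.

import numpy as np

thk = np.array([[5., 0., 0., 0.],
                [0., 3., 2., 0.],
                [0., 1., 4., 0.],
                [0., 0., 0., 0.]])
mask = gridded_flood_fill(thk)
# mask:
# [[0 0 0 0]
#  [0 1 1 0]
#  [0 1 1 0]
#  [0 0 0 0]]
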

def set_rectangular_geom_points_and_edges(xmin, xmax, ymin, ymax):
    """
    Set node and edge coordinates to pass to
    :py:func:`mpas_tools.mesh.creation.build_mesh.build_planar_mesh()`.

    Parameters
    ----------
    xmin : int or float
        Left-most x-coordinate in region to mesh

    xmax : int or float
        Right-most x-coordinate in region to mesh

    ymin : int or float
        Bottom-most y-coordinate in region to mesh

    ymax : int or float
        Top-most y-coordinate in region to mesh

    Returns
    -------
    geom_points : jigsawpy.jigsaw_msh_t.VERT2_t
        xy node coordinates to pass to ``build_planar_mesh()``

    geom_edges : jigsawpy.jigsaw_msh_t.EDGE2_t
        xy edge coordinates between nodes to pass to
        ``build_planar_mesh()``
    """
    geom_points = np.array([  # list of xy "node" coordinates
        ((xmin, ymin), 0),
        ((xmax, ymin), 0),
        ((xmax, ymax), 0),
        ((xmin, ymax), 0)],
        dtype=jigsawpy.jigsaw_msh_t.VERT2_t)

    geom_edges = np.array([  # list of "edges" between nodes
        ((0, 1), 0),
        ((1, 2), 0),
        ((2, 3), 0),
        ((3, 0), 0)],
        dtype=jigsawpy.jigsaw_msh_t.EDGE2_t)

    return geom_points, geom_edges


def clip_mesh_to_bounding_box(mask_ds, base_ds, bounding_box):
    """
    Set cells to culled if they lay outside the bounding box.

    Parameters
    ----------
    mask_ds : xarray.Dataset
        Mask dataset, generated by ``compute_mpas_region_mask``

    base_ds : xarray.Dataset
        Unculled mesh dataset

    bounding_box : list of 4 ints
        Bounding box [x_min, x_max, y_min, y_max] to cull mesh outside of

    Returns
    -------
    mask_ds : xarray.Dataset
        Mask dataset with updated masks based on bounding box
    """
    if len(bounding_box) != 4:
        msg = f"bounding box must be len 4, instead is len {len(bounding_box)}"
        raise ValueError(msg)

    x_min, x_max, y_min, y_max = bounding_box

    if (x_max < x_min) or (y_max < y_min):
        msg = "Bounding box must be ordered: [x_min, x_max, y_min, y_max]"
        msg += (
            f"\n x_max < x_min ({x_max:.2f} < {x_min:.2f})"
            if x_max < x_min else ""
        )
        msg += (
            f"\n y_max < y_min ({y_max:.2f} < {y_min:.2f})"
            if y_max < y_min else ""
        )
        raise ValueError(msg)

    for loc in ["Cell", "Edge", "Vertex"]:
        mask_var = f"region{loc}Masks"
        if mask_var not in mask_ds:
            continue
        mask = (
            (base_ds[f"x{loc}"] < x_min) |
            (base_ds[f"x{loc}"] > x_max) |
            (base_ds[f"y{loc}"] < y_min) |
            (base_ds[f"y{loc}"] > y_max)
        )
        mask_ds[mask_var] = xarray.where(mask, 0, mask_ds[mask_var])

    return mask_ds

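A toy sketch of the clipping semantics, using hypothetical 1-D masks
(real ``regionCellMasks`` output from ``compute_mpas_region_masks`` also
carries a region dimension, but the where-logic is the same):

import numpy as np
import xarray

base_ds = xarray.Dataset(
    {'xCell': ('nCells', np.array([0., 5., 15.])),
     'yCell': ('nCells', np.array([0., 5., 15.]))})
mask_ds = xarray.Dataset(
    {'regionCellMasks': ('nCells', np.array([1, 1, 1]))})
mask_ds = clip_mesh_to_bounding_box(mask_ds, base_ds, [0., 10., 0., 10.])
# regionCellMasks -> [1, 1, 0]; the cell at (15, 15) is outside the box
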

def set_cell_width(self, section_name, thk, bed, vx=None, vy=None,
                   dist_to_edge=None, dist_to_grounding_line=None,
                   dist_to_coast=None, flood_fill_iStart=None,
                   flood_fill_jStart=None):
    """
    Set cell widths based on settings in config file to pass to
    :py:func:`mpas_tools.mesh.creation.build_mesh.build_planar_mesh()`.

    Parameters
    ----------
    section_name : str
        Section of the config file from which to read parameters. The
        following options must be set in the given config section:
        ``levels``, ``x_min``, ``x_max``, ``y_min``, ``y_max``,
        ``min_spac``, ``max_spac``, ``high_log_speed``,
        ``low_log_speed``, ``high_dist``, ``low_dist``,
        ``high_dist_coast``, ``low_dist_coast``, ``high_dist_bed``,
        ``low_dist_bed``, ``high_bed``, ``low_bed``, ``cull_distance``,
        ``use_speed``, ``use_dist_to_edge``,
        ``use_dist_to_grounding_line``, ``use_dist_to_coast``, and
        ``use_bed``. See the Land-Ice Framework section of the Users or
        Developers guide for more information about these options and
        their uses.

    thk : numpy.ndarray
        Ice thickness field from gridded dataset, usually after trimming
        to flood-fill mask

    bed : numpy.ndarray
        Bed topography from gridded dataset

    vx : numpy.ndarray, optional
        x-component of ice velocity from gridded dataset, usually after
        trimming to flood-fill mask. Can be set to ``None`` if
        ``use_speed == False`` in config file.

    vy : numpy.ndarray, optional
        y-component of ice velocity from gridded dataset, usually after
        trimming to flood-fill mask. Can be set to ``None`` if
        ``use_speed == False`` in config file.

    dist_to_edge : numpy.ndarray, optional
        Distance from each cell to ice edge, calculated in a separate
        function. Can be set to ``None`` if
        ``use_dist_to_edge == False`` in config file and you do not want
        to set large ``cell_width`` where cells will be culled anyway,
        but this is not recommended.

    dist_to_grounding_line : numpy.ndarray, optional
        Distance from each cell to grounding line, calculated in a
        separate function. Can be set to ``None`` if
        ``use_dist_to_grounding_line == False`` in config file.

    dist_to_coast : numpy.ndarray, optional
        Distance from each cell to coast, calculated in a separate
        function. Can be set to ``None`` if
        ``use_dist_to_coast == False`` in config file.

    flood_fill_iStart : int, optional
        x-index location to start flood fill when using bed topography

    flood_fill_jStart : int, optional
        y-index location to start flood fill when using bed topography

    Returns
    -------
    cell_width : numpy.ndarray
        Desired width of MPAS cells based on mesh density functions to
        pass to
        :py:func:`mpas_tools.mesh.creation.build_mesh.build_planar_mesh()`.
    """
    logger = self.logger
    section = self.config[section_name]

    # Get config inputs for cell spacing functions
    min_spac = section.getfloat('min_spac')
    max_spac = section.getfloat('max_spac')
    # convert km to m
    cull_distance = section.getfloat('cull_distance') * 1.e3

    land_mask = np.logical_and(thk == 0.0, bed >= 0.)
    ocean_mask = np.logical_and(thk == 0.0, bed < 0.)

    # Cell spacing function based on union of masks
    if section.get('use_bed') == 'True':
        logger.info('Using bed elevation for spacing.')
        high_dist_bed = section.getfloat('high_dist_bed')
        low_dist_bed = section.getfloat('low_dist_bed')
        low_bed = section.getfloat('low_bed')
        high_bed = section.getfloat('high_bed')

        if flood_fill_iStart is not None and flood_fill_jStart is not None:
            logger.info('calling gridded_flood_fill to find '
                        'bedTopography <= low_bed connected to the ocean.')
            tic = time.time()
            # initialize mask to low bed topography
            in_mask = (bed <= low_bed)
            # Do not let the flood fill reach further than high_dist_bed
            # into the ice sheet interior.
            in_mask[np.logical_and(
                thk > 0, dist_to_grounding_line >= high_dist_bed)] = 0
            low_bed_mask = gridded_flood_fill(in_mask,
                                              iStart=flood_fill_iStart,
                                              jStart=flood_fill_jStart)
            toc = time.time()
            logger.info(f'Flood fill finished in {toc - tic} seconds.')

        # Use a logistic curve for bed topography spacing.
        k = 0.05  # This works well, but other values could be tried
        spacing_bed = min_spac + (max_spac - min_spac) / (1.0 + np.exp(
            -k * (bed - np.mean([high_bed, low_bed]))))

        # We only want bed topography to influence spacing within
        # high_dist_bed of the ice margin. In the region between
        # low_dist_bed and high_dist_bed, use a linear ramp to damp the
        # influence of bed topography.
        spacing_bed[dist_to_grounding_line >= low_dist_bed] = (
            (1.0 - (dist_to_grounding_line[
                dist_to_grounding_line >= low_dist_bed] - low_dist_bed) /
             (high_dist_bed - low_dist_bed)) *
            spacing_bed[dist_to_grounding_line >= low_dist_bed] +
            (dist_to_grounding_line[dist_to_grounding_line >=
                                    low_dist_bed] - low_dist_bed) /
            (high_dist_bed - low_dist_bed) * max_spac)
        spacing_bed[dist_to_grounding_line >= high_dist_bed] = max_spac

        if flood_fill_iStart is not None and flood_fill_jStart is not None:
            spacing_bed[low_bed_mask == 0] = max_spac
            # Do one more flood fill to eliminate isolated pockets of
            # high resolution that were separated when we set
            # spacing_bed[dist_to_grounding_line >= high_dist_bed] = max_spac
            in_mask2 = (bed <= low_bed)
            in_mask2[np.logical_and(
                thk > 0, spacing_bed > (2. * min_spac))] = 0
            low_bed_mask2 = gridded_flood_fill(in_mask2,
                                               iStart=flood_fill_iStart,
                                               jStart=flood_fill_jStart)
            spacing_bed[low_bed_mask2 == 0] = max_spac
    else:
        spacing_bed = max_spac * np.ones_like(thk)

    # Make cell spacing function mapping from log speed to cell spacing
    if section.get('use_speed') == 'True':
        logger.info('Using speed for cell spacing')
        high_log_speed = section.getfloat('high_log_speed')
        low_log_speed = section.getfloat('low_log_speed')
        speed = (vx ** 2 + vy ** 2) ** 0.5
        lspd = np.log10(speed)
        spacing_speed = np.interp(lspd, [low_log_speed, high_log_speed],
                                  [max_spac, min_spac], left=max_spac,
                                  right=min_spac)
        # Clean up where we have missing velocities. These are usually
        # nans or the default netCDF _FillValue of ~10.e36
        missing_data_mask = np.logical_or(
            np.logical_or(np.isnan(vx), np.isnan(vy)),
            np.logical_or(np.abs(vx) > 1.e5, np.abs(vy) > 1.e5))
        spacing_speed[missing_data_mask] = max_spac
        logger.info(f'Found {np.sum(missing_data_mask)} points in input '
                    'dataset with missing velocity values. Setting '
                    'velocity-based spacing to maximum value.')
    else:
        spacing_speed = max_spac * np.ones_like(thk)

    # Make cell spacing function mapping from distance to ice edge
    if section.get('use_dist_to_edge') == 'True':
        logger.info('Using distance to ice edge for cell spacing')
        high_dist = section.getfloat('high_dist')
        low_dist = section.getfloat('low_dist')
        spacing_edge = np.interp(dist_to_edge, [low_dist, high_dist],
                                 [min_spac, max_spac], left=min_spac,
                                 right=max_spac)
    else:
        spacing_edge = max_spac * np.ones_like(thk)

    # Make cell spacing function mapping from distance to grounding line
    if section.get('use_dist_to_grounding_line') == 'True':
        logger.info('Using distance to grounding line for cell spacing')
        high_dist = section.getfloat('high_dist')
        low_dist = section.getfloat('low_dist')
        spacing_gl = np.interp(dist_to_grounding_line,
                               [low_dist, high_dist],
                               [min_spac, max_spac], left=min_spac,
                               right=max_spac)
    else:
        spacing_gl = max_spac * np.ones_like(thk)

    # Make cell spacing function mapping from distance to coast
    if section.get('use_dist_to_coast') == 'True':
        logger.info('Using distance to coast for cell spacing')
        high_dist_coast = section.getfloat('high_dist_coast')
        low_dist_coast = section.getfloat('low_dist_coast')
        spacing_coast = np.interp(dist_to_coast,
                                  [low_dist_coast, high_dist_coast],
                                  [min_spac, max_spac], left=min_spac,
                                  right=max_spac)
        # distance from coast is only used to set spacing in the ocean
        spacing_coast[bed > 0.] = max_spac
    else:
        spacing_coast = max_spac * np.ones_like(thk)
        # If not using distance from coast, use finest spacing in the
        # ocean as well as on ice-free land.
        spacing_coast[land_mask] = min_spac
        spacing_coast[ocean_mask] = min_spac

    # Merge cell spacing methods
    cell_width = max_spac * np.ones_like(thk)
    for width in [spacing_bed, spacing_speed, spacing_edge, spacing_gl,
                  spacing_coast]:
        cell_width = np.minimum(cell_width, width)

    # Set large cell_width in areas we are going to cull anyway (speeds
    # up the whole process). Use 3x the cull_distance to avoid this
    # affecting cell size in the final mesh. There may be a more
    # rigorous way to set that distance.
    if dist_to_edge is not None:
        mask = np.logical_and(
            thk == 0.0, dist_to_edge > np.abs(3. * cull_distance))
        logger.info('Setting cell_width in outer regions to max_spac '
                    f'for {mask.sum()} cells')
        cell_width[mask] = max_spac

    return cell_width

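The logistic bed-elevation ramp above can be hard to read inline; here is
a minimal numeric sketch of the same formula with hypothetical spacing
bounds (1 km to 10 km) and bed thresholds (-500 m to 0 m):

import numpy as np

min_spac, max_spac = 1.e3, 10.e3
low_bed, high_bed = -500., 0.
k = 0.05
bed = np.array([-1000., -250., 500.])
spacing = min_spac + (max_spac - min_spac) / (
    1.0 + np.exp(-k * (bed - np.mean([high_bed, low_bed]))))
# bed well below the midpoint (-250 m) -> spacing near min_spac (deep
# marine troughs get fine resolution); bed at the midpoint -> 5.5 km;
# bed well above it -> spacing near max_spac
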

def get_dist_to_edge_and_gl(self, thk, topg, x, y, section_name,
                            window_size=None):
    """
    Calculate the distance from each point to the ice edge and grounding
    line, to be used in mesh density functions in
    :py:func:`compass.landice.mesh.set_cell_width()`.

    Parameters
    ----------
    thk : numpy.ndarray
        Ice thickness field from gridded dataset, usually after trimming
        to flood-fill mask

    topg : numpy.ndarray
        Bed topography field from gridded dataset

    x : numpy.ndarray
        x coordinates from gridded dataset

    y : numpy.ndarray
        y coordinates from gridded dataset

    section_name : str
        Section of the config file from which to read parameters. The
        following options must be set in the given config section:
        ``levels``, ``x_min``, ``x_max``, ``y_min``, ``y_max``,
        ``min_spac``, ``max_spac``, ``high_log_speed``,
        ``low_log_speed``, ``high_dist``, ``low_dist``,
        ``high_dist_bed``, ``low_dist_bed``, ``high_bed``, ``low_bed``,
        ``cull_distance``, ``use_speed``, ``use_dist_to_edge``,
        ``use_dist_to_grounding_line``, and ``use_bed``. See the
        Land-Ice Framework section of the Users or Developers guide for
        more information about these options and their uses.

    window_size : int or float, optional
        Size (in meters) of a search 'box' (one-directional) to use to
        calculate the distance from each cell to the ice margin. A
        bigger number makes the search slower, but if it is too small,
        the transition zone could get truncated. We usually want this
        calculated as the maximum of ``high_dist`` and
        ``high_dist_bed``, but there may be cases in which it is useful
        to set it manually. However, it should never be smaller than
        either ``high_dist`` or ``high_dist_bed``.

    Returns
    -------
    dist_to_edge : numpy.ndarray
        Distance from each cell to the ice edge

    dist_to_grounding_line : numpy.ndarray
        Distance from each cell to the grounding line

    dist_to_coast : numpy.ndarray
        Distance from each cell to the coast, defined as the last cell
        adjacent to the ocean (ice-free with bed < 0), whether
        ice-filled or ice-free.
    """
    logger = self.logger
    section = self.config[section_name]

    tic = time.time()

    high_dist = float(section.get('high_dist'))
    high_dist_bed = float(section.get('high_dist_bed'))
    if window_size is None:
        window_size = max(high_dist, high_dist_bed)
    elif window_size < min(high_dist, high_dist_bed):
        logger.info(
            'WARNING: window_size was set smaller than high_dist and/or '
            'high_dist_bed. Resetting window_size to '
            f'{max(high_dist, high_dist_bed)}'
        )
        window_size = max(high_dist, high_dist_bed)

    dx = float(x[1] - x[0])
    dy = float(y[1] - y[0])

    # neighbor stencil in all 8 rectangular directions
    neighbors = np.array([
        [1, 0], [-1, 0], [0, 1], [0, -1],
        [1, 1], [-1, 1], [1, -1], [-1, -1]
    ])

    ice_mask = thk > 0.0
    ocean_mask = np.logical_and(thk == 0., topg < 0.)
    grounded_mask = np.logical_and(thk > (-1028.0 / 910.0 * topg),
                                   ice_mask)
    float_mask = np.logical_and(thk < (-1028.0 / 910.0 * topg),
                                ice_mask)

    margin_mask = np.zeros(thk.shape, dtype=bool)
    grounding_line_mask = np.zeros(thk.shape, dtype=bool)
    coast_mask = np.zeros(thk.shape, dtype=bool)
    for n in neighbors:
        shifted_ice = np.roll(ice_mask, n, axis=[0, 1])
        margin_mask = np.logical_or(margin_mask, ~shifted_ice)

        shifted_grounded = np.roll(grounded_mask, n, axis=[0, 1])
        shifted_float = np.roll(float_mask, n, axis=[0, 1])
        not_grounded_mask = (~shifted_grounded) & shifted_float
        grounding_line_mask = np.logical_or(grounding_line_mask,
                                            not_grounded_mask)

        shifted_ocean = np.roll(ocean_mask, n, axis=[0, 1])
        coast_mask = np.logical_or(coast_mask, shifted_ocean)

    # where ice exists and neighbors non-ice locations
    margin_mask = np.logical_and(margin_mask, ice_mask)
    # where grounded ice exists and neighbors floating ice
    grounding_line_mask = np.logical_and(grounding_line_mask,
                                         grounded_mask)
    # where non-ocean exists and neighbors ocean locations
    coast_mask = np.logical_and(coast_mask, np.logical_not(ocean_mask))

    fig, axs = plt.subplots(1, 3, sharex=True, sharey=True,
                            figsize=(9, 3))
    margin_plot = axs[0].pcolor(margin_mask)
    gl_plot = axs[1].pcolor(grounding_line_mask)  # noqa F841
    coast_plot = axs[2].pcolor(coast_mask)  # noqa F841
    axs[0].set_title("margin mask")
    axs[1].set_title("grounding line mask")
    axs[2].set_title("coast mask")
    plt.colorbar(margin_plot, ax=axs, shrink=0.7)
    for a in axs:
        a.set_aspect('equal')
    fig.savefig("masks.png", dpi=400)

    # distance_transform_edt computes the distance to the nearest zero,
    # so invert the boundary masks: non-boundary=1, boundary=0.
    dist_to_edge = distance_transform_edt(
        ~margin_mask, sampling=(dy, dx)
    )
    dist_to_grounding_line = distance_transform_edt(
        ~grounding_line_mask, sampling=(dy, dx)
    )
    dist_to_coast = distance_transform_edt(
        ~coast_mask, sampling=(dy, dx)
    )

    # Limit the maximum distance
    max_dist = float(np.ceil(window_size / max(dx, dy))) * max(dx, dy)
    dist_to_edge = np.minimum(dist_to_edge, max_dist)
    dist_to_grounding_line = np.minimum(dist_to_grounding_line, max_dist)
    dist_to_coast = np.minimum(dist_to_coast, max_dist)

    toc = time.time()
    logger.info(
        'compass.landice.mesh.get_dist_to_edge_and_gl() took '
        f'{toc - tic:0.2f} seconds'
    )

    return dist_to_edge, dist_to_grounding_line, dist_to_coast

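A miniature illustration of the EDT convention used above: the transform
returns the distance to the nearest zero, so passing the inverted
boundary mask yields distance to the boundary, and ``sampling=(dy, dx)``
converts pixel offsets to meters. The values here are hypothetical.

import numpy as np
from scipy.ndimage import distance_transform_edt

boundary = np.zeros((1, 4), dtype=bool)
boundary[0, 0] = True                 # a single boundary cell
dist = distance_transform_edt(~boundary, sampling=(1.e3, 2.e3))
# dist -> [[0., 2000., 4000., 6000.]] for dx = 2 km
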

def build_cell_width(self, section_name, gridded_dataset,
                     flood_fill_start=[None, None]):
    """
    Determine MPAS mesh cell size based on a user-defined density
    function.

    Parameters
    ----------
    section_name : str
        Section of the config file from which to read parameters. The
        following options must be set in the given config section:
        ``levels``, ``x_min``, ``x_max``, ``y_min``, ``y_max``,
        ``min_spac``, ``max_spac``, ``high_log_speed``,
        ``low_log_speed``, ``high_dist``, ``low_dist``,
        ``high_dist_bed``, ``low_dist_bed``, ``high_bed``, ``low_bed``,
        ``cull_distance``, ``use_speed``, ``use_dist_to_edge``,
        ``use_dist_to_grounding_line``, and ``use_bed``. See the
        Land-Ice Framework section of the Users or Developers guide for
        more information about these options and their uses.

    gridded_dataset : str
        Name of NetCDF file used to define cell spacing

    flood_fill_start : list of int
        ``i`` and ``j`` indices used to define the starting location for
        the flood fill. Most cases will use ``[None, None]``, which
        starts the flood fill in the center of the gridded dataset.

    Returns
    -------
    cell_width : numpy.ndarray
        Desired width of MPAS cells based on mesh density functions to
        pass to
        :py:func:`mpas_tools.mesh.creation.build_mesh.build_planar_mesh()`.

    x1 : numpy.ndarray
        x coordinates from gridded dataset

    y1 : numpy.ndarray
        y coordinates from gridded dataset

    geom_points : jigsawpy.jigsaw_msh_t.VERT2_t
        xy node coordinates to pass to ``build_planar_mesh()``

    geom_edges : jigsawpy.jigsaw_msh_t.EDGE2_t
        xy edge coordinates between nodes to pass to
        ``build_planar_mesh()``

    flood_mask : numpy.ndarray
        Mask calculated by the flood-fill routine, where cells connected
        to the ice sheet (or main feature) are 1 and everything else
        is 0.
    """
    section = self.config[section_name]

    # get needed fields from gridded dataset
    f = Dataset(gridded_dataset, 'r')
    f.set_auto_mask(False)  # disable masked arrays

    x1 = f.variables['x1'][:]
    y1 = f.variables['y1'][:]
    thk = f.variables['thk'][0, :, :]
    topg = f.variables['topg'][0, :, :]
    vx = f.variables['vx'][0, :, :]
    vy = f.variables['vy'][0, :, :]
    f.close()

    # Get bounds defined by user, or use bounds from the gridded dataset.
    bnds = get_mesh_config_bounding_box(
        section,
        default_bounds=[np.min(x1), np.max(x1), np.min(y1), np.max(y1)])
    geom_points, geom_edges = set_rectangular_geom_points_and_edges(*bnds)

    # Remove ice not connected to the ice sheet.
    flood_mask = gridded_flood_fill(thk)
    thk[flood_mask == 0] = 0.0
    vx[flood_mask == 0] = 0.0
    vy[flood_mask == 0] = 0.0

    # Calculate distance from each grid point to the ice edge and the
    # grounding line, for use in cell spacing functions.
    distToEdge, distToGL, distToCoast = get_dist_to_edge_and_gl(
        self, thk, topg, x1, y1, section_name=section_name)

    # Set cell widths based on mesh parameters set in config file
    cell_width = set_cell_width(
        self, section_name=section_name, thk=thk, bed=topg, vx=vx, vy=vy,
        dist_to_edge=distToEdge, dist_to_grounding_line=distToGL,
        dist_to_coast=distToCoast,
        flood_fill_iStart=flood_fill_start[0],
        flood_fill_jStart=flood_fill_start[1])

    return (cell_width.astype('float64'), x1.astype('float64'),
            y1.astype('float64'), geom_points, geom_edges, flood_mask)


def build_mali_mesh(self, cell_width, x1, y1, geom_points, geom_edges,
                    mesh_name, section_name, gridded_dataset, projection,
                    geojson_file=None, cores=1, bounding_box=None):
    """
    Create the MALI mesh based on final cell widths determined by
    :py:func:`compass.landice.mesh.build_cell_width()`, using Jigsaw and
    MPAS-Tools functions. Culls the mesh based on config options,
    interpolates all available fields from the gridded dataset to the
    MALI mesh using the bilinear method, and marks domain boundaries as
    Dirichlet cells.

    Parameters
    ----------
    cell_width : numpy.ndarray
        Desired width of MPAS cells calculated by
        :py:func:`build_cell_width()` based on mesh density functions
        defined in :py:func:`set_cell_width()` to pass to
        :py:func:`mpas_tools.mesh.creation.build_mesh.build_planar_mesh()`.

    x1 : numpy.ndarray
        x coordinates from gridded dataset

    y1 : numpy.ndarray
        y coordinates from gridded dataset

    geom_points : jigsawpy.jigsaw_msh_t.VERT2_t
        xy node coordinates to pass to ``build_planar_mesh()``

    geom_edges : jigsawpy.jigsaw_msh_t.EDGE2_t
        xy edge coordinates between nodes to pass to
        ``build_planar_mesh()``

    mesh_name : str
        Filename to be used for the final MALI NetCDF mesh file.

    section_name : str
        Section of the config file from which to read parameters. The
        following options must be set in the given config section:
        ``levels``, ``x_min``, ``x_max``, ``y_min``, ``y_max``,
        ``min_spac``, ``max_spac``, ``high_log_speed``,
        ``low_log_speed``, ``high_dist``, ``low_dist``,
        ``high_dist_coast``, ``low_dist_coast``, ``high_dist_bed``,
        ``low_dist_bed``, ``high_bed``, ``low_bed``, ``cull_distance``,
        ``use_speed``, ``use_dist_to_edge``,
        ``use_dist_to_grounding_line``, ``use_dist_to_coast``, and
        ``use_bed``. See the Land-Ice Framework section of the Users or
        Developers guide for more information about these options and
        their uses.

    gridded_dataset : str
        Name of gridded dataset file to be used for interpolation to the
        MALI mesh

    projection : str
        Projection to be used for setting lat-lon fields. Likely
        ``'gis-gimp'`` or ``'ais-bedmap2'``

    geojson_file : str, optional
        Name of geojson file that defines the regional domain extent.

    cores : int, optional
        The number of cores to use for mask creation

    bounding_box : array_like of float, shape (4,), optional
        Bounding box [x_min, x_max, y_min, y_max] to cull mesh outside of
    """
    if bounding_box is not None and geojson_file is None:
        msg = (
            "Bounding box clipping can only be applied to an existing "
            "cull mask. You must provide a geojson file for this to work."
        )
        raise ValueError(msg)

    logger = self.logger
    section = self.config[section_name]

    logger.info('calling build_planar_mesh')
    build_planar_mesh(cell_width, x1, y1, geom_points, geom_edges,
                      logger=logger)
    dsMesh = xarray.open_dataset('base_mesh.nc')
    logger.info('culling mesh')
    dsMesh = cull(dsMesh, logger=logger)
    logger.info('converting to MPAS mesh')
    dsMesh = convert(dsMesh, logger=logger)
    logger.info('writing grid_converted.nc')
    write_netcdf(dsMesh, 'grid_converted.nc')

    levels = section.get('levels')
    args = ['create_landice_grid_from_generic_mpas_grid',
            '-i', 'grid_converted.nc',
            '-o', 'grid_preCull.nc',
            '-l', levels,
            '-v', 'glimmer']
    check_call(args, logger=logger)

    args = ['interpolate_to_mpasli_grid',
            '-s', gridded_dataset,
            '-d', 'grid_preCull.nc',
            '-m', 'b', '-t']
    check_call(args, logger=logger)

    cullDistance = section.get('cull_distance')
    if float(cullDistance) > 0.:
        args = ['define_landice_cull_mask',
                '-f', 'grid_preCull.nc',
                '-m', 'distance',
                '-d', cullDistance]
        check_call(args, logger=logger)
    else:
        logger.info('cullDistance <= 0 in config file. '
                    'Will not cull by distance to margin. \n')

    if geojson_file is not None:
        # This step is only necessary because the GeoJSON region
        # is defined by lat-lon.
        args = ['set_lat_lon_fields_in_planar_grid',
                '-f', 'grid_preCull.nc',
                '-p', projection]
        check_call(args, logger=logger)

        args = ['compute_mpas_region_masks',
                '-m', 'grid_preCull.nc',
                '-o', 'mask.nc',
                '-g', geojson_file,
                '--process_count', f'{cores}',
                '--format', mpas_tools.io.default_format,
                '--engine', mpas_tools.io.default_engine]
        check_call(args, logger=logger)

    logger.info('culling to geojson file')
    dsMesh = xarray.open_dataset('grid_preCull.nc')
    if geojson_file is not None:
        mask = xarray.open_dataset('mask.nc')
        if bounding_box is not None:
            mask = clip_mesh_to_bounding_box(mask, dsMesh, bounding_box)
    else:
        mask = None
    dsMesh = cull(dsMesh, dsInverse=mask, logger=logger)
    write_netcdf(dsMesh, 'culled.nc')

    logger.info('Marking horns for culling')
    args = ['mark_horns_for_culling', '-f', 'culled.nc']
    check_call(args, logger=logger)

    logger.info('culling, converting, and sorting')
    dsMesh = xarray.open_dataset('culled.nc')
    dsMesh = cull(dsMesh, logger=logger)
    dsMesh = convert(dsMesh, logger=logger)
    dsMesh = sort_mesh(dsMesh)
    write_netcdf(dsMesh, 'dehorned.nc')

    args = ['create_landice_grid_from_generic_mpas_grid',
            '-i', 'dehorned.nc',
            '-o', mesh_name,
            '-l', levels,
            '-v', 'glimmer',
            '--beta', '--thermal', '--obs', '--diri']
    check_call(args, logger=logger)

    args = ['interpolate_to_mpasli_grid',
            '-s', gridded_dataset,
            '-d', mesh_name,
            '-m', 'b']
    check_call(args, logger=logger)

    logger.info('Marking domain boundaries dirichlet')
    args = ['mark_domain_boundaries_dirichlet', '-f', mesh_name]
    check_call(args, logger=logger)

    args = ['set_lat_lon_fields_in_planar_grid',
            '-f', mesh_name,
            '-p', projection]
    check_call(args, logger=logger)


def make_region_masks(self, mesh_filename, mask_filename, cores, tags,
                      component='landice', all_tags=True):
    """
    Create masks for ice-sheet subregions based on data in
    ``MPAS-Dev/geometric_features``.

    Parameters
    ----------
    mesh_filename : str
        Name of NetCDF mesh file for which to create region masks

    mask_filename : str
        Name of NetCDF file to contain region masks

    cores : int
        Number of processors used to create region masks

    tags : list of str
        Groups of regions for which masks are to be defined

    component : str, optional
        The geometric_features component from which to read regions

    all_tags : bool, optional
        Whether features must have all of the given tags (as opposed to
        any of them) to be included
    """
    logger = self.logger

    logger.info('creating region masks')

    gf = GeometricFeatures()
    fcMask = FeatureCollection()

    fc = gf.read(componentName=component, objectType='region', tags=tags,
                 allTags=all_tags)
    fcMask.merge(fc)

    geojson_filename = 'regionMask.geojson'
    fcMask.to_geojson(geojson_filename)

    args = ['compute_mpas_region_masks',
            '-m', mesh_filename,
            '-g', geojson_filename,
            '-o', mask_filename,
            '-t', 'cell', 'edge',
            '--process_count', f'{cores}',
            '--format', mpas_tools.io.default_format,
            '--engine', mpas_tools.io.default_engine]
    check_call(args, logger=logger)


def add_bedmachine_thk_to_ais_gridded_data(self, source_gridded_dataset,
                                           bedmachine_path):
    """
    Copy BedMachine thickness to the AIS reference gridded dataset.

    Replace the thickness field in the compilation dataset with the one
    we will be using from BedMachine for actual thickness interpolation.
    There are significant inconsistencies between the masking of the
    two, particularly along the Antarctic Peninsula, that lead to funky
    mesh extent and culling if we use the thickness from the 8 km
    composite dataset to define the cullMask but then actually
    interpolate thickness from BedMachine.

    This function uses bilinear interpolation to interpolate from the
    500 m resolution of BedMachine to the 8 km resolution of the
    reference dataset. It is not particularly accurate, but is fast and
    adequate for generating the flood-filled mask for culling the mesh.
    Highly accurate conservative remapping is performed later for
    actually interpolating BedMachine thickness to the final MALI mesh.

    Parameters
    ----------
    source_gridded_dataset : str
        Name of NetCDF file containing original AIS gridded datasets

    bedmachine_path : str
        Path to BedMachine dataset

    Returns
    -------
    gridded_dataset_with_bm_thk : str
        Name of NetCDF file with gridded dataset with BedMachine thk
        added
    """
    logger = self.logger

    tic = time.perf_counter()

    bm_data = Dataset(bedmachine_path, 'r')
    bm_x = bm_data.variables['x'][:]
    bm_y = bm_data.variables['y'][:]
    bm_mask = bm_data.variables['iceMask'][:]
    bm_thk = bm_data.variables['thk'][:]

    # BedMachine v2 includes a mask with: 0=ocean, 1=land, 2=grd ice,
    # 3=flt ice, 4=vostok
    # NOTE: Later versions of BedMachine may not have the same mask
    # values!
    # We only want to keep thickness where the mask has ice; this is
    # necessary because thickness has been extrapolated.
    bm_thk *= (bm_mask > 1.5)

    # The two datasets are oriented differently, so align them.
    bm_thk = np.flipud(np.rot90(bm_thk))

    gridded_dataset_with_bm_thk = \
        f"{source_gridded_dataset.split('.')[:-1][0]}_BedMachineThk.nc"
    copyfile(source_gridded_dataset, gridded_dataset_with_bm_thk)

    gg = Dataset(gridded_dataset_with_bm_thk, 'r+')
    gg_x = gg.variables['x1'][:]
    gg_y = gg.variables['y1'][:]
    gg_xx, gg_yy = np.meshgrid(gg_x, gg_y)

    gg_thk = interpn((bm_x, bm_y), bm_thk, (gg_xx, gg_yy),
                     bounds_error=False, fill_value=0.0)

    gg.variables['thk'][0, :, :] = gg_thk
    gg.close()
    bm_data.close()

    toc = time.perf_counter()
    logger.info('Finished interpolating BedMachine thickness to '
                f'reference grid in {toc - tic} seconds')

    return gridded_dataset_with_bm_thk

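A miniature sketch of the ``interpn`` call pattern used above (the module
passes a tuple of query-coordinate arrays, which scipy stacks along the
last axis; values here are hypothetical):

import numpy as np
from scipy.interpolate import interpn

x = np.array([0., 1., 2.])
y = np.array([0., 1.])
field = np.arange(6.).reshape(3, 2)   # shape (len(x), len(y))
xq = np.array([0.5, 5.0])
yq = np.array([0.5, 0.5])
vals = interpn((x, y), field, (xq, yq),
               bounds_error=False, fill_value=0.0)
# vals -> [1.5, 0.]; the out-of-range point gets the fill value
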

def preprocess_ais_data(self, source_gridded_dataset, floodFillMask):
    """
    Perform adjustments to gridded AIS datasets needed for the rest of
    the compass workflow to utilize them.

    Parameters
    ----------
    source_gridded_dataset : str
        Name of NetCDF file containing original AIS gridded datasets

    floodFillMask : numpy.ndarray
        0/1 mask of flood-filled ice region

    Returns
    -------
    preprocessed_gridded_dataset : str
        Name of NetCDF file with preprocessed version of gridded dataset
    """
    logger = self.logger

    def _nearest_fill_from_valid(field2d, valid_mask):
        """
        Fill invalid cells in a 2D regular raster using the value from
        the nearest valid cell on the same grid.

        Parameters
        ----------
        field2d : numpy.ndarray
            2D field to be filled

        valid_mask : numpy.ndarray
            Boolean mask where True marks valid cells

        Returns
        -------
        filled : numpy.ndarray
            Copy of field2d with invalid cells filled
        """
        valid_mask = np.asarray(valid_mask, dtype=bool)
        if field2d.shape != valid_mask.shape:
            raise ValueError(
                'field2d and valid_mask must have the same shape')
        if not np.any(valid_mask):
            raise ValueError('No valid cells available for nearest fill.')

        # For EDT, foreground=True cells get mapped to the nearest
        # background=False cell when return_indices=True, so we pass
        # ~valid_mask.
        nearest_inds = distance_transform_edt(
            ~valid_mask, return_distances=False, return_indices=True
        )
        filled = np.array(field2d, copy=True)
        invalid = ~valid_mask
        filled[invalid] = field2d[
            nearest_inds[0, invalid], nearest_inds[1, invalid]
        ]
        return filled

    # Apply floodFillMask to thickness field to help with culling
    file_with_flood_fill = \
        f"{source_gridded_dataset.split('.')[:-1][0]}_floodFillMask.nc"
    copyfile(source_gridded_dataset, file_with_flood_fill)
    gg = Dataset(file_with_flood_fill, 'r+')
    gg.variables['thk'][0, :, :] *= floodFillMask
    gg.variables['vx'][0, :, :] *= floodFillMask
    gg.variables['vy'][0, :, :] *= floodFillMask
    gg.close()

    # Now deal with the peculiarities of the AIS dataset.
    preprocessed_gridded_dataset = \
        f"{file_with_flood_fill.split('.')[:-1][0]}_filledFields.nc"
    copyfile(file_with_flood_fill, preprocessed_gridded_dataset)
    data = Dataset(preprocessed_gridded_dataset, 'r+')
    data.set_auto_mask(False)
    x1 = data.variables["x1"][:]
    y1 = data.variables["y1"][:]
    thk = data.variables["thk"][0, :, :]
    cellsWithIce = thk > 0.0
    data.createVariable('iceMask', 'f', ('time', 'y1', 'x1'))
    data.variables['iceMask'][:] = data.variables["thk"][:] > 0.0

    # Note: dhdt is only reported over grounded ice, so we will have to
    # either update the dataset to include ice shelves or give them
    # values of 0 with reasonably large uncertainties.
    dHdt = data.variables["dhdt"][:]
    dHdtErr = 0.05 * dHdt  # assign arbitrary uncertainty of 5%
    # Where dHdt data are missing, set large uncertainty
    dHdtErr[dHdt > 1.e30] = 1.0

    # Extrapolate fields beyond the region with ice to avoid
    # interpolation artifacts of undefined values outside the ice
    # domain.
    #
    # The masks below are masks of valid cells.
    bigTic = time.perf_counter()
    for field in ['thk', 'bheatflx', 'vx', 'vy', 'ex', 'ey', 'thkerr',
                  'dhdt']:
        tic = time.perf_counter()
        logger.info(f'Beginning nearest-fill preprocessing for {field}')

        field2d = data.variables[field][0, :, :]
        if field in ['thk', 'thkerr']:
            valid_mask = cellsWithIce
        elif field == 'bheatflx':
            valid_mask = np.logical_and(field2d < 1.0e9, field2d != 0.0)
        elif field in ['vx', 'vy', 'ex', 'ey', 'dhdt']:
            valid_mask = np.logical_and(field2d < 1.0e9, cellsWithIce)
        else:
            valid_mask = cellsWithIce

        logger.info(f'{field}: {valid_mask.sum()} valid cells, '
                    f'{(~valid_mask).sum()} cells to fill')

        filled2d = _nearest_fill_from_valid(field2d, valid_mask)
        data.variables[field][0, :, :] = filled2d

        toc = time.perf_counter()
        logger.info(f'Nearest-fill preprocessing for {field} completed '
                    f'in {toc - tic:.3f} seconds')
    bigToc = time.perf_counter()
    logger.info(f'All nearest-fill preprocessing completed in '
                f'{bigToc - bigTic:.3f} seconds.')

    # Now perform some additional clean-up adjustments to the dataset
    data.createVariable('dHdtErr', 'f', ('time', 'y1', 'x1'))
    data.variables['dHdtErr'][:] = dHdtErr

    data.createVariable('vErr', 'f', ('time', 'y1', 'x1'))
    data.variables['vErr'][:] = np.sqrt(data.variables['ex'][:]**2 +
                                        data.variables['ey'][:]**2)

    data.variables['bheatflx'][:] *= 1.e-3  # correct units
    data.variables['bheatflx'].units = 'W m-2'

    data.variables['subm'][:] *= -1.0  # correct basal melting sign
    data.variables['subm_ss'][:] *= -1.0

    data.renameVariable('dhdt', 'dHdt')
    data.renameVariable('thkerr', 'topgerr')

    data.createVariable('x', 'f', ('x1',))
    data.createVariable('y', 'f', ('y1',))
    data.variables['x'][:] = x1
    data.variables['y'][:] = y1

    data.close()

    return preprocessed_gridded_dataset

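The ``return_indices`` trick used by ``_nearest_fill_from_valid``, in
miniature: for each invalid cell, EDT reports the indices of the nearest
valid cell, which can then be used for advanced indexing (toy values):

import numpy as np
from scipy.ndimage import distance_transform_edt

field = np.array([[7., 0., 0., 4.]])
valid = field != 0.
inds = distance_transform_edt(~valid, return_distances=False,
                              return_indices=True)
filled = field[tuple(inds)]
# filled -> [[7., 7., 4., 4.]]
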

# Common helpers shared by build_dst_scrip_hull,
# add_grid_imask_from_dst_scrip_hull, and the diagnostic plotting code.
LANDICE_PROJECTIONS = {
    'greenland': (
        '+proj=stere +lat_ts=70.0 +lat_0=90 +lon_0=315.0 +k_0=1.0 '
        '+x_0=0.0 +y_0=0.0 +ellps=WGS84'
    ),
    'antarctica': (
        '+proj=stere +lat_ts=-71.0 +lat_0=-90 +lon_0=0.0 +k_0=1.0 '
        '+x_0=0.0 +y_0=0.0 +ellps=WGS84'
    ),
}
LANDICE_PROJECTIONS['gis-gimp'] = LANDICE_PROJECTIONS['greenland']
LANDICE_PROJECTIONS['ais-bedmap2'] = LANDICE_PROJECTIONS['antarctica']


def _resolve_mesh_crs(domain, mesh_crs):
    """Return *mesh_crs* resolved from *domain* if it was ``None``."""
    if mesh_crs is not None:
        return mesh_crs
    if domain not in LANDICE_PROJECTIONS:
        raise ValueError(
            f"Unknown domain '{domain}'. Expected one of "
            f"{list(LANDICE_PROJECTIONS.keys())}, or supply mesh_crs "
            f"explicitly."
        )
    return LANDICE_PROJECTIONS[domain]


def _maybe_deg(lon, lat):
    """
    Convert lon/lat from radians to degrees if they appear to be in
    radians.
    """
    if (np.nanmax(np.abs(lon)) <= 2.0 * np.pi + 1.0e-6 and
            np.nanmax(np.abs(lat)) <= 0.5 * np.pi + 1.0e-6):
        lon = np.rad2deg(lon)
        lat = np.rad2deg(lat)
    return lon, lat

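A quick sketch of how these helpers behave (the 'EPSG:3413' value is only
an illustrative stand-in for an explicitly supplied CRS):

import numpy as np

proj = _resolve_mesh_crs('gis-gimp', None)      # Greenland proj4 string
proj = _resolve_mesh_crs('other', 'EPSG:3413')  # explicit CRS wins
lon, lat = _maybe_deg(np.array([1.0]), np.array([-0.5]))
# values within [-2*pi, 2*pi] and [-pi/2, pi/2] are treated as radians
# and converted: lon -> [57.29...], lat -> [-28.64...]
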

def build_dst_scrip_hull(dest_scrip, domain, buffer_m=50e3,
                         source_crs='EPSG:4326', mesh_crs=None,
                         logger=None):
    """
    Build a buffered concave boundary from the footprint of a
    destination SCRIP file and return it as a
    :py:class:`matplotlib.path.Path`.

    The boundary is constructed by rasterising the destination mesh
    points onto a coarse grid (10 km cell size), dilating by
    ``buffer_m``, and extracting the outer contour. This produces a
    tight, concave mask that follows the actual domain shape (including
    bays and fjords) while remaining computationally inexpensive
    (< 1 s for any realistic mesh).

    This is factored out of
    :py:func:`compass.landice.mesh.add_grid_imask_from_dst_scrip_hull`
    so that the boundary can be computed once and reused across multiple
    source datasets that share the same destination mesh, avoiding
    redundant I/O and computation.

    Parameters
    ----------
    dest_scrip : str
        Destination SCRIP file

    domain : str
        Projection domain. Recognised convenience keys: 'greenland',
        'gis-gimp', 'antarctica', 'ais-bedmap2'. Any other value
        requires mesh_crs to be supplied explicitly.

    buffer_m : float, optional
        Outward buffer distance in meters (default 50 km)

    source_crs : str, optional
        CRS of lon/lat variables in the SCRIP file (default 'EPSG:4326')

    mesh_crs : str, optional
        Proj4/EPSG string for planar coordinates. Derived from domain if
        None.

    logger : logging.Logger, optional
        Logger for status messages; falls back to print if None.

    Returns
    -------
    hull_path : matplotlib.path.Path
        Buffered concave boundary of the destination mesh footprint.
    """
    def _log(msg):
        if logger is not None:
            logger.info(msg)
        else:
            print(msg)

    mesh_crs = _resolve_mesh_crs(domain, mesh_crs)
    transformer = Transformer.from_crs(source_crs, mesh_crs,
                                       always_xy=True)

    def _projected_dest_points(ds):
        if ('grid_corner_x' in ds.variables and
                'grid_corner_y' in ds.variables):
            x = ds['grid_corner_x'].values
            y = ds['grid_corner_y'].values
            _log('Using existing projected destination corners.')
        elif ('grid_corner_lon' in ds.variables and
                'grid_corner_lat' in ds.variables):
            lon = ds['grid_corner_lon'].values
            lat = ds['grid_corner_lat'].values
            lon, lat = _maybe_deg(lon, lat)
            x, y = transformer.transform(lon, lat)
            _log('Projected destination corners from lon/lat.')
        elif ('grid_center_x' in ds.variables and
                'grid_center_y' in ds.variables):
            x = ds['grid_center_x'].values
            y = ds['grid_center_y'].values
            _log('Using existing projected destination centers.')
        else:
            lon = ds['grid_center_lon'].values
            lat = ds['grid_center_lat'].values
            lon, lat = _maybe_deg(lon, lat)
            x, y = transformer.transform(lon, lat)
            _log('Projected destination centers from lon/lat.')
        pts = np.column_stack([np.ravel(x), np.ravel(y)])
        pts = pts[np.all(np.isfinite(pts), axis=1)]
        return pts

    with xarray.open_dataset(dest_scrip) as ds_dst:
        dst_points = _projected_dest_points(ds_dst)

    if dst_points.shape[0] < 3:
        raise ValueError(
            'Not enough destination points to build a boundary.')

    # --- Rasterize-dilate-contour approach ---
    grid_spacing = 10e3  # 10 km cell size for the coarse raster
    x_min = dst_points[:, 0].min() - buffer_m
    x_max = dst_points[:, 0].max() + buffer_m
    y_min = dst_points[:, 1].min() - buffer_m
    y_max = dst_points[:, 1].max() + buffer_m
    nx = int(np.ceil((x_max - x_min) / grid_spacing)) + 1
    ny = int(np.ceil((y_max - y_min) / grid_spacing)) + 1

    # Bin destination points onto the coarse grid
    ix = ((dst_points[:, 0] - x_min) / grid_spacing).astype(int)
    iy = ((dst_points[:, 1] - y_min) / grid_spacing).astype(int)
    ix = np.clip(ix, 0, nx - 1)
    iy = np.clip(iy, 0, ny - 1)
    occupancy = np.zeros((ny, nx), dtype=bool)
    occupancy[iy, ix] = True

    # Build a disk structuring element for dilation
    radius_px = int(np.ceil(buffer_m / grid_spacing))
    yy, xx = np.ogrid[-radius_px:radius_px + 1,
                      -radius_px:radius_px + 1]
    disk = (xx ** 2 + yy ** 2) <= radius_px ** 2

    # Dilate the occupancy mask
    dilated = binary_dilation(occupancy, structure=disk)

    # Pad with False so the contour never exits the grid boundary
    # (prevents straight-line artifacts from open contour paths)
    dilated_padded = np.pad(dilated, pad_width=1, constant_values=False)

    # Extract the outer contour at the 0.5 level
    fig_tmp, ax_tmp = plt.subplots()
    cs = ax_tmp.contour(dilated_padded.astype(float), levels=[0.5])
    plt.close(fig_tmp)

    # Find the longest contour path (outer boundary)
    all_paths = cs.allsegs[0]
    if not all_paths:
        raise ValueError(
            'Could not extract a contour from the dilated mask.')
    longest = max(all_paths, key=lambda p: len(p))

    # Convert pixel coordinates back to projected coordinates
    # (subtract 1 to account for the padding pixel)
    poly_x = x_min + (longest[:, 0] - 1) * grid_spacing
    poly_y = y_min + (longest[:, 1] - 1) * grid_spacing
    boundary_pts = np.column_stack([poly_x, poly_y])

    _log(f'destination boundary x extent: '
         f'{boundary_pts[:, 0].min():.0f} to '
         f'{boundary_pts[:, 0].max():.0f}')
    _log(f'destination boundary y extent: '
         f'{boundary_pts[:, 1].min():.0f} to '
         f'{boundary_pts[:, 1].max():.0f}')
    _log(f'boundary polygon vertices: {len(boundary_pts)}')

    return Path(boundary_pts, closed=True)

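The disk structuring element used for the dilation, in miniature: a
50 km buffer on a 10 km raster gives radius_px = 5; the toy below uses
radius_px = 1, which yields a plus-shaped kernel.

import numpy as np
from scipy.ndimage import binary_dilation

occupancy = np.array([[0, 0, 0],
                      [0, 1, 0],
                      [0, 0, 0]], dtype=bool)
radius_px = 1
yy, xx = np.ogrid[-radius_px:radius_px + 1, -radius_px:radius_px + 1]
disk = (xx ** 2 + yy ** 2) <= radius_px ** 2
dilated = binary_dilation(occupancy, structure=disk)
# dilated:
# [[False  True False]
#  [ True  True  True]
#  [False  True False]]
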

def add_grid_imask_from_dst_scrip_hull(source_scrip, dest_scrip,
                                       masked_source_scrip, domain,
                                       buffer_m=50e3,
                                       source_crs='EPSG:4326',
                                       mesh_crs=None, hull_path=None,
                                       logger=None):
    """
    Create a new source SCRIP file with grid_imask set to 1 only for
    cells that fall within the boundary (plus buffer) of the destination
    SCRIP footprint. This dramatically reduces the work
    ESMF_RegridWeightGen must do when the source dataset is much larger
    than the destination mesh.

    Parameters
    ----------
    source_scrip : str
        Input source SCRIP file

    dest_scrip : str
        Destination SCRIP file used by ESMF_RegridWeightGen. Ignored if
        hull_path is provided.

    masked_source_scrip : str
        Output source SCRIP file with updated grid_imask

    domain : str
        Projection domain. Recognised convenience keys: 'greenland',
        'gis-gimp', 'antarctica', 'ais-bedmap2'. Any other value
        requires mesh_crs to be supplied explicitly.

    buffer_m : float, optional
        Approximate outward hull expansion in meters. Ignored if
        hull_path is provided.

    source_crs : str, optional
        CRS of lon/lat variables in SCRIP files (default 'EPSG:4326')

    mesh_crs : str, optional
        Proj4/EPSG string for planar coordinates. Derived from domain if
        None.

    hull_path : matplotlib.path.Path or None, optional
        Pre-built buffered concave boundary of the destination mesh
        footprint, as returned by
        :py:func:`compass.landice.mesh.build_dst_scrip_hull`. When
        provided, dest_scrip is not read and the boundary is not
        recomputed, which is useful when masking multiple source
        datasets against the same destination mesh.

    logger : logging.Logger, optional
        Logger for status messages; falls back to print if None.
    """
    def _log(msg):
        if logger is not None:
            logger.info(msg)
        else:
            print(msg)

    mesh_crs = _resolve_mesh_crs(domain, mesh_crs)
    transformer = Transformer.from_crs(source_crs, mesh_crs,
                                       always_xy=True)

    def _projected_centers(ds):
        if ('grid_center_x' in ds.variables and
                'grid_center_y' in ds.variables):
            xc = ds['grid_center_x'].values
            yc = ds['grid_center_y'].values
            _log('Using existing projected source centers.')
        else:
            lon = ds['grid_center_lon'].values
            lat = ds['grid_center_lat'].values
            lon, lat = _maybe_deg(lon, lat)
            xc, yc = transformer.transform(lon, lat)
            _log('Projected source centers from lon/lat.')
        return np.asarray(xc), np.asarray(yc)

    if hull_path is None:
        hull_path = build_dst_scrip_hull(
            dest_scrip=dest_scrip, domain=domain, buffer_m=buffer_m,
            source_crs=source_crs, mesh_crs=mesh_crs, logger=logger)

    with xarray.open_dataset(source_scrip) as ds_src:
        xc, yc = _projected_centers(ds_src)

        _log(f'source x extent: {np.nanmin(xc):.0f} to '
             f'{np.nanmax(xc):.0f}')
        _log(f'source y extent: {np.nanmin(yc):.0f} to '
             f'{np.nanmax(yc):.0f}')

        src_pts = np.column_stack([xc, yc])
        inside = hull_path.contains_points(src_pts).astype(np.int32)

        _log(f'active source cells after masking: '
             f'{inside.sum()} / {inside.size}')

        ds_out = ds_src.copy()
        ds_out['grid_imask'] = xarray.DataArray(
            inside, dims=('grid_size',),
            attrs={'long_name': '0/1 mask for active source cells'}
        )
        ds_out.to_netcdf(masked_source_scrip, mode='w')


def plot_hull_diagnostic(hull_path, dest_scrip, source_bbox_files,
                         domain, masked_scrip=None, logger=None):
    """
    Save a diagnostic PNG (``mesh_boundary.png``) showing the masking
    geometry.

    Parameters
    ----------
    hull_path : matplotlib.path.Path
        Buffered concave boundary of the destination mesh.

    dest_scrip : str
        Destination SCRIP file (used to scatter MALI domain extent).

    source_bbox_files : list of tuple
        Each entry is ``(filepath, label, edgecolor)`` for a source
        dataset whose bounding box should be drawn.

    domain : str
        Projection domain key (e.g. 'greenland', 'antarctica') or a
        proj4/EPSG string used to build the planar transformer.

    masked_scrip : str or None, optional
        Path to a masked source SCRIP file. If provided and the file
        exists, active (masked-in) source cells are plotted as an
        ice-sheet extent indicator.

    logger : logging.Logger, optional
        Logger for status messages; falls back to print if None.
    """
    def _log(msg):
        if logger is not None:
            logger.info(msg)
        else:
            print(msg)

    try:
        from matplotlib.patches import Polygon as MplPolygon
        from matplotlib.patches import Rectangle

        mesh_crs = LANDICE_PROJECTIONS.get(domain, domain)
        transformer = Transformer.from_crs(
            'EPSG:4326', mesh_crs, always_xy=True)

        # --- Load active source cells from masked SCRIP (if available)
        active_xc = np.array([])
        active_yc = np.array([])
        if masked_scrip is not None and os.path.exists(masked_scrip):
            with xarray.open_dataset(masked_scrip) as ds_m:
                imask = ds_m['grid_imask'].values.astype(bool)
                if ('grid_center_x' in ds_m.variables and
                        'grid_center_y' in ds_m.variables):
                    active_xc = ds_m['grid_center_x'].values[imask]
                    active_yc = ds_m['grid_center_y'].values[imask]
                elif ('grid_center_lon' in ds_m.variables and
                        'grid_center_lat' in ds_m.variables):
                    lon = ds_m['grid_center_lon'].values[imask]
                    lat = ds_m['grid_center_lat'].values[imask]
                    lon, lat = _maybe_deg(lon, lat)
                    active_xc, active_yc = transformer.transform(lon, lat)
            active_xc = np.asarray(active_xc)
            active_yc = np.asarray(active_yc)

        # --- destination SCRIP centers (MALI domain extent) ---
        with xarray.open_dataset(dest_scrip) as ds_dst:
            if ('grid_center_x' in ds_dst.variables and
                    'grid_center_y' in ds_dst.variables):
                xd = np.asarray(ds_dst['grid_center_x'].values)
                yd = np.asarray(ds_dst['grid_center_y'].values)
            else:
                lon = ds_dst['grid_center_lon'].values
                lat = ds_dst['grid_center_lat'].values
                lon, lat = _maybe_deg(lon, lat)
                xd, yd = transformer.transform(lon, lat)
                xd, yd = np.asarray(xd), np.asarray(yd)

        rng = np.random.default_rng(seed=0)

        def _subsample(x, y, n=50000):
            idx = np.where(np.isfinite(x) & np.isfinite(y))[0]
            if len(idx) > n:
                idx = rng.choice(idx, size=n, replace=False)
            return x[idx], y[idx]

        def _src_extent(filepath):
            """Return (x_min, x_max, y_min, y_max) from a gridded
            dataset."""
            with xarray.open_dataset(filepath) as ds:
                if 'x1' in ds and 'y1' in ds:
                    x, y = ds['x1'].values, ds['y1'].values
                elif 'x' in ds and 'y' in ds:
                    x, y = ds['x'].values, ds['y'].values
                else:
                    return None
                return float(x.min()), float(x.max()), \
                    float(y.min()), float(y.max())

        fig, ax = plt.subplots(figsize=(10, 10))

        # MALI domain extent (tab:pink)
        xs, ys = _subsample(xd, yd)
        ax.scatter(xs, ys, s=1.5, color='tab:pink', alpha=0.6,
                   label='MALI cells (subsampled)', rasterized=True)

        # source bounding boxes
        for filepath, label, color in source_bbox_files:
            ext = _src_extent(filepath)
            if ext is None:
                continue
            x0, x1, y0, y1 = ext
            bbox = Rectangle(
                (x0, y0), x1 - x0, y1 - y0,
                linewidth=1.5, edgecolor=color, facecolor='none',
                label=label)
            ax.add_patch(bbox)

        # mesh boundary (concave, buffered)
        hull_verts = hull_path.vertices
        hull_patch = MplPolygon(
            hull_verts, closed=True, linewidth=2,
            edgecolor='tab:green', facecolor='none',
            label='mesh boundary (buffered)')
        ax.add_patch(hull_patch)

        ax.set_aspect('equal')
        ax.set_xlabel('x (m)')
        ax.set_ylabel('y (m)')
        ax.legend(loc='best', markerscale=6)
        ax.set_title('Source SCRIP masking: boundary vs. domain')
        fig.savefig('mesh_boundary.png', dpi=150, bbox_inches='tight',
                    facecolor=fig.get_facecolor())
        plt.close(fig)
        _log('Saved masking diagnostic plot to mesh_boundary.png')
    except Exception as exc:
        _log(f'Warning: could not save mesh boundary plot: {exc}')


def interp_gridded2mali(self, source_file, mali_scrip,
                        parallel_executable, nProcs, dest_file, proj,
                        variables="all", hull_path=None):
    """
    Interpolate a gridded dataset (e.g. MEaSUREs, BedMachine) onto a
    MALI mesh.

    Parameters
    ----------
    source_file : str
        Filepath to the source gridded dataset to be interpolated

    mali_scrip : str
        Name of SCRIP file corresponding to destination MALI mesh

    parallel_executable : str
        Executable needed to launch a parallel job

    nProcs : int
        Number of processors to use for generating remapping weights

    dest_file : str
        MALI input file to which data should be remapped

    proj : str
        Projection of the source dataset

    variables : "all" or list of str
        Either the string "all" or a list of strings

    hull_path : matplotlib.path.Path or None, optional
        Pre-built buffered concave boundary of the destination mesh
        footprint, as returned by
        :py:func:`compass.landice.mesh.build_dst_scrip_hull`. When
        provided, the boundary is not recomputed from mali_scrip, which
        avoids redundant I/O and computation when this function is
        called multiple times with the same destination mesh.

    Returns
    -------
    masked_source_scrip : str
        Path to the masked source SCRIP file written during
        interpolation.
    """
    logger = self.logger

    bare = os.path.splitext(os.path.basename(source_file))[0]
    match = re.search(r'(^.*[_-]v\d*[_-])+', bare)
    if match:
        scrip_stem = bare[:match.end() - 1]
    else:
        scrip_stem = bare
    source_scrip = f'{scrip_stem}.scrip.nc'
    weights_filename = "gridded_to_MPAS_weights.nc"

    # make sure variables is a list; this encompasses the
    # variables="all" case
    if isinstance(variables, str):
        variables = [variables]
    if not isinstance(variables, list):
        raise TypeError("Argument 'variables' is of incorrect type; must "
                        "be either the string 'all' or a list of strings")

    logger.info('creating scrip file for source dataset')
    # Note: writing scrip file to workdir
    args = ['create_scrip_file_from_planar_rectangular_grid',
            '-i', source_file,
            '-s', source_scrip,
            '-p', proj,
            '-r', '2']
    check_call(args, logger=logger)

    # Mask source SCRIP to the footprint of the destination mesh so that
    # ESMF_RegridWeightGen only processes the cells that overlap the
    # target.
    stem = os.path.splitext(source_scrip)[0]  # strips .nc
    masked_source_scrip = f'{stem}_masked.nc'
    logger.info('masking source SCRIP to destination mesh footprint')
    add_grid_imask_from_dst_scrip_hull(
        source_scrip=source_scrip,
        dest_scrip=mali_scrip,
        masked_source_scrip=masked_source_scrip,
        domain=proj,
        hull_path=hull_path,
        logger=logger)

    # Generate remapping weights
    logger.info('generating gridded dataset -> MPAS weights')
    args = [parallel_executable,
            '-n', nProcs, 'ESMF_RegridWeightGen',
            '--source', masked_source_scrip,
            '--destination', mali_scrip,
            '--weight', weights_filename,
            '--method', 'conserve',
            "--netcdf4",
            "--dst_regional", "--src_regional", '--ignore_unmapped']
    check_call(args, logger=logger)

    # Perform actual interpolation using the weights
    logger.info('calling interpolate_to_mpasli_grid')
    args = ['interpolate_to_mpasli_grid',
            '-s', source_file,
            '-d', dest_file,
            '-m', 'e',
            '-w', weights_filename,
            '-v'] + variables
    check_call(args, logger=logger)

    return masked_source_scrip


def clean_up_after_interp(fname):
    """
    Perform some final clean-up steps after interpolation.

    Parameters
    ----------
    fname : str
        Name of file on which to perform clean up
    """
    # Create a backup in case clean-up goes awry
    backup_name = f"{fname.split('.')[:-1][0]}_backup.nc"
    copyfile(fname, backup_name)

    # Clean up: trim to iceMask and set large velocity
    # uncertainties where appropriate.
    data = Dataset(fname, 'r+')
    data.set_auto_mask(False)
    data.variables['thickness'][:] *= (data.variables['iceMask'][:] > 1.5)

    mask = np.logical_or(
        np.isnan(data.variables['observedSurfaceVelocityUncertainty'][:]),
        data.variables['thickness'][:] < 1.0)
    mask = np.logical_or(
        mask,
        data.variables['observedSurfaceVelocityUncertainty'][:] == 0.0)
    data.variables['observedSurfaceVelocityUncertainty'][0, mask[0, :]] = \
        1.0
    data.close()


def get_optional_interp_datasets(section, logger):
    """
    Determine whether optional interpolation inputs are configured.

    Parameters
    ----------
    section : configparser.SectionProxy
        Config section containing optional interpolation options

    logger : logging.Logger
        Logger for status messages

    Returns
    -------
    bedmachine_dataset : str or None
        Path to BedMachine dataset if configured, otherwise ``None``

    measures_dataset : str or None
        Path to MEaSUREs dataset if configured, otherwise ``None``
    """
    def _specified(value):
        return value is not None and str(value).strip().lower() not in [
            '', 'none']

    data_path = section.get('data_path', fallback=None)
    bedmachine_filename = section.get('bedmachine_filename',
                                      fallback=None)
    measures_filename = section.get('measures_filename', fallback=None)

    use_bedmachine_interp = _specified(data_path) and \
        _specified(bedmachine_filename)
    use_measures_interp = _specified(data_path) and \
        _specified(measures_filename)

    if use_bedmachine_interp:
        bedmachine_dataset = os.path.join(data_path, bedmachine_filename)
    else:
        bedmachine_dataset = None
        logger.info('Skipping BedMachine interpolation because '
                    '`data_path` and/or `bedmachine_filename` are '
                    'not specified in config.')

    if use_measures_interp:
        measures_dataset = os.path.join(data_path, measures_filename)
    else:
        measures_dataset = None
        logger.info('Skipping MEaSUREs interpolation because '
                    '`data_path` and/or `measures_filename` are '
                    'not specified in config.')

    return bedmachine_dataset, measures_dataset


def get_mesh_config_bounding_box(section, default_bounds=None):
    """
    Get bounding-box coordinates from a mesh config section.

    Parameters
    ----------
    section : configparser.SectionProxy
        Mesh config section containing ``x_min``, ``x_max``, ``y_min``,
        and ``y_max``

    default_bounds : list of float, optional
        Default bounds in the form ``[x_min, x_max, y_min, y_max]`` to
        use when config values are missing or set to ``None``

    Returns
    -------
    bounding_box : list of float
        Bounding box in the form ``[x_min, x_max, y_min, y_max]``
    """
    if default_bounds is None:
        default_bounds = [None, None, None, None]

    def _get_bound(option, default):
        value = section.get(option, fallback=None)
        if value is None or str(value).strip().lower() in ['', 'none']:
            if default is None:
                raise ValueError(
                    f'Missing required config option `{option}` and no '
                    'default was provided.')
            return float(default)
        return float(value)

    return [
        _get_bound('x_min', default_bounds[0]),
        _get_bound('x_max', default_bounds[1]),
        _get_bound('y_min', default_bounds[2]),
        _get_bound('y_max', default_bounds[3])]

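A hypothetical config snippet exercising the fallback logic: explicit
values win, 'None' falls back to the supplied default, and a missing
default raises.

import configparser

cfg = configparser.ConfigParser()
cfg.read_string('[mesh]\nx_min = -1.e5\nx_max = None\n'
                'y_min = 0.\ny_max = 2.e5\n')
bounds = get_mesh_config_bounding_box(
    cfg['mesh'], default_bounds=[-2.e5, 2.e5, -2.e5, 2.e5])
# bounds -> [-100000.0, 200000.0, 0.0, 200000.0]
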

def subset_gridded_dataset_to_bounds(
        source_dataset, bounding_box, subset_tag, logger):
    """
    Subset a gridded source dataset to a bounding box.

    Parameters
    ----------
    source_dataset : str
        Path to source gridded dataset

    bounding_box : list of float
        Bounding box in the form ``[x_min, x_max, y_min, y_max]``

    subset_tag : str
        Tag to include in the subset filename

    logger : logging.Logger
        Logger for status messages

    Returns
    -------
    subset_dataset : str
        Path to subsetted gridded dataset written to the current
        directory
    """
    x_min, x_max, y_min, y_max = bounding_box

    ds = xarray.open_dataset(source_dataset)
    if 'x1' in ds and 'y1' in ds:
        x_name = 'x1'
        y_name = 'y1'
    elif 'x' in ds and 'y' in ds:
        x_name = 'x'
        y_name = 'y'
    else:
        ds.close()
        raise ValueError(
            f'Could not find x/y coordinates in {source_dataset}. '
            'Expected either x1/y1 or x/y.')

    subset = ds.where(
        (ds[x_name] >= x_min) & (ds[x_name] <= x_max) &
        (ds[y_name] >= y_min) & (ds[y_name] <= y_max),
        drop=True)

    # Check for an empty subset, handling a possible mismatch between
    # variable and dimension names
    x_dim = x_name if x_name in subset.sizes else (
        'x' if 'x' in subset.sizes else None)
    y_dim = y_name if y_name in subset.sizes else (
        'y' if 'y' in subset.sizes else None)
    if (x_dim is None or y_dim is None or subset.sizes[x_dim] == 0 or
            subset.sizes[y_dim] == 0):
        subset.close()
        ds.close()
        raise ValueError(
            f'Bounding box {bounding_box} produced an empty subset for '
            f'{source_dataset}. Dimension names in subset: '
            f'{list(subset.sizes.keys())}')

    base = os.path.splitext(os.path.basename(source_dataset))[0]
    unique_id = uuid.uuid4().hex
    subset_dataset = f'{base}_{subset_tag}_{unique_id}_subset.nc'
    logger.info(f'Writing subset dataset: {subset_dataset}')
    subset.to_netcdf(subset_dataset)
    subset.close()
    ds.close()

    return subset_dataset


def run_optional_interpolation(
        self, mesh_filename, src_proj, parallel_executable, nProcs,
        bedmachine_dataset=None, measures_dataset=None,
        subset_bounds=None):
    """
    Run optional interpolation from high-resolution BedMachine and
    MEaSUREs datasets and perform some necessary cleanup. This can
    require many more resources than the rest of the mesh generation
    process, so it is usually desirable to skip this step when
    prototyping meshes. This step is only run if ``interpolate_data`` is
    set to True in the config file and the necessary dataset paths are
    provided.

    Parameters
    ----------
    self : compass.step.Step
        Step instance providing logger and context

    mesh_filename : str
        Destination MALI mesh file to interpolate to

    src_proj : str
        Source dataset projection for SCRIP generation

    parallel_executable : str
        Parallel launcher executable (e.g. ``srun``/``mpirun``)

    nProcs : int or str
        Number of processes for regridding weight generation

    bedmachine_dataset : str, optional
        BedMachine dataset path; if ``None`` this interpolation is
        skipped

    measures_dataset : str, optional
        MEaSUREs dataset path; if ``None`` this interpolation is skipped

    subset_bounds : list of float, optional
        Optional source-dataset subset bounds in the form
        ``[x_min, x_max, y_min, y_max]``. If provided, BedMachine and
        MEaSUREs datasets are subsetted before SCRIP generation and
        interpolation.
    """
    logger = self.logger

    do_optional_interp = bedmachine_dataset is not None or \
        measures_dataset is not None
    if not do_optional_interp:
        return

    if nProcs is None:
        raise ValueError("nProcs must be provided as an int or str")
    nProcs = str(nProcs)

    subset_files = []
    try:
        if subset_bounds is not None:
            if bedmachine_dataset is not None:
                bedmachine_dataset = subset_gridded_dataset_to_bounds(
                    bedmachine_dataset, subset_bounds, 'bedmachine',
                    logger)
                subset_files.append(bedmachine_dataset)
            if measures_dataset is not None:
                measures_dataset = subset_gridded_dataset_to_bounds(
                    measures_dataset, subset_bounds, 'measures', logger)
                subset_files.append(measures_dataset)

        logger.info('creating scrip file for destination mesh')
        mesh_base = os.path.splitext(mesh_filename)[0]
        dst_scrip_file = f'{mesh_base}_scrip.nc'
        scrip_from_mpas(mesh_filename, dst_scrip_file)

        # Build the destination boundary once so it can be reused for
        # both BedMachine and MEaSUREs without re-reading
        # dst_scrip_file.
        logger.info('building destination mesh boundary for source '
                    'masking')
        hull_path = build_dst_scrip_hull(
            dest_scrip=dst_scrip_file, domain=src_proj, logger=logger)

        bm_masked_scrip = None
        if bedmachine_dataset is not None:
            bm_masked_scrip = interp_gridded2mali(
                self, bedmachine_dataset, dst_scrip_file,
                parallel_executable, nProcs, mesh_filename, src_proj,
                variables='all', hull_path=hull_path)

        if measures_dataset is not None:
            measures_vars = ['observedSurfaceVelocityX',
                             'observedSurfaceVelocityY',
                             'observedSurfaceVelocityUncertainty']
            interp_gridded2mali(self, measures_dataset, dst_scrip_file,
                                parallel_executable, nProcs,
                                mesh_filename, src_proj,
                                variables=measures_vars,
                                hull_path=hull_path)

        # Diagnostic plot: show hull, MALI domain, and bounding boxes
        # for all source datasets that were interpolated.
        if bedmachine_dataset is not None or measures_dataset is not None:
            bbox_files = []
            if bedmachine_dataset is not None:
                bbox_files.append((
                    bedmachine_dataset, 'BedMachine bounding box',
                    'black'))
            if measures_dataset is not None:
                bbox_files.append((
                    measures_dataset, 'MEaSUREs bounding box', 'grey'))
            plot_hull_diagnostic(
                hull_path=hull_path,
                dest_scrip=dst_scrip_file,
                source_bbox_files=bbox_files,
                domain=src_proj,
                masked_scrip=bm_masked_scrip,
                logger=logger)

        clean_up_after_interp(mesh_filename)
    finally:
        for subset_file in subset_files:
            if os.path.exists(subset_file):
                logger.info(f'Removing subset dataset: {subset_file}')
                try:
                    os.remove(subset_file)
                except OSError as exc:
                    logger.warning('Could not remove subset dataset '
                                   f'{subset_file}: {exc}')