An Exception was encountered at ‘In [7]’.

Regional Ocean: Checking Open Boundary Conditions#


import xarray as xr
import numpy as np
import os
from pathlib import Path
import glob
import regional_utils as utils


case_name = ""  # "CROCODILE_tutorial_nwa12_MARBL"
CESM_output_dir = ""  # "/glade/campaign/cgd/oce/projects/CROCODILE/workshops/2025/Diagnostics/CESM_Output/"

# Regional domains vary widely in purpose, simulation length, and extent, so we don't want to assume a minimum duration.
# Thus, we ignore start and end dates and simply reduce/output over the whole time frame for all of the examples given.
start_date = None  # "0001-01-01"
end_date = None  # "0101-01-01"

save_figs = False
fig_output_dir = None

lc_kwargs = {}
serial = False

## Notebook Specific Arguments
# case_root is automatically populated in a CESM CUPiD workflow
# If case_root is specified, and mom6_input_dir is not, INPUTDIR will be inferred from user_nl_mom in case_root
# If mom6_input_dir is specified, case_root will be ignored
# NOTE: If case_root and mom6_input_dir are both None, the notebook will not run
# If obc_file_str is unspecified, defaults to "forcing_obc_segment_00*.nc"
case_root = None
mom6_input_dir = None  # "/glade/campaign/cgd/oce/projects/CROCODILE/workshops/2025/Diagnostics/Input_Dir/CROCODILE_tutorial_nwa12_MARBL_ocnice"
obc_file_str = None  # Pattern for OBC file names: "forcing_obc_segment_00*.nc"
# Parameters
case_name = "CROCODILE_tutorial_nwa12_MARBL"
CESM_output_dir = (
    "/glade/campaign/cgd/oce/projects/CROCODILE/workshops/2025/Diagnostics/CESM_Output/"
)
start_date = ""
end_date = ""
save_figs = True
fig_output_dir = None
lc_kwargs = {"threads_per_worker": 1}
serial = True
case_root = None
mom6_input_dir = "/glade/campaign/cgd/oce/projects/CROCODILE/workshops/2025/Diagnostics/Input_Dir/CROCODILE_tutorial_nwa12_MARBL_ocnice"
obc_file_str = "forcing_obc_segment_00*.nc"
subset_kwargs = {}
product = "/glade/work/ajanney/crocodile_2025/CUPiD/examples/regional_ocean/computed_notebooks//ocn/Regional_Ocean_OBC.ipynb"


OUTDIR = f"{CESM_output_dir}/{case_name}/ocn/hist/"
print("Output directory is:", OUTDIR)
Output directory is: /glade/campaign/cgd/oce/projects/CROCODILE/workshops/2025/Diagnostics/CESM_Output//CROCODILE_tutorial_nwa12_MARBL/ocn/hist/


case_output_dir = os.path.join(CESM_output_dir, case_name, "ocn", "hist")

# Xarray time decoding things
time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)

## Static data includes hgrid, vgrid, bathymetry, land/sea mask
static_data = xr.open_mfdataset(
    os.path.join(case_output_dir, "*static.nc"),
    decode_timedelta=True,
    decode_times=time_coder,
)

## Surface Data
sfc_data = xr.open_mfdataset(
    os.path.join(case_output_dir, "*sfc*.nc"),
    decode_timedelta=True,
    decode_times=time_coder,
)

# ## Monthly Domain Data
# monthly_data = xr.open_mfdataset(
#     os.path.join(case_output_dir, f"*z*.nc"),
#     decode_timedelta=True,
#     decode_times=time_coder,
# )

# ## Native Monthly Domain Data
# native_data = xr.open_mfdataset(
#     os.path.join(case_output_dir, f"*native*.nc"),
#     decode_timedelta=True,
#     decode_times=time_coder,
# )

## Image/Gif Output Directory
if fig_output_dir is None:
    image_output_dir = os.path.join(
        "/glade/derecho/scratch/",
        os.environ["USER"],
        "archive",
        case_name,
        "ocn",
        "cupid_images",
    )
else:
    image_output_dir = os.path.join(fig_output_dir, case_name, "ocn", "cupid_images")
if not os.path.exists(image_output_dir):
    os.makedirs(image_output_dir)
print("Image output directory is:", image_output_dir)
Image output directory is: /glade/derecho/scratch/ajanney/archive/CROCODILE_tutorial_nwa12_MARBL/ocn/cupid_images
## Apply time boundaries
if start_date and end_date:
    import cftime

    calendar = sfc_data.time.encoding.get("calendar", "standard")

    calendar_map = {
        "gregorian": cftime.DatetimeGregorian,
        "proleptic_gregorian": cftime.DatetimeProlepticGregorian,
        "noleap": cftime.DatetimeNoLeap,
    }

    CFTime = calendar_map.get(calendar, cftime.DatetimeGregorian)
    y, m, d = [int(i) for i in start_date.split("-")]
    start_date_time = CFTime(y, m, d)
    y, m, d = [int(i) for i in end_date.split("-")]
    end_date_time = CFTime(y, m, d)

    sfc_data = sfc_data.sel(time=slice(start_date_time, end_date_time))

Accessing and Processing Open Boundary Conditions#

Regional MOM6 Input Directory#

Regional MOM6 configurations use a separate input directory containing the data for the initial state, tides, and boundary conditions. This notebook needs to access the boundary conditions, so it first has to locate that directory. There are a couple of options here: the directory can be given directly via the mom6_input_dir parameter above, or it can be inferred from the case.

If mom6_input_dir is not set, we look for the input directory path in two locations in the case_root directory:

  • user_nl_mom

  • Buildconf/momconf/MOM_input

In both files, the input directory path should have the name INPUTDIR. If this path cannot be found, the rest of this notebook will fail to run, but the user can manually set the input directory path below.
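
For reference, the entry this search matches looks something like the following; the path shown is illustrative, not taken from this case:

INPUTDIR = /path/to/regional_mom6_input_dir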

Execution using papermill encountered an exception here and stopped:

input_dir = None

# If mom6_input_dir is specified, use it directly and ignore case_root
if mom6_input_dir is not None:
    input_dir = mom6_input_dir
elif case_root is not None:
    # Otherwise, search the case directory for an INPUTDIR entry
    for file in ["user_nl_mom", "Buildconf/momconf/MOM_input"]:
        filepath = Path(os.path.join(case_root, file))

        if filepath.is_file():
            with open(filepath, "r") as f:
                for line in f:
                    if line.strip().startswith("INPUTDIR"):
                        key, value = line.split("=", 1)
                        input_dir = value.strip()
                        break

        if input_dir is not None and Path(input_dir).is_dir():
            print(f"Found INPUTDIR in {file}")
            break

# Override if automatic method doesn't work:
# input_dir = "user/specified/path"

if input_dir is None:
    print(
        "INPUTDIR not found. Try setting it manually in the `Regional_Ocean_OBC` notebook."
    )
    raise FileNotFoundError(
        "INPUTDIR not found. Try setting it manually in the `Regional_Ocean_OBC` notebook."
    )

print(f"INPUTDIR: {input_dir}")
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[7], line 4
      1 input_dir = None
      3 for file in ["user_nl_mom", "BuildConf/momconf/MOM_input"]:
----> 4     filepath = Path(os.path.join(case_root, file))
      6     if filepath.is_file():
      7         with open(filepath, "r") as f:

File /glade/work/ajanney/conda-envs/cupid-analysis/lib/python3.11/posixpath.py:76, in join(a, *p)
     71 def join(a, *p):
     72     """Join two or more pathname components, inserting '/' as needed.
     73     If any component is an absolute path, all previous path components
     74     will be discarded.  An empty last part will result in a path that
     75     ends with a separator."""
---> 76     a = os.fspath(a)
     77     sep = _get_sep(a)
     78     path = a

TypeError: expected str, bytes or os.PathLike object, not NoneType

Check for Boundary Conditions, Verify Horizontal Grid#

The input directory should contain a few different files:

  • init*: initial fields

  • ocean_hgrid.nc, ocean_topo.nc, vgrid*.nc: grids and bathymetry

  • tu* and tz*: tidal forcing

What we care about:

  • forcing_obc_segment*.nc: open boundary conditions to force the model

The number at the end of each file name corresponds to a different edge of the domain, as sketched just after this list:

  • 001: south

  • 002: north

  • 003: west

  • 004: east
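
As a quick sketch, the trailing segment code can be pulled from a file name and mapped to an edge like this (the file name below is hypothetical):

# Hypothetical file name, used only to illustrate the code -> edge mapping
fname = "forcing_obc_segment_003.nc"
code = fname.split("_")[-1].split(".")[0]  # "003"
edge = {"001": "south", "002": "north", "003": "west", "004": "east"}[code]
print(edge)  # west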

# Use the user-supplied OBC file pattern if given, otherwise the documented default
if obc_file_str is None:
    obc_file_str = "forcing_obc_segment_00*.nc"

# Sort so the segments are returned in a deterministic order
boundary_conds = sorted(glob.glob(os.path.join(input_dir, obc_file_str)))
print(f"Found {len(boundary_conds)} OBC files:", boundary_conds)

if len(boundary_conds) == 0:
    raise FileNotFoundError(f"No boundary condition files found at {input_dir}")

Opening and Checking Open Boundary Conditions#

segment_num_to_dir = {
    "001": "south",
    "002": "north",
    "003": "west",
    "004": "east",
}
segment_dir_to_num = {v: k for k, v in segment_num_to_dir.items()}
list_boundaries = []
temp_time_slice = slice(0, 5)

for boundary_file in boundary_conds:

    boundary = xr.open_mfdataset(boundary_file)

    # Temp time slice
    boundary = boundary.isel(time=temp_time_slice)

    code = (boundary_file.split("_")[-1]).split(".")[0]
    bound = segment_num_to_dir[code]

    # Variables on the U/V grid (corners)
    uv_vars = [f"u_segment_{code}", f"v_segment_{code}"]

    # Variables on the T grid (cell center)
    tracer_vars = [
        f"salt_segment_{code}",
        f"temp_segment_{code}",
        f"eta_segment_{code}",
    ]

    # Create separate datasets for each grid type
    boundary_uv = boundary[uv_vars]
    boundary_t = boundary[tracer_vars]

    # Rename coordinates for both datasets
    boundary_uv = boundary_uv.rename(
        {
            f"ny_segment_{code}": "yq",
            f"nx_segment_{code}": "xq",
            f"lon_segment_{code}": "longitude_q",
            f"lat_segment_{code}": "latitude_q",
        }
    )
    boundary_t = boundary_t.rename(
        {
            f"ny_segment_{code}": "yh",
            f"nx_segment_{code}": "xh",
            f"lon_segment_{code}": "longitude_h",
            f"lat_segment_{code}": "latitude_h",
        }
    )

    # Drop variables and handle squeeze for both
    boundary_uv = boundary_uv.drop_vars(["nxp", "nyp"])
    boundary_t = boundary_t.drop_vars(["nxp", "nyp"])

    if bound in ["north", "south"]:
        # Slicing for a C-grid
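        # Note: the segment data appear to be written on the MOM6 supergrid
        # (twice the nominal resolution), so even indices along the boundary
        # fall on velocity/corner (q) points and odd indices on tracer (h)
        # cell centers; the strided slicing below separates the two.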
        boundary_uv = boundary_uv.isel(xq=slice(0, None, 2))
        boundary_t = boundary_t.isel(xh=slice(1, None, 2))

        # Assign new coordinates
        boundary_uv = boundary_uv.assign_coords(
            xq=("xq", boundary_uv["longitude_q"].values)
        )
        boundary_t = boundary_t.assign_coords(
            xh=("xh", boundary_t["longitude_h"].values)
        )

        # boundary_uv = boundary_uv.squeeze('yq', drop = True)
        # boundary_t = boundary_t.squeeze('yh', drop = True)
    else:
        # Slicing for a C-grid
        boundary_uv = boundary_uv.isel(yq=slice(0, None, 2))
        boundary_t = boundary_t.isel(yh=slice(1, None, 2))

        # Assign new coordinates
        boundary_uv = boundary_uv.assign_coords(
            yq=("yq", boundary_uv["latitude_q"].values)
        )
        boundary_t = boundary_t.assign_coords(
            yh=("yh", boundary_t["latitude_h"].values)
        )

        # boundary_uv = boundary_uv.squeeze('xq', drop = True)
        # boundary_t = boundary_t.squeeze('xh', drop = True)

    # Combine the processed datasets
    boundary = xr.merge([boundary_uv, boundary_t])

    # Remove longitude and latitude variables from the datasets
    for coord in ["longitude_q", "latitude_q"]:
        if coord in boundary:
            boundary = boundary.drop_vars(coord)
    for coord in ["longitude_h", "latitude_h"]:
        if coord in boundary:
            boundary = boundary.drop_vars(coord)

    # Sanity check to make sure OBCs are on the same grid as model output
    model_xh = static_data["xh"]
    model_yh = static_data["yh"]
    model_xq = static_data["xq"]
    model_yq = static_data["yq"]

    # if boundary['xh'].size > 1: assert np.allclose(boundary['xh'].values, model_xh.values, atol=1e-6), f"Boundary {bound} x-coordinates do not match model grid"
    # if boundary['yh'].size > 1: assert np.allclose(boundary['yh'].values, model_yh.values, atol=1e-6), f"Boundary {bound} y-coordinates do not match model grid"
    # if boundary['xq'].size > 1: assert np.allclose(boundary['xq'].values, model_xq.values, atol=1e-6), f"Boundary {bound} corner x-coordinates do not match model grid"
    # if boundary['yq'].size > 1: assert np.allclose(boundary['yq'].values, model_yq.values, atol=1e-6), f"Boundary {bound} corner y-coordinates do not match model grid"

    z_map = [
        f"nz_segment_{code}_u",
        f"nz_segment_{code}_v",
        f"nz_segment_{code}_temp",
        f"nz_segment_{code}_salt",
    ]
    new_boundary = {}
    for var in boundary.data_vars:
        da = boundary[var]
        da.load()

        zdim = [str(d) for d in da.dims if str(d) in z_map]
        if len(zdim) > 0:
            da = da.rename({zdim[0]: "z_l"})
        new_boundary[var.split("_")[0]] = da

    new_boundary = xr.Dataset(new_boundary).expand_dims(boundary=[bound])
    list_boundaries.append(new_boundary)

# boundaries = xr.concat(list_boundaries, dim = 'boundary')
list_boundaries[1]
static_data