CrocoDash.raw_data_access.datasets package

Contents

CrocoDash.raw_data_access.datasets package#

Submodules#

CrocoDash.raw_data_access.datasets.empty_products module#

class CrocoDash.raw_data_access.datasets.empty_products.CESMInputData#

Bases: BaseProduct

description = 'The CESM Input SVN repo holds all files CrocoDash exposes publicly themselves at the following repo link'#
logger = <Logger CESMInputData (INFO)>#
product_name = 'CESM Inputdata'#
class CrocoDash.raw_data_access.datasets.empty_products.TPXO#

Bases: BaseProduct

description = 'TPXO (TOPEX/POSEIDON Global Tidal Ocean I think) is a public tidal model dataset'#
logger = <Logger TPXO (INFO)>#
product_name = 'tpxo'#

CrocoDash.raw_data_access.datasets.gebco module#

class CrocoDash.raw_data_access.datasets.gebco.GEBCO#

Bases: BaseProduct

description = 'GEBCO (General Bathymetric Chart of the Ocean) is a public dataset of global ocean bathymetry'#
static get_gebco_data_script(output_folder=None, output_filename=None)#
static get_gebco_data_with_python(output_folder=None, output_filename=None)#
logger = <Logger GEBCO (INFO)>#
product_name = 'gebco'#

CrocoDash.raw_data_access.datasets.glofas module#

class CrocoDash.raw_data_access.datasets.glofas.GLOFAS#

Bases: DatedBaseProduct

description = '\tGLOFAS (Global Flood Awareness System) is a public river discharge/runoff Product'#
static get_global_data_with_python(dates, output_folder=PosixPath('.'), output_filename='glofas_data.nc')#

Downloads glofas data using cdsapi library. Note that users need to have an account with copernicus and have cdsapi installed and configured.

Parameters:
  • dates (str) – Which dates to download. Required: the signature defines no default.

  • output_folder (str, optional) – Directory where downloaded files will be saved.

static get_processed_global_glofas_script_for_cli(dates='UNUSED', output_folder=PosixPath('.'), output_filename='processed_glofas.nc')#

Downloads processed GLOFAS data from the CESM inputdata repository by generating a script users can run in their terminal.

Parameters:
  • dates (str, optional) – Currently unused; placeholder for future date-based filtering.

  • lat_min (float, optional) – Currently unused; placeholder for future spatial filtering.

  • lat_max (float, optional) – Currently unused; placeholder for future spatial filtering.

  • lon_min (float, optional) – Currently unused; placeholder for future spatial filtering.

  • lon_max (float, optional) – Currently unused; placeholder for future spatial filtering.

  • output_folder (str, optional) – Directory where downloaded files will be saved.

  • output_filename (str, optional) – Filename in the output directory.

Note: lat_min, lat_max, lon_min and lon_max are documented here but do not appear in the signature shown above.

logger = <Logger GLOFAS (INFO)>#
product_name = 'glofas'#

CrocoDash.raw_data_access.datasets.glorys module#

Data Access Module -> Glorys

class CrocoDash.raw_data_access.datasets.glorys.GLORYS#

Bases: ForcingProduct

boundary_fill_method = 'regional_mom6'#
depth_coord = 'depth'#
description = 'GLORYS (Global Ocean Physics Reanalysis) is a public dataset provided through the copernicus marine service.'#
eta_var_name = 'zos'#
static get_glorys_data_from_cds_api(dates, lat_min, lat_max, lon_min, lon_max, output_folder=None, output_filename=None, variables=['zos', 'uo', 'vo', 'so', 'thetao'])#

Using the copernicusmarine API, query GLORYS data (any dates).

static get_glorys_data_from_rda(dates: list, lat_min, lat_max, lon_min, lon_max, output_folder=PosixPath('.'), output_filename='raw_glorys.nc', variables=['time', 'latitude', 'longitude', 'depth', 'zos', 'uo', 'vo', 'so', 'thetao']) → Dataset#

Gather GLORYS data from campaign storage on the Derecho computers and return the dataset sliced to the lower-left-corner (llc) and upper-right-corner (urc) coordinates for the specified dates.

static get_glorys_data_script_for_cli(dates: tuple, lat_min, lat_max, lon_min, lon_max, output_folder, output_filename, variables=None) → None#

Script to run the GLORYS data query for the CLI

logger = <Logger GLORYS (INFO)>#
product_name = 'glorys'#
time_units = 'days'#
time_var_name = 'time'#
tracer_lat_coord = 'latitude'#
tracer_lon_coord = 'longitude'#
tracer_var_names = {'salt': 'so', 'temp': 'thetao'}#
tracer_x_coord = 'longitude'#
tracer_y_coord = 'latitude'#
u_lat_coord = 'latitude'#
u_lon_coord = 'longitude'#
u_var_name = 'uo'#
u_x_coord = 'longitude'#
u_y_coord = 'latitude'#
v_lat_coord = 'latitude'#
v_lon_coord = 'longitude'#
v_var_name = 'vo'#
v_x_coord = 'longitude'#
v_y_coord = 'latitude'#

CrocoDash.raw_data_access.datasets.mom6_output module#

Data Access Module -> CESM output

class CrocoDash.raw_data_access.datasets.mom6_output.MOM6_OUTPUT#

Bases: ForcingProduct

boundary_fill_method = 'regional_mom6'#
calendar = 'noleap'#
delimiter = '.'#
depth_coord = ['z_t', 'z_t_150m']#
description = 'CESM had old runs that can be used for IC and OBC'#
eta_var_name = 'SSH'#
static get_mom6_data(dates: list, lat_min, lat_max, lon_min, lon_max, output_folder=PosixPath('.'), output_filename=None, variables=['SSH', 'TEMP', 'SALT', 'VVEL', 'UVEL'], dataset_path='/glade/campaign/collections/cmip/CMIP6/CESM-HR/FOSI_BGC/HR/g.e22.TL319_t13.G1850ECOIAF_JRA_HR.4p2z.001/ocn/proc/tseries/month_1', date_format: str = '%Y%m%d', regex='(\\d{6,8})-(\\d{6,8})', delimiter='.', preview=False)#
logger = <Logger MOM6_OUTPUT (INFO)>#
marbl_var_names = {'ALK': 'ALK', 'ALK_ALT_CO2': 'ALK_ALT_CO2', 'DIC': 'DIC', 'DIC_ALT_CO2': 'DIC_ALT_CO2', 'DOC': 'DOC', 'DOCr': 'DOCr', 'DON': 'DON', 'DONr': 'DONr', 'DOP': 'DOP', 'DOPr': 'DOPr', 'Fe': 'Fe', 'Lig': 'Lig', 'NH4': 'NH4', 'NO3': 'NO3', 'O2': 'O2', 'PO4': 'PO4', 'SiO3': 'SiO3', 'coccoC': 'coccoC', 'coccoCaCO3': 'coccoCaCO3', 'coccoChl': 'coccoChl', 'coccoFe': 'coccoFe', 'coccoP': 'coccoP', 'diatC': 'diatC', 'diatChl': 'diatChl', 'diatFe': 'diatFe', 'diatP': 'diatP', 'diatSi': 'diatSi', 'diazC': 'diazC', 'diazChl': 'diazChl', 'diazFe': 'diazFe', 'diazP': 'diazP', 'mesozooC': 'mesozooC', 'microzooC': 'microzooC', 'spC': 'spC', 'spChl': 'spChl', 'spFe': 'spFe', 'spP': 'spP'}#
product_name = 'mom6_output'#
time_units = 'days since 1850-01-01'#
time_var_name = 'time'#
tracer_lat_coord = 'TLAT'#
tracer_lon_coord = 'TLONG'#
tracer_var_names = {'salt': 'SALT', 'temp': 'TEMP'}#
tracer_x_coord = 'nlon'#
tracer_y_coord = 'nlat'#
u_lat_coord = 'TLAT'#
u_lon_coord = 'TLONG'#
u_var_name = 'UVEL'#
u_x_coord = 'nlon'#
u_y_coord = 'nlat'#
v_lat_coord = 'TLAT'#
v_lon_coord = 'TLONG'#
v_var_name = 'VVEL'#
v_x_coord = 'nlon'#
v_y_coord = 'nlat'#
z_unit_conversion = 0.01#
CrocoDash.raw_data_access.datasets.mom6_output.drop_extra_cftime_vars(ds)#
CrocoDash.raw_data_access.datasets.mom6_output.first_value(da_var)#
CrocoDash.raw_data_access.datasets.mom6_output.get_date_range_from_filename(path, regex)#
CrocoDash.raw_data_access.datasets.mom6_output.parse_dataset(variable_names: list[str], dataset_path: str | Path, start_date: str, end_date: str, date_format: str = '%Y%m%d', regex='(\\d{6,8})-(\\d{6,8})', space_character='.') → dict#

Parses the dataset to find variable names and their corresponding file paths.

Parameters:
  • variable_names (list[str]) – List of variable names to search for.

  • dataset_path (str | Path) – Path to the dataset (or to the folder containing the dataset).

  • space_character (str) – Character that separates words in variable names in the filenames. Default is “.”.

Returns:

A dictionary with variable names as keys and their file paths as values.

Return type:

dict

CrocoDash.raw_data_access.datasets.mom6_output.subset_dataset(variable_info: dict, output_path: str | Path, lat_min: float, lat_max: float, lon_min: float, lon_max: float, lat_name='lat', lon_name='lon', dates=None, preview: bool = False) → None#

Subsets (and merges) the dataset based on the provided variable names and geographical bounds into the output path.

Parameters:
  • variable_info (dict) – A dictionary with variable names as keys and their file paths as values.

  • output_path (str | Path) – The path where the subsetted dataset will be saved.

  • lat_min (float) – Minimum latitude for subsetting.

  • lat_max (float) – Maximum latitude for subsetting.

  • lon_min (float) – Minimum longitude for subsetting.

  • lon_max (float) – Maximum longitude for subsetting.

  • lat_name (str) – Name of the latitude variable in the dataset. Default is “lat”.

  • lon_name (str) – Name of the longitude variable in the dataset. Default is “lon”.

  • dates (tuple) – Just used for the file naming.

  • preview (bool) – If True, only previews the subsetting without saving. Default is False.

CrocoDash.raw_data_access.datasets.mom6_output.subtract_month(dt)#

CrocoDash.raw_data_access.datasets.seawifs module#

class CrocoDash.raw_data_access.datasets.seawifs.SeaWIFS#

Bases: BaseProduct

description = 'SEAWIFS is a Chlorophyll Dataset for MOM6'#
static get_global_seawifs_script_for_cli(output_folder=None, output_filename='UNUSED', username='')#

Downloads chlor_a data using NASA OceanData API with authentication.

Parameters:
  • username (str) – NASA Earthdata username (password will be prompted at runtime).

  • date (str, optional) – Currently unused; placeholder for future date-based filtering.

  • lat_min (float, optional) – Currently unused; placeholder for future spatial filtering.

  • lat_max (float, optional) – Currently unused; placeholder for future spatial filtering.

  • lon_min (float, optional) – Currently unused; placeholder for future spatial filtering.

  • lon_max (float, optional) – Currently unused; placeholder for future spatial filtering.

  • output_folder (str, optional) – Directory where downloaded files will be saved.

static get_processed_global_seawifs_script_for_cli(output_folder=PosixPath('.'), output_filename='processed_seawifs.nc')#

Downloads chlor_a data from the CESM inputdata repository by generating a script users can run in their terminal.

Parameters:
  • date (str, optional) – Currently unused; placeholder for future date-based filtering.

  • lat_min (float, optional) – Currently unused; placeholder for future spatial filtering.

  • lat_max (float, optional) – Currently unused; placeholder for future spatial filtering.

  • lon_min (float, optional) – Currently unused; placeholder for future spatial filtering.

  • lon_max (float, optional) – Currently unused; placeholder for future spatial filtering.

  • output_folder (str, optional) – Directory where downloaded files will be saved.

  • output_filename (str, optional) – Filename in the output directory.

Note: date, lat_min, lat_max, lon_min and lon_max are documented here but do not appear in the signature shown above.

logger = <Logger SeaWIFS (INFO)>#
product_name = 'seawifs'#

CrocoDash.raw_data_access.datasets.utils module#

CrocoDash.raw_data_access.datasets.utils.convert_lons_to_180_range(*lons)#
CrocoDash.raw_data_access.datasets.utils.write_bash_curl_script(url, script_name, output_folder, output_filename)#

Module contents#

CrocoDash.raw_data_access.datasets.load_all_datasets()#

Dynamically import all modules in this package so that their __init_subclass__ hooks run and populate the registry.