CrocoDash.raw_data_access.datasets package

CrocoDash.raw_data_access.datasets package#

Submodules#

CrocoDash.raw_data_access.datasets.empty_products module#

class CrocoDash.raw_data_access.datasets.empty_products.CESMInputData#

Bases: BaseProduct

description = 'The CESM Input SVN repo holds all files CrocoDash exposes publicly themselves at the following repo link'#

link = 'https://svn-ccsm-inputdata.cgd.ucar.edu/trunk/inputdata/ocn/mom/croc'#

logger = <Logger CESMInputData (INFO)>#

product_name = 'CESM Inputdata'#

class CrocoDash.raw_data_access.datasets.empty_products.TPXO#

Bases: BaseProduct

description = 'TPXO (TOPEX/POSEIDON Global Tidal Ocean I think) is a public tidal model dataset'#

link = 'https://www.tpxo.net/global'#

logger = <Logger TPXO (INFO)>#

product_name = 'tpxo'#

CrocoDash.raw_data_access.datasets.gebco module#

class CrocoDash.raw_data_access.datasets.gebco.GEBCO#

Bases: BaseProduct

description = 'GEBCO (General Bathymetric Chart of the Ocean) is a public dataset of global ocean bathymetry'#

static get_gebco_data_script(output_folder=None, output_filename=None)#

static get_gebco_data_with_python(output_folder=None, output_filename=None)#

link = 'https://www.gebco.net/data_and_products/gridded_bathymetry_data/#global'#

logger = <Logger GEBCO (INFO)>#

product_name = 'gebco'#

CrocoDash.raw_data_access.datasets.glofas module#

class CrocoDash.raw_data_access.datasets.glofas.GLOFAS#

Bases: DatedBaseProduct

description = '\tGLOFAS (Global Flood Awareness System) is a public river discharge/runoff Product'#

static get_global_data_with_python(dates, output_folder=PosixPath('.'), output_filename='glofas_data.nc')#

Downloads GLOFAS data using the cdsapi library. Note that users need an account with Copernicus and must have cdsapi installed and configured.

Parameters:

dates (str) – What dates to download.
output_folder (str, optional) – Directory where downloaded files will be saved.

static get_processed_global_glofas_script_for_cli(dates='UNUSED', output_folder=PosixPath('.'), output_filename='processed_glofas.nc')#

Downloads processed GLOFAS data from the CESM inputdata repository by generating a script users can run in their terminal.

Parameters:

dates (str, optional) – Currently unused; placeholder for future date-based filtering.
lat_min (float, optional) – Currently unused; placeholder for future spatial filtering.
lat_max (float, optional) – Currently unused; placeholder for future spatial filtering.
lon_min (float, optional) – Currently unused; placeholder for future spatial filtering.
lon_max (float, optional) – Currently unused; placeholder for future spatial filtering.
output_folder (str, optional) – Directory where downloaded files will be saved.
output_filename (str, optional) – Filename in the output directory.

link = 'https://ewds.climate.copernicus.eu/datasets/cems-glofas-historical?tab=download'#

logger = <Logger GLOFAS (INFO)>#

product_name = 'glofas'#

CrocoDash.raw_data_access.datasets.glorys module#

Data Access Module -> Glorys

class CrocoDash.raw_data_access.datasets.glorys.GLORYS#

Bases: ForcingProduct

boundary_fill_method = 'regional_mom6'#

depth_coord = 'depth'#

description = 'GLORYS (Global Ocean Physics Reanalysis) is a public dataset provided through the copernicus marine service.'#

eta_var_name = 'zos'#

static get_glorys_data_from_cds_api(dates, lat_min, lat_max, lon_min, lon_max, output_folder=None, output_filename=None, variables=['zos', 'uo', 'vo', 'so', 'thetao'])#

Using the copernicusmarine API, query GLORYS data (any dates).

static get_glorys_data_from_rda(dates: list, lat_min, lat_max, lon_min, lon_max, output_folder=PosixPath('.'), output_filename='raw_glorys.nc', variables=['time', 'latitude', 'longitude', 'depth', 'zos', 'uo', 'vo', 'so', 'thetao']) → Dataset#

Gather GLORYS data on Derecho computers from the campaign storage and return the dataset sliced to the lower-left corner (llc) and upper-right corner (urc) coordinates at the specified dates.

static get_glorys_data_script_for_cli(dates: tuple, lat_min, lat_max, lon_min, lon_max, output_folder, output_filename, variables=None) → None#

Script to run the GLORYS data query for the CLI.

link = 'https://data.marine.copernicus.eu/product/GLOBAL_MULTIYEAR_PHY_001_030/description'#

logger = <Logger GLORYS (INFO)>#

product_name = 'glorys'#

time_units = 'days'#

time_var_name = 'time'#

tracer_lat_coord = 'latitude'#

tracer_lon_coord = 'longitude'#

tracer_var_names = {'salt': 'so', 'temp': 'thetao'}#

tracer_x_coord = 'longitude'#

tracer_y_coord = 'latitude'#

u_lat_coord = 'latitude'#

u_lon_coord = 'longitude'#

u_var_name = 'uo'#

u_x_coord = 'longitude'#

u_y_coord = 'latitude'#

v_lat_coord = 'latitude'#

v_lon_coord = 'longitude'#

v_var_name = 'vo'#

v_x_coord = 'longitude'#

v_y_coord = 'latitude'#

CrocoDash.raw_data_access.datasets.mom6_output module#

Data Access Module -> CESM output

class CrocoDash.raw_data_access.datasets.mom6_output.MOM6_OUTPUT#

Bases: ForcingProduct

boundary_fill_method = 'regional_mom6'#

calendar = 'noleap'#

delimiter = '.'#

depth_coord = ['z_t', 'z_t_150m']#

description = 'CESM had old runs that can be used for IC and OBC'#

eta_var_name = 'SSH'#

static get_mom6_data(dates: list, lat_min, lat_max, lon_min, lon_max, output_folder=PosixPath('.'), output_filename=None, variables=['SSH', 'TEMP', 'SALT', 'VVEL', 'UVEL'], dataset_path='/glade/campaign/collections/cmip/CMIP6/CESM-HR/FOSI_BGC/HR/g.e22.TL319_t13.G1850ECOIAF_JRA_HR.4p2z.001/ocn/proc/tseries/month_1', date_format: str = '%Y%m%d', regex='(\\d{6,8})-(\\d{6,8})', delimiter='.', preview=False)#

link = 'https://gdex.ucar.edu/datasets/d267000/'#

logger = <Logger MOM6_OUTPUT (INFO)>#

marbl_var_names = {'ALK': 'ALK', 'ALK_ALT_CO2': 'ALK_ALT_CO2', 'DIC': 'DIC', 'DIC_ALT_CO2': 'DIC_ALT_CO2', 'DOC': 'DOC', 'DOCr': 'DOCr', 'DON': 'DON', 'DONr': 'DONr', 'DOP': 'DOP', 'DOPr': 'DOPr', 'Fe': 'Fe', 'Lig': 'Lig', 'NH4': 'NH4', 'NO3': 'NO3', 'O2': 'O2', 'PO4': 'PO4', 'SiO3': 'SiO3', 'coccoC': 'coccoC', 'coccoCaCO3': 'coccoCaCO3', 'coccoChl': 'coccoChl', 'coccoFe': 'coccoFe', 'coccoP': 'coccoP', 'diatC': 'diatC', 'diatChl': 'diatChl', 'diatFe': 'diatFe', 'diatP': 'diatP', 'diatSi': 'diatSi', 'diazC': 'diazC', 'diazChl': 'diazChl', 'diazFe': 'diazFe', 'diazP': 'diazP', 'mesozooC': 'mesozooC', 'microzooC': 'microzooC', 'spC': 'spC', 'spChl': 'spChl', 'spFe': 'spFe', 'spP': 'spP'}#

product_name = 'mom6_output'#

time_units = 'days since 1850-01-01'#

time_var_name = 'time'#

tracer_lat_coord = 'TLAT'#

tracer_lon_coord = 'TLONG'#

tracer_var_names = {'salt': 'SALT', 'temp': 'TEMP'}#

tracer_x_coord = 'nlon'#

tracer_y_coord = 'nlat'#

u_lat_coord = 'TLAT'#

u_lon_coord = 'TLONG'#

u_var_name = 'UVEL'#

u_x_coord = 'nlon'#

u_y_coord = 'nlat'#

v_lat_coord = 'TLAT'#

v_lon_coord = 'TLONG'#

v_var_name = 'VVEL'#

v_x_coord = 'nlon'#

v_y_coord = 'nlat'#

z_unit_conversion = 0.01#

CrocoDash.raw_data_access.datasets.mom6_output.drop_extra_cftime_vars(ds)#

CrocoDash.raw_data_access.datasets.mom6_output.first_value(da_var)#

CrocoDash.raw_data_access.datasets.mom6_output.get_date_range_from_filename(path, regex)#

CrocoDash.raw_data_access.datasets.mom6_output.parse_dataset(variable_names: list[str], dataset_path: str | Path, start_date: str, end_date: str, date_format: str = '%Y%m%d', regex='(\\d{6,8})-(\\d{6,8})', space_character='.') → dict#

Parses the dataset to find variable names and their corresponding file paths.

Parameters:

variable_names (list[str]) – List of variable names to search for.
dataset_path (str | Path) – Path to the dataset (or folder with dataset)
space_character (str) – Character that separates words in variable names in the filenames. Default is “.”.

Returns:

A dictionary with variable names as keys and their file paths as values.

Return type:

dict

CrocoDash.raw_data_access.datasets.mom6_output.subset_dataset(variable_info: dict, output_path: str | Path, lat_min: float, lat_max: float, lon_min: float, lon_max: float, lat_name='lat', lon_name='lon', dates=None, preview: bool = False) → None#

Subsets (and merges) the dataset based on the provided variable names and geographical bounds into the output path.

Parameters:

variable_info (dict) – A dictionary with variable names as keys and their file paths as values.
output_path (str | Path) – The path where the subsetted dataset will be saved.
lat_min (float) – Minimum latitude for subsetting.
lat_max (float) – Maximum latitude for subsetting.
lon_min (float) – Minimum longitude for subsetting.
lon_max (float) – Maximum longitude for subsetting.
lat_name (str) – Name of the latitude variable in the dataset. Default is “lat”.
lon_name (str) – Name of the longitude variable in the dataset. Default is “lon”.
dates (tuple) – Just used for the file naming.
preview (bool) – If True, only previews the subsetting without saving. Default is False.

CrocoDash.raw_data_access.datasets.mom6_output.subtract_month(dt)#

CrocoDash.raw_data_access.datasets.seawifs module#

class CrocoDash.raw_data_access.datasets.seawifs.SeaWIFS#

Bases: BaseProduct

description = 'SEAWIFS is a Chlorophyll Dataset for MOM6'#

static get_global_seawifs_script_for_cli(output_folder=None, output_filename='UNUSED', username='')#

Downloads chlor_a data using NASA OceanData API with authentication.

Parameters:

username (str) – NASA Earthdata username (password will be prompted at runtime).
date (str, optional) – Currently unused; placeholder for future date-based filtering.
lat_min (float, optional) – Currently unused; placeholder for future spatial filtering.
lat_max (float, optional) – Currently unused; placeholder for future spatial filtering.
lon_min (float, optional) – Currently unused; placeholder for future spatial filtering.
lon_max (float, optional) – Currently unused; placeholder for future spatial filtering.
output_folder (str, optional) – Directory where downloaded files will be saved.

static get_processed_global_seawifs_script_for_cli(output_folder=PosixPath('.'), output_filename='processed_seawifs.nc')#

Downloads chlor_a data from the CESM inputdata repository by generating a script users can run in their terminal.

Parameters:

date (str, optional) – Currently unused; placeholder for future date-based filtering.
lat_min (float, optional) – Currently unused; placeholder for future spatial filtering.
lat_max (float, optional) – Currently unused; placeholder for future spatial filtering.
lon_min (float, optional) – Currently unused; placeholder for future spatial filtering.
lon_max (float, optional) – Currently unused; placeholder for future spatial filtering.
output_folder (str, optional) – Directory where downloaded files will be saved.
output_filename (str, optional) – Filename in the output directory.

link = 'https://oceandata.sci.gsfc.nasa.gov/getfile/SEASTAR_SEAWIFS_GAC.19980201_20100228.L3m.MC.CHL.chlor_a.9km.nc'#

logger = <Logger SeaWIFS (INFO)>#

product_name = 'seawifs'#

CrocoDash.raw_data_access.datasets.utils module#

CrocoDash.raw_data_access.datasets.utils.convert_lons_to_180_range(*lons)#

CrocoDash.raw_data_access.datasets.utils.write_bash_curl_script(url, script_name, output_folder, output_filename)#

Module contents#

CrocoDash.raw_data_access.datasets.load_all_datasets()#

Dynamically import all modules in this package so that their __init_subclass__ hooks run and populate the registry.

CrocoDash.raw_data_access.datasets package

Contents

CrocoDash.raw_data_access.datasets package#

Submodules#

CrocoDash.raw_data_access.datasets.empty_products module#

CrocoDash.raw_data_access.datasets.gebco module#

CrocoDash.raw_data_access.datasets.glofas module#

CrocoDash.raw_data_access.datasets.glorys module#

CrocoDash.raw_data_access.datasets.mom6_output module#

CrocoDash.raw_data_access.datasets.seawifs module#

CrocoDash.raw_data_access.datasets.utils module#

Module contents#