Source code for cleopy.sdmout_src.pyzarr

"""
Copyright (c) 2025 MPI-M, Clara Bayley


----- CLEO -----
File: pyzarr.py
Project: sdmout_src
Created Date: Tuesday 24th October 2023
Author: Clara Bayley (CB)
Additional Contributors:
-----
License: BSD 3-Clause "New" or "Revised" License
https://opensource.org/licenses/BSD-3-Clause
-----
File Description:
functions to return zarr data in useful formats for plotting e.g. for handling ragged arrays
of superdroplet attributes
"""


import numpy as np
import xarray as xr
import awkward as ak
from pathlib import Path

from . import thermodata
from . import superdrops
from . import massmoms
from . import timedata


def get_rawdataset(dataset):
    return xr.open_dataset(dataset, engine="zarr", consolidated=False)


def get_rawdata4key(dataset, key):
    return get_rawdataset(dataset)[key]


def get_rawdata4raggedkey(dataset, key):
    ds = get_rawdataset(dataset)
    return ak.unflatten(ds[key].values, ds["raggedcount"].values)


[docs] def raggedvar_fromzarr(ds, raggedcount, var): """returns ragged ak.Array dims [time, ragged] for a variable "var" in zarr ds""" if isinstance(ds, str) or isinstance(ds, Path): ds = get_rawdataset(ds) return ak.unflatten(ds[var].values, raggedcount)
[docs] def var4d_fromzarr(ds, ntime, ndims, key): """' returns 4D variable with dims [time, y, x, z] from zarr dataset "ds" """ if isinstance(ds, str) or isinstance(ds, Path): ds = get_rawdataset(ds) reshape = [ntime] + list(ndims) return np.reshape(ds[key].values, reshape)
[docs] def var3d_fromzarr(ds, ndims, key): """' returns 3D variable with dims [y, x, z] from zarr dataset "ds" """ if isinstance(ds, str) or isinstance(ds, Path): ds = get_rawdataset(ds) return np.reshape(ds[key].values, ndims)
[docs] def get_thermodata(dataset, ntime, ndims, consts, getwinds=False): """returns a thermodynamic data in a dictionary. The value under each key is the thermodynamics data in a 2D array with dimensions [time, gridbox]. E.g. thermo["qvap"][:,0] gives the timeseries of qvap for the 0th gridbox. thermo["qvap][0] gives the qvap of all gridboxes at the 0th output time""" thermo = thermodata.Thermodata(dataset, ntime, ndims, consts) if getwinds: winds = thermodata.Winddata(dataset, ntime, ndims, consts) return thermo, winds else: return thermo
def get_supers(dataset, consts, is_print=False): return superdrops.Superdrops(dataset, consts, is_print=is_print) def get_time(dataset): return timedata.Time(dataset) def get_massmoms(dataset, ntime, ndims): return massmoms.MassMoms(dataset, ntime, ndims) def get_rainmassmoms(dataset, ntime, ndims): return massmoms.MassMoms(dataset, ntime, ndims, lab="_raindrops") def get_gbxindex(dataset, ndims): return var3d_fromzarr(dataset, ndims, "gbxindex") def get_totnsupers(dataset): if isinstance(dataset, str) or isinstance(dataset, Path): dataset = get_rawdataset(dataset) try: return dataset["totnsupers"].values except KeyError: return dataset["raggedcount"].values def get_nsupers(dataset, ntime, ndims): return var4d_fromzarr(dataset, ntime, ndims, "nsupers")
[docs] def surfaceprecip_estimate(dataset, gbxs): """use last radius of SDs before they leave the domain to estimate the volume of precipitation at each timestep. Values should be approx. equal to sum over gbxs (multiplied by area_gbx/area_domain) of logbook values for precip""" ds = get_rawdataset(dataset) sdId = ak.unflatten(ds["sdId"].values, ds["raggedcount"].values) radius = ak.unflatten(ds["radius"].values, ds["raggedcount"].values) xi = ak.unflatten(ds["xi"].values, ds["raggedcount"].values) r3sum = [] for ti in range(ds.time.shape[0] - 1): sd_ti, r_ti, xi_ti = sdId[ti], radius[ti], xi[ti] sds_gone = set(sd_ti) - set( sdId[ti + 1] ) # set of SD indexes that have left domain during timestep ti -> ti+1 isgone = np.where( np.isin(sd_ti, list(sds_gone)) ) # indexes in ragged arrays of SDs that leave during timestep ti -> ti+1 r3sum.append( np.dot(r_ti[isgone] ** 3, xi_ti[isgone]) ) # sum of (real) droplet radii^3 that left domain [microns^3] precipvol = ( 4 / 3 * np.pi * np.asarray(r3sum) / (1e18) ) # volume of water that left domain [m^3] domainy = np.amax(gbxs["yhalf"]) - np.amin(gbxs["yhalf"]) # [m] domainx = np.amax(gbxs["xhalf"]) - np.amin(gbxs["xhalf"]) # [m] deltat = np.diff(ds["time"].values) / 60 / 60 # [hrs] preciprate = precipvol * 1000 / (domainx * domainy) / deltat # [mm/hr] precipaccum = np.cumsum(preciprate * deltat) # [mm] preciprate = np.insert(preciprate, 0, 0) # at t=0, precip rate = 0 precipaccum = np.insert(precipaccum, 0, 0) # at t=0, accumulated precip = 0 return preciprate, precipaccum # [mm/hr] , [mm]