Source code for va_am.utils.functions

# Function of the heatwave index
from __future__ import annotations
import xarray as xr
import numpy as np
import pandas as pd
import datetime
from typing import Union


[docs]def hw_pctl(data : xr.Dataset, years : list[str], pctl: Union[int, float] = 90, window_size : int = 15) -> xr.Dataset: """ This function calculates the heatwave percentile threshold for a given dataset, years and pctl. Parameters ---------- data : xr.Dataset. Temperature Dataset where the percentile threshold should be performed (usually t2m temperature). years : str. Period to perform the percentile threshold, given the start and end year (usually [1981, 2010]). pctl : int or float. Value (integer or float) wich defines the percentile to perform (default 90). window_size: int Size of the rolling window. Returns ------- pctl_th_ds : xr.Dataset. A xr.Dataset with the percentile threshold (called pctl_th as variable) for the given period. """ # Remove February 29th data = data.sel(time=~((data.time.dt.month == 2) & (data.time.dt.day == 29))) # Selecting the period with buffer start_date = datetime.datetime(int(years[0]), 1, 1) - datetime.timedelta(days=window_size) end_date = datetime.datetime(int(years[1]), 12, 31) + datetime.timedelta(days=window_size) temp_data = data.sel(time=slice(start_date, end_date)) # Create day-of-year array doys = temp_data.time.dt.dayofyear.values years_arr = temp_data.time.dt.year.values # Initialize array for thresholds pctl_values = np.full(365, np.nan) # Vectorized calculation for each day of year for doy in range(1, 366): # Find all indices within ± window_size days of this DOY # We need to handle year boundaries properly mask = np.zeros_like(doys, dtype=bool) # For each year in reference period for year in range(int(years[0]), int(years[1]) + 1): # Calculate the window for this year year_mask = (years_arr == year) # Calculate day-of-year range (handle wrap-around) doy_min = doy - window_size doy_max = doy + window_size if doy_min < 1: # Window spans previous year mask |= (year_mask & ((doys >= doy_min + 365) | (doys <= doy_max))) elif doy_max > 365: # Window spans next year mask |= (year_mask & ((doys >= doy_min) | (doys <= doy_max - 365))) else: # Normal case mask |= (year_mask & (doys >= doy_min) & (doys <= doy_max)) # Extract values and calculate percentile window_values = temp_data.values[mask] window_values = window_values[~np.isnan(window_values)] if len(window_values) > 0: pctl_values[doy-1] = np.percentile(window_values, pctl) # Create DataArray pctl_th = xr.DataArray( pctl_values, dims=['dayofyear'], coords={'dayofyear': range(1, 366)} ) return pctl_th.to_dataset(name='pctl_th')
[docs]def isHW_in_ds(data : xr.Dataset, pctl_th : xr.Dataset, var_name : str = 't2m_dailyMax') -> xr.Dataset: """ This function add a new variable to the dataset with the boolean values of the heatwave index. Parameters ---------- data : xr.Dataset. Temperature Dataset where the identification of HW is performed (usually t2m temperature). pctl_th : xr.Dataset. Dataset with the corresponding threshold, based on percentile, that identifies the HW. Returns ------- data : xr.Dataset. A xr.Dataset with the boolean value (called isHW) thath indicate if HW does or does not exist. """ # Checking k if the data is a xarray dataset or a xarray dataarray if not isinstance(data, xr.core.dataset.Dataset): data = data.to_dataset() else: pass # Identifying the heatwave data['isHW'] = xr.where(data[var_name].groupby('time.dayofyear') > pctl_th.pctl_th, 1, 0) return data