Calculate the monthly climatological humidity from ERA5

Calculate the monthly climatological humidity from ERA5#

This notebook demonstrates how to retrieve, process, and plot the data displayed in the ERA Explorer

==> For further training materials, consider the Copernicus Climate Change Service (C3S) data tutorials

In this example we will be using earthkit to retrieve the data and standard Python packages to process and plot it.

import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import calendar
import earthkit.data

If you want to run this code yourself, make sure to get a CDS API key from the CDS website.

Following the guidance there, you’ll then need to set up your local .cdsapirc to be able to authenticate with the CDS and download data. Then, you will be able to customise the date ranges, variables, and much more!

This is the latitude/longitude that we will extract the nearest grid cell from. You can change these to plot different locations. You can see the currently used ones in the ERA Explorer URL. In this example we are plotting data at the gridpoint closest to Brussels, Belgium.

lat = 50.86
lng = 4.35

This is the variable we are getting from the ERA dataset, and the time period

variable = "2m_temperature"
date_range = ["1991-01-01", "2020-12-31"]

To calculate relative humidity we need dewpoint temperature as well

variable2 = "2m_dewpoint_temperature"

For convenience, we make a function to handle the data retrieval:

def retrieve_data(variable, date_range, lat, lng):
    # Define the dataset and request parameters
    dataset = "reanalysis-era5-single-levels-timeseries"
    request = {
        "variable": [
        variable,  # Variable to retrieve
        ],
        "date": date_range,  # Date range for the data
        "location": {"longitude": lng, "latitude": lat},  # Location coordinates
        "data_format": "netcdf"  # Format of the retrieved data
    }

    # Use "earthkit" to retrieve the data
    ekds = earthkit.data.from_source(
        "cds", dataset, request
    ).to_xarray()

    return ekds

Now let’s download the data!

data = retrieve_data(variable, date_range, lat, lng)

To calculate relative humidity we need dewpoint temperature as well

data2 = retrieve_data(variable2, date_range, lat, lng)

And let’s make some bespoke functions to process the data. Each one is described with a “””docstring”””

# Define a function to calculate relative humidity
def calculate_rh(data_d2m_pt, data_t2m_pt, method):
    """
    Calculate relative humidity using specified method.
    Parameters:
    data_d2m_pt (numpy.ndarray): Dew point temperature data in Kelvin.
    data_t2m_pt (numpy.ndarray): Air temperature data in Kelvin.
    method (str): Method to use for calculation.
    Options are "August-Roche-Magnus" or "Clausius-Clapeyron".
    Returns:
    numpy.ndarray: Calculated relative humidity as a percentage.
    Raises:
    ValueError: If an invalid method is specified.
    """

    # Convert temperatures from Kelvin to Celsius
    data_t2m_pt -= 273.15
    data_d2m_pt -= 273.15

    if (method == "August-Roche-Magnus"):
        # Values are calculated using the August-Roche-Magnus approximation.
        # This is the method used in the ERA Explorer.
        numerator = np.exp((17.625 * (data_d2m_pt)) / (243.04 + (data_d2m_pt)))
        denominator = np.exp((17.625 * (data_t2m_pt)) / (243.04 + (data_t2m_pt)))
        rh = (numerator * 100)/denominator
    elif (method == "Clausius-Clapeyron"):
        # Values are calculated using the Clausius-Clapeyron relation.
        numerator = 6.11 * 10 ^ (7.5 * ((data_d2m_pt)/(237.7 + data_d2m_pt)))
        denominator = 6.11 * 10 ^ (7.5 * ((data_t2m_pt)/(237.7 + data_t2m_pt)))
        rh = (numerator * 100)/denominator
    else:
        raise ValueError("Invalid method specified")

    return rh

# Make a function to compute the relative humidity monthly climatology
def humidityMonthlyClimatology():
    """
    Calculate the monthly climatology of relative humidity.
    This function reads temperature and dew point temperature data from NetCDF files,
    calculates the relative humidity using the specified method, and then computes
    the monthly climatology of daily minimum and maximum relative humidity.
    Returns:
        pandas.DataFrame: A DataFrame containing the monthly climatological averages
                          for daily minimum and maximum relative humidity.
    """
 
    data_t2m_pt = data.t2m
    data_d2m_pt = data2.d2m

    # Specify the method for calculating relative humidity
    method = "August-Roche-Magnus"

    # Calculate relative humidity
    data_rh_pt = calculate_rh(data_d2m_pt, data_t2m_pt, method)

    # Convert the time coordinate to a pandas datetime index
    time_index = pd.to_datetime(data_rh_pt.valid_time.values)
    
    # Create a DataFrame for easier manipulation
    df = pd.DataFrame(data_rh_pt.values, index=time_index, columns=['rh'])
    
    # Resample to find daily minimum and maximum
    daily_min = df.resample('D').min()
    daily_max = df.resample('D').max()
    
    # Combine the daily min and max into a single DataFrame
    daily_stats = pd.DataFrame({
        'daily_min': daily_min['rh'],
        'daily_max': daily_max['rh']
    })

    # Extract the month from DateTimeIndex
    daily_stats['month'] = daily_stats.index.month
    
    # Group by month and calculate climatological averages for daily min and max
    grouped_by_month = daily_stats.groupby('month')
    monthly_climatology = grouped_by_month.mean()

    # Get the actual lat/lon used
    nearest_lat = data_t2m_pt.latitude.values
    nearest_lng = data_t2m_pt.longitude.values
    
    return monthly_climatology, nearest_lat, nearest_lng
    
# Call our function
clim, nearest_lat, nearest_lng = humidityMonthlyClimatology()

And finally, let’s set up the plot nicely. This can easily be customised, but for now we do something similar to ERA Explorer

# Determine suffix for latitude (N/S) and longitude (E/W) based on their sign
latSuffix = 'N' if nearest_lat > 0 else 'S'  # 'N' for positive, 'S' for negative latitude
lngSuffix = 'E' if nearest_lng > 0 else 'W'  # 'E' for positive, 'W' for negative longitude

# Create a new figure with a specified size (8 inches by 6 inches)
plt.figure(figsize=(8, 6))

# Plot data as a bar chart:
bars = plt.bar(
    clim.index,                      # X-axis values (months)
    clim['daily_max'] - clim['daily_min'],  # Bar heights (range of humidity)
    bottom=clim['daily_min'],        # Starting position of bars
    width=0.8,                       # Width of bars
    color='#76787B',                 # Bar fill color (grayish tone)
    edgecolor='white',               # Edge color of bars
    label=f'{abs(nearest_lat):.2f} °{latSuffix}, {abs(nearest_lng):.2f} °{lngSuffix}'  # Legend label
)

# Add labels to each bar
for index, bar in enumerate(bars):
    daily_min = clim['daily_min'].iloc[index]
    daily_max = clim['daily_max'].iloc[index]
    x = bar.get_x() + bar.get_width() / 2  # Center position of each bar

    # Minimum relative humidity label
    plt.text(
        x, bar.get_y() + 0.5, f'{daily_min:.0f}', ha='center', va='bottom',
        fontsize=10, color='black'
    )

    # Maximum relative humidity label
    plt.text(
        x, bar.get_height() + bar.get_y() - 1, f'{daily_max:.0f}', 
        ha='center', va='top', fontsize=10, color='black'
    )

# Add legend with transparent background
plt.legend(framealpha=0)

# Customize axis labels
plt.xlabel('Month', fontsize=12)
plt.ylabel('Relative humidity (min to max) [%]', fontsize=12)

# Set y-axis limits and x-ticks
plt.ylim(0, 100)
plt.xticks(
    ticks=clim.index, 
    labels=[calendar.month_abbr[i] for i in clim.index]
)

# Add title with dynamic dataset information
plt.title(
    f'Monthly Relative Humidity Climatologies from ERA5 '
    f'({date_range[0][:4]}-{date_range[1][:4]})', 
    fontsize=14
)

# Adjust layout and display the plot
plt.tight_layout()
plt.show()
../../_images/73fb2f3d428e7639371265ee6f4969b9a66fc2db96ebc09515ea978eee67ac40.png