Calculate the annual temperature timeseries from ERA5#
This notebook demonstrates how to retrieve, process, and plot the data displayed in the ERA Explorer
==> For further training materials, consider the Copernicus Climate Change Service (C3S) data tutorials
In this example we will be using earthkit to retrieve the data and standard Python packages to process and plot it.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
import earthkit.data
If you want to run this code yourself, make sure to get a CDS API key from the CDS website.
Following the guidance there, you’ll then need to set up your local .cdsapirc to be able to authenticate with the CDS and download data. Then, you will be able to customise the date ranges, variables, and much more!
This is the latitude/longitude that we will extract the nearest grid cell from. You can change these to plot different locations. You can see the currently used ones in the ERA Explorer URL. In this example we are plotting data at the gridpoint closest to Brussels, Belgium.
lat = 50.86
lng = 4.35
This is the variable we are getting from the ERA dataset, and the time period
variable = "2m_temperature"
date_range = ["1940-01-01", "2100-12-31"]
For convenience, we make a function to handle the data retrieval:
def retrieve_data(variable, date_range, lat, lng):
# Define the dataset and request parameters
dataset = "reanalysis-era5-single-levels-timeseries"
request = {
"variable": [
variable, # Variable to retrieve
],
"date": date_range, # Date range for the data
"location": {"longitude": lng, "latitude": lat}, # Location coordinates
"data_format": "netcdf" # Format of the retrieved data
}
# Use "earthkit" to retrieve the data
ekds = earthkit.data.from_source(
"cds", dataset, request
).to_xarray()
return ekds
Get the data. This will download a NetCDF file
data = retrieve_data(variable, date_range, lat, lng)
And let’s make some bespoke functions to process the data. Each one is described with a “””docstring”””
def truncate_data(var):
"""
Truncate the input dataset to include only complete years where the final hour
of December 31st is present in the dataset.
Args:
var (xarray.Dataset or xarray.DataArray): Input dataset containing a 'valid_time'
coordinate with datetime information.
Returns:
xarray.Dataset or xarray.DataArray: The truncated dataset containing data
only for complete years.
"""
# Create a range of the final hours of each year in the dataset
start_year = var.valid_time.dt.year.min().item()
end_year = var.valid_time.dt.year.max().item()
final_hours = pd.date_range(f"{start_year}-12-31T23:00:00",
f"{end_year}-12-31T23:00:00",
freq="YE")
# Filter only the years where the final hour is in the dataset
valid_years = [dt.year for dt in final_hours if dt in var.valid_time]
# Select data for those years
var_truncated = var.sel(
valid_time=var.valid_time.dt.year.isin(valid_years)
)
return var_truncated
# Make a function to compute the annual mean temperature time series
def temperatureAnnualTimeseries():
"""
Processes temperature annual timeseries data.
This function reads temperature data from a NetCDF file, removes incomplete
years, resamples the data to annual means, and converts the temperature
values from Kelvin to Celsius.
Returns:
tuple: A tuple containing:
- years (numpy.ndarray): An array of years corresponding to the annual means.
- abs_values (numpy.ndarray): Array of annual mean temperatures in Celsius.
"""
data_t2m_pt = data.t2m
# Remove incomplete year
data_t2m_pt_trun = truncate_data(data_t2m_pt)
# Resample the data to annual means
data_t2m_pt_agg = data_t2m_pt_trun.resample(valid_time="YE").mean()
years = data_t2m_pt_agg.valid_time.to_index().year
abs_values = (data_t2m_pt_agg - 273.15).values # Convert from Kelvin to Celsius
# Get the actual lat/lon used
nearest_lat = data_t2m_pt.latitude.values
nearest_lng = data_t2m_pt.longitude.values
return years, abs_values, nearest_lat, nearest_lng
# Call our function
years1, ts1, nearest_lat, nearest_lng = temperatureAnnualTimeseries()
And finally, let’s set up the plot nicely. This can easily be customised, but for now we do something similar to ERA Explorer
# Define line width for the plot
lw = 2
# Define marker style and size for the data points
marker = 'o'
markersize = 4
# Determine suffix for latitude (N/S) and longitude (E/W) based on their sign
latSuffix = 'N' if nearest_lat > 0 else 'S' # 'N' for +ve latitude, 'S' for -ve latitude
lngSuffix = 'E' if nearest_lng > 0 else 'W' # 'E' for +ve longitude, 'W' for -ve longitude
# Create a new figure for the plot with a specified size
plt.figure(figsize=(7, 4))
# Plot the temperature data with customization
# - `years1` represents the x-axis (years)
# - `ts1` represents the y-axis (temperature)
# - Customize marker, color, line width, and label (latitude/longitude info)
plt.plot(
years1, ts1,
marker=marker, # Marker style for data points
markersize=markersize, # Marker size
label=f'{abs(nearest_lat):.2f} °{latSuffix:s}, {abs(nearest_lng):.2f} °{lngSuffix:s}',
color='#CF7E92', # Line color
lw=lw, # Line width
clip_on=False # Prevent clipping of elements outside the axes
)
# Add a legend with a transparent background
plt.legend(framealpha=0)
# Customize the x-axis ticks:
# - Show ticks every 5 years, starting from 1940 to 2095
# - Rotate the tick labels vertically for better readability
plt.xticks(np.arange(1940, 2100, 5), rotation=90)
# Set x-axis limits to match the range of the data
plt.xlim(years1[0], years1[-1])
# Add minor ticks on the x-axis to represent each year
plt.gca().xaxis.set_minor_locator(MultipleLocator(1))
# Label the x-axis and y-axis with appropriate titles and font size
plt.xlabel('Year', fontsize=12) # X-axis: Year
plt.ylabel('Temperature [°C]', fontsize=12) # Y-axis: Temperature in Celsius
# Add a title to the plot with a larger font size
plt.title('Annual mean temperature from ERA5', fontsize=14)
# Adjust the layout to ensure elements fit without overlapping
plt.tight_layout()
# Display the plot
plt.show()