import requests
import pandas as pd
from io import BytesIO, StringIO
import zipfile
import time
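
# Both the short and the long ERA5 variable names map to the same pvlib
# name, so either naming convention may be used in ``variables``.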
VARIABLE_MAP = {
# short names
'd2m': 'temp_dew',
't2m': 'temp_air',
'sp': 'pressure',
'ssrd': 'ghi',
'tp': 'precipitation',
# long names
'2m_dewpoint_temperature': 'temp_dew',
'2m_temperature': 'temp_air',
'surface_pressure': 'pressure',
'surface_solar_radiation_downwards': 'ghi',
'total_precipitation': 'precipitation',
}
def _same(x):
    return x

def _k_to_c(temp_k):
    # kelvin -> degrees Celsius
    return temp_k - 273.15

def _j_to_w(j):
    # hourly-accumulated J/m^2 -> mean W/m^2 (divide by 3600 s)
    return j / 3600

def _m_to_cm(m):
    # meters -> centimeters
    return m * 100
UNITS = {
'u100': _same,
'v100': _same,
'u10': _same,
'v10': _same,
'd2m': _k_to_c,
't2m': _k_to_c,
'msl': _same,
'sst': _k_to_c,
'skt': _k_to_c,
'sp': _same,
'ssrd': _j_to_w,
'strd': _j_to_w,
'tp': _m_to_cm,
}
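
# Illustrative sketch (not executed): when ``map_variables=True``,
# get_era5 applies these two mappings column by column, roughly:
#
#     df['t2m'] = UNITS['t2m'](df['t2m'])   # kelvin -> deg C
#     df = df.rename(columns=VARIABLE_MAP)  # 't2m' -> 'temp_air'
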
def get_era5(latitude, longitude, start, end, variables, api_key,
map_variables=True, timeout=60,
url='https://cds.climate.copernicus.eu/api/retrieve/v1/'):
"""
Retrieve ERA5 reanalysis data from the ECMWF's Copernicus Data Store.
A CDS API key is needed to access this API. Register for one at [1]_.
This API [2]_ provides a subset of the full ERA5 dataset. See [3]_ for
the available variables. Data are available on a 0.25° x 0.25° grid.
Parameters
----------
latitude : float
In decimal degrees, north is positive (ISO 19115).
    longitude : float
        In decimal degrees, east is positive (ISO 19115).
start : datetime like or str
First day of the requested period. Assumed to be UTC if not localized.
end : datetime like or str
Last day of the requested period. Assumed to be UTC if not localized.
    variables : list of str
        List of variable names to retrieve, for example
        ``['ghi', 'temp_air']``. Both pvlib and ERA5 names can be used.
        See [3]_ for all available variables.
api_key : str
ECMWF CDS API key.
    map_variables : bool, default True
        When true, renames columns of the DataFrame to pvlib variable names
        where applicable and converts the units of some variables. See
        :const:`VARIABLE_MAP` and :const:`UNITS`.
    timeout : int, default 60
        Number of seconds to wait for the requested data to become
        available before raising a timeout error.
url : str, optional
API endpoint URL.

    Raises
    ------
    requests.exceptions.Timeout
        If ``timeout`` is reached before the requested data are ready.
    Exception
        If the data request is rejected or fails, for example because the
        dataset license has not been accepted.

    Returns
    -------
    data : pd.DataFrame
        Time series data. The index corresponds to the start of the
        interval.
    meta : dict
        Metadata for the request, including the coordinates of the
        returned grid point.

    References
    ----------
.. [1] https://cds.climate.copernicus.eu/
.. [2] https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels-timeseries?tab=overview
.. [3] https://confluence.ecmwf.int/pages/viewpage.action?pageId=505390919
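
    Examples
    --------
    >>> # the key below is a placeholder; use your own CDS API key
    >>> data, meta = get_era5(
    ...     latitude=55.7, longitude=12.6,
    ...     start='2023-01-01', end='2023-01-07',
    ...     variables=['ghi', 'temp_air'],
    ...     api_key='00112233-4455-6677-8899-aabbccddeeff',
    ... )  # doctest: +SKIP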
""" # noqa: E501
    def _to_utc(dt):
        # naive timestamps are assumed to be UTC, per the docstring
        dt = pd.to_datetime(dt)
        if dt.tzinfo is not None:
            dt = dt.tz_convert("UTC")
        return dt

    start = _to_utc(start).strftime("%Y-%m-%d")
    end = _to_utc(end).strftime("%Y-%m-%d")
    # the CDS API authenticates requests via a PRIVATE-TOKEN header
    headers = {'PRIVATE-TOKEN': api_key}
    # allow variables to be specified with pvlib names; because both the
    # short and the long ERA5 names map to the same pvlib name, the
    # reverse lookup resolves to the long ERA5 names
    reverse_map = {v: k for k, v in VARIABLE_MAP.items()}
    variables = [reverse_map.get(k, k) for k in variables]
# Step 1: submit data request (add it to the queue)
params = {
"inputs": {
"variable": variables,
"location": {"longitude": longitude, "latitude": latitude},
"date": [f"{start}/{end}"],
"data_format": "csv"
}
}
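    # the API follows an OGC API - Processes style workflow: a POST to the
    # .../execution endpoint queues a job that is then polled under jobs/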
slug = "processes/reanalysis-era5-single-levels-timeseries/execution"
    response = requests.post(url + slug, json=params, headers=headers,
                             timeout=timeout)
    submission_response = response.json()
    if not response.ok:
        # a common cause of rejection is that the dataset license has not
        # yet been accepted on the CDS website
        raise Exception(submission_response)
job_id = submission_response['jobID']
# Step 2: poll until the data request is ready
slug = "jobs/" + job_id
poll_interval = 1
num_polls = 0
while True:
response = requests.get(url + slug, headers=headers, timeout=timeout)
poll_response = response.json()
job_status = poll_response['status']
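        # any other status (e.g. 'accepted' or 'running') means the job is
        # still queued or processing, so keep polling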
if job_status == 'successful':
break # ready to proceed to next step
elif job_status == 'failed':
msg = (
'Request failed. Please check the ECMWF website for details: '
'https://cds.climate.copernicus.eu/requests?tab=all'
)
raise Exception(msg)
num_polls += 1
if num_polls * poll_interval > timeout:
raise requests.exceptions.Timeout(
'Request timed out. Try increasing the timeout parameter or '
'reducing the request size.'
)
        time.sleep(poll_interval)
# Step 3: get the download link for our requested dataset
slug = "jobs/" + job_id + "/results"
response = requests.get(url + slug, headers=headers, timeout=timeout)
results_response = response.json()
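    # the download link is nested under asset -> value -> href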
download_url = results_response['asset']['value']['href']
    # Step 4: finally, download the dataset; it arrives as a zip archive
    # containing a single CSV file
    response = requests.get(download_url, timeout=timeout)
    zipbuffer = BytesIO(response.content)
    with zipfile.ZipFile(zipbuffer) as archive:
        filename = archive.namelist()[0]
        csvbuffer = StringIO(archive.read(filename).decode('utf-8'))
df = pd.read_csv(csvbuffer)
    # parse into pvlib's usual (data, metadata) return format
metadata = submission_response['metadata'] # include messages from ECMWF
metadata['jobID'] = job_id
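    # the returned grid point may differ from the requested coordinates
    # (data are on a 0.25 degree grid), so record it in the metadata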
if not df.empty:
metadata['latitude'] = df['latitude'].values[0]
metadata['longitude'] = df['longitude'].values[0]
df.index = pd.to_datetime(df['valid_time']).dt.tz_localize('UTC')
df = df.drop(columns=['valid_time', 'latitude', 'longitude'])
if map_variables:
# convert units and rename
for shortname in df.columns:
converter = UNITS.get(shortname, _same)
df[shortname] = converter(df[shortname])
df = df.rename(columns=VARIABLE_MAP)
return df, metadata