cinei.download

Download CEDS v_2021_04_21 gridded emission data from PNNL DataHub.

Parameters

save_dir : str Directory to save downloaded and extracted files. species : list of str, optional Species to extract. Case-insensitive. e.g. ['CO', 'NOx'] or ['co', 'nox'] or ['CO', 'NOX'] If None, all species are extracted. Available: SO2, NOx, CO, BC, OC, NH3, NMVOC, CO2, CH4, N2O, PM2.5, PM10 keep_tar : bool, optional If True, keep the .tar file after extraction. Default False.

Returns

list of str Paths to extracted NetCDF files.

Examples

import cinei
files = cinei.download_ceds(
    save_dir='/work/bb1554/data/CEDS',
    species=['CO', 'NOx']   # case-insensitive: 'co','nox' also works
)

Source code in cinei/download.py

def download_ceds(save_dir, species=None, keep_tar=False):
    """
    Fetch the CEDS v_2021_04_21 gridded emission archive and unpack it.

    The tar archive is downloaded (resumably) into ``save_dir``, the
    requested species are extracted from it, and the archive is then
    deleted unless ``keep_tar`` is set.

    Parameters
    ----------
    save_dir : str
        Target directory for the archive and the extracted files.
        It is created (including parents) when it does not exist.
    species : list of str, optional
        Species to extract; names are matched case-insensitively,
        e.g. ['CO', 'NOx'], ['co', 'nox'] or ['CO', 'NOX'].
        ``None`` extracts every species.
        Available: SO2, NOx, CO, BC, OC, NH3, NMVOC, CO2, CH4, N2O, PM2.5, PM10
    keep_tar : bool, optional
        Keep the downloaded .tar after extraction. Default False.

    Returns
    -------
    list of str
        Paths of the extracted NetCDF files.

    Examples
    --------
    >>> import cinei
    >>> files = cinei.download_ceds(
    ...     save_dir='/work/bb1554/data/CEDS',
    ...     species=['CO', 'NOx']   # 'co', 'nox' is accepted as well
    ... )
    """
    target_dir = Path(save_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    archive = target_dir / "CEDS_v_2021_04_21.tar"

    # Canonicalise user-supplied species names; None means "everything".
    wanted = None if species is None else _normalize_species(species)

    print("[CINEI] CEDS v_2021_04_21 Download")
    print(f"[CINEI] Source  : {CEDS_REGISTRY['doi']}")
    print(f"[CINEI] Save to : {target_dir}")
    print(f"[CINEI] Species : {wanted if wanted else 'ALL'}")
    print()

    # 1) download (resumable), 2) unpack the selected species
    _download_with_resume(CEDS_REGISTRY["url"], archive)
    extracted = _extract_species(archive, target_dir, wanted)

    # 3) drop the archive unless the caller asked to keep it
    if not keep_tar:
        if archive.exists():
            os.remove(archive)
            print(f"[CINEI] Removed tar file: {archive.name}")

    print(f"\n[CINEI] ✅ Done! {len(extracted)} file(s) saved to {target_dir}")
    for item in extracted:
        print(f"         {Path(item).name}")
    return extracted

Print available CEDS species and their accepted name variants.

Source code in cinei/download.py

def list_ceds_species():
    """
    Print each canonical CEDS species with the input spellings it accepts.

    The data comes from the module-level ``SPECIES_VARIANTS`` mapping;
    underscores at either end of a variant are stripped for display.
    """
    print("[CINEI] Available CEDS species (case-insensitive):")
    for name, spellings in SPECIES_VARIANTS.items():
        shown = []
        for spelling in spellings:
            shown.append(spelling.strip('_'))
        print(f"  {name:<8} →  accepted input: {shown}")

Download MEIC v1.4 sample data (2017) from Zenodo.

This provides two sample months (January and July 2017) in speciated NetCDF format, suitable for testing CINEI workflows. For the full multi-year MEIC dataset, use get_meic_info().

Parameters

save_dir : str Directory to save downloaded files. months : list of str, optional Which months to download. Options: ['jan', 'jul', 'sectoral'] Default: ['jan', 'jul'] (both sample months) extract : bool, optional If True, automatically unzip downloaded files. Default True. keep_zip : bool, optional If True, keep .zip files after extraction. Default False.

Returns

list of str Paths to downloaded (and extracted) files.

Examples

import cinei

Download both sample months

cinei.download_meic_sample(save_dir='/work/bb1554/data/MEIC')

Download only January

cinei.download_meic_sample(
    save_dir='/work/bb1554/data/MEIC',
    months=['jan']
)

Download sectoral totals only

cinei.download_meic_sample(
    save_dir='/work/bb1554/data/MEIC',
    months=['sectoral']
)

Source code in cinei/download.py

def download_meic_sample(save_dir, months=None, extract=True, keep_zip=False):
    """
    Download MEIC v1.4 sample data (2017) from Zenodo.

    This provides two sample months (January and July 2017) in speciated
    NetCDF format, suitable for testing CINEI workflows.
    For the full multi-year MEIC dataset, use get_meic_info().

    Parameters
    ----------
    save_dir : str
        Directory to save downloaded files. Created if missing.
    months : list of str or str, optional
        Which sample files to download. Keys are matched
        case-insensitively against ``MEIC_REGISTRY["files"]``.
        Options: ['jan', 'jul', 'sectoral']
        Default: ['jan', 'jul'] (both sample months).
        A single key may also be passed as a plain string, e.g. 'jan'.
    extract : bool, optional
        If True, automatically unzip downloaded files. Default True.
    keep_zip : bool, optional
        If True, keep .zip files after extraction. Default False.
        Only relevant when ``extract`` is True; without extraction the
        zip is always kept.

    Returns
    -------
    list of str
        One path per requested key: the extraction directory when
        ``extract`` is True, otherwise the downloaded zip file.

    Raises
    ------
    ValueError
        If any entry of ``months`` is not a recognised key.

    Notes
    -----
    An MD5 mismatch is reported on stdout but does not abort the run.

    Examples
    --------
    >>> import cinei
    >>> # Download both sample months
    >>> cinei.download_meic_sample(save_dir='/work/bb1554/data/MEIC')

    >>> # Download only January
    >>> cinei.download_meic_sample(
    ...     save_dir='/work/bb1554/data/MEIC',
    ...     months=['jan']
    ... )

    >>> # Download sectoral totals only
    >>> cinei.download_meic_sample(
    ...     save_dir='/work/bb1554/data/MEIC',
    ...     months=['sectoral']
    ... )
    """
    import zipfile

    save_dir = Path(save_dir)
    save_dir.mkdir(parents=True, exist_ok=True)

    # ── Default months ─────────────────────────────────────────────────
    if months is None:
        months = ["jan", "jul"]
    elif isinstance(months, str):
        # A bare string would otherwise be iterated character by character
        # and rejected as ['j', 'a', 'n'].
        months = [months]

    # ── Validate months (lower-case each key exactly once) ─────────────
    valid = list(MEIC_REGISTRY["files"].keys())
    keys = []
    invalid = []
    for m in months:
        # Non-string entries are reported as invalid instead of crashing
        # on ``.lower()``.
        key = m.lower() if isinstance(m, str) else None
        if key is not None and key in valid:
            keys.append(key)
        else:
            invalid.append(m)
    if invalid:
        raise ValueError(
            f"[CINEI] Unrecognized month keys: {invalid}\n"
            f"        Available: {valid}"
        )

    print(f"[CINEI] MEIC v1.4 Sample Data Download")
    print(f"[CINEI] Source  : {MEIC_REGISTRY['doi']}")
    print(f"[CINEI] Save to : {save_dir}")
    print(f"[CINEI] Months  : {months}")
    print(f"[CINEI] Species : {MEIC_REGISTRY['species']}")
    print()

    downloaded = []
    for key in keys:
        info = MEIC_REGISTRY["files"][key]
        zip_path = save_dir / info["name"]

        print(f"[CINEI] → {info['month']}  ({info['size']})")

        # ── Download ───────────────────────────────────────────────────
        _download_with_resume(info["url"], zip_path)

        # ── MD5 check (warning only) ───────────────────────────────────
        actual_md5 = _md5(zip_path)
        if actual_md5 == info["md5"]:
            print(f"[CINEI]   ✅ MD5 verified")
        else:
            print(f"[CINEI]   ⚠️  MD5 mismatch! File may be corrupted.")
            print(f"[CINEI]      Expected : {info['md5']}")
            print(f"[CINEI]      Got      : {actual_md5}")

        # ── Extract ────────────────────────────────────────────────────
        if extract:
            # Unpack into a sibling directory named after the archive.
            out_subdir = save_dir / info["name"].replace(".zip", "")
            out_subdir.mkdir(exist_ok=True)
            with zipfile.ZipFile(zip_path, "r") as z:
                z.extractall(out_subdir)
            print(f"[CINEI]   📂 Extracted to: {out_subdir}")
            downloaded.append(str(out_subdir))

            if not keep_zip:
                os.remove(zip_path)
        else:
            downloaded.append(str(zip_path))

    print(f"\n[CINEI] ✅ Done! Files saved to {save_dir}")
    print(f"\n[CINEI] Citation:")
    print(f"  {MEIC_REGISTRY['citation']}")
    return downloaded

Print information and instructions for downloading the full MEIC dataset.

The full MEIC dataset requires registration at the official website. This function prints step-by-step instructions.

Examples

import cinei
cinei.get_meic_info()

Source code in cinei/download.py

def get_meic_info():
    """
    Show where MEIC data can be obtained and how to download the full set.

    Writes a fixed banner to stdout: the DOI of the public sample data,
    the URL of the registration-gated full dataset, numbered download
    steps, the expected filename pattern, the species list and the
    citation. The DOI, URL, species and citation are read from
    ``MEIC_REGISTRY``. Nothing is returned.

    Examples
    --------
    >>> import cinei
    >>> cinei.get_meic_info()
    """
    def _banner():
        # A generator keeps every registry lookup lazy, so each one is
        # evaluated immediately before its own line is printed.
        rule = "=" * 65
        yield rule
        yield "  MEIC — Multi-resolution Emission Inventory for China"
        yield rule
        yield ""
        yield "  Sample data (2017 Jan & Jul) — publicly available:"
        yield f"  {MEIC_REGISTRY['doi']}"
        yield ""
        yield "  Full dataset — registration required:"
        yield f"  {MEIC_REGISTRY['full_data_url']}"
        yield ""
        yield "  Steps to download full MEIC data:"
        yield "  1. Visit the URL above"
        yield "  2. Register for an account"
        yield "  3. Select species: SO2, NOx, CO, BC, OC, NH3, NMVOC, PM2.5, PM10"
        yield "  4. Select sectors: agriculture, industry, power, residential,"
        yield "                     transportation"
        yield "  5. Download monthly NetCDF files for your target year"
        yield "  6. Place files in your MEIC directory, e.g.:"
        yield "       /work/bb1554/data/MEIC/2017/"
        yield ""
        yield "  Expected filename pattern after download:"
        yield "    *_{month}_*_{species}.nc"
        yield "    e.g. agr_Jan_2017_SO2.nc, ind_Jan_2017_NOx.nc"
        yield ""
        yield "  Species available: " + ", ".join(MEIC_REGISTRY["species"])
        yield ""
        yield "  Citation:"
        yield f"    {MEIC_REGISTRY['citation']}"
        yield rule

    for text in _banner():
        print(text)

List expected MEIC filenames for given year, species, months, sectors.

Useful to verify your downloaded MEIC files match the expected naming.

Parameters

year : int or str Target year, e.g. 2017 species : list of str, optional Species list, e.g. ['NOx', 'SO2']. Default: all species. Case-insensitive. Available: NOx, SO2, CO, BC, OC, NH3, PM2.5, PM10 months : list of int, optional Month numbers 1-12. Default: all 12 months. e.g. [1, 7] for January and July only. sectors : list of str, optional Sector names. Default: all 5 sectors. Available: agriculture, industry, power, residential, transportation

Returns

list of str Expected MEIC filenames.

Examples

import cinei

List all expected files for 2017 NOx, January only

cinei.list_meic_filenames(2017, species=['NOx'], months=[1])
['2017_01_agriculture_NOx.nc',
 '2017_01_industry_NOx.nc',
 '2017_01_power_NOx.nc',
 '2017_01_residential_NOx.nc',
 '2017_01_transportation_NOx.nc']

Source code in cinei/download.py

def list_meic_filenames(year, species=None, months=None, sectors=None):
    """
    Build the MEIC filenames expected for a year/species/month/sector selection.

    Handy for checking that downloaded MEIC files follow the expected
    naming scheme ``{year}_{month}_{sector}_{species}.nc``.

    Parameters
    ----------
    year : int or str
        Target year, e.g. 2017. Inserted into the filename as given.
    species : list of str, optional
        Species names, e.g. ['NOx', 'SO2']; matched case-insensitively.
        Default: every species.
        Available: NOx, SO2, CO, BC, OC, NH3, PM2.5, PM10
    months : list of int, optional
        Month numbers 1-12, e.g. [1, 7] for January and July.
        Default: all 12 months.
    sectors : list of str, optional
        Sector names. Default: all 5 sectors.
        Available: agriculture, industry, power, residential, transportation

    Returns
    -------
    list of str
        Filenames ordered by species, then ascending month, then sector
        (in the order the sectors were supplied).

    Raises
    ------
    ValueError
        If a month is outside 1-12 or a sector is not in ``MEIC_SECTORS``.

    Examples
    --------
    >>> import cinei
    >>> # Expected files for NOx, January 2017
    >>> cinei.list_meic_filenames(2017, species=['NOx'], months=[1])
    ['2017_01_agriculture_NOx.nc',
     '2017_01_industry_NOx.nc',
     '2017_01_power_NOx.nc',
     '2017_01_residential_NOx.nc',
     '2017_01_transportation_NOx.nc']
    """
    # Resolve defaults.
    sp_keys = (
        list(MEIC_SPECIES_FILENAME.keys())
        if species is None
        else _normalize_meic_species(species)
    )
    if months is None:
        months = list(range(1, 13))
    if sectors is None:
        sectors = MEIC_SECTORS

    # Reject anything outside the supported months / sectors.
    allowed_months = range(1, 13)
    bad_months = [m for m in months if m not in allowed_months]
    if bad_months:
        raise ValueError(
            f"[CINEI] Invalid month numbers: {bad_months}\n"
            f"        Expected integers 1-12."
        )

    bad_sectors = [name for name in sectors if name not in MEIC_SECTORS]
    if bad_sectors:
        raise ValueError(
            f"[CINEI] Invalid sectors: {bad_sectors}\n"
            f"        Available: {MEIC_SECTORS}"
        )

    # species → month (ascending) → sector
    names = []
    for sp_key in sp_keys:
        tag = MEIC_SPECIES_FILENAME[sp_key]
        for mon in sorted(months):
            mm = MEIC_MONTHS[mon]
            names.extend(f"{year}_{mm}_{sector}_{tag}.nc" for sector in sectors)
    return names

Check which expected MEIC files are present or missing in a directory.

Parameters

meic_dir : str Path to directory containing MEIC NetCDF files. year : int or str Target year, e.g. 2017 species : list of str, optional Species to check. Default: all species. months : list of int, optional Months 1-12 to check. Default: all 12 months. sectors : list of str, optional Sectors to check. Default: all 5 sectors.

Returns

dict with keys 'found', 'missing' Each value is a list of filenames.

Examples

import cinei
result = cinei.check_meic_files(
    meic_dir='/work/bb1554/data/MEIC/2017',
    year=2017,
    species=['NOx', 'SO2'],
    months=[1, 7]
)
print(result['missing'])

Source code in cinei/download.py

def check_meic_files(meic_dir, year, species=None, months=None, sectors=None):
    """
    Check which expected MEIC files are present or missing in a directory.

    The expected names come from ``list_meic_filenames``; each one is
    looked up directly inside ``meic_dir`` (no recursion). A summary and
    the list of missing files are printed to stdout.

    Parameters
    ----------
    meic_dir : str
        Path to directory containing MEIC NetCDF files.
    year : int or str
        Target year, e.g. 2017
    species : list of str, optional
        Species to check. Default: all species.
    months : list of int, optional
        Months 1-12 to check. Default: all 12 months.
    sectors : list of str, optional
        Sectors to check. Default: all 5 sectors.

    Returns
    -------
    dict with keys 'found', 'missing'
        Each value is a list of filenames. Every expected filename
        appears in exactly one of the two lists.

    Raises
    ------
    FileNotFoundError
        If ``meic_dir`` does not exist.
    ValueError
        Propagated from ``list_meic_filenames`` for invalid months or
        sectors.

    Examples
    --------
    >>> import cinei
    >>> result = cinei.check_meic_files(
    ...     meic_dir='/work/bb1554/data/MEIC/2017',
    ...     year=2017,
    ...     species=['NOx', 'SO2'],
    ...     months=[1, 7]
    ... )
    >>> print(result['missing'])
    """
    meic_dir = Path(meic_dir)
    if not meic_dir.exists():
        raise FileNotFoundError(
            f"[CINEI] MEIC directory not found: {meic_dir}"
        )

    expected = list_meic_filenames(year, species, months, sectors)

    # Single pass: one filesystem check per file, so the two lists always
    # partition ``expected`` even if the directory changes while we scan.
    found = []
    missing = []
    for fname in expected:
        if (meic_dir / fname).exists():
            found.append(fname)
        else:
            missing.append(fname)

    print(f"[CINEI] MEIC file check: {meic_dir}")
    print(f"[CINEI] Year    : {year}")
    print(f"[CINEI] Species : {species if species else 'ALL'}")
    print(f"[CINEI] Months  : {months if months else 'ALL (1-12)'}")
    print(f"[CINEI] Sectors : {sectors if sectors else 'ALL'}")
    print()
    print(f"[CINEI] ✅ Found  : {len(found):>3} / {len(expected)} files")
    print(f"[CINEI] ❌ Missing: {len(missing):>3} / {len(expected)} files")

    if missing:
        print(f"\n[CINEI] Missing files:")
        for f in missing:
            print(f"          {f}")

    return {"found": found, "missing": missing}

Download HTAP v3 gridded emission data from Zenodo.

Coverage: 2000-2018, monthly, 9 species, 16 sectors. Each NetCDF file contains all 12 months and all sectors for one year.

Parameters

save_dir : str Directory to save downloaded files. species : list of str, optional Species to download. Case-insensitive. e.g. ['NOx', 'SO2'] or ['nox', 'so2'] or ['NOX', 'PM2.5'] Default: all 9 species. Available: BC, CO, NH3, NMVOC, NOx, OC, PM10, PM2.5, SO2 resolution : str, optional Spatial resolution. Options: - '05x05' : 0.5° x 0.5° (~500-800 MB per species) [default] - '01x01' : 0.1° x 0.1° (~8-13 GB per species) data_type : str, optional Data type. Options: - 'emissions' : Mg/month [default] - 'fluxes' : kg/m2/s extract : bool, optional If True, automatically unzip after download. Default True. keep_zip : bool, optional If True, keep .zip files after extraction. Default False.

Returns

list of str Paths to downloaded (and extracted) files/directories.

Examples

import cinei

Download NOx and SO2 at 0.5° resolution (recommended)

cinei.download_htap(
    save_dir='/work/bb1554/data/HTAP',
    species=['NOx', 'SO2'],
    resolution='05x05'
)

Download all species at 0.1° (warning: very large ~90 GB)

cinei.download_htap(
    save_dir='/work/bb1554/data/HTAP',
    resolution='01x01'
)

Source code in cinei/download.py

def download_htap(save_dir, species=None, resolution="05x05",
                  data_type="emissions", extract=True, keep_zip=False):
    """
    Fetch HTAP v3 gridded emission archives from Zenodo, one zip per species.

    Coverage: 2000-2018, monthly, 9 species, 16 sectors.
    Each NetCDF file contains all 12 months and all sectors for one year.

    Parameters
    ----------
    save_dir : str
        Destination directory; created (with parents) when missing.
    species : list of str, optional
        Species to fetch, matched case-insensitively,
        e.g. ['NOx', 'SO2'], ['nox', 'so2'] or ['NOX', 'PM2.5'].
        Default: all 9 species.
        Available: BC, CO, NH3, NMVOC, NOx, OC, PM10, PM2.5, SO2
    resolution : str, optional
        Grid spacing:
        - '05x05' : 0.5° x 0.5° (~500-800 MB per species) [default]
        - '01x01' : 0.1° x 0.1° (~8-13 GB per species)
    data_type : str, optional
        Quantity stored in the files:
        - 'emissions' : Mg/month  [default]
        - 'fluxes'    : kg/m2/s
    extract : bool, optional
        Unzip each archive after download. Default True.
    keep_zip : bool, optional
        Keep the .zip once it has been extracted. Default False.

    Returns
    -------
    list of str
        One entry per species: the extraction directory when ``extract``
        is True, otherwise the path of the downloaded zip.

    Raises
    ------
    ValueError
        If ``resolution`` or ``data_type`` is not listed in
        ``HTAP_REGISTRY``.

    Notes
    -----
    A failed MD5 comparison is only reported on stdout; processing
    continues.

    Examples
    --------
    >>> import cinei
    >>> # NOx and SO2 at 0.5° (recommended)
    >>> cinei.download_htap(
    ...     save_dir='/work/bb1554/data/HTAP',
    ...     species=['NOx', 'SO2'],
    ...     resolution='05x05'
    ... )

    >>> # Every species at 0.1° (warning: very large, ~90 GB)
    >>> cinei.download_htap(
    ...     save_dir='/work/bb1554/data/HTAP',
    ...     resolution='01x01'
    ... )
    """
    import zipfile

    out_root = Path(save_dir)
    out_root.mkdir(parents=True, exist_ok=True)

    # Reject unsupported options before anything is downloaded.
    if resolution not in HTAP_REGISTRY["resolutions"]:
        raise ValueError(
            f"[CINEI] Invalid resolution: '{resolution}'\n"
            f"        Available: {HTAP_REGISTRY['resolutions']}\n"
            f"        Tip: use '05x05' (0.5°) to save disk space."
        )
    if data_type not in HTAP_REGISTRY["types"]:
        raise ValueError(
            f"[CINEI] Invalid data_type: '{data_type}'\n"
            f"        Available: {HTAP_REGISTRY['types']}"
        )

    sp_keys = (
        list(HTAP_SPECIES_FILENAME.keys())
        if species is None
        else _normalize_htap_species(species)
    )

    # Filename spelling and registry record (size, md5) for each species.
    sp_names = [HTAP_SPECIES_FILENAME[k] for k in sp_keys]
    entries = [
        HTAP_REGISTRY["files"][(resolution, data_type, name)]
        for name in sp_names
    ]
    unit = 'Mg/month' if data_type == 'emissions' else 'kg/m²/s'

    print("[CINEI] HTAP v3 Download")
    print(f"[CINEI] Source     : {HTAP_REGISTRY['doi']}")
    print(f"[CINEI] Save to    : {out_root}")
    print(f"[CINEI] Resolution : {resolution.replace('x', '° x ')}°")
    print(f"[CINEI] Data type  : {data_type} ({unit})")
    print(f"[CINEI] Species    : {sp_names}")
    print("[CINEI] File sizes : " + ', '.join(e['size'] for e in entries))
    print()

    results = []
    for name, entry in zip(sp_names, entries):
        archive_name = f"gridmaps_{resolution}_{data_type}_{name}.zip"
        link = f"https://zenodo.org/records/7516361/files/{archive_name}?download=1"
        archive = out_root / archive_name

        print(f"[CINEI] → {name}  ({entry['size']})")

        # Resumable download, then integrity check (warning only).
        _download_with_resume(link, archive)
        digest = _md5(archive)
        if digest != entry["md5"]:
            print("[CINEI]   ⚠️  MD5 mismatch!")
            print(f"[CINEI]      Expected : {entry['md5']}")
            print(f"[CINEI]      Got      : {digest}")
        else:
            print("[CINEI]   ✅ MD5 verified")

        if not extract:
            results.append(str(archive))
            continue

        # Unpack into a directory named after the archive.
        unpack_dir = out_root / archive_name.replace(".zip", "")
        unpack_dir.mkdir(exist_ok=True)
        print(f"[CINEI]   📂 Extracting to: {unpack_dir.name}/")
        with zipfile.ZipFile(archive, "r") as zf:
            zf.extractall(unpack_dir)
        if not keep_zip:
            os.remove(archive)
        results.append(str(unpack_dir))

    print(f"\n[CINEI] ✅ Done! {len(results)} species downloaded.")
    print("\n[CINEI] Citation:")
    print(f"  {HTAP_REGISTRY['citation']}")
    return results

List available HTAP v3 files with sizes.

Parameters

resolution : str '05x05' or '01x01' data_type : str 'emissions' or 'fluxes' species : list of str, optional Filter by species. Default: all.

Examples

import cinei
cinei.list_htap_files(resolution='05x05', data_type='emissions')

Source code in cinei/download.py

def list_htap_files(resolution="05x05", data_type="emissions", species=None):
    """
    Print a table of the HTAP v3 archives and their sizes.

    One row is printed per species with the archive filename and the size
    recorded in ``HTAP_REGISTRY["files"]``. Nothing is returned.

    Parameters
    ----------
    resolution : str
        '05x05' or '01x01'
    data_type : str
        'emissions' or 'fluxes'
    species : list of str, optional
        Restrict the table to these species. Default: all.

    Examples
    --------
    >>> import cinei
    >>> cinei.list_htap_files(resolution='05x05', data_type='emissions')
    """
    keys = (
        list(HTAP_SPECIES_FILENAME.keys())
        if species is None
        else _normalize_htap_species(species)
    )

    # Shared column layout for the header and every data row.
    row = "[CINEI] {:<10} {:<45} {:>10}"

    pretty_res = resolution.replace("x", "° x ") + "°"
    print(f"[CINEI] HTAP v3 — {pretty_res}  {data_type}")
    print(row.format('Species', 'Filename', 'Size'))
    print("[CINEI] " + "-" * 65)

    for key in keys:
        name = HTAP_SPECIES_FILENAME[key]
        archive = f"gridmaps_{resolution}_{data_type}_{name}.zip"
        record = HTAP_REGISTRY["files"][(resolution, data_type, name)]
        print(row.format(name, archive, record['size']))

Download HTAP v3 data and extract a specific month.

Parameters

save_dir : str Directory to save files. species : list of str Species to download, e.g. ['NOx', 'SO2']. Case-insensitive. year : int Target year. HTAP coverage: 2000-2018. month : int Target month (1-12). resolution : str, optional '05x05' (default) or '01x01'. data_type : str, optional 'emissions' (Mg/month, default) or 'fluxes' (kg/m²/s). keep_annual : bool, optional If True, keep the full annual NetCDF. Default False.

Returns

list of str Paths to extracted monthly NetCDF files.

Examples

import cinei
cinei.download_htap_monthly(
    save_dir='/work/bb1554/data/HTAP',
    species=['NOx', 'SO2'],
    year=2017,
    month=7   # July
)

Source code in cinei/download.py

def download_htap_monthly(save_dir, species, year, month,
                          resolution="05x05", data_type="emissions",
                          keep_annual=False):
    """
    Download HTAP v3 data and extract a specific month.

    For each species the per-species zip is downloaded (unless it is
    already in ``save_dir``), the requested year's NetCDF is unpacked from
    it, the requested month is selected and written to its own NetCDF
    file.

    Parameters
    ----------
    save_dir : str
        Directory to save files. Created if missing.
    species : list of str
        Species to download, e.g. ['NOx', 'SO2']. Case-insensitive.
    year : int
        Target year. HTAP coverage: 2000-2018.
    month : int
        Target month (1-12).
    resolution : str, optional
        '05x05' (default) or '01x01'. Must be listed in
        ``HTAP_REGISTRY["resolutions"]``.
    data_type : str, optional
        'emissions' (Mg/month, default) or 'fluxes' (kg/m²/s). Must be
        listed in ``HTAP_REGISTRY["types"]``.
    keep_annual : bool, optional
        If True, keep the directory holding the full annual NetCDF.
        Default False.

    Returns
    -------
    list of str
        Paths to extracted monthly NetCDF files.

    Raises
    ------
    ValueError
        If ``month``, ``year``, ``resolution`` or ``data_type`` is invalid.
    FileNotFoundError
        If the species zip has no entry for the requested year.

    Notes
    -----
    The per-species zip is deleted once the requested year has been
    unpacked, including when the zip was already present in ``save_dir``
    before the call.

    Examples
    --------
    >>> import cinei
    >>> cinei.download_htap_monthly(
    ...     save_dir='/work/bb1554/data/HTAP',
    ...     species=['NOx', 'SO2'],
    ...     year=2017,
    ...     month=7   # July
    ... )
    """
    import shutil
    import zipfile

    import xarray as xr

    save_dir = Path(save_dir)
    save_dir.mkdir(parents=True, exist_ok=True)

    if month not in range(1, 13):
        raise ValueError(f"[CINEI] Invalid month: {month}. Must be 1-12.")

    if not (2000 <= year <= 2018):
        raise ValueError(
            f"[CINEI] Invalid year: {year}. HTAP coverage: 2000-2018."
        )

    # Same option checks as download_htap, so a typo fails here instead of
    # turning into a request for a non-existent archive.
    if resolution not in HTAP_REGISTRY["resolutions"]:
        raise ValueError(
            f"[CINEI] Invalid resolution: '{resolution}'\n"
            f"        Available: {HTAP_REGISTRY['resolutions']}"
        )
    if data_type not in HTAP_REGISTRY["types"]:
        raise ValueError(
            f"[CINEI] Invalid data_type: '{data_type}'\n"
            f"        Available: {HTAP_REGISTRY['types']}"
        )

    sp_keys  = _normalize_htap_species(species)
    mon_name = MONTH_NAMES[month]
    mon_idx  = month - 1  # zero-based position along the month axis

    print(f"[CINEI] HTAP v3 Monthly Extract")
    print(f"[CINEI] Year       : {year}")
    print(f"[CINEI] Month      : {month:02d} ({mon_name})")
    print(f"[CINEI] Resolution : {resolution}")
    print(f"[CINEI] Species    : "
          f"{[HTAP_SPECIES_FILENAME[k] for k in sp_keys]}")
    print()

    extracted = []
    for sp_key in sp_keys:
        sp_str   = HTAP_SPECIES_FILENAME[sp_key]
        zip_name = f"gridmaps_{resolution}_{data_type}_{sp_str}.zip"
        zip_path = save_dir / zip_name
        ann_dir  = save_dir / zip_name.replace(".zip", "")
        year_nc  = ann_dir / f"edgar_HTAPv3_{year}_{sp_str}.nc"

        # ── Download big zip, extract year zip, extract nc ───────────
        if not year_nc.exists():
            ann_dir.mkdir(parents=True, exist_ok=True)

            # Step 1: Download big zip if not present
            if not zip_path.exists():
                print(f"[CINEI] → Downloading {sp_str} ({resolution})...")
                url = (
                    f"https://zenodo.org/records/7516361/files/"
                    f"{zip_name}?download=1"
                )
                _download_with_resume(url, zip_path)
            else:
                print(f"[CINEI] → Using existing zip: {zip_name}")

            # Step 2: Extract year-specific zip from monthly/ folder
            year_zip_name = f"monthly/edgar_HTAPv3_{year}_{sp_str}.zip"
            print(f"[CINEI]   📦 Extracting {year_zip_name}...")
            with zipfile.ZipFile(zip_path, "r") as z:
                if year_zip_name not in z.namelist():
                    raise FileNotFoundError(
                        f"[CINEI] {year_zip_name} not found.\n"
                        f"        Available years: 2000-2018"
                    )
                z.extract(year_zip_name, ann_dir)

            # Step 3: Extract nc from year zip, then drop the inner zip
            year_zip_path = ann_dir / "monthly" / f"edgar_HTAPv3_{year}_{sp_str}.zip"
            with zipfile.ZipFile(year_zip_path, "r") as z:
                for member in z.namelist():
                    if member.endswith(".nc"):
                        z.extract(member, ann_dir)
            year_zip_path.unlink()

            # Step 4: Remove big zip to save space
            os.remove(zip_path)
            print(f"[CINEI]   ✅ Extracted: {year_nc.name}")
        else:
            print(f"[CINEI] → Already extracted: {year_nc.name}")

        # ── Extract month ──────────────────────────────────────────────
        print(f"[CINEI]   📅 Extracting month {month:02d} ({mon_name})...")
        out_name = (
            f"HTAP_v3_{sp_str}_{resolution}_{year}_{month:02d}"
            f"_{mon_name}_{data_type}.nc"
        )
        out_path = save_dir / out_name

        # The context manager closes the annual file before its directory
        # is removed below, and also if anything in here raises.
        with xr.open_dataset(year_nc) as ds:
            # Prefer a recognisably named month axis; otherwise fall back
            # to the dataset's first dimension.
            time_dims = [d for d in ds.dims if d in ("time", "month", "months")]
            if time_dims:
                ds_mon = ds.isel({time_dims[0]: mon_idx})
            else:
                ds_mon = ds.isel({list(ds.dims)[0]: mon_idx})

            ds_mon.attrs["month"]      = month
            ds_mon.attrs["month_name"] = mon_name
            ds_mon.attrs["year"]       = year
            ds_mon.attrs["source"]     = f"HTAP v3 {sp_str} {year}-{month:02d}"

            ds_mon.to_netcdf(out_path)

        print(f"[CINEI]   ✅ Saved: {out_name}")
        extracted.append(str(out_path))

        if not keep_annual and ann_dir.exists():
            shutil.rmtree(ann_dir)
            print(f"[CINEI]   🗑️  Removed annual dir: {ann_dir.name}/")

    print(f"\n[CINEI] ✅ Done! {len(extracted)} monthly file(s) saved.")
    return extracted

Download EDGAR v8.1 gridded air pollutant emission data from JRC FTP.

Coverage: 1970-2022, monthly, 0.1° x 0.1° resolution, 9 species. Each NetCDF file contains one year with 12 months and all sectors.

Parameters

save_dir : str Directory to save downloaded files. species : list of str, optional Species to download. Case-insensitive. e.g. ['NOx', 'SO2'] or ['nox', 'so2'] or ['PM2.5'] Default: all 9 species. Available: BC, CO, NH3, NMVOC, NOx, OC, PM10, PM2.5, SO2 years : list of int, optional Years to download. Range: 1970-2022. e.g. [2015, 2016, 2017] or list(range(2010, 2018)) Default: [2017] (single year) data_type : str, optional Data type. Options: - 'fluxes' : kg/m2/s [default] - 'emissions' : Mg/month extract : bool, optional If True, automatically unzip after download. Default True. keep_zip : bool, optional If True, keep .zip files after extraction. Default False.

Returns

list of str Paths to downloaded (and extracted) files.

Examples

import cinei

Download NOx and SO2 for 2017

cinei.download_edgar(
    save_dir='/work/bb1554/data/EDGAR',
    species=['NOx', 'SO2'],
    years=[2017]
)

Download all species for 2015-2017

cinei.download_edgar(
    save_dir='/work/bb1554/data/EDGAR',
    years=list(range(2015, 2018))
)

Source code in cinei/download.py

def download_edgar(save_dir, species=None, years=None,
                   data_type="fluxes", extract=True, keep_zip=False):
    """
    Download EDGAR v8.1 gridded air pollutant emission data from JRC FTP.

    Coverage: 1970-2022, monthly, 0.1° x 0.1° resolution, 9 species.
    Each NetCDF file contains one year with 12 months and all sectors.

    ``years`` and ``data_type`` are validated before ``save_dir`` is
    created, so a call rejected for those arguments leaves no empty
    directory behind.

    Parameters
    ----------
    save_dir : str
        Directory to save downloaded files. Created if missing.
    species : list of str, optional
        Species to download. Case-insensitive.
        e.g. ['NOx', 'SO2'] or ['nox', 'so2'] or ['PM2.5']
        Default: all 9 species.
        Available: BC, CO, NH3, NMVOC, NOx, OC, PM10, PM2.5, SO2
    years : int or iterable of int, optional
        Years to download. Range: 1970-2022.
        e.g. [2015, 2016, 2017] or list(range(2010, 2018))
        A single int such as ``2017`` is treated as ``[2017]``.
        Default: [2017]  (single year)
    data_type : str, optional
        Data type. Options:
        - 'fluxes'    : kg/m2/s  [default]
        - 'emissions' : Mg/month
    extract : bool, optional
        If True, automatically unzip after download. Default True.
    keep_zip : bool, optional
        If True, keep .zip files after extraction. Default False.

    Returns
    -------
    list of str
        One entry per (species, year): the extraction directory when
        ``extract`` is True, otherwise the path of the downloaded .zip.

    Raises
    ------
    ValueError
        If any year lies outside 1970-2022, or ``data_type`` is not a key
        of ``EDGAR_REGISTRY['types']``.

    Examples
    --------
    >>> import cinei
    >>> # Download NOx and SO2 for 2017
    >>> cinei.download_edgar(
    ...     save_dir='/work/bb1554/data/EDGAR',
    ...     species=['NOx', 'SO2'],
    ...     years=[2017]
    ... )

    >>> # Download all species for 2015-2017
    >>> cinei.download_edgar(
    ...     save_dir='/work/bb1554/data/EDGAR',
    ...     years=list(range(2015, 2018))
    ... )
    """
    import zipfile

    # ── Normalise and validate years ───────────────────────────────────
    if years is None:
        years = [2017]
    elif isinstance(years, int):
        # Accept a bare year instead of failing with "int is not iterable".
        years = [years]
    else:
        # Materialise once: the value is iterated, measured with len() and
        # sorted below, which a one-shot iterator would not survive.
        years = list(years)

    invalid_years = [y for y in years if not (1970 <= y <= 2022)]
    if invalid_years:
        raise ValueError(
            f"[CINEI] Invalid years: {invalid_years}\n"
            f"        EDGAR v8.1 coverage: 1970-2022"
        )

    # ── Validate data_type ─────────────────────────────────────────────
    if data_type not in EDGAR_REGISTRY["types"]:
        raise ValueError(
            f"[CINEI] Invalid data_type: '{data_type}'\n"
            f"        Available: {list(EDGAR_REGISTRY['types'].keys())}"
        )

    # ── Resolve species (normalisation is delegated to the helper) ─────
    if species is None:
        sp_keys = list(EDGAR_REGISTRY["species_filename"].keys())
    else:
        sp_keys = _normalize_edgar_species(species)

    # Only touch the filesystem once the checks above have passed.
    save_dir = Path(save_dir)
    save_dir.mkdir(parents=True, exist_ok=True)

    type_info = EDGAR_REGISTRY["types"][data_type]
    unit = "kg/m²/s" if data_type == "fluxes" else "Mg/month"

    print("[CINEI] EDGAR v8.1 Download")
    print(f"[CINEI] Source     : {EDGAR_REGISTRY['doi']}")
    print(f"[CINEI] Save to    : {save_dir}")
    print(f"[CINEI] Resolution : {EDGAR_REGISTRY['resolution']}")
    print(f"[CINEI] Data type  : {data_type} ({unit})")
    print(f"[CINEI] Species    : "
          f"{[EDGAR_REGISTRY['species_filename'][k] for k in sp_keys]}")
    print(f"[CINEI] Years      : {years}")
    print(f"[CINEI] Files      : {len(sp_keys) * len(years)} total")
    print()

    # Loop-invariant: sort once rather than once per species.
    ordered_years = sorted(years)

    downloaded = []
    for sp_key in sp_keys:
        sp_str = EDGAR_REGISTRY["species_filename"][sp_key]
        for year in ordered_years:
            fname = (
                f"v8.1_FT2022_AP_{sp_str}_{year}"
                f"_TOTALS_{type_info['suffix']}.zip"
            )
            url = (
                f"{EDGAR_REGISTRY['base_url']}/"
                f"{EDGAR_REGISTRY['dataset']}/"
                f"{sp_str}/TOTALS/{type_info['folder']}/{fname}"
            )
            zip_path = save_dir / fname

            print(f"[CINEI] → {sp_str}  {year}")

            # ── Download with resume ───────────────────────────────────
            _download_with_resume(url, zip_path)

            if not extract:
                downloaded.append(str(zip_path))
                continue

            # ── Extract ────────────────────────────────────────────────
            # The archive is unpacked into a sibling directory named after
            # the zip file without its extension.
            out_subdir = save_dir / fname.replace(".zip", "")
            out_subdir.mkdir(exist_ok=True)
            print(f"[CINEI]   📂 Extracting to: {out_subdir.name}/")
            with zipfile.ZipFile(zip_path, "r") as z:
                z.extractall(out_subdir)
            if not keep_zip:
                os.remove(zip_path)
            downloaded.append(str(out_subdir))

    print(f"\n[CINEI] ✅ Done! {len(downloaded)} file(s) downloaded.")
    print("\n[CINEI] Citation:")
    print(f"  {EDGAR_REGISTRY['citation']}")
    return downloaded

Print available EDGAR v8.1 species.

Source code in cinei/download.py

def list_edgar_species():
    """
    Print the EDGAR v8.1 species known to ``EDGAR_REGISTRY``.

    For every entry of ``EDGAR_REGISTRY['species_filename']`` one row is
    printed with the filename spelling of the species and the alternative
    spellings listed for it in ``EDGAR_SPECIES_VARIANTS``. The year
    coverage and resolution stored in the registry are printed last.

    Returns
    -------
    None
        Output is written to stdout only.
    """
    print("[CINEI] Available EDGAR v8.1 species (case-insensitive):")
    print(f"  {'Input':<10} → {'Filename':<10}  Coverage")
    print(f"  {'-'*45}")

    filename_by_key = EDGAR_REGISTRY["species_filename"]
    for sp_key in filename_by_key:
        canonical = filename_by_key[sp_key]
        # Skip the canonical spelling so only the alternatives are listed.
        aliases = [
            alias
            for alias in EDGAR_SPECIES_VARIANTS.get(sp_key, [])
            if alias != canonical
        ]
        print(f"  {canonical:<10}   also accepted: {aliases}")

    print(f"\n  Year coverage : {EDGAR_REGISTRY['coverage']}")
    print(f"  Resolution    : {EDGAR_REGISTRY['resolution']}")

Download EDGAR v8.1 data and extract a specific month.

Downloads the annual NetCDF file (if not already present), then extracts the requested month as a standalone [lat, lon] file.

Parameters

- save_dir (str): Directory to save files.
- species (list of str): Species to download, e.g. ['NOx', 'SO2']. Case-insensitive.
- year (int): Target year, e.g. 2017. Range: 1970-2022.
- month (int): Target month (1-12), e.g. 1 for January.
- data_type (str, optional): 'fluxes' (kg/m²/s, default) or 'emissions' (Mg/month).
- keep_annual (bool, optional): If True, keep the full annual NetCDF after extraction. Default False (saves disk space).

Returns

list of str Paths to extracted monthly NetCDF files.

Examples

import cinei

Download NOx and SO2 for January 2017

cinei.download_edgar_monthly(save_dir='/work/bb1554/data/EDGAR', species=['NOx', 'SO2'], year=2017, month=1)

Source code in cinei/download.py

def download_edgar_monthly(save_dir, species, year, month,
                           data_type="fluxes", keep_annual=False):
    """
    Download EDGAR v8.1 data and extract a specific month.

    For each species the annual archive is downloaded and unpacked (unless
    an unpacked NetCDF is already present under ``save_dir``), the requested
    month is selected by position and written to its own NetCDF file.

    ``month``, ``year`` and ``data_type`` are validated before any
    directory is created or any download starts.

    Parameters
    ----------
    save_dir : str
        Directory to save files. Created if missing.
    species : list of str
        Species to download, e.g. ['NOx', 'SO2']. Case-insensitive.
    year : int
        Target year, e.g. 2017. Range: 1970-2022.
    month : int
        Target month (1-12), e.g. 1 for January.
    data_type : str, optional
        'fluxes' (kg/m²/s, default) or 'emissions' (Mg/month).
    keep_annual : bool, optional
        If True, keep the full annual NetCDF after extraction.
        Default False (saves disk space).

    Returns
    -------
    list of str
        Paths to extracted monthly NetCDF files.

    Raises
    ------
    ValueError
        If ``month`` is not in 1-12, ``year`` is outside 1970-2022, or
        ``data_type`` is not a key of ``EDGAR_REGISTRY['types']``.
    FileNotFoundError
        If no ``*.nc`` file is found in the unpacked annual directory.

    Examples
    --------
    >>> import cinei
    >>> # Download NOx and SO2 for January 2017
    >>> cinei.download_edgar_monthly(
    ...     save_dir='/work/bb1554/data/EDGAR',
    ...     species=['NOx', 'SO2'],
    ...     year=2017,
    ...     month=1
    ... )
    """
    # ── Validate month ─────────────────────────────────────────────────
    if month not in range(1, 13):
        raise ValueError(
            f"[CINEI] Invalid month: {month}. Must be 1-12."
        )

    # ── Validate year (same coverage check as download_edgar) ──────────
    if not (1970 <= year <= 2022):
        raise ValueError(
            f"[CINEI] Invalid year: {year}\n"
            f"        EDGAR v8.1 coverage: 1970-2022"
        )

    # ── Validate data_type ─────────────────────────────────────────────
    # Raise the same ValueError as download_edgar instead of letting the
    # registry lookup below fail with a bare KeyError.
    if data_type not in EDGAR_REGISTRY["types"]:
        raise ValueError(
            f"[CINEI] Invalid data_type: '{data_type}'\n"
            f"        Available: {list(EDGAR_REGISTRY['types'].keys())}"
        )

    # Imported after validation so bad arguments fail fast, and once per
    # call rather than once per species.
    import shutil
    import zipfile

    import xarray as xr

    save_dir = Path(save_dir)
    save_dir.mkdir(parents=True, exist_ok=True)

    sp_keys   = _normalize_edgar_species(species)
    type_info = EDGAR_REGISTRY["types"][data_type]
    mon_name  = MONTH_NAMES[month]
    mon_idx   = month - 1   # 0-based position for isel()

    print("[CINEI] EDGAR v8.1 Monthly Extract")
    print(f"[CINEI] Year    : {year}")
    print(f"[CINEI] Month   : {month:02d} ({mon_name})")
    print(f"[CINEI] Species : {[EDGAR_REGISTRY['species_filename'][k] for k in sp_keys]}")
    print(f"[CINEI] Type    : {data_type}")
    print()

    extracted = []
    for sp_key in sp_keys:
        sp_str = EDGAR_REGISTRY["species_filename"][sp_key]

        # ── Step 1: Download annual zip if needed ──────────────────────
        zip_name = (
            f"v8.1_FT2022_AP_{sp_str}_{year}"
            f"_TOTALS_{type_info['suffix']}.zip"
        )
        zip_path   = save_dir / zip_name
        annual_dir = save_dir / zip_name.replace(".zip", "")

        # Reuse a previously unpacked annual NetCDF when one is present.
        nc_files = list(annual_dir.glob("*.nc")) if annual_dir.exists() else []

        if not nc_files:
            print(f"[CINEI] → Downloading annual file for {sp_str} {year}...")
            url = (
                f"{EDGAR_REGISTRY['base_url']}/"
                f"{EDGAR_REGISTRY['dataset']}/"
                f"{sp_str}/TOTALS/{type_info['folder']}/{zip_name}"
            )
            _download_with_resume(url, zip_path)

            annual_dir.mkdir(exist_ok=True)
            with zipfile.ZipFile(zip_path, "r") as z:
                z.extractall(annual_dir)
            os.remove(zip_path)
            nc_files = list(annual_dir.glob("*.nc"))
            print(f"[CINEI]   📂 Extracted: {annual_dir.name}/")
        else:
            print(f"[CINEI] → Annual file already exists: {annual_dir.name}/")

        if not nc_files:
            raise FileNotFoundError(
                f"[CINEI] No NetCDF found in {annual_dir}"
            )

        nc_path = nc_files[0]

        # ── Step 2 + 3: Extract month and save it ──────────────────────
        print(f"[CINEI]   📅 Extracting month {month:02d} ({mon_name})...")
        out_name = (
            f"EDGAR_v8.1_{sp_str}_{year}_{month:02d}_{mon_name}"
            f"_{data_type}.nc"
        )
        out_path = save_dir / out_name

        # The context manager closes the source file even if selection or
        # writing fails, and before the annual directory is deleted below.
        with xr.open_dataset(nc_path) as ds:
            # Prefer a recognised time-like dimension name.
            time_dims = [d for d in ds.dims if d in ("time", "month", "months")]
            if time_dims:
                sel_dim = time_dims[0]
            else:
                # NOTE(review): falls back to the first dimension, which is
                # assumed to be the monthly axis — confirm for new datasets.
                sel_dim = list(ds.dims)[0]
            ds_mon = ds.isel({sel_dim: mon_idx})

            ds_mon.attrs["month"]      = month
            ds_mon.attrs["month_name"] = mon_name
            ds_mon.attrs["year"]       = year
            ds_mon.attrs["source"]     = f"EDGAR v8.1 {sp_str} {year}-{month:02d}"

            ds_mon.to_netcdf(out_path)

        print(f"[CINEI]   ✅ Saved: {out_name}")
        extracted.append(str(out_path))

        # ── Step 4: Optionally remove annual file ──────────────────────
        if not keep_annual and annual_dir.exists():
            shutil.rmtree(annual_dir)
            print(f"[CINEI]   🗑️  Removed annual dir: {annual_dir.name}/")

    print(f"\n[CINEI] ✅ Done! {len(extracted)} monthly file(s) saved.")
    return extracted