"""NISRA Population Projections for Northern Ireland.
Provides access to official NISRA population projections with demographic breakdowns
by year, age, sex, and projection variant.
NI-level projections (2024-based, 2024-2074) are served via the PxStat API.
LGD sub-area projections are not yet available via PxStat and remain Excel-based.
Data Source:
**PxStat API** (NI-level, used by this module):
https://ws-data.nisra.gov.uk/public/api.restful/PxStat.Data.Cube_API.ReadDataset/{MATRIX}/CSV/1.0/en
Matrix codes:
- ``PPMY02T01``: NI projections by single year of age (0-90+) and sex — principal + variants
- ``PPMY02T02``: NI projections by 5-year age bands and sex — principal only
- ``PPMY02T03``: Variant projections (high/low fertility, life expectancy, migration)
**Original publication pages** (for reference and LGD projections):
- Principal: https://www.nisra.gov.uk/publications/2024-based-population-projections-northern-ireland
- Variants: https://www.nisra.gov.uk/publications/2024-based-population-projections-northern-ireland-variant-projections
- LGD sub-areas: https://www.nisra.gov.uk/publications/2022-based-population-projections-areas-within-northern-ireland
Update Frequency: Biennial (NI-level)
Geographic Coverage: Northern Ireland overall (LGD projections not yet in PxStat)
Projection Horizon: 2024-2074 (NI-level via API)
Example:
>>> from bolster.data_sources.nisra import population_projections
>>> df = population_projections.get_latest_projections()
>>> 'population' in df.columns
True
>>> df_decade = population_projections.get_latest_projections(
... start_year=2025,
... end_year=2035
... )
>>> len(df_decade) > 0
True
"""
import logging
import pandas as pd
from ._base import NISRAValidationError
from .pxstat import PxStatError, read_dataset # noqa: F401 — re-exported for callers
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# PxStat matrix codes passed to ``read_dataset`` by the functions in this module.
# NOTE(review): the module docstring describes PPMY02T01 as "principal + variants",
# while the inline comment below says principal only — confirm which is accurate.
_MATRIX_SYA = "PPMY02T01" # single year of age, principal projection
_MATRIX_5YR = "PPMY02T02" # 5-year age bands, principal projection
_MATRIX_VARIANTS = "PPMY02T03" # variant projections
def get_latest_projections(
    start_year: int | None = None,
    end_year: int | None = None,
    age_groups: str = "5yr",
    force_refresh: bool = False,
) -> pd.DataFrame:
    """Retrieve NI population projections (principal projection).

    Args:
        start_year: First projection year to include (default: first available).
        end_year: Last projection year to include (default: last available).
        age_groups: Age breakdown format:

            - ``'5yr'``: 5-year age bands (default) — smaller result set
            - ``'single'``: Single year of age (0-90+) — larger result set

            Any other value falls back to ``'5yr'`` and logs a warning.
        force_refresh: Ignored — kept for API compatibility. The PxStat API
            always returns current data.

    Returns:
        DataFrame with columns:
        ``year``, ``age_group``, ``sex``, ``population``, ``base_year``,
        sorted by ``year``, ``age_group`` and ``sex``. ``base_year`` is the
        constant 2024 on every row.

    Raises:
        NISRAValidationError: If the dataset lacks an expected column, or if
            no rows remain after the year filters are applied.
        PxStatError: If the API request fails.

    Example:
        >>> df = get_latest_projections()
        >>> 'population' in df.columns
        True
    """
    if force_refresh:
        logger.debug("force_refresh is ignored for PxStat-backed modules")

    if age_groups == "single":
        matrix, age_col = _MATRIX_SYA, "Single year of age"
    else:
        if age_groups != "5yr":
            # Keep the historical fallback, but stop doing it silently: a typo
            # such as "Single" would otherwise quietly return 5-year bands.
            logger.warning("Unrecognised age_groups %r; falling back to '5yr'", age_groups)
        matrix, age_col = _MATRIX_5YR, "Five year age bands"

    df = read_dataset(matrix)

    column_map = {
        "Year": "year",
        age_col: "age_group",
        "Sex Label": "sex",
        "VALUE": "population",
    }
    # Surface a changed API schema as the documented validation error rather
    # than a bare KeyError from the column selection below.
    missing = [source for source in column_map if source not in df.columns]
    if missing:
        raise NISRAValidationError(f"Population projections data is missing columns: {missing}")

    result = df[list(column_map)].rename(columns=column_map)
    result["base_year"] = 2024

    # Compare against None so that an explicit bound is never skipped merely
    # because it happens to be falsy.
    if start_year is not None:
        result = result[result["year"] >= start_year]
    if end_year is not None:
        result = result[result["year"] <= end_year]

    result = result.sort_values(["year", "age_group", "sex"]).reset_index(drop=True)
    if result.empty:
        raise NISRAValidationError("Population projections data is empty")
    return result
def get_variant_projections(
    variant: str | None = None,
    start_year: int | None = None,
    end_year: int | None = None,
    force_refresh: bool = False,
) -> pd.DataFrame:
    """Retrieve NI population projections including variant scenarios.

    Args:
        variant: Filter to a specific variant label (partial match, case-insensitive).
            E.g. ``'high fertility'``, ``'low fertility'``, ``'high life expectancy'``.
            The text is matched literally, not as a regular expression.
            If None (or empty), all variants are returned.
        start_year: First projection year to include.
        end_year: Last projection year to include.
        force_refresh: Ignored — kept for API compatibility.

    Returns:
        DataFrame with columns:
        ``year``, ``age_group``, ``sex``, ``variant``, ``population``,
        sorted by ``variant``, ``year``, ``age_group`` and ``sex``. The result
        may be empty if the filters match nothing.

    Raises:
        Whatever ``read_dataset`` raises is propagated unchanged (presumably
        ``PxStatError`` when the API request fails — confirm against pxstat).
    """
    if force_refresh:
        logger.debug("force_refresh is ignored for PxStat-backed modules")

    df = read_dataset(_MATRIX_VARIANTS)
    result = df[["Year", "Single year of age", "Sex Label", "Variant Label", "VALUE"]].rename(
        columns={
            "Year": "year",
            "Single year of age": "age_group",
            "Sex Label": "sex",
            "Variant Label": "variant",
            "VALUE": "population",
        }
    )

    if variant:
        # regex=False: the argument is documented as a plain partial match, so
        # characters like "(" or "+" must not be interpreted as a pattern.
        # na=False: rows with a missing label simply do not match, instead of
        # yielding an NA mask that cannot be used for boolean indexing.
        matches = result["variant"].str.lower().str.contains(variant.lower(), regex=False, na=False)
        result = result[matches]

    # Compare against None so that an explicit bound is never skipped merely
    # because it happens to be falsy.
    if start_year is not None:
        result = result[result["year"] >= start_year]
    if end_year is not None:
        result = result[result["year"] <= end_year]

    return result.sort_values(["variant", "year", "age_group", "sex"]).reset_index(drop=True)
def validate_projections(df: pd.DataFrame) -> bool:
    """Run basic integrity checks on a projections DataFrame.

    The checks run in order and the first failure wins: required columns
    present, at least one row, and no negative ``population`` values.

    Args:
        df: DataFrame from :func:`get_latest_projections`.

    Returns:
        True when every check passes.

    Raises:
        NISRAValidationError: On the first check that fails.
    """
    absent = {"year", "age_group", "sex", "population"} - set(df.columns)

    # Pick the first applicable problem; only fall through to the raise below
    # when one was found.
    if absent:
        problem = f"Missing required columns: {absent}"
    elif df.empty:
        problem = "DataFrame is empty"
    elif df["population"].lt(0).any():
        problem = "Negative population values found"
    else:
        return True

    raise NISRAValidationError(problem)