56 lines
1.6 KiB
Python
56 lines
1.6 KiB
Python
import argparse
|
|
from pathlib import Path
|
|
|
|
import httpx
|
|
import polars as pl
|
|
|
|
# Show every column when polars prints a DataFrame; per the polars Config
# docs, a negative count removes the column limit.
pl.Config.set_tbl_cols(-1)
|
|
|
|
|
|
# Download location of the 2021 population-by-ethnicity-and-local-authority
# CSV. Adjacent string literals are concatenated into a single URL.
URL = (
    "https://www.ethnicity-facts-figures.service.gov.uk"
    "/uk-population-by-ethnicity"
    "/national-and-regional-populations"
    "/regional-ethnic-diversity"
    "/latest/downloads"
    "/population-by-ethnicity-and-local-authority-2021.csv"
)
|
|
|
|
|
|
def download_and_convert(output_path: Path) -> None:
    """Download the ethnicity-by-local-authority CSV and save it as parquet.

    Fetches ``URL``, keeps only the rows whose ``Ethnicity_type`` is
    ``"ONS 2021 5+1"`` and whose ``Ethnicity`` is not ``"All"``, pivots them
    to one row per ``Geography_code`` with one column per ethnicity (values
    taken from ``Value1``), prefixes those columns with ``"% "``, and writes
    the result as a zstd-compressed parquet file. Progress is printed to
    stdout.

    Args:
        output_path: Destination of the parquet file. Missing parent
            directories are created.

    Raises:
        httpx.HTTPError: If the request fails or the server returns an
            error status (via ``raise_for_status``).
    """
    print("Downloading ethnicity data...")
    response = httpx.get(URL, follow_redirects=True, timeout=60)
    response.raise_for_status()

    df = pl.read_csv(response.content)
    # Report the real dimensions under the "shape" label; the table preview
    # is printed on its own so the log line matches what it claims to show.
    print(f"Raw shape: {df.shape}")
    print(df.head(100))

    # Keep only broad ethnicity categories (5+1), exclude "All" totals
    df = df.filter(
        (pl.col("Ethnicity_type") == "ONS 2021 5+1") & (pl.col("Ethnicity") != "All")
    )

    # Pivot: one row per local authority, columns = ethnicity percentages
    wide = df.pivot(
        on="Ethnicity",
        index="Geography_code",
        values="Value1",
    )

    # Rename columns to be descriptive
    rename_map = {col: f"% {col}" for col in wide.columns if col != "Geography_code"}
    wide = wide.rename(rename_map)

    print(f"Output shape: {wide.shape}")
    print(f"Columns: {wide.columns}")

    # Create the target directory first so writing does not fail merely
    # because the parent folder is missing.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    wide.write_parquet(output_path, compression="zstd")
    print(f"Saved to {output_path}")
|
|
|
|
|
|
def main() -> None:
    """Parse the command line and run the download/convert step.

    Reads the required ``--output`` option (converted to a ``Path``) and
    hands it to ``download_and_convert``.
    """
    cli = argparse.ArgumentParser(
        description="Download and convert ethnicity by local authority data",
    )
    cli.add_argument(
        "--output",
        required=True,
        type=Path,
        help="Output parquet file path",
    )
    output_path = cli.parse_args().output
    download_and_convert(output_path)
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|