Add crime per 1k people

This commit is contained in:
Andras Schmelczer 2026-03-08 21:02:29 +00:00
parent 245c16a212
commit e0798b24f7
4 changed files with 136 additions and 2 deletions

View file

@ -0,0 +1,71 @@
"""Download Census 2021 usual resident population by LSOA.
Source: NOMIS (ONS Census 2021 TS001 dataset)
License: Open Government Licence v3.0
"""
import argparse
from io import BytesIO
from pathlib import Path
import httpx
import polars as pl
# Census 2021 TS001 (usual residents) from the NOMIS API, one row per
# LSOA 2021 (geography TYPE151).
# c2021_restype_3=0 picks the "Total: All usual residents" category.
# NOMIS pages its output at 25,000 rows by default, so the download walks
# through the result set with recordoffset in steps of PAGE_SIZE.
PAGE_SIZE = 25000
BASE_URL = (
    "https://www.nomisweb.co.uk/api/v01/dataset/NM_2021_1.data.csv"
    "?date=latest"
    "&geography=TYPE151"
    "&measures=20100"
    "&c2021_restype_3=0"
    "&select=GEOGRAPHY_CODE,OBS_VALUE"
)
def download_and_convert(output_path: Path) -> None:
    """Download Census 2021 LSOA population from NOMIS and save it as parquet.

    Requests the NOMIS CSV endpoint page by page, advancing ``recordoffset``
    by ``PAGE_SIZE`` until an empty or short page is returned. The pages are
    concatenated, the columns renamed to ``lsoa21`` and ``population``
    (cast to UInt32), and only rows whose code starts with "E" are kept.
    The result is written to *output_path* with zstd compression; missing
    parent directories are created.

    Args:
        output_path: Destination parquet file.

    Raises:
        httpx.HTTPStatusError: If a page request returns an error status.
        ValueError: If no rows were returned at all.
    """
    print("Downloading Census 2021 LSOA population from NOMIS...")

    frames = []
    offset = 0
    # One client for every page: the connection is reused between requests
    # and is closed even if a request or the CSV parse fails.
    with httpx.Client(follow_redirects=True, timeout=120) as client:
        while True:
            response = client.get(f"{BASE_URL}&recordoffset={offset}")
            response.raise_for_status()
            # Empty body: nothing to parse, treat it as the end of the data.
            if not response.content:
                break
            chunk = pl.read_csv(BytesIO(response.content))
            if chunk.height == 0:
                break
            frames.append(chunk)
            print(f" Fetched {chunk.height} rows (offset={offset})")
            # A page shorter than PAGE_SIZE is the last one.
            if chunk.height < PAGE_SIZE:
                break
            offset += PAGE_SIZE

    # pl.concat rejects an empty list; fail with an actionable message instead.
    if not frames:
        raise ValueError(
            "NOMIS returned no rows; check BASE_URL and the service status"
        )

    df = pl.concat(frames)
    print(f"Total rows: {df.height}")

    result = df.rename({"GEOGRAPHY_CODE": "lsoa21", "OBS_VALUE": "population"}).with_columns(
        pl.col("population").cast(pl.UInt32),
    )
    # Filter to England only (E prefix)
    result = result.filter(pl.col("lsoa21").str.starts_with("E"))

    print(f"England LSOAs: {result.height}")
    print(f"Population range: {result['population'].min()} - {result['population'].max()}")
    print(f"Mean population: {result['population'].mean():.0f}")

    output_path.parent.mkdir(parents=True, exist_ok=True)
    result.write_parquet(output_path, compression="zstd")
    print(f"Saved to {output_path}")
def main() -> None:
    """Parse the command line and run the download.

    Expects one required option, ``--output``, naming the parquet file that
    ``download_and_convert`` should write.
    """
    cli = argparse.ArgumentParser(description="Download Census 2021 population by LSOA")
    cli.add_argument(
        "--output",
        required=True,
        type=Path,
        help="Output parquet file path",
    )
    options = cli.parse_args()
    download_and_convert(options.output)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()