Add crime per 1k people
This commit is contained in:
parent
245c16a212
commit
e0798b24f7
4 changed files with 136 additions and 2 deletions
|
|
@ -50,6 +50,7 @@ PBF := $(DATA_DIR)/great-britain-latest.osm.pbf
|
|||
PLACES := $(DATA_DIR)/places.parquet
|
||||
LISTINGS_BUY := $(DATA_DIR)/online_listings_buy.parquet
|
||||
LISTINGS_RENT := $(DATA_DIR)/online_listings_rent.parquet
|
||||
LSOA_POP := $(DATA_DIR)/lsoa_population.parquet
|
||||
|
||||
# Sentinel files for directory targets (Make doesn't track directories well)
|
||||
GEOSURE_STAMP := $(GEOSURE_DIR)/.done
|
||||
|
|
@ -63,7 +64,7 @@ PMTILES_VERSION := 1.22.3
|
|||
download-arcgis download-price-paid download-deprivation download-ethnicity \
|
||||
download-naptan download-pois download-ofsted download-broadband download-rental-prices \
|
||||
download-postcodes download-geosure download-noise download-inspire \
|
||||
download-oa-boundaries download-uprn-lookup download-transit-network download-greenspace download-pbf download-places \
|
||||
download-oa-boundaries download-uprn-lookup download-transit-network download-greenspace download-pbf download-places download-lsoa-population \
|
||||
transform-pois transform-epc-pp transform-crime transform-poi-proximity \
|
||||
transform-school-proximity transform-geosure transform-postcode-boundaries \
|
||||
generate-postcode-boundaries
|
||||
|
|
@ -90,6 +91,7 @@ download-transit-network: $(TRANSIT_STAMP)
|
|||
download-greenspace: $(GREENSPACE)
|
||||
download-pbf: $(PBF)
|
||||
download-places: $(PLACES)
|
||||
download-lsoa-population: $(LSOA_POP)
|
||||
transform-pois: $(POIS_FILTERED)
|
||||
transform-epc-pp: $(EPC_PP)
|
||||
transform-crime: $(CRIME)
|
||||
|
|
@ -182,6 +184,9 @@ $(GREENSPACE): $(PBF)
|
|||
$(PLACES): $(PBF)
|
||||
uv run python -m pipeline.download.places --output $@ --pbf $(PBF)
|
||||
|
||||
$(LSOA_POP):
|
||||
uv run python -m pipeline.download.lsoa_population --output $@
|
||||
|
||||
# ── Transforms ────────────────────────────────────────────────────────────────
|
||||
|
||||
$(POIS_FILTERED): $(POIS_RAW) $(NAPTAN)
|
||||
|
|
@ -228,7 +233,7 @@ $(PC_BOUNDARIES):
|
|||
# ── Final merge → postcode.parquet + properties.parquet ──────────────────────
|
||||
|
||||
$(MERGE_STAMP): $(EPC_PP) $(ARCGIS) $(IOD) $(POI_PROXIMITY) \
|
||||
$(ETHNICITY) $(CRIME) $(NOISE) $(SCHOOL_PROX) $(BROADBAND) $(GEOSURE) $(RENTAL)
|
||||
$(ETHNICITY) $(CRIME) $(NOISE) $(SCHOOL_PROX) $(BROADBAND) $(GEOSURE) $(RENTAL) $(LSOA_POP)
|
||||
uv run python -m pipeline.transform.merge \
|
||||
--epc-pp $(EPC_PP) \
|
||||
--arcgis $(ARCGIS) \
|
||||
|
|
@ -241,6 +246,7 @@ $(MERGE_STAMP): $(EPC_PP) $(ARCGIS) $(IOD) $(POI_PROXIMITY) \
|
|||
--broadband $(BROADBAND) \
|
||||
--geosure $(GEOSURE) \
|
||||
--rental-prices $(RENTAL) \
|
||||
--lsoa-population $(LSOA_POP) \
|
||||
--output-postcodes $(POSTCODES_PQ) \
|
||||
--output-properties $(PROPERTIES_PQ)
|
||||
@touch $@
|
||||
|
|
|
|||
71
pipeline/download/lsoa_population.py
Normal file
71
pipeline/download/lsoa_population.py
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
"""Download Census 2021 usual resident population by LSOA.
|
||||
|
||||
Source: NOMIS (ONS Census 2021 — TS001 dataset)
|
||||
License: Open Government Licence v3.0
|
||||
"""
|
||||
|
||||
import argparse
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
import polars as pl
|
||||
|
||||
# NOMIS API: Census 2021 TS001 (usual residents) by LSOA 2021 (TYPE151)
|
||||
# c2021_restype_3=0 selects "Total: All usual residents"
|
||||
# NOMIS paginates at 25,000 rows by default, so we paginate with recordoffset.
|
||||
BASE_URL = "https://www.nomisweb.co.uk/api/v01/dataset/NM_2021_1.data.csv?date=latest&geography=TYPE151&measures=20100&c2021_restype_3=0&select=GEOGRAPHY_CODE,OBS_VALUE"
|
||||
PAGE_SIZE = 25000
|
||||
|
||||
|
||||
def download_and_convert(output_path: Path) -> None:
    """Download Census 2021 LSOA population counts from NOMIS and save as parquet.

    Pages through the NOMIS CSV endpoint using ``recordoffset`` in steps of
    ``PAGE_SIZE``, renames the columns to ``lsoa21`` / ``population``, keeps
    only rows whose code starts with "E", and writes a zstd-compressed
    parquet file.

    Args:
        output_path: Destination parquet path. Parent directories are created
            if missing.

    Raises:
        httpx.HTTPStatusError: If any page request returns an error status.
        ValueError: If NOMIS returned no rows at all, or no rows with an
            "E"-prefixed code remain after filtering.
    """
    print("Downloading Census 2021 LSOA population from NOMIS...")
    frames = []
    offset = 0
    # One client for every page so the underlying connection is reused.
    with httpx.Client(follow_redirects=True, timeout=120) as client:
        while True:
            url = f"{BASE_URL}&recordoffset={offset}"
            response = client.get(url)
            response.raise_for_status()
            # An empty body cannot be parsed as CSV; treat it as "no more pages".
            if not response.content:
                break
            chunk = pl.read_csv(BytesIO(response.content))
            if chunk.height == 0:
                break
            frames.append(chunk)
            print(f"  Fetched {chunk.height} rows (offset={offset})")
            # A short page means this was the last one.
            if chunk.height < PAGE_SIZE:
                break
            offset += PAGE_SIZE

    # Fail with a clear message instead of the opaque error pl.concat raises
    # for an empty list.
    if not frames:
        raise ValueError("NOMIS returned no rows for the LSOA population query")

    df = pl.concat(frames)
    print(f"Total rows: {df.height}")

    result = df.rename({"GEOGRAPHY_CODE": "lsoa21", "OBS_VALUE": "population"}).with_columns(
        pl.col("population").cast(pl.UInt32),
    )

    # Filter to England only (E prefix)
    result = result.filter(pl.col("lsoa21").str.starts_with("E"))

    print(f"England LSOAs: {result.height}")
    # Never write an empty file: downstream steps would treat it as valid data.
    if result.height == 0:
        raise ValueError("No England (E-prefixed) LSOA rows found in the NOMIS response")
    print(f"Population range: {result['population'].min()} - {result['population'].max()}")
    print(f"Mean population: {result['population'].mean():.0f}")

    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Write to a sibling temp file and rename, so an interrupted run never
    # leaves a truncated parquet file at the final path.
    tmp_path = output_path.with_name(output_path.name + ".tmp")
    result.write_parquet(tmp_path, compression="zstd")
    tmp_path.replace(output_path)
    print(f"Saved to {output_path}")
|
||||
|
||||
|
||||
def main() -> None:
    """Command-line entry point: read ``--output`` and run the download."""
    cli = argparse.ArgumentParser(description="Download Census 2021 population by LSOA")
    cli.add_argument(
        "--output",
        required=True,
        type=Path,
        help="Output parquet file path",
    )
    # The only option is the destination path; hand it straight to the downloader.
    download_and_convert(cli.parse_args().output)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -43,6 +43,8 @@ _AREA_COLUMNS = [
|
|||
"Other crime (avg/yr)",
|
||||
"Serious crime (avg/yr)",
|
||||
"Minor crime (avg/yr)",
|
||||
"Serious crime per 1k residents (avg/yr)",
|
||||
"Minor crime per 1k residents (avg/yr)",
|
||||
# Amenities
|
||||
"Number of restaurants within 2km",
|
||||
"Number of grocery shops and supermarkets within 2km",
|
||||
|
|
@ -77,6 +79,7 @@ def _build(
|
|||
broadband_path: Path,
|
||||
geosure_path: Path,
|
||||
rental_prices_path: Path,
|
||||
lsoa_population_path: Path,
|
||||
) -> tuple[pl.DataFrame, pl.DataFrame]:
|
||||
"""Build postcode and properties dataframes from epc_pp + auxiliary data.
|
||||
|
||||
|
|
@ -171,6 +174,17 @@ def _build(
|
|||
).alias("minor_crime_avg_yr"),
|
||||
)
|
||||
|
||||
lsoa_pop = pl.scan_parquet(lsoa_population_path)
|
||||
wide = wide.join(lsoa_pop, on="lsoa21", how="left")
|
||||
wide = wide.with_columns(
|
||||
(pl.col("serious_crime_avg_yr") / pl.col("population") * 1000)
|
||||
.round(1)
|
||||
.alias("serious_crime_per_1k"),
|
||||
(pl.col("minor_crime_avg_yr") / pl.col("population") * 1000)
|
||||
.round(1)
|
||||
.alias("minor_crime_per_1k"),
|
||||
).drop("population")
|
||||
|
||||
poi_counts = pl.scan_parquet(poi_proximity_path)
|
||||
wide = wide.join(poi_counts, on="postcode", how="left")
|
||||
|
||||
|
|
@ -301,6 +315,8 @@ def _build(
|
|||
"max_download_speed": "Max available download speed (Mbps)",
|
||||
"serious_crime_avg_yr": "Serious crime (avg/yr)",
|
||||
"minor_crime_avg_yr": "Minor crime (avg/yr)",
|
||||
"serious_crime_per_1k": "Serious crime per 1k residents (avg/yr)",
|
||||
"minor_crime_per_1k": "Minor crime per 1k residents (avg/yr)",
|
||||
"environmental_risk": "Environmental risk",
|
||||
"collapsible_deposits_risk": "Collapsible deposits risk",
|
||||
"compressible_ground_risk": "Compressible ground risk",
|
||||
|
|
@ -389,6 +405,12 @@ def main():
|
|||
required=True,
|
||||
help="ONS rental prices by LA and bedroom count parquet file",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--lsoa-population",
|
||||
type=Path,
|
||||
required=True,
|
||||
help="Census 2021 population by LSOA parquet file",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-postcodes", type=Path, required=True, help="Output postcode parquet file path"
|
||||
)
|
||||
|
|
@ -409,6 +431,7 @@ def main():
|
|||
broadband_path=args.broadband,
|
||||
geosure_path=args.geosure,
|
||||
rental_prices_path=args.rental_prices,
|
||||
lsoa_population_path=args.lsoa_population,
|
||||
)
|
||||
|
||||
print(f"\nPostcode columns: {postcode_df.columns}")
|
||||
|
|
|
|||
|
|
@ -606,6 +606,40 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
|||
modes: &[],
|
||||
linked: "",
|
||||
},
|
||||
FeatureConfig {
|
||||
name: "Serious crime per 1k residents (avg/yr)",
|
||||
bounds: Bounds::Percentile {
|
||||
low: 2.0,
|
||||
high: 98.0,
|
||||
},
|
||||
step: 0.1,
|
||||
description: "Serious crime rate per 1,000 residents per year",
|
||||
detail: "Violence, robbery, burglary, and weapons possession per 1,000 usual residents per year in the LSOA. Uses police.uk street-level crime data (2023-2025) and Census 2021 population counts. Normalises for population density so areas are comparable regardless of size.",
|
||||
source: "crime",
|
||||
prefix: "",
|
||||
suffix: "/yr",
|
||||
raw: false,
|
||||
absolute: false,
|
||||
modes: &[],
|
||||
linked: "",
|
||||
},
|
||||
FeatureConfig {
|
||||
name: "Minor crime per 1k residents (avg/yr)",
|
||||
bounds: Bounds::Percentile {
|
||||
low: 2.0,
|
||||
high: 98.0,
|
||||
},
|
||||
step: 0.1,
|
||||
description: "Minor crime rate per 1,000 residents per year",
|
||||
detail: "Anti-social behaviour, shoplifting, bicycle theft, and other lower-severity crime per 1,000 usual residents per year in the LSOA. Uses police.uk street-level crime data (2023-2025) and Census 2021 population counts. Normalises for population density so areas are comparable regardless of size.",
|
||||
source: "crime",
|
||||
prefix: "",
|
||||
suffix: "/yr",
|
||||
raw: false,
|
||||
absolute: false,
|
||||
modes: &[],
|
||||
linked: "",
|
||||
},
|
||||
],
|
||||
},
|
||||
FeatureGroup {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue