Add crime per 1k people
This commit is contained in:
parent
245c16a212
commit
e0798b24f7
4 changed files with 136 additions and 2 deletions
|
|
@ -50,6 +50,7 @@ PBF := $(DATA_DIR)/great-britain-latest.osm.pbf
|
||||||
PLACES := $(DATA_DIR)/places.parquet
|
PLACES := $(DATA_DIR)/places.parquet
|
||||||
LISTINGS_BUY := $(DATA_DIR)/online_listings_buy.parquet
|
LISTINGS_BUY := $(DATA_DIR)/online_listings_buy.parquet
|
||||||
LISTINGS_RENT := $(DATA_DIR)/online_listings_rent.parquet
|
LISTINGS_RENT := $(DATA_DIR)/online_listings_rent.parquet
|
||||||
|
LSOA_POP := $(DATA_DIR)/lsoa_population.parquet
|
||||||
|
|
||||||
# Sentinel files for directory targets (Make doesn't track directories well)
|
# Sentinel files for directory targets (Make doesn't track directories well)
|
||||||
GEOSURE_STAMP := $(GEOSURE_DIR)/.done
|
GEOSURE_STAMP := $(GEOSURE_DIR)/.done
|
||||||
|
|
@ -63,7 +64,7 @@ PMTILES_VERSION := 1.22.3
|
||||||
download-arcgis download-price-paid download-deprivation download-ethnicity \
|
download-arcgis download-price-paid download-deprivation download-ethnicity \
|
||||||
download-naptan download-pois download-ofsted download-broadband download-rental-prices \
|
download-naptan download-pois download-ofsted download-broadband download-rental-prices \
|
||||||
download-postcodes download-geosure download-noise download-inspire \
|
download-postcodes download-geosure download-noise download-inspire \
|
||||||
download-oa-boundaries download-uprn-lookup download-transit-network download-greenspace download-pbf download-places \
|
download-oa-boundaries download-uprn-lookup download-transit-network download-greenspace download-pbf download-places download-lsoa-population \
|
||||||
transform-pois transform-epc-pp transform-crime transform-poi-proximity \
|
transform-pois transform-epc-pp transform-crime transform-poi-proximity \
|
||||||
transform-school-proximity transform-geosure transform-postcode-boundaries \
|
transform-school-proximity transform-geosure transform-postcode-boundaries \
|
||||||
generate-postcode-boundaries
|
generate-postcode-boundaries
|
||||||
|
|
@ -90,6 +91,7 @@ download-transit-network: $(TRANSIT_STAMP)
|
||||||
download-greenspace: $(GREENSPACE)
|
download-greenspace: $(GREENSPACE)
|
||||||
download-pbf: $(PBF)
|
download-pbf: $(PBF)
|
||||||
download-places: $(PLACES)
|
download-places: $(PLACES)
|
||||||
|
download-lsoa-population: $(LSOA_POP)
|
||||||
transform-pois: $(POIS_FILTERED)
|
transform-pois: $(POIS_FILTERED)
|
||||||
transform-epc-pp: $(EPC_PP)
|
transform-epc-pp: $(EPC_PP)
|
||||||
transform-crime: $(CRIME)
|
transform-crime: $(CRIME)
|
||||||
|
|
@ -182,6 +184,9 @@ $(GREENSPACE): $(PBF)
|
||||||
$(PLACES): $(PBF)
|
$(PLACES): $(PBF)
|
||||||
uv run python -m pipeline.download.places --output $@ --pbf $(PBF)
|
uv run python -m pipeline.download.places --output $@ --pbf $(PBF)
|
||||||
|
|
||||||
|
$(LSOA_POP):
|
||||||
|
uv run python -m pipeline.download.lsoa_population --output $@
|
||||||
|
|
||||||
# ── Transforms ────────────────────────────────────────────────────────────────
|
# ── Transforms ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
$(POIS_FILTERED): $(POIS_RAW) $(NAPTAN)
|
$(POIS_FILTERED): $(POIS_RAW) $(NAPTAN)
|
||||||
|
|
@ -228,7 +233,7 @@ $(PC_BOUNDARIES):
|
||||||
# ── Final merge → postcode.parquet + properties.parquet ──────────────────────
|
# ── Final merge → postcode.parquet + properties.parquet ──────────────────────
|
||||||
|
|
||||||
$(MERGE_STAMP): $(EPC_PP) $(ARCGIS) $(IOD) $(POI_PROXIMITY) \
|
$(MERGE_STAMP): $(EPC_PP) $(ARCGIS) $(IOD) $(POI_PROXIMITY) \
|
||||||
$(ETHNICITY) $(CRIME) $(NOISE) $(SCHOOL_PROX) $(BROADBAND) $(GEOSURE) $(RENTAL)
|
$(ETHNICITY) $(CRIME) $(NOISE) $(SCHOOL_PROX) $(BROADBAND) $(GEOSURE) $(RENTAL) $(LSOA_POP)
|
||||||
uv run python -m pipeline.transform.merge \
|
uv run python -m pipeline.transform.merge \
|
||||||
--epc-pp $(EPC_PP) \
|
--epc-pp $(EPC_PP) \
|
||||||
--arcgis $(ARCGIS) \
|
--arcgis $(ARCGIS) \
|
||||||
|
|
@ -241,6 +246,7 @@ $(MERGE_STAMP): $(EPC_PP) $(ARCGIS) $(IOD) $(POI_PROXIMITY) \
|
||||||
--broadband $(BROADBAND) \
|
--broadband $(BROADBAND) \
|
||||||
--geosure $(GEOSURE) \
|
--geosure $(GEOSURE) \
|
||||||
--rental-prices $(RENTAL) \
|
--rental-prices $(RENTAL) \
|
||||||
|
--lsoa-population $(LSOA_POP) \
|
||||||
--output-postcodes $(POSTCODES_PQ) \
|
--output-postcodes $(POSTCODES_PQ) \
|
||||||
--output-properties $(PROPERTIES_PQ)
|
--output-properties $(PROPERTIES_PQ)
|
||||||
@touch $@
|
@touch $@
|
||||||
|
|
|
||||||
71
pipeline/download/lsoa_population.py
Normal file
71
pipeline/download/lsoa_population.py
Normal file
|
|
@ -0,0 +1,71 @@
|
||||||
|
"""Download Census 2021 usual resident population by LSOA.
|
||||||
|
|
||||||
|
Source: NOMIS (ONS Census 2021 — TS001 dataset)
|
||||||
|
License: Open Government Licence v3.0
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
from io import BytesIO
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import polars as pl
|
||||||
|
|
||||||
|
# NOMIS API: Census 2021 TS001 (usual residents) by LSOA 2021 (TYPE151)
|
||||||
|
# c2021_restype_3=0 selects "Total: All usual residents"
|
||||||
|
# NOMIS paginates at 25,000 rows by default, so we paginate with recordoffset.
|
||||||
|
BASE_URL = "https://www.nomisweb.co.uk/api/v01/dataset/NM_2021_1.data.csv?date=latest&geography=TYPE151&measures=20100&c2021_restype_3=0&select=GEOGRAPHY_CODE,OBS_VALUE"
|
||||||
|
PAGE_SIZE = 25000
|
||||||
|
|
||||||
|
|
||||||
|
def download_and_convert(output_path: Path) -> None:
    """Download Census 2021 usual-resident counts per LSOA and save them as parquet.

    Pages through the NOMIS CSV endpoint in ``BASE_URL`` with ``recordoffset``
    (advancing by ``PAGE_SIZE``), concatenates the pages, renames the columns
    to ``lsoa21`` / ``population``, keeps only codes starting with "E" and
    writes the result as zstd-compressed parquet.

    Args:
        output_path: Destination parquet file; missing parent directories are
            created.

    Raises:
        httpx.HTTPStatusError: If a page request returns an error status.
        ValueError: If NOMIS returns no rows at all, or no rows remain after
            the England filter.
    """
    print("Downloading Census 2021 LSOA population from NOMIS...")
    frames = []
    offset = 0
    # One client for every page so the connection is reused between requests.
    with httpx.Client(follow_redirects=True, timeout=120) as client:
        while True:
            url = f"{BASE_URL}&recordoffset={offset}"
            response = client.get(url)
            response.raise_for_status()
            if len(response.content) == 0:
                break
            chunk = pl.read_csv(BytesIO(response.content))
            if chunk.height == 0:
                break
            frames.append(chunk)
            print(f" Fetched {chunk.height} rows (offset={offset})")
            # A short page means this was the last one.
            if chunk.height < PAGE_SIZE:
                break
            offset += PAGE_SIZE

    # pl.concat() rejects an empty list; fail with a message that names the cause.
    if not frames:
        raise ValueError("NOMIS returned no population rows")

    df = pl.concat(frames)
    print(f"Total rows: {df.height}")

    result = df.rename({"GEOGRAPHY_CODE": "lsoa21", "OBS_VALUE": "population"}).with_columns(
        pl.col("population").cast(pl.UInt32),
    )

    # Filter to England only (E prefix)
    result = result.filter(pl.col("lsoa21").str.starts_with("E"))

    # With zero rows min/max/mean are None and the ':.0f' format below would
    # raise TypeError, so stop here with an explicit error instead.
    if result.height == 0:
        raise ValueError("No England LSOAs (codes starting with 'E') in NOMIS response")

    print(f"England LSOAs: {result.height}")
    print(f"Population range: {result['population'].min()} - {result['population'].max()}")
    print(f"Mean population: {result['population'].mean():.0f}")

    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Write to a sibling temp file and rename, so an interrupted run never
    # leaves a truncated parquet at the final path (which a timestamp-based
    # build tool would treat as up to date).
    tmp_path = output_path.with_name(output_path.name + ".tmp")
    result.write_parquet(tmp_path, compression="zstd")
    tmp_path.replace(output_path)
    print(f"Saved to {output_path}")
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Command-line entry point: parse ``--output`` and run the download."""
    cli = argparse.ArgumentParser(description="Download Census 2021 population by LSOA")
    cli.add_argument("--output", type=Path, required=True, help="Output parquet file path")
    # The only option is the destination path; hand it straight to the downloader.
    download_and_convert(cli.parse_args().output)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
|
@ -43,6 +43,8 @@ _AREA_COLUMNS = [
|
||||||
"Other crime (avg/yr)",
|
"Other crime (avg/yr)",
|
||||||
"Serious crime (avg/yr)",
|
"Serious crime (avg/yr)",
|
||||||
"Minor crime (avg/yr)",
|
"Minor crime (avg/yr)",
|
||||||
|
"Serious crime per 1k residents (avg/yr)",
|
||||||
|
"Minor crime per 1k residents (avg/yr)",
|
||||||
# Amenities
|
# Amenities
|
||||||
"Number of restaurants within 2km",
|
"Number of restaurants within 2km",
|
||||||
"Number of grocery shops and supermarkets within 2km",
|
"Number of grocery shops and supermarkets within 2km",
|
||||||
|
|
@ -77,6 +79,7 @@ def _build(
|
||||||
broadband_path: Path,
|
broadband_path: Path,
|
||||||
geosure_path: Path,
|
geosure_path: Path,
|
||||||
rental_prices_path: Path,
|
rental_prices_path: Path,
|
||||||
|
lsoa_population_path: Path,
|
||||||
) -> tuple[pl.DataFrame, pl.DataFrame]:
|
) -> tuple[pl.DataFrame, pl.DataFrame]:
|
||||||
"""Build postcode and properties dataframes from epc_pp + auxiliary data.
|
"""Build postcode and properties dataframes from epc_pp + auxiliary data.
|
||||||
|
|
||||||
|
|
@ -171,6 +174,17 @@ def _build(
|
||||||
).alias("minor_crime_avg_yr"),
|
).alias("minor_crime_avg_yr"),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
lsoa_pop = pl.scan_parquet(lsoa_population_path)
|
||||||
|
wide = wide.join(lsoa_pop, on="lsoa21", how="left")
|
||||||
|
wide = wide.with_columns(
|
||||||
|
(pl.col("serious_crime_avg_yr") / pl.col("population") * 1000)
|
||||||
|
.round(1)
|
||||||
|
.alias("serious_crime_per_1k"),
|
||||||
|
(pl.col("minor_crime_avg_yr") / pl.col("population") * 1000)
|
||||||
|
.round(1)
|
||||||
|
.alias("minor_crime_per_1k"),
|
||||||
|
).drop("population")
|
||||||
|
|
||||||
poi_counts = pl.scan_parquet(poi_proximity_path)
|
poi_counts = pl.scan_parquet(poi_proximity_path)
|
||||||
wide = wide.join(poi_counts, on="postcode", how="left")
|
wide = wide.join(poi_counts, on="postcode", how="left")
|
||||||
|
|
||||||
|
|
@ -301,6 +315,8 @@ def _build(
|
||||||
"max_download_speed": "Max available download speed (Mbps)",
|
"max_download_speed": "Max available download speed (Mbps)",
|
||||||
"serious_crime_avg_yr": "Serious crime (avg/yr)",
|
"serious_crime_avg_yr": "Serious crime (avg/yr)",
|
||||||
"minor_crime_avg_yr": "Minor crime (avg/yr)",
|
"minor_crime_avg_yr": "Minor crime (avg/yr)",
|
||||||
|
"serious_crime_per_1k": "Serious crime per 1k residents (avg/yr)",
|
||||||
|
"minor_crime_per_1k": "Minor crime per 1k residents (avg/yr)",
|
||||||
"environmental_risk": "Environmental risk",
|
"environmental_risk": "Environmental risk",
|
||||||
"collapsible_deposits_risk": "Collapsible deposits risk",
|
"collapsible_deposits_risk": "Collapsible deposits risk",
|
||||||
"compressible_ground_risk": "Compressible ground risk",
|
"compressible_ground_risk": "Compressible ground risk",
|
||||||
|
|
@ -389,6 +405,12 @@ def main():
|
||||||
required=True,
|
required=True,
|
||||||
help="ONS rental prices by LA and bedroom count parquet file",
|
help="ONS rental prices by LA and bedroom count parquet file",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--lsoa-population",
|
||||||
|
type=Path,
|
||||||
|
required=True,
|
||||||
|
help="Census 2021 population by LSOA parquet file",
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--output-postcodes", type=Path, required=True, help="Output postcode parquet file path"
|
"--output-postcodes", type=Path, required=True, help="Output postcode parquet file path"
|
||||||
)
|
)
|
||||||
|
|
@ -409,6 +431,7 @@ def main():
|
||||||
broadband_path=args.broadband,
|
broadband_path=args.broadband,
|
||||||
geosure_path=args.geosure,
|
geosure_path=args.geosure,
|
||||||
rental_prices_path=args.rental_prices,
|
rental_prices_path=args.rental_prices,
|
||||||
|
lsoa_population_path=args.lsoa_population,
|
||||||
)
|
)
|
||||||
|
|
||||||
print(f"\nPostcode columns: {postcode_df.columns}")
|
print(f"\nPostcode columns: {postcode_df.columns}")
|
||||||
|
|
|
||||||
|
|
@ -606,6 +606,40 @@ pub static FEATURE_GROUPS: &[FeatureGroup] = &[
|
||||||
modes: &[],
|
modes: &[],
|
||||||
linked: "",
|
linked: "",
|
||||||
},
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Serious crime per 1k residents (avg/yr)",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 2.0,
|
||||||
|
high: 98.0,
|
||||||
|
},
|
||||||
|
step: 0.1,
|
||||||
|
description: "Serious crime rate per 1,000 residents per year",
|
||||||
|
detail: "Violence, robbery, burglary, and weapons possession per 1,000 usual residents per year in the LSOA. Uses police.uk street-level crime data (2023-2025) and Census 2021 population counts. Normalises for population density so areas are comparable regardless of size.",
|
||||||
|
source: "crime",
|
||||||
|
prefix: "",
|
||||||
|
suffix: "/yr",
|
||||||
|
raw: false,
|
||||||
|
absolute: false,
|
||||||
|
modes: &[],
|
||||||
|
linked: "",
|
||||||
|
},
|
||||||
|
FeatureConfig {
|
||||||
|
name: "Minor crime per 1k residents (avg/yr)",
|
||||||
|
bounds: Bounds::Percentile {
|
||||||
|
low: 2.0,
|
||||||
|
high: 98.0,
|
||||||
|
},
|
||||||
|
step: 0.1,
|
||||||
|
description: "Minor crime rate per 1,000 residents per year",
|
||||||
|
detail: "Anti-social behaviour, shoplifting, bicycle theft, and other lower-severity crime per 1,000 usual residents per year in the LSOA. Uses police.uk street-level crime data (2023-2025) and Census 2021 population counts. Normalises for population density so areas are comparable regardless of size.",
|
||||||
|
source: "crime",
|
||||||
|
prefix: "",
|
||||||
|
suffix: "/yr",
|
||||||
|
raw: false,
|
||||||
|
absolute: false,
|
||||||
|
modes: &[],
|
||||||
|
linked: "",
|
||||||
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
FeatureGroup {
|
FeatureGroup {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue