No geosure

This commit is contained in:
Andras Schmelczer 2026-03-24 22:52:33 +00:00
parent c4423b6c9a
commit 300209b192
3 changed files with 2 additions and 141 deletions

View file

@ -37,8 +37,6 @@ NAPTAN := $(DATA_DIR)/naptan.parquet
BROADBAND := $(DATA_DIR)/broadband.parquet
SCHOOL_PROX := $(DATA_DIR)/school_proximity.parquet
RENTAL := $(DATA_DIR)/rental_prices.parquet
GEOSURE_DIR := $(DATA_DIR)/geosure
GEOSURE := $(DATA_DIR)/geosure.parquet
INSPIRE_DIR := $(DATA_DIR)/inspire
OA_BOUNDARIES := $(DATA_DIR)/oa_boundaries.gpkg
UPRN_LOOKUP := $(DATA_DIR)/uprn_lookup.parquet
@ -55,7 +53,6 @@ ENGLAND_BOUNDARY := $(DATA_DIR)/england_boundary.geojson
RM_OUTCODES := frontend/src/lib/rightmove-outcodes.json
# Sentinel files for directory targets (Make doesn't track directories well)
GEOSURE_STAMP := $(GEOSURE_DIR)/.done
INSPIRE_STAMP := $(INSPIRE_DIR)/.done
PMTILES_VERSION := 1.22.3

View file

@ -58,14 +58,8 @@ _AREA_COLUMNS = [
# Schools
"Good+ primary schools within 5km",
"Good+ secondary schools within 5km",
# GeoSure
"Environmental risk",
"Collapsible deposits risk",
"Compressible ground risk",
"Landslide risk",
"Running sand risk",
"Shrink-swell risk",
"Soluble rocks risk",
# Demographics
"Median age",
]

View file

@ -1,130 +0,0 @@
"""Spatial-join GeoSure 5km hex grid risk data to postcode centroids.
Reads six ESRI Shapefiles (one per hazard type), converts polygons from
BNG to WGS84, and queries postcode centroids against them using an STRtree.
Outputs a postcode-level parquet with six risk columns plus an overall max.
Source: Ordnance Survey GeoSure (Open Data)
"""
import argparse
from pathlib import Path
import numpy as np
import polars as pl
import shapefile as shp
from pyproj import Transformer
from shapely import STRtree, points
from shapely.geometry import shape as to_shapely
_GEOSURE_RISKS = [
("CollapsibleDeposits", "collapsible_deposits_risk"),
("CompressibleGround", "compressible_ground_risk"),
("Landslides", "landslide_risk"),
("RunningSand", "running_sand_risk"),
("ShrinkSwell", "shrink_swell_risk"),
("SolubleRocks", "soluble_rocks_risk"),
]
def transform(geosure_dir: Path, arcgis_path: Path) -> pl.DataFrame:
"""Spatial-join GeoSure 5km hex grid risk data to postcode centroids."""
to_wgs84 = Transformer.from_crs("EPSG:27700", "EPSG:4326", always_xy=True)
print("Loading postcode centroids for GeoSure join...")
postcodes_df = pl.read_parquet(arcgis_path, columns=["pcds", "lat", "long"])
lats = postcodes_df["lat"].to_numpy()
lons = postcodes_df["long"].to_numpy()
pts = points(np.column_stack([lons, lats]))
result = pl.DataFrame({"postcode": postcodes_df["pcds"]})
risk_cols = []
for risk_key, col_name in _GEOSURE_RISKS:
shp_files = list(geosure_dir.rglob(f"*_GS_{risk_key}_*.shp"))
if not shp_files:
print(f" Warning: No shapefile found for {risk_key}")
result = result.with_columns(pl.lit(None).cast(pl.UInt8).alias(col_name))
risk_cols.append(col_name)
continue
print(f" Reading {shp_files[0].name}...")
reader = shp.Reader(str(shp_files[0]))
hex_polys = []
hex_classes = []
for sr in reader.shapeRecords():
geo = sr.shape.__geo_interface__
new_coords = []
for ring in geo["coordinates"]:
xs, ys = zip(*ring)
t_lons, t_lats = to_wgs84.transform(list(xs), list(ys))
new_coords.append(list(zip(t_lons, t_lats)))
geo["coordinates"] = new_coords
hex_polys.append(to_shapely(geo))
hex_classes.append(int(sr.record["CLASS"]))
classes_arr = np.array(hex_classes, dtype=np.uint8)
print(f" Querying {len(pts)} postcodes against {len(hex_polys)} hexagons...")
tree = STRtree(hex_polys)
pt_idx, hex_idx = tree.query(pts, predicate="intersects")
risk_values = np.zeros(len(pts), dtype=np.uint8)
np.maximum.at(risk_values, pt_idx, classes_arr[hex_idx])
result = result.with_columns(pl.Series(col_name, risk_values))
risk_cols.append(col_name)
# Overall environmental risk = max across all 6
result = result.with_columns(
pl.max_horizontal(*risk_cols).alias("environmental_risk")
)
# Convert 0 → null, 1/2/3 → Low/Moderate/Significant
label_map = {1: "Low", 2: "Moderate", 3: "Significant"}
for col in risk_cols + ["environmental_risk"]:
result = result.with_columns(
pl.col(col)
.replace_strict(label_map, default=None, return_dtype=pl.Utf8)
.alias(col)
)
print(f" GeoSure join complete: {result.height} postcodes")
return result
def main() -> None:
parser = argparse.ArgumentParser(
description="Spatial-join GeoSure ground stability data to postcode centroids"
)
parser.add_argument(
"--geosure",
type=Path,
required=True,
help="GeoSure shapefile directory",
)
parser.add_argument(
"--arcgis",
type=Path,
required=True,
help="ArcGIS postcode data parquet (for lat/lon coordinates)",
)
parser.add_argument(
"--output",
type=Path,
required=True,
help="Output parquet file path",
)
args = parser.parse_args()
result = transform(args.geosure, args.arcgis)
args.output.parent.mkdir(parents=True, exist_ok=True)
result.write_parquet(args.output, compression="zstd")
size_mb = args.output.stat().st_size / (1024 * 1024)
print(f"Wrote {args.output} ({size_mb:.1f} MB)")
if __name__ == "__main__":
main()