Improve data

This commit is contained in:
Andras Schmelczer 2026-06-10 07:54:25 +01:00
parent b4d66a28c1
commit 85da1941aa
31 changed files with 901 additions and 319 deletions

View file

@ -53,6 +53,18 @@ _OUTPUT_PRECISION_DEG = 0.000001
# tolerance), we fatten it just enough to survive snapping rather than drop it.
_MIN_FOOTPRINT_BUFFER_M = 0.5
# Building-scale buffer (metres) for POINTLIKE inputs that carry no real extent.
# Multi-dwelling (tower-block) postcodes have every UPRN geocoded to a single
# shared coordinate, so the boundary collapses to a point; a 0.5 m buffer then
# yields an invisible ~0.8 m² dot covering hundreds of homes. Such inputs get a
# ~200 m² building-scale footprint instead (a disc of radius 8 m). Genuine thin
# slivers, which still carry length, keep the minimal buffer.
# _resolve_overlaps runs afterwards, so any overlap this introduces is trimmed;
# a postcode shaved back to sub-grid still falls through to the tiny
# _grid_footprint, so this can only improve the result.
_POINT_RESCUE_BUFFER_M = 8.0
# POINTLIKE thresholds: _rescue_footprint applies the building-scale buffer only
# when the geometry's area is strictly below _POINTLIKE_AREA_M2 AND its length
# (perimeter, for a polygon) is strictly below _POINTLIKE_PERIMETER_M. Requiring
# both keeps long, near-zero-area slivers on the minimal buffer.
_POINTLIKE_AREA_M2 = 1.0
_POINTLIKE_PERIMETER_M = 4.0
def _snap_to_wgs84_geojson(geom_bng: Polygon | MultiPolygon) -> dict | None:
"""Transform a BNG polygon to WGS84, snap to output precision, validate.
@ -90,8 +102,23 @@ def _snap_to_wgs84_geojson(geom_bng: Polygon | MultiPolygon) -> dict | None:
def _rescue_footprint(geom_bng) -> dict | None:
    """Fatten a degenerate BNG geometry into a representable footprint and snap.

    A POINTLIKE input (area below ``_POINTLIKE_AREA_M2`` and length below
    ``_POINTLIKE_PERIMETER_M``: a point, or a near-zero-area, short-perimeter
    polygon -- the signature of a tower-block postcode whose UPRNs all share
    one coordinate) is buffered by ``_POINT_RESCUE_BUFFER_M`` so it is not
    reduced to an invisible sub-metre dot. Anything else, including thin
    slivers that still carry length, is buffered by the minimal
    ``_MIN_FOOTPRINT_BUFFER_M``.

    Args:
        geom_bng: Geometry in British National Grid coordinates; it must
            expose ``area``, ``length`` and ``buffer()``.

    Returns:
        The value of ``_snap_to_wgs84_geojson`` for the buffered footprint,
        or ``None`` when ``_largest_polygonal`` yields no footprint.
    """
    try:
        is_pointlike = (
            geom_bng.area < _POINTLIKE_AREA_M2
            and geom_bng.length < _POINTLIKE_PERIMETER_M
        )
    except GEOSException:
        # Best effort: if the geometry cannot be measured, treat it as not
        # pointlike and fall back to the minimal buffer rather than failing.
        is_pointlike = False

    buffer_m = _POINT_RESCUE_BUFFER_M if is_pointlike else _MIN_FOOTPRINT_BUFFER_M
    # Buffer exactly once, with the distance chosen above.
    footprint = _largest_polygonal(geom_bng.buffer(buffer_m))
    if footprint is None:
        return None
    return _snap_to_wgs84_geojson(footprint)

View file

@ -906,6 +906,37 @@ class TestToWgs84Geojson:
assert result is not None
assert result["type"] == "Polygon"
def test_pointlike_input_gets_building_scale_footprint(self):
    """A single-point input must come back as a building-scale footprint.

    Models a tower-block postcode whose UPRNs all share one coordinate: the
    output, measured back in BNG metres, has to exceed 100 m² rather than
    collapse to a sub-metre dot.
    """
    import pyproj
    from shapely.geometry import Point, shape
    from shapely.ops import transform as transform_geometry

    tower_block = Point(360000, 170000)
    geojson = to_wgs84_geojson(tower_block)
    assert geojson is not None

    # Re-project the WGS84 output to BNG so the area is in square metres.
    wgs84_to_bng = pyproj.Transformer.from_crs(
        "EPSG:4326", "EPSG:27700", always_xy=True
    )
    footprint_bng = transform_geometry(wgs84_to_bng.transform, shape(geojson))
    area_m2 = footprint_bng.area
    assert area_m2 > 100, f"point footprint only {area_m2:.1f} m^2"
def test_thin_sliver_keeps_minimal_buffer(self):
    """A long, thin sliver must not be inflated to building scale.

    The sliver still carries length, so it is not pointlike -- only truly
    pointlike inputs get the large buffer. Its output, measured back in BNG
    metres, has to stay under 100 m².
    """
    import pyproj
    from shapely.geometry import LineString, shape
    from shapely.ops import transform as transform_geometry

    # A 40 m line fattened by 5 cm: negligible area, but a long perimeter.
    centreline = LineString([(360000, 170000), (360040, 170000)])
    sliver = centreline.buffer(0.05)
    geojson = to_wgs84_geojson(sliver)
    assert geojson is not None

    # Re-project the WGS84 output to BNG so the area is in square metres.
    wgs84_to_bng = pyproj.Transformer.from_crs(
        "EPSG:4326", "EPSG:27700", always_xy=True
    )
    footprint_bng = transform_geometry(wgs84_to_bng.transform, shape(geojson))
    area_m2 = footprint_bng.area
    assert area_m2 < 100, f"sliver inflated to {area_m2:.1f} m^2"
def test_coordinates_have_limited_precision(self):
"""GeoJSON coordinates should be rounded to 6 decimal places."""
import json