Fix data pipelines once and for all
This commit is contained in:
parent
08560476c5
commit
4012e4e047
46 changed files with 4508 additions and 855 deletions
|
|
@ -921,6 +921,49 @@ class TestToWgs84Geojson:
|
|||
area_m2 = transform_geometry(to_bng.transform, shape(result)).area
|
||||
assert area_m2 > 100, f"point footprint only {area_m2:.1f} m^2"
|
||||
|
||||
def test_snappable_pointlike_polygon_still_gets_building_scale_footprint(self):
|
||||
"""A collapsed-but-snappable footprint (e.g. EC2A 2FJ: 181 properties on
|
||||
0.86 m²) must NOT ship as-is just because it survives precision snapping;
|
||||
pointlike inputs are rescued to a ~201 m² disc unconditionally."""
|
||||
import pyproj
|
||||
from shapely.geometry import shape
|
||||
from shapely.ops import transform as transform_geometry
|
||||
|
||||
to_bng = pyproj.Transformer.from_crs(
|
||||
"EPSG:4326", "EPSG:27700", always_xy=True
|
||||
)
|
||||
# 0.9m x 0.9m square: area 0.81 m², perimeter 3.6 m — pointlike, yet
|
||||
# large enough (~8 output-grid cells) to survive the 1e-6 deg snap.
|
||||
tiny = box(530000, 180000, 530000.9, 180000.9)
|
||||
from .output import _snap_to_wgs84_geojson
|
||||
|
||||
assert _snap_to_wgs84_geojson(tiny) is not None, (
|
||||
"precondition: this polygon must be snappable, otherwise the test "
|
||||
"exercises the old snap-fails path instead of the new one"
|
||||
)
|
||||
result = to_wgs84_geojson(tiny)
|
||||
assert result is not None
|
||||
area_m2 = transform_geometry(to_bng.transform, shape(result)).area
|
||||
assert 150 < area_m2 < 300, (
|
||||
f"pointlike snappable footprint shipped at {area_m2:.2f} m^2 "
|
||||
"instead of a building-scale (~201 m^2) disc"
|
||||
)
|
||||
|
||||
def test_normal_polygon_area_unchanged(self):
|
||||
"""A normal polygon must pass through without rescue inflation."""
|
||||
import pyproj
|
||||
from shapely.geometry import shape
|
||||
from shapely.ops import transform as transform_geometry
|
||||
|
||||
to_bng = pyproj.Transformer.from_crs(
|
||||
"EPSG:4326", "EPSG:27700", always_xy=True
|
||||
)
|
||||
poly = box(530000, 180000, 530100, 180100) # 10,000 m²
|
||||
result = to_wgs84_geojson(poly)
|
||||
assert result is not None
|
||||
area_m2 = transform_geometry(to_bng.transform, shape(result)).area
|
||||
assert area_m2 == pytest.approx(10_000, rel=0.01)
|
||||
|
||||
def test_thin_sliver_keeps_minimal_buffer(self):
|
||||
"""A genuine elongated sliver still carries length, so it is NOT inflated
|
||||
to building scale — only truly pointlike inputs are."""
|
||||
|
|
@ -1132,6 +1175,26 @@ class TestSubtractGreenspace:
|
|||
# 80% < 90% cap, so subtraction should happen
|
||||
assert result.area == pytest.approx(2000, rel=0.01)
|
||||
|
||||
def test_load_greenspace_repairs_invalid_and_drops_empty(self, tmp_path):
|
||||
"""An invalid (bow-tie) park polygon in the parquet must be repaired on
|
||||
load: it would otherwise make the per-postcode intersects/difference
|
||||
liable to raise hours into a merge."""
|
||||
from .greenspace import load_greenspace
|
||||
|
||||
bowtie = Polygon([(0, 0), (10, 10), (10, 0), (0, 10)]) # self-intersects
|
||||
assert not bowtie.is_valid
|
||||
valid = box(20, 20, 30, 30)
|
||||
path = tmp_path / "greenspace.parquet"
|
||||
pl.DataFrame({"geometry": [bowtie.wkb, valid.wkb]}).write_parquet(path)
|
||||
|
||||
tree, geoms = load_greenspace(path)
|
||||
assert len(geoms) == 2
|
||||
assert all(g.is_valid and not g.is_empty for g in geoms)
|
||||
# The repaired bow-tie must still subtract cleanly.
|
||||
result = subtract_greenspace(box(0, 0, 100, 100), tree, geoms)
|
||||
assert result.is_valid
|
||||
assert result.area < 10_000
|
||||
|
||||
|
||||
class TestToWgs84GeojsonValidity:
|
||||
"""to_wgs84_geojson must emit GeoJSON that round-trips to a valid geometry."""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue