This commit is contained in:
Andras Schmelczer 2026-05-31 20:20:41 +01:00
parent 8688b7475e
commit e8345cbdc1
40 changed files with 1980 additions and 904 deletions

View file

@ -3,6 +3,8 @@
Each test targets a specific bug or edge case identified during code review.
"""
import json
import numpy as np
import polars as pl
import pytest
@ -11,7 +13,12 @@ from shapely.ops import unary_union
from .oa_boundaries import parse_gpkg_geometry
from .greenspace import subtract_greenspace
from .output import _fill_holes, merge_fragments, to_wgs84_geojson
from .output import (
_fill_holes,
merge_fragments,
to_wgs84_geojson,
write_district_geojson,
)
from .process_oa import _extract_polygonal, process_oa
from .uprn import get_oa_uprns, load_uprns
from .voronoi import _equal_split_fallback, compute_voronoi_regions
@ -154,6 +161,7 @@ class TestWhitespacePostcodes:
"pcds": ["AA1 1AA", "AA1 1AB"],
"east1m": [500010, 500030],
"north1m": [180010, 180020],
"oa21cd": ["E00000001", "E00000001"],
"doterm": ["2020-01-01", None],
"ctry25cd": ["E92000001", "E92000001"],
}
@ -165,6 +173,65 @@ class TestWhitespacePostcodes:
assert loaded_df["PCDS"].to_list() == ["AA1 1AB"]
def test_arcgis_filters_to_active_english_postcodes(self, tmp_path):
    """Only the English postcode survives loading when ArcGIS lists a Welsh one.

    Both UPRN rows carry OA code "E00000001", but the ArcGIS table tags
    "CF1 1AA" with country code "W92000004" and OA "W00000001". After
    ``load_uprns`` the only remaining postcode must be "AA1 1AA".
    """
    # UPRN fixture: two address points, one per postcode.
    uprn_file = tmp_path / "uprn.parquet"
    pl.DataFrame(
        {
            "GRIDGB1E": [500010, 500020],
            "GRIDGB1N": [180010, 180020],
            "PCDS": ["AA1 1AA", "CF1 1AA"],
            "OA21CD": ["E00000001", "E00000001"],
        }
    ).write_parquet(uprn_file)

    # ArcGIS fixture: neither postcode has a termination date; they differ
    # in country code (presumably what the loader filters on — confirm
    # against load_uprns).
    arcgis_file = tmp_path / "arcgis.parquet"
    pl.DataFrame(
        {
            "pcds": ["AA1 1AA", "CF1 1AA"],
            "east1m": [500010, 300010],
            "north1m": [180010, 220010],
            "oa21cd": ["E00000001", "W00000001"],
            "doterm": [None, None],
            "ctry25cd": ["E92000001", "W92000004"],
        }
    ).write_parquet(arcgis_file)

    surviving, _ = load_uprns(uprn_file, arcgis_file)

    expected_postcodes = ["AA1 1AA"]
    assert surviving["PCDS"].to_list() == expected_postcodes
def test_arcgis_adds_centroid_seed_for_active_postcode_without_uprn(self, tmp_path):
    """A postcode present only in the ArcGIS table still yields a point.

    "BB1 1BB" has no UPRN row, yet after ``load_uprns`` it must appear in
    the loaded frame, and ``get_oa_uprns`` for its OA ("E00000002") must
    return exactly one point at the ArcGIS easting/northing.
    """
    # UPRN fixture: a single address point for "AA1 1AA" only.
    uprn_file = tmp_path / "uprn.parquet"
    pl.DataFrame(
        {
            "GRIDGB1E": [500010],
            "GRIDGB1N": [180010],
            "PCDS": ["AA1 1AA"],
            "OA21CD": ["E00000001"],
        }
    ).write_parquet(uprn_file)

    # ArcGIS fixture: the same postcode plus "BB1 1BB" in a second OA.
    arcgis_file = tmp_path / "arcgis.parquet"
    pl.DataFrame(
        {
            "pcds": ["AA1 1AA", "BB1 1BB"],
            "east1m": [500010, 510000],
            "north1m": [180010, 190000],
            "oa21cd": ["E00000001", "E00000002"],
            "doterm": [None, None],
            "ctry25cd": ["E92000001", "E92000001"],
        }
    ).write_parquet(arcgis_file)

    frame, oa_offsets = load_uprns(uprn_file, arcgis_file)

    # Row order is not part of the contract here, so compare as a set.
    seen_postcodes = set(frame["PCDS"].to_list())
    assert seen_postcodes == {"AA1 1AA", "BB1 1BB"}

    coords, labels = get_oa_uprns(frame, oa_offsets, "E00000002")
    assert labels == ["BB1 1BB"]
    assert coords.tolist() == [[510000.0, 190000.0]]
# ---------------------------------------------------------------------------
# Bug 3: Voronoi deduplication is first-seen-wins
@ -450,7 +517,9 @@ class TestProcessOAInspireParcelAssignment:
)
postcodes = ["A", "B"]
fragments = process_oa(oa_geom, points, postcodes, inspire_candidates=[left, right])
fragments = process_oa(
oa_geom, points, postcodes, inspire_candidates=[left, right]
)
frag_dict = dict(fragments)
assert "A" in frag_dict and "B" in frag_dict
@ -494,7 +563,9 @@ class TestProcessOAInspireParcelAssignment:
)
postcodes = ["A", "B"]
fragments = process_oa(oa_geom, points, postcodes, inspire_candidates=[left, right])
fragments = process_oa(
oa_geom, points, postcodes, inspire_candidates=[left, right]
)
frag_dict = dict(fragments)
assert "A" in frag_dict and "B" in frag_dict
@ -539,7 +610,9 @@ class TestProcessOAInspireParcelAssignment:
)
postcodes = ["A", "B"]
fragments = process_oa(oa_geom, points, postcodes, inspire_candidates=[straddling])
fragments = process_oa(
oa_geom, points, postcodes, inspire_candidates=[straddling]
)
for _, geom in fragments:
assert geom.difference(oa_geom).area < 0.01
@ -651,6 +724,22 @@ class TestToWgs84Geojson:
assert lon_dp <= 6, f"Longitude {lon_s} has {lon_dp} decimal places"
assert lat_dp <= 6, f"Latitude {lat_s} has {lat_dp} decimal places"
def test_write_district_geojson_replaces_stale_units(self, tmp_path):
    """Writing district GeoJSON removes unit files left over from earlier runs.

    A pre-existing ``units/ZZ1.geojson`` must be gone after the call, the
    reported file count must be 1, and ``units/AA1.geojson`` must contain
    the single postcode that was passed in.
    """
    units_dir = tmp_path / "units"
    units_dir.mkdir()

    # Plant an empty feature collection that the writer is expected to remove.
    leftover = units_dir / "ZZ1.geojson"
    empty_collection = {"type": "FeatureCollection", "features": []}
    leftover.write_text(json.dumps(empty_collection))

    regions = {"AA1 1AA": box(530000, 180000, 530100, 180100)}
    file_count = write_district_geojson(regions, tmp_path)

    assert file_count == 1
    assert not leftover.exists()

    fresh = units_dir / "AA1.geojson"
    payload = json.loads(fresh.read_text())
    first_feature = payload["features"][0]
    assert first_feature["properties"]["postcodes"] == "AA1 1AA"
# ---------------------------------------------------------------------------
# Edge case: parse_gpkg_geometry rejects unknown envelope types