idk
This commit is contained in:
parent
a04ac2d857
commit
d43da9708c
47 changed files with 4120 additions and 573 deletions
|
|
@ -11,12 +11,20 @@ import pytest
|
|||
from shapely.geometry import MultiPolygon, Polygon, box
|
||||
from shapely.ops import unary_union
|
||||
|
||||
from .fragments_cache import (
|
||||
fragments_cache_is_fresh,
|
||||
load_fragments,
|
||||
save_fragments,
|
||||
)
|
||||
from .__main__ import _oa_fragments, _process_oas
|
||||
from .inspire import build_inspire_index
|
||||
from .oa_boundaries import parse_gpkg_geometry
|
||||
from .greenspace import subtract_greenspace
|
||||
from .output import (
|
||||
_fill_holes,
|
||||
merge_fragments,
|
||||
to_wgs84_geojson,
|
||||
to_wgs84_geojson_multi,
|
||||
write_district_geojson,
|
||||
)
|
||||
from .process_oa import _extract_polygonal, process_oa
|
||||
|
|
@ -173,6 +181,52 @@ class TestWhitespacePostcodes:
|
|||
|
||||
assert loaded_df["PCDS"].to_list() == ["AA1 1AB"]
|
||||
|
||||
def test_remapped_terminated_postcode_adopts_successor_oa(self, tmp_path):
    """A remapped terminated postcode must take on its successor's OA.

    Once a terminated postcode has been remapped to its active successor,
    the seed point has to carry the successor's OA (and coordinates) rather
    than the terminated postcode's original OA. Before the fix the row kept
    the terminated postcode's OA21CD, which seeded the successor into an OA
    it does not belong to and split its boundary across OAs.
    """
    # Scenario: terminated AA1 1AA sits in OA E00000001, while its nearest
    # active successor AA1 1AB lives in a different OA (E00000002).
    uprn_file = tmp_path / "uprn.parquet"
    pl.DataFrame(
        {
            "GRIDGB1E": [500010],
            "GRIDGB1N": [180010],
            "PCDS": ["AA1 1AA"],
            "OA21CD": ["E00000001"],
        }
    ).write_parquet(uprn_file)

    arcgis_file = tmp_path / "arcgis.parquet"
    pl.DataFrame(
        {
            "pcds": ["AA1 1AA", "AA1 1AB"],
            "east1m": [500010, 500030],
            "north1m": [180010, 180020],
            # AA1 1AA carries a termination date, so AA1 1AB is the only
            # active successor — and it belongs to a different OA.
            "oa21cd": ["E00000001", "E00000002"],
            "doterm": ["2020-01-01", None],
            "ctry25cd": ["E92000001", "E92000001"],
        }
    ).write_parquet(arcgis_file)

    frame, oa_offsets = load_uprns(uprn_file, arcgis_file)

    # Grouping must follow the successor's OA, not the terminated one's.
    assert "E00000002" in oa_offsets, "Successor OA missing — remap kept old OA"
    assert "E00000001" not in oa_offsets, (
        "Remapped point still lives in the terminated postcode's OA"
    )

    seed_points, seed_postcodes = get_oa_uprns(frame, oa_offsets, "E00000002")
    assert seed_postcodes == ["AA1 1AB"]
    # The successor's authoritative coordinates replace the original ones.
    assert seed_points.tolist() == [[500030.0, 180020.0]]
|
||||
|
||||
def test_arcgis_filters_to_active_english_postcodes(self, tmp_path):
|
||||
uprns = pl.DataFrame(
|
||||
{
|
||||
|
|
@ -617,6 +671,32 @@ class TestProcessOAInspireParcelAssignment:
|
|||
for _, geom in fragments:
|
||||
assert geom.difference(oa_geom).area < 0.01
|
||||
|
||||
def test_shared_parcel_keeps_every_contained_postcode(self):
    """One parcel holding UPRNs for [A, A, B] must produce fragments for both.

    Before the fix the majority postcode (A) claimed the whole parcel, which
    removed it from `remaining`; B's UPRNs were then trapped inside claimed
    land and B ended up with no fragment at all.
    """
    oa_geom = box(0, 0, 100, 100)
    whole_oa_parcel = box(0, 0, 100, 100)  # a single parcel spanning the OA

    # (postcode, seed coordinate) pairs: A is the majority, B the minority
    # that used to be dropped.
    seeds = [
        ("A", [20, 50]),
        ("A", [30, 50]),
        ("B", [80, 50]),
    ]
    points = np.array([xy for _, xy in seeds])
    postcodes = [pc for pc, _ in seeds]

    by_postcode = dict(
        process_oa(oa_geom, points, postcodes, inspire_candidates=[whole_oa_parcel])
    )

    missing_messages = {
        "A": "Majority postcode A must keep a fragment",
        "B": "Minority postcode B must not be dropped",
    }
    for pc, message in missing_messages.items():
        assert pc in by_postcode, message
    for pc in missing_messages:
        assert by_postcode[pc].area > 0

    # The two fragments must split the parcel without overlapping.
    shared = by_postcode["A"].intersection(by_postcode["B"])
    assert shared.area < 0.01
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _extract_polygonal helper
|
||||
|
|
@ -656,6 +736,21 @@ class TestExtractPolygonal:
|
|||
|
||||
assert _extract_polygonal(LineString([(0, 0), (1, 1)])) is None
|
||||
|
||||
def test_overlapping_collection_unioned_to_valid(self):
    """Overlapping polygons in a GeometryCollection come back as one valid geometry.

    The result must be a union rather than a raw MultiPolygon of the members
    (which would be invalid and break the next ``.difference()``), and the
    overlapping area must only be counted once.
    """
    from shapely.geometry import GeometryCollection

    first = box(0, 0, 100, 100)
    second = box(50, 50, 150, 150)  # shares a 50x50 square with `first`
    members = [first, second]

    polygonal = _extract_polygonal(GeometryCollection(members))

    assert polygonal is not None
    assert polygonal.is_valid
    expected_area = unary_union(members).area
    assert polygonal.area == pytest.approx(expected_area)
    # The operation that used to crash must now succeed.
    trimmed = polygonal.difference(box(0, 0, 10, 10))
    assert trimmed.is_valid
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Edge case: merge_fragments handles single-OA postcodes
|
||||
|
|
@ -763,12 +858,12 @@ class TestParseGpkgGeometry:
|
|||
|
||||
|
||||
class TestFillHoles:
|
||||
"""_fill_holes must remove all interior holes from polygons."""
|
||||
"""_fill_holes fills small artifact holes but keeps large (real-enclosed) ones."""
|
||||
|
||||
def test_polygon_with_hole(self):
|
||||
"""A polygon with an interior ring should become a solid polygon."""
|
||||
def test_small_artifact_hole_filled(self):
|
||||
"""A small (<1000 m²) interior ring is an artifact and gets filled."""
|
||||
outer = [(0, 0), (100, 0), (100, 100), (0, 100), (0, 0)]
|
||||
hole = [(30, 30), (70, 30), (70, 70), (30, 70), (30, 30)]
|
||||
hole = [(40, 40), (60, 40), (60, 60), (40, 60), (40, 40)] # 20x20 = 400 m²
|
||||
poly_with_hole = Polygon(outer, [hole])
|
||||
assert len(list(poly_with_hole.interiors)) == 1
|
||||
result = _fill_holes(poly_with_hole)
|
||||
|
|
@ -776,6 +871,15 @@ class TestFillHoles:
|
|||
assert len(list(result.interiors)) == 0
|
||||
assert result.area == pytest.approx(Polygon(outer).area)
|
||||
|
||||
def test_large_hole_kept(self):
    """A large (>=1000 m²) hole is likely a real enclosed postcode, so it stays."""
    shell = [(0, 0), (100, 0), (100, 100), (0, 100), (0, 0)]
    # 60 x 60 = 3600 m² interior ring.
    ring = [(20, 20), (80, 20), (80, 80), (20, 80), (20, 20)]

    filled = _fill_holes(Polygon(shell, [ring]))

    remaining_rings = list(filled.interiors)
    assert len(remaining_rings) == 1
    assert filled.area == pytest.approx(10000 - 3600)
|
||||
|
||||
def test_multipolygon_with_holes(self):
|
||||
"""A MultiPolygon where each part has holes should have all holes removed."""
|
||||
outer1 = [(0, 0), (50, 0), (50, 50), (0, 50), (0, 0)]
|
||||
|
|
@ -944,3 +1048,356 @@ class TestGreenspaceHolePreserved:
|
|||
merged = result["TEST1"]
|
||||
assert len(list(merged.interiors)) == 1
|
||||
assert merged.area == pytest.approx(10000 - 1600, rel=0.05)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# merge_fragments keeps substantial detached parts (no OA-seam coverage gaps)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestKeepDetachedParts:
    """merge_fragments must retain every substantial detached part.

    A postcode split across an OA seam (railway/river) has to keep both
    parts; keeping only the largest one left ~1.8% uncovered gaps.
    """

    def test_far_apart_parts_both_kept(self):
        """Two equal blocks separated by more than the merge buffer both survive."""
        # 50x50 m blocks (2500 m² each) with a 30 m gap between them —
        # wider than the 10 m merge buffer.
        west_block = box(0, 0, 50, 50)
        east_block = box(80, 0, 130, 50)
        fragments = [("AA1 1AA", west_block), ("AA1 1AA", east_block)]

        merged = merge_fragments(fragments)["AA1 1AA"]

        assert merged.geom_type == "MultiPolygon"
        assert len(merged.geoms) == 2
        assert merged.area == pytest.approx(5000, rel=0.01)

    def test_tiny_noise_part_dropped(self):
        """A detached speck below the area threshold is discarded."""
        main_part = box(0, 0, 100, 100)  # 10000 m²
        speck = box(200, 200, 205, 205)  # 25 m² < 100 m² threshold
        fragments = [("AA1 1AA", main_part), ("AA1 1AA", speck)]

        merged = merge_fragments(fragments)["AA1 1AA"]

        assert merged.geom_type == "Polygon"
        assert merged.area == pytest.approx(10000, rel=0.01)
|
||||
|
||||
|
||||
class TestMultiPolygonOutput:
    """Split postcodes must be serialised as GeoJSON MultiPolygon.

    Covers both to_wgs84_geojson_multi and the district writer (the Rust
    server + loader already parse MultiPolygon).
    """

    @staticmethod
    def _two_part_geometry():
        """Build a MultiPolygon of two 100 m squares set 1 km apart."""
        parts = [
            box(530000, 180000, 530100, 180100),
            box(531000, 180000, 531100, 180100),
        ]
        return MultiPolygon(parts)

    def test_multipolygon_preserves_all_parts(self):
        """Both parts survive conversion and round-trip through `shape`."""
        from shapely.geometry import shape

        geojson = to_wgs84_geojson_multi(self._two_part_geometry())

        assert geojson["type"] == "MultiPolygon"
        assert len(geojson["coordinates"]) == 2
        round_tripped = shape(geojson)
        assert round_tripped.is_valid
        assert not round_tripped.is_empty
        assert len(round_tripped.geoms) == 2

    def test_single_part_stays_polygon(self):
        """A plain polygon is not promoted to MultiPolygon."""
        single = box(530000, 180000, 530100, 180100)
        assert to_wgs84_geojson_multi(single)["type"] == "Polygon"

    def test_writer_emits_multipolygon_feature(self, tmp_path):
        """The district writer stores a split postcode as one MultiPolygon feature."""
        files_written = write_district_geojson(
            {"AA1 1AA": self._two_part_geometry()}, tmp_path
        )
        assert files_written == 1

        district_file = tmp_path / "units" / "AA1.geojson"
        collection = json.loads(district_file.read_text())
        first_feature = collection["features"][0]
        assert first_feature["geometry"]["type"] == "MultiPolygon"
|
||||
|
||||
|
||||
class TestOutputPartition:
    """The writer must emit a partition of the ground.

    Overlapping postcodes are made disjoint (no two cover the same ground)
    and no active postcode is dropped in the process.
    """

    @staticmethod
    def _read_district(out_dir):
        """Load units/AA1.geojson and map each feature's postcode to its geometry."""
        from shapely.geometry import shape

        raw = (out_dir / "units" / "AA1.geojson").read_text()
        by_postcode = {}
        for feature in json.loads(raw)["features"]:
            by_postcode[feature["properties"]["postcodes"]] = shape(
                feature["geometry"]
            )
        return by_postcode

    def test_overlapping_postcodes_made_disjoint(self, tmp_path):
        """Two partially overlapping postcodes are written with disjoint interiors."""
        left = box(530000, 180000, 530100, 180100)
        right = box(530090, 180000, 530200, 180100)  # shares a 10 m strip with `left`
        assert left.intersection(right).area > 0  # precondition: they overlap

        write_district_geojson({"AA1 1AA": left, "AA1 1AB": right}, tmp_path)
        written = self._read_district(tmp_path)

        assert set(written) == {"AA1 1AA", "AA1 1AB"}  # neither dropped
        # Interiors are disjoint (at most a shared edge remains).
        shared_area = written["AA1 1AA"].intersection(written["AA1 1AB"]).area
        assert shared_area == pytest.approx(0.0, abs=1e-12)
        assert all(geom.area > 0 for geom in written.values())

    def test_enclosed_postcode_makes_container_a_donut(self, tmp_path):
        """A postcode fully inside another stays disjoint from its container.

        The smaller (inner) postcode keeps its area and the container gets a
        hole. A plain `overlaps` query misses containment, so this is the
        regression guard for that fix.
        """
        container = box(530000, 180000, 530300, 180300)  # 90,000 m²
        enclosed = box(530100, 180100, 530200, 180200)  # 10,000 m², inside container
        assert container.contains(enclosed)  # precondition

        write_district_geojson({"AA1 1AA": container, "AA1 1AB": enclosed}, tmp_path)
        written = self._read_district(tmp_path)

        assert set(written) == {"AA1 1AA", "AA1 1AB"}  # neither dropped
        shared_area = written["AA1 1AA"].intersection(written["AA1 1AB"]).area
        assert shared_area == pytest.approx(0.0, abs=1e-12)
        # The container has become a ring around the enclosed postcode.
        donut = written["AA1 1AA"]
        assert donut.geom_type == "Polygon"
        assert len(list(donut.interiors)) == 1
        assert written["AA1 1AB"].area > 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# InspireIndex must return the same candidates as a brute-force bbox scan
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestInspireIndex:
    """The grid index must agree with a linear scan of every parcel bbox.

    It replaces a per-OA linear scan, so it has to return an identical
    candidate set (and order) to keep Phase 3 output unchanged.
    """

    @staticmethod
    def _brute(bboxes, box):
        """Return indices of rows in `bboxes` whose extent intersects `box`.

        Rows and `box` are both read as (e0, n0, e1, n1); comparisons are
        inclusive, so extents that merely touch count as intersecting.
        """
        q_e0, q_n0, q_e1, q_n1 = box
        overlaps_east = (bboxes[:, 2] >= q_e0) & (bboxes[:, 0] <= q_e1)
        overlaps_north = (bboxes[:, 3] >= q_n0) & (bboxes[:, 1] <= q_n1)
        return np.flatnonzero(overlaps_east & overlaps_north)

    def test_matches_brute_force_over_random_queries(self):
        """Random queries of several sizes return exactly the brute-force set."""
        rng = np.random.default_rng(0)
        n_parcels = 5000
        origin_e = rng.uniform(0, 10000, n_parcels)
        origin_n = rng.uniform(0, 10000, n_parcels)
        width = rng.uniform(1, 60, n_parcels)  # all <= 500m cell → CSR path
        height = rng.uniform(1, 60, n_parcels)
        bboxes = np.column_stack(
            [origin_e, origin_n, origin_e + width, origin_n + height]
        ).astype(np.float64)
        index = build_inspire_index(bboxes, None, None, cell_size=500.0)

        query_sides = [30.0, 200.0, 1000.0, 3000.0]
        for _ in range(400):
            corner_e = rng.uniform(0, 10000)
            corner_n = rng.uniform(0, 10000)
            side = float(rng.choice(query_sides))
            query = (corner_e, corner_n, corner_e + side, corner_n + side)

            found = index.candidate_indices(query)
            reference = np.sort(self._brute(bboxes, query))
            assert np.array_equal(found, reference)

    def test_oversized_parcel_is_found(self):
        """A parcel bigger than a grid cell is still returned for interior queries."""
        # A parcel larger than a cell goes to the overflow list, not the grid;
        # a query deep inside it (away from the small parcels) must still hit it.
        bboxes = np.array(
            [
                [0.0, 0.0, 5000.0, 5000.0],  # 5km parcel >> 500m cell
                [100.0, 100.0, 120.0, 120.0],
                [4000.0, 4000.0, 4020.0, 4020.0],
            ]
        )
        index = build_inspire_index(bboxes, None, None, cell_size=500.0)
        query = (2000.0, 2000.0, 2050.0, 2050.0)

        found = index.candidate_indices(query)

        assert 0 in found
        assert np.array_equal(found, np.sort(self._brute(bboxes, query)))

    def test_no_overlap_returns_empty(self):
        """A query that touches no parcel yields no candidates."""
        bboxes = np.array([[0.0, 0.0, 10.0, 10.0], [20.0, 20.0, 30.0, 30.0]])
        index = build_inspire_index(bboxes, None, None, cell_size=500.0)
        far_query = (100.0, 100.0, 110.0, 110.0)
        assert len(index.candidate_indices(far_query)) == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Parallel OA processing must match the sequential result exactly
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestParallelProcessing:
    """_process_oas with several workers must match the workers=1 result.

    Uses single-postcode OAs (fast path), so it exercises the chunking + WKB
    round-trip + fork machinery without needing INSPIRE data.
    """

    @staticmethod
    def _inputs(n_oas=60):
        """Build the positional inputs for `_process_oas` over `n_oas` synthetic OAs.

        Each OA is a 50 m square placed every 100 m along the easting axis
        and holds two seed points that share one postcode.

        Returns:
            (codes, oa_geoms, east, north, postcodes, offsets), where
            `offsets` maps each OA code to its (start, end) slice into the
            point arrays.
        """
        import pyarrow as pa

        oa_geoms = {}
        for i in range(n_oas):
            left = i * 100.0
            oa_geoms[f"E{i:08d}"] = box(left, 0.0, left + 50.0, 50.0)
        codes = sorted(oa_geoms)
        count = len(codes)

        # Two points per OA, laid out contiguously in OA order.
        east = [i * 100.0 + shift for i in range(count) for shift in (10.0, 20.0)]
        north = [10.0, 20.0] * count
        # Both points of an OA carry the same postcode → fast path.
        pcs = [f"AA{i % 5} {i % 9}AA" for i in range(count) for _ in range(2)]
        offsets = {code: (2 * i, 2 * i + 2) for i, code in enumerate(codes)}

        return (
            codes,
            oa_geoms,
            np.array(east),
            np.array(north),
            pa.array(pcs, type=pa.large_string()),
            offsets,
        )

    @staticmethod
    def _norm(frags):
        """Return fragments as a sorted list of (postcode, WKB hex) pairs."""
        pairs = [(pc, geom.wkb_hex) for pc, geom in frags]
        pairs.sort()
        return pairs

    def test_parallel_matches_sequential(self):
        """Three workers yield the same fragments and count as one worker."""
        codes, oa, east, north, pcs, offs = self._inputs()

        sequential, seq_count = _process_oas(
            codes, oa, east, north, pcs, offs, None, workers=1
        )
        parallel, par_count = _process_oas(
            codes, oa, east, north, pcs, offs, None, workers=3
        )

        assert len(sequential) == len(codes)  # one fragment per single-postcode OA
        assert seq_count == par_count == len(codes)
        assert self._norm(sequential) == self._norm(parallel)

    def test_oa_failure_is_tagged_with_oa_code(self):
        """A failure inside per-OA processing re-raises with the OA code attached.

        This keeps a single bad OA attributable instead of surfacing as an
        anonymous worker abort.
        """
        # The OA is absent from the geoms dict → KeyError, wrapped with its code.
        with pytest.raises(RuntimeError, match="E00099999"):
            _oa_fragments("E00099999", {}, None, None, None, {}, None)
|
||||
|
||||
|
||||
class TestDegenerateGeometryHandling:
    """Degenerate geometries must never cost a postcode or abort a write.

    Every active postcode must keep a boundary (validate_outputs is strict),
    so a sub-grid sliver is fattened rather than dropped. A genuinely empty
    geometry is skipped without aborting the whole write (the 10h regression).
    """

    # Three near-collinear vertices in BNG: bbox ~28m x 7m but area ~0.04 m²,
    # i.e. AL10 0TU. Without the rescue it snaps to empty at output precision.
    SLIVER = Polygon(
        [(523045.34, 209625.56), (523040.47, 209624.33), (523017.0, 209618.42)]
    )

    @staticmethod
    def _assert_rescued(geojson, message):
        """Check that `geojson` exists and parses to a valid, non-empty geometry."""
        from shapely.geometry import shape

        assert geojson is not None, message
        parsed = shape(geojson)
        assert not parsed.is_empty
        assert parsed.is_valid

    @staticmethod
    def _written_postcodes(out_dir):
        """Return the set of postcodes present in units/AA1.geojson."""
        raw = (out_dir / "units" / "AA1.geojson").read_text()
        features = json.loads(raw)["features"]
        return {feature["properties"]["postcodes"] for feature in features}

    def test_sliver_is_rescued_to_valid_geometry(self):
        """The sliver converts to usable GeoJSON instead of None."""
        self._assert_rescued(
            to_wgs84_geojson(self.SLIVER),
            "sliver must be rescued, not dropped",
        )

    def test_collinear_zero_area_input_is_rescued(self):
        """A zero-area collinear 'polygon' is still rescued.

        It cannot be cleaned to a polygon, so it must come back via the
        representative-point fallback rather than being dropped.
        """
        flat = Polygon(
            [(523000, 209600), (523010, 209600), (523020, 209600), (523000, 209600)]
        )
        assert flat.area == 0.0
        self._assert_rescued(
            to_wgs84_geojson(flat),
            "degenerate input must be rescued, not dropped",
        )

    def test_sliver_postcode_present_in_output(self, tmp_path):
        """The writer keeps a sliver postcode alongside a normal one."""
        geometries = {
            "AA1 1AA": box(530000, 180000, 530100, 180100),
            "AA1 1AB": self.SLIVER,  # must survive
        }
        assert write_district_geojson(geometries, tmp_path) == 1
        assert self._written_postcodes(tmp_path) == {"AA1 1AA", "AA1 1AB"}

    def test_empty_geometry_skipped_not_raised(self, tmp_path):
        """An empty geometry is left out of the output without raising."""
        # The last-resort safety net: an unrescuable (empty) geometry is skipped
        # so one bad postcode can never abort a multi-hour run.
        geometries = {
            "AA1 1AA": box(530000, 180000, 530100, 180100),
            "AA1 1AB": Polygon(),  # genuinely empty
        }
        assert write_district_geojson(geometries, tmp_path) == 1
        assert self._written_postcodes(tmp_path) == {"AA1 1AA"}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# fragments_cache round-trips Phase 3 output and validates freshness
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFragmentsCache:
    """Persisting Phase 3 lets a crashed run resume without the ~10h OA loop."""

    @staticmethod
    def _write_with_mtime(path, text, mtime):
        """Create `path` containing `text` and pin its access/modify time to `mtime`."""
        import os

        path.write_text(text)
        os.utime(path, (mtime, mtime))

    def test_round_trip_preserves_postcodes_and_geometry(self, tmp_path):
        """Saved fragments load back in the same order with equal geometry."""
        fragments = [
            ("AA1 1AA", box(0, 0, 100, 100)),
            ("AA1 1AB", box(200, 200, 250, 260)),
            # A postcode spanning multiple OAs appears as repeated entries.
            ("AA1 1AA", box(100, 0, 150, 100)),
            ("AA1 1AC", MultiPolygon([box(0, 0, 10, 10), box(20, 20, 30, 30)])),
        ]
        cache_file = tmp_path / "fragments_cache.parquet"

        save_fragments(cache_file, fragments)
        reloaded = load_fragments(cache_file)

        saved_postcodes = [postcode for postcode, _ in fragments]
        loaded_postcodes = [postcode for postcode, _ in reloaded]
        assert loaded_postcodes == saved_postcodes
        for (_, before), (_, after) in zip(fragments, reloaded):
            assert after.equals(before)

    def test_save_is_atomic_no_tmp_left_behind(self, tmp_path):
        """After a save the cache exists and no `.tmp` sibling remains."""
        cache_file = tmp_path / "fragments_cache.parquet"
        save_fragments(cache_file, [("AA1 1AA", box(0, 0, 1, 1))])

        leftover = tmp_path / "fragments_cache.parquet.tmp"
        assert cache_file.exists()
        assert not leftover.exists()

    def test_missing_cache_is_not_fresh(self, tmp_path):
        """A cache file that does not exist is never fresh."""
        cache_file = tmp_path / "fragments_cache.parquet"
        source = tmp_path / "uprn.parquet"
        source.write_text("x")
        assert fragments_cache_is_fresh(cache_file, [source]) is False

    def test_cache_newer_than_inputs_is_fresh(self, tmp_path):
        """A cache modified after its input is fresh; a None input is tolerated."""
        source = tmp_path / "uprn.parquet"
        cache_file = tmp_path / "fragments_cache.parquet"
        self._write_with_mtime(source, "x", 1_000)
        self._write_with_mtime(cache_file, "c", 2_000)
        assert fragments_cache_is_fresh(cache_file, [source, None]) is True

    def test_cache_older_than_any_input_is_stale(self, tmp_path):
        """An input touched after the cache was written makes the cache stale."""
        source = tmp_path / "oa.gpkg"
        cache_file = tmp_path / "fragments_cache.parquet"
        self._write_with_mtime(cache_file, "c", 1_000)
        self._write_with_mtime(source, "x", 2_000)  # input touched after the cache
        assert fragments_cache_is_fresh(cache_file, [source]) is False

    def test_missing_input_is_ignored(self, tmp_path):
        """An input path that does not exist cannot invalidate the cache."""
        cache_file = tmp_path / "fragments_cache.parquet"
        cache_file.write_text("c")
        # arcgis is optional/absent — it cannot have invalidated the cache.
        absent_input = tmp_path / "absent.parquet"
        assert fragments_cache_is_fresh(cache_file, [absent_input]) is True
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue