This commit is contained in:
Andras Schmelczer 2026-05-26 19:45:13 +01:00
parent c645b0f1d4
commit 39ef5c6646
79 changed files with 5660 additions and 2199 deletions

View file

@ -22,6 +22,7 @@ LISTED_BUILDING_FEATURE = "Listed building"
# Listed-building matching parameters; the code that consumes them is outside
# this view.
# NOTE(review): the _M suffix presumably means metres - confirm at the call site.
LISTED_BUILDING_MATCH_RADIUS_M = 250.0
LISTED_BUILDING_NEAREST_POSTCODES = 3
LISTED_BUILDING_MIN_MATCH_SCORE = 95
# Prefix identifying a placeholder conservation-area record. It is matched with
# startswith() against the stripped, casefolded NAME value, so it must itself be
# kept in casefolded (lower-case) form.
_UNPUBLISHED_CONSERVATION_AREA_PREFIX = "no data available for publication"
_IOD_PERCENTILE_COLUMNS = [
"Education, Skills and Training Score",
@ -429,19 +430,38 @@ def _normalise_crs(crs: object | None) -> str:
return str(crs) if crs else "EPSG:4326"
def _is_unpublished_conservation_area_record(name: object) -> bool:
    """Return True when *name* marks an unpublished placeholder record.

    Non-string values (for example ``None``) are never treated as
    placeholders. Strings are compared after stripping surrounding whitespace
    and casefolding, so the check ignores case and leading/trailing padding.
    """
    if not isinstance(name, str):
        return False
    normalised = name.strip().casefold()
    return normalised.startswith(_UNPUBLISHED_CONSERVATION_AREA_PREFIX)
def _load_conservation_area_geometries(
    conservation_areas_path: Path,
) -> tuple[list[BaseGeometry], str]:
    """Load the usable conservation-area geometries and their CRS.

    Reads the ``NAME`` attribute and the geometry column from
    *conservation_areas_path*, decodes the geometries from WKB, and keeps
    every geometry that is neither ``None`` nor empty. Records whose name is
    recognised by ``_is_unpublished_conservation_area_record`` are dropped,
    and the number dropped is printed.

    Args:
        conservation_areas_path: Path of the dataset to read with pyogrio.

    Returns:
        A ``(geometries, crs)`` tuple, where ``crs`` is the dataset CRS as
        normalised by ``_normalise_crs``.

    Raises:
        ValueError: If no usable geometry remains after filtering.
    """
    # Read the file once. Only NAME is requested as an attribute column; the
    # geometry column is still present in the returned table.
    metadata, table = pyogrio.read_arrow(conservation_areas_path, columns=["NAME"])
    # Fall back to the last column when the metadata does not name the
    # geometry column.
    geometry_name = metadata.get("geometry_name") or table.column_names[-1]
    names = table["NAME"].combine_chunks().to_pylist()
    wkb_values = table[geometry_name].combine_chunks().to_pylist()

    geometries = []
    skipped_unpublished = 0
    # strict=True turns a length mismatch between names and geometries into an
    # error instead of silently truncating the longer sequence.
    for name, geom in zip(names, from_wkb(wkb_values), strict=True):
        if _is_unpublished_conservation_area_record(name):
            skipped_unpublished += 1
        elif geom is not None and not geom.is_empty:
            geometries.append(geom)

    if not geometries:
        raise ValueError(
            f"{conservation_areas_path} does not contain any usable polygon geometries"
        )
    if skipped_unpublished:
        print(
            "Skipped "
            f"{skipped_unpublished} Historic England unpublished conservation-area "
            "placeholder polygons"
        )
    return geometries, _normalise_crs(metadata.get("crs"))