has issues

This commit is contained in:
Andras Schmelczer 2026-05-25 13:20:17 +01:00
parent 2e112d7398
commit c645b0f1d4
96 changed files with 2147083 additions and 5787 deletions

View file

@@ -1,11 +1,17 @@
import polars as pl
import pytest
from shapely import box
from pipeline.transform.merge import (
_AREA_COLUMNS,
CONSERVATION_AREA_FEATURE,
LISTED_BUILDING_FEATURE,
TREE_DENSITY_FEATURE,
_is_dynamic_poi_metric_column,
_less_deprived_percentile_expr,
_matched_listed_building_flags,
_postcode_conservation_area_flags,
_postcode_listed_building_candidates,
_tree_density_by_postcode,
_validate_lad_source_coverage,
_validate_property_postcodes,
@@ -48,6 +54,106 @@ def test_country_code_is_kept_in_postcode_area_columns() -> None:
assert "ctry25cd" in _AREA_COLUMNS
def test_conservation_area_feature_is_area_level() -> None:
    """The conservation-area feature must be one of the area columns."""
    area_columns = _AREA_COLUMNS
    assert CONSERVATION_AREA_FEATURE in area_columns
def test_listed_building_feature_is_property_level() -> None:
    """The listed-building feature must stay out of the area columns."""
    is_area_column = LISTED_BUILDING_FEATURE in _AREA_COLUMNS
    assert not is_area_column
def test_postcode_conservation_area_flags_marks_point_membership() -> None:
    """Postcodes are flagged "Yes" only when their point lies in an area.

    Uses one unit-square area and three postcodes: one inside it, one
    outside it, and one with a missing latitude. The latter two are
    expected to be flagged "No".
    """
    unit_square = box(0, 0, 1, 1)
    postcode_points = pl.DataFrame(
        {
            "postcode": ["AA1 1AA", "BB1 1BB", "CC1 1CC"],
            "lat": [0.5, 2.0, None],
            "lon": [0.5, 2.0, 0.5],
        }
    )

    # NOTE(review): batch_size=2 with three rows presumably exercises more
    # than one batch -- confirm against the helper's implementation.
    flags = _postcode_conservation_area_flags(
        postcode_points, [unit_square], "EPSG:4326", batch_size=2
    )
    observed = flags.sort("postcode").to_dicts()

    expected_by_postcode = {"AA1 1AA": "Yes", "BB1 1BB": "No", "CC1 1CC": "No"}
    expected = [
        {"postcode": postcode, CONSERVATION_AREA_FEATURE: flag}
        for postcode, flag in expected_by_postcode.items()
    ]
    assert observed == expected
def test_postcode_listed_building_candidates_uses_nearby_postcodes() -> None:
    """Only a listed building near an active postcode yields a candidate.

    The first listed point sits at (100, 100), a few units from postcode
    "AA1 1AA" at (105, 105) and inside the ``max_distance_m=25`` limit.
    The second listed point at (1000, 1000) has no postcode that close,
    so it must not appear in the result.
    """
    # Upper-case the name, collapse every non-alphanumeric run and every
    # whitespace run to a single space, then trim the ends.
    match_name = (
        pl.col("Name")
        .str.to_uppercase()
        .str.replace_all(r"[^0-9A-Z]+", " ")
        .str.replace_all(r"\s+", " ")
        .str.strip_chars()
        .alias("_listed_match_name")
    )
    raw_listed = pl.DataFrame(
        {
            "ListEntry": [1234, 5678],
            "Name": ["1 and 2 High Street", "Distant Hall"],
            "Grade": ["II", "I"],
            "Easting": [100.0, 1000.0],
            "Northing": [100.0, 1000.0],
        }
    )
    listed = raw_listed.with_columns(match_name)
    postcodes = pl.DataFrame(
        {
            "postcode": ["AA1 1AA", "BB1 1BB"],
            "east1m": [105.0, 5000.0],
            "north1m": [105.0, 5000.0],
        }
    )

    candidates = _postcode_listed_building_candidates(
        listed, postcodes, nearest_postcodes=1, max_distance_m=25
    )
    observed = candidates.select("postcode", "_listed_match_name").to_dicts()

    expected = [
        {"postcode": "AA1 1AA", "_listed_match_name": "1 AND 2 HIGH STREET"}
    ]
    assert observed == expected
def test_matched_listed_building_flags_requires_address_match() -> None:
    """Only properties whose address matches a candidate are flagged "Yes".

    Postcode "AA1 1AA" holds two properties but a single candidate named
    "1 AND 2 HIGH STREET": the result is expected to contain
    "1 HIGH STREET" and to omit "99 HIGH STREET". "THE OLD RECTORY" is
    expected to match the "OLD RECTORY" candidate in "BB1 1BB".
    """
    property_rows = pl.DataFrame(
        {
            "postcode": ["AA1 1AA", "AA1 1AA", "BB1 1BB"],
            "pp_address": ["1 HIGH STREET", "99 HIGH STREET", "THE OLD RECTORY"],
            "epc_address": ["1, High Street", "99, High Street", "Old Rectory"],
        }
    )
    candidate_rows = pl.DataFrame(
        {
            "postcode": ["AA1 1AA", "BB1 1BB"],
            "_listed_match_name": ["1 AND 2 HIGH STREET", "OLD RECTORY"],
            "_listed_grade": ["II", "II*"],
            "_listed_entry": [1234, 5678],
        }
    )

    flags = _matched_listed_building_flags(
        property_rows.lazy(), candidate_rows, min_score=95
    )
    observed = flags.sort("postcode", "pp_address").to_dicts()

    matched_addresses = (
        ("AA1 1AA", "1 HIGH STREET"),
        ("BB1 1BB", "THE OLD RECTORY"),
    )
    expected = [
        {
            "postcode": postcode,
            "pp_address": address,
            LISTED_BUILDING_FEATURE: "Yes",
        }
        for postcode, address in matched_addresses
    ]
    assert observed == expected
def test_validate_property_postcodes_rejects_blank_rows() -> None:
df = pl.DataFrame(
{