improve
This commit is contained in:
parent
8688b7475e
commit
e8345cbdc1
40 changed files with 1980 additions and 904 deletions
|
|
@ -10,8 +10,10 @@ from pipeline.transform.merge import (
|
|||
LISTED_BUILDING_FEATURE,
|
||||
TREE_DENSITY_FEATURE,
|
||||
_LISTING_OVERLAY_SOURCES,
|
||||
_active_english_postcode_area,
|
||||
_build_unmatched_listing_seed_rows,
|
||||
_canonical_postcode_expr,
|
||||
_filter_to_active_english_postcodes,
|
||||
_finalize_listings,
|
||||
_integrate_listings,
|
||||
_match_direct_epc,
|
||||
|
|
@ -24,8 +26,11 @@ from pipeline.transform.merge import (
|
|||
_matched_listed_building_flags,
|
||||
_postcode_conservation_area_flags,
|
||||
_postcode_listed_building_candidates,
|
||||
_remap_terminated_postcodes,
|
||||
_split_normal_outputs,
|
||||
_tree_density_by_postcode,
|
||||
_validate_lad_source_coverage,
|
||||
_validate_postcode_feature_output,
|
||||
_validate_property_postcodes,
|
||||
)
|
||||
|
||||
|
|
@ -79,6 +84,113 @@ def test_crime_columns_are_spatial_counts_not_per_capita() -> None:
|
|||
assert "Minor crime per 1k residents (avg/yr)" not in _AREA_COLUMNS
|
||||
|
||||
|
||||
def test_active_english_postcode_area_filters_to_active_england() -> None:
    """Check that only the un-terminated ``E92000001`` postcode row is returned.

    The fixture holds three rows: one with country code ``E92000001`` and a
    null ``doterm``, one with the same country code but a ``doterm`` date, and
    one with country code ``W92000004``. Only the first is expected back,
    exposed under the keys ``postcode``/``lat``/``lon``/``ctry25cd``/
    ``lsoa21``/``oa21``/``pcon``.
    """
    source_columns = {
        "pcds": ["AA1 1AA", "AA1 1AB", "CF1 1AA"],
        "ctry25cd": ["E92000001", "E92000001", "W92000004"],
        "doterm": [None, "2020-01-01", None],
        "lat": [51.0, 51.1, 52.0],
        "long": [-0.1, -0.2, -3.0],
        "lsoa21cd": ["L1", "L2", "L3"],
        "oa21cd": ["O1", "O2", "O3"],
        "pcon24cd": ["P1", "P2", "P3"],
    }
    source = pl.DataFrame(source_columns).lazy()

    rows = _active_english_postcode_area(source).collect().to_dicts()

    expected_row = {
        "postcode": "AA1 1AA",
        "lat": 51.0,
        "lon": -0.1,
        "ctry25cd": "E92000001",
        "lsoa21": "L1",
        "oa21": "O1",
        "pcon": "P1",
    }
    assert rows == [expected_row]
|
||||
|
||||
|
||||
def test_remap_then_active_filter_keeps_terminated_english_properties() -> None:
    """Check that remapping runs before the active-postcode filter.

    Row 1 carries ``OLD 1AA``, which the mapping rewrites to ``NEW 1AA``;
    row 2 already carries ``NEW 1AA``; row 3 carries ``CF1 1AA``, which is
    absent from the active postcode list. After remap + filter, rows 1 and 2
    are expected to remain (both as ``NEW 1AA``) and row 3 to be dropped.
    """
    properties = pl.DataFrame(
        {
            "postcode": ["OLD 1AA", "NEW 1AA", "CF1 1AA"],
            "row_id": [1, 2, 3],
        }
    ).lazy()
    terminated_to_current = pl.DataFrame(
        {"old_postcode": ["OLD 1AA"], "new_postcode": ["NEW 1AA"]}
    ).lazy()
    active_postcodes = pl.DataFrame({"postcode": ["NEW 1AA"]}).lazy()

    remapped = _remap_terminated_postcodes(properties, terminated_to_current)
    kept = _filter_to_active_english_postcodes(remapped, active_postcodes)
    # Sort so the comparison does not depend on the row order of the result.
    rows = kept.collect().sort("row_id").to_dicts()

    assert rows == [
        {"postcode": "NEW 1AA", "row_id": 1},
        {"postcode": "NEW 1AA", "row_id": 2},
    ]
|
||||
|
||||
|
||||
def test_split_normal_outputs_uses_postcode_feature_universe() -> None:
    """Check that the postcode output follows the postcode feature frame.

    The property frame has a single row (``AA1 1AA``) while the postcode
    feature frame has two (``AA1 1AA`` and ``BB1 1BB``). The postcode output
    is expected to list both postcodes and keep the amenity-distance column;
    the property output is expected to hold only the postcode, address and
    price fields of the single property row.
    """
    amenity_column = "Distance to nearest amenity (Park) (km)"
    property_rows = pl.DataFrame(
        {
            "Postcode": ["AA1 1AA"],
            "Address per Property Register": ["1 Example Road"],
            "Last known price": [250_000],
            "lat": [51.0],
            "lon": [-0.1],
            "ctry25cd": ["E92000001"],
            "lsoa21": ["L1"],
        }
    )
    feature_rows = pl.DataFrame(
        {
            "Postcode": ["AA1 1AA", "BB1 1BB"],
            "lat": [51.0, 52.0],
            "lon": [-0.1, -0.2],
            "ctry25cd": ["E92000001", "E92000001"],
            "lsoa21": ["L1", "L2"],
            amenity_column: [0.3, 0.8],
        }
    )

    split = _split_normal_outputs(
        property_rows, feature_rows, expected_postcode_count=2
    )
    postcode_df, properties_df = split

    assert postcode_df["Postcode"].to_list() == ["AA1 1AA", "BB1 1BB"]
    assert amenity_column in postcode_df.columns
    expected_property = {
        "Postcode": "AA1 1AA",
        "Address per Property Register": "1 Example Road",
        "Last known price": 250_000,
    }
    assert properties_df.to_dicts() == [expected_property]
|
||||
|
||||
|
||||
def test_postcode_feature_validation_rejects_unsupported_or_ungeocoded_rows() -> None:
    """Check that validation raises for a row with null coordinates.

    The second fixture row has null ``lat``/``lon`` and country code
    ``W92000004``; validating the frame is expected to raise ``ValueError``
    whose message matches ``"unsupported or ungeocoded"``.
    """
    columns = {
        "Postcode": ["AA1 1AA", "CF1 1AA"],
        "lat": [51.0, None],
        "lon": [-0.1, None],
        "ctry25cd": ["E92000001", "W92000004"],
    }
    frame_with_bad_row = pl.DataFrame(columns)

    with pytest.raises(ValueError, match="unsupported or ungeocoded"):
        _validate_postcode_feature_output(
            frame_with_bad_row, expected_postcode_count=2
        )
|
||||
|
||||
|
||||
def test_listed_building_feature_is_property_level() -> None:
    """Check that the listed-building feature is not among the area columns."""
    listed_as_area_column = LISTED_BUILDING_FEATURE in _AREA_COLUMNS
    assert not listed_as_area_column
|
||||
|
||||
|
|
@ -418,9 +530,7 @@ def test_build_unmatched_listing_seed_rows_fills_property_shape_fields(
|
|||
)
|
||||
unmatched_idxs = listings.select("_listing_idx")
|
||||
|
||||
seed = _build_unmatched_listing_seed_rows(
|
||||
unmatched_idxs, listings, template_schema
|
||||
)
|
||||
seed = _build_unmatched_listing_seed_rows(unmatched_idxs, listings, template_schema)
|
||||
|
||||
assert seed.height == 1
|
||||
assert seed["postcode"].to_list() == ["SW1A 1AA"]
|
||||
|
|
@ -550,7 +660,12 @@ def test_match_direct_epc_matches_by_uprn_across_postcodes() -> None:
|
|||
[{"_listing_uprn": "100000000001", "_listing_match_postcode": "ZZ99ZZ"}]
|
||||
),
|
||||
_direct_epc_candidates(
|
||||
[{"_direct_epc_uprn": "100000000001", "_direct_epc_match_postcode": "AA11AA"}]
|
||||
[
|
||||
{
|
||||
"_direct_epc_uprn": "100000000001",
|
||||
"_direct_epc_match_postcode": "AA11AA",
|
||||
}
|
||||
]
|
||||
),
|
||||
)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue