Fix enrich listing
This commit is contained in:
parent
c2945567d7
commit
cf39ad754e
3 changed files with 529 additions and 6 deletions
|
|
@ -954,6 +954,173 @@ def test_match_direct_epc_matches_by_address_in_same_postcode() -> None:
|
|||
assert matches["_direct_epc_match_method"].to_list() == ["address"]
|
||||
|
||||
|
||||
def test_match_direct_epc_street_fallback_matches_numberless_listing() -> None:
    # Rightmove-style listings normally carry a street-level address with no
    # house number and no UPRN, so they can never pass the strict number gate.
    # A same-street certificate in the listing's own postcode must still
    # supply street-representative EPC facts, tagged with the lower-confidence
    # "street" method.
    street_only_listing = _listing_matches(
        [{"_listing_match_address": "EXAMPLE ROAD BROMLEY"}]
    )
    same_street_certificate = _direct_epc_candidates(
        [{"_direct_epc_match_address": "7 EXAMPLE ROAD"}]
    )

    result = _match_direct_epc(street_only_listing, same_street_certificate)

    assert result.height == 1
    assert result["_direct_epc_match_method"].to_list() == ["street"]
|
||||
|
||||
|
||||
def test_match_direct_epc_street_fallback_prefers_attribute_agreement() -> None:
    # All certificates on the street tie on street similarity, so the winner
    # has to be the one whose attributes best agree with the listing -- floor
    # area in this scenario.
    listing_row = {
        "_listing_idx": 0,
        "_listing_match_address": "EXAMPLE ROAD BROMLEY",
        "_listing_match_postcode": "AA11AA",
        "_listing_uprn": None,
        "_actual_total_floor_area": 78.0,
    }
    listing_schema = {**_LISTING_MATCH_SCHEMA, "_actual_total_floor_area": pl.Float64}
    listing_frame = pl.DataFrame([listing_row], schema=listing_schema)

    # Floor area far from the listing's 78.0.
    far_area_certificate = {
        "_direct_epc_match_address": "7 EXAMPLE ROAD",
        "_direct_epc_address": "7, Example Road",
        "_direct_total_floor_area": 150.0,
    }
    # Floor area close to the listing's 78.0 -- the expected winner.
    near_area_certificate = {
        "_direct_epc_row": 1,
        "_direct_epc_match_address": "9 EXAMPLE ROAD",
        "_direct_epc_address": "9, Example Road",
        "_direct_total_floor_area": 80.0,
    }
    certificates = _direct_epc_candidates(
        [far_area_certificate, near_area_certificate]
    )

    result = _match_direct_epc(listing_frame, certificates)

    assert result.height == 1
    assert result["_direct_epc_address"].to_list() == ["9, Example Road"]
    assert result["_direct_epc_match_method"].to_list() == ["street"]
|
||||
|
||||
|
||||
def test_match_direct_epc_street_fallback_spans_postcodes_within_outcode() -> None:
    # A long street runs through several postcode units. When the listing's
    # own postcode holds no certificate, a street-only listing must still be
    # matched to a same-street certificate filed under a sibling postcode of
    # the same outcode.
    listing_row = {
        "_listing_match_address": "EXAMPLE ROAD BROMLEY",
        "_listing_match_postcode": "AA12ZZ",
    }
    sibling_postcode_certificate = {
        "_direct_epc_match_address": "7 EXAMPLE ROAD",
        "_direct_epc_match_postcode": "AA11AA",
    }

    result = _match_direct_epc(
        _listing_matches([listing_row]),
        _direct_epc_candidates([sibling_postcode_certificate]),
    )

    assert result.height == 1
    assert result["_direct_epc_match_method"].to_list() == ["street"]
|
||||
|
||||
|
||||
def test_match_direct_epc_street_fallback_prefers_own_postcode_segment() -> None:
    # On a single street, a certificate sharing the listing's postcode unit
    # is the nearest segment, so it must beat an otherwise equal candidate
    # from further along the street.
    further_along_certificate = {
        "_direct_epc_match_address": "7 EXAMPLE ROAD",
        "_direct_epc_address": "7, Example Road",
        "_direct_epc_match_postcode": "AA12ZZ",
    }
    own_segment_certificate = {
        "_direct_epc_row": 1,
        "_direct_epc_match_address": "9 EXAMPLE ROAD",
        "_direct_epc_address": "9, Example Road",
        "_direct_epc_match_postcode": "AA11AA",
    }

    street_only_listing = _listing_matches(
        [{"_listing_match_address": "EXAMPLE ROAD BROMLEY"}]
    )
    certificates = _direct_epc_candidates(
        [further_along_certificate, own_segment_certificate]
    )
    result = _match_direct_epc(street_only_listing, certificates)

    assert result.height == 1
    assert result["_direct_epc_address"].to_list() == ["9, Example Road"]
|
||||
|
||||
|
||||
def test_match_direct_epc_street_fallback_recovers_numbered_listing() -> None:
    # The listing has a house number, but no certificate exists for it: the
    # number sets are disjoint, so the strict gate rejects every candidate.
    # The fallback must still hand back a street-representative certificate.
    numbered_listing = _listing_matches(
        [{"_listing_match_address": "17 EXAMPLE ROAD BROMLEY"}]
    )
    other_number_certificate = _direct_epc_candidates(
        [{"_direct_epc_match_address": "9 EXAMPLE ROAD"}]
    )

    result = _match_direct_epc(numbered_listing, other_number_certificate)

    assert result.height == 1
    assert result["_direct_epc_match_method"].to_list() == ["street"]
|
||||
|
||||
|
||||
def test_match_direct_epc_street_fallback_rejects_town_only_address() -> None:
    # "COULSDON SURREY" names a town, not a street. Its only overlap with the
    # candidates is the locality suffix that most street keys in the outcode
    # carry, so with no street-name anchor it must not subset-inflate onto an
    # arbitrary street.
    streets_in_town = [
        ("49", "LACKFORD ROAD"),
        ("12", "CHIPSTEAD VALLEY ROAD"),
        ("3", "WINDERMERE ROAD"),
    ]
    certificate_rows = [
        {
            "_direct_epc_row": row,
            "_direct_epc_match_address": f"{number} {street} SURREY COULSDON",
        }
        for row, (number, street) in enumerate(streets_in_town)
    ]

    town_only_listing = _listing_matches(
        [{"_listing_match_address": "COULSDON SURREY"}]
    )
    result = _match_direct_epc(
        town_only_listing,
        _direct_epc_candidates(certificate_rows),
    )

    assert result.height == 0
|
||||
|
||||
|
||||
def test_match_direct_epc_street_fallback_rejects_single_token_query() -> None:
    # token_set_ratio returns 100 as soon as the query's tokens are a subset
    # of the candidate's, so a bare one-token name must never be accepted as
    # a street match.
    one_token_listing = _listing_matches([{"_listing_match_address": "KINGSWOOD"}])
    superset_certificate = _direct_epc_candidates(
        [{"_direct_epc_match_address": "4 KINGSWOOD ROAD"}]
    )

    result = _match_direct_epc(one_token_listing, superset_certificate)

    assert result.height == 0
|
||||
|
||||
|
||||
def test_match_direct_epc_street_fallback_rejects_different_street() -> None:
    # The fallback means "same street within the postcode", not "anything in
    # the postcode": a certificate on a different street must stay unmatched.
    listing_on_oldstead = _listing_matches(
        [{"_listing_match_address": "OLDSTEAD ROAD BROMLEY"}]
    )
    certificate_on_cambridge = _direct_epc_candidates(
        [{"_direct_epc_match_address": "5 CAMBRIDGE ROAD"}]
    )

    result = _match_direct_epc(listing_on_oldstead, certificate_on_cambridge)

    assert result.height == 0
|
||||
|
||||
|
||||
def test_normalize_uprn_handles_types_and_floats() -> None:
|
||||
assert _normalize_uprn(None) is None
|
||||
assert _normalize_uprn("") is None
|
||||
|
|
@ -1167,13 +1334,20 @@ def test_match_listing_properties_uprn_wins_dedup_tie() -> None:
|
|||
assert matches["_property_match_method"].to_list() == ["uprn"]
|
||||
|
||||
|
||||
def test_match_direct_epc_does_not_match_other_postcode_without_uprn() -> None:
|
||||
# Matching is by postcode/UPRN/street — never by coordinate proximity — so a
|
||||
# same-street EPC in a different postcode with no shared UPRN is skipped.
|
||||
def test_match_direct_epc_does_not_match_other_outcode_without_uprn() -> None:
|
||||
# Matching is by postcode/UPRN/street — never by coordinate proximity — and
|
||||
# the street fallback is outcode-scoped, so a same-street EPC in a different
|
||||
# OUTCODE with no shared UPRN is skipped.
|
||||
matches = _match_direct_epc(
|
||||
_listing_matches([{"_listing_match_postcode": "AA11AA"}]),
|
||||
_direct_epc_candidates(
|
||||
[{"_direct_epc_match_postcode": "BB22BB", "_direct_epc_uprn": None}]
|
||||
[
|
||||
{
|
||||
"_direct_epc_match_postcode": "BB22BB",
|
||||
"_direct_epc_outcode": "BB2",
|
||||
"_direct_epc_uprn": None,
|
||||
}
|
||||
]
|
||||
),
|
||||
)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue