# perfect-postcode/finder/test_transform.py
#
# 175 lines
# 6.5 KiB
# Python

from transform import (
build_register_address,
clean_listing_address,
extract_full_postcode,
extract_outcode,
resolve_listing_postcode,
transform_property,
)
class StubPostcodeIndex:
    """Test double for the postcode index passed to ``transform_property``.

    ``nearest`` always answers with the postcode supplied at construction, so
    each test decides directly what the coordinate lookup reports.
    """

    def __init__(self, postcode: str = "SW1A 9ZZ") -> None:
        """Remember the postcode that every ``nearest`` call will return."""
        self._postcode = postcode

    def nearest(self, lat: float, lng: float) -> str:
        """Return the configured postcode; ``lat`` and ``lng`` are ignored."""
        return self._postcode
def test_extract_full_postcode_normalizes_spacing() -> None:
    """A full postcode found in an address comes back in spaced form."""
    # Run-together and already-spaced spellings are expected to agree.
    assert extract_full_postcode("10 Downing Street SW1A2AA") == "SW1A 2AA"
    assert extract_full_postcode("10 Downing Street, SW1A 2AA") == "SW1A 2AA"
    # An address carrying no full postcode is expected to give None.
    assert extract_full_postcode("Downing Street, Westminster") is None
def test_clean_listing_address_removes_postcode_and_outcode_suffixes() -> None:
    """Trailing postcodes and bare outcodes are dropped from the address."""
    # Full postcode suffix.
    assert clean_listing_address("10 Downing Street, SW1A 2AA") == "10 Downing Street"
    # Outcode-only suffix.
    assert clean_listing_address("Hawthorne Road, Bromley, Kent, BR1") == (
        "Hawthorne Road, Bromley, Kent"
    )
    # Nothing to strip -> the address is returned unchanged.
    assert clean_listing_address("Kings Avenue, Bromley") == "Kings Avenue, Bromley"
def test_build_register_address_prepends_house_number_or_name() -> None:
    """The register address is the cleaned address led by the number/name."""
    # House number/name prepended, with the trailing outcode/postcode stripped.
    assert (
        build_register_address("South Street, Bromley BR1", "12")
        == "12, South Street, Bromley"
    )
    assert (
        build_register_address("Riverside, Martham NR29", "Martham Mill")
        == "Martham Mill, Riverside, Martham"
    )
    # No number/name -> identical to the plain cleaned address.
    assert (
        build_register_address("Kings Avenue, Bromley", None)
        == "Kings Avenue, Bromley"
    )
    # Already starts with the number/name -> no duplication.
    assert (
        build_register_address("12 South Street, Bromley", "12")
        == "12 South Street, Bromley"
    )
    # Empty/whitespace number/name is ignored.
    assert (
        build_register_address("Kings Avenue, Bromley", " ")
        == "Kings Avenue, Bromley"
    )
def test_extract_outcode() -> None:
    """The outcode is returned upper-cased; missing input gives None."""
    assert extract_outcode("SW1A 2AA") == "SW1A"
    # Lower-case input is expected to be upper-cased.
    assert extract_outcode("n4 2ha") == "N4"
    # A postcode written without its space still yields the outcode.
    assert extract_outcode("SW1A2AA") == "SW1A"
    # Absent or empty input has no outcode.
    assert extract_outcode(None) is None
    assert extract_outcode("") is None
def test_resolve_listing_postcode() -> None:
    """The extracted postcode wins only if its outcode matches the inferred one.

    Each call passes ``(extracted, inferred)`` and expects a
    ``(postcode, source)`` pair back.
    """
    # Outcode matches -> trust the more precise extracted postcode.
    assert resolve_listing_postcode("SW1A 2AA", "SW1A 9ZZ") == (
        "SW1A 2AA",
        "address",
    )
    # Outcode mismatch -> fall back to the spatially-correct inferred postcode.
    assert resolve_listing_postcode("E14 9SS", "SW1A 9ZZ") == (
        "SW1A 9ZZ",
        "coordinates",
    )
    # Well-formed but fabricated postcode in a different outcode is rejected.
    assert resolve_listing_postcode("ZZ9 9ZZ", "SW1A 9ZZ") == (
        "SW1A 9ZZ",
        "coordinates",
    )
    # No extracted postcode -> inferred is authoritative.
    assert resolve_listing_postcode(None, "SW1A 9ZZ") == (
        "SW1A 9ZZ",
        "coordinates",
    )
def test_rightmove_transform_prefers_postcode_from_display_address() -> None:
    """A display-address postcode sharing the inferred outcode is trusted."""
    display_address = "Flat 2, 10 Downing Street, SW1A 2AA"
    # Start from the shared listing fixture and override only the fields that
    # differ, instead of repeating the whole payload here.
    prop = {
        **_rightmove_prop(),
        "id": "123",
        "propertyUrl": "/properties/123",
        "displayAddress": display_address,
    }
    result = transform_property(prop, "SW1A", StubPostcodeIndex())
    assert result is not None
    assert result["Postcode"] == "SW1A 2AA"
    assert result["Postcode source"] == "address"
    assert result["Extracted postcode"] == "SW1A 2AA"
    # The stub's default postcode is what the coordinate lookup reports.
    assert result["Inferred postcode"] == "SW1A 9ZZ"
    assert result["Listing raw address"] == display_address
    assert result["Address per Property Register"] == "Flat 2, 10 Downing Street"
def test_rightmove_transform_rejects_postcode_from_wrong_outcode() -> None:
    """A display-address postcode in another outcode does not become the result."""
    # Start from the shared listing fixture and override only the fields that
    # differ, instead of repeating the whole payload here.
    prop = {
        **_rightmove_prop(),
        "id": "124",
        "propertyUrl": "/properties/124",
        # Address postcode is in a different outcode than the coordinate-nearest one.
        "displayAddress": "10 Downing Street, E14 9SS",
    }
    result = transform_property(prop, "SW1A", StubPostcodeIndex())
    assert result is not None
    # The spatially-correct inferred postcode wins over the mismatching extracted one.
    assert result["Postcode"] == "SW1A 9ZZ"
    assert result["Postcode source"] == "coordinates"
    # The rejected postcode is still reported as what was extracted.
    assert result["Extracted postcode"] == "E14 9SS"
def _rightmove_prop() -> dict:
    """Build a fresh listing payload for the ``transform_property`` tests.

    A new dict (including the nested ``location`` and ``price`` dicts) is
    created on every call, so a test may mutate its copy freely.
    """
    return {
        "id": "200",
        "location": {"latitude": 51.5, "longitude": -0.1},
        "price": {"amount": 750000, "displayPrices": []},
        "propertySubType": "Terraced",
        "bedrooms": 3,
        "bathrooms": 1,
        "keyFeatures": [],
        "propertyUrl": "/properties/200",
        # Search API only ever exposes the outcode in the display address.
        "displayAddress": "Caldwell Street, Stockwell, SW9",
    }
def test_rightmove_transform_prefers_detail_postcode() -> None:
    """A detail-page postcode in the location's outcode beats the coordinate guess."""
    # The detail page's true full postcode (same outcode as the location) is
    # preferred over the coordinate-nearest guess.
    result = transform_property(
        _rightmove_prop(),
        "SW9",
        StubPostcodeIndex("SW9 7AA"),
        detail_postcode="SW9 0HD",
    )
    assert result is not None
    assert result["Postcode"] == "SW9 0HD"
    assert result["Postcode source"] == "detail_address"
    # The coordinate inference is still surfaced separately.
    assert result["Inferred postcode"] == "SW9 7AA"
def test_rightmove_transform_rejects_detail_postcode_from_wrong_outcode() -> None:
    """A detail-page postcode in a different outcode is not used."""
    # A detail postcode whose outcode disagrees with the location must not
    # relocate the listing; the coordinate postcode wins instead.
    result = transform_property(
        _rightmove_prop(),
        "SW9",
        StubPostcodeIndex("SW9 7AA"),
        detail_postcode="E14 9SS",
    )
    assert result is not None
    assert result["Postcode"] == "SW9 7AA"
    assert result["Postcode source"] == "coordinates"
def test_rightmove_transform_without_detail_keeps_coordinate_logic() -> None:
    """With no detail postcode supplied, the coordinate-nearest postcode is used."""
    # No detail postcode -> behaviour is unchanged (coordinate-nearest).
    result = transform_property(
        _rightmove_prop(),
        "SW9",
        StubPostcodeIndex("SW9 7AA"),
    )
    assert result is not None
    assert result["Postcode"] == "SW9 7AA"
    assert result["Postcode source"] == "coordinates"