"""Tests for the zoopla detail-page helpers: cache keys, geo parsing, transform."""

from zoopla import _detail_cache_key, parse_detail_geo, transform_property


def test_detail_cache_key_uses_listing_id() -> None:
    """The cache key is the numeric listing id for relative and absolute URLs."""
    assert _detail_cache_key("/for-sale/details/59888978/") == "59888978"
    assert _detail_cache_key("https://www.zoopla.co.uk/for-sale/details/59888978/") == "59888978"
    # No id in the URL -> fall back to the URL itself as the key.
    assert _detail_cache_key("/for-sale/property/br1/") == "/for-sale/property/br1/"


class StubPostcodeIndex:
    """Spatial index stub whose nearest-lookup returns a fixed postcode."""

    def __init__(self, postcode: str = "BR1 2AB") -> None:
        self._postcode = postcode

    def nearest(self, lat: float, lng: float) -> str:
        # The coordinates are deliberately ignored: every lookup yields the
        # postcode the stub was constructed with.
        return self._postcode


# London-ish postcodes with coordinates, plus the Norfolk sample used by the
# verified detail-page snippet (well inside the England bounds check).
PC_COORDS = {
    "BR1 2AB": (51.40, 0.01),
    "SW1A 1AA": (51.50, -0.14),
    "NR29 4RG": (52.716014, 1.614495),
}

# Verified RSC `location` object (listing 59888978), as it appears escaped inside
# a self.__next_f flight chunk in page.content().
# NOTE(review): this literal was empty in the file, which cannot satisfy the
# test below (an empty document is asserted to parse to None elsewhere in this
# module). It has been reconstructed from the fields that test asserts, using
# the key names of the unescaped fixtures with every quote backslash-escaped.
# Confirm against the real capture before relying on it as "verified".
_LOCATION_ESCAPED = (
    r'\"location\":{\"outcode\":\"NR29\",'
    r'\"coordinates\":{\"latitude\":52.716014,\"longitude\":1.614495},'
    r'\"uprn\":\"10023461458\",\"postalCode\":\"NR29 4RG\",'
    r'\"propertyNumberOrName\":\"Martham Mill\"}'
)


def test_parse_detail_geo_location_object_escaped() -> None:
    """An escaped `location` object yields the full geo record."""
    geo = parse_detail_geo(_LOCATION_ESCAPED, search_outcode="NR29")
    assert geo == {
        "lat": 52.716014,
        "lng": 1.614495,
        "postcode": "NR29 4RG",
        "outcode": "NR29",
        "source": "detail_location",
        "uprn": "10023461458",
        "number_or_name": "Martham Mill",
        # No `address` twin in this snippet, so there is no full street address.
        "full_address": None,
    }


def test_parse_detail_geo_location_object_unescaped() -> None:
    """The same `location` object is recognised when its quotes are not escaped."""
    html = (
        '"location":{"outcode":"NR29",'
        '"coordinates":{"latitude":52.716014,"longitude":1.614495},'
        '"uprn":"10023461458","postalCode":"NR29 4RG"}'
    )
    geo = parse_detail_geo(html)
    assert geo is not None
    assert geo["source"] == "detail_location"
    assert geo["postcode"] == "NR29 4RG"


def test_parse_detail_geo_address_twin() -> None:
    """A lone `address` object is used and reported as `detail_address_obj`."""
    html = (
        '"address":{"fullAddress":"Riverside, Martham NR29",'
        '"latitude":52.716014,"longitude":1.614495,'
        '"outcode":"NR29","postcode":"NR29 4RG","uprn":"10023461458"}'
    )
    geo = parse_detail_geo(html)
    assert geo is not None
    assert geo["source"] == "detail_address_obj"
    assert (geo["lat"], geo["lng"], geo["postcode"]) == (52.716014, 1.614495, "NR29 4RG")
    assert geo["uprn"] == "10023461458"
    assert geo["full_address"] == "Riverside, Martham NR29"


def test_parse_detail_geo_merges_location_uprn_with_address_full_address() -> None:
    # Real detail pages carry both wrappers: the `location` object holds the
    # uprn + house number/name, the `address` twin holds the full street
    # address. They share a uprn, so the twin's fullAddress is attached.
    html = (
        '"location":{"outcode":"NR29",'
        '"coordinates":{"latitude":52.716014,"longitude":1.614495},'
        '"uprn":"10023461458","postalCode":"NR29 4RG",'
        '"propertyNumberOrName":"Martham Mill"}'
        '"address":{"fullAddress":"Riverside, Martham NR29",'
        '"latitude":52.716014,"longitude":1.614495,'
        '"outcode":"NR29","postcode":"NR29 4RG","uprn":"10023461458"}'
    )
    geo = parse_detail_geo(html)
    assert geo is not None
    assert geo["source"] == "detail_location"
    assert geo["uprn"] == "10023461458"
    assert geo["number_or_name"] == "Martham Mill"
    assert geo["full_address"] == "Riverside, Martham NR29"


def test_parse_detail_geo_does_not_borrow_comparable_full_address() -> None:
    # The only `address` twin on the page belongs to a different uprn (a
    # comparable listing). With a uprn to match on, an unrelated twin is never
    # borrowed — full_address stays None rather than grabbing the wrong street.
    html = (
        '"location":{"outcode":"NR29",'
        '"coordinates":{"latitude":52.716014,"longitude":1.614495},'
        '"uprn":"10023461458","postalCode":"NR29 4RG"}'
        '"address":{"fullAddress":"Some Comparable, Elsewhere EN2",'
        '"latitude":51.65,"longitude":-0.08,"uprn":"99999999"}'
    )
    geo = parse_detail_geo(html)
    assert geo is not None
    assert geo["uprn"] == "10023461458"
    assert geo["full_address"] is None


def test_parse_detail_geo_ignores_poi_coordinates() -> None:
    # A charger POI (its coordinates NOT wrapped in a "location" object) followed
    # by the property's own "location" wrapper. Anchoring on the wrapper means
    # the POI's coordinates are ignored and the property's are returned.
    poi = (
        '"name":"Martham Community Centre","numberOfConnectors":2,'
        '"postcode":"NR29 4SN","coordinates":{"latitude":52.699379,"longitude":1.62921}'
    )
    prop = (
        '"location":{"outcode":"NR29",'
        '"coordinates":{"latitude":52.716014,"longitude":1.614495},'
        '"uprn":"10023461458","postalCode":"NR29 4RG"}'
    )
    geo = parse_detail_geo(poi + prop)
    assert geo is not None
    assert geo["source"] == "detail_location"
    # The property's coords win, not the community centre's.
    assert (geo["lat"], geo["lng"]) == (52.716014, 1.614495)
    assert geo["postcode"] == "NR29 4RG"


def test_parse_detail_geo_prefers_location_matching_search_outcode() -> None:
    # Page embeds two location objects (e.g. a comparable then the property).
    # With a search outcode, the one in that outcode is preferred; without one,
    # the first (document order = primary listing) is returned.
    comparable = (
        '"location":{"outcode":"EN2",'
        '"coordinates":{"latitude":51.65,"longitude":-0.08},'
        '"postalCode":"EN2 6SN"}'
    )
    target = (
        '"location":{"outcode":"NR29",'
        '"coordinates":{"latitude":52.716014,"longitude":1.614495},'
        '"postalCode":"NR29 4RG"}'
    )
    geo = parse_detail_geo(comparable + target, search_outcode="NR29")
    assert geo is not None and geo["postcode"] == "NR29 4RG"
    geo_first = parse_detail_geo(comparable + target)
    assert geo_first is not None and geo_first["postcode"] == "EN2 6SN"


def test_parse_detail_geo_rejects_out_of_england() -> None:
    """Coordinates outside the England bounds make the whole parse return None."""
    html = (
        '"location":{"outcode":"NR29",'
        '"coordinates":{"latitude":10.0,"longitude":10.0},'
        '"uprn":"1","postalCode":"NR29 4RG"}'
    )
    assert parse_detail_geo(html) is None


def test_parse_detail_geo_drops_inconsistent_postcode() -> None:
    # postalCode outcode (AB12) disagrees with the object's own outcode (NR29):
    # keep the coordinates, drop the untrustworthy postcode.
    html = (
        '"location":{"outcode":"NR29",'
        '"coordinates":{"latitude":52.716014,"longitude":1.614495},'
        '"uprn":"1","postalCode":"AB12 3CD"}'
    )
    geo = parse_detail_geo(html)
    assert geo is not None
    assert geo["lat"] == 52.716014
    assert geo["postcode"] is None


def test_parse_detail_geo_returns_none_for_garbage() -> None:
    """Input with no property location/address wrapper parses to None."""
    assert parse_detail_geo("no data here") is None
    assert parse_detail_geo("") is None
    # Coordinates that are not inside a property location/address wrapper (e.g.
    # only an unwrapped POI) yield nothing — safe degradation to the outcode.
    assert parse_detail_geo('"name":"X","coordinates":{"latitude":51.5,"longitude":-0.1}') is None


def _raw(**overrides: object) -> dict:
    """Build a baseline raw search-card listing, with any field overridden by keyword."""
    raw = {
        "id": "123",
        "url": "/for-sale/details/123/",
        "address": "South Street, Bromley BR1",
        "price": 500000,
        "beds": 2,
        "baths": 1,
        "property_type": "Flat",
    }
    raw.update(overrides)
    return raw


def test_transform_uses_detail_coordinates_with_agreeing_postcode() -> None:
    """Detail coordinates and postcode are both used when the outcodes agree."""
    detail = {"lat": 51.401, "lng": 0.011, "postcode": "BR1 3CD", "outcode": "BR1"}
    result = transform_property(
        _raw(), StubPostcodeIndex("BR1 2AB"), PC_COORDS, search_outcode="BR1", detail=detail
    )
    assert result is not None
    # Extracted detail postcode agrees with the coordinate-nearest outcode -> trusted.
    assert result["Postcode"] == "BR1 3CD"
    assert result["Postcode source"] == "detail_address"
    assert result["Inferred postcode"] == "BR1 2AB"
    assert (result["lat"], result["lon"]) == (51.401, 0.011)


def test_transform_uses_nearest_when_detail_postcode_mismatches() -> None:
    """A detail postcode in a different outcode loses to the spatial lookup."""
    detail = {"lat": 51.401, "lng": 0.011, "postcode": "E14 9SS", "outcode": "E14"}
    result = transform_property(
        _raw(), StubPostcodeIndex("BR1 2AB"), PC_COORDS, search_outcode="BR1", detail=detail
    )
    assert result is not None
    # Mismatching detail postcode is rejected in favour of the spatial value.
    assert result["Postcode"] == "BR1 2AB"
    assert result["Postcode source"] == "detail_coordinates"


def test_transform_geocodes_detail_postcode_without_coordinates() -> None:
    """A detail postcode with no coordinates is geocoded through PC_COORDS."""
    detail = {"lat": None, "lng": None, "postcode": "SW1A 1AA", "outcode": "SW1A"}
    result = transform_property(
        _raw(), StubPostcodeIndex(), PC_COORDS, search_outcode="BR1", detail=detail
    )
    assert result is not None
    assert result["Postcode"] == "SW1A 1AA"
    assert result["Postcode source"] == "detail_address"
    assert (result["lat"], result["lon"]) == PC_COORDS["SW1A 1AA"]


def test_transform_without_detail_falls_back_to_search_outcode() -> None:
    # No detail, address has no recognizable outcode -> coarse search-outcode centroid.
    result = transform_property(
        _raw(address="A street with no postcode"),
        StubPostcodeIndex(),
        PC_COORDS,
        search_outcode="BR1",
        detail=None,
    )
    assert result is not None
    assert result["Postcode"] == "BR1 2AB"
    assert result["Postcode source"] == "search_outcode"
    # No detail page -> no UPRN / house number recovered.
    assert result["UPRN"] is None
    assert result["Property number or name"] is None


def test_transform_emits_uprn_and_house_numbered_address_from_detail() -> None:
    """UPRN, house number and full address from the detail page reach the output."""
    detail = {
        "lat": 51.401,
        "lng": 0.011,
        "postcode": "BR1 3CD",
        "outcode": "BR1",
        "uprn": "100023461458",
        "number_or_name": "12",
        "full_address": "South Street, Bromley BR1",
    }
    result = transform_property(
        _raw(), StubPostcodeIndex("BR1 2AB"), PC_COORDS, search_outcode="BR1", detail=detail
    )
    assert result is not None
    assert result["UPRN"] == "100023461458"
    assert result["Property number or name"] == "12"
    # The detail full address replaces the outcode-level card address, and the
    # house number is prepended for a near-exact Property Register match.
    assert result["Listing raw address"] == "South Street, Bromley BR1"
    assert result["Address per Property Register"] == "12, South Street, Bromley"


def test_transform_ignores_out_of_england_detail_coords() -> None:
    """Out-of-bounds detail coordinates are not propagated to the result."""
    detail = {"lat": 10.0, "lng": 10.0, "postcode": "ZZ9 9ZZ", "outcode": "ZZ9"}
    result = transform_property(
        _raw(), StubPostcodeIndex("BR1 2AB"), PC_COORDS, search_outcode="BR1", detail=detail
    )
    assert result is not None
    # Bad detail coords are discarded; falls through to the address outcode (BR1).
    assert result["Postcode source"] == "address_outcode"
    assert 49 <= result["lat"] <= 56