"""Tests for the Rightmove detail-page postcode extractor. The search API only returns an outcode-level ``displayAddress``; the property's TRUE full postcode lives on its detail page inside ``window.__PAGE_MODEL`` as ``propertyData.address.{outcode, incode}``. ``parse_detail_postcode`` recovers it. These tests build a faithful __PAGE_MODEL: a devalue-style flattened object graph whose ``data`` field is a JSON STRING of a flat array where every integer inside a container is an index reference into that same array. """ import json from rightmove import _extract_page_model_literal, parse_detail_postcode def _page_model_html(flat: list, *, encoding: str = "json") -> str: """Wrap a flattened object-graph array in a realistic detail-page \n" "" ) # A faithful slice of a real listing: root -> propertyData -> address, with a # decoy nearestStations array (which carries NO postcodes on the live page) to # prove the parser anchors on the property's own address, not a nearby POI. _FLAT_SW9 = [ {"propertyData": 1}, # 0: root { "id": "89089584", "address": 2, "location": 4, "nearestStations": 6, }, # 1: propertyData { "displayAddress": "Caldwell Street, Stockwell", "countryCode": "GB", "ukCountry": "England", "outcode": "SW9", "incode": "0HD", }, # 2: address None, # 3: filler { "latitude": 51.477238, "longitude": -0.116819, "pinType": "ACCURATE_POINT", }, # 4: location None, # 5: filler [7, 8], # 6: nearestStations (references) {"name": "Oval Station", "distance": 0.36}, # 7: station, no postcode {"name": "Stockwell Station", "distance": 0.41}, # 8: station, no postcode ] def test_parses_full_postcode_from_outcode_and_incode() -> None: html = _page_model_html(_FLAT_SW9) assert parse_detail_postcode(html) == "SW9 0HD" def test_extract_page_model_literal_brace_matches_nested_object() -> None: # The literal must include the whole nested object, not stop at the first # closing brace inside the escaped data string. html = _page_model_html(_FLAT_SW9) literal = _extract_page_model_literal(html) assert literal is not None assert literal.startswith("{") and literal.endswith("}") # Round-trips back to a dict with the expected top-level keys. assert set(json.loads(literal)) == {"data", "encoding"} def test_normalises_unspaced_incode() -> None: flat = [dict(node) if isinstance(node, dict) else node for node in _FLAT_SW9] flat[2] = {**_FLAT_SW9[2], "outcode": "e20", "incode": "1fh"} assert parse_detail_postcode(_page_model_html(flat)) == "E20 1FH" def test_returns_none_when_address_missing() -> None: # The location wrapper can be empty/absent on some listings; the caller then # keeps the coordinate fallback, so we must return None (not raise). flat = [ {"propertyData": 1}, {"id": "1", "location": 2}, {"latitude": 51.5, "longitude": -0.1}, ] assert parse_detail_postcode(_page_model_html(flat)) is None def test_returns_none_when_incode_blank() -> None: flat = [dict(node) if isinstance(node, dict) else node for node in _FLAT_SW9] flat[2] = {**_FLAT_SW9[2], "incode": ""} assert parse_detail_postcode(_page_model_html(flat)) is None def test_returns_none_for_non_postcode_pair() -> None: # A structurally-invalid outcode/incode pair is rejected by the validator. flat = [dict(node) if isinstance(node, dict) else node for node in _FLAT_SW9] flat[2] = {**_FLAT_SW9[2], "outcode": "NOTAPC", "incode": "ZZ"} assert parse_detail_postcode(_page_model_html(flat)) is None def test_returns_none_without_page_model() -> None: assert parse_detail_postcode("") is None assert parse_detail_postcode("no model") is None # Malformed JSON in the data field degrades gracefully. broken = '' assert parse_detail_postcode(broken) is None