import logging
import time

import httpx

from constants import (
    PAGE_SIZE,
    DELAY_BETWEEN_PAGES,
    SEARCH_URL,
    TYPEAHEAD_URL,
)
from http_client import fetch_with_retry
from spatial import PostcodeSpatialIndex
from transform import transform_property

log = logging.getLogger("rightmove")

# Outcode ID cache (Rightmove typeahead → internal ID)
outcode_cache: dict[str, str] = {}

# Rightmove hard-caps pagination at index 1008 (42 pages × 24 results).
# Requesting index >= 1008 returns HTTP 400.
_MAX_INDEX = 1008


def resolve_outcode_id(client: httpx.Client, outcode: str) -> str | None:
    """Look up Rightmove's internal ID for an outcode via the typeahead API.

    Successful lookups are memoised in the module-level ``outcode_cache``;
    failed lookups are not cached, so they are retried on the next call.

    Args:
        client: HTTP client handed to ``fetch_with_retry``.
        outcode: Outcode to resolve; compared for exact equality against
            each match's ``displayName``.

    Returns:
        The internal ID as a string, or ``None`` when the request yields no
        data or no ``OUTCODE`` match with that display name is present.
    """
    if outcode in outcode_cache:
        return outcode_cache[outcode]

    data = fetch_with_retry(
        client,
        TYPEAHEAD_URL,
        {"query": outcode, "limit": "10", "exclude": "STREET"},
    )
    if not data:
        return None

    for match in data.get("matches", []):
        if match.get("type") == "OUTCODE" and match.get("displayName") == outcode:
            rid = str(match["id"])
            outcode_cache[outcode] = rid
            return rid

    log.debug("Outcode %s not found in typeahead results", outcode)
    return None


def _parse_result_count(raw: object) -> int:
    """Convert a ``resultCount`` payload value into an int.

    Accepts an int, a string with optional thousands separators
    (e.g. ``"1,234"``), or ``None`` (treated as 0).

    Raises:
        ValueError: if the value is a string that is not a valid integer
            once commas are removed.
    """
    if raw is None:
        return 0
    if isinstance(raw, int):
        return raw
    return int(str(raw).replace(",", ""))


def _paginate(
    client: httpx.Client,
    outcode_id: str,
    outcode: str,
    channel_cfg: dict,
    pc_index: PostcodeSpatialIndex,
    max_properties: int | None = None,
) -> tuple[list[dict], int]:
    """Paginate through search results.

    Pages are requested from ``SEARCH_URL`` in steps of ``PAGE_SIZE`` until
    one of the following happens: a fetch returns nothing, a page has no
    properties, ``max_properties`` is reached, the reported result count is
    exhausted, or the ``_MAX_INDEX`` pagination cap is hit.

    Args:
        client: HTTP client handed to ``fetch_with_retry``.
        outcode_id: Rightmove internal ID used in ``locationIdentifier``.
        outcode: Outcode, used for logging and passed to
            ``transform_property``.
        channel_cfg: Must provide the ``sortType``, ``channel`` and
            ``transactionType`` keys.
        pc_index: Spatial index passed through to ``transform_property``.
        max_properties: Optional cap; pagination stops as soon as this many
            properties have been collected.

    Returns:
        ``(properties, result_count)`` where ``result_count`` is the total
        reported by the most recent non-empty page (0 if none was fetched).
    """
    properties: list[dict] = []
    index = 0
    result_count = 0

    while True:
        params = {
            "useLocationIdentifier": "true",
            "locationIdentifier": f"OUTCODE^{outcode_id}",
            "index": str(index),
            "sortType": channel_cfg["sortType"],
            "channel": channel_cfg["channel"],
            "transactionType": channel_cfg["transactionType"],
        }
        data = fetch_with_retry(client, SEARCH_URL, params)
        if not data:
            log.warning(
                "Failed to fetch index %d for %s/%s",
                index,
                outcode,
                channel_cfg["channel"],
            )
            break

        raw_props = data.get("properties", [])
        if not raw_props:
            break

        # Parse the total *before* processing the page so that the early
        # return below reports this page's count rather than a stale value
        # (previously 0 when the cap was reached on the first page).
        result_count = _parse_result_count(data.get("resultCount", "0"))

        for prop in raw_props:
            try:
                transformed = transform_property(prop, outcode, pc_index)
            except Exception as exc:
                # Best-effort: one bad listing must not abort the whole page.
                log.warning(
                    "Rightmove %s/%s property %s failed to transform: %s",
                    outcode,
                    channel_cfg["channel"],
                    prop.get("id", "?"),
                    exc,
                )
                continue
            if transformed:
                properties.append(transformed)
                if max_properties is not None and len(properties) >= max_properties:
                    return properties, result_count

        # Check if there are more pages
        index += PAGE_SIZE
        if index >= result_count:
            break
        if index >= _MAX_INDEX:
            log.warning(
                "%s/%s: %d results exceed Rightmove's %d-result page cap",
                outcode,
                channel_cfg["channel"],
                result_count,
                _MAX_INDEX,
            )
            break
        time.sleep(DELAY_BETWEEN_PAGES)

    return properties, result_count


def search_outcode(
    client: httpx.Client,
    outcode_id: str,
    outcode: str,
    channel_cfg: dict,
    pc_index: PostcodeSpatialIndex,
    max_properties: int | None = None,
) -> list[dict]:
    """Collect transformed search results for one outcode + channel.

    Thin wrapper around ``_paginate`` that discards the reported result
    count and never returns more than ``max_properties`` items.

    Args:
        client: HTTP client handed to ``fetch_with_retry``.
        outcode_id: Rightmove internal ID for the outcode.
        outcode: Outcode being searched.
        channel_cfg: Channel configuration (see ``_paginate``).
        pc_index: Spatial index passed through to ``transform_property``.
        max_properties: Optional cap on the number of returned properties.

    Returns:
        The list of transformed properties, truncated to ``max_properties``
        when a cap is given.
    """
    properties, _ = _paginate(
        client,
        outcode_id,
        outcode,
        channel_cfg,
        pc_index,
        max_properties=max_properties,
    )
    if max_properties is not None:
        # Defensive truncation; slicing is a no-op when already within cap.
        return properties[:max_properties]
    return properties